Browse Source

Check for trailing zeroes in gzips.

master
Alard 11 years ago
parent
commit
d587465640
2 changed files with 36 additions and 25 deletions
  1. +17 -18 megawarc
  2. +19 -7 megawarc-fix

+17 -18 megawarc View File

@@ -169,24 +169,23 @@ def test_gz(filename, offset, size, verbose=False, copy_to_file=None):
f = CopyReader(f, copy_to_file)
start_pos = copy_to_file.tell()
try:
gz = subprocess.Popen(["gunzip", "-t", "-q"],
shell=False,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
while True:
buf = f.read(4096)
size -= len(buf)
if len(buf) > 0:
gz.stdin.write(buf)
else:
break
gz.stdin.close()
gz.stdout.close()
gz.stderr.close()
ret = gz.wait()
if ret != 0:
raise IOError("Could not decompress warc.gz. gunzip returned %d." % ret)
with open("/dev/null", "w") as dev_null:
gz = subprocess.Popen(["gunzip", "-tv"],
shell=False,
stdin=subprocess.PIPE,
stdout=dev_null,
stderr=dev_null)
while True:
buf = f.read(4096)
size -= len(buf)
if len(buf) > 0:
gz.stdin.write(buf)
else:
break
gz.stdin.close()
ret = gz.wait()
if ret != 0:
raise IOError("Could not decompress warc.gz. gunzip returned %d." % ret)
except (IOError, OSError) as e:
if verbose:
print >>sys.stderr, e


+19 -7 megawarc-fix View File

@@ -14,6 +14,7 @@ import gzip
import json
import os.path
import re
import subprocess
import sys
import tarfile
import zlib
@@ -92,16 +93,27 @@ def test_gz(filename, offset, size, verbose=False):
with open(filename, "r") as f_stream:
f = RangeFile(f_stream, offset, size)
try:
gz = gzip.GzipFile(fileobj=f, mode="rb")
while True:
buf = gz.read(4096)
if len(buf) == 0:
break
except (IOError, ValueError, zlib.error) as e:
with open("/dev/null", "w") as dev_null:
gz = subprocess.Popen(["gunzip", "-tv"],
shell=False,
stdin=subprocess.PIPE,
stdout=dev_null,
stderr=dev_null)
while True:
buf = f.read(4096)
size -= len(buf)
if len(buf) > 0:
gz.stdin.write(buf)
else:
break
gz.stdin.close()
ret = gz.wait()
if ret != 0:
raise IOError("Could not decompress warc.gz. gunzip returned %d." % ret)
except (IOError, OSError) as e:
if verbose:
print >>sys.stderr, e
return False

return True




Loading…
Cancel
Save