|
|
@@ -14,6 +14,7 @@ import gzip |
|
|
|
import json |
|
|
|
import os.path |
|
|
|
import re |
|
|
|
import subprocess |
|
|
|
import sys |
|
|
|
import tarfile |
|
|
|
import zlib |
|
|
@@ -92,16 +93,27 @@ def test_gz(filename, offset, size, verbose=False): |
|
|
|
with open(filename, "r") as f_stream: |
|
|
|
f = RangeFile(f_stream, offset, size) |
|
|
|
try: |
|
|
|
gz = gzip.GzipFile(fileobj=f, mode="rb") |
|
|
|
while True: |
|
|
|
buf = gz.read(4096) |
|
|
|
if len(buf) == 0: |
|
|
|
break |
|
|
|
except (IOError, ValueError, zlib.error) as e: |
|
|
|
with open("/dev/null", "w") as dev_null: |
|
|
|
gz = subprocess.Popen(["gunzip", "-tv"], |
|
|
|
shell=False, |
|
|
|
stdin=subprocess.PIPE, |
|
|
|
stdout=dev_null, |
|
|
|
stderr=dev_null) |
|
|
|
while True: |
|
|
|
buf = f.read(4096) |
|
|
|
size -= len(buf) |
|
|
|
if len(buf) > 0: |
|
|
|
gz.stdin.write(buf) |
|
|
|
else: |
|
|
|
break |
|
|
|
gz.stdin.close() |
|
|
|
ret = gz.wait() |
|
|
|
if ret != 0: |
|
|
|
raise IOError("Could not decompress warc.gz. gunzip returned %d." % ret) |
|
|
|
except (IOError, OSError) as e: |
|
|
|
if verbose: |
|
|
|
print >>sys.stderr, e |
|
|
|
return False |
|
|
|
|
|
|
|
return True |
|
|
|
|
|
|
|
|
|
|
|