|
|
@@ -7,13 +7,7 @@ import sys |
|
|
|
import tempfile |
|
|
|
|
|
|
|
|
|
|
|
# Strict argument check: exactly one positional FILE is required, and
# -h/--help prints the same usage text. Usage goes to stderr and the
# process exits with status 1.
# NOTE(review): a second, more permissive check (accepting stdin) appears
# further down in this listing; this one exits first whenever no FILE is
# given. It looks like the pre-change side of a diff - confirm which check
# is meant to remain.
arg_count_ok = len(sys.argv) == 2
if not arg_count_ok or sys.argv[1] in ('--help', '-h'):
    for usage_line in (
        'Usage: unzstd-warc FILE',
        'Decompresses FILE and writes its contents to stdout',
    ):
        print(usage_line, file=sys.stderr)
    sys.exit(1)
|
|
|
|
|
|
|
|
|
|
|
with open(sys.argv[1], 'rb') as fp: |
|
|
|
def get_dict(fp): |
|
|
|
magic = fp.read(4) |
|
|
|
assert magic == b'\x5D\x2A\x4D\x18', 'not a valid warc.zst with a custom dictionary' |
|
|
|
dictSize = fp.read(4) |
|
|
@@ -30,8 +24,24 @@ with open(sys.argv[1], 'rb') as fp: |
|
|
|
assert p.returncode == 0, f'unzstd exited non-zero: return code {p.returncode}, stderr: {p.stderr!r}' |
|
|
|
d = out |
|
|
|
#elif d.startswith(b'\x37\xA4\x30\xEC'): # Uncompressed dict, nothing to do |
|
|
|
return d |
|
|
|
|
|
|
|
|
|
|
|
# Show usage on stderr and exit with status 1 when either
#  - no single FILE argument was given and stdin is a terminal (so there is
#    nothing to decompress), or
#  - the first argument is --help / -h.
# The slice sys.argv[1:2] is used so the comparison is safe when no
# argument is present at all.
nothing_to_read = len(sys.argv) != 2 and sys.stdin.isatty()
if nothing_to_read or sys.argv[1:2] in (['--help'], ['-h']):
    for usage_line in (
        'Usage: unzstd-warc [FILE]',
        'Decompresses FILE or stdin and writes its contents to stdout',
    ):
        print(usage_line, file=sys.stderr)
    sys.exit(1)
|
|
|
|
|
|
|
|
|
|
|
# Obtain the dictionary bytes `d` from whichever input carries the archive.
if len(sys.argv) != 2:
    # No FILE argument: take the dictionary from standard input.
    # NOTE(review): the unbuffered raw stream is presumably chosen so Python
    # does not read ahead past the dictionary, leaving the rest of the
    # stream for the zstdcat child that is started without a FILE argument
    # - confirm.
    d = get_dict(sys.stdin.buffer.raw)
else:
    # FILE argument given: read the dictionary from that file.
    with open(sys.argv[1], 'rb') as fp:
        d = get_dict(fp)
|
|
|
# Decompress the archive with zstdcat, passing the dictionary bytes in `d`
# via a named temporary file (-D takes a path). zstdcat inherits this
# process's stdout, so the decompressed data is written there directly.
with tempfile.NamedTemporaryFile() as dfp:
    dfp.write(d)
    # zstdcat opens the dictionary by path, so the bytes must leave Python's
    # write buffer before the child starts; otherwise it can see a truncated
    # or empty dictionary file.
    dfp.flush()

    # Build the command exactly once. FILE is appended only when it was
    # given on the command line; without it zstdcat reads from the inherited
    # stdin. sys.argv[1] must not be indexed unconditionally here, because
    # it does not exist in stdin mode.
    args = ['zstdcat', '-D', dfp.name]
    if len(sys.argv) == 2:
        args.append(sys.argv[1])

    pzstd = subprocess.Popen(args)
    # Wait for zstdcat to finish before leaving the `with` block, which
    # removes the temporary dictionary file.
    pzstd.communicate()