|
@@ -162,13 +162,14 @@ def iter_warc(f): |
|
|
else: |
|
|
else: |
|
|
httpDecompressor = DummyDecompressor() |
|
|
httpDecompressor = DummyDecompressor() |
|
|
if chunked: |
|
|
if chunked: |
|
|
|
|
|
pos = 0 |
|
|
while True: |
|
|
while True: |
|
|
try: |
|
|
try: |
|
|
chunkLineEnd = httpBody.index(b'\r\n') |
|
|
|
|
|
|
|
|
chunkLineEnd = httpBody.index(b'\r\n', pos) |
|
|
except ValueError: |
|
|
except ValueError: |
|
|
print('Error: could not find chunk line end in record {}, skipping'.format(recordID), file = sys.stderr) |
|
|
print('Error: could not find chunk line end in record {}, skipping'.format(recordID), file = sys.stderr) |
|
|
break |
|
|
break |
|
|
chunkLine = httpBody[:chunkLineEnd] |
|
|
|
|
|
|
|
|
chunkLine = httpBody[pos:chunkLineEnd] |
|
|
if b';' in chunkLine: |
|
|
if b';' in chunkLine: |
|
|
chunkLength = chunkLine[:chunkLine.index(b';')].strip() |
|
|
chunkLength = chunkLine[:chunkLine.index(b';')].strip() |
|
|
else: |
|
|
else: |
|
@@ -181,7 +182,7 @@ def iter_warc(f): |
|
|
break |
|
|
break |
|
|
chunk = httpDecompressor.decompress(httpBody[chunkLineEnd + 2 : chunkLineEnd + 2 + chunkLength]) |
|
|
chunk = httpDecompressor.decompress(httpBody[chunkLineEnd + 2 : chunkLineEnd + 2 + chunkLength]) |
|
|
yield HTTPBodyChunk(chunk) |
|
|
yield HTTPBodyChunk(chunk) |
|
|
httpBody = httpBody[chunkLineEnd + 2 + chunkLength + 2:] |
|
|
|
|
|
|
|
|
pos = chunkLineEnd + 2 + chunkLength + 2 |
|
|
else: |
|
|
else: |
|
|
yield HTTPBodyChunk(httpDecompressor.decompress(httpBody)) |
|
|
yield HTTPBodyChunk(httpDecompressor.decompress(httpBody)) |
|
|
else: |
|
|
else: |
|
|