Browse Source

Fix infinite loop when file ends with something that is not a WARC record

master
JustAnotherArchivist 2 years ago
parent
commit
73f35f5591
1 changed file with 4 additions and 2 deletions
  1. +4
    -2
      warc-tiny

+ 4
- 2
warc-tiny View File

@@ -142,13 +142,15 @@ def iter_warc(f):
 # Read WARC header
 while b'\r\n\r\n' not in buf:
     try:
-        buf = buf + fp.read(16777216)
+        d = fp.read(16777216)
     except EOFError:
         break
-    if not buf:
+    if not d:
         break
+    buf += d
 if not buf:
     break
+assert b'\r\n\r\n' in buf
 warcHeaderBuf, buf = buf.split(b'\r\n\r\n', 1)
 assert warcHeaderBuf.startswith(b'WARC/1.0\r\n') or warcHeaderBuf.startswith(b'WARC/1.1\r\n')
 assert b'\r\nContent-Length:' in warcHeaderBuf


Loading…
Cancel
Save