diff --git a/warc-tiny b/warc-tiny index 3ffc767..22b7cd7 100755 --- a/warc-tiny +++ b/warc-tiny @@ -142,13 +142,15 @@ def iter_warc(f): # Read WARC header while b'\r\n\r\n' not in buf: try: - buf = buf + fp.read(16777216) + d = fp.read(16777216) except EOFError: break - if not buf: + if not d: break + buf += d if not buf: break + assert b'\r\n\r\n' in buf warcHeaderBuf, buf = buf.split(b'\r\n\r\n', 1) assert warcHeaderBuf.startswith(b'WARC/1.0\r\n') or warcHeaderBuf.startswith(b'WARC/1.1\r\n') assert b'\r\nContent-Length:' in warcHeaderBuf