From 73f35f55913e2af31b8859f53bbdb724a7760831 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Mon, 25 Apr 2022 20:46:49 +0000 Subject: [PATCH] Fix infinite loop when file ends with something that is not a WARC record --- warc-tiny | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/warc-tiny b/warc-tiny index 3ffc767..22b7cd7 100755 --- a/warc-tiny +++ b/warc-tiny @@ -142,13 +142,15 @@ def iter_warc(f): # Read WARC header while b'\r\n\r\n' not in buf: try: - buf = buf + fp.read(16777216) + d = fp.read(16777216) except EOFError: break - if not buf: + if not d: break + buf += d if not buf: break + assert b'\r\n\r\n' in buf warcHeaderBuf, buf = buf.split(b'\r\n\r\n', 1) assert warcHeaderBuf.startswith(b'WARC/1.0\r\n') or warcHeaderBuf.startswith(b'WARC/1.1\r\n') assert b'\r\nContent-Length:' in warcHeaderBuf