diff --git a/warc-dump-responses.c b/warc-dump-responses.c index e2516b4..7cec4f2 100644 --- a/warc-dump-responses.c +++ b/warc-dump-responses.c @@ -31,6 +31,7 @@ int main(int argc, char* argv[]) { char* bufp; char* m0; char* m1; + char* eoh; size_t record_bytes_read; size_t record_length; size_t nscan; @@ -58,8 +59,16 @@ checkstate: } if (memcmp(bufp, "WARC/1.0\r\n", 10) == 0 || memcmp(bufp, "WARC/1.1\r\n", 10) == 0) { // Got some headers; find the record type, content length, and end of headers + eoh = memmem(bufp, n, "\r\n\r\n", 4); + if (!eoh) { + fprintf(stderr, "Error: end of headers not found\n"); + return 1; + } + eoh += 4; + DEBUG_PRINTF("Record body begins at %p (offset %zu)\n", (void*)eoh, eoh - bufp); + m0 = memmem(bufp, n, "\r\nContent-Length:", 17); - if (!m0) { + if (!m0 || m0 >= eoh) { fprintf(stderr, "Error: Content-Length missing\n"); return 1; } @@ -88,7 +97,7 @@ checkstate: DEBUG_PRINTF("Record body length: %zu\n", record_length); m0 = memmem(bufp, n, "\r\nWARC-Type:", 12); - if (!m0) { + if (!m0 || m0 >= eoh) { fprintf(stderr, "Error: WARC-Type missing\n"); return 1; } @@ -110,7 +119,7 @@ checkstate: if (meta && state == STATE_RESPONSE_RECORD) { m0 = memmem(bufp, n, "\r\nWARC-Target-URI:", 18); - if (!m0) { + if (!m0 || m0 >= eoh) { fprintf(stderr, "Error: WARC-Target-URI missing\n"); return 1; } @@ -134,17 +143,9 @@ checkstate: fprintf(stdout, " %zu\n", record_length); } - m0 = memmem(bufp, n, "\r\n\r\n", 4); - if (!m0) { - fprintf(stderr, "Error: end of headers not found\n"); - return 1; - } - m0 += 4; - DEBUG_PRINTF("Record body begins at %p (offset %zu)\n", (void*)m0, m0 - bufp); - - DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", m0 - bufp); - n = n - (m0 - bufp); - bufp = m0; + DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", eoh - bufp); + n = n - (eoh - bufp); + bufp = eoh; record_bytes_read = 0; goto checkstate; } else {