Browse Source

Fix header matches potentially occurring in the record body

master
JustAnotherArchivist 1 year ago
parent
commit
65a47d5091
1 changed files with 15 additions and 14 deletions
  1. +15
    -14
      warc-dump-responses.c

+ 15
- 14
warc-dump-responses.c View File

@@ -31,6 +31,7 @@ int main(int argc, char* argv[]) {
char* bufp;
char* m0;
char* m1;
+ char* eoh;
size_t record_bytes_read;
size_t record_length;
size_t nscan;
@@ -58,8 +59,16 @@ checkstate:
}
if (memcmp(bufp, "WARC/1.0\r\n", 10) == 0 || memcmp(bufp, "WARC/1.1\r\n", 10) == 0) {
// Got some headers; find the record type, content length, and end of headers
+ eoh = memmem(bufp, n, "\r\n\r\n", 4);
+ if (!eoh) {
+ fprintf(stderr, "Error: end of headers not found\n");
+ return 1;
+ }
+ eoh += 4;
+ DEBUG_PRINTF("Record body begins at %p (offset %zu)\n", (void*)eoh, eoh - bufp);
+
m0 = memmem(bufp, n, "\r\nContent-Length:", 17);
- if (!m0) {
+ if (!m0 || m0 >= eoh) {
fprintf(stderr, "Error: Content-Length missing\n");
return 1;
}
@@ -88,7 +97,7 @@ checkstate:
DEBUG_PRINTF("Record body length: %zu\n", record_length);

m0 = memmem(bufp, n, "\r\nWARC-Type:", 12);
- if (!m0) {
+ if (!m0 || m0 >= eoh) {
fprintf(stderr, "Error: WARC-Type missing\n");
return 1;
}
@@ -110,7 +119,7 @@ checkstate:

if (meta && state == STATE_RESPONSE_RECORD) {
m0 = memmem(bufp, n, "\r\nWARC-Target-URI:", 18);
- if (!m0) {
+ if (!m0 || m0 >= eoh) {
fprintf(stderr, "Error: WARC-Target-URI missing\n");
return 1;
}
@@ -134,17 +143,9 @@ checkstate:
fprintf(stdout, " %zu\n", record_length);
}

- m0 = memmem(bufp, n, "\r\n\r\n", 4);
- if (!m0) {
- fprintf(stderr, "Error: end of headers not found\n");
- return 1;
- }
- m0 += 4;
- DEBUG_PRINTF("Record body begins at %p (offset %zu)\n", (void*)m0, m0 - bufp);
-
- DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", m0 - bufp);
- n = n - (m0 - bufp);
- bufp = m0;
+ DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", eoh - bufp);
+ n = n - (eoh - bufp);
+ bufp = eoh;
record_bytes_read = 0;
goto checkstate;
} else {


Loading…
Cancel
Save