Browse Source

Fix header matches potentially occurring in the record body

master
JustAnotherArchivist 1 year ago
parent
commit
65a47d5091
1 changed file with 15 additions and 14 deletions
  1. +15
    -14
      warc-dump-responses.c

+ 15
- 14
warc-dump-responses.c View File

@@ -31,6 +31,7 @@ int main(int argc, char* argv[]) {
char* bufp; char* bufp;
char* m0; char* m0;
char* m1; char* m1;
char* eoh;
size_t record_bytes_read; size_t record_bytes_read;
size_t record_length; size_t record_length;
size_t nscan; size_t nscan;
@@ -58,8 +59,16 @@ checkstate:
} }
if (memcmp(bufp, "WARC/1.0\r\n", 10) == 0 || memcmp(bufp, "WARC/1.1\r\n", 10) == 0) { if (memcmp(bufp, "WARC/1.0\r\n", 10) == 0 || memcmp(bufp, "WARC/1.1\r\n", 10) == 0) {
// Got some headers; find the record type, content length, and end of headers // Got some headers; find the record type, content length, and end of headers
eoh = memmem(bufp, n, "\r\n\r\n", 4);
if (!eoh) {
fprintf(stderr, "Error: end of headers not found\n");
return 1;
}
eoh += 4;
DEBUG_PRINTF("Record body begins at %p (offset %zu)\n", (void*)eoh, eoh - bufp);

m0 = memmem(bufp, n, "\r\nContent-Length:", 17); m0 = memmem(bufp, n, "\r\nContent-Length:", 17);
if (!m0) {
if (!m0 || m0 >= eoh) {
fprintf(stderr, "Error: Content-Length missing\n"); fprintf(stderr, "Error: Content-Length missing\n");
return 1; return 1;
} }
@@ -88,7 +97,7 @@ checkstate:
DEBUG_PRINTF("Record body length: %zu\n", record_length); DEBUG_PRINTF("Record body length: %zu\n", record_length);


m0 = memmem(bufp, n, "\r\nWARC-Type:", 12); m0 = memmem(bufp, n, "\r\nWARC-Type:", 12);
if (!m0) {
if (!m0 || m0 >= eoh) {
fprintf(stderr, "Error: WARC-Type missing\n"); fprintf(stderr, "Error: WARC-Type missing\n");
return 1; return 1;
} }
@@ -110,7 +119,7 @@ checkstate:


if (meta && state == STATE_RESPONSE_RECORD) { if (meta && state == STATE_RESPONSE_RECORD) {
m0 = memmem(bufp, n, "\r\nWARC-Target-URI:", 18); m0 = memmem(bufp, n, "\r\nWARC-Target-URI:", 18);
if (!m0) {
if (!m0 || m0 >= eoh) {
fprintf(stderr, "Error: WARC-Target-URI missing\n"); fprintf(stderr, "Error: WARC-Target-URI missing\n");
return 1; return 1;
} }
@@ -134,17 +143,9 @@ checkstate:
fprintf(stdout, " %zu\n", record_length); fprintf(stdout, " %zu\n", record_length);
} }


m0 = memmem(bufp, n, "\r\n\r\n", 4);
if (!m0) {
fprintf(stderr, "Error: end of headers not found\n");
return 1;
}
m0 += 4;
DEBUG_PRINTF("Record body begins at %p (offset %zu)\n", (void*)m0, m0 - bufp);

DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", m0 - bufp);
n = n - (m0 - bufp);
bufp = m0;
DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", eoh - bufp);
n = n - (eoh - bufp);
bufp = eoh;
record_bytes_read = 0; record_bytes_read = 0;
goto checkstate; goto checkstate;
} else { } else {


Loading…
Cancel
Save