diff --git a/warc-dump-responses.c b/warc-dump-responses.c index b4cfe14..e49eb0b 100644 --- a/warc-dump-responses.c +++ b/warc-dump-responses.c @@ -160,12 +160,26 @@ checkstate: if (record_length + 4 - record_bytes_read > n) { // Only got part of the record body DEBUG_PRINTF("Partial record\n"); + // Handle the case when the terminating CRLFCRLF is truncated + size_t tocopy = record_length - record_bytes_read > n ? n : record_length - record_bytes_read; if (state == STATE_RESPONSE_RECORD) { - DEBUG_PRINTF("Copying %zu bytes to stdout\n", n); - fwrite(bufp, 1, n, stdout); + DEBUG_PRINTF("Copying %zu bytes to stdout\n", tocopy); + fwrite(bufp, 1, tocopy, stdout); } - record_bytes_read += n; + record_bytes_read += tocopy; DEBUG_PRINTF("%zu of %zu bytes from this record written\n", record_bytes_read, record_length); + if (tocopy != n) { + DEBUG_PRINTF("Truncated end of block\n"); + n = n - tocopy; + bufp = bufp + tocopy; + if (n < BUFSIZE) { // Should always be true + DEBUG_PRINTF("Buffer too small (%zu bytes), moving and refilling\n", n); + memmove(buf, bufp, n); + bufp = buf; + n += fread(buf + n, 1, BUFSIZE, stdin); + } + goto checkstate; + } } else { // Remainder of the record is in the buffer. Same logic as above for small records fitting in the buffer with the headers. DEBUG_PRINTF("Full record\n");