3 Commits

Author SHA1 Message Date
  JustAnotherArchivist 18a96ba246 Add test for warc-dump-responses 5 months ago
  JustAnotherArchivist bf79252001 Fix error when the terminating CRLFCRLF of a record is truncated 5 months ago
  JustAnotherArchivist c192e0c5d3 Fix false positive warning about possibly uninitialised record_bytes_read 5 months ago
2 changed files with 39 additions and 4 deletions
Split View
  1. +21
    -0
      .warc-dump-responses-test
  2. +18
    -4
      warc-dump-responses.c

+ 21
- 0
.warc-dump-responses-test View File

@@ -0,0 +1,21 @@
#!/bin/bash
# Regression test for warc-dump-responses.
#
# For every payload size below, and for both the plain and the --meta
# invocation, a single WARC response record is synthesised and piped through the
# warc-dump-responses binary that lives in the same directory as this script.
# The SHA-1 of the tool's output is compared with the SHA-1 of the expected
# output; the first mismatch is reported and the script exits with status 1.
#
# Payload sizes under test:
#   1048470..1048490  the range around where the terminating CRLFCRLF gets truncated
#   1048570..1048580  the range around the internal buffer size

# Print exactly $1 'y' characters with no newline in between or at the end.
payload() {
	yes | tr -d '\n' | head -c "$1"
}

# Print the output expected for a payload of $1 bytes.
# $2 is the meta flag; when it is non-empty a "URL size" line precedes the payload.
expected_output() {
	if [[ -n "$2" ]]; then
		printf 'https://example.org/ %d\n' "$1"
	fi
	payload "$1"
	printf '\n'
}

# Print one complete WARC response record whose body is a payload of $1 bytes,
# including the CRLFCRLF that terminates the record.
warc_record() {
	printf '%s\r\n' \
		'WARC/1.0' \
		"Content-Length: $1" \
		'WARC-Type: response' \
		'WARC-Target-URI: https://example.org/' \
		''
	payload "$1"
	printf '\r\n\r\n'
}

tool="$(dirname "$0")"/warc-dump-responses

for size in {1048470..1048490} {1048570..1048580}; do
	for flag in '' '--meta'; do
		want="$(expected_output "${size}" "${flag}" | sha1sum)"
		# ${flag} is deliberately unquoted: the empty value must expand to no argument at all.
		got="$(warc_record "${size}" | "${tool}" ${flag} | sha1sum)"

		if [[ "${want}" == "${got}" ]]; then
			continue
		fi

		printf 'Error: output mismatch on size %q, meta %q\n' "${size}" "${flag}"
		printf 'Expected hash: %q\n' "${want}"
		printf 'Actual hash: %q\n' "${got}"
		exit 1
	done
done

+ 18
- 4
warc-dump-responses.c View File

@@ -32,7 +32,7 @@ int main(int argc, char* argv[]) {
char* m0;
char* m1;
char* eoh;
size_t record_bytes_read;
size_t record_bytes_read = 0;
size_t record_length;
long int nscan;
bool meta = false;
@@ -160,12 +160,26 @@ checkstate:
if (record_length + 4 - record_bytes_read > n) {
// Only got part of the record body
DEBUG_PRINTF("Partial record\n");
// Handle the case when the terminating CRLFCRLF is truncated
size_t tocopy = record_length - record_bytes_read > n ? n : record_length - record_bytes_read;
if (state == STATE_RESPONSE_RECORD) {
DEBUG_PRINTF("Copying %zu bytes to stdout\n", n);
fwrite(bufp, 1, n, stdout);
DEBUG_PRINTF("Copying %zu bytes to stdout\n", tocopy);
fwrite(bufp, 1, tocopy, stdout);
}
record_bytes_read += n;
record_bytes_read += tocopy;
DEBUG_PRINTF("%zu of %zu bytes from this record written\n", record_bytes_read, record_length);
if (tocopy != n) {
DEBUG_PRINTF("Truncated end of block\n");
n = n - tocopy;
bufp = bufp + tocopy;
if (n < BUFSIZE) { // Should always be true
DEBUG_PRINTF("Buffer too small (%zu bytes), moving and refilling\n", n);
memmove(buf, bufp, n);
bufp = buf;
n += fread(buf + n, 1, BUFSIZE, stdin);
}
goto checkstate;
}
} else {
// Remainder of the record is in the buffer. Same logic as above for small records fitting in the buffer with the headers.
DEBUG_PRINTF("Full record\n");


Loading…
Cancel
Save