/*
 * http-response-bodies.c
 *
 * Read a stream of HTTP/1.1 responses from stdin, decode them, and write all
 * bodies to stdout.
 *
 * Input:  each response must be followed by a single LF.
 * Output: one LF is written after every body so that a new response always
 *         begins on a new line.
 *
 * Limitations:
 *  - The header block and every chunk line must fit into BUFSIZE.
 *  - Does not fully comply with the HTTP spec: header names must be
 *    capitalised canonically ("Content-Length", "Transfer-Encoding"),
 *    continuation lines and chunked trailers are unsupported, and
 *    Content-Length wins when both headers are present.
 *
 * Source recovered from the patch "Add http-response-bodies"
 * (commit 1737842841dcadf2e26cdf3b855eb18aadb8bd74, JustAnotherArchivist,
 * 2023-01-09), which also adds the symlink
 * http-response-bodies -> .make-and-exec.
 */
#include <ctype.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#ifndef BUFSIZE
#define BUFSIZE 1048576
#endif

#define STATE_HEADERS 0
#define STATE_BODY 1 // Body with a Content-Length header
#define STATE_CHUNK_LINE 2
#define STATE_CHUNK_CONTENTS 3

#ifdef DEBUG
#define DEBUG_PRINTF(...) do { fprintf(stderr, __VA_ARGS__); } while (false)
#else
#define DEBUG_PRINTF(...) do {} while (false)
#endif

/* Print an error message (plus newline) to stderr and return the exit status 1. */
static int fail(const char *message)
{
    fprintf(stderr, "%s\n", message);
    return 1;
}

/*
 * Find the first occurrence of needle[0..needle_len) inside hay[0..hay_len).
 * Both ranges are raw bytes (no NUL terminator required).
 * Returns a pointer to the match, or NULL if there is none.
 */
static char *find_bytes(char *hay, size_t hay_len, const char *needle, size_t needle_len)
{
    if (needle_len == 0) {
        return hay;
    }
    while (hay_len >= needle_len) {
        /* Only positions that leave room for the whole needle can match. */
        char *hit = memchr(hay, needle[0], hay_len - needle_len + 1);
        if (!hit) {
            return NULL;
        }
        if (memcmp(hit, needle, needle_len) == 0) {
            return hit;
        }
        hay_len -= (size_t)(hit - hay) + 1;
        hay = hit + 1;
    }
    return NULL;
}

/* Advance p over spaces and horizontal tabs, never going past end. */
static char *skip_blanks(char *p, const char *end)
{
    while (p < end && (*p == ' ' || *p == '\t')) {
        ++p;
    }
    return p;
}

/*
 * Parse an unsigned integer in base 10 or 16 from the bytes [p, end).
 * Only plain digits are accepted (no sign, no "0x" prefix, no whitespace).
 * On success stores the value in *out and returns a pointer just past the
 * last digit; returns NULL if there is no digit or the value overflows size_t.
 */
static char *parse_size(char *p, const char *end, unsigned base, size_t *out)
{
    char *start = p;
    size_t value = 0;

    for (; p < end; ++p) {
        unsigned char c = (unsigned char)*p;
        unsigned digit;

        if (c >= '0' && c <= '9') {
            digit = (unsigned)(c - '0');
        } else if (base == 16 && c >= 'a' && c <= 'f') {
            digit = (unsigned)(c - 'a') + 10;
        } else if (base == 16 && c >= 'A' && c <= 'F') {
            digit = (unsigned)(c - 'A') + 10;
        } else {
            break;
        }
        if (value > (SIZE_MAX - digit) / base) {
            return NULL;
        }
        value = value * base + digit;
    }
    if (p == start) {
        return NULL;
    }
    *out = value;
    return p;
}

/*
 * Top up the buffer: if fewer than BUFSIZE unconsumed bytes remain at *cursor,
 * move them to the start of buf and read up to BUFSIZE more bytes from stdin.
 * buf must hold 2 * BUFSIZE bytes.  Returns the new number of unconsumed
 * bytes; a result below BUFSIZE means stdin is exhausted (or failed).
 */
static size_t refill(char *buf, char **cursor, size_t n)
{
    if (n < BUFSIZE) {
        DEBUG_PRINTF("Buffer too small (%zu bytes), moving and refilling\n", n);
        memmove(buf, *cursor, n);
        *cursor = buf;
        n += fread(buf + n, 1, BUFSIZE, stdin);
    }
    return n;
}

/* Write len bytes to stdout; returns false on a short write. */
static bool write_out(const char *data, size_t len)
{
    return fwrite(data, 1, len, stdout) == len;
}

#ifdef DEBUG
/* Dump at most the first 64 of the len bytes at p to stderr, escaping non-printable bytes. */
static void debug_dump(const char *p, size_t len)
{
    fprintf(stderr, "Beginning of buffer: ");
    for (size_t i = 0; i < len && i < 64; ++i) {
        unsigned char c = (unsigned char)p[i];
        if (isprint(c)) {
            fprintf(stderr, "%c", c);
        } else {
            fprintf(stderr, "\\x%02x", (unsigned)c);
        }
    }
    fprintf(stderr, "\n");
}
#endif

/*
 * Entry point.  Takes no arguments (argc/argv are ignored).
 * Returns 0 when all responses were decoded, 1 after printing an error to
 * stderr on malformed/truncated input or an I/O failure.
 */
int main(int argc, char *argv[])
{
    //TODO --meta or a similar way to get something like that?
    (void)argc;
    (void)argv;

    /* Static so that the 2 * BUFSIZE bytes do not live on the stack. */
    static char buf[2 * BUFSIZE];
    int state = STATE_HEADERS;
    size_t length = 0;     /* size of the current body or chunk */
    size_t bytes_read = 0; /* bytes of the current body or chunk already written */
    size_t n;              /* unconsumed bytes available at bufp */

    while ((n = fread(buf, 1, BUFSIZE, stdin)) > 0) {
        char *bufp = buf;

        /*
         * Every branch below refills after consuming data, so n == 0 means
         * either the buffer was emptied by a partial body (read more in the
         * outer loop) or stdin is exhausted (the outer fread returns 0).
         */
        while (n > 0) {
            DEBUG_PRINTF("Have %zu bytes of buffer (at %p)\n", n, (void *)bufp);
#ifdef DEBUG
            debug_dump(bufp, n);
#endif
            DEBUG_PRINTF("State: %d\n", state);

            if (state == STATE_HEADERS) {
                if (n < 9) {
                    return fail("Error: too little data before HTTP headers");
                }
                if (memcmp(bufp, "HTTP/1.1 ", 9) != 0) {
                    return fail("Error: expected header line, got something else");
                }

                // Got some headers; find transfer encoding, content length, and end of headers
                char *eoh = find_bytes(bufp, n, "\r\n\r\n", 4);
                if (!eoh) {
                    return fail("Error: end of headers not found");
                }
                eoh += 4;
                DEBUG_PRINTF("Response body begins at %p (offset %zu)\n", (void *)eoh, (size_t)(eoh - bufp));

                /*
                 * Header lines including their CRLFs, but excluding the blank
                 * line: searching only this span prevents a body that starts
                 * with a header-like string from being mistaken for a header.
                 */
                char *hdr_end = eoh - 2;
                size_t hdr_len = (size_t)(hdr_end - bufp);

                char *field = find_bytes(bufp, hdr_len, "\r\nContent-Length:", 17);
                if (field) {
                    DEBUG_PRINTF("Found Content-Length header at %p (offset %zu)\n", (void *)(field + 2), (size_t)(field + 2 - bufp));
                    char *p = field + 17;
                    char *eol = find_bytes(p, (size_t)(hdr_end - p), "\r\n", 2);
                    if (!eol) {
                        return fail("Error: CRLF after Content-Length missing");
                    }
                    p = skip_blanks(p, eol);
                    p = parse_size(p, eol, 10, &length);
                    if (!p) {
                        return fail("Error: invalid Content-Length");
                    }
                    p = skip_blanks(p, eol);
                    if (p != eol) {
                        return fail("Error: invalid Content-Length (noise before EOL)");
                    }
                    DEBUG_PRINTF("Content length: %zu\n", length);

                    state = STATE_BODY;
                } else {
                    field = find_bytes(bufp, hdr_len, "\r\nTransfer-Encoding:", 20);
                    if (!field) {
                        return fail("Error: Content-Length and Transfer-Encoding missing");
                    }
                    DEBUG_PRINTF("Found Transfer-Encoding header at %p (offset %zu)\n", (void *)(field + 2), (size_t)(field + 2 - bufp));
                    char *p = field + 20;
                    char *eol = find_bytes(p, (size_t)(hdr_end - p), "\r\n", 2);
                    if (!eol) {
                        return fail("Error: CRLF after Transfer-Encoding missing");
                    }
                    p = skip_blanks(p, eol);
                    if ((size_t)(eol - p) < 7 || memcmp(p, "chunked", 7) != 0) {
                        return fail("Error: unsupported Transfer-Encoding");
                    }
                    p = skip_blanks(p + 7, eol);
                    if (p != eol) {
                        return fail("Error: unsupported Transfer-Encoding");
                    }
                    DEBUG_PRINTF("Chunked transfer encoding\n");

                    state = STATE_CHUNK_LINE;
                }

                DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", (size_t)(eoh - bufp));
                n -= (size_t)(eoh - bufp);
                bufp = eoh;
                bytes_read = 0;
                n = refill(buf, &bufp, n);
            } else if (state == STATE_BODY || state == STATE_CHUNK_CONTENTS) {
                size_t remaining = length - bytes_read;

                if (remaining > n) {
                    // Only got part of the body
                    DEBUG_PRINTF("Partial body\n");
                    DEBUG_PRINTF("Copying %zu bytes to stdout\n", n);
                    if (!write_out(bufp, n)) {
                        return fail("Error: failed to write to stdout");
                    }
                    bytes_read += n;
                    DEBUG_PRINTF("%zu of %zu bytes from this response written\n", bytes_read, length);
                    n = 0; /* buffer fully consumed; the outer loop reads more */
                    continue;
                }

                // Remainder of the body (or chunk) is in the buffer.
                DEBUG_PRINTF("Full body\n");
                DEBUG_PRINTF("Copying %zu bytes to stdout\n", remaining);
                if (!write_out(bufp, remaining)) {
                    return fail("Error: failed to write to stdout");
                }
                bufp += remaining;
                n -= remaining;
                bytes_read = length;

                /* The terminator may not have been read yet. */
                n = refill(buf, &bufp, n);

                /* A chunk ends in CRLF (a bare LF is tolerated); a plain body is followed by LF. */
                if (state == STATE_CHUNK_CONTENTS && n > 0 && *bufp == '\r') {
                    ++bufp;
                    --n;
                }
                if (n == 0 || *bufp != '\n') {
                    return fail("Error: end of HTTP body not found");
                }
                ++bufp;
                --n;

                if (state == STATE_BODY) {
                    /* End of response: emit the separating LF. */
                    if (!write_out("\n", 1)) {
                        return fail("Error: failed to write to stdout");
                    }
                    state = STATE_HEADERS;
                } else {
                    state = STATE_CHUNK_LINE;
                }
                n = refill(buf, &bufp, n);
            } else { /* STATE_CHUNK_LINE */
                char *eol = find_bytes(bufp, n, "\r\n", 2);
                if (!eol) {
                    return fail("Error: chunk line EOL missing");
                }
                char *p = skip_blanks(bufp, eol);
                p = parse_size(p, eol, 16, &length);
                if (!p) {
                    return fail("Error: invalid chunk length");
                }
                p = skip_blanks(p, eol);
                /* Chunk extensions (introduced by ';') are ignored. */
                if (p != eol && *p != ';') {
                    return fail("Error: invalid chunk length (noise before EOL)");
                }
                DEBUG_PRINTF("Chunk length: %zu bytes\n", length);

                DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", (size_t)(eol + 2 - bufp));
                n -= (size_t)(eol + 2 - bufp);
                bufp = eol + 2;
                bytes_read = 0; /* nothing of this chunk has been written yet */

                if (length == 0) {
                    // End of response, must be followed by CRLF + LF
                    n = refill(buf, &bufp, n);
                    if (n < 3) {
                        return fail("Error: buffer exhausted while looking for empty chunk CRLF");
                    }
                    if (memcmp(bufp, "\r\n\n", 3) != 0) {
                        return fail("Error: end of HTTP body not found");
                    }
                    n -= 3;
                    bufp += 3;
                    /* End of response: emit the separating LF. */
                    if (!write_out("\n", 1)) {
                        return fail("Error: failed to write to stdout");
                    }
                    state = STATE_HEADERS;
                } else {
                    state = STATE_CHUNK_CONTENTS;
                }
                n = refill(buf, &bufp, n);
            }
        }
    }

    if (ferror(stdin)) {
        return fail("Error: failed to read from stdin");
    }
    if (state != STATE_HEADERS) {
        return fail("Error: incomplete body at the end of input");
    }
    if (fflush(stdout) != 0) {
        return fail("Error: failed to write to stdout");
    }
    return 0;
}