Browse Source

Add http-response-bodies

master
JustAnotherArchivist 1 year ago
parent
commit
1737842841
2 changed files with 229 additions and 0 deletions
  1. +1
    -0
      http-response-bodies
  2. +228
    -0
      http-response-bodies.c

+ 1
- 0
http-response-bodies View File

@@ -0,0 +1 @@
.make-and-exec

+ 228
- 0
http-response-bodies.c View File

@@ -0,0 +1,228 @@
#define _GNU_SOURCE
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Number of bytes requested from stdin per read; main's working buffer is twice this size.
// May be overridden at compile time by defining BUFSIZE before this point (e.g. -DBUFSIZE=...).
#ifndef BUFSIZE
#define BUFSIZE 1048576
#endif

// Parser states used by main
#define STATE_HEADERS 0 // Expecting the status line and headers of the next response
#define STATE_BODY 1 // Body with a Content-Length header
#define STATE_CHUNK_LINE 2 // Expecting a chunk size line (chunked transfer encoding)
#define STATE_CHUNK_CONTENTS 3 // Inside the data of a chunk

// printf-style debug output to stderr. Only active when compiled with DEBUG defined;
// otherwise it expands to an empty statement and its arguments are not evaluated.
#ifdef DEBUG
#define DEBUG_PRINTF(...) do { fprintf(stderr, __VA_ARGS__); } while (false)
#else
#define DEBUG_PRINTF(...) do {} while (false)
#endif

// Parses an unsigned integer in the given base (10 or 16) from the range [p, end).
// On success stores the value in *out and returns the number of characters consumed.
// Returns 0 if there is no digit at p or if the value does not fit into a size_t.
// Never reads at or beyond end, so it is safe on buffers that are not NUL-terminated.
static size_t parse_size(const char* p, const char* end, unsigned base, size_t* out) {
    size_t value = 0;
    size_t consumed = 0;
    for (; p + consumed < end; ++consumed) {
        unsigned char c = (unsigned char)p[consumed];
        unsigned digit;
        if (c >= '0' && c <= '9') {
            digit = (unsigned)(c - '0');
        } else if (base == 16 && c >= 'a' && c <= 'f') {
            digit = (unsigned)(c - 'a') + 10;
        } else if (base == 16 && c >= 'A' && c <= 'F') {
            digit = (unsigned)(c - 'A') + 10;
        } else {
            break;
        }
        if (value > ((size_t)-1 - digit) / base) {
            return 0; // Would overflow size_t
        }
        value = value * base + digit;
    }
    if (consumed > 0) {
        *out = value;
    }
    return consumed;
}

// Advances p past spaces and horizontal tabs without going beyond end.
static char* skip_blanks(char* p, const char* end) {
    while (p < end && (*p == ' ' || *p == '\t')) {
        ++p;
    }
    return p;
}

// Moves the *n unconsumed bytes at *bufp to the start of buf and appends up to BUFSIZE bytes from stdin.
// The caller must ensure *n < BUFSIZE so that the result fits into the 2 * BUFSIZE buffer.
// Sets *eof once stdin delivers a short read. Returns false on a read error.
static bool refill(char* buf, char** bufp, size_t* n, bool* eof) {
    DEBUG_PRINTF("Buffer too small (%zu bytes), moving and refilling\n", *n);
    if (*bufp != buf) {
        memmove(buf, *bufp, *n);
    }
    *bufp = buf;
    size_t got = fread(buf + *n, 1, BUFSIZE, stdin);
    *n += got;
    if (got < BUFSIZE) {
        if (ferror(stdin)) {
            return false;
        }
        *eof = true;
    }
    return true;
}

int main(int argc, char* argv[]) {
    //TODO --meta or a similar way to get something like that?

    // Read stdin, decode HTTP responses, dump all bodies to stdout.
    // The input is a sequence of HTTP/1.1 responses, each followed by a single LF.
    // One LF is inserted at the end of each response to ensure that a new response always begins on a new line.
    // Headers and chunk lines must fit into BUFSIZE.
    // Does not fully comply with the HTTP spec. For example, headers must be capitalised canonically,
    // and continuation lines and chunked trailers are unsupported.
    // Returns 0 on success and 1 (with a message on stderr) on malformed input or I/O errors.
    (void)argc;
    (void)argv;

    char buf[2 * BUFSIZE];
    char* bufp = buf;       // Start of the unconsumed data in buf
    size_t n = 0;           // Number of unconsumed bytes at bufp
    bool eof = false;       // Set once stdin is exhausted
    int state = STATE_HEADERS;
    size_t length = 0;      // Size of the current body or chunk
    size_t bytes_read = 0;  // Bytes of the current body or chunk already copied to stdout

    for (;;) {
        // Keep at least BUFSIZE bytes available while there is more input, so that headers,
        // chunk lines, and terminators never straddle the end of the buffered data.
        if (n < BUFSIZE && !eof && !refill(buf, &bufp, &n, &eof)) {
            fprintf(stderr, "Error: failed to read from stdin\n");
            return 1;
        }
        if (n == 0) {
            break;
        }

        DEBUG_PRINTF("Have %zu bytes of buffer (at %p)\n", n, (void*)bufp);
#ifdef DEBUG
        DEBUG_PRINTF("Beginning of buffer: ");
        for (size_t i = 0; i < n && i < 64; ++i) {
            unsigned char c = (unsigned char)bufp[i];
            if (isprint(c)) {
                DEBUG_PRINTF("%c", c);
            } else {
                DEBUG_PRINTF("\\x%02x", (unsigned)c);
            }
        }
        DEBUG_PRINTF("\n");
#endif
        DEBUG_PRINTF("State: %d\n", state);

        if (state == STATE_HEADERS) {
            if (n < 9) {
                fprintf(stderr, "Error: too little data before HTTP headers\n");
                return 1;
            }
            if (memcmp(bufp, "HTTP/1.1 ", 9) != 0) {
                fprintf(stderr, "Error: expected header line, got something else\n");
                return 1;
            }

            // Got some headers; find transfer encoding, content length, and end of headers
            char* eoh = memmem(bufp, n, "\r\n\r\n", 4);
            if (!eoh) {
                fprintf(stderr, "Error: end of headers not found\n");
                return 1;
            }
            eoh += 4;
            size_t hlen = (size_t)(eoh - bufp);
            DEBUG_PRINTF("Response body begins at %p (offset %zu)\n", (void*)eoh, hlen);

            // Only search within the header block so that body data can never be mistaken for a header.
            char* m0 = memmem(bufp, hlen, "\r\nContent-Length:", 17);
            if (m0) {
                DEBUG_PRINTF("Found Content-Length header at %p (offset %zu)\n", (void*)(m0 + 2), (size_t)(m0 + 2 - bufp));
                char* m1 = memmem(m0 + 2, (size_t)(eoh - (m0 + 2)), "\r\n", 2);
                if (!m1) {
                    fprintf(stderr, "Error: CRLF after Content-Length missing\n");
                    return 1;
                }
                m0 = skip_blanks(m0 + 17, m1);
                size_t ndigits = parse_size(m0, m1, 10, &length);
                if (ndigits == 0) {
                    fprintf(stderr, "Error: invalid Content-Length\n");
                    return 1;
                }
                m0 = skip_blanks(m0 + ndigits, m1);
                if (m0 != m1) {
                    fprintf(stderr, "Error: invalid Content-Length (noise before EOL)\n");
                    return 1;
                }
                DEBUG_PRINTF("Content length: %zu\n", length);

                state = STATE_BODY;
            } else {
                m0 = memmem(bufp, hlen, "\r\nTransfer-Encoding:", 20);
                if (!m0) {
                    fprintf(stderr, "Error: Content-Length and Transfer-Encoding missing\n");
                    return 1;
                }
                DEBUG_PRINTF("Found Transfer-Encoding header at %p (offset %zu)\n", (void*)(m0 + 2), (size_t)(m0 + 2 - bufp));
                char* m1 = memmem(m0 + 2, (size_t)(eoh - (m0 + 2)), "\r\n", 2);
                if (!m1) {
                    fprintf(stderr, "Error: CRLF after Transfer-Encoding missing\n");
                    return 1;
                }
                m0 = skip_blanks(m0 + 20, m1);
                // Check the remaining value length first so that memcmp stays within the header line.
                if (m1 - m0 < 7 || memcmp(m0, "chunked", 7) != 0) {
                    fprintf(stderr, "Error: unsupported Transfer-Encoding\n");
                    return 1;
                }
                m0 = skip_blanks(m0 + 7, m1);
                if (m0 != m1) {
                    fprintf(stderr, "Error: unsupported Transfer-Encoding\n");
                    return 1;
                }
                DEBUG_PRINTF("Chunked transfer encoding\n");

                state = STATE_CHUNK_LINE;
            }

            DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", hlen);
            n -= hlen;
            bufp = eoh;
            bytes_read = 0;
        } else if (state == STATE_BODY || state == STATE_CHUNK_CONTENTS) {
            size_t remaining = length - bytes_read;
            if (remaining > 0) {
                // Copy as much payload as is buffered, but never the terminator that follows it.
                size_t take = remaining < n ? remaining : n;
                DEBUG_PRINTF("Copying %zu bytes to stdout\n", take);
                if (fwrite(bufp, 1, take, stdout) != take) {
                    fprintf(stderr, "Error: failed to write to stdout\n");
                    return 1;
                }
                bufp += take;
                n -= take;
                bytes_read += take;
                DEBUG_PRINTF("%zu of %zu bytes from this response written\n", bytes_read, length);
                continue;
            }

            // Payload complete; consume the terminator.
            // A Content-Length body is followed by the LF separating responses.
            // Chunk data is followed by CRLF; a bare LF is tolerated as well.
            size_t eol = (state == STATE_CHUNK_CONTENTS && bufp[0] == '\r') ? 2 : 1;
            if (n < eol || bufp[eol - 1] != '\n') {
                fprintf(stderr, "Error: end of HTTP body not found\n");
                return 1;
            }
            DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", eol);
            bufp += eol;
            n -= eol;

            if (state == STATE_BODY) {
                // End of the response: emit the separating LF
                fputc('\n', stdout);
                state = STATE_HEADERS;
            } else {
                state = STATE_CHUNK_LINE;
            }
        } else if (state == STATE_CHUNK_LINE) {
            char* m1 = memmem(bufp, n, "\r\n", 2);
            if (!m1) {
                fprintf(stderr, "Error: chunk line EOL missing\n");
                return 1;
            }
            char* m0 = skip_blanks(bufp, m1);
            size_t ndigits = parse_size(m0, m1, 16, &length);
            if (ndigits == 0) {
                fprintf(stderr, "Error: invalid chunk length\n");
                return 1;
            }
            m0 = skip_blanks(m0 + ndigits, m1);
            // Chunk extensions (introduced by ';') are ignored
            if (m0 != m1 && *m0 != ';') {
                fprintf(stderr, "Error: invalid chunk length (noise before EOL)\n");
                return 1;
            }
            DEBUG_PRINTF("Chunk length: %zu bytes\n", length);

            DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", (size_t)(m1 + 2 - bufp));
            n -= (size_t)(m1 + 2 - bufp);
            bufp = m1 + 2;
            bytes_read = 0; // Every chunk is counted from zero

            if (length == 0) {
                // End of response, must be followed by CRLF + LF
                if (n < 3 && n < BUFSIZE && !eof && !refill(buf, &bufp, &n, &eof)) {
                    fprintf(stderr, "Error: failed to read from stdin\n");
                    return 1;
                }
                if (n < 3) {
                    fprintf(stderr, "Error: buffer exhausted while looking for empty chunk CRLF\n");
                    return 1;
                }
                if (bufp[0] != '\r' || bufp[1] != '\n' || bufp[2] != '\n') {
                    fprintf(stderr, "Error: end of HTTP body not found\n");
                    return 1;
                }
                n -= 3;
                bufp += 3;
                // End of the response: emit the separating LF
                fputc('\n', stdout);
                state = STATE_HEADERS;
            } else {
                state = STATE_CHUNK_CONTENTS;
            }
        }
    }

    if (state != STATE_HEADERS) {
        fprintf(stderr, "Error: incomplete body at the end of input\n");
        return 1;
    }
    if (fflush(stdout) != 0 || ferror(stdout)) {
        fprintf(stderr, "Error: failed to write to stdout\n");
        return 1;
    }
    return 0;
}

Loading…
Cancel
Save