The little things give you away... A collection of various small helper stuff
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

229 lines
7.1 KiB

  #define _GNU_SOURCE
  #include <ctype.h>
  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #ifndef BUFSIZE
  #define BUFSIZE 1048576 // Bytes requested per read; the buffer holds twice as much
  #endif

  // Parser states
  #define STATE_HEADERS 0        // Expecting a status line and headers
  #define STATE_BODY 1           // Body with a Content-Length header
  #define STATE_CHUNK_LINE 2     // Expecting a chunk size line
  #define STATE_CHUNK_CONTENTS 3 // Inside the data of a chunk
  #define STATE_CHUNK_END 4      // Expecting CRLF + LF after the zero-length chunk

  #ifdef DEBUG
  #define DEBUG_PRINTF(...) do { fprintf(stderr, __VA_ARGS__); } while (false)
  #else
  #define DEBUG_PRINTF(...) do {} while (false)
  #endif

  // Returns the first occurrence of needle in hay[0..hay_len), or NULL.
  // ISO C stand-in for memmem(); never reads outside the haystack.
  static const char *find_bytes(const char *hay, size_t hay_len,
                                const char *needle, size_t needle_len)
  {
      if (needle_len == 0) {
          return hay;
      }
      while (hay_len >= needle_len) {
          // Only positions that leave room for the whole needle are candidates.
          const char *hit = memchr(hay, (unsigned char)needle[0], hay_len - needle_len + 1);
          if (!hit) {
              return NULL;
          }
          if (memcmp(hit, needle, needle_len) == 0) {
              return hit;
          }
          hay_len -= (size_t)(hit - hay) + 1;
          hay = hit + 1;
      }
      return NULL;
  }

  // Returns the first position in [p, end) that is neither a space nor a tab.
  static const char *skip_blanks(const char *p, const char *end)
  {
      while (p < end && (*p == ' ' || *p == '\t')) {
          ++p;
      }
      return p;
  }

  // Parses an unsigned number in the given base (10 or 16) from [p, end).
  // Stores the value and the position after the last digit. Fails when there
  // is no digit or the value does not fit into size_t. Signs and "0x" prefixes
  // are not accepted, and nothing outside [p, end) is read.
  static bool parse_size(const char *p, const char *end, unsigned base,
                         size_t *value, const char **next)
  {
      size_t result = 0;
      const char *q = p;
      for (; q < end; ++q) {
          unsigned char c = (unsigned char)*q;
          unsigned digit;
          if (c >= '0' && c <= '9') {
              digit = (unsigned)(c - '0');
          } else if (c >= 'a' && c <= 'f') {
              digit = (unsigned)(c - 'a') + 10;
          } else if (c >= 'A' && c <= 'F') {
              digit = (unsigned)(c - 'A') + 10;
          } else {
              break;
          }
          if (digit >= base) {
              break;
          }
          if (result > (SIZE_MAX - digit) / base) {
              return false; // would overflow size_t
          }
          result = result * base + digit;
      }
      if (q == p) {
          return false;
      }
      *value = result;
      *next = q;
      return true;
  }

  // Looks for a header inside the header block hdr[0..hdr_len), which must end
  // with the CRLF of the last header line. `name` is given as "\r\nName:".
  // On success, *value points at the header value (leading blanks skipped) and
  // *eol at the CRLF terminating that header line.
  static bool find_header(const char *hdr, size_t hdr_len, const char *name,
                          const char **value, const char **eol)
  {
      size_t name_len = strlen(name);
      const char *match = find_bytes(hdr, hdr_len, name, name_len);
      if (!match) {
          return false;
      }
      const char *after = match + name_len;
      const char *crlf = find_bytes(after, (size_t)(hdr + hdr_len - after), "\r\n", 2);
      if (!crlf) {
          return false;
      }
      *value = skip_blanks(after, crlf);
      *eol = crlf;
      return true;
  }

  // Moves the n unread bytes at *bufp to the start of buf and appends up to
  // BUFSIZE bytes from stdin. The caller guarantees n < BUFSIZE, so the result
  // fits into the 2 * BUFSIZE buffer. Sets *eof once stdin delivers less than
  // requested. Returns the new number of buffered bytes.
  static size_t refill(char *buf, char **bufp, size_t n, bool *eof)
  {
      if (*bufp != buf) {
          memmove(buf, *bufp, n);
          *bufp = buf;
      }
      size_t got = fread(buf + n, 1, BUFSIZE, stdin);
      if (got < (size_t)BUFSIZE) {
          *eof = true;
      }
      return n + got;
  }

  // Writes len bytes to stdout; reports a short write on stderr.
  static bool write_stdout(const char *data, size_t len)
  {
      if (fwrite(data, 1, len, stdout) != len) {
          fprintf(stderr, "Error: failed to write to stdout\n");
          return false;
      }
      return true;
  }

  int main(int argc, char* argv[]) {
      //TODO --meta or a similar way to get something like that?
      // Read stdin, decode HTTP responses, dump all bodies to stdout.
      // Each response in the input must be followed by a single LF.
      // One LF is inserted at the end of each response to ensure that a new response always begins on a new line.
      // Headers and chunk lines must fit into BUFSIZE.
      // Does not fully comply with the HTTP spec. For example, headers must be capitalised canonically,
      // and continuation lines and chunked trailers are unsupported.
      // Returns 0 on success and 1 (with a message on stderr) on any error.
      static char buf[2 * BUFSIZE]; // static: too large to put on the stack safely
      char *bufp = buf;             // start of the unread data
      size_t n = 0;                 // number of unread bytes at bufp
      bool eof = false;
      int state = STATE_HEADERS;
      size_t length = 0;            // size of the current body or chunk
      size_t bytes_read = 0;        // bytes of the current body or chunk already written

      (void)argc;
      (void)argv;

      // Every iteration performs exactly one parsing step. Refilling at the top
      // guarantees that each step sees at least BUFSIZE bytes unless stdin is exhausted.
      for (;;) {
          if (n < (size_t)BUFSIZE && !eof) {
              DEBUG_PRINTF("Buffer too small (%zu bytes), moving and refilling\n", n);
              n = refill(buf, &bufp, n, &eof);
              if (ferror(stdin)) {
                  fprintf(stderr, "Error: failed to read from stdin\n");
                  return 1;
              }
          }
          if (n == 0) {
              break;
          }
          DEBUG_PRINTF("Have %zu bytes of buffer (at %p)\n", n, (void*)bufp);
  #ifdef DEBUG
          DEBUG_PRINTF("Beginning of buffer: ");
          for (size_t i = 0; i < 64 && i < n; ++i) {
              unsigned char c = (unsigned char)bufp[i];
              if (isprint(c)) {
                  DEBUG_PRINTF("%c", c);
              } else {
                  DEBUG_PRINTF("\\x%02x", c);
              }
          }
          DEBUG_PRINTF("\n");
  #endif
          DEBUG_PRINTF("State: %d\n", state);

          if (state == STATE_HEADERS) {
              if (n < 9) {
                  fprintf(stderr, "Error: too little data before HTTP headers\n");
                  return 1;
              }
              if (memcmp(bufp, "HTTP/1.1 ", 9) != 0) {
                  fprintf(stderr, "Error: expected header line, got something else\n");
                  return 1;
              }
              // Got some headers; find transfer encoding, content length, and end of headers
              const char *blank = find_bytes(bufp, n, "\r\n\r\n", 4);
              if (!blank) {
                  fprintf(stderr, "Error: end of headers not found\n");
                  return 1;
              }
              // The header block ends with the CRLF of the last header line, so
              // nothing in the body can be mistaken for a header.
              size_t hdr_len = (size_t)(blank - bufp) + 2;
              size_t consumed = hdr_len + 2;
              DEBUG_PRINTF("Response body begins at offset %zu\n", consumed);
              const char *value;
              const char *eol;
              const char *rest;
              if (find_header(bufp, hdr_len, "\r\nContent-Length:", &value, &eol)) {
                  if (!parse_size(value, eol, 10, &length, &rest)) {
                      fprintf(stderr, "Error: invalid Content-Length\n");
                      return 1;
                  }
                  if (skip_blanks(rest, eol) != eol) {
                      fprintf(stderr, "Error: invalid Content-Length (noise before EOL)\n");
                      return 1;
                  }
                  DEBUG_PRINTF("Content length: %zu\n", length);
                  state = STATE_BODY;
              } else if (find_header(bufp, hdr_len, "\r\nTransfer-Encoding:", &value, &eol)) {
                  if ((size_t)(eol - value) < 7 || memcmp(value, "chunked", 7) != 0
                      || skip_blanks(value + 7, eol) != eol) {
                      fprintf(stderr, "Error: unsupported Transfer-Encoding\n");
                      return 1;
                  }
                  DEBUG_PRINTF("Chunked transfer encoding\n");
                  state = STATE_CHUNK_LINE;
              } else {
                  fprintf(stderr, "Error: Content-Length and Transfer-Encoding missing\n");
                  return 1;
              }
              DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", consumed);
              bufp += consumed;
              n -= consumed;
              bytes_read = 0;
          } else if (state == STATE_BODY || state == STATE_CHUNK_CONTENTS) {
              size_t remaining = length - bytes_read;
              if (remaining > 0) {
                  // Copy as much of the body as is buffered; the terminator is
                  // handled in a later iteration, after a refill if necessary.
                  size_t take = remaining < n ? remaining : n;
                  DEBUG_PRINTF("Copying %zu bytes to stdout\n", take);
                  if (!write_stdout(bufp, take)) {
                      return 1;
                  }
                  bufp += take;
                  n -= take;
                  bytes_read += take;
                  DEBUG_PRINTF("%zu of %zu bytes written\n", bytes_read, length);
                  continue;
              }
              // Body or chunk complete: a Content-Length body is followed by LF,
              // a chunk by CRLF (a bare LF is tolerated as well).
              size_t eol_len;
              if (state == STATE_CHUNK_CONTENTS && bufp[0] == '\r') {
                  if (n < 2 || bufp[1] != '\n') {
                      fprintf(stderr, "Error: end of HTTP body not found\n");
                      return 1;
                  }
                  eol_len = 2;
              } else if (bufp[0] == '\n') {
                  eol_len = 1;
              } else {
                  fprintf(stderr, "Error: end of HTTP body not found\n");
                  return 1;
              }
              bufp += eol_len;
              n -= eol_len;
              if (state == STATE_BODY) {
                  // End of the response: emit the separating LF.
                  if (!write_stdout("\n", 1)) {
                      return 1;
                  }
                  state = STATE_HEADERS;
              } else {
                  state = STATE_CHUNK_LINE;
              }
          } else if (state == STATE_CHUNK_LINE) {
              const char *eol = find_bytes(bufp, n, "\r\n", 2);
              if (!eol) {
                  fprintf(stderr, "Error: chunk line EOL missing\n");
                  return 1;
              }
              const char *rest;
              if (!parse_size(skip_blanks(bufp, eol), eol, 16, &length, &rest)) {
                  fprintf(stderr, "Error: invalid chunk length\n");
                  return 1;
              }
              rest = skip_blanks(rest, eol);
              // Chunk extensions (introduced by ';') are accepted and ignored.
              if (rest != eol && *rest != ';') {
                  fprintf(stderr, "Error: invalid chunk length (noise before EOL)\n");
                  return 1;
              }
              DEBUG_PRINTF("Chunk length: %zu bytes\n", length);
              size_t consumed = (size_t)(eol - bufp) + 2;
              DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", consumed);
              bufp += consumed;
              n -= consumed;
              bytes_read = 0; // every chunk is counted from zero
              state = length == 0 ? STATE_CHUNK_END : STATE_CHUNK_CONTENTS;
          } else { // STATE_CHUNK_END
              // End of response, must be followed by CRLF + LF
              if (n < 3) {
                  fprintf(stderr, "Error: buffer exhausted while looking for empty chunk CRLF\n");
                  return 1;
              }
              if (memcmp(bufp, "\r\n\n", 3) != 0) {
                  fprintf(stderr, "Error: end of HTTP body not found\n");
                  return 1;
              }
              bufp += 3;
              n -= 3;
              // Emit the separating LF once per response, not once per chunk.
              if (!write_stdout("\n", 1)) {
                  return 1;
              }
              state = STATE_HEADERS;
          }
      }
      if (state != STATE_HEADERS) {
          fprintf(stderr, "Error: incomplete body at the end of input\n");
          return 1;
      }
      if (fflush(stdout) != 0) {
          fprintf(stderr, "Error: failed to write to stdout\n");
          return 1;
      }
      return 0;
  }