The little things give you away... A collection of various small helper stuff
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.
 
 
 

229 lignes
7.1 KiB

#define _GNU_SOURCE
#include <ctype.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
  7. #ifndef BUFSIZE
  8. #define BUFSIZE 1048576
  9. #endif
  10. #define STATE_HEADERS 0
  11. #define STATE_BODY 1 // Body with a Content-Length header
  12. #define STATE_CHUNK_LINE 2
  13. #define STATE_CHUNK_CONTENTS 3
  14. #ifdef DEBUG
  15. #define DEBUG_PRINTF(...) do { fprintf(stderr, __VA_ARGS__); } while (false)
  16. #else
  17. #define DEBUG_PRINTF(...) do {} while (false)
  18. #endif
  19. int main(int argc, char* argv[]) {
  20. //TODO --meta or a similar way to get something like that?
  21. // Read stdin, decode HTTP responses, dump all bodies to stdout.
  22. // One LF is inserted at the end of each response to ensure that a new response always begins on a new line.
  23. // Headers and chunk lines must fit into BUFSIZE.
  24. // Does not fully comply with the HTTP spec. For example, headers must be capitalised canonically, and continuation lines are unsupported.
  25. char buf[2 * BUFSIZE];
  26. size_t n;
  27. int state = STATE_HEADERS;
  28. char* bufp;
  29. char* m0;
  30. char* m1;
  31. char* eoh;
  32. size_t nscan;
  33. size_t bytes_read;
  34. size_t length;
  35. while ((n = fread(buf, 1, BUFSIZE, stdin)) > 0) {
  36. bufp = buf;
  37. checkstate:
  38. DEBUG_PRINTF("Have %zu bytes of buffer (at %p)\n", n, (void*)bufp);
  39. DEBUG_PRINTF("Beginning of buffer: ");
  40. for (int i = 0; i < 64; ++i) DEBUG_PRINTF(isprint(*(bufp + i)) ? "%c" : "\\x%02x", *(bufp + i) & 0xFF);
  41. DEBUG_PRINTF("\n");
  42. if (n == 0) {
  43. break;
  44. }
  45. DEBUG_PRINTF("State: %d\n", state);
  46. if (state == STATE_HEADERS) {
  47. if (n < 9) {
  48. fprintf(stderr, "Error: too little data before HTTP headers\n");
  49. return 1;
  50. }
  51. if (memcmp(bufp, "HTTP/1.1 ", 9) == 0) {
  52. // Got some headers; find transfer encoding, content length, and end of headers
  53. eoh = memmem(bufp, n, "\r\n\r\n", 4);
  54. if (!eoh) {
  55. fprintf(stderr, "Error: end of headers not found\n");
  56. return 1;
  57. }
  58. eoh += 4;
  59. DEBUG_PRINTF("Response body begins at %p (offset %zu)\n", (void*)eoh, eoh - bufp);
  60. m0 = memmem(bufp, n, "\r\nContent-Length:", 17);
  61. if (m0 && m0 < eoh) {
  62. DEBUG_PRINTF("Found Content-Length header at %p (offset %zu)\n", (void*)(m0 + 2), m0 + 2 - bufp);
  63. m1 = memmem(m0 + 1, n - (m0 + 1 - bufp), "\r\n", 2);
  64. if (!m1) {
  65. fprintf(stderr, "Error: CRLF after Content-Length missing\n");
  66. return 1;
  67. }
  68. m0 += 17;
  69. while (m0 < bufp + n && (*m0 == ' ' || *m0 == '\t')) ++m0;
  70. if (!sscanf(m0, "%zu%n", &length, &nscan)) {
  71. fprintf(stderr, "Error: invalid Content-Length\n");
  72. return 1;
  73. }
  74. if (nscan > n - (m0 - bufp)) {
  75. fprintf(stderr, "Error: buffer overread\n");
  76. return 1;
  77. }
  78. m0 += nscan;
  79. while (m0 < bufp + n && (*m0 == ' ' || *m0 == '\t')) ++m0;
  80. if (m0 != m1) {
  81. fprintf(stderr, "Error: invalid Content-Length (noise before EOL)\n");
  82. return 1;
  83. }
  84. DEBUG_PRINTF("Content length: %zu\n", length);
  85. state = STATE_BODY;
  86. } else {
  87. m0 = memmem(bufp, n, "\r\nTransfer-Encoding:", 20);
  88. if (!m0 || m0 >= eoh) {
  89. fprintf(stderr, "Error: Content-Length and Transfer-Encoding missing\n");
  90. return 1;
  91. }
  92. DEBUG_PRINTF("Found Transfer-Encoding header at %p (offset %zu)\n", (void*)(m0 + 2), m0 + 2 - bufp);
  93. m1 = memmem(m0 + 1, n - (m0 + 1 - bufp), "\r\n", 2);
  94. if (!m1 || m1 >= eoh - 2) {
  95. fprintf(stderr, "Error: CRLF after Transfer-Encoding missing\n");
  96. return 1;
  97. }
  98. m0 += 20;
  99. while (m0 < bufp + n && (*m0 == ' ' || *m0 == '\t')) ++m0;
  100. if (memcmp(m0, "chunked", 7) != 0) {
  101. fprintf(stderr, "Error: unsupported Transfer-Encoding\n");
  102. return 1;
  103. }
  104. m0 += 7;
  105. while (m0 < bufp + n && (*m0 == ' ' || *m0 == '\t')) ++m0;
  106. if (m0 != m1) {
  107. fprintf(stderr, "Error: unsupported Transfer-Encoding\n");
  108. return 1;
  109. }
  110. DEBUG_PRINTF("Chunked transfer encoding\n");
  111. state = STATE_CHUNK_LINE;
  112. }
  113. DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", eoh - bufp);
  114. n = n - (eoh - bufp);
  115. bufp = eoh;
  116. bytes_read = 0;
  117. goto checkstate;
  118. } else {
  119. fprintf(stderr, "Error: expected header line, got something else\n");
  120. return 1;
  121. }
  122. } else if (state == STATE_BODY || state == STATE_CHUNK_CONTENTS) {
  123. if (length + 2 - bytes_read > n) {
  124. // Only got part of the body
  125. DEBUG_PRINTF("Partial body\n");
  126. DEBUG_PRINTF("Copying %zu bytes to stdout\n", n);
  127. fwrite(bufp, 1, n, stdout);
  128. bytes_read += n;
  129. DEBUG_PRINTF("%zu of %zu bytes from this response written\n", bytes_read, length);
  130. } else {
  131. // Remainder of the response is in the buffer. Same logic as above for small records fitting in the buffer with the headers.
  132. DEBUG_PRINTF("Full body\n");
  133. DEBUG_PRINTF("Copying %zu bytes to stdout\n", length - bytes_read);
  134. fwrite(bufp, 1, length - bytes_read, stdout);
  135. fprintf(stdout, "\n");
  136. if (state == STATE_CHUNK_CONTENTS && *(bufp + length - bytes_read) == '\r') {
  137. // Stupid hack to enforce the CRLF
  138. ++length;
  139. }
  140. if (memcmp(bufp + length - bytes_read, "\n", 1) != 0) {
  141. fprintf(stderr, "Error: end of HTTP body not found\n");
  142. return 1;
  143. }
  144. DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", length + 1 - bytes_read);
  145. n = n - (length + 1 - bytes_read);
  146. bufp = bufp + length + 1 - bytes_read;
  147. if (n < BUFSIZE) {
  148. DEBUG_PRINTF("Buffer too small (%zu bytes), moving and refilling\n", n);
  149. memmove(buf, bufp, n);
  150. bufp = buf;
  151. n += fread(buf + n, 1, BUFSIZE, stdin);
  152. }
  153. if (state == STATE_BODY) {
  154. state = STATE_HEADERS;
  155. } else {
  156. state = STATE_CHUNK_LINE;
  157. }
  158. goto checkstate;
  159. }
  160. } else if (state == STATE_CHUNK_LINE) {
  161. m1 = memmem(bufp, n, "\r\n", 2);
  162. if (!m1) {
  163. fprintf(stderr, "Error: chunk line EOL missing\n");
  164. return 1;
  165. }
  166. m0 = bufp;
  167. while (m0 < bufp + n && (*m0 == ' ' || *m0 == '\t')) ++m0;
  168. if (!sscanf(m0, "%x%n", &length, &nscan)) {
  169. fprintf(stderr, "Error: invalid chunk length\n");
  170. return 1;
  171. }
  172. if (nscan > n - (m0 - bufp)) {
  173. fprintf(stderr, "Error: buffer overread\n");
  174. return 1;
  175. }
  176. m0 += nscan;
  177. while (m0 < m1 && (*m0 == ' ' || *m0 == '\t')) ++m0;
  178. if (*m0 != ';' && m0 != m1) {
  179. fprintf(stderr, "Error: invalid Content-Length (noise before EOL)\n");
  180. return 1;
  181. }
  182. DEBUG_PRINTF("Chunk length: %zu bytes\n", length);
  183. DEBUG_PRINTF("Adjusting buffer pointer and n by %zu\n", m1 + 2 - bufp);
  184. n = n - (m1 + 2 - bufp);
  185. bufp = m1 + 2;
  186. if (length == 0) {
  187. // End of response, must be followed by CRLF + LF
  188. if (n < 3) {
  189. fprintf(stderr, "Error: buffer exhausted while looking for empty chunk CRLF\n");
  190. return 1;
  191. }
  192. if (*(m1 + 2) != '\r' || *(m1 + 3) != '\n' || *(m1 + 4) != '\n') {
  193. fprintf(stderr, "Error: end of HTTP body not found\n");
  194. return 1;
  195. }
  196. n -= 3;
  197. bufp += 3;
  198. state = STATE_HEADERS;
  199. } else {
  200. state = STATE_CHUNK_CONTENTS;
  201. }
  202. if (n < BUFSIZE) {
  203. DEBUG_PRINTF("Buffer too small (%zu bytes), moving and refilling\n", n);
  204. memmove(buf, bufp, n);
  205. bufp = buf;
  206. n += fread(buf + n, 1, BUFSIZE, stdin);
  207. }
  208. goto checkstate;
  209. }
  210. }
  211. if (state != STATE_HEADERS) {
  212. fprintf(stderr, "Error: incomplete body at the end of input\n");
  213. return 1;
  214. }
  215. }