The little things give you away... A collection of various small helper stuff
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.
 
 
 

193 lignes
9.6 KiB

  1. // stdin: YouTube URLs or data with little noise besides that
  2. // stdout: lines for videos, channels, playlists, and unknown YouTube IDs found in the input, prefixed with v, c, p, and ?, respectively
  3. #include <stdbool.h>
  4. #include <stdio.h>
  5. #include <stdlib.h>
  6. #include <string.h>
  // Debug output is compiled in unconditionally (so format strings stay type-checked) but only emitted when DEBUG is non-zero.
  #ifndef DEBUG
  #define DEBUG 0
  #endif
  // debug_print requires at least one argument after the format string.
  #define debug_print(fmt, ...) do { if (DEBUG) fprintf(stderr, "%s:%d:%s(): " fmt, __FILE__, __LINE__, __func__, __VA_ARGS__); } while (0)
  // Size of the input buffer in bytes. Parenthesised so the macro behaves as a single value in any expression (e.g. BUFFER_SIZE / 2, x % BUFFER_SIZE).
  #define BUFFER_SIZE (1024 * 1024)
  // MAX_RESULT_SIZE is the maximum length of an individual match. This must be smaller than BUFFER_SIZE/2.
  #define MAX_RESULT_SIZE 1024
  // Scanner states: outside of any ID, inside a run of ID characters, and skipping an over-long run until it ends.
  #define STATE_NONE 0
  #define STATE_ID 1
  #define STATE_SKIP_UNTIL_NONID 2
  // True if c is a character that may appear in an ID: [0-9A-Za-z_-].
  // Note: c is evaluated more than once, so it must not have side effects.
  #define IS_ID_CHAR(c) (('0' <= (c) && (c) <= '9') || ('A' <= (c) && (c) <= 'Z') || ('a' <= (c) && (c) <= 'z') || (c) == '_' || (c) == '-')
  // Returns true if each of the first len characters of c is an upper-case hexadecimal digit ([0-9A-F]).
  // An empty range (len == 0) is considered valid.
  // Declared static: a plain `inline` definition provides no external definition in C99/C11,
  // which leads to an undefined reference whenever the compiler decides not to inline the call.
  static inline bool is_upper_hex(const char* c, size_t len) {
      for (size_t i = 0; i < len; i++) {
          const bool isDigit = '0' <= c[i] && c[i] <= '9';
          const bool isUpperAF = 'A' <= c[i] && c[i] <= 'F';
          if (!isDigit && !isUpperAF) {
              return false;
          }
      }
      return true;
  }
  25. int main(int argc, char** argv) {
  26. char* inbuf = malloc(sizeof(char) * BUFFER_SIZE);
  27. int state = 0;
  28. size_t stateStart = 0;
  29. size_t inbufEnd = 0;
  30. while (1) {
  31. debug_print("inbufEnd = %d, state = %d, stateStart = %d\n", inbufEnd, state, stateStart);
  32. debug_print("Reading %d from stdin\n", BUFFER_SIZE - inbufEnd);
  33. size_t readSize = fread(inbuf + inbufEnd, sizeof(char), BUFFER_SIZE - inbufEnd, stdin);
  34. debug_print("Got %d bytes\n", readSize);
  35. if (readSize == 0) {
  36. if (inbufEnd == 0) {
  37. // Nothing read, nothing left from previous iteration. Bye.
  38. break;
  39. } else {
  40. // No more input data but still something left from the previous read.
  41. // Make sure that the next character cannot be considered valid in any state (NUL qualifies), then let the code below handle things.
  42. inbuf[inbufEnd] = '\0';
  43. readSize += 1;
  44. }
  45. }
  46. for (size_t p = inbufEnd; p < inbufEnd + readSize; p++) {
  47. debug_print("p = %d, character = %c, state = %d, stateStart = %d\n", p, inbuf[p], state, stateStart);
  48. if (state != STATE_NONE && state != STATE_SKIP_UNTIL_NONID && p - stateStart >= MAX_RESULT_SIZE) {
  49. debug_print("%s\n", "max result size exceeded, dropping result and switching to STATE_SKIP_UNTIL_NONID");
  50. state = STATE_SKIP_UNTIL_NONID;
  51. stateStart = 0;
  52. }
  53. switch (state) {
  54. case STATE_NONE:
  55. if (IS_ID_CHAR(inbuf[p])) {
  56. debug_print("%c is an ID char, switching to STATE_ID\n", inbuf[p]);
  57. state = STATE_ID;
  58. stateStart = p;
  59. }
  60. break;
  61. case STATE_ID:
  62. if (!IS_ID_CHAR(inbuf[p])) {
  63. debug_print("%c is not an ID char\n", inbuf[p]);
  64. if (p - stateStart >= 10) {
  65. debug_print("p = %d, stateStart = %d, got %d ID chars: %.*s\n", p, stateStart, p - stateStart, p - stateStart, inbuf + stateStart);
  66. if (p - stateStart == 11) {
  67. printf("v %.*s\n", p - stateStart, inbuf + stateStart);
  68. } else if (p - stateStart == 22) {
  69. printf("c %.*s\n", p - stateStart, inbuf + stateStart);
  70. } else if (p - stateStart == 24 && inbuf[stateStart] == 'U' && inbuf[stateStart + 1] == 'C') {
  71. printf("c %.*s\n", p - stateStart, inbuf + stateStart);
  72. } else {
  73. // Playlist candidates, some of which contain IDs for channels or videos
  74. if (p - stateStart >= 19 && memcmp(inbuf + stateStart, "RDAMPL", 6) == 0) {
  75. // Playlist ID starts with RDAMPL, which is followed by a normal playlist ID, so skip that.
  76. stateStart += 6;
  77. }
  78. if (p - stateStart == 32 || (inbuf[stateStart] == 'P' && inbuf[stateStart + 1] == 'L' && p - stateStart == 34)) {
  79. printf("p %.*s\n", p - stateStart, inbuf + stateStart);
  80. } else if ( (p - stateStart == 16 || (inbuf[stateStart] == 'P' && inbuf[stateStart + 1] == 'L' && p - stateStart == 18))
  81. && is_upper_hex(inbuf + p - 16, 16)
  82. ) {
  83. printf("p %.*s\n", p - stateStart, inbuf + stateStart);
  84. } else if (p - stateStart == 24 && ( (inbuf[stateStart] == 'U' && inbuf[stateStart + 1] == 'U')
  85. || (inbuf[stateStart] == 'L' && inbuf[stateStart + 1] == 'L')
  86. || (inbuf[stateStart] == 'F' && inbuf[stateStart + 1] == 'L')
  87. || (inbuf[stateStart] == 'P' && inbuf[stateStart + 1] == 'U')
  88. )
  89. ) {
  90. printf("p %.*s\n", p - stateStart, inbuf + stateStart);
  91. printf("c %.*s\n", 22, inbuf + stateStart + 2);
  92. } else if (p - stateStart == 26 && memcmp(inbuf + stateStart, "UUSH", 4) == 0) {
  93. printf("p %.*s\n", p - stateStart, inbuf + stateStart);
  94. printf("c %.*s\n", 22, inbuf + stateStart + 4);
  95. } else if (p - stateStart == 13 && ( (inbuf[stateStart] == 'R' && inbuf[stateStart + 1] == 'D')
  96. || (inbuf[stateStart] == 'U' && inbuf[stateStart + 1] == 'L')
  97. || (inbuf[stateStart] == 'E' && inbuf[stateStart + 1] == 'L')
  98. || (inbuf[stateStart] == 'C' && inbuf[stateStart + 1] == 'L')
  99. || (inbuf[stateStart] == 'S' && inbuf[stateStart + 1] == 'L')
  100. || (inbuf[stateStart] == 'L' && inbuf[stateStart + 1] == 'P')
  101. )
  102. ) {
  103. printf("p %.*s\n", p - stateStart, inbuf + stateStart);
  104. printf("v %.*s\n", 11, inbuf + stateStart + 2);
  105. } else if ( p - stateStart == 15 && inbuf[stateStart] == 'R' && inbuf[stateStart + 1] == 'D'
  106. && ( (inbuf[stateStart + 2] == 'M' && inbuf[stateStart + 3] == 'M')
  107. || (inbuf[stateStart + 2] == 'Q' && inbuf[stateStart + 3] == 'M')
  108. || (inbuf[stateStart + 2] == 'E' && inbuf[stateStart + 3] == 'M')
  109. || (inbuf[stateStart + 2] == 'L' && inbuf[stateStart + 3] == 'V')
  110. || (inbuf[stateStart + 2] == 'H' && inbuf[stateStart + 3] == 'C')
  111. )
  112. ) {
  113. printf("p %.*s\n", p - stateStart, inbuf + stateStart);
  114. printf("v %.*s\n", 11, inbuf + stateStart + 4);
  115. } else if ( p - stateStart == 15 && inbuf[stateStart] == 'R' && inbuf[stateStart + 1] == 'D'
  116. && '0' <= inbuf[stateStart + 2] && inbuf[stateStart + 2] <= '4'
  117. && '0' <= inbuf[stateStart + 3] && inbuf[stateStart + 3] <= '9'
  118. ) {
  119. printf("p %.*s\n", p - stateStart, inbuf + stateStart);
  120. printf("v %.*s\n", 11, inbuf + stateStart + 4);
  121. } else if ( p - stateStart == 28 && ( memcmp(inbuf + stateStart, "RDAMVM", 6) == 0
  122. || memcmp(inbuf + stateStart, "RDGMEM", 6) == 0
  123. )
  124. ) {
  125. printf("p %.*s\n", p - stateStart, inbuf + stateStart);
  126. } else if (p - stateStart == 28 && memcmp(inbuf + stateStart, "RDCMUC", 6) == 0) {
  127. printf("p %.*s\n", p - stateStart, inbuf + stateStart);
  128. printf("c %.*s\n", 24, inbuf + stateStart + 4);
  129. } else if ( p - stateStart == 41 && memcmp(inbuf + stateStart, "RDGMEM", 6) == 0
  130. && inbuf[stateStart + 28] == 'V' && inbuf[stateStart + 29] == 'M'
  131. ) {
  132. printf("p %.*s\n", p - stateStart, inbuf + stateStart);
  133. printf("v %.*s\n", 11, inbuf + stateStart + 30);
  134. } else if (p - stateStart == 26 && ( memcmp(inbuf + stateStart, "RDAO", 4) == 0
  135. || memcmp(inbuf + stateStart, "RDEM", 4) == 0
  136. || memcmp(inbuf + stateStart, "RDKM", 4) == 0
  137. )
  138. ) {
  139. printf("p %.*s\n", p - stateStart, inbuf + stateStart);
  140. } else if ( p - stateStart == 43 && 'k' <= inbuf[stateStart + 10] && inbuf[stateStart + 10] <= 'n'
  141. && ( memcmp(inbuf + stateStart, "RDCLAK5uy_", 10) == 0
  142. || memcmp(inbuf + stateStart, "RDTMAK5uy_", 10) == 0
  143. )
  144. ) {
  145. printf("p %.*s\n", p - stateStart, inbuf + stateStart);
  146. } else if ( p - stateStart == 41 && 'k' <= inbuf[stateStart + 8] && inbuf[stateStart + 8] <= 'n'
  147. && memcmp(inbuf + stateStart, "OLAK5uy_", 8) == 0
  148. ) {
  149. printf("p %.*s\n", p - stateStart, inbuf + stateStart);
  150. } else {
  151. printf("? %.*s\n", p - stateStart, inbuf + stateStart);
  152. }
  153. }
  154. }
  155. debug_print("%s\n", "Switching to STATE_NONE");
  156. state = STATE_NONE;
  157. stateStart = 0;
  158. }
  159. break;
  160. case STATE_SKIP_UNTIL_NONID:
  161. if (!IS_ID_CHAR(inbuf[p])) {
  162. debug_print("%s\n", "Switching to STATE_NONE");
  163. state = STATE_NONE;
  164. }
  165. break;
  166. }
  167. }
  168. if (state != STATE_NONE && state != STATE_SKIP_UNTIL_NONID) {
  169. // Need to keep the trailing part of the buffer for the next iteration.
  170. // Because stateStart gets reset to zero when it exceeds MAX_RESULT_SIZE, inbufEnd + readSize - stateStart is guaranteed to be smaller than MAX_RESULT_SIZE.
  171. // Because MAX_RESULT_SIZE < BUFFER_SIZE/2, we can simply copy the last bytes to the beginning of inbuf directly.
  172. debug_print("Copying %d bytes starting from %d to the beginning of the buffer: %.*s\n",
  173. inbufEnd + readSize - stateStart, stateStart, inbufEnd + readSize - stateStart, inbuf + stateStart);
  174. memcpy(inbuf, inbuf + stateStart, inbufEnd + readSize - stateStart);
  175. inbufEnd += readSize - stateStart;
  176. stateStart = 0;
  177. } else {
  178. debug_print("%s\n", "No buffer copying necessary");
  179. inbufEnd = 0;
  180. }
  181. }
  182. }