|
123456789101112131415161718192021222324252627282930313233343536 |
- #!/bin/bash
- # Search 4chan archives based on FoolFuuka
- # Output: one post per line in HTML, prefixed with the post ID
- # Note that posts can appear multiple times in the output in some cases. You're encouraged to filter based on the post ID.
- domain="$1"
- q="$2"
- end=
- nextend=2038-01-19
- while :
- do
- end="${nextend}"
- content=$(curl -s "https://${domain}/_/search/text/${q}/end/${end}/page/${page}/")
-
- declare -i page=1
- while [[ ${page} -lt 201 ]]
- do
- echo "Grabbing https://${domain}/_/search/text/${q}/end/${end}/page/${page}/" >&2
- content=$(curl -s "https://${domain}/_/search/text/${q}/end/${end}/page/${page}/")
- tr -d '\n' <<<"${content}" | grep -Po '<article class="post.*?</article>' | perl -pe 's,^(.*?id="(\d+)".*$),\2 \1,'
-
- # Get last date seen to update end date; subtract one because the search appears to be a bit unreliable
- nextend="$(date --date="@$(($(date --date="$(tr -d '\n' <<<"${content}" | grep -Po '<article class="post.*?</article>' | tail -1 | grep -Po '<time datetime="\K[^"]+')" '+%s') - 86400))" '+%Y-%m-%d')"
-
- if grep -qF '<div class="alert"' <<<"${content}"
- then
- if ! tr -d '\n' <<<"${content}" | grep -Po '<div class="alert".*?</div>' | grep -q 'No results found'
- then
- echo "Error" >&2
- break
- else
- break 2
- fi
- fi
- page+=1
- done
- done
|