The little things give you away... A collection of various small helper stuff
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

29 lines
1.8 KiB

  1. #!/bin/bash
  2. # Search all submissions or comments on Reddit for a search term
  3. # Usage: $0 (submission|comment) QUERY
  4. # Output:
  5. # For submissions: post date timestamp, permalink, url or None if it's a selfpost, body in Python-repr format or None if it's a title-only selfpost
  6. # For comments: post date timestamp, permalink, content in Python-repr format
  7. # For comments before 2017-10-24, the Pushshift API doesn't provide a permalink, so that field is filled with "comment_id/parent_id/username" instead.
  8. # Unfortunately, that means that it may be hard to find those comments on Reddit (unless the parent is a thread, i.e. it's a top-level comment).
  9. mode="$1"
  10. q="$2"
  11. before=2147483647
  12. pipe=$(mktemp -u); mkfifo "${pipe}"; exec 3<>"${pipe}"; rm "${pipe}"; unset pipe
  13. while :
  14. do
  15. {
  16. if [[ "${mode}" == "comment" ]]
  17. then
  18. curl -s "https://api.pushshift.io/reddit/search/comment/?q=${q}&size=500&fields=author,body,created_utc,link_id,parent_id,permalink&before=${before}" | python3 -c 'import json,sys'$'\n''for d in json.loads(sys.stdin.read())["data"]:'$'\n'' print("%d %s %r" % (d["created_utc"], d["permalink"] if "permalink" in d else d["parent_id"] + "/" + d["link_id"] + "/" + d["author"], d["body"]))'
  19. else
  20. curl -s "https://api.pushshift.io/reddit/search/submission/?q=${q}&size=500&fields=author,created_utc,id,is_self,permalink,selftext,url&before=${before}" | python3 -c 'import json,sys'$'\n''for d in json.loads(sys.stdin.read())["data"]:'$'\n'' print("%d %s %s %s" % (d["created_utc"], d["permalink"], d["url"] if not d["is_self"] else "None", repr(d["selftext"]) if "selftext" in d else "None"))'
  21. fi
  22. } | awk 'BEGIN { timestamp = 0; } { timestamp=$1; print; } END { print timestamp >"/dev/fd/3" }'
  23. before=$(head -1 <&3)
  24. if [[ ${before} -eq 0 ]] # No data returned by Pushshift
  25. then
  26. break
  27. fi
  28. done