The little things give you away... A collection of various small helper stuff
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

102 lines
2.7 KiB

  1. #!/bin/bash
  2. function verbose_echo { if [[ "${verbose}" ]]; then echo "$@"; fi; }
  3. function fetch_n_extract {
  4. local url="$1"
  5. verbose_echo "Fetching ${url}" >&2
  6. {
  7. curl -sSL --max-time 10 -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' "${url}" | \
  8. grep -Fi -e 'facebook' -e 'flickr' -e 'instagram' -e 'twitter' -e 't.me' -e 'youtube' -e 'youtu.be' -e 'vk.com' | \
  9. tee \
  10. >(
  11. # Facebook
  12. grep -Poi 'facebook\.com/((pages(/category)?|people)/((?!&quot;)[^/ <"'"'"'])+/|groups/|pg/)?((?!&quot;)[^/ <"'"'"'])+' | \
  13. sed 's,^,https://www.,' | \
  14. grep -vi -e '^https://www\.facebook\.com/2008$' -e '^https://www\.facebook\.com/tr\?' -e '^https://www\.facebook\.com/plugins$' -e '^https://www\.facebook\.com/l\.php\?' | \
  15. grep -Pvi '^https://www\.facebook\.com/share(r(\.php)?)?(\?|$)'
  16. ) \
  17. >(
  18. # Flickr
  19. grep -Poi 'flickr\.com/photos/((?!&quot;)[^/ <"'"'"'])+' | \
  20. sed 's,^,https://www.,'
  21. ) \
  22. >(
  23. # Instagram
  24. grep -Poi 'instagram\.com/(p/)?((?!&quot;)[^/ <"'"'"'])+' | \
  25. sed 's,^,https://www.,' | \
  26. grep -Pvi -e '^https://www\.instagram\.com/v?p$'
  27. ) \
  28. >(
  29. # Telegram
  30. grep -Poi '//(www\.)?t\.me/((?!&quot;)[^/ <"'"'"'])+' | \
  31. sed 's,^//,,; s,^www\.,,; s,^,https://,'
  32. ) \
  33. >(
  34. # Twitter
  35. grep -Poi 'twitter\.com/(#!/)?(hashtag/)?((?!&quot;)[^/ <"'"'"'])+' | \
  36. sed 's,^twitter\.com/#!/,twitter.com/,; s,^,https://,' | \
  37. grep -vi -e '^https://twitter\.com/home\?' -e '^https://twitter\.com/widgets\.js$' -e '^https://twitter\.com/share\?' -e '^https://twitter\.com/intent$' | \
  38. sed 's,\([?&]\)ref_src=[^&]\+&\?,\1,; s,?$,,'
  39. ) \
  40. >(
  41. # VKontakte
  42. grep -Poi 'vk\.com/((?!&quot;)[^/ <"'"'"'])+' | \
  43. sed 's,^,https://,'
  44. ) \
  45. >(
  46. # YouTube
  47. grep -Poi '(youtube\.com/((user|channel|c|embed)/)?((?!&quot;)[^/ <"'"'"'])+|youtu\.be/((?!&quot;)[^/ <"'"'"'])+)' | \
  48. awk '/^youtube/ { print "https://www." $0 } /^youtu\.be/ { print "https://" $0 }' | \
  49. grep -vi -e '^https://www\.youtube\.com/vi$'
  50. ) \
  51. >/dev/null
  52. } | awk '!seen[$0]++'
  53. }
  54. # Parse options
  55. printInputUrl=
  56. verbose=
  57. while [[ $# -gt 0 ]]
  58. do
  59. if [[ "$1" == '--print-input-urls' || "$1" == '--print-input-url' ]]
  60. then
  61. printInputUrl=true
  62. shift
  63. elif [[ "$1" == '--verbose' || "$1" == 'v' ]]
  64. then
  65. verbose=1
  66. shift
  67. elif [[ "$1" == '--' ]]
  68. then
  69. # End of options
  70. shift
  71. break
  72. elif [[ "$1" == '--'* ]]
  73. then
  74. echo "Unknown option: $1" >&2
  75. exit 1
  76. else
  77. # Assume end of options
  78. break
  79. fi
  80. done
  81. {
  82. for arg in "$@"
  83. do
  84. echo "${arg}"
  85. done
  86. if [ ! -t 0 ]
  87. then
  88. cat
  89. fi
  90. } | while read -r url
  91. do
  92. if [[ "${printInputUrl}" ]]
  93. then
  94. echo "${url}"
  95. fi
  96. fetch_n_extract "${url}"
  97. done