The little things give you away... A collection of assorted small helper scripts.
#!/bin/bash

# Fetch a page and print any social media profile links found in it,
# one URL per line, deduplicated.
function fetch_n_extract {
	local url="$1"
	{
		# Fetch the page with a browser-like user agent, keep only lines that
		# mention one of the supported services, and fan the stream out with
		# tee into one extraction pipeline per service. Each pipeline writes
		# to this group's stdout; tee's own copy is discarded.
		curl -sSL -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' "${url}" | \
		grep -Fi -e 'facebook' -e 'flickr' -e 'instagram' -e 'twitter' -e 't.me' -e 'youtube' -e 'youtu.be' -e 'vk.com' | \
		tee \
			>(
				# Facebook
				grep -Poi 'facebook\.com/(pages/[^/ <"'"'"']+/|groups/)?[^/ <"'"'"']+' | \
				sed 's,^,https://www.,' | \
				grep -vi -e '^https://www\.facebook\.com/2008$' -e '^https://www\.facebook\.com/tr?' -e '^https://www\.facebook\.com/plugins$' | \
				grep -Pvi '^https://www\.facebook\.com/sharer(\.php\?|\?|$)'
			) \
			>(
				# Flickr
				grep -Poi 'flickr\.com/photos/[^/ <"'"'"']+' | \
				sed 's,^,https://www.,'
			) \
			>(
				# Instagram
				grep -Poi 'instagram\.com/(p/)?[^/ <"'"'"']+' | \
				sed 's,^,https://www.,' | \
				grep -Pvi -e '^https://www\.instagram\.com/v?p$'
			) \
			>(
				# Telegram
				grep -Poi '//(www\.)?t\.me/[^/ <"'"'"']+' | \
				sed 's,^//,,; s,^www\.,,; s,^,https://,'
			) \
			>(
				# Twitter; drops share/widget URLs and strips ref_src tracking parameters
				grep -Poi 'twitter\.com/(hashtag/)?[^/ <"'"'"']+' | \
				sed 's,^,https://,' | \
				grep -vi -e '^https://twitter\.com/home?' -e '^https://twitter\.com/widgets\.js$' -e '^https://twitter\.com/share?' | \
				sed 's,\([?&]\)ref_src=[^&]\+&\?,\1,; s,?$,,'
			) \
			>(
				# VKontakte
				grep -Poi 'vk\.com/[^/ <"'"'"']+' | \
				sed 's,^,https://,'
			) \
			>(
				# YouTube (including youtu.be short links)
				grep -Poi '(youtube\.com/((user|channel|embed)/)?[^/ <"'"'"']+|youtu\.be/[^/ <"'"'"']+)' | \
				awk '/^youtube/ { print "https://www." $0 } /^youtu\.be/ { print "https://" $0 }' | \
				grep -vi -e '^https://www\.youtube\.com/vi$'
			) \
			>/dev/null
	} | awk '!seen[$0]++' # print each URL only once, preserving order
}

# Parse options
printInputUrl=
while [[ $# -gt 0 ]]
do
	if [[ "$1" == '--print-input-urls' || "$1" == '--print-input-url' ]]
	then
		printInputUrl=true
		shift
	elif [[ "$1" == '--' ]]
	then
		# End of options
		shift
		break
	elif [[ "$1" == '--'* ]]
	then
		echo "Unknown option: $1" >&2
		exit 1
	else
		# Assume end of options
		break
	fi
done

# Collect URLs from the remaining arguments and, if stdin is not a
# terminal, from standard input as well (one URL per line).
{
	for arg in "$@"
	do
		echo "${arg}"
	done
	if [ ! -t 0 ]
	then
		cat
	fi
} | while read -r url
do
	if [[ "${printInputUrl}" ]]
	then
		echo "${url}"
	fi
	fetch_n_extract "${url}"
done
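
A few example invocations, as a sketch: the page doesn't show the script's actual file name, so extract-social-links.sh below is a placeholder, and example.com / urls.txt are stand-ins for your own inputs.

	# Single page:
	./extract-social-links.sh https://example.com/

	# Several pages, echoing each input URL before its extracted links:
	./extract-social-links.sh --print-input-urls https://example.com/ https://example.org/

	# URLs can also be piped in, one per line; stdin is read whenever
	# it is not a terminal:
	./extract-social-links.sh < urls.txt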