The little things give you away... A collection of various small helper stuff
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.
 
 
 

102 lignes
2.7 KiB

  1. #!/bin/bash
  # Echo the given arguments, but only when the global "verbose" is non-empty.
  # Callers redirect the output themselves (e.g. >&2 for progress messages).
  verbose_echo() {
    [[ -z "${verbose}" ]] || echo "$@"
  }
  #######################################
  # Fetch a web page and print the social-media links found in its HTML.
  #
  # The page is downloaded with curl, reduced to the lines that mention one of
  # the supported sites, and fanned out with tee to one extractor per site
  # (Facebook, Flickr, Instagram, Telegram, Twitter, VKontakte, YouTube).
  # Each extractor prints normalised https:// URLs and drops well-known
  # non-profile endpoints (share dialogs, widgets, tracking pixels, ...).
  # A final awk removes duplicate lines.
  #
  # Host names are matched case-insensitively (grep -i), so the normalisation
  # steps that follow each grep are case-insensitive too; otherwise a link
  # such as "YouTube.com/user/x" would be matched and then silently lost.
  #
  # The extractors are separate processes, so the relative order of links
  # coming from different sites is not guaranteed.
  #
  # Globals:   verbose (read, through verbose_echo)
  # Arguments: $1 - URL of the page to scan
  # Outputs:   one extracted URL per line on stdout; progress and curl errors
  #            on stderr
  # Requires:  grep with -P (PCRE) support; sed supporting \+, \? and the "I"
  #            flag of the s command
  #######################################
  function fetch_n_extract {
    local url="$1"
    verbose_echo "Fetching ${url}" >&2
    {
      # The fixed-string pre-filter keeps only lines that can contain a link of
      # interest, so the PCRE extractors below see far less input.
      # tee's own copy of the stream is discarded; only what the extractors
      # print is meant to reach the de-duplicating awk at the end.
      curl -sSL --max-time 10 -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' "${url}" |
        grep -Fi -e 'facebook' -e 'flickr' -e 'instagram' -e 'twitter' -e 't.me' -e 'youtube' -e 'youtu.be' -e 'vk.com' |
        tee \
          >(
            # Facebook: pages, people, groups, /pg/ and plain profile paths;
            # then drop tracking, plugin, redirect and share endpoints.
            grep -Poi 'facebook\.com/((pages(/category)?|people)/((?!&quot;)[^/ <"'"'"'])+/|groups/|pg/)?((?!&quot;)[^/ <"'"'"'])+' |
              sed 's,^,https://www.,' |
              grep -vi -e '^https://www\.facebook\.com/2008$' -e '^https://www\.facebook\.com/tr\?' -e '^https://www\.facebook\.com/plugins$' -e '^https://www\.facebook\.com/l\.php\?' |
              grep -Pvi '^https://www\.facebook\.com/share(r(\.php)?)?(\?|$)'
          ) \
          >(
            # Flickr: photo stream of a user.
            grep -Poi 'flickr\.com/photos/((?!&quot;)[^/ <"'"'"'])+' |
              sed 's,^,https://www.,'
          ) \
          >(
            # Instagram: profiles and single posts (/p/<id>); a bare /p or /vp
            # path carries no identifier and is dropped.
            grep -Poi 'instagram\.com/(p/)?((?!&quot;)[^/ <"'"'"'])+' |
              sed 's,^,https://www.,' |
              grep -Pvi -e '^https://www\.instagram\.com/v?p$'
          ) \
          >(
            # Telegram: the leading "//" keeps "t.me" from matching inside
            # longer host names; an optional "www." is stripped in any case.
            grep -Poi '//(www\.)?t\.me/((?!&quot;)[^/ <"'"'"'])+' |
              sed 's,^//,,; s,^www\.,,I; s,^,https://,'
          ) \
          >(
            # Twitter: remove the legacy "#!/" prefix (host matched in any
            # case, original spelling kept), drop share/intent/widget
            # endpoints, then strip the ref_src tracking parameter and a
            # trailing "?" it may leave behind.
            grep -Poi 'twitter\.com/(#!/)?(hashtag/)?((?!&quot;)[^/ <"'"'"'])+' |
              sed 's,^\(twitter\.com/\)#!/,\1,I; s,^,https://,' |
              grep -vi -e '^https://twitter\.com/home\?' -e '^https://twitter\.com/widgets\.js$' -e '^https://twitter\.com/share\?' -e '^https://twitter\.com/intent$' |
              sed 's,\([?&]\)ref_src=[^&]\+&\?,\1,; s,?$,,'
          ) \
          >(
            # VKontakte
            grep -Poi 'vk\.com/((?!&quot;)[^/ <"'"'"'])+' |
              sed 's,^,https://,'
          ) \
          >(
            # YouTube: youtube.com gets a "www." prefix, youtu.be does not.
            # The host is compared in lower case because grep matched it
            # case-insensitively; the original spelling is printed unchanged.
            grep -Poi '(youtube\.com/((user|channel|c|embed)/)?((?!&quot;)[^/ <"'"'"'])+|youtu\.be/((?!&quot;)[^/ <"'"'"'])+)' |
              awk '{ host = tolower($0) }
                   host ~ /^youtube/ { print "https://www." $0 }
                   host ~ /^youtu\.be/ { print "https://" $0 }' |
              grep -vi -e '^https://www\.youtube\.com/vi$'
          ) \
          >/dev/null
    } | awk '!seen[$0]++'
  }
  # Parse options
  printInputUrl=
  verbose=

  #######################################
  # Consume the leading command-line options and record them in globals.
  #
  # Recognised options:
  #   --print-input-url, --print-input-urls  echo each input URL before its results
  #   --verbose, -v                          report each fetch on stderr
  #   --                                     explicit end of options
  # A bare "v" is still accepted for backward compatibility: the short form
  # used to be compared against "v" instead of "-v", so "-v" never worked.
  # Parsing stops at the first argument that is not an option; any other
  # argument starting with "--" is an error.
  #
  # Globals:   printInputUrl, verbose (written)
  #            parsedOptionCount (written) - number of leading arguments
  #            consumed, for the caller to shift away
  # Arguments: the script's command-line arguments
  # Outputs:   error message on stderr for an unknown option
  # Returns:   0 on success; exits the script with status 1 on an unknown option
  #######################################
  parse_options() {
    parsedOptionCount=0
    while [[ $# -gt 0 ]]; do
      case "$1" in
        --print-input-urls | --print-input-url)
          printInputUrl=true
          ;;
        --verbose | -v | v)
          verbose=1
          ;;
        --)
          # End of options: the marker itself is consumed too.
          parsedOptionCount=$((parsedOptionCount + 1))
          return 0
          ;;
        --*)
          echo "Unknown option: $1" >&2
          exit 1
          ;;
        *)
          # Assume end of options
          return 0
          ;;
      esac
      parsedOptionCount=$((parsedOptionCount + 1))
      shift
    done
  }

  parse_options "$@"
  # A function cannot shift the script's own arguments, so drop them here.
  shift "${parsedOptionCount}"
  # Build the list of input URLs - the remaining command-line arguments first,
  # then every line of stdin when stdin is not a terminal - and run
  # fetch_n_extract on each of them in turn.
  {
    # printf rather than echo, so an argument such as "-n" or "-e" is passed
    # on literally instead of being taken as an echo option.
    for arg in "$@"; do
      printf '%s\n' "${arg}"
    done
    if [[ ! -t 0 ]]; then
      cat
    fi
  } | while read -r url || [[ -n "${url}" ]]; do
    # The "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline:
    # read returns non-zero at end of input but has still filled in "url".
    if [[ "${printInputUrl}" ]]; then
      printf '%s\n' "${url}"
    fi
    fetch_n_extract "${url}"
  done