|
- #!/bin/bash
#######################################
# Print a message only when verbose mode is enabled.
# Globals:   verbose (read) - any non-empty value enables output
# Arguments: the words of the message; joined with single spaces
# Outputs:   the message followed by a newline on stdout (callers redirect
#            to stderr themselves when needed)
# Returns:   0
#######################################
verbose_echo() {
  # "$*" joins on the first character of IFS; pin it so the message is always
  # space-separated regardless of the caller's IFS.
  local IFS=' '

  # ${verbose:-} keeps this safe if the caller runs with `set -u` and never
  # assigned the variable.
  if [[ -n "${verbose:-}" ]]; then
    # printf instead of echo: echo would swallow messages that start with
    # -n / -e / -E by treating them as its own options.
    printf '%s\n' "$*"
  fi
}
-
#######################################
# Download a page and print the social-network links found in it.
#
# Supported networks: Facebook, Flickr, Instagram, Telegram, Twitter,
# VKontakte and YouTube. Each match is normalised to an https:// URL, a few
# known non-profile URLs (share buttons, tracking pixels, widgets) are
# dropped, and duplicates are removed while keeping first-seen order.
#
# Globals:   none directly (verbose_echo reads `verbose`)
# Arguments: $1 - URL of the page to fetch
# Outputs:   one extracted URL per line on stdout, grouped by network in the
#            order listed above; a "Fetching ..." notice on stderr when
#            verbose mode is on
# Returns:   0 (download/extraction failures simply yield no output; curl
#            reports its own errors on stderr because of -S)
#######################################
fetch_n_extract() {
  local url="$1"
  local page

  verbose_echo "Fetching ${url}" >&2

  # Download once and keep only the lines that mention a supported network.
  # --url stops an input beginning with "-" from being parsed as a curl
  # option. NUL bytes are stripped because a shell variable cannot hold them.
  page="$(
    curl -sSL --max-time 10 \
      -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' \
      --url "${url}" \
      | grep -Fi -e 'facebook' -e 'flickr' -e 'instagram' -e 'twitter' -e 't.me' -e 'youtube' -e 'youtu.be' -e 'vk.com' \
      | tr -d '\0'
  )"

  if [[ -z "${page}" ]]; then
    return 0
  fi

  # The extractors run one after another over the captured page. Running them
  # concurrently into a single shared pipe makes the output order
  # nondeterministic and lets buffered writers interleave partial lines.
  {
    # Facebook
    # The exclusion list uses ERE so that \? is a literal question mark; in
    # GNU basic regexps \? means "previous atom is optional", which would
    # discard e.g. every page whose name starts with "t".
    grep -Poi 'facebook\.com/((pages(/category)?|people)/((?!")[^/ <"'"'"'])+/|groups/|pg/)?((?!")[^/ <"'"'"'])+' <<<"${page}" \
      | sed 's,^,https://www.,' \
      | grep -Evi \
        -e '^https://www\.facebook\.com/2008$' \
        -e '^https://www\.facebook\.com/tr(\?|$)' \
        -e '^https://www\.facebook\.com/plugins$' \
        -e '^https://www\.facebook\.com/l\.php(\?|$)' \
      | grep -Pvi '^https://www\.facebook\.com/share(r(\.php)?)?(\?|$)'

    # Flickr
    grep -Poi 'flickr\.com/photos/((?!")[^/ <"'"'"'])+' <<<"${page}" \
      | sed 's,^,https://www.,'

    # Instagram (a bare /p or /vp without a post id is not a usable link)
    grep -Poi 'instagram\.com/(p/)?((?!")[^/ <"'"'"'])+' <<<"${page}" \
      | sed 's,^,https://www.,' \
      | grep -Pvi -e '^https://www\.instagram\.com/v?p$'

    # Telegram (the leading // keeps other hosts that merely end in "t.me"
    # from matching; it and any www. prefix are removed again afterwards)
    grep -Poi '//(www\.)?t\.me/((?!")[^/ <"'"'"'])+' <<<"${page}" \
      | sed 's,^//,,; s,^www\.,,; s,^,https://,'

    # Twitter
    # Old-style "#!/" URLs are rewritten to the plain form, share/intent/
    # widget endpoints are dropped, and the ref_src tracking parameter is
    # removed (together with a "?" left dangling at the end).
    grep -Poi 'twitter\.com/(#!/)?(hashtag/)?((?!")[^/ <"'"'"'])+' <<<"${page}" \
      | sed 's,^twitter\.com/#!/,twitter.com/,; s,^,https://,' \
      | grep -Evi \
        -e '^https://twitter\.com/home(\?|$)' \
        -e '^https://twitter\.com/widgets\.js$' \
        -e '^https://twitter\.com/share(\?|$)' \
        -e '^https://twitter\.com/intent$' \
      | sed 's,\([?&]\)ref_src=[^&]\+&\?,\1,; s,?$,,'

    # VKontakte
    grep -Poi 'vk\.com/((?!")[^/ <"'"'"'])+' <<<"${page}" \
      | sed 's,^,https://,'

    # YouTube (youtube.com links get a www. prefix, youtu.be links do not)
    grep -Poi '(youtube\.com/((user|channel|c|embed)/)?((?!")[^/ <"'"'"'])+|youtu\.be/((?!")[^/ <"'"'"'])+)' <<<"${page}" \
      | awk '/^youtube/ { print "https://www." $0 } /^youtu\.be/ { print "https://" $0 }' \
      | grep -vi -e '^https://www\.youtube\.com/vi$'
  } | awk '!seen[$0]++' # de-duplicate, keeping the first occurrence
}
-
#######################################
# Parse the leading command-line options.
#
# Recognised options:
#   --print-input-url, --print-input-urls   echo each input URL before its
#                                           results
#   --verbose, -v                           enable progress messages
#   --                                      explicit end of options
# The bare word "v" is still accepted for verbose mode because earlier
# versions of this script recognised it instead of "-v".
# Parsing stops at "--" or at the first argument that is not an option; any
# other argument starting with "--" is an error.
#
# Globals:   printInputUrl     (written) "true" or empty
#            verbose           (written) "1" or empty
#            parsedOptionCount (written) number of leading arguments consumed,
#                              including a terminating "--"
# Arguments: the script's command-line arguments
# Outputs:   an error message on stderr for an unknown option
# Returns:   0; exits the shell with status 1 on an unknown option
#######################################
parse_options() {
  printInputUrl=
  verbose=
  parsedOptionCount=0

  while (($# > 0)); do
    case "$1" in
      --print-input-urls | --print-input-url)
        printInputUrl=true
        ;;
      --verbose | -v | v)
        verbose=1
        ;;
      --)
        # End of options: consume the marker itself and stop.
        parsedOptionCount=$((parsedOptionCount + 1))
        return 0
        ;;
      --*)
        printf 'Unknown option: %s\n' "$1" >&2
        exit 1
        ;;
      *)
        # Assume end of options: leave this argument for the caller.
        return 0
        ;;
    esac
    shift
    parsedOptionCount=$((parsedOptionCount + 1))
  done

  return 0
}

# Parse options, then drop them so that "$@" holds only the input URLs.
parse_options "$@"
shift "${parsedOptionCount}"
-
# Collect the input URLs - first the remaining command-line arguments, then
# anything supplied on stdin - and run the extractor on each one in turn.
{
  # printf rather than echo so an argument such as "-n" is passed through
  # verbatim instead of being interpreted as an echo option.
  for arg in "$@"; do
    printf '%s\n' "${arg}"
  done

  # Only read stdin when it is a pipe or a file; with a terminal attached the
  # script would otherwise sit waiting for the user to type.
  if [[ ! -t 0 ]]; then
    cat
  fi
} | while read -r url || [[ -n "${url}" ]]; do
  # `read` without IFS= deliberately trims surrounding whitespace from each
  # URL. The `|| [[ -n ... ]]` clause keeps a final line that has no trailing
  # newline, which `read` alone would report as end-of-input and drop.

  # Blank lines carry no URL; skip them instead of handing curl an empty one.
  if [[ -z "${url}" ]]; then
    continue
  fi

  if [[ "${printInputUrl}" ]]; then
    printf '%s\n' "${url}"
  fi
  fetch_n_extract "${url}"
done
|