#!/bin/bash
#
# Fetch web pages and print the social-network links found in them.
#
# Usage:
#   <script> [--print-input-urls] [--verbose|-v] [--] [URL...]
#
# URLs are taken from the command line and, when stdin is not a terminal,
# additionally from stdin (one URL per line). For every URL the page is
# downloaded with curl and links to Facebook, Flickr, Instagram, Telegram,
# Twitter, VKontakte and YouTube are written to stdout, de-duplicated per
# input URL.
#
# Options:
#   --print-input-urls, --print-input-url
#       Echo each input URL to stdout before its extracted links.
#   --verbose, -v
#       Log "Fetching <url>" to stderr. (A bare "v" is still accepted for
#       backward compatibility with earlier versions of this script.)
#   --
#       End of options.
#
# Requires GNU grep (for -P) and GNU sed (for \+ and \? in basic regexes).

#######################################
# Print the arguments (space-joined) followed by a newline, but only when
# verbose mode is on.
# Globals:   verbose (read) - any non-empty value enables output
# Arguments: words to print
# Outputs:   the message on stdout (callers redirect to stderr as needed)
#######################################
verbose_echo() {
  if [[ "${verbose}" ]]; then
    # printf instead of echo so messages such as "-n" are printed verbatim.
    printf '%s\n' "$*"
  fi
}

#######################################
# Read HTML (or any text) from stdin and print the social-network links it
# contains, one per line, each distinct link only once.
# Arguments: none
# Outputs:   normalized https:// links on stdout. The per-network extractors
#            run as concurrent process substitutions, so the relative order
#            of links from different networks is not guaranteed.
#######################################
extract_social_links() {
  {
    # Cheap fixed-string prefilter, then fan the surviving lines out to one
    # extractor per network. The extractors write to this group's stdout
    # (the pipe into awk); tee's own copy of the input is discarded.
    grep -Fi -e 'facebook' -e 'flickr' -e 'instagram' -e 'twitter' -e 't.me' -e 'youtube' -e 'youtu.be' -e 'vk.com' |
      tee \
        >( # Facebook
          grep -Poi 'facebook\.com/((pages(/category)?|people)/((?!")[^/ <"'"'"'])+/|groups/|pg/)?((?!")[^/ <"'"'"'])+' |
            sed 's,^,https://www.,' |
            # Drop tracking/plugin endpoints. Extended regex is used so that
            # "\?" is a literal question mark; in GNU basic regex "\?" means
            # "optional", which made "tr\?" discard every page starting
            # with "t".
            grep -Evi \
              -e '^https://www\.facebook\.com/2008$' \
              -e '^https://www\.facebook\.com/(tr|l\.php)(\?|$)' \
              -e '^https://www\.facebook\.com/plugins$' |
            grep -Pvi '^https://www\.facebook\.com/share(r(\.php)?)?(\?|$)'
        ) \
        >( # Flickr
          grep -Poi 'flickr\.com/photos/((?!")[^/ <"'"'"'])+' |
            sed 's,^,https://www.,'
        ) \
        >( # Instagram
          grep -Poi 'instagram\.com/(p/)?((?!")[^/ <"'"'"'])+' |
            sed 's,^,https://www.,' |
            grep -Pvi -e '^https://www\.instagram\.com/v?p$'
        ) \
        >( # Telegram
          grep -Poi '//(www\.)?t\.me/((?!")[^/ <"'"'"'])+' |
            sed 's,^//,,; s,^www\.,,; s,^,https://,'
        ) \
        >( # Twitter
          grep -Poi 'twitter\.com/(#!/)?(hashtag/)?((?!")[^/ <"'"'"'])+' |
            sed 's,^twitter\.com/#!/,twitter.com/,; s,^,https://,' |
            # Drop share/intent/widget endpoints (bare or with a query
            # string) without also discarding handles that merely start
            # with "hom" or "shar".
            grep -Evi \
              -e '^https://twitter\.com/(home|share)(\?|$)' \
              -e '^https://twitter\.com/widgets\.js$' \
              -e '^https://twitter\.com/intent$' |
            # Strip the ref_src tracking parameter and a dangling "?".
            sed 's,\([?&]\)ref_src=[^&]\+&\?,\1,; s,?$,,'
        ) \
        >( # VKontakte
          grep -Poi 'vk\.com/((?!")[^/ <"'"'"'])+' |
            sed 's,^,https://,'
        ) \
        >( # YouTube
          grep -Poi '(youtube\.com/((user|channel|c|embed)/)?((?!")[^/ <"'"'"'])+|youtu\.be/((?!")[^/ <"'"'"'])+)' |
            awk '/^youtube/ { print "https://www." $0 } /^youtu\.be/ { print "https://" $0 }' |
            grep -vi -e '^https://www\.youtube\.com/vi$'
        ) \
        >/dev/null
  } | awk '!seen[$0]++'
}

#######################################
# Download one URL and print the social-network links found in the response.
# Globals:   verbose (read, via verbose_echo)
# Arguments: $1 - URL to fetch
# Outputs:   links on stdout; progress (verbose mode) and curl errors on
#            stderr
#######################################
fetch_n_extract() {
  local url="$1"

  verbose_echo "Fetching ${url}" >&2

  # --url keeps an input that starts with "-" from being parsed as a curl
  # option.
  curl -sSL --max-time 10 \
    -A 'Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' \
    --url "${url}" |
    extract_social_links
}

#######################################
# Parse leading command-line options.
# Globals:   printInputUrl (written) - "true" when input URLs are echoed
#            verbose (written)       - "1" when verbose mode is on
#            input_args (written)    - array of the remaining arguments
# Arguments: the script's command-line arguments
# Outputs:   an error on stderr for an unknown "--" option
# Returns:   exits the shell with status 1 on an unknown "--" option
#######################################
parse_options() {
  printInputUrl=
  verbose=
  input_args=()

  while (( $# > 0 )); do
    case "$1" in
      --print-input-urls | --print-input-url)
        printInputUrl=true
        shift
        ;;
      --verbose | -v | v)
        # "-v" is the intended short form; bare "v" is kept as a legacy
        # alias because earlier versions only matched that spelling.
        verbose=1
        shift
        ;;
      --)
        # End of options
        shift
        break
        ;;
      --*)
        printf 'Unknown option: %s\n' "$1" >&2
        exit 1
        ;;
      *)
        # Assume end of options
        break
        ;;
    esac
  done

  input_args=("$@")
}

#######################################
# Entry point: collect URLs from the arguments and (if piped) stdin, then
# fetch and extract each one in turn.
# Arguments: the script's command-line arguments
#######################################
main() {
  local arg url

  parse_options "$@"

  {
    for arg in "${input_args[@]}"; do
      printf '%s\n' "${arg}"
    done

    if [[ ! -t 0 ]]; then
      cat
    fi
  } | while read -r url || [[ -n "${url}" ]]; do
    # The "|| -n" test keeps a final line that lacks a trailing newline.
    # Blank lines carry no URL, so skip them instead of invoking curl.
    [[ -n "${url}" ]] || continue

    if [[ "${printInputUrl}" ]]; then
      printf '%s\n' "${url}"
    fi

    fetch_n_extract "${url}"
  done
}

main "$@"