#!/bin/bash
# Read a list of URLs from stdin (one per line) and write the list to stdout,
# replacing suitable social media URLs with a correctly capitalised version.
#
# Input lines may start with the two-character bullet "* "; the bullet is
# kept on the output line. Lines that are not recognised as a Facebook,
# Twitter or Instagram profile URL are passed through unchanged.
#
# Facebook and Twitter look-ups need the external commands `curl` and
# `snscrape` plus a `grep` that supports -P (PCRE). URLs whose look-up gave
# no result are echoed back (minus their query string) and listed on stderr
# after all input has been processed.

# URLs for which a look-up was attempted but produced no usable result.
errorUrls=()

#######################################
# Resolve a Facebook URL to its canonical form by fetching the page.
# Arguments: $1 - Facebook URL with the query string already stripped
# Outputs:   canonical URL on stdout (exactly one line) on success
# Returns:   0 on success, 1 if nothing could be extracted from the page
#######################################
canonicalFacebookUrl()
{
    local url="$1"
    local page user

    # stdin is redirected so the child can never consume the URL list that
    # the caller's read loop is iterating over.
    page="$(curl -sL -A 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36' -H 'Accept-Language: en-US,en;q=0.5' "https://www.facebook.com/${url#*facebook.com/}" < /dev/null)"

    # Take the first path component of the link inside the "tab_home" element.
    # Only the first match is kept so one input line yields one output line.
    user="$(grep -Po '<div\s[^>]*(?<=\s)data-key\s*=\s*"tab_home".*?</div>' <<< "${page}" \
        | grep -Po '<a\s[^>]*(?<=\s)href="/\K[^/]+' \
        | head -n 1)"
    if [[ "${user}" ]]
    then
        printf '%s\n' "https://www.facebook.com/${user}/"
        return 0
    fi

    if grep -q 'id="pagelet_loggedout_sign_up"' <<< "${page}"
    then
        # Profile page which is only visible when logged in:
        # fall back to the canonical URL advertised by the page itself.
        user="$(grep -Po '<link rel="canonical" href="\K[^"]+' <<< "${page}" | head -n 1)"
        if [[ "${user}" ]]
        then
            printf '%s\n' "${user}"
            return 0
        fi
    fi

    return 1
}

#######################################
# Resolve a Twitter profile URL to its canonical form using snscrape.
# Arguments: $1 - Twitter URL without query string or trailing slash
# Outputs:   canonical URL on stdout (exactly one line) on success
# Returns:   0 on success, 1 if no user name could be extracted
#######################################
canonicalTwitterUrl()
{
    local url="$1"
    local user

    # The user name is read back from the first URL snscrape prints.
    user="$(snscrape --max-results 1 twitter-user "${url##*/}" < /dev/null \
        | grep -Po '^https?://twitter\.com/\K[^/]+' \
        | head -n 1)"
    [[ "${user}" ]] || return 1

    printf '%s\n' "https://twitter.com/${user}"
}

#######################################
# Process one input line and print the (possibly rewritten) line.
# Globals:   errorUrls (appended to when a look-up fails)
# Arguments: $1 - input line: a URL, optionally preceded by "* "
# Outputs:   exactly one line on stdout
#######################################
processUrl()
{
    local url="$1"
    local prefix="" fixed user

    # Patterns live in variables so they reach =~ verbatim. The dots in the
    # host names are escaped: an unescaped "." would match any character.
    local -r facebookRe='^https?://((www|m|[a-z][a-z]-[a-z][a-z])\.)?facebook\.com/([^/]+/?(\?|$)|pages/[^/]+/[0-9]+/?(\?|$)|profile\.php\?id=[0-9]+(&|$))'
    local -r twitterRe='^https?://(www\.)?twitter\.com/[^/]+/?(\?.*)?$'
    local -r instagramRe='^https?://(www\.)?instagram\.com/[^/]+/?$'

    if [[ "${url}" == '* '* ]]
    then
        prefix="${url::2}"
        url="${url:2}"
    fi

    if [[ "${url}" =~ ${facebookRe} ]]
    then
        if [[ "${url}" == *profile.php* ]]
        then
            # Keep "?id=..." (it identifies the profile), drop further parameters.
            url="${url%%&*}"
        else
            url="${url%%\?*}"
        fi
        if ! fixed="$(canonicalFacebookUrl "${url}")"
        then
            errorUrls+=("${url}")
            fixed="${url}"
        fi
    elif [[ "${url}" =~ ${twitterRe} ]]
    then
        url="${url%%\?*}"
        url="${url%/}"
        if ! fixed="$(canonicalTwitterUrl "${url}")"
        then
            errorUrls+=("${url}")
            fixed="${url}"
        fi
    elif [[ "${url}" =~ ${instagramRe} ]]
    then
        # No look-up here: the user name is simply lower-cased.
        user="${url%/}"
        user="${user##*/}"
        fixed="https://www.instagram.com/${user,,}/"
    else
        fixed="${url}"
    fi

    # printf rather than echo: a line such as "-n" must be printed verbatim.
    printf '%s\n' "${prefix}${fixed}"
}

#######################################
# Entry point: filter stdin to stdout, then report failed look-ups on stderr.
# Globals:   errorUrls (reset, then filled by processUrl)
#######################################
main()
{
    local url

    errorUrls=()

    # read trims leading/trailing whitespace (default IFS), so indented
    # bullets are still recognised. The "|| [[ -n ... ]]" part makes sure a
    # final line without a trailing newline is not silently dropped.
    while read -r url || [[ -n "${url}" ]]
    do
        processUrl "${url}"
    done

    if [[ ${#errorUrls[@]} -gt 0 ]]
    then
        {
            printf '\n'
            printf '%s\n' "Failed to process URLs:"
            printf '%s\n' "${errorUrls[@]}"
        } >&2
    fi
}

main "$@"