|
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374 |
#!/bin/bash
#
# Read a list of URLs from stdin (one per line) and write them to stdout,
# replacing recognised social media URLs with a canonical, correctly
# capitalised version.
#
# Usage:   script < urls.txt > normalised.txt
#
# Input:   one URL per line. A line may start with "* " (e.g. a Markdown list
#          item); that prefix is preserved in the output.
# Output:  exactly one line on stdout per input line.
#            - Facebook / Twitter URLs are looked up over the network (curl)
#              and rewritten using the name found in the returned HTML.
#            - Instagram URLs are rewritten offline (user name lower-cased).
#            - Anything else is echoed back unchanged.
#          URLs whose lookup failed are still printed (without their query
#          string) and are additionally listed on stderr at the end.
#
# Requires: curl, and a grep that supports -P (PCRE) and -o.
#
# NOTE: `set -e` / `pipefail` are deliberately not enabled: grep returning
# non-zero for "no match" is a normal outcome here, not an error.

# Browser-like User-Agent strings sent with the lookups.
readonly FACEBOOK_USER_AGENT='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
readonly TWITTER_USER_AGENT='Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0'

# Extended regular expressions (used with [[ =~ ]]) describing the URLs that
# are rewritten. Dots in host names are escaped so that they only match a
# literal ".".
readonly FACEBOOK_URL_RE='^https?://((www|m|[a-z][a-z]-[a-z][a-z])\.)?facebook\.com/([^/]+/?(\?|$)|pages/[^/]+/[0-9]+/?(\?|$)|profile\.php\?id=[0-9]+(&|$))'
readonly TWITTER_URL_RE='^https?://(www\.)?twitter\.com/[^/]+/?(\?.*)?$'
readonly INSTAGRAM_URL_RE='^https?://(www\.)?instagram\.com/[^/]+/?$'

# Succeeds if $1 is a Facebook page / profile URL this script handles.
is_facebook_url() {
  [[ "$1" =~ ${FACEBOOK_URL_RE} ]]
}

# Succeeds if $1 is a Twitter profile URL this script handles.
is_twitter_url() {
  [[ "$1" =~ ${TWITTER_URL_RE} ]]
}

# Succeeds if $1 is an Instagram profile URL this script handles.
is_instagram_url() {
  [[ "$1" =~ ${INSTAGRAM_URL_RE} ]]
}

#######################################
# Resolve a Facebook URL to its canonical form by fetching the page.
# Arguments: $1 - URL already accepted by is_facebook_url
# Outputs:   the canonical URL on success; on failure the input URL with
#            its query string (or extra "&..." parameters) removed
# Returns:   0 on success, 1 if no canonical name could be extracted
#######################################
canonical_facebook_url() {
  local url="$1"
  local page user

  # profile.php URLs need their "?id=..." kept; only later parameters go.
  if [[ "${url}" == *profile.php* ]]; then
    url="${url%%&*}"
  else
    url="${url%%\?*}"
  fi

  page="$(curl -sL -A "${FACEBOOK_USER_AGENT}" \
    -H 'Accept-Language: en-US,en;q=0.5' \
    "https://www.facebook.com/${url#*facebook.com/}")"

  # The "Home" tab links to the page under its properly capitalised name.
  # Only the first match is used so the result is always a single line.
  user="$(grep -Po '<div\s[^>]*(?<=\s)data-key\s*=\s*"tab_home".*?</div>' <<< "${page}" \
    | grep -Po '<a\s[^>]*(?<=\s)href="/\K[^/]+' \
    | head -n 1)"
  if [[ -n "${user}" ]]; then
    printf 'https://www.facebook.com/%s/\n' "${user}"
    return 0
  fi

  if grep -q 'id="pagelet_loggedout_sign_up"' <<< "${page}"; then
    # Profile page which is only visible when logged in:
    # fall back to the canonical URL advertised in the page head.
    user="$(grep -Po '<link rel="canonical" href="\K[^"]+' <<< "${page}" | head -n 1)"
    if [[ -n "${user}" ]]; then
      printf '%s\n' "${user}"
      return 0
    fi
  fi

  printf '%s\n' "${url}"
  return 1
}

#######################################
# Resolve a Twitter URL to https://twitter.com/<ScreenName> by fetching the
# profile page and reading the screen name link from it.
# Arguments: $1 - URL already accepted by is_twitter_url
# Outputs:   the canonical URL on success; on failure the input URL without
#            query string and trailing slash
# Returns:   0 on success, 1 if the screen name could not be extracted
#######################################
canonical_twitter_url() {
  local url="$1"
  local handle user

  url="${url%%\?*}"
  url="${url%/}"
  handle="${url##*/}"

  user="$(curl -sL -A "${TWITTER_USER_AGENT}" "https://twitter.com/${handle}" \
    | grep -Po '<a class="([^"]*\s)?ProfileHeaderCard-screennameLink(\s[^"]*)?" href="/\K[^/"]+(?=")' \
    | head -n 1)"
  if [[ -n "${user}" ]]; then
    printf 'https://twitter.com/%s\n' "${user}"
    return 0
  fi

  printf '%s\n' "${url}"
  return 1
}

#######################################
# Rewrite an Instagram URL to https://www.instagram.com/<user>/ with the
# user name lower-cased. No network access is needed.
# Arguments: $1 - URL already accepted by is_instagram_url
# Outputs:   the canonical URL
#######################################
canonical_instagram_url() {
  local user="${1%/}"
  user="${user##*/}"
  printf 'https://www.instagram.com/%s/\n' "${user,,}"
}

#######################################
# Normalise a single URL (without any "* " prefix).
# Arguments: $1 - the URL
# Outputs:   the URL to emit for this input (always exactly one line)
# Returns:   0 if the URL was normalised or needed no handling,
#            1 if it was recognised but the lookup failed
#######################################
normalise_url() {
  local url="$1"

  if is_facebook_url "${url}"; then
    canonical_facebook_url "${url}"
  elif is_twitter_url "${url}"; then
    canonical_twitter_url "${url}"
  elif is_instagram_url "${url}"; then
    canonical_instagram_url "${url}"
  else
    printf '%s\n' "${url}"
  fi
}

#######################################
# Process every line of stdin and report failed lookups on stderr.
# Outputs: normalised lines on stdout; failure summary on stderr
#######################################
main() {
  local -a error_urls=()
  local line prefix url result error_url

  # `|| [[ -n ... ]]` makes sure a final line without a trailing newline is
  # processed too. IFS is left at its default on purpose so that leading and
  # trailing whitespace is trimmed from each line.
  while read -r line || [[ -n "${line}" ]]; do
    if [[ "${line}" == '* '* ]]; then
      prefix="${line::2}"
      url="${line:2}"
    else
      prefix=""
      url="${line}"
    fi

    if ! result="$(normalise_url "${url}")"; then
      error_urls+=("${result}")
    fi
    # printf rather than echo: input such as "-n" must be printed verbatim.
    printf '%s%s\n' "${prefix}" "${result}"
  done

  if [[ ${#error_urls[@]} -gt 0 ]]; then
    printf '\n' >&2
    printf '%s\n' "Failed to process URLs:" >&2
    for error_url in "${error_urls[@]}"; do
      printf '%s\n' "${error_url}" >&2
    done
  fi
}

main "$@"
|