#!/bin/bash # Taking a list of URLs from stdin (optionally in new-viewer style wiki format), every URL is normalised as follows: # - For social media URLs, the correct capitalisation is extracted and extraneous parameters are removed. # - For YouTube user or channel URLs, the canonical base URL is extracted. # - For anything else, retrieval is attempted and the final, post-redirect URL is used. (To not follow redirects, use --other-no-redirects.) otherCurlRedirectOpt='-L' verbose= while [[ $# -gt 0 ]] do if [[ "$1" == '--other-no-redirects' ]] then otherCurlRedirectOpt= elif [[ "$1" == '--verbose' || "$1" == '-v' ]] then verbose=1 else echo "Unknown option: $1" >&2 exit 1 fi shift done function verbose_echo { if [[ "${verbose}" ]] then echo "$@" fi } userAgent='Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0' while read -r line do if [[ "${line}" != 'http://'* && "${line}" != 'https://'* && "${line}" != '* http://'* && "${line}" != '* https://'* ]] then echo "${line}" continue fi if [[ "${line}" == '* '* ]] then prefix="${line::2}" url="${line:2}" else prefix="" url="${line}" fi if [[ "${url}" == *' | '* ]] then suffix=" | ${url#* | }" url="${url%% | *}" else suffix="" fi if [[ "${url}" =~ ^https?://((www|m|[a-z][a-z]-[a-z][a-z]).)?facebook.com/login/.*[?\&]next=https?%3A%2F%2F((www|m|[a-z][a-z]-[a-z][a-z]).)?facebook.com%2F && "${url}" != *'%0A'* && "${url}" != *'%00'* ]] then url="${url##*\?next=}" url="${url##*&next=}" url="${url%%&*}" url="$(printf '%b' "${url//%/\\x}")" fi if [[ "${url}" =~ ^https?://((www|m|[a-z][a-z]-[a-z][a-z]).)?facebook.com/([^/]+/?(\?|$)|pages/[^/]+/[0-9]+/?(\?|$)|pages/category/[^/]+/[^/]+/?(\?|$)|pg/[^/]+([/?]|$)|profile\.php\?id=[0-9]+(&|$)) ]] then verbose_echo "Normalising Facebook URL: ${url}" >&2 if [[ "${url}" == *profile.php* ]] then url="${url%%&*}" else url="${url%%\?*}" fi page="$(curl -sL --max-time 10 -A "${userAgent}" -H 'Accept-Language: en-US,en;q=0.5' "https://www.facebook.com/${url#*facebook.com/}")" user="$(grep -Po '