diff --git a/snscrape-normalise b/snscrape-normalise index e34b54b..3a4494d 100755 --- a/snscrape-normalise +++ b/snscrape-normalise @@ -3,7 +3,7 @@ errorUrls=() while read -r url do - if [[ "${url}" =~ ^https?://(www|m|[a-z][a-z]-[a-z][a-z]).facebook.com/([^/]+/?(\?|$)|pages/[^/]+/[0-9]+/?(\?|$)|profile\.php\?id=[0-9]+(&|$)) ]] + if [[ "${url}" =~ ^https?://((www|m|[a-z][a-z]-[a-z][a-z]).)?facebook.com/([^/]+/?(\?|$)|pages/[^/]+/[0-9]+/?(\?|$)|profile\.php\?id=[0-9]+(&|$)) ]] then if [[ "${url}" == *profile.php* ]] then @@ -11,7 +11,7 @@ do else url="${url%%\?*}" fi - page="$(curl -sL -A 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36' -H 'Accept-Language: en-US,en;q=0.5' "https://www.${url#*.}")" + page="$(curl -sL -A 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36' -H 'Accept-Language: en-US,en;q=0.5' "https://www.facebook.com/${url#*facebook.com/}")" user="$(grep -Po ']*(?<=\s)data-key\s*=\s*"tab_home".*?' <<< "${page}" | grep -Po ']*(?<=\s)href="/\K[^/]+')" if [[ "${user}" ]] then