From 7c389f1fefa4ccaa113c3c75aab7396200b10039 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Sat, 19 Oct 2019 18:14:32 +0000 Subject: [PATCH] Add support for hashbang fragments on Twitter links --- website-extract-social-media | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website-extract-social-media b/website-extract-social-media index d234351..ad182f9 100755 --- a/website-extract-social-media +++ b/website-extract-social-media @@ -30,8 +30,8 @@ function fetch_n_extract { ) \ >( # Twitter - grep -Poi 'twitter\.com/(hashtag/)?[^/ <"'"'"']+' | \ - sed 's,^,https://,' | \ + grep -Poi 'twitter\.com/(#!/)?(hashtag/)?[^/ <"'"'"']+' | \ + sed 's,^twitter\.com/#!/,twitter.com/,; s,^,https://,' | \ grep -vi -e '^https://twitter\.com/home\?' -e '^https://twitter\.com/widgets\.js$' -e '^https://twitter\.com/share\?' -e '^https://twitter\.com/intent$' | \ sed 's,\([?&]\)ref_src=[^&]\+&\?,\1,; s,?$,,' ) \