From 4f34753788340f43656720c5028f66fb0d810fc3 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Sat, 19 Oct 2019 13:09:20 +0000 Subject: [PATCH] Add support for Instagram posts and ignore spurious links from the CDN --- website-extract-social-media | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/website-extract-social-media b/website-extract-social-media index 9676e37..681c0a2 100755 --- a/website-extract-social-media +++ b/website-extract-social-media @@ -19,8 +19,9 @@ function fetch_n_extract { ) \ >( # Instagram - grep -Poi 'instagram\.com/[^/ <"'"'"']+' | \ - sed 's,^,https://www.,' + grep -Poi 'instagram\.com/(p/)?[^/ <"'"'"']+' | \ + sed 's,^,https://www.,' | \ + grep -Pvi -e '^https://www\.instagram\.com/v?p$' ) \ >( # Telegram