From cd0b3f621400db776e21916693d685a414294bd3 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Sat, 19 Oct 2019 12:51:56 +0000 Subject: [PATCH] Ignore /vi/* on YouTube (video thumbnails) --- website-extract-social-media | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/website-extract-social-media b/website-extract-social-media index 52f613a..db9cf73 100755 --- a/website-extract-social-media +++ b/website-extract-social-media @@ -42,7 +42,8 @@ function fetch_n_extract { >( # YouTube grep -Poi '(youtube\.com/((user|channel|embed)/)?[^/ <"'"'"']+|youtu\.be/[^/ <"'"'"']+)' | \ - awk '/^youtube/ { print "https://www." $0 } /^youtu\.be/ { print "https://" $0 }' + awk '/^youtube/ { print "https://www." $0 } /^youtu\.be/ { print "https://" $0 }' | \ + grep -vi -e '^https://www\.youtube\.com/vi$' ) \ >/dev/null } | awk '!seen[$0]++'