#!/bin/bash
#
# moinmoin-url-list - list URLs for the pages of a MoinMoin wiki.
#
# Usage: moinmoin-url-list FRONT_PAGE_URL
#
# Downloads <base>/TitleIndex, where <base> is FRONT_PAGE_URL with its last
# path segment removed, collects the href targets found between the
# "/TitleIndex?allpages=1" link and the id="pagebottom" marker, and prints,
# for every distinct target, the page URL itself followed by its
# ?action=info, ?action=info&general=1, ?action=raw and ?action=AttachFile
# variants (one URL per line, on stdout, in order of first appearance).
#
# NOTE(review): the extracted hrefs are appended to <base> verbatim and the
# start marker is matched as href="/TitleIndex?allpages=1", so this presumably
# only works for wikis served from the root of their host - confirm.
#
# Exit status: 0 on success; 1 on a usage error or if TitleIndex cannot be
# retrieved.
#
# Requires: curl, GNU grep (for -P), awk.
#
# Provenance: commit 2ccf28eb43642ecfe99e5f7ed7ad4e5f8c2fac7e
# ("Add moinmoin-url-list"), JustAnotherArchivist, 2023-12-24.

if [[ $# -ne 1 || "$1" != http* ]]; then
  printf 'Usage: %q FRONT_PAGE_URL\n' "$0" >&2
  exit 1
fi

url="$1"
if [[ "${url}" == *\?* ]]; then
  printf 'Error: URLs with query strings not supported\n' >&2
  exit 1
fi

# Reduce the front page URL to the wiki base by dropping the last path
# segment. Only do so when there is a path after the host: stripping a bare
# "https://host" would cut into the scheme separator and leave "https:/".
authority_and_path="${url#*://}"
if [[ "${authority_and_path}" == */* ]]; then
  url="${url%/*}"
fi

# Fetch the whole index before processing it so that a failed download is
# reported instead of silently producing an empty list with exit status 0.
# --fail makes curl return non-zero on HTTP error responses as well.
if ! title_index="$(curl --fail "${url}/TitleIndex")"; then
  printf 'Error: could not retrieve %s/TitleIndex\n' "${url}" >&2
  exit 1
fi

# 1. Keep everything from the first line holding the "all pages" link onwards
#    (the huge -A context is a "to end of input" idiom).
# 2. Keep everything up to the last line holding the page bottom marker
#    (same idiom with -B).
# 3. Extract every href value, one per output line.
# 4. Drop the "all pages" link itself.
# 5. In awk: strip a trailing "?action=AttachFile" so attachment links collapse
#    onto their page ("[?]" is an unambiguous literal question mark), skip
#    targets that were already seen, and print the URL variants.
grep -A 1000000 -F 'href="/TitleIndex?allpages=1"' <<<"${title_index}" \
  | grep -B 1000000 -F 'id="pagebottom"' \
  | grep -Po 'href="\K[^"]+' \
  | grep -Fxv '/TitleIndex?allpages=1' \
  | awk -v url="${url}" '
      {
        sub(/[?]action=AttachFile$/, "")
        if (seen[$0]++) {
          next
        }
        print url $0
        print url $0 "?action=info"
        print url $0 "?action=info&general=1"
        print url $0 "?action=raw"
        print url $0 "?action=AttachFile"
      }'