|
@@ -0,0 +1,41 @@ |
|
|
|
|
|
#!/bin/bash
# Prints the URLs that an ArchiveBot job log reports as ignored because of one
# exact ignore pattern. When the first argument is `--test`, a built-in
# self-test runs instead and the script exits with its result.

#######################################
# Feeds a small canned log to the filter and checks what comes back.
# Arguments:
#   $@ - optional command (with any leading arguments) to test. It is invoked
#        with '/bar' appended and receives the sample log on stdin. Without
#        arguments, this script itself is re-run through the current bash.
# Outputs:
#   'Success!' or 'Fail!' on stdout.
# Returns:
#   0 if the command printed exactly the expected URL, 1 otherwise.
#######################################
run_self_test() {
  local -a filter_cmd=("$@")
  if (( ${#filter_cmd[@]} == 0 )); then
    # Going through "$BASH" keeps the self-test usable when the script was
    # started as `bash script --test` and is not executable or not on PATH.
    filter_cmd=("$BASH" "$0")
  fi

  # Only the second line was ignored using exactly '/bar'. The others carry an
  # unrelated pattern, a pattern that merely ends in '/bar', and a pattern
  # that merely starts with '/bar'.
  local -a sample_log=(
    '2021-02-21 04:18:06,000 - archivebot.pipeline.wpull_plugin - INFO - Ignore https://example.org/foo using pattern /foo$'
    '2021-02-21 04:18:06,000 - archivebot.pipeline.wpull_plugin - INFO - Ignore https://example.org/bar using pattern /bar'
    '2021-02-21 04:18:06,000 - archivebot.pipeline.wpull_plugin - INFO - Ignore https://example.org/baz/bar using pattern /baz/bar'
    '2021-02-21 04:18:06,000 - archivebot.pipeline.wpull_plugin - INFO - Ignore https://example.org/barnope using pattern /barnope'
  )
  local -r expected_url='https://example.org/bar'

  # diff's exit status is tested directly rather than via a later $? check;
  # its own output is discarded because only equality matters here.
  if diff -q \
    <(printf '%s\n' "${sample_log[@]}" | "${filter_cmd[@]}" '/bar') \
    <(printf '%s\n' "${expected_url}") >/dev/null; then
    echo 'Success!'
    return 0
  fi

  echo 'Fail!'
  return 1
}

if [[ "$1" == '--test' ]]; then
  run_self_test
  # Propagate the self-test result (0 or 1) as the script's exit status.
  exit "$?"
fi
|
|
|
|
|
|
|
|
|
|
|
#######################################
# Writes the two-line usage text to stderr.
#######################################
print_usage() {
  {
    echo "Usage: $0 PATTERN"
    echo 'Reads an ArchiveBot job log (or decompressed meta WARC) from stdin, prints all URLs that were ignored using PATTERN to stdout.'
  } >&2
}

# Refuse to run when stdin is a terminal (nothing was piped in), when the
# argument count is not exactly one, or when help was requested.
if [[ -t 0 ]] || (( $# != 1 )) || [[ "$1" == '--help' ]]; then
  print_usage
  exit 1
fi
|
|
|
|
|
|
|
|
|
|
|
#######################################
# Filters an ArchiveBot log read from stdin down to the URLs that were ignored
# using exactly the given pattern.
# Arguments:
#   $1 - ignore pattern, compared literally (byte for byte) against the text
#        that follows " using pattern " at the end of each log line.
# Outputs:
#   One matching URL per line on stdout.
# Returns:
#   2 (without reading stdin) unless exactly one argument is given; otherwise
#   the exit status of the final grep stage.
#######################################
extract_ignored_urls() {
  if (( $# != 1 )); then
    echo 'extract_ignored_urls: expected exactly one PATTERN argument' >&2
    return 2
  fi

  # Stage 1: cheap fixed-string prefilter for "Ignore" log lines.
  # Stage 2: keep only lines whose tail is exactly " using pattern PATTERN".
  #   Comparing the whole suffix as a string (instead of the last
  #   whitespace-separated field) makes patterns containing spaces work,
  #   rejects logged patterns that merely end in PATTERN, and avoids awk's
  #   numeric comparison of number-like fields. PATTERN travels through the
  #   environment because `awk -v` would process backslash escapes in it.
  #   LC_ALL=C makes length()/substr() operate on bytes.
  # Stage 3: cut out the URL between " Ignore " and " using pattern ".
  grep -F ' - archivebot.pipeline.wpull_plugin - INFO - Ignore ' |
    LC_ALL=C wanted_pattern="$1" awk '
      BEGIN {
        suffix = " using pattern " ENVIRON["wanted_pattern"]
        suffix_len = length(suffix)
      }
      {
        line_len = length($0)
        if (line_len >= suffix_len && substr($0, line_len - suffix_len + 1) == suffix) {
          print
        }
      }
    ' |
    grep -Po ' Ignore \K.*?(?= using pattern )'
}

extract_ignored_urls "$@"