|
#!/bin/bash
#
# Print the URLs that an ArchiveBot job ignored because of one specific
# ignore pattern.
#
# Usage:
#   SCRIPT PATTERN <job-log     print the URLs ignored using PATTERN
#   SCRIPT --test               run the built-in self-test
#
# Input (stdin): an ArchiveBot job log (or decompressed meta WARC) containing
# lines of the form
#   <timestamp> - archivebot.pipeline.wpull_plugin - INFO - Ignore URL using pattern PATTERN
# Output (stdout): one URL per matching line.
#
# PATTERN is compared as a literal string against the pattern recorded in the
# log line; it is never evaluated as a regular expression here.
# The exit status of a normal run is that of the final grep: 0 if at least one
# URL was printed, 1 if none was.

# Print the usage text to stderr.
usage() {
  echo "Usage: $0 PATTERN" >&2
  echo 'Reads an ArchiveBot job log (or decompressed meta WARC) from stdin, prints all URLs that were ignored using PATTERN to stdout.' >&2
}

# Read log lines from stdin and print the URL of every "Ignore" line whose
# recorded pattern is exactly $1.
extract_ignored_urls() {
  local pattern="$1"

  {
    # The pattern travels to awk as the first input line instead of via
    # `awk -v`, because -v assignments undergo backslash-escape processing,
    # which would corrupt regex patterns such as '\.jpg$'.
    printf '%s\n' "${pattern}"
    # Cheap fixed-string pre-filters; the second one is only a prefix match
    # (PATTERN '/bar' also lets '/barnope' through), so awk makes the final
    # exact decision below.
    grep -F -- ' - archivebot.pipeline.wpull_plugin - INFO - Ignore ' \
      | grep -F -- " using pattern ${pattern}"
  } \
    | awk '
        # Line 1 is the pattern itself. Building the suffix by concatenation
        # makes it a plain string, so the comparison below is always a string
        # comparison, never a numeric one (pattern "1" must not equal "1.0").
        NR == 1 { suffix = " using pattern " $0; next }

        # Keep the line only if it ends with the exact suffix. Comparing the
        # whole suffix rather than the last field also handles patterns that
        # contain whitespace.
        length($0) >= length(suffix) &&
          substr($0, length($0) - length(suffix) + 1) == suffix
      ' \
    | grep -Po ' Ignore \K.*?(?= using pattern )'
}

# Emit the fixture log used by the self-test.
sample_log() {
  cat <<'EOF'
2021-02-21 04:18:06,000 - archivebot.pipeline.wpull_plugin - INFO - Ignore https://example.org/foo using pattern /foo$
2021-02-21 04:18:06,000 - archivebot.pipeline.wpull_plugin - INFO - Ignore https://example.org/bar using pattern /bar
2021-02-21 04:18:06,000 - archivebot.pipeline.wpull_plugin - INFO - Ignore https://example.org/baz/bar using pattern /baz/bar
2021-02-21 04:18:06,000 - archivebot.pipeline.wpull_plugin - INFO - Ignore https://example.org/barnope using pattern /barnope
2021-02-21 04:18:06,000 - archivebot.pipeline.wpull_plugin - INFO - Ignore https://example.org/space using pattern /a b
2021-02-21 04:18:06,000 - archivebot.pipeline.wpull_plugin - INFO - Ignore https://example.org/one using pattern 1
2021-02-21 04:18:06,000 - archivebot.pipeline.wpull_plugin - INFO - Ignore https://example.org/one-point-zero using pattern 1.0
EOF
}

# Run this script on the fixture log with pattern $1 and check that its
# stdout equals $2. Returns non-zero (with a diagnostic on stderr) otherwise.
expect_urls() {
  local pattern="$1" expected="$2" actual

  actual="$(sample_log | "$0" "${pattern}")"
  if [[ "${actual}" != "${expected}" ]]; then
    printf 'Pattern %q: expected %q, got %q\n' \
      "${pattern}" "${expected}" "${actual}" >&2
    return 1
  fi
}

# Self-test: prints 'Success!' and returns 0 if every case passes, otherwise
# prints 'Fail!' and returns 1.
self_test() {
  local ok=1

  # Exact match only: neither '/baz/bar' nor '/barnope' may be selected.
  expect_urls '/bar' 'https://example.org/bar' || ok=0
  # Patterns containing whitespace must be matched as a whole.
  expect_urls '/a b' 'https://example.org/space' || ok=0
  # Numeric-looking patterns must be compared as strings ('1' is not '1.0').
  expect_urls '1' 'https://example.org/one' || ok=0

  if (( ok )); then
    echo 'Success!'
    return 0
  fi
  echo 'Fail!'
  return 1
}

main() {
  if [[ "$1" == '--test' ]]; then
    self_test
    exit "$?"
  fi

  # Refuse to run interactively (stdin is a terminal), with the wrong number
  # of arguments, or when help is requested.
  if [[ -t 0 || $# -ne 1 || "$1" == '--help' ]]; then
    usage
    exit 1
  fi

  extract_ignored_urls "$1"
}

main "$@"
|