#!/bin/bash
#
# Reads an ArchiveBot job log (or decompressed meta WARC) from stdin and
# prints every URL that was ignored using exactly PATTERN to stdout.
#
# Usage:
#   SCRIPT PATTERN <job.log
#   SCRIPT --test          # run the built-in self-test
#
# Exit status: 0 if at least one URL was printed, 1 otherwise (also 1 for
# usage errors and a failed self-test).

#######################################
# Filter wpull_plugin "Ignore" log lines on stdin by their ignore pattern.
#
# A line is selected when it contains the wpull_plugin "Ignore" marker and
# ends with " using pattern PATTERN". PATTERN is compared as a literal string
# against the full tail of the line, so patterns that contain whitespace or
# that look like numbers are matched exactly.
#
# Arguments: $1 - the ignore pattern, compared literally (not as a regex)
# Inputs:    log lines on stdin
# Outputs:   one ignored URL per matching line on stdout
# Returns:   0 if at least one URL was printed, 1 otherwise
#######################################
ignored_urls() {
  # The pattern travels through the environment rather than `awk -v`, because
  # -v would process backslash escapes and ignore patterns are regexes that
  # are full of backslashes. LC_ALL=C keeps index/substr byte-wise.
  IGNORED_URLS_PATTERN="$1" LC_ALL=C awk '
    BEGIN {
      marker = " - archivebot.pipeline.wpull_plugin - INFO - Ignore "
      # Concatenation yields a plain string, so the comparison below is
      # always a string comparison, never a numeric one.
      suffix = " using pattern " ENVIRON["IGNORED_URLS_PATTERN"]
      suffix_len = length(suffix)
    }
    {
      at = index($0, marker)
      if (at == 0) next

      url_start = at + length(marker)
      url_len = length($0) - suffix_len - url_start + 1
      # Negative length: the line is too short to hold marker + suffix.
      if (url_len < 0) next

      # The pattern must be the entire tail of the line, not just a prefix
      # of a longer pattern.
      if (substr($0, length($0) - suffix_len + 1) != suffix) next

      print substr($0, url_start, url_len)
      found = 1
    }
    END { exit(found ? 0 : 1) }
  '
}

#######################################
# Built-in self-test for ignored_urls.
# Outputs: "Success!" or "Fail!" on stdout
# Returns: 0 on success, 1 on failure
#######################################
self_test() {
  local log_prefix='2021-02-21 04:18:06,000 - archivebot.pipeline.wpull_plugin - INFO - Ignore '
  local actual

  # Only the line whose pattern is exactly /bar may be reported; /foo$,
  # /baz/bar and /barnope must all be rejected.
  actual=$(
    printf '%s\n' \
      "${log_prefix}https://example.org/foo using pattern /foo\$" \
      "${log_prefix}https://example.org/bar using pattern /bar" \
      "${log_prefix}https://example.org/baz/bar using pattern /baz/bar" \
      "${log_prefix}https://example.org/barnope using pattern /barnope" |
      ignored_urls '/bar'
  )
  if [[ "${actual}" != 'https://example.org/bar' ]]; then
    echo 'Fail!'
    return 1
  fi

  # Patterns containing whitespace must be matched as a whole.
  actual=$(
    printf '%s\n' \
      "${log_prefix}https://example.org/a%20b using pattern /a b" \
      "${log_prefix}https://example.org/b using pattern b" |
      ignored_urls '/a b'
  )
  if [[ "${actual}" != 'https://example.org/a%20b' ]]; then
    echo 'Fail!'
    return 1
  fi

  echo 'Success!'
  return 0
}

#######################################
# Entry point: dispatch to the self-test, print usage, or run the filter.
# Arguments: the script's command-line arguments
# Returns:   the script's exit status
#######################################
main() {
  if [[ "${1-}" == '--test' ]]; then
    self_test
    return $?
  fi

  # Refuse to run interactively: the log is expected on stdin.
  if [[ -t 0 || $# -ne 1 || "$1" == '--help' ]]; then
    echo "Usage: $0 PATTERN" >&2
    echo 'Reads an ArchiveBot job log (or decompressed meta WARC) from stdin, prints all URLs that were ignored using PATTERN to stdout.' >&2
    return 1
  fi

  ignored_urls "$1"
}

# Must stay the last command: its return value is the script's exit status.
main "$@"