The little things give you away... A collection of various small helper stuff
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

42 lines
1.2 KiB

  1. #!/bin/bash
  2. if [[ "$1" == '--test' ]]
  3. then
  4. ## Self-test
  5. diff -q <("$0" '/bar' <<-'EOF'
  6. 2021-02-21 04:18:06,000 - archivebot.pipeline.wpull_plugin - INFO - Ignore https://example.org/foo using pattern /foo$
  7. 2021-02-21 04:18:06,000 - archivebot.pipeline.wpull_plugin - INFO - Ignore https://example.org/bar using pattern /bar
  8. 2021-02-21 04:18:06,000 - archivebot.pipeline.wpull_plugin - INFO - Ignore https://example.org/baz/bar using pattern /baz/bar
  9. 2021-02-21 04:18:06,000 - archivebot.pipeline.wpull_plugin - INFO - Ignore https://example.org/barnope using pattern /barnope
  10. EOF
  11. ) <(cat <<-EOF
  12. https://example.org/bar
  13. EOF
  14. ) >/dev/null
  15. if [[ $? -eq 0 ]]
  16. then
  17. echo 'Success!'
  18. exit 0
  19. else
  20. echo 'Fail!'
  21. exit 1
  22. fi
  23. fi
  24. if [[ -t 0 || $# -ne 1 || "$1" == '--help' ]]
  25. then
  26. echo "Usage: $0 PATTERN" >&2
  27. echo 'Reads an ArchiveBot job log (or decompressed meta WARC) from stdin, prints all URLs that were ignored using PATTERN to stdout.' >&2
  28. exit 1
  29. fi
  30. pattern="$1"
  31. {
  32. printf "%s\n" "${pattern}"
  33. grep -F ' - archivebot.pipeline.wpull_plugin - INFO - Ignore ' | \
  34. grep -F " using pattern ${pattern}"
  35. } | \
  36. awk 'NR==1 { pattern = $0; next; } $NF == pattern' | \
  37. grep -Po ' Ignore \K.*?(?= using pattern )'