From cef61434a0b8d7b25406ce1d7c5ac806b7dc8af3 Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist
Date: Sat, 2 Apr 2022 17:46:06 +0000
Subject: [PATCH] Add --urls-from-stdin

---
Review notes (this area between '---' and the diff is ignored by git-am):
 * Adds a --urls-from-stdin mode as an alternative to URLPATTERN / --where.
   Every line read from stdin is wrapped in double quotes and the lines are
   joined with ", " into a condition of the form: url IN ("u1", "u2", ...).
 * Any argument that follows --urls-from-stdin is rejected with
   "Error: invalid arguments" and exit status 1.
 * If the generated list is empty, the script exits with status 0 right away.
 * NOTE(review): URLs are interpolated into the SQL text without escaping,
   which is why the usage comment forbids double quotes in them. The list also
   relies on the SQL engine treating double-quoted tokens as string values;
   confirm this holds for the sqlite3 build in use.
 * NOTE(review): this patch was recovered from a copy whose newlines and
   leading whitespace had been collapsed. Indentation was rebuilt with tabs;
   if the context does not match, apply with
   `git apply --ignore-whitespace` / `git am --ignore-whitespace`.

 wpull2-requeue | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/wpull2-requeue b/wpull2-requeue
index 71aa5c9..364ac25 100755
--- a/wpull2-requeue
+++ b/wpull2-requeue
@@ -1,11 +1,12 @@
 #!/bin/bash
-# Usage: wpull2-requeue [ACTION] [FILENAME] [--where] URLPATTERN_OR_WHERE
+# Usage: wpull2-requeue [ACTION] [FILENAME] ( [--where] URLPATTERN_OR_WHERE | --urls-from-stdin )
 # ACTION can be 'count' (default), 'print', or 'write'. On 'write', the number of modified records is printed.
 # FILENAME defaults to 'wpull.db'
 # URLPATTERN_OR_WHERE is URLPATTERN if --where isn't used or WHERE if it is.
 # URLPATTERN uses SQLite's LIKE syntax with ESCAPE "\", i.e. % matches any number of characters, _ matches exactly one character, and a backslash can be used to escape these special characters.
 # Must not contain quotes.
 # WHERE is an arbitrary SQLite 'WHERE' condition. The available tables are 'queued_urls' and 'url_strings', already joined together.
+# When --urls-from-stdin is used, one URL per line is expected on stdin. The URL must be in normalised format and not contain any double quotes.
 
 if [[ $# -eq 4 || ( $# -ge 2 && $# -le 3 && ( "$1" == 'count' || "$1" == 'print' || "$1" == 'write' )) ]]
 then
@@ -28,6 +29,18 @@ then
 	exit 1
 fi
 
+urlsfromstdin=
+if [[ "$1" == '--urls-from-stdin' ]]
+then
+	urlsfromstdin=1
+	shift
+	if [[ $# -ne 0 ]]
+	then
+		echo "Error: invalid arguments" >&2
+		exit 1
+	fi
+fi
+
 where=
 if [[ "$1" == '--where' ]]
 then
@@ -38,6 +51,14 @@ fi
 if [[ "${where}" ]]
 then
 	where="$1"
+elif [[ "${urlsfromstdin}" ]]
+then
+	urls="$(sed 's/^/"/; s/$/", /' | tr -d '\n' | sed 's/, $//')"
+	if [[ -z "${urls}" ]]
+	then
+		exit 0
+	fi
+	where="url IN (${urls})"
 else
 	where='url LIKE "'"$1"'" ESCAPE "\" AND status = "skipped" AND try_count > 3'
 fi