Browse Source

Add --urls-from-stdin

master
JustAnotherArchivist 2 years ago
parent
commit
cef61434a0
1 changed files with 22 additions and 1 deletions
  1. +22
    -1
      wpull2-requeue

+ 22
- 1
wpull2-requeue View File

@@ -1,11 +1,12 @@
#!/bin/bash
# Usage: wpull2-requeue [ACTION] [FILENAME] [--where] URLPATTERN_OR_WHERE
# Usage: wpull2-requeue [ACTION] [FILENAME] ( [--where] URLPATTERN_OR_WHERE | --urls-from-stdin )
# ACTION can be 'count' (default), 'print', or 'write'. On 'write', the number of modified records is printed.
# FILENAME defaults to 'wpull.db'
# URLPATTERN_OR_WHERE is URLPATTERN if --where isn't used or WHERE if it is.
# URLPATTERN uses SQLite's LIKE syntax with ESCAPE "\", i.e. % matches any number of characters, _ matches exactly one character, and a backslash can be used to escape these special characters.
# Must not contain quotes.
# WHERE is an arbitrary SQLite 'WHERE' condition. The available tables are 'queued_urls' and 'url_strings', already joined together.
# When --urls-from-stdin is used, one URL per line is expected on stdin. The URL must be in normalised format and not contain any double quotes.

if [[ $# -eq 4 || ( $# -ge 2 && $# -le 3 && ( "$1" == 'count' || "$1" == 'print' || "$1" == 'write' )) ]]
then
@@ -28,6 +29,18 @@ then
exit 1
fi

# Mode switch: when the next argument is --urls-from-stdin, record that in
# ${urlsfromstdin} (non-empty means "enabled") and consume the argument.
# The flag must be the last argument; anything after it is rejected.
urlsfromstdin=
case "$1" in
	'--urls-from-stdin')
		urlsfromstdin=1
		shift
		# Nothing may follow the flag.
		if (( $# != 0 ))
		then
			echo "Error: invalid arguments" >&2
			exit 1
		fi
		;;
esac

where=
if [[ "$1" == '--where' ]]
then
@@ -38,6 +51,14 @@ fi
# Build the SQL condition in ${where} from the selected mode:
#   * ${where} already non-empty (--where given): $1 is taken verbatim as a raw
#     SQL condition supplied by the caller.
#   * ${urlsfromstdin} non-empty: read one URL per line from stdin and match
#     them exactly with "url IN (...)". Exits 0 if stdin yields no URLs.
#   * otherwise: $1 is a LIKE pattern, restricted to skipped records with more
#     than 3 tries.
# Values are spliced into double-quoted SQL tokens, so any embedded double
# quote is doubled first ("" is the SQL escape for a literal " inside such a
# token). Without this, a stray quote would terminate the token early and
# produce broken or injected SQL. Input without double quotes is unaffected.
if [[ "${where}" ]]
then
	where="$1"
elif [[ "${urlsfromstdin}" ]]
then
	# Per line: escape quotes, wrap in quotes, append ", "; then join all lines
	# and drop the trailing separator.
	urls="$(sed 's/"/""/g; s/^/"/; s/$/", /' | tr -d '\n' | sed 's/, $//')"
	if [[ -z "${urls}" ]]
	then
		# No URLs on stdin: nothing to match, and "IN ()" would be invalid.
		exit 0
	fi
	where="url IN (${urls})"
else
	# Keep the quote character in a variable to sidestep quoting quirks inside
	# the parameter expansion.
	dq='"'
	urlpattern="${1//${dq}/${dq}${dq}}"
	where='url LIKE "'"${urlpattern}"'" ESCAPE "\" AND status = "skipped" AND try_count > 3'
fi


Loading…
Cancel
Save