|
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182 |
#!/bin/bash
# Usage: wpull2-requeue [ACTION] [FILENAME] ( [--where] URLPATTERN_OR_WHERE | --urls-from-stdin )
#
# Selects records from 'queued_urls' (joined with 'url_strings') in a SQLite database and either
# counts them, prints them, or resets them to status 'todo' with try_count = 0 and status_code = NULL.
#
# ACTION can be 'count' (default), 'print', or 'write'. On 'write', the number of modified records is printed.
# FILENAME defaults to 'wpull.db'
# URLPATTERN_OR_WHERE is URLPATTERN if --where isn't used or WHERE if it is.
# URLPATTERN uses SQLite's LIKE syntax with ESCAPE '\', i.e. % matches any number of characters, _ matches exactly one character, and a backslash can be used to escape these special characters.
#   In this mode, only records with status 'skipped' and try_count > 3 are selected.
#   Single quotes in the pattern are escaped automatically.
# WHERE is an arbitrary SQLite 'WHERE' condition, inserted verbatim into the query. The available tables are 'queued_urls' and 'url_strings', already joined together.
#   Use single quotes for string literals; recent sqlite3 shells reject double-quoted strings.
# When --urls-from-stdin is used, one URL per line is expected on stdin. The URL must be in normalised format.
#   If stdin yields no URLs, the script exits successfully without touching the database.

# --- ACTION -----------------------------------------------------------------
# The first argument is taken as ACTION when all four positionals are present,
# or when there are 2-3 arguments and the first one names a known action.
action=count
if [[ $# -eq 4 || ( $# -ge 2 && $# -le 3 && ( "$1" == 'count' || "$1" == 'print' || "$1" == 'write' ) ) ]]
then
  action="$1"
  shift
fi
case "${action}" in
  count | print | write) ;;
  *)
    # Previously an unknown action silently behaved like 'count'; reject it instead.
    echo "Error: invalid action '${action}' (expected count, print, or write)" >&2
    exit 1
    ;;
esac

# --- FILENAME ---------------------------------------------------------------
# With three arguments left the first must be FILENAME; with two it is FILENAME
# unless it is the --where flag (which consumes the second one).
if [[ $# -eq 3 || ( $# -eq 2 && "$1" != '--where' ) ]]
then
  filename="$1"
  shift
else
  filename=wpull.db
fi
if [[ ! -f "${filename}" ]]
then
  echo "Error: ${filename} does not exist or is not a regular file" >&2
  exit 1
fi

# --- Selection mode ---------------------------------------------------------
urlsfromstdin=
if [[ "$1" == '--urls-from-stdin' ]]
then
  urlsfromstdin=1
  shift
  if [[ $# -ne 0 ]]
  then
    echo "Error: invalid arguments" >&2
    exit 1
  fi
fi

where=
if [[ "$1" == '--where' ]]
then
  where=1
  shift
fi

if [[ "$1" == --* ]]
then
  echo "Error: Unknown option $1" >&2
  exit 1
fi

# Outside of --urls-from-stdin exactly one argument (URLPATTERN or WHERE) must
# remain; otherwise the query would be built from an empty or wrong value.
if [[ -z "${urlsfromstdin}" && $# -ne 1 ]]
then
  echo "Error: invalid arguments" >&2
  exit 1
fi

# --- Build the WHERE clause -------------------------------------------------
# String literals use single quotes (standard SQL). Embedded single quotes are
# doubled so that arbitrary patterns/URLs cannot break out of the literal.
squote="'"
if [[ "${where}" ]]
then
  where="$1"
elif [[ "${urlsfromstdin}" ]]
then
  # Turn each input line into a quoted SQL literal and join them with ", ".
  urls="$(sed "s/'/''/g; s/^/'/; s/\$/', /" | tr -d '\n' | sed 's/, $//')"
  if [[ -z "${urls}" ]]
  then
    exit 0
  fi
  where="url IN (${urls})"
else
  pattern="${1//${squote}/${squote}${squote}}"
  where="url LIKE '${pattern}' ESCAPE '\\' AND status = 'skipped' AND try_count > 3"
fi

query='FROM queued_urls JOIN url_strings ON url_string_id = url_strings.id WHERE '"${where}"

# --- Execute ----------------------------------------------------------------
case "${action}" in
  write)
    # changes() reports how many rows the UPDATE modified.
    sqlite3 "${filename}" <<<"UPDATE queued_urls SET status = 'todo', try_count = 0, status_code = NULL WHERE id IN (SELECT queued_urls.id ${query}); SELECT changes()"
    ;;
  print)
    sqlite3 "${filename}" <<<"SELECT queued_urls.*, url_strings.* ${query}"
    ;;
  *)
    sqlite3 "${filename}" <<<"SELECT COUNT(queued_urls.id) ${query}"
    ;;
esac
|