#!/bin/bash
# Usage: wpull2-requeue [ACTION] [FILENAME] ( [--where] URLPATTERN_OR_WHERE | --urls-from-stdin )
#
# Selects records from 'queued_urls' (joined with 'url_strings') in an SQLite
# database and counts, prints, or requeues them.
#
# ACTION can be 'count' (default), 'print', or 'write'.
#   count: print the number of matching records.
#   print: print the matching records.
#   write: set status = 'todo', try_count = 0, status_code = NULL on the
#          matching records and print the number of modified records.
# FILENAME defaults to 'wpull.db'
# URLPATTERN_OR_WHERE is URLPATTERN if --where isn't used or WHERE if it is.
# URLPATTERN uses SQLite's LIKE syntax with ESCAPE '\', i.e. % matches any
#   number of characters, _ matches exactly one character, and a backslash can
#   be used to escape these special characters. Only records with status
#   'skipped' and try_count > 3 are matched. Single quotes in the pattern are
#   escaped automatically.
# WHERE is an arbitrary SQLite 'WHERE' condition, inserted into the query
#   verbatim. The available tables are 'queued_urls' and 'url_strings', already
#   joined together. Use single-quoted string literals in it: the sqlite3 shell
#   no longer accepts double-quoted string literals by default since 3.41.0.
# When --urls-from-stdin is used, one URL per line is expected on stdin. The
#   URL must be in normalised format. Single quotes in URLs are escaped
#   automatically.

# Print an error message to stderr and terminate with exit status 1.
die() {
  printf '%s\n' "Error: $*" >&2
  exit 1
}

# Write $1 to stdout as an SQL string literal: wrapped in single quotes with
# every embedded single quote doubled. SQLite has no backslash escapes inside
# string literals, so doubling the quote is the only escaping required.
sql_quote() {
  local sq="'"
  printf '%s' "${sq}${1//${sq}/${sq}${sq}}${sq}"
}

# Parse the command line, build the query, and hand it to sqlite3.
# The exit status is that of sqlite3, or 1 on a usage error, or 0 when
# --urls-from-stdin received no input.
main() {
  local action filename where urls query
  local urls_from_stdin='' use_where=''

  # The longest valid form is: ACTION FILENAME --where WHERE.
  if [[ $# -lt 1 || $# -gt 4 ]]; then
    die 'invalid arguments'
  fi

  # With four arguments the first one has to be the action; with two or three
  # it is only taken as the action if it is one of the known keywords.
  if [[ $# -eq 4 || ( $# -ge 2 && $# -le 3 && ( "$1" == 'count' || "$1" == 'print' || "$1" == 'write' ) ) ]]; then
    action="$1"
    shift
  else
    action=count
  fi
  case "${action}" in
    count | print | write) ;;
    # Without this, a mistyped action in the four-argument form would be
    # silently treated as 'count'.
    *) die "Unknown action ${action}" ;;
  esac

  # Three remaining arguments can only be FILENAME --where WHERE; two are
  # either FILENAME plus one argument, or --where WHERE without a FILENAME.
  if [[ $# -eq 3 || ( $# -eq 2 && "$1" != '--where' ) ]]; then
    filename="$1"
    shift
  else
    filename=wpull.db
  fi
  if [[ ! -f "${filename}" ]]; then
    die "${filename} does not exist or is not a regular file"
  fi

  if [[ "${1-}" == '--urls-from-stdin' ]]; then
    urls_from_stdin=1
    shift
    if [[ $# -ne 0 ]]; then
      die 'invalid arguments'
    fi
  fi

  if [[ "${1-}" == '--where' ]]; then
    use_where=1
    shift
  fi

  if [[ "${1-}" == --* ]]; then
    die "Unknown option $1"
  fi

  # Unless the URLs come from stdin, exactly one argument (the pattern or the
  # WHERE condition) must be left at this point.
  if [[ -z "${urls_from_stdin}" && $# -ne 1 ]]; then
    die 'invalid arguments'
  fi

  if [[ -n "${use_where}" ]]; then
    where="$1"
  elif [[ -n "${urls_from_stdin}" ]]; then
    # Turn each input line into a quoted SQL string literal followed by ', ',
    # join everything into a single line, and drop the trailing separator.
    urls="$(sed "s/'/''/g; s/^/'/; s/\$/', /" | tr -d '\n' | sed 's/, $//')"
    if [[ -z "${urls}" ]]; then
      # No URLs given: nothing to do.
      exit 0
    fi
    where="url IN (${urls})"
  else
    where="url LIKE $(sql_quote "$1") ESCAPE '\\' AND status = 'skipped' AND try_count > 3"
  fi

  query="FROM queued_urls JOIN url_strings ON url_string_id = url_strings.id WHERE ${where}"

  case "${action}" in
    write)
      sqlite3 "${filename}" <<<"UPDATE queued_urls SET status = 'todo', try_count = 0, status_code = NULL WHERE id IN (SELECT queued_urls.id ${query}); SELECT changes()"
      ;;
    print)
      sqlite3 "${filename}" <<<"SELECT queued_urls.*, url_strings.* ${query}"
      ;;
    *)
      sqlite3 "${filename}" <<<"SELECT COUNT(queued_urls.id) ${query}"
      ;;
  esac
}

main "$@"