From f05a8a79bcf42999cdd9dbd7caa4d0799d990b2d Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist
Date: Mon, 29 Mar 2021 23:16:40 +0000
Subject: [PATCH] Clean up wpull DB commands

---
Review notes (this section is not part of the commit message):
- SQL string values are written as single-quoted literals; double-quoted
  text is an identifier in SQLite and is only accepted as a string through
  a legacy fallback.
- wpull2-url-origin doubles single quotes in the URL before placing it in
  the query, and stops tracing as soon as no parent id is returned.
- The blob ids on the "index" lines were not regenerated for this amended
  revision.

 wpull2-extract-remaining | 19 ++++++++++++++++---
 wpull2-url-origin        | 35 ++++++++++++++++++++++++++++++++++-
 2 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/wpull2-extract-remaining b/wpull2-extract-remaining
index 0aeba51..13a43f9 100755
--- a/wpull2-extract-remaining
+++ b/wpull2-extract-remaining
@@ -1,8 +1,21 @@
 #!/bin/bash
-# Usage: wpull2-extract-remaining FILENAME
-# FILENAME points to a wpull 2.x SQLite DB
+# Usage: wpull2-extract-remaining [FILENAME]
+# FILENAME points to a wpull 2.x SQLite DB; if not specified, defaults to wpull.db
 # Prints all remaining URLs from the DB on stdout
+
+if [[ $# -eq 1 ]]
+then
+	filename="$1"
+else
+	filename=wpull.db
+fi
+if [[ ! -f "${filename}" ]]
+then
+	echo "Error: ${filename} does not exist or is not a regular file" >&2
+	exit 1
+fi
+
 for status in in_progress todo error
 do
-	sqlite3 "$1" 'SELECT url_strings.url FROM queued_urls JOIN url_strings ON url_string_id = url_strings.id WHERE status = "'$status'"'
+	sqlite3 "${filename}" "SELECT url_strings.url FROM queued_urls JOIN url_strings ON url_string_id = url_strings.id WHERE status = '${status}'"
 done
diff --git a/wpull2-url-origin b/wpull2-url-origin
index 57d7672..9758f8a 100755
--- a/wpull2-url-origin
+++ b/wpull2-url-origin
@@ -1,3 +1,36 @@
 #!/bin/bash
+# Usage: wpull2-url-origin [FILENAME] URL
+# FILENAME defaults to wpull.db
 # Trace back where a URL was discovered, all the way back to the root
-url="$1"; curId=$(sqlite3 wpull.db 'SELECT id FROM url_strings WHERE url = "'"${url}"'"'); while :; do sqlite3 wpull.db 'SELECT queued_urls.*, url_strings.* FROM queued_urls JOIN url_strings ON queued_urls.url_string_id = url_strings.id WHERE url_strings.id = '$curId; IFS='|' read -r curId level < <(sqlite3 wpull.db 'SELECT parent_url_string_id, level FROM queued_urls WHERE url_string_id = '$curId); if [[ ${level} -eq 0 ]]; then break; fi done
+if [[ $# -eq 2 ]]
+then
+	filename="$1"
+	shift
+else
+	filename=wpull.db
+fi
+if [[ ! -f "${filename}" ]]
+then
+	echo "Error: ${filename} does not exist or is not a regular file" >&2
+	exit 1
+fi
+url="$1"
+# Double embedded single quotes so the URL is a valid SQL string literal
+quote="'"
+sqlUrl="${url//${quote}/${quote}${quote}}"
+curId=$(sqlite3 "${filename}" "SELECT id FROM url_strings WHERE url = '${sqlUrl}'")
+if [[ -z "${curId}" ]]
+then
+	echo "Error: ${url} not found" >&2
+	exit 1
+fi
+while :
+do
+	sqlite3 "${filename}" "SELECT queued_urls.*, url_strings.* FROM queued_urls JOIN url_strings ON queued_urls.url_string_id = url_strings.id WHERE url_strings.id = ${curId}"
+	IFS='|' read -r curId level < <(sqlite3 "${filename}" "SELECT parent_url_string_id, level FROM queued_urls WHERE url_string_id = ${curId}")
+	# Stop at the root (level 0) or when no parent id was returned
+	if [[ -z "${curId}" || "${level}" -eq 0 ]]
+	then
+		break
+	fi
+done