Browse Source

Clean up wpull DB commands

master
JustAnotherArchivist 3 years ago
parent
commit
f05a8a79bc
2 changed files with 46 additions and 4 deletions
  1. +16
    -3
      wpull2-extract-remaining
  2. +30
    -1
      wpull2-url-origin

+ 16
- 3
wpull2-extract-remaining View File

@@ -1,8 +1,21 @@
#!/bin/bash
# Usage: wpull2-extract-remaining [FILENAME]
# FILENAME points to a wpull 2.x SQLite DB; if not specified, defaults to wpull.db
# Prints all remaining URLs from the DB on stdout, i.e. the URLs of all
# queued_urls rows whose status is in_progress, todo, or error (in that order).
# Exits non-zero if the DB file is missing or a query fails.

if [[ $# -eq 1 ]]
then
  filename="$1"
else
  filename=wpull.db
fi
if [[ ! -f "${filename}" ]]
then
  echo "Error: ${filename} does not exist or is not a regular file" >&2
  exit 1
fi

for status in in_progress todo error
do
  # Always query the validated ${filename}, never "$1" directly: "$1" is empty
  # when the default DB is in use. The status is written as a single-quoted
  # SQL string literal; double quotes denote identifiers in SQL and are only
  # accepted as strings through a legacy SQLite fallback.
  sqlite3 "${filename}" "SELECT url_strings.url FROM queued_urls JOIN url_strings ON url_string_id = url_strings.id WHERE status = '${status}'" || exit $?
done

+ 30
- 1
wpull2-url-origin View File

@@ -1,3 +1,32 @@
#!/bin/bash
# Usage: wpull2-url-origin [FILENAME] URL
# FILENAME defaults to wpull.db
# Trace back where a URL was discovered, all the way back to the root
# For each step, prints the joined queued_urls/url_strings row on stdout,
# starting with URL itself and following parent_url_string_id until a row
# with level 0 is reached.

if [[ $# -lt 1 ]]
then
  echo "Usage: wpull2-url-origin [FILENAME] URL" >&2
  exit 1
fi
if [[ $# -eq 2 ]]
then
  filename="$1"
  shift
else
  filename=wpull.db
fi
if [[ ! -f "${filename}" ]]
then
  echo "Error: ${filename} does not exist or is not a regular file" >&2
  exit 1
fi
url="$1"

# Embed the URL as a single-quoted SQL string literal with embedded single
# quotes doubled, so that quotes in the URL can neither break the query nor
# inject SQL.
sq="'"
sqlUrl=${url//$sq/$sq$sq}
curId=$(sqlite3 "${filename}" "SELECT id FROM url_strings WHERE url = '${sqlUrl}'") || exit $?
if [[ -z "${curId}" ]]
then
  echo "Error: ${url} not found" >&2
  exit 1
fi

while :
do
  # Print the full record for the current URL.
  sqlite3 "${filename}" "SELECT queued_urls.*, url_strings.* FROM queued_urls JOIN url_strings ON queued_urls.url_string_id = url_strings.id WHERE url_strings.id = ${curId}" || exit $?
  # Step to the parent; sqlite3's default list output separates columns with '|'.
  IFS='|' read -r curId level < <(sqlite3 "${filename}" "SELECT parent_url_string_id, level FROM queued_urls WHERE url_string_id = ${curId}")
  # Stop at the root (level 0), and also when there is no parent or no row to
  # continue from, instead of sending a malformed query to sqlite3.
  if [[ -z "${curId}" || -z "${level}" || "${level}" -eq 0 ]]
  then
    break
  fi
done

Loading…
Cancel
Save