#!/bin/bash
# Usage: wpull2-url-origin [FILENAME] URL
# FILENAME defaults to wpull.db
# Trace back where a URL was discovered, all the way back to the root
#
# Starting at URL, prints the joined queued_urls/url_strings row for the
# current URL, then moves on to the row's parent_url_string_id, and repeats
# until a row with level 0 has been printed.
#
# Exit status: 0 when a level-0 row was reached, 1 on usage errors, a missing
# database file, an unknown URL, a failed query, or a broken parent chain.

if [[ $# -lt 1 || $# -gt 2 ]]; then
  echo "Usage: wpull2-url-origin [FILENAME] URL" >&2
  exit 1
fi

if [[ $# -eq 2 ]]; then
  filename="$1"
  shift
else
  filename=wpull.db
fi

if [[ ! -f "${filename}" ]]; then
  echo "Error: ${filename} does not exist or is not a regular file" >&2
  exit 1
fi

url="$1"

# Build a proper SQL string literal: single-quoted, with embedded single
# quotes doubled. Double quotes must not be used here because SQLite treats a
# double-quoted token as an identifier first (so a URL equal to a column name
# would compare the column with itself), and a URL containing a quote
# character would otherwise alter the statement.
sq="'"
urlLiteral="${sq}${url//${sq}/${sq}${sq}}${sq}"

if ! curId=$(sqlite3 "${filename}" "SELECT id FROM url_strings WHERE url = ${urlLiteral}"); then
  echo "Error: failed to query ${filename}" >&2
  exit 1
fi

if [[ -z "${curId}" ]]; then
  echo "Error: ${url} not found" >&2
  exit 1
fi

# The ID is interpolated into the statements below, so insist on a single
# plain integer (this also rejects multi-line output from duplicate rows).
if [[ ! "${curId}" =~ ^[0-9]+$ ]]; then
  echo "Error: unexpected ID for ${url}: ${curId}" >&2
  exit 1
fi

while :; do
  # Print the full record for the current URL.
  if ! sqlite3 "${filename}" "SELECT queued_urls.*, url_strings.* FROM queued_urls JOIN url_strings ON queued_urls.url_string_id = url_strings.id WHERE url_strings.id = ${curId}"; then
    echo "Error: failed to query ${filename}" >&2
    exit 1
  fi

  # Fetch the parent ID and the level. sqlite3 separates columns with '|';
  # a NULL parent is printed as an empty field.
  parentId=
  level=
  if ! IFS='|' read -r parentId level < <(sqlite3 "${filename}" "SELECT parent_url_string_id, level FROM queued_urls WHERE url_string_id = ${curId}"); then
    # No output at all: there is no queued_urls row for this ID. Report it
    # instead of treating the empty level as 0 and stopping silently.
    echo "Error: no queued_urls entry for URL ID ${curId}" >&2
    exit 1
  fi

  if [[ ! "${level}" =~ ^[0-9]+$ ]]; then
    echo "Error: unexpected level for URL ID ${curId}: ${level}" >&2
    exit 1
  fi

  # Level 0 marks the root of the chain; force base 10 so that a value with
  # leading zeros is not parsed as octal.
  if (( 10#${level} == 0 )); then
    break
  fi

  # A non-root row must point at a parent; otherwise the next query would be
  # malformed.
  if [[ ! "${parentId}" =~ ^[0-9]+$ ]]; then
    echo "Error: URL ID ${curId} has level ${level} but no valid parent" >&2
    exit 1
  fi

  curId="${parentId}"
done