|
- #!/bin/bash
- # Usage: wpull2-url-origin [FILENAME] URL
- # FILENAME defaults to wpull.db
- # Trace back where a URL was discovered, all the way back to the root.
- #
- # Starting at URL and then for each of its ancestors, prints the joined
- # queued_urls/url_strings row in sqlite3's default output format, following
- # parent_url_string_id until a row with level 0 has been printed.
- # Exits 1 with a message on stderr if the argument count is wrong, FILENAME
- # is not a regular file, URL is not in url_strings, or the parent chain
- # yields something that is not a plain integer ID.
- if [[ $# -lt 1 || $# -gt 2 ]]
- then
- echo "Usage: wpull2-url-origin [FILENAME] URL" >&2
- exit 1
- fi
- if [[ $# -eq 2 ]]
- then
- filename="$1"
- shift
- else
- filename=wpull.db
- fi
- if [[ ! -f "${filename}" ]]
- then
- echo "Error: ${filename} does not exist or is not a regular file" >&2
- exit 1
- fi
- url="$1"
- # Embed the URL as a single-quoted SQL string literal with every embedded
- # single quote doubled. SQLite parses a double-quoted token as an identifier
- # first, so a URL containing a double quote, or one equal to a column name,
- # would break or subvert the lookup.
- sq="'"
- urlLiteral="${sq}${url//${sq}/${sq}${sq}}${sq}"
- curId=$(sqlite3 "${filename}" "SELECT id FROM url_strings WHERE url = ${urlLiteral}")
- if [[ -z "${curId}" ]]
- then
- echo "Error: ${url} not found" >&2
- exit 1
- fi
- while :
- do
- # curId is spliced into the SQL below, so only continue with a plain integer
- # (a NULL parent or a multi-row result would otherwise yield malformed SQL).
- if [[ ! "${curId}" =~ ^[0-9]+$ ]]
- then
- echo "Error: invalid URL string ID '${curId}' while tracing ${url}" >&2
- exit 1
- fi
- # Print the full record for the current URL.
- sqlite3 "${filename}" "SELECT queued_urls.*, url_strings.* FROM queued_urls JOIN url_strings ON queued_urls.url_string_id = url_strings.id WHERE url_strings.id = ${curId}"
- # Fetch the parent ID together with the level of the row just printed;
- # curId is overwritten with the parent for the next iteration.
- IFS='|' read -r curId level < <(sqlite3 "${filename}" "SELECT parent_url_string_id, level FROM queued_urls WHERE url_string_id = ${curId}")
- # Level 0 marks the root; an empty level (no queued_urls row) also compares
- # equal to 0 here and ends the trace.
- if [[ "${level}" -eq 0 ]]
- then
- break
- fi
- done
|