|
|
@@ -1,3 +1,32 @@ |
|
|
|
#!/bin/bash |
|
|
|
# Usage: wpull2-url-origin [FILENAME] URL |
|
|
|
# FILENAME defaults to wpull.db |
|
|
|
# Trace back where a URL was discovered, all the way back to the root |
|
|
|
# Trace where the URL given as $1 was discovered, using ./wpull.db.
# Prints the joined queued_urls/url_strings row for the URL, then for each
# ancestor in turn, stopping once a row with level 0 has been printed.
url="$1"

# Embed the URL as a single-quoted SQL string literal with embedded single
# quotes doubled. Double-quoted tokens are identifiers in SQLite and are only
# treated as strings as a legacy fallback, so they are not safe for values.
sq="'"
url_sql=${url//${sq}/${sq}${sq}}
curId=$(sqlite3 wpull.db "SELECT id FROM url_strings WHERE url = '${url_sql}'")

while :; do
  # Show the current URL's queue entry together with its url_strings row.
  sqlite3 wpull.db 'SELECT queued_urls.*, url_strings.* FROM queued_urls JOIN url_strings ON queued_urls.url_string_id = url_strings.id WHERE url_strings.id = '"${curId}"

  # Step to the parent; sqlite3 separates output columns with '|'.
  IFS='|' read -r curId level < <(sqlite3 wpull.db 'SELECT parent_url_string_id, level FROM queued_urls WHERE url_string_id = '"${curId}")

  # An empty level (no row returned) also compares equal to 0 here.
  if [[ ${level} -eq 0 ]]; then
    break
  fi
done
|
|
|
# Parse the command line: an optional database FILENAME followed by the URL.
# Any other argument count is reported as a usage error instead of being
# silently misread (e.g. a filename being looked up as the URL).
case $# in
  1)
    filename=wpull.db
    ;;
  2)
    filename="$1"
    shift
    ;;
  *)
    printf 'Usage: %s [FILENAME] URL\n' "${0##*/}" >&2
    exit 1
    ;;
esac

# Refuse to continue unless the database path is an existing regular file.
if [[ ! -f "${filename}" ]]; then
  printf '%s\n' "Error: ${filename} does not exist or is not a regular file" >&2
  exit 1
fi

url="$1"

# Embed the URL as a single-quoted SQL string literal with embedded single
# quotes doubled. Double-quoted tokens are identifiers in SQLite and are only
# treated as strings as a legacy fallback, so they are not safe for values.
sq="'"
url_sql=${url//${sq}/${sq}${sq}}
curId=$(sqlite3 "${filename}" "SELECT id FROM url_strings WHERE url = '${url_sql}'")

# An empty result means the URL has no row in url_strings (or the query failed).
if [[ -z "${curId}" ]]; then
  printf '%s\n' "Error: ${url} not found" >&2
  exit 1
fi

# Walk up the discovery chain, printing one row per step.
while :; do
  # Show the current URL's queue entry together with its url_strings row.
  sqlite3 "${filename}" 'SELECT queued_urls.*, url_strings.* FROM queued_urls JOIN url_strings ON queued_urls.url_string_id = url_strings.id WHERE url_strings.id = '"${curId}"

  # Step to the parent; sqlite3 separates output columns with '|'.
  IFS='|' read -r curId level < <(sqlite3 "${filename}" 'SELECT parent_url_string_id, level FROM queued_urls WHERE url_string_id = '"${curId}")

  # Stop after printing a level-0 row. An empty level (no row returned) also
  # compares equal to 0. A missing parent id ends the walk as well, so the
  # next iteration never issues a query with nothing after the '='.
  if [[ -z "${curId}" || "${level}" -eq 0 ]]; then
    break
  fi
done