|
|
@@ -0,0 +1,64 @@ |
|
|
|
#!/usr/bin/env python3 |
|
|
|
import http.client |
|
|
|
import json |
|
|
|
import string |
|
|
|
import sys |
|
|
|
import xml.etree.ElementTree |
|
|
|
|
|
|
|
|
|
|
|
# Characters allowed as the first character of an item identifier.
_IDENTIFIER_ALNUM = string.ascii_letters + string.digits
# Characters allowed in the remainder of an item identifier.
_IDENTIFIER_TAIL = _IDENTIFIER_ALNUM + '_-.'
# Longest identifier this script accepts.
_IDENTIFIER_MAX_LENGTH = 100


def fail(message):
    """Print *message* to stderr and terminate the script with exit status 1."""
    print(message, file = sys.stderr)
    sys.exit(1)


def is_valid_identifier(item):
    """Return True if *item* passes this script's identifier check.

    An identifier is accepted when it is non-empty, at most 100 characters
    long, starts with an ASCII letter or digit, and otherwise contains only
    ASCII letters, digits, '_', '-' and '.'.
    """
    # The emptiness test must come first: indexing item[0] on '' would raise.
    if not item or len(item) > _IDENTIFIER_MAX_LENGTH:
        return False
    if item[0] not in _IDENTIFIER_ALNUM:
        return False
    return all(char in _IDENTIFIER_TAIL for char in item[1:])


def fetch(host, path):
    """GET https://*host**path* and return (status, reason, body).

    The body is only read for a 200 response; otherwise it is b''. The
    connection is always closed before returning.
    """
    conn = http.client.HTTPSConnection(host)
    try:
        conn.request('GET', path)
        response = conn.getresponse()
        body = response.read() if response.status == 200 else b''
        return response.status, response.reason, body
    finally:
        conn.close()


def check_files_xml(document, filename, sha1, server):
    """Check one server's _files.xml listing for *filename* and its SHA-1.

    document: the XML listing, as str or bytes.
    filename: value of the 'name' attribute of the <file> element to look up.
    sha1:     expected text of that element's <sha1> child.
    server:   host name, used only in the returned message.

    Returns None when the first <file> named *filename* has a first <sha1>
    child whose text equals *sha1*; otherwise returns the error message
    describing what went wrong.
    """
    try:
        root = xml.etree.ElementTree.fromstring(document)
    except xml.etree.ElementTree.ParseError:
        return f'Invalid XML received from {server}'
    if root.tag != 'files' or any(child.tag != 'file' for child in root):
        return f'Invalid XML received from {server}'

    # .get() rather than .attrib[...] so a <file> without a name is skipped
    # instead of raising KeyError.
    entry = next((child for child in root if child.get('name') == filename), None)
    if entry is None:
        return f'File not found on {server}'

    # Element.find() returns the first direct child with that tag; compare
    # against None explicitly because childless elements are falsy.
    digest = entry.find('sha1')
    if digest is None:
        return f'No SHA-1 found on {server}'
    if digest.text != sha1:
        return f'SHA-1 mismatch on {server}: expected {sha1}, found {digest.text}'
    return None


def main(argv = None):
    """Run the command-line tool.

    argv: the three arguments ITEM, FILENAME and SHA1; defaults to
          sys.argv[1:].

    Exits with status 1 (after printing a message to stderr) on any usage,
    fetch, format or verification error; returns normally, printing nothing,
    when every listed server reports the expected SHA-1.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 3:
        print('Usage: ia-verify-file ITEM FILENAME SHA1', file = sys.stderr)
        print('Verifies that FILENAME is present in ITEM (on all servers) and has hash SHA1', file = sys.stderr)
        sys.exit(1)
    item, filename, sha1 = args

    if not is_valid_identifier(item):
        fail('Invalid identifier')

    # Fetch https://archive.org/metadata/ITEM to get the servers that store the item
    status, reason, body = fetch('archive.org', f'/metadata/{item}')
    if status != 200:
        fail(f'Error fetching item metadata: {status} {reason}')
    try:
        o = json.loads(body)
    except ValueError:
        # Covers both invalid JSON and undecodable bytes.
        fail(f'Malformed metadata returned: {body!r}')
    if not o:
        fail('Empty item metadata returned (item might not exist)')
    if not isinstance(o, dict) or 'workable_servers' not in o or 'dir' not in o:
        fail(f'Malformed metadata returned: {o!r}')

    servers = o['workable_servers']
    if len(servers) < 2:
        fail(f'Not enough servers: {servers!r}')

    # Fetch _files.xml from each server and compare hash
    for server in servers:
        status, reason, body = fetch(server, f'{o["dir"]}/{item}_files.xml')
        if status != 200:
            fail(f'Error fetching item files from {server}: {status} {reason}')
        error = check_files_xml(body, filename, sha1, server)
        if error is not None:
            fail(error)


if __name__ == '__main__':
    main()