#!/usr/bin/env python3
"""Verify that a file in an Internet Archive item has the expected SHA-1.

Usage: ia-verify-file ITEM FILENAME SHA1

The item's metadata is fetched from archive.org to learn which servers store
the item. Each of those servers is then asked for ``ITEM_files.xml`` and the
SHA-1 recorded there for FILENAME is compared with the expected value.

The script prints a message to stderr and exits with status 1 on the first
problem it finds; it exits normally (status 0) when every server agrees.
"""
import http.client
import json
import string
import sys
import xml.etree.ElementTree


# Characters accepted by the identifier check: the first character must be
# alphanumeric, the remaining ones may additionally be '_', '-' or '.'.
IDENTIFIER_FIRST_CHARS = frozenset(string.ascii_letters + string.digits)
IDENTIFIER_CHARS = IDENTIFIER_FIRST_CHARS | frozenset('_-.')
IDENTIFIER_MAX_LENGTH = 100

# Timeout in seconds applied to every HTTPS connection.
TIMEOUT = 30


def fail(message):
	"""Print *message* to stderr and exit with status 1."""
	print(message, file = sys.stderr)
	sys.exit(1)


def is_valid_identifier(item):
	"""Return True if *item* passes the identifier check.

	An identifier is accepted when it is 1 to 100 characters long, starts
	with an ASCII letter or digit, and otherwise contains only ASCII letters,
	digits, '_', '-' and '.'. The empty string is rejected rather than
	raising IndexError on the first-character test.
	"""
	if not item or len(item) > IDENTIFIER_MAX_LENGTH:
		return False
	if item[0] not in IDENTIFIER_FIRST_CHARS:
		return False
	return all(char in IDENTIFIER_CHARS for char in item[1:])


def fetch(host, path):
	"""GET ``https://host/path`` and return ``(status, reason, body)``.

	The body is only read for a 200 response (``b''`` otherwise). The
	connection is always closed before returning, including on errors.
	"""
	conn = http.client.HTTPSConnection(host, timeout = TIMEOUT)
	try:
		conn.request('GET', path)
		response = conn.getresponse()
		body = response.read() if response.status == 200 else b''
		return response.status, response.reason, body
	finally:
		conn.close()


def find_file_entry(root, filename):
	"""Return the first child of *root* whose ``name`` attribute is *filename*.

	Returns None when there is no such child. Children without a ``name``
	attribute are skipped instead of raising KeyError.
	"""
	return next((entry for entry in root if entry.get('name') == filename), None)


def main(argv):
	"""Run the verification for ``argv == [ITEM, FILENAME, SHA1]``.

	Exits with status 1 (via sys.exit) after printing a message to stderr if
	the arguments are wrong, a request fails, the responses are malformed, or
	the file is missing or has a different SHA-1 on any server.
	"""
	if len(argv) != 3:
		print('Usage: ia-verify-file ITEM FILENAME SHA1', file = sys.stderr)
		print('Verifies that FILENAME is present in ITEM (on all servers) and has hash SHA1', file = sys.stderr)
		sys.exit(1)
	item, filename, sha1 = argv

	# Validating the identifier first also keeps it safe to interpolate into
	# the request paths below.
	if not is_valid_identifier(item):
		fail('Invalid identifier')

	# Fetch https://archive.org/metadata/ITEM to get the servers that store the item
	status, reason, body = fetch('archive.org', f'/metadata/{item}')
	if status != 200:
		fail(f'Error fetching item metadata: {status} {reason}')
	metadata = json.loads(body)
	if not metadata:
		fail('Empty item metadata returned (item might not exist)')
	if 'workable_servers' not in metadata or 'dir' not in metadata:
		fail(f'Malformed metadata returned: {metadata!r}')

	servers = metadata['workable_servers']
	if len(servers) < 2:
		fail(f'Not enough servers: {servers!r}')

	# Fetch _files.xml from each server and compare hash
	for server in servers:
		status, reason, body = fetch(server, f'{metadata["dir"]}/{item}_files.xml')
		if status != 200:
			fail(f'Error fetching item files from {server}: {status} {reason}')
		root = xml.etree.ElementTree.fromstring(body.decode('utf-8'))
		if root.tag != 'files' or not all(child.tag == 'file' for child in root):
			fail(f'Invalid XML received from {server}')

		entry = find_file_entry(root, filename)
		if entry is None:
			fail(f'File not found on {server}')
		# Only the first <sha1> child is consulted. Compare against None
		# explicitly: an Element without children is falsy.
		sha1_element = entry.find('sha1')
		if sha1_element is None:
			fail(f'No SHA-1 found on {server}')
		if sha1_element.text != sha1:
			fail(f'SHA-1 mismatch on {server}: expected {sha1}, found {sha1_element.text}')


if __name__ == '__main__':
	main(sys.argv[1:])