The little things give you away... A collection of various small helper stuff
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

65 lines
2.2 KiB

  1. #!/usr/bin/env python3
  2. import http.client
  3. import json
  4. import string
  5. import sys
  6. import xml.etree.ElementTree
  7. if len(sys.argv[1:]) != 3:
  8. print('Usage: ia-verify-file ITEM FILENAME SHA1', file = sys.stderr)
  9. print('Verifies that FILENAME is present in ITEM (on all servers) and has hash SHA1', file = sys.stderr)
  10. sys.exit(1)
  11. item, filename, sha1 = sys.argv[1:]
  12. if len(item) > 100 or item[0] not in (string.ascii_letters + string.digits) or item[1:].strip(string.ascii_letters + string.digits + '_-.') != '':
  13. print('Invalid identifier', file = sys.stderr)
  14. sys.exit(1)
  15. # Fetch https://archive.org/metadata/ITEM to get the two servers that store the item
  16. conn = http.client.HTTPSConnection('archive.org', timeout = 30)
  17. conn.request('GET', f'/metadata/{item}')
  18. r = conn.getresponse()
  19. if r.status != 200:
  20. print(f'Error fetching item metadata: {r.status} {r.reason}', file = sys.stderr)
  21. sys.exit(1)
  22. o = json.loads(r.read())
  23. if not o:
  24. print(f'Empty item metadata returned (item might not exist)', file = sys.stderr)
  25. sys.exit(1)
  26. if 'workable_servers' not in o or 'dir' not in o:
  27. print(f'Malformed metadata returned: {o!r}', file = sys.stderr)
  28. sys.exit(1)
  29. if len(o['workable_servers']) < 2:
  30. print(f'Not enough servers: {o["workable_servers"]!r}', file = sys.stderr)
  31. sys.exit(1)
  32. # Fetch _files.xml from each server and compare hash
  33. for server in o['workable_servers']:
  34. conn = http.client.HTTPSConnection(server, timeout = 30)
  35. conn.request('GET', f'{o["dir"]}/{item}_files.xml')
  36. r = conn.getresponse()
  37. if r.status != 200:
  38. print(f'Error fetching item files from {server}: {r.status} {r.reason}', file = sys.stderr)
  39. sys.exit(1)
  40. root = xml.etree.ElementTree.fromstring(r.read().decode('utf-8'))
  41. if root.tag != 'files' or not all(c.tag == 'file' for c in root):
  42. print(f'Invalid XML received from {server}', file = sys.stderr)
  43. sys.exit(1)
  44. for file in root:
  45. if file.attrib['name'] == filename:
  46. for c in file:
  47. if c.tag == 'sha1':
  48. if c.text != sha1:
  49. print(f'SHA-1 mismatch on {server}: expected {sha1}, found {c.text}', file = sys.stderr)
  50. sys.exit(1)
  51. break
  52. else:
  53. print(f'No SHA-1 found on {server}', file = sys.stderr)
  54. sys.exit(1)
  55. break
  56. else:
  57. print(f'File not found on {server}', file = sys.stderr)
  58. sys.exit(1)