The little things give you away... A collection of various small helper stuff
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

88 lines
2.9 KiB

  1. #!/usr/bin/env python3
  2. import re
  3. import requests
  4. import sys
  5. RESPONSE_PATTERN = re.compile(r'''^<\?xml version=(["'])1\.0\1 encoding=(["'])UTF-8\2\?>''' '\n?' r'''<ListBucketResult xmlns=(["'])http://(?:s3\.amazonaws\.com/doc/2006-03-01/|doc\.s3\.amazonaws\.com/2006-03-01)\3>''')
  6. NAME_PATTERN = re.compile(r'<Name>([^<]*)</Name>')
  7. KEY_PATTERN = re.compile(r'<Key>([^<]*)</Key>')
  8. MTIME_PATTERN = re.compile(r'<LastModified>([^<]*)</LastModified>')
  9. PROVIDERS = {
  10. 'amazon': ['https://s3.amazonaws.com/{}/'],
  11. 'google': ['https://storage.googleapis.com/{}/'],
  12. 'scaleway': ['https://s3.nl-ams.scw.cloud/{}/', 'https://s3.fr-par.scw.cloud/{}/'],
  13. 'wasabi': ['https://s3.wasabisys.com/{}/'],
  14. }
  15. def find(url, providers):
  16. print(f'Fetching {url}', file = sys.stderr)
  17. r = requests.get(url, timeout = 60)
  18. print(f'{r.status_code} {url}', file = sys.stderr)
  19. body = r.text
  20. if not RESPONSE_PATTERN.match(body):
  21. raise RuntimeError(f'Invalid body: {body[:200]}...')
  22. # Get bucket name
  23. m = NAME_PATTERN.search(body)
  24. if not m:
  25. raise RuntimeError('Could not find bucket name')
  26. name = m.group(1)
  27. if '&' in name:
  28. raise RuntimeError(f'Unsupported bucket name: {name!r}')
  29. # Get name and mtime of first object
  30. m = KEY_PATTERN.search(body)
  31. if m:
  32. firstKey = m.group(1)
  33. m = MTIME_PATTERN.search(body)
  34. if not m:
  35. raise RuntimeError('Got key but no mtime')
  36. firstMtime = m.group(1)
  37. else:
  38. print('Warning: no key found, cannot verify that it is the same bucket', file = sys.stderr)
  39. firstKey, firstMtime = None, None
  40. # Start searching
  41. for provider in providers:
  42. for testUrlTemplate in PROVIDERS[provider]:
  43. testUrl = testUrlTemplate.format(name)
  44. print(f'Fetching {testUrl}', file = sys.stderr)
  45. r = requests.get(testUrl, timeout = 60)
  46. print(f'{r.status_code} {testUrl}', file = sys.stderr)
  47. if r.status_code != 200:
  48. continue
  49. body = r.text
  50. if not RESPONSE_PATTERN.match(body):
  51. raise RuntimeError(f'Invalid body: {body[:200]}...')
  52. # Compare first object
  53. if not firstKey:
  54. continue
  55. m = KEY_PATTERN.search(body)
  56. if not m:
  57. print(f'No key in {testUrl}', file = sys.stderr)
  58. continue
  59. testFirstKey = m.group(1)
  60. m = MTIME_PATTERN.search(body)
  61. if not m:
  62. print(f'Got key but no mtime in {testUrl}', file = sys.stderr)
  63. continue
  64. testFirstMtime = m.group(1)
  65. if (firstKey, firstMtime) == (testFirstKey, testFirstMtime):
  66. print(f'Found the bucket: {url} == {testUrl}')
  67. if __name__ == '__main__':
  68. if not 2 <= len(sys.argv) <= 3 or sys.argv[1] in ('--help', '-h'):
  69. print('Usage: s3-bucket-find-direct-url URL [PROVIDER]', file = sys.stderr)
  70. print("Searches for an S3 bucket that's available at URL (e.g. CDN or proxy), optionally filtered by PROVIDER", file = sys.stderr)
  71. print(f'Providers: {", ".join(PROVIDERS)}', file = sys.stderr)
  72. sys.exit(1)
  73. url = sys.argv[1]
  74. providers = (sys.argv[2],) if len(sys.argv) == 3 else tuple(PROVIDERS.keys())
  75. find(url, providers)