The little things give you away... A collection of various small helper stuff
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

87 lines
2.8 KiB

  1. #!/usr/bin/env python3
  2. import re
  3. import requests
  4. import sys
  5. RESPONSE_PATTERN = re.compile(r'''^<\?xml version=(["'])1\.0\1 encoding=(["'])UTF-8\2\?>''' '\n?' r'''<ListBucketResult xmlns=(["'])http://(?:s3\.amazonaws\.com/doc/2006-03-01/|doc\.s3\.amazonaws\.com/2006-03-01)\3>''')
  6. NAME_PATTERN = re.compile(r'<Name>([^<]*)</Name>')
  7. KEY_PATTERN = re.compile(r'<Key>([^<]*)</Key>')
  8. MTIME_PATTERN = re.compile(r'<LastModified>([^<]*)</LastModified>')
  9. PROVIDERS = {
  10. 'amazon': ['https://s3.amazonaws.com/{}/'],
  11. 'google': ['https://storage.googleapis.com/{}/'],
  12. 'scaleway': ['https://s3.nl-ams.scw.cloud/{}/', 'https://s3.fr-par.scw.cloud/{}/'],
  13. }
  14. def find(url, providers):
  15. print(f'Fetching {url}', file = sys.stderr)
  16. r = requests.get(url, timeout = 60)
  17. print(f'{r.status_code} {url}', file = sys.stderr)
  18. body = r.text
  19. if not RESPONSE_PATTERN.match(body):
  20. raise RuntimeError(f'Invalid body: {body[:200]}...')
  21. # Get bucket name
  22. m = NAME_PATTERN.search(body)
  23. if not m:
  24. raise RuntimeError('Could not find bucket name')
  25. name = m.group(1)
  26. if '&' in name:
  27. raise RuntimeError(f'Unsupported bucket name: {name!r}')
  28. # Get name and mtime of first object
  29. m = KEY_PATTERN.search(body)
  30. if m:
  31. firstKey = m.group(1)
  32. m = MTIME_PATTERN.search(body)
  33. if not m:
  34. raise RuntimeError('Got key but no mtime')
  35. firstMtime = m.group(1)
  36. else:
  37. print('Warning: no key found, cannot verify that it is the same bucket', file = sys.stderr)
  38. firstKey, firstMtime = None, None
  39. # Start searching
  40. for provider in providers:
  41. for testUrlTemplate in PROVIDERS[provider]:
  42. testUrl = testUrlTemplate.format(name)
  43. print(f'Fetching {testUrl}', file = sys.stderr)
  44. r = requests.get(testUrl, timeout = 60)
  45. print(f'{r.status_code} {testUrl}', file = sys.stderr)
  46. if r.status_code != 200:
  47. continue
  48. body = r.text
  49. if not RESPONSE_PATTERN.match(body):
  50. raise RuntimeError(f'Invalid body: {body[:200]}...')
  51. # Compare first object
  52. if not firstKey:
  53. continue
  54. m = KEY_PATTERN.search(body)
  55. if not m:
  56. print(f'No key in {testUrl}', file = sys.stderr)
  57. continue
  58. testFirstKey = m.group(1)
  59. m = MTIME_PATTERN.search(body)
  60. if not m:
  61. print(f'Got key but no mtime in {testUrl}', file = sys.stderr)
  62. continue
  63. testFirstMtime = m.group(1)
  64. if (firstKey, firstMtime) == (testFirstKey, testFirstMtime):
  65. print(f'Found the bucket: {url} == {testUrl}')
  66. if __name__ == '__main__':
  67. if not 2 <= len(sys.argv) <= 3 or sys.argv[1] in ('--help', '-h'):
  68. print('Usage: s3-bucket-find-direct-url URL [PROVIDER]', file = sys.stderr)
  69. print("Searches for an S3 bucket that's available at URL (e.g. CDN or proxy), optionally filtered by PROVIDER", file = sys.stderr)
  70. print(f'Providers: {", ".join(PROVIDERS)}', file = sys.stderr)
  71. sys.exit(1)
  72. url = sys.argv[1]
  73. providers = (sys.argv[2],) if len(sys.argv) == 3 else tuple(PROVIDERS.keys())
  74. find(url, providers)