|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687 |
- #!/usr/bin/env python3
- import re
- import requests
- import sys
-
-
# Matches the start of a bucket listing: an XML 1.0 / UTF-8 declaration,
# an optional newline, then a <ListBucketResult> root element using either
# of the two accepted S3 namespace URLs. Each attribute must be closed by
# the same kind of quote that opened it (enforced by the backreferences).
RESPONSE_PATTERN = re.compile(
    r'''^<\?xml version=(["'])1\.0\1 encoding=(["'])UTF-8\2\?>'''
    '\n?'
    r'''<ListBucketResult xmlns=(["'])http://(?:s3\.amazonaws\.com/doc/2006-03-01/|doc\.s3\.amazonaws\.com/2006-03-01)\3>'''
)

# Each of these captures the raw (still XML-escaped) text of the first
# matching element when used with .search().
NAME_PATTERN = re.compile(r'<Name>([^<]*)</Name>')
KEY_PATTERN = re.compile(r'<Key>([^<]*)</Key>')
MTIME_PATTERN = re.compile(r'<LastModified>([^<]*)</LastModified>')

# Provider name -> list of URL templates; the single `{}` placeholder in
# each template is filled in via str.format().
PROVIDERS = {
    'amazon': [
        'https://s3.amazonaws.com/{}/',
    ],
    'google': [
        'https://storage.googleapis.com/{}/',
    ],
    'scaleway': [
        'https://s3.nl-ams.scw.cloud/{}/',
        'https://s3.fr-par.scw.cloud/{}/',
    ],
    'wasabi': [
        'https://s3.wasabisys.com/{}/',
    ],
}
-
-
def find(url, providers):
    """Look for the bucket served at *url* on the given storage providers.

    The listing at *url* is fetched and checked against RESPONSE_PATTERN.
    The bucket name is read from it, together with the first <Key> and the
    first <LastModified> value (if any key is present). Every URL template
    of every requested provider is then filled with the bucket name and
    fetched; a candidate whose first key and mtime equal the reference
    values is reported on stdout. Progress and warnings go to stderr.

    Parameters:
        url: URL whose response is expected to be a bucket listing.
        providers: iterable of provider names; each must be a key of
            PROVIDERS.

    Returns:
        None.

    Raises:
        RuntimeError: if a listing (the reference one, or a candidate that
            answered with status 200) does not match RESPONSE_PATTERN, if
            no bucket name is found, if the name contains '&', or if the
            reference listing has a key but no mtime.
        KeyError: if a provider name is not in PROVIDERS.

    Exceptions raised by requests.get are not caught here.
    """
    print(f'Fetching {url}', file = sys.stderr)
    response = requests.get(url, timeout = 60)
    print(f'{response.status_code} {url}', file = sys.stderr)
    listing = response.text
    if RESPONSE_PATTERN.match(listing) is None:
        raise RuntimeError(f'Invalid body: {listing[:200]}...')

    # Bucket name as reported by the listing itself
    nameMatch = NAME_PATTERN.search(listing)
    if nameMatch is None:
        raise RuntimeError('Could not find bucket name')
    name = nameMatch.group(1)
    # The name is used verbatim in URLs; '&' presumably indicates an XML
    # entity that would need decoding first, so such names are rejected.
    if '&' in name:
        raise RuntimeError(f'Unsupported bucket name: {name!r}')

    # Reference object: first key and first mtime of the listing
    keyMatch = KEY_PATTERN.search(listing)
    if keyMatch is None:
        print('Warning: no key found, cannot verify that it is the same bucket', file = sys.stderr)
        refKey = refMtime = None
    else:
        refKey = keyMatch.group(1)
        mtimeMatch = MTIME_PATTERN.search(listing)
        if mtimeMatch is None:
            raise RuntimeError('Got key but no mtime')
        refMtime = mtimeMatch.group(1)

    # Lazily enumerate the candidate URLs, provider by provider
    candidateUrls = (
        template.format(name)
        for provider in providers
        for template in PROVIDERS[provider]
    )
    for candidateUrl in candidateUrls:
        print(f'Fetching {candidateUrl}', file = sys.stderr)
        reply = requests.get(candidateUrl, timeout = 60)
        print(f'{reply.status_code} {candidateUrl}', file = sys.stderr)
        if reply.status_code != 200:
            continue
        candidateListing = reply.text
        if RESPONSE_PATTERN.match(candidateListing) is None:
            raise RuntimeError(f'Invalid body: {candidateListing[:200]}...')

        # Without a reference key there is nothing to compare against
        if not refKey:
            continue

        keyMatch = KEY_PATTERN.search(candidateListing)
        if keyMatch is None:
            print(f'No key in {candidateUrl}', file = sys.stderr)
            continue
        mtimeMatch = MTIME_PATTERN.search(candidateListing)
        if mtimeMatch is None:
            print(f'Got key but no mtime in {candidateUrl}', file = sys.stderr)
            continue

        if keyMatch.group(1) == refKey and mtimeMatch.group(1) == refMtime:
            print(f'Found the bucket: {url} == {candidateUrl}')
-
-
if __name__ == '__main__':
    # Command-line entry point: URL is required, PROVIDER is optional.
    if not 2 <= len(sys.argv) <= 3 or sys.argv[1] in ('--help', '-h'):
        print('Usage: s3-bucket-find-direct-url URL [PROVIDER]', file = sys.stderr)
        print("Searches for an S3 bucket that's available at URL (e.g. CDN or proxy), optionally filtered by PROVIDER", file = sys.stderr)
        print(f'Providers: {", ".join(PROVIDERS)}', file = sys.stderr)
        sys.exit(1)

    # Reject an unknown provider before any network request is made;
    # otherwise the lookup in PROVIDERS fails with a KeyError traceback
    # only after the URL has already been fetched.
    if len(sys.argv) == 3 and sys.argv[2] not in PROVIDERS:
        print(f'Unknown provider: {sys.argv[2]!r}', file = sys.stderr)
        print(f'Providers: {", ".join(PROVIDERS)}', file = sys.stderr)
        sys.exit(1)

    url = sys.argv[1]
    # Without an explicit PROVIDER, search all known providers.
    providers = (sys.argv[2],) if len(sys.argv) == 3 else tuple(PROVIDERS)
    find(url, providers)
|