#!/usr/bin/env python3
# Recovered from the git patch "Add s3-bucket-list"
# (commit 9743aa7c356cd952248fa04e987125d7ac0164f2, JustAnotherArchivist,
# Sat, 13 Jul 2019 21:33:32 +0000), which created the file s3-bucket-list.
# The XML tag literals and the per-object field extraction were lost from the
# patch text and have been reconstructed; review them against the upstream
# file before relying on exact output compatibility.
"""List all objects in an S3 bucket through the unauthenticated listing API.

Usage: s3-bucket-list [options] HOSTNAME BUCKETNAME

One line is printed to stdout per object, rendered with ``--format``.
Pagination is followed via the ``marker`` query parameter until the server
reports ``<IsTruncated>false</IsTruncated>``.
"""
import html
import http.client
import os
import sys
import urllib.parse


DEFAULT_FORMAT = '{url}'


def printUsage():
    """Print the command-line help to stderr."""
    print('s3-bucket-list [options] HOSTNAME BUCKETNAME', file=sys.stderr)
    print('', file=sys.stderr)
    print('Options:', file=sys.stderr)
    print(f'  --format FORMAT    Modify the output format; FORMAT defaults to {DEFAULT_FORMAT!r}; '
          'available fields: url, key, size, and all fields returned by S3 (e.g. LastModified)',
          file=sys.stderr)
    print('  --marker KEY       Start after a particular key instead of from the beginning',
          file=sys.stderr)
    print('  --with-list-urls   Enables printing the list URLs retrieved to FD 3',
          file=sys.stderr)


def fail(message):
    """Print ``Error: message`` to stderr and exit with status 1."""
    print(f'Error: {message}', file=sys.stderr)
    sys.exit(1)


def parseArguments(argv):
    """Parse the command line (without the program name).

    Options may appear anywhere between the two positional arguments.
    Returns a dict with the keys ``hostname``, ``bucketname``, ``format``,
    ``startMarker`` and ``withListUrls``. Exits with status 1 on ``--help``
    or on invalid usage.
    """
    options = {'withListUrls': False, 'startMarker': None, 'format': DEFAULT_FORMAT}
    valueOptions = {'--marker': 'startMarker', '--format': 'format'}
    positional = []
    remaining = iter(argv)
    for arg in remaining:
        if arg == '--help':
            printUsage()
            sys.exit(1)
        elif arg == '--with-list-urls':
            options['withListUrls'] = True
        elif arg in valueOptions:
            value = next(remaining, None)
            if value is None:
                # Previously an IndexError traceback.
                fail(f'{arg} requires a value')
            options[valueOptions[arg]] = value
        else:
            positional.append(arg)
    if len(positional) != 2:
        # Explicit check instead of assert, which vanishes under -O.
        fail('Need two arguments: hostname and bucketname')
    options['hostname'], options['bucketname'] = positional
    return options


def parseFields(content):
    """Turn the inside of one ``<Contents>`` element into a dict.

    ``content`` is the bytes between ``<Contents>`` and ``</Contents>``.
    Leaf elements map their tag name to the XML-unescaped text; elements with
    children (e.g. ``Owner``) map to a nested dict, so a format string can
    use ``{Owner[ID]}``. The parsing is deliberately a plain byte split
    rather than a real XML parser, as in the original script; it relies on
    the server escaping ``<`` and ``>`` inside text.

    Raises RuntimeError when the tags are not properly nested.
    """
    *tokens, tail = content.split(b'>')
    if tail.strip():
        raise RuntimeError(f'Invalid object entry: {content[:200]}...')
    root = {}
    stack = [(None, root)]  # (tag name, children collected so far)
    for token in tokens:
        # Every token is "<text>" followed by "<name", "</name" or "<name/".
        text, sep, tag = token.rpartition(b'<')
        if not sep or not tag:
            raise RuntimeError(f'Invalid object entry: {content[:200]}...')
        name = tag.decode('utf-8')
        if name.startswith('/'):
            openName, children = stack.pop()
            if openName is None or openName != name[1:]:
                raise RuntimeError(f'Invalid object entry: {content[:200]}...')
            value = children if children else html.unescape(text.decode('utf-8'))
            stack[-1][1][openName] = value
        elif name.endswith('/'):
            stack[-1][1][name[:-1]] = ''
        else:
            stack.append((name, {}))
    if len(stack) != 1:
        raise RuntimeError(f'Invalid object entry: {content[:200]}...')
    return root


def parseListing(body):
    """Parse one page of a bucket listing.

    Returns ``(objects, truncated)`` where ``objects`` is a list of field
    dicts (see parseFields) in response order and ``truncated`` tells whether
    another page must be requested.

    Raises RuntimeError when the body is not a ListBucketResult document, an
    entry is malformed, or the truncation flag is missing.
    """
    head, *chunks = body.split(b'<Contents>')
    if b'<ListBucketResult' not in head:
        raise RuntimeError(f'Invalid body: {body[:200]}...')

    objects = []
    for chunk in chunks:
        content, closing, _ = chunk.partition(b'</Contents>')
        if not closing:
            raise RuntimeError(f'Invalid body: {body[:200]}...')
        fields = parseFields(content)
        if not isinstance(fields.get('Key'), str):
            raise RuntimeError(f'Object entry without key: {content[:200]}...')
        objects.append(fields)

    if b'<IsTruncated>true</IsTruncated>' in body:
        truncated = True
    elif b'<IsTruncated>false</IsTruncated>' in body:
        truncated = False
    else:
        raise RuntimeError(f'No truncation flag in body: {body[:200]}...')
    return objects, truncated


def formatObject(outputFormat, baseUrl, fields):
    """Render one object with the user's format string.

    Besides the S3 fields, the format may use ``key`` (the object key),
    ``url`` (bucket URL plus the percent-encoded key) and ``size`` (``Size``
    as an int, or None when the server did not send it).
    """
    key = fields['Key']
    record = dict(fields)
    record['key'] = key
    record['url'] = f'{baseUrl}/{urllib.parse.quote(key)}'
    record['size'] = int(fields['Size']) if 'Size' in fields else None
    return outputFormat.format(**record)


def main(argv=None):
    """Run the tool; ``argv`` defaults to ``sys.argv[1:]``."""
    options = parseArguments(sys.argv[1:] if argv is None else argv)

    listUrlsFD = None
    if options['withListUrls']:
        try:
            listUrlsFD = os.fdopen(3, 'w')
        except OSError:
            fail('FD 3 not open')

    hostname = options['hostname']
    basePath = '/' + urllib.parse.quote(options['bucketname'])
    baseUrl = f'https://{hostname}{basePath}'
    params = {}
    if options['startMarker'] is not None:
        params['marker'] = options['startMarker']

    conn = http.client.HTTPSConnection(hostname)
    try:
        while True:
            queryString = urllib.parse.urlencode(params)
            suffix = f'?{queryString}' if queryString else ''
            if listUrlsFD is not None:
                print(f'{baseUrl}{suffix}', file=listUrlsFD)
            conn.request('GET', f'{basePath}{suffix}')
            body = conn.getresponse().read()

            objects, truncated = parseListing(body)
            for fields in objects:
                print(formatObject(options['format'], baseUrl, fields))

            if not truncated:
                break
            if not objects:
                # Without a new key the same page would be requested forever.
                raise RuntimeError('Truncated listing without any keys')
            params['marker'] = objects[-1]['Key']
    finally:
        conn.close()
        if listUrlsFD is not None:
            listUrlsFD.close()


if __name__ == '__main__':
    main()