#!/usr/bin/env python3
import html
import http.client
import os
import shlex
import sys
import urllib.parse
# Arguments
i = 1
withListUrls = False
listUrlsFD = None
startMarker = None
format = '{url}'
args = []
while i < len(sys.argv):
arg = sys.argv[i]
if arg == '--help':
print('s3-bucket-list [options] BUCKETURL', file = sys.stderr)
print('', file = sys.stderr)
print('Options:', file = sys.stderr)
print(f' --format FORMAT Modify the output format; FORMAT defaults to {format!r}; available fields: url, key, size, and all fields returned by S3 (e.g. LastModified)', file = sys.stderr)
print( ' --marker KEY Start after a particular key instead of from the beginning', file = sys.stderr)
print( ' --with-list-urls Enables printing the list URLs retrieved to FD 3', file = sys.stderr)
sys.exit(1)
elif arg == '--with-list-urls':
withListUrls = True
try:
listUrlsFD = os.fdopen(3, 'w')
except OSError:
print('Error: FD 3 not open', file = sys.stderr)
sys.exit(1)
elif arg == '--marker':
startMarker = sys.argv[i + 1]
i += 1
elif arg == '--format':
format = sys.argv[i + 1]
i += 1
else:
args.append(arg)
i += 1
assert len(args) == 1, 'Need one argument: bucket URL'
baseUrl = args[0]
assert baseUrl.startswith('http://') or baseUrl.startswith('https://'), 'Argument does not look like an HTTP URL'
if '/' not in baseUrl.split('://', 1)[1] or not baseUrl.endswith('/'):
baseUrl = f'{baseUrl}/'
hostname = baseUrl.split('://', 1)[1].split('/', 1)[0]
conn = http.client.HTTPSConnection(hostname)
params = {}
if startMarker is not None:
params['marker'] = startMarker
attempt = 1
while True:
queryString = urllib.parse.urlencode(params)
url = f'{baseUrl}{"?" + queryString if queryString else ""}'
if withListUrls:
print(f'{url}', file = listUrlsFD)
conn.request('GET', url[url.index('/', 8):])
resp = conn.getresponse()
body = resp.read()
if b'InternalError
We encountered an internal error. Please try again.' in body:
print(f'Got internal error on {url} on attempt {attempt}; {"retrying" if attempt < 10 else "aborting"}', file = sys.stderr)
if attempt >= 10:
if 'marker' in params:
print(f'To retry, use --marker {shlex.quote(params["marker"])}', file = sys.stderr)
break
attempt += 1
continue
if not body.startswith(b'\n') and not body.startswith(b""):
raise RuntimeError(f'Invalid body: {body[:200]}...')
if b'' in body[:200] and 'marker' in params:
raise RuntimeError('Marker loop (empty marker in response despite providing one)')
# No risk, no fun!
contents = body.split(b'')
assert all(content.startswith(b'') for content in contents[1:])
assert all(content.endswith(b'') for content in contents[1:-1])
assert contents[-1].endswith(b'')
contents[-1] = contents[-1][:-len('')]
for content in contents[1:]:
key = html.unescape(content[5 : content.index(b'')].decode('utf-8')) # 5 = len(b'')
url = f'{baseUrl}{urllib.parse.quote(key)}'
tags = content.split(b'>')
assert len(tags) % 2 == 0
assert tags[-1] == b''
assert tags[-2] == b''.join(openTags).decode('utf-8')] = html.unescape(tag[:-(len(openTags[-1]) + 2)].decode('utf-8'))
openTags.pop()
continue
assert False
size = int(fields['Size']) if 'Size' in fields else None
try:
print(format.format(**fields, key = key, url = url, size = size))
except BrokenPipeError:
sys.exit(0)
lastKey = key
truncated = True if b'true' in body else (False if b'false' in body else None)
assert truncated in (True, False)
if not truncated:
break
if 'marker' in params and params['marker'] == lastKey:
raise RuntimeError('Marker loop (same last key as previous marker)')
params['marker'] = lastKey
attempt = 1