The little things give you away... A collection of various small helper stuff
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

123 lines
4.3 KiB

  1. #!/usr/bin/env python3
  2. import html
  3. import http.client
  4. import os
  5. import shlex
  6. import ssl
  7. import sys
  8. import urllib.parse
  # s3-bucket-list: print every object of a listable S3 bucket, following the marker-based pagination.

DEFAULT_FORMAT = '{url}'
MAX_ATTEMPTS = 10
INTERNAL_ERROR_BODY = b'<Error><Code>InternalError</Code><Message>We encountered an internal error. Please try again.</Message>'
# The two response prologues this script knows how to take apart.
VALID_BODY_PREFIXES = (
	b'<?xml version="1.0" encoding="UTF-8"?>\n<ListBucketResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/">',
	b"<?xml version='1.0' encoding='UTF-8'?><ListBucketResult xmlns='http://doc.s3.amazonaws.com/2006-03-01'>",
)


def fail(message):
	"""Print an error message to stderr and exit with status 1."""
	print(f'Error: {message}', file = sys.stderr)
	sys.exit(1)


def parseArguments(argv):
	"""Parse the command line.

	argv is the full argument vector including the program name.
	Returns a tuple (baseUrl, startMarker, outputFormat, listUrlsFD); startMarker is None
	unless --marker was given, and listUrlsFD is a writable file object for FD 3 if
	--with-list-urls was given, else None.
	Exits with status 1 on --help, on a missing option value, if FD 3 is not open, or if
	the number of positional arguments is not exactly one.
	"""
	outputFormat = DEFAULT_FORMAT
	startMarker = None
	listUrlsFD = None
	positional = []
	i = 1
	while i < len(argv):
		arg = argv[i]
		if arg == '--help':
			print('s3-bucket-list [options] BUCKETURL', file = sys.stderr)
			print('', file = sys.stderr)
			print('Options:', file = sys.stderr)
			print(f' --format FORMAT Modify the output format; FORMAT defaults to {outputFormat!r}; available fields: url, key, size, and all fields returned by S3 (e.g. LastModified)', file = sys.stderr)
			print( ' --marker KEY Start after a particular key instead of from the beginning', file = sys.stderr)
			print( ' --with-list-urls Enables printing the list URLs retrieved to FD 3', file = sys.stderr)
			sys.exit(1)
		elif arg == '--with-list-urls':
			# Only wrap FD 3 once; a second wrapper would close the descriptor when the first one is collected.
			if listUrlsFD is None:
				try:
					listUrlsFD = os.fdopen(3, 'w')
				except OSError:
					fail('FD 3 not open')
		elif arg in ('--marker', '--format'):
			if i + 1 >= len(argv):
				fail(f'{arg} requires a value')
			if arg == '--marker':
				startMarker = argv[i + 1]
			else:
				outputFormat = argv[i + 1]
			i += 1
		else:
			positional.append(arg)
		i += 1
	if len(positional) != 1:
		fail('Need one argument: bucket URL')
	return positional[0], startMarker, outputFormat, listUrlsFD


def normalizeBaseUrl(baseUrl):
	"""Validate the bucket URL and return it with a trailing slash.

	Exits with status 1 if the URL does not start with http:// or https:// or has no hostname.
	"""
	if not baseUrl.startswith(('http://', 'https://')):
		fail('Argument does not look like an HTTP URL')
	if not baseUrl.split('://', 1)[1].split('/', 1)[0]:
		fail('Argument does not contain a hostname')
	if not baseUrl.endswith('/'):
		baseUrl = f'{baseUrl}/'
	return baseUrl


def splitContents(body):
	"""Split a ListBucketResult body into one bytes chunk per <Contents> element.

	Each returned chunk starts with b'<Key>' and ends with b'</Contents>'.
	Returns an empty list if the body has no <Contents> element at all.
	Raises RuntimeError if the body is not laid out the way this splitter expects.
	"""
	contents = body.split(b'<Contents>')[1:]
	if not contents:
		# Empty bucket, or nothing left after the marker.
		return []
	if not all(content.startswith(b'<Key>') for content in contents):
		raise RuntimeError('Unexpected response structure: Contents entry does not start with Key')
	if not all(content.endswith(b'</Contents>') for content in contents[:-1]):
		raise RuntimeError('Unexpected response structure: data between Contents entries')
	if not contents[-1].endswith(b'</Contents></ListBucketResult>'):
		raise RuntimeError('Unexpected response structure: data after the last Contents entry')
	contents[-1] = contents[-1][:-len(b'</ListBucketResult>')]
	return contents


def parseContentFields(content):
	"""Extract the element values from one <Contents> chunk.

	content is the bytes following b'<Contents>' up to and including b'</Contents>'.
	Returns a dict mapping element names to their unescaped text; nested elements are keyed
	by their path joined with '>' (e.g. 'Owner>ID'). Elements that only contain other
	elements get no entry of their own; empty leaf elements map to ''.
	Raises RuntimeError on anything that is not plain, properly nested element markup.
	"""
	tags = content.split(b'>')
	if len(tags) < 2 or tags[-1] != b'' or tags[-2] != b'</Contents':
		raise RuntimeError('Unexpected Contents entry: missing closing tag')
	openTags = [] # Current open tag hierarchy
	hasChildren = [] # Parallel to openTags: whether that element contained child elements
	fields = {}
	for tag in tags[:-2]:
		# Check for the closing tag first: an empty element or the end of a container looks
		# like b'</Name', which also starts with b'<'.
		if openTags and tag.endswith(b'</' + openTags[-1]):
			text = tag[:-(len(openTags[-1]) + 2)]
			if text or not hasChildren[-1]:
				fields[b'>'.join(openTags).decode('utf-8')] = html.unescape(text.decode('utf-8'))
			openTags.pop()
			hasChildren.pop()
			continue
		if tag.startswith(b'<') and not tag.startswith(b'</'):
			if hasChildren:
				hasChildren[-1] = True
			openTags.append(tag[1:])
			hasChildren.append(False)
			continue
		raise RuntimeError(f'Unexpected Contents entry: cannot parse {tag[:200]!r}')
	if openTags:
		raise RuntimeError('Unexpected Contents entry: unclosed element')
	return fields


def main():
	"""List the bucket given on the command line, printing one formatted line per object."""
	baseUrl, startMarker, outputFormat, listUrlsFD = parseArguments(sys.argv)
	baseUrl = normalizeBaseUrl(baseUrl)
	scheme, remainder = baseUrl.split('://', 1)
	hostname = remainder.split('/', 1)[0]
	if scheme == 'https':
		# NOTE(review): certificate verification is deliberately disabled here via a private
		# ssl API; responses are therefore not authenticated.
		conn = http.client.HTTPSConnection(hostname, context = ssl._create_unverified_context())
	else:
		conn = http.client.HTTPConnection(hostname)
	# Offset of the path within any URL built from baseUrl ('://' is three characters).
	pathOffset = len(scheme) + 3 + len(hostname)

	params = {}
	if startMarker is not None:
		params['marker'] = startMarker
	attempt = 1
	while True:
		queryString = urllib.parse.urlencode(params)
		listUrl = f'{baseUrl}{"?" + queryString if queryString else ""}'
		if listUrlsFD is not None:
			print(f'{listUrl}', file = listUrlsFD)
		conn.request('GET', listUrl[pathOffset:])
		resp = conn.getresponse()
		body = resp.read()

		if INTERNAL_ERROR_BODY in body:
			willRetry = attempt < MAX_ATTEMPTS
			print(f'Got internal error on {listUrl} on attempt {attempt}; {"retrying" if willRetry else "aborting"}', file = sys.stderr)
			if not willRetry:
				if 'marker' in params:
					print(f'To retry, use --marker {shlex.quote(params["marker"])}', file = sys.stderr)
				# NOTE: giving up still ends the script with exit status 0, as before.
				break
			attempt += 1
			continue
		if not body.startswith(VALID_BODY_PREFIXES):
			raise RuntimeError(f'Invalid body: {body[:200]}...')
		if b'<Marker></Marker>' in body[:200] and 'marker' in params:
			raise RuntimeError('Marker loop (empty marker in response despite providing one)')

		# No risk, no fun! The XML is taken apart with plain bytes operations.
		lastKey = None
		for content in splitContents(body):
			key = html.unescape(content[len(b'<Key>') : content.index(b'</Key>')].decode('utf-8'))
			itemUrl = f'{baseUrl}{urllib.parse.quote(key)}'
			fields = parseContentFields(content)
			size = int(fields['Size']) if 'Size' in fields else None
			try:
				print(outputFormat.format(**fields, key = key, url = itemUrl, size = size))
			except BrokenPipeError:
				# The reader went away. Point stdout at devnull so the flush at interpreter
				# shutdown does not raise a second BrokenPipeError.
				os.dup2(os.open(os.devnull, os.O_WRONLY), sys.stdout.fileno())
				sys.exit(0)
			lastKey = key

		if b'<IsTruncated>true</IsTruncated>' in body:
			truncated = True
		elif b'<IsTruncated>false</IsTruncated>' in body:
			truncated = False
		else:
			raise RuntimeError('Response does not contain a valid IsTruncated element')
		if not truncated:
			break
		if lastKey is None:
			raise RuntimeError('Truncated response without any keys; cannot determine the next marker')
		if params.get('marker') == lastKey:
			raise RuntimeError('Marker loop (same last key as previous marker)')
		params['marker'] = lastKey
		attempt = 1


if __name__ == '__main__':
	main()