The little things give you away... A collection of various small helper stuff
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.
 
 
 

123 lignes
4.3 KiB

  1. #!/usr/bin/env python3
  2. import html
  3. import http.client
  4. import os
  5. import shlex
  6. import ssl
  7. import sys
  8. import urllib.parse
  9. # Arguments
  10. i = 1
  11. withListUrls = False
  12. listUrlsFD = None
  13. startMarker = None
  14. format = '{url}'
  15. args = []
  16. while i < len(sys.argv):
  17. arg = sys.argv[i]
  18. if arg == '--help':
  19. print('s3-bucket-list [options] BUCKETURL', file = sys.stderr)
  20. print('', file = sys.stderr)
  21. print('Options:', file = sys.stderr)
  22. print(f' --format FORMAT Modify the output format; FORMAT defaults to {format!r}; available fields: url, key, size, and all fields returned by S3 (e.g. LastModified)', file = sys.stderr)
  23. print( ' --marker KEY Start after a particular key instead of from the beginning', file = sys.stderr)
  24. print( ' --with-list-urls Enables printing the list URLs retrieved to FD 3', file = sys.stderr)
  25. sys.exit(1)
  26. elif arg == '--with-list-urls':
  27. withListUrls = True
  28. try:
  29. listUrlsFD = os.fdopen(3, 'w')
  30. except OSError:
  31. print('Error: FD 3 not open', file = sys.stderr)
  32. sys.exit(1)
  33. elif arg == '--marker':
  34. startMarker = sys.argv[i + 1]
  35. i += 1
  36. elif arg == '--format':
  37. format = sys.argv[i + 1]
  38. i += 1
  39. else:
  40. args.append(arg)
  41. i += 1
  42. assert len(args) == 1, 'Need one argument: bucket URL'
  43. baseUrl = args[0]
  44. assert baseUrl.startswith('http://') or baseUrl.startswith('https://'), 'Argument does not look like an HTTP URL'
  45. if '/' not in baseUrl.split('://', 1)[1] or not baseUrl.endswith('/'):
  46. baseUrl = f'{baseUrl}/'
  47. hostname = baseUrl.split('://', 1)[1].split('/', 1)[0]
  48. conn = http.client.HTTPSConnection(hostname, context = ssl._create_unverified_context())
  49. params = {}
  50. if startMarker is not None:
  51. params['marker'] = startMarker
  52. attempt = 1
  53. while True:
  54. queryString = urllib.parse.urlencode(params)
  55. url = f'{baseUrl}{"?" + queryString if queryString else ""}'
  56. if withListUrls:
  57. print(f'{url}', file = listUrlsFD)
  58. conn.request('GET', url[url.index('/', 8):])
  59. resp = conn.getresponse()
  60. body = resp.read()
  61. if b'<Error><Code>InternalError</Code><Message>We encountered an internal error. Please try again.</Message>' in body:
  62. print(f'Got internal error on {url} on attempt {attempt}; {"retrying" if attempt < 10 else "aborting"}', file = sys.stderr)
  63. if attempt >= 10:
  64. if 'marker' in params:
  65. print(f'To retry, use --marker {shlex.quote(params["marker"])}', file = sys.stderr)
  66. break
  67. attempt += 1
  68. continue
  69. if not body.startswith(b'<?xml version="1.0" encoding="UTF-8"?>\n<ListBucketResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/">') and not body.startswith(b"<?xml version='1.0' encoding='UTF-8'?><ListBucketResult xmlns='http://doc.s3.amazonaws.com/2006-03-01'>"):
  70. raise RuntimeError(f'Invalid body: {body[:200]}...')
  71. if b'<Marker></Marker>' in body[:200] and 'marker' in params:
  72. raise RuntimeError('Marker loop (empty marker in response despite providing one)')
  73. # No risk, no fun!
  74. contents = body.split(b'<Contents>')
  75. assert all(content.startswith(b'<Key>') for content in contents[1:])
  76. assert all(content.endswith(b'</Contents>') for content in contents[1:-1])
  77. assert contents[-1].endswith(b'</Contents></ListBucketResult>')
  78. contents[-1] = contents[-1][:-len('</ListBucketResult>')]
  79. for content in contents[1:]:
  80. key = html.unescape(content[5 : content.index(b'</Key>')].decode('utf-8')) # 5 = len(b'<Key>')
  81. url = f'{baseUrl}{urllib.parse.quote(key)}'
  82. tags = content.split(b'>')
  83. assert len(tags) % 2 == 0
  84. assert tags[-1] == b''
  85. assert tags[-2] == b'</Contents'
  86. openTags = [] # Current open tag hierarchy
  87. fields = {}
  88. for tag in tags[:-2]:
  89. if tag.startswith(b'<'):
  90. openTags.append(tag[1:])
  91. continue
  92. assert openTags
  93. if tag.endswith(b'</' + openTags[-1]):
  94. fields[b'>'.join(openTags).decode('utf-8')] = html.unescape(tag[:-(len(openTags[-1]) + 2)].decode('utf-8'))
  95. openTags.pop()
  96. continue
  97. assert False
  98. size = int(fields['Size']) if 'Size' in fields else None
  99. try:
  100. print(format.format(**fields, key = key, url = url, size = size))
  101. except BrokenPipeError:
  102. sys.exit(0)
  103. lastKey = key
  104. truncated = True if b'<IsTruncated>true</IsTruncated>' in body else (False if b'<IsTruncated>false</IsTruncated>' in body else None)
  105. assert truncated in (True, False)
  106. if not truncated:
  107. break
  108. if 'marker' in params and params['marker'] == lastKey:
  109. raise RuntimeError('Marker loop (same last key as previous marker)')
  110. params['marker'] = lastKey
  111. attempt = 1