Browse Source

Fix in-progress upload listing

master
JustAnotherArchivist 11 months ago
parent
commit
44260ed92e
1 changed file with 10 additions and 9 deletions
  1. +10
    -9
      ia-upload-stream

+ 10
- 9
ia-upload-stream View File

@@ -315,7 +315,7 @@ def list_uploads(item, *, tries = 3):
# No auth needed
url = f'https://s3.us.archive.org/{item}/?uploads'

# This endpoint redirects to the server storing the item under ia######.s3dns.us.archive.org, but those servers present an invalid TLS certificate for *.us.archive.org.
# This endpoint (sometimes? not anymore?) redirects to the server storing the item under ia######.s3dns.us.archive.org, but those servers present an invalid TLS certificate for *.us.archive.org.
class IAS3CertificateFixHTTPAdapter(requests.adapters.HTTPAdapter):
def init_poolmanager(self, *args, **kwargs):
kwargs['assert_hostname'] = 's3.us.archive.org'
@@ -323,14 +323,15 @@ def list_uploads(item, *, tries = 3):

for attempt in range(1, tries + 1):
r = requests.get(url, allow_redirects = False, timeout = TIMEOUT)
if r.status_code == 307 and '.s3dns.us.archive.org' in r.headers['Location']:
s3dnsUrl = r.headers['Location']
s3dnsUrl = s3dnsUrl.replace('http://', 'https://')
s3dnsUrl = s3dnsUrl.replace('.s3dns.us.archive.org:80/', '.s3dns.us.archive.org/')
domain = s3dnsUrl[8:s3dnsUrl.find('/', 9)]
s = requests.Session()
s.mount(f'https://{domain}/', IAS3CertificateFixHTTPAdapter())
r = s.get(s3dnsUrl, timeout = TIMEOUT)
if r.status_code == 200 or (r.status_code == 307 and '.s3dns.us.archive.org' in r.headers['Location']):
if r.status_code == 307:
s3dnsUrl = r.headers['Location']
s3dnsUrl = s3dnsUrl.replace('http://', 'https://')
s3dnsUrl = s3dnsUrl.replace('.s3dns.us.archive.org:80/', '.s3dns.us.archive.org/')
domain = s3dnsUrl[8:s3dnsUrl.find('/', 9)]
s = requests.Session()
s.mount(f'https://{domain}/', IAS3CertificateFixHTTPAdapter())
r = s.get(s3dnsUrl, timeout = TIMEOUT)
if r.status_code == 200:
print(f'In-progress uploads for {item} (initiation datetime, upload ID, filename):')
for upload in re.findall(r'<Upload>.*?</Upload>', r.text):


Loading…
Cancel
Save