Ver a proveniência

Handle rate limiting

master
JustAnotherArchivist há 2 anos
ascendente
cometimento
628aeb052f
1 ficheiro alterado com 6 adições e 0 eliminações
  1. +6
    -0
      ia-cdx-search

+ 6
- 0
ia-cdx-search Ver ficheiro

@@ -6,6 +6,7 @@ import json
import re
import shlex
import sys
import time


HOST = 'web.archive.org'
@@ -23,6 +24,11 @@ def fetch(url, tries, connection):
r = connection.getresponse()
status = r.status
print(f'{status} {url}', file = sys.stderr)
if status == 302 and r.getheader('Location') in ('https://web.archive.org/429.html', '/429.html'):
# The CDX API is stupid and doesn't return 429s directly...
print('Exceeded rate limit, waiting...', file = sys.stderr)
time.sleep(30)
raise RuntimeError(f'Rate-limited on {url}')
if status != 200:
raise RuntimeError(f'Could not fetch {url}')
data = r.read()


Carregando…
Cancelar
Guardar