diff --git a/ia-cdx-search b/ia-cdx-search index abe85b3..94579bd 100755 --- a/ia-cdx-search +++ b/ia-cdx-search @@ -6,6 +6,7 @@ import json import re import shlex import sys +import time HOST = 'web.archive.org' @@ -23,6 +24,11 @@ def fetch(url, tries, connection): r = connection.getresponse() status = r.status print(f'{status} {url}', file = sys.stderr) + if status == 302 and r.getheader('Location') in ('https://web.archive.org/429.html', '/429.html'): + # The CDX API is stupid and doesn't return 429s directly... + print('Exceeded rate limit, waiting...', file = sys.stderr) + time.sleep(30) + raise RuntimeError(f'Rate-limited on {url}') if status != 200: raise RuntimeError(f'Could not fetch {url}') data = r.read()