Browse Source

Handle rate limiting

master
JustAnotherArchivist 2 years ago
parent
commit
628aeb052f
1 changed file with 6 additions and 0 deletions
  1. +6
    -0
      ia-cdx-search

+ 6
- 0
ia-cdx-search View File

@@ -6,6 +6,7 @@ import json
import re
import shlex
import sys
import time


HOST = 'web.archive.org'
@@ -23,6 +24,11 @@ def fetch(url, tries, connection):
r = connection.getresponse()
status = r.status
print(f'{status} {url}', file = sys.stderr)
if status == 302 and r.getheader('Location') in ('https://web.archive.org/429.html', '/429.html'):
# The CDX API doesn't return a 429 status directly; it signals rate limiting with a 302 redirect to its 429.html page.
print('Exceeded rate limit, waiting...', file = sys.stderr)
time.sleep(30)
raise RuntimeError(f'Rate-limited on {url}')
if status != 200:
raise RuntimeError(f'Could not fetch {url}')
data = r.read()


Loading…
Cancel
Save