Browse Source

Turn response handlers into keyword-only functions so that they can be extended more easily without breaking existing code

master
JustAnotherArchivist 3 years ago
parent
commit
a3d6fb35f8
2 changed files with 8 additions and 8 deletions
  1. +2
    -2
      qwarc/__init__.py
  2. +6
    -6
      qwarc/utils.py

+ 2
- 2
qwarc/__init__.py View File

@@ -138,10 +138,10 @@ class Item:
self.stats['requests'] += 1
except (asyncio.TimeoutError, _aiohttp.ClientError) as e:
self.logger.warning(f'Request for {url} failed: {e!r}')
action, writeToWarc = await responseHandler(url, attempt, response, e, self)
action, writeToWarc = await responseHandler(url = url, attempt = attempt, response = response, exc = e, item = self)
exc = e # Pass the exception outward for the history
else:
action, writeToWarc = await responseHandler(url, attempt, response, None, self)
action, writeToWarc = await responseHandler(url = url, attempt = attempt, response = response, exc = None, item = self)
if response and exc is None and writeToWarc:
self.warc.write_client_response(response)
history.append((response, exc))


+ 6
- 6
qwarc/utils.py View File

@@ -127,7 +127,7 @@ def generate_range_items(start, stop, step):
yield f'{i}-{min(i + step - 1, stop)}'


async def handle_response_default(url, attempt, response, exc, item):
async def handle_response_default(*, url, attempt, response, exc, item):
'''
The default response handler, which behaves as follows:
- If there is no response (e.g. timeout error), retry the retrieval after a delay of 5 seconds.
@@ -166,10 +166,10 @@ async def handle_response_default(url, attempt, response, exc, item):
return ACTION_RETRY, True


async def handle_response_ignore_redirects(url, attempt, response, exc, item):
async def handle_response_ignore_redirects(**kwargs):
'''A response handler that does not follow redirects, i.e. treats them as a success instead. It behaves as handle_response_default otherwise.'''

action, writeToWarc = await handle_response_default(url, attempt, response, exc, item)
action, writeToWarc = await handle_response_default(**kwargs)
if action == ACTION_FOLLOW_OR_SUCCESS:
action = ACTION_SUCCESS
return action, writeToWarc
@@ -183,9 +183,9 @@ def handle_response_limit_error_retries(maxRetries, handler = handle_response_de
If you use the same limit many times, you should keep the return value (the response handler) of this method and reuse it to avoid creating a new function every time.
'''

async def _handler(url, attempt, response, exc, item):
action, writeToWarc = await handler(url, attempt, response, exc, item)
if action == ACTION_RETRY and attempt > maxRetries:
async def _handler(**kwargs):
action, writeToWarc = await handler(**kwargs)
if action == ACTION_RETRY and kwargs['attempt'] > maxRetries:
action = ACTION_RETRIES_EXCEEDED
return action, writeToWarc
return _handler


Loading…
Cancel
Save