From a3d6fb35f8037908f919b1ed90d70cd2ddf87efc Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Sat, 11 Jul 2020 23:11:25 +0000 Subject: [PATCH] Turn response handlers into kwarg-only functions for easier extendability without breaking existing code --- qwarc/__init__.py | 4 ++-- qwarc/utils.py | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/qwarc/__init__.py b/qwarc/__init__.py index 8f9acae..7368e7e 100644 --- a/qwarc/__init__.py +++ b/qwarc/__init__.py @@ -138,10 +138,10 @@ class Item: self.stats['requests'] += 1 except (asyncio.TimeoutError, _aiohttp.ClientError) as e: self.logger.warning(f'Request for {url} failed: {e!r}') - action, writeToWarc = await responseHandler(url, attempt, response, e, self) + action, writeToWarc = await responseHandler(url = url, attempt = attempt, response = response, exc = e, item = self) exc = e # Pass the exception outward for the history else: - action, writeToWarc = await responseHandler(url, attempt, response, None, self) + action, writeToWarc = await responseHandler(url = url, attempt = attempt, response = response, exc = None, item = self) if response and exc is None and writeToWarc: self.warc.write_client_response(response) history.append((response, exc)) diff --git a/qwarc/utils.py b/qwarc/utils.py index a8f292a..55ac36f 100644 --- a/qwarc/utils.py +++ b/qwarc/utils.py @@ -127,7 +127,7 @@ def generate_range_items(start, stop, step): yield f'{i}-{min(i + step - 1, stop)}' -async def handle_response_default(url, attempt, response, exc, item): +async def handle_response_default(*, url, attempt, response, exc, item): ''' The default response handler, which behaves as follows: - If there is no response (e.g. timeout error), retry the retrieval after a delay of 5 seconds. @@ -166,10 +166,10 @@ async def handle_response_default(url, attempt, response, exc, item): return ACTION_RETRY, True -async def handle_response_ignore_redirects(url, attempt, response, exc, item): +async def handle_response_ignore_redirects(**kwargs): '''A response handler that does not follow redirects, i.e. treats them as a success instead. It behaves as handle_response_default otherwise.''' - action, writeToWarc = await handle_response_default(url, attempt, response, exc, item) + action, writeToWarc = await handle_response_default(**kwargs) if action == ACTION_FOLLOW_OR_SUCCESS: action = ACTION_SUCCESS return action, writeToWarc @@ -183,9 +183,9 @@ def handle_response_limit_error_retries(maxRetries, handler = handle_response_de If you use the same limit many times, you should keep the return value (the response handler) of this method and reuse it to avoid creating a new function every time. ''' - async def _handler(url, attempt, response, exc, item): - action, writeToWarc = await handler(url, attempt, response, exc, item) - if action == ACTION_RETRY and attempt > maxRetries: + async def _handler(**kwargs): + action, writeToWarc = await handler(**kwargs) + if action == ACTION_RETRY and kwargs['attempt'] > maxRetries: action = ACTION_RETRIES_EXCEEDED return action, writeToWarc return _handler