Browse Source

Add item to response handler arguments (e.g. for logging)

master
JustAnotherArchivist 3 years ago
parent
commit
03336e4988
2 changed files with 8 additions and 8 deletions
  1. +2
    -2
      qwarc/__init__.py
  2. +6
    -6
      qwarc/utils.py

+ 2
- 2
qwarc/__init__.py View File

@@ -116,10 +116,10 @@ class Item:
self.stats['requests'] += 1
except (asyncio.TimeoutError, _aiohttp.ClientError) as e:
self.logger.warning(f'Request for {url} failed: {e!r}')
action, writeToWarc = await responseHandler(url, attempt, response, e)
action, writeToWarc = await responseHandler(url, attempt, response, e, self)
exc = e # Pass the exception outward for the history
else:
action, writeToWarc = await responseHandler(url, attempt, response, None)
action, writeToWarc = await responseHandler(url, attempt, response, None, self)
if response and exc is None and writeToWarc:
self.warc.write_client_response(response)
history.append((response, exc))


+ 6
- 6
qwarc/utils.py View File

@@ -127,7 +127,7 @@ def generate_range_items(start, stop, step):
yield f'{i}-{min(i + step - 1, stop)}'


async def handle_response_default(url, attempt, response, exc):
async def handle_response_default(url, attempt, response, exc, item):
'''
The default response handler, which behaves as follows:
- If there is no response (e.g. timeout error), retry the retrieval after a delay of 5 seconds.
@@ -141,7 +141,7 @@ async def handle_response_default(url, attempt, response, exc):

Note that this handler does not limit the number of retries on errors.

Parameters: url (yarl.URL instance), attempt (int), response (aiohttp.ClientResponse or None), exc (Exception or None)
Parameters: url (yarl.URL instance), attempt (int), response (aiohttp.ClientResponse or None), exc (Exception or None), item (qwarc.Item instance)
At least one of response and exc is not None.
Returns: (one of the ACTION_* constants such as ACTION_RETRY, bool signifying whether to write to WARC or not)
The latter is ignored when exc is not None; responses that triggered an exception are never written to WARC.
@@ -166,10 +166,10 @@ async def handle_response_default(url, attempt, response, exc):
return ACTION_RETRY, True


async def handle_response_ignore_redirects(url, attempt, response, exc):
async def handle_response_ignore_redirects(url, attempt, response, exc, item):
'''A response handler that does not follow redirects, i.e. treats them as a success instead. It behaves as handle_response_default otherwise.'''

action, writeToWarc = await handle_response_default(url, attempt, response, exc)
action, writeToWarc = await handle_response_default(url, attempt, response, exc, item)
if action == ACTION_FOLLOW_OR_SUCCESS:
action = ACTION_SUCCESS
return action, writeToWarc
@@ -183,8 +183,8 @@ def handle_response_limit_error_retries(maxRetries, handler = handle_response_de
If you use the same limit many times, you should keep the return value (the response handler) of this function and reuse it to avoid creating a new function every time.
'''

async def _handler(url, attempt, response, exc):
action, writeToWarc = await handler(url, attempt, response, exc)
async def _handler(url, attempt, response, exc, item):
action, writeToWarc = await handler(url, attempt, response, exc, item)
if action == ACTION_RETRY and attempt > maxRetries:
action = ACTION_RETRIES_EXCEEDED
return action, writeToWarc


Loading…
Cancel
Save