From ad22a2327ae57942e65a40d20399efc69a2e7d99 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Mon, 29 Apr 2019 17:51:50 +0000 Subject: [PATCH] Support adding headers to individual requests --- qwarc/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/qwarc/__init__.py b/qwarc/__init__.py index d771a41..28b729a 100644 --- a/qwarc/__init__.py +++ b/qwarc/__init__.py @@ -30,7 +30,7 @@ class Item: self.childItems = [] - async def fetch(self, url, responseHandler = qwarc.utils.handle_response_default, method = 'GET', data = None): + async def fetch(self, url, responseHandler = qwarc.utils.handle_response_default, method = 'GET', data = None, headers = []): ''' HTTP GET or POST a URL @@ -38,6 +38,7 @@ class Item: responseHandler: a callable that determines how the response is handled. See qwarc.utils.handle_response_default for details. method: str, must be 'GET' or 'POST' data: dict or list/tuple of lists/tuples of length two or bytes or file-like or None, the data to be sent in the request body + headers: list of 2-tuples, additional headers for this request only Returns response (a ClientResponse object or None) and history (a tuple of (response, exception) tuples). response can be None and history can be an empty tuple, depending on the circumstances (e.g. timeouts). @@ -47,6 +48,8 @@ class Item: url = yarl.URL(url) # Explicitly convert for normalisation, percent-encoding, etc. assert method in ('GET', 'POST'), 'method must be GET or POST' + headers = self.headers + headers + #TODO Deduplicate headers with later values overriding earlier ones history = [] attempt = 0 #TODO redirectLevel @@ -60,7 +63,7 @@ class Item: try: with _aiohttp.Timeout(60): logging.info('Fetching {}'.format(url)) - response = await self.session.request(method, url, data = data, headers = self.headers, allow_redirects = False) + response = await self.session.request(method, url, data = data, headers = headers, allow_redirects = False) try: ret = await response.text(errors = 'surrogateescape') except: