def _merge_headers(self, headers):
    '''
    Merge per-request headers into the default headers in self.headers.

    headers: iterable of (name, value) 2-tuples. A value of None removes the header, a tuple value
      sends the header once per element, and any other value replaces the header.

    Header names are matched case-insensitively (via str.casefold). The same rule is applied to
    self.headers and to headers, in that order, so for any name only the last entry survives.
    When an entry replaces one with a different spelling, the old spelling is dropped and the new
    one is appended at the end; a same-spelling replacement keeps its original position.

    Returns a new list of (name, value) 2-tuples; neither input is modified.
    '''

    merged = {} # name as last spelled -> value; dict insertion order is the output order (Python 3.7+)
    spelling = {} # casefolded name -> the spelling currently used as key in merged

    # Defaults go through exactly the same logic as the overrides. Inserting them blindly would let
    # two defaults differing only in case both survive, and only the later one could be overridden.
    for source in (self.headers, headers):
        for name, value in source:
            folded = name.casefold()
            current = spelling.get(folded) # None if this header is not present yet
            if value is None:
                # Removal request; silently ignored if there is nothing to remove.
                if current is not None:
                    del merged[current]
                    del spelling[folded]
                continue
            if current is not None and current != name:
                # Same header under another spelling: drop the old key so it is not sent twice.
                del merged[current]
            merged[name] = value
            spelling[folded] = name

    # Expand tuple values into one output pair per element.
    out = []
    for name, value in merged.items():
        if isinstance(value, tuple):
            out.extend((name, single) for single in value)
        else:
            out.append((name, value))
    return out
verify_ssl: bool, whether the SSL/TLS certificate should be validated Returns response (a ClientResponse object or None) and history (a tuple of (response, exception) tuples). @@ -52,8 +80,7 @@ class Item: url = yarl.URL(url) # Explicitly convert for normalisation, percent-encoding, etc. assert method in ('GET', 'POST'), 'method must be GET or POST' - headers = self.headers + headers - #TODO Deduplicate headers with later values overriding earlier ones + headers = self._merge_headers(headers) history = [] attempt = 0 #TODO redirectLevel