|
|
@@ -33,6 +33,32 @@ class Item: |
|
|
|
|
|
|
|
self.childItems = [] |
|
|
|
|
|
|
|
def _merge_headers(self, headers):
    '''
    Merge the default headers (self.headers) with the headers of a single request.

    headers: iterable of (name, value) 2-tuples, applied after the default headers

    Header names are compared case-insensitively (via str.casefold). The same rules apply to both the defaults and the per-request headers:
    - A later entry replaces an earlier one with the same name. If the spelling is identical, the header keeps its original position; if only the case differs, the old entry is dropped and the new spelling is appended at the end.
    - A value of None removes any earlier header with that name and emits nothing.
    - A tuple value is expanded into one (name, element) pair per element, so a header can be sent several times.

    Returns a new list of (name, value) 2-tuples; neither self.headers nor the argument is modified.
    '''

    merged = {} # header name as last spelled -> value; dict insertion order is the output order (guaranteed from Python 3.7)
    spelling = {} # casefolded header name -> the spelling currently used as key in merged

    # Defaults and per-request headers go through the same loop on purpose: loading the defaults
    # verbatim would let two defaults that differ only in case both survive, and a later override
    # would then remove just one of them.
    for pairs in (self.headers, headers):
        for key, value in pairs:
            folded = key.casefold()
            previous = spelling.get(folded)
            if value is None:
                # Removal request; a name that was never set is silently ignored.
                if previous is not None:
                    del merged[previous]
                    del spelling[folded]
                continue
            if previous is not None and previous != key:
                # Same header under a different spelling: drop the old key so only one remains.
                del merged[previous]
            merged[key] = value
            spelling[folded] = key

    out = []
    for key, value in merged.items():
        if isinstance(value, tuple):
            # A tuple means "send this header once per element".
            out.extend((key, element) for element in value)
        else:
            out.append((key, value))
    return out
|
|
|
|
|
|
|
async def fetch(self, url, responseHandler = qwarc.utils.handle_response_default, method = 'GET', data = None, headers = [], verify_ssl = True): |
|
|
|
''' |
|
|
|
HTTP GET or POST a URL |
|
|
@@ -41,7 +67,9 @@ class Item: |
|
|
|
responseHandler: a callable that determines how the response is handled. See qwarc.utils.handle_response_default for details. |
|
|
|
method: str, must be 'GET' or 'POST' |
|
|
|
data: dict or list/tuple of lists/tuples of length two or bytes or file-like or None, the data to be sent in the request body |
|
|
|
headers: list of 2-tuples, additional headers for this request only |
|
|
|
headers: list of 2-tuples, additional or overriding headers for this request only |
|
|
|
To remove one of the default headers, pass a value of None. |
|
|
|
If a header appears multiple times, only the last one is used. To send a header multiple times, pass a tuple of values. |
|
|
|
verify_ssl: bool, whether the SSL/TLS certificate should be validated |
|
|
|
|
|
|
|
Returns response (a ClientResponse object or None) and history (a tuple of (response, exception) tuples). |
|
|
@@ -52,8 +80,7 @@ class Item: |
|
|
|
|
|
|
|
url = yarl.URL(url) # Explicitly convert for normalisation, percent-encoding, etc. |
|
|
|
assert method in ('GET', 'POST'), 'method must be GET or POST' |
|
|
|
headers = self.headers + headers |
|
|
|
#TODO Deduplicate headers with later values overriding earlier ones |
|
|
|
headers = self._merge_headers(headers) |
|
|
|
history = [] |
|
|
|
attempt = 0 |
|
|
|
#TODO redirectLevel |
|
|
|