Browse Source

Add baseUrl and evaluate incomplete URLs relative to it

master
JustAnotherArchivist 3 years ago
parent
commit
2324216016
1 changed file with 20 additions and 1 deletion
  1. +20
    -1
      qwarc/__init__.py

+ 20
- 1
qwarc/__init__.py View File

@@ -29,11 +29,26 @@ class Item:
self.session = session
self.headers = headers
self.warc = warc
if not hasattr(self, '_baseUrl'): # To allow subclasses to set the baseUrl before calling super().__init__
self._baseUrl = None
self.stats = {'tx': 0, 'rx': 0, 'requests': 0}
self.logger = logging.LoggerAdapter(logging.getLogger(), {'itemType': self.itemType, 'itemValue': self.itemValue})

self.childItems = []

@property
def baseUrl(self):
    '''
    The URL that incomplete request URLs are joined with; a yarl.URL, or None if no base URL has been set.
    '''
    return self._baseUrl

@baseUrl.setter
def baseUrl(self, baseUrl):
    '''
    Set the base URL.

    baseUrl: None or yarl.URL, stored as is; any other value is passed to yarl.URL() and the result is stored
    '''
    # Only values that are neither None nor already a yarl.URL need converting.
    if baseUrl is not None and not isinstance(baseUrl, yarl.URL):
        baseUrl = yarl.URL(baseUrl)
    self._baseUrl = baseUrl

def _merge_headers(self, headers):
d = {} # Preserves order from Python 3.7 (guaranteed) or CPython 3.6 (implementation detail)
keys = {} # casefolded key -> d key
@@ -64,7 +79,7 @@ class Item:
'''
HTTP GET or POST a URL

url: str or yarl.URL
url: str or yarl.URL; if this is not a complete URL, it is evaluated relative to self.baseUrl
responseHandler: None or a callable that determines how the response is handled; if None, self.defaultResponseHandler is used. See qwarc.utils.handle_response_default for details.
method: str, must be 'GET' or 'POST'
data: dict or list/tuple of lists/tuples of length two or bytes or file-like or None, the data to be sent in the request body
@@ -80,6 +95,10 @@ class Item:
#TODO: Rewrite using 'async with self.session.get'

url = yarl.URL(url) # Explicitly convert for normalisation, percent-encoding, etc.
if not url.scheme or not url.host:
if not self.baseUrl:
raise ValueError('Incomplete URL and no baseUrl to join it with')
url = self.baseUrl.join(url)
if responseHandler is None:
responseHandler = self.defaultResponseHandler
assert method in ('GET', 'POST'), 'method must be GET or POST'


Loading…
Cancel
Save