From 922900ac4efcdb59b48729670e0ea60031dcdf0b Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Fri, 26 Jun 2020 17:57:02 +0000 Subject: [PATCH] Add support for selecting a module explicitly using `name+` URL prefix E.g. `git+https://example.org/` --- codearchiver/core.py | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/codearchiver/core.py b/codearchiver/core.py index afb5fee..6437583 100644 --- a/codearchiver/core.py +++ b/codearchiver/core.py @@ -15,13 +15,22 @@ logger = logging.getLogger(__name__) class InputURL: def __init__(self, url): - self._url = url + if 0 < url.find('+') < url.find('://'): + # '+' appears before '://' in the URL and there is at least one character before the '+' (the check does not require any character between the '+' and the '://') + self._moduleScheme, self._url = url.split('+', 1) + else: + self._moduleScheme = None + self._url = url self._response = None @property def url(self): return self._url + @property + def moduleScheme(self): + return self._moduleScheme + @property def content(self): if self._response is None: @@ -148,17 +157,38 @@ def get_module_class(inputUrl: InputURL) -> typing.Type[Module]: # This can't be done at the top because the modules need to refer back to the Module class. 
import codearchiver.modules - # Collect all the Module subclasses + # Collect all the Module subclasses and names modules = set() + modulesByName = {} # name: str -> List[Module] q = queue.Queue() q.put_nowait(Module) while not q.empty(): class_ = q.get_nowait() for c in class_.__subclasses__(): - logger.debug(f'Found module {c.__module__}.{c.__name__}') - modules.add(c) + if c.name is not None: + logger.debug(f'Found {c.name!r} module {c.__module__}.{c.__name__}') + modules.add(c) + if c.name not in modulesByName: + modulesByName[c.name] = [] + modulesByName[c.name].append(c) + else: + logger.debug(f'Found nameless module {c.__module__}.{c.__name__}') q.put_nowait(c) + # Verify that there are no module name collisions + if any(len(x) > 1 for x in modulesByName.values()): + raise RuntimeError(f'Found multiple modules with the same name') + + # Check if the URL references one of the modules directly + if inputUrl.moduleScheme: + if inputUrl.moduleScheme in modulesByName: + module = modulesByName[inputUrl.moduleScheme][0] + logger.info(f'Selecting module {module.__module__}.{module.__name__}') + return module + else: + raise RuntimeError(f'No module with name {inputUrl.moduleScheme!r} exists') + + # Check if exactly one of the modules matches matches = [class_ for class_ in modules if class_.matches(inputUrl)] if len(matches) >= 2: logger.error('Multiple matching modules for input URL')