|
|
@@ -15,13 +15,22 @@ logger = logging.getLogger(__name__) |
|
|
|
|
|
|
|
class InputURL: |
|
|
|
def __init__(self, url):
    """Split an optional ``<module>+`` prefix off the given URL string.

    The prefix is recognised only when a '+' occurs before the first
    '://' and at least one character precedes that '+'. In that case
    the text before the first '+' is stored as the module scheme and
    the remainder as the URL. Otherwise the module scheme is None and
    the URL is stored unchanged.
    """
    plusPos = url.find('+')
    schemeSepPos = url.find('://')
    # str.find returns -1 for a missing substring, so this chained
    # comparison fails when either marker is absent, when the '+' is the
    # very first character, or when the '+' does not precede the '://'.
    hasModulePrefix = 0 < plusPos < schemeSepPos
    if hasModulePrefix:
        prefix, _, remainder = url.partition('+')
        self._moduleScheme = prefix
        self._url = remainder
    else:
        self._moduleScheme = None
        self._url = url
    # Initialised to None here; the content property tests it against None.
    self._response = None
|
|
|
|
|
|
|
@property
def url(self):
    """Return the stored URL (read-only).

    This is the value the constructor kept in ``_url``: the input with
    a leading ``<module>+`` prefix removed when one was recognised,
    otherwise the input as given.
    """
    return self._url
|
|
|
|
|
|
|
@property
def moduleScheme(self):
    """Return the module name taken from a ``<module>+`` URL prefix (read-only).

    The value is None when the constructor found no such prefix.
    """
    return self._moduleScheme
|
|
|
|
|
|
|
@property |
|
|
|
def content(self): |
|
|
|
if self._response is None: |
|
|
@@ -148,17 +157,38 @@ def get_module_class(inputUrl: InputURL) -> typing.Type[Module]: |
|
|
|
# This can't be done at the top because the modules need to refer back to the Module class. |
|
|
|
import codearchiver.modules |
|
|
|
|
|
|
|
# Collect all the Module subclasses |
|
|
|
# Collect all the Module subclasses and names |
|
|
|
modules = set() |
|
|
|
modulesByName = {} # name: str -> List[Module] |
|
|
|
q = queue.Queue() |
|
|
|
q.put_nowait(Module) |
|
|
|
while not q.empty(): |
|
|
|
class_ = q.get_nowait() |
|
|
|
for c in class_.__subclasses__(): |
|
|
|
logger.debug(f'Found module {c.__module__}.{c.__name__}') |
|
|
|
modules.add(c) |
|
|
|
if c.name is not None: |
|
|
|
logger.debug(f'Found {c.name!r} module {c.__module__}.{c.__name__}') |
|
|
|
modules.add(c) |
|
|
|
if c.name not in modulesByName: |
|
|
|
modulesByName[c.name] = [] |
|
|
|
modulesByName[c.name].append(c) |
|
|
|
else: |
|
|
|
logger.debug(f'Found nameless module {c.__module__}.{c.__name__}') |
|
|
|
q.put_nowait(c) |
|
|
|
|
|
|
|
# Verify that there are no module name collisions |
|
|
|
if any(len(x) > 1 for x in modulesByName.values()): |
|
|
|
raise RuntimeError(f'Found multiple modules with the same name') |
|
|
|
|
|
|
|
# Check if the URL references one of the modules directly |
|
|
|
if inputUrl.moduleScheme: |
|
|
|
if inputUrl.moduleScheme in modulesByName: |
|
|
|
module = modulesByName[inputUrl.moduleScheme][0] |
|
|
|
logger.info(f'Selecting module {module.__module__}.{module.__name__}') |
|
|
|
return module |
|
|
|
else: |
|
|
|
raise RuntimeError(f'No module with name {inputUrl.moduleScheme!r} exists') |
|
|
|
|
|
|
|
# Check if exactly one of the modules matches |
|
|
|
matches = [class_ for class_ in modules if class_.matches(inputUrl)] |
|
|
|
if len(matches) >= 2: |
|
|
|
logger.error('Multiple matching modules for input URL') |
|
|
|