diff --git a/codearchiver/core.py b/codearchiver/core.py index e48bbef..1854d68 100644 --- a/codearchiver/core.py +++ b/codearchiver/core.py @@ -84,7 +84,13 @@ class IndexField: class Index(list[tuple[str, str]]): '''An index (key-value mapping, possibly with repeated keys) of a file produced by a module''' - fields: tuple[IndexField] = () + fields: tuple[IndexField] = ( + IndexField('codearchiver version', required = True, repeatable = False), + IndexField('Module', required = True, repeatable = False), + IndexField('ID', required = True, repeatable = False), + IndexField('Input URL', required = True, repeatable = False), + IndexField('Filename', required = True, repeatable = False), + ) '''The fields for this index''' _allFieldsCache: typing.Optional[tuple[IndexField]] = None @@ -348,6 +354,9 @@ class Module(metaclass = ModuleMeta): name: typing.Optional[str] = None '''The name of the module. Modules without a name are ignored. Names must be unique and may only contain a-z and hyphens.''' + IndexClass: typing.Optional[typing.Type[Index]] = None + '''The Index class corresponding to this module, if any.''' + @staticmethod def matches(inputUrl: InputURL) -> bool: '''Whether or not this module is for handling `inputUrl`.''' @@ -364,6 +373,19 @@ class Module(metaclass = ModuleMeta): def process(self) -> Result: '''Perform the relevant retrieval(s)''' + def create_index(self, filename: str) -> Index: + '''Create a basic Index instance appropriate for this module''' + + if type(self).IndexClass is None or type(self).name is None: + raise RuntimeError('Module lacks an IndexClass or a name; cannot create index') + idx = type(self).IndexClass() + idx.append('codearchiver version', codearchiver.version.__version__) + idx.append('Module', type(self).name) + idx.append('ID', self._id) + idx.append('Input URL', self._url) + idx.append('Filename', filename) + return idx + def __repr__(self): return f'{type(self).__module__}.{type(self).__name__}({self._inputUrl!r})' diff --git a/codearchiver/modules/git.py b/codearchiver/modules/git.py index a3e5e98..e48ad44 100644 --- a/codearchiver/modules/git.py +++ b/codearchiver/modules/git.py @@ -14,6 +14,7 @@ _logger = logging.getLogger(__name__) class GitIndex(codearchiver.core.Index): fields = ( + codearchiver.core.IndexField(key = 'Git version', required = True, repeatable = False), codearchiver.core.IndexField(key = 'Based on bundle', required = False, repeatable = True), codearchiver.core.IndexField(key = 'Ref', required = True, repeatable = True), codearchiver.core.IndexField(key = 'Root commit', required = True, repeatable = True), @@ -23,6 +24,7 @@ class GitIndex(codearchiver.core.Index): class Git(codearchiver.core.Module): name = 'git' + IndexClass = GitIndex @staticmethod def matches(inputUrl): @@ -45,6 +47,11 @@ class Git(codearchiver.core.Module): _logger.fatal(f'{bundle!r} already exists') raise FileExistsError(f'{bundle!r} already exists') + _, gitVersion, _ = codearchiver.subprocess.run_with_log(['git', '--version']) + if not gitVersion.startswith('git version ') or not gitVersion.endswith('\n') or gitVersion[12:-1].strip('0123456789.') != '': + raise RuntimeError(f'Unexpected output from `git --version`: {gitVersion!r}') + gitVersion = gitVersion[12:-1] + _logger.info(f'Cloning {self._url} into {directory}') codearchiver.subprocess.run_with_log(['git', 'clone', '--verbose', '--progress', '--mirror', self._url, directory], env = {**os.environ, 'GIT_TERMINAL_PROMPT': '0'}) @@ -103,7 +110,8 @@ class Git(codearchiver.core.Module): _logger.info(f'Removing clone') shutil.rmtree(directory) - index = GitIndex() + index = self.create_index(bundle) + index.append('Git version', gitVersion) for oldBundle in basedOnBundles: index.append('Based on bundle', oldBundle) for line in refs.splitlines():