@@ -91,6 +91,8 @@ class Metadata(list[tuple[str, str]]): | |||||
MetadataField('ID', required = True, repeatable = False), | MetadataField('ID', required = True, repeatable = False), | ||||
MetadataField('Input URL', required = True, repeatable = False), | MetadataField('Input URL', required = True, repeatable = False), | ||||
MetadataField('Filename', required = True, repeatable = False), | MetadataField('Filename', required = True, repeatable = False), | ||||
MetadataField('Retrieval start time', required = True, repeatable = False), | |||||
MetadataField('Retrieval end time', required = True, repeatable = False), | |||||
) | ) | ||||
'''The fields for this metadata collection''' | '''The fields for this metadata collection''' | ||||
@@ -376,8 +378,12 @@ class Module(metaclass = ModuleMeta): | |||||
def process(self) -> Result: | def process(self) -> Result: | ||||
'''Perform the relevant retrieval(s)''' | '''Perform the relevant retrieval(s)''' | ||||
def create_metadata(self, filename: str) -> Metadata: | |||||
'''Create a basic Metadata instance appropriate for this module''' | |||||
def create_metadata(self, filename: str, startTime: datetime.datetime, endTime: datetime.datetime) -> Metadata: | |||||
''' | |||||
Create a basic Metadata instance appropriate for this module | |||||
`startTime` and `endTime` must be in UTC (e.g. `datetime.datetime.utcnow()`). They should reflect the moments just before and after all interaction with the remote system. | |||||
''' | |||||
if type(self).MetadataClass is None or type(self).name is None: | if type(self).MetadataClass is None or type(self).name is None: | ||||
raise RuntimeError('Module lacks an MetadataClass or a name; cannot create metadata') | raise RuntimeError('Module lacks an MetadataClass or a name; cannot create metadata') | ||||
@@ -387,6 +393,8 @@ class Module(metaclass = ModuleMeta): | |||||
idx.append('ID', self._id) | idx.append('ID', self._id) | ||||
idx.append('Input URL', self._url) | idx.append('Input URL', self._url) | ||||
idx.append('Filename', filename) | idx.append('Filename', filename) | ||||
idx.append('Retrieval start time', startTime.strftime('%Y-%m-%d %H:%M:%S.%f UTC')) | |||||
idx.append('Retrieval end time', endTime.strftime('%Y-%m-%d %H:%M:%S.%f UTC')) | |||||
return idx | return idx | ||||
def __repr__(self): | def __repr__(self): | ||||
@@ -1,5 +1,6 @@ | |||||
import codearchiver.core | import codearchiver.core | ||||
import codearchiver.subprocess | import codearchiver.subprocess | ||||
import datetime | |||||
import functools | import functools | ||||
import hashlib | import hashlib | ||||
import logging | import logging | ||||
@@ -49,6 +50,7 @@ class Git(codearchiver.core.Module): | |||||
gitVersion = gitVersion[12:-1] | gitVersion = gitVersion[12:-1] | ||||
_logger.info(f'Cloning {self._url} into {directory}') | _logger.info(f'Cloning {self._url} into {directory}') | ||||
startTime = datetime.datetime.utcnow() | |||||
codearchiver.subprocess.run_with_log(['git', 'clone', '--verbose', '--progress', '--mirror', self._url, directory], env = {**os.environ, 'GIT_TERMINAL_PROMPT': '0'}) | codearchiver.subprocess.run_with_log(['git', 'clone', '--verbose', '--progress', '--mirror', self._url, directory], env = {**os.environ, 'GIT_TERMINAL_PROMPT': '0'}) | ||||
if self._extraBranches: | if self._extraBranches: | ||||
@@ -62,6 +64,7 @@ class Git(codearchiver.core.Module): | |||||
else: | else: | ||||
_logger.error(f'Failed to fetch {commit}') | _logger.error(f'Failed to fetch {commit}') | ||||
# This leaves over a FETCH_HEAD file, but git-bundle does not care about that, so it can safely be ignored. | # This leaves over a FETCH_HEAD file, but git-bundle does not care about that, so it can safely be ignored. | ||||
endTime = datetime.datetime.utcnow() | |||||
_logger.info('Collecting repository metadata') | _logger.info('Collecting repository metadata') | ||||
_, refs, _ = codearchiver.subprocess.run_with_log(['git', 'show-ref'], cwd = directory) | _, refs, _ = codearchiver.subprocess.run_with_log(['git', 'show-ref'], cwd = directory) | ||||
@@ -104,7 +107,7 @@ class Git(codearchiver.core.Module): | |||||
_logger.info(f'Removing clone') | _logger.info(f'Removing clone') | ||||
shutil.rmtree(directory) | shutil.rmtree(directory) | ||||
metadata = self.create_metadata(bundle) | |||||
metadata = self.create_metadata(bundle, startTime, endTime) | |||||
metadata.append('Git version', gitVersion) | metadata.append('Git version', gitVersion) | ||||
for oldBundle in basedOnBundles: | for oldBundle in basedOnBundles: | ||||
metadata.append('Based on bundle', oldBundle) | metadata.append('Based on bundle', oldBundle) | ||||