Browse Source

Add retrieval start/end time metadata fields

tags/v1.0
JustAnotherArchivist 1 year ago
parent
commit
811e119835
2 changed files with 14 additions and 3 deletions
  1. +10
    -2
      codearchiver/core.py
  2. +4
    -1
      codearchiver/modules/git.py

+ 10
- 2
codearchiver/core.py View File

@@ -91,6 +91,8 @@ class Metadata(list[tuple[str, str]]):
MetadataField('ID', required = True, repeatable = False),
MetadataField('Input URL', required = True, repeatable = False),
MetadataField('Filename', required = True, repeatable = False),
MetadataField('Retrieval start time', required = True, repeatable = False),
MetadataField('Retrieval end time', required = True, repeatable = False),
)
'''The fields for this metadata collection'''

@@ -376,8 +378,12 @@ class Module(metaclass = ModuleMeta):
def process(self) -> Result:
'''Perform the relevant retrieval(s)'''

def create_metadata(self, filename: str) -> Metadata:
'''Create a basic Metadata instance appropriate for this module'''
def create_metadata(self, filename: str, startTime: datetime.datetime, endTime: datetime.datetime) -> Metadata:
'''
Create a basic Metadata instance appropriate for this module

`startTime` and `endTime` must be in UTC (e.g. `datetime.datetime.utcnow()`). They should reflect the moments just before and after all interaction with the remote system.
'''

if type(self).MetadataClass is None or type(self).name is None:
raise RuntimeError('Module lacks an MetadataClass or a name; cannot create metadata')
@@ -387,6 +393,8 @@ class Module(metaclass = ModuleMeta):
idx.append('ID', self._id)
idx.append('Input URL', self._url)
idx.append('Filename', filename)
idx.append('Retrieval start time', startTime.strftime('%Y-%m-%d %H:%M:%S.%f UTC'))
idx.append('Retrieval end time', endTime.strftime('%Y-%m-%d %H:%M:%S.%f UTC'))
return idx

def __repr__(self):


+ 4
- 1
codearchiver/modules/git.py View File

@@ -1,5 +1,6 @@
import codearchiver.core
import codearchiver.subprocess
import datetime
import functools
import hashlib
import logging
@@ -49,6 +50,7 @@ class Git(codearchiver.core.Module):
gitVersion = gitVersion[12:-1]

_logger.info(f'Cloning {self._url} into {directory}')
startTime = datetime.datetime.utcnow()
codearchiver.subprocess.run_with_log(['git', 'clone', '--verbose', '--progress', '--mirror', self._url, directory], env = {**os.environ, 'GIT_TERMINAL_PROMPT': '0'})

if self._extraBranches:
@@ -62,6 +64,7 @@ class Git(codearchiver.core.Module):
else:
_logger.error(f'Failed to fetch {commit}')
# This leaves behind a FETCH_HEAD file, but git-bundle does not care about that, so it can safely be ignored.
endTime = datetime.datetime.utcnow()

_logger.info('Collecting repository metadata')
_, refs, _ = codearchiver.subprocess.run_with_log(['git', 'show-ref'], cwd = directory)
@@ -104,7 +107,7 @@ class Git(codearchiver.core.Module):
_logger.info(f'Removing clone')
shutil.rmtree(directory)

metadata = self.create_metadata(bundle)
metadata = self.create_metadata(bundle, startTime, endTime)
metadata.append('Git version', gitVersion)
for oldBundle in basedOnBundles:
metadata.append('Based on bundle', oldBundle)


Loading…
Cancel
Save