@@ -2,8 +2,11 @@ import abc | |||
import collections | |||
#import codearchiver.modules # In get_module_class | |||
import codearchiver.version | |||
import contextlib | |||
import dataclasses | |||
import functools | |||
import logging | |||
import os | |||
import queue | |||
import requests | |||
import time | |||
@@ -59,13 +62,77 @@ class Result: | |||
id: str | |||
'''A unique ID for this result''' | |||
files: list[str] = dataclasses.field(default_factory = list) | |||
'''List of filenames produced by the run''' | |||
files: list[tuple[str, typing.Optional['Index']]] = dataclasses.field(default_factory = list) | |||
'''List of filenames produced by the run, optionally with an index''' | |||
submoduleResults: list[tuple['Module', 'Result']] = dataclasses.field(default_factory = list) | |||
'''List of related submodules and their results''' | |||
class IndexValidationError(ValueError):
    '''An index does not conform to the field specification of its class or cannot be parsed'''


@dataclasses.dataclass
class IndexField:
    '''Specification of one key that may appear in an index'''

    key: str
    '''The key as it appears in index entries'''

    required: bool
    '''Whether at least one entry with this key must be present'''

    repeatable: bool
    '''Whether more than one entry with this key is permitted'''


class Index(list[tuple[str, str]]):
    '''An index (key-value mapping, possibly with repeated keys) of a file produced by a module'''

    fields: list[IndexField] = []
    '''The fields for this index'''

    def append(self, *args):
        '''Append an entry, given either as one `(key, value)` tuple or as two separate `key, value` arguments

        Raises TypeError if called with neither one nor two arguments.
        '''
        if len(args) == 1:
            # Single argument: it already is the entry tuple.
            args = args[0]
        elif len(args) != 2:
            raise TypeError(f'append() takes one (key, value) tuple or two arguments, got {len(args)}')
        return super().append(args)

    def validate(self):
        '''Check that all keys in the index conform to the specification

        Raises IndexValidationError if a key is not declared in `fields`, a required key is missing, or a key that is not repeatable occurs more than once.
        '''
        fields = type(self).fields
        keyCounts = collections.Counter(key for key, _ in self)
        keys = set(keyCounts)

        permittedKeys = set(field.key for field in fields)
        unrecognisedKeys = keys - permittedKeys
        if unrecognisedKeys:
            raise IndexValidationError(f'Unrecognised key(s): {", ".join(sorted(unrecognisedKeys))}')

        requiredKeys = set(field.key for field in fields if field.required)
        missingRequiredKeys = requiredKeys - keys
        if missingRequiredKeys:
            raise IndexValidationError(f'Missing required key(s): {", ".join(sorted(missingRequiredKeys))}')

        repeatableKeys = set(field.key for field in fields if field.repeatable)
        repeatedKeys = set(key for key, count in keyCounts.items() if count > 1)
        repeatedUnrepeatableKeys = repeatedKeys - repeatableKeys
        if repeatedUnrepeatableKeys:
            raise IndexValidationError(f'Repeated unrepeatable key(s): {", ".join(sorted(repeatedUnrepeatableKeys))}')

    def serialise(self) -> str:
        '''Convert the index to a string suitable for e.g. a simple text file storage

        The output has one `key: value` line per entry. The index is validated first; IndexValidationError is raised if it is invalid or if a value contains a line break, which could not be represented in the line-based format.
        '''
        self.validate()
        for key, value in self:
            if '\n' in value or '\r' in value:
                raise IndexValidationError(f'Value of key {key!r} contains a line break')
        return ''.join(f'{key}: {value}\n' for key, value in self)

    @classmethod
    def deserialise(cls, f: typing.Union[str, bytes, os.PathLike, typing.TextIO]) -> 'Index':
        '''Import a serialised index from a filename or file-like object

        Each line is split at the first `': '` only, so values may themselves contain that sequence. A missing newline at the end of the input is tolerated. Raises IndexValidationError if a line has no separator or if the resulting index fails validation.
        '''
        if isinstance(f, (str, bytes, os.PathLike)):
            cm = open(f, 'r')
        else:
            # Already a file-like object; the caller remains responsible for closing it.
            cm = contextlib.nullcontext(f)
        entries = []
        with cm as fp:
            for lineNumber, line in enumerate(fp, start = 1):
                # Strip only an actual line terminator so an unterminated final line keeps its last character.
                line = line.removesuffix('\n')
                key, separator, value = line.partition(': ')
                if not separator:
                    raise IndexValidationError(f'Malformed index line {lineNumber}: {line!r}')
                entries.append((key, value))
        o = cls(entries)
        o.validate()
        return o
class HttpError(Exception): | |||
'''An HTTP request failed too many times.''' | |||
@@ -1,6 +1,7 @@ | |||
import codearchiver.core | |||
import codearchiver.subprocess | |||
import datetime | |||
import functools | |||
import logging | |||
import os.path | |||
import shutil | |||
@@ -10,6 +11,14 @@ import subprocess | |||
logger = logging.getLogger(__name__) | |||
class GitIndex(codearchiver.core.Index):
    '''Index for a Git bundle, permitting the keys `Ref`, `Root commit`, and `Commit`'''

    # Every key is both mandatory and allowed to occur any number of times.
    fields = [
        codearchiver.core.IndexField(key = key, required = True, repeatable = True)
        for key in ('Ref', 'Root commit', 'Commit')
    ]
class Git(codearchiver.core.Module): | |||
name = 'git' | |||
@@ -51,10 +60,22 @@ class Git(codearchiver.core.Module): | |||
logger.info(f'Bundling into {bundle}') | |||
codearchiver.subprocess.run_with_log(['git', 'bundle', 'create', '--progress', f'../{bundle}', '--all'], cwd = directory) | |||
logger.info(f'Collecting repository metadata for index') | |||
_, refs = codearchiver.subprocess.run_with_log(['git', 'show-ref'], cwd = directory) | |||
_, commits = codearchiver.subprocess.run_with_log(['git', 'log', '--reflog', '--format=format:%H% P'], cwd = directory) | |||
logger.info(f'Removing clone') | |||
shutil.rmtree(directory) | |||
return codearchiver.core.Result(id = self._id, files = [bundle]) | |||
index = GitIndex() | |||
for line in refs.splitlines(): | |||
index.append('Ref', line) | |||
for commitHash, *parents in map(functools.partial(str.split, sep = ' '), commits.splitlines()): | |||
index.append('Commit', commitHash) | |||
if not parents: | |||
index.append('Root commit', commitHash) | |||
return codearchiver.core.Result(id = self._id, files = [(bundle, index)]) | |||
def __repr__(self):
    '''Return a constructor-like representation with the fully qualified class name, the input URL, and the extra branches'''
    cls = type(self)
    qualifiedName = f'{cls.__module__}.{cls.__name__}'
    return f'{qualifiedName}({self._inputUrl!r}, extraBranches = {self._extraBranches!r})'
@@ -12,13 +12,13 @@ _logger = logging.getLogger(__name__) | |||
class Storage(abc.ABC): | |||
@abc.abstractmethod
def put(self, filename: str, index: typing.Optional[codearchiver.core.Index] = None):
    '''Put a local file and (if provided) its index into storage. If an error occurs, a partial copy may remain in storage. If it completes, the local input file is removed.

    `filename` is the path of the local file to store. `index` is the optional index describing that file; `None` means the file has no index.
    '''
def put_result(self, result: codearchiver.core.Result):
    '''Put a module's Result into storage. The semantics are as for `put`, and the exact behaviour regarding partial copies and leftover files on errors is undefined.

    Every `(filename, index)` pair in `result.files` is stored through `put`, then the results of all submodules are stored recursively.
    '''
    # Each entry of `files` is a (filename, optional index) pair; store each file exactly once together with its index.
    for fn, index in result.files:
        self.put(fn, index)
    # The submodule itself is not needed here, only its result.
    for _, subresult in result.submoduleResults:
        self.put_result(subresult)
@@ -43,10 +43,16 @@ class DirectoryStorage(Storage): | |||
if not self._check_directory(): | |||
os.makedirs(self._directory) | |||
def put(self, filename, index = None):
    '''Move a local file into the storage directory and, if an index is provided, write it next to the moved file

    The index is written to `<base name of filename>.codearchiver-index` inside the storage directory. `index = None` means there is no index; an empty index object still counts as provided.

    Raises whatever `index.serialise()` raises for an invalid index (before anything is moved), and FileExistsError if the index file already exists.
    '''
    # Serialise (and thereby validate) up front so an invalid index fails before the file has been moved.
    serialisedIndex = index.serialise() if index is not None else None
    self._ensure_directory()
    _logger.info(f'Moving {filename} to {self._directory}')
    shutil.move(filename, self._directory)
    if serialisedIndex is None:
        return
    # shutil.move stores the file under its base name, so the index must be named after the base name as well;
    # joining the raw filename would misplace the index for filenames with a directory part or an absolute path.
    indexFilename = os.path.join(self._directory, f'{os.path.basename(filename)}.codearchiver-index')
    _logger.info(f'Writing index for {filename} to {indexFilename}')
    # Exclusive creation: never overwrite an index that is already in storage.
    with open(indexFilename, 'x') as fp:
        fp.write(serialisedIndex)
@contextlib.contextmanager | |||
def open(self, filename): | |||