@@ -2,8 +2,11 @@ import abc | |||||
import collections | import collections | ||||
#import codearchiver.modules # In get_module_class | #import codearchiver.modules # In get_module_class | ||||
import codearchiver.version | import codearchiver.version | ||||
import contextlib | |||||
import dataclasses | import dataclasses | ||||
import functools | |||||
import logging | import logging | ||||
import os | |||||
import queue | import queue | ||||
import requests | import requests | ||||
import time | import time | ||||
@@ -59,13 +62,77 @@ class Result: | |||||
id: str | id: str | ||||
'''A unique ID for this result''' | '''A unique ID for this result''' | ||||
files: list[str] = dataclasses.field(default_factory = list) | |||||
'''List of filenames produced by the run''' | |||||
files: list[tuple[str, typing.Optional['Index']]] = dataclasses.field(default_factory = list) | |||||
'''List of filenames produced by the run, optionally with an index''' | |||||
submoduleResults: list[tuple['Module', 'Result']] = dataclasses.field(default_factory = list) | submoduleResults: list[tuple['Module', 'Result']] = dataclasses.field(default_factory = list) | ||||
'''List of related submodules and their results''' | '''List of related submodules and their results''' | ||||
class IndexValidationError(ValueError):
    '''Raised when an index does not conform to its field specification'''
@dataclasses.dataclass
class IndexField:
    '''Specification of one key that may appear in an index'''

    key: str
    '''The key as it appears in index entries'''

    required: bool
    '''Whether the key has to occur at least once for the index to be valid'''

    repeatable: bool
    '''Whether the key may occur more than once in a valid index'''
class Index(list[tuple[str, str]]):
    '''An index (key-value mapping, possibly with repeated keys) of a file produced by a module

    Entries are stored as `(key, value)` tuples in insertion order. Subclasses declare the permitted keys by overriding `fields`.'''

    fields: typing.ClassVar[list['IndexField']] = []
    '''The fields for this index'''

    def append(self, *args):
        '''Add an entry, passed either as `append(key, value)` or as a single `append((key, value))` pair'''
        if len(args) == 1:
            # A single argument is taken to be the (key, value) pair itself.
            args = args[0]
        return super().append(args)

    def validate(self):
        '''Check that all keys and values in the index conform to the specification

        Raises `IndexValidationError` if a key is not declared in `fields`, a required key is missing, a non-repeatable key occurs more than once, or a value contains a line feed (which the line-based serialisation cannot represent).'''
        fields = type(self).fields
        keyCounts = collections.Counter(key for key, _ in self)
        keys = set(keyCounts)

        permittedKeys = {field.key for field in fields}
        unrecognisedKeys = keys - permittedKeys
        if unrecognisedKeys:
            raise IndexValidationError(f'Unrecognised key(s): {", ".join(sorted(unrecognisedKeys))}')

        requiredKeys = {field.key for field in fields if field.required}
        missingRequiredKeys = requiredKeys - keys
        if missingRequiredKeys:
            raise IndexValidationError(f'Missing required key(s): {", ".join(sorted(missingRequiredKeys))}')

        repeatableKeys = {field.key for field in fields if field.repeatable}
        repeatedKeys = {key for key, count in keyCounts.items() if count > 1}
        repeatedUnrepeatableKeys = repeatedKeys - repeatableKeys
        if repeatedUnrepeatableKeys:
            raise IndexValidationError(f'Repeated unrepeatable key(s): {", ".join(sorted(repeatedUnrepeatableKeys))}')

        # The serialised form has one entry per line, so a line feed inside a value would be read back as a separate (and usually malformed) entry.
        for key, value in self:
            if '\n' in str(value):
                raise IndexValidationError(f'Value of key {key!r} contains a line feed')

    def serialise(self) -> str:
        '''Convert the index to a string suitable for e.g. a simple text file storage

        The index is validated first; each entry becomes one `key: value` line.'''
        self.validate()
        return ''.join(f'{key}: {value}\n' for key, value in self)

    @staticmethod
    def _parse_line(line: str) -> tuple[str, str]:
        '''Split one serialised line into its `(key, value)` pair'''
        # Drop only the line terminator; a final line without one must keep its last character.
        line = line.removesuffix('\n')
        # Split on the first separator only so that values may themselves contain ': '.
        key, separator, value = line.partition(': ')
        if not separator:
            raise IndexValidationError(f'Malformed index line: {line!r}')
        return key, value

    @classmethod
    def deserialise(cls, f: typing.Union[str, bytes, os.PathLike, typing.TextIO]):
        '''Import a serialised index from a filename or file-like object

        A filename is opened (and closed again) here; a file-like object is read but left open for the caller. Raises `IndexValidationError` if a line has no `': '` separator or the resulting index fails validation.'''
        if isinstance(f, (str, bytes, os.PathLike)):
            cm = open(f, 'r')
        else:
            cm = contextlib.nullcontext(f)
        with cm as fp:
            o = cls(cls._parse_line(line) for line in fp)
        o.validate()
        return o
class HttpError(Exception): | class HttpError(Exception): | ||||
'''An HTTP request failed too many times.''' | '''An HTTP request failed too many times.''' | ||||
@@ -1,6 +1,7 @@ | |||||
import codearchiver.core | import codearchiver.core | ||||
import codearchiver.subprocess | import codearchiver.subprocess | ||||
import datetime | import datetime | ||||
import functools | |||||
import logging | import logging | ||||
import os.path | import os.path | ||||
import shutil | import shutil | ||||
@@ -10,6 +11,14 @@ import subprocess | |||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
class GitIndex(codearchiver.core.Index):
    '''Index of a Git bundle; the keys `Ref`, `Root commit`, and `Commit` are all required and repeatable'''

    fields = [
        codearchiver.core.IndexField(key = name, required = True, repeatable = True)
        for name in ('Ref', 'Root commit', 'Commit')
    ]
class Git(codearchiver.core.Module): | class Git(codearchiver.core.Module): | ||||
name = 'git' | name = 'git' | ||||
@@ -51,10 +60,22 @@ class Git(codearchiver.core.Module): | |||||
logger.info(f'Bundling into {bundle}') | logger.info(f'Bundling into {bundle}') | ||||
codearchiver.subprocess.run_with_log(['git', 'bundle', 'create', '--progress', f'../{bundle}', '--all'], cwd = directory) | codearchiver.subprocess.run_with_log(['git', 'bundle', 'create', '--progress', f'../{bundle}', '--all'], cwd = directory) | ||||
logger.info(f'Collecting repository metadata for index') | |||||
_, refs = codearchiver.subprocess.run_with_log(['git', 'show-ref'], cwd = directory) | |||||
_, commits = codearchiver.subprocess.run_with_log(['git', 'log', '--reflog', '--format=format:%H% P'], cwd = directory) | |||||
logger.info(f'Removing clone') | logger.info(f'Removing clone') | ||||
shutil.rmtree(directory) | shutil.rmtree(directory) | ||||
return codearchiver.core.Result(id = self._id, files = [bundle]) | |||||
index = GitIndex() | |||||
for line in refs.splitlines(): | |||||
index.append('Ref', line) | |||||
for commitHash, *parents in map(functools.partial(str.split, sep = ' '), commits.splitlines()): | |||||
index.append('Commit', commitHash) | |||||
if not parents: | |||||
index.append('Root commit', commitHash) | |||||
return codearchiver.core.Result(id = self._id, files = [(bundle, index)]) | |||||
def __repr__(self):
    '''Return a constructor-like representation built from the input URL and the extra branches'''
    cls = type(self)
    return f'{cls.__module__}.{cls.__name__}({self._inputUrl!r}, extraBranches = {self._extraBranches!r})'
@@ -12,13 +12,13 @@ _logger = logging.getLogger(__name__) | |||||
class Storage(abc.ABC): | class Storage(abc.ABC): | ||||
@abc.abstractmethod
def put(self, filename: str, index: typing.Optional[codearchiver.core.Index] = None):
    '''Store a local file, together with its index if one is given.

    On success, the local input file is removed. On error, a partial copy may be left behind in storage.'''
def put_result(self, result: codearchiver.core.Result):
    '''Store all files of a module's Result, then recurse into the results of its submodules.

    Each file is handed to `put` with the same semantics; what happens to partial copies and leftover files on errors is undefined.'''
    for fileEntry in result.files:
        filename, fileIndex = fileEntry
        self.put(filename, fileIndex)
    for submoduleEntry in result.submoduleResults:
        _module, nestedResult = submoduleEntry
        self.put_result(nestedResult)
@@ -43,10 +43,16 @@ class DirectoryStorage(Storage): | |||||
if not self._check_directory(): | if not self._check_directory(): | ||||
os.makedirs(self._directory) | os.makedirs(self._directory) | ||||
def put(self, filename, index = None):
    '''Move a local file into the storage directory and, if a non-empty index is given, write the index next to it.

    The index is written to `<base name of filename>.codearchiver-index` inside the storage directory. The index file is created exclusively, so an already existing index file raises `FileExistsError`. An invalid index raises from `index.serialise()` before anything is moved or written.'''
    # Serialise (and thereby validate) up front so that an invalid index neither leaves a moved file without its index nor an empty index file behind.
    serialisedIndex = index.serialise() if index else None
    self._ensure_directory()
    _logger.info(f'Moving {filename} to {self._directory}')
    shutil.move(filename, self._directory)
    if serialisedIndex is None:
        return
    # shutil.move stores the file under its base name inside the directory, so the index has to be named after the base name as well; joining the raw filename would misplace it for paths with a directory component.
    indexFilename = os.path.join(self._directory, f'{os.path.basename(filename)}.codearchiver-index')
    _logger.info(f'Writing index for {filename} to {indexFilename}')
    with open(indexFilename, 'x') as fp:
        fp.write(serialisedIndex)
@contextlib.contextmanager | @contextlib.contextmanager | ||||
def open(self, filename): | def open(self, filename): | ||||