From 550afa86441800115f434a98f1f7df4c56dc04eb Mon Sep 17 00:00:00 2001
From: JustAnotherArchivist
Date: Sun, 5 Mar 2023 02:58:02 +0000
Subject: [PATCH] Add storage abstraction

---
Review notes (everything between the `---` line and the first `diff --git` is ignored by `git am`):
- Line breaks and tab indentation were restored from a whitespace-collapsed copy. The single blank
  context line in the second cli.py hunk was placed by inference from the hunk header counts;
  verify it against the tree before applying.
- Amended in review: main() restores the saved working directory instead of '..';
  DirectoryStorage.open() reads from the storage directory instead of the working directory;
  DirectoryStorage.put_queue() drains the queue as files are moved; docstrings added.
- The commit hash above and the `index` blob hashes below still describe the unamended commit.

 codearchiver/cli.py     | 17 +++++++++-
 codearchiver/storage.py | 79 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+), 1 deletion(-)
 create mode 100644 codearchiver/storage.py

diff --git a/codearchiver/cli.py b/codearchiver/cli.py
index 2114583..75e8375 100644
--- a/codearchiver/cli.py
+++ b/codearchiver/cli.py
@@ -3,10 +3,12 @@ import contextlib
 import datetime
 import inspect
 import logging
+import os
 import requests.models
 # Imported in parse_args() and main() after setting up the logger:
 #import codearchiver.core
 #import codearchiver.modules
+#import codearchiver.storage
 #import codearchiver.version
 import tempfile
 
@@ -182,13 +184,26 @@ def main():
 	setup_logging()
 	args = parse_args()
 	configure_logging(args.verbosity, args.dumpLocals)
+	_logger = logging.getLogger(__name__)
 
 	import codearchiver.core
 	import codearchiver.modules
+	import codearchiver.storage
 	with _dump_locals_on_exception():
 		inputUrl = codearchiver.core.InputURL(args.url)
 		module = codearchiver.core.get_module_instance(inputUrl)
-		module.process()
+		origCwd = os.getcwd()
+		storage = codearchiver.storage.DirectoryStorage(origCwd)
+		with tempfile.TemporaryDirectory(prefix = 'tmp.codearchiver.', dir = origCwd) as td:
+			_logger.debug(f'Running in {td}')
+			os.chdir(td)
+			try:
+				result = module.process()
+				storage.queue_result(result)
+				storage.put_queue()
+			finally:
+				# Return to the saved directory rather than '..', which is only correct if the module did not change directory itself.
+				os.chdir(origCwd)
 
 if __name__ == '__main__':
 	main()
diff --git a/codearchiver/storage.py b/codearchiver/storage.py
new file mode 100644
index 0000000..20fb045
--- /dev/null
+++ b/codearchiver/storage.py
@@ -0,0 +1,79 @@
+import abc
+import codearchiver.core
+import contextlib
+import logging
+import os.path
+import shutil
+import typing
+
+
+_logger = logging.getLogger(__name__)
+
+
+class Storage(abc.ABC):
+	'''Interface for storage backends: local files are queued, then written to storage in one step.'''
+
+	def __init__(self):
+		self._queue = []
+
+	def queue(self, filename: str):
+		'''Queue a local file for putting into storage. Note that nothing is written to storage until `put_queue` is called.'''
+		_logger.debug(f'Queueing {filename}')
+		self._queue.append(filename)
+
+	def queue_result(self, result: codearchiver.core.Result):
+		'''Queue all files of a result and, recursively, of its submodule results.'''
+		for fn in result.files:
+			self.queue(fn)
+		for _, subresult in result.submoduleResults:
+			self.queue_result(subresult)
+
+	@property
+	def queued_files(self) -> typing.List[str]:
+		'''A copy of the list of queued filenames; modifying it does not affect the queue.'''
+		return self._queue[:]
+
+	@abc.abstractmethod
+	def put_queue(self):
+		'''Put all queued files into storage. If an error occurs, partial copies may remain in storage. If it completes, the local input copy is removed.'''
+
+	@abc.abstractmethod
+	@contextlib.contextmanager
+	def open(self, filename: str) -> typing.Iterator[typing.BinaryIO]:
+		'''Open a file from storage.'''
+
+
+class DirectoryStorage(Storage):
+	'''Storage backend that keeps files in a directory on the local filesystem.'''
+
+	def __init__(self, directory):
+		super().__init__()
+		self._directory = directory
+
+	def _check_directory(self):
+		'''Return whether the storage directory exists; raise NotADirectoryError if the path exists but is not a directory.'''
+		exists = os.path.exists(self._directory)
+		if exists and not os.path.isdir(self._directory):
+			raise NotADirectoryError(self._directory)
+		return exists
+
+	def _ensure_directory(self):
+		'''Create the storage directory if it does not exist yet.'''
+		if not self._check_directory():
+			# exist_ok: another process may create the directory between the check and this call.
+			os.makedirs(self._directory, exist_ok = True)
+
+	def put_queue(self):
+		'''Move all queued files into the storage directory. Each file leaves the queue once it has been moved, so a later call never retries files that are already stored.'''
+		self._ensure_directory()
+		for fn in self.queued_files:
+			_logger.info(f'Moving {fn} to {self._directory}')
+			shutil.move(fn, self._directory)
+			self._queue.remove(fn)
+
+	@contextlib.contextmanager
+	def open(self, filename):
+		'''Open a file in the storage directory for binary reading.'''
+		# Resolve the name against the storage directory; a bare `open(filename)` would read from the current working directory instead.
+		with open(os.path.join(self._directory, filename), 'rb') as fp:
+			yield fp