Bläddra i källkod

Add undocumented --write-artefacts-fd-3 for codearchiver-bot

tags/v1.1
JustAnotherArchivist 1 år sedan
förälder
incheckning
9fa36654f5
2 ändrade filer med 34 tillägg och 0 borttagningar
  1. +7
    -0
      codearchiver/cli.py
  2. +27
    -0
      codearchiver/storage.py

+ 7
- 0
codearchiver/cli.py Visa fil

@@ -143,6 +143,8 @@ def parse_args():
parser.add_argument('--version', action = 'version', version = f'codearchiver {codearchiver.version.__version__}')
parser.add_argument('-v', '--verbose', '--verbosity', dest = 'verbosity', action = 'count', default = 0, help = 'Increase output verbosity')
parser.add_argument('--dump-locals', dest = 'dumpLocals', action = 'store_true', default = False, help = 'Dump local variables on serious log messages (warnings or higher)')
# Undocumented option to write one line for each artefact filename produced by this process to FD 3.
parser.add_argument('--write-artefacts-fd-3', dest = 'writeArtefactsFd3', action = 'store_true', help = argparse.SUPPRESS)
parser.add_argument('url', help = 'Target URL')
args = parser.parse_args()
return args
@@ -190,6 +192,8 @@ def main():
import codearchiver.storage
with _dump_locals_on_exception():
inputUrl = codearchiver.core.InputURL(args.url)
if args.writeArtefactsFd3:
artefactsFd = os.fdopen(3, 'w')
storage = codearchiver.storage.DirectoryStorage(os.getcwd())
module = codearchiver.core.get_module_instance(inputUrl, storage = storage)
with tempfile.TemporaryDirectory(prefix = 'tmp.codearchiver.', dir = os.getcwd()) as td:
@@ -199,6 +203,9 @@ def main():
result = module.process()
finally:
os.chdir('..')
if args.writeArtefactsFd3:
for filename in storage.newFiles:
print(filename, file = artefactsFd)

# Run the command-line entry point only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()

+ 27
- 0
codearchiver/storage.py Visa fil

@@ -13,6 +13,12 @@ _logger = logging.getLogger(__name__)


class Storage(abc.ABC):
'''
Interface for storage backing the codearchiver collection
This serves primarily to aid deduplication by locating prior archives of the same or closely related repositories.
Filenames must not contain LF.
'''

@abc.abstractmethod
def put(self, filename: str, metadata: typing.Optional['codearchiver.core.Metadata'] = None):
'''Put a local file and (if provided) its metadata into storage. If an error occurs, a partial copy may remain in storage. If it completes, the local input file is removed.'''
@@ -24,6 +30,15 @@ class Storage(abc.ABC):
for _, subresult in result.submoduleResults:
self.put_result(subresult)

@property
@abc.abstractmethod
def newFiles(self) -> list[str]:
    '''
    Names of every file that was `.put()` on this instance so far.

    Implementations may also list extra files that were written to hold metadata.
    Implementations must return a copy of their internal state, so that callers
    can modify the returned list without affecting the storage object.
    '''

@abc.abstractmethod
def search_metadata(self, criteria: list[tuple[str, typing.Union[str, tuple[str]]]]) -> collections.abc.Iterator[str]:
'''
@@ -47,6 +62,7 @@ class DirectoryStorage(Storage):
def __init__(self, directory):
    '''Create a storage object operating on `directory`; no files have been recorded as new yet.'''
    super().__init__()
    # Names recorded by `put()`; the `newFiles` property hands out copies of this list.
    self._newFiles = list()
    self._directory = directory

def _check_directory(self):
exists = os.path.exists(self._directory)
@@ -60,11 +76,14 @@ class DirectoryStorage(Storage):

def put(self, filename, metadata = None):
self._ensure_directory()
if '\n' in filename:
raise ValueError(fr'filenames cannot contain \n: {filename!r}')
#FIXME: Race condition
if os.path.exists((targetFilename := os.path.join(self._directory, os.path.basename(filename)))):
raise FileExistsError(f'{targetFilename} already exists')
_logger.info(f'Moving {filename} to {self._directory}')
shutil.move(filename, self._directory)
self._newFiles.append(filename)
if not metadata:
return
metadataFilename = os.path.join(self._directory, f'{filename}_codearchiver_metadata.txt')
@@ -72,6 +91,11 @@ class DirectoryStorage(Storage):
_logger.info(f'Writing metadata for {filename} to {metadataFilename}')
with open(metadataFilename, 'x') as fp:
fp.write(metadata.serialise())
self._newFiles.append(metadataFilename)

@property
def newFiles(self):
    '''Return a fresh list of the filenames recorded by `put()` on this instance.'''
    # Hand out a shallow copy so that callers cannot mutate the internal bookkeeping list.
    recorded = self._newFiles
    return list(recorded)

def search_metadata(self, criteria):
_logger.info(f'Searching metadata by criteria: {criteria!r}')
@@ -82,6 +106,7 @@ class DirectoryStorage(Storage):
files.sort()
for metadataFilename in files:
metadataFilename = metadataFilename[escapedDirPrefixLen:]
assert '\n' not in metadataFilename
_logger.info(f'Searching metadata {metadataFilename}')
with self.open(metadataFilename, 'r') as fp:
idx = codearchiver.core.Metadata.deserialise(fp, validate = False)
@@ -97,5 +122,7 @@ class DirectoryStorage(Storage):

@contextlib.contextmanager
def open(self, filename, mode = 'rb'):
    '''
    Context manager that opens `filename`, resolved relative to the storage directory, in the given `mode`.

    Yields the open file object, which is closed again when the context exits.
    Raises ValueError if `filename` contains a line feed.
    '''
    # Filenames with LF are rejected up front, before the file system is touched.
    if '\n' in filename:
        raise ValueError(fr'filenames cannot contain \n: {filename!r}')
    targetPath = os.path.join(self._directory, filename)
    # Inside a method body, `open` resolves to the builtin rather than to this method.
    with open(targetPath, mode) as handle:
        yield handle

Laddar…
Avbryt
Spara