From 240dcceb101f624acdfeb168d2e62d88e0341a41 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Sat, 4 Mar 2023 21:44:51 +0000 Subject: [PATCH] Add subprocess wrapper for logging stderr --- codearchiver/modules/git.py | 9 +++-- codearchiver/subprocess.py | 73 +++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 4 deletions(-) create mode 100644 codearchiver/subprocess.py diff --git a/codearchiver/modules/git.py b/codearchiver/modules/git.py index b50f6de..7223955 100644 --- a/codearchiver/modules/git.py +++ b/codearchiver/modules/git.py @@ -1,4 +1,5 @@ import codearchiver.core +import codearchiver.subprocess import datetime import logging import os.path @@ -34,21 +35,21 @@ class Git(codearchiver.core.Module): return logger.info(f'Cloning {self._url} into {directory}') - subprocess.run(['git', 'clone', '--verbose', '--mirror', self._url, directory], check = True) + codearchiver.subprocess.run_with_log(['git', 'clone', '--verbose', '--mirror', self._url, directory]) if self._extraBranches: for branch, commit in self._extraBranches.items(): logger.info(f'Fetching commit {commit} as {branch}') - r = subprocess.run(['git', 'fetch', '--verbose', 'origin', commit], cwd = directory) + r = codearchiver.subprocess.run_with_log(['git', 'fetch', '--verbose', '--progress', 'origin', commit], cwd = directory, check = False) if r.returncode == 0: - r2 = subprocess.run(['git', 'update-ref', f'refs/codearchiver/{branch}', commit, ''], cwd = directory) + r2 = codearchiver.subprocess.run_with_log(['git', 'update-ref', f'refs/codearchiver/{branch}', commit, ''], cwd = directory, check = False) if r2.returncode != 0: logger.error(f'Failed to update-ref refs/codearchiver/{branch} to {commit}') else: logger.error(f'Failed to fetch {commit}') logger.info(f'Bundling into {bundle}') - subprocess.run(['git', 'bundle', 'create', f'../{bundle}', '--all'], cwd = directory, check = True) + codearchiver.subprocess.run_with_log(['git', 'bundle', 'create', '--progress', f'../{bundle}', '--all'], cwd = directory) logger.info(f'Removing clone') shutil.rmtree(directory) diff --git a/codearchiver/subprocess.py b/codearchiver/subprocess.py new file mode 100644 index 0000000..73d9339 --- /dev/null +++ b/codearchiver/subprocess.py @@ -0,0 +1,73 @@ +import logging +import os +import select +import selectors +import subprocess + + +_logger = logging.getLogger(__name__) + + +def run_with_log(args, *, check = True, input = None, **kwargs): + ''' + Run a command using `subprocess.Popen(args, **kwargs)` and log all its stderr output via `logging`. + `check` has the same semantics as on `subprocess.run`, i.e. raises an exception if the process exits non-zero. + `input`, if specified, is a `bytes` object that is fed to the subprocess via stdin. + `stdin`, `stdout`, and `stderr` kwargs must not be used. + Returns a tuple with the process's exit status and its stdout output. + ''' + badKwargs = {'stdin', 'stdout', 'stderr'}.intersection(set(kwargs)) + if badKwargs: + raise ValueError(f'Disallowed kwargs: {", ".join(sorted(badKwargs))}') + _logger.info(f'Running subprocess: {args!r}') + if input: + kwargs['stdin'] = subprocess.PIPE + p = subprocess.Popen(args, **kwargs, stdout = subprocess.PIPE, stderr = subprocess.PIPE) + sel = selectors.DefaultSelector() + if input: + sel.register(p.stdin, selectors.EVENT_WRITE) + sel.register(p.stdout, selectors.EVENT_READ) + sel.register(p.stderr, selectors.EVENT_READ) + stdout = [] + stderrBuf = b'' + if input: + stdinView = memoryview(input) + stdinOffset = 0 + PIPE_BUF = getattr(select, 'PIPE_BUF', 512) + while sel.get_map(): + for key, _ in sel.select(): + if key.fileobj is p.stdin: + try: + stdinOffset += os.write(key.fd, stdinView[stdinOffset : stdinOffset + PIPE_BUF]) + except BrokenPipeError: + sel.unregister(key.fileobj) + key.fileobj.close() + else: + if stdinOffset >= len(input): + sel.unregister(key.fileobj) + key.fileobj.close() + else: + data = key.fileobj.read1() + if not data: + sel.unregister(key.fileobj) + key.fileobj.close() + continue + if key.fileobj is p.stderr: + stderrBuf += data + *lines, stderrBuf = stderrBuf.replace(b'\r', b'\n').rsplit(b'\n', 1) + if not lines: + continue + lines = lines[0].decode('utf-8').split('\n') + for line in lines: + _logger.info(line) + else: + stdout.append(data) + if stderrBuf: + _logger.info(stderrBuf.decode('utf-8')) + assert p.poll() is not None + if input and stdinOffset < len(input): + _logger.warning(f'Could not write all input to the stdin pipe (wanted to write {len(input)} bytes, only wrote {stdinOffset})') + _logger.info(f'Process exited with status {p.returncode}') + if check and p.returncode != 0: + raise subprocess.CalledProcessError(returncode = p.returncode, cmd = args) + return (p.returncode, b''.join(stdout).decode('utf-8'))