diff --git a/README.md b/README.md index 25bc655..e15fba8 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ Configuration happens via environment variables: * `HTTP2IRC_GET_URL` and `HTTP2IRC_POST_URL`: GET/POST URLs for IRC channel interaction * `IA_S3_ACCESS` and `IA_S3_SECRET`: authentication for IA * `CODEARCHIVER_BOT_TEST` (optional): enables test mode when set to any non-empty value, with uploads going into items prefixed with `test_` and placed into `test_collection`. +* `CODEARCHIVER_BOT_TIMEOUT` (optional): maximum number of seconds a `codearchiver` command may run before it is interrupted with SIGINT. Default: `0` (no limit). The data produced by `codearchiver-bot` must be kept in its working directory for correct deduplication. However, there's nothing unique there; all data is uploaded to IA continuously, and operation can be restored from there by downloading all `*_codearchiver_metadata.txt` files and creating placeholders (e.g. symlinks to `.uploaded` as the script does by default) for everything else. diff --git a/codearchiver-bot b/codearchiver-bot index bf04360..9da9603 100755 --- a/codearchiver-bot +++ b/codearchiver-bot @@ -9,6 +9,9 @@ for envvar in "${envvars[@]}"; do fi done +# Optional env variables +declare -i timeout="${CODEARCHIVER_BOT_TIMEOUT:-0}" + for dep in awk codearchiver curl ia-upload-stream python3 sha256sum tee zstd; do if ! command -v "${dep}" &>/dev/null; then printf 'Error: %s not found\n' "${dep}" >&2 @@ -155,7 +158,8 @@ function respond { # Run codearchiver, duplicating WARNINGs and higher in the bot output log "Running ${url} (${singlejobid}), logging into ${logname}" - codearchiver --verbose --write-artefacts-fd-3 "${url}" \ + timeout --signal=INT "${timeout}" \ + codearchiver --verbose --write-artefacts-fd-3 "${url}" \ 2> >(tee "${logname}" | grep -Fv -e ' INFO ' | log_loop "From codearchiver ${singlejobid}: ") \ 3> >(tee "${artefactsname}" | log_loop "New artefacts from codearchiver ${singlejobid}: ") status="$?"