@@ -8,6 +8,7 @@ Configuration happens via environment variables: | |||||
* `IA_S3_ACCESS` and `IA_S3_SECRET`: authentication for IA | * `IA_S3_ACCESS` and `IA_S3_SECRET`: authentication for IA | ||||
* `CODEARCHIVER_BOT_TEST` (optional): enables test mode when set to any non-empty value, with uploads going into items prefixed with `test_` and placed into `test_collection`. | * `CODEARCHIVER_BOT_TEST` (optional): enables test mode when set to any non-empty value, with uploads going into items prefixed with `test_` and placed into `test_collection`. | ||||
* `CODEARCHIVER_BOT_TIMEOUT` (optional): maximum number of seconds a `codearchiver` command may run. Default: unlimited (0) | * `CODEARCHIVER_BOT_TIMEOUT` (optional): maximum number of seconds a `codearchiver` command may run. Default: unlimited (0) | ||||
* `CODEARCHIVER_BOT_NPROC` (optional): number of parallel `codearchiver` processes. Default: 1 | |||||
The data produced by `codearchiver-bot` must be kept in its working directory for correct deduplication. However, there's nothing unique there; all data is uploaded to IA continuously, and operation can be restored from there by downloading all `*_codearchiver_metadata.txt` files and creating placeholders (e.g. symlinks to `.uploaded` as the script does by default) for everything else. | The data produced by `codearchiver-bot` must be kept in its working directory for correct deduplication. However, there's nothing unique there; all data is uploaded to IA continuously, and operation can be restored from there by downloading all `*_codearchiver_metadata.txt` files and creating placeholders (e.g. symlinks to `.uploaded` as the script does by default) for everything else. | ||||
@@ -11,6 +11,8 @@ done | |||||
# Optional env variables
# Both settings carry the integer attribute, so bash evaluates the environment
# strings arithmetically when assigning them.
declare -i timeout="${CODEARCHIVER_BOT_TIMEOUT:-0}"
declare -i nproclimit="${CODEARCHIVER_BOT_NPROC:-1}"
# A limit below 1 would make the slot-wait loop call `wait -n` while no job is
# running and push the job counter negative; fall back to one process instead.
if (( nproclimit < 1 )); then
	printf 'CODEARCHIVER_BOT_NPROC must be at least 1; using 1\n' >&2
	nproclimit=1
fi
# Count of background subshells started by the work loop and not yet reaped
declare -i nproc=0
for dep in awk codearchiver curl ia-upload-stream python3 sha256sum tee zstd; do | for dep in awk codearchiver curl ia-upload-stream python3 sha256sum tee zstd; do | ||||
if ! command -v "${dep}" &>/dev/null; then | if ! command -v "${dep}" &>/dev/null; then | ||||
@@ -136,6 +138,13 @@ function taint_block { | |||||
continue | continue | ||||
fi | fi | ||||
# Throttle: hold off starting the next job while every slot is occupied
until (( nproc < nproclimit )); do
	# Reap whichever background job finishes next and release its slot
	wait -n
	nproc=$(( nproc - 1 ))
done
taint_block 'continuing with work loop' | taint_block 'continuing with work loop' | ||||
# Find nonexistent filename for log file with lock | # Find nonexistent filename for log file with lock | ||||
@@ -216,7 +225,7 @@ function taint_block { | |||||
fi | fi | ||||
fi | fi | ||||
) & | ) & | ||||
wait | |||||
nproc+=1 | |||||
done | | done | | ||||
# Upload | # Upload | ||||