@@ -43,6 +43,16 @@ function respond {
send "${nick}: ${message}"
send "${nick}: ${message}"
}
}
# Block the caller for as long as the '.tainted' marker file exists in the
# current working directory.
#
# If the marker is present on entry, one line is written via `log`, then the
# marker is polled once per second until it is gone. If the marker is absent,
# the function returns immediately without logging anything.
#
# Arguments:
#   $1 - description of the action being held back; it is appended to
#        "Storage is tainted, not " to form the log line.
# Globals:
#   none written (the message is kept local so that a caller's own `message`
#   variable is not clobbered).
# Returns:
#   0 once the marker is absent.
function taint_block {
	# Keep this local: `message` is a common variable name elsewhere in the script.
	local message="Storage is tainted, not ${1}"
	if [[ -e '.tainted' ]]; then
		# Log only once on entry rather than on every poll iteration.
		log "${message}"
		while [[ -e '.tainted' ]]; do
			sleep 1
		done
	fi
}
{ # Group the pipeline without requiring a backslash every time
{ # Group the pipeline without requiring a backslash every time
while :; do
while :; do
# Read from http2irc
# Read from http2irc
@@ -126,6 +136,8 @@ function respond {
continue
continue
fi
fi
taint_block 'continuing with work loop'
# Find nonexistent filename for log file with lock
# Find nonexistent filename for log file with lock
# mkdir is pretty much always atomic, creating files might not be depending on the underlying file system (e.g. networked ones like NFS).
# mkdir is pretty much always atomic, creating files might not be depending on the underlying file system (e.g. networked ones like NFS).
while ! mkdir '.loglock' 2> >(log_loop 'mkdir loglock (work) err: '); do
while ! mkdir '.loglock' 2> >(log_loop 'mkdir loglock (work) err: '); do
@@ -152,6 +164,8 @@ function respond {
# Produces lines of filenames to upload on stdout
# Produces lines of filenames to upload on stdout
log "Running ${url} (${singlejobid}), logging into ${logname}"
log "Running ${url} (${singlejobid}), logging into ${logname}"
(
(
taint_block "launching job ${singlejobid}"
timeout --signal=INT "${timeout}" \
timeout --signal=INT "${timeout}" \
codearchiver --verbose --write-artefacts-fd-3 "${url}" \
codearchiver --verbose --write-artefacts-fd-3 "${url}" \
> >(log_loop "codearchiver ${singlejobid} out: ") \
> >(log_loop "codearchiver ${singlejobid} out: ") \
@@ -170,14 +184,18 @@ function respond {
logname="${logname}.zst"
logname="${logname}.zst"
fi
fi
# Move everything but the log file to ./failed/ if codearchiver exited non-zero
# Verify that there are no artefacts if codearchiver exited non-zero
# Since codearchiver normally handles errors internally, this should not usually happen, but it could occur e.g. when running out of disk space, which can leave partial files in the storage.
# With parallelism, this could in theory lead to artefacts of a successful run depending on artefacts from a failed run, which we wouldn't want.
# So, if there are artefacts of a failed process, touch the .tainted file to stop the uploader and new processes starting and send a warning to IRC.
# Emit the log filename for upload always (even on tainted storage), artefacts list and artefacts only on zero exit.
readarray -t artefacts <"${artefactsname}"
readarray -t artefacts <"${artefactsname}"
if [[ "${status}" -ne 0 ]]; then
msg="$(printf 'Moving artefact files'; printf ' %q' "${artefacts[@]}" "${artefactsname}"; printf ' from non-zero exit for job %s to ./failed/\n' "${singlejobid}";)"
if [[ "${status}" -ne 0 && "${#artefacts[@]}" -ne 0 ]]; then
touch '.tainted'
send "Job ${singlejobid} exited non-zero but left artefacts behind!"
msg="$(printf 'Artefact files by non-zero exit process: '; printf ' %q' "${artefacts[@]}")"
log "${msg}"
log "${msg}"
mkdir --parents ./failed/
mv --verbose -- "${artefacts[@]}" "${artefactsname}" ./failed/ 2> >(log_loop 'mv err: ') | log_loop 'mv out: '
else
elif [[ "${status}" -eq 0 ]]; then
for file in "${artefacts[@]}"; do
for file in "${artefacts[@]}"; do
printf '%s\n' "${file}"
printf '%s\n' "${file}"
done
done
@@ -217,6 +235,8 @@ function respond {
# Record SHA-256 hashes for new files
# Record SHA-256 hashes for new files
sha256sum "${filenames[@]}" > >(log_loop 'sha256sum: ')
sha256sum "${filenames[@]}" > >(log_loop 'sha256sum: ')
taint_block 'uploading anything'
# Upload
# Upload
date="$(date '+%Y-%m-%d')"
date="$(date '+%Y-%m-%d')"
identifier="codearchiver_${date//-/}"
identifier="codearchiver_${date//-/}"
@@ -228,6 +248,8 @@ function respond {
fi
fi
uploadsfine=y
uploadsfine=y
for f in "${filenames[@]}"; do
for f in "${filenames[@]}"; do
taint_block "starting upload for $(printf '%q' "${f}")"
log "Uploading $(printf '%q' "${f}") to ${identifier}"
log "Uploading $(printf '%q' "${f}") to ${identifier}"
ia-upload-stream --no-derive "${identifier}" "${f}" \
ia-upload-stream --no-derive "${identifier}" "${f}" \
"collection:${collection}" \
"collection:${collection}" \
@@ -258,6 +280,8 @@ function respond {
sleep 60
sleep 60
done
done
taint_block 'removing any files after upload'
# Replace non-metadata files with a symlink to .uploaded dummy file
# Replace non-metadata files with a symlink to .uploaded dummy file
# No locking with codearchiver processes is necessary because those will only read metadata (which is left alone) or write files.
# No locking with codearchiver processes is necessary because those will only read metadata (which is left alone) or write files.
# However, a lock with the log filename finding is required.
# However, a lock with the log filename finding is required.