Browse Source

Taint storage if any codearchiver process exited non-zero as it might cause parallel processes to rely on broken data

master
JustAnotherArchivist 1 year ago
parent
commit
6fd569a577
1 changed files with 30 additions and 6 deletions
  1. +30
    -6
      codearchiver-bot

+ 30
- 6
codearchiver-bot View File

@@ -43,6 +43,16 @@ function respond {
send "${nick}: ${message}"
}

function taint_block {
message="Storage is tainted, not ${1}"
if [[ -e '.tainted' ]]; then
log "${message}"
while [[ -e '.tainted' ]]; do
sleep 1
done
fi
}

{ # Group the pipeline without requiring a backslash every time
while :; do
# Read from http2irc
@@ -126,6 +136,8 @@ function respond {
continue
fi

taint_block 'continuing with work loop'

# Find nonexistent filename for log file with lock
# mkdir is pretty much always atomic, creating files might not be depending on the underlying file system (e.g. networked ones like NFS).
while ! mkdir '.loglock' 2> >(log_loop 'mkdir loglock (work) err: '); do
@@ -152,6 +164,8 @@ function respond {
# Produces lines of filenames to upload on stdout
log "Running ${url} (${singlejobid}), logging into ${logname}"
(
taint_block "launching job ${singlejobid}"

timeout --signal=INT "${timeout}" \
codearchiver --verbose --write-artefacts-fd-3 "${url}" \
> >(log_loop "codearchiver ${singlejobid} out: ") \
@@ -170,14 +184,18 @@ function respond {
logname="${logname}.zst"
fi

# Move everything but the log file to ./failed/ if codearchiver exited non-zero
# Verify that there are no artefacts if codearchiver exited non-zero
# Since codearchiver handles errors internally normally, this should not usually happen, but it could occur e.g. if running out of disk space and leaving partial files in the storage.
# With parallelism, this could in theory lead to artefacts of a successful run depending on artefacts from a failed run, which we wouldn't want.
# So, if there are artefacts of a failed process, touch the .tainted file to stop the uploader and new processes starting and send a warning to IRC.
# Emit the log filename for upload always (even on tainted storage), artefacts list and artefacts only on zero exit.
readarray -t artefacts <"${artefactsname}"
if [[ "${status}" -ne 0 ]]; then
msg="$(printf 'Moving artefact files'; printf ' %q' "${artefacts[@]}" "${artefactsname}"; printf ' from non-zero exit for job %s to ./failed/\n' "${singlejobid}";)"
if [[ "${status}" -ne 0 && "${#artefacts[@]}" -ne 0 ]]; then
touch '.tainted'
send "Job ${singlejobid} exited non-zero but left artefacts behind!"
msg="$(printf 'Artefact files by non-zero exit process: '; printf ' %q' "${artefacts[@]}")"
log "${msg}"
mkdir --parents ./failed/
mv --verbose -- "${artefacts[@]}" "${artefactsname}" ./failed/ 2> >(log_loop 'mv err: ') | log_loop 'mv out: '
else
elif [[ "${status}" -eq 0 ]]; then
for file in "${artefacts[@]}"; do
printf '%s\n' "${file}"
done
@@ -217,6 +235,8 @@ function respond {
# Record SHA-256 hashes for new files
sha256sum "${filenames[@]}" > >(log_loop 'sha256sum: ')

taint_block 'uploading anything'

# Upload
date="$(date '+%Y-%m-%d')"
identifier="codearchiver_${date//-/}"
@@ -228,6 +248,8 @@ function respond {
fi
uploadsfine=y
for f in "${filenames[@]}"; do
taint_block "starting upload for $(printf '%q' "${f}")"

log "Uploading $(printf '%q' "${f}") to ${identifier}"
ia-upload-stream --no-derive "${identifier}" "${f}" \
"collection:${collection}" \
@@ -258,6 +280,8 @@ function respond {
sleep 60
done

taint_block 'removing any files after upload'

# Replace non-metadata files with a symlink to .uploaded dummy file
# No locking with codearchiver processes is necessary because those will only read metadata (which is left alone) or write files.
# However, a lock with the log filename finding is required.


Loading…
Cancel
Save