From 7ee94bb579a0e09cc43d37943a92618e209dbacc Mon Sep 17 00:00:00 2001 From: Katie Holly Date: Sat, 4 Apr 2020 11:25:53 +0200 Subject: [PATCH] properly quote all variables --- chunk-multiple | 10 ++++----- chunker | 30 +++++++++++++-------------- docker-boot.sh | 2 +- du-all | 14 ++++++------- offload-multiple | 10 ++++----- offload-one | 48 +++++++++++++++++++++--------------------- pack-multiple | 10 ++++----- pack-one | 54 ++++++++++++++++++++++++------------------------ upload-multiple | 10 ++++----- upload-one | 50 ++++++++++++++++++++++---------------------- 10 files changed, 119 insertions(+), 119 deletions(-) mode change 100644 => 100755 docker-boot.sh diff --git a/chunk-multiple b/chunk-multiple index 80b23a3..5512e48 100755 --- a/chunk-multiple +++ b/chunk-multiple @@ -6,13 +6,13 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" while [[ -f RUN ]] do date - $SCRIPT_DIR/chunker - result=$? - if [[ $result -ne 0 ]] + "${SCRIPT_DIR}/chunker" + result="${?}" + if [[ "${result}" -ne 0 ]] then date - echo "chunker exited with $result" - exit $result + echo "chunker exited with ${result}" + exit "${result}" fi echo "Sleeping..." diff --git a/chunker b/chunker index 0c4ec01..85f9b9c 100755 --- a/chunker +++ b/chunker @@ -8,50 +8,50 @@ # can be moved somewhere else. Remember this when running Rsync. # -INCOMING_UPLOADS_DIR=$1 # /home/archiveteam/uploads -CHUNKER_WORKING_DIR=$2 # /home/archiveteam/processed -PACKING_QUEUE_DIR="$CHUNKER_WORKING_DIR/archive" +INCOMING_UPLOADS_DIR="${1}" # /home/archiveteam/uploads +CHUNKER_WORKING_DIR="${2}" # /home/archiveteam/processed +PACKING_QUEUE_DIR="${CHUNKER_WORKING_DIR}/archive" MEGABYTES_PER_CHUNK=$((1024*25)) # if not specified in command-line arguments -if [ -z $INCOMING_UPLOADS_DIR ] +if [ -z "${INCOMING_UPLOADS_DIR}" ] then source ./config.sh || exit 1 fi BYTES_PER_CHUNK=$((1024*1024*MEGABYTES_PER_CHUNK)) -mkdir -p "$CHUNKER_WORKING_DIR" || exit 1 -mkdir -p "$PACKING_QUEUE_DIR" || exit 1 +mkdir -p "${CHUNKER_WORKING_DIR}" || exit 1 +mkdir -p "${PACKING_QUEUE_DIR}" || exit 1 -mkdir -p "$CHUNKER_WORKING_DIR/current" || exit 1 -cur_size=$( du -B1 -s "$CHUNKER_WORKING_DIR/current" | grep -oE "^[0-9]+" ) +mkdir -p "${CHUNKER_WORKING_DIR}/current" || exit 1 +cur_size=$( du -B1 -s "${CHUNKER_WORKING_DIR}/current" | grep -oE "^[0-9]+" ) # find every .warc.gz in the upload directory -find "$INCOMING_UPLOADS_DIR" -type f -regex ".+\.warc\.\(gz\|zst\)$" \ +find "${INCOMING_UPLOADS_DIR}" -type f -regex ".+\.warc\.\(gz\|zst\)$" \ | while read filename do # skip partial uploads - if [[ $filename =~ rsync-tmp ]] + if [[ "${filename}" =~ rsync-tmp ]] then continue fi - cur_size=$((cur_size + $( du -B1 -s $filename | grep -oE "^[0-9]+" ))) + cur_size=$((cur_size + $( du -B1 -s "${filename}" | grep -oE "^[0-9]+" ))) # move to the current/ directory echo "Moving ${filename}" - mkdir -p "$CHUNKER_WORKING_DIR/current" - mv "${filename}" "$CHUNKER_WORKING_DIR/current/" + mkdir -p "${CHUNKER_WORKING_DIR}/current" + mv "${filename}" "${CHUNKER_WORKING_DIR}/current/" # if the current/ directory is large enough, # rename it to archive-XXXXX and start a new current/ - if [[ $cur_size -gt $BYTES_PER_CHUNK ]] + if [[ "${cur_size}" -gt "${BYTES_PER_CHUNK}" ]] then timestamp=$( date +'%Y%m%d%H%M%S' ) uuid=$(cat /proc/sys/kernel/random/uuid | cut -d- -f1) echo "Current archive is full, moving to ${timestamp}_${uuid}." - mv "$CHUNKER_WORKING_DIR/current" "$PACKING_QUEUE_DIR/${timestamp}_${uuid}" + mv "${CHUNKER_WORKING_DIR}/current" "${PACKING_QUEUE_DIR}/${timestamp}_${uuid}" cur_size=0 sleep 3 fi diff --git a/docker-boot.sh b/docker-boot.sh old mode 100644 new mode 100755 index 05fbb58..f3acfb2 --- a/docker-boot.sh +++ b/docker-boot.sh @@ -18,7 +18,7 @@ IA_COLLECTION="${IA_COLLECTION}" IA_ITEM_TITLE="${IA_ITEM_TITLE}" IA_ITEM_PREFIX="${IA_ITEM_PREFIX}" FILE_PREFIX="${FILE_PREFIX}" -IA_ITEM_DATE=${IA_ITEM_DATE_LIT} +IA_ITEM_DATE="${IA_ITEM_DATE_LIT}" OFFLOAD_TARGET="${OFFLOAD_TARGET}" ZST_DICTIONARY_API="${ZST_DICTIONARY_API}" INCOMING_UPLOADS_DIR="/data/incoming" diff --git a/du-all b/du-all index 37bc64c..2c94605 100755 --- a/du-all +++ b/du-all @@ -3,12 +3,12 @@ source ./config.sh || exit 1 du -hs \ - $INCOMING_UPLOADS_DIR \ - $CHUNKER_WORKING_DIR \ - $PACKING_QUEUE_DIR/* \ - $PACKER_WORKING_CHUNKS_DIR/* \ - $PACKER_WORKING_MEGAWARC_DIR/* \ - $UPLOAD_QUEUE_DIR/* \ - $UPLOADER_WORKING_DIR/* \ + "${INCOMING_UPLOADS_DIR}" \ + "${CHUNKER_WORKING_DIR}" \ + "${PACKING_QUEUE_DIR}/"* \ + "${PACKER_WORKING_CHUNKS_DIR}/"* \ + "${PACKER_WORKING_MEGAWARC_DIR}/"* \ + "${UPLOAD_QUEUE_DIR}/"* \ + "${UPLOADER_WORKING_DIR}/"* \ 2> >(grep -v 'du: cannot \(access\|read\)' >&2) diff --git a/offload-multiple b/offload-multiple index 43873d6..a7b61ec 100755 --- a/offload-multiple +++ b/offload-multiple @@ -5,13 +5,13 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" while [[ -f RUN ]] do - $SCRIPT_DIR/offload-one - result=$? - if [[ $result -ne 0 ]] + "${SCRIPT_DIR}/offload-one" + result="${?}" + if [[ "${result}" -ne 0 ]] then date - echo "offloader exited with $result" - exit $result + echo "offloader exited with ${result}" + exit "${result}" fi done diff --git a/offload-one b/offload-one index 2f78346..3e098a4 100755 --- a/offload-one +++ b/offload-one @@ -19,12 +19,12 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" source ./config.sh || exit 1 -mkdir -p "$UPLOAD_QUEUE_DIR" || exit 1 -mkdir -p "$UPLOADER_WORKING_DIR" || exit 1 +mkdir -p "${UPLOAD_QUEUE_DIR}" || exit 1 +mkdir -p "${UPLOADER_WORKING_DIR}" || exit 1 -if [ ! -z "$COMPLETED_DIR" ] +if [ ! -z "${COMPLETED_DIR}" ] then - mkdir -p "$COMPLETED_DIR" || exit 1 + mkdir -p "${COMPLETED_DIR}" || exit 1 fi function mayicontinue { @@ -44,17 +44,17 @@ fi # try to grab an item from UPLOAD_QUEUE_DIR ITEM=none -while [[ $ITEM = none ]] +while [[ "${ITEM}" = none ]] do - possible_item=$( ls -1 "$UPLOAD_QUEUE_DIR" | grep -E '[0-9]{14}_[a-f0-9]{8}$' | sort | head -n 1 ) + possible_item=$( ls -1 "${UPLOAD_QUEUE_DIR}" | grep -E '[0-9]{14}_[a-f0-9]{8}$' | sort | head -n 1 ) if test -n "${possible_item}" then - echo "Trying to grab $possible_item" - if mv "$UPLOAD_QUEUE_DIR/$possible_item" "$UPLOADER_WORKING_DIR/" + echo "Trying to grab ${possible_item}" + if mv "${UPLOAD_QUEUE_DIR}/${possible_item}" "${UPLOADER_WORKING_DIR}/" then - ITEM=$possible_item + ITEM="${possible_item}" else - echo "Failed to move $possible_item" + echo "Failed to move ${possible_item}" sleep 5 fi else @@ -66,10 +66,10 @@ do done -echo "$( date ): Start offloading for item $ITEM" >> uploader.log +echo "$( date ): Start offloading for item ${ITEM}" >> uploader.log result=1 -while [[ $result -ne 0 ]] +while [[ "${result}" -ne 0 ]] do _OFFLOAD_TARGET="${OFFLOAD_TARGET}" if test -z "${_OFFLOAD_TARGET}"; then @@ -83,46 +83,46 @@ do fi echo "Offloading to ${_OFFLOAD_TARGET}/${ITEM}/" rsync -rltv --timeout=900 --contimeout=60 --sockopts=SO_SNDBUF=8388608,SO_RCVBUF=8388608 --progress --stats --no-owner --no-group --partial --partial-dir .rsync-tmp --delay-updates --no-compress --compress-level 0 "${UPLOADER_WORKING_DIR}/${ITEM}/" "${_OFFLOAD_TARGET}/${ITEM}/" - result=$? - if [[ $result -ne 0 ]] + result="${?}" + if [[ "${result}" -ne 0 ]] then date - echo "Error while offloading $ITEM, rsync said $result" + echo "Error while offloading ${ITEM}, rsync said ${result}" echo "Will retry in 30 seconds" sleep 30 fi done -echo "Offloaded $ITEM" +echo "Offloaded ${ITEM}" -echo "$( date ): Completed offloading for item $ITEM" >> uploader.log +echo "$( date ): Completed offloading for item ${ITEM}" >> uploader.log mayicontinue # move or remove megawarc -if [ -z "$COMPLETED_DIR" ] +if [ -z "${COMPLETED_DIR}" ] then # remove rm -rf "${UPLOADER_WORKING_DIR}/${ITEM}" - result=$? + result="${?}" - if [[ $result -ne 0 ]] + if [[ "${result}" -ne 0 ]] then date - echo "rm -rf megawarc exited with $result for $ITEM" + echo "rm -rf megawarc exited with ${result} for ${ITEM}" exit 1 fi else # move mv "${UPLOADER_WORKING_DIR}/${ITEM}" "${COMPLETED_DIR}/" - result=$? + result="${?}" - if [[ $result -ne 0 ]] + if [[ "${result}" -ne 0 ]] then date - echo "rm -rf megawarc exited with $result for $ITEM" + echo "rm -rf megawarc exited with ${result} for ${ITEM}" exit 1 fi fi diff --git a/pack-multiple b/pack-multiple index 46a1b32..f9764b3 100755 --- a/pack-multiple +++ b/pack-multiple @@ -5,13 +5,13 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" while [[ -f RUN ]] do - $SCRIPT_DIR/pack-one - result=$? - if [[ $result -ne 0 ]] + "${SCRIPT_DIR}/pack-one" + result="${?}" + if [[ "${result}" -ne 0 ]] then date - echo "packer exited with $result" - exit $result + echo "packer exited with ${result}" + exit "${result}" fi done diff --git a/pack-one b/pack-one index ea496c6..d752bba 100755 --- a/pack-one +++ b/pack-one @@ -16,19 +16,19 @@ WORKING_DIR="$( pwd )" SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -MEGAWARC=$SCRIPT_DIR/megawarc/megawarc +MEGAWARC="${SCRIPT_DIR}/megawarc/megawarc" -if [[ ! -x $MEGAWARC ]] +if [[ ! -x "${MEGAWARC}" ]] then - echo "$MEGAWARC does not exist or is not executable." + echo "${MEGAWARC} does not exist or is not executable." exit 1 fi source ./config.sh || exit 1 -mkdir -p "$PACKER_WORKING_CHUNKS_DIR" || exit 1 -mkdir -p "$PACKER_WORKING_MEGAWARC_DIR" || exit 1 -mkdir -p "$UPLOAD_QUEUE_DIR" || exit 1 +mkdir -p "${PACKER_WORKING_CHUNKS_DIR}" || exit 1 +mkdir -p "${PACKER_WORKING_MEGAWARC_DIR}" || exit 1 +mkdir -p "${UPLOAD_QUEUE_DIR}" || exit 1 function mayicontinue { @@ -40,7 +40,7 @@ function mayicontinue { # check if the upload queue is empty -# if [ "$( ls -A $UPLOAD_QUEUE_DIR )" ] +# if [ "$( ls -A ${UPLOAD_QUEUE_DIR} )" ] # then # echo "Upload queue not empty. Wait." # sleep 30 @@ -53,17 +53,17 @@ mayicontinue # try to grab a directory from the packing queue ITEM=none -while [[ $ITEM = none ]] +while [[ "${ITEM}" = none ]] do - possible_item=$( ls -1 "$PACKING_QUEUE_DIR/" | grep -E '[0-9]{14}_[a-f0-9]{8}$' | sort | head -n 1 ) + possible_item=$( ls -1 "${PACKING_QUEUE_DIR}/" | grep -E '[0-9]{14}_[a-f0-9]{8}$' | sort | head -n 1 ) if test -n "${possible_item}" then - echo "Trying to grab $possible_item" - if mv "$PACKING_QUEUE_DIR/$possible_item" "$PACKER_WORKING_CHUNKS_DIR/" + echo "Trying to grab ${possible_item}" + if mv "${PACKING_QUEUE_DIR}/${possible_item}" "${PACKER_WORKING_CHUNKS_DIR}/" then - ITEM=$possible_item + ITEM="${possible_item}" else - echo "Failed to move $possible_item" + echo "Failed to move ${possible_item}" sleep 5 fi else @@ -78,24 +78,24 @@ done mayicontinue -echo "$( date ): Starting megawarc for item $ITEM" >> packer.log +echo "$( date ): Starting megawarc for item ${ITEM}" >> packer.log # construct a megawarc -mkdir -p $PACKER_WORKING_MEGAWARC_DIR/$ITEM +mkdir -p "${PACKER_WORKING_MEGAWARC_DIR}/${ITEM}" # megawarcs use relative paths -cd "$PACKER_WORKING_CHUNKS_DIR/" -$MEGAWARC --verbose pack --server $ZST_DICTIONARY_API $PACKER_WORKING_MEGAWARC_DIR/$ITEM/${FILE_PREFIX}${ITEM} $ITEM -result=$? -cd "$WORKING_DIR" +cd "${PACKER_WORKING_CHUNKS_DIR}/" +"${MEGAWARC}" --verbose pack --server "${ZST_DICTIONARY_API}" "${PACKER_WORKING_MEGAWARC_DIR}/${ITEM}/${FILE_PREFIX}${ITEM}" "${ITEM}" +result="${?}" +cd "${WORKING_DIR}" -if [[ $result -ne 0 ]] +if [[ "${result}" -ne 0 ]] then date - echo "megawarc exited with $result for $ITEM" + echo "megawarc exited with ${result} for ${ITEM}" exit 1 fi -echo "$( date ): Completed megawarc for item $ITEM" >> packer.log +echo "$( date ): Completed megawarc for item ${ITEM}" >> packer.log mayicontinue @@ -103,19 +103,19 @@ mayicontinue # remove files echo "megawarc OK, removing source files" -rm -rf "$PACKER_WORKING_CHUNKS_DIR/$ITEM" -result=$? +rm -rf "${PACKER_WORKING_CHUNKS_DIR}/${ITEM}" +result="${?}" -if [[ $result -ne 0 ]] +if [[ "${result}" -ne 0 ]] then date - echo "rm -rf source files exited with $result for $ITEM" + echo "rm -rf source files exited with ${result} for ${ITEM}" exit 1 fi echo "add to upload queue" -mv "$PACKER_WORKING_MEGAWARC_DIR/$ITEM" "$UPLOAD_QUEUE_DIR/" +mv "${PACKER_WORKING_MEGAWARC_DIR}/${ITEM}" "${UPLOAD_QUEUE_DIR}/" exit 0 diff --git a/upload-multiple b/upload-multiple index 1a12f4d..5b1fc6f 100755 --- a/upload-multiple +++ b/upload-multiple @@ -5,13 +5,13 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" while [[ -f RUN ]] do - $SCRIPT_DIR/upload-one - result=$? - if [[ $result -ne 0 ]] + "${SCRIPT_DIR}/upload-one" + result="${?}" + if [[ "${result}" -ne 0 ]] then date - echo "uploader exited with $result" - exit $result + echo "uploader exited with ${result}" + exit "${result}" fi done diff --git a/upload-one b/upload-one index 6359c1a..fa1cd05 100755 --- a/upload-one +++ b/upload-one @@ -18,12 +18,12 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" source ./config.sh || exit 1 -mkdir -p "$UPLOAD_QUEUE_DIR" || exit 1 -mkdir -p "$UPLOADER_WORKING_DIR" || exit 1 +mkdir -p "${UPLOAD_QUEUE_DIR}" || exit 1 +mkdir -p "${UPLOADER_WORKING_DIR}" || exit 1 -if [ ! -z "$COMPLETED_DIR" ] +if [ ! -z "${COMPLETED_DIR}" ] then - mkdir -p "$COMPLETED_DIR" || exit 1 + mkdir -p "${COMPLETED_DIR}" || exit 1 fi function mayicontinue { @@ -38,17 +38,17 @@ mayicontinue # try to grab an item from UPLOAD_QUEUE_DIR ITEM=none -while [[ $ITEM = none ]] +while [[ "${ITEM}" = none ]] do - possible_item=$( ls -1 "$UPLOAD_QUEUE_DIR" | grep -E '[0-9]{14}_[a-f0-9]{8}$' | sort | head -n 1 ) + possible_item=$( ls -1 "${UPLOAD_QUEUE_DIR}" | grep -E '[0-9]{14}_[a-f0-9]{8}$' | sort | head -n 1 ) if test -n "${possible_item}" then - echo "Trying to grab $possible_item" - if mv "$UPLOAD_QUEUE_DIR/$possible_item" "$UPLOADER_WORKING_DIR/" + echo "Trying to grab ${possible_item}" + if mv "${UPLOAD_QUEUE_DIR}/${possible_item}" "${UPLOADER_WORKING_DIR}/" then - ITEM=$possible_item + ITEM="${possible_item}" else - echo "Failed to move $possible_item" + echo "Failed to move ${possible_item}" sleep 5 fi else @@ -60,7 +60,7 @@ do done -echo "$( date ): Start uploading for item $ITEM" >> uploader.log +echo "$( date ): Start uploading for item ${ITEM}" >> uploader.log # upload megawarc size_hint=$( du --bytes -s "${UPLOADER_WORKING_DIR}/${ITEM}" | grep -oE "^[0-9]+" ) @@ -70,7 +70,7 @@ find "${UPLOADER_WORKING_DIR}/${ITEM}" -type f -regextype posix-egrep -regex ".+ | while read filename do result=1 - while [[ $result -ne 0 ]] + while [[ "${result}" -ne 0 ]] do curl -v --location --fail \ --speed-limit 1 --speed-time 900 \ @@ -83,52 +83,52 @@ do --header "x-archive-meta-date:${IA_ITEM_DATE}" \ --header "x-archive-meta-language:eng" \ --header "x-archive-meta-noarchivetorrent:true" \ - --header "x-archive-size-hint:$size_hint" \ + --header "x-archive-size-hint:${size_hint}" \ --header "authorization: LOW ${IA_AUTH}" \ --upload-file "${UPLOADER_WORKING_DIR}/${ITEM}/${filename}" \ "https://s3.us.archive.org/${IA_ITEM_PREFIX}${ITEM}/${filename}" \ > /dev/null - result=$? - if [[ $result -ne 0 ]] + result="${?}" + if [[ "${result}" -ne 0 ]] then date - echo "Error while uploading $ITEM, curl said $result" + echo "Error while uploading ${ITEM}, curl said ${result}" echo "Will retry in 30 seconds" sleep 30 fi done done -echo "Uploaded $ITEM" +echo "Uploaded ${ITEM}" -echo "$( date ): Completed uploading for item $ITEM" >> uploader.log +echo "$( date ): Completed uploading for item ${ITEM}" >> uploader.log mayicontinue # move or remove megawarc -if [ -z "$COMPLETED_DIR" ] +if [ -z "${COMPLETED_DIR}" ] then # remove rm -rf "${UPLOADER_WORKING_DIR}/${ITEM}" - result=$? + result="${?}" - if [[ $result -ne 0 ]] + if [[ "${result}" -ne 0 ]] then date - echo "rm -rf megawarc exited with $result for $ITEM" + echo "rm -rf megawarc exited with ${result} for ${ITEM}" exit 1 fi else # move mv "${UPLOADER_WORKING_DIR}/${ITEM}" "${COMPLETED_DIR}/" - result=$? + result="${?}" - if [[ $result -ne 0 ]] + if [[ "${result}" -ne 0 ]] then date - echo "rm -rf megawarc exited with $result for $ITEM" + echo "rm -rf megawarc exited with ${result} for ${ITEM}" exit 1 fi fi