Browse Source

properly quote all variables

master
Katie Holly 1 year ago
parent
commit
7ee94bb579
No known key found for this signature in database GPG Key ID: 3D3115D5D95856F
10 changed files with 119 additions and 119 deletions
  1. +5
    -5
      chunk-multiple
  2. +15
    -15
      chunker
  3. +1
    -1
      docker-boot.sh
  4. +7
    -7
      du-all
  5. +5
    -5
      offload-multiple
  6. +24
    -24
      offload-one
  7. +5
    -5
      pack-multiple
  8. +27
    -27
      pack-one
  9. +5
    -5
      upload-multiple
  10. +25
    -25
      upload-one

+ 5
- 5
chunk-multiple View File

@@ -6,13 +6,13 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
while [[ -f RUN ]]
do
date
$SCRIPT_DIR/chunker
result=$?
if [[ $result -ne 0 ]]
"${SCRIPT_DIR}/chunker"
result="${?}"
if [[ "${result}" -ne 0 ]]
then
date
echo "chunker exited with $result"
exit $result
echo "chunker exited with ${result}"
exit "${result}"
fi

echo "Sleeping..."


+ 15
- 15
chunker View File

@@ -8,50 +8,50 @@
# can be moved somewhere else. Remember this when running Rsync.
#

INCOMING_UPLOADS_DIR=$1 # /home/archiveteam/uploads
CHUNKER_WORKING_DIR=$2 # /home/archiveteam/processed
PACKING_QUEUE_DIR="$CHUNKER_WORKING_DIR/archive"
INCOMING_UPLOADS_DIR="${1}" # /home/archiveteam/uploads
CHUNKER_WORKING_DIR="${2}" # /home/archiveteam/processed
PACKING_QUEUE_DIR="${CHUNKER_WORKING_DIR}/archive"
MEGABYTES_PER_CHUNK=$((1024*25))

# if not specified in command-line arguments
if [ -z $INCOMING_UPLOADS_DIR ]
if [ -z "${INCOMING_UPLOADS_DIR}" ]
then
source ./config.sh || exit 1
fi

BYTES_PER_CHUNK=$((1024*1024*MEGABYTES_PER_CHUNK))

mkdir -p "$CHUNKER_WORKING_DIR" || exit 1
mkdir -p "$PACKING_QUEUE_DIR" || exit 1
mkdir -p "${CHUNKER_WORKING_DIR}" || exit 1
mkdir -p "${PACKING_QUEUE_DIR}" || exit 1

mkdir -p "$CHUNKER_WORKING_DIR/current" || exit 1
cur_size=$( du -B1 -s "$CHUNKER_WORKING_DIR/current" | grep -oE "^[0-9]+" )
mkdir -p "${CHUNKER_WORKING_DIR}/current" || exit 1
cur_size=$( du -B1 -s "${CHUNKER_WORKING_DIR}/current" | grep -oE "^[0-9]+" )

# find every .warc.gz or .warc.zst in the upload directory
find "$INCOMING_UPLOADS_DIR" -type f -regex ".+\.warc\.\(gz\|zst\)$" \
find "${INCOMING_UPLOADS_DIR}" -type f -regex ".+\.warc\.\(gz\|zst\)$" \
| while read filename
do
# skip partial uploads
if [[ $filename =~ rsync-tmp ]]
if [[ "${filename}" =~ rsync-tmp ]]
then
continue
fi

cur_size=$((cur_size + $( du -B1 -s $filename | grep -oE "^[0-9]+" )))
cur_size=$((cur_size + $( du -B1 -s "${filename}" | grep -oE "^[0-9]+" )))

# move to the current/ directory
echo "Moving ${filename}"
mkdir -p "$CHUNKER_WORKING_DIR/current"
mv "${filename}" "$CHUNKER_WORKING_DIR/current/"
mkdir -p "${CHUNKER_WORKING_DIR}/current"
mv "${filename}" "${CHUNKER_WORKING_DIR}/current/"

# if the current/ directory is large enough,
# rename it to archive-XXXXX and start a new current/
if [[ $cur_size -gt $BYTES_PER_CHUNK ]]
if [[ "${cur_size}" -gt "${BYTES_PER_CHUNK}" ]]
then
timestamp=$( date +'%Y%m%d%H%M%S' )
uuid=$(cat /proc/sys/kernel/random/uuid | cut -d- -f1)
echo "Current archive is full, moving to ${timestamp}_${uuid}."
mv "$CHUNKER_WORKING_DIR/current" "$PACKING_QUEUE_DIR/${timestamp}_${uuid}"
mv "${CHUNKER_WORKING_DIR}/current" "${PACKING_QUEUE_DIR}/${timestamp}_${uuid}"
cur_size=0
sleep 3
fi


+ 1
- 1
docker-boot.sh View File

@@ -18,7 +18,7 @@ IA_COLLECTION="${IA_COLLECTION}"
IA_ITEM_TITLE="${IA_ITEM_TITLE}"
IA_ITEM_PREFIX="${IA_ITEM_PREFIX}"
FILE_PREFIX="${FILE_PREFIX}"
IA_ITEM_DATE=${IA_ITEM_DATE_LIT}
IA_ITEM_DATE="${IA_ITEM_DATE_LIT}"
OFFLOAD_TARGET="${OFFLOAD_TARGET}"
ZST_DICTIONARY_API="${ZST_DICTIONARY_API}"
INCOMING_UPLOADS_DIR="/data/incoming"


+ 7
- 7
du-all View File

@@ -3,12 +3,12 @@
source ./config.sh || exit 1

du -hs \
$INCOMING_UPLOADS_DIR \
$CHUNKER_WORKING_DIR \
$PACKING_QUEUE_DIR/* \
$PACKER_WORKING_CHUNKS_DIR/* \
$PACKER_WORKING_MEGAWARC_DIR/* \
$UPLOAD_QUEUE_DIR/* \
$UPLOADER_WORKING_DIR/* \
"${INCOMING_UPLOADS_DIR}" \
"${CHUNKER_WORKING_DIR}" \
"${PACKING_QUEUE_DIR}/"* \
"${PACKER_WORKING_CHUNKS_DIR}/"* \
"${PACKER_WORKING_MEGAWARC_DIR}/"* \
"${UPLOAD_QUEUE_DIR}/"* \
"${UPLOADER_WORKING_DIR}/"* \
2> >(grep -v 'du: cannot \(access\|read\)' >&2)


+ 5
- 5
offload-multiple View File

@@ -5,13 +5,13 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

while [[ -f RUN ]]
do
$SCRIPT_DIR/offload-one
result=$?
if [[ $result -ne 0 ]]
"${SCRIPT_DIR}/offload-one"
result="${?}"
if [[ "${result}" -ne 0 ]]
then
date
echo "offloader exited with $result"
exit $result
echo "offloader exited with ${result}"
exit "${result}"
fi
done


+ 24
- 24
offload-one View File

@@ -19,12 +19,12 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

source ./config.sh || exit 1

mkdir -p "$UPLOAD_QUEUE_DIR" || exit 1
mkdir -p "$UPLOADER_WORKING_DIR" || exit 1
mkdir -p "${UPLOAD_QUEUE_DIR}" || exit 1
mkdir -p "${UPLOADER_WORKING_DIR}" || exit 1

if [ ! -z "$COMPLETED_DIR" ]
if [ ! -z "${COMPLETED_DIR}" ]
then
mkdir -p "$COMPLETED_DIR" || exit 1
mkdir -p "${COMPLETED_DIR}" || exit 1
fi

function mayicontinue {
@@ -44,17 +44,17 @@ fi

# try to grab an item from UPLOAD_QUEUE_DIR
ITEM=none
while [[ $ITEM = none ]]
while [[ "${ITEM}" = none ]]
do
possible_item=$( ls -1 "$UPLOAD_QUEUE_DIR" | grep -E '[0-9]{14}_[a-f0-9]{8}$' | sort | head -n 1 )
possible_item=$( ls -1 "${UPLOAD_QUEUE_DIR}" | grep -E '[0-9]{14}_[a-f0-9]{8}$' | sort | head -n 1 )
if test -n "${possible_item}"
then
echo "Trying to grab $possible_item"
if mv "$UPLOAD_QUEUE_DIR/$possible_item" "$UPLOADER_WORKING_DIR/"
echo "Trying to grab ${possible_item}"
if mv "${UPLOAD_QUEUE_DIR}/${possible_item}" "${UPLOADER_WORKING_DIR}/"
then
ITEM=$possible_item
ITEM="${possible_item}"
else
echo "Failed to move $possible_item"
echo "Failed to move ${possible_item}"
sleep 5
fi
else
@@ -66,10 +66,10 @@ do
done


echo "$( date ): Start offloading for item $ITEM" >> uploader.log
echo "$( date ): Start offloading for item ${ITEM}" >> uploader.log

result=1
while [[ $result -ne 0 ]]
while [[ "${result}" -ne 0 ]]
do
_OFFLOAD_TARGET="${OFFLOAD_TARGET}"
if test -z "${_OFFLOAD_TARGET}"; then
@@ -83,46 +83,46 @@ do
fi
echo "Offloading to ${_OFFLOAD_TARGET}/${ITEM}/"
rsync -rltv --timeout=900 --contimeout=60 --sockopts=SO_SNDBUF=8388608,SO_RCVBUF=8388608 --progress --stats --no-owner --no-group --partial --partial-dir .rsync-tmp --delay-updates --no-compress --compress-level 0 "${UPLOADER_WORKING_DIR}/${ITEM}/" "${_OFFLOAD_TARGET}/${ITEM}/"
result=$?
if [[ $result -ne 0 ]]
result="${?}"
if [[ "${result}" -ne 0 ]]
then
date
echo "Error while offloading $ITEM, rsync said $result"
echo "Error while offloading ${ITEM}, rsync said ${result}"
echo "Will retry in 30 seconds"
sleep 30
fi
done

echo "Offloaded $ITEM"
echo "Offloaded ${ITEM}"

echo "$( date ): Completed offloading for item $ITEM" >> uploader.log
echo "$( date ): Completed offloading for item ${ITEM}" >> uploader.log


mayicontinue


# move or remove megawarc
if [ -z "$COMPLETED_DIR" ]
if [ -z "${COMPLETED_DIR}" ]
then
# remove
rm -rf "${UPLOADER_WORKING_DIR}/${ITEM}"
result=$?
result="${?}"

if [[ $result -ne 0 ]]
if [[ "${result}" -ne 0 ]]
then
date
echo "rm -rf megawarc exited with $result for $ITEM"
echo "rm -rf megawarc exited with ${result} for ${ITEM}"
exit 1
fi
else
# move
mv "${UPLOADER_WORKING_DIR}/${ITEM}" "${COMPLETED_DIR}/"
result=$?
result="${?}"

if [[ $result -ne 0 ]]
if [[ "${result}" -ne 0 ]]
then
date
echo "rm -rf megawarc exited with $result for $ITEM"
echo "rm -rf megawarc exited with ${result} for ${ITEM}"
exit 1
fi
fi


+ 5
- 5
pack-multiple View File

@@ -5,13 +5,13 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

while [[ -f RUN ]]
do
$SCRIPT_DIR/pack-one
result=$?
if [[ $result -ne 0 ]]
"${SCRIPT_DIR}/pack-one"
result="${?}"
if [[ "${result}" -ne 0 ]]
then
date
echo "packer exited with $result"
exit $result
echo "packer exited with ${result}"
exit "${result}"
fi
done


+ 27
- 27
pack-one View File

@@ -16,19 +16,19 @@

WORKING_DIR="$( pwd )"
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
MEGAWARC=$SCRIPT_DIR/megawarc/megawarc
MEGAWARC="${SCRIPT_DIR}/megawarc/megawarc"

if [[ ! -x $MEGAWARC ]]
if [[ ! -x "${MEGAWARC}" ]]
then
echo "$MEGAWARC does not exist or is not executable."
echo "${MEGAWARC} does not exist or is not executable."
exit 1
fi

source ./config.sh || exit 1

mkdir -p "$PACKER_WORKING_CHUNKS_DIR" || exit 1
mkdir -p "$PACKER_WORKING_MEGAWARC_DIR" || exit 1
mkdir -p "$UPLOAD_QUEUE_DIR" || exit 1
mkdir -p "${PACKER_WORKING_CHUNKS_DIR}" || exit 1
mkdir -p "${PACKER_WORKING_MEGAWARC_DIR}" || exit 1
mkdir -p "${UPLOAD_QUEUE_DIR}" || exit 1


function mayicontinue {
@@ -40,7 +40,7 @@ function mayicontinue {


# check if the upload queue is empty
# if [ "$( ls -A $UPLOAD_QUEUE_DIR )" ]
# if [ "$( ls -A ${UPLOAD_QUEUE_DIR} )" ]
# then
# echo "Upload queue not empty. Wait."
# sleep 30
@@ -53,17 +53,17 @@ mayicontinue

# try to grab a directory from the packing queue
ITEM=none
while [[ $ITEM = none ]]
while [[ "${ITEM}" = none ]]
do
possible_item=$( ls -1 "$PACKING_QUEUE_DIR/" | grep -E '[0-9]{14}_[a-f0-9]{8}$' | sort | head -n 1 )
possible_item=$( ls -1 "${PACKING_QUEUE_DIR}/" | grep -E '[0-9]{14}_[a-f0-9]{8}$' | sort | head -n 1 )
if test -n "${possible_item}"
then
echo "Trying to grab $possible_item"
if mv "$PACKING_QUEUE_DIR/$possible_item" "$PACKER_WORKING_CHUNKS_DIR/"
echo "Trying to grab ${possible_item}"
if mv "${PACKING_QUEUE_DIR}/${possible_item}" "${PACKER_WORKING_CHUNKS_DIR}/"
then
ITEM=$possible_item
ITEM="${possible_item}"
else
echo "Failed to move $possible_item"
echo "Failed to move ${possible_item}"
sleep 5
fi
else
@@ -78,24 +78,24 @@ done
mayicontinue


echo "$( date ): Starting megawarc for item $ITEM" >> packer.log
echo "$( date ): Starting megawarc for item ${ITEM}" >> packer.log

# construct a megawarc
mkdir -p $PACKER_WORKING_MEGAWARC_DIR/$ITEM
mkdir -p "${PACKER_WORKING_MEGAWARC_DIR}/${ITEM}"
# megawarcs use relative paths
cd "$PACKER_WORKING_CHUNKS_DIR/"
$MEGAWARC --verbose pack --server $ZST_DICTIONARY_API $PACKER_WORKING_MEGAWARC_DIR/$ITEM/${FILE_PREFIX}${ITEM} $ITEM
result=$?
cd "$WORKING_DIR"
cd "${PACKER_WORKING_CHUNKS_DIR}/"
"${MEGAWARC}" --verbose pack --server "${ZST_DICTIONARY_API}" "${PACKER_WORKING_MEGAWARC_DIR}/${ITEM}/${FILE_PREFIX}${ITEM}" "${ITEM}"
result="${?}"
cd "${WORKING_DIR}"

if [[ $result -ne 0 ]]
if [[ "${result}" -ne 0 ]]
then
date
echo "megawarc exited with $result for $ITEM"
echo "megawarc exited with ${result} for ${ITEM}"
exit 1
fi

echo "$( date ): Completed megawarc for item $ITEM" >> packer.log
echo "$( date ): Completed megawarc for item ${ITEM}" >> packer.log


mayicontinue
@@ -103,19 +103,19 @@ mayicontinue

# remove files
echo "megawarc OK, removing source files"
rm -rf "$PACKER_WORKING_CHUNKS_DIR/$ITEM"
result=$?
rm -rf "${PACKER_WORKING_CHUNKS_DIR}/${ITEM}"
result="${?}"

if [[ $result -ne 0 ]]
if [[ "${result}" -ne 0 ]]
then
date
echo "rm -rf source files exited with $result for $ITEM"
echo "rm -rf source files exited with ${result} for ${ITEM}"
exit 1
fi


echo "add to upload queue"
mv "$PACKER_WORKING_MEGAWARC_DIR/$ITEM" "$UPLOAD_QUEUE_DIR/"
mv "${PACKER_WORKING_MEGAWARC_DIR}/${ITEM}" "${UPLOAD_QUEUE_DIR}/"


exit 0


+ 5
- 5
upload-multiple View File

@@ -5,13 +5,13 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

while [[ -f RUN ]]
do
$SCRIPT_DIR/upload-one
result=$?
if [[ $result -ne 0 ]]
"${SCRIPT_DIR}/upload-one"
result="${?}"
if [[ "${result}" -ne 0 ]]
then
date
echo "uploader exited with $result"
exit $result
echo "uploader exited with ${result}"
exit "${result}"
fi
done


+ 25
- 25
upload-one View File

@@ -18,12 +18,12 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

source ./config.sh || exit 1

mkdir -p "$UPLOAD_QUEUE_DIR" || exit 1
mkdir -p "$UPLOADER_WORKING_DIR" || exit 1
mkdir -p "${UPLOAD_QUEUE_DIR}" || exit 1
mkdir -p "${UPLOADER_WORKING_DIR}" || exit 1

if [ ! -z "$COMPLETED_DIR" ]
if [ ! -z "${COMPLETED_DIR}" ]
then
mkdir -p "$COMPLETED_DIR" || exit 1
mkdir -p "${COMPLETED_DIR}" || exit 1
fi

function mayicontinue {
@@ -38,17 +38,17 @@ mayicontinue

# try to grab an item from UPLOAD_QUEUE_DIR
ITEM=none
while [[ $ITEM = none ]]
while [[ "${ITEM}" = none ]]
do
possible_item=$( ls -1 "$UPLOAD_QUEUE_DIR" | grep -E '[0-9]{14}_[a-f0-9]{8}$' | sort | head -n 1 )
possible_item=$( ls -1 "${UPLOAD_QUEUE_DIR}" | grep -E '[0-9]{14}_[a-f0-9]{8}$' | sort | head -n 1 )
if test -n "${possible_item}"
then
echo "Trying to grab $possible_item"
if mv "$UPLOAD_QUEUE_DIR/$possible_item" "$UPLOADER_WORKING_DIR/"
echo "Trying to grab ${possible_item}"
if mv "${UPLOAD_QUEUE_DIR}/${possible_item}" "${UPLOADER_WORKING_DIR}/"
then
ITEM=$possible_item
ITEM="${possible_item}"
else
echo "Failed to move $possible_item"
echo "Failed to move ${possible_item}"
sleep 5
fi
else
@@ -60,7 +60,7 @@ do
done


echo "$( date ): Start uploading for item $ITEM" >> uploader.log
echo "$( date ): Start uploading for item ${ITEM}" >> uploader.log

# upload megawarc
size_hint=$( du --bytes -s "${UPLOADER_WORKING_DIR}/${ITEM}" | grep -oE "^[0-9]+" )
@@ -70,7 +70,7 @@ find "${UPLOADER_WORKING_DIR}/${ITEM}" -type f -regextype posix-egrep -regex ".+
| while read filename
do
result=1
while [[ $result -ne 0 ]]
while [[ "${result}" -ne 0 ]]
do
curl -v --location --fail \
--speed-limit 1 --speed-time 900 \
@@ -83,52 +83,52 @@ do
--header "x-archive-meta-date:${IA_ITEM_DATE}" \
--header "x-archive-meta-language:eng" \
--header "x-archive-meta-noarchivetorrent:true" \
--header "x-archive-size-hint:$size_hint" \
--header "x-archive-size-hint:${size_hint}" \
--header "authorization: LOW ${IA_AUTH}" \
--upload-file "${UPLOADER_WORKING_DIR}/${ITEM}/${filename}" \
"https://s3.us.archive.org/${IA_ITEM_PREFIX}${ITEM}/${filename}" \
> /dev/null
result=$?
if [[ $result -ne 0 ]]
result="${?}"
if [[ "${result}" -ne 0 ]]
then
date
echo "Error while uploading $ITEM, curl said $result"
echo "Error while uploading ${ITEM}, curl said ${result}"
echo "Will retry in 30 seconds"
sleep 30
fi
done
done

echo "Uploaded $ITEM"
echo "Uploaded ${ITEM}"

echo "$( date ): Completed uploading for item $ITEM" >> uploader.log
echo "$( date ): Completed uploading for item ${ITEM}" >> uploader.log


mayicontinue


# move or remove megawarc
if [ -z "$COMPLETED_DIR" ]
if [ -z "${COMPLETED_DIR}" ]
then
# remove
rm -rf "${UPLOADER_WORKING_DIR}/${ITEM}"
result=$?
result="${?}"

if [[ $result -ne 0 ]]
if [[ "${result}" -ne 0 ]]
then
date
echo "rm -rf megawarc exited with $result for $ITEM"
echo "rm -rf megawarc exited with ${result} for ${ITEM}"
exit 1
fi
else
# move
mv "${UPLOADER_WORKING_DIR}/${ITEM}" "${COMPLETED_DIR}/"
result=$?
result="${?}"

if [[ $result -ne 0 ]]
if [[ "${result}" -ne 0 ]]
then
date
echo "rm -rf megawarc exited with $result for $ITEM"
echo "rm -rf megawarc exited with ${result} for ${ITEM}"
exit 1
fi
fi


Loading…
Cancel
Save