@@ -6,13 +6,13 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" | |||
while [[ -f RUN ]] | |||
do | |||
date | |||
$SCRIPT_DIR/chunker | |||
result=$? | |||
if [[ $result -ne 0 ]] | |||
"${SCRIPT_DIR}/chunker" | |||
result="${?}" | |||
if [[ "${result}" -ne 0 ]] | |||
then | |||
date | |||
echo "chunker exited with $result" | |||
exit $result | |||
echo "chunker exited with ${result}" | |||
exit "${result}" | |||
fi | |||
echo "Sleeping..." | |||
@@ -8,50 +8,50 @@ | |||
# can be moved somewhere else. Remember this when running Rsync. | |||
# | |||
INCOMING_UPLOADS_DIR=$1 # /home/archiveteam/uploads | |||
CHUNKER_WORKING_DIR=$2 # /home/archiveteam/processed | |||
PACKING_QUEUE_DIR="$CHUNKER_WORKING_DIR/archive" | |||
INCOMING_UPLOADS_DIR="${1}" # /home/archiveteam/uploads | |||
CHUNKER_WORKING_DIR="${2}" # /home/archiveteam/processed | |||
PACKING_QUEUE_DIR="${CHUNKER_WORKING_DIR}/archive" | |||
MEGABYTES_PER_CHUNK=$((1024*25)) | |||
# if not specified in command-line arguments | |||
if [ -z $INCOMING_UPLOADS_DIR ] | |||
if [ -z "${INCOMING_UPLOADS_DIR}" ] | |||
then | |||
source ./config.sh || exit 1 | |||
fi | |||
BYTES_PER_CHUNK=$((1024*1024*MEGABYTES_PER_CHUNK)) | |||
mkdir -p "$CHUNKER_WORKING_DIR" || exit 1 | |||
mkdir -p "$PACKING_QUEUE_DIR" || exit 1 | |||
mkdir -p "${CHUNKER_WORKING_DIR}" || exit 1 | |||
mkdir -p "${PACKING_QUEUE_DIR}" || exit 1 | |||
mkdir -p "$CHUNKER_WORKING_DIR/current" || exit 1 | |||
cur_size=$( du -B1 -s "$CHUNKER_WORKING_DIR/current" | grep -oE "^[0-9]+" ) | |||
mkdir -p "${CHUNKER_WORKING_DIR}/current" || exit 1 | |||
cur_size=$( du -B1 -s "${CHUNKER_WORKING_DIR}/current" | grep -oE "^[0-9]+" ) | |||
# find every .warc.gz or .warc.zst in the upload directory | |||
find "$INCOMING_UPLOADS_DIR" -type f -regex ".+\.warc\.\(gz\|zst\)$" \ | |||
find "${INCOMING_UPLOADS_DIR}" -type f -regex ".+\.warc\.\(gz\|zst\)$" \ | |||
| while read filename | |||
do | |||
# skip partial uploads | |||
if [[ $filename =~ rsync-tmp ]] | |||
if [[ "${filename}" =~ rsync-tmp ]] | |||
then | |||
continue | |||
fi | |||
cur_size=$((cur_size + $( du -B1 -s $filename | grep -oE "^[0-9]+" ))) | |||
cur_size=$((cur_size + $( du -B1 -s "${filename}" | grep -oE "^[0-9]+" ))) | |||
# move to the current/ directory | |||
echo "Moving ${filename}" | |||
mkdir -p "$CHUNKER_WORKING_DIR/current" | |||
mv "${filename}" "$CHUNKER_WORKING_DIR/current/" | |||
mkdir -p "${CHUNKER_WORKING_DIR}/current" | |||
mv "${filename}" "${CHUNKER_WORKING_DIR}/current/" | |||
# if the current/ directory is large enough, | |||
# rename it to archive-XXXXX and start a new current/ | |||
if [[ $cur_size -gt $BYTES_PER_CHUNK ]] | |||
if [[ "${cur_size}" -gt "${BYTES_PER_CHUNK}" ]] | |||
then | |||
timestamp=$( date +'%Y%m%d%H%M%S' ) | |||
uuid=$(cat /proc/sys/kernel/random/uuid | cut -d- -f1) | |||
echo "Current archive is full, moving to ${timestamp}_${uuid}." | |||
mv "$CHUNKER_WORKING_DIR/current" "$PACKING_QUEUE_DIR/${timestamp}_${uuid}" | |||
mv "${CHUNKER_WORKING_DIR}/current" "${PACKING_QUEUE_DIR}/${timestamp}_${uuid}" | |||
cur_size=0 | |||
sleep 3 | |||
fi | |||
@@ -18,7 +18,7 @@ IA_COLLECTION="${IA_COLLECTION}" | |||
IA_ITEM_TITLE="${IA_ITEM_TITLE}" | |||
IA_ITEM_PREFIX="${IA_ITEM_PREFIX}" | |||
FILE_PREFIX="${FILE_PREFIX}" | |||
IA_ITEM_DATE=${IA_ITEM_DATE_LIT} | |||
IA_ITEM_DATE="${IA_ITEM_DATE_LIT}" | |||
OFFLOAD_TARGET="${OFFLOAD_TARGET}" | |||
ZST_DICTIONARY_API="${ZST_DICTIONARY_API}" | |||
INCOMING_UPLOADS_DIR="/data/incoming" | |||
@@ -3,12 +3,12 @@ | |||
source ./config.sh || exit 1 | |||
du -hs \ | |||
$INCOMING_UPLOADS_DIR \ | |||
$CHUNKER_WORKING_DIR \ | |||
$PACKING_QUEUE_DIR/* \ | |||
$PACKER_WORKING_CHUNKS_DIR/* \ | |||
$PACKER_WORKING_MEGAWARC_DIR/* \ | |||
$UPLOAD_QUEUE_DIR/* \ | |||
$UPLOADER_WORKING_DIR/* \ | |||
"${INCOMING_UPLOADS_DIR}" \ | |||
"${CHUNKER_WORKING_DIR}" \ | |||
"${PACKING_QUEUE_DIR}/"* \ | |||
"${PACKER_WORKING_CHUNKS_DIR}/"* \ | |||
"${PACKER_WORKING_MEGAWARC_DIR}/"* \ | |||
"${UPLOAD_QUEUE_DIR}/"* \ | |||
"${UPLOADER_WORKING_DIR}/"* \ | |||
2> >(grep -v 'du: cannot \(access\|read\)' >&2) | |||
@@ -5,13 +5,13 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" | |||
while [[ -f RUN ]] | |||
do | |||
$SCRIPT_DIR/offload-one | |||
result=$? | |||
if [[ $result -ne 0 ]] | |||
"${SCRIPT_DIR}/offload-one" | |||
result="${?}" | |||
if [[ "${result}" -ne 0 ]] | |||
then | |||
date | |||
echo "offloader exited with $result" | |||
exit $result | |||
echo "offloader exited with ${result}" | |||
exit "${result}" | |||
fi | |||
done | |||
@@ -19,12 +19,12 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" | |||
source ./config.sh || exit 1 | |||
mkdir -p "$UPLOAD_QUEUE_DIR" || exit 1 | |||
mkdir -p "$UPLOADER_WORKING_DIR" || exit 1 | |||
mkdir -p "${UPLOAD_QUEUE_DIR}" || exit 1 | |||
mkdir -p "${UPLOADER_WORKING_DIR}" || exit 1 | |||
if [ ! -z "$COMPLETED_DIR" ] | |||
if [ ! -z "${COMPLETED_DIR}" ] | |||
then | |||
mkdir -p "$COMPLETED_DIR" || exit 1 | |||
mkdir -p "${COMPLETED_DIR}" || exit 1 | |||
fi | |||
function mayicontinue { | |||
@@ -44,17 +44,17 @@ fi | |||
# try to grab an item from UPLOAD_QUEUE_DIR | |||
ITEM=none | |||
while [[ $ITEM = none ]] | |||
while [[ "${ITEM}" = none ]] | |||
do | |||
possible_item=$( ls -1 "$UPLOAD_QUEUE_DIR" | grep -E '[0-9]{14}_[a-f0-9]{8}$' | sort | head -n 1 ) | |||
possible_item=$( ls -1 "${UPLOAD_QUEUE_DIR}" | grep -E '[0-9]{14}_[a-f0-9]{8}$' | sort | head -n 1 ) | |||
if test -n "${possible_item}" | |||
then | |||
echo "Trying to grab $possible_item" | |||
if mv "$UPLOAD_QUEUE_DIR/$possible_item" "$UPLOADER_WORKING_DIR/" | |||
echo "Trying to grab ${possible_item}" | |||
if mv "${UPLOAD_QUEUE_DIR}/${possible_item}" "${UPLOADER_WORKING_DIR}/" | |||
then | |||
ITEM=$possible_item | |||
ITEM="${possible_item}" | |||
else | |||
echo "Failed to move $possible_item" | |||
echo "Failed to move ${possible_item}" | |||
sleep 5 | |||
fi | |||
else | |||
@@ -66,10 +66,10 @@ do | |||
done | |||
echo "$( date ): Start offloading for item $ITEM" >> uploader.log | |||
echo "$( date ): Start offloading for item ${ITEM}" >> uploader.log | |||
result=1 | |||
while [[ $result -ne 0 ]] | |||
while [[ "${result}" -ne 0 ]] | |||
do | |||
_OFFLOAD_TARGET="${OFFLOAD_TARGET}" | |||
if test -z "${_OFFLOAD_TARGET}"; then | |||
@@ -83,46 +83,46 @@ do | |||
fi | |||
echo "Offloading to ${_OFFLOAD_TARGET}/${ITEM}/" | |||
rsync -rltv --timeout=900 --contimeout=60 --sockopts=SO_SNDBUF=8388608,SO_RCVBUF=8388608 --progress --stats --no-owner --no-group --partial --partial-dir .rsync-tmp --delay-updates --no-compress --compress-level 0 "${UPLOADER_WORKING_DIR}/${ITEM}/" "${_OFFLOAD_TARGET}/${ITEM}/" | |||
result=$? | |||
if [[ $result -ne 0 ]] | |||
result="${?}" | |||
if [[ "${result}" -ne 0 ]] | |||
then | |||
date | |||
echo "Error while offloading $ITEM, rsync said $result" | |||
echo "Error while offloading ${ITEM}, rsync said ${result}" | |||
echo "Will retry in 30 seconds" | |||
sleep 30 | |||
fi | |||
done | |||
echo "Offloaded $ITEM" | |||
echo "Offloaded ${ITEM}" | |||
echo "$( date ): Completed offloading for item $ITEM" >> uploader.log | |||
echo "$( date ): Completed offloading for item ${ITEM}" >> uploader.log | |||
mayicontinue | |||
# move or remove megawarc | |||
if [ -z "$COMPLETED_DIR" ] | |||
if [ -z "${COMPLETED_DIR}" ] | |||
then | |||
# remove | |||
rm -rf "${UPLOADER_WORKING_DIR}/${ITEM}" | |||
result=$? | |||
result="${?}" | |||
if [[ $result -ne 0 ]] | |||
if [[ "${result}" -ne 0 ]] | |||
then | |||
date | |||
echo "rm -rf megawarc exited with $result for $ITEM" | |||
echo "rm -rf megawarc exited with ${result} for ${ITEM}" | |||
exit 1 | |||
fi | |||
else | |||
# move | |||
mv "${UPLOADER_WORKING_DIR}/${ITEM}" "${COMPLETED_DIR}/" | |||
result=$? | |||
result="${?}" | |||
if [[ $result -ne 0 ]] | |||
if [[ "${result}" -ne 0 ]] | |||
then | |||
date | |||
echo "rm -rf megawarc exited with $result for $ITEM" | |||
echo "rm -rf megawarc exited with ${result} for ${ITEM}" | |||
exit 1 | |||
fi | |||
fi | |||
@@ -5,13 +5,13 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" | |||
while [[ -f RUN ]] | |||
do | |||
$SCRIPT_DIR/pack-one | |||
result=$? | |||
if [[ $result -ne 0 ]] | |||
"${SCRIPT_DIR}/pack-one" | |||
result="${?}" | |||
if [[ "${result}" -ne 0 ]] | |||
then | |||
date | |||
echo "packer exited with $result" | |||
exit $result | |||
echo "packer exited with ${result}" | |||
exit "${result}" | |||
fi | |||
done | |||
@@ -16,19 +16,19 @@ | |||
WORKING_DIR="$( pwd )" | |||
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" | |||
MEGAWARC=$SCRIPT_DIR/megawarc/megawarc | |||
MEGAWARC="${SCRIPT_DIR}/megawarc/megawarc" | |||
if [[ ! -x $MEGAWARC ]] | |||
if [[ ! -x "${MEGAWARC}" ]] | |||
then | |||
echo "$MEGAWARC does not exist or is not executable." | |||
echo "${MEGAWARC} does not exist or is not executable." | |||
exit 1 | |||
fi | |||
source ./config.sh || exit 1 | |||
mkdir -p "$PACKER_WORKING_CHUNKS_DIR" || exit 1 | |||
mkdir -p "$PACKER_WORKING_MEGAWARC_DIR" || exit 1 | |||
mkdir -p "$UPLOAD_QUEUE_DIR" || exit 1 | |||
mkdir -p "${PACKER_WORKING_CHUNKS_DIR}" || exit 1 | |||
mkdir -p "${PACKER_WORKING_MEGAWARC_DIR}" || exit 1 | |||
mkdir -p "${UPLOAD_QUEUE_DIR}" || exit 1 | |||
function mayicontinue { | |||
@@ -40,7 +40,7 @@ function mayicontinue { | |||
# check if the upload queue is empty | |||
# if [ "$( ls -A $UPLOAD_QUEUE_DIR )" ] | |||
# if [ "$( ls -A ${UPLOAD_QUEUE_DIR} )" ] | |||
# then | |||
# echo "Upload queue not empty. Wait." | |||
# sleep 30 | |||
@@ -53,17 +53,17 @@ mayicontinue | |||
# try to grab a directory from the packing queue | |||
ITEM=none | |||
while [[ $ITEM = none ]] | |||
while [[ "${ITEM}" = none ]] | |||
do | |||
possible_item=$( ls -1 "$PACKING_QUEUE_DIR/" | grep -E '[0-9]{14}_[a-f0-9]{8}$' | sort | head -n 1 ) | |||
possible_item=$( ls -1 "${PACKING_QUEUE_DIR}/" | grep -E '[0-9]{14}_[a-f0-9]{8}$' | sort | head -n 1 ) | |||
if test -n "${possible_item}" | |||
then | |||
echo "Trying to grab $possible_item" | |||
if mv "$PACKING_QUEUE_DIR/$possible_item" "$PACKER_WORKING_CHUNKS_DIR/" | |||
echo "Trying to grab ${possible_item}" | |||
if mv "${PACKING_QUEUE_DIR}/${possible_item}" "${PACKER_WORKING_CHUNKS_DIR}/" | |||
then | |||
ITEM=$possible_item | |||
ITEM="${possible_item}" | |||
else | |||
echo "Failed to move $possible_item" | |||
echo "Failed to move ${possible_item}" | |||
sleep 5 | |||
fi | |||
else | |||
@@ -78,24 +78,24 @@ done | |||
mayicontinue | |||
echo "$( date ): Starting megawarc for item $ITEM" >> packer.log | |||
echo "$( date ): Starting megawarc for item ${ITEM}" >> packer.log | |||
# construct a megawarc | |||
mkdir -p $PACKER_WORKING_MEGAWARC_DIR/$ITEM | |||
mkdir -p "${PACKER_WORKING_MEGAWARC_DIR}/${ITEM}" | |||
# megawarcs use relative paths | |||
cd "$PACKER_WORKING_CHUNKS_DIR/" | |||
$MEGAWARC --verbose pack --server $ZST_DICTIONARY_API $PACKER_WORKING_MEGAWARC_DIR/$ITEM/${FILE_PREFIX}${ITEM} $ITEM | |||
result=$? | |||
cd "$WORKING_DIR" | |||
cd "${PACKER_WORKING_CHUNKS_DIR}/" | |||
"${MEGAWARC}" --verbose pack --server "${ZST_DICTIONARY_API}" "${PACKER_WORKING_MEGAWARC_DIR}/${ITEM}/${FILE_PREFIX}${ITEM}" "${ITEM}" | |||
result="${?}" | |||
cd "${WORKING_DIR}" | |||
if [[ $result -ne 0 ]] | |||
if [[ "${result}" -ne 0 ]] | |||
then | |||
date | |||
echo "megawarc exited with $result for $ITEM" | |||
echo "megawarc exited with ${result} for ${ITEM}" | |||
exit 1 | |||
fi | |||
echo "$( date ): Completed megawarc for item $ITEM" >> packer.log | |||
echo "$( date ): Completed megawarc for item ${ITEM}" >> packer.log | |||
mayicontinue | |||
@@ -103,19 +103,19 @@ mayicontinue | |||
# remove files | |||
echo "megawarc OK, removing source files" | |||
rm -rf "$PACKER_WORKING_CHUNKS_DIR/$ITEM" | |||
result=$? | |||
rm -rf "${PACKER_WORKING_CHUNKS_DIR}/${ITEM}" | |||
result="${?}" | |||
if [[ $result -ne 0 ]] | |||
if [[ "${result}" -ne 0 ]] | |||
then | |||
date | |||
echo "rm -rf source files exited with $result for $ITEM" | |||
echo "rm -rf source files exited with ${result} for ${ITEM}" | |||
exit 1 | |||
fi | |||
echo "add to upload queue" | |||
mv "$PACKER_WORKING_MEGAWARC_DIR/$ITEM" "$UPLOAD_QUEUE_DIR/" | |||
mv "${PACKER_WORKING_MEGAWARC_DIR}/${ITEM}" "${UPLOAD_QUEUE_DIR}/" | |||
exit 0 | |||
@@ -5,13 +5,13 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" | |||
while [[ -f RUN ]] | |||
do | |||
$SCRIPT_DIR/upload-one | |||
result=$? | |||
if [[ $result -ne 0 ]] | |||
"${SCRIPT_DIR}/upload-one" | |||
result="${?}" | |||
if [[ "${result}" -ne 0 ]] | |||
then | |||
date | |||
echo "uploader exited with $result" | |||
exit $result | |||
echo "uploader exited with ${result}" | |||
exit "${result}" | |||
fi | |||
done | |||
@@ -18,12 +18,12 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" | |||
source ./config.sh || exit 1 | |||
mkdir -p "$UPLOAD_QUEUE_DIR" || exit 1 | |||
mkdir -p "$UPLOADER_WORKING_DIR" || exit 1 | |||
mkdir -p "${UPLOAD_QUEUE_DIR}" || exit 1 | |||
mkdir -p "${UPLOADER_WORKING_DIR}" || exit 1 | |||
if [ ! -z "$COMPLETED_DIR" ] | |||
if [ ! -z "${COMPLETED_DIR}" ] | |||
then | |||
mkdir -p "$COMPLETED_DIR" || exit 1 | |||
mkdir -p "${COMPLETED_DIR}" || exit 1 | |||
fi | |||
function mayicontinue { | |||
@@ -38,17 +38,17 @@ mayicontinue | |||
# try to grab an item from UPLOAD_QUEUE_DIR | |||
ITEM=none | |||
while [[ $ITEM = none ]] | |||
while [[ "${ITEM}" = none ]] | |||
do | |||
possible_item=$( ls -1 "$UPLOAD_QUEUE_DIR" | grep -E '[0-9]{14}_[a-f0-9]{8}$' | sort | head -n 1 ) | |||
possible_item=$( ls -1 "${UPLOAD_QUEUE_DIR}" | grep -E '[0-9]{14}_[a-f0-9]{8}$' | sort | head -n 1 ) | |||
if test -n "${possible_item}" | |||
then | |||
echo "Trying to grab $possible_item" | |||
if mv "$UPLOAD_QUEUE_DIR/$possible_item" "$UPLOADER_WORKING_DIR/" | |||
echo "Trying to grab ${possible_item}" | |||
if mv "${UPLOAD_QUEUE_DIR}/${possible_item}" "${UPLOADER_WORKING_DIR}/" | |||
then | |||
ITEM=$possible_item | |||
ITEM="${possible_item}" | |||
else | |||
echo "Failed to move $possible_item" | |||
echo "Failed to move ${possible_item}" | |||
sleep 5 | |||
fi | |||
else | |||
@@ -60,7 +60,7 @@ do | |||
done | |||
echo "$( date ): Start uploading for item $ITEM" >> uploader.log | |||
echo "$( date ): Start uploading for item ${ITEM}" >> uploader.log | |||
# upload megawarc | |||
size_hint=$( du --bytes -s "${UPLOADER_WORKING_DIR}/${ITEM}" | grep -oE "^[0-9]+" ) | |||
@@ -70,7 +70,7 @@ find "${UPLOADER_WORKING_DIR}/${ITEM}" -type f -regextype posix-egrep -regex ".+ | |||
| while read filename | |||
do | |||
result=1 | |||
while [[ $result -ne 0 ]] | |||
while [[ "${result}" -ne 0 ]] | |||
do | |||
curl -v --location --fail \ | |||
--speed-limit 1 --speed-time 900 \ | |||
@@ -83,52 +83,52 @@ do | |||
--header "x-archive-meta-date:${IA_ITEM_DATE}" \ | |||
--header "x-archive-meta-language:eng" \ | |||
--header "x-archive-meta-noarchivetorrent:true" \ | |||
--header "x-archive-size-hint:$size_hint" \ | |||
--header "x-archive-size-hint:${size_hint}" \ | |||
--header "authorization: LOW ${IA_AUTH}" \ | |||
--upload-file "${UPLOADER_WORKING_DIR}/${ITEM}/${filename}" \ | |||
"https://s3.us.archive.org/${IA_ITEM_PREFIX}${ITEM}/${filename}" \ | |||
> /dev/null | |||
result=$? | |||
if [[ $result -ne 0 ]] | |||
result="${?}" | |||
if [[ "${result}" -ne 0 ]] | |||
then | |||
date | |||
echo "Error while uploading $ITEM, curl said $result" | |||
echo "Error while uploading ${ITEM}, curl said ${result}" | |||
echo "Will retry in 30 seconds" | |||
sleep 30 | |||
fi | |||
done | |||
done | |||
echo "Uploaded $ITEM" | |||
echo "Uploaded ${ITEM}" | |||
echo "$( date ): Completed uploading for item $ITEM" >> uploader.log | |||
echo "$( date ): Completed uploading for item ${ITEM}" >> uploader.log | |||
mayicontinue | |||
# move or remove megawarc | |||
if [ -z "$COMPLETED_DIR" ] | |||
if [ -z "${COMPLETED_DIR}" ] | |||
then | |||
# remove | |||
rm -rf "${UPLOADER_WORKING_DIR}/${ITEM}" | |||
result=$? | |||
result="${?}" | |||
if [[ $result -ne 0 ]] | |||
if [[ "${result}" -ne 0 ]] | |||
then | |||
date | |||
echo "rm -rf megawarc exited with $result for $ITEM" | |||
echo "rm -rf megawarc exited with ${result} for ${ITEM}" | |||
exit 1 | |||
fi | |||
else | |||
# move | |||
mv "${UPLOADER_WORKING_DIR}/${ITEM}" "${COMPLETED_DIR}/" | |||
result=$? | |||
result="${?}" | |||
if [[ $result -ne 0 ]] | |||
if [[ "${result}" -ne 0 ]] | |||
then | |||
date | |||
echo "rm -rf megawarc exited with $result for $ITEM" | |||
echo "rm -rf megawarc exited with ${result} for ${ITEM}" | |||
exit 1 | |||
fi | |||
fi | |||