|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137 |
- #!/bin/bash
- # Uploads megawarcs from the upload queue.
- # (Needs a config.sh in the working directory.)
- #
- # ./upload-one
- #
- # 1. Grabs an item from UPLOAD_QUEUE_DIR
- # 2. Reserves the item by moving the directory to the
- # UPLOADER_WORKING_DIR
- # 3. Uploads the item to s3.us.archive.org
- # 4. Removes the source files from the working directory
- # If COMPLETED_DIR is set, uploaded files are moved there.
- #
- # The program exits with 1 on any nontransient error.
- #
-
# Absolute path of the directory that contains this script. It is assigned
# before config.sh is sourced; nothing else in this file reads it.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Settings are taken from the config.sh in the current working directory
# (not from SCRIPT_DIR). Without it the script cannot run.
if ! source ./config.sh
then
  exit 1
fi

# The queue and the working directory are always needed.
for required_dir in "${UPLOAD_QUEUE_DIR}" "${UPLOADER_WORKING_DIR}"
do
  mkdir -p "${required_dir}" || exit 1
done

# COMPLETED_DIR is optional; create it only when it is configured.
if [[ -n "${COMPLETED_DIR}" ]]
then
  mkdir -p "${COMPLETED_DIR}" || exit 1
fi
-
# Checkpoint between the stages of the script.
# The interactive "May I continue?" prompt is commented out, so the
# function currently only prints a single empty line.
mayicontinue() {
  printf '\n'
  # echo "May I continue?"
  # read
  # echo
}

mayicontinue
-
-
# Reserve one item: pick the first (sorted) entry of UPLOAD_QUEUE_DIR whose
# name ends in <14 digits>_<8 hex chars> and move it into
# UPLOADER_WORKING_DIR. ITEM keeps the placeholder "none" until a move
# succeeds.
ITEM=none
until [[ "${ITEM}" != none ]]
do
  candidate=$( ls -1 "${UPLOAD_QUEUE_DIR}" | grep -E '[0-9]{14}_[a-f0-9]{8}$' | sort | head -n 1 )

  # Empty queue: report, wait, and finish successfully.
  if [[ -z "${candidate}" ]]
  then
    date
    echo "No current item found!"
    sleep 30
    exit 0
  fi

  echo "Trying to grab ${candidate}"
  if mv "${UPLOAD_QUEUE_DIR}/${candidate}" "${UPLOADER_WORKING_DIR}/"
  then
    ITEM="${candidate}"
  else
    # The move failed; pause briefly and look at the queue again.
    echo "Failed to move ${candidate}"
    sleep 5
  fi
done
-
-
echo "$( date ): Start uploading for item ${ITEM}" >> uploader.log

# upload megawarc
# Total size in bytes of the item directory; sent with every request as the
# x-archive-size-hint header.
size_hint=$( du --bytes -s "${UPLOADER_WORKING_DIR}/${ITEM}" | grep -oE "^[0-9]+" )

# Upload every *.megawarc.{warc.gz,warc.zst,tar,json.gz} file of the item,
# largest file first (to optimise S3 snowballing).
# find prints "<size in bytes><TAB><basename>", sort -rn orders the lines by
# size descending, and cut drops the size column again.
# IFS= and -r keep each file name exactly as find printed it.
find "${UPLOADER_WORKING_DIR}/${ITEM}" -type f -regextype posix-egrep -regex ".+\.megawarc\.(warc\.(gz|zst)|tar|json\.gz)$" -printf "%s\t%f\n" \
  | sort -rn \
  | cut -f 2- \
  | while IFS= read -r filename
do
  # Retry the same file until curl succeeds. --fail makes HTTP error
  # responses count as failures; --speed-limit/--speed-time abort a transfer
  # that stays below 1 byte/s for 900 seconds.
  # NOTE(review): IA_AUTH is passed as a command-line argument and is
  # therefore visible in process listings while curl runs.
  until curl -v --location --fail \
    --speed-limit 1 --speed-time 900 \
    --header "x-archive-queue-derive:1" \
    --header "x-amz-auto-make-bucket:1" \
    --header "x-archive-keep-old-version:1" \
    --header "x-archive-meta-collection:${IA_COLLECTION}" \
    --header "x-archive-meta-mediatype:web" \
    --header "x-archive-meta-title:${IA_ITEM_TITLE} ${ITEM}" \
    --header "x-archive-meta-date:${IA_ITEM_DATE}" \
    --header "x-archive-meta-language:eng" \
    --header "x-archive-meta-noarchivetorrent:true" \
    --header "x-archive-size-hint:${size_hint}" \
    --header "authorization: LOW ${IA_AUTH}" \
    --upload-file "${UPLOADER_WORKING_DIR}/${ITEM}/${filename}" \
    "https://s3.us.archive.org/${IA_ITEM_PREFIX}${ITEM}/${filename}" \
    > /dev/null
  do
    # $? still holds curl's exit status at this point.
    result="${?}"
    date
    echo "Error while uploading ${ITEM}, curl said ${result}"
    echo "Will retry in 30 seconds"
    sleep 30
  done
done

echo "Uploaded ${ITEM}"

echo "$( date ): Completed uploading for item ${ITEM}" >> uploader.log
-
-
mayicontinue


# move or remove megawarc
# Without COMPLETED_DIR the uploaded item is deleted from the working
# directory; with it, the item directory is moved there instead.
# Either operation failing ends the script with exit status 1.
if [[ -z "${COMPLETED_DIR}" ]]
then
  # remove
  # The :? guards abort instead of running rm -rf on a path built from an
  # empty variable.
  rm -rf -- "${UPLOADER_WORKING_DIR:?}/${ITEM:?}"
  result="${?}"

  if [[ "${result}" -ne 0 ]]
  then
    date
    echo "rm -rf megawarc exited with ${result} for ${ITEM}"
    exit 1
  fi
else
  # move
  mv "${UPLOADER_WORKING_DIR}/${ITEM}" "${COMPLETED_DIR}/"
  result="${?}"

  if [[ "${result}" -ne 0 ]]
  then
    date
    # Report the command that actually failed (mv, not rm -rf).
    echo "mv megawarc exited with ${result} for ${ITEM}"
    exit 1
  fi
fi

exit 0
-
|