#!/bin/bash
# Uploads megawarcs from the upload queue.
# (Needs a config.sh in the working directory.)
#
#   ./upload-one
#
# 1. Grabs an item from UPLOAD_QUEUE_DIR
# 2. Reserves the item by moving the directory to the
#    UPLOADER_WORKING_DIR
# 3. Uploads the item to s3.us.archive.org
# 4. Removes the source files from the working directory.
#    If COMPLETED_DIR is set, the uploaded item is moved there instead.
#
# The program exits with 1 on any nontransient error.
# Upload errors are treated as transient: each file is retried every
# 30 seconds until curl succeeds.
#
# Variables read by this script (presumably provided by config.sh or the
# environment -- confirm against your config.sh):
#   UPLOAD_QUEUE_DIR, UPLOADER_WORKING_DIR, COMPLETED_DIR (optional),
#   IA_COLLECTION, IA_ITEM_TITLE, IA_ITEM_DATE, IA_ITEM_PREFIX, IA_AUTH
#
# Progress is appended to ./uploader.log in the working directory.
#

# Not referenced again in this script; kept because the sourced config.sh
# may rely on it (TODO confirm).
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

source ./config.sh || exit 1

mkdir -p "${UPLOAD_QUEUE_DIR}" || exit 1
mkdir -p "${UPLOADER_WORKING_DIR}" || exit 1

if [[ -n "${COMPLETED_DIR}" ]]
then
  mkdir -p "${COMPLETED_DIR}" || exit 1
fi

# Debugging checkpoint. Prints a blank line; uncomment the lines below to
# make the script pause for confirmation at each checkpoint.
function mayicontinue {
  echo
  # echo "May I continue?"
  # read
  # echo
}

mayicontinue

# try to grab an item from UPLOAD_QUEUE_DIR
# The mv doubles as the reservation step: if it fails (for example because
# the entry is no longer there), wait briefly and pick again.
ITEM=none
while [[ "${ITEM}" = none ]]
do
  possible_item=$( ls -1 "${UPLOAD_QUEUE_DIR}" \
                   | grep -E '[0-9]{14}_[a-f0-9]{8}$' \
                   | sort \
                   | head -n 1 )

  if [[ -n "${possible_item}" ]]
  then
    echo "Trying to grab ${possible_item}"
    if mv -- "${UPLOAD_QUEUE_DIR}/${possible_item}" "${UPLOADER_WORKING_DIR}/"
    then
      ITEM="${possible_item}"
    else
      echo "Failed to move ${possible_item}"
      sleep 5
    fi
  else
    # Nothing queued: pause, then exit successfully.
    date
    echo "No current item found!"
    sleep 30
    exit 0
  fi
done

echo "$( date ): Start uploading for item ${ITEM}" >> uploader.log

# upload megawarc
# Total size in bytes of the item directory, sent as x-archive-size-hint.
size_hint=$( du --bytes -s -- "${UPLOADER_WORKING_DIR}/${ITEM}" | grep -oE "^[0-9]+" )

# (upload the large files first to optimise S3 snowballing)
# find emits "<size>\t<basename>"; sorting numerically in reverse puts the
# biggest file first, and cut then drops the size column again.
# NOTE: only the basename (%f) is used, so files are expected to sit
# directly inside the item directory.
find "${UPLOADER_WORKING_DIR}/${ITEM}" -type f \
     -regextype posix-egrep \
     -regex ".+\.megawarc\.(warc\.(gz|zst)|tar|json\.gz)$" \
     -printf "%s\t%f\n" \
  | sort -rn \
  | cut -f2- \
  | while IFS= read -r filename
do
  # Retry this file until curl reports success.
  result=1
  while [[ "${result}" -ne 0 ]]
  do
    # --speed-limit/--speed-time abort a transfer that stays below
    # 1 byte/s for 900 seconds, so a stalled upload ends up in the retry path.
    # NOTE(review): IA_AUTH is passed on the command line and is therefore
    # visible in the process list while curl runs.
    curl -v --location --fail \
         --speed-limit 1 --speed-time 900 \
         --header "x-archive-queue-derive:1" \
         --header "x-amz-auto-make-bucket:1" \
         --header "x-archive-keep-old-version:1" \
         --header "x-archive-meta-collection:${IA_COLLECTION}" \
         --header "x-archive-meta-mediatype:web" \
         --header "x-archive-meta-title:${IA_ITEM_TITLE} ${ITEM}" \
         --header "x-archive-meta-date:${IA_ITEM_DATE}" \
         --header "x-archive-meta-language:eng" \
         --header "x-archive-meta-noarchivetorrent:true" \
         --header "x-archive-size-hint:${size_hint}" \
         --header "authorization: LOW ${IA_AUTH}" \
         --upload-file "${UPLOADER_WORKING_DIR}/${ITEM}/${filename}" \
         "https://s3.us.archive.org/${IA_ITEM_PREFIX}${ITEM}/${filename}" \
         > /dev/null
    result="${?}"
    if [[ "${result}" -ne 0 ]]
    then
      date
      echo "Error while uploading ${ITEM}, curl said ${result}"
      echo "Will retry in 30 seconds"
      sleep 30
    fi
  done
done

echo "Uploaded ${ITEM}"

echo "$( date ): Completed uploading for item ${ITEM}" >> uploader.log

mayicontinue

# move or remove megawarc
if [[ -z "${COMPLETED_DIR}" ]]
then
  # remove
  # ${var:?} aborts instead of expanding to a path rooted at "/" should
  # either variable ever be empty.
  rm -rf -- "${UPLOADER_WORKING_DIR:?}/${ITEM:?}"
  result="${?}"

  if [[ "${result}" -ne 0 ]]
  then
    date
    echo "rm -rf megawarc exited with ${result} for ${ITEM}"
    exit 1
  fi
else
  # move
  mv -- "${UPLOADER_WORKING_DIR}/${ITEM}" "${COMPLETED_DIR}/"
  result="${?}"

  if [[ "${result}" -ne 0 ]]
  then
    date
    echo "mv megawarc exited with ${result} for ${ITEM}"
    exit 1
  fi
fi

exit 0