#!/bin/bash
# Uploads megawarcs from the upload queue.
#
#   ./upload-one UPLOAD_QUEUE_DIR
#
# 1. Grabs an item from UPLOAD_QUEUE_DIR
# 2. Reserves the item by moving the directory to the working directory
# 3. Uploads the item to s3.us.archive.org
# 4. Removes the source files from the working directory
#
# The program exits with 1 on any nontransient error.
# It exits with 0 after a successful upload, and also when the queue
# currently holds no item to upload.
#
# Run from the upload directory, e.g. /archiveteam/ssd1/uploader-1/
#
#   ./upload-one /archiveteam/ssd1/upload-queue
#
# Settings read from ./config.sh (sourced below) and used by this script:
#   FILE_PREFIX, IA_COLLECTION, IA_ITEM_TITLE, IA_ITEM_DATE,
#   IA_ITEM_PREFIX, IA_AUTH
#

UPLOAD_QUEUE_DIR=$1

# Without a queue directory the item lookup would scan the wrong place
# and the failed "mv" below would be retried forever, so bail out early.
if [[ -z "${UPLOAD_QUEUE_DIR}" ]]; then
  printf 'Usage: %s UPLOAD_QUEUE_DIR\n' "${0##*/}" >&2
  exit 1
fi

# Directory containing this script. Not used below; kept because the
# sourced config.sh may rely on it -- TODO confirm.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

if [[ ! -f ./config.sh ]]; then
  echo "config.sh not found in current directory."
  exit 1
fi
source ./config.sh

# Pause hook between the stages of the script. The interactive prompt is
# disabled, so it currently only prints an empty line.
function mayicontinue {
  echo
  # echo "May I continue?"
  # read
  # echo
}

mayicontinue


# try to grab an item from UPLOAD_QUEUE_DIR
ITEM=none
while [[ "${ITEM}" = none ]]; do
  # Pick the first entry (in glob sort order) whose name contains "201".
  # A glob is used instead of parsing "ls" so unusual names stay intact.
  possible_item=
  for candidate in "${UPLOAD_QUEUE_DIR}"/*201*; do
    # An unmatched glob stays literal; skip it.
    [[ -e "${candidate}" ]] || continue
    possible_item=${candidate##*/}
    break
  done

  if [[ -n "${possible_item}" ]]; then
    echo "Trying to grab $possible_item"
    # Moving the directory into the working directory reserves the item;
    # if the move fails, wait briefly and look again.
    if mv -- "${UPLOAD_QUEUE_DIR}/${possible_item}" .; then
      ITEM=${possible_item}
    else
      echo "Failed to move $possible_item"
      sleep 5
    fi
  else
    date
    echo "No current item found!"
    sleep 30
    exit 0
  fi
done


echo "$( date ): Start uploading for item $ITEM" >> uploader.log

# upload megawarc
# (upload the large files first to optimise S3 snowballing)
for ext in warc.gz tar json.gz; do
  filename="${FILE_PREFIX}${ITEM}.megawarc.${ext}"

  # Retry the same file until curl reports success.
  result=1
  while [[ ${result} -ne 0 ]]; do
    # Size in bytes, passed to the server as x-archive-size-hint.
    size_hint=$( du --bytes -s -- "${ITEM}/${filename}" | grep -oE "^[0-9]+" )

    # NOTE(review): the endpoint is plain http, so the authorization
    # header travels unencrypted -- consider https after confirming the
    # endpoint supports it.
    curl -v --location --fail \
         --speed-limit 1 --speed-time 900 \
         --header "x-archive-queue-derive:1" \
         --header "x-amz-auto-make-bucket:1" \
         --header "x-archive-meta-collection:${IA_COLLECTION}" \
         --header "x-archive-meta-mediatype:web" \
         --header "x-archive-meta-title:${IA_ITEM_TITLE} ${ITEM}" \
         --header "x-archive-meta-date:${IA_ITEM_DATE}" \
         --header "x-archive-meta-language:eng" \
         --header "x-archive-size-hint:${size_hint}" \
         --header "authorization: LOW ${IA_AUTH}" \
         --upload-file "${ITEM}/${filename}" \
         "http://s3.us.archive.org/${IA_ITEM_PREFIX}${ITEM}/${filename}" \
         > /dev/null
    result=$?

    if [[ ${result} -ne 0 ]]; then
      date
      echo "Error while uploading $ITEM, curl said $result"
      echo "Will retry in 30 seconds"
      sleep 30
    fi
  done
done

echo "Uploaded $ITEM"

echo "$( date ): Completed uploading for item $ITEM" >> uploader.log


mayicontinue


# remove megawarc
# "--" stops option parsing and ":?" aborts if ITEM is ever empty, so a
# bad value can never widen what "rm -rf" deletes.
rm -rf -- "${ITEM:?}"
result=$?

if [[ ${result} -ne 0 ]]; then
  date
  echo "rm -rf megawarc exited with $result for $ITEM"
  exit 1
fi

exit 0