|
- #!/bin/bash
- # Uploads megawarcs from the upload queue.
- #
- # ./upload-one UPLOAD_QUEUE_DIR
- #
- # 1. Grabs an item from UPLOAD_QUEUE_DIR
- # 2. Reserves the item by moving the directory to the working directory
- # 3. Uploads the item to s3.us.archive.org
- # 4. Removes the source files from the working directory
- #
- # The program exits with 1 on any nontransient error.
- #
- # Run from the uploader's working directory, which must contain config.sh,
- # e.g. from /archiveteam/ssd1/uploader-1/:
- #
- # ./upload-one /archiveteam/ssd1/upload-queue
- #
- #
-
- # Queue directory to take items from (first command-line argument).
- UPLOAD_QUEUE_DIR=$1
-
- # Refuse to run without a queue directory: an empty value would make the
- # later queue listing and move operate on the wrong directories.
- if [[ -z "$UPLOAD_QUEUE_DIR" ]]; then
-   echo "Usage: $0 UPLOAD_QUEUE_DIR" >&2
-   exit 1
- fi
-
- # Absolute path of the directory containing this script. It is not referenced
- # again in this script; presumably kept for the sourced config.sh -- verify.
- SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-
- # The settings file is read from the current directory, not from SCRIPT_DIR.
- if [[ ! -f ./config.sh ]]; then
-   echo "config.sh not found in current directory."
-   exit 1
- fi
- source ./config.sh
-
-
- function mayicontinue {
- echo
- # echo "May I continue?"
- # read
- # echo
- }
-
- mayicontinue
-
-
- # try to grab an item from UPLOAD_QUEUE_DIR
- ITEM=none
- while [[ $ITEM = none ]]
- do
- possible_item=$( ls -1 $UPLOAD_QUEUE_DIR | grep 201 | sort | head -n 1 )
- if [[ $possible_item =~ 201 ]]
- then
- echo "Trying to grab $possible_item"
- if mv $UPLOAD_QUEUE_DIR/$possible_item .
- then
- ITEM=$possible_item
- else
- echo "Failed to move $possible_item"
- sleep 5
- fi
- else
- date
- echo "No current item found!"
- sleep 30
- exit 0
- fi
- done
-
-
- echo "$( date ): Start uploading for item $ITEM" >> uploader.log
-
- # upload megawarc
- # (upload the large files first to optimise S3 snowballing)
- for ext in warc.gz tar json.gz
- do
- result=1
- while [[ $result -ne 0 ]]
- do
- filename=${FILE_PREFIX}${ITEM}.megawarc.${ext}
- size_hint=$( du --bytes -s ${ITEM}/${filename} | grep -oE "^[0-9]+" )
- curl -v --location --fail \
- --speed-limit 1 --speed-time 900 \
- --header "x-archive-queue-derive:1" \
- --header "x-amz-auto-make-bucket:1" \
- --header "x-archive-meta-collection:${IA_COLLECTION}" \
- --header "x-archive-meta-mediatype:web" \
- --header "x-archive-meta-title:${IA_ITEM_TITLE} ${ITEM}" \
- --header "x-archive-meta-date:${IA_ITEM_DATE}" \
- --header "x-archive-meta-language:eng" \
- --header "x-archive-size-hint:$size_hint" \
- --header "authorization: LOW ${IA_AUTH}" \
- --upload-file ${ITEM}/${filename} \
- http://s3.us.archive.org/${IA_ITEM_PREFIX}${ITEM}/${filename} \
- > /dev/null
- result=$?
- if [[ $result -ne 0 ]]
- then
- date
- echo "Error while uploading $ITEM, curl said $result"
- echo "Will retry in 30 seconds"
- sleep 30
- fi
- done
- done
-
- echo "Uploaded $ITEM"
-
- echo "$( date ): Completed uploading for item $ITEM" >> uploader.log
-
-
- mayicontinue
-
-
- # remove megawarc
- rm -rf ${ITEM}
- result=$?
-
- if [[ $result -ne 0 ]]
- then
- date
- echo "rm -rf megawarc exited with $result for $ITEM"
- exit 1
- fi
-
-
- exit 0
|