|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137 |
#!/bin/bash
# Uploads megawarcs from the upload queue.
# (Needs a config.sh in the working directory.)
#
#   ./upload-one
#
# 1. Grabs the first item (in sort order) from UPLOAD_QUEUE_DIR.
# 2. Reserves the item by moving its directory to UPLOADER_WORKING_DIR.
# 3. Uploads the item's megawarc files to s3.us.archive.org.
# 4. Removes the source files from the working directory.
#    If COMPLETED_DIR is set, the uploaded item is moved there instead.
#
# Exit status:
#   0  an item was uploaded and cleaned up, or the queue was empty
#   1  config.sh could not be sourced, a directory could not be created,
#      or the final rm/mv failed
# A failed upload is not fatal: each file is retried every 30 seconds until
# curl succeeds.
#
# Variables used below without being defined here (presumably provided by
# config.sh): UPLOAD_QUEUE_DIR, UPLOADER_WORKING_DIR, COMPLETED_DIR (optional),
# FILE_PREFIX, IA_COLLECTION, IA_ITEM_TITLE, IA_ITEM_DATE, IA_ITEM_PREFIX,
# IA_AUTH.
#

# Not referenced again in this script; it is set before config.sh is sourced,
# so config.sh may rely on it -- keep it.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

source ./config.sh || exit 1

mkdir -p "$UPLOAD_QUEUE_DIR" || exit 1
mkdir -p "$UPLOADER_WORKING_DIR" || exit 1

if [[ -n "$COMPLETED_DIR" ]]; then
  mkdir -p "$COMPLETED_DIR" || exit 1
fi

# Checkpoint between the stages of the script. Currently it only prints an
# empty line; uncomment the prompt below to step through a run interactively.
mayicontinue() {
  echo
  # echo "May I continue?"
  # read
  # echo
}

mayicontinue


# try to grab an item from UPLOAD_QUEUE_DIR
# Queue entries are recognised by a name ending in
# <14 digits>_<8 lowercase hex digits>.
item_pattern='[0-9]{14}_[a-f0-9]{8}$'

ITEM=none
while [[ $ITEM == none ]]; do
  # Pick the first matching entry. Glob results are already sorted, so this
  # selects the same entry as listing the directory and sorting it.
  # If the directory is empty the glob stays literal ("*"), which does not
  # match the pattern, so possible_item remains empty.
  possible_item=
  for candidate in "$UPLOAD_QUEUE_DIR"/*; do
    candidate=${candidate##*/}
    if [[ $candidate =~ $item_pattern ]]; then
      possible_item=$candidate
      break
    fi
  done

  if [[ -z $possible_item ]]; then
    date
    echo "No current item found!"
    sleep 30
    exit 0
  fi

  echo "Trying to grab $possible_item"
  # The move doubles as the reservation: if it fails (for example because the
  # entry is no longer in the queue), wait briefly and look again.
  if mv -- "$UPLOAD_QUEUE_DIR/$possible_item" "$UPLOADER_WORKING_DIR/"; then
    ITEM=$possible_item
  else
    echo "Failed to move $possible_item"
    sleep 5
  fi
done


echo "$( date ): Start uploading for item $ITEM" >> uploader.log

# upload megawarc
item_dir="${UPLOADER_WORKING_DIR}/${ITEM}"
# Total size of the item directory in bytes, sent as x-archive-size-hint.
size_hint=$( du --bytes -s "$item_dir" | grep -oE "^[0-9]+" )

# (upload the large files first to optimise S3 snowballing)
for ext in warc.gz tar json.gz; do
  filename="${FILE_PREFIX}${ITEM}.megawarc.${ext}"

  # skip non-existing tar files
  # The check must look inside the item directory (the same path curl
  # uploads from), not in the current working directory.
  if [[ $ext == tar && ! -f "${item_dir}/${filename}" ]]; then
    continue
  fi

  # Retry this file until curl reports success.
  result=1
  while (( result != 0 )); do
    curl -v --location --fail \
      --speed-limit 1 --speed-time 900 \
      --header "x-archive-queue-derive:1" \
      --header "x-amz-auto-make-bucket:1" \
      --header "x-archive-keep-old-version:1" \
      --header "x-archive-meta-collection:${IA_COLLECTION}" \
      --header "x-archive-meta-mediatype:web" \
      --header "x-archive-meta-title:${IA_ITEM_TITLE} ${ITEM}" \
      --header "x-archive-meta-date:${IA_ITEM_DATE}" \
      --header "x-archive-meta-language:eng" \
      --header "x-archive-meta-notorrent:true" \
      --header "x-archive-size-hint:$size_hint" \
      --header "authorization: LOW ${IA_AUTH}" \
      --upload-file "${item_dir}/${filename}" \
      "https://s3.us.archive.org/${IA_ITEM_PREFIX}${ITEM}/${filename}" \
      > /dev/null
    result=$?

    if (( result != 0 )); then
      date
      echo "Error while uploading $ITEM, curl said $result"
      echo "Will retry in 30 seconds"
      sleep 30
    fi
  done
done

echo "Uploaded $ITEM"

echo "$( date ): Completed uploading for item $ITEM" >> uploader.log


mayicontinue


# move or remove megawarc
if [[ -z "$COMPLETED_DIR" ]]; then
  # remove
  # ":?" aborts instead of running rm -rf on a shortened path should either
  # variable ever be empty.
  rm -rf -- "${UPLOADER_WORKING_DIR:?}/${ITEM:?}"
  result=$?

  if (( result != 0 )); then
    date
    echo "rm -rf megawarc exited with $result for $ITEM"
    exit 1
  fi
else
  # move
  mv -- "${UPLOADER_WORKING_DIR}/${ITEM}" "${COMPLETED_DIR}/"
  result=$?

  if (( result != 0 )); then
    date
    echo "mv megawarc exited with $result for $ITEM"
    exit 1
  fi
fi

exit 0
|