#!/bin/bash
# Uploads megawarcs from the upload queue.
#
#   ./upload-one UPLOAD_QUEUE_DIR
#
# 1. Grabs an item from UPLOAD_QUEUE_DIR.
# 2. Reserves the item by moving its directory into the working directory.
# 3. Uploads the item to s3.us.archive.org.
# 4. Removes the source files from the working directory.
#
# The program exits with 1 on any nontransient error.
#
# Run it from the uploader's working directory (for example
# /archiveteam/ssd1/uploader-1/), which must contain config.sh:
#
#   ./upload-one /archiveteam/ssd1/upload-queue
#
  # Queue directory to take items from (first positional argument).
  UPLOAD_QUEUE_DIR=$1
  if [[ -z "${UPLOAD_QUEUE_DIR}" ]]; then
    # Without this guard an empty argument would make the queue scan list the
    # current directory and try to move "/<item>" into place.
    echo "Usage: $0 UPLOAD_QUEUE_DIR" >&2
    exit 1
  fi

  # Absolute path of the directory containing this script.
  # NOTE(review): not referenced in the visible part of this script; kept in
  # case config.sh relies on it - confirm before removing.
  SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

  # config.sh is read from the current working directory, not from SCRIPT_DIR.
  # The upload step below reads FILE_PREFIX, IA_COLLECTION, IA_ITEM_TITLE,
  # IA_ITEM_DATE, IA_ITEM_PREFIX and IA_AUTH, which are not set in this script
  # (presumably defined by config.sh).
  if [[ ! -f ./config.sh ]]; then
    echo "config.sh not found in current directory."
    exit 1
  fi
  source ./config.sh
  # Checkpoint hook called between the stages of this script.
  # Currently it only prints an empty line. The interactive confirmation
  # prompt is left commented out below.
  # Arguments: none
  # Outputs:   a single empty line on stdout
  function mayicontinue {
    echo
    # echo "May I continue?"
    # read
    # echo
  }
  mayicontinue

  # try to grab an item from UPLOAD_QUEUE_DIR
  #
  # The first entry (in sort order) whose name contains "201" is moved into
  # the current directory. A successful mv reserves the item for this script.
  # A failed mv is retried after 5 seconds with a fresh listing.
  # NOTE(review): the "201" filter skips any queue entry whose name does not
  # contain that substring - confirm it still matches the item naming scheme.
  ITEM=none
  while [[ "${ITEM}" = none ]]; do
    possible_item=$( ls -1 -- "${UPLOAD_QUEUE_DIR}" | grep 201 | sort | head -n 1 )
    if [[ "${possible_item}" =~ 201 ]]; then
      echo "Trying to grab $possible_item"
      # Quoted and "--"-terminated so unusual names cannot be split, globbed
      # or parsed as options.
      if mv -- "${UPLOAD_QUEUE_DIR}/${possible_item}" . ; then
        ITEM=$possible_item
      else
        echo "Failed to move $possible_item"
        sleep 5
      fi
    else
      # Nothing to do: wait before exiting successfully.
      date
      echo "No current item found!"
      sleep 30
      exit 0
    fi
  done
  echo "$( date ): Start uploading for item $ITEM" >> uploader.log

  # upload megawarc
  # (upload the large files first to optimise S3 snowballing)
  #
  # Each of the three megawarc files is sent with curl and retried every
  # 30 seconds until curl exits with 0. curl aborts a transfer that stays
  # below 1 byte/s for 900 seconds (--speed-limit / --speed-time).
  for ext in warc.gz tar json.gz; do
    filename="${FILE_PREFIX}${ITEM}.megawarc.${ext}"
    filepath="${ITEM}/${filename}"

    # A missing file can never be fixed by retrying, so treat it as a
    # nontransient error instead of looping forever.
    if [[ ! -f "${filepath}" ]]; then
      date
      echo "Cannot upload $ITEM: ${filepath} not found"
      exit 1
    fi

    # File size in bytes, sent to the server as x-archive-size-hint.
    size_hint=$( du --bytes -s -- "${filepath}" | grep -oE "^[0-9]+" )

    result=1
    while [[ $result -ne 0 ]]; do
      # https is used so the authorization header is not sent in clear text.
      curl -v --location --fail \
        --speed-limit 1 --speed-time 900 \
        --header "x-archive-queue-derive:1" \
        --header "x-amz-auto-make-bucket:1" \
        --header "x-archive-meta-collection:${IA_COLLECTION}" \
        --header "x-archive-meta-mediatype:web" \
        --header "x-archive-meta-title:${IA_ITEM_TITLE} ${ITEM}" \
        --header "x-archive-meta-date:${IA_ITEM_DATE}" \
        --header "x-archive-meta-language:eng" \
        --header "x-archive-size-hint:$size_hint" \
        --header "authorization: LOW ${IA_AUTH}" \
        --upload-file "${filepath}" \
        "https://s3.us.archive.org/${IA_ITEM_PREFIX}${ITEM}/${filename}" \
        > /dev/null
      result=$?
      if [[ $result -ne 0 ]]; then
        date
        echo "Error while uploading $ITEM, curl said $result"
        echo "Will retry in 30 seconds"
        sleep 30
      fi
    done
  done
  echo "Uploaded $ITEM"
  echo "$( date ): Completed uploading for item $ITEM" >> uploader.log

  mayicontinue

  # remove megawarc
  # "${ITEM:?}" aborts the script if ITEM is empty or unset, and "--" keeps a
  # name starting with a dash from being parsed as an option to rm.
  rm -rf -- "${ITEM:?}"
  result=$?
  if [[ $result -ne 0 ]]; then
    date
    echo "rm -rf megawarc exited with $result for $ITEM"
    exit 1
  fi

  exit 0