You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

upload-one 3.3 KiB

11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
10 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. #!/bin/bash
  2. # Uploads megawarcs from the upload queue.
  3. # (Needs a config.sh in the working directory.)
  4. #
  5. # ./upload-one
  6. #
  7. # 1. Grabs an item from UPLOAD_QUEUE_DIR
  8. # 2. Reserves the item by moving the directory to the
  9. # UPLOADER_WORKING_DIR
  10. # 3. Uploads the item to s3.us.archive.org
  11. # 4. Removes the source files from the working directory
  12. # If COMPLETED_DIR is set, uploaded files are moved there.
  13. #
  14. # The program exits with 1 on any nontransient error.
  15. #
  # Absolute path of the directory that contains this script.
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

  # Settings are loaded from the current working directory (not SCRIPT_DIR);
  # a missing or failing config.sh aborts the run.
  if ! source ./config.sh; then
    exit 1
  fi

  # The queue and working directories are always needed.
  if ! mkdir -p "${UPLOAD_QUEUE_DIR}"; then
    exit 1
  fi
  if ! mkdir -p "${UPLOADER_WORKING_DIR}"; then
    exit 1
  fi

  # COMPLETED_DIR is optional; it is only created when configured.
  if [[ -n "${COMPLETED_DIR}" ]]; then
    if ! mkdir -p "${COMPLETED_DIR}"; then
      exit 1
    fi
  fi
  # Checkpoint called between the stages of the script.
  # Currently it only prints an empty line; the interactive confirmation
  # prompt is kept below, commented out.
  mayicontinue() {
    printf '\n'
    # echo "May I continue?"
    # read
    # echo
  }
  mayicontinue

  # Try to grab an item from UPLOAD_QUEUE_DIR.
  # Candidates are entries whose name ends in <14 digits>_<8 hex chars>; the
  # first one in sort order is picked. Moving it into UPLOADER_WORKING_DIR
  # reserves it for this process.
  ITEM=none
  until [[ "${ITEM}" != none ]]; do
    possible_item=$(
      ls -1 "$UPLOAD_QUEUE_DIR" \
        | grep -E '[0-9]{14}_[a-f0-9]{8}$' \
        | sort \
        | head -n 1
    )

    # Nothing queued: report, pause, and end this run successfully.
    if [[ -z "${possible_item}" ]]; then
      date
      echo "No current item found!"
      sleep 30
      exit 0
    fi

    echo "Trying to grab $possible_item"
    if ! mv "$UPLOAD_QUEUE_DIR/$possible_item" "$UPLOADER_WORKING_DIR/"; then
      # The move failed; wait briefly and look at the queue again.
      echo "Failed to move $possible_item"
      sleep 5
      continue
    fi

    ITEM=$possible_item
  done
  echo "$( date ): Start uploading for item $ITEM" >> uploader.log

  # upload megawarc
  # Total size in bytes of the item directory, sent as x-archive-size-hint.
  size_hint=$( du --bytes -s "${UPLOADER_WORKING_DIR}/${ITEM}" | grep -oE "^[0-9]+" )

  # (upload the large files first to optimise S3 snowballing)
  for ext in warc.gz tar json.gz
  do
    filename="${FILE_PREFIX}${ITEM}.megawarc.${ext}"
    source_file="${UPLOADER_WORKING_DIR}/${ITEM}/${filename}"

    # The tar part is optional: skip it when it does not exist. The check must
    # look at the same path that is uploaded below (inside the item directory),
    # not at the current working directory, otherwise an existing tar would be
    # skipped and later removed without ever being uploaded.
    if [[ "${ext}" == "tar" && ! -f "${source_file}" ]]
    then
      continue
    fi

    # Retry this file until curl succeeds; there is no upper bound on retries.
    result=1
    while [[ $result -ne 0 ]]
    do
      # --fail turns HTTP error responses into a non-zero exit status;
      # --speed-limit/--speed-time abort a transfer that stays below
      # 1 byte/s for 900 seconds. Only curl's stdout is discarded.
      curl -v --location --fail \
           --speed-limit 1 --speed-time 900 \
           --header "x-archive-queue-derive:1" \
           --header "x-amz-auto-make-bucket:1" \
           --header "x-archive-keep-old-version:1" \
           --header "x-archive-meta-collection:${IA_COLLECTION}" \
           --header "x-archive-meta-mediatype:web" \
           --header "x-archive-meta-title:${IA_ITEM_TITLE} ${ITEM}" \
           --header "x-archive-meta-date:${IA_ITEM_DATE}" \
           --header "x-archive-meta-language:eng" \
           --header "x-archive-meta-noarchivetorrent:true" \
           --header "x-archive-size-hint:$size_hint" \
           --header "authorization: LOW ${IA_AUTH}" \
           --upload-file "${source_file}" \
           "https://s3.us.archive.org/${IA_ITEM_PREFIX}${ITEM}/${filename}" \
           > /dev/null
      result=$?

      if [[ $result -ne 0 ]]
      then
        date
        echo "Error while uploading $ITEM, curl said $result"
        echo "Will retry in 30 seconds"
        sleep 30
      fi
    done
  done
  echo "Uploaded $ITEM"
  echo "$( date ): Completed uploading for item $ITEM" >> uploader.log

  mayicontinue

  # move or remove megawarc
  if [ -z "$COMPLETED_DIR" ]
  then
    # No COMPLETED_DIR configured: delete the uploaded item from the working
    # directory. The ':?' expansions abort the script instead of letting an
    # empty variable collapse the path towards "/".
    rm -rf -- "${UPLOADER_WORKING_DIR:?}/${ITEM:?}"
    result=$?
    if [[ $result -ne 0 ]]
    then
      date
      echo "rm -rf megawarc exited with $result for $ITEM"
      exit 1
    fi
  else
    # COMPLETED_DIR configured: keep the uploaded item by moving it there.
    mv "${UPLOADER_WORKING_DIR}/${ITEM}" "${COMPLETED_DIR}/"
    result=$?
    if [[ $result -ne 0 ]]
    then
      date
      # Report the command that actually failed (this branch runs mv, not rm).
      echo "mv megawarc exited with $result for $ITEM"
      exit 1
    fi
  fi

  exit 0