Não pode escolher mais do que 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.
 
 

138 linhas
3.3 KiB

  1. #!/bin/bash
  2. # Uploads megawarcs from the upload queue.
  3. # (Needs a config.sh in the working directory.)
  4. #
  5. # ./upload-one
  6. #
  7. # 1. Grabs an item from UPLOAD_QUEUE_DIR
  8. # 2. Reserves the item by moving the directory to the
  9. # UPLOADER_WORKING_DIR
  10. # 3. Uploads the item to s3.us.archive.org
  11. # 4. Removes the source files from the working directory
  12. # If COMPLETED_DIR is set, uploaded files are moved there.
  13. #
  14. # The program exits with 1 on any nontransient error.
  15. #
  # Absolute path of the directory that contains this script.
  SCRIPT_DIR="$(
    cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd
  )"

  # Load config.sh from the current working directory; abort if that fails.
  source ./config.sh || exit 1

  # The queue and working directories are mandatory: create them or give up.
  for needed_dir in "${UPLOAD_QUEUE_DIR}" "${UPLOADER_WORKING_DIR}"; do
    mkdir -p "${needed_dir}" || exit 1
  done

  # COMPLETED_DIR is optional; it is only created when it is non-empty.
  if [[ -n "${COMPLETED_DIR}" ]]; then
    mkdir -p "${COMPLETED_DIR}" || exit 1
  fi
  # Checkpoint hook called between the stages of this script.
  # The interactive "May I continue?" prompt below is commented out, so the
  # function currently only prints a single blank line.
  mayicontinue() {
    printf '\n'
    # echo "May I continue?"
    # read
    # echo
  }
  mayicontinue

  # Try to grab an item from UPLOAD_QUEUE_DIR.
  # A queue entry qualifies when its name ends in
  # <14 digits>_<8 lowercase hex digits>.
  item_name_regex='[0-9]{14}_[a-f0-9]{8}$'
  ITEM=none
  while [[ "${ITEM}" = none ]]; do
    # Pick the first qualifying entry. Glob results are already sorted, so
    # this selects the same entry as `ls | grep | sort | head -n 1` would,
    # without parsing ls output (which breaks on unusual file names).
    possible_item=
    for queue_entry in "${UPLOAD_QUEUE_DIR}"/*; do
      queue_entry_name="${queue_entry##*/}"
      if [[ "${queue_entry_name}" =~ ${item_name_regex} ]]; then
        possible_item="${queue_entry_name}"
        break
      fi
    done

    if [[ -n "${possible_item}" ]]; then
      echo "Trying to grab ${possible_item}"
      # Moving the entry into UPLOADER_WORKING_DIR reserves it for this run.
      if mv "${UPLOAD_QUEUE_DIR}/${possible_item}" "${UPLOADER_WORKING_DIR}/"; then
        ITEM="${possible_item}"
      else
        # The move failed; wait a little and look at the queue again.
        echo "Failed to move ${possible_item}"
        sleep 5
      fi
    else
      # Nothing to upload: report it, wait, and end this run successfully.
      date
      echo "No current item found!"
      sleep 30
      exit 0
    fi
  done
  echo "$( date ): Start uploading for item ${ITEM}" >> uploader.log

  # upload megawarc
  # Total size in bytes of the item directory; sent as x-archive-size-hint.
  size_hint=$( du --bytes -s "${UPLOADER_WORKING_DIR}/${ITEM}" | grep -oE "^[0-9]+" )

  # (upload the large files first to optimise S3 snowballing)
  # find emits "<size in bytes><TAB><file name>" per matching file, sort -rn
  # puts the largest file first, and cut strips the size column again.
  # IFS= and read -r keep backslashes and surrounding whitespace in file
  # names intact.
  find "${UPLOADER_WORKING_DIR}/${ITEM}" -type f -regextype posix-egrep -regex ".+\.megawarc\.(warc\.(gz|zst)|tar|json\.gz)$" -printf "%s\t%f\n" \
    | sort -rn \
    | cut -f 2- \
    | while IFS= read -r filename
  do
    # Keep retrying the same file until curl reports success.
    result=1
    while [[ "${result}" -ne 0 ]]
    do
      # --speed-limit/--speed-time: abort the transfer when it stays below
      # 1 byte/s for 900 seconds, so a stalled upload gets retried.
      curl -v --location --fail \
        --speed-limit 1 --speed-time 900 \
        --header "x-archive-queue-derive:1" \
        --header "x-amz-auto-make-bucket:1" \
        --header "x-archive-keep-old-version:1" \
        --header "x-archive-meta-collection:${IA_COLLECTION}" \
        --header "x-archive-meta-mediatype:web" \
        --header "x-archive-meta-title:${IA_ITEM_TITLE} ${ITEM}" \
        --header "x-archive-meta-date:${IA_ITEM_DATE}" \
        --header "x-archive-meta-language:eng" \
        --header "x-archive-meta-noarchivetorrent:true" \
        --header "x-archive-size-hint:${size_hint}" \
        --header "authorization: LOW ${IA_AUTH}" \
        --upload-file "${UPLOADER_WORKING_DIR}/${ITEM}/${filename}" \
        "https://s3.us.archive.org/${IA_ITEM_PREFIX}${ITEM}/${filename}" \
        > /dev/null
      result="${?}"
      if [[ "${result}" -ne 0 ]]
      then
        date
        echo "Error while uploading ${ITEM}, curl said ${result}"
        echo "Will retry in 30 seconds"
        sleep 30
      fi
    done
  done

  echo "Uploaded ${ITEM}"
  echo "$( date ): Completed uploading for item ${ITEM}" >> uploader.log
  mayicontinue

  # move or remove megawarc
  if [[ -z "${COMPLETED_DIR}" ]]
  then
    # remove: no COMPLETED_DIR configured, so the uploaded item is deleted
    rm -rf "${UPLOADER_WORKING_DIR}/${ITEM}"
    result="${?}"
    if [[ "${result}" -ne 0 ]]
    then
      date
      echo "rm -rf megawarc exited with ${result} for ${ITEM}"
      exit 1
    fi
  else
    # move: keep the uploaded item by moving it into COMPLETED_DIR
    mv "${UPLOADER_WORKING_DIR}/${ITEM}" "${COMPLETED_DIR}/"
    result="${?}"
    if [[ "${result}" -ne 0 ]]
    then
      date
      # Name the command that actually failed (this branch runs mv, not rm).
      echo "mv megawarc exited with ${result} for ${ITEM}"
      exit 1
    fi
  fi

  exit 0