Não pode escolher mais do que 25 tópicos. Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

há 11 anos
há 11 anos
há 11 anos
há 11 anos
há 11 anos
há 11 anos
há 11 anos
há 11 anos
há 11 anos
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. #!/bin/bash
  2. # Move uploaded .warc.gz files to an archive directory.
  3. # When the archive is large enough, make a tar and start with a
  4. # new archive.
  5. #
  6. # Be careful: this script assumes that any file in the upload directory
  7. # that has a name that ends with *.warc.gz is a fully uploaded file and
  8. # can be moved somewhere else. Remember this when running Rsync.
  9. #
  10. INCOMING_UPLOADS_DIR="${1}" # /home/archiveteam/uploads
  11. CHUNKER_WORKING_DIR="${2}" # /home/archiveteam/processed
  12. PACKING_QUEUE_DIR="${CHUNKER_WORKING_DIR}/archive"
  13. MEGABYTES_PER_CHUNK=$((1024*25))
  14. # if not specified in command-line arguments
  15. if [ -z "${INCOMING_UPLOADS_DIR}" ]
  16. then
  17. source ./config.sh || exit 1
  18. fi
  19. BYTES_PER_CHUNK=$((1024*1024*MEGABYTES_PER_CHUNK))
  20. mkdir -p "${CHUNKER_WORKING_DIR}" || exit 1
  21. mkdir -p "${PACKING_QUEUE_DIR}" || exit 1
  22. mkdir -p "${CHUNKER_WORKING_DIR}/current" || exit 1
  23. cur_size=$( du -B1 -s "${CHUNKER_WORKING_DIR}/current" | grep -oE "^[0-9]+" )
  24. # find every .warc.gz in the upload directory
  25. find "${INCOMING_UPLOADS_DIR}" -type f -regex ".+\.warc\.\(gz\|zst\)$" \
  26. | while read filename
  27. do
  28. # skip partial uploads
  29. if [[ "${filename}" =~ rsync-tmp ]]
  30. then
  31. continue
  32. fi
  33. cur_size=$((cur_size + $( du -B1 -s "${filename}" | grep -oE "^[0-9]+" )))
  34. # move to the current/ directory
  35. echo "Moving ${filename}"
  36. mkdir -p "${CHUNKER_WORKING_DIR}/current"
  37. mv "${filename}" "${CHUNKER_WORKING_DIR}/current/"
  38. # if the current/ directory is large enough,
  39. # rename it to archive-XXXXX and start a new current/
  40. if [[ "${cur_size}" -gt "${BYTES_PER_CHUNK}" ]]
  41. then
  42. timestamp=$( date +'%Y%m%d%H%M%S' )
  43. uuid=$(cat /proc/sys/kernel/random/uuid | cut -d- -f1)
  44. echo "Current archive is full, moving to ${timestamp}_${uuid}."
  45. mv "${CHUNKER_WORKING_DIR}/current" "${PACKING_QUEUE_DIR}/${timestamp}_${uuid}"
  46. cur_size=0
  47. sleep 3
  48. fi
  49. done