25'ten fazla konu seçemezsiniz. Konular bir harf veya rakamla başlamalı, kısa çizgiler ('-') içerebilir ve en fazla 35 karakter uzunluğunda olabilir.

11 yıl önce
11 yıl önce
11 yıl önce
11 yıl önce
11 yıl önce
11 yıl önce
11 yıl önce
11 yıl önce
11 yıl önce
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  #!/bin/bash
  # Move uploaded .warc.gz / .warc.zst files into a "current" archive
  # directory. When that directory is large enough, move it into the
  # packing queue under a <timestamp>_<uuid> name and start a new one.
  # (This script only queues the directory; it does not create a tar.)
  #
  # Arguments (both optional; if $1 is empty, ./config.sh is sourced):
  #   $1 - directory that receives the uploads
  #   $2 - working directory of the chunker
  #
  # Be careful: this script assumes that any file in the upload directory
  # that has a name that ends with *.warc.gz or *.warc.zst is a fully
  # uploaded file and can be moved somewhere else. Remember this when
  # running Rsync.
  #
  INCOMING_UPLOADS_DIR="${1}" # /home/archiveteam/uploads
  CHUNKER_WORKING_DIR="${2}"  # /home/archiveteam/processed
  # NOTE(review): without command-line arguments this default is derived
  # from an empty CHUNKER_WORKING_DIR, so config.sh presumably has to set
  # PACKING_QUEUE_DIR itself - confirm.
  PACKING_QUEUE_DIR="${CHUNKER_WORKING_DIR}/archive"
  MEGABYTES_PER_CHUNK=$((1024*25))

  # if not specified in command-line arguments
  if [[ -z "${INCOMING_UPLOADS_DIR}" ]]
  then
    source ./config.sh || exit 1
  fi

  BYTES_PER_CHUNK=$((1024*1024*MEGABYTES_PER_CHUNK))

  mkdir -p "${CHUNKER_WORKING_DIR}" || exit 1
  mkdir -p "${PACKING_QUEUE_DIR}" || exit 1
  mkdir -p "${CHUNKER_WORKING_DIR}/current" || exit 1

  # Print the disk usage in bytes (du -B1 -s) of the path given as $1.
  # Prints nothing and returns non-zero when du produces no number.
  disk_usage_bytes() {
    du -B1 -s -- "${1}" | grep -oE "^[0-9]+"
  }

  # Start from whatever is already sitting in current/.
  cur_size=$( disk_usage_bytes "${CHUNKER_WORKING_DIR}/current" )
  cur_size=${cur_size:-0}

  # Find every .warc.gz / .warc.zst in the upload directory.
  # NUL-delimited names plus "IFS= read -r" keep backslashes, surrounding
  # whitespace and newlines in file names intact.
  # The loop runs in a pipeline subshell; cur_size is only used inside it.
  find "${INCOMING_UPLOADS_DIR}" -type f -regex ".+\.warc\.\(gz\|zst\)$" -print0 \
    | while IFS= read -r -d '' filename
  do
    # skip partial uploads
    if [[ "${filename}" =~ rsync-tmp ]]
    then
      continue
    fi

    # The file may have disappeared since find listed it; an empty size
    # would break the arithmetic below, so skip such files.
    if ! file_size=$( disk_usage_bytes "${filename}" ) || [[ -z "${file_size}" ]]
    then
      echo "Skipping ${filename}: cannot determine its size" >&2
      continue
    fi

    # move to the current/ directory (recreated here because it is
    # renamed away whenever a chunk is full)
    # NOTE(review): files with the same base name from different upload
    # subdirectories end up at the same destination path - confirm that
    # names are unique.
    echo "Moving ${filename}"
    mkdir -p "${CHUNKER_WORKING_DIR}/current"
    if ! mv -- "${filename}" "${CHUNKER_WORKING_DIR}/current/"
    then
      echo "Failed to move ${filename}" >&2
      continue
    fi

    # only count files that actually arrived in current/
    cur_size=$((cur_size + file_size))

    # if the current/ directory is large enough,
    # rename it to <timestamp>_<uuid> and start a new current/
    if (( cur_size > BYTES_PER_CHUNK ))
    then
      timestamp=$( date +'%Y%m%d%H%M%S' )
      # first dash-separated group of a kernel-generated random UUID
      uuid=$( cut -d- -f1 < /proc/sys/kernel/random/uuid )
      echo "Current archive is full, moving to ${timestamp}_${uuid}."
      if mv -- "${CHUNKER_WORKING_DIR}/current" "${PACKING_QUEUE_DIR}/${timestamp}_${uuid}"
      then
        cur_size=0
      else
        # keep the running total so the rename is retried after the next file
        echo "Failed to move current archive to ${PACKING_QUEUE_DIR}/${timestamp}_${uuid}" >&2
      fi
      sleep 3
    fi
  done