|
|
@@ -3,10 +3,19 @@ |
|
|
|
# Setup section: enables strict mode, exports the configurable paths and |
# chunk size (each overridable from the environment), and makes sure the |
# three directories exist before the main loop starts. |
# NOTE(review): this text looks like a rendered diff of the script (hunk |
# headers and trailing column separators are present) - confirm against the |
# real file before running anything from it. |
# |
# Strict mode: exit on a failing command (-e), on use of an unset variable |
# (-u), and when any stage of a pipeline fails (-o pipefail). |
set -euo pipefail |
|
|
|
|
|
|
|
# Directory that the main loop searches with find for files to move. |
# Falls back to /data/incoming/ when INCOMING is unset or empty. |
export INCOMING="${INCOMING:-/data/incoming/}" |
|
|
|
# Parent of the "current" directory that the main loop creates and measures. |
# Falls back to /data/mover-work/ when MOVER_WORKING_DIR is unset or empty. |
export MOVER_WORKING_DIR="${MOVER_WORKING_DIR:-/data/mover-work/}" |
|
|
|
# Directory that "current" is renamed into once the size threshold is passed. |
# Falls back to /data/upload-queue/ when UPLOAD_QUEUE is unset or empty. |
export UPLOAD_QUEUE="${UPLOAD_QUEUE:-/data/upload-queue/}" |
|
|
|
# Chunk size in megabytes; the default evaluates to 1024*500 = 512000. |
# NOTE(review): the value is used in shell arithmetic on the next statement, |
# so it presumably has to be a plain integer - confirm with callers. |
export MEGABYTES_PER_CHUNK="${MEGABYTES_PER_CHUNK:-$((1024*500))}" |
|
|
|
# The same limit in bytes (1024*1024 bytes per megabyte); with the default |
# above this is 536870912000. The main loop compares cur_size against it. |
export BYTES_PER_CHUNK=$((1024*1024*MEGABYTES_PER_CHUNK)) |
|
|
|
|
|
|
|
# Create the three directories up front; -p also creates missing parents and |
# does not fail when a directory already exists. |
mkdir -p "${MOVER_WORKING_DIR}" |
|
|
|
mkdir -p "${UPLOAD_QUEUE}" |
|
|
|
mkdir -p "${INCOMING}" |
|
|
|
|
|
|
|
while [[ 1 ]] ; do |
|
|
|
# find every .warc.gz in the rsync directory |
|
|
|
mkdir -p "${MOVER_WORKING_DIR}/current" |
|
|
|
export cur_size=$( du -B1 -s "${MOVER_WORKING_DIR}/current" | grep -oE "^[0-9]+" ) |
|
|
|
|
|
|
|
find "${INCOMING}" -type f -not -name ".*"\ |
|
|
|
| while read filename |
|
|
|
do |
|
|
@@ -15,9 +24,19 @@ while [[ 1 ]] ; do |
|
|
|
then |
|
|
|
continue |
|
|
|
fi |
|
|
|
|
|
|
|
echo "Moving ${filename}" |
|
|
|
mkdir -vp "${UPLOAD_QUEUE}" |
|
|
|
mv -v "${filename}" "${UPLOAD_QUEUE}/" |
|
|
|
export cur_size=$((cur_size + $( du -B1 -s "${filename}" | grep -oE "^[0-9]+" ))) |
|
|
|
mkdir -p "${MOVER_WORKING_DIR}/current" |
|
|
|
mv -v "${filename}" "${MOVER_WORKING_DIR}/current/" |
|
|
|
|
|
|
|
if [[ "${cur_size}" -gt "${BYTES_PER_CHUNK}" ]]; then |
|
|
|
timestamp=$( date +'%Y%m%d%H%M%S' ) |
|
|
|
uuid=$(cat /proc/sys/kernel/random/uuid | cut -d- -f1) |
|
|
|
echo "Current archive is full, moving to ${timestamp}_${uuid}." |
|
|
|
mv "${MOVER_WORKING_DIR}/current" "${UPLOAD_QUEUE}/${timestamp}_${uuid}" |
|
|
|
export cur_size=0 |
|
|
|
fi |
|
|
|
done |
|
|
|
|
|
|
|
echo "Sleeping 30 seconds..." |
|
|
|