From aedbabef21f3f024b153899e1e11084a80fa8d4e Mon Sep 17 00:00:00 2001
From: Roelf Wichertjes
Date: Thu, 24 Mar 2022 16:02:10 +0100
Subject: [PATCH] Add uploader.

---
Review notes (this section is ignored when the patch is applied):
- upload-single.sh: a missing target directory now exits with status 1 and
  reports on stderr; the size/year assignments are split from `export` so a
  failing command is no longer masked.
- uploader.sh: `shopt -s nullglob` keeps an empty queue from producing the
  literal pattern "*/"; `flock -nb` is replaced by `flock -n`.
- The post-image blob ids on the `index` lines are carried over unchanged and
  do not describe the edited contents.

 Dockerfile       |  1 +
 upload-single.sh | 38 ++++++++++++++++++++++++++++++++++++++
 uploader.sh      | 23 +++++++++++++++++++++--
 3 files changed, 60 insertions(+), 2 deletions(-)
 create mode 100755 upload-single.sh

diff --git a/Dockerfile b/Dockerfile
index 05bd0f1..93eccab 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -18,6 +18,7 @@ WORKDIR /
 COPY mover.sh /mover.sh
 COPY entrypoint.sh /entrypoint.sh
 COPY uploader.sh /uploader.sh
+COPY upload-single.sh /upload-single.sh
 
 #ADD https://raw.githubusercontent.com/ArchiveTeam/ArchiveBot/master/pipeline/requirements.txt /requirements.txt
 #ADD https://raw.githubusercontent.com/ArchiveTeam/ArchiveBot/master/uploader/uploader.py /uploader.py
diff --git a/upload-single.sh b/upload-single.sh
new file mode 100755
index 0000000..a66c12c
--- /dev/null
+++ b/upload-single.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# Process a single pack directory: print the `ia upload` command for its files.
+# Usage: upload-single.sh <pack-directory>
+set -euo pipefail
+
+if [[ ! -d "$1" ]] ; then
+  echo "Target directory $1 does not exist!" >&2
+  exit 1
+fi
+
+set -x
+
+cd "$1"
+upload="$(basename "$(pwd)")"
+# Assign first and export afterwards so a failing command aborts under set -e.
+CUR_SIZE="$( du -B1 -s . | grep -oE "^[0-9]+" )"
+YEAR="$(date +%Y)"
+export CUR_SIZE YEAR
+# The ia command below is only echoed, not executed.
+echo ia upload "archiveteam_archivebot_go_${upload}" * \
+  --sleep=120 \
+  --retries=100 \
+  --metadata="noarchivetorrent:true" \
+  --metadata="size-hint:${CUR_SIZE}" \
+  --metadata="mediatype:web" \
+  --metadata="description:ArchiveBot is an Archive Team service to quickly grab smaller at-risk or critical sites to bring copies into the Internet Archive Wayback machine." \
+  --metadata="title: Archiveteam: Archivebot GO Pack ${upload}" \
+  --metadata="collection:archivebot" \
+  --metadata="creator:Archive Team" \
+  --metadata="date:${YEAR}" \
+  --checksum \
+  --delete
+
+# TEST SLEEP
+sleep 6000
+
+cd ..
+echo rmdir "$upload"
diff --git a/uploader.sh b/uploader.sh
index 7ec7d2f..e025aae 100755
--- a/uploader.sh
+++ b/uploader.sh
@@ -1,2 +1,21 @@
-#!/bin/bash
-set -exuo pipefail
+#!/usr/bin/env bash
+# Poll the upload queue and run /upload-single.sh on each pack directory under
+# an exclusive, non-blocking flock. The loop ends once ../STOP exists.
+set -euo pipefail
+# With an empty queue the glob must expand to nothing, not the literal "*/".
+shopt -s nullglob
+
+export UPLOAD_QUEUE="${UPLOAD_QUEUE:-/data/upload-queue/}"
+mkdir -p "${UPLOAD_QUEUE}"
+
+cd "${UPLOAD_QUEUE}"
+while [ ! -f ../STOP ]
+do
+  echo "Starting new loop..."
+  for pack in */; do
+    echo "Trying claim ${pack}..."
+    # -n is the short spelling of --nb; "-nb" would be read as -n plus -b.
+    flock --verbose -x -n "$pack" /upload-single.sh "$pack" || echo "Could not claim pack!"
+  done
+  sleep 30
+done