#!/usr/bin/env zsh

# Abort on command failure, on use of an unset parameter, and when any stage
# of a pipeline fails.
set -eu -o pipefail

# Usage: <this script> COMPLETE_DIR   (paths to process are read from stdin)
#
# Dependencies:
# - apt install zsh ripgrep rclone curl jq
# - exastash `es` in PATH
# - environment variables for exastash including EXASTASH_NO_GDRIVE=1
# - download-scripts `path-to-eye-path` in PATH
#
# Provide as stdin the output of e.g.
# es x find -t f -- ~/stash/YouTube | take-wanted-ids ~/dead-video-ids
#
# The temporary files will land in ~/stash/YouTube which should be on your SSDs.
# The finished files will be moved to the directory given as the first
# argument. A missing argument stops the script with a usage message rather
# than a bare "parameter not set" error.
complete_dir=${1:?usage: $0 COMPLETE_DIR, with file paths on stdin}

# Enable job control for wait-until-jobs-below
set -m

# Wait until the number of shell jobs drops below a limit.
# This can be used as an alternative to parallel using
# for i in ...; do something $i &; wait-until-jobs-below 8; done
#
# Arguments: $1 - limit; must be a positive integer without leading zeros.
# Returns:   0 once `jobs` lists fewer than $1 entries;
#            2 (with a message on stderr) if $1 is missing or not a positive
#            integer. Without this check a non-numeric limit made the
#            comparison fail, which ended the loop at once and silently
#            disabled throttling, and a limit of 0 could never be satisfied.
wait-until-jobs-below() {
  local limit=${1:-}
  case $limit in
    '' | *[!0-9]* | 0*)
      printf 'wait-until-jobs-below: limit must be a positive integer, got "%s"\n' "$limit" >&2
      return 2
      ;;
  esac

  # Poll the job table; each line printed by `jobs` is counted as one job.
  while (( $(jobs | wc -l) >= limit )); do
    sleep 0.1
  done
}

# Runtime control files live in ~/eye-control:
# - download-parallelism: maximum number of concurrent background jobs
# - stop: if this file exists, the main loop exits before the next item
mkdir -p ~/eye-control

# Show the current parallelism setting, or seed it with the default of 50 when
# the file is missing or empty (an empty file would otherwise yield no limit).
if [[ -s ~/eye-control/download-parallelism ]]; then
  cat ~/eye-control/download-parallelism
else
  echo -n 50 > ~/eye-control/download-parallelism
fi

# Destination root for finished files.
mkdir -p -- "$complete_dir"

# Last known-good concurrency limit. 50 matches the default this script writes
# to ~/eye-control/download-parallelism when that file does not exist.
parallelism=50

# Main loop: one path per stdin line. For each path not yet recorded as
# `namedfiles`, start a background job that fetches the file, registers it,
# reports metrics and moves it below $complete_dir.
# `IFS= read -r` keeps backslashes and surrounding whitespace in the path
# intact; the `|| [[ -n $i ]]` clause also handles a final line that lacks a
# trailing newline.
while IFS= read -r i || [[ -n $i ]]; do
  # NOTE(review): this exits without waiting for background jobs that are
  # still running - confirm that is the intended way to stop.
  if [[ -f ~/eye-control/stop ]]; then
    echo "exiting because stop file is present"
    exit 1
  fi

  printf '%s\n' "$i"

  # Destination path relative to $complete_dir.
  eye_path=$(path-to-eye-path "$i" | sed -r "s,$HOME/stash/,,g")
  complete_eye_dir=$(dirname -- "$complete_dir/$eye_path")

  # Here-strings instead of `echo | cmd`: under pipefail, a reader that exits
  # early (rg -q) can turn a successful match into a failed pipeline.
  info=$(es x info -- "$i")
  file_id=$(jq .id <<< "$info")
  size=$(jq .size <<< "$info")

  # If already in namedfiles i.e. the-eye, skip
  if rg -q -F '"type": "namedfiles"' <<< "$info"; then
    continue
  fi

  # Video files are additionally counted in the video-specific metrics.
  case $i in
    *.mp4 | *.webm | *.flv | *.video)
      the_eye_video_bytes_saved=$size
      the_eye_video_files_saved=1
      ;;
    *)
      the_eye_video_bytes_saved=0
      the_eye_video_files_saved=0
      ;;
  esac

  # JSON-encode the pathname so quotes or backslashes in a file name cannot
  # break (or inject into) the request body.
  pathname_json=$(jq -n --arg p "$eye_path" '$p')

  # Ideally, we would create a `namedfiles` entity only after it's in ceph,
  # but this is okay: we can later recreate all the `namedfiles` by scanning
  # all of the files in the ceph directory.
  #
  # The job's stdin is /dev/null so that nothing inside it can consume the
  # path list this loop is reading. A failure at any step is reported on
  # stderr; the job itself still exits 0, as before.
  (
    ionice -c 3 nice es x get -s -- "$i" &&
      curl --fail-with-body -u "ya: ." -X POST -d "{\"file_id\": $file_id, \"location\": \"the-eye\", \"pathname\": $pathname_json}" https://ya.borg.xyz/cgi-bin/new-namedfiles &&
      curl -X POST -d "{\"the_eye_any_bytes_saved\": $size, \"the_eye_video_bytes_saved\": $the_eye_video_bytes_saved, \"the_eye_any_files_saved\": 1, \"the_eye_video_files_saved\": $the_eye_video_files_saved}" "http://eye.borg.xyz:31416/metrics" &&
      mkdir -p -- "$complete_eye_dir" &&
      mv -- "$i" "$complete_eye_dir"/ ||
      printf 'warning: failed to process %s\n' "$i" >&2
  ) < /dev/null &

  # Re-read the limit every iteration so it can be tuned while running. An
  # unreadable, empty or non-numeric value (e.g. the file is mid-edit) keeps
  # the last good limit instead of aborting or disabling the throttle.
  requested=$(cat ~/eye-control/download-parallelism 2> /dev/null || true)
  requested=${requested//[[:space:]]/}
  case $requested in
    '' | *[!0-9]* | 0*)
      printf 'warning: ignoring invalid download-parallelism "%s"; keeping %s\n' "$requested" "$parallelism" >&2
      ;;
    *)
      parallelism=$requested
      ;;
  esac
  wait-until-jobs-below "$parallelism"
done

# Let the remaining background jobs finish before the script ends.
wait