Browse Source

Refactor and compare file list as well

master
JustAnotherArchivist 3 years ago
parent
commit
6512669cfd
1 changed files with 41 additions and 1 deletions
  1. +41
    -1
      iasha1check

+ 41
- 1
iasha1check View File

@@ -1,3 +1,43 @@
#!/bin/bash
# Fetch the file list and SHA-1 hashes of an IA item and ensure that they match the files in the current directory (i.e. that the upload was successful)
# One-shot check: download the item's _files.xml, turn every source="original" <file> entry that has a
# <sha1> into a "<sha1> <file name>" line, skip the item's own .cdx.gz/.cdx.idx and _meta.sqlite/_meta.xml
# files, and feed the resulting list to `sha1sum -c`.
# The hash is taken from the last space-separated field so that file names containing spaces stay intact.
# NOTE(review): the statements further down repeat this fetch and run their own `sha1sum -c`; this line
# looks like the pre-refactor version of the script - confirm whether it is still meant to run.
identifier="$1"
# Backslash-escape regex metacharacters so the identifier is matched literally by grep -P below.
escapedIdentifier="$(sed 's/[.[\*^$()+?{|]/\\&/g' <<<"${identifier}")"
sha1sum -c <(
  curl -sL "https://archive.org/download/${identifier}/${identifier}_files.xml" |
    tr -d '\n' |
    grep -Po '<file .*?</file>' |
    grep 'source="original".*<sha1>' |
    sed 's,^.*name=",,; s,".*<sha1>, ,; s,</sha1>.*$,,' |
    grep -Pv "^${escapedIdentifier}"'(\.cdx\.(gz|idx)|_meta\.(sqlite|xml)) ' |
    awk '{ hash = $NF; sub(/ [^ ]*$/, ""); print hash " " $0 }'
)

# IA item identifier, taken from the first command-line argument.
identifier="$1"
# The identifier with regex metacharacters backslash-escaped, so that it is matched literally by grep -P.
escapedIdentifier="$(sed 's/[.[\*^$()+?{|]/\\&/g' <<<"${identifier}")"

# Convert an item's _files.xml (read from stdin) into "<sha1> <file name>" lines on stdout.
#   $1: regex-escaped item identifier
# Only <file> elements with source="original" that carry a <sha1> are kept; the item's own
# .cdx.gz/.cdx.idx and _meta.sqlite/_meta.xml entries are dropped.
# The hash is taken from the last space-separated field, so file names containing spaces stay intact.
# NOTE(review): names are used exactly as they appear in the XML; entity references (e.g. &amp;) are
# not decoded - confirm whether that matters for the items being checked.
parseIaSha1sums() {
  tr -d '\n' |
    grep -Po '<file .*?</file>' |
    grep 'source="original".*<sha1>' |
    sed 's,^.*name=",,; s,".*<sha1>, ,; s,</sha1>.*$,,' |
    grep -Pv "^${1}"'(\.cdx\.(gz|idx)|_meta\.(sqlite|xml)) ' |
    awk '{ hash = $NF; sub(/ [^ ]*$/, ""); print hash " " $0 }'
}

# One "<sha1> <file name>" entry per original file of the item.
readarray -t iasha1sums < <(curl -sL "https://archive.org/download/${identifier}/${identifier}_files.xml" | parseIaSha1sums "${escapedIdentifier}")

# Print each argument on its own line; print nothing at all when called without arguments.
# (A bare `printf "%s\n"` with no arguments emits one empty line, which would show up below as a
# phantom file with an empty name whenever one of the lists is empty.)
printLines() {
  if [[ $# -gt 0 ]]
  then
    printf "%s\n" "$@"
  fi
}

# Paths of all regular files below the current directory, with the leading "./" stripped.
localFiles=()
while IFS= read -r -d $'\0' f
do
  localFiles+=("${f:2}")
done < <(find . -type f -print0)

# File names on the IA side: the checksum entries minus the 40-character hash and the separating space.
readarray -t iaFiles < <(printLines "${iasha1sums[@]}" | sed 's,^.\{40\} ,,')

# comm requires sorted input.
readarray -t localFilesSorted < <(printLines "${localFiles[@]}" | sort)
readarray -t iaFilesSorted < <(printLines "${iaFiles[@]}" | sort)

# localMissing: names only in the IA list; iaMissing: names only in the local list.
readarray -t localMissing < <(comm -13 <(printLines "${localFilesSorted[@]}") <(printLines "${iaFilesSorted[@]}"))
readarray -t iaMissing < <(comm -23 <(printLines "${localFilesSorted[@]}") <(printLines "${iaFilesSorted[@]}"))

# Overall exit status of the script: stays 0 unless one of the checks reports a problem.
status=0

# Report the outcome of the file list comparison: either a single OK line, or one section per
# direction in which the two lists differ (local-only files first, then IA-only files).
if (( ${#iaMissing[@]} == 0 && ${#localMissing[@]} == 0 ))
then
  echo "File list comparison: OK"
else
  # At least one of the two lists is non-empty here, so the comparison has failed.
  status=1
  if (( ${#iaMissing[@]} > 0 ))
  then
    echo "Local files that are not in the IA item:"
    printf " %s\n" "${iaMissing[@]}"
  fi
  if (( ${#localMissing[@]} > 0 ))
  then
    echo "IA item files that are not in the local directory:"
    printf " %s\n" "${localMissing[@]}"
  fi
fi

# Verify the given "<sha1> <file name>" entries (one per argument) with `sha1sum -c`, reading the
# list from stdin. Both stdout and stderr of sha1sum are indented by one space.
# Returns the exit status of sha1sum.
checkSha1sums() {
  # The sed filter is a regular pipeline stage rather than an asynchronous `>(...)` process
  # substitution, so all of its output has been written by the time this function returns.
  sha1sum -c < <(printf "%s\n" "$@") 2>&1 | sed 's,^, ,'
  return "${PIPESTATUS[0]}"
}

echo "SHA-1 comparison:"
if ! checkSha1sums "${iasha1sums[@]}"
then
  echo "SHA-1 comparison failed!"
  status=1
fi

# Exit with the accumulated result: 0 if every check above passed, 1 if any of them set status=1.
exit ${status}

Loading…
Cancel
Save