The little things give you away... A collection of various small helper stuff
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

201 lines
7.0 KiB

  1. #!/bin/bash
  2. # Requires Bash 4.3+
  3. if [[ "$1" == '-h' || "$1" == '--help' ]]; then
  4. printf 'Usage: %q\n' "$0"
  5. printf 'Uploads files from each subdirectory of the PWD into an IA item using the dirname as the identifier\n'
  6. printf 'Configuration happens through a subdir/.dir-to-ia.config file, a Bash script that gets sourced to get the config values\n'
  7. printf "Use '%q --example-config' to get an example configuration with explanations\n" "$0"
  8. printf 'The upload log for each item gets written to subdir/.dir-to-ia.log\n'
  9. exit 1
  10. fi
  11. # Default config values
  12. sha256=no
  13. rm=no
  14. rmwait=yes
  15. clobber=no
  16. derive=no
  17. iaconfigfile=
  18. if [[ "$1" == '--example-config' ]]; then
  19. cat <<-EOF
  20. # Calculate SHA-256 hash of each file after uploading
  21. sha256=${sha256}
  22. # Remove local file after successful upload
  23. rm=${rm}
  24. # Delay removal until IA processed the upload
  25. rmwait=${rmwait}
  26. # Clobber existing files in IA item (no = existing copy is moved to history/files/ by IA)
  27. clobber=${clobber}
  28. # Queue derive after upload (applied after every file!)
  29. derive=${derive}
  30. # 'ia' config file (default: empty, meaning to use ia's own default paths and precedence rules)
  31. iaconfigfile=${iaconfigfile}
  32. # Item metadata (array with 'key:value' elements); the only mandatory variable with no default
  33. metadata=('collection:opensource' "date:$(printf '%(%Y-%m-%d)T')")
  34. EOF
  35. exit
  36. fi
  37. function tsprintf {
  38. datefmt='%(%Y-%m-%d %H:%M:%S)T';
  39. if [[ $# -eq 1 ]]; then
  40. printf "${datefmt} %s\n" -1 "$@";
  41. else
  42. # First argument is the format, rest are arguments to printf; pass through `while read` loop to have a better chance of line buffering.
  43. {
  44. printf '%(%Y-%m-%d %H:%M:%S)T ';
  45. printf "$1" "${@:2}";
  46. printf '\n';
  47. } | while IFS= read -r line; do printf '%s\n' "${line}"; done
  48. fi
  49. }
  50. if [[ "${DEBUG}" ]]; then
  51. function dbgprint { for l; do tsprintf "[DEBUG] ${l}" >&2; done; }
  52. else
  53. function dbgprint { :; }
  54. fi
  55. for cmd in ia-upload-stream ia-wait-item-tasks ia; do
  56. if ! command -v "${cmd}" &>/dev/null; then
  57. echo "Error: ${cmd} not found, make sure it is in PATH" >&2
  58. exit 1
  59. fi
  60. done
  61. while :; do
  62. for dir in */; do
  63. dir="${dir%/}"
  64. identifier="${dir}"
  65. if [[ ! "${dir}" =~ ^[a-zA-Z0-9] ]]; then
  66. continue
  67. fi
  68. if [[ ! -e "${dir}/.dir-to-ia.config" ]]; then
  69. continue
  70. fi
  71. dbgprint "Processing ${dir}"
  72. # Everything from here on is executed in a subshell so that the config sourcing can't affect other items.
  73. # It would be possible to do that without a subshell, but this is easier.
  74. (
  75. # Source and check configuration
  76. . "${dir}/.dir-to-ia.config" || { tsprintf "Sourcing ${dir}/.dir-to-ia.config failed" >&2; exit 1; }
  77. configbroken=
  78. for v in sha256 rm rmwait clobber derive; do
  79. if [[ "${!v}" != 'yes' && "${!v}" != 'no' ]]; then
  80. tsprintf "Error in %q: %s must be 'yes' or 'no'" "${dir}/.dir-to-ia.config" "${v}" >&2
  81. configbroken=yes
  82. fi
  83. done
  84. if read -r _ attrs _ < <(declare -p metadata 2>/dev/null); [[ "${attrs}" != *a* ]]; then
  85. tsprintf 'Error in %q: metadata missing or not an array' "${dir}/.dir-to-ia.config" >&2
  86. configbroken=yes
  87. else
  88. for f in "${metadata[@]}"; do
  89. if [[ "${f}" != *:* ]]; then
  90. tsprintf 'Error in %q: metadata field missing colon: %s' "${dir}/.dir-to-ia.config" "${f}" >&2
  91. configbroken=yes
  92. fi
  93. done
  94. fi
  95. if [[ "${configbroken}" ]]; then
  96. exit 1
  97. fi
  98. if [[ "${rm}" == 'no' && "${clobber}" == 'yes' ]]; then
  99. tsprintf 'Error in %q: rm=no and clobber=yes is not permitted' "${dir}/.dir-to-ia.config" >&2
  100. exit 1
  101. fi
  102. dbgprint 'Configuration:' \
  103. " sha256=${sha256}" \
  104. " rm=${rm}" \
  105. " rmwait=${rmwait}" \
  106. " clobber=${clobber}" \
  107. " derive=${derive}" \
  108. " iaconfigfile${iaconfigfile:+: }${iaconfigfile:- not set}" \
  109. " metadata=($(printf %q "${metadata[0]}")$(if [[ ${#metadata[@]} -gt 1 ]]; then printf ' %q' "${metadata[@]:1}"; fi))"
  110. # If removing local files is disabled, check first which ones are already on IA so they can be skipped.
  111. #TODO Do this only if there are files to upload
  112. if [[ "${rm}" == 'no' ]]; then
  113. dbgprint 'Retrieving existing files on IA...'
  114. #TODO Figure out a better way to verify correct retrieval than appending a placeholder.
  115. readarray -d $'\0' -t iafiles < <(curl --silent --location --max-time 10 --fail "https://archive.org/metadata/${identifier}" | python3 -c 'import json, sys; o = json.load(sys.stdin); {print(f["name"], end = "\0") for f in o.get("files", []) if not f["name"].startswith(sys.argv[1])}; print("__dir-to-ia_end__", end = "\0")' "${identifier}")
  116. if [[ "${#iafiles[@]}" -eq 0 || "${iafiles[-1]}" != '__dir-to-ia_end__' ]]; then
  117. tsprintf 'Error: could not retrieve metadata' >&2
  118. exit 1
  119. fi
  120. unset iafiles[-1]
  121. dbgprint "Existing files on IA:$(printf " %q" "${iafiles[@]}")"
  122. fi
  123. # Loop over local files and upload them
  124. while IFS= read -r -d $'\0' fn; do
  125. dbgprint "Considering file ${fn}"
  126. if [[ "${fn}" == .dir-to-ia.* ]]; then
  127. dbgprint "${fn} is a dir-to-ia file, skipping"
  128. continue
  129. fi
  130. if [[ ! -f "${dir}/${fn}" ]]; then
  131. # Should never happen since the `find` command already uses `-type f`
  132. dbgprint "${fn} is not a regular file, skipping"
  133. continue
  134. fi
  135. if [[ "${fn}" == "${identifier}"* ]]; then
  136. dbgprint "${fn} starts with the identifier, skipping"
  137. continue
  138. fi
  139. if [[ "${rm}" == 'no' ]]; then
  140. found=
  141. for remoteFn in "${iafiles[@]}"; do
  142. if [[ "${fn}" == "${remoteFn}" ]]; then
  143. found=yes
  144. break
  145. fi
  146. done
  147. if [[ "${found}" ]]; then
  148. dbgprint "${fn} found in IA item, skipping"
  149. continue
  150. fi
  151. fi
  152. tsprintf 'Uploading %q to %q...' "${fn}" "${identifier}" >&2
  153. cmd=('ia-upload-stream')
  154. if [[ "${clobber}" == 'yes' ]]; then cmd+=('--clobber'); fi
  155. if [[ "${derive}" == 'no' ]]; then cmd+=('--no-derive'); fi
  156. if [[ "${iaconfigfile}" ]]; then cmd+=('--ia-config-file' "${iaconfigfile}"); fi
  157. cmd+=("${identifier}")
  158. cmd+=("${fn}")
  159. cmd+=("${metadata[@]}")
  160. dbgprint "Upload command:$(printf " %q" "${cmd[@]}")"
  161. "${cmd[@]}" <"${dir}/${fn}" || { tsprintf "ia-upload-stream exited with status $?" >&2; exit 1; }
  162. if [[ "${sha256}" == 'yes' ]]; then
  163. tsprintf 'Calculating SHA-256...' >&2
  164. (cd "${dir}" && sha256sum "${fn}") || { tsprintf "sha256sum exited with status $?" >&2; exit 1; }
  165. fi
  166. if [[ "${rm}" == 'yes' ]]; then
  167. if [[ "${rmwait}" == 'yes' ]]; then
  168. tsprintf 'Waiting for IA to process the upload...' >&2
  169. ia-wait-item-tasks "${identifier}" || { tsprintf "ia-wait-item-tasks exited with status $?" >&2; exit 1; }
  170. fi
  171. tsprintf 'IA upload processing finished, removing %q' "${fn}" >&2
  172. # No option to run an extra SHA-1 check or similar since ia-upload-stream already sends an MD5 for each chunk, so corruption should be impossible.
  173. rm "${dir}/${fn}"
  174. fi
  175. done < <(cd "${dir}" && find . -type f -print0 | sed -z 's,^\./,,')
  176. ) &> >(tee -a "${dir}/.dir-to-ia.log" >&2)
  177. dbgprint "Done with ${dir}"
  178. done
  179. sleep 60
  180. done