The little things give you away... A collection of various small helper stuff
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.
 
 
 

201 lignes
7.0 KiB

  1. #!/bin/bash
  2. # Requires Bash 4.3+
  3. if [[ "$1" == '-h' || "$1" == '--help' ]]; then
  4. printf 'Usage: %q\n' "$0"
  5. printf 'Uploads files from each subdirectory of the PWD into an IA item using the dirname as the identifier\n'
  6. printf 'Configuration happens through a subdir/.dir-to-ia.config file, a Bash script that gets sourced to get the config values\n'
  7. printf "Use '%q --example-config' to get an example configuration with explanations\n" "$0"
  8. printf 'The upload log for each item gets written to subdir/.dir-to-ia.log\n'
  9. exit 1
  10. fi
  11. # Default config values
  12. sha256=no
  13. rm=no
  14. rmwait=yes
  15. clobber=no
  16. derive=no
  17. iaconfigfile=
  18. if [[ "$1" == '--example-config' ]]; then
  19. cat <<-EOF
  20. # Calculate SHA-256 hash of each file after uploading
  21. sha256=${sha256}
  22. # Remove local file after successful upload
  23. rm=${rm}
  24. # Delay removal until IA processed the upload
  25. rmwait=${rmwait}
  26. # Clobber existing files in IA item (no = existing copy is moved to history/files/ by IA)
  27. clobber=${clobber}
  28. # Queue derive after upload (applied after every file!)
  29. derive=${derive}
  30. # 'ia' config file (default: empty, meaning to use ia's own default paths and precedence rules)
  31. iaconfigfile=${iaconfigfile}
  32. # Item metadata (array with 'key:value' elements); the only mandatory variable with no default
  33. metadata=('collection:opensource' "date:$(printf '%(%Y-%m-%d)T')")
  34. EOF
  35. exit
  36. fi
  37. function tsprintf {
  38. datefmt='%(%Y-%m-%d %H:%M:%S)T';
  39. if [[ $# -eq 1 ]]; then
  40. printf "${datefmt} %s\n" -1 "$@";
  41. else
  42. # First argument is the format, rest are arguments to printf; pass through `while read` loop to have a better chance of line buffering.
  43. {
  44. printf '%(%Y-%m-%d %H:%M:%S)T ';
  45. printf "$1" "${@:2}";
  46. printf '\n';
  47. } | while IFS= read -r line; do printf '%s\n' "${line}"; done
  48. fi
  49. }
  50. if [[ "${DEBUG}" ]]; then
  51. function dbgprint { for l; do tsprintf "[DEBUG] ${l}" >&2; done; }
  52. else
  53. function dbgprint { :; }
  54. fi
  55. for cmd in ia-upload-stream ia-wait-item-tasks ia; do
  56. if ! command -v "${cmd}" &>/dev/null; then
  57. echo "Error: ${cmd} not found, make sure it is in PATH" >&2
  58. exit 1
  59. fi
  60. done
  61. while :; do
  62. for dir in */; do
  63. dir="${dir%/}"
  64. identifier="${dir}"
  65. if [[ ! "${dir}" =~ ^[a-zA-Z0-9] ]]; then
  66. continue
  67. fi
  68. if [[ ! -e "${dir}/.dir-to-ia.config" ]]; then
  69. continue
  70. fi
  71. dbgprint "Processing ${dir}"
  72. # Everything from here on is executed in a subshell so that the config sourcing can't affect other items.
  73. # It would be possible to do that without a subshell, but this is easier.
  74. (
  75. # Source and check configuration
  76. . "${dir}/.dir-to-ia.config" || { tsprintf "Sourcing ${dir}/.dir-to-ia.config failed" >&2; exit 1; }
  77. configbroken=
  78. for v in sha256 rm rmwait clobber derive; do
  79. if [[ "${!v}" != 'yes' && "${!v}" != 'no' ]]; then
  80. tsprintf "Error in %q: %s must be 'yes' or 'no'" "${dir}/.dir-to-ia.config" "${v}" >&2
  81. configbroken=yes
  82. fi
  83. done
  84. if read -r _ attrs _ < <(declare -p metadata 2>/dev/null); [[ "${attrs}" != *a* ]]; then
  85. tsprintf 'Error in %q: metadata missing or not an array' "${dir}/.dir-to-ia.config" >&2
  86. configbroken=yes
  87. else
  88. for f in "${metadata[@]}"; do
  89. if [[ "${f}" != *:* ]]; then
  90. tsprintf 'Error in %q: metadata field missing colon: %s' "${dir}/.dir-to-ia.config" "${f}" >&2
  91. configbroken=yes
  92. fi
  93. done
  94. fi
  95. if [[ "${configbroken}" ]]; then
  96. exit 1
  97. fi
  98. if [[ "${rm}" == 'no' && "${clobber}" == 'yes' ]]; then
  99. tsprintf 'Error in %q: rm=no and clobber=yes is not permitted' "${dir}/.dir-to-ia.config" >&2
  100. exit 1
  101. fi
  102. dbgprint 'Configuration:' \
  103. " sha256=${sha256}" \
  104. " rm=${rm}" \
  105. " rmwait=${rmwait}" \
  106. " clobber=${clobber}" \
  107. " derive=${derive}" \
  108. " iaconfigfile${iaconfigfile:+: }${iaconfigfile:- not set}" \
  109. " metadata=($(printf %q "${metadata[0]}")$(if [[ ${#metadata[@]} -gt 1 ]]; then printf ' %q' "${metadata[@]:1}"; fi))"
  110. # If removing local files is disabled, check first which ones are already on IA so they can be skipped.
  111. #TODO Do this only if there are files to upload
  112. if [[ "${rm}" == 'no' ]]; then
  113. dbgprint 'Retrieving existing files on IA...'
  114. #TODO Figure out a better way to verify correct retrieval than appending a placeholder.
  115. readarray -d $'\0' -t iafiles < <(curl --silent --location --max-time 10 --fail "https://archive.org/metadata/${identifier}" | python3 -c 'import json, sys; o = json.load(sys.stdin); {print(f["name"], end = "\0") for f in o.get("files", []) if not f["name"].startswith(sys.argv[1])}; print("__dir-to-ia_end__", end = "\0")' "${identifier}")
  116. if [[ "${#iafiles[@]}" -eq 0 || "${iafiles[-1]}" != '__dir-to-ia_end__' ]]; then
  117. tsprintf 'Error: could not retrieve metadata' >&2
  118. exit 1
  119. fi
  120. unset iafiles[-1]
  121. dbgprint "Existing files on IA:$(printf " %q" "${iafiles[@]}")"
  122. fi
  123. # Loop over local files and upload them
  124. while IFS= read -r -d $'\0' fn; do
  125. dbgprint "Considering file ${fn}"
  126. if [[ "${fn}" == .dir-to-ia.* ]]; then
  127. dbgprint "${fn} is a dir-to-ia file, skipping"
  128. continue
  129. fi
  130. if [[ ! -f "${dir}/${fn}" ]]; then
  131. # Should never happen since the `find` command already uses `-type f`
  132. dbgprint "${fn} is not a regular file, skipping"
  133. continue
  134. fi
  135. if [[ "${fn}" == "${identifier}"* ]]; then
  136. dbgprint "${fn} starts with the identifier, skipping"
  137. continue
  138. fi
  139. if [[ "${rm}" == 'no' ]]; then
  140. found=
  141. for remoteFn in "${iafiles[@]}"; do
  142. if [[ "${fn}" == "${remoteFn}" ]]; then
  143. found=yes
  144. break
  145. fi
  146. done
  147. if [[ "${found}" ]]; then
  148. dbgprint "${fn} found in IA item, skipping"
  149. continue
  150. fi
  151. fi
  152. tsprintf 'Uploading %q to %q...' "${fn}" "${identifier}" >&2
  153. cmd=('ia-upload-stream')
  154. if [[ "${clobber}" == 'yes' ]]; then cmd+=('--clobber'); fi
  155. if [[ "${derive}" == 'no' ]]; then cmd+=('--no-derive'); fi
  156. if [[ "${iaconfigfile}" ]]; then cmd+=('--ia-config-file' "${iaconfigfile}"); fi
  157. cmd+=("${identifier}")
  158. cmd+=("${fn}")
  159. cmd+=("${metadata[@]}")
  160. dbgprint "Upload command:$(printf " %q" "${cmd[@]}")"
  161. "${cmd[@]}" <"${dir}/${fn}" || { tsprintf "ia-upload-stream exited with status $?" >&2; exit 1; }
  162. if [[ "${sha256}" == 'yes' ]]; then
  163. tsprintf 'Calculating SHA-256...' >&2
  164. (cd "${dir}" && sha256sum "${fn}") || { tsprintf "sha256sum exited with status $?" >&2; exit 1; }
  165. fi
  166. if [[ "${rm}" == 'yes' ]]; then
  167. if [[ "${rmwait}" == 'yes' ]]; then
  168. tsprintf 'Waiting for IA to process the upload...' >&2
  169. ia-wait-item-tasks "${identifier}" || { tsprintf "ia-wait-item-tasks exited with status $?" >&2; exit 1; }
  170. fi
  171. tsprintf 'IA upload processing finished, removing %q' "${fn}" >&2
  172. # No option to run an extra SHA-1 check or similar since ia-upload-stream already sends an MD5 for each chunk, so corruption should be impossible.
  173. rm "${dir}/${fn}"
  174. fi
  175. done < <(cd "${dir}" && find . -type f -print0 | sed -z 's,^\./,,')
  176. ) &> >(tee -a "${dir}/.dir-to-ia.log" >&2)
  177. dbgprint "Done with ${dir}"
  178. done
  179. sleep 60
  180. done