You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

35 lines
919 B

  1. #!/bin/bash
  2. for url
  3. do
  4. name="${url//\//_}"
  5. itemDir="./data/${name}/"
  6. mkdir -p "${itemDir}"
  7. warcName="${name}"
  8. item_dir="${itemDir}" item_value="${url}" warc_file_base="${warcName}" /grab/wget-at \
  9. -U 'mercurial/proto-1.0 (Mercurial 5.3.1)' \
  10. -nv \
  11. --no-cookies \
  12. --content-on-error \
  13. --lua-script mercurial.lua \
  14. -o "${itemDir}/wget.log" \
  15. --no-check-certificate \
  16. --output-document "${itemDir}/wget.tmp" \
  17. --truncate-output \
  18. -e robots=off \
  19. --rotate-dns \
  20. --recursive --level=inf \
  21. --no-parent \
  22. --page-requisites \
  23. --timeout 30 \
  24. --tries inf \
  25. --span-hosts \
  26. --waitretry 30 \
  27. --warc-file "${itemDir}/${warcName}-main" \
  28. --warc-header 'operator: Archive Team' \
  29. --warc-header 'mercurial-dld-script-version: 20201031.01' \
  30. --warc-dedup-url-agnostic \
  31. --warc-header "mercurial-repository: ${url}" \
  32. --warc-header 'warc-type: main' \
  33. "${url}?cmd=capabilities"
  34. done