You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
#!/bin/bash
# Runs /grab/wget-at once for every URL passed on the command line.
#
# For each URL a working directory ./data/<slug>/ is created, where <slug> is
# the URL with every "/" replaced by "_". The wget log, the temporary output
# document and the WARC file for that URL are all written below it.
#
# Usage: <this-script> URL...

for url in "$@"; do
  # Flatten the URL into a single path component.
  slug="${url//\//_}"

  # The trailing slash is part of the value handed to the child process as
  # item_dir, so it is kept exactly as-is (the paths below therefore contain
  # a harmless double slash).
  work_dir="./data/${slug}/"
  mkdir -p "${work_dir}"

  # Option list for wget-at, in the order it is passed on the command line.
  wget_args=(
    -U 'mercurial/proto-1.0 (Mercurial 5.3.1)'
    -nv
    --no-cookies
    --content-on-error
    --lua-script mercurial.lua
    -o "${work_dir}/wget.log"
    --no-check-certificate
    --output-document "${work_dir}/wget.tmp"
    --truncate-output
    -e robots=off
    --rotate-dns
    --recursive --level=inf
    --no-parent
    --page-requisites
    --timeout 30
    --tries inf
    --span-hosts
    --waitretry 30
    --warc-file "${work_dir}/${slug}-main"
    --warc-header 'operator: Archive Team'
    --warc-header 'mercurial-dld-script-version: 20201031.01'
    --warc-dedup-url-agnostic
    --warc-header "mercurial-repository: ${url}"
    --warc-header 'warc-type: main'
    # Start URL: the repository's capabilities command.
    "${url}?cmd=capabilities"
  )

  # item_dir, item_value and warc_file_base are set only in the environment
  # of this one command (presumably read by mercurial.lua — confirm there).
  item_dir="${work_dir}" item_value="${url}" warc_file_base="${slug}" \
    /grab/wget-at "${wget_args[@]}"
done
|