|
|
@@ -0,0 +1,34 @@ |
|
|
|
#!/bin/bash
#
# Grab one or more Mercurial repositories with wget-at, writing the output
# of each crawl (log, temporary document, WARC) under ./data/<name>/.
#
# Usage: pass every repository URL as a positional argument.
#
# <name> is the repository URL with each "/" replaced by "_". The crawl for a
# repository starts at "<url>?cmd=capabilities". Note that mercurial.lua is
# given to wget-at as a relative path.

#######################################
# Print the flat name derived from a repository URL.
# Arguments: $1 - repository URL
# Outputs:   the URL with every "/" replaced by "_", on stdout
#######################################
repo_name() {
  local url=$1
  printf '%s\n' "${url//\//_}"
}

#######################################
# Run one wget-at crawl for a single repository.
# Arguments: $1 - repository URL
# Outputs:   diagnostics on stderr when the repository is skipped
# Returns:   1 if the URL is empty or the data directory cannot be created,
#            otherwise the exit status of wget-at
#######################################
grab_repo() {
  local url=$1
  local name itemDir warcName

  # An empty argument would map to ./data// and a WARC called "-main".
  if [[ -z "${url}" ]]; then
    printf 'skipping empty repository URL\n' >&2
    return 1
  fi

  name="$(repo_name "${url}")"
  # The trailing slash is kept on purpose: this exact string is handed to
  # wget-at through the item_dir environment variable.
  itemDir="./data/${name}/"
  warcName="${name}"

  # Without the directory wget-at has nowhere to write its log or WARC.
  if ! mkdir -p "${itemDir}"; then
    printf 'cannot create %s, skipping %s\n' "${itemDir}" "${url}" >&2
    return 1
  fi

  # item_dir, item_value and warc_file_base are set only in the environment
  # of wget-at (presumably read by mercurial.lua - confirm against that file).
  item_dir="${itemDir}" item_value="${url}" warc_file_base="${warcName}" /grab/wget-at \
    -U 'mercurial/proto-1.0 (Mercurial 5.3.1)' \
    -nv \
    --no-cookies \
    --content-on-error \
    --lua-script mercurial.lua \
    -o "${itemDir}/wget.log" \
    --no-check-certificate \
    --output-document "${itemDir}/wget.tmp" \
    --truncate-output \
    -e robots=off \
    --rotate-dns \
    --recursive --level=inf \
    --no-parent \
    --page-requisites \
    --timeout 30 \
    --tries inf \
    --span-hosts \
    --waitretry 30 \
    --warc-file "${itemDir}/${warcName}-main" \
    --warc-header 'operator: Archive Team' \
    --warc-header 'mercurial-dld-script-version: 20201031.01' \
    --warc-dedup-url-agnostic \
    --warc-header "mercurial-repository: ${url}" \
    --warc-header 'warc-type: main' \
    "${url}?cmd=capabilities"
}

# "for url" with no "in" list iterates over the script's positional
# parameters. A failed repository does not stop the remaining ones.
for url; do
  grab_repo "${url}"
done