From 8ee9b20718a443c798e93d8d354f8543e2da3f31 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Mon, 26 Aug 2019 13:35:46 +0000 Subject: [PATCH] Remove WARC-Target-URI header from warcinfo record MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WARC 1.1 specification, section 5.14: "A ‘warcinfo’ record shall not have a WARC-Target-URI field." --- qwarc/warc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qwarc/warc.py b/qwarc/warc.py index 8f3e6ae..13687de 100644 --- a/qwarc/warc.py +++ b/qwarc/warc.py @@ -92,7 +92,7 @@ class WARC: digester = warcio.utils.Digester('sha1') digester.update(payload.getvalue()) record = self._warcWriter.create_warc_record( - 'urn:X-qwarc:warcinfo', + None, 'warcinfo', payload = payload, warc_headers_dict = {'Content-Type': 'application/json; charset=utf-8', 'WARC-Block-Digest': str(digester)},