Kaynağa Gözat

"Freeze" log file object before writing to WARC to ensure that further log messages aren't picked up

This is a workaround for https://github.com/webrecorder/warcio/issues/90
master
JustAnotherArchivist 3 yıl önce
ebeveyn
işleme
dbe1ed71ab
2 değiştirilmiş dosya ile 45 ekleme ve 1 silme
  1. +42
    -0
      qwarc/utils.py
  2. +3
    -1
      qwarc/warc.py

+ 42
- 0
qwarc/utils.py Dosyayı Görüntüle

@@ -279,6 +279,48 @@ class ReadonlyFileView:
return getattr(self._fp, key)


class FrozenFileView:
    '''
    A minimal frozen view of a file object.

    The view fixes the bounds of the file: data appended to the underlying file
    object after the view was created does not become visible through the view.
    Only seek, tell, and read are implemented, and all positions taken and
    returned by them are relative to the beginning of the view.

    Note that seeks and reads move the position of the underlying file object.
    The data itself is not frozen either; any change to the underlying file
    within the bounds is visible through the view as well.
    '''

    def __init__(self, fp, begin, end):
        '''
        fp: file-like object supporting seek, tell, and read
        begin: int, offset from beginning of the file where the view starts
        end: int, offset from beginning of the file where the view ends (exclusive)
        '''

        self._fp = fp
        self._begin = begin
        self._end = end

    def seek(self, offset, whence = os.SEEK_SET):
        '''
        Move the position of the underlying file object and return the new position relative to the view's beginning.

        offset: int, interpreted relative to the view's beginning (SEEK_SET), the current position (SEEK_CUR), or the view's end (SEEK_END)
        whence: one of os.SEEK_SET, os.SEEK_CUR, os.SEEK_END; anything else raises NotImplementedError
        '''

        if whence == os.SEEK_SET:
            newPos = self._fp.seek(self._begin + offset, os.SEEK_SET)
        elif whence == os.SEEK_CUR:
            newPos = self._fp.seek(offset, os.SEEK_CUR)
        elif whence == os.SEEK_END:
            # The frozen end is an absolute offset, so the target has to be an absolute seek.
            # Seeking relative to the underlying file's real end would land far beyond the view.
            newPos = self._fp.seek(self._end + offset, os.SEEK_SET)
        else:
            raise NotImplementedError
        # Report the position in view coordinates so that it agrees with tell().
        return newPos - self._begin

    def tell(self):
        '''Return the current position relative to the view's beginning.'''

        return self._fp.tell() - self._begin

    def read(self, size = -1):
        '''
        Read up to size units from the current position without going past the frozen end.

        size: int or None; None or any negative value reads everything up to the view's end
        '''

        curPos = self._fp.tell()
        if curPos < self._begin:
            # The underlying file was positioned before the view; start from the view's beginning.
            self._fp.seek(self._begin)
            curPos = self._fp.tell()
        elif curPos > self._end:
            # Past the frozen end: return an empty result of the underlying file's type.
            return self._fp.read(0)

        remaining = self._end - curPos
        if size is None or size < 0:
            # Every "read everything" request must be capped at the frozen end;
            # forwarding a negative size would read to the underlying file's real end.
            return self._fp.read(remaining)
        return self._fp.read(min(size, remaining))


class DummyClientResponse:
'''A ClientResponse-like object for when no actual ClientResponse is available. Always evaluates to False when cast to a bool.'''



+ 3
- 1
qwarc/warc.py Dosyayı Görüntüle

@@ -231,10 +231,12 @@ class WARC:
fp.seek(0, io.SEEK_END)
length = fp.tell()
fp.seek(0)
# Work around https://github.com/webrecorder/warcio/issues/90
payload = qwarc.utils.FrozenFileView(fp, 0, length)
record = self._warcWriter.create_warc_record(
f'file://{self._logFilename}',
'resource',
payload = fp,
payload = payload,
length = length,
warc_headers_dict = {'X-QWARC-Type': 'log', 'Content-Type': 'text/plain; charset=utf-8', 'WARC-Warcinfo-ID': self._metaWarcinfoRecordID},
)


Yükleniyor…
İptal
Kaydet