Browse Source

Handle invalid UTF-8 with surrogate escapes everywhere

This includes sending it to the client, since every other solution would likely be even worse.
master
JustAnotherArchivist 3 years ago
parent
commit
2a05dffc68
1 changed file with 7 additions and 7 deletions
  1. +7
    -7
      irclog.py

+ 7
- 7
irclog.py View File

@@ -947,7 +947,7 @@ class WebServer:
def _file_iter_with_path(self, fn, path):
# Open fn, iterate over its lines yielding (path, line) tuples
try:
with open(fn, 'r') as fp:
with open(fn, 'r', errors = 'surrogateescape') as fp:
for line in fp:
yield (path, line)
except FileNotFoundError:
@@ -960,7 +960,7 @@ class WebServer:
# splitlines splits on more than desired, in particular also on various things that can occur within IRC messages (which is really anything except CR LF, basically).
# split has the downside of producing a final empty element (because stdout ends with LF) and an empty element when the input is empty.
# So just discard empty lines.
for line in stdout.decode('utf-8').split('\n'):
for line in stdout.decode('utf-8', errors = 'surrogateescape').split('\n'):
if line == '':
continue
fn, line = line.split('\0', 1)
@@ -995,7 +995,7 @@ class WebServer:
command = 'UNKNOWN'
d = datetime.datetime.utcfromtimestamp(ts).replace(tzinfo = datetime.timezone.utc)
date = f'{d:%Y-%m-%d }' if withDate else ''
lineId = hashlib.md5(f'{ts} {command} {content}'.encode('utf-8')).hexdigest()[:8]
lineId = hashlib.md5(f'{ts} {command} {content}'.encode('utf-8', errors = 'surrogateescape')).hexdigest()[:8]
if command in ('NOTICE', 'PRIVMSG'):
author, content = content.split(' ', 1)
else:
@@ -1024,7 +1024,7 @@ class WebServer:
fn = date.strftime('%Y-%m.log')
lines = list(self._raw_to_lines(self._file_iter_with_path(os.path.join(self.config['storage']['path'], request.match_info["path"], fn), request.match_info["path"]), filter = lambda path, ts, command, content: dateStart <= ts <= dateEnd))
return aiohttp.web.Response(
text = ''.join([
body = ''.join([
'<!DOCTYPE html><html lang="en">',
f'<head><title>{html.escape(self._paths[request.match_info["path"]][0])} log for {date:%Y-%m-%d}</title>{self.generalStyleTag}{self.logStyleTag}</head>',
'<body>',
@@ -1035,7 +1035,7 @@ class WebServer:
channelLinks,
'</body>',
'</html>',
]),
]).encode('utf-8', errors = 'surrogateescape'),
content_type = 'text/html'
)

@@ -1158,7 +1158,7 @@ class WebServer:
incomplete = True
lines = self._raw_to_lines(self._stdout_with_path(stdout))
return aiohttp.web.Response(
text = ''.join([
body = ''.join([
'<!DOCTYPE html><html lang="en">',
'<head>',
f'<title>{html.escape(self._paths[request.match_info["path"]][0])} search results for "{html.escape(request.query["q"])}"</title>',
@@ -1174,7 +1174,7 @@ class WebServer:
linkBar,
'</body>',
'</html>'
]),
]).encode('utf-8', errors = 'surrogateescape'),
content_type = 'text/html'
)



Loading…
Cancel
Save