Browse Source

Add hidden and extrasearchchannels

This allows including the EFnet logs in the search of the new hackint channels while still keeping the log files separate.
master
JustAnotherArchivist 3 years ago
parent
commit
9687828387
2 changed files with 76 additions and 20 deletions
  1. +4
    -0
      config.example.toml
  2. +72
    -20
      irclog.py

+ 4
- 0
config.example.toml View File

@@ -34,3 +34,7 @@
#auth = false
# Whether this channel should still be actively logged. Set this to false to stop logging the channel but keep serving the previous logs.
#active = true
# Whether the channel should be hidden from normal access. If this is true, the channel is omitted from the homepage and cannot be searched; only direct accesses of log dates remain possible.
#hidden = false
# Keys of other channels that should be searched in addition to this one when a query is sent against it. If another channel referenced here requires auth, its auth value must be identical to this channel's.
#extrasearchchannels = []

+ 72
- 20
irclog.py View File

@@ -150,7 +150,7 @@ class Config(dict):
raise InvalidConfig(f'Invalid channel key {key!r}')
if not isinstance(channel, collections.abc.Mapping):
raise InvalidConfig(f'Invalid channel for {key!r}')
if any(x not in ('ircchannel', 'path', 'auth', 'active') for x in channel):
if any(x not in ('ircchannel', 'path', 'auth', 'active', 'hidden', 'extrasearchchannels') for x in channel):
raise InvalidConfig(f'Unknown key(s) found in channel {key!r}')

if 'ircchannel' not in channel:
@@ -193,6 +193,28 @@ class Config(dict):
else:
channel['active'] = True

if 'hidden' not in channel:
channel['hidden'] = False
if channel['hidden'] is not False and channel['hidden'] is not True:
raise InvalidConfig(f'Invalid channel {key!r} hidden: must be true or false')

if 'extrasearchchannels' not in channel:
channel['extrasearchchannels'] = []
if not isinstance(channel['extrasearchchannels'], collections.abc.Sequence):
raise InvalidConfig(f'Invalid channel {key!r} extrasearchchannels: must be a sequence (e.g. list)')
if any(not isinstance(x, str) for x in channel['extrasearchchannels']):
raise InvalidConfig(f'Invalid channel {key!r} extrasearchchannels: must only contain strings')
if any(x == key for x in channel['extrasearchchannels']):
raise InvalidConfig(f'Invalid channel {key!r} extrasearchchannels: cannot refer to self')
# Validation of the values is performed after reading everything

# extrasearchchannels validation after reading all channels
for key, channel in obj['channels'].items():
if any(x not in obj['channels'] for x in channel['extrasearchchannels']):
raise InvalidConfig(f'Invalid channel {key!r} extrasearchchannels: refers to undefined channel')
if any(obj['channels'][x]['auth'] is not False and obj['channels'][x]['auth'] != channel['auth'] for x in channel['extrasearchchannels']):
raise InvalidConfig(f'Invalid channel {key!r} extrasearchchannels: refers to auth-required channel whose auth differs from this channel\'s')

# Default values
finalObj = {'logging': {'level': 'INFO', 'format': '{asctime} {levelname} {name} {message}'}, 'storage': {'path': os.path.abspath(os.path.dirname(self._filename))}, 'irc': {'host': 'irc.hackint.org', 'port': 6697, 'ssl': 'yes', 'nick': 'irclogbot', 'real': 'I am an irclog bot.', 'certfile': None, 'certkeyfile': None}, 'web': {'host': '127.0.0.1', 'port': 8080}, 'channels': {}}
# Default values for channels are already set above.
@@ -741,7 +763,7 @@ class WebServer:
def __init__(self, config):
self.config = config

self._paths = {} # '/path' => ('#channel', auth) where auth is either False (no authentication) or the HTTP header value for basic auth
self._paths = {} # '/path' => ('#channel', auth, hidden, extrasearchpaths) where auth is either False (no authentication) or the HTTP header value for basic auth

self._app = aiohttp.web.Application()
self._app.add_routes([
@@ -755,7 +777,12 @@ class WebServer:
self._configChanged = asyncio.Event()

def update_config(self, config):
self._paths = {channel['path']: (channel['ircchannel'], f'Basic {base64.b64encode(channel["auth"].encode("utf-8")).decode("utf-8")}' if channel['auth'] else False) for channel in config['channels'].values()}
self._paths = {channel['path']: (
channel['ircchannel'],
f'Basic {base64.b64encode(channel["auth"].encode("utf-8")).decode("utf-8")}' if channel['auth'] else False,
channel['hidden'],
[config['channels'][otherchannel]['path'] for otherchannel in channel['extrasearchchannels']]
) for channel in config['channels'].values()}
needRebind = self.config['web'] != config['web'] #TODO only if there are changes to web.host or web.port; everything else can be updated without rebinding
self.config = config
if needRebind:
@@ -794,27 +821,46 @@ class WebServer:
async def get_homepage(self, request):
self.logger.info(f'Received request {id(request)} from {request.remote!r} for {request.path!r}')
lines = []
for path, (channel, auth) in self._paths.items():
for path, (channel, auth, hidden, extrasearchpaths) in self._paths.items():
if hidden:
continue
lines.append(f'{"(PW) " if auth else ""}<a href="/{html.escape(path)}/today">{html.escape(channel)}</a> (<a href="/{html.escape(path)}/search">search</a>)')
return aiohttp.web.Response(text = f'<!DOCTYPE html><html lang="en"><head><title>IRC logs</title></head><body>{"<br />".join(lines)}</body></html>', content_type = 'text/html')

def _raw_to_lines(self, f, filter = lambda dt, command, content: True):
# f: iterable producing str lines (e.g. file-like) on iteration or bytes
# filter: function taking the line fields (ts: float, command: str, content: str) and returning whether to include the line
if isinstance(f, bytes):
f = f.decode('utf-8').splitlines()
for line in f:
def _file_iter_with_path(self, fn, path):
# Open fn, iterate over its lines yielding (path, line) tuples
with open(fn, 'r') as fp:
for line in fp:
yield (path, line)

def _stdout_with_path(self, stdout):
# Process grep output with --with-filenames, --null, and --line-number into (path, line) tuples; this blindly assumes the expected directory structure of '.../path/YYYY-MM.log'.
# Lines are sorted by timestamp, filename, and line number to ensure a consistent and chronological order.
out = []
for line in stdout.decode('utf-8').splitlines():
fn, line = line.split('\0', 1)
_, path, _ = fn.rsplit('/', 2)
ln, line = line.split(':', 1)
ln = int(ln)
ts = float(line.split(' ', 1)[0])
out.append((ts, fn, ln, path, line))
yield from (x[3:] for x in sorted(out, key = lambda y: y[0:3]))

def _raw_to_lines(self, f, filter = lambda path, dt, command, content: True):
# f: iterable producing tuples (path, line) where each line has the format '<ts> " " <command> " " <content>', <ts> is a float, <command> is one of the valid commands, and <content> is any str
# filter: function taking the line fields (path: str, ts: float, command: str, content: str) and returning whether to include the line
for path, line in f:
ts, command, content = line.strip().split(' ', 2)
ts = float(ts)
if not filter(ts, command, content):
if not filter(path, ts, command, content):
continue
yield ts, command, content
yield (path, ts, command, content)

def _render_log(self, lines, path, withDate = False):
# lines: iterable of (timestamp: float, command: str, content: str)
def _render_log(self, lines, withDate = False):
# lines: iterable of (path: str, timestamp: float, command: str, content: str)
# withDate: whether to include the date with the time of the log line
ret = []
for ts, command, content in lines:
for path, ts, command, content in lines:
d = datetime.datetime.utcfromtimestamp(ts).replace(tzinfo = datetime.timezone.utc)
date = f'{d:%Y-%m-%d }' if withDate else ''
lineId = hashlib.md5(f'{ts} {command} {content}'.encode('utf-8')).hexdigest()[:8]
@@ -831,20 +877,26 @@ class WebServer:
dateEnd = (date + datetime.timedelta(days = 1)).timestamp()
#TODO Implement this in a better way...
fn = date.strftime('%Y-%m.log')
with open(os.path.join(self.config['storage']['path'], request.match_info["path"], fn), 'r') as fp:
lines = list(self._raw_to_lines(fp, filter = lambda ts, command, content: dateStart <= ts <= dateEnd))
return aiohttp.web.Response(text = f'<!DOCTYPE html><html lang="en"><head><title>{html.escape(self._paths[request.match_info["path"]][0])} log for {date:%Y-%m-%d}</title>{self.logStyleTag}</head><body><a href="/{html.escape(request.match_info["path"])}/{(date - datetime.timedelta(days = 1)).strftime("%Y-%m-%d")}">Previous day</a> <a href="/{html.escape(request.match_info["path"])}/{(date + datetime.timedelta(days = 1)).strftime("%Y-%m-%d")}">Next day</a><br /><br />' + self._render_log(lines, request.match_info['path']) + '</body></html>', content_type = 'text/html')
lines = list(self._raw_to_lines(self._file_iter_with_path(os.path.join(self.config['storage']['path'], request.match_info["path"], fn), request.match_info["path"]), filter = lambda path, ts, command, content: dateStart <= ts <= dateEnd))
return aiohttp.web.Response(text = f'<!DOCTYPE html><html lang="en"><head><title>{html.escape(self._paths[request.match_info["path"]][0])} log for {date:%Y-%m-%d}</title>{self.logStyleTag}</head><body><a href="/{html.escape(request.match_info["path"])}/{(date - datetime.timedelta(days = 1)).strftime("%Y-%m-%d")}">Previous day</a> <a href="/{html.escape(request.match_info["path"])}/{(date + datetime.timedelta(days = 1)).strftime("%Y-%m-%d")}">Next day</a><br /><br />' + self._render_log(lines) + '</body></html>', content_type = 'text/html')

async def search(self, request):
self.logger.info(f'Received request {id(request)} from {request.remote!r} for {request.path!r}')

if self._paths[request.match_info['path']][2]: # Hidden channels aren't searchable
return aiohttp.web.HTTPNotFound()

if 'q' not in request.query:
return aiohttp.web.Response(text = f'<!DOCTYPE html><html lang="en"><head><title>{html.escape(self._paths[request.match_info["path"]][0])} search</title></head><body><form><input name="q" /><input type="submit" value="Search!" /></form></body></html>', content_type = 'text/html')

proc = await asyncio.create_subprocess_exec('grep', '--fixed-strings', '--recursive', '--no-filename', request.query['q'], os.path.join(self.config['storage']['path'], request.match_info['path'], ''), stdout = asyncio.subprocess.PIPE)
cmd = ['grep', '--fixed-strings', '--recursive', '--with-filename', '--null', '--line-number', request.query['q']]
for path in itertools.chain((request.match_info['path'],), self._paths[request.match_info['path']][3]):
cmd.append(os.path.join(self.config['storage']['path'], path, ''))
proc = await asyncio.create_subprocess_exec(*cmd, stdout = asyncio.subprocess.PIPE)
#TODO Limit size and runtime
stdout, _ = await proc.communicate()
return aiohttp.web.Response(text = f'<!DOCTYPE html><html lang="en"><head><title>{html.escape(self._paths[request.match_info["path"]][0])} search results for "{html.escape(request.query["q"])}"</title>{self.logStyleTag}</head><body>' + self._render_log(self._raw_to_lines(stdout), request.match_info['path'], withDate = True) + '</body></html>', content_type = 'text/html')
lines = self._raw_to_lines(self._stdout_with_path(stdout))
return aiohttp.web.Response(text = f'<!DOCTYPE html><html lang="en"><head><title>{html.escape(self._paths[request.match_info["path"]][0])} search results for "{html.escape(request.query["q"])}"</title>{self.logStyleTag}</head><body>' + self._render_log(lines, withDate = True) + '</body></html>', content_type = 'text/html')


def configure_logging(config):


Loading…
Cancel
Save