Browse Source

Add hidden and extrasearchchannels

This allows including the EFnet logs in the search of the new hackint channels while still keeping the log files separate.
master
JustAnotherArchivist 3 years ago
parent
commit
9687828387
2 changed files with 76 additions and 20 deletions
  1. +4
    -0
      config.example.toml
  2. +72
    -20
      irclog.py

+ 4
- 0
config.example.toml View File

@@ -34,3 +34,7 @@
#auth = false
# Whether this channel should still be actively logged. Set this to false to stop logging the channel but keep serving the previous logs.
#active = true
# Whether the channel should be hidden from normal access. If this is true, the channel is omitted from the homepage and cannot be searched; only direct accesses of log dates remain possible.
#hidden = false
# Keys of other channels that should be searched in addition to this one when a query is sent against it. If another channel referenced here requires auth, its auth value must be identical to this channel's.
#extrasearchchannels = []

+ 72
- 20
irclog.py View File

@@ -150,7 +150,7 @@ class Config(dict):
raise InvalidConfig(f'Invalid channel key {key!r}')
if not isinstance(channel, collections.abc.Mapping):
raise InvalidConfig(f'Invalid channel for {key!r}')
if any(x not in ('ircchannel', 'path', 'auth', 'active') for x in channel):
if any(x not in ('ircchannel', 'path', 'auth', 'active', 'hidden', 'extrasearchchannels') for x in channel):
raise InvalidConfig(f'Unknown key(s) found in channel {key!r}')

if 'ircchannel' not in channel:
@@ -193,6 +193,28 @@ class Config(dict):
else:
channel['active'] = True

if 'hidden' not in channel:
channel['hidden'] = False
if channel['hidden'] is not False and channel['hidden'] is not True:
raise InvalidConfig(f'Invalid channel {key!r} hidden: must be true or false')

if 'extrasearchchannels' not in channel:
channel['extrasearchchannels'] = []
if not isinstance(channel['extrasearchchannels'], collections.abc.Sequence):
raise InvalidConfig(f'Invalid channel {key!r} extrasearchchannels: must be a sequence (e.g. list)')
if any(not isinstance(x, str) for x in channel['extrasearchchannels']):
raise InvalidConfig(f'Invalid channel {key!r} extrasearchchannels: must only contain strings')
if any(x == key for x in channel['extrasearchchannels']):
raise InvalidConfig(f'Invalid channel {key!r} extrasearchchannels: cannot refer to self')
# Validation of the values is performed after reading everything

# extrasearchchannels validation after reading all channels
for key, channel in obj['channels'].items():
if any(x not in obj['channels'] for x in channel['extrasearchchannels']):
raise InvalidConfig(f'Invalid channel {key!r} extrasearchchannels: refers to undefined channel')
if any(obj['channels'][x]['auth'] is not False and obj['channels'][x]['auth'] != channel['auth'] for x in channel['extrasearchchannels']):
raise InvalidConfig(f'Invalid channel {key!r} extrasearchchannels: refers to auth-required channel whose auth differs from this channel\'s')

# Default values
finalObj = {'logging': {'level': 'INFO', 'format': '{asctime} {levelname} {name} {message}'}, 'storage': {'path': os.path.abspath(os.path.dirname(self._filename))}, 'irc': {'host': 'irc.hackint.org', 'port': 6697, 'ssl': 'yes', 'nick': 'irclogbot', 'real': 'I am an irclog bot.', 'certfile': None, 'certkeyfile': None}, 'web': {'host': '127.0.0.1', 'port': 8080}, 'channels': {}}
# Default values for channels are already set above.
@@ -741,7 +763,7 @@ class WebServer:
def __init__(self, config):
self.config = config

self._paths = {} # '/path' => ('#channel', auth) where auth is either False (no authentication) or the HTTP header value for basic auth
self._paths = {} # '/path' => ('#channel', auth, hidden, extrasearchpaths) where auth is either False (no authentication) or the HTTP header value for basic auth

self._app = aiohttp.web.Application()
self._app.add_routes([
@@ -755,7 +777,12 @@ class WebServer:
self._configChanged = asyncio.Event()

def update_config(self, config):
self._paths = {channel['path']: (channel['ircchannel'], f'Basic {base64.b64encode(channel["auth"].encode("utf-8")).decode("utf-8")}' if channel['auth'] else False) for channel in config['channels'].values()}
self._paths = {channel['path']: (
channel['ircchannel'],
f'Basic {base64.b64encode(channel["auth"].encode("utf-8")).decode("utf-8")}' if channel['auth'] else False,
channel['hidden'],
[config['channels'][otherchannel]['path'] for otherchannel in channel['extrasearchchannels']]
) for channel in config['channels'].values()}
needRebind = self.config['web'] != config['web'] #TODO only if there are changes to web.host or web.port; everything else can be updated without rebinding
self.config = config
if needRebind:
@@ -794,27 +821,46 @@ class WebServer:
async def get_homepage(self, request):
self.logger.info(f'Received request {id(request)} from {request.remote!r} for {request.path!r}')
lines = []
for path, (channel, auth) in self._paths.items():
for path, (channel, auth, hidden, extrasearchpaths) in self._paths.items():
if hidden:
continue
lines.append(f'{"(PW) " if auth else ""}<a href="/{html.escape(path)}/today">{html.escape(channel)}</a> (<a href="/{html.escape(path)}/search">search</a>)')
return aiohttp.web.Response(text = f'<!DOCTYPE html><html lang="en"><head><title>IRC logs</title></head><body>{"<br />".join(lines)}</body></html>', content_type = 'text/html')

def _raw_to_lines(self, f, filter = lambda dt, command, content: True):
# f: iterable producing str lines (e.g. file-like) on iteration or bytes
# filter: function taking the line fields (ts: float, command: str, content: str) and returning whether to include the line
if isinstance(f, bytes):
f = f.decode('utf-8').splitlines()
for line in f:
def _file_iter_with_path(self, fn, path):
# Open fn, iterate over its lines yielding (path, line) tuples
with open(fn, 'r') as fp:
for line in fp:
yield (path, line)

def _stdout_with_path(self, stdout):
# Process grep output with --with-filenames, --null, and --line-number into (path, line) tuples; this blindly assumes the expected directory structure of '.../path/YYYY-MM.log'.
# Lines are sorted by timestamp, filename, and line number to ensure a consistent and chronological order.
out = []
for line in stdout.decode('utf-8').splitlines():
fn, line = line.split('\0', 1)
_, path, _ = fn.rsplit('/', 2)
ln, line = line.split(':', 1)
ln = int(ln)
ts = float(line.split(' ', 1)[0])
out.append((ts, fn, ln, path, line))
yield from (x[3:] for x in sorted(out, key = lambda y: y[0:3]))

def _raw_to_lines(self, f, filter = lambda path, dt, command, content: True):
# f: iterable producing tuples (path, line) where each line has the format '<ts> " " <command> " " <content>', <ts> is a float, <command> is one of the valid commands, and <content> is any str
# filter: function taking the line fields (path: str, ts: float, command: str, content: str) and returning whether to include the line
for path, line in f:
ts, command, content = line.strip().split(' ', 2)
ts = float(ts)
if not filter(ts, command, content):
if not filter(path, ts, command, content):
continue
yield ts, command, content
yield (path, ts, command, content)

def _render_log(self, lines, path, withDate = False):
# lines: iterable of (timestamp: float, command: str, content: str)
def _render_log(self, lines, withDate = False):
# lines: iterable of (path: str, timestamp: float, command: str, content: str)
# withDate: whether to include the date with the time of the log line
ret = []
for ts, command, content in lines:
for path, ts, command, content in lines:
d = datetime.datetime.utcfromtimestamp(ts).replace(tzinfo = datetime.timezone.utc)
date = f'{d:%Y-%m-%d }' if withDate else ''
lineId = hashlib.md5(f'{ts} {command} {content}'.encode('utf-8')).hexdigest()[:8]
@@ -831,20 +877,26 @@ class WebServer:
dateEnd = (date + datetime.timedelta(days = 1)).timestamp()
#TODO Implement this in a better way...
fn = date.strftime('%Y-%m.log')
with open(os.path.join(self.config['storage']['path'], request.match_info["path"], fn), 'r') as fp:
lines = list(self._raw_to_lines(fp, filter = lambda ts, command, content: dateStart <= ts <= dateEnd))
return aiohttp.web.Response(text = f'<!DOCTYPE html><html lang="en"><head><title>{html.escape(self._paths[request.match_info["path"]][0])} log for {date:%Y-%m-%d}</title>{self.logStyleTag}</head><body><a href="/{html.escape(request.match_info["path"])}/{(date - datetime.timedelta(days = 1)).strftime("%Y-%m-%d")}">Previous day</a> <a href="/{html.escape(request.match_info["path"])}/{(date + datetime.timedelta(days = 1)).strftime("%Y-%m-%d")}">Next day</a><br /><br />' + self._render_log(lines, request.match_info['path']) + '</body></html>', content_type = 'text/html')
lines = list(self._raw_to_lines(self._file_iter_with_path(os.path.join(self.config['storage']['path'], request.match_info["path"], fn), request.match_info["path"]), filter = lambda path, ts, command, content: dateStart <= ts <= dateEnd))
return aiohttp.web.Response(text = f'<!DOCTYPE html><html lang="en"><head><title>{html.escape(self._paths[request.match_info["path"]][0])} log for {date:%Y-%m-%d}</title>{self.logStyleTag}</head><body><a href="/{html.escape(request.match_info["path"])}/{(date - datetime.timedelta(days = 1)).strftime("%Y-%m-%d")}">Previous day</a> <a href="/{html.escape(request.match_info["path"])}/{(date + datetime.timedelta(days = 1)).strftime("%Y-%m-%d")}">Next day</a><br /><br />' + self._render_log(lines) + '</body></html>', content_type = 'text/html')

async def search(self, request):
self.logger.info(f'Received request {id(request)} from {request.remote!r} for {request.path!r}')

if self._paths[request.match_info['path']][2]: # Hidden channels aren't searchable
return aiohttp.web.HTTPNotFound()

if 'q' not in request.query:
return aiohttp.web.Response(text = f'<!DOCTYPE html><html lang="en"><head><title>{html.escape(self._paths[request.match_info["path"]][0])} search</title></head><body><form><input name="q" /><input type="submit" value="Search!" /></form></body></html>', content_type = 'text/html')

proc = await asyncio.create_subprocess_exec('grep', '--fixed-strings', '--recursive', '--no-filename', request.query['q'], os.path.join(self.config['storage']['path'], request.match_info['path'], ''), stdout = asyncio.subprocess.PIPE)
cmd = ['grep', '--fixed-strings', '--recursive', '--with-filename', '--null', '--line-number', request.query['q']]
for path in itertools.chain((request.match_info['path'],), self._paths[request.match_info['path']][3]):
cmd.append(os.path.join(self.config['storage']['path'], path, ''))
proc = await asyncio.create_subprocess_exec(*cmd, stdout = asyncio.subprocess.PIPE)
#TODO Limit size and runtime
stdout, _ = await proc.communicate()
return aiohttp.web.Response(text = f'<!DOCTYPE html><html lang="en"><head><title>{html.escape(self._paths[request.match_info["path"]][0])} search results for "{html.escape(request.query["q"])}"</title>{self.logStyleTag}</head><body>' + self._render_log(self._raw_to_lines(stdout), request.match_info['path'], withDate = True) + '</body></html>', content_type = 'text/html')
lines = self._raw_to_lines(self._stdout_with_path(stdout))
return aiohttp.web.Response(text = f'<!DOCTYPE html><html lang="en"><head><title>{html.escape(self._paths[request.match_info["path"]][0])} search results for "{html.escape(request.query["q"])}"</title>{self.logStyleTag}</head><body>' + self._render_log(lines, withDate = True) + '</body></html>', content_type = 'text/html')


def configure_logging(config):


Loading…
Cancel
Save