diff --git a/config.example.toml b/config.example.toml index ace7e68..49bae70 100644 --- a/config.example.toml +++ b/config.example.toml @@ -34,3 +34,7 @@ #auth = false # Whether this channel should still be actively logged. Set this to false to stop logging the channel but keep serving the previous logs. #active = true + # Whether the channel should be hidden from normal access. If this is true, only direct log date accesses are possible, and the log is hidden on the homepage and not directly searchable. + #hidden = false + # Keys of other channels that should be searched in addition to this one when a query is sent against it. If auth is required on another channel referenced here, it must be equal to this channel's. + #extrasearchchannels = [] diff --git a/irclog.py b/irclog.py index 02363ca..0bedb40 100644 --- a/irclog.py +++ b/irclog.py @@ -150,7 +150,7 @@ class Config(dict): raise InvalidConfig(f'Invalid channel key {key!r}') if not isinstance(channel, collections.abc.Mapping): raise InvalidConfig(f'Invalid channel for {key!r}') - if any(x not in ('ircchannel', 'path', 'auth', 'active') for x in channel): + if any(x not in ('ircchannel', 'path', 'auth', 'active', 'hidden', 'extrasearchchannels') for x in channel): raise InvalidConfig(f'Unknown key(s) found in channel {key!r}') if 'ircchannel' not in channel: @@ -193,6 +193,28 @@ class Config(dict): else: channel['active'] = True + if 'hidden' not in channel: + channel['hidden'] = False + if channel['hidden'] is not False and channel['hidden'] is not True: + raise InvalidConfig(f'Invalid channel {key!r} hidden: must be true or false') + + if 'extrasearchchannels' not in channel: + channel['extrasearchchannels'] = [] + if not isinstance(channel['extrasearchchannels'], collections.abc.Sequence): + raise InvalidConfig(f'Invalid channel {key!r} extrasearchchannels: must be a sequence (e.g. list)') + if any(not isinstance(x, str) for x in channel['extrasearchchannels']): + raise InvalidConfig(f'Invalid channel {key!r} extrasearchchannels: must only contain strings') + if any(x == key for x in channel['extrasearchchannels']): + raise InvalidConfig(f'Invalid channel {key!r} extrasearchchannels: cannot refer to self') + # Validation of the values is performed after reading everything + + # extrasearchchannels validation after reading all channels + for key, channel in obj['channels'].items(): + if any(x not in obj['channels'] for x in channel['extrasearchchannels']): + raise InvalidConfig(f'Invalid channel {key!r} extrasearchchannels: refers to undefined channel') + if any(obj['channels'][x]['auth'] is not False and obj['channels'][x]['auth'] != channel['auth'] for x in channel['extrasearchchannels']): + raise InvalidConfig(f'Invalid channel {key!r} extrasearchchannels: refers to auth-required channel whose auth differs from this channel\'s') + # Default values finalObj = {'logging': {'level': 'INFO', 'format': '{asctime} {levelname} {name} {message}'}, 'storage': {'path': os.path.abspath(os.path.dirname(self._filename))}, 'irc': {'host': 'irc.hackint.org', 'port': 6697, 'ssl': 'yes', 'nick': 'irclogbot', 'real': 'I am an irclog bot.', 'certfile': None, 'certkeyfile': None}, 'web': {'host': '127.0.0.1', 'port': 8080}, 'channels': {}} # Default values for channels are already set above. @@ -741,7 +763,7 @@ class WebServer: def __init__(self, config): self.config = config - self._paths = {} # '/path' => ('#channel', auth) where auth is either False (no authentication) or the HTTP header value for basic auth + self._paths = {} # '/path' => ('#channel', auth, hidden, extrasearchpaths) where auth is either False (no authentication) or the HTTP header value for basic auth self._app = aiohttp.web.Application() self._app.add_routes([ @@ -755,7 +777,12 @@ class WebServer: self._configChanged = asyncio.Event() def update_config(self, config): - self._paths = {channel['path']: (channel['ircchannel'], f'Basic {base64.b64encode(channel["auth"].encode("utf-8")).decode("utf-8")}' if channel['auth'] else False) for channel in config['channels'].values()} + self._paths = {channel['path']: ( + channel['ircchannel'], + f'Basic {base64.b64encode(channel["auth"].encode("utf-8")).decode("utf-8")}' if channel['auth'] else False, + channel['hidden'], + [config['channels'][otherchannel]['path'] for otherchannel in channel['extrasearchchannels']] + ) for channel in config['channels'].values()} needRebind = self.config['web'] != config['web'] #TODO only if there are changes to web.host or web.port; everything else can be updated without rebinding self.config = config if needRebind: @@ -794,27 +821,46 @@ class WebServer: async def get_homepage(self, request): self.logger.info(f'Received request {id(request)} from {request.remote!r} for {request.path!r}') lines = [] - for path, (channel, auth) in self._paths.items(): + for path, (channel, auth, hidden, extrasearchpaths) in self._paths.items(): + if hidden: + continue lines.append(f'{"(PW) " if auth else ""}{html.escape(channel)} (search)') return aiohttp.web.Response(text = f'IRC logs{"
".join(lines)}', content_type = 'text/html') - def _raw_to_lines(self, f, filter = lambda dt, command, content: True): - # f: iterable producing str lines (e.g. file-like) on iteration or bytes - # filter: function taking the line fields (ts: float, command: str, content: str) and returning whether to include the line - if isinstance(f, bytes): - f = f.decode('utf-8').splitlines() - for line in f: + def _file_iter_with_path(self, fn, path): + # Open fn, iterate over its lines yielding (path, line) tuples + with open(fn, 'r') as fp: + for line in fp: + yield (path, line) + + def _stdout_with_path(self, stdout): + # Process grep output with --with-filenames, --null, and --line-number into (path, line) tuples; this blindly assumes the expected directory structure of '.../path/YYYY-MM.log'. + # Lines are sorted by timestamp, filename, and line number to ensure a consistent and chronological order. + out = [] + for line in stdout.decode('utf-8').splitlines(): + fn, line = line.split('\0', 1) + _, path, _ = fn.rsplit('/', 2) + ln, line = line.split(':', 1) + ln = int(ln) + ts = float(line.split(' ', 1)[0]) + out.append((ts, fn, ln, path, line)) + yield from (x[3:] for x in sorted(out, key = lambda y: y[0:3])) + + def _raw_to_lines(self, f, filter = lambda path, dt, command, content: True): + # f: iterable producing tuples (path, line) where each line has the format ' " " " " ', is a float, is one of the valid commands, and is any str + # filter: function taking the line fields (path: str, ts: float, command: str, content: str) and returning whether to include the line + for path, line in f: ts, command, content = line.strip().split(' ', 2) ts = float(ts) - if not filter(ts, command, content): + if not filter(path, ts, command, content): continue - yield ts, command, content + yield (path, ts, command, content) - def _render_log(self, lines, path, withDate = False): - # lines: iterable of (timestamp: float, command: str, content: str) + def _render_log(self, lines, withDate = False): + # lines: iterable of (path: str, timestamp: float, command: str, content: str) # withDate: whether to include the date with the time of the log line ret = [] - for ts, command, content in lines: + for path, ts, command, content in lines: d = datetime.datetime.utcfromtimestamp(ts).replace(tzinfo = datetime.timezone.utc) date = f'{d:%Y-%m-%d }' if withDate else '' lineId = hashlib.md5(f'{ts} {command} {content}'.encode('utf-8')).hexdigest()[:8] @@ -831,20 +877,26 @@ class WebServer: dateEnd = (date + datetime.timedelta(days = 1)).timestamp() #TODO Implement this in a better way... fn = date.strftime('%Y-%m.log') - with open(os.path.join(self.config['storage']['path'], request.match_info["path"], fn), 'r') as fp: - lines = list(self._raw_to_lines(fp, filter = lambda ts, command, content: dateStart <= ts <= dateEnd)) - return aiohttp.web.Response(text = f'{html.escape(self._paths[request.match_info["path"]][0])} log for {date:%Y-%m-%d}{self.logStyleTag}Previous day Next day

' + self._render_log(lines, request.match_info['path']) + '', content_type = 'text/html') + lines = list(self._raw_to_lines(self._file_iter_with_path(os.path.join(self.config['storage']['path'], request.match_info["path"], fn), request.match_info["path"]), filter = lambda path, ts, command, content: dateStart <= ts <= dateEnd)) + return aiohttp.web.Response(text = f'{html.escape(self._paths[request.match_info["path"]][0])} log for {date:%Y-%m-%d}{self.logStyleTag}Previous day Next day

' + self._render_log(lines) + '', content_type = 'text/html') async def search(self, request): self.logger.info(f'Received request {id(request)} from {request.remote!r} for {request.path!r}') + if self._paths[request.match_info['path']][2]: # Hidden channels aren't searchable + return aiohttp.web.HTTPNotFound() + if 'q' not in request.query: return aiohttp.web.Response(text = f'{html.escape(self._paths[request.match_info["path"]][0])} search
', content_type = 'text/html') - proc = await asyncio.create_subprocess_exec('grep', '--fixed-strings', '--recursive', '--no-filename', request.query['q'], os.path.join(self.config['storage']['path'], request.match_info['path'], ''), stdout = asyncio.subprocess.PIPE) + cmd = ['grep', '--fixed-strings', '--recursive', '--with-filename', '--null', '--line-number', request.query['q']] + for path in itertools.chain((request.match_info['path'],), self._paths[request.match_info['path']][3]): + cmd.append(os.path.join(self.config['storage']['path'], path, '')) + proc = await asyncio.create_subprocess_exec(*cmd, stdout = asyncio.subprocess.PIPE) #TODO Limit size and runtime stdout, _ = await proc.communicate() - return aiohttp.web.Response(text = f'{html.escape(self._paths[request.match_info["path"]][0])} search results for "{html.escape(request.query["q"])}"{self.logStyleTag}' + self._render_log(self._raw_to_lines(stdout), request.match_info['path'], withDate = True) + '', content_type = 'text/html') + lines = self._raw_to_lines(self._stdout_with_path(stdout)) + return aiohttp.web.Response(text = f'{html.escape(self._paths[request.match_info["path"]][0])} search results for "{html.escape(request.query["q"])}"{self.logStyleTag}' + self._render_log(lines, withDate = True) + '', content_type = 'text/html') def configure_logging(config):