diff --git a/irclog.py b/irclog.py index 975b061..69cb056 100644 --- a/irclog.py +++ b/irclog.py @@ -5,6 +5,8 @@ import base64 import collections import importlib.util import inspect +import ircstates +import irctokens import itertools import logging import os.path @@ -199,7 +201,7 @@ class IRCClientProtocol(asyncio.Protocol): self.userChannels = collections.defaultdict(set) # List of which channels a user is known to be in; nickname:str -> {channel:str, ...} self.sasl = bool(self.config['irc']['certfile'] and self.config['irc']['certkeyfile']) self.authenticated = False - self.usermask = None + self.server = ircstates.Server(self.config['irc']['host']) @staticmethod def nick_command(nick: str): @@ -229,11 +231,6 @@ class IRCClientProtocol(asyncio.Protocol): nick = nick.split('@', 1)[0] return nick - def _maybe_set_usermask(self, usermask): - if b'@' in usermask and b'!' in usermask.split(b'@')[0] and all(x not in usermask for x in (b' ', b'*', b'#', b'&')): - self.usermask = usermask - self.logger.debug(f'Usermask is now {usermask!r}') - def connection_made(self, transport): self.logger.info('IRC connected') self.transport = transport @@ -296,88 +293,82 @@ class IRCClientProtocol(asyncio.Protocol): self.messageQueue.put_nowait((time_, b'> ' + data, None)) def data_received(self, data): - self.logger.debug(f'Data received: {data!r}') time_ = time.time() + self.logger.debug(f'Data received: {data!r}') # Split received data on CRLF. If there's any data left in the buffer, prepend it to the first message and process that. # Then, process all messages except the last one (since data might not end on a CRLF) and keep the remainder in the buffer. # If data does end with CRLF, all messages will have been processed and the buffer will be empty again. messages = data.split(b'\r\n') if self.buffer: - self.message_received(time_, self.buffer + messages[0]) - messages = messages[1:] + messages[0] = self.buffer + messages[0] for message in messages[:-1]: - self.message_received(time_, message) + lines = self.server.recv(message + b'\r\n') + assert len(lines) == 1 + self.server.parse_tokens(lines[0]) + self.message_received(time_, message, lines[0]) self.buffer = messages[-1] - def message_received(self, time_, message): + def message_received(self, time_, message, line): self.logger.debug(f'Message received at {time_}: {message!r}') - rawMessage = message - hasPrefix = False - if message.startswith(b':') and b' ' in message: - # Prefixed message, extract command + parameters (the prefix cannot contain a space) - prefix, message = message.split(b' ', 1) - hasPrefix = True - # Queue message for storage, except QUITs and NICKs which are handled below with user tracking - if not message.startswith(b'QUIT ') and message != b'QUIT' and not message.startswith(b'NICK '): - self.messageQueue.put_nowait((time_, b'< ' + rawMessage, None)) + # Queue message for storage + sendGeneral = True + if line.command in ('QUIT', 'NICK') and line.source: + try: + user = self.server.users[line.hostmask.nickname] + except KeyError: + pass + else: + sendGeneral = False + self.messageQueue.put_nowait((time_, b'< ' + message, user.channels)) + if sendGeneral: + self.messageQueue.put_nowait((time_, b'< ' + message, None)) # PING/PONG - if message.startswith(b'PING '): - self.send(b'PONG ' + message[5:]) + if line.command == 'PING': + self.send(irctokens.build('PONG', line.params).format().encode('utf-8')) # SASL - elif message.startswith(b'CAP ') and self.sasl: - if message[message.find(b' ', 4) + 1:] == b'ACK :sasl': + elif line.command == 'CAP' and self.sasl: + if line.params[-2] == 'ACK' and 'sasl' in line.params[-1].split(' '): self.send(b'AUTHENTICATE EXTERNAL') else: self.logger.error(f'Received unexpected CAP reply {message!r}, terminating connection') self.transport.close() - elif message == b'AUTHENTICATE +': + elif line.command == 'AUTHENTICATE' and line.params == ['+']: self.send(b'AUTHENTICATE +') - elif message.startswith(b'900 '): # "You are now logged in", includes the usermask - words = message.split(b' ') - if len(words) >= 3 and b'!' in words[2] and b'@' in words[2]: - if b'!~' not in words[2]: - # At least Charybdis seems to always return the user without a tilde, even if identd failed. Assume no identd and account for that extra tilde. - words[2] = words[2].replace(b'!', b'!~', 1) - self._maybe_set_usermask(words[2]) - elif message.startswith(b'903 '): # SASL auth successful + elif line.command == '903': # SASL auth successful self.authenticated = True self.send(b'CAP END') - elif any(message.startswith(x) for x in (b'902 ', b'904 ', b'905 ', b'906 ', b'908 ')): + elif line.command in ('902', '904', '905', '906', '908'): self.logger.error('SASL error, terminating connection') self.transport.close() # NICK errors - elif any(message.startswith(x) for x in (b'431 ', b'432 ', b'433 ', b'436 ')): + elif line.command in ('431', '432', '433', '436'): self.logger.error(f'Failed to set nickname: {message!r}, terminating connection') self.transport.close() # USER errors - elif any(message.startswith(x) for x in (b'461 ', b'462 ')): + elif line.command in ('461', '462'): self.logger.error(f'Failed to register: {message!r}, terminating connection') self.transport.close() # JOIN errors - elif any(message.startswith(x) for x in (b'405 ', b'471 ', b'473 ', b'474 ', b'475 ')): + elif line.command in ('405', '471', '473', '474', '475'): self.logger.error(f'Failed to join channel: {message!r}, terminating connection') self.transport.close() # PART errors - elif message.startswith(b'442 '): + elif line.command == '442': self.logger.error(f'Failed to part channel: {message!r}') # JOIN/PART errors - elif message.startswith(b'403 '): + elif line.command == '403': self.logger.error(f'Failed to join or part channel: {message!r}') - # PRIVMSG errors - elif any(message.startswith(x) for x in (b'401 ', b'404 ', b'407 ', b'411 ', b'412 ', b'413 ', b'414 ')): - self.logger.error(f'Failed to send message: {message!r}') - # Connection registration reply - elif message.startswith(b'001 '): + elif line.command == '001': self.logger.info('IRC connection registered') if self.sasl and not self.authenticated: self.logger.error('IRC connection registered but not authenticated, terminating connection') @@ -385,102 +376,6 @@ class IRCClientProtocol(asyncio.Protocol): return self._send_join_part(b'JOIN', self.channels) - # JOIN success - elif message.startswith(b'JOIN ') and not self.usermask: - # If this is my own join message, it should contain the usermask in the prefix - if rawMessage.startswith(b':' + self.config['irc']['nick'].encode('utf-8') + b'!') and b' ' in rawMessage: - usermask = rawMessage.split(b' ', 1)[0][1:] - self._maybe_set_usermask(usermask) - - # Services host change - elif message.startswith(b'396 '): - words = message.split(b' ') - if len(words) >= 3: - # Sanity check inspired by irssi src/irc/core/irc-servers.c - if not any(x in words[2] for x in (b'*', b'?', b'!', b'#', b'&', b' ')) and not any(words[2].startswith(x) for x in (b'@', b':', b'-')) and words[2][-1:] != b'-': - if b'@' in words[2]: # user@host - self._maybe_set_usermask(self.config['irc']['nick'].encode('utf-8') + b'!' + words[2]) - else: # host (get user from previous mask or settings) - if self.usermask: - user = self.usermask.split(b'@')[0].split(b'!')[1] - else: - user = b'~' + self.config['irc']['nick'].encode('utf-8') - self._maybe_set_usermask(self.config['irc']['nick'].encode('utf-8') + b'!' + user + b'@' + words[2]) - - # User tracking (for NICK and QUIT) - decoded = False - if any(message.startswith(x) for x in (b'353 ', b'JOIN ', b'PART ', b'KICK ', b'NICK ', b'QUIT ')) or message == b'QUIT': - try: - if hasPrefix: - prefixStr = prefix.decode('utf-8') - messageStr = message.decode('utf-8') - except UnicodeDecodeError as e: - self.logger.warning(f'Could not decode prefix/message {prefix!r}/{message!r} ({e!s}), user tracking may be wrong') - else: - decoded = True - if message.startswith(b'353 ') and decoded: # RPL_NAMREPLY - _, channel, nicksStr = messageStr.split(' ', 2) - if nicksStr.startswith(':'): # It always should, but who knows... - nicksStr = nicksStr[1:] - nicks = nicksStr.split(' ') - for nick in nicks: - if nick[0] in ('@', '+'): - nick = nick[1:] - if self.valid_channel(channel) and self.valid_nick(nick): - self.userChannels[nick].add(channel) - if (message.startswith(b'JOIN ') or message.startswith(b'PART ')) and decoded and hasPrefix: - nick = self.prefix_to_nick(prefixStr) - channels = messageStr[5:] # Could be more than one channel in theory - for channel in channels.split(','): - if self.valid_channel(channel) and self.valid_nick(nick): - if message.startswith(b'JOIN '): - self.userChannels[nick].add(channel) - else: - self.userChannels[nick].discard(channel) - if message.startswith(b'KICK ') and decoded: # Prefix is supposed to indicate who kicked the user, but we don't care about that for the user tracking. - _, channel, nick = messageStr.split(' ', 2) - if ' ' in nick: # There might be a kick reason after the nick - nick = nick.split(' ', 1)[0] - if self.valid_channel(channel) and self.valid_nick(nick): - self.userChannels[nick].discard(channel) - if message.startswith(b'NICK '): - # If something can't be processed, just send it to storage without user tracking. - sendGeneric = True - if decoded and hasPrefix: - oldNick = self.prefix_to_nick(prefixStr) - newNick = message[5:] - if self.valid_nick(oldNick) and self.valid_nick(newNick) and oldNick in self.userChannels: - self.userChannels[newNick] = self.userChannels[oldNick] - del self.userChannels[oldNick] - if self.userChannels[newNick]: - sendGeneric = False - self.messageQueue.put_nowait((time_, rawMessage, self.userChannels[newNick])) - if sendGeneric: - self.logger.warning(f'Could not process nick change {rawMessage!r}, user tracking may be wrong') - self.messageQueue.put_nowait((time_, rawMessage, None)) - if message.startswith(b'QUIT ') or message == b'QUIT': - # Technically a simple 'QUIT' is not legal per RFC 1459. That's because there must always be a space after the command due to how is defined. - # In practice, it is accepted by ircds though, so it can presumably also be received by a client. - sendGeneric = True - if decoded and hasPrefix: - nick = self.prefix_to_nick(prefixStr) - if nick != self.config['irc']['nick'] and nick in self.userChannels: - if self.userChannels[nick]: - sendGeneric = False - self.messageQueue.put_nowait((time_, rawMessage, self.userChannels[nick])) - del self.userChannels[nick] - if not hasPrefix or (decoded and hasPrefix and nick == self.config['irc']['nick']): - # Oh no, *I* am getting disconnected! :-( - # I'm not actually sure whether the prefix version can happen, but better safe than sorry... - # In this case, it should be logged to all channels as well as the general log. The extra 'general' entry triggers Storage's code to write a message to the general log. - # Side effect: if the connection dies before any channels were joined, this causes the quit to be logged everywhere. However, there won't be a JOIN in the log, so it would still be unambiguous. - # Also, the connection loss after the disconnect triggers another message to be written to the logs. ¯\_(ツ)_/¯ - sendGeneric = False - self.messageQueue.put_nowait((time_, rawMessage, list(self.channels) + ['general'])) - if sendGeneric: - self.logger.warning(f'Could not process quit message {rawMessage!r}, user tracking may be wrong') - self.messageQueue.put_nowait((time_, rawMessage, None)) - async def quit(self): # It appears to be hard to impossible to send a clean quit, wait for it to be actually sent, and only then close the transport. # This is because asyncio.sslproto.SSLTransport doesn't support explicit draining nor waiting for an empty write queue nor write_eof.