No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.
 
 

720 líneas
30 KiB

import asyncio
import base64
import collections
import collections.abc
import importlib.util
import inspect
import itertools
import logging
import os.path
import signal
import ssl
import string
import sys
import tempfile
import time

import aiohttp
import aiohttp.web
import ircstates
import irctokens
import toml
  20. logger = logging.getLogger('irclog')
  21. SSL_CONTEXTS = {'yes': True, 'no': False, 'insecure': ssl.SSLContext()}
  22. messageConnectionClosed = object() # Signals that the connection was closed by either the bot or the server
  23. messageEOF = object() # Special object to signal the end of messages to Storage
  24. class InvalidConfig(Exception):
  25. '''Error in configuration file'''
  26. def is_valid_pem(path, withCert):
  27. '''Very basic check whether something looks like a valid PEM certificate'''
  28. try:
  29. with open(path, 'rb') as fp:
  30. contents = fp.read()
  31. # All of these raise exceptions if something's wrong...
  32. if withCert:
  33. assert contents.startswith(b'-----BEGIN CERTIFICATE-----\n')
  34. endCertPos = contents.index(b'-----END CERTIFICATE-----\n')
  35. base64.b64decode(contents[28:endCertPos].replace(b'\n', b''), validate = True)
  36. assert contents[endCertPos + 26:].startswith(b'-----BEGIN PRIVATE KEY-----\n')
  37. else:
  38. assert contents.startswith(b'-----BEGIN PRIVATE KEY-----\n')
  39. endCertPos = -26 # Please shoot me.
  40. endKeyPos = contents.index(b'-----END PRIVATE KEY-----\n')
  41. base64.b64decode(contents[endCertPos + 26 + 28: endKeyPos].replace(b'\n', b''), validate = True)
  42. assert contents[endKeyPos + 26:] == b''
  43. return True
  44. except: # Yes, really
  45. return False
  46. class Config(dict):
  47. def __init__(self, filename):
  48. super().__init__()
  49. self._filename = filename
  50. with open(self._filename, 'r') as fp:
  51. obj = toml.load(fp)
  52. # Sanity checks
  53. if any(x not in ('logging', 'storage', 'irc', 'web', 'channels') for x in obj.keys()):
  54. raise InvalidConfig('Unknown sections found in base object')
  55. if any(not isinstance(x, collections.abc.Mapping) for x in obj.values()):
  56. raise InvalidConfig('Invalid section type(s), expected objects/dicts')
  57. if 'logging' in obj:
  58. if any(x not in ('level', 'format') for x in obj['logging']):
  59. raise InvalidConfig('Unknown key found in log section')
  60. if 'level' in obj['logging'] and obj['logging']['level'] not in ('DEBUG', 'INFO', 'WARNING', 'ERROR'):
  61. raise InvalidConfig('Invalid log level')
  62. if 'format' in obj['logging']:
  63. if not isinstance(obj['logging']['format'], str):
  64. raise InvalidConfig('Invalid log format')
  65. try:
  66. #TODO: Replace with logging.Formatter's validate option (3.8+); this test does not cover everything that could be wrong (e.g. invalid format spec or conversion)
  67. # This counts the number of replacement fields. Formatter.parse yields tuples whose second value is the field name; if it's None, there is no field (e.g. literal text).
  68. assert sum(1 for x in string.Formatter().parse(obj['logging']['format']) if x[1] is not None) > 0
  69. except (ValueError, AssertionError) as e:
  70. raise InvalidConfig('Invalid log format: parsing failed') from e
  71. if 'storage' in obj:
  72. if any(x != 'path' for x in obj['storage']):
  73. raise InvalidConfig('Unknown key found in storage section')
  74. if 'path' in obj['storage']:
  75. obj['storage']['path'] = os.path.abspath(os.path.join(os.path.dirname(self._filename), obj['storage']['path']))
  76. try:
  77. #TODO This doesn't seem to work correctly; doesn't fail when the dir is -w
  78. f = tempfile.TemporaryFile(dir = obj['storage']['path'])
  79. f.close()
  80. except (OSError, IOError) as e:
  81. raise InvalidConfig('Invalid storage path: not writable') from e
  82. if 'irc' in obj:
  83. if any(x not in ('host', 'port', 'ssl', 'nick', 'real', 'certfile', 'certkeyfile') for x in obj['irc']):
  84. raise InvalidConfig('Unknown key found in irc section')
  85. if 'host' in obj['irc'] and not isinstance(obj['irc']['host'], str): #TODO: Check whether it's a valid hostname
  86. raise InvalidConfig('Invalid IRC host')
  87. if 'port' in obj['irc'] and (not isinstance(obj['irc']['port'], int) or not 1 <= obj['irc']['port'] <= 65535):
  88. raise InvalidConfig('Invalid IRC port')
  89. if 'ssl' in obj['irc'] and obj['irc']['ssl'] not in ('yes', 'no', 'insecure'):
  90. raise InvalidConfig(f'Invalid IRC SSL setting: {obj["irc"]["ssl"]!r}')
  91. if 'nick' in obj['irc'] and not isinstance(obj['irc']['nick'], str): #TODO: Check whether it's a valid nickname, username, etc.
  92. raise InvalidConfig('Invalid IRC nick')
  93. if len(IRCClientProtocol.nick_command(obj['irc']['nick'])) > 510:
  94. raise InvalidConfig('Invalid IRC nick: NICK command too long')
  95. if 'real' in obj['irc'] and not isinstance(obj['irc']['real'], str):
  96. raise InvalidConfig('Invalid IRC realname')
  97. if len(IRCClientProtocol.user_command(obj['irc']['nick'], obj['irc']['real'])) > 510:
  98. raise InvalidConfig('Invalid IRC nick/realname combination: USER command too long')
  99. if ('certfile' in obj['irc']) != ('certkeyfile' in obj['irc']):
  100. raise InvalidConfig('Invalid IRC cert config: needs both certfile and certkeyfile')
  101. if 'certfile' in obj['irc']:
  102. if not isinstance(obj['irc']['certfile'], str):
  103. raise InvalidConfig('Invalid certificate file: not a string')
  104. obj['irc']['certfile'] = os.path.abspath(os.path.join(os.path.dirname(self._filename), obj['irc']['certfile']))
  105. if not os.path.isfile(obj['irc']['certfile']):
  106. raise InvalidConfig('Invalid certificate file: not a regular file')
  107. if not is_valid_pem(obj['irc']['certfile'], True):
  108. raise InvalidConfig('Invalid certificate file: not a valid PEM cert')
  109. if 'certkeyfile' in obj['irc']:
  110. if not isinstance(obj['irc']['certkeyfile'], str):
  111. raise InvalidConfig('Invalid certificate key file: not a string')
  112. obj['irc']['certkeyfile'] = os.path.abspath(os.path.join(os.path.dirname(self._filename), obj['irc']['certkeyfile']))
  113. if not os.path.isfile(obj['irc']['certkeyfile']):
  114. raise InvalidConfig('Invalid certificate key file: not a regular file')
  115. if not is_valid_pem(obj['irc']['certkeyfile'], False):
  116. raise InvalidConfig('Invalid certificate key file: not a valid PEM key')
  117. if 'web' in obj:
  118. if any(x not in ('host', 'port') for x in obj['web']):
  119. raise InvalidConfig('Unknown key found in web section')
  120. if 'host' in obj['web'] and not isinstance(obj['web']['host'], str): #TODO: Check whether it's a valid hostname (must resolve I guess?)
  121. raise InvalidConfig('Invalid web hostname')
  122. if 'port' in obj['web'] and (not isinstance(obj['web']['port'], int) or not 1 <= obj['web']['port'] <= 65535):
  123. raise InvalidConfig('Invalid web port')
  124. if 'channels' in obj:
  125. seenChannels = {}
  126. for key, channel in obj['channels'].items():
  127. if not isinstance(key, str) or not key:
  128. raise InvalidConfig(f'Invalid channel key {key!r}')
  129. if not isinstance(channel, collections.abc.Mapping):
  130. raise InvalidConfig(f'Invalid channel for {key!r}')
  131. if any(x not in ('ircchannel', 'auth', 'active') for x in channel):
  132. raise InvalidConfig(f'Unknown key(s) found in channel {key!r}')
  133. if 'ircchannel' not in channel:
  134. channel['ircchannel'] = f'#{key}'
  135. if not isinstance(channel['ircchannel'], str):
  136. raise InvalidConfig(f'Invalid channel {key!r} IRC channel: not a string')
  137. if not channel['ircchannel'].startswith('#') and not channel['ircchannel'].startswith('&'):
  138. raise InvalidConfig(f'Invalid channel {key!r} IRC channel: does not start with # or &')
  139. if any(x in channel['ircchannel'][1:] for x in (' ', '\x00', '\x07', '\r', '\n', ',')):
  140. raise InvalidConfig(f'Invalid channel {key!r} IRC channel: contains forbidden characters')
  141. if len(channel['ircchannel']) > 200:
  142. raise InvalidConfig(f'Invalid channel {key!r} IRC channel: too long')
  143. if channel['ircchannel'] in seenChannels:
  144. raise InvalidConfig(f'Invalid channel {key!r} IRC channel: collides with channel {seenWebPaths[channel["ircchannel"]]!r}')
  145. seenChannels[channel['ircchannel']] = key
  146. if 'auth' in channel:
  147. if channel['auth'] is not False and not isinstance(channel['auth'], str):
  148. raise InvalidConfig(f'Invalid channel {key!r} auth: must be false or a string')
  149. if isinstance(channel['auth'], str) and ':' not in channel['auth']:
  150. raise InvalidConfig(f'Invalid channel {key!r} auth: must contain a colon')
  151. else:
  152. channel['auth'] = False
  153. if 'active' in channel:
  154. if channel['active'] is not True and channel['active'] is not False:
  155. raise InvalidConfig(f'Invalid channel {key!r} active: must be true or false')
  156. else:
  157. channel['active'] = True
  158. # Default values
  159. finalObj = {'logging': {'level': 'INFO', 'format': '{asctime} {levelname} {name} {message}'}, 'storage': {'path': os.path.abspath(os.path.dirname(self._filename))}, 'irc': {'host': 'irc.hackint.org', 'port': 6697, 'ssl': 'yes', 'nick': 'irclogbot', 'real': 'I am an irclog bot.', 'certfile': None, 'certkeyfile': None}, 'web': {'host': '127.0.0.1', 'port': 8080}, 'channels': {}}
  160. # Default values for channels are already set above.
  161. # Merge in what was read from the config file and set keys on self
  162. for key in ('logging', 'storage', 'irc', 'web', 'channels'):
  163. if key in obj:
  164. finalObj[key].update(obj[key])
  165. self[key] = finalObj[key]
  166. def __repr__(self):
  167. return f'<Config(logging={self["logging"]!r}, storage={self["storage"]!r}, irc={self["irc"]!r}, web={self["web"]!r}, channels={self["channels"]!r})>'
  168. def reread(self):
  169. return Config(self._filename)
  170. class IRCClientProtocol(asyncio.Protocol):
  171. logger = logging.getLogger('irclog.IRCClientProtocol')
  172. def __init__(self, messageQueue, connectionClosedEvent, loop, config, channels):
  173. self.messageQueue = messageQueue
  174. self.connectionClosedEvent = connectionClosedEvent
  175. self.loop = loop
  176. self.config = config
  177. self.buffer = b''
  178. self.connected = False
  179. self.channels = channels # Currently joined/supposed-to-be-joined channels; set(str)
  180. self.userChannels = collections.defaultdict(set) # List of which channels a user is known to be in; nickname:str -> {channel:str, ...}
  181. self.sasl = bool(self.config['irc']['certfile'] and self.config['irc']['certkeyfile'])
  182. self.authenticated = False
  183. self.server = ircstates.Server(self.config['irc']['host'])
  184. @staticmethod
  185. def nick_command(nick: str):
  186. return b'NICK ' + nick.encode('utf-8')
  187. @staticmethod
  188. def user_command(nick: str, real: str):
  189. nickb = nick.encode('utf-8')
  190. return b'USER ' + nickb + b' ' + nickb + b' ' + nickb + b' :' + real.encode('utf-8')
  191. @staticmethod
  192. def valid_channel(channel: str):
  193. return channel[0] in ('#', '&') and not any(x in channel for x in (' ', '\x00', '\x07', '\r', '\n', ','))
  194. @staticmethod
  195. def valid_nick(nick: str):
  196. # According to RFC 1459, a nick must be '<letter> { <letter> | <number> | <special> }'. This is obviously not true in practice because <special> doesn't include underscores, for example.
  197. # So instead, just do a sanity check similar to the channel one to disallow obvious bullshit.
  198. return not any(x in nick for x in (' ', '\x00', '\x07', '\r', '\n', ','))
  199. @staticmethod
  200. def prefix_to_nick(prefix: str):
  201. nick = prefix[1:]
  202. if '!' in nick:
  203. nick = nick.split('!', 1)[0]
  204. if '@' in nick: # nick@host is also legal
  205. nick = nick.split('@', 1)[0]
  206. return nick
  207. def connection_made(self, transport):
  208. self.logger.info('IRC connected')
  209. self.transport = transport
  210. self.connected = True
  211. if self.sasl:
  212. self.send(b'CAP REQ :sasl')
  213. self.send(self.nick_command(self.config['irc']['nick']))
  214. self.send(self.user_command(self.config['irc']['nick'], self.config['irc']['real']))
  215. def _send_join_part(self, command, channels):
  216. '''Split a JOIN or PART into multiple messages as necessary'''
  217. # command: b'JOIN' or b'PART'; channels: set[str]
  218. channels = [x.encode('utf-8') for x in channels]
  219. if len(command) + sum(1 + len(x) for x in channels) <= 510: # Total length = command + (separator + channel name for each channel, where the separator is a space for the first and then a comma)
  220. # Everything fits into one command.
  221. self.send(command + b' ' + b','.join(channels))
  222. return
  223. # List too long, need to split.
  224. limit = 510 - len(command)
  225. lengths = [1 + len(x) for x in channels] # separator + channel name
  226. chanLengthAcceptable = [l <= limit for l in lengths]
  227. if not all(chanLengthAcceptable):
  228. # There are channel names that are too long to even fit into one message on their own; filter them out and warn about them.
  229. # This should never happen since the config reader would already filter it out.
  230. tooLongChannels = [x for x, a in zip(channels, chanLengthAcceptable) if not a]
  231. channels = [x for x, a in zip(channels, chanLengthAcceptable) if a]
  232. lengths = [l for l, a in zip(lengths, chanLengthAcceptable) if a]
  233. for channel in tooLongChannels:
  234. self.logger.warning(f'Cannot {command} {channel}: name too long')
  235. runningLengths = list(itertools.accumulate(lengths)) # entry N = length of all entries up to and including channel N, including separators
  236. offset = 0
  237. while channels:
  238. i = next((x[0] for x in enumerate(runningLengths) if x[1] - offset > limit), -1)
  239. if i == -1: # Last batch
  240. i = len(channels)
  241. self.send(command + b' ' + b','.join(channels[:i]))
  242. offset = runningLengths[i-1]
  243. channels = channels[i:]
  244. runningLengths = runningLengths[i:]
  245. def update_channels(self, channels: set):
  246. channelsToPart = self.channels - channels
  247. channelsToJoin = channels - self.channels
  248. self.channels = channels
  249. if self.connected:
  250. if channelsToPart:
  251. self._send_join_part(b'PART', channelsToPart)
  252. if channelsToJoin:
  253. self._send_join_part(b'JOIN', channelsToJoin)
  254. def send(self, data):
  255. self.logger.debug(f'Send: {data!r}')
  256. if len(data) > 510:
  257. raise RuntimeError(f'IRC message too long ({len(data)} > 510): {data!r}')
  258. time_ = time.time()
  259. self.transport.write(data + b'\r\n')
  260. self.messageQueue.put_nowait((time_, b'> ' + data, None))
  261. def data_received(self, data):
  262. time_ = time.time()
  263. self.logger.debug(f'Data received: {data!r}')
  264. # Split received data on CRLF. If there's any data left in the buffer, prepend it to the first message and process that.
  265. # Then, process all messages except the last one (since data might not end on a CRLF) and keep the remainder in the buffer.
  266. # If data does end with CRLF, all messages will have been processed and the buffer will be empty again.
  267. messages = data.split(b'\r\n')
  268. if self.buffer:
  269. messages[0] = self.buffer + messages[0]
  270. for message in messages[:-1]:
  271. lines = self.server.recv(message + b'\r\n')
  272. assert len(lines) == 1
  273. self.server.parse_tokens(lines[0])
  274. self.message_received(time_, message, lines[0])
  275. self.buffer = messages[-1]
  276. def message_received(self, time_, message, line):
  277. self.logger.debug(f'Message received at {time_}: {message!r}')
  278. # Queue message for storage
  279. sendGeneral = True
  280. if line.command in ('QUIT', 'NICK') and line.source:
  281. if line.hostmask.nickname == self.server.nickname:
  282. # Self-quit
  283. sendGeneral = False
  284. self.messageQueue.put_nowait((time_, b'< ' + message, list(self.channels) + ['general']))
  285. else:
  286. try:
  287. user = self.server.users[line.hostmask.nickname]
  288. except KeyError:
  289. pass
  290. else:
  291. sendGeneral = False
  292. self.messageQueue.put_nowait((time_, b'< ' + message, user.channels))
  293. if sendGeneral:
  294. self.messageQueue.put_nowait((time_, b'< ' + message, None))
  295. # PING/PONG
  296. if line.command == 'PING':
  297. self.send(irctokens.build('PONG', line.params).format().encode('utf-8'))
  298. # SASL
  299. elif line.command == 'CAP' and self.sasl:
  300. if line.params[-2] == 'ACK' and 'sasl' in line.params[-1].split(' '):
  301. self.send(b'AUTHENTICATE EXTERNAL')
  302. else:
  303. self.logger.error(f'Received unexpected CAP reply {message!r}, terminating connection')
  304. self.transport.close()
  305. elif line.command == 'AUTHENTICATE' and line.params == ['+']:
  306. self.send(b'AUTHENTICATE +')
  307. elif line.command == '903': # SASL auth successful
  308. self.authenticated = True
  309. self.send(b'CAP END')
  310. elif line.command in ('902', '904', '905', '906', '908'):
  311. self.logger.error('SASL error, terminating connection')
  312. self.transport.close()
  313. # NICK errors
  314. elif line.command in ('431', '432', '433', '436'):
  315. self.logger.error(f'Failed to set nickname: {message!r}, terminating connection')
  316. self.transport.close()
  317. # USER errors
  318. elif line.command in ('461', '462'):
  319. self.logger.error(f'Failed to register: {message!r}, terminating connection')
  320. self.transport.close()
  321. # JOIN errors
  322. elif line.command in ('405', '471', '473', '474', '475'):
  323. self.logger.error(f'Failed to join channel: {message!r}, terminating connection')
  324. self.transport.close()
  325. # PART errors
  326. elif line.command == '442':
  327. self.logger.error(f'Failed to part channel: {message!r}')
  328. # JOIN/PART errors
  329. elif line.command == '403':
  330. self.logger.error(f'Failed to join or part channel: {message!r}')
  331. # Connection registration reply
  332. elif line.command == '001':
  333. self.logger.info('IRC connection registered')
  334. if self.sasl and not self.authenticated:
  335. self.logger.error('IRC connection registered but not authenticated, terminating connection')
  336. self.transport.close()
  337. return
  338. self._send_join_part(b'JOIN', self.channels)
  339. # General fatal ERROR
  340. elif line.command == 'ERROR':
  341. self.logger.error(f'Server sent ERROR: {message!r}')
  342. self.transport.close()
  343. async def quit(self):
  344. # The server acknowledges a QUIT by sending an ERROR and closing the connection. The latter triggers connection_lost, so just wait for the closure event.
  345. self.logger.info('Quitting')
  346. self.send(b'QUIT :Bye')
  347. await self.connectionClosedEvent.wait()
  348. self.transport.close()
  349. def connection_lost(self, exc):
  350. time_ = time.time()
  351. self.logger.info('IRC connection lost')
  352. self.connected = False
  353. self.connectionClosedEvent.set()
  354. self.messageQueue.put_nowait((time_, messageConnectionClosed, list(self.channels) + ['general']))
  355. class IRCClient:
  356. logger = logging.getLogger('irclog.IRCClient')
  357. def __init__(self, messageQueue, config):
  358. self.messageQueue = messageQueue
  359. self.config = config
  360. self.channels = {channel['ircchannel'] for channel in config['channels'].values()}
  361. self._transport = None
  362. self._protocol = None
  363. def update_config(self, config):
  364. needReconnect = self.config['irc'] != config['irc']
  365. self.config = config
  366. if self._transport: # if currently connected:
  367. if needReconnect:
  368. self._transport.close()
  369. else:
  370. self.channels = {channel['ircchannel'] for channel in config['channels'].values()}
  371. self._protocol.update_channels(self.channels)
  372. def _get_ssl_context(self):
  373. ctx = SSL_CONTEXTS[self.config['irc']['ssl']]
  374. if self.config['irc']['certfile'] and self.config['irc']['certkeyfile']:
  375. if ctx is True:
  376. ctx = ssl.create_default_context()
  377. if isinstance(ctx, ssl.SSLContext):
  378. ctx.load_cert_chain(self.config['irc']['certfile'], keyfile = self.config['irc']['certkeyfile'])
  379. return ctx
  380. async def run(self, loop, sigintEvent):
  381. connectionClosedEvent = asyncio.Event()
  382. while True:
  383. connectionClosedEvent.clear()
  384. try:
  385. self._transport, self._protocol = await loop.create_connection(lambda: IRCClientProtocol(self.messageQueue, connectionClosedEvent, loop, self.config, self.channels), self.config['irc']['host'], self.config['irc']['port'], ssl = self._get_ssl_context())
  386. try:
  387. await asyncio.wait((connectionClosedEvent.wait(), sigintEvent.wait()), return_when = asyncio.FIRST_COMPLETED)
  388. finally:
  389. if not connectionClosedEvent.is_set():
  390. await self._protocol.quit()
  391. except (ConnectionRefusedError, ssl.SSLError, asyncio.TimeoutError) as e:
  392. self.logger.error(str(e))
  393. await asyncio.wait((asyncio.sleep(5), sigintEvent.wait()), return_when = asyncio.FIRST_COMPLETED)
  394. if sigintEvent.is_set():
  395. self.logger.debug('Got SIGINT, putting EOF and breaking')
  396. self.messageQueue.put_nowait(messageEOF)
  397. break
  398. class Storage:
  399. logger = logging.getLogger('irclog.Storage')
  400. def __init__(self, messageQueue, config):
  401. self.messageQueue = messageQueue
  402. self.config = config
  403. self.files = {} # channel|None -> fileobj; None = general log for anything that wasn't recognised as a message for the channel log
  404. self.active = True
  405. def update_config(self, config):
  406. channelsOld = {channel['ircchannel'] for channel in self.config['channels'].values()}
  407. channelsNew = {channel['ircchannel'] for channel in config['channels'].values()}
  408. channelsRemoved = channelsOld - channelsNew
  409. self.config = config
  410. for channel in channelsRemoved:
  411. if channel in self.files:
  412. self.files[channel].close()
  413. del self.files[channel]
  414. #TODO mkdir as required
  415. #TODO month
  416. for channel in self.config['channels'].values():
  417. if channel['ircchannel'] not in self.files and channel['active']:
  418. self.files[channel['ircchannel']] = open(os.path.join(self.config['storage']['path'], channel['ircchannel'], '2020-10.log'), 'ab')
  419. if None not in self.files:
  420. self.files[None] = open(os.path.join(self.config['storage']['path'], 'general', '2020-10.log'), 'ab')
  421. async def run(self, loop, sigintEvent):
  422. self.update_config(self.config) # Ensure that files are open etc.
  423. #TODO Task to rotate log files at the beginning of a new month
  424. storageTask = asyncio.create_task(self.store_messages(sigintEvent))
  425. flushTask = asyncio.create_task(self.flush_files())
  426. await sigintEvent.wait()
  427. self.logger.debug('Got SIGINT, waiting for remaining messages to be stored')
  428. await storageTask # Wait until everything's stored
  429. self.active = False
  430. self.logger.debug('Waiting for flush task')
  431. await flushTask
  432. self.close()
  433. async def store_messages(self, sigintEvent):
  434. while self.active:
  435. self.logger.debug('Waiting for message')
  436. res = await self.messageQueue.get()
  437. self.logger.debug(f'Got {res!r} from message queue')
  438. if res is messageEOF:
  439. self.logger.debug('Message EOF, breaking store_messages loop')
  440. break
  441. time_, rawMessage, channels = res
  442. if rawMessage is messageConnectionClosed:
  443. rawMessage = b'- Connection closed'
  444. message = rawMessage[2:] # Remove leading > or <
  445. if message.startswith(b':') and b' ' in message:
  446. prefix, message = message.split(b' ', 1)
  447. # Identify channel-bound messages: JOIN, PART, QUIT, MODE, KICK, PRIVMSG, NOTICE (see https://tools.ietf.org/html/rfc1459#section-4.2.1)
  448. if message.startswith(b'JOIN ') or message.startswith(b'PART ') or message.startswith(b'PRIVMSG ') or message.startswith(b'NOTICE '):
  449. # I *think* that the first parameter of JOIN/PART can only ever be a single channel for messages announcing other people joining, but who knows with how awful RFC 1459 is...
  450. channelsRaw = message.split(b' ', 2)[1]
  451. channels = self.decode_channel(time_, rawMessage, channelsRaw.split(b','))
  452. if channels is None:
  453. continue
  454. for channel in channels:
  455. self.store_message(time_, rawMessage, channel)
  456. continue
  457. if message.startswith(b'QUIT ') or message == b'QUIT' or message.startswith(b'NICK '):
  458. # If channels is not None, IRCClientProtocol managed to track the user and identify the channels this needs to be logged to.
  459. # If it isn't, there might be channels in there (for some odd reason?) that are not being logged. In that case, emit one and only one message to the general log as well.
  460. if channels is not None:
  461. for channel in channels:
  462. self.store_message(time_, rawMessage, channel, redirectToGeneral = False)
  463. if channels is None or any(channel not in self.files for channel in channels):
  464. self.store_message(time_, rawMessage, None)
  465. continue
  466. if message.startswith(b'MODE #') or message.startswith(b'MODE &') or message.startswith(b'KICK '):
  467. channel = message.split(b' ', 2)[1]
  468. channel = self.decode_channel(time_, rawMessage, channel)
  469. if channel is None:
  470. continue
  471. self.store_message(time_, rawMessage, channel)
  472. continue
  473. if channels is not None:
  474. for channel in channels:
  475. self.store_message(time_, rawMessage, channel)
  476. else:
  477. self.store_message(time_, rawMessage, None)
  478. def store_message(self, time_, rawMessage, targetChannel, redirectToGeneral = True):
  479. self.logger.debug(f'Logging {rawMessage!r} at {time_} for {targetChannel!r}')
  480. if targetChannel is not None and targetChannel not in self.files:
  481. self.logger.debug(f'Target channel {targetChannel!r} not opened, redirecting to general log is {redirectToGeneral}')
  482. if not redirectToGeneral:
  483. return
  484. targetChannel = None
  485. self.files[targetChannel].write(str(time_).encode('ascii') + b' ' + rawMessage + b'\r\n')
  486. def decode_channel(self, time_, rawMessage, channel):
  487. try:
  488. if isinstance(channel, list):
  489. return [c.decode('utf-8') for c in channel]
  490. return channel.decode('utf-8')
  491. except UnicodeDecodeError as e:
  492. self.logger.warning(f'Failed to decode channel name {channel!r} from {rawMessage!r} at {time_}: {e!s}')
  493. self.store_message(time_, rawMessage, None)
  494. return None
  495. async def flush_files(self):
  496. while self.active:
  497. await asyncio.sleep(1)
  498. self.logger.debug('Exiting flush_files')
  499. def close(self):
  500. for f in self.files.values():
  501. f.close()
  502. self.files = {}
  503. class WebServer:
  504. logger = logging.getLogger('irclog.WebServer')
  505. def __init__(self, config):
  506. self.config = config
  507. self._paths = {} # '/path' => ('#channel', auth, module, moduleargs) where auth is either False (no authentication) or the HTTP header value for basic auth
  508. self._app = aiohttp.web.Application()
  509. self._app.add_routes([aiohttp.web.post('/{path:.+}', self.post)])
  510. self.update_config(config)
  511. self._configChanged = asyncio.Event()
  512. def update_config(self, config):
  513. # self._paths = {channel['webpath']: (channel['ircchannel'], f'Basic {base64.b64encode(channel["auth"].encode("utf-8")).decode("utf-8")}' if channel['auth'] else False) for channel in config['channels'].values()}
  514. needRebind = self.config['web'] != config['web'] #TODO only if there are changes to web.host or web.port; everything else can be updated without rebinding
  515. self.config = config
  516. if needRebind:
  517. self._configChanged.set()
  518. async def run(self, stopEvent):
  519. while True:
  520. runner = aiohttp.web.AppRunner(self._app)
  521. await runner.setup()
  522. site = aiohttp.web.TCPSite(runner, self.config['web']['host'], self.config['web']['port'])
  523. await site.start()
  524. await asyncio.wait((stopEvent.wait(), self._configChanged.wait()), return_when = asyncio.FIRST_COMPLETED)
  525. await runner.cleanup()
  526. if stopEvent.is_set():
  527. break
  528. self._configChanged.clear()
  529. # https://docs.python.org/3/library/asyncio-subprocess.html#asyncio.asyncio.subprocess.Process
  530. # https://stackoverflow.com/questions/1180606/using-subprocess-popen-for-process-with-large-output
  531. # -> https://stackoverflow.com/questions/57730010/python-asyncio-subprocess-write-stdin-and-read-stdout-stderr-continuously
  532. async def post(self, request):
  533. self.logger.info(f'Received request {id(request)} from {request.remote!r} for {request.path!r} with body {(await request.read())!r}')
  534. try:
  535. channel, auth, module, moduleargs, overlongmode = self._paths[request.path]
  536. except KeyError:
  537. self.logger.info(f'Bad request {id(request)}: no path {request.path!r}')
  538. raise aiohttp.web.HTTPNotFound()
  539. if auth:
  540. authHeader = request.headers.get('Authorization')
  541. if not authHeader or authHeader != auth:
  542. self.logger.info(f'Bad request {id(request)}: authentication failed: {authHeader!r} != {auth}')
  543. raise aiohttp.web.HTTPForbidden()
  544. if module is not None:
  545. self.logger.debug(f'Processing request {id(request)} using {module!r}')
  546. try:
  547. message = await module.process(request, *moduleargs)
  548. except aiohttp.web.HTTPException as e:
  549. raise e
  550. except Exception as e:
  551. self.logger.error(f'Bad request {id(request)}: exception in module process function: {type(e).__module__}.{type(e).__name__}: {e!s}')
  552. raise aiohttp.web.HTTPBadRequest()
  553. if '\r' in message or '\n' in message:
  554. self.logger.error(f'Bad request {id(request)}: module process function returned message with linebreaks: {message!r}')
  555. raise aiohttp.web.HTTPBadRequest()
  556. else:
  557. self.logger.debug(f'Processing request {id(request)} using default processor')
  558. message = await self._default_process(request)
  559. self.logger.info(f'Accepted request {id(request)}, putting message {message!r} for {channel} into message queue')
  560. self.messageQueue.put_nowait((channel, message, overlongmode))
  561. raise aiohttp.web.HTTPOk()
  562. async def _default_process(self, request):
  563. try:
  564. message = await request.text()
  565. except Exception as e:
  566. self.logger.info(f'Bad request {id(request)}: exception while reading request data: {e!s}')
  567. raise aiohttp.web.HTTPBadRequest() # Yes, it's always the client's fault. :-)
  568. self.logger.debug(f'Request {id(request)} payload: {message!r}')
  569. # Strip optional [CR] LF at the end of the payload
  570. if message.endswith('\r\n'):
  571. message = message[:-2]
  572. elif message.endswith('\n'):
  573. message = message[:-1]
  574. if '\r' in message or '\n' in message:
  575. self.logger.info(f'Bad request {id(request)}: linebreaks in message')
  576. raise aiohttp.web.HTTPBadRequest()
  577. return message
  578. def configure_logging(config):
  579. #TODO: Replace with logging.basicConfig(..., force = True) (Py 3.8+)
  580. root = logging.getLogger()
  581. root.setLevel(getattr(logging, config['logging']['level']))
  582. root.handlers = [] #FIXME: Undocumented attribute of logging.Logger
  583. formatter = logging.Formatter(config['logging']['format'], style = '{')
  584. stderrHandler = logging.StreamHandler()
  585. stderrHandler.setFormatter(formatter)
  586. root.addHandler(stderrHandler)
  587. async def main():
  588. if len(sys.argv) != 2:
  589. print('Usage: irclog.py CONFIGFILE', file = sys.stderr)
  590. sys.exit(1)
  591. configFile = sys.argv[1]
  592. config = Config(configFile)
  593. configure_logging(config)
  594. loop = asyncio.get_running_loop()
  595. messageQueue = asyncio.Queue()
  596. # tuple(time: float, message: bytes or None, channels: list[str] or None)
  597. # message = None indicates a connection loss
  598. # channels = None indicates that IRCClientProtocol did not identify which channels are affected; it is a set or list of channel names for QUIT or NICK messages and the connection closed message.
  599. irc = IRCClient(messageQueue, config)
  600. webserver = WebServer(config)
  601. storage = Storage(messageQueue, config)
  602. sigintEvent = asyncio.Event()
  603. def sigint_callback():
  604. global logger
  605. nonlocal sigintEvent
  606. logger.info('Got SIGINT, stopping')
  607. sigintEvent.set()
  608. loop.add_signal_handler(signal.SIGINT, sigint_callback)
  609. def sigusr1_callback():
  610. global logger
  611. nonlocal config, irc, webserver, storage
  612. logger.info('Got SIGUSR1, reloading config')
  613. try:
  614. newConfig = config.reread()
  615. except InvalidConfig as e:
  616. logger.error(f'Config reload failed: {e!s} (old config remains active)')
  617. return
  618. config = newConfig
  619. configure_logging(config)
  620. irc.update_config(config)
  621. webserver.update_config(config)
  622. storage.update_config(config)
  623. loop.add_signal_handler(signal.SIGUSR1, sigusr1_callback)
  624. await asyncio.gather(irc.run(loop, sigintEvent), webserver.run(sigintEvent), storage.run(loop, sigintEvent))
  625. if __name__ == '__main__':
  626. asyncio.run(main())