A method to grab the live chat replay from YouTube
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

62 lines
2.2 KiB

  1. import itertools
  2. import os
  3. import qwarc
  4. import qwarc.utils
  5. responseHandler = qwarc.utils.handle_response_limit_error_retries(5)
  6. class LiveChatReplay(qwarc.Item):
  7. itemType = 'chat-replay'
  8. # itemValue = '{videoId}'
  9. @classmethod
  10. def generate(cls):
  11. yield os.environ['YOUTUBE_VIDEOID']
  12. async def process(self):
  13. response, _ = await self.fetch(f'https://www.youtube.com/watch?v={self.itemValue}&disable_polymer=1', responseHandler = responseHandler)
  14. if not response or response.status != 200:
  15. self.logger.error('Could not fetch video page')
  16. return
  17. contents = await response.read()
  18. conversationBarPos = contents.find(b'\\"conversationBar\\":{')
  19. if conversationBarPos < 0:
  20. self.logger.error('Could not find conversation bar')
  21. return
  22. # No regerts
  23. openParens = 0
  24. for pos in itertools.count(start = conversationBarPos + 20):
  25. char = contents[pos:pos+1]
  26. if char in (b'{', b'['):
  27. openParens += 1
  28. elif char in (b'}', b']'):
  29. openParens -= 1
  30. if openParens == 0:
  31. break
  32. conversationBar = contents[conversationBarPos + 20 : pos]
  33. for continuation in qwarc.utils.str_get_all_between(conversationBar, b'\\"continuation\\":\\"', b'\\"'):
  34. if not continuation or continuation.lstrip(b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-') not in (b'', b'%3D', b'%3D%3D'):
  35. self.logger.warning('Skipping unexpected continuation value: {continuation!r}')
  36. continue
  37. cont = continuation
  38. while True:
  39. page, _ = await self.fetch(f'https://www.youtube.com/live_chat_replay?continuation={cont.decode("ascii")}', responseHandler = responseHandler)
  40. if not page or page.status != 200:
  41. self.logger.error(f'Could not fetch continuation {cont!r}')
  42. break
  43. pageContents = await page.read()
  44. contBlock = qwarc.utils.str_get_between(pageContents, b'"liveChatReplayContinuationData":', b'}')
  45. if not contBlock:
  46. break
  47. cont = qwarc.utils.str_get_between(contBlock, b'"continuation":"', b'"')
  48. if not cont:
  49. break
  50. if cont.lstrip(b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-') not in (b'', b'%3D', b'%3D%3D'):
  51. self.logger.warning(f'Skipping unexpected cont value: {cont!r}')
  52. specDependencies = qwarc.utils.SpecDependencies(extra = (('videoId', os.environ['YOUTUBE_VIDEOID']),))