|
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061 |
- import itertools
- import os
- import qwarc
- import qwarc.utils
-
-
# Response handler shared by every fetch in this spec.
# NOTE(review): presumably limits retries on errors to 5 attempts — confirm
# against qwarc.utils.handle_response_limit_error_retries.
responseHandler = qwarc.utils.handle_response_limit_error_retries(5)
-
-
class LiveChatReplay(qwarc.Item):
    """Archive the live chat replay of a single YouTube video.

    The video ID comes from the ``YOUTUBE_VIDEOID`` environment variable.
    ``process`` fetches the watch page, cuts the embedded ``conversationBar``
    object out of it, and follows every continuation token found there
    through successive ``live_chat_replay`` pages until a page yields no
    further token.
    """

    itemType = 'chat-replay'
    # itemValue = '{videoId}'

    # Marker (as it appears JSON-escaped inside the watch page) that opens the
    # conversation bar object; its last byte is the opening brace.
    _conversationBarMarker = b'\\"conversationBar\\":{'
    # A continuation token must consist of these bytes only ...
    _continuationChars = b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-'
    # ... optionally followed by one or two percent-encoded '=' characters.
    _continuationSuffixes = (b'', b'%3D', b'%3D%3D')

    @classmethod
    def generate(cls):
        """Yield the single item value: the video ID from the environment.

        Raises:
            KeyError: if ``YOUTUBE_VIDEOID`` is not set.
        """
        yield os.environ['YOUTUBE_VIDEOID']

    @classmethod
    def _isValidContinuation(cls, value):
        """Return True if *value* is a non-empty, well-formed continuation token."""
        if not value:
            return False
        return value.lstrip(cls._continuationChars) in cls._continuationSuffixes

    @staticmethod
    def _findClosingBracket(data, start):
        """Return the index of the bracket that closes the one at *start*.

        Counts ``{``/``[`` against ``}``/``]`` without regard to string
        literals (no regerts). Returns -1 if *data* ends before the brackets
        balance, instead of scanning past the end forever.
        """
        depth = 0
        for pos in range(start, len(data)):
            byte = data[pos]
            if byte in b'{[':
                depth += 1
            elif byte in b'}]':
                depth -= 1
                if depth == 0:
                    return pos
        return -1

    async def _followContinuations(self, continuation):
        """Fetch the chain of chat replay pages starting at *continuation*.

        Stops on a failed fetch, when a page carries no further continuation
        token, or when the next token looks malformed.
        """
        cont = continuation
        while True:
            page, _ = await self.fetch(
                f'https://www.youtube.com/live_chat_replay?continuation={cont.decode("ascii")}',
                responseHandler=responseHandler,
            )
            if not page or page.status != 200:
                self.logger.error(f'Could not fetch continuation {cont!r}')
                break
            pageContents = await page.read()
            contBlock = qwarc.utils.str_get_between(pageContents, b'"liveChatReplayContinuationData":', b'}')
            if not contBlock:
                break
            cont = qwarc.utils.str_get_between(contBlock, b'"continuation":"', b'"')
            if not cont:
                break
            if not self._isValidContinuation(cont):
                self.logger.warning(f'Skipping unexpected cont value: {cont!r}')
                # Actually skip it: do not request a URL built from a value
                # that failed validation.
                break

    async def process(self):
        """Fetch the watch page and every chat replay page reachable from it."""
        response, _ = await self.fetch(
            f'https://www.youtube.com/watch?v={self.itemValue}&disable_polymer=1',
            responseHandler=responseHandler,
        )
        if not response or response.status != 200:
            self.logger.error('Could not fetch video page')
            return
        contents = await response.read()
        markerPos = contents.find(self._conversationBarMarker)
        if markerPos < 0:
            self.logger.error('Could not find conversation bar')
            return

        # The object starts at the marker's trailing '{'.
        objectStart = markerPos + len(self._conversationBarMarker) - 1
        objectEnd = self._findClosingBracket(contents, objectStart)
        if objectEnd < 0:
            self.logger.error('Could not find end of conversation bar')
            return
        conversationBar = contents[objectStart:objectEnd]

        for continuation in qwarc.utils.str_get_all_between(conversationBar, b'\\"continuation\\":\\"', b'\\"'):
            if not self._isValidContinuation(continuation):
                self.logger.warning(f'Skipping unexpected continuation value: {continuation!r}')
                continue
            await self._followContinuations(continuation)
-
-
# Record the video ID alongside the spec's dependencies.
# NOTE(review): presumably this ends up in the archive metadata — confirm
# against qwarc.utils.SpecDependencies. Raises KeyError at import time if
# YOUTUBE_VIDEOID is unset.
specDependencies = qwarc.utils.SpecDependencies(
    extra=(
        ('videoId', os.environ['YOUTUBE_VIDEOID']),
    ),
)
|