Browse Source

Update youtube-util

pull/11/head
tech234a 3 years ago
parent
commit
3e43461c43
3 changed files with 41 additions and 30 deletions
  1. +2
    -2
      worker.py
  2. +26
    -18
      youtube_channel.py
  3. +13
    -10
      youtube_util.py

+ 2
- 2
worker.py View File

@@ -5,7 +5,7 @@ from os import mkdir, rmdir, listdir, system, environ
from os.path import isdir, isfile, getsize
from json import loads

from youtube_channel import main
from youtube_channel import process_channel

import signal

@@ -159,7 +159,7 @@ def threadrunner():
jobs.put(("submitdiscovery", itemyv["id"], tracker.ItemType.Video))

#channel created playlists
y = main(desit.split(":", 1)[1])
y = process_channel(desit.split(":", 1)[1])
for itemyv in y["playlists"]:
jobs.put(("submitdiscovery", itemyv, tracker.ItemType.Playlist))
for itemyv in y["channels"]:


+ 26
- 18
youtube_channel.py View File

@@ -1,25 +1,31 @@
from requests import session
from youtube_util import getinitialdata, fullyexpand

# TODO: Rate limit detection, HTTP3?
from youtube_util import getinitialdata, fullyexpand, getapikey, getlver

mysession = session()
#extract latest version automatically
try:
lver = getinitialdata(mysession.get("https://www.youtube.com/").text)["responseContext"]["serviceTrackingParams"][2]["params"][2]["value"]
except:
lver = "2.20201002.02.01"
homepage = mysession.get("https://www.youtube.com/").text

API_KEY = getapikey(homepage)

params = (
('key', API_KEY),
)

#print(lver)
mysession.headers.update({"x-youtube-client-name": "1", "x-youtube-client-version": lver, "Accept-Language": "en-US"})
API_VERSION = getlver(getinitialdata(homepage))

def main(channelid: str):
continuationheaders = {"x-youtube-client-name": "1", "x-youtube-client-version": API_VERSION, "Accept-Language": "en-US"}

del homepage

def process_channel(channelid: str):
playlists = set()
shelfres = set()
channellist = set()

# PLAYLISTS
initdata = getinitialdata(mysession.get("https://www.youtube.com/channel/"+str(channelid)+"/playlists").text)
data = {"context":{"client":{"hl":"en","gl":"US","clientName":"WEB","clientVersion":API_VERSION}},"browseId":channelid,"params":"EglwbGF5bGlzdHM%3D"}
initdata = mysession.post("https://www.youtube.com/youtubei/v1/browse", params=params, json=data).json()


CHANNELS_ID = 0
PLAYLISTS_ID = 0
@@ -42,7 +48,7 @@ def main(channelid: str):
if "shelfRenderer" in itemint.keys():
shelfres.add(itemint["shelfRenderer"]["title"]["runs"][0]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"])
elif "gridRenderer" in itemint.keys():
playlistsint = fullyexpand(itemint["gridRenderer"])["items"]
playlistsint = fullyexpand(itemint["gridRenderer"], mysession, continuationheaders)["items"]

for playlist in playlistsint:
playlists.add(playlist["gridPlaylistRenderer"]["playlistId"])
@@ -51,7 +57,7 @@ def main(channelid: str):

for item in shelfres:
shelfiteminitdata = getinitialdata(mysession.get("https://www.youtube.com/"+str(item)).text)
playlistsint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][PLAYLISTS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"])["items"]
playlistsint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][PLAYLISTS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"], mysession, continuationheaders)["items"]

for playlist in playlistsint:
playlists.add(playlist["gridPlaylistRenderer"]["playlistId"])
@@ -61,7 +67,9 @@ def main(channelid: str):
# CHANNELS
cshelfres = set()

initdata = getinitialdata(mysession.get("https://www.youtube.com/channel/"+str(channelid)+"/channels").text)
# CHANNELS (request the channel's "channels" tab)
data = {"context":{"client":{"hl":"en","gl":"US","clientName":"WEB","clientVersion":API_VERSION}},"browseId":channelid,"params":"EghjaGFubmVscw%3D%3D"}
initdata = mysession.post("https://www.youtube.com/youtubei/v1/browse", params=params, json=data).json()

shelflist = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][CHANNELS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"]

@@ -70,14 +78,14 @@ def main(channelid: str):
if "shelfRenderer" in itemint.keys():
cshelfres.add(itemint["shelfRenderer"]["title"]["runs"][0]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"])
elif "gridRenderer" in itemint.keys():
chanlistint = fullyexpand(itemint["gridRenderer"])["items"]
chanlistint = fullyexpand(itemint["gridRenderer"], mysession, continuationheaders)["items"]

for channel in chanlistint:
channellist.add(channel["gridChannelRenderer"]["channelId"])

for item in cshelfres:
shelfiteminitdata = getinitialdata(mysession.get("https://www.youtube.com/"+str(item)).text)
chanlistint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][CHANNELS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"])["items"]
chanlistint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][CHANNELS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"], mysession, continuationheaders)["items"]

for channel in chanlistint:
channellist.add(channel["gridChannelRenderer"]["channelId"])
@@ -89,7 +97,7 @@ if __name__ == "__main__":
chanl = argv
chanl.pop(0)
for channel in chanl:
print(main(channel))
print(process_channel(channel))

# SAMPLES:
# UCqj7Cz7revf5maW9g5pgNcg lots of playlists
@@ -103,4 +111,4 @@ if __name__ == "__main__":

# UCJOh5FKisc0hUlEeWFBlD-w no subscriptions, plenty of featured channels

# UC7fjJERoGTs_eOKk-nn7RMw fair number of featured channels
# UC7fjJERoGTs_eOKk-nn7RMw fair number of featured channels

+ 13
- 10
youtube_util.py View File

@@ -1,26 +1,29 @@
from requests import session
from json import loads
from urllib.parse import unquote

import requests

def getinitialdata(html: str):
    """Extract the ``ytInitialData`` JSON object embedded in a page.

    Scans *html* line by line for the first line that, ignoring surrounding
    whitespace, starts with ``window["ytInitialData"] = `` and parses the
    rest of that line as JSON.

    Args:
        html: Raw HTML text of the page.

    Returns:
        The decoded JSON value, or an empty dict when no such line exists.

    Raises:
        ValueError: If the text after the marker is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    marker = 'window["ytInitialData"] = '
    for line in html.splitlines():
        stripped = line.strip()
        if not stripped.startswith(marker):
            continue
        payload = stripped[len(marker):].strip()
        # The JavaScript assignment normally ends with ";", which is not part
        # of the JSON. Drop it only when it is actually there, so a line
        # without the terminator does not lose its closing brace.
        if payload.endswith(";"):
            payload = payload[:-1]
        return loads(payload)
    return {}

mysession = session()
def getapikey(html: str):
    """Return the value following ``"INNERTUBE_API_KEY":"`` in *html*.

    The result is the text between the first occurrence of that marker and
    the next double quote. When the marker is absent, the search for the
    closing quote runs over the whole input instead, so the text before the
    first double quote (or the entire input if there is none) is returned.
    """
    marker = '"INNERTUBE_API_KEY":"'
    start = html.find(marker)
    # No marker: fall back to scanning the full text, as described above.
    remainder = html if start == -1 else html[start + len(marker):]
    end = remainder.find('"')
    if end == -1:
        return remainder
    return remainder[:end]

#extract latest version automatically
try:
lver = getinitialdata(mysession.get("https://www.youtube.com/").text)["responseContext"]["serviceTrackingParams"][2]["params"][2]["value"]
except:
lver = "2.20201002.02.01"
mysession.headers.update({"x-youtube-client-name": "1", "x-youtube-client-version": lver, "Accept-Language": "en-US"})
def getlver(initialdata: dict):
    """Read the client version string out of a parsed initial-data dict.

    Looks up
    ``initialdata["responseContext"]["serviceTrackingParams"][2]["params"][2]["value"]``.

    Args:
        initialdata: Parsed initial-data structure (e.g. the result of
            ``getinitialdata``); may be empty.

    Returns:
        The value found at that path, or the hard-coded fallback
        ``"2.20201002.02.01"`` when any step of the lookup fails.
    """
    try:
        return initialdata["responseContext"]["serviceTrackingParams"][2]["params"][2]["value"]
    except (KeyError, IndexError, TypeError):
        # Only lookup failures (missing key, short list, wrong container
        # type) trigger the fallback; KeyboardInterrupt/SystemExit are no
        # longer swallowed as they were by the bare ``except``.
        return "2.20201002.02.01"

# fullyexpand: follow continuation tokens and merge every continuation page's
# "items" into inputdict["items"], then return inputdict (the same object,
# mutated in place).
# NOTE(review): the signature, the request line and the return line each
# appear twice in this excerpt; this looks like the pre-/post-change pair from
# a diff view -- confirm which variant is live. The second variant takes the
# session and the continuation headers as parameters.
def fullyexpand(inputdict: dict):
def fullyexpand(inputdict: dict, mysession: requests.session, continuationheaders: dict):
# Start from the caller's dict; it is replaced by each continuation page below.
lastrequestj = inputdict
# Keep fetching while the most recent page still has a "continuations" key.
while "continuations" in lastrequestj.keys():
# GET browse_ajax with the URL-unquoted token of the first continuation entry.
lastrequest = mysession.get("https://www.youtube.com/browse_ajax?continuation="+unquote(lastrequestj["continuations"][0]["nextContinuationData"]["continuation"]))
# Same request, additionally sending continuationheaders as per-request headers.
lastrequest = mysession.get("https://www.youtube.com/browse_ajax?continuation="+unquote(lastrequestj["continuations"][0]["nextContinuationData"]["continuation"]), headers=continuationheaders)
# The next page is element 1 of the JSON response, under
# response -> continuationContents -> gridContinuation.
lastrequestj = lastrequest.json()[1]["response"]["continuationContents"]["gridContinuation"]
# Append this page's items to the caller's list.
inputdict["items"].extend(lastrequestj["items"])

return inputdict
return inputdict

Loading…
Cancel
Save