From 3e43461c4376b03247d03e5023958d219a9c3b65 Mon Sep 17 00:00:00 2001 From: tech234a <46801700+tech234a@users.noreply.github.com> Date: Sat, 17 Oct 2020 00:34:04 -0400 Subject: [PATCH] Update youtube-util --- worker.py | 4 ++-- youtube_channel.py | 44 ++++++++++++++++++++++++++------------------ youtube_util.py | 23 +++++++++++++---------- 3 files changed, 41 insertions(+), 30 deletions(-) diff --git a/worker.py b/worker.py index bdf4372..40904bb 100644 --- a/worker.py +++ b/worker.py @@ -5,7 +5,7 @@ from os import mkdir, rmdir, listdir, system, environ from os.path import isdir, isfile, getsize from json import loads -from youtube_channel import main +from youtube_channel import process_channel import signal @@ -159,7 +159,7 @@ def threadrunner(): jobs.put(("submitdiscovery", itemyv["id"], tracker.ItemType.Video)) #channel created playlists - y = main(desit.split(":", 1)[1]) + y = process_channel(desit.split(":", 1)[1]) for itemyv in y["playlists"]: jobs.put(("submitdiscovery", itemyv, tracker.ItemType.Playlist)) for itemyv in y["channels"]: diff --git a/youtube_channel.py b/youtube_channel.py index 57ffbbe..37db418 100644 --- a/youtube_channel.py +++ b/youtube_channel.py @@ -1,25 +1,31 @@ from requests import session -from youtube_util import getinitialdata, fullyexpand - -# TODO: Rate limit detection, HTTP3? +from youtube_util import getinitialdata, fullyexpand, getapikey, getlver mysession = session() #extract latest version automatically -try: - lver = getinitialdata(mysession.get("https://www.youtube.com/").text)["responseContext"]["serviceTrackingParams"][2]["params"][2]["value"] -except: - lver = "2.20201002.02.01" +homepage = mysession.get("https://www.youtube.com/").text + +API_KEY = getapikey(homepage) + +params = ( + ('key', API_KEY), +) -#print(lver) -mysession.headers.update({"x-youtube-client-name": "1", "x-youtube-client-version": lver, "Accept-Language": "en-US"}) +API_VERSION = getlver(getinitialdata(homepage)) -def main(channelid: str): +continuationheaders = {"x-youtube-client-name": "1", "x-youtube-client-version": API_VERSION, "Accept-Language": "en-US"} + +del homepage + +def process_channel(channelid: str): playlists = set() shelfres = set() channellist = set() # PLAYLISTS - initdata = getinitialdata(mysession.get("https://www.youtube.com/channel/"+str(channelid)+"/playlists").text) + data = {"context":{"client":{"hl":"en","gl":"US","clientName":"WEB","clientVersion":API_VERSION}},"browseId":channelid,"params":"EglwbGF5bGlzdHM%3D"} + initdata = mysession.post("https://www.youtube.com/youtubei/v1/browse", params=params, json=data).json() + CHANNELS_ID = 0 PLAYLISTS_ID = 0 @@ -42,7 +48,7 @@ def main(channelid: str): if "shelfRenderer" in itemint.keys(): shelfres.add(itemint["shelfRenderer"]["title"]["runs"][0]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"]) elif "gridRenderer" in itemint.keys(): - playlistsint = fullyexpand(itemint["gridRenderer"])["items"] + playlistsint = fullyexpand(itemint["gridRenderer"], mysession, continuationheaders)["items"] for playlist in playlistsint: playlists.add(playlist["gridPlaylistRenderer"]["playlistId"]) @@ -51,7 +57,7 @@ def main(channelid: str): for item in shelfres: shelfiteminitdata = getinitialdata(mysession.get("https://www.youtube.com/"+str(item)).text) - playlistsint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][PLAYLISTS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"])["items"] + playlistsint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][PLAYLISTS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"], mysession, continuationheaders)["items"] for playlist in playlistsint: playlists.add(playlist["gridPlaylistRenderer"]["playlistId"]) @@ -61,7 +67,9 @@ def main(channelid: str): # CHANNELS cshelfres = set() - initdata = getinitialdata(mysession.get("https://www.youtube.com/channel/"+str(channelid)+"/channels").text) + # PLAYLISTS + data = {"context":{"client":{"hl":"en","gl":"US","clientName":"WEB","clientVersion":API_VERSION}},"browseId":channelid,"params":"EghjaGFubmVscw%3D%3D"} + initdata = mysession.post("https://www.youtube.com/youtubei/v1/browse", params=params, json=data).json() shelflist = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][CHANNELS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"] @@ -70,14 +78,14 @@ def main(channelid: str): if "shelfRenderer" in itemint.keys(): cshelfres.add(itemint["shelfRenderer"]["title"]["runs"][0]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"]) elif "gridRenderer" in itemint.keys(): - chanlistint = fullyexpand(itemint["gridRenderer"])["items"] + chanlistint = fullyexpand(itemint["gridRenderer"], mysession, continuationheaders)["items"] for channel in chanlistint: channellist.add(channel["gridChannelRenderer"]["channelId"]) for item in cshelfres: shelfiteminitdata = getinitialdata(mysession.get("https://www.youtube.com/"+str(item)).text) - chanlistint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][CHANNELS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"])["items"] + chanlistint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][CHANNELS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"], mysession, continuationheaders)["items"] for channel in chanlistint: channellist.add(channel["gridChannelRenderer"]["channelId"]) @@ -89,7 +97,7 @@ if __name__ == "__main__": chanl = argv chanl.pop(0) for channel in chanl: - print(main(channel)) + print(process_channel(channel)) # SAMPLES: # UCqj7Cz7revf5maW9g5pgNcg lots of playlists @@ -103,4 +111,4 @@ if __name__ == "__main__": # UCJOh5FKisc0hUlEeWFBlD-w no subscriptions, plenty of featured channels -# UC7fjJERoGTs_eOKk-nn7RMw fair number of featured channels +# UC7fjJERoGTs_eOKk-nn7RMw fair number of featured channels \ No newline at end of file diff --git a/youtube_util.py b/youtube_util.py index 54fe2cf..4a73996 100644 --- a/youtube_util.py +++ b/youtube_util.py @@ -1,26 +1,29 @@ -from requests import session from json import loads from urllib.parse import unquote +import requests + def getinitialdata(html: str): for line in html.splitlines(): if line.strip().startswith('window["ytInitialData"] = '): return loads(line.split('window["ytInitialData"] = ', 1)[1].strip()[:-1]) return {} -mysession = session() +def getapikey(html: str): + return html.split('"INNERTUBE_API_KEY":"', 1)[-1].split('"', 1)[0] + #extract latest version automatically -try: - lver = getinitialdata(mysession.get("https://www.youtube.com/").text)["responseContext"]["serviceTrackingParams"][2]["params"][2]["value"] -except: - lver = "2.20201002.02.01" -mysession.headers.update({"x-youtube-client-name": "1", "x-youtube-client-version": lver, "Accept-Language": "en-US"}) +def getlver(initialdata: dict): + try: + return initialdata["responseContext"]["serviceTrackingParams"][2]["params"][2]["value"] + except: + return "2.20201002.02.01" -def fullyexpand(inputdict: dict): +def fullyexpand(inputdict: dict, mysession: requests.session, continuationheaders: dict): lastrequestj = inputdict while "continuations" in lastrequestj.keys(): - lastrequest = mysession.get("https://www.youtube.com/browse_ajax?continuation="+unquote(lastrequestj["continuations"][0]["nextContinuationData"]["continuation"])) + lastrequest = mysession.get("https://www.youtube.com/browse_ajax?continuation="+unquote(lastrequestj["continuations"][0]["nextContinuationData"]["continuation"]), headers=continuationheaders) lastrequestj = lastrequest.json()[1]["response"]["continuationContents"]["gridContinuation"] inputdict["items"].extend(lastrequestj["items"]) - return inputdict + return inputdict \ No newline at end of file