From 3ed2f2178b003e3734777f675ebaae7a91af51b6 Mon Sep 17 00:00:00 2001
From: tech234a <46801700+tech234a@users.noreply.github.com>
Date: Fri, 2 Oct 2020 17:53:39 -0400
Subject: [PATCH] Retrieve all channels and playlists on channel items

---
 tracker.py         |   2 +-
 worker.py          |  12 +++--
 youtube_channel.py | 106 +++++++++++++++++++++++++++++++++++++++++++++
 youtube_util.py    |  26 +++++++++++
 4 files changed, 141 insertions(+), 5 deletions(-)
 create mode 100644 youtube_channel.py
 create mode 100644 youtube_util.py

diff --git a/tracker.py b/tracker.py
index 4d29de3..f3782bf 100644
--- a/tracker.py
+++ b/tracker.py
@@ -9,7 +9,7 @@ from os.path import isfile
 from json import loads
 
 # https://github.com/ArchiveTeam/tencent-weibo-grab/blob/9bae5f9747e014db9227821a9c11557267967023/pipeline.py
-VERSION = "20201001.01"
+VERSION = "20201002.01"
 
 TRACKER_ID = "ext-yt-communitycontribs"
 TRACKER_HOST = "trackerproxy.meo.ws"
diff --git a/worker.py b/worker.py
index 8cfbede..bf9738c 100644
--- a/worker.py
+++ b/worker.py
@@ -5,6 +5,8 @@ from os import mkdir, rmdir, listdir, system, environ
 from os.path import isdir, isfile, getsize
 from json import dumps, loads
 
+from youtube_channel import main
+
 import signal
 
 import tracker
@@ -176,10 +178,12 @@ def threadrunner():
                         jobs.put(("submitdiscovery", itemyv["id"], tracker.ItemType.Video))
 
                     #channel created playlists
-                    y = ydl.extract_info("https://www.youtube.com/channel/"+desit.split(":", 1)[1]+"/playlists?view=1", download=False)
-                    for itemyv in y["entries"]:
-                        jobs.put(("submitdiscovery", itemyv["url"].split("?list=", 1)[1], tracker.ItemType.Playlist)) #[38:]
-                    #TODO: saved playlists, featured channels
+                    y = main(desit.split(":", 1)[1])
+                    for itemyv in y["playlists"]:
+                        jobs.put(("submitdiscovery", itemyv, tracker.ItemType.Playlist))
+                    for itemyv in y["channels"]:
+                        jobs.put(("submitdiscovery", itemyv, tracker.ItemType.Channel))
+
                     jobs.put(("complete", None, "channel:"+args))
                 except:
                     print("YouTube-DL error, ignoring but not marking as complete...", "https://www.youtube.com/channel/"+desit.split(":", 1)[1])
diff --git a/youtube_channel.py b/youtube_channel.py
new file mode 100644
index 0000000..57ffbbe
--- /dev/null
+++ b/youtube_channel.py
@@ -0,0 +1,106 @@
+from requests import session
+from youtube_util import getinitialdata, fullyexpand
+
+# TODO: Rate limit detection, HTTP3?
+
+mysession = session()
+# Read the current web client version from the homepage's ytInitialData; it is sent
+# below as the x-youtube-client-version header. Fall back to a pinned version on failure.
+try:
+    lver = getinitialdata(mysession.get("https://www.youtube.com/").text)["responseContext"]["serviceTrackingParams"][2]["params"][2]["value"]
+except Exception:  # not a bare except: KeyboardInterrupt/SystemExit must still propagate
+    lver = "2.20201002.02.01"
+
+mysession.headers.update({"x-youtube-client-name": "1", "x-youtube-client-version": lver, "Accept-Language": "en-US"})
+
+def _fetch_initial_data(path: str) -> dict:
+    """Fetch https://www.youtube.com/<path> and return its parsed ytInitialData."""
+    # lstrip("/") prevents a double slash when path already begins with "/" (as shelf URLs may).
+    return getinitialdata(mysession.get("https://www.youtube.com/" + path.lstrip("/")).text)
+
+def _tab_sections(initdata: dict, tab_index: int) -> list:
+    """Return the sectionListRenderer contents of the tab at tab_index."""
+    tab = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][tab_index]
+    return tab["tabRenderer"]["content"]["sectionListRenderer"]["contents"]
+
+def _grid_items(initdata: dict, tab_index: int) -> list:
+    """Collect every grid item of a tab, expanding continuations and following shelf links."""
+    items = []
+    shelf_urls = set()
+    for section in _tab_sections(initdata, tab_index):
+        first = section["itemSectionRenderer"]["contents"][0]
+        if "shelfRenderer" in first:
+            # The shelf title links to a page whose first section holds the shelf's grid (fetched below).
+            title_run = first["shelfRenderer"]["title"]["runs"][0]
+            shelf_urls.add(title_run["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"])
+        elif "gridRenderer" in first:
+            items.extend(fullyexpand(first["gridRenderer"])["items"])
+
+    for shelf_url in shelf_urls:
+        shelf_first = _tab_sections(_fetch_initial_data(shelf_url), tab_index)[0]
+        items.extend(fullyexpand(shelf_first["itemSectionRenderer"]["contents"][0]["gridRenderer"])["items"])
+    return items
+
+def main(channelid: str):
+    """Discover the playlists and channels referenced by a channel's /playlists and /channels pages.
+
+    Args:
+        channelid: channel ID as used in https://www.youtube.com/channel/<channelid>.
+
+    Returns:
+        {"playlists": set of playlist IDs, "channels": set of channel IDs}. The channel set holds
+        the entries of the channels page plus the byline channel of each playlist that has one.
+
+    Raises:
+        Request errors and KeyError/IndexError from an unexpected page structure propagate.
+    """
+    playlists = set()
+    channellist = set()
+
+    # PLAYLISTS
+    initdata = _fetch_initial_data("channel/" + str(channelid) + "/playlists")
+
+    # Locate each tab's index by the last path segment of its URL; the last match wins and
+    # both indices stay 0 when no tab matches.
+    tab_ids = {"playlists": 0, "channels": 0}
+    for index, tab in enumerate(initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]):
+        if "tabRenderer" in tab:
+            url = tab["tabRenderer"]["endpoint"]["commandMetadata"]["webCommandMetadata"]["url"]
+            suffix = url.rsplit("/", 1)[-1]
+            if suffix in tab_ids:
+                tab_ids[suffix] = index
+
+    # A playlist's byline channel (when present) is recorded as a discovered channel too.
+    for playlist in _grid_items(initdata, tab_ids["playlists"]):
+        renderer = playlist["gridPlaylistRenderer"]
+        playlists.add(renderer["playlistId"])
+        if "shortBylineText" in renderer:
+            channellist.add(renderer["shortBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"])
+
+    # CHANNELS
+    initdata = _fetch_initial_data("channel/" + str(channelid) + "/channels")
+    for channel in _grid_items(initdata, tab_ids["channels"]):
+        channellist.add(channel["gridChannelRenderer"]["channelId"])
+
+    return {"playlists": playlists, "channels": channellist}
+
+if __name__ == "__main__":
+    # CLI usage: python youtube_channel.py CHANNEL_ID [CHANNEL_ID ...]
+    from sys import argv
+    # Slice instead of pop(0) so the shared sys.argv list is left untouched.
+    for channel in argv[1:]:
+        print(main(channel))
+
+# SAMPLES:
+# UCqj7Cz7revf5maW9g5pgNcg lots of playlists
+# UCRwczJ_nk1t9IGHyHfHbXRQ Nathaniel Bandy - created playlists only, featured channels only
+# UCo8bcnLyZH8tBIH9V1mLgqQ the odd 1 is out - shelf, way too many subscriptions
+# UCfXIV2vThxEF8Hq2OE17AeQ no playlists or channels featured
+
+# UCJqV2-l0jqAa7uYN8IGJW7w TONS OF SUBSCRIPTIONS, no featured channels
+
+# UC_1nZUpPS6jFv5Pn3f85CaA TONS OF SUBSCRIPTIONS, some featured channels
+
+# UCJOh5FKisc0hUlEeWFBlD-w no subscriptions, plenty of featured channels
+
+# UC7fjJERoGTs_eOKk-nn7RMw fair number of featured channels
diff --git a/youtube_util.py b/youtube_util.py
new file mode 100644
index 0000000..54fe2cf
--- /dev/null
+++ b/youtube_util.py
@@ -0,0 +1,26 @@
+from requests import session
+from json import loads
+from urllib.parse import unquote
+
+def getinitialdata(html: str):
+    """Return the JSON assigned to window["ytInitialData"] on the first matching line of html, or {}."""
+    marker = 'window["ytInitialData"] = '
+    found = next((text for text in html.splitlines() if text.strip().startswith(marker)), None)
+    return {} if found is None else loads(found.split(marker, 1)[1].strip()[:-1])  # [:-1] drops the final character (the trailing ";")
+
+mysession = session()
+# Read the current web client version from the homepage; fall back to a pinned one on failure.
+try:
+    lver = getinitialdata(mysession.get("https://www.youtube.com/").text)["responseContext"]["serviceTrackingParams"][2]["params"][2]["value"]
+except Exception:  # a bare except would also swallow KeyboardInterrupt/SystemExit
+    lver = "2.20201002.02.01"
+mysession.headers.update({"x-youtube-client-name": "1", "x-youtube-client-version": lver, "Accept-Language": "en-US"})
+
+def fullyexpand(inputdict: dict):
+    """Follow each "continuations" token via browse_ajax, extending inputdict["items"] in place."""
+    page = inputdict
+    while "continuations" in page:
+        token = unquote(page["continuations"][0]["nextContinuationData"]["continuation"])
+        page = mysession.get("https://www.youtube.com/browse_ajax?continuation="+token).json()[1]["response"]["continuationContents"]["gridContinuation"]
+        inputdict["items"].extend(page["items"])
+    return inputdict