Browse Source

Retrieve all channels and playlists on channel items

master
tech234a 3 years ago
parent
commit
3ed2f2178b
4 changed files with 141 additions and 5 deletions
  1. +1
    -1
      tracker.py
  2. +8
    -4
      worker.py
  3. +106
    -0
      youtube_channel.py
  4. +26
    -0
      youtube_util.py

+ 1
- 1
tracker.py View File

@@ -9,7 +9,7 @@ from os.path import isfile
from json import loads from json import loads


# https://github.com/ArchiveTeam/tencent-weibo-grab/blob/9bae5f9747e014db9227821a9c11557267967023/pipeline.py # https://github.com/ArchiveTeam/tencent-weibo-grab/blob/9bae5f9747e014db9227821a9c11557267967023/pipeline.py
VERSION = "20201001.01"
VERSION = "20201002.01"


TRACKER_ID = "ext-yt-communitycontribs" TRACKER_ID = "ext-yt-communitycontribs"
TRACKER_HOST = "trackerproxy.meo.ws" TRACKER_HOST = "trackerproxy.meo.ws"


+ 8
- 4
worker.py View File

@@ -5,6 +5,8 @@ from os import mkdir, rmdir, listdir, system, environ
from os.path import isdir, isfile, getsize from os.path import isdir, isfile, getsize
from json import dumps, loads from json import dumps, loads


from youtube_channel import main

import signal import signal


import tracker import tracker
@@ -176,10 +178,12 @@ def threadrunner():
jobs.put(("submitdiscovery", itemyv["id"], tracker.ItemType.Video)) jobs.put(("submitdiscovery", itemyv["id"], tracker.ItemType.Video))


#channel created playlists #channel created playlists
y = ydl.extract_info("https://www.youtube.com/channel/"+desit.split(":", 1)[1]+"/playlists?view=1", download=False)
for itemyv in y["entries"]:
jobs.put(("submitdiscovery", itemyv["url"].split("?list=", 1)[1], tracker.ItemType.Playlist)) #[38:]
#TODO: saved playlists, featured channels
y = main(desit.split(":", 1)[1])
for itemyv in y["playlists"]:
jobs.put(("submitdiscovery", itemyv, tracker.ItemType.Playlist))
for itemyv in y["channels"]:
jobs.put(("submitdiscovery", itemyv, tracker.ItemType.Channel))

jobs.put(("complete", None, "channel:"+args)) jobs.put(("complete", None, "channel:"+args))
except: except:
print("YouTube-DL error, ignoring but not marking as complete...", "https://www.youtube.com/channel/"+desit.split(":", 1)[1]) print("YouTube-DL error, ignoring but not marking as complete...", "https://www.youtube.com/channel/"+desit.split(":", 1)[1])


+ 106
- 0
youtube_channel.py View File

@@ -0,0 +1,106 @@
from requests import session
from youtube_util import getinitialdata, fullyexpand

# TODO: Rate limit detection, HTTP3?

# Shared HTTP session used for every request made by this module.
mysession = session()

# Extract the latest client version automatically from the YouTube home page.
try:
    lver = getinitialdata(mysession.get("https://www.youtube.com/").text)["responseContext"]["serviceTrackingParams"][2]["params"][2]["value"]
except Exception:
    # Best effort: any request or parsing failure falls back to a fixed
    # version string. `Exception` (rather than a bare `except:`) lets
    # KeyboardInterrupt and SystemExit propagate.
    lver = "2.20201002.02.01"

#print(lver)
mysession.headers.update({"x-youtube-client-name": "1", "x-youtube-client-version": lver, "Accept-Language": "en-US"})

def main(channelid: str):
    """Collect the playlist IDs and channel IDs shown on a channel's pages.

    Fetches the channel's ``/playlists`` and ``/channels`` pages through the
    module-level ``mysession``, follows every shelf link found on them, and
    expands each grid with ``fullyexpand``.

    Args:
        channelid: Channel ID appended to ``https://www.youtube.com/channel/``.

    Returns:
        A dict with two sets: ``"playlists"`` holds the playlist IDs found,
        ``"channels"`` holds the channel IDs found on the channels tab plus
        the byline channel of every playlist entry that has one.
    """
    found_playlists = set()
    found_channels = set()

    def fetch(url):
        # Download a page and return its parsed initial-data object.
        return getinitialdata(mysession.get(url).text)

    def sections_of(page, tab_index):
        # Section list of the tab at ``tab_index``.
        tabs = page["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]
        return tabs[tab_index]["tabRenderer"]["content"]["sectionListRenderer"]["contents"]

    def shelf_url(section):
        # URL that the shelf's title links to.
        return section["shelfRenderer"]["title"]["runs"][0]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"]

    def first_grid(page, tab_index):
        # Grid held by the first section of the tab at ``tab_index``.
        return sections_of(page, tab_index)[0]["itemSectionRenderer"]["contents"][0]["gridRenderer"]

    def take_playlists(grid):
        # Record every playlist in the fully expanded grid, and its byline channel if any.
        for entry in fullyexpand(grid)["items"]:
            found_playlists.add(entry["gridPlaylistRenderer"]["playlistId"])
            if "shortBylineText" in entry["gridPlaylistRenderer"]:
                found_channels.add(entry["gridPlaylistRenderer"]["shortBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"])

    def take_channels(grid):
        # Record every channel in the fully expanded grid.
        for entry in fullyexpand(grid)["items"]:
            found_channels.add(entry["gridChannelRenderer"]["channelId"])

    # PLAYLISTS
    playlist_shelves = set()
    page = fetch("https://www.youtube.com/channel/" + str(channelid) + "/playlists")

    # Locate the playlists and channels tabs by the last path segment of
    # each tab's URL; both indices stay 0 when no matching tab is found.
    channels_tab = 0
    playlists_tab = 0
    for index, tab in enumerate(page["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]):
        if "tabRenderer" not in tab:
            continue
        tab_name = tab["tabRenderer"]["endpoint"]["commandMetadata"]["webCommandMetadata"]["url"].rsplit("/", 1)[-1]
        if tab_name == "playlists":
            playlists_tab = index
        elif tab_name == "channels":
            channels_tab = index

    for section in sections_of(page, playlists_tab):
        content = section["itemSectionRenderer"]["contents"][0]
        if "shelfRenderer" in content:
            # Shelves are visited after the whole tab has been scanned.
            playlist_shelves.add(shelf_url(content))
        elif "gridRenderer" in content:
            take_playlists(content["gridRenderer"])

    for url in playlist_shelves:
        shelf_page = fetch("https://www.youtube.com/" + str(url))
        take_playlists(first_grid(shelf_page, playlists_tab))

    # CHANNELS
    channel_shelves = set()
    page = fetch("https://www.youtube.com/channel/" + str(channelid) + "/channels")

    for section in sections_of(page, channels_tab):
        content = section["itemSectionRenderer"]["contents"][0]
        if "shelfRenderer" in content:
            channel_shelves.add(shelf_url(content))
        elif "gridRenderer" in content:
            take_channels(content["gridRenderer"])

    for url in channel_shelves:
        shelf_page = fetch("https://www.youtube.com/" + str(url))
        take_channels(first_grid(shelf_page, channels_tab))

    return {"playlists": found_playlists, "channels": found_channels}

if __name__ == "__main__":
    # Command-line use: print the result of main() for each argument given.
    from sys import argv

    argv.pop(0)  # drop the script name; this modifies sys.argv in place
    for channel_id in argv:
        print(main(channel_id))

# SAMPLES:
# UCqj7Cz7revf5maW9g5pgNcg lots of playlists
# UCRwczJ_nk1t9IGHyHfHbXRQ Nathaniel Bandy - created playlists only, featured channels only
# UCo8bcnLyZH8tBIH9V1mLgqQ the odd 1 is out - shelf, way too many subscriptions
# UCfXIV2vThxEF8Hq2OE17AeQ no playlists or channels featured

# UCJqV2-l0jqAa7uYN8IGJW7w TONS OF SUBSCRIPTIONS, no featured channels

# UC_1nZUpPS6jFv5Pn3f85CaA TONS OF SUBSCRIPTIONS, some featured channels

# UCJOh5FKisc0hUlEeWFBlD-w no subscriptions, plenty of featured channels

# UC7fjJERoGTs_eOKk-nn7RMw fair number of featured channels

+ 26
- 0
youtube_util.py View File

@@ -0,0 +1,26 @@
from requests import session
from json import loads
from urllib.parse import unquote

def getinitialdata(html: str):
    """Extract the ``ytInitialData`` object embedded in a page's HTML.

    Scans *html* line by line for the first line whose stripped text starts
    with ``window["ytInitialData"] = `` and parses the rest of that line as
    JSON.

    Args:
        html: Full text of the page.

    Returns:
        The parsed JSON value, or an empty dict when no matching line exists.

    Raises:
        json.JSONDecodeError: If the text after the marker is not valid JSON.
    """
    marker = 'window["ytInitialData"] = '
    for line in html.splitlines():
        if line.strip().startswith(marker):
            payload = line.split(marker, 1)[1].strip()
            # Drop the statement-terminating ";" only when it is actually
            # there, so an unterminated assignment does not lose its last
            # character.
            if payload.endswith(";"):
                payload = payload[:-1]
            return loads(payload)
    return {}

# Shared HTTP session used for the continuation requests made by this module.
mysession = session()

# Extract the latest client version automatically from the YouTube home page.
try:
    lver = getinitialdata(mysession.get("https://www.youtube.com/").text)["responseContext"]["serviceTrackingParams"][2]["params"][2]["value"]
except Exception:
    # Best effort: any request or parsing failure falls back to a fixed
    # version string. `Exception` (rather than a bare `except:`) lets
    # KeyboardInterrupt and SystemExit propagate.
    lver = "2.20201002.02.01"
mysession.headers.update({"x-youtube-client-name": "1", "x-youtube-client-version": lver, "Accept-Language": "en-US"})

def fullyexpand(inputdict: dict):
    """Follow a grid's continuations and gather all items into *inputdict*.

    While the most recently seen page has a ``"continuations"`` key, the next
    page is requested through the module-level ``mysession`` and its
    ``"items"`` are appended to ``inputdict["items"]``.

    Args:
        inputdict: Grid dict; it is modified in place.

    Returns:
        The same *inputdict* object, with the items of every continuation
        page appended to its ``"items"`` list.
    """
    endpoint = "https://www.youtube.com/browse_ajax?continuation="
    page = inputdict
    while "continuations" in page:
        token = page["continuations"][0]["nextContinuationData"]["continuation"]
        response = mysession.get(endpoint + unquote(token))
        page = response.json()[1]["response"]["continuationContents"]["gridContinuation"]
        inputdict["items"].extend(page["items"])

    return inputdict

Loading…
Cancel
Save