Browse Source

Retrieve all channels and playlists on channel items

master
tech234a 3 years ago
parent
commit
3ed2f2178b
4 changed files with 141 additions and 5 deletions
  1. +1
    -1
      tracker.py
  2. +8
    -4
      worker.py
  3. +106
    -0
      youtube_channel.py
  4. +26
    -0
      youtube_util.py

+ 1
- 1
tracker.py View File

@@ -9,7 +9,7 @@ from os.path import isfile
from json import loads

# https://github.com/ArchiveTeam/tencent-weibo-grab/blob/9bae5f9747e014db9227821a9c11557267967023/pipeline.py
# NOTE(review): this diff view lists the removed and the added VERSION line
# back to back; read as plain Python, the second assignment ("20201002.01")
# is the one that takes effect.
VERSION = "20201001.01"
VERSION = "20201002.01"

# Tracker identifier and host name; presumably consumed by code outside this
# hunk -- TODO confirm against the rest of tracker.py.
TRACKER_ID = "ext-yt-communitycontribs"
TRACKER_HOST = "trackerproxy.meo.ws"


+ 8
- 4
worker.py View File

@@ -5,6 +5,8 @@ from os import mkdir, rmdir, listdir, system, environ
from os.path import isdir, isfile, getsize
from json import dumps, loads

from youtube_channel import main

import signal

import tracker
@@ -176,10 +178,12 @@ def threadrunner():
jobs.put(("submitdiscovery", itemyv["id"], tracker.ItemType.Video))

#channel created playlists
y = ydl.extract_info("https://www.youtube.com/channel/"+desit.split(":", 1)[1]+"/playlists?view=1", download=False)
for itemyv in y["entries"]:
jobs.put(("submitdiscovery", itemyv["url"].split("?list=", 1)[1], tracker.ItemType.Playlist)) #[38:]
#TODO: saved playlists, featured channels
y = main(desit.split(":", 1)[1])
for itemyv in y["playlists"]:
jobs.put(("submitdiscovery", itemyv, tracker.ItemType.Playlist))
for itemyv in y["channels"]:
jobs.put(("submitdiscovery", itemyv, tracker.ItemType.Channel))

jobs.put(("complete", None, "channel:"+args))
except:
print("YouTube-DL error, ignoring but not marking as complete...", "https://www.youtube.com/channel/"+desit.split(":", 1)[1])


+ 106
- 0
youtube_channel.py View File

@@ -0,0 +1,106 @@
from requests import session
from youtube_util import getinitialdata, fullyexpand

# TODO: Rate limit detection, HTTP3?

# Shared HTTP session used for every request issued by this module.
mysession = session()

# Extract the latest client version automatically from the home page data.
try:
    lver = getinitialdata(mysession.get("https://www.youtube.com/").text)["responseContext"]["serviceTrackingParams"][2]["params"][2]["value"]
except Exception:
    # Best effort: any request or lookup failure falls back to a fixed
    # version string. KeyboardInterrupt/SystemExit are no longer swallowed.
    lver = "2.20201002.02.01"

# Send the client name/version and an English Accept-Language on every request.
mysession.headers.update({"x-youtube-client-name": "1", "x-youtube-client-version": lver, "Accept-Language": "en-US"})

def main(channelid: str):
    """Collect the playlist and channel IDs exposed on a channel's tabs.

    Fetches the channel's ``/playlists`` and ``/channels`` pages through the
    module-level ``mysession``, follows every shelf link found on them and
    expands each grid with ``fullyexpand``.

    Args:
        channelid: Channel ID appended to
            ``https://www.youtube.com/channel/``.

    Returns:
        A dict ``{"playlists": set, "channels": set}``. ``channels`` also
        receives the ``browseId`` found in the ``shortBylineText`` of any
        playlist entry that carries one.
    """
    playlists = set()
    channellist = set()

    def load_page(url):
        # Download a page and decode its embedded initial data.
        return getinitialdata(mysession.get(url).text)

    def tab_sections(page, tab_index):
        # Section list of the tab at ``tab_index``.
        tabs = page["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]
        return tabs[tab_index]["tabRenderer"]["content"]["sectionListRenderer"]["contents"]

    def shelf_url(section):
        # URL behind the title of a shelf.
        return section["shelfRenderer"]["title"]["runs"][0]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"]

    def first_grid_items(page, tab_index):
        # Fully expanded items of the first grid on a shelf page.
        first = tab_sections(page, tab_index)[0]["itemSectionRenderer"]["contents"][0]
        return fullyexpand(first["gridRenderer"])["items"]

    def record_playlists(grid_items):
        for entry in grid_items:
            renderer = entry["gridPlaylistRenderer"]
            playlists.add(renderer["playlistId"])
            if "shortBylineText" in renderer:
                channellist.add(renderer["shortBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"])

    def record_channels(grid_items):
        for entry in grid_items:
            channellist.add(entry["gridChannelRenderer"]["channelId"])

    # PLAYLISTS
    playlist_page = load_page("https://www.youtube.com/channel/"+str(channelid)+"/playlists")

    # Locate the tab positions by the last path segment of each tab URL.
    # Both stay at 0 when no tab URL ends in "playlists" / "channels".
    playlists_tab = 0
    channels_tab = 0
    for position, tab in enumerate(playlist_page["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]):
        if "tabRenderer" not in tab:
            continue
        slug = tab["tabRenderer"]["endpoint"]["commandMetadata"]["webCommandMetadata"]["url"].rsplit("/", 1)[-1]
        if slug == "playlists":
            playlists_tab = position
        elif slug == "channels":
            channels_tab = position

    # Grids are harvested right away; shelves are only remembered by URL
    # and fetched afterwards.
    playlist_shelves = set()
    for section in tab_sections(playlist_page, playlists_tab):
        content = section["itemSectionRenderer"]["contents"][0]
        if "shelfRenderer" in content:
            playlist_shelves.add(shelf_url(content))
        elif "gridRenderer" in content:
            record_playlists(fullyexpand(content["gridRenderer"])["items"])

    for link in playlist_shelves:
        shelf_page = load_page("https://www.youtube.com/"+str(link))
        record_playlists(first_grid_items(shelf_page, playlists_tab))

    # CHANNELS
    channel_page = load_page("https://www.youtube.com/channel/"+str(channelid)+"/channels")

    channel_shelves = set()
    for section in tab_sections(channel_page, channels_tab):
        content = section["itemSectionRenderer"]["contents"][0]
        if "shelfRenderer" in content:
            channel_shelves.add(shelf_url(content))
        elif "gridRenderer" in content:
            record_channels(fullyexpand(content["gridRenderer"])["items"])

    for link in channel_shelves:
        shelf_page = load_page("https://www.youtube.com/"+str(link))
        record_channels(first_grid_items(shelf_page, channels_tab))

    return {"playlists": playlists, "channels": channellist}

if __name__ == "__main__":
    import sys

    # Drop the script name in place so only channel IDs remain in sys.argv.
    del sys.argv[0]
    for channel_id in sys.argv:
        print(main(channel_id))

# SAMPLES:
# UCqj7Cz7revf5maW9g5pgNcg lots of playlists
# UCRwczJ_nk1t9IGHyHfHbXRQ Nathaniel Bandy - created playlists only, featured channels only
# UCo8bcnLyZH8tBIH9V1mLgqQ the odd 1 is out - shelf, way too many subscriptions
# UCfXIV2vThxEF8Hq2OE17AeQ no playlists or channels featured

# UCJqV2-l0jqAa7uYN8IGJW7w TONS OF SUBSCRIPTIONS, no featured channels

# UC_1nZUpPS6jFv5Pn3f85CaA TONS OF SUBSCRIPTIONS, some featured channels

# UCJOh5FKisc0hUlEeWFBlD-w no subscriptions, plenty of featured channels

# UC7fjJERoGTs_eOKk-nn7RMw fair number of featured channels

+ 26
- 0
youtube_util.py View File

@@ -0,0 +1,26 @@
from requests import session
from json import loads
from urllib.parse import unquote

def getinitialdata(html: str):
    """Extract the ``ytInitialData`` JSON value embedded in a page.

    Scans *html* line by line for the first line that, once stripped,
    starts with ``window["ytInitialData"] = `` and decodes whatever follows
    the marker with ``json.loads``. A single trailing ``;`` is removed only
    when it is actually present, so a line without the terminator no longer
    loses its closing bracket.

    Args:
        html: Full text of the downloaded page.

    Returns:
        The decoded JSON value, or ``{}`` when no line carries the marker.

    Raises:
        ValueError: If the text after the marker is not valid JSON
            (``json.JSONDecodeError`` is a subclass).
    """
    marker = 'window["ytInitialData"] = '
    for line in html.splitlines():
        if line.strip().startswith(marker):
            payload = line.split(marker, 1)[1].strip()
            if payload.endswith(";"):
                payload = payload[:-1]
            return loads(payload)
    return {}

# Shared HTTP session used for every request issued by this module.
mysession = session()

# Extract the latest client version automatically from the home page data.
try:
    lver = getinitialdata(mysession.get("https://www.youtube.com/").text)["responseContext"]["serviceTrackingParams"][2]["params"][2]["value"]
except Exception:
    # Best effort: any request or lookup failure falls back to a fixed
    # version string. KeyboardInterrupt/SystemExit are no longer swallowed.
    lver = "2.20201002.02.01"

# Send the client name/version and an English Accept-Language on every request.
mysession.headers.update({"x-youtube-client-name": "1", "x-youtube-client-version": lver, "Accept-Language": "en-US"})

def fullyexpand(inputdict: dict):
    """Append every continuation page's items to ``inputdict["items"]``.

    While the most recently seen page has a ``"continuations"`` key, the
    continuation token of its first entry is URL-unquoted and requested
    from ``browse_ajax`` through the module-level ``mysession``. The items
    of the returned ``gridContinuation`` are appended to *inputdict*.

    Args:
        inputdict: Grid dict; it is modified in place.

    Returns:
        The same *inputdict* object. It is returned untouched when it has
        no ``"continuations"`` key.
    """
    page = inputdict
    while "continuations" in page:
        token = page["continuations"][0]["nextContinuationData"]["continuation"]
        reply = mysession.get("https://www.youtube.com/browse_ajax?continuation=" + unquote(token))
        page = reply.json()[1]["response"]["continuationContents"]["gridContinuation"]
        inputdict["items"].extend(page["items"])

    return inputdict

Loading…
Cancel
Save