archiving community contributions on YouTube: unpublished captions, title and description translations and caption credits
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

114 lines
5.3 KiB

  1. from requests import session
  2. from youtube_util import getinitialdata, fullyexpand, getapikey, getlver
  3. mysession = session()
  4. #extract latest version automatically
  5. homepage = mysession.get("https://www.youtube.com/").text
  6. API_KEY = getapikey(homepage)
  7. params = (
  8. ('key', API_KEY),
  9. )
  10. API_VERSION = getlver(getinitialdata(homepage))
  11. continuationheaders = {"x-youtube-client-name": "1", "x-youtube-client-version": API_VERSION, "Accept-Language": "en-US"}
  12. del homepage
  13. def process_channel(channelid: str):
  14. playlists = set()
  15. shelfres = set()
  16. channellist = set()
  17. # PLAYLISTS
  18. data = {"context":{"client":{"hl":"en","gl":"US","clientName":"WEB","clientVersion":API_VERSION}},"browseId":channelid,"params":"EglwbGF5bGlzdHM%3D"}
  19. initdata = mysession.post("https://www.youtube.com/youtubei/v1/browse", params=params, json=data).json()
  20. CHANNELS_ID = 0
  21. PLAYLISTS_ID = 0
  22. current = 0
  23. for tab in initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]:
  24. if "tabRenderer" in tab.keys():
  25. if tab["tabRenderer"]["endpoint"]["commandMetadata"]["webCommandMetadata"]["url"].rsplit("/", 1)[-1] == "playlists":
  26. PLAYLISTS_ID = current
  27. elif tab["tabRenderer"]["endpoint"]["commandMetadata"]["webCommandMetadata"]["url"].rsplit("/", 1)[-1] == "channels":
  28. CHANNELS_ID = current
  29. current += 1
  30. del current
  31. shelflist = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][PLAYLISTS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"]
  32. for item in shelflist:
  33. itemint = item["itemSectionRenderer"]["contents"][0]
  34. if "shelfRenderer" in itemint.keys():
  35. shelfres.add(itemint["shelfRenderer"]["title"]["runs"][0]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"])
  36. elif "gridRenderer" in itemint.keys():
  37. playlistsint = fullyexpand(itemint["gridRenderer"], mysession, continuationheaders)["items"]
  38. for playlist in playlistsint:
  39. playlists.add(playlist["gridPlaylistRenderer"]["playlistId"])
  40. if "shortBylineText" in playlist["gridPlaylistRenderer"].keys():
  41. channellist.add(playlist["gridPlaylistRenderer"]["shortBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"])
  42. for item in shelfres:
  43. shelfiteminitdata = getinitialdata(mysession.get("https://www.youtube.com/"+str(item)).text)
  44. playlistsint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][PLAYLISTS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"], mysession, continuationheaders)["items"]
  45. for playlist in playlistsint:
  46. playlists.add(playlist["gridPlaylistRenderer"]["playlistId"])
  47. if "shortBylineText" in playlist["gridPlaylistRenderer"].keys():
  48. channellist.add(playlist["gridPlaylistRenderer"]["shortBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"])
  49. # CHANNELS
  50. cshelfres = set()
  51. # PLAYLISTS
  52. data = {"context":{"client":{"hl":"en","gl":"US","clientName":"WEB","clientVersion":API_VERSION}},"browseId":channelid,"params":"EghjaGFubmVscw%3D%3D"}
  53. initdata = mysession.post("https://www.youtube.com/youtubei/v1/browse", params=params, json=data).json()
  54. shelflist = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][CHANNELS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"]
  55. for item in shelflist:
  56. itemint = item["itemSectionRenderer"]["contents"][0]
  57. if "shelfRenderer" in itemint.keys():
  58. cshelfres.add(itemint["shelfRenderer"]["title"]["runs"][0]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"])
  59. elif "gridRenderer" in itemint.keys():
  60. chanlistint = fullyexpand(itemint["gridRenderer"], mysession, continuationheaders)["items"]
  61. for channel in chanlistint:
  62. channellist.add(channel["gridChannelRenderer"]["channelId"])
  63. for item in cshelfres:
  64. shelfiteminitdata = getinitialdata(mysession.get("https://www.youtube.com/"+str(item)).text)
  65. chanlistint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][CHANNELS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"], mysession, continuationheaders)["items"]
  66. for channel in chanlistint:
  67. channellist.add(channel["gridChannelRenderer"]["channelId"])
  68. return {"playlists": playlists, "channels": channellist}
  69. if __name__ == "__main__":
  70. from sys import argv
  71. chanl = argv
  72. chanl.pop(0)
  73. for channel in chanl:
  74. print(process_channel(channel))
# SAMPLES:
# UCqj7Cz7revf5maW9g5pgNcg lots of playlists
# UCRwczJ_nk1t9IGHyHfHbXRQ Nathaniel Bandy - created playlists only, featured channels only
# UCo8bcnLyZH8tBIH9V1mLgqQ the odd 1 is out - shelf, way too many subscriptions
# UCfXIV2vThxEF8Hq2OE17AeQ no playlists or channels featured
# UCJqV2-l0jqAa7uYN8IGJW7w TONS OF SUBSCRIPTIONS, no featured channels
# UC_1nZUpPS6jFv5Pn3f85CaA TONS OF SUBSCRIPTIONS, some featured channels
# UCJOh5FKisc0hUlEeWFBlD-w no subscriptions, plenty of featured channels
# UC7fjJERoGTs_eOKk-nn7RMw fair number of featured channels