archiving community contributions on YouTube: unpublished captions, title and description translations and caption credits
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

107 lines
4.9 KiB

  1. from requests import session
  2. from youtube_util import getinitialdata, fullyexpand
  3. # TODO: Rate limit detection, HTTP3?
  4. mysession = session()
  5. #extract latest version automatically
  6. try:
  7. lver = getinitialdata(mysession.get("https://www.youtube.com/").text)["responseContext"]["serviceTrackingParams"][2]["params"][2]["value"]
  8. except:
  9. lver = "2.20201002.02.01"
  10. #print(lver)
  11. mysession.headers.update({"x-youtube-client-name": "1", "x-youtube-client-version": lver, "Accept-Language": "en-US"})
  12. def main(channelid: str):
  13. playlists = set()
  14. shelfres = set()
  15. channellist = set()
  16. # PLAYLISTS
  17. initdata = getinitialdata(mysession.get("https://www.youtube.com/channel/"+str(channelid)+"/playlists").text)
  18. CHANNELS_ID = 0
  19. PLAYLISTS_ID = 0
  20. current = 0
  21. for tab in initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]:
  22. if "tabRenderer" in tab.keys():
  23. if tab["tabRenderer"]["endpoint"]["commandMetadata"]["webCommandMetadata"]["url"].rsplit("/", 1)[-1] == "playlists":
  24. PLAYLISTS_ID = current
  25. elif tab["tabRenderer"]["endpoint"]["commandMetadata"]["webCommandMetadata"]["url"].rsplit("/", 1)[-1] == "channels":
  26. CHANNELS_ID = current
  27. current += 1
  28. del current
  29. shelflist = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][PLAYLISTS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"]
  30. for item in shelflist:
  31. itemint = item["itemSectionRenderer"]["contents"][0]
  32. if "shelfRenderer" in itemint.keys():
  33. shelfres.add(itemint["shelfRenderer"]["title"]["runs"][0]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"])
  34. elif "gridRenderer" in itemint.keys():
  35. playlistsint = fullyexpand(itemint["gridRenderer"])["items"]
  36. for playlist in playlistsint:
  37. playlists.add(playlist["gridPlaylistRenderer"]["playlistId"])
  38. if "shortBylineText" in playlist["gridPlaylistRenderer"].keys():
  39. channellist.add(playlist["gridPlaylistRenderer"]["shortBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"])
  40. for item in shelfres:
  41. shelfiteminitdata = getinitialdata(mysession.get("https://www.youtube.com/"+str(item)).text)
  42. playlistsint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][PLAYLISTS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"])["items"]
  43. for playlist in playlistsint:
  44. playlists.add(playlist["gridPlaylistRenderer"]["playlistId"])
  45. if "shortBylineText" in playlist["gridPlaylistRenderer"].keys():
  46. channellist.add(playlist["gridPlaylistRenderer"]["shortBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"])
  47. # CHANNELS
  48. cshelfres = set()
  49. initdata = getinitialdata(mysession.get("https://www.youtube.com/channel/"+str(channelid)+"/channels").text)
  50. shelflist = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][CHANNELS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"]
  51. for item in shelflist:
  52. itemint = item["itemSectionRenderer"]["contents"][0]
  53. if "shelfRenderer" in itemint.keys():
  54. cshelfres.add(itemint["shelfRenderer"]["title"]["runs"][0]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"])
  55. elif "gridRenderer" in itemint.keys():
  56. chanlistint = fullyexpand(itemint["gridRenderer"])["items"]
  57. for channel in chanlistint:
  58. channellist.add(channel["gridChannelRenderer"]["channelId"])
  59. for item in cshelfres:
  60. shelfiteminitdata = getinitialdata(mysession.get("https://www.youtube.com/"+str(item)).text)
  61. chanlistint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][CHANNELS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"])["items"]
  62. for channel in chanlistint:
  63. channellist.add(channel["gridChannelRenderer"]["channelId"])
  64. return {"playlists": playlists, "channels": channellist}
  65. if __name__ == "__main__":
  66. from sys import argv
  67. chanl = argv
  68. chanl.pop(0)
  69. for channel in chanl:
  70. print(main(channel))
  71. # SAMPLES:
  72. # UCqj7Cz7revf5maW9g5pgNcg lots of playlists
  73. # UCRwczJ_nk1t9IGHyHfHbXRQ Nathaniel Bandy - created playlists only, featured channels only
  74. # UCo8bcnLyZH8tBIH9V1mLgqQ the odd 1 is out - shelf, way too many subscriptions
  75. # UCfXIV2vThxEF8Hq2OE17AeQ no playlists or channels featured
  76. # UCJqV2-l0jqAa7uYN8IGJW7w TONS OF SUBSCRIPTIONS, no featured channels
  77. # UC_1nZUpPS6jFv5Pn3f85CaA TONS OF SUBSCRIPTIONS, some featured channels
  78. # UCJOh5FKisc0hUlEeWFBlD-w no subscriptions, plenty of featured channels
  79. # UC7fjJERoGTs_eOKk-nn7RMw fair number of featured channels