archiving community contributions on YouTube: unpublished captions, title and description translations and caption credits
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

129 lines
5.8 KiB

  1. from requests import session
  2. from youtube_util import getinitialdata, fullyexpand, getapikey, getlver
  3. from time import sleep
  4. mysession = session()
  5. #extract latest version automatically
  6. homepage = mysession.get("https://www.youtube.com/").text
  7. API_KEY = getapikey(homepage)
  8. params = (
  9. ('key', API_KEY),
  10. )
  11. API_VERSION = getlver(getinitialdata(homepage))
  12. continuationheaders = {"x-youtube-client-name": "1", "x-youtube-client-version": API_VERSION, "Accept-Language": "en-US"}
  13. del homepage
  14. def process_channel(channelid: str):
  15. playlists = set()
  16. shelfres = set()
  17. channellist = set()
  18. # PLAYLISTS
  19. data = {"context":{"client":{"hl":"en","gl":"US","clientName":"WEB","clientVersion":API_VERSION}},"browseId":channelid,"params":"EglwbGF5bGlzdHM%3D"}
  20. while True:
  21. initdata = mysession.post("https://www.youtube.com/youtubei/v1/browse", params=params, json=data)
  22. if initdata.status_code == 200:
  23. initdata = initdata.json()
  24. break
  25. else:
  26. print("Non-200 API status code, waiting 30 seconds before retrying...")
  27. sleep(30)
  28. CHANNELS_ID = 0
  29. PLAYLISTS_ID = 0
  30. current = 0
  31. for tab in initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]:
  32. if "tabRenderer" in tab.keys():
  33. if tab["tabRenderer"]["endpoint"]["commandMetadata"]["webCommandMetadata"]["url"].rsplit("/", 1)[-1] == "playlists":
  34. PLAYLISTS_ID = current
  35. elif tab["tabRenderer"]["endpoint"]["commandMetadata"]["webCommandMetadata"]["url"].rsplit("/", 1)[-1] == "channels":
  36. CHANNELS_ID = current
  37. current += 1
  38. del current
  39. shelflist = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][PLAYLISTS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"]
  40. for item in shelflist:
  41. itemint = item["itemSectionRenderer"]["contents"][0]
  42. if "shelfRenderer" in itemint.keys():
  43. shelfres.add(itemint["shelfRenderer"]["title"]["runs"][0]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"])
  44. elif "gridRenderer" in itemint.keys():
  45. playlistsint = fullyexpand(itemint["gridRenderer"], mysession, continuationheaders)["items"]
  46. for playlist in playlistsint:
  47. playlists.add(playlist["gridPlaylistRenderer"]["playlistId"])
  48. if "shortBylineText" in playlist["gridPlaylistRenderer"].keys():
  49. channellist.add(playlist["gridPlaylistRenderer"]["shortBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"])
  50. for item in shelfres:
  51. shelfiteminitdata = getinitialdata(mysession.get("https://www.youtube.com/"+str(item)).text)
  52. playlistsint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][PLAYLISTS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"], mysession, continuationheaders)["items"]
  53. for playlist in playlistsint:
  54. playlists.add(playlist["gridPlaylistRenderer"]["playlistId"])
  55. if "shortBylineText" in playlist["gridPlaylistRenderer"].keys():
  56. channellist.add(playlist["gridPlaylistRenderer"]["shortBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"])
  57. # CHANNELS
  58. cshelfres = set()
  59. # PLAYLISTS
  60. data = {"context":{"client":{"hl":"en","gl":"US","clientName":"WEB","clientVersion":API_VERSION}},"browseId":channelid,"params":"EghjaGFubmVscw%3D%3D"}
  61. while True:
  62. initdata = mysession.post("https://www.youtube.com/youtubei/v1/browse", params=params, json=data).json()
  63. if initdata.status_code == 200:
  64. initdata = initdata.json()
  65. break
  66. else:
  67. print("Non-200 API status code, waiting 30 seconds before retrying...")
  68. sleep(30)
  69. shelflist = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][CHANNELS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"]
  70. for item in shelflist:
  71. itemint = item["itemSectionRenderer"]["contents"][0]
  72. if "shelfRenderer" in itemint.keys():
  73. cshelfres.add(itemint["shelfRenderer"]["title"]["runs"][0]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"])
  74. elif "gridRenderer" in itemint.keys():
  75. chanlistint = fullyexpand(itemint["gridRenderer"], mysession, continuationheaders)["items"]
  76. for channel in chanlistint:
  77. channellist.add(channel["gridChannelRenderer"]["channelId"])
  78. for item in cshelfres:
  79. shelfiteminitdata = getinitialdata(mysession.get("https://www.youtube.com/"+str(item)).text)
  80. chanlistint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][CHANNELS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"], mysession, continuationheaders)["items"]
  81. for channel in chanlistint:
  82. channellist.add(channel["gridChannelRenderer"]["channelId"])
  83. return {"playlists": playlists, "channels": channellist}
  84. if __name__ == "__main__":
  85. from sys import argv
  86. chanl = argv
  87. chanl.pop(0)
  88. for channel in chanl:
  89. print(process_channel(channel))
# SAMPLES (channel IDs, each followed by a note on what it exercises):
# UCqj7Cz7revf5maW9g5pgNcg lots of playlists
# UCRwczJ_nk1t9IGHyHfHbXRQ Nathaniel Bandy - created playlists only, featured channels only
# UCo8bcnLyZH8tBIH9V1mLgqQ the odd 1 is out - shelf, way too many subscriptions
# UCfXIV2vThxEF8Hq2OE17AeQ no playlists or channels featured
# UCJqV2-l0jqAa7uYN8IGJW7w TONS OF SUBSCRIPTIONS, no featured channels
# UC_1nZUpPS6jFv5Pn3f85CaA TONS OF SUBSCRIPTIONS, some featured channels
# UCJOh5FKisc0hUlEeWFBlD-w no subscriptions, plenty of featured channels
# UC7fjJERoGTs_eOKk-nn7RMw fair number of featured channels