archiving community contributions on YouTube: unpublished captions, title and description translations and caption credits
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.
 
 

129 lignes
5.8 KiB

  1. from requests import session
  2. from youtube_util import getinitialdata, fullyexpand, getapikey, getlver
  3. from time import sleep
  4. mysession = session()
  5. #extract latest version automatically
  6. homepage = mysession.get("https://www.youtube.com/").text
  7. API_KEY = getapikey(homepage)
  8. params = (
  9. ('key', API_KEY),
  10. )
  11. API_VERSION = getlver(getinitialdata(homepage))
  12. continuationheaders = {"x-youtube-client-name": "1", "x-youtube-client-version": API_VERSION, "Accept-Language": "en-US"}
  13. del homepage
  14. def process_channel(channelid: str):
  15. playlists = set()
  16. shelfres = set()
  17. channellist = set()
  18. # PLAYLISTS
  19. data = {"context":{"client":{"hl":"en","gl":"US","clientName":"WEB","clientVersion":API_VERSION}},"browseId":channelid,"params":"EglwbGF5bGlzdHM%3D"}
  20. while True:
  21. initdata = mysession.post("https://www.youtube.com/youtubei/v1/browse", params=params, json=data)
  22. if initdata.status_code == 200:
  23. initdata = initdata.json()
  24. break
  25. else:
  26. print("Non-200 API status code, waiting 30 seconds before retrying...")
  27. sleep(30)
  28. CHANNELS_ID = 0
  29. PLAYLISTS_ID = 0
  30. current = 0
  31. for tab in initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]:
  32. if "tabRenderer" in tab.keys():
  33. if tab["tabRenderer"]["endpoint"]["commandMetadata"]["webCommandMetadata"]["url"].rsplit("/", 1)[-1] == "playlists":
  34. PLAYLISTS_ID = current
  35. elif tab["tabRenderer"]["endpoint"]["commandMetadata"]["webCommandMetadata"]["url"].rsplit("/", 1)[-1] == "channels":
  36. CHANNELS_ID = current
  37. current += 1
  38. del current
  39. shelflist = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][PLAYLISTS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"]
  40. for item in shelflist:
  41. itemint = item["itemSectionRenderer"]["contents"][0]
  42. if "shelfRenderer" in itemint.keys():
  43. shelfres.add(itemint["shelfRenderer"]["title"]["runs"][0]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"])
  44. elif "gridRenderer" in itemint.keys():
  45. playlistsint = fullyexpand(itemint["gridRenderer"], mysession, continuationheaders)["items"]
  46. for playlist in playlistsint:
  47. playlists.add(playlist["gridPlaylistRenderer"]["playlistId"])
  48. if "shortBylineText" in playlist["gridPlaylistRenderer"].keys():
  49. channellist.add(playlist["gridPlaylistRenderer"]["shortBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"])
  50. for item in shelfres:
  51. shelfiteminitdata = getinitialdata(mysession.get("https://www.youtube.com/"+str(item)).text)
  52. playlistsint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][PLAYLISTS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"], mysession, continuationheaders)["items"]
  53. for playlist in playlistsint:
  54. playlists.add(playlist["gridPlaylistRenderer"]["playlistId"])
  55. if "shortBylineText" in playlist["gridPlaylistRenderer"].keys():
  56. channellist.add(playlist["gridPlaylistRenderer"]["shortBylineText"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"])
  57. # CHANNELS
  58. cshelfres = set()
  59. # PLAYLISTS
  60. data = {"context":{"client":{"hl":"en","gl":"US","clientName":"WEB","clientVersion":API_VERSION}},"browseId":channelid,"params":"EghjaGFubmVscw%3D%3D"}
  61. while True:
  62. initdata = mysession.post("https://www.youtube.com/youtubei/v1/browse", params=params, json=data).json()
  63. if initdata.status_code == 200:
  64. initdata = initdata.json()
  65. break
  66. else:
  67. print("Non-200 API status code, waiting 30 seconds before retrying...")
  68. sleep(30)
  69. shelflist = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][CHANNELS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"]
  70. for item in shelflist:
  71. itemint = item["itemSectionRenderer"]["contents"][0]
  72. if "shelfRenderer" in itemint.keys():
  73. cshelfres.add(itemint["shelfRenderer"]["title"]["runs"][0]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"])
  74. elif "gridRenderer" in itemint.keys():
  75. chanlistint = fullyexpand(itemint["gridRenderer"], mysession, continuationheaders)["items"]
  76. for channel in chanlistint:
  77. channellist.add(channel["gridChannelRenderer"]["channelId"])
  78. for item in cshelfres:
  79. shelfiteminitdata = getinitialdata(mysession.get("https://www.youtube.com/"+str(item)).text)
  80. chanlistint = fullyexpand(shelfiteminitdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"][CHANNELS_ID]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"], mysession, continuationheaders)["items"]
  81. for channel in chanlistint:
  82. channellist.add(channel["gridChannelRenderer"]["channelId"])
  83. return {"playlists": playlists, "channels": channellist}
  84. if __name__ == "__main__":
  85. from sys import argv
  86. chanl = argv
  87. chanl.pop(0)
  88. for channel in chanl:
  89. print(process_channel(channel))
# SAMPLES:
# UCqj7Cz7revf5maW9g5pgNcg lots of playlists
# UCRwczJ_nk1t9IGHyHfHbXRQ Nathaniel Bandy - created playlists only, featured channels only
# UCo8bcnLyZH8tBIH9V1mLgqQ the odd 1 is out - shelf, way too many subscriptions
# UCfXIV2vThxEF8Hq2OE17AeQ no playlists or channels featured
# UCJqV2-l0jqAa7uYN8IGJW7w TONS OF SUBSCRIPTIONS, no featured channels
# UC_1nZUpPS6jFv5Pn3f85CaA TONS OF SUBSCRIPTIONS, some featured channels
# UCJOh5FKisc0hUlEeWFBlD-w no subscriptions, plenty of featured channels
# UC7fjJERoGTs_eOKk-nn7RMw fair number of featured channels