archiving community contributions on YouTube: unpublished captions, title and description translations and caption credits
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

27 lines
1.1 KiB

  1. from requests import session
  2. from json import loads
  3. from urllib.parse import unquote
  def getinitialdata(html: str):
      """Extract the ``window["ytInitialData"]`` JSON value from page HTML.

      Scans *html* line by line for the first line that, once stripped of
      surrounding whitespace, starts with ``window["ytInitialData"] = `` and
      decodes the JSON value that follows the marker.

      Args:
          html: Text of the page to search.

      Returns:
          The decoded JSON value, or an empty dict when no line carries the
          marker.

      Raises:
          json.JSONDecodeError: If the text after the marker does not begin
              with a valid JSON value.
      """
      from json import JSONDecoder

      marker = 'window["ytInitialData"] = '
      for line in html.splitlines():
          stripped = line.strip()
          if stripped.startswith(marker):
              payload = stripped[len(marker):].strip()
              # raw_decode parses exactly one JSON value and ignores whatever
              # trails it, so the result is the same whether or not the
              # statement ends with ";". Blindly dropping the last character
              # would cut off the closing brace when the semicolon is absent.
              value, _ = JSONDecoder().raw_decode(payload)
              return value
      return {}
  # One session object, so the headers set below apply to later requests made
  # through it.
  mysession = session()
  # Extract the latest client version automatically from the home page's
  # initial data; fall back to a fixed version string when that fails.
  try:
      lver = getinitialdata(mysession.get("https://www.youtube.com/").text)["responseContext"]["serviceTrackingParams"][2]["params"][2]["value"]
  except Exception:
      # Best-effort lookup: any fetch, decode or lookup failure selects the
      # fallback. Catching Exception rather than using a bare except lets
      # KeyboardInterrupt and SystemExit propagate.
      lver = "2.20201002.02.01"
  mysession.headers.update({"x-youtube-client-name": "1", "x-youtube-client-version": lver, "Accept-Language": "en-US"})
  def fullyexpand(inputdict: dict):
      """Append every continuation page's items to ``inputdict["items"]``.

      Starting from *inputdict*, as long as the current page holds a
      ``"continuations"`` key, the continuation token of its first entry is
      unquoted and requested from the ``browse_ajax`` endpoint through the
      module-level ``mysession``. The ``gridContinuation`` part of the reply
      becomes the current page and its ``"items"`` are appended to
      ``inputdict["items"]``.

      Args:
          inputdict: Dict to expand; it is modified in place.

      Returns:
          The same *inputdict* object that was passed in.
      """
      page = inputdict
      while "continuations" in page:
          token = page["continuations"][0]["nextContinuationData"]["continuation"]
          url = "https://www.youtube.com/browse_ajax?continuation=" + unquote(token)
          reply = mysession.get(url)
          # The next loop test runs against the freshly fetched page, not
          # against the dict the caller passed in.
          page = reply.json()[1]["response"]["continuationContents"]["gridContinuation"]
          inputdict["items"].extend(page["items"])
      return inputdict