archiving community contributions on YouTube: unpublished captions, title and description translations and caption credits
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

29 lines
1.1 KiB

  1. from json import loads
  2. from urllib.parse import unquote
  3. import requests
  4. def getinitialdata(html: str):
  5. for line in html.splitlines():
  6. if line.strip().startswith('window["ytInitialData"] = '):
  7. return loads(line.split('window["ytInitialData"] = ', 1)[1].strip()[:-1])
  8. return {}
  9. def getapikey(html: str):
  10. return html.split('"INNERTUBE_API_KEY":"', 1)[-1].split('"', 1)[0]
  11. #extract latest version automatically
  12. def getlver(initialdata: dict):
  13. try:
  14. return initialdata["responseContext"]["serviceTrackingParams"][2]["params"][2]["value"]
  15. except:
  16. return "2.20201002.02.01"
  17. def fullyexpand(inputdict: dict, mysession: requests.session, continuationheaders: dict):
  18. lastrequestj = inputdict
  19. while "continuations" in lastrequestj.keys():
  20. lastrequest = mysession.get("https://www.youtube.com/browse_ajax?continuation="+unquote(lastrequestj["continuations"][0]["nextContinuationData"]["continuation"]), headers=continuationheaders)
  21. lastrequestj = lastrequest.json()[1]["response"]["continuationContents"]["gridContinuation"]
  22. inputdict["items"].extend(lastrequestj["items"])
  23. return inputdict