archiving community contributions on YouTube: unpublished captions, title and description translations and caption credits
Não pode escolher mais do que 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.
 
 

104 linhas
3.7 KiB

  1. # This function adapted from https://github.com/cdown/srt/blob/11089f1e021f2e074d04c33fc7ffc4b7b52e7045/srt.py, lines 69 and 189 (MIT License)
  2. def timedelta_to_sbv_timestamp(timedelta_timestamp):
  3. r"""
  4. Convert a :py:class:`~datetime.timedelta` to an SRT timestamp.
  5. .. doctest::
  6. >>> import datetime
  7. >>> delta = datetime.timedelta(hours=1, minutes=23, seconds=4)
  8. >>> timedelta_to_sbv_timestamp(delta)
  9. '01:23:04,000'
  10. :param datetime.timedelta timedelta_timestamp: A datetime to convert to an
  11. SBV timestamp
  12. :returns: The timestamp in SBV format
  13. :rtype: str
  14. """
  15. SECONDS_IN_HOUR = 3600
  16. SECONDS_IN_MINUTE = 60
  17. HOURS_IN_DAY = 24
  18. MICROSECONDS_IN_MILLISECOND = 1000
  19. hrs, secs_remainder = divmod(timedelta_timestamp.seconds, SECONDS_IN_HOUR)
  20. hrs += timedelta_timestamp.days * HOURS_IN_DAY
  21. mins, secs = divmod(secs_remainder, SECONDS_IN_MINUTE)
  22. msecs = timedelta_timestamp.microseconds // MICROSECONDS_IN_MILLISECOND
  23. return "%1d:%02d:%02d.%03d" % (hrs, mins, secs, msecs)
  24. import requests
  25. from bs4 import BeautifulSoup
  26. from datetime import timedelta
  27. from json import loads
  28. #HSID, SSID, SID cookies required
  29. cookies = loads(open("config.json").read())
  30. headers = {
  31. "cookie": "HSID="+cookies["HSID"]+"; SSID="+cookies["SSID"]+"; SID="+cookies["SID"],
  32. }
  33. def getsubs(vid, lang="all"):
  34. if lang == "all":
  35. lparams = (
  36. ("v", vid),
  37. ("ref", "player"),
  38. ("o", "U"),
  39. )
  40. langpage = requests.get("https://www.youtube.com/timedtext_video", params=lparams, headers=headers)
  41. assert not "accounts.google.com" in langpage.url, "Please supply authentication cookie information in config.json. See README.md for more information."
  42. langs = []
  43. langsoup = BeautifulSoup(langpage.text, features="html5lib")
  44. if "create_channel" in langpage.url:
  45. print(vid, "not found.")
  46. elif langsoup.find_all("div", {"class": "not-accepting-caption-submissions"}):
  47. print(vid, "has disabled community-contributed captioning.")
  48. langs = []
  49. else:
  50. langdivs = langsoup.find("ul", class_="yt-uix-languagepicker-language-list").find_all("li", class_="yt-uix-languagepicker-menu-item")
  51. for item in langdivs:
  52. langs.append(item["data-value"])
  53. print(vid, "has the following languages available", ", ".join(langs)+".")
  54. else:
  55. langs = [lang]
  56. for langcode in langs:
  57. pparams = (
  58. ("v", vid),
  59. ("lang", langcode),
  60. ("action_mde_edit_form", 1),
  61. ("bl", "vmp"),
  62. ("ui", "hd"),
  63. ("tab", "captions"),
  64. ("o", "U")
  65. )
  66. page = requests.get("https://www.youtube.com/timedtext_editor", params=pparams, headers=headers)
  67. soup = BeautifulSoup(page.text, features="html5lib")
  68. divs = soup.find_all("div", class_="timed-event-line")
  69. outtext = ""
  70. for item in divs:
  71. text = item.find("textarea").text
  72. startms = int(item.find("input", class_="event-start-time")["data-start-ms"])
  73. endms = int(item.find("input", class_="event-end-time")["data-end-ms"])
  74. outtext += timedelta_to_sbv_timestamp(timedelta(milliseconds=startms)) + "," + timedelta_to_sbv_timestamp(timedelta(milliseconds=endms)) + "\n" + text + "\n\n"
  75. open(vid+"_"+langcode+".sbv", "w", encoding="utf-8").write(outtext[:-1])
  76. if __name__ == "__main__":
  77. from sys import argv
  78. vidl = argv
  79. vidl.pop(0)
  80. for video in vidl:
  81. getsubs(video)