Archiving community contributions on YouTube: unpublished captions, title and description translations, and caption credits.
25'ten fazla konu seçemezsiniz Konular bir harf veya rakamla başlamalı, kısa çizgiler ('-') içerebilir ve en fazla 35 karakter uzunluğunda olabilir.
 
 

171 satır
5.0 KiB

  1. from typing import Optional, List
  2. from enum import Enum, auto
  3. import requests
  4. from requests.adapters import HTTPAdapter
  5. from requests.packages.urllib3.util.retry import Retry
  6. # https://github.com/ArchiveTeam/tencent-weibo-grab/blob/9bae5f9747e014db9227821a9c11557267967023/pipeline.py
  7. VERSION = "20200921.01"
  8. TRACKER_ID = "ext-yt-communitycontribs"
  9. TRACKER_HOST = "trackerproxy.meo.ws"
  10. BACKFEED_HOST = "blackbird-amqp.meo.ws:23038"
  11. BACKFEED_ENDPOINT = f"http://{BACKFEED_HOST}/{TRACKER_ID}-kj57sxhhzcn2kqjp/"
  12. TRACKER_ENDPOINT = f"http://{TRACKER_HOST}/{TRACKER_ID}"
  13. from os import environ
  14. if "TRACKER_USERNAME" in environ.keys():
  15. TRACKER_USERNAME = environ["TRACKER_USERNAME"]
  16. else:
  17. TRACKER_USERNAME = "Unnamed"
  18. # https://findwork.dev/blog/advanced-usage-python-requests-timeouts-retries-hooks/
  19. retry_strategy = Retry(
  20. total=4,
  21. backoff_factor=2,
  22. status_forcelist=[x for x in range(500, 600)] + [429],
  23. method_whitelist=["GET", "POST"]
  24. )
  25. adapter = HTTPAdapter(max_retries=retry_strategy)
  26. tracker_session = requests.Session()
  27. tracker_session.mount("https://", adapter)
  28. tracker_session.mount("http://", adapter)
  29. class ItemType(Enum):
  30. Video = auto()
  31. Channel = auto()
  32. MixPlaylist = auto()
  33. Playlist = auto()
  34. def add_item_to_tracker(item_type: ItemType, item_id: str) -> bool:
  35. """Feed items into the tracker through backfeed (item names will be deduplicated):
  36. # curl -d 'ITEMNAME' -so/dev/null $amqp_endpoint
  37. # Response codes:
  38. # 200 - Item added to tracker
  39. # 409 - Item is already in tracker
  40. # 404 - Project backfeed channel not found
  41. # 400 - Item name has a bad format
  42. """
  43. type_name = item_type.name.lower()
  44. item_name = f"{type_name}:{item_id}"
  45. req = tracker_session.post(BACKFEED_ENDPOINT, data=item_name)
  46. code = req.status_code
  47. if code == 200:
  48. print(f"[INFO] Item ID \'{item_name}\' added to tracker successfully")
  49. return True
  50. elif code == 409:
  51. print(f"[INFO] Item ID \'{item_name}\' has already been added to tracker")
  52. return True
  53. elif code == 404:
  54. print(f"[ERROR] Unable to add item ID \'{item_name}\' to tracker. Project backfeed channel not found: {BACKFEED_ENDPOINT}")
  55. elif code == 400:
  56. print(f"[ERROR] Item ID \'{item_name}\' has a bad format")
  57. else:
  58. print(f"[ERROR] Unknown response code adding item \'{item_name}\' to tracker: {code}")
  59. return False
  60. def request_item_from_tracker() -> Optional[str]:
  61. data = {
  62. "downloader": TRACKER_USERNAME,
  63. "api_version": "2",
  64. "version": VERSION
  65. }
  66. req = tracker_session.post(f"{TRACKER_ENDPOINT}/request", json=data)
  67. code = req.status_code
  68. if code == 200:
  69. data = req.json()
  70. if "item_name" in data:
  71. item_name = data["item_name"]
  72. print(f"[INFO] Received an item from tracker: {item_name}")
  73. return item_name
  74. else:
  75. print(f"[ERROR] Received item is missing the \'item_name\' key: {data}")
  76. else:
  77. print(f"[ERROR] Unable to get an item from tracker. Status: {code}")
  78. def request_upload_target() -> Optional[str]:
  79. req = tracker_session.get(f"{TRACKER_ENDPOINT}/upload")
  80. code = req.status_code
  81. if code == 200:
  82. data = req.json()
  83. if "upload_target" in data:
  84. upload_target = data["upload_target"]
  85. print(f"[INFO] Received an upload target from tracker: {upload_target}")
  86. return upload_target
  87. else:
  88. print(f"[ERROR] Response is missing the \'upload_target\' key: {data}")
  89. else:
  90. print(f"[ERROR] Unable to get an upload target from tracker. Status: {code}")
  91. def request_all_upload_targets() -> Optional[List[str]]:
  92. req = tracker_session.get(f"{TRACKER_ENDPOINT}/upload_targets")
  93. code = req.status_code
  94. if code == 200:
  95. data = req.json()
  96. print(f"[INFO] Received all upload targets from tracker: {data}")
  97. return data
  98. else:
  99. print(f"[ERROR] Unable to get all upload targets from tracker. Status: {code}")
  100. # `item_name` includes type prefix (video:id, playlist:id, etc)
  101. def mark_item_as_done(item_name: str, item_size_bytes: int) -> bool:
  102. data = {
  103. "downloader": TRACKER_USERNAME,
  104. "version": VERSION,
  105. "item": item_name,
  106. "bytes": {
  107. "data": item_size_bytes
  108. }
  109. }
  110. req = tracker_session.post(f"{TRACKER_ENDPOINT}/done", json=data)
  111. code = req.status_code
  112. if code == 200:
  113. print(f"[INFO] Marked item \'{item_name}\' as done")
  114. return True
  115. elif code > 399 and code < 500:
  116. print(f"[ERROR] Unable to mark item as done. Status: {code}")
  117. else:
  118. print(f"[ERROR] Unknown response code while marking item \'{item_name}\' as done: {code}")
  119. return False
  120. # if __name__ == "__main__":
  121. # print(add_item_to_tracker(ItemType.Channel, "test10"))
  122. # print(request_item_from_tracker())
  123. # print(request_upload_target())
  124. # print(request_all_upload_targets())
  125. # print(mark_item_as_done("test4", 200))