archiving community contributions on YouTube: unpublished captions, title and description translations and caption credits
Non puoi selezionare più di 25 argomenti Gli argomenti devono iniziare con una lettera o un numero, possono includere trattini ('-') e possono essere lunghi fino a 35 caratteri.
 
 

171 righe
5.0 KiB

  1. from typing import Optional, List
  2. from enum import Enum, auto
  3. import requests
  4. from requests.adapters import HTTPAdapter
  5. from requests.packages.urllib3.util.retry import Retry
  6. # https://github.com/ArchiveTeam/tencent-weibo-grab/blob/9bae5f9747e014db9227821a9c11557267967023/pipeline.py
  7. VERSION = "20200921.01"
  8. TRACKER_ID = "ext-yt-communitycontribs"
  9. TRACKER_HOST = "trackerproxy.meo.ws"
  10. BACKFEED_HOST = "blackbird-amqp.meo.ws:23038"
  11. BACKFEED_ENDPOINT = f"http://{BACKFEED_HOST}/{TRACKER_ID}-kj57sxhhzcn2kqjp/"
  12. TRACKER_ENDPOINT = f"http://{TRACKER_HOST}/{TRACKER_ID}"
  13. from os import environ
  14. if "TRACKER_USERNAME" in environ.keys():
  15. TRACKER_USERNAME = environ["TRACKER_USERNAME"]
  16. else:
  17. TRACKER_USERNAME = "Unnamed"
  18. # https://findwork.dev/blog/advanced-usage-python-requests-timeouts-retries-hooks/
  19. retry_strategy = Retry(
  20. total=4,
  21. backoff_factor=2,
  22. status_forcelist=[x for x in range(500, 600)] + [429],
  23. method_whitelist=["GET", "POST"]
  24. )
  25. adapter = HTTPAdapter(max_retries=retry_strategy)
  26. tracker_session = requests.Session()
  27. tracker_session.mount("https://", adapter)
  28. tracker_session.mount("http://", adapter)
  29. class ItemType(Enum):
  30. Video = auto()
  31. Channel = auto()
  32. MixPlaylist = auto()
  33. Playlist = auto()
  34. def add_item_to_tracker(item_type: ItemType, item_id: str) -> bool:
  35. """Feed items into the tracker through backfeed (item names will be deduplicated):
  36. # curl -d 'ITEMNAME' -so/dev/null $amqp_endpoint
  37. # Response codes:
  38. # 200 - Item added to tracker
  39. # 409 - Item is already in tracker
  40. # 404 - Project backfeed channel not found
  41. # 400 - Item name has a bad format
  42. """
  43. type_name = item_type.name.lower()
  44. item_name = f"{type_name}:{item_id}"
  45. req = tracker_session.post(BACKFEED_ENDPOINT, data=item_name)
  46. code = req.status_code
  47. if code == 200:
  48. print(f"[INFO] Item ID \'{item_name}\' added to tracker successfully")
  49. return True
  50. elif code == 409:
  51. print(f"[INFO] Item ID \'{item_name}\' has already been added to tracker")
  52. return True
  53. elif code == 404:
  54. print(f"[ERROR] Unable to add item ID \'{item_name}\' to tracker. Project backfeed channel not found: {BACKFEED_ENDPOINT}")
  55. elif code == 400:
  56. print(f"[ERROR] Item ID \'{item_name}\' has a bad format")
  57. else:
  58. print(f"[ERROR] Unknown response code adding item \'{item_name}\' to tracker: {code}")
  59. return False
  60. def request_item_from_tracker() -> Optional[str]:
  61. data = {
  62. "downloader": TRACKER_USERNAME,
  63. "api_version": "2",
  64. "version": VERSION
  65. }
  66. req = tracker_session.post(f"{TRACKER_ENDPOINT}/request", json=data)
  67. code = req.status_code
  68. if code == 200:
  69. data = req.json()
  70. if "item_name" in data:
  71. item_name = data["item_name"]
  72. print(f"[INFO] Received an item from tracker: {item_name}")
  73. return item_name
  74. else:
  75. print(f"[ERROR] Received item is missing the \'item_name\' key: {data}")
  76. else:
  77. print(f"[ERROR] Unable to get an item from tracker. Status: {code}")
  78. def request_upload_target() -> Optional[str]:
  79. req = tracker_session.get(f"{TRACKER_ENDPOINT}/upload")
  80. code = req.status_code
  81. if code == 200:
  82. data = req.json()
  83. if "upload_target" in data:
  84. upload_target = data["upload_target"]
  85. print(f"[INFO] Received an upload target from tracker: {upload_target}")
  86. return upload_target
  87. else:
  88. print(f"[ERROR] Response is missing the \'upload_target\' key: {data}")
  89. else:
  90. print(f"[ERROR] Unable to get an upload target from tracker. Status: {code}")
  91. def request_all_upload_targets() -> Optional[List[str]]:
  92. req = tracker_session.get(f"{TRACKER_ENDPOINT}/upload_targets")
  93. code = req.status_code
  94. if code == 200:
  95. data = req.json()
  96. print(f"[INFO] Received all upload targets from tracker: {data}")
  97. return data
  98. else:
  99. print(f"[ERROR] Unable to get all upload targets from tracker. Status: {code}")
  100. # `item_name` includes type prefix (video:id, playlist:id, etc)
  101. def mark_item_as_done(item_name: str, item_size_bytes: int) -> bool:
  102. data = {
  103. "downloader": TRACKER_USERNAME,
  104. "version": VERSION,
  105. "item": item_name,
  106. "bytes": {
  107. "data": item_size_bytes
  108. }
  109. }
  110. req = tracker_session.post(f"{TRACKER_ENDPOINT}/done", json=data)
  111. code = req.status_code
  112. if code == 200:
  113. print(f"[INFO] Marked item \'{item_name}\' as done")
  114. return True
  115. elif code > 399 and code < 500:
  116. print(f"[ERROR] Unable to mark item as done. Status: {code}")
  117. else:
  118. print(f"[ERROR] Unknown response code while marking item \'{item_name}\' as done: {code}")
  119. return False
  120. # if __name__ == "__main__":
  121. # print(add_item_to_tracker(ItemType.Channel, "test10"))
  122. # print(request_item_from_tracker())
  123. # print(request_upload_target())
  124. # print(request_all_upload_targets())
  125. # print(mark_item_as_done("test4", 200))