"""Thin HTTP client for the ArchiveTeam-style tracker and its backfeed endpoint.

Every function performs one HTTP request with ``requests``, prints an
``[INFO]``/``[ERROR]`` line describing the outcome, and reports success through
its return value (``bool`` or ``Optional[...]``). Transport-level failures
(connection errors, timeouts) are not caught here and propagate to the caller
as ``requests.RequestException`` subclasses.
"""
# Provenance: reconstructed from the git patch that creates tracker.py
#   commit 503143dc88cf9bb83f315304a5d99647a27a5653
#   From: afrmtbl    Date: Mon, 21 Sep 2020 15:03:01 -0400
#   Subject: [PATCH] Implement initial tracker API

from typing import Optional, List
from enum import Enum, auto

import requests

# TODO: Implement backoff for 500 response codes

# https://github.com/ArchiveTeam/tencent-weibo-grab/blob/9bae5f9747e014db9227821a9c11557267967023/pipeline.py
VERSION = "20200921.01"

TRACKER_ID = "ext-yt-communitycontribs"
TRACKER_HOST = "trackerproxy.meo.ws"

BACKFEED_HOST = "blackbird-amqp.meo.ws:23038"

BACKFEED_ENDPOINT = f"http://{BACKFEED_HOST}/{TRACKER_ID}-kj57sxhhzcn2kqjp/"
TRACKER_ENDPOINT = f"http://{TRACKER_HOST}/{TRACKER_ID}"

# Seconds to wait for the server before giving up. Without an explicit
# timeout, `requests` waits indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30


class ItemType(Enum):
    """Kinds of item that can be fed to the tracker.

    The lower-cased member name is used as the prefix of the item name
    (e.g. ``ItemType.Video`` -> ``"video:<id>"``).
    """

    Video = auto()
    Channel = auto()
    MixPlaylist = auto()
    Playlist = auto()


def _json_or_none(response, what: str):
    """Decode *response* as JSON; print an error and return None if it is not JSON."""
    try:
        return response.json()
    except ValueError:
        # json.JSONDecodeError (and requests' own JSONDecodeError) subclass ValueError.
        print(f"[ERROR] Response for {what} is not valid JSON")
        return None


def add_item_to_tracker(item_type: ItemType, item_id: str) -> bool:
    """Feed an item into the tracker through backfeed.

    Item names are deduplicated by the backfeed. Equivalent to:
        curl -d 'ITEMNAME' -so/dev/null $amqp_endpoint

    Response codes:
        200 - Item added to tracker
        409 - Item is already in tracker
        404 - Project backfeed channel not found
        400 - Item name has a bad format

    Args:
        item_type: Kind of item; its lower-cased name becomes the name prefix.
        item_id: Identifier appended after the ``<type>:`` prefix.

    Returns:
        True if the item is in the tracker after the call (200 or 409),
        False for any other status code.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    item_name = f"{item_type.name.lower()}:{item_id}"

    response = requests.post(
        BACKFEED_ENDPOINT, data=item_name, timeout=REQUEST_TIMEOUT
    )
    code = response.status_code

    if code == 200:
        print(f"[INFO] Item ID '{item_name}' added to tracker successfully")
        return True
    if code == 409:
        # Already queued counts as success: the item is in the tracker.
        print(f"[INFO] Item ID '{item_name}' has already been added to tracker")
        return True

    if code == 404:
        print(
            f"[ERROR] Unable to add item ID '{item_name}' to tracker. "
            f"Project backfeed channel not found: {BACKFEED_ENDPOINT}"
        )
    elif code == 400:
        print(f"[ERROR] Item ID '{item_name}' has a bad format")
    else:
        print(f"[ERROR] Unknown response code adding item '{item_name}' to tracker: {code}")

    return False


def request_item_from_tracker() -> Optional[str]:
    """Ask the tracker for the next item to work on.

    Returns:
        The ``item_name`` value from the tracker's JSON response, or None when
        the status is not 200, the body is not JSON, or the key is missing.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    payload = {
        # TODO: Ask Fusl what this should be
        # https://www.archiveteam.org/index.php?title=Dev/Seesaw
        # ^ says it would be filled in by the Seesaw library
        "downloader": "Fusl",
        "api_version": "2",
        "version": VERSION,
    }

    response = requests.post(
        f"{TRACKER_ENDPOINT}/request", json=payload, timeout=REQUEST_TIMEOUT
    )
    code = response.status_code

    if code != 200:
        print(f"[ERROR] Unable to get an item from tracker. Status: {code}")
        return None

    body = _json_or_none(response, "item request")
    if body is None:
        return None

    if not isinstance(body, dict) or "item_name" not in body:
        print(f"[ERROR] Received item is missing the 'item_name' key: {body}")
        return None

    item_name = body["item_name"]
    print(f"[INFO] Received an item from tracker: {item_name}")
    return item_name


def request_upload_target() -> Optional[str]:
    """Ask the tracker for an upload target.

    Returns:
        The ``upload_target`` value from the tracker's JSON response, or None
        when the status is not 200, the body is not JSON, or the key is missing.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    response = requests.get(
        # "https://httpbin.org/get",
        f"{TRACKER_ENDPOINT}/upload",
        timeout=REQUEST_TIMEOUT,
    )
    code = response.status_code

    if code != 200:
        print(f"[ERROR] Unable to get an upload target from tracker. Status: {code}")
        return None

    body = _json_or_none(response, "upload target request")
    if body is None:
        return None

    if not isinstance(body, dict) or "upload_target" not in body:
        print(f"[ERROR] Response is missing the 'upload_target' key: {body}")
        return None

    upload_target = body["upload_target"]
    print(f"[INFO] Received an upload target from tracker: {upload_target}")
    return upload_target


def request_all_upload_targets() -> Optional[List[str]]:
    """Ask the tracker for all upload targets.

    Returns:
        The decoded JSON body of the response, unmodified, or None when the
        status is not 200 or the body is not JSON.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    # NOTE(review): this requests the same URL as request_upload_target();
    # confirm whether a different endpoint is intended for the full list, and
    # that the body really is a list of strings as the annotation says.
    response = requests.get(
        # "https://httpbin.org/get",
        f"{TRACKER_ENDPOINT}/upload",
        timeout=REQUEST_TIMEOUT,
    )
    code = response.status_code

    if code != 200:
        print(f"[ERROR] Unable to get all upload targets from tracker. Status: {code}")
        return None

    targets = _json_or_none(response, "all upload targets request")
    if targets is None:
        return None

    print(f"[INFO] Received all upload targets from tracker: {targets}")
    return targets


# `item_name` includes type prefix (video:id, playlist:id, etc)
def mark_item_as_done(item_name: str, item_size_bytes: int) -> bool:
    """Report a finished item to the tracker.

    Args:
        item_name: Full item name including its type prefix
            (``video:id``, ``playlist:id``, etc).
        item_size_bytes: Size reported to the tracker under ``bytes.data``.

    Returns:
        True if the tracker answered 200, False otherwise.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    payload = {
        # TODO: Ask Fusl what this should be
        # https://www.archiveteam.org/index.php?title=Dev/Seesaw
        # ^ says it would be filled in by the Seesaw library
        "downloader": "Fusl",
        "version": VERSION,
        "item": item_name,
        "bytes": {
            "data": item_size_bytes,
        },
    }

    response = requests.post(
        f"{TRACKER_ENDPOINT}/done", json=payload, timeout=REQUEST_TIMEOUT
    )
    code = response.status_code

    if code == 200:
        print(f"[INFO] Marked item '{item_name}' as done")
        return True

    if 400 <= code < 500:
        print(f"[ERROR] Unable to mark item as done. Status: {code}")
    elif 500 <= code < 600:
        # TODO: retry here
        # Until retry exists, report the failure instead of dropping it silently.
        print(f"[ERROR] Tracker server error while marking item '{item_name}' as done. Status: {code}")
    else:
        print(f"[ERROR] Unknown response code while marking item '{item_name}' as done: {code}")

    return False


if __name__ == "__main__":
    # Manual smoke tests; uncomment the call you want to try.
    # print(add_item_to_tracker(ItemType.Channel, "test6"))
    # print(request_item_from_tracker())
    # print(request_upload_target())
    # print(request_all_upload_targets())
    # print(mark_item_as_done("test4", 200))
    pass  # a suite containing only comments is a syntax error