Browse Source

Implement initial tracker API

pull/4/head
afrmtbl 3 years ago
parent
commit
503143dc88
No known key found for this signature in database GPG Key ID: F5219A17363BED6
1 changed files with 165 additions and 0 deletions
  1. +165
    -0
      tracker.py

+ 165
- 0
tracker.py View File

@@ -0,0 +1,165 @@
from typing import Optional, List
from enum import Enum, auto
import requests

# TODO: Implement backoff for 500 response codes

# Client version string; sent to the tracker as the "version" field of the
# /request and /done JSON payloads.
# https://github.com/ArchiveTeam/tencent-weibo-grab/blob/9bae5f9747e014db9227821a9c11557267967023/pipeline.py
VERSION = "20200921.01"

# Project identifier; it is the path component of both endpoint URLs below.
TRACKER_ID = "ext-yt-communitycontribs"
TRACKER_HOST = "trackerproxy.meo.ws"

# Host (with explicit port) that accepts new item names for the project.
BACKFEED_HOST = "blackbird-amqp.meo.ws:23038"

# URL that item names are POSTed to in order to feed them into the tracker.
# NOTE(review): the suffix after TRACKER_ID looks like a per-project key —
# confirm whether it should live in source control.
BACKFEED_ENDPOINT = f"http://{BACKFEED_HOST}/{TRACKER_ID}-kj57sxhhzcn2kqjp/"
# Base URL for the tracker API; "/request", "/upload" and "/done" are appended.
TRACKER_ENDPOINT = f"http://{TRACKER_HOST}/{TRACKER_ID}"


class ItemType(Enum):
    """Kinds of items that can be fed to the tracker.

    Member values are assigned automatically. The lowercased member *name*
    is what ends up in a tracker item name, as the prefix before the colon
    (for example ``ItemType.Video`` becomes ``"video:<id>"``).
    """

    Video = auto()
    Channel = auto()
    MixPlaylist = auto()
    Playlist = auto()


def add_item_to_tracker(item_type: ItemType, item_id: str, *, timeout: float = 30.0) -> bool:
    """Feed a single item into the tracker through the backfeed endpoint.

    The item name is built as ``"<lowercased type name>:<item_id>"`` and
    POSTed as the raw request body, equivalent to::

        curl -d 'ITEMNAME' -so/dev/null $amqp_endpoint

    Item names are deduplicated by the backfeed. Response codes:

    * 200 - Item added to tracker
    * 409 - Item is already in tracker
    * 404 - Project backfeed channel not found
    * 400 - Item name has a bad format

    Args:
        item_type: Kind of item; its lowercased name becomes the prefix.
        item_id: Identifier of the item, placed after the colon.
        timeout: Seconds to wait on the backfeed before ``requests`` raises a
            timeout error. Without a timeout a stalled server would block
            the caller indefinitely.

    Returns:
        True when the item is in the tracker after the call (200 or 409),
        False for every other response code.
    """
    item_name = f"{item_type.name.lower()}:{item_id}"

    response = requests.post(BACKFEED_ENDPOINT, data=item_name, timeout=timeout)
    code = response.status_code

    if code == 200:
        print(f"[INFO] Item ID '{item_name}' added to tracker successfully")
        return True
    if code == 409:
        # Already present counts as success: the item is in the tracker.
        print(f"[INFO] Item ID '{item_name}' has already been added to tracker")
        return True

    if code == 404:
        print(
            f"[ERROR] Unable to add item ID '{item_name}' to tracker. "
            f"Project backfeed channel not found: {BACKFEED_ENDPOINT}"
        )
    elif code == 400:
        print(f"[ERROR] Item ID '{item_name}' has a bad format")
    else:
        print(f"[ERROR] Unknown response code adding item '{item_name}' to tracker: {code}")

    return False


def request_item_from_tracker(*, timeout: float = 30.0) -> Optional[str]:
    """Ask the tracker for the next item to work on.

    POSTs the downloader name, API version and client ``VERSION`` as JSON to
    ``{TRACKER_ENDPOINT}/request``.

    Args:
        timeout: Seconds to wait on the tracker before ``requests`` raises a
            timeout error. Without a timeout a stalled server would block
            the caller indefinitely.

    Returns:
        The ``item_name`` value from the tracker's JSON response, or None
        when the status is not 200, the body is not valid JSON, or the body
        is not an object containing ``item_name``. Every None path logs an
        ``[ERROR]`` line first.
    """
    payload = {
        # TODO: Ask Fusl what this should be
        # https://www.archiveteam.org/index.php?title=Dev/Seesaw
        # ^ says it would be filled in by the Seesaw library
        "downloader": "Fusl",
        "api_version": "2",
        "version": VERSION,
    }

    response = requests.post(f"{TRACKER_ENDPOINT}/request", json=payload, timeout=timeout)
    code = response.status_code

    if code != 200:
        print(f"[ERROR] Unable to get an item from tracker. Status: {code}")
        return None

    try:
        body = response.json()
    except ValueError:
        # Response.json() raises a ValueError subclass for a non-JSON body
        # (e.g. an HTML error page served with status 200).
        print("[ERROR] Tracker response to item request is not valid JSON")
        return None

    # Guard against non-object JSON (null, string, list) before the key lookup.
    if not isinstance(body, dict) or "item_name" not in body:
        print(f"[ERROR] Received item is missing the 'item_name' key: {body}")
        return None

    item_name = body["item_name"]
    print(f"[INFO] Received an item from tracker: {item_name}")
    return item_name


def request_upload_target(*, timeout: float = 30.0) -> Optional[str]:
    """Ask the tracker where finished data should be uploaded.

    Sends a GET to ``{TRACKER_ENDPOINT}/upload``.

    Args:
        timeout: Seconds to wait on the tracker before ``requests`` raises a
            timeout error. Without a timeout a stalled server would block
            the caller indefinitely.

    Returns:
        The ``upload_target`` value from the tracker's JSON response, or None
        when the status is not 200, the body is not valid JSON, or the body
        is not an object containing ``upload_target``. Every None path logs
        an ``[ERROR]`` line first.
    """
    response = requests.get(f"{TRACKER_ENDPOINT}/upload", timeout=timeout)
    code = response.status_code

    if code != 200:
        print(f"[ERROR] Unable to get an upload target from tracker. Status: {code}")
        return None

    try:
        body = response.json()
    except ValueError:
        # Response.json() raises a ValueError subclass for a non-JSON body.
        print("[ERROR] Tracker response to upload target request is not valid JSON")
        return None

    # Guard against non-object JSON (null, string, list) before the key lookup.
    if not isinstance(body, dict) or "upload_target" not in body:
        print(f"[ERROR] Response is missing the 'upload_target' key: {body}")
        return None

    upload_target = body["upload_target"]
    print(f"[INFO] Received an upload target from tracker: {upload_target}")
    return upload_target


def request_all_upload_targets(*, timeout: float = 30.0) -> Optional[List[str]]:
    """Fetch the tracker's upload response and return the decoded JSON body.

    Sends a GET to ``{TRACKER_ENDPOINT}/upload`` and returns whatever JSON the
    tracker answers with, unmodified.

    NOTE(review): this is the same URL that ``request_upload_target`` calls,
    and the body is returned as-is, so the ``List[str]`` annotation is not
    enforced here — confirm the intended endpoint and response shape against
    the tracker API.

    Args:
        timeout: Seconds to wait on the tracker before ``requests`` raises a
            timeout error. Without a timeout a stalled server would block
            the caller indefinitely.

    Returns:
        The decoded JSON body on a 200 response, or None when the status is
        not 200 or the body is not valid JSON. Every None path logs an
        ``[ERROR]`` line first.
    """
    response = requests.get(f"{TRACKER_ENDPOINT}/upload", timeout=timeout)
    code = response.status_code

    if code != 200:
        print(f"[ERROR] Unable to get all upload targets from tracker. Status: {code}")
        return None

    try:
        targets = response.json()
    except ValueError:
        # Response.json() raises a ValueError subclass for a non-JSON body.
        print("[ERROR] Tracker response to upload targets request is not valid JSON")
        return None

    print(f"[INFO] Received all upload targets from tracker: {targets}")
    return targets


def mark_item_as_done(item_name: str, item_size_bytes: int, *, timeout: float = 30.0) -> bool:
    """Report an item to the tracker as finished.

    POSTs the downloader name, client ``VERSION``, the item name and its size
    as JSON to ``{TRACKER_ENDPOINT}/done``.

    Args:
        item_name: Full item name including its type prefix
            (``video:id``, ``playlist:id``, etc.).
        item_size_bytes: Size of the item's data, sent as ``bytes.data``.
        timeout: Seconds to wait on the tracker before ``requests`` raises a
            timeout error. Without a timeout a stalled server would block
            the caller indefinitely.

    Returns:
        True on a 200 response; False for any other status code. Every False
        path logs an ``[ERROR]`` line first.
    """
    payload = {
        # TODO: Ask Fusl what this should be
        # https://www.archiveteam.org/index.php?title=Dev/Seesaw
        # ^ says it would be filled in by the Seesaw library
        "downloader": "Fusl",
        "version": VERSION,
        "item": item_name,
        "bytes": {
            "data": item_size_bytes,
        },
    }

    response = requests.post(f"{TRACKER_ENDPOINT}/done", json=payload, timeout=timeout)
    code = response.status_code

    if code == 200:
        print(f"[INFO] Marked item '{item_name}' as done")
        return True

    if 400 <= code < 500:
        print(f"[ERROR] Unable to mark item as done. Status: {code}")
    elif 500 <= code < 600:
        # TODO: retry here
        # Until retries exist, log the failure instead of returning False
        # silently, so server errors are visible like every other failure.
        print(f"[ERROR] Tracker server error while marking item '{item_name}' as done. Status: {code}")
    else:
        print(f"[ERROR] Unknown response code while marking item '{item_name}' as done: {code}")

    return False


if __name__ == "__main__":
    # Manual smoke tests against the configured endpoints; uncomment a line
    # to run it.
    # print(add_item_to_tracker(ItemType.Channel, "test6"))
    # print(request_item_from_tracker())
    # print(request_upload_target())
    # print(request_all_upload_targets())
    # print(mark_item_as_done("test4", 200))

    # An `if` body consisting only of comments is a syntax error, so keep a
    # no-op statement here while every example above is commented out.
    pass

Loading…
Cancel
Save