Browse Source

Implement request retrying

pull/5/head
afrmtbl 3 years ago
parent
commit
63c51943c2
No known key found for this signature in database GPG Key ID: F5219A17363BED6
1 changed files with 24 additions and 24 deletions
  1. +24
    -24
      tracker.py

+ 24
- 24
tracker.py View File

@@ -1,8 +1,9 @@
from typing import Optional, List
from enum import Enum, auto
import requests

# TODO: Implement backoff for 500 response codes
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

# https://github.com/ArchiveTeam/tencent-weibo-grab/blob/9bae5f9747e014db9227821a9c11557267967023/pipeline.py
VERSION = "20200921.01"
@@ -16,6 +17,19 @@ BACKFEED_ENDPOINT = f"http://{BACKFEED_HOST}/{TRACKER_ID}-kj57sxhhzcn2kqjp/"
TRACKER_ENDPOINT = f"http://{TRACKER_HOST}/{TRACKER_ID}"


# https://findwork.dev/blog/advanced-usage-python-requests-timeouts-retries-hooks/
# Shared retry policy for every request made through `tracker_session`:
# up to 4 retries with exponential backoff (backoff_factor=2) whenever the
# server answers with any 5xx status or 429.
# POST is listed explicitly because urllib3 only retries idempotent methods
# by default.
_retry_options = {
    "total": 4,
    "backoff_factor": 2,
    "status_forcelist": [*range(500, 600), 429],
}
_retry_methods = ["GET", "POST"]
try:
    # urllib3 >= 1.26 names this option `allowed_methods`; the older
    # `method_whitelist` spelling was removed in urllib3 2.0.
    retry_strategy = Retry(allowed_methods=_retry_methods, **_retry_options)
except TypeError:
    # Older urllib3 releases only accept the original keyword.
    retry_strategy = Retry(method_whitelist=_retry_methods, **_retry_options)
adapter = HTTPAdapter(max_retries=retry_strategy)
tracker_session = requests.Session()
# Mount on both schemes so the policy applies whichever one an endpoint uses.
tracker_session.mount("https://", adapter)
tracker_session.mount("http://", adapter)


class ItemType(Enum):
Video = auto()
Channel = auto()
@@ -36,7 +50,7 @@ def add_item_to_tracker(item_type: ItemType, item_id: str) -> bool:
type_name = item_type.name.lower()
item_name = f"{type_name}:{item_id}"

req = requests.post(BACKFEED_ENDPOINT, data=item_name)
req = tracker_session.post(BACKFEED_ENDPOINT, data=item_name)

code = req.status_code

@@ -59,15 +73,12 @@ def add_item_to_tracker(item_type: ItemType, item_id: str) -> bool:
def request_item_from_tracker() -> Optional[str]:

data = {
# TODO: Ask Fusl what this should be
# https://www.archiveteam.org/index.php?title=Dev/Seesaw
# ^ says it would be filled in by the Seesaw library
"downloader": "Fusl",
"api_version": "2",
"version": VERSION
}

req = requests.post(f"{TRACKER_ENDPOINT}/request", json=data)
req = tracker_session.post(f"{TRACKER_ENDPOINT}/request", json=data)

code = req.status_code

@@ -87,10 +98,7 @@ def request_item_from_tracker() -> Optional[str]:


def request_upload_target() -> Optional[str]:
req = requests.get(
# "https://httpbin.org/get",
f"{TRACKER_ENDPOINT}/upload",
)
req = tracker_session.get(f"{TRACKER_ENDPOINT}/upload")

code = req.status_code

@@ -109,10 +117,7 @@ def request_upload_target() -> Optional[str]:


def request_all_upload_targets() -> Optional[List[str]]:
req = requests.get(
# "https://httpbin.org/get",
f"{TRACKER_ENDPOINT}/upload",
)
req = tracker_session.get(f"{TRACKER_ENDPOINT}/upload")

code = req.status_code

@@ -128,9 +133,6 @@ def request_all_upload_targets() -> Optional[List[str]]:
def mark_item_as_done(item_name: str, item_size_bytes: int) -> bool:

data = {
# TODO: Ask Fusl what this should be
# https://www.archiveteam.org/index.php?title=Dev/Seesaw
# ^ says it would be filled in by the Seesaw library
"downloader": "Fusl",
"version": VERSION,
"item": item_name,
@@ -139,7 +141,7 @@ def mark_item_as_done(item_name: str, item_size_bytes: int) -> bool:
}
}

req = requests.post(f"{TRACKER_ENDPOINT}/done", json=data)
req = tracker_session.post(f"{TRACKER_ENDPOINT}/done", json=data)

code = req.status_code

@@ -148,17 +150,15 @@ def mark_item_as_done(item_name: str, item_size_bytes: int) -> bool:
return True
elif code > 399 and code < 500:
print(f"[ERROR] Unable to mark item as done. Status: {code}")
elif code > 499 and code < 600:
# TODO: retry here
pass
else:
print(f"[ERROR] Unknown response code while marking item \'{item_name}\' as done: {code}")

return False


if __name__ == "__main__":
# print(add_item_to_tracker(ItemType.Channel, "test6"))
# if __name__ == "__main__":

# print(add_item_to_tracker(ItemType.Channel, "test10"))
# print(request_item_from_tracker())
# print(request_upload_target())
# print(request_all_upload_targets())


Loading…
Cancel
Save