From 63c51943c242f79e81df7b8afc6d2a415b3ec4ab Mon Sep 17 00:00:00 2001 From: afrmtbl Date: Tue, 22 Sep 2020 11:11:13 -0400 Subject: [PATCH] Implement request retrying --- tracker.py | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/tracker.py b/tracker.py index 8c7a551..b05494e 100644 --- a/tracker.py +++ b/tracker.py @@ -1,8 +1,9 @@ from typing import Optional, List from enum import Enum, auto -import requests -# TODO: Implement backoff for 500 response codes +import requests +from requests.adapters import HTTPAdapter +from requests.packages.urllib3.util.retry import Retry # https://github.com/ArchiveTeam/tencent-weibo-grab/blob/9bae5f9747e014db9227821a9c11557267967023/pipeline.py VERSION = "20200921.01" @@ -16,6 +17,19 @@ BACKFEED_ENDPOINT = f"http://{BACKFEED_HOST}/{TRACKER_ID}-kj57sxhhzcn2kqjp/" TRACKER_ENDPOINT = f"http://{TRACKER_HOST}/{TRACKER_ID}" +# https://findwork.dev/blog/advanced-usage-python-requests-timeouts-retries-hooks/ +retry_strategy = Retry( + total=4, + backoff_factor=2, + status_forcelist=[x for x in range(500, 600)] + [429], + method_whitelist=["GET", "POST"] +) +adapter = HTTPAdapter(max_retries=retry_strategy) +tracker_session = requests.Session() +tracker_session.mount("https://", adapter) +tracker_session.mount("http://", adapter) + + class ItemType(Enum): Video = auto() Channel = auto() @@ -36,7 +50,7 @@ def add_item_to_tracker(item_type: ItemType, item_id: str) -> bool: type_name = item_type.name.lower() item_name = f"{type_name}:{item_id}" - req = requests.post(BACKFEED_ENDPOINT, data=item_name) + req = tracker_session.post(BACKFEED_ENDPOINT, data=item_name) code = req.status_code @@ -59,15 +73,12 @@ def add_item_to_tracker(item_type: ItemType, item_id: str) -> bool: def request_item_from_tracker() -> Optional[str]: data = { - # TODO: Ask Fusl what this should be - # https://www.archiveteam.org/index.php?title=Dev/Seesaw - # ^ says it would be filled in by the Seesaw library "downloader": "Fusl", "api_version": "2", "version": VERSION } - req = requests.post(f"{TRACKER_ENDPOINT}/request", json=data) + req = tracker_session.post(f"{TRACKER_ENDPOINT}/request", json=data) code = req.status_code @@ -87,10 +98,7 @@ def request_item_from_tracker() -> Optional[str]: def request_upload_target() -> Optional[str]: - req = requests.get( - # "https://httpbin.org/get", - f"{TRACKER_ENDPOINT}/upload", - ) + req = tracker_session.get(f"{TRACKER_ENDPOINT}/upload") code = req.status_code @@ -109,10 +117,7 @@ def request_upload_target() -> Optional[str]: def request_all_upload_targets() -> Optional[List[str]]: - req = requests.get( - # "https://httpbin.org/get", - f"{TRACKER_ENDPOINT}/upload", - ) + req = tracker_session.get(f"{TRACKER_ENDPOINT}/upload") code = req.status_code @@ -128,9 +133,6 @@ def request_all_upload_targets() -> Optional[List[str]]: def mark_item_as_done(item_name: str, item_size_bytes: int) -> bool: data = { - # TODO: Ask Fusl what this should be - # https://www.archiveteam.org/index.php?title=Dev/Seesaw - # ^ says it would be filled in by the Seesaw library "downloader": "Fusl", "version": VERSION, "item": item_name, @@ -139,7 +141,7 @@ def mark_item_as_done(item_name: str, item_size_bytes: int) -> bool: } } - req = requests.post(f"{TRACKER_ENDPOINT}/done", json=data) + req = tracker_session.post(f"{TRACKER_ENDPOINT}/done", json=data) code = req.status_code @@ -148,17 +150,15 @@ def mark_item_as_done(item_name: str, item_size_bytes: int) -> bool: return True elif code > 399 and code < 500: print(f"[ERROR] Unable to mark item as done. Status: {code}") - elif code > 499 and code < 600: - # TODO: retry here - pass else: print(f"[ERROR] Unknown response code while marking item \'{item_name}\' as done: {code}") return False -if __name__ == "__main__": - # print(add_item_to_tracker(ItemType.Channel, "test6")) +# if __name__ == "__main__": + + # print(add_item_to_tracker(ItemType.Channel, "test10")) # print(request_item_from_tracker()) # print(request_upload_target()) # print(request_all_upload_targets())