Repos / pytaku / e65548521c
commit e65548521c42cae3adc67306a79d46b3ecb569c2
Author: Bùi Thành Nhân <hi@imnhan.com>
Date: Tue Jun 15 23:37:45 2021 +0700
Cope with Cloudflare (CF) failures by refreshing the session
diff --git a/src/mangoapi/base_site.py b/src/mangoapi/base_site.py
index b3a6479..08f4332 100644
--- a/src/mangoapi/base_site.py
+++ b/src/mangoapi/base_site.py
@@ -11,12 +11,20 @@
)
+def create_session():
+    """Return a new cloudscraper session restricted to desktop user agents.
+
+    Kept as a module-level factory so callers can build a replacement
+    session on demand instead of reusing a stale one.
+    """
+    # NOTE: the options must be passed as the ``browser`` keyword.  The first
+    # positional parameter of ``create_scraper`` is ``sess`` (an existing
+    # session whose attributes are copied), so a positional dict would be
+    # silently ignored and the "mobile" setting would never take effect.
+    return cloudscraper.create_scraper(
+        browser={
+            "mobile": False,
+        }
+    )
+
+
class Site(ABC):
def __init__(self):
self.username = None
self.password = None
self.is_logged_in = False
- self._session = cloudscraper.create_scraper(browser="chrome")
+ self._session = create_session()
@abstractmethod
def get_title(self, title_id):
@@ -47,16 +55,18 @@ def login(self, username, password):
raise NotImplementedError()
def _http_request(self, method, url, *args, **kwargs):
- request_func = getattr(self._session, method)
-
if "timeout" not in kwargs:
kwargs["timeout"] = 5
+ request_func = getattr(self._session, method)
try:
resp = request_func(url, *args, **kwargs)
except requests.exceptions.Timeout:
raise SourceSiteTimeoutError(url)
+ if resp.status_code == 403:
+ self._session = create_session()
+
if 500 <= resp.status_code <= 599:
raise SourceSite5xxError(url, resp.status_code, resp.text)
elif resp.status_code != 200:
diff --git a/src/pytaku/scheduler.py b/src/pytaku/scheduler.py
index e56c340..c8dff33 100644
--- a/src/pytaku/scheduler.py
+++ b/src/pytaku/scheduler.py
@@ -1,3 +1,4 @@
+import random
import time
import traceback
from abc import ABC, abstractmethod
@@ -66,7 +67,7 @@ class UpdateOutdatedTitles(Worker):
def run(self):
for title in find_outdated_titles():
if title["site"] == "mangadex":
- print(f"Skipped title {title['id']} from {title['site']}.")
+ # print(f"Skipped title {title['id']} from {title['site']}.")
continue
print(f"Updating title {title['id']} from {title['site']}...", end="")
@@ -74,6 +75,8 @@ def run(self):
updated_title = get_title(title["site"], title["id"])
save_title(updated_title)
print(" done")
+ if title["site"] == "mangasee":
+ time.sleep(random.randint(5, 10))
except (SourceSite5xxError, ReadTimeout, JSONDecodeError) as e:
print(" skipped because of server error:", e.__class__.__name__, str(e))