[fd5e5d21be] use MD@H links by default | pytaku

Repos / pytaku / fd5e5d21be
commit fd5e5d21be3479c8de9d9897ed8902a0c3553a40
Author: Bùi Thành Nhân <hi@imnhan.com>
Date:   Fri Oct 2 19:47:37 2020 +0700

    use MD@H links by default
    
    MD's own image servers are sometimes down (like, right now). It's also
    more polite to hit MD@H servers instead.
    
    This also required working around the filesystem's filename length
    limit: cache filenames longer than 255 characters are now split into
    nested path components.
    
    Ideally, we should store both the main and fallback image links on
    our side, or add some sort of wholesale preload logic so that we can
    retire the image proxy altogether.

diff --git a/pyproject.toml b/pyproject.toml
index 4ed6912..679fba8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pytaku"
-version = "0.3.22"
+version = "0.3.23"
 description = "Self-hostable web-based manga reader"
 authors = ["Bùi Thành Nhân <hi@imnhan.com>"]
 license = "AGPL-3.0-only"
diff --git a/src/mangoapi/mangadex.py b/src/mangoapi/mangadex.py
index ceab26f..044ed4a 100644
--- a/src/mangoapi/mangadex.py
+++ b/src/mangoapi/mangadex.py
@@ -52,7 +52,16 @@ def get_chapter(self, title_id, chapter_id):
         md_json = md_resp.json()
         assert md_json["status"] == "OK"
 
-        server = md_json.get("server_fallback") or md_json["server"]
+        # 'server' value points to a likely temporary MangaDex@Home instance, while
+        # 'server_fallback' would be MD's own server e.g. s5.mangadex.org...
+        # The latter may be down (like, literally at the time of writing), so for now
+        # let's prioritize the MD@Home server.
+        # I don't know how stable MD@Home links are, but it probably won't matter,
+        # since `persistence.load_chapter()` will re-fetch if existing db record is more
+        # than 1 day old anyway.
+        # TODO: A more robust solution is to save both links to db, but I'm not in the
+        # mood for it atm.
+        server = md_json["server"] or md_json.get("server_fallback")
         img_path = f"{server}{md_json['hash']}"
 
         chapter = {
diff --git a/src/pytaku/main.py b/src/pytaku/main.py
index 5cb3b6d..68c7e83 100644
--- a/src/pytaku/main.py
+++ b/src/pytaku/main.py
@@ -74,7 +74,11 @@ def _decode_proxy_url(b64_url):
 def _is_manga_img_url(
     url,
     pattern=re.compile(
-        r"^https://([\w_-]+\.)?(mangadex\.org/(data|images)|mangabeast\d{0,4}.com/manga)/"
+        r"^https://("
+        r"([\w-]+\.)?(mangadex\.org/(data|images)|mangabeast\d{0,4}.com/manga)/"
+        r"|"
+        r"([\w-]+\.)+mangadex\.network:\d{2,6}/[\w-]+/data/"
+        r")"
     ),
 ):
     return pattern.match(url)
@@ -105,7 +109,11 @@ def proxy_view(b64_url):
         md_resp = requests.get(url)
         status_code = md_resp.status_code
         body = md_resp.content
-        headers = {"Content-Type": md_resp.headers["content-type"]}
+        # Normal responsible adults would always include the Content-Type header,
+        # but it's MangaDex@Home we're talking about so ofc they wouldn't.
+        # Therefore, watch out for that empty case:
+        content_type = md_resp.headers.get("content-type")
+        headers = {"Content-Type": content_type} if content_type else {}
         if status_code == 200:
             storage.save(cached_headers_path, json.dumps(headers).encode())
             storage.save(cached_file_path, md_resp.content)
diff --git a/src/pytaku/storages.py b/src/pytaku/storages.py
index 81d7fd2..b6bdb77 100644
--- a/src/pytaku/storages.py
+++ b/src/pytaku/storages.py
@@ -1,3 +1,4 @@
+import textwrap
 from abc import ABC, abstractmethod
 from pathlib import Path
 
@@ -18,16 +19,31 @@ def read(self, path: Path) -> bytes:
 
 class FilesystemStorage(Storage):
     def save(self, path: Path, blob: bytes):
+        path = self._split_long_filename(path)
         if not path.parent.is_dir():
             path.parent.mkdir(parents=True)
         path.write_bytes(blob)
 
     def exists(self, path: Path) -> bool:
+        path = self._split_long_filename(path)
         return path.is_file()
 
     def read(self, path: Path) -> bytes:
+        path = self._split_long_filename(path)
         return path.read_bytes()
 
+    @staticmethod
+    def _split_long_filename(path: Path) -> Path:
+        filename = path.name
+        if len(filename) <= 255:
+            return path
+        else:
+            parts = textwrap.wrap(filename, width=255)
+            newpath = path.parent
+            for part in parts:
+                newpath /= Path(part)
+            return newpath
+
 
 # TODO: support other storages e.g. s3-like
 storage = FilesystemStorage()
diff --git a/tests/mangoapi/test_mangadex.py b/tests/mangoapi/test_mangadex.py
index b75b5b6..7bc8883 100644
--- a/tests/mangoapi/test_mangadex.py
+++ b/tests/mangoapi/test_mangadex.py
@@ -1,6 +1,5 @@
-from pytaku.conf import config
-
 from mangoapi.mangadex import Mangadex
+from pytaku.conf import config
 
 
 def test_get_title():
@@ -85,35 +84,19 @@ def test_get_title():
 
 def test_get_chapter():
     chap = Mangadex().get_chapter("doesn't matter", "696882")
+    pages = chap.pop("pages")
     assert chap == {
         "id": "696882",
         "title_id": "12088",
         "site": "mangadex",
         "name": "Extras",
-        "pages": [
-            "https://s5.mangadex.org/data/39174bff8c88758a125c32710730223c/S1.png",
-            "https://s5.mangadex.org/data/39174bff8c88758a125c32710730223c/S2.png",
-            "https://s5.mangadex.org/data/39174bff8c88758a125c32710730223c/S3.png",
-            "https://s5.mangadex.org/data/39174bff8c88758a125c32710730223c/S4.png",
-            "https://s5.mangadex.org/data/39174bff8c88758a125c32710730223c/S5.png",
-            "https://s5.mangadex.org/data/39174bff8c88758a125c32710730223c/S6.png",
-            "https://s5.mangadex.org/data/39174bff8c88758a125c32710730223c/S7.png",
-            "https://s5.mangadex.org/data/39174bff8c88758a125c32710730223c/S8.png",
-            "https://s5.mangadex.org/data/39174bff8c88758a125c32710730223c/S9.png",
-            "https://s5.mangadex.org/data/39174bff8c88758a125c32710730223c/S10.png",
-            "https://s5.mangadex.org/data/39174bff8c88758a125c32710730223c/S11.png",
-            "https://s5.mangadex.org/data/39174bff8c88758a125c32710730223c/S12.png",
-            "https://s5.mangadex.org/data/39174bff8c88758a125c32710730223c/S13.jpg",
-            "https://s5.mangadex.org/data/39174bff8c88758a125c32710730223c/S14.jpg",
-            "https://s5.mangadex.org/data/39174bff8c88758a125c32710730223c/S15.png",
-            "https://s5.mangadex.org/data/39174bff8c88758a125c32710730223c/S16.png",
-        ],
         "groups": ["Träumerei Scans", "GlassChair"],
         "is_webtoon": False,
         "number": "81.5",
         "num_major": 81,
         "num_minor": 5,
     }
+    assert len(pages) == 16
 
 
 def test_search():