commit 34d5390109086a17b0270b6aa3c9ccb35885c621
Author: Bùi Thành Nhân <hi@imnhan.com>
Date:   Wed Mar 24 00:17:12 2021 +0700

    mangasee with the new url fuckery
    
    Mangasee may now use a different chapter link depending on the first
    digit of its raw id. Therefore, we now need to use that raw id as our
    unique `chapter.id`. This naturally required a migration script to
    update existing records too.
    
    Also disable mangadex updates because, you know, it's dead.
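    
    For reference, a minimal sketch of the new id-to-URL mapping (it mirrors
    the site's JS quoted in _chapter_url's docstring below; the helper name
    raw_id_to_chapter_url and "Example-Title" are made up for illustration):
    
        def raw_id_to_chapter_url(raw_id, title_id):
            index = raw_id[0]  # first digit picks the "-index-N" URL variant
            suffix = "" if index == "1" else f"-index-{index}"
            number = str(int(raw_id[1:-1]))  # "0142" -> "142"
            minor = raw_id[-1]  # trailing digit is the decimal part, if any
            if minor != "0":
                number += f".{minor}"
            return f"https://mangasee123.com/read-online/{title_id}-chapter-{number}{suffix}.html"
    
        # raw_id_to_chapter_url("201420", "Example-Title")
        # -> "https://mangasee123.com/read-online/Example-Title-chapter-142-index-2.html"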

diff --git a/src/mangoapi/mangasee.py b/src/mangoapi/mangasee.py
index dc2baef..86a1faf 100644
--- a/src/mangoapi/mangasee.py
+++ b/src/mangoapi/mangasee.py
@@ -35,7 +35,7 @@ def get_title(self, title_id):
             numbers = _parse_chapter_number(ch["Chapter"])
             chapters.append(
                 {
-                    "id": numbers["number"],
+                    "id": numbers["raw_id"],
                     "name": ch["ChapterName"],
                     "volume": "",
                     "groups": [],
@@ -55,17 +55,20 @@ def get_title(self, title_id):
         }
 
     def get_chapter(self, title_id, chapter_id):
-        resp = self.http_get(
-            f"https://mangasee123.com/read-online/{title_id}-chapter-{chapter_id}.html"
-        )
+        numbers = _parse_chapter_number(chapter_id)
+        index = chapter_id[0]
+        suffix = "" if index == "1" else f"-index-{index}"
+        url = f"https://mangasee123.com/read-online/{title_id}-chapter-{numbers['number']}{suffix}.html"
+        print(">>", url)
+        resp = self.http_get(url)
         html = resp.text
 
         title_id = regexes["chapter_title_name"].search(html).group(1)
         chapter_data = json.loads(regexes["chapter_data"].search(html).group(1))
         num_pages = int(chapter_data["Page"])
+        directory = chapter_data["Directory"]
+        img_server = regexes["chapter_img_server"].search(html).group(1)
         img_server = regexes["chapter_img_server"].search(html).group(1)
-
-        numbers = _parse_chapter_number(chapter_data["Chapter"])
 
         result = {
             "id": chapter_id,
@@ -73,7 +76,9 @@ def get_chapter(self, title_id, chapter_id):
             "site": "mangasee",
             "name": chapter_data["ChapterName"] or "",
             "pages": [
-                _generate_img_src(img_server, title_id, chapter_data["Chapter"], p)
+                _generate_img_src(
+                    img_server, title_id, chapter_data["Chapter"], directory, p
+                )
                 for p in range(1, num_pages + 1)
             ],
             "pages_alt": [],
@@ -180,13 +185,35 @@ def _parse_chapter_number(e):
     result = {
         "num_major": major,
         "number": str(major) if not minor else f"{major}.{minor}",
+        "raw_id": e,
     }
     if minor:
         result["num_minor"] = minor
     return result
 
 
-def _generate_img_src(img_srv, title_id, chapter_id, page):
+def _chapter_url(e):
+    """
+    Yet another bright idea:
+
+        (vm.ChapterURLEncode = function (e) {
+            Index = "";
+            var t = e.substring(0, 1);
+            1 != t && (Index = "-index-" + t);
+            var n = parseInt(e.slice(1, -1)),
+            m = "",
+            a = e[e.length - 1];
+            return (
+            0 != a && (m = "." + a),
+            "-chapter-" + n + m + Index + vm.PageOne + ".html"
+            );
+        }),
+
+    e.g. vm.ChapterURLEncode("201420") === "-chapter-142-index-2-page-1.html"
+    """
+
+
+def _generate_img_src(img_srv, title_id, chapter_id, directory, page):
     """
     Chapter ID padding logic:
 
@@ -206,4 +233,8 @@ def _generate_img_src(img_srv, title_id, chapter_id, page):
         padded_chapter = chapter
     else:
         padded_chapter = f"{chapter}.{odd}"
-    return f"https://{img_srv}/manga/{title_id}/{padded_chapter}-{page:03d}.png"
+
+    directory = f"{directory}/" if directory else ""
+    return (
+        f"https://{img_srv}/manga/{title_id}/{directory}{padded_chapter}-{page:03d}.png"
+    )
diff --git a/src/pytaku/main.py b/src/pytaku/main.py
index 88e51d1..3de7507 100644
--- a/src/pytaku/main.py
+++ b/src/pytaku/main.py
@@ -98,6 +98,7 @@ def proxy_view(b64_url):
         - be a polite netizen in general
     """
     url = _decode_proxy_url(b64_url)
+    print("Proxying url:", url)
     if not _is_manga_img_url(url):
         print("Invalid img url:", url)
         return "Nope", 400
diff --git a/src/pytaku/scheduler.py b/src/pytaku/scheduler.py
index aa24b34..e56c340 100644
--- a/src/pytaku/scheduler.py
+++ b/src/pytaku/scheduler.py
@@ -65,6 +65,10 @@ class UpdateOutdatedTitles(Worker):
 
     def run(self):
         for title in find_outdated_titles():
+            if title["site"] == "mangadex":
+                print(f"Skipped title {title['id']} from {title['site']}.")
+                continue
+
             print(f"Updating title {title['id']} from {title['site']}...", end="")
             try:
                 updated_title = get_title(title["site"], title["id"])
diff --git a/src/pytaku/scripts/__init__.py b/src/pytaku/scripts/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/pytaku/scripts/migrate_mangasee_chapter_ids.py b/src/pytaku/scripts/migrate_mangasee_chapter_ids.py
new file mode 100644
index 0000000..b4a88ff
--- /dev/null
+++ b/src/pytaku/scripts/migrate_mangasee_chapter_ids.py
@@ -0,0 +1,76 @@
+import subprocess
+from typing import List, Tuple
+
+from mangoapi.mangasee import Mangasee
+from pytaku.database.common import get_conn, run_sql
+from pytaku.persistence import save_title
+
+ms = Mangasee()
+
+
+def fetch_title(title_id: str) -> Tuple[dict, List[Tuple[str, str, str]]]:
+    """
+    Returns (title, updates); each update tuple is (title_id, old_id, new_id).
+    """
+    title = ms.get_title(title_id)
+    updates = [(title_id, ch["number"], ch["id"]) for ch in title["chapters"]]
+    return title, updates
+
+
+def migrate():
+    mangasee_titles = run_sql(
+        "SELECT id FROM title WHERE site = 'mangasee' ORDER BY lower(id);"
+    )
+    print(f"There are {len(mangasee_titles)} titles to update.")
+
+    diffs = []
+    new_titles = []
+    for title_id in mangasee_titles:
+        print(f">> Fetching {title_id}")
+        new_title, new_title_diffs = fetch_title(title_id)
+        diffs += new_title_diffs
+        new_titles.append(new_title)
+
+    print("Diffs:")
+    for diff in diffs:
+        print(diff)
+
+    print("Starting db transaction")
+    conn = get_conn()
+    cursor = conn.cursor()
+    cursor.execute("pragma foreign_keys = off;")
+    cursor.execute("begin transaction;")
+
+    for new_title in new_titles:
+        print(f'Saving title {new_title["id"]}')
+        save_title(new_title)
+
+    for title_id, old_chapter_id, new_chapter_id in diffs:
+        print("Updating", title_id, old_chapter_id, "to", new_chapter_id)
+        cursor.execute(
+            "UPDATE chapter SET id=? WHERE id=? AND title_id=? AND site='mangasee';",
+            (new_chapter_id, old_chapter_id, title_id),
+        )
+        cursor.execute(
+            "UPDATE read SET chapter_id=? WHERE chapter_id=? AND title_id=? AND site='mangasee';",
+            (new_chapter_id, old_chapter_id, title_id),
+        )
+
+    cursor.execute("pragma foreign_key_check;")
+    cursor.execute("commit;")
+    cursor.execute("pragma foreign_keys = on;")
+    print("All done!")
+
+
+def main():
+    subprocess.run(["systemctl", "--user", "stop", "pytaku"], check=True)
+    subprocess.run(["systemctl", "--user", "stop", "pytaku-scheduler"], check=True)
+
+    migrate()
+
+    subprocess.run(["systemctl", "--user", "start", "pytaku"], check=True)
+    subprocess.run(["systemctl", "--user", "start", "pytaku-scheduler"], check=True)
+
+
+if __name__ == "__main__":
+    main()