Repos / pytaku / 34d5390109
commit 34d5390109086a17b0270b6aa3c9ccb35885c621
Author: Bùi Thành Nhân <hi@imnhan.com>
Date: Wed Mar 24 00:17:12 2021 +0700
mangasee with the new url fuckery
Mangasee may now use a different chapter link depending on the first
digit of its raw id. Therefore, we now need to use that raw id as our
unique `chapter.id`. This naturally required a migration script to
update existing records too.
Also disable mangadex updates because, you know, it's dead.
diff --git a/src/mangoapi/mangasee.py b/src/mangoapi/mangasee.py
index dc2baef..86a1faf 100644
--- a/src/mangoapi/mangasee.py
+++ b/src/mangoapi/mangasee.py
@@ -35,7 +35,7 @@ def get_title(self, title_id):
numbers = _parse_chapter_number(ch["Chapter"])
chapters.append(
{
- "id": numbers["number"],
+ "id": numbers["raw_id"],
"name": ch["ChapterName"],
"volume": "",
"groups": [],
@@ -55,17 +55,20 @@ def get_title(self, title_id):
}
def get_chapter(self, title_id, chapter_id):
- resp = self.http_get(
- f"https://mangasee123.com/read-online/{title_id}-chapter-{chapter_id}.html"
- )
+ numbers = _parse_chapter_number(chapter_id)
+ index = chapter_id[0]
+ suffix = "" if index == "1" else f"-index-{index}"
+ url = f"https://mangasee123.com/read-online/{title_id}-chapter-{numbers['number']}{suffix}.html"
+ print(">>", url)
+ resp = self.http_get(url)
html = resp.text
title_id = regexes["chapter_title_name"].search(html).group(1)
chapter_data = json.loads(regexes["chapter_data"].search(html).group(1))
num_pages = int(chapter_data["Page"])
+ directory = chapter_data["Directory"]
+ img_server = regexes["chapter_img_server"].search(html).group(1)
img_server = regexes["chapter_img_server"].search(html).group(1)
-
- numbers = _parse_chapter_number(chapter_data["Chapter"])
result = {
"id": chapter_id,
@@ -73,7 +76,9 @@ def get_chapter(self, title_id, chapter_id):
"site": "mangasee",
"name": chapter_data["ChapterName"] or "",
"pages": [
- _generate_img_src(img_server, title_id, chapter_data["Chapter"], p)
+ _generate_img_src(
+ img_server, title_id, chapter_data["Chapter"], directory, p
+ )
for p in range(1, num_pages + 1)
],
"pages_alt": [],
@@ -180,13 +185,35 @@ def _parse_chapter_number(e):
result = {
"num_major": major,
"number": str(major) if not minor else f"{major}.{minor}",
+ "raw_id": e,
}
if minor:
result["num_minor"] = minor
return result
-def _generate_img_src(img_srv, title_id, chapter_id, page):
+def _chapter_url(e):
+    """
+    Reference notes on how Mangasee's frontend turns a raw chapter id into
+    the tail of a reader URL.
+
+    This function is documentation only: it has no body besides this
+    docstring, so calling it returns None.
+
+    As the JavaScript below shows, the raw id `e` is read as:
+      - first character: the "index"; "-index-<t>" is added unless it is 1
+      - characters in between: the chapter number, parsed as an integer
+        (so leading zeroes are dropped)
+      - last character: the minor part; ".<a>" is added unless it is 0
+    `vm.PageOne` is defined elsewhere in the site's script; judging by the
+    example at the bottom it contributes "-page-1".
+
+    Yet another bright idea:
+
+        (vm.ChapterURLEncode = function (e) {
+          Index = "";
+          var t = e.substring(0, 1);
+          1 != t && (Index = "-index-" + t);
+          var n = parseInt(e.slice(1, -1)),
+            m = "",
+            a = e[e.length - 1];
+          return (
+            0 != a && (m = "." + a),
+            "-chapter-" + n + m + Index + vm.PageOne + ".html"
+          );
+        }),
+
+    e.g. vm.ChapterURLEncode("201420") === "-chapter-142-index-2-page-1.html"
+    """
+
+
+def _generate_img_src(img_srv, title_id, chapter_id, directory, page):
"""
Chapter ID padding logic:
@@ -206,4 +233,8 @@ def _generate_img_src(img_srv, title_id, chapter_id, page):
padded_chapter = chapter
else:
padded_chapter = f"{chapter}.{odd}"
- return f"https://{img_srv}/manga/{title_id}/{padded_chapter}-{page:03d}.png"
+
+ directory = f"{directory}/" if directory else ""
+ return (
+ f"https://{img_srv}/manga/{title_id}/{directory}{padded_chapter}-{page:03d}.png"
+ )
diff --git a/src/pytaku/main.py b/src/pytaku/main.py
index 88e51d1..3de7507 100644
--- a/src/pytaku/main.py
+++ b/src/pytaku/main.py
@@ -98,6 +98,7 @@ def proxy_view(b64_url):
- be a polite netizen in general
"""
url = _decode_proxy_url(b64_url)
+ print("Proxying url:", url)
if not _is_manga_img_url(url):
print("Invalid img url:", url)
return "Nope", 400
diff --git a/src/pytaku/scheduler.py b/src/pytaku/scheduler.py
index aa24b34..e56c340 100644
--- a/src/pytaku/scheduler.py
+++ b/src/pytaku/scheduler.py
@@ -65,6 +65,10 @@ class UpdateOutdatedTitles(Worker):
def run(self):
for title in find_outdated_titles():
+ if title["site"] == "mangadex":
+ print(f"Skipped title {title['id']} from {title['site']}.")
+ continue
+
print(f"Updating title {title['id']} from {title['site']}...", end="")
try:
updated_title = get_title(title["site"], title["id"])
diff --git a/src/pytaku/scripts/__init__.py b/src/pytaku/scripts/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/pytaku/scripts/migrate_mangasee_chapter_ids.py b/src/pytaku/scripts/migrate_mangasee_chapter_ids.py
new file mode 100644
index 0000000..b4a88ff
--- /dev/null
+++ b/src/pytaku/scripts/migrate_mangasee_chapter_ids.py
@@ -0,0 +1,76 @@
+import subprocess
+from typing import List, Tuple
+
+from mangoapi.mangasee import Mangasee
+from pytaku.database.common import get_conn, run_sql
+from pytaku.persistence import save_title
+
+# Module-level Mangasee client; fetch_title() calls ms.get_title() on it.
+ms = Mangasee()
+
+
+def fetch_title(title_id: str) -> Tuple[dict, List[Tuple[str, str, str]]]:
+    """
+    Fetch a mangasee title and list how each of its chapter ids changes.
+
+    Returns a 2-tuple `(title, updates)`:
+      - `title` is whatever `ms.get_title(title_id)` returned
+      - `updates` holds one `(title_id, old_chapter_id, new_chapter_id)`
+        tuple per chapter, where the old id is the chapter's "number" value
+        and the new id is the chapter's "id" value
+    """
+    title = ms.get_title(title_id)
+    updates = [(title_id, ch["number"], ch["id"]) for ch in title["chapters"]]
+    return title, updates
+
+
+def migrate():
+    """
+    Rewrite every stored mangasee chapter id from its old value to its new one.
+
+    Steps:
+      1. Re-fetch every mangasee title listed in the `title` table and collect
+         the `(title_id, old_chapter_id, new_chapter_id)` diffs.
+      2. Inside one transaction, with foreign key enforcement switched off,
+         save the freshly fetched titles and update the matching `chapter`
+         and `read` rows.
+      3. Run `pragma foreign_key_check` and commit only if it reports no rows.
+
+    Raises RuntimeError when the foreign key check reports violations. On any
+    exception inside the transaction it is rolled back before the exception
+    propagates, and foreign key enforcement is switched back on either way.
+    """
+    mangasee_titles = run_sql(
+        "SELECT id FROM title WHERE site = 'mangasee' ORDER BY lower(id);"
+    )
+    print(f"There are {len(mangasee_titles)} titles to update.")
+
+    # All fetching happens up front, before the transaction is opened.
+    diffs = []
+    new_titles = []
+    for title_id in mangasee_titles:
+        print(f">> Fetching {title_id}")
+        new_title, new_title_diffs = fetch_title(title_id)
+        diffs += new_title_diffs
+        new_titles.append(new_title)
+
+    print("Diffs:")
+    for diff in diffs:
+        print(diff)
+
+    print("Starting db transaction")
+    conn = get_conn()
+    cursor = conn.cursor()
+    # Enforcement is off while ids are rewritten table by table
+    # (presumably `read` rows reference `chapter` ids - confirm against schema).
+    cursor.execute("pragma foreign_keys = off;")
+    try:
+        cursor.execute("begin transaction;")
+        try:
+            for new_title in new_titles:
+                print(f'Saving title {new_title["id"]}')
+                save_title(new_title)
+
+            for title_id, old_chapter_id, new_chapter_id in diffs:
+                print("Updating", title_id, old_chapter_id, "to", new_chapter_id)
+                cursor.execute(
+                    "UPDATE chapter SET id=? WHERE id=? AND title_id=? AND site='mangasee';",
+                    (new_chapter_id, old_chapter_id, title_id),
+                )
+                cursor.execute(
+                    "UPDATE read SET chapter_id=? WHERE chapter_id=? AND title_id=? AND site='mangasee';",
+                    (new_chapter_id, old_chapter_id, title_id),
+                )
+
+            # The pragma reports violations as result rows; executing it
+            # without reading them would let a broken state be committed.
+            cursor.execute("pragma foreign_key_check;")
+            violations = cursor.fetchall()
+            if violations:
+                raise RuntimeError(
+                    f"foreign_key_check reported {len(violations)} violation(s), "
+                    f"first few: {violations[:10]}"
+                )
+            cursor.execute("commit;")
+        except Exception:
+            # Leave the database exactly as it was before the migration.
+            cursor.execute("rollback;")
+            raise
+    finally:
+        cursor.execute("pragma foreign_keys = on;")
+    print("All done!")
+
+
+def main():
+    """
+    Stop the pytaku user services, run the migration, then start them again.
+
+    Every systemctl call uses check=True, so a failing command raises
+    subprocess.CalledProcessError and nothing after it runs. Likewise, if
+    migrate() raises, the services are not started again.
+    """
+    services = ("pytaku", "pytaku-scheduler")
+
+    for service in services:
+        subprocess.run(["systemctl", "--user", "stop", service], check=True)
+
+    migrate()
+
+    for service in services:
+        subprocess.run(["systemctl", "--user", "start", service], check=True)
+
+
+if __name__ == "__main__":
+    main()