Repos / pytaku / 4259acd8b1
commit 4259acd8b18a0e2fffd1719f8716912299913e67
Author: Bùi Thành Nhân <hi@imnhan.com>
Date:   Sun Aug 9 20:40:35 2020 +0700

    change unique constraint to support mangasee
    
    Unlike mangadex, mangasee doesn't expose site-wide unique chapter IDs.
    This means a chapter's unique key must now be (site, title_id,
    chapter_id) instead of just (site, chapter_id).
    
    With this, mangasee is done, I think.

diff --git a/src/mangoapi/base_site.py b/src/mangoapi/base_site.py
index b7f6df2..23f97ee 100644
--- a/src/mangoapi/base_site.py
+++ b/src/mangoapi/base_site.py
@@ -13,7 +13,7 @@ def get_title(self, title_id):
         pass
 
     @abstractmethod
-    def get_chapter(self, chapter_id):
+    def get_chapter(self, title_id, chapter_id):
+        """
+        Fetch a single chapter of a title from the source site.
+
+        Both ids are taken because a chapter id is not necessarily unique
+        across a whole site, only within one title.
+        """
         pass
 
     @abstractmethod
diff --git a/src/mangoapi/mangadex.py b/src/mangoapi/mangadex.py
index 3ba5cfe..ff9c5b1 100644
--- a/src/mangoapi/mangadex.py
+++ b/src/mangoapi/mangadex.py
@@ -36,7 +36,7 @@ def get_title(self, title_id):
         }
         return title
 
-    def get_chapter(self, chapter_id):
+    def get_chapter(self, title_id, chapter_id):
         md_resp = requests.get(
             f"https://mangadex.org/api/?id={chapter_id}&type=chapter&saver=0"
         )
diff --git a/src/mangoapi/mangasee.py b/src/mangoapi/mangasee.py
index 36e5cae..07b3524 100644
--- a/src/mangoapi/mangasee.py
+++ b/src/mangoapi/mangasee.py
@@ -9,6 +9,9 @@
 regexes = {
-    "title_name": re.compile(r"<title>\s*([^|]+) | MangaSee</title>"),
+    # Patterns used to scrape values out of mangasee HTML pages.
+    # The "|" is escaped so it matches the literal separator in the page
+    # title instead of acting as regex alternation.
+    "title_name": re.compile(r"<title>\s*([^|]+) \| MangaSee</title>"),
     "title_chapters": re.compile(r"vm\.Chapters = (\[[^\]]+\])"),
+    # The chapter_* patterns capture the right-hand side of `vm.X = ...`
+    # assignments found in a chapter's reader page.
+    "chapter_title_name": re.compile(r'vm\.IndexName = "([^"]+)"'),
+    "chapter_data": re.compile(r"vm\.CurChapter = (\{[^\}]+\})"),
+    "chapter_img_server": re.compile(r'vm\.CurPathName = "([^"]+)"'),
 }
 
 
@@ -24,16 +27,18 @@ def get_title(self, title_id):
         html = resp.text
         name = regexes["title_name"].search(html).group(1).strip()
         chapters_str = regexes["title_chapters"].search(html).group(1)
-        chapters = [
-            {
-                "id": ch["Chapter"],
-                "name": ch["ChapterName"],
-                "volume": "",
-                "groups": [],
-                **_parse_chapter_number(ch["Chapter"]),
-            }
-            for ch in json.loads(chapters_str)
-        ]
+        chapters = []
+        for ch in json.loads(chapters_str):
+            numbers = _parse_chapter_number(ch["Chapter"])
+            chapters.append(
+                {
+                    "id": numbers["number"],
+                    "name": ch["ChapterName"],
+                    "volume": "",
+                    "groups": [],
+                    **numbers,
+                }
+            )
         return {
             "id": title_id,
             "name": name,
@@ -44,8 +49,33 @@ def get_title(self, title_id):
             "descriptions": [],
         }
 
-    def get_chapter(self, chapter_id):
-        pass
+    def get_chapter(self, title_id, chapter_id):
+        """
+        Scrape one chapter's reader page and return its metadata and page URLs.
+
+        Requests the read-online page for `title_id`/`chapter_id`, asserts an
+        HTTP 200 response, then pulls `vm.IndexName`, `vm.CurChapter` and
+        `vm.CurPathName` out of the HTML with the module-level `regexes`.
+
+        Returns a dict with keys "id", "title_id", "name", "pages" (one image
+        URL per page), "groups", "is_webtoon", plus whatever
+        `_parse_chapter_number` yields for the chapter.
+
+        Raises AssertionError on a non-200 response. A page lacking any of
+        the expected `vm.*` values fails with AttributeError, because
+        `.search()` returns None.
+        """
+        resp = requests.get(
+            f"https://mangasee123.com/read-online/{title_id}-chapter-{chapter_id}.html"
+        )
+        assert resp.status_code == 200
+        html = resp.text
+
+        # From here on title_id is the page's vm.IndexName, not the argument.
+        title_id = regexes["chapter_title_name"].search(html).group(1)
+        chapter_data = json.loads(regexes["chapter_data"].search(html).group(1))
+        # "Page" is used as the page count for the URL list below.
+        num_pages = int(chapter_data["Page"])
+        img_server = regexes["chapter_img_server"].search(html).group(1)
+
+        numbers = _parse_chapter_number(chapter_data["Chapter"])
+
+        result = {
+            "id": chapter_id,
+            "title_id": title_id,
+            # A falsy ChapterName is stored as an empty string.
+            "name": chapter_data["ChapterName"] or "",
+            # Pages are numbered from 1 up to and including num_pages.
+            "pages": [
+                _generate_img_src(img_server, title_id, chapter_data["Chapter"], p)
+                for p in range(1, num_pages + 1)
+            ],
+            "groups": [],
+            "is_webtoon": False,
+            **numbers,
+        }
+        return result
 
     def search_title(self, query):
         """
@@ -116,8 +146,9 @@ def __init__(self, titles: list):
         )
 
     def search(self, query):
+        """
+        Run `query` as a MATCH against the `titles` table, ordered by rank.
+
+        Returns whatever `cursor.execute()` returns for the (id, name) select.
+        """
+        # Wrap the query in double quotes, doubling any embedded ones, so it is
+        # handed to MATCH as a single quoted string.
+        # NOTE(review): presumably `titles` is an FTS5 table, where this makes
+        # the user's text a literal string rather than query syntax - confirm.
+        query = '"' + query.replace('"', '""') + '"'
         return self.db.cursor().execute(
-            "SELECT id, name FROM titles(?) ORDER BY rank;", (query,)
+            "SELECT id, name FROM titles WHERE titles MATCH ? ORDER BY rank;", (query,),
         )
 
 
@@ -138,8 +169,33 @@ def _parse_chapter_number(e):
     """
     major = int(e[1:-1])
     minor = int(e[-1])
-    return {
+    result = {
         "num_major": major,
-        "num_minor": minor,
         "number": str(major) if not minor else f"{major}.{minor}",
     }
+    if minor:
+        result["num_minor"] = minor
+    return result
+
+
+def _generate_img_src(img_srv, title_id, chapter_id, page):
+    """
+    Build the URL of one page image of a chapter.
+
+    `img_srv` is the image host, `title_id` the title's path segment,
+    `chapter_id` the raw chapter string (its first character is dropped and
+    its last character is the minor part) and `page` an integer that is
+    zero-padded to 3 digits.
+
+    Chapter ID padding logic, following this JavaScript:
+
+        vm.ChapterImage = function (ChapterString) {
+          var Chapter = ChapterString.slice(1, -1);
+          var Odd = ChapterString[ChapterString.length - 1];
+          if (Odd == 0) {
+            return Chapter;
+          } else {
+            return Chapter + "." + Odd;
+          }
+        };
+    """
+    chapter = chapter_id[1:-1]
+    odd = chapter_id[len(chapter_id) - 1]  # last character
+    # A trailing "0" means there is no minor part to append.
+    if odd == "0":
+        padded_chapter = chapter
+    else:
+        padded_chapter = f"{chapter}.{odd}"
+    return f"https://{img_srv}/manga/{title_id}/{padded_chapter}-{page:03d}.png"
diff --git a/src/pytaku/database/migrations/latest_schema.sql b/src/pytaku/database/migrations/latest_schema.sql
index 21255b4..e1dd9b5 100644
--- a/src/pytaku/database/migrations/latest_schema.sql
+++ b/src/pytaku/database/migrations/latest_schema.sql
@@ -13,7 +13,7 @@ CREATE TABLE title (
 
     unique(id, site)
 );
-CREATE TABLE chapter (
+CREATE TABLE IF NOT EXISTS "old_chapter" (
     id text,
     title_id text,
     site text,
@@ -52,7 +52,7 @@ CREATE TABLE read (
     updated_at text default (datetime('now')),
 
     foreign key (user_id) references user (id),
-    foreign key (chapter_id, site) references chapter (id, site),
+    foreign key (chapter_id, site) references "old_chapter" (id, site),
     unique(user_id, chapter_id, site)
 );
 CREATE TABLE keyval_store (
@@ -60,3 +60,18 @@ CREATE TABLE keyval_store (
     value text not null,
     updated_at text default (datetime('now'))
 );
+CREATE TABLE chapter (
+    -- A chapter id is only unique within one title of one site, hence the
+    -- (site, title_id, id) constraint further down.
+    id text,
+    title_id text,
+    site text,
+    num_major integer,
+    num_minor integer,
+    name text,
+    pages text,
+    groups text,
+    updated_at text default (datetime('now')), is_webtoon boolean,
+
+    foreign key (title_id, site) references title (id, site),
+    unique(site, title_id, id),
+    -- NOTE(review): SQLite treats NULLs as distinct in UNIQUE constraints, so
+    -- rows with a NULL num_minor are not deduplicated by this constraint.
+    unique(site, title_id, num_major, num_minor)
+);
diff --git a/src/pytaku/database/migrations/m0002.sql b/src/pytaku/database/migrations/m0002.sql
index 35d184e..15f1c6e 100644
--- a/src/pytaku/database/migrations/m0002.sql
+++ b/src/pytaku/database/migrations/m0002.sql
@@ -1,3 +1,5 @@
+-- Add key-value store table, for a poor man's inefficient cache
+
 create table keyval_store (
     key text primary key,
     value text not null,
diff --git a/src/pytaku/database/migrations/m0003.sql b/src/pytaku/database/migrations/m0003.sql
new file mode 100644
index 0000000..14d89ec
--- /dev/null
+++ b/src/pytaku/database/migrations/m0003.sql
@@ -0,0 +1,23 @@
+-- Dropping the unique(id, site) constraint.
+-- SQLite doesn't let you do that directly so gotta create a new table
+-- then copy existing data over.
+
+-- Keep the existing rows under a new name; they are copied back below.
+-- NOTE(review): after this rename, latest_schema.sql shows the `read` table's
+-- foreign key pointing at old_chapter rather than the new chapter table.
+alter table chapter rename to old_chapter;
+
+-- Replacement table: chapter ids are now unique per (site, title_id) only.
+create table chapter (
+    id text,
+    title_id text,
+    site text,
+    num_major integer,
+    num_minor integer,
+    name text,
+    pages text,
+    groups text,
+    updated_at text default (datetime('now')), is_webtoon boolean,
+
+    foreign key (title_id, site) references title (id, site),
+    unique(site, title_id, id),
+    -- NOTE(review): SQLite treats NULLs as distinct in UNIQUE constraints, so
+    -- rows with a NULL num_minor are not deduplicated by this constraint.
+    unique(site, title_id, num_major, num_minor)
+);
+
+-- NOTE(review): `select *` relies on old_chapter having the same columns in
+-- the same order as the new table. old_chapter is not dropped afterwards.
+insert into chapter select * from old_chapter;
diff --git a/src/pytaku/main.py b/src/pytaku/main.py
index ade49d3..fc182a4 100644
--- a/src/pytaku/main.py
+++ b/src/pytaku/main.py
@@ -51,7 +51,7 @@ def home_view():
     return render_template("home.html")
 
 
-@app.route("/me", methods=["GET"])
+@app.route("/following", methods=["GET"])
 @ensure_session_version
 @require_login
 def follows_view():
@@ -168,7 +168,7 @@ def auth_view():
     return render_template("auth.html")
 
 
-@app.route("/title/<site>/<title_id>")
+@app.route("/m/<site>/<title_id>")
 @ensure_session_version
 @toggle_has_read
 def title_view(site, title_id):
@@ -188,24 +188,27 @@ def title_view(site, title_id):
     return render_template("title.html", **title)
 
 
-@app.route("/chapter/<site>/<chapter_id>")
+@app.route("/m/<site>/<title_id>/<chapter_id>")
 @ensure_session_version
 @toggle_has_read
-def chapter_view(site, chapter_id):
-    chapter = load_chapter(site, chapter_id)
+def chapter_view(site, title_id, chapter_id):
+    chapter = load_chapter(site, title_id, chapter_id)
     if not chapter:
         print("Getting chapter", chapter_id)
-        chapter = get_chapter(chapter_id)
+        chapter = get_chapter(site, title_id, chapter_id)
+        chapter["site"] = site
         save_chapter(chapter)
     else:
         print("Loading chapter", chapter_id, "from db")
 
-    chapter["pages"] = [
-        url_for("proxy_view", b64_url=_encode_proxy_url(p)) for p in chapter["pages"]
-    ]
+    if site in ("mangadex", "mangasee"):
+        chapter["pages"] = [
+            url_for("proxy_view", b64_url=_encode_proxy_url(p))
+            for p in chapter["pages"]
+        ]
 
     # YIIIIKES
-    title = load_title(site, chapter["title_id"])
+    title = load_title(site, title_id)
     prev_chapter, next_chapter = get_prev_next_chapters(title, chapter)
     chapter["prev_chapter"] = prev_chapter
     chapter["next_chapter"] = next_chapter
@@ -229,6 +232,7 @@ def proxy_view(b64_url):
     """Fine I'll do it"""
     url = _decode_proxy_url(b64_url)
     if not _is_manga_img_url(url):
+        print("Invalid img url:", url)
         return "Nope", 400
     md_resp = requests.get(url)
     resp = make_response(md_resp.content, md_resp.status_code)
@@ -245,6 +249,9 @@ def _decode_proxy_url(b64_url):
 
 
 def _is_manga_img_url(
-    url, pattern=re.compile(r"^https://(\w+\.)?mangadex\.org/data/.+$")
+    url,
+    pattern=re.compile(
+        r"^https://([\w_-]+\.)?(mangadex\.org/data|mangabeast\d{0,4}\.com/manga)/"
+    ),
 ):
+    """
+    Return a match object if `url` is an https URL on an allowed host
+    (mangadex.org/data or mangabeast<0-4 digits>.com/manga, optionally behind
+    one subdomain label), else None.
+
+    `pattern` is compiled once, as a default argument. Every "." in the host
+    names is escaped so it only matches a literal dot; this check is what
+    decides which URLs the proxy view will fetch.
+    """
     return pattern.match(url)
diff --git a/src/pytaku/persistence.py b/src/pytaku/persistence.py
index 82f2df8..3b2ef01 100644
--- a/src/pytaku/persistence.py
+++ b/src/pytaku/persistence.py
@@ -121,7 +121,7 @@ def save_chapter(chapter):
         {
             "id": chapter["id"],
             "title_id": chapter["title_id"],
-            "site": "mangadex",
+            "site": chapter["site"],
             "num_major": chapter.get("num_major"),
             "num_minor": chapter.get("num_minor"),
             "name": chapter["name"],
@@ -132,14 +132,14 @@ def save_chapter(chapter):
     )
 
 
-def load_chapter(site, chapter_id):
+def load_chapter(site, title_id, chapter_id):
     result = run_sql(
         """
         SELECT id, title_id, num_major, num_minor, name, pages, groups, is_webtoon
         FROM chapter
-        WHERE id = ? AND site=?;
+        WHERE site=? AND title_id=? AND id=?;
         """,
-        (chapter_id, site),
+        (site, title_id, chapter_id),
     )
     if not result:
         return None
diff --git a/src/pytaku/source_sites.py b/src/pytaku/source_sites.py
index 44577b6..71a0d98 100644
--- a/src/pytaku/source_sites.py
+++ b/src/pytaku/source_sites.py
@@ -26,8 +26,8 @@ def _get_site(name):
     return site
 
 
-def get_chapter(site_name, chapter_id):
-    return _get_site(site_name).get_chapter(chapter_id)
+def get_chapter(site_name, title_id, chapter_id):
+    """Fetch a chapter via the site object that `_get_site(site_name)` returns."""
+    return _get_site(site_name).get_chapter(title_id, chapter_id)
 
 
 def get_title(site_name, title_id):
@@ -51,4 +51,7 @@ def search_title_all_sites(query):
     Returns dict in the form of {site_name: List[Title]}
     I should really look into proper type annotations huh.
     """
-    return {site_name: search_title(site_name, query) for site_name in ("mangasee",)}
+    return {
+        site_name: search_title(site_name, query)
+        for site_name in ("mangasee", "mangadex")
+    }
diff --git a/src/pytaku/templates/chapter.html b/src/pytaku/templates/chapter.html
index 05248bd..5ad18fa 100644
--- a/src/pytaku/templates/chapter.html
+++ b/src/pytaku/templates/chapter.html
@@ -1,14 +1,14 @@
 {% extends 'base.html' %}
 
 {% block title %}
-Ch.{{ num_major }}{% if num_minor %}.{{ num_minor }}{% endif %}
+Chapter {{ num_major }}{% if num_minor %}.{{ num_minor }}{% endif %}
 {% if name %} - {{ name }}{% endif %}
 {% endblock %}
 
 {% block head %}
 
 {% if next_chapter %}
-<link rel="prefetch" href="{{ url_for('chapter_view', site=site, chapter_id=next_chapter['id'])}}">
+<link rel="prefetch" href="{{ url_for('chapter_view', site=site, title_id=title_id, chapter_id=next_chapter['id'])}}">
 {% endif %}
 
 <style>
@@ -61,7 +61,7 @@ <h1>{{ self.title() }}</h1>
 {% block buttons %}
 <div class="buttons">
   {% if prev_chapter %}
-  {{ ibutton(href=url_for('chapter_view', site=site, chapter_id=prev_chapter['id']), left_icon='chevrons-left', text='Prev') }}
+  {{ ibutton(href=url_for('chapter_view', site=site, title_id=title_id, chapter_id=prev_chapter['id']), left_icon='chevrons-left', text='Prev') }}
   {% else %}
   {{ ibutton(left_icon='chevrons-left', text='Prev', disabled=True) }}
   {% endif %}
@@ -70,7 +70,7 @@ <h1>{{ self.title() }}</h1>
 
   {% if next_chapter %}
 
-    {% set next_url = url_for('chapter_view', site=site, chapter_id=next_chapter['id']) %}
+    {% set next_url = url_for('chapter_view', site=site, title_id=title_id, chapter_id=next_chapter['id']) %}
     {% if session['user'] %}
       {% set next_url = next_url + '?has_read=' + id %}
     {% endif %}
diff --git a/src/pytaku/templates/follows.html b/src/pytaku/templates/follows.html
index ee57dc9..ec88dde 100644
--- a/src/pytaku/templates/follows.html
+++ b/src/pytaku/templates/follows.html
@@ -84,7 +84,7 @@
     <a class="more chapter" href="{{ title_url }}">and {{ title['chapters']|length - 4 }} more...</a>
     {% endif %}
     {% for ch in title['chapters'][-4:] %}
-    <a class="chapter" href="{{ url_for('chapter_view', site=title['site'], chapter_id=ch['id']) }}">
+    <a class="chapter" href="{{ url_for('chapter_view', site=title['site'], title_id=title['id'], chapter_id=ch['id']) }}">
       Chapter {{ ch['num_major'] }}{% if ch['num_minor'] %}.{{ ch['num_minor'] }}{% endif %}
               {% if ch['volume'] %}Volume {{ ch['volume'] }} {% endif %}
               {% if ch['name'] %} - {{ ch['name'] }}{% endif %}
diff --git a/src/pytaku/templates/title.html b/src/pytaku/templates/title.html
index e39ea1d..b5a7b2c 100644
--- a/src/pytaku/templates/title.html
+++ b/src/pytaku/templates/title.html
@@ -59,7 +59,7 @@ <h1>{{ name }}</h1>
     </td>
     {% endif %}
     <td>
-      <a href="{{ url_for('chapter_view', chapter_id=chapter['id'], site=site) }}">
+      <a href="{{ url_for('chapter_view', chapter_id=chapter['id'], title_id=id, site=site) }}">
         Chapter {{ chapter['number'] }}
         {% if chapter['volume'] %}Volume {{ chapter['volume'] }} {% endif %}
         {% if chapter['name'] %}- {{ chapter['name'] }} {% endif %}