[c55e6f4f6e] fix empty body/charset case; use regex | mcross

Repos / mcross / c55e6f4f6e
commit c55e6f4f6e9fde5335c5263f9462ef4db654b36a
Author: Bùi Thành Nhân <hi@imnhan.com>
Date:   Thu Jun 18 11:53:21 2020 +0700

    fix empty body/charset case; use regex

diff --git a/src/mcross/gui/controller.py b/src/mcross/gui/controller.py
index 0749068..de350c2 100644
--- a/src/mcross/gui/controller.py
+++ b/src/mcross/gui/controller.py
@@ -161,21 +161,23 @@ async def load_page(self, url: GeminiUrl):
         )
 
         # Support whatever encoding that python supports
-        try:
-            body_string = resp.body.decode(resp.charset)
-        except LookupError:
-            await self.put_gui_op(
-                self.model.update_content,
-                "\n".join(
-                    [
-                        "Error:",
-                        f"{resp.status} {resp.meta}",
-                        f"Unsupported charset: {resp.charset}",
-                    ]
-                ),
-                "text/plain",
-            )
-            return resp
+        body_string = ""
+        if resp.body and resp.charset:
+            try:
+                body_string = resp.body.decode(resp.charset)
+            except LookupError:
+                await self.put_gui_op(
+                    self.model.update_content,
+                    "\n".join(
+                        [
+                            "Error:",
+                            f"{resp.status} {resp.meta}",
+                            f"Unsupported charset: {resp.charset}",
+                        ]
+                    ),
+                    "text/plain",
+                )
+                return resp
 
         # Sucessfully decoded body string!
         if resp.status.startswith("2"):
@@ -185,13 +187,7 @@ async def load_page(self, url: GeminiUrl):
         else:
             await self.put_gui_op(
                 self.model.update_content,
-                "\n".join(
-                    [
-                        "Error:",
-                        f"{resp.status} {resp.meta}",
-                        body_string if resp.body else "",
-                    ]
-                ),
+                f"Error:\n{resp.status} {resp.meta}\n{body_string}",
                 "text/plain",
             )
         return resp
diff --git a/src/mcross/transport.py b/src/mcross/transport.py
index 3eb448d..aa47ab7 100644
--- a/src/mcross/transport.py
+++ b/src/mcross/transport.py
@@ -31,10 +31,19 @@ def _parse_resp_header(header, pattern=re.compile(r"^(\d\d)\s+(.{,1024})\r\n$"))
     match = pattern.match(header)
     assert match is not None, f"Malformed response header: {header}"
     status = match.group(1)
-    meta = match.group(2)
+    meta = match.group(2).strip()
     return status, meta
 
 
+def _parse_meta(meta, pattern=re.compile(r"^(\S+)\s*;\s*charset=(\S+)$")):
+    """Split a response META string into ``(mime_type, charset)``.
+
+    Either element is ``None`` when it cannot be determined, so the caller
+    can substitute its own defaults.
+
+    The compiled ``pattern`` handles the common ``type/subtype; charset=x``
+    form.  When it does not match, META is parsed by hand so that a MIME
+    type given without a charset (``text/plain``) or alongside other
+    parameters (``text/gemini; lang=en``) is still reported instead of
+    being discarded.
+
+    :param meta: META portion of the response header.
+    :param pattern: compiled regex with two groups (mime type, charset).
+    :return: ``(mime_type, charset)``; ``(None, None)`` when META does not
+        look like a MIME type at all (e.g. empty, or an error message).
+    """
+    match = pattern.match(meta)
+    if match:
+        return match.group(1), match.group(2)
+    # Fallback: "type/subtype" optionally followed by ";"-separated params.
+    mime_type, _, params = meta.partition(";")
+    mime_type = mime_type.strip()
+    # Require "type/subtype" without inner whitespace so that free-form
+    # META text such as "Not found" is not mistaken for a MIME type.
+    if "/" not in mime_type or any(ch.isspace() for ch in mime_type):
+        return None, None
+    charset = None
+    for param in params.split(";"):
+        name, _, value = param.partition("=")
+        value = value.strip()
+        if name.strip().lower() == "charset" and value:
+            charset = value
+    return mime_type, charset
+
+
 # TODO: GeminiUrl's context-aware parse() method probably doesn't belong
 # in a "transport" module.
 
@@ -138,22 +147,9 @@ async def raw_get(url: GeminiUrl):
 
         status, meta = _parse_resp_header(header.decode())
 
-        # If success, extract mime type & charset from meta
-        mime_type = None
-        charset = None
-        if status.startswith("2"):
-            if not meta:
-                mime_type = "text/gemini"
-                charset = "utf-8"
-            else:
-                meta_parts = meta.split(";")
-                mime_type = meta_parts[0].strip() or "text/gemini"
-                charset = ""
-                if len(meta_parts) == 2:
-                    charset_part = meta_parts[1].strip()
-                    if charset_part.startswith("charset="):
-                        charset = charset_part[len("charset=") :]
-                charset = charset or "utf-8"
+        mime_type, charset = _parse_meta(meta)
+        mime_type = mime_type or "text/gemini"
+        charset = charset or "utf-8"
 
         resp = Response(
             status=status, meta=meta, url=url, mime_type=mime_type, charset=charset