Repos / mcross / 60a1987a07
commit 60a1987a07c0ad0108eb09bd924b914f61e9f4e8
Author: Bùi Thành Nhân <hi@imnhan.com>
Date:   Thu May 14 14:36:56 2020 +0700

    document parser
    
    next step: actually render pretty text based on node type

diff --git a/README.md b/README.md
index 448020b..4e04916 100644
--- a/README.md
+++ b/README.md
@@ -29,3 +29,8 @@ ## Forces gemini:// in request
 
 Spec says protocol part is optional, but if I omit that one the server will
 respond with `53 No proxying to other hosts!`.
+
+## Newline
+
+The spec says a line break should be `\r\n`, but the server running
+gemini.circumlunar.space just uses `\n` every time.
diff --git a/src/mccross/document.py b/src/mccross/document.py
new file mode 100644
index 0000000..e27d4ad
--- /dev/null
+++ b/src/mccross/document.py
@@ -0,0 +1,81 @@
+import re
+
+NEWLINE = "\n"
+LINK_LINE_PATTERN = re.compile(r"^=>[ \t]+(\S+)([ \t]+(.+))?$")
+
+
+class GeminiNode:
+    text: str
+
+    def __init__(self, text):
+        self.text = text
+
+    def __repr__(self):
+        return f"{self.__class__.__name__}: {self.text.__repr__()}"
+
+
+class TextNode(GeminiNode):
+    pass
+
+
+class LinkNode(GeminiNode):
+    url: str
+    name: str
+
+    def __init__(self, text, url, name):
+        self.text = text
+        self.url = url
+        self.name = name
+
+    def __repr__(self):
+        result = f"{self.__class__.__name__}: {self.url.__repr__()}"
+        if self.name:
+            result += f" {self.name.__repr__()}"
+        return result
+
+
+class PreformattedNode(GeminiNode):
+    pass
+
+
+def parse(text):
+    """
+    Naive one-pass parser.
+    """
+    nodes = []
+    preformatted = None
+
+    for line in text.strip().split(NEWLINE):
+
+        if line == "```":
+            if preformatted is None:
+                # start preformatted mode
+                preformatted = ""
+            else:
+                nodes.append(PreformattedNode(preformatted))
+                preformatted = None
+
+        elif preformatted is not None:
+            if len(preformatted) > 0:
+                preformatted += "\n"
+            preformatted += line
+
+        elif line.startswith("=> "):
+            match = LINK_LINE_PATTERN.match(line)
+            if not match:
+                nodes.append(TextNode(line))
+                continue
+            url = match.group(1)
+            name = match.group(3)  # may be None
+            nodes.append(LinkNode(text=line, url=url, name=name))
+
+        else:
+            nodes.append(TextNode(line))
+
+    return nodes
+
+
+def test():
+    return parse(
+        """# Project Gemini\n\n## Overview\n\nGemini is a new internet protocol which:\n\n* Is heavier than gopher\n* Is lighter than the web\n* Will not replace either\n* Strives for maximum power to weight ratio\n* Takes user privacy very seriously\n\n## Resources\n\n=> docs/\tGemini documentation\n=> software/\tGemini software\n=> servers/\tKnown Gemini servers\n=> gemini://gus.guru/\tGemini Universal Search engine\n=> https://lists.orbitalfox.eu/listinfo/gemini\tGemini mailing list\n=> https://portal.mozz.us/?url=gemini%3A%2F%2Fgemini.circumlunar.space%2F&fmt=fixed\tGemini-to-web proxy service\n=> https://proxy.vulpes.one/gemini/gemini.circumlunar.space\tAnother Gemini-to-web proxy service\n=> gemini://gemini.conman.org/test/torture/\tGemini client torture test\n\n## Geminispace aggregator (experimental!)\n\n=> capcom/\tCAPCOM\n\n## Free Gemini hosting\n\n=> users/\tUsers with Gemini content on this server\n```\nfooo\nbar\n```\nBye.\n"""
+    )
diff --git a/src/mccross/gui/controller.py b/src/mccross/gui/controller.py
index a49c79d..b040467 100644
--- a/src/mccross/gui/controller.py
+++ b/src/mccross/gui/controller.py
@@ -22,18 +22,20 @@ def go_callback(self, url: str):
         # TODO url validation
 
         print("Requesting", url)
-
         resp = transport.get(url)
+        print("Received", resp)
+
         if resp.status.startswith("2"):
-            self.model.plaintext = resp.body.decode()
+            self.model.update_content(resp.body.decode())
         else:
-            self.model.plaintext = "\n".join(
-                [
-                    "Error:",
-                    f"{resp.status} {resp.meta}",
-                    resp.body.decode() if resp.body else "",
-                ]
+            self.model.update_content(
+                "\n".join(
+                    [
+                        "Error:",
+                        f"{resp.status} {resp.meta}",
+                        resp.body.decode() if resp.body else "",
+                    ]
+                )
             )
 
-        print("Received", resp)
         self.view.render_page()
diff --git a/src/mccross/gui/model.py b/src/mccross/gui/model.py
index b1472d0..d351890 100644
--- a/src/mccross/gui/model.py
+++ b/src/mccross/gui/model.py
@@ -1,2 +1,14 @@
+from .. import document
+
+
 class Model:
     plaintext = "Nothing to see here... yet."
+    gemini_nodes = None
+
+    def update_content(self, plaintext):
+        self.plaintext = plaintext
+        self.gemini_nodes = []
+        try:
+            self.gemini_nodes = document.parse(plaintext)
+        except Exception:
+            print("Invalid gemini document!")
diff --git a/src/mccross/gui/view.py b/src/mccross/gui/view.py
index e60604f..8625556 100644
--- a/src/mccross/gui/view.py
+++ b/src/mccross/gui/view.py
@@ -101,4 +101,10 @@ def _on_go(self, ev=None):
 
     def render_page(self):
         self.text.delete("1.0", "end")
-        self.text.insert("end", self.model.plaintext)
+
+        if not self.model.gemini_nodes:
+            self.text.insert("end", self.model.plaintext)
+        else:
+            self.text.insert(
+                "end", "\n".join(str(node) for node in self.model.gemini_nodes)
+            )