From 59b75c01e877cccb0af31ba0981baad177dd7c9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Madeleine=20Sydney=20=C5=9Alaga?= Date: Sun, 8 Mar 2026 20:06:02 -0600 Subject: [PATCH] fix: latex-environment belongs to paragraph --- doerg/doerg-parser/index.js | 6 +- .../net/deertopia/doerg/preview-template.tex | 6 ++ doerg/src/net/deertopia/doerg/element.clj | 84 +++++++++++++++++-- .../test/net/deertopia/doerg/element_test.clj | 41 +++++++++ .../paragraph-ending-with-latex.org | 7 ++ .../paragraph-surrounding-latex.org | 7 ++ .../paragraph-with-multiple-latex.org | 24 ++++++ 7 files changed, 168 insertions(+), 7 deletions(-) create mode 100644 doerg/test/net/deertopia/doerg/element_test/paragraph-ending-with-latex.org create mode 100644 doerg/test/net/deertopia/doerg/element_test/paragraph-surrounding-latex.org create mode 100644 doerg/test/net/deertopia/doerg/element_test/paragraph-with-multiple-latex.org diff --git a/doerg/doerg-parser/index.js b/doerg/doerg-parser/index.js index 78b82f6..d3623e9 100755 --- a/doerg/doerg-parser/index.js +++ b/doerg/doerg-parser/index.js @@ -2,13 +2,17 @@ const { parse } = require ("uniorg-parse/lib/parser.js"); +const opts = { + trackPosition: true +} + async function main () { const chunks = [] for await (const chunk of process.stdin) { chunks.push (chunk) } const orgText = Buffer.concat (chunks).toString ("utf8") - process.stdout.write (JSON.stringify (parse (orgText))) + process.stdout.write (JSON.stringify (parse (orgText, opts))) } main () diff --git a/doerg/resources/net/deertopia/doerg/preview-template.tex b/doerg/resources/net/deertopia/doerg/preview-template.tex index 46dcd51..930fa44 100644 --- a/doerg/resources/net/deertopia/doerg/preview-template.tex +++ b/doerg/resources/net/deertopia/doerg/preview-template.tex @@ -48,6 +48,12 @@ \newcommand{\optic}[3]{\opticname{#1}^\prime\;#2\;#3} \newcommand{\Optic}[5]{\opticname{#1}\;#2\;#3\;#4\;#5} +% Default uses arrow glyphs from the active font, which are kinda ugly in the +% case of 
Plex. +\tikzcdset{ + arrow style=tikz +} + \begin{document} \setlength\abovedisplayskip{0pt} % Remove padding before equation environments. %% \color[rgb]{0.000,0.000,0.004}\special{dvisvgm:currentcolor on}\setcounter{equation}{0}% diff --git a/doerg/src/net/deertopia/doerg/element.clj b/doerg/src/net/deertopia/doerg/element.clj index 1ee3443..75e9604 100644 --- a/doerg/src/net/deertopia/doerg/element.clj +++ b/doerg/src/net/deertopia/doerg/element.clj @@ -10,7 +10,10 @@ [spec-dict.main :refer [dict]] [net.deertopia.doerg.config :as cfg] [com.rpl.specter :as sp] - [clojure.tools.logging.readable :as lr]) + [clojure.tools.logging.readable :as lr] + [clojure.zip :as z] + [com.rpl.specter.zipper :as sz] + [clojure.core.match :refer [match]]) (:import (java.util UUID)) (:refer-clojure :exclude [read-string])) @@ -34,14 +37,16 @@ (if (zero? (:exit r)) (-> r :out (json/parse-string (comp keyword camel->kebab)))))) -(declare gather-first-section) +(declare gather-first-section gather-latex-paragraphs) -(defn read-string [s & {:keys [post-processors] - :or {post-processors [gather-first-section]}}] +(defn read-string + [s & {:keys [post-processors] + :or {post-processors [gather-first-section + gather-latex-paragraphs]}}] (let [apply-post-processors (apply comp (reverse post-processors))] (with-in-str s - (-> (uniorg :in *in*) - apply-post-processors)))) + (-> (uniorg :in *in*) + apply-post-processors)))) @@ -208,3 +213,70 @@ :children first-section-nodes}) rest)] (assoc node :children new-children))) + +(defn separated-by-explicit-paragraph-break? + "Return truthy if each successive pair of elements is separated by + at least one explicit paragraph break; i.e. a blank line." + [& elements] + (match elements + [e₁ e₂ & es] + (and (< (-> e₁ :position :end :line) + (-> e₂ :position :start :line)) + (recur es)) + :else true)) + +(defn swallow + ([predator prey] + (assert (greater-element? 
predator)) + (-> predator + (update :children #(conj % prey)) + (assoc-in [:position :end] (-> prey :position :end)))) + ([predator prey & more-prey] + (reduce swallow predator (cons prey more-prey)))) + +(comment + (-> [1 2 3 4] + (neighbourly-mapcat prn) ) + (def doc (read-string (slurp some-org-file))) + + (let [r (atom []) + blah] + @r)) + +(defn gather-latex-paragraphs [node] + (->> node + (sp/transform + [postorder-walker (sp/must :children)] + (fn [children] + (loop [acc [] + cs (vec children)] + (match cs + ;; CASE: A paragraph followed by a LaTeX environment + ;; followed by a paragraph. If there are no blank lines + ;; separating the three elements, absorb them into a + ;; single paragraph spanning the sum of their parts. + ([(para₁ :guard #(of-type? % "paragraph")) + (tex :guard #(of-type? % "latex-environment")) + (para₂ :guard #(of-type? % "paragraph")) + & rest] + :guard #(apply separated-by-explicit-paragraph-break? %)) + (recur (conj acc + ;; Swallow para₂'s /children/, + ;; not para₂ itself. Nested + ;; paragraphs are not supported + ;; by HTML. + (apply swallow para₁ tex (:children para₂))) + rest) + ;; CASE: A paragraph followed by a LaTeX environment. + ;; If there are no blank lines separating the paragraph + ;; from the LaTeX environment, the LaTeX environment + ;; shall become a child of the paragraph. + ([(para :guard #(of-type? % "paragraph")) + (tex :guard #(of-type? % "latex-environment")) + & rest] + :guard #(apply separated-by-explicit-paragraph-break? %)) + (recur (conj acc (swallow para tex)) rest) + ;; CASE: Irrelevant or empty! 
+ [c & rest] + (recur (conj acc c) rest) + [] acc)))))) diff --git a/doerg/test/net/deertopia/doerg/element_test.clj b/doerg/test/net/deertopia/doerg/element_test.clj index 3406bb1..d1288da 100644 --- a/doerg/test/net/deertopia/doerg/element_test.clj +++ b/doerg/test/net/deertopia/doerg/element_test.clj @@ -49,3 +49,44 @@ first-paragraph-belongs-to-first-section?)) (t/is (not (-> (parse-resource "first-paragraph-under-heading.org") first-paragraph-belongs-to-first-section?))))) + +(defn walk-types [type & types] + [sut/postorder-walker #(apply sut/of-type? % type types)]) + +(defn headline-matches? [re] + ()) + +(defn- paragraph-ends-with-latex? [doc] + (-> (sp/select-first [(walk-types "paragraph") + (sp/must :children) + sp/LAST] + doc) + (sut/of-type? "latex-environment"))) + +(defn- paragraph-has-latex? [doc] + (sp/select-first [(walk-types "paragraph") + (sp/must :children) + sp/ALL + #(sut/of-type? % "latex-environment")] + doc)) + +(defn- paragraph-has-multiple-latex? [doc] + (let [[interleaved fenceposted] + (sp/select [(walk-types "section") + (sp/must :children) + #(some-> % first (sut/of-type? 
"headline")) + (sp/subselect (sp/view #(drop 1 %)) + sp/ALL (sp/must :children) sp/ALL + (sp/must :type))] + doc)])) + +(t/deftest paragraph-separation + (t/testing "paragraph ending with latex" + (-> (parse-resource "paragraph-ending-with-latex.org") + paragraph-ends-with-latex?)) + (t/testing "paragraph surrounding latex" + (-> (parse-resource "paragraph-surrounding-latex.org") + paragraph-has-latex?)) + (t/testing "paragraph with interleaved latex" + (let [(parse-resource "paragraph-with-multiple-latex.org")]) + (t/is (-)))) diff --git a/doerg/test/net/deertopia/doerg/element_test/paragraph-ending-with-latex.org b/doerg/test/net/deertopia/doerg/element_test/paragraph-ending-with-latex.org new file mode 100644 index 0000000..8f1eebb --- /dev/null +++ b/doerg/test/net/deertopia/doerg/element_test/paragraph-ending-with-latex.org @@ -0,0 +1,7 @@ +#+title: paragraph ending with latex + +here is the paragraph, +\begin{align*} +\text{and here} & +\\ & \text{is the \LaTeX} +\end{align*} diff --git a/doerg/test/net/deertopia/doerg/element_test/paragraph-surrounding-latex.org b/doerg/test/net/deertopia/doerg/element_test/paragraph-surrounding-latex.org new file mode 100644 index 0000000..cdffa8f --- /dev/null +++ b/doerg/test/net/deertopia/doerg/element_test/paragraph-surrounding-latex.org @@ -0,0 +1,7 @@ +#+title: paragraph surrounding latex + +first part of paragraph +\begin{equation*} +\text{some \LaTeX \}:)} +\end{equation*} +last part of paragraph diff --git a/doerg/test/net/deertopia/doerg/element_test/paragraph-with-multiple-latex.org b/doerg/test/net/deertopia/doerg/element_test/paragraph-with-multiple-latex.org new file mode 100644 index 0000000..99b96bc --- /dev/null +++ b/doerg/test/net/deertopia/doerg/element_test/paragraph-with-multiple-latex.org @@ -0,0 +1,24 @@ +#+title: paragraph with multiple latex environments + +* interleaved + +first part of paragraph +\begin{equation*} +\text{first \LaTeX\ environment} +\end{equation*} +second part of paragraph 
+\begin{equation*} +\text{second \LaTeX\ environment} +\end{equation*} + +* fenceposted + +first fencepost +\begin{equation*} +\text{first fenceposted \LaTeX\ environment} +\end{equation*} +second fencepost +\begin{equation*} +\text{second fenceposted \LaTeX\ environment} +\end{equation*} +third fencepost