From 49990228c9664908bccb610bc2ffb5c44f914102 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Madeleine=20Sydney=20=C5=9Alaga?= Date: Sun, 22 Feb 2026 19:01:12 -0700 Subject: [PATCH] feat: xelatex rendering --- doerg/doerg-tex/deps.edn | 6 + doerg/doerg-tex/deserialise.clj | 5 + doerg/doerg-tex/index.js | 2 +- doerg/doerg-tex/serialise.clj | 3 + .../resources/net/deertopia/doerg/prelude.tex | 1 + .../net/deertopia/doerg/preview-template.tex | 11 + .../net/deertopia/doerg/tuftesque.css | 4 + doerg/src/net/deertopia/doerg/common.clj | 109 +++++++++- doerg/src/net/deertopia/doerg/render.clj | 140 ++++++++---- doerg/src/net/deertopia/doerg/repl.clj | 6 +- doerg/src/net/deertopia/doerg/tex.clj | 199 ++++++++++++++++-- 11 files changed, 423 insertions(+), 63 deletions(-) create mode 100644 doerg/doerg-tex/deps.edn create mode 100644 doerg/doerg-tex/deserialise.clj create mode 100644 doerg/resources/net/deertopia/doerg/preview-template.tex diff --git a/doerg/doerg-tex/deps.edn b/doerg/doerg-tex/deps.edn new file mode 100644 index 0000000..65b9cb6 --- /dev/null +++ b/doerg/doerg-tex/deps.edn @@ -0,0 +1,6 @@ +{:deps {babashka/fs {:mvn/version "0.5.24"} + cheshire/cheshire {:mvn/version "6.1.0"} + com.rpl/specter {:mvn/version "1.1.6"} + mvxcvi/clj-cbor {:mvn/version "1.1.1"} + babashka/process {:mvn/version "0.6.25"}} + :paths ["." "classes"]} diff --git a/doerg/doerg-tex/deserialise.clj b/doerg/doerg-tex/deserialise.clj new file mode 100644 index 0000000..cb0011c --- /dev/null +++ b/doerg/doerg-tex/deserialise.clj @@ -0,0 +1,5 @@ +(ns deserialise + (:require [clj-cbor.core :as cbor] + [clojure.string :as str])) + +(prn (cbor/decode cbor/default-codec System/in :eof)) diff --git a/doerg/doerg-tex/index.js b/doerg/doerg-tex/index.js index de97de9..ea6e854 100755 --- a/doerg/doerg-tex/index.js +++ b/doerg/doerg-tex/index.js @@ -34,7 +34,7 @@ function do_command (cmd) { return null } } catch (e) { - return e + return {type: "error", error: e} } } diff --git a/doerg/doerg-tex/serialise.clj b/doerg/doerg-tex/serialise.clj index 1c8e9dd..417a648 100644 --- a/doerg/doerg-tex/serialise.clj +++ b/doerg/doerg-tex/serialise.clj @@ -8,8 +8,11 @@ (defn c [x] (->> x cbor/encode (map #(format "%02x" %)) (str/join " "))) +#_ (w "\\naturalto") +(w "\\ifxetex blah \\fi") + #_#_#_ (w "c = \\sqrt{a^2 + y^2}") (w "c = \\sqrt{a^ + y^2") diff --git a/doerg/resources/net/deertopia/doerg/prelude.tex b/doerg/resources/net/deertopia/doerg/prelude.tex index a0cb5b4..37a3084 100644 --- a/doerg/resources/net/deertopia/doerg/prelude.tex +++ b/doerg/resources/net/deertopia/doerg/prelude.tex @@ -141,6 +141,7 @@ \newcommand{\definedto}{≔} \newcommand{\equivto}{\simeq} \newcommand{\homotopicto}{\sim} +\newcommand{\homotopyto}{\sim} \newcommand{\naturalto}{\Rightarrow} \newcommand{\isoto}{\cong} \newcommand{\monicto}{\rightarrowtail} diff --git a/doerg/resources/net/deertopia/doerg/preview-template.tex b/doerg/resources/net/deertopia/doerg/preview-template.tex new file mode 100644 index 0000000..9ecc15a --- /dev/null +++ b/doerg/resources/net/deertopia/doerg/preview-template.tex @@ -0,0 +1,11 @@ +\documentclass{article} +\usepackage{amsmath} +\usepackage[active,tightpage,auctex,dvips]{preview} +\usepackage{fontspec} +\usepackage{ifxetex} +\usepackage{syd-plex} + +\begin{document} +\setlength\abovedisplayskip{0pt} +% {{contents}} +\end{document} diff --git a/doerg/resources/net/deertopia/doerg/tuftesque.css b/doerg/resources/net/deertopia/doerg/tuftesque.css index 31925bd..f09c4af 100644 --- a/doerg/resources/net/deertopia/doerg/tuftesque.css +++ b/doerg/resources/net/deertopia/doerg/tuftesque.css @@ -542,3 +542,7 @@ figure.fullwidth figcaption { ; max-width: 55% ; font-size: 1.5rem } + +.latex-fragment +{ fill: currentColor +} diff --git a/doerg/src/net/deertopia/doerg/common.clj b/doerg/src/net/deertopia/doerg/common.clj index cc1b4eb..8cbdc1c 100644 --- a/doerg/src/net/deertopia/doerg/common.clj +++ b/doerg/src/net/deertopia/doerg/common.clj @@ -1,6 +1,12 @@ (ns net.deertopia.doerg.common (:require [babashka.process :as p] - [clojure.string :as str])) + [clojure.string :as str] + [clojure.tools.logging :as l] + [clojure.java.io :as io]) + (:import (java.io FilterInputStream StringWriter InputStream + OutputStream PrintStream ByteArrayOutputStream + ByteArrayInputStream FilterOutputStream) + (java.nio.charset StandardCharsets))) (defn deref-with-timeout [process ms] (let [p (promise) @@ -16,3 +22,104 @@ {:process process :timed-out-after-milliseconds ms})) @p))) + +(defn tee-input-stream + "Return a wrapped `InputStream` that writes all bytes read from + input-stream to sink, à la the UNIX command tee(1)." + [input-stream sink] + (proxy [FilterInputStream] [input-stream] + (read + ([] + (let [c (proxy-super read)] + (when (not= c -1) + (.write sink c)) + c)) + ([^bytes bs] + (let [n (proxy-super read bs)] + (when (not= n -1) + (.write sink bs 0 n)) + n)) + ([^bytes bs off len] + (let [n (proxy-super read bs off len)] + (when (not= n -1) + (.write sink bs off n)) + n))) + (close [] + (try (proxy-super close) + (finally (.close sink)))))) + +(defn tee-output-stream + "Return a wrapped `OutputStream` that writes all bytes written to + output-stream to sink, à la the UNIX command tee(1)." + [output-stream sink] + (proxy [FilterOutputStream] [output-stream] + (write + ([bs-or-b] + (proxy-super write bs-or-b) + (.write sink bs-or-b)) + ([^bytes bs off len] + (proxy-super write bs off len) + (.write sink bs off len))) + (close [] + (try (proxy-super close) + (finally (.close sink)))))) + +#_ +(defn hook-input-stream [input-stream hook] + (proxy [FilterInputStream] [input-stream] + (read + ([] + (let [c (proxy-super read)] + (when (not= c -1) + (hook (byte-array [c]))) + c)) + ([^bytes bs] + (let [n (proxy-super read bs)] + (when (not= n -1) + (let [bs* (byte-array n 0)] + (System/arraycopy bs 0 bs* 0 n) + (hook bs*))) + n)) + ([^bytes bs off len] + (let [n (proxy-super read bs off len)] + (when (not= n -1) + (.write sink bs off n)) + n))) + (close [] + (try (proxy-super close) + (finally (.close sink)))))) + +(comment + (with-open [sink (ByteArrayOutputStream.) + out (ByteArrayOutputStream.) + in (ByteArrayInputStream. (.getBytes "hello worms"))] + (io/copy (tee-input-stream in sink) out) + (def the-out out) + (def the-sink sink) + {:out out + :sink sink}) + (with-open [sink (l/log-stream :info "blah") + out (ByteArrayOutputStream.) + in (ByteArrayInputStream. (.getBytes "hello worms"))] + (io/copy (tee-input-stream in sink) out) + (def the-out out) + (def the-sink sink) + {:out out + :sink sink})) + +(comment + (let [out (ByteArrayOutputStream.)] + (p/shell {:out (tee-output-stream + out (l/log-stream :info "blah"))} + "echo" "hello\n" "worms") + (.toString out))) + +(defn invoke [opts & cmd] + (l/info (str/join " " (cons "$" cmd))) + (let [r (apply p/shell + (merge {:continue true + :in nil :out :string :err :string} + opts) + cmd) + bin (first cmd)] + r)) diff --git a/doerg/src/net/deertopia/doerg/render.clj b/doerg/src/net/deertopia/doerg/render.clj index 77de4e6..3b78d1c 100644 --- a/doerg/src/net/deertopia/doerg/render.clj +++ b/doerg/src/net/deertopia/doerg/render.clj @@ -10,7 +10,8 @@ [hiccup2.core :as hiccup] [clojure.pprint] [net.deertopia.doerg.tex :as tex] - [clojure.zip :as z])) + [clojure.zip :as z] + [babashka.fs :as fs])) ;;; Top-level API @@ -40,33 +41,33 @@ (def ^:dynamic ^:private *document-info*) (declare ^:private gather-footnotes render-renderer-error - view-children-as-seq) + view-children-as-seq render-tex-snippets) (defn org-element-recursive "Recursively render an Org-mode element to Hiccup." [e] - (tex/binding-tex-worker - (->> e - ;; gather-footnotes - (sp/transform - [element/postorder-walker view-children-as-seq] - (fn [node] - (try (org-element node) - (catch Throwable e - (lr/error e "Error in renderer" {:node node}) - (render-renderer-error e)))))))) + (->> e + (sp/transform + [element/postorder-walker view-children-as-seq] + (fn [node] + (try (org-element node) + (catch Throwable e + (lr/error e "Error in renderer" {:node node}) + (render-renderer-error e))))))) (defn org-document "Recursively render an Org-mode document to Hiccup." [doc] - (let [rendered (org-element-recursive (gather-footnotes doc))] - [:html - [:head - [:title "org document"] - doerg-html/head] - [:body - [:article - rendered]]])) + (tex/binding-temml-worker + (let [rendered (-> doc gather-footnotes render-tex-snippets + org-element-recursive)] + [:html + [:head + [:title "org document"] + doerg-html/head] + [:body + [:article + rendered]]]))) ;;; Further dispatching on `org-element` @@ -90,17 +91,6 @@ (sp/view #(update % :children seq)) sp/STAY)) -#_ -(defn- gather-footnotes [doc] - (->> doc - (sp/select - [element/children-walker element/footnotes-section? - element/children-walker - #(element/of-type? % "footnote-definition") - (sp/view (fn [d] - {(:label d) d}))]) - (apply merge))) - (defn- contains-footnote-refs? [node] (some #(element/of-type? % "footnote-reference") (:children node))) @@ -137,6 +127,79 @@ element/footnotes-section?] sp/NONE)))) +(defn- collect-latex-headers [doc] + (->> doc + (sp/select + [element/postorder-walker + #(element/of-keyword-type? % "LATEX_HEADER") + (sp/view :value)]))) + +(defn- read-and-patch-generated-svg [{:keys [file height depth]}] + ;; dvisvgm writes standalone SVG files, to which we need to make a + ;; few changes to use them inline within our HTML. + ;; • XML header: Bad syntax when embedded in an HTML doc. Remove + ;; it. + ;; • Width and height: We override these with our own values + ;; computed by `net.deertopia.doerg.tex` to ensure correct + ;; positioning relative to the surrounding text. More + ;; accurately, we remove the height and width attributes from + ;; the SVG tag, and set the new values for height and + ;; vertical-align in the style attribute + ;; • Viewbox: Must be removed entirely for correct positioning. + (-> (slurp file) + (str/replace-first "" "") + (str/replace-first #" height=['\"][^\"']+[\"']" "") + (str/replace-first #" width=['\"][^\"']+[\"']" "") + (str/replace-first + #"viewBox=['\"][^\"']+[\"']" + (fn [s] + (format "%s style=\"%s\"" + s + (format "height:%.4fem;vertical-align:%.4fem;display:inline-block" + height (- depth))))))) + +(defn- render-tex-snippets [doc] + (let [promises (atom []) + r (->> doc (sp/transform + [element/postorder-walker + #(element/of-type? + % "latex-fragment" "latex-environment")] + (fn [node] + (let [p (promise)] + (swap! promises #(conj % {:promise p :node node})) + (assoc node ::rendered p))))) + f (fn [] + (fs/with-temp-dir [svg-dir {:prefix "doerg-svg"}] + (let [rendered-snippets + (delay (->> @promises + (map #(-> % :node :value)) + (apply tex/render-xelatex svg-dir)))] + (def the-rendered-snippets rendered-snippets) + (doseq [{:keys [promise node]} @promises] + (try (let [{:keys [value contents]} node + temml (tex/render-temml (or contents value))] + (if (tex/erroneous-temml-output? temml) + (let [tex (get @rendered-snippets value)] + (if (:errors tex) + (deliver promise (hiccup/raw temml)) + (->> tex + read-and-patch-generated-svg + hiccup/raw + (deliver promise)))) + (deliver promise (hiccup/raw temml)))) + (catch Exception e + (prn e) + (flush) + (throw e)))) + (when (fs/exists? "/tmp/doerg-svgs") + (fs/delete-tree "/tmp/doerg-svgs")) + (fs/copy-tree svg-dir "/tmp/doerg-svgs"))))] + (future-call (bound-fn* f)) + r)) + +(comment + (render-tex-snippets doc)) + (defn- render-pprint @@ -280,15 +343,12 @@ (str "@" key)) (defmethod org-element "latex-fragment" [{:keys [contents value] :as e}] - (let [display? (str/starts-with? value "\\[")] - #_ - (render-pprint (assoc e :display? display?)) - [:span.latex-fragment - (hiccup/raw (tex/render contents :display? display?))])) - -(defmethod org-element "latex-environment" [{:keys [value]}] [:span.latex-fragment - (hiccup/raw (tex/render value :display? true))]) + (-> e ::rendered (deref #_#_ 2000 "«timed out»"))]) + +(defmethod org-element "latex-environment" [{:keys [value] :as e}] + [:span.latex-fragment + (-> e ::rendered (deref #_#_ 2000 "«timed out»"))]) (defmethod org-element "example-block" [{:keys [value]}] [:pre value]) @@ -307,7 +367,6 @@ ;; Completely ignore the LATEX_COMPILER keyword. (defmethod org-keyword "LATEX_COMPILER" [_] nil) -;; TODO: Real LatEx support. (defmethod org-keyword "LATEX_HEADER" [_] nil) ;; Not sure how to deal with this one yet. @@ -330,4 +389,3 @@ [:span.org-link.external [:a {:href raw-link} (or (seq children) raw-link)]]) - diff --git a/doerg/src/net/deertopia/doerg/repl.clj b/doerg/src/net/deertopia/doerg/repl.clj index 56dfe43..eaddc35 100644 --- a/doerg/src/net/deertopia/doerg/repl.clj +++ b/doerg/src/net/deertopia/doerg/repl.clj @@ -14,7 +14,11 @@ "/home/msyds/org/20250919114912-homepage.org" #_ "/home/msyds/org/20251111182118-path_induction.org" - "/home/msyds/org/20250512144715-natural_transformation_category_theory.org") + #_ + "/home/msyds/org/20250512144715-natural_transformation_category_theory.org" + #_ + "/home/msyds/org/20251021155921-path_action.org" + "/tmp/t.org") (defn- force-create-sym-link [path target] (fs/delete-if-exists path) diff --git a/doerg/src/net/deertopia/doerg/tex.clj b/doerg/src/net/deertopia/doerg/tex.clj index e7d854f..764604c 100644 --- a/doerg/src/net/deertopia/doerg/tex.clj +++ b/doerg/src/net/deertopia/doerg/tex.clj @@ -2,48 +2,209 @@ (:require [babashka.process :as p] [net.deertopia.doerg.common :as common] [clj-cbor.core :as cbor] - [clojure.java.io :as io])) + [clojure.java.io :as io] + [clojure.string :as str] + [clojure.tools.logging :as l] + [babashka.fs :as fs]) + (:import (java.io ByteArrayOutputStream))) + +;;; XeLaTeX -(def ^:dynamic *tex-worker-timeout-duration* +(def ^:private scale-divisor 66873.46948423679) + +(def ^:private font-size 10) + +(def ^:private tightpage-regexp + #"Preview: Tightpage (-?\d+) *(-?\d+) *(-?\d+) *(-?\d+)") + +(def ^:private preview-start-regexp + #"! Preview: Snippet (\d+) started.") + +(def ^:private preview-end-regexp + #"(?:^Preview: Tightpage.*$)?\n! Preview: Snippet (\d+) ended.\((\d+)\+(\d+)x(\d+)\)") + +(defn- invoke [extra-opts & args] + (let [namespace (or (::ns extra-opts) + (first args)) + out-bytes (ByteArrayOutputStream.) + out-stream (common/tee-output-stream + out-bytes + (l/log-stream :info (str namespace "/out"))) + err-stream (l/log-stream :info (str namespace "/err")) + opts (merge extra-opts + {:out out-stream :err err-stream :continue true + :shutdown p/destroy-tree + :pre-start-fn (fn [{:keys [cmd]}] + (l/infof "$ %s" + (str/join " " cmd))) + :exit-fn (fn [{:keys [cmd exit]}] + (l/infof "%s exited w/ status %d" + (first cmd) exit))}) + r (apply p/shell opts args) + out (.toString out-bytes)] + (-> r + (assoc ::out out)))) + +(defn- parse-tightpage [xelatex-out] + (->> (re-find tightpage-regexp xelatex-out) + (drop 1) + (map parse-long))) + +(defn- snippet-dimensions [[tp1 tp2 tp3 tp4] [d1 d2 d3]] + (let [depth (/ (- d2 tp2) scale-divisor font-size)] + {:depth depth + :height (+ depth + (/ (+ d1 tp4) + scale-divisor + font-size)) + :width (/ (+ d3 tp3 (- tp2)) + scale-divisor + font-size)})) + +(defn- parse-xelatex-output [out] + (let [tightpage-info (parse-tightpage out) + m-start (re-matcher preview-start-regexp out) + m-end (re-matcher preview-end-regexp out)] + (loop [acc []] + (if-some [[_ snippet-ix] (re-find m-start)] + (let [r (re-find m-end) + [_ snippet-ix* _ _ _] r + dimensional-info (->> r (drop 2) (map parse-long)) + errors (-> out + (subs (.end m-start) (.start m-end)) + (str/replace-first #"[^!]*" "") + str/trim)] + (assert (= snippet-ix snippet-ix*)) + (recur (conj acc (-> (snippet-dimensions + tightpage-info dimensional-info) + (assoc :errors (if (empty? errors) + nil + errors)))))) + acc)))) + +(defn- invoke-xelatex [& {:keys [file output-dir]}] + (invoke + {:dir output-dir} + "xelatex" "-no-pdf" "-interaction" "nonstopmode" + "-output-directory" output-dir file)) + +;; dvisvgm --page=1- --optimize --clipjoin --relative --no-fonts -v3 --message='processing page {?pageno}: output written to {?svgpath}' --bbox=preview -o %B-%%9p.svg %f + +(defn- invoke-dvisvgm [& {:keys [file output-dir]}] + (invoke + {:dir output-dir} + "dvisvgm" "--page=1-" "--optimize" "--clipjoin" + "--relative" "--no-fonts" "-v3" + "--message=processing page {?pageno}: output written to {?svgpath}" + "--bbox=preview" "-o" "%9p.svg" file)) + +(defn- snippet-file-names + "Return a map of TeX snippets (as strings, including the math + delimiters) to file names as would be output by + `invoke-dvisvgm`. The returned file names are relative to dvisvgm's + output directory." + [snippets] + (let [svgs (for [i (range)] + (format "%09d.svg" i))] + (zipmap (reverse snippets) svgs))) + +(defn- instantiate-preview-template [snippets] + (let [contents (->> (for [s snippets] + (format "\\begin{preview}\n%s\n\\end{preview}" s)) + (str/join "\n"))] + (-> (io/resource "net/deertopia/doerg/preview-template.tex") + slurp + (str/replace-first "% {{contents}}" contents)))) + +(defn render-xelatex + "Render a collection of `snippets` to SVGs in `output-dir` using + XeLaTeX and dvisvgm. Returns a map whose keys are `snippets` and + whose values are maps containing dimensional info, a string of + errors output by XeLaTeX, and the path to the generated SVG + file. Math delimiters are *not* implicitly added to each snippet." + [output-dir & snippets] + (fs/with-temp-dir [dir {:prefix "doerg-xelatex"}] + (let [preview-tex (fs/file dir "preview.tex") + preview-xdv (fs/file dir "preview.xdv") + distinct-snippets (distinct snippets)] + (fs/create-dirs output-dir) + (->> (instantiate-preview-template distinct-snippets) + (spit preview-tex)) + (let [dimensions (-> (invoke-xelatex :output-dir dir :file preview-tex) + ::out parse-xelatex-output) + _ (invoke-dvisvgm :output-dir output-dir :file preview-xdv)] + ;; Adorn each snippet with dimensions and errors parsed from + ;; XeLaTeX's output, and the paths to SVG files generated by + ;; dvisvgm. + (assert (= (count distinct-snippets) (count dimensions))) + (->> (map (fn [ix snippet dimensions] + {snippet + (-> dimensions + (assoc + :file (fs/file output-dir + (format "%09d.svg" (inc ix)))))}) + (range) + distinct-snippets + dimensions) + (into {}))) + #_ + (do (when (fs/exists? "/tmp/doerg-tex-test") ; For debugging + (fs/delete-tree "/tmp/doerg-tex-test")) + (fs/copy-tree dir "/tmp/doerg-tex-test"))))) + +(comment + (render-xelatex "/tmp/doerg-tex-svgs" + "\\(c = \\sqrt{x^2 + y^2}\\)" + "\\(x\\)" "\\(y\\)" "\\(x\\)" + "\\(\\undefinedcommandlol\\)")) + + +;;; Temml + +(def ^:dynamic *temml-worker-timeout-duration* "Number of milliseconds to wait before killing the external Uniorg process." (* 10 1000)) -(def ^:dynamic *worker*) +(def ^:dynamic *temml-worker*) -(defn tex-worker [& {:keys [preamble]}] +(defn temml-worker [& {:keys [preamble]}] (p/process {:shutdown p/destroy-tree - :err :inherit} + :err (l/log-stream :info "temml/err")} #_"doerg-tex" "./doerg-tex/index.js" "--preamble" "resources/net/deertopia/doerg/prelude.tex")) -(defn finish [tw] +(defn close-temml-worker [tw] (.close (:in tw))) -(defmacro with-tex-worker [tw & body] - `(let [~tw (tex-worker)] +(defmacro with-temml-worker [tw & body] + `(let [~tw (temml-worker)] (try (do ~@body) (finally - (finish ~tw) + (close-temml-worker ~tw) (p/destroy-tree ~tw))))) -(defmacro binding-tex-worker [& body] - `(binding [*worker* (tex-worker)] +(defmacro binding-temml-worker [& body] + `(binding [*temml-worker* (temml-worker)] (try ~@body (finally - (finish *worker*))))) + (close-temml-worker *temml-worker*))))) -(defn command [x] - (cbor/encode cbor/default-codec (:in *worker*) x) - (.flush (:in *worker*)) - (cbor/decode cbor/default-codec (:out *worker*))) +(defn command-temml-worker [x] + (cbor/encode cbor/default-codec (:in *temml-worker*) x) + (.flush (:in *temml-worker*)) + (cbor/decode cbor/default-codec (:out *temml-worker*))) -(defn render [s & {:keys [display?]}] +(defn render-temml [s & {:keys [display?]}] (if display? - (command [s]) - (command s))) + (command-temml-worker [s]) + (command-temml-worker s))) + +;; hackky.... +(defn erroneous-temml-output? [s] + (re-find #"(#b22222|temml-error)" s))