Compare commits

...

6 Commits

Author SHA1 Message Date
777968cac5
All checks were successful
build / build (push) Successful in 1m1s
2026-03-10 14:37:34 -06:00
a35453c2be 2026-03-10 14:30:29 -06:00
e20dcf591d 2026-03-10 13:59:00 -06:00
cbfa42bc73 2026-03-10 13:27:18 -06:00
b0a3895a18 2026-03-10 13:02:37 -06:00
1ff453262d 2026-03-10 11:34:26 -06:00
5 changed files with 120 additions and 28 deletions

View File

@@ -10,7 +10,10 @@
[spec-dict.main :refer [dict]]
[net.deertopia.doerg.config :as cfg]
[com.rpl.specter :as sp]
[clojure.tools.logging.readable :as lr])
[clojure.tools.logging.readable :as lr]
[clojure.zip :as z]
[com.rpl.specter.zipper :as sz]
[clojure.core.match :refer [match]])
(:import (java.util UUID))
(:refer-clojure :exclude [read-string]))
@@ -34,14 +37,16 @@
(if (zero? (:exit r))
(-> r :out (json/parse-string (comp keyword camel->kebab))))))
(declare gather-first-section)
(declare gather-first-section gather-latex-paragraphs)
(defn read-string
  "Parse the string `s` with `uniorg` and post-process the result.

  `s` is bound to `*in*` and handed to `uniorg`; whatever it returns is
  then passed through each function in `:post-processors`, first to
  last.  By default that is `gather-first-section` followed by
  `gather-latex-paragraphs`."
  [s & {:keys [post-processors]
        :or {post-processors [gather-first-section
                              gather-latex-paragraphs]}}]
  ;; `comp` applies its functions right-to-left, so reverse the list to
  ;; run the processors in the order the caller wrote them.
  (let [apply-post-processors (apply comp (reverse post-processors))]
    (with-in-str s
      (-> (uniorg :in *in*)
          apply-post-processors))))
@@ -209,12 +214,25 @@
rest)]
(assoc node :children new-children)))
(defn- neighbourly-mapcat
  "Call `f` on each element of `coll` that has a successor, together with
  its next two neighbours: `(f x next after-next)`.  `after-next` is nil
  on the final call.  Returns the lazy seq of results; despite the name,
  the results are not concatenated."
  [coll f]
  (let [successors (drop 1 coll)
        ;; Pad with a trailing nil so the last pair still receives a
        ;; third argument.
        after-successors (concat (drop 2 coll) [nil])]
    (map f coll successors after-successors)))
(defn separated-by-explicit-paragraph-break?
"Return truthy if each successive pair of elements is separated by
at least one explicit paragraph break; i.e. a blank line.

Elements are read through `[:position :end :line]` and
`[:position :start :line]`."
[& elements]
;; NOTE(review): `elements` is a rest-args seq, while the clause below is a
;; core.match vector pattern -- confirm that it matches a seq at all; if it
;; does not, every call falls through to `:else` and returns true.  The
;; guards in `gather-latex-paragraphs` depend on whatever this returns
;; today, so change the two together.
(match elements
;; NOTE(review): both pattern variables are named `e`, so the comparison
;; below reads the end line and the start line of the same binding.
;; Presumably two distinct names (previous / next element) were intended.
[e e & es]
(and (< (-> e :position :end :line)
(-> e :position :start :line))
;; NOTE(review): recurring on `es` drops both matched elements, so the
;; pair formed by the second element and the head of `es` is never
;; compared.
(recur es))
:else true))
(defn swallow
  "Make `prey` the last child of `predator`.

  Appends `prey` to `predator`'s `:children` and moves `predator`'s
  `[:position :end]` to `prey`'s end.  Given `more-prey`, swallows each
  one in order, left to right.  Asserts that `predator` satisfies
  `greater-element?`."
  ([predator prey]
   (assert (greater-element? predator))
   (-> predator
       ;; Coerce to a vector before `conj`: vectors append, but lists and
       ;; seqs prepend, and nil children would otherwise become a list.
       (update :children #(conj (vec %) prey))
       (assoc-in [:position :end] (-> prey :position :end))))
  ([predator prey & more-prey]
   (reduce swallow predator (cons prey more-prey))))
(comment
(-> [1 2 3 4]
@@ -222,22 +240,43 @@
(def doc (read-string (slurp some-org-file)))
(let [r (atom [])
blah
(sp/transform
[postorder-walker
(sp/must :children)
(sp/collect-one sp/VAL)
sp/INDEXED-VALS
#_
#(of-type? (second %) "latex-environment")
#_
sp/ALL
#_
sp/INDEXED-VALS]
(fn [siblings x]
x)
doc)]
blah]
@r))
(defn gather-latex-paragraphs
  "Fold LaTeX environments into the paragraphs that precede them.

  Visits `node` with `postorder-walker` and rewrites every `:children`
  collection: a paragraph directly followed by a LaTeX environment
  swallows it, and when another paragraph follows the environment, that
  paragraph's children are appended as well.  All other children are
  kept unchanged and in order."
  [node]
  (->> node
       (sp/transform
        [postorder-walker (sp/must :children)]
        (fn [children]
          (loop [acc []
                 cs (vec children)]
            (match cs
              ;; CASE: A paragraph followed by a LaTeX environment
              ;; followed by a paragraph.  If there are no blank lines
              ;; separating the three elements, absorb them into a
              ;; single paragraph spanning the sum of their parts.
              ;;
              ;; The two paragraphs need distinct names: a pattern
              ;; variable used twice cannot refer to both of them.
              ;;
              ;; NOTE(review): the guard function is applied to the whole
              ;; remaining vector `cs`, not only to the three matched
              ;; elements, and it requires the elements to BE separated
              ;; by a paragraph break although the prose above says the
              ;; opposite.  Left as-is because the predicate's current
              ;; behaviour has to be settled first.
              ([(para1 :guard #(of-type? % "paragraph"))
                (tex :guard #(of-type? % "latex-environment"))
                (para2 :guard #(of-type? % "paragraph"))
                & more]
               :guard #(apply separated-by-explicit-paragraph-break? %))
              (recur (conj acc
                           ;; Swallow para2's /children/, not para2
                           ;; itself.  Nested paragraphs are not
                           ;; supported by HTML.
                           (-> (apply swallow para1 tex (:children para2))
                               ;; The merged paragraph ends where para2
                               ;; ended, even if para2's last child ends
                               ;; earlier or para2 has no children.
                               (assoc-in [:position :end]
                                         (-> para2 :position :end))))
                     more)
              ;; CASE: A paragraph followed by a LaTeX environment.
              ;; If there are no blank lines separating the paragraph
              ;; from the LaTeX environment, the LaTeX environment
              ;; shall become a child of the paragraph.
              ([(para :guard #(of-type? % "paragraph"))
                (tex :guard #(of-type? % "latex-environment"))
                & more]
               :guard #(apply separated-by-explicit-paragraph-break? %))
              (recur (conj acc (swallow para tex)) more)
              ;; CASE: Irrelevant or empty!
              [c & more]
              (recur (conj acc c) more)
              [] acc))))))

View File

@@ -49,3 +49,32 @@
first-paragraph-belongs-to-first-section?))
(t/is (not (-> (parse-resource "first-paragraph-under-heading.org")
first-paragraph-belongs-to-first-section?)))))
(defn walk-types
  "Build a Specter path that walks with `sut/postorder-walker` and keeps
  only the nodes accepted by `sut/of-type?` when given `type` and
  `types`."
  [type & types]
  (let [wanted-type? (fn [node]
                       (apply sut/of-type? node type types))]
    [sut/postorder-walker wanted-type?]))
(defn- paragraph-ends-with-latex?
  "Check whether the last child of the first paragraph selected from
  `doc` is a LaTeX environment."
  [doc]
  (let [last-child (sp/select-first [(walk-types "paragraph")
                                     (sp/must :children)
                                     sp/LAST]
                                    doc)]
    (sut/of-type? last-child "latex-environment")))
(defn- paragraph-has-latex?
  "Return the first LaTeX environment found among the children of a
  paragraph in `doc`, or nil when no paragraph contains one."
  [doc]
  (let [latex-environment? (fn [node]
                             (sut/of-type? node "latex-environment"))
        path [(walk-types "paragraph")
              (sp/must :children)
              sp/ALL
              latex-environment?]]
    (sp/select-first path doc)))
(defn- paragraph-has-multiple-latex?
  "Return true when some paragraph in `doc` has more than one LaTeX
  environment among its direct children, false otherwise."
  [doc]
  (let [latex-environment? (fn [node]
                             (sut/of-type? node "latex-environment"))
        ;; More than one LaTeX child; a paragraph that merely has
        ;; children does not count.
        multiple-latex? (fn [children]
                          (< 1 (count (filter latex-environment? children))))]
    (->> (sp/select [(walk-types "paragraph") (sp/must :children)] doc)
         (some multiple-latex?)
         boolean)))
(t/deftest paragraph-separation
  ;; Each fixture is parsed and then checked for a LaTeX environment
  ;; sitting inside a paragraph.
  (t/testing "paragraph ending with latex"
    (t/is (-> (parse-resource "paragraph-ending-with-latex.org")
              paragraph-ends-with-latex?)))
  (t/testing "paragraph surrounding latex"
    (t/is (-> (parse-resource "paragraph-surrounding-latex.org")
              paragraph-has-latex?))))

View File

@@ -0,0 +1,7 @@
#+title: paragraph ending with latex
here is the paragraph,
\begin{align*}
\text{and here} &
\\ & \text{is the \LaTeX}
\end{align*}

View File

@@ -0,0 +1,7 @@
#+title: paragraph surrounding latex
first part of paragraph
\begin{equation*}
\text{some \LaTeX \}:)}
\end{equation*}
last part of paragraph

View File

@@ -0,0 +1,10 @@
#+title: paragraph with multiple latex environments
first part of paragraph
\begin{equation*}
\text{first \LaTeX\ environment}
\end{equation*}
second part of paragraph
\begin{equation*}
\text{second \LaTeX\ environment}
\end{equation*}