From 2735469b07fcea3e2906a1f8b8046752789e695e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Madeleine=20Sydney=20=C5=9Alaga?= Date: Fri, 6 Feb 2026 12:15:49 -0700 Subject: [PATCH] specter and first section --- doerg/README.org | 6 ++ doerg/deps.edn | 4 +- doerg/src/net/deertopia/doerg/element.clj | 92 ++++++++++++++----- doerg/src/net/deertopia/doerg/render.clj | 33 ++++--- doerg/src/net/deertopia/doerg/repl.clj | 13 ++- .../test/net/deertopia/doerg/element_test.clj | 35 ++++++- .../first-paragraph-under-first-section.org | 7 ++ .../first-paragraph-under-heading.org | 5 + 8 files changed, 151 insertions(+), 44 deletions(-) create mode 100644 doerg/README.org create mode 100644 doerg/test/net/deertopia/doerg/element_test/first-paragraph-under-first-section.org create mode 100644 doerg/test/net/deertopia/doerg/element_test/first-paragraph-under-heading.org diff --git a/doerg/README.org b/doerg/README.org new file mode 100644 index 0000000..dfe8ca2 --- /dev/null +++ b/doerg/README.org @@ -0,0 +1,6 @@ +#+title: Doerg specification +#+author: Guppy + +* Footnotes + +- A bunch of metadata should be read into a node of type =doerg-data= diff --git a/doerg/deps.edn b/doerg/deps.edn index 1d8c4ea..956b86c 100644 --- a/doerg/deps.edn +++ b/doerg/deps.edn @@ -5,5 +5,7 @@ babashka/process {:mvn/version "0.6.25"} io.github.msyds/spec-dict {:git/sha "531d629b7f05f37232261cf9e8927a4b5915714f"} - hiccup/hiccup {:mvn/version "2.0.0-RC4"}} + hiccup/hiccup {:mvn/version "2.0.0-RC4"} + com.rpl/specter {:mvn/version "1.1.6"} + lambdaisland/deep-diff2 {:mvn/version "2.12.219"}} :paths ["src" "resources" "test"]} diff --git a/doerg/src/net/deertopia/doerg/element.clj b/doerg/src/net/deertopia/doerg/element.clj index 16efec8..a683f8b 100644 --- a/doerg/src/net/deertopia/doerg/element.clj +++ b/doerg/src/net/deertopia/doerg/element.clj @@ -1,13 +1,14 @@ (ns net.deertopia.doerg.element (:require [babashka.process :as p] [clojure.string :as str] - [clojure.zip :as z] + [clojure.zip] [babashka.fs :as fs] [clojure.java.io :as io] [cheshire.core :as json] [clojure.spec.alpha :as s] [spec-dict.main :refer [dict]] - [net.deertopia.doerg.config :as cfg]) + [net.deertopia.doerg.config :as cfg] + [com.rpl.specter :as sp]) (:refer-clojure :exclude [read-string])) @@ -45,9 +46,14 @@ (if (zero? (:exit r)) (-> r :out (json/parse-string (comp keyword camel->kebab)))))) -(defn read-string [s] - (with-in-str s - (uniorg :in *in*))) +(declare gather-first-section) + +(defn read-string [s & {:keys [post-processors] + :or {post-processors [gather-first-section]}}] + (let [apply-post-processors (apply comp (reverse post-processors))] + (with-in-str s + (-> (uniorg :in *in*) + apply-post-processors)))) @@ -66,8 +72,15 @@ (and (map? element) (contains? element :type))) -(defn of-type? [element type] - (= (:type element) type)) +(defn of-type? + "Return truthy if the Org node `element` is of type `type`. In the + vararg case, return truthy if `element` is of any of the types + listed." + ([element type] + (= (:type element) type)) + ([element type & types] + (contains? (into #{} (cons type types)) + (:type element)))) ;;; Spec @@ -83,19 +96,54 @@ ;;; Zipper (defn doerg-zip [document] - (z/zipper greater-element? - :children - #(assoc %1 :children %2) - document)) + (clojure.zip/zipper greater-element? + :children + #(assoc %1 :children %2) + document)) -(defn cata - "Catamorphism on a zipper." - [loc f] - (let [loc* (if-some [child (z/down loc)] - (loop [current child] - (let [current* (cata current f)] - (if-some [right (z/right current*)] - (recur right) - (z/up current*)))) - loc)] - (z/replace loc* (f (z/node loc*))))) +(def children-walker + "Walk each child of an Org element." + [:children sp/ALL]) + +(def postorder-walker + "Recursively walk each node of an Org element in post-order." + (sp/recursive-path + [] p + (sp/if-path greater-element? + (sp/continue-then-stay children-walker p) + sp/STAY))) + + +;;; Post-processing + +#_ +(defn doerg-data-node? [node] + (of-type? node )) + +#_ +(defn gather-doerg-data [loc] + (assert (of-type? (z/node loc) "org-data") + "`gather-doerg-data` should be applied to the document root.") + (->> loc z/children (split-with doerg-data-node?))) + +(defn- split-sections [nodes] + (let [[of-top-level remaining-nodes] + (->> nodes (split-with #(of-type? % "property-drawer" "keyword"))) + [of-first-section remaining-nodes*] + (->> remaining-nodes (split-with #(not (of-type? % "section"))))] + {:top-level-nodes of-top-level + :first-section-nodes of-first-section + :rest remaining-nodes*})) + +(defn gather-first-section [node] + (assert (of-type? node "org-data") + "`gather-doerg-data` should be applied to the document root.") + (let [{:keys [top-level-nodes first-section-nodes rest]} + (split-sections (:children node)) + ;; TODO: Construct `:contents-begin` and `:contents-end` data + ;; by spanning the children. + new-children (concat top-level-nodes + (list {:type "section" + :children first-section-nodes}) + rest)] + (assoc node :children new-children))) diff --git a/doerg/src/net/deertopia/doerg/render.clj b/doerg/src/net/deertopia/doerg/render.clj index b49e673..2213ee0 100644 --- a/doerg/src/net/deertopia/doerg/render.clj +++ b/doerg/src/net/deertopia/doerg/render.clj @@ -4,6 +4,7 @@ [clojure.tools.logging :as l] [clojure.core.match :refer [match]] [clojure.tools.logging.readable :as lr] + [com.rpl.specter :as sp] [net.deertopia.doerg.html :as doerg-html] [clojure.pprint] [clojure.zip :as z])) @@ -35,25 +36,19 @@ (def ^:dynamic ^:private *document-info*) -(declare ^:private - gather-footnotes render-renderer-error) +(declare ^:private gather-footnotes render-renderer-error + view-children-as-seq) (defn org-element-recursive "Recursively render an Org-mode element to Hiccup." [e] - (let [loc (element/doerg-zip e)] - (-> loc - (element/cata - (fn [node] - (prn node) - (try (let [x (org-element node)] - (println "⇒" (pr-str x)) - (newline) - x) - (catch Throwable e - (lr/error e "Error in renderer" {:node node}) - (render-renderer-error e))))) - z/node))) + (->> e (sp/transform + [element/postorder-walker view-children-as-seq] + (fn [node] + (try (org-element node) + (catch Throwable e + (lr/error e "Error in renderer" {:node node}) + (render-renderer-error e))))))) (defn org-document "Recursively render an Org-mode document to Hiccup." @@ -80,6 +75,14 @@ +(def view-children-as-seq + "Specter path that converts any vectors of :children to lists so + Hiccup correctly interprets children as lists of elements rather + than a single malformed element." + (sp/if-path element/greater-element? + (sp/view #(update % :children seq)) + sp/STAY)) + (defn- gather-footnotes [loc] {}) diff --git a/doerg/src/net/deertopia/doerg/repl.clj b/doerg/src/net/deertopia/doerg/repl.clj index b6c4d63..dc59a48 100644 --- a/doerg/src/net/deertopia/doerg/repl.clj +++ b/doerg/src/net/deertopia/doerg/repl.clj @@ -4,10 +4,13 @@ [net.deertopia.doerg.config :as cfg] [clojure.java.io :as io] [hiccup2.core :as h] + [clojure.pprint] [babashka.fs :as fs])) (def some-org-file - "/home/msyds/org/20251228003307-prerequisite_context_in_korean.org") + "/home/msyds/org/20251228003307-prerequisite_context_in_korean.org" + #_ + "/home/msyds/org/20251111182118-path_induction.org") (defn- force-create-sym-link [path target] (fs/delete-if-exists path) @@ -26,3 +29,11 @@ (fs/delete-if-exists (fs/file dest "index.html")) (->> (h/html (-> src slurp element/read-string render/org-document)) str (spit (fs/file dest "index.html")))) + +(defn render-edn [& {:keys [src dest] + :or {src some-org-file + dest "/tmp/doerg-test/index.edn"}}] + (fs/create-dirs (fs/parent dest)) + (with-open [f (io/writer dest)] + (binding [*out* f] + (-> src slurp element/read-string clojure.pprint/pprint)))) diff --git a/doerg/test/net/deertopia/doerg/element_test.clj b/doerg/test/net/deertopia/doerg/element_test.clj index d2f0790..981c30b 100644 --- a/doerg/test/net/deertopia/doerg/element_test.clj +++ b/doerg/test/net/deertopia/doerg/element_test.clj @@ -3,7 +3,8 @@ [babashka.process :as p] [clojure.test :as t] [clojure.zip :as z] - [clojure.java.io :as io])) + [clojure.java.io :as io] + [com.rpl.specter :as sp])) (defn sleep-vs-timeout [& {:keys [sleep timeout]}] (sut/deref-with-timeout @@ -36,12 +37,17 @@ (defn- first-child-of-type [parent type] (some #(and (sut/of-type? % type) %) (:children parent))) +(defn- parse-resource [path] + (-> (str "net/deertopia/doerg/element_test/" path) + io/resource slurp sut/read-string)) + (t/deftest known-greater-elements (t/testing "known greater elements satisfy `greater-element?`" - (let [s (-> "net/deertopia/doerg/element_test/greater-elements.org" - io/resource slurp) - root (sut/read-string s) - section (first-child-of-type root "section") + (let [root (parse-resource "greater-elements.org") + section (->> root + (sp/select [sut/children-walker + #(sut/of-type? % "section")]) + second) headline (first-child-of-type section "headline") headline-text (first-child-of-type headline "text") paragraph (first-child-of-type section "paragraph") @@ -52,3 +58,22 @@ (t/is (not (sut/greater-element? headline-text))) (t/is (sut/greater-element? paragraph)) (t/is (not (sut/greater-element? paragraph-text)))))) + +(defn- first-paragraph-belongs-to-first-section? [doc] + (let [first-paragraph (sp/select-first [sut/postorder-walker + #(sut/of-type? % "paragraph")] + doc) + first-section (sp/select-first [sut/postorder-walker + #(sut/of-type? % "section")] + doc)] + (if (and first-paragraph first-section) + (boolean (some #(= % first-paragraph) + (:children first-section))) + true))) + +(t/deftest first-paragraph-under-first-section + (t/testing "first paragraph should belong to a section" + (t/is (-> (parse-resource "first-paragraph-under-first-section.org") + first-paragraph-belongs-to-first-section?)) + (t/is (not (-> (parse-resource "first-paragraph-under-heading.org") + first-paragraph-belongs-to-first-section?))))) diff --git a/doerg/test/net/deertopia/doerg/element_test/first-paragraph-under-first-section.org b/doerg/test/net/deertopia/doerg/element_test/first-paragraph-under-first-section.org new file mode 100644 index 0000000..0a7d010 --- /dev/null +++ b/doerg/test/net/deertopia/doerg/element_test/first-paragraph-under-first-section.org @@ -0,0 +1,7 @@ +#+title: first paragraph under first section + +first paragraph is here and not under the first heading + +* first heading + +second paragraph diff --git a/doerg/test/net/deertopia/doerg/element_test/first-paragraph-under-heading.org b/doerg/test/net/deertopia/doerg/element_test/first-paragraph-under-heading.org new file mode 100644 index 0000000..7c5f18b --- /dev/null +++ b/doerg/test/net/deertopia/doerg/element_test/first-paragraph-under-heading.org @@ -0,0 +1,5 @@ +#+title: first paragraph under a heading + +* first heading + +first paragraph is here and not in the first section