specter and first section

This commit is contained in:
2026-02-06 12:15:49 -07:00
parent 5024b0f078
commit 2735469b07
8 changed files with 151 additions and 44 deletions

6
doerg/README.org Normal file
View File

@@ -0,0 +1,6 @@
#+title: Doerg specification
#+author: Guppy
* Footnotes
- A bunch of metadata should be read into a node of type =doerg-data=

View File

@@ -5,5 +5,7 @@
babashka/process {:mvn/version "0.6.25"}
io.github.msyds/spec-dict
{:git/sha "531d629b7f05f37232261cf9e8927a4b5915714f"}
hiccup/hiccup {:mvn/version "2.0.0-RC4"}}
hiccup/hiccup {:mvn/version "2.0.0-RC4"}
com.rpl/specter {:mvn/version "1.1.6"}
lambdaisland/deep-diff2 {:mvn/version "2.12.219"}}
:paths ["src" "resources" "test"]}

View File

@@ -1,13 +1,14 @@
(ns net.deertopia.doerg.element
(:require [babashka.process :as p]
[clojure.string :as str]
[clojure.zip :as z]
[clojure.zip]
[babashka.fs :as fs]
[clojure.java.io :as io]
[cheshire.core :as json]
[clojure.spec.alpha :as s]
[spec-dict.main :refer [dict]]
[net.deertopia.doerg.config :as cfg])
[net.deertopia.doerg.config :as cfg]
[com.rpl.specter :as sp])
(:refer-clojure :exclude [read-string]))
@@ -45,9 +46,14 @@
(if (zero? (:exit r))
(-> r :out (json/parse-string (comp keyword camel->kebab))))))
(defn read-string [s]
(with-in-str s
(uniorg :in *in*)))
(declare gather-first-section)
(defn read-string [s & {:keys [post-processors]
:or {post-processors [gather-first-section]}}]
(let [apply-post-processors (apply comp (reverse post-processors))]
(with-in-str s
(-> (uniorg :in *in*)
apply-post-processors))))
@@ -66,8 +72,15 @@
(and (map? element)
(contains? element :type)))
(defn of-type? [element type]
(= (:type element) type))
(defn of-type?
"Return truthy if the Org node `element` is of type `type`. In the
vararg case, return truthy if `element` is of any of the types
listed."
([element type]
(= (:type element) type))
([element type & types]
(contains? (into #{} (cons type types))
(:type element))))
;;; Spec
@@ -83,19 +96,54 @@
;;; Zipper
(defn doerg-zip [document]
(z/zipper greater-element?
:children
#(assoc %1 :children %2)
document))
(clojure.zip/zipper greater-element?
:children
#(assoc %1 :children %2)
document))
(defn cata
"Catamorphism on a zipper."
[loc f]
(let [loc* (if-some [child (z/down loc)]
(loop [current child]
(let [current* (cata current f)]
(if-some [right (z/right current*)]
(recur right)
(z/up current*))))
loc)]
(z/replace loc* (f (z/node loc*)))))
(def children-walker
"Walk each child of an Org element."
[:children sp/ALL])
(def postorder-walker
"Recursively walk each node of an Org element in post-order."
(sp/recursive-path
[] p
(sp/if-path greater-element?
(sp/continue-then-stay children-walker p)
sp/STAY)))
;;; Post-processing
#_
(defn doerg-data-node? [node]
(of-type? node ))
#_
(defn gather-doerg-data [loc]
(assert (of-type? (z/node loc) "org-data")
"`gather-doerg-data` should be applied to the document root.")
(->> loc z/children (split-with doerg-data-node?)))
(defn- split-sections [nodes]
(let [[of-top-level remaining-nodes]
(->> nodes (split-with #(of-type? % "property-drawer" "keyword")))
[of-first-section remaining-nodes*]
(->> remaining-nodes (split-with #(not (of-type? % "section"))))]
{:top-level-nodes of-top-level
:first-section-nodes of-first-section
:rest remaining-nodes*}))
(defn gather-first-section [node]
(assert (of-type? node "org-data")
"`gather-doerg-data` should be applied to the document root.")
(let [{:keys [top-level-nodes first-section-nodes rest]}
(split-sections (:children node))
;; TODO: Construct `:contents-begin` and `:contents-end` data
;; by spanning the children.
new-children (concat top-level-nodes
(list {:type "section"
:children first-section-nodes})
rest)]
(assoc node :children new-children)))

View File

@@ -4,6 +4,7 @@
[clojure.tools.logging :as l]
[clojure.core.match :refer [match]]
[clojure.tools.logging.readable :as lr]
[com.rpl.specter :as sp]
[net.deertopia.doerg.html :as doerg-html]
[clojure.pprint]
[clojure.zip :as z]))
@@ -35,25 +36,19 @@
(def ^:dynamic ^:private *document-info*)
(declare ^:private
gather-footnotes render-renderer-error)
(declare ^:private gather-footnotes render-renderer-error
view-children-as-seq)
(defn org-element-recursive
"Recursively render an Org-mode element to Hiccup."
[e]
(let [loc (element/doerg-zip e)]
(-> loc
(element/cata
(fn [node]
(prn node)
(try (let [x (org-element node)]
(println "⇒" (pr-str x))
(newline)
x)
(catch Throwable e
(lr/error e "Error in renderer" {:node node})
(render-renderer-error e)))))
z/node)))
(->> e (sp/transform
[element/postorder-walker view-children-as-seq]
(fn [node]
(try (org-element node)
(catch Throwable e
(lr/error e "Error in renderer" {:node node})
(render-renderer-error e)))))))
(defn org-document
"Recursively render an Org-mode document to Hiccup."
@@ -80,6 +75,14 @@
(def view-children-as-seq
"Specter path that converts any vectors of :children to lists so
Hiccup correctly interprets children as lists of elements rather
than a single malformed element."
(sp/if-path element/greater-element?
(sp/view #(update % :children seq))
sp/STAY))
(defn- gather-footnotes [loc]
{})

View File

@@ -4,10 +4,13 @@
[net.deertopia.doerg.config :as cfg]
[clojure.java.io :as io]
[hiccup2.core :as h]
[clojure.pprint]
[babashka.fs :as fs]))
(def some-org-file
"/home/msyds/org/20251228003307-prerequisite_context_in_korean.org")
"/home/msyds/org/20251228003307-prerequisite_context_in_korean.org"
#_
"/home/msyds/org/20251111182118-path_induction.org")
(defn- force-create-sym-link [path target]
(fs/delete-if-exists path)
@@ -26,3 +29,11 @@
(fs/delete-if-exists (fs/file dest "index.html"))
(->> (h/html (-> src slurp element/read-string render/org-document))
str (spit (fs/file dest "index.html"))))
(defn render-edn [& {:keys [src dest]
:or {src some-org-file
dest "/tmp/doerg-test/index.edn"}}]
(fs/create-dirs (fs/parent dest))
(with-open [f (io/writer dest)]
(binding [*out* f]
(-> src slurp element/read-string clojure.pprint/pprint))))

View File

@@ -3,7 +3,8 @@
[babashka.process :as p]
[clojure.test :as t]
[clojure.zip :as z]
[clojure.java.io :as io]))
[clojure.java.io :as io]
[com.rpl.specter :as sp]))
(defn sleep-vs-timeout [& {:keys [sleep timeout]}]
(sut/deref-with-timeout
@@ -36,12 +37,17 @@
(defn- first-child-of-type [parent type]
(some #(and (sut/of-type? % type) %) (:children parent)))
(defn- parse-resource [path]
(-> (str "net/deertopia/doerg/element_test/" path)
io/resource slurp sut/read-string))
(t/deftest known-greater-elements
(t/testing "known greater elements satisfy `greater-element?`"
(let [s (-> "net/deertopia/doerg/element_test/greater-elements.org"
io/resource slurp)
root (sut/read-string s)
section (first-child-of-type root "section")
(let [root (parse-resource "greater-elements.org")
section (->> root
(sp/select [sut/children-walker
#(sut/of-type? % "section")])
second)
headline (first-child-of-type section "headline")
headline-text (first-child-of-type headline "text")
paragraph (first-child-of-type section "paragraph")
@@ -52,3 +58,22 @@
(t/is (not (sut/greater-element? headline-text)))
(t/is (sut/greater-element? paragraph))
(t/is (not (sut/greater-element? paragraph-text))))))
(defn- first-paragraph-belongs-to-first-section? [doc]
(let [first-paragraph (sp/select-first [sut/postorder-walker
#(sut/of-type? % "paragraph")]
doc)
first-section (sp/select-first [sut/postorder-walker
#(sut/of-type? % "section")]
doc)]
(if (and first-paragraph first-section)
(boolean (some #(= % first-paragraph)
(:children first-section)))
true)))
(t/deftest first-paragraph-under-first-section
(t/testing "first paragraph should belong to a section"
(t/is (-> (parse-resource "first-paragraph-under-first-section.org")
first-paragraph-belongs-to-first-section?))
(t/is (not (-> (parse-resource "first-paragraph-under-heading.org")
first-paragraph-belongs-to-first-section?)))))

View File

@@ -0,0 +1,7 @@
#+title: first paragraph under first section
first paragraph is here and not under the first heading
* first heading
second paragraph

View File

@@ -0,0 +1,5 @@
#+title: first paragraph under a heading
* first heading
first paragraph is here and not in the first section