wip: feat: org-element specs
Some checks failed
build / build (push) Failing after 27s

This commit is contained in:
2026-03-12 11:00:42 -06:00
parent 59b75c01e8
commit 96d1766386
3 changed files with 196 additions and 50 deletions

View File

@@ -9,7 +9,8 @@
com.rpl/specter {:mvn/version "1.1.6"}
lambdaisland/deep-diff2 {:mvn/version "2.12.219"}
mvxcvi/clj-cbor {:mvn/version "1.1.1"}
ch.qos.logback/logback-classic {:mvn/version "1.1.3"}}
ch.qos.logback/logback-classic {:mvn/version "1.1.3"}
org.clojure/test.check {:mvn/version "1.1.3"}}
:paths ["src" "resources" "test"]
:aliases
{:test {:extra-deps {lambdaisland/kaocha {:mvn/version "1.91.1392"}}

View File

@@ -1,21 +1,26 @@
(ns net.deertopia.doerg.element
(:require [babashka.process :as p]
[net.deertopia.doerg.common :as common]
[clojure.string :as str]
[clojure.zip]
[babashka.fs :as fs]
[clojure.java.io :as io]
[cheshire.core :as json]
[clojure.spec.alpha :as s]
[spec-dict.main :refer [dict]]
[net.deertopia.doerg.config :as cfg]
[com.rpl.specter :as sp]
[clojure.tools.logging.readable :as lr]
[clojure.zip :as z]
[com.rpl.specter.zipper :as sz]
[clojure.core.match :refer [match]])
(:import (java.util UUID))
(:refer-clojure :exclude [read-string]))
(:refer-clojure :exclude [read-string type])
(:require
[babashka.fs :as fs]
[babashka.process :as p]
[cheshire.core :as json]
[clojure.core.match :refer [match]]
[clojure.java.io :as io]
[clojure.set :as set]
[clojure.spec.alpha :as s]
[clojure.string :as str]
[clojure.test.check.generators :as gen]
[clojure.tools.logging.readable :as lr]
[clojure.zip :as z]
[com.rpl.specter :as sp]
[com.rpl.specter.zipper :as sz]
[net.deertopia.doerg.common :as common]
[net.deertopia.doerg.config :as cfg]
[spec-dict.main :refer [dict]]
[clojure.tools.logging :as l])
(:import
(java.util UUID)))
(def ^:dynamic *uniorg-timeout-duration*
@@ -37,7 +42,7 @@
(if (zero? (:exit r))
(-> r :out (json/parse-string (comp keyword camel->kebab))))))
(declare gather-first-section gather-latex-paragraphs)
(declare gather-first-section gather-latex-paragraphs element-types)
(defn read-string
[s & {:keys [post-processors]
@@ -65,6 +70,9 @@
(and (map? element)
(contains? element :type)))
(defn type [element]
(:type element))
(defn of-type?
"Return truthy if the Org node `element` is of type `type`. In the
vararg case, return truthy if `element` is of any of the types
@@ -201,6 +209,22 @@
:first-section-nodes of-first-section
:rest remaining-nodes*}))
(defn- element-bounds [& nodes]
(reduce (fn [acc {:keys [contents-begin contents-end]}]
(if (and (nat-int? contents-begin)
(nat-int? contents-end))
(-> acc
(update
:contents-begin
#(min (or % Integer/MAX_VALUE) contents-begin))
(update
:contents-end
#(max (or % Integer/MIN_VALUE) contents-end)))
acc))
{:contents-begin nil
:contents-end nil}
nodes))
(defn gather-first-section [node]
(assert (of-type? node "org-data")
"`gather-doerg-data` should be applied to the document root.")
@@ -208,9 +232,11 @@
(split-sections (:children node))
;; TODO: Construct `:contents-begin` and `:contents-end` data
;; by spanning the children.
first-section (merge {:type "section"
:children first-section-nodes}
(apply element-bounds first-section-nodes))
new-children (concat top-level-nodes
(list {:type "section"
:children first-section-nodes})
(list first-section)
rest)]
(assoc node :children new-children)))
@@ -234,15 +260,6 @@
([predator prey & more-prey]
(reduce swallow predator (cons prey more-prey))))
(comment
(-> [1 2 3 4]
(neighbourly-mapcat prn) )
(def doc (read-string (slurp some-org-file)))
(let [r (atom [])
blah]
@r))
(defn gather-latex-paragraphs [node]
(->> node
(sp/transform
@@ -280,3 +297,128 @@
[c & rest]
(recur (conj acc c) rest)
[] acc))))))
;;; Specs (top-level)
;; Data taken from uniorg/index.d.ts
(comment
(defn- typescript-enum->set [s]
(as-> s
it
(str/split it #" \| ")
(map camel->kebab it)
(into #{} it))))
(def greater-element-types
#{"org-data" "section" "property-drawer" "drawer" "list" "list-item"
"quote-block" "verse-block" "center-block" "special-block"
"footnote-definition" "table"})
(def element-types
#{"list-item-tag" "src-block" "comment-block" "latex-environment"
"keyword" "paragraph" "node-property" "example-block" "clock"
"planning" "diary-sexp" "fixed-width" "export-block"
"horizontal-rule" "comment" "table-row" "headline"})
(def recursive-object-types
#{"citation" "footnote-reference" "superscript" "table-cell" "link"
"italic" "citation-common-prefix" "subscript" "citation-prefix"
"citation-common-suffix" "strike-through" "citation-reference"
"bold" "underline"})
(def object-types
#{"line-break" "citation-suffix" "statistics-cookie" "timestamp"
"text" "verbatim" "citation-key" "export-snippet" "latex-fragment"
"entity" "code"})
(s/def ::greater-element-type greater-element-types)
(s/def ::element-type element-types)
(s/def ::object-type object-types)
(s/def ::recursive-object-type recursive-object-types)
(s/def ::contents-begin nat-int?)
(s/def ::contents-end nat-int?)
(defmulti node-spec :type)
(defn- unimplemented-spec [x]
(lr/warnf "unimplemented method for %s" (:type x))
any?)
(defmethod node-spec :default [x] (unimplemented-spec x))
(def ^:private nfe
"NFE — “no further expectations.” Used in sub-specs of `::element`
et al. for elements with no additional structure beyond that
provided by their parents."
(s/with-gen (constantly true)
(constantly (gen/return {}))))
(s/def ::object
(dict {:type string?}))
(s/def ::element
(dict ^:opt {:contents-begin ::contents-begin
:contents-end ::contents-end}
{:children (s/coll-of nfe :kind vector?)
:type string?}))
(s/def ::node nil)
(s/def ::greater-element
(dict {:contents-begin ::contents-begin
:contents-end ::contents-end
:children (s/coll-of ::node :kind vector?)
:type string?}))
(s/def ::recursive-object
(dict ^:opt {:contents-begin ::contents-begin
:contents-end ::contents-end}
{:children (s/coll-of ::node :kind vector?)}))
(s/def ::node (s/multi-spec node-spec :type))
(comment
(use 'net.deertopia.doerg.repl)
(def doc (-> some-org-file slurp read-string))
(s/explain ::node doc))
(s/def ::todo-keyword string?)
(s/def ::priority string?)
(s/def ::commented boolean?)
(s/def ::level nat-int?)
(s/def ::tags (s/coll-of string? :kind vector?))
;;; Specs (specific objects)
(def ^:private string-value (dict {:value string?}))
(defmethod node-spec "text" [_] (s/merge ::object string-value))
(defmethod node-spec "verbatim" [_] (s/merge ::object string-value))
(defmethod node-spec "code" [_] (s/merge ::object string-value))
(defmethod node-spec "bold" [_] ::recursive-object)
(defmethod node-spec "italic" [_] ::recursive-object)
;;; Specs (specific elements)
(defmethod node-spec "headline" [_]
(s/merge ::element
(dict {:todo-keyword (s/nilable ::todo-keyword)
:priority (s/nilable ::priority)
:level ::level
:commented ::commented
:raw-value string?
:tags ::tags})))
;;; Specs (specific greater elements)
(defmethod node-spec "org-data" [_]
::greater-element)
(defmethod node-spec "section" [_]
::greater-element)

View File

@@ -57,28 +57,31 @@
())
(defn- paragraph-ends-with-latex? [doc]
(-> (sp/select-first [(walk-types "paragraph")
(sp/must :children)
sp/LAST]
doc)
(sut/of-type? "latex-environment")))
(let [type (-> (sp/select-first [(walk-types "paragraph")
(sp/must :children)
sp/LAST]
doc)
sut/type)]
(t/is type "latex-environment")))
(defn- paragraph-has-latex? [doc]
(sp/select-first [(walk-types "paragraph")
(sp/must :children)
sp/ALL
#(sut/of-type? % "latex-environment")]
doc))
(t/is (sp/select-first [(walk-types "paragraph")
(sp/must :children)
sp/ALL
#(sut/of-type? % "latex-environment")]
doc)))
(defn- paragraph-has-multiple-latex? [doc]
(let [[interleaved fenceposted]
(sp/select [(walk-types "section")
(sp/must :children)
#(some-> % first (sut/of-type? "headline"))
(sp/subselect (sp/view #(drop 1 %))
sp/ALL (sp/must :children) sp/ALL
(sp/must :type))]
doc)]))
(let [paragraphs (sp/select (walk-types "paragraph") doc)]
(t/is (= 2 (count paragraphs)))
(let [[p p] paragraphs]
(t/are [p ts] (= ts
(sp/select [(sp/must :children)
sp/ALL (sp/view sut/type)] p))
p ["text" "latex-environment"
"text" "latex-environment"]
p ["text" "latex-environment"
"text" "latex-environment" "text"]))))
(t/deftest paragraph-separation
(t/testing "paragraph ending with latex"
@@ -88,5 +91,5 @@
(-> (parse-resource "paragraph-surrounding-latex.org")
paragraph-has-latex?))
(t/testing "paragraph with interleaved latex"
(let [(parse-resource "paragraph-with-multiple-latex.org")])
(t/is (-))))
(-> (parse-resource "paragraph-with-multiple-latex.org")
paragraph-has-multiple-latex?)))