From 96d1766386f4b7f95f383b8d0c24395c00a7931d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Madeleine=20Sydney=20=C5=9Alaga?= Date: Thu, 12 Mar 2026 11:00:42 -0600 Subject: [PATCH] wip: feat: org-element specs --- doerg/deps.edn | 3 +- doerg/src/net/deertopia/doerg/element.clj | 200 +++++++++++++++--- .../test/net/deertopia/doerg/element_test.clj | 43 ++-- 3 files changed, 196 insertions(+), 50 deletions(-) diff --git a/doerg/deps.edn b/doerg/deps.edn index 2c1cbff..f71a480 100644 --- a/doerg/deps.edn +++ b/doerg/deps.edn @@ -9,7 +9,8 @@ com.rpl/specter {:mvn/version "1.1.6"} lambdaisland/deep-diff2 {:mvn/version "2.12.219"} mvxcvi/clj-cbor {:mvn/version "1.1.1"} - ch.qos.logback/logback-classic {:mvn/version "1.1.3"}} + ch.qos.logback/logback-classic {:mvn/version "1.1.3"} + org.clojure/test.check {:mvn/version "1.1.3"}} :paths ["src" "resources" "test"] :aliases {:test {:extra-deps {lambdaisland/kaocha {:mvn/version "1.91.1392"}} diff --git a/doerg/src/net/deertopia/doerg/element.clj b/doerg/src/net/deertopia/doerg/element.clj index 75e9604..9622682 100644 --- a/doerg/src/net/deertopia/doerg/element.clj +++ b/doerg/src/net/deertopia/doerg/element.clj @@ -1,21 +1,26 @@ (ns net.deertopia.doerg.element - (:require [babashka.process :as p] - [net.deertopia.doerg.common :as common] - [clojure.string :as str] - [clojure.zip] - [babashka.fs :as fs] - [clojure.java.io :as io] - [cheshire.core :as json] - [clojure.spec.alpha :as s] - [spec-dict.main :refer [dict]] - [net.deertopia.doerg.config :as cfg] - [com.rpl.specter :as sp] - [clojure.tools.logging.readable :as lr] - [clojure.zip :as z] - [com.rpl.specter.zipper :as sz] - [clojure.core.match :refer [match]]) - (:import (java.util UUID)) - (:refer-clojure :exclude [read-string])) + (:refer-clojure :exclude [read-string type]) + (:require + [babashka.fs :as fs] + [babashka.process :as p] + [cheshire.core :as json] + [clojure.core.match :refer [match]] + [clojure.java.io :as io] + [clojure.set :as set] + [clojure.spec.alpha :as s] + [clojure.string :as str] + [clojure.test.check.generators :as gen] + [clojure.tools.logging.readable :as lr] + [clojure.zip :as z] + [com.rpl.specter :as sp] + [com.rpl.specter.zipper :as sz] + [net.deertopia.doerg.common :as common] + [net.deertopia.doerg.config :as cfg] + [spec-dict.main :refer [dict]] + [clojure.tools.logging :as l]) + (:import + (java.util UUID))) + (def ^:dynamic *uniorg-timeout-duration* @@ -37,7 +42,7 @@ (if (zero? (:exit r)) (-> r :out (json/parse-string (comp keyword camel->kebab)))))) -(declare gather-first-section gather-latex-paragraphs) +(declare gather-first-section gather-latex-paragraphs element-types) (defn read-string [s & {:keys [post-processors] @@ -65,6 +70,9 @@ (and (map? element) (contains? element :type))) +(defn type [element] + (:type element)) + (defn of-type? "Return truthy if the Org node `element` is of type `type`. In the vararg case, return truthy if `element` is of any of the types @@ -201,6 +209,22 @@ :first-section-nodes of-first-section :rest remaining-nodes*})) +(defn- element-bounds [& nodes] + (reduce (fn [acc {:keys [contents-begin contents-end]}] + (if (and (nat-int? contents-begin) + (nat-int? contents-end)) + (-> acc + (update + :contents-begin + #(min (or % Integer/MAX_VALUE) contents-begin)) + (update + :contents-end + #(max (or % Integer/MIN_VALUE) contents-end))) + acc)) + {:contents-begin nil + :contents-end nil} + nodes)) + (defn gather-first-section [node] (assert (of-type? node "org-data") "`gather-doerg-data` should be applied to the document root.") @@ -208,9 +232,11 @@ (split-sections (:children node)) ;; TODO: Construct `:contents-begin` and `:contents-end` data ;; by spanning the children. + first-section (merge {:type "section" + :children first-section-nodes} + (apply element-bounds first-section-nodes)) new-children (concat top-level-nodes - (list {:type "section" - :children first-section-nodes}) + (list first-section) rest)] (assoc node :children new-children))) @@ -234,15 +260,6 @@ ([predator prey & more-prey] (reduce swallow predator (cons prey more-prey)))) -(comment - (-> [1 2 3 4] - (neighbourly-mapcat prn) ) - (def doc (read-string (slurp some-org-file))) - - (let [r (atom []) - blah] - @r)) - (defn gather-latex-paragraphs [node] (->> node (sp/transform @@ -280,3 +297,128 @@ [c & rest] (recur (conj acc c) rest) [] acc)))))) + + +;;; Specs (top-level) + +;; Data taken from uniorg/index.d.ts + +(comment + (defn- typescript-enum->set [s] + (as-> s + it + (str/split it #" \| ") + (map camel->kebab it) + (into #{} it)))) + +(def greater-element-types + #{"org-data" "section" "property-drawer" "drawer" "list" "list-item" + "quote-block" "verse-block" "center-block" "special-block" + "footnote-definition" "table"}) + +(def element-types + #{"list-item-tag" "src-block" "comment-block" "latex-environment" + "keyword" "paragraph" "node-property" "example-block" "clock" + "planning" "diary-sexp" "fixed-width" "export-block" + "horizontal-rule" "comment" "table-row" "headline"}) + +(def recursive-object-types + #{"citation" "footnote-reference" "superscript" "table-cell" "link" + "italic" "citation-common-prefix" "subscript" "citation-prefix" + "citation-common-suffix" "strike-through" "citation-reference" + "bold" "underline"}) + +(def object-types + #{"line-break" "citation-suffix" "statistics-cookie" "timestamp" + "text" "verbatim" "citation-key" "export-snippet" "latex-fragment" + "entity" "code"}) + +(s/def ::greater-element-type greater-element-types) +(s/def ::element-type element-types) +(s/def ::object-type object-types) +(s/def ::recursive-object-type recursive-object-types) + +(s/def ::contents-begin nat-int?) +(s/def ::contents-end nat-int?) + +(defmulti node-spec :type) + +(defn- unimplemented-spec [x] + (lr/warnf "unimplemented method for %s" (:type x)) + any?) + +(defmethod node-spec :default [x] (unimplemented-spec x)) + +(def ^:private nfe + "NFE — “no further expectations.” Used in sub-specs of `::element` + et al. for elements with no additional structure beyond that + provided by their parents." + (s/with-gen (constantly true) + (constantly (gen/return {})))) + +(s/def ::object + (dict {:type string?})) + +(s/def ::element + (dict ^:opt {:contents-begin ::contents-begin + :contents-end ::contents-end} + {:children (s/coll-of nfe :kind vector?) + :type string?})) + +(s/def ::node nil) + +(s/def ::greater-element + (dict {:contents-begin ::contents-begin + :contents-end ::contents-end + :children (s/coll-of ::node :kind vector?) + :type string?})) + +(s/def ::recursive-object + (dict ^:opt {:contents-begin ::contents-begin + :contents-end ::contents-end} + {:children (s/coll-of ::node :kind vector?)})) + +(s/def ::node (s/multi-spec node-spec :type)) + +(comment + (use 'net.deertopia.doerg.repl) + (def doc (-> some-org-file slurp read-string)) + (s/explain ::node doc)) + +(s/def ::todo-keyword string?) +(s/def ::priority string?) +(s/def ::commented boolean?) +(s/def ::level nat-int?) +(s/def ::tags (s/coll-of string? :kind vector?)) + + +;;; Specs (specific objects) + +(def ^:private string-value (dict {:value string?})) + +(defmethod node-spec "text" [_] (s/merge ::object string-value)) +(defmethod node-spec "verbatim" [_] (s/merge ::object string-value)) +(defmethod node-spec "code" [_] (s/merge ::object string-value)) +(defmethod node-spec "bold" [_] ::recursive-object) +(defmethod node-spec "italic" [_] ::recursive-object) + + +;;; Specs (specific elements) + +(defmethod node-spec "headline" [_] + (s/merge ::element + (dict {:todo-keyword (s/nilable ::todo-keyword) + :priority (s/nilable ::priority) + :level ::level + :commented ::commented + :raw-value string? + :tags ::tags}))) + + +;;; Specs (specific greater elements) + +(defmethod node-spec "org-data" [_] + ::greater-element) + +(defmethod node-spec "section" [_] + ::greater-element) diff --git a/doerg/test/net/deertopia/doerg/element_test.clj b/doerg/test/net/deertopia/doerg/element_test.clj index d1288da..08495ad 100644 --- a/doerg/test/net/deertopia/doerg/element_test.clj +++ b/doerg/test/net/deertopia/doerg/element_test.clj @@ -57,28 +57,31 @@ ()) (defn- paragraph-ends-with-latex? [doc] - (-> (sp/select-first [(walk-types "paragraph") - (sp/must :children) - sp/LAST] - doc) - (sut/of-type? "latex-environment"))) + (let [type (-> (sp/select-first [(walk-types "paragraph") + (sp/must :children) + sp/LAST] + doc) + sut/type)] + (t/is type "latex-environment"))) (defn- paragraph-has-latex? [doc] - (sp/select-first [(walk-types "paragraph") - (sp/must :children) - sp/ALL - #(sut/of-type? % "latex-environment")] - doc)) + (t/is (sp/select-first [(walk-types "paragraph") + (sp/must :children) + sp/ALL + #(sut/of-type? % "latex-environment")] + doc))) (defn- paragraph-has-multiple-latex? [doc] - (let [[interleaved fenceposted] - (sp/select [(walk-types "section") - (sp/must :children) - #(some-> % first (sut/of-type? "headline")) - (sp/subselect (sp/view #(drop 1 %)) - sp/ALL (sp/must :children) sp/ALL - (sp/must :type))] - doc)])) + (let [paragraphs (sp/select (walk-types "paragraph") doc)] + (t/is (= 2 (count paragraphs))) + (let [[p₁ p₂] paragraphs] + (t/are [p ts] (= ts + (sp/select [(sp/must :children) + sp/ALL (sp/view sut/type)] p)) + p₁ ["text" "latex-environment" + "text" "latex-environment"] + p₂ ["text" "latex-environment" + "text" "latex-environment" "text"])))) (t/deftest paragraph-separation (t/testing "paragraph ending with latex" @@ -88,5 +91,5 @@ (-> (parse-resource "paragraph-surrounding-latex.org") paragraph-has-latex?)) (t/testing "paragraph with interleaved latex" - (let [(parse-resource "paragraph-with-multiple-latex.org")]) - (t/is (-)))) + (-> (parse-resource "paragraph-with-multiple-latex.org") + paragraph-has-multiple-latex?)))