diff --git a/deps-lock.json b/deps-lock.json index 8579cce..e4b903d 100644 --- a/deps-lock.json +++ b/deps-lock.json @@ -675,6 +675,11 @@ "mvn-repo": "https://repo1.maven.org/maven2/", "hash": "sha256-g5qUXfrO6lvVB5+CSPM0bdIULesJFGyj9dG/riYxCWc=" }, + { + "mvn-path": "org/clojure/pom.contrib/1.4.0/pom.contrib-1.4.0.pom", + "mvn-repo": "https://repo1.maven.org/maven2/", + "hash": "sha256-CZNnaHYIH0kbpa+C+mtaA3o77joSWgLW1eJ6op81Z4c=" + }, { "mvn-path": "org/clojure/spec.alpha/0.2.194/spec.alpha-0.2.194.jar", "mvn-repo": "https://repo1.maven.org/maven2/", @@ -715,6 +720,16 @@ "mvn-repo": "https://repo1.maven.org/maven2/", "hash": "sha256-UZ45jnJMYvCsnWsZ15+P8QAdqYWD/eAb1wUrB+Ga1ow=" }, + { + "mvn-path": "org/clojure/test.check/1.1.3/test.check-1.1.3.jar", + "mvn-repo": "https://repo1.maven.org/maven2/", + "hash": "sha256-iwY3sTmCc0dC+z9NEp8KnZ49dSsa7AxvZc7UVV9ytkk=" + }, + { + "mvn-path": "org/clojure/test.check/1.1.3/test.check-1.1.3.pom", + "mvn-repo": "https://repo1.maven.org/maven2/", + "hash": "sha256-pPIf6yOG+/zUKlIgDRTA94u+9omfdIzO/LCkSZ4P3sU=" + }, { "mvn-path": "org/clojure/tools.cli/1.1.230/tools.cli-1.1.230.jar", "mvn-repo": "https://repo1.maven.org/maven2/", diff --git a/doerg/deps.edn b/doerg/deps.edn index 2c1cbff..f71a480 100644 --- a/doerg/deps.edn +++ b/doerg/deps.edn @@ -9,7 +9,8 @@ com.rpl/specter {:mvn/version "1.1.6"} lambdaisland/deep-diff2 {:mvn/version "2.12.219"} mvxcvi/clj-cbor {:mvn/version "1.1.1"} - ch.qos.logback/logback-classic {:mvn/version "1.1.3"}} + ch.qos.logback/logback-classic {:mvn/version "1.1.3"} + org.clojure/test.check {:mvn/version "1.1.3"}} :paths ["src" "resources" "test"] :aliases {:test {:extra-deps {lambdaisland/kaocha {:mvn/version "1.91.1392"}} diff --git a/doerg/scratch.clj b/doerg/scratch.clj new file mode 100644 index 0000000..d792bd0 --- /dev/null +++ b/doerg/scratch.clj @@ -0,0 +1,27 @@ +(ns scratch + (:require [clojure.spec.alpha :as s] + [spec-dict.main :refer [dict]] + [clojure.test.check.generators :as gen])) + + +(defmulti node-spec :type) + +(s/def ::node + (s/multi-spec node-spec :type)) + +(s/def ::children (s/coll-of ::node :kind vector?)) + +(s/def ::value nat-int?) + +(defmethod node-spec :branch [_] + (dict {:children ::children}) + #_(s/keys :req-un [::children])) + +(defmethod node-spec :leaf [_] + (dict {:value ::value}) + #_ + (s/keys :req-un [::value])) + +(comment + (binding [s/*recursion-limit* 1] + (gen/generate (s/gen ::node)))) diff --git a/doerg/src/net/deertopia/doerg/element.clj b/doerg/src/net/deertopia/doerg/element.clj index 75e9604..ae644ca 100644 --- a/doerg/src/net/deertopia/doerg/element.clj +++ b/doerg/src/net/deertopia/doerg/element.clj @@ -1,21 +1,25 @@ (ns net.deertopia.doerg.element - (:require [babashka.process :as p] - [net.deertopia.doerg.common :as common] - [clojure.string :as str] - [clojure.zip] - [babashka.fs :as fs] - [clojure.java.io :as io] + (:refer-clojure :exclude [read-string type]) + (:require [babashka.fs :as fs] + [babashka.process :as p] [cheshire.core :as json] + [clojure.core.match :refer [match]] + [clojure.java.io :as io] + [clojure.set :as set] [clojure.spec.alpha :as s] - [spec-dict.main :refer [dict]] - [net.deertopia.doerg.config :as cfg] - [com.rpl.specter :as sp] + [clojure.string :as str] + [clojure.test.check.generators :as gen] [clojure.tools.logging.readable :as lr] [clojure.zip :as z] + [com.rpl.specter :as sp] [com.rpl.specter.zipper :as sz] - [clojure.core.match :refer [match]]) - (:import (java.util UUID)) - (:refer-clojure :exclude [read-string])) + [net.deertopia.doerg.common :as common] + [net.deertopia.doerg.config :as cfg] + [spec-dict.main :refer [dict]] + [clojure.tools.logging :as l]) + (:import + (java.util UUID))) + (def ^:dynamic *uniorg-timeout-duration* @@ -37,7 +41,7 @@ (if (zero? (:exit r)) (-> r :out (json/parse-string (comp keyword camel->kebab)))))) -(declare gather-first-section gather-latex-paragraphs) +(declare gather-first-section gather-latex-paragraphs element-types) (defn read-string [s & {:keys [post-processors] @@ -65,6 +69,9 @@ (and (map? element) (contains? element :type))) +(defn type [element] + (:type element)) + (defn of-type? "Return truthy if the Org node `element` is of type `type`. In the vararg case, return truthy if `element` is of any of the types @@ -201,6 +208,22 @@ :first-section-nodes of-first-section :rest remaining-nodes*})) +(defn- element-bounds [& nodes] + (reduce (fn [acc {:keys [contents-begin contents-end]}] + (if (and (nat-int? contents-begin) + (nat-int? contents-end)) + (-> acc + (update + :contents-begin + #(min (or % Integer/MAX_VALUE) contents-begin)) + (update + :contents-end + #(max (or % Integer/MIN_VALUE) contents-end))) + acc)) + {:contents-begin nil + :contents-end nil} + nodes)) + (defn gather-first-section [node] (assert (of-type? node "org-data") "`gather-doerg-data` should be applied to the document root.") @@ -208,9 +231,11 @@ (split-sections (:children node)) ;; TODO: Construct `:contents-begin` and `:contents-end` data ;; by spanning the children. + first-section (merge {:type "section" + :children first-section-nodes} + (apply element-bounds first-section-nodes)) new-children (concat top-level-nodes - (list {:type "section" - :children first-section-nodes}) + (list first-section) rest)] (assoc node :children new-children))) @@ -234,15 +259,6 @@ ([predator prey & more-prey] (reduce swallow predator (cons prey more-prey)))) -(comment - (-> [1 2 3 4] - (neighbourly-mapcat prn) ) - (def doc (read-string (slurp some-org-file))) - - (let [r (atom []) - blah] - @r)) - (defn gather-latex-paragraphs [node] (->> node (sp/transform @@ -280,3 +296,144 @@ [c & rest] (recur (conj acc c) rest) [] acc)))))) + + +;;; Specs (top-level) + +;; Data taken from uniorg/index.d.ts + +(comment + (defn- typescript-enum->set [s] + (as-> s + it + (str/split it #" \| ") + (map camel->kebab it) + (into #{} it)))) + +(def greater-element-types + #{"org-data" "section" "property-drawer" "drawer" "plain-list" + "list-item" "quote-block" "verse-block" "center-block" + "special-block" "footnote-definition" "table"}) + +(def element-types + #{"list-item-tag" "src-block" "comment-block" "latex-environment" + "keyword" "paragraph" "node-property" "example-block" "clock" + "planning" "diary-sexp" "fixed-width" "export-block" + "horizontal-rule" "comment" "table-row" "headline"}) + +(def recursive-object-types + #{"citation" "footnote-reference" "superscript" "table-cell" "link" + "italic" "citation-common-prefix" "subscript" "citation-prefix" + "citation-common-suffix" "strike-through" "citation-reference" + "bold" "underline"}) + +(def object-types + #{"line-break" "citation-suffix" "statistics-cookie" "timestamp" + "text" "verbatim" "citation-key" "export-snippet" "latex-fragment" + "entity" "code"}) + +(s/def ::greater-element-type greater-element-types) +(s/def ::element-type element-types) +(s/def ::object-type (set/union recursive-object-types object-types)) +(s/def ::recursive-object-type recursive-object-types) + +(s/def ::contents-begin nat-int?) +(s/def ::contents-end nat-int?) + +(defmulti node-spec :type) + +(defn- unimplemented-spec [x] + (lr/warnf "unimplemented method for %s" x) + (s/with-gen any? + (constantly (gen/return {})))) + +(defmethod node-spec :default [x] (unimplemented-spec x)) + +(def ^:private nfe + "NFE — “no further expectations.” Used in sub-specs of `::element` + et al. for elements with no additional structure beyond that + provided by their parents." + (s/with-gen any? + (constantly (gen/return {})))) + +(s/def ::node + (s/multi-spec node-spec :type)) + +(s/def :object/type ::object-type) + +(s/def ::object + (s/keys :req-un [:object/type])) + +(s/def :element/children (s/coll-of ::object :kind vector?)) +(s/def :element/type ::element-type) + +(s/def ::element + (s/keys :opt-un [::contents-begin ::contents-end] + :req-un [:element/children :element/type])) + +(s/def :greater-element/children + (s/coll-of (s/merge + (dict {:type (set/union greater-element-types + element-types)}) + ::node) + :kind vector?)) + +(s/def :greater-element/type ::greater-element-type) + +(s/def ::greater-element + (s/keys :req-un [::contents-begin ::contents-end + :greater-element/children + :greater-element/type])) + +(s/def :recursive-object/children + (s/coll-of + (s/merge + (dict {:type ::object-type}) + ::node) + :kind vector?)) + +(s/def :recursive-object/type ::object-type) + +(s/def ::recursive-object + (s/keys :opt-un [::contents-begin ::contents-end] + :req-un [:recursive-object/children + :recursive-object/type])) + +(comment + (use 'net.deertopia.doerg.repl) + (def doc (-> some-org-file slurp read-string)) + (s/explain ::node doc) + (binding [s/*recursion-limit* 1] + (gen/generate (s/gen ::node)))) + +(s/def ::todo-keyword string?) +(s/def ::priority string?) +(s/def ::commented boolean?) +(s/def ::level nat-int?) +(s/def ::tags (s/coll-of string? :kind vector?)) + + +;;; Specs (specific elements) + +(def ^:private string-value (dict {:value string?})) + +(defmethod node-spec "text" [_] (s/merge ::object string-value)) +(defmethod node-spec "verbatim" [_] (s/merge ::object string-value)) +(defmethod node-spec "code" [_] (s/merge ::object string-value)) +(defmethod node-spec "bold" [_] ::recursive-object) +(defmethod node-spec "italic" [_] ::recursive-object) + +(defmethod node-spec "headline" [_] + (s/merge ::element + (dict {:todo-keyword (s/nilable ::todo-keyword) + :priority (s/nilable ::priority) + :level ::level + :commented ::commented + :raw-value string? + :tags ::tags}))) + +(defmethod node-spec "org-data" [_] + ::greater-element) + +(defmethod node-spec "section" [_] + ::greater-element) diff --git a/doerg/test/net/deertopia/doerg/element_test.clj b/doerg/test/net/deertopia/doerg/element_test.clj index d1288da..08495ad 100644 --- a/doerg/test/net/deertopia/doerg/element_test.clj +++ b/doerg/test/net/deertopia/doerg/element_test.clj @@ -57,28 +57,31 @@ ()) (defn- paragraph-ends-with-latex? [doc] - (-> (sp/select-first [(walk-types "paragraph") - (sp/must :children) - sp/LAST] - doc) - (sut/of-type? "latex-environment"))) + (let [type (-> (sp/select-first [(walk-types "paragraph") + (sp/must :children) + sp/LAST] + doc) + sut/type)] + (t/is type "latex-environment"))) (defn- paragraph-has-latex? [doc] - (sp/select-first [(walk-types "paragraph") - (sp/must :children) - sp/ALL - #(sut/of-type? % "latex-environment")] - doc)) + (t/is (sp/select-first [(walk-types "paragraph") + (sp/must :children) + sp/ALL + #(sut/of-type? % "latex-environment")] + doc))) (defn- paragraph-has-multiple-latex? [doc] - (let [[interleaved fenceposted] - (sp/select [(walk-types "section") - (sp/must :children) - #(some-> % first (sut/of-type? "headline")) - (sp/subselect (sp/view #(drop 1 %)) - sp/ALL (sp/must :children) sp/ALL - (sp/must :type))] - doc)])) + (let [paragraphs (sp/select (walk-types "paragraph") doc)] + (t/is (= 2 (count paragraphs))) + (let [[p₁ p₂] paragraphs] + (t/are [p ts] (= ts + (sp/select [(sp/must :children) + sp/ALL (sp/view sut/type)] p)) + p₁ ["text" "latex-environment" + "text" "latex-environment"] + p₂ ["text" "latex-environment" + "text" "latex-environment" "text"])))) (t/deftest paragraph-separation (t/testing "paragraph ending with latex" @@ -88,5 +91,5 @@ (-> (parse-resource "paragraph-surrounding-latex.org") paragraph-has-latex?)) (t/testing "paragraph with interleaved latex" - (let [(parse-resource "paragraph-with-multiple-latex.org")]) - (t/is (-)))) + (-> (parse-resource "paragraph-with-multiple-latex.org") + paragraph-has-multiple-latex?)))