diff --git a/deps-lock.json b/deps-lock.json index 8579cce..e4b903d 100644 --- a/deps-lock.json +++ b/deps-lock.json @@ -675,6 +675,11 @@ "mvn-repo": "https://repo1.maven.org/maven2/", "hash": "sha256-g5qUXfrO6lvVB5+CSPM0bdIULesJFGyj9dG/riYxCWc=" }, + { + "mvn-path": "org/clojure/pom.contrib/1.4.0/pom.contrib-1.4.0.pom", + "mvn-repo": "https://repo1.maven.org/maven2/", + "hash": "sha256-CZNnaHYIH0kbpa+C+mtaA3o77joSWgLW1eJ6op81Z4c=" + }, { "mvn-path": "org/clojure/spec.alpha/0.2.194/spec.alpha-0.2.194.jar", "mvn-repo": "https://repo1.maven.org/maven2/", @@ -715,6 +720,16 @@ "mvn-repo": "https://repo1.maven.org/maven2/", "hash": "sha256-UZ45jnJMYvCsnWsZ15+P8QAdqYWD/eAb1wUrB+Ga1ow=" }, + { + "mvn-path": "org/clojure/test.check/1.1.3/test.check-1.1.3.jar", + "mvn-repo": "https://repo1.maven.org/maven2/", + "hash": "sha256-iwY3sTmCc0dC+z9NEp8KnZ49dSsa7AxvZc7UVV9ytkk=" + }, + { + "mvn-path": "org/clojure/test.check/1.1.3/test.check-1.1.3.pom", + "mvn-repo": "https://repo1.maven.org/maven2/", + "hash": "sha256-pPIf6yOG+/zUKlIgDRTA94u+9omfdIzO/LCkSZ4P3sU=" + }, { "mvn-path": "org/clojure/tools.cli/1.1.230/tools.cli-1.1.230.jar", "mvn-repo": "https://repo1.maven.org/maven2/", diff --git a/doerg/deps.edn b/doerg/deps.edn index 2c1cbff..f71a480 100644 --- a/doerg/deps.edn +++ b/doerg/deps.edn @@ -9,7 +9,8 @@ com.rpl/specter {:mvn/version "1.1.6"} lambdaisland/deep-diff2 {:mvn/version "2.12.219"} mvxcvi/clj-cbor {:mvn/version "1.1.1"} - ch.qos.logback/logback-classic {:mvn/version "1.1.3"}} + ch.qos.logback/logback-classic {:mvn/version "1.1.3"} + org.clojure/test.check {:mvn/version "1.1.3"}} :paths ["src" "resources" "test"] :aliases {:test {:extra-deps {lambdaisland/kaocha {:mvn/version "1.91.1392"}} diff --git a/doerg/doerg-parser/index.js b/doerg/doerg-parser/index.js index 78b82f6..d3623e9 100755 --- a/doerg/doerg-parser/index.js +++ b/doerg/doerg-parser/index.js @@ -2,13 +2,17 @@ const { parse } = require ("uniorg-parse/lib/parser.js"); +const opts = { + trackPosition: true +} + async function main () { const chunks = [] for await (const chunk of process.stdin) { chunks.push (chunk) } const orgText = Buffer.concat (chunks).toString ("utf8") - process.stdout.write (JSON.stringify (parse (orgText))) + process.stdout.write (JSON.stringify (parse (orgText, opts))) } main () diff --git a/doerg/resources/net/deertopia/doerg/preview-template.tex b/doerg/resources/net/deertopia/doerg/preview-template.tex index 46dcd51..930fa44 100644 --- a/doerg/resources/net/deertopia/doerg/preview-template.tex +++ b/doerg/resources/net/deertopia/doerg/preview-template.tex @@ -48,6 +48,12 @@ \newcommand{\optic}[3]{\opticname{#1}^\prime\;#2\;#3} \newcommand{\Optic}[5]{\opticname{#1}\;#2\;#3\;#4\;#5} +% Default uses arrow glyphs from the active font, which are kinda ugly in the +% case of Plex. +\tikzcdset{ + arrow style=tikz +} + \begin{document} \setlength\abovedisplayskip{0pt} % Remove padding before equation environments. %% \color[rgb]{0.000,0.000,0.004}\special{dvisvgm:currentcolor on}\setcounter{equation}{0}% diff --git a/doerg/resources/net/deertopia/doerg/tuftesque.css b/doerg/resources/net/deertopia/doerg/tuftesque.css index 5e6b6b7..6fa781d 100644 --- a/doerg/resources/net/deertopia/doerg/tuftesque.css +++ b/doerg/resources/net/deertopia/doerg/tuftesque.css @@ -550,10 +550,8 @@ figure.fullwidth figcaption { .latex-fragment.display-math { display: block -; width: 55% /* Center it — do we want to do that? */ ; align-items: center ; justify-content: center ; display: flex -; max-width: 55% } diff --git a/doerg/scratch.clj b/doerg/scratch.clj new file mode 100644 index 0000000..d792bd0 --- /dev/null +++ b/doerg/scratch.clj @@ -0,0 +1,27 @@ +(ns scratch + (:require [clojure.spec.alpha :as s] + [spec-dict.main :refer [dict]] + [clojure.test.check.generators :as gen])) + + +(defmulti node-spec :type) + +(s/def ::node + (s/multi-spec node-spec :type)) + +(s/def ::children (s/coll-of ::node :kind vector?)) + +(s/def ::value nat-int?) + +(defmethod node-spec :branch [_] + (dict {:children ::children}) + #_(s/keys :req-un [::children])) + +(defmethod node-spec :leaf [_] + (dict {:value ::value}) + #_ + (s/keys :req-un [::value])) + +(comment + (binding [s/*recursion-limit* 1] + (gen/generate (s/gen ::node)))) diff --git a/doerg/src/net/deertopia/doerg/element.clj b/doerg/src/net/deertopia/doerg/element.clj index 1ee3443..7a03077 100644 --- a/doerg/src/net/deertopia/doerg/element.clj +++ b/doerg/src/net/deertopia/doerg/element.clj @@ -1,18 +1,25 @@ (ns net.deertopia.doerg.element - (:require [babashka.process :as p] - [net.deertopia.doerg.common :as common] - [clojure.string :as str] - [clojure.zip] - [babashka.fs :as fs] - [clojure.java.io :as io] + (:refer-clojure :exclude [read-string type]) + (:require [babashka.fs :as fs] + [babashka.process :as p] [cheshire.core :as json] + [clojure.core.match :refer [match]] + [clojure.java.io :as io] + [clojure.set :as set] [clojure.spec.alpha :as s] - [spec-dict.main :refer [dict]] - [net.deertopia.doerg.config :as cfg] + [clojure.string :as str] + [clojure.test.check.generators :as gen] + [clojure.tools.logging.readable :as lr] + [clojure.zip :as z] [com.rpl.specter :as sp] - [clojure.tools.logging.readable :as lr]) - (:import (java.util UUID)) - (:refer-clojure :exclude [read-string])) + [com.rpl.specter.zipper :as sz] + [net.deertopia.doerg.common :as common] + [net.deertopia.doerg.config :as cfg] + [spec-dict.main :refer [dict]] + [clojure.tools.logging :as l]) + (:import + (java.util UUID))) + (def ^:dynamic *uniorg-timeout-duration* @@ -34,14 +41,16 @@ (if (zero? (:exit r)) (-> r :out (json/parse-string (comp keyword camel->kebab)))))) -(declare gather-first-section) +(declare gather-first-section gather-latex-paragraphs element-types) -(defn read-string [s & {:keys [post-processors] - :or {post-processors [gather-first-section]}}] +(defn read-string + [s & {:keys [post-processors] + :or {post-processors [gather-first-section + gather-latex-paragraphs]}}] (let [apply-post-processors (apply comp (reverse post-processors))] (with-in-str s - (-> (uniorg :in *in*) - apply-post-processors)))) + (-> (uniorg :in *in*) + apply-post-processors)))) @@ -60,6 +69,9 @@ (and (map? element) (contains? element :type))) +(defn type [element] + (:type element)) + (defn of-type? "Return truthy if the Org node `element` is of type `type`. In the vararg case, return truthy if `element` is of any of the types @@ -196,6 +208,22 @@ :first-section-nodes of-first-section :rest remaining-nodes*})) +(defn- element-bounds [& nodes] + (reduce (fn [acc {:keys [contents-begin contents-end]}] + (if (and (nat-int? contents-begin) + (nat-int? contents-end)) + (-> acc + (update + :contents-begin + #(min (or % Integer/MAX_VALUE) contents-begin)) + (update + :contents-end + #(max (or % Integer/MIN_VALUE) contents-end))) + acc)) + {:contents-begin nil + :contents-end nil} + nodes)) + (defn gather-first-section [node] (assert (of-type? node "org-data") "`gather-doerg-data` should be applied to the document root.") @@ -203,8 +231,206 @@ (split-sections (:children node)) ;; TODO: Construct `:contents-begin` and `:contents-end` data ;; by spanning the children. + first-section (merge {:type "section" + :children first-section-nodes} + (apply element-bounds first-section-nodes)) new-children (concat top-level-nodes - (list {:type "section" - :children first-section-nodes}) + (list first-section) rest)] (assoc node :children new-children))) + +(defn separated-by-explicit-paragraph-break? + "Returh truthy if each successive pair of elements is separated by + at least one explicit paragraph break; i.e. a blank line." + [& elements] + (match elements + [e₁ e₂ & es] + (and (< (-> e₁ :position :end :line) + (-> e₂ :position :start :line)) + (recur es)) + :else true)) + +(defn swallow + ([predator prey] + (assert (greater-element? predator)) + (-> predator + (update :children #(conj % prey)) + (assoc-in [:position :end] (-> prey :position :end)))) + ([predator prey & more-prey] + (reduce swallow predator (cons prey more-prey)))) + +(defn gather-latex-paragraphs [node] + (->> node + (sp/transform + [postorder-walker (sp/must :children)] + (fn [children] + (loop [acc [] + cs (vec children)] + (match cs + ;; CASE: A paragraph followed by a LaTeX environment. + ;; If there are no blank lines separating the paragraph + ;; from the LaTeX environment, the LaTeX environment + ;; shall become a child of the paragraph. + ([(para :guard #(of-type? % "paragraph")) + (tex :guard #(of-type? % "latex-environment")) + & rest] + :guard #(apply separated-by-explicit-paragraph-break? %)) + (recur acc (vec (cons (swallow para tex) rest))) + ;; CASE: + ([(para₁ :guard #(of-type? % "paragraph")) + (para₂ :guard #(of-type? % "paragraph")) + & rest] + :guard #(apply separated-by-explicit-paragraph-break? %)) + (recur acc (vec (cons (apply swallow para₁ (:children para₂)) + rest))) + ;; CASE: Irrelevant or empty! + [c & rest] + (recur (conj acc c) rest) + [] acc)))))) + + +;;; Specs (top-level) + +;; Data taken from uniorg/index.d.ts + +(comment + (defn- typescript-enum->set [s] + (as-> s + it + (str/split it #" \| ") + (map camel->kebab it) + (into #{} it)))) + +(def greater-element-types + #{"org-data" "section" "property-drawer" "drawer" "plain-list" + "list-item" "quote-block" "verse-block" "center-block" + "special-block" "footnote-definition" "table"}) + +(def element-types + #{"list-item-tag" "src-block" "comment-block" "latex-environment" + "keyword" "paragraph" "node-property" "example-block" "clock" + "planning" "diary-sexp" "fixed-width" "export-block" + "horizontal-rule" "comment" "table-row" "headline"}) + +(def recursive-object-types + #{"citation" "footnote-reference" "superscript" "table-cell" "link" + "italic" "citation-common-prefix" "subscript" "citation-prefix" + "citation-common-suffix" "strike-through" "citation-reference" + "bold" "underline"}) + +(def object-types + #{"line-break" "citation-suffix" "statistics-cookie" "timestamp" + "text" "verbatim" "citation-key" "export-snippet" "latex-fragment" + "entity" "code"}) + +(s/def ::greater-element-type greater-element-types) +(s/def ::element-type element-types) +(s/def ::object-type (set/union recursive-object-types object-types)) +(s/def ::recursive-object-type recursive-object-types) + +(s/def ::contents-begin nat-int?) +(s/def ::contents-end nat-int?) + +(defmulti node-spec :type) + +(defn- unimplemented-spec [x] + (lr/warnf "unimplemented method for %s" x) + (s/with-gen any? + (constantly (gen/return {})))) + +(defmethod node-spec :default [x] (unimplemented-spec x)) + +(def ^:private nfe + "NFE — “no further expectations.” Used in sub-specs of `::element` + et al. for elements with no additional structure beyond that + provided by their parents." + (s/with-gen any? + (constantly (gen/return {})))) + +(s/def ::node + (s/multi-spec node-spec :type)) + +(s/def :object/type ::object-type) + +(s/def ::object + (s/keys :req-un [:object/type])) + +(s/def :element/children (s/coll-of ::object :kind vector?)) +(s/def :element/type ::element-type) + +(s/def ::element + (s/keys :opt-un [::contents-begin ::contents-end] + :req-un [:element/children :element/type])) + +(s/def :greater-element/children + (s/coll-of (s/merge + (dict {:type (set/union greater-element-types + element-types)}) + ::node) + :kind vector?)) + +(s/def :greater-element/type ::greater-element-type) + +(s/def ::greater-element + (s/keys :req-un [::contents-begin ::contents-end + :greater-element/children + :greater-element/type])) + +(s/def :recursive-object/children + (s/coll-of + (s/merge + (dict {:type ::object-type}) + ::node) + :kind vector?)) + +(s/def :recursive-object/type ::object-type) + +(s/def ::recursive-object + (s/keys :opt-un [::contents-begin ::contents-end] + :req-un [:recursive-object/children + :recursive-object/type])) + +(comment + (use 'net.deertopia.doerg.repl) + (def doc (-> "test/net/deertopia/doerg/element_test/paragraph-with-multiple-latex.org" + slurp + (read-string :post-processors + [gather-first-section]))) + (s/explain ::node doc) + (binding [s/*recursion-limit* 1] + (gen/generate (s/gen ::node))) + (sp/select [postorder-walker (sp/must :children) + (sp/continuous-subseqs)] + doc)) + +(s/def ::todo-keyword string?) +(s/def ::priority string?) +(s/def ::commented boolean?) +(s/def ::level nat-int?) +(s/def ::tags (s/coll-of string? :kind vector?)) + + +;;; Specs (specific elements) + +(def ^:private string-value (dict {:value string?})) + +(defmethod node-spec "text" [_] (s/merge ::object string-value)) +(defmethod node-spec "verbatim" [_] (s/merge ::object string-value)) +(defmethod node-spec "code" [_] (s/merge ::object string-value)) +(defmethod node-spec "bold" [_] ::recursive-object) +(defmethod node-spec "italic" [_] ::recursive-object) + +(defmethod node-spec "headline" [_] + (s/merge ::element + (dict {:todo-keyword (s/nilable ::todo-keyword) + :priority (s/nilable ::priority) + :level ::level + :commented ::commented + :raw-value string? + :tags ::tags}))) + +(defmethod node-spec "org-data" [_] + ::greater-element) + +(defmethod node-spec "section" [_] + ::greater-element) diff --git a/doerg/test/net/deertopia/doerg/element_test.clj b/doerg/test/net/deertopia/doerg/element_test.clj index 3406bb1..a275b9a 100644 --- a/doerg/test/net/deertopia/doerg/element_test.clj +++ b/doerg/test/net/deertopia/doerg/element_test.clj @@ -49,3 +49,43 @@ first-paragraph-belongs-to-first-section?)) (t/is (not (-> (parse-resource "first-paragraph-under-heading.org") first-paragraph-belongs-to-first-section?))))) + +(defn walk-types [type & types] + [sut/postorder-walker #(apply sut/of-type? % type types)]) + +(defn- paragraph-ends-with-latex? [doc] + (let [type (-> (sp/select-first [(walk-types "paragraph") + (sp/must :children) + sp/LAST] + doc) + sut/type)] + (t/is type "latex-environment"))) + +(defn- paragraph-has-latex? [doc] + (t/is (sp/select-first [(walk-types "paragraph") + (sp/must :children) + sp/ALL + #(sut/of-type? % "latex-environment")] + doc))) + +(defn- paragraph-has-multiple-latex? [doc] + (let [paragraphs (sp/select (walk-types "paragraph") doc)] + (t/is (= 2 (count paragraphs))) + (let [[p₁ p₂] paragraphs] + (doseq [[p ts] [[p₁ ["text" "latex-environment" + "text" "latex-environment"]] + [p₂ ["text" "latex-environment" + "text" "latex-environment" "text"]]]] + (t/is (= ts (sp/select [(sp/must :children) + sp/ALL (sp/view sut/type)] p))))))) + +(t/deftest paragraph-separation + (t/testing "paragraph ending with latex" + (-> (parse-resource "paragraph-ending-with-latex.org") + paragraph-ends-with-latex?)) + (t/testing "paragraph surrounding latex" + (-> (parse-resource "paragraph-surrounding-latex.org") + paragraph-has-latex?)) + (t/testing "paragraph with interleaved latex" + (-> (parse-resource "paragraph-with-multiple-latex.org") + paragraph-has-multiple-latex?))) diff --git a/doerg/test/net/deertopia/doerg/element_test/paragraph-ending-with-latex.org b/doerg/test/net/deertopia/doerg/element_test/paragraph-ending-with-latex.org new file mode 100644 index 0000000..8f1eebb --- /dev/null +++ b/doerg/test/net/deertopia/doerg/element_test/paragraph-ending-with-latex.org @@ -0,0 +1,7 @@ +#+title: paragraph ending with latex + +here is the paragraph, +\begin{align*} +\text{and here} & +\\ & \text{is the \LaTeX} +\end{align*} diff --git a/doerg/test/net/deertopia/doerg/element_test/paragraph-surrounding-latex.org b/doerg/test/net/deertopia/doerg/element_test/paragraph-surrounding-latex.org new file mode 100644 index 0000000..cdffa8f --- /dev/null +++ b/doerg/test/net/deertopia/doerg/element_test/paragraph-surrounding-latex.org @@ -0,0 +1,7 @@ +#+title: paragraph surrounding latex + +first part of paragraph +\begin{equation*} +\text{some \LaTeX \}:)} +\end{equation*} +last part of paragraph diff --git a/doerg/test/net/deertopia/doerg/element_test/paragraph-with-multiple-latex.org b/doerg/test/net/deertopia/doerg/element_test/paragraph-with-multiple-latex.org new file mode 100644 index 0000000..99b96bc --- /dev/null +++ b/doerg/test/net/deertopia/doerg/element_test/paragraph-with-multiple-latex.org @@ -0,0 +1,24 @@ +#+title: paragraph with multiple latex environments + +* interleaved + +first part of paragraph +\begin{equation*} +\text{first \LaTeX\ environment} +\end{equation*} +second part of paragraph +\begin{equation*} +\text{second \LaTeX\ environment} +\end{equation*} + +* fenceposted + +first fencepost +\begin{equation*} +\text{first fenceposted \LaTeX\ environment} +\end{equation*} +second fencepost +\begin{equation*} +\text{second fenceposted \LaTeX\ environment} +\end{equation*} +third fencepost