Compare commits
2 Commits
main
...
96d1766386
| Author | SHA1 | Date | |
|---|---|---|---|
| 96d1766386 | |||
| 59b75c01e8 |
@@ -9,7 +9,8 @@
|
|||||||
com.rpl/specter {:mvn/version "1.1.6"}
|
com.rpl/specter {:mvn/version "1.1.6"}
|
||||||
lambdaisland/deep-diff2 {:mvn/version "2.12.219"}
|
lambdaisland/deep-diff2 {:mvn/version "2.12.219"}
|
||||||
mvxcvi/clj-cbor {:mvn/version "1.1.1"}
|
mvxcvi/clj-cbor {:mvn/version "1.1.1"}
|
||||||
ch.qos.logback/logback-classic {:mvn/version "1.1.3"}}
|
ch.qos.logback/logback-classic {:mvn/version "1.1.3"}
|
||||||
|
org.clojure/test.check {:mvn/version "1.1.3"}}
|
||||||
:paths ["src" "resources" "test"]
|
:paths ["src" "resources" "test"]
|
||||||
:aliases
|
:aliases
|
||||||
{:test {:extra-deps {lambdaisland/kaocha {:mvn/version "1.91.1392"}}
|
{:test {:extra-deps {lambdaisland/kaocha {:mvn/version "1.91.1392"}}
|
||||||
|
|||||||
@@ -2,13 +2,17 @@
|
|||||||
|
|
||||||
const { parse } = require ("uniorg-parse/lib/parser.js");
|
const { parse } = require ("uniorg-parse/lib/parser.js");
|
||||||
|
|
||||||
|
const opts = {
|
||||||
|
trackPosition: true
|
||||||
|
}
|
||||||
|
|
||||||
async function main () {
|
async function main () {
|
||||||
const chunks = []
|
const chunks = []
|
||||||
for await (const chunk of process.stdin) {
|
for await (const chunk of process.stdin) {
|
||||||
chunks.push (chunk)
|
chunks.push (chunk)
|
||||||
}
|
}
|
||||||
const orgText = Buffer.concat (chunks).toString ("utf8")
|
const orgText = Buffer.concat (chunks).toString ("utf8")
|
||||||
process.stdout.write (JSON.stringify (parse (orgText)))
|
process.stdout.write (JSON.stringify (parse (orgText, opts)))
|
||||||
}
|
}
|
||||||
|
|
||||||
main ()
|
main ()
|
||||||
|
|||||||
@@ -48,6 +48,12 @@
|
|||||||
\newcommand{\optic}[3]{\opticname{#1}^\prime\;#2\;#3}
|
\newcommand{\optic}[3]{\opticname{#1}^\prime\;#2\;#3}
|
||||||
\newcommand{\Optic}[5]{\opticname{#1}\;#2\;#3\;#4\;#5}
|
\newcommand{\Optic}[5]{\opticname{#1}\;#2\;#3\;#4\;#5}
|
||||||
|
|
||||||
|
% Default uses arrow glyphs from the active font, which are kinda ugly in the
|
||||||
|
% case of Plex.
|
||||||
|
\tikzcdset{
|
||||||
|
arrow style=tikz
|
||||||
|
}
|
||||||
|
|
||||||
\begin{document}
|
\begin{document}
|
||||||
\setlength\abovedisplayskip{0pt} % Remove padding before equation environments.
|
\setlength\abovedisplayskip{0pt} % Remove padding before equation environments.
|
||||||
%% \color[rgb]{0.000,0.000,0.004}\special{dvisvgm:currentcolor on}\setcounter{equation}{0}%
|
%% \color[rgb]{0.000,0.000,0.004}\special{dvisvgm:currentcolor on}\setcounter{equation}{0}%
|
||||||
|
|||||||
@@ -1,18 +1,26 @@
|
|||||||
(ns net.deertopia.doerg.element
|
(ns net.deertopia.doerg.element
|
||||||
(:require [babashka.process :as p]
|
(:refer-clojure :exclude [read-string type])
|
||||||
[net.deertopia.doerg.common :as common]
|
(:require
|
||||||
[clojure.string :as str]
|
[babashka.fs :as fs]
|
||||||
[clojure.zip]
|
[babashka.process :as p]
|
||||||
[babashka.fs :as fs]
|
[cheshire.core :as json]
|
||||||
[clojure.java.io :as io]
|
[clojure.core.match :refer [match]]
|
||||||
[cheshire.core :as json]
|
[clojure.java.io :as io]
|
||||||
[clojure.spec.alpha :as s]
|
[clojure.set :as set]
|
||||||
[spec-dict.main :refer [dict]]
|
[clojure.spec.alpha :as s]
|
||||||
[net.deertopia.doerg.config :as cfg]
|
[clojure.string :as str]
|
||||||
[com.rpl.specter :as sp]
|
[clojure.test.check.generators :as gen]
|
||||||
[clojure.tools.logging.readable :as lr])
|
[clojure.tools.logging.readable :as lr]
|
||||||
(:import (java.util UUID))
|
[clojure.zip :as z]
|
||||||
(:refer-clojure :exclude [read-string]))
|
[com.rpl.specter :as sp]
|
||||||
|
[com.rpl.specter.zipper :as sz]
|
||||||
|
[net.deertopia.doerg.common :as common]
|
||||||
|
[net.deertopia.doerg.config :as cfg]
|
||||||
|
[spec-dict.main :refer [dict]]
|
||||||
|
[clojure.tools.logging :as l])
|
||||||
|
(:import
|
||||||
|
(java.util UUID)))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(def ^:dynamic *uniorg-timeout-duration*
|
(def ^:dynamic *uniorg-timeout-duration*
|
||||||
@@ -34,14 +42,16 @@
|
|||||||
(if (zero? (:exit r))
|
(if (zero? (:exit r))
|
||||||
(-> r :out (json/parse-string (comp keyword camel->kebab))))))
|
(-> r :out (json/parse-string (comp keyword camel->kebab))))))
|
||||||
|
|
||||||
(declare gather-first-section)
|
(declare gather-first-section gather-latex-paragraphs element-types)
|
||||||
|
|
||||||
(defn read-string [s & {:keys [post-processors]
|
(defn read-string
|
||||||
:or {post-processors [gather-first-section]}}]
|
[s & {:keys [post-processors]
|
||||||
|
:or {post-processors [gather-first-section
|
||||||
|
gather-latex-paragraphs]}}]
|
||||||
(let [apply-post-processors (apply comp (reverse post-processors))]
|
(let [apply-post-processors (apply comp (reverse post-processors))]
|
||||||
(with-in-str s
|
(with-in-str s
|
||||||
(-> (uniorg :in *in*)
|
(-> (uniorg :in *in*)
|
||||||
apply-post-processors))))
|
apply-post-processors))))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -60,6 +70,9 @@
|
|||||||
(and (map? element)
|
(and (map? element)
|
||||||
(contains? element :type)))
|
(contains? element :type)))
|
||||||
|
|
||||||
|
(defn type [element]
|
||||||
|
(:type element))
|
||||||
|
|
||||||
(defn of-type?
|
(defn of-type?
|
||||||
"Return truthy if the Org node `element` is of type `type`. In the
|
"Return truthy if the Org node `element` is of type `type`. In the
|
||||||
vararg case, return truthy if `element` is of any of the types
|
vararg case, return truthy if `element` is of any of the types
|
||||||
@@ -196,6 +209,22 @@
|
|||||||
:first-section-nodes of-first-section
|
:first-section-nodes of-first-section
|
||||||
:rest remaining-nodes*}))
|
:rest remaining-nodes*}))
|
||||||
|
|
||||||
|
(defn- element-bounds
  "Return a map of `:contents-begin` and `:contents-end` spanning
  `nodes`: the smallest `:contents-begin` and the largest
  `:contents-end` among them.  A node is only considered when both of
  its bounds satisfy `nat-int?`; other nodes are ignored.  When no node
  qualifies (including the zero-argument case), both values are nil."
  [& nodes]
  (reduce (fn [acc {:keys [contents-begin contents-end]}]
            (if (and (nat-int? contents-begin)
                     (nat-int? contents-end))
              ;; `fnil` seeds a still-nil bound with this node's own
              ;; value, so no sentinel number is needed and bounds
              ;; larger than `Integer/MAX_VALUE` are not clamped.
              (-> acc
                  (update :contents-begin
                          (fnil min contents-begin)
                          contents-begin)
                  (update :contents-end
                          (fnil max contents-end)
                          contents-end))
              acc))
          {:contents-begin nil
           :contents-end nil}
          nodes))
|
||||||
|
|
||||||
(defn gather-first-section [node]
|
(defn gather-first-section [node]
|
||||||
(assert (of-type? node "org-data")
|
(assert (of-type? node "org-data")
|
||||||
"`gather-doerg-data` should be applied to the document root.")
|
"`gather-doerg-data` should be applied to the document root.")
|
||||||
@@ -203,8 +232,193 @@
|
|||||||
(split-sections (:children node))
|
(split-sections (:children node))
|
||||||
;; TODO: Construct `:contents-begin` and `:contents-end` data
|
;; TODO: Construct `:contents-begin` and `:contents-end` data
|
||||||
;; by spanning the children.
|
;; by spanning the children.
|
||||||
|
first-section (merge {:type "section"
|
||||||
|
:children first-section-nodes}
|
||||||
|
(apply element-bounds first-section-nodes))
|
||||||
new-children (concat top-level-nodes
|
new-children (concat top-level-nodes
|
||||||
(list {:type "section"
|
(list first-section)
|
||||||
:children first-section-nodes})
|
|
||||||
rest)]
|
rest)]
|
||||||
(assoc node :children new-children)))
|
(assoc node :children new-children)))
|
||||||
|
|
||||||
|
(defn separated-by-explicit-paragraph-break?
|
||||||
|
"Return truthy if each successive pair of elements is separated by
|
||||||
|
at least one explicit paragraph break; i.e. a blank line."
|
||||||
|
[& elements]
|
||||||
|
(match elements
|
||||||
|
[e₁ e₂ & es]
|
||||||
|
(and (< (-> e₁ :position :end :line)
|
||||||
|
(-> e₂ :position :start :line))
|
||||||
|
(recur es))
|
||||||
|
:else true))
|
||||||
|
|
||||||
|
(defn swallow
  "Conj `prey` onto `predator`'s `:children` and set the predator's
  `[:position :end]` to that of `prey`.  Asserts that `predator`
  satisfies `greater-element?`.  With additional prey, each one is
  swallowed in turn, left to right."
  ([predator prey]
   (assert (greater-element? predator))
   (let [prey-end (get-in prey [:position :end])
         fed (update predator :children conj prey)]
     (assoc-in fed [:position :end] prey-end)))
  ([predator prey & more-prey]
   (reduce swallow (swallow predator prey) more-prey)))
|
||||||
|
|
||||||
|
(defn gather-latex-paragraphs [node]
|
||||||
|
(->> node
|
||||||
|
(sp/transform
|
||||||
|
[postorder-walker (sp/must :children)]
|
||||||
|
(fn [children]
|
||||||
|
(loop [acc []
|
||||||
|
cs (vec children)]
|
||||||
|
(match cs
|
||||||
|
;; CASE: A paragraph followed by a LaTeX environment
|
||||||
|
;; followed by a paragraph. If there are no blank lines
|
||||||
|
;; separating the three elements, absorb them into a
|
||||||
|
;; single paragraph spanning the sum of their parts.
|
||||||
|
([(para₁ :guard #(of-type? % "paragraph"))
|
||||||
|
(tex :guard #(of-type? % "latex-environment"))
|
||||||
|
(para₂ :guard #(of-type? % "paragraph"))
|
||||||
|
& rest]
|
||||||
|
:guard #(apply separated-by-explicit-paragraph-break? %))
|
||||||
|
(recur (conj acc
|
||||||
|
;; Swallow para₂'s /children/,
|
||||||
|
;; not para₂ itself. Nested
|
||||||
|
;; paragraphs are not supported
|
||||||
|
;; by HTML.
|
||||||
|
(apply swallow para₁ tex (:children para₂)))
|
||||||
|
rest)
|
||||||
|
;; CASE: A paragraph followed by a LaTeX environment.
|
||||||
|
;; If there are no blank lines separating the paragraph
|
||||||
|
;; from the LaTeX environment, the LaTeX environment
|
||||||
|
;; shall become a child of the paragraph.
|
||||||
|
([(para :guard #(of-type? % "paragraph"))
|
||||||
|
(tex :guard #(of-type? % "latex-environment"))
|
||||||
|
& rest]
|
||||||
|
:guard #(apply separated-by-explicit-paragraph-break? %))
|
||||||
|
(recur (conj acc (swallow para tex)) rest)
|
||||||
|
;; CASE: Irrelevant or empty!
|
||||||
|
[c & rest]
|
||||||
|
(recur (conj acc c) rest)
|
||||||
|
[] acc))))))
|
||||||
|
|
||||||
|
|
||||||
|
;;; Specs (top-level)
|
||||||
|
|
||||||
|
;; Data taken from uniorg/index.d.ts
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(defn- typescript-enum->set [s]
|
||||||
|
(as-> s
|
||||||
|
it
|
||||||
|
(str/split it #" \| ")
|
||||||
|
(map camel->kebab it)
|
||||||
|
(into #{} it))))
|
||||||
|
|
||||||
|
(def greater-element-types
|
||||||
|
#{"org-data" "section" "property-drawer" "drawer" "list" "list-item"
|
||||||
|
"quote-block" "verse-block" "center-block" "special-block"
|
||||||
|
"footnote-definition" "table"})
|
||||||
|
|
||||||
|
(def element-types
|
||||||
|
#{"list-item-tag" "src-block" "comment-block" "latex-environment"
|
||||||
|
"keyword" "paragraph" "node-property" "example-block" "clock"
|
||||||
|
"planning" "diary-sexp" "fixed-width" "export-block"
|
||||||
|
"horizontal-rule" "comment" "table-row" "headline"})
|
||||||
|
|
||||||
|
(def recursive-object-types
|
||||||
|
#{"citation" "footnote-reference" "superscript" "table-cell" "link"
|
||||||
|
"italic" "citation-common-prefix" "subscript" "citation-prefix"
|
||||||
|
"citation-common-suffix" "strike-through" "citation-reference"
|
||||||
|
"bold" "underline"})
|
||||||
|
|
||||||
|
(def object-types
|
||||||
|
#{"line-break" "citation-suffix" "statistics-cookie" "timestamp"
|
||||||
|
"text" "verbatim" "citation-key" "export-snippet" "latex-fragment"
|
||||||
|
"entity" "code"})
|
||||||
|
|
||||||
|
(s/def ::greater-element-type greater-element-types)
|
||||||
|
(s/def ::element-type element-types)
|
||||||
|
(s/def ::object-type object-types)
|
||||||
|
(s/def ::recursive-object-type recursive-object-types)
|
||||||
|
|
||||||
|
(s/def ::contents-begin nat-int?)
|
||||||
|
(s/def ::contents-end nat-int?)
|
||||||
|
|
||||||
|
(defmulti node-spec :type)
|
||||||
|
|
||||||
|
(defn- unimplemented-spec [x]
|
||||||
|
(lr/warnf "unimplemented method for %s" (:type x))
|
||||||
|
any?)
|
||||||
|
|
||||||
|
(defmethod node-spec :default [x] (unimplemented-spec x))
|
||||||
|
|
||||||
|
(def ^:private nfe
|
||||||
|
"NFE — “no further expectations.” Used in sub-specs of `::element`
|
||||||
|
et al. for elements with no additional structure beyond that
|
||||||
|
provided by their parents."
|
||||||
|
(s/with-gen (constantly true)
|
||||||
|
(constantly (gen/return {}))))
|
||||||
|
|
||||||
|
(s/def ::object
|
||||||
|
(dict {:type string?}))
|
||||||
|
|
||||||
|
(s/def ::element
|
||||||
|
(dict ^:opt {:contents-begin ::contents-begin
|
||||||
|
:contents-end ::contents-end}
|
||||||
|
{:children (s/coll-of nfe :kind vector?)
|
||||||
|
:type string?}))
|
||||||
|
|
||||||
|
(s/def ::node nil)
|
||||||
|
|
||||||
|
(s/def ::greater-element
|
||||||
|
(dict {:contents-begin ::contents-begin
|
||||||
|
:contents-end ::contents-end
|
||||||
|
:children (s/coll-of ::node :kind vector?)
|
||||||
|
:type string?}))
|
||||||
|
|
||||||
|
(s/def ::recursive-object
|
||||||
|
(dict ^:opt {:contents-begin ::contents-begin
|
||||||
|
:contents-end ::contents-end}
|
||||||
|
{:children (s/coll-of ::node :kind vector?)}))
|
||||||
|
|
||||||
|
(s/def ::node (s/multi-spec node-spec :type))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(use 'net.deertopia.doerg.repl)
|
||||||
|
(def doc (-> some-org-file slurp read-string))
|
||||||
|
(s/explain ::node doc))
|
||||||
|
|
||||||
|
(s/def ::todo-keyword string?)
|
||||||
|
(s/def ::priority string?)
|
||||||
|
(s/def ::commented boolean?)
|
||||||
|
(s/def ::level nat-int?)
|
||||||
|
(s/def ::tags (s/coll-of string? :kind vector?))
|
||||||
|
|
||||||
|
|
||||||
|
;;; Specs (specific objects)
|
||||||
|
|
||||||
|
(def ^:private string-value (dict {:value string?}))
|
||||||
|
|
||||||
|
(defmethod node-spec "text" [_] (s/merge ::object string-value))
|
||||||
|
(defmethod node-spec "verbatim" [_] (s/merge ::object string-value))
|
||||||
|
(defmethod node-spec "code" [_] (s/merge ::object string-value))
|
||||||
|
(defmethod node-spec "bold" [_] ::recursive-object)
|
||||||
|
(defmethod node-spec "italic" [_] ::recursive-object)
|
||||||
|
|
||||||
|
|
||||||
|
;;; Specs (specific elements)
|
||||||
|
|
||||||
|
(defmethod node-spec "headline" [_]
|
||||||
|
(s/merge ::element
|
||||||
|
(dict {:todo-keyword (s/nilable ::todo-keyword)
|
||||||
|
:priority (s/nilable ::priority)
|
||||||
|
:level ::level
|
||||||
|
:commented ::commented
|
||||||
|
:raw-value string?
|
||||||
|
:tags ::tags})))
|
||||||
|
|
||||||
|
|
||||||
|
;;; Specs (specific greater elements)
|
||||||
|
|
||||||
|
(defmethod node-spec "org-data" [_]
|
||||||
|
::greater-element)
|
||||||
|
|
||||||
|
(defmethod node-spec "section" [_]
|
||||||
|
::greater-element)
|
||||||
|
|||||||
@@ -49,3 +49,47 @@
|
|||||||
first-paragraph-belongs-to-first-section?))
|
first-paragraph-belongs-to-first-section?))
|
||||||
(t/is (not (-> (parse-resource "first-paragraph-under-heading.org")
|
(t/is (not (-> (parse-resource "first-paragraph-under-heading.org")
|
||||||
first-paragraph-belongs-to-first-section?)))))
|
first-paragraph-belongs-to-first-section?)))))
|
||||||
|
|
||||||
|
(defn walk-types [type & types]
|
||||||
|
[sut/postorder-walker #(apply sut/of-type? % type types)])
|
||||||
|
|
||||||
|
(defn headline-matches? [re]
|
||||||
|
())
|
||||||
|
|
||||||
|
(defn- paragraph-ends-with-latex?
  "Assert that the last child of the first paragraph selected from
  `doc` by `walk-types` is of type \"latex-environment\"."
  [doc]
  (let [last-child-type (-> (sp/select-first [(walk-types "paragraph")
                                              (sp/must :children)
                                              sp/LAST]
                                             doc)
                            sut/type)]
    ;; `(t/is x msg)` only checks that `x` is truthy and uses the second
    ;; argument as the failure message, so the comparison must be
    ;; spelled out for the assertion to test anything.
    (t/is (= "latex-environment" last-child-type))))
|
||||||
|
|
||||||
|
(defn- paragraph-has-latex? [doc]
|
||||||
|
(t/is (sp/select-first [(walk-types "paragraph")
|
||||||
|
(sp/must :children)
|
||||||
|
sp/ALL
|
||||||
|
#(sut/of-type? % "latex-environment")]
|
||||||
|
doc)))
|
||||||
|
|
||||||
|
(defn- paragraph-has-multiple-latex? [doc]
|
||||||
|
(let [paragraphs (sp/select (walk-types "paragraph") doc)]
|
||||||
|
(t/is (= 2 (count paragraphs)))
|
||||||
|
(let [[p₁ p₂] paragraphs]
|
||||||
|
(t/are [p ts] (= ts
|
||||||
|
(sp/select [(sp/must :children)
|
||||||
|
sp/ALL (sp/view sut/type)] p))
|
||||||
|
p₁ ["text" "latex-environment"
|
||||||
|
"text" "latex-environment"]
|
||||||
|
p₂ ["text" "latex-environment"
|
||||||
|
"text" "latex-environment" "text"]))))
|
||||||
|
|
||||||
|
(t/deftest paragraph-separation
|
||||||
|
(t/testing "paragraph ending with latex"
|
||||||
|
(-> (parse-resource "paragraph-ending-with-latex.org")
|
||||||
|
paragraph-ends-with-latex?))
|
||||||
|
(t/testing "paragraph surrounding latex"
|
||||||
|
(-> (parse-resource "paragraph-surrounding-latex.org")
|
||||||
|
paragraph-has-latex?))
|
||||||
|
(t/testing "paragraph with interleaved latex"
|
||||||
|
(-> (parse-resource "paragraph-with-multiple-latex.org")
|
||||||
|
paragraph-has-multiple-latex?)))
|
||||||
|
|||||||
@@ -0,0 +1,7 @@
|
|||||||
|
#+title: paragraph ending with latex
|
||||||
|
|
||||||
|
here is the paragraph,
|
||||||
|
\begin{align*}
|
||||||
|
\text{and here} &
|
||||||
|
\\ & \text{is the \LaTeX}
|
||||||
|
\end{align*}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
#+title: paragraph surrounding latex
|
||||||
|
|
||||||
|
first part of paragraph
|
||||||
|
\begin{equation*}
|
||||||
|
\text{some \LaTeX \}:)}
|
||||||
|
\end{equation*}
|
||||||
|
last part of paragraph
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
#+title: paragraph with multiple latex environments
|
||||||
|
|
||||||
|
* interleaved
|
||||||
|
|
||||||
|
first part of paragraph
|
||||||
|
\begin{equation*}
|
||||||
|
\text{first \LaTeX\ environment}
|
||||||
|
\end{equation*}
|
||||||
|
second part of paragraph
|
||||||
|
\begin{equation*}
|
||||||
|
\text{second \LaTeX\ environment}
|
||||||
|
\end{equation*}
|
||||||
|
|
||||||
|
* fenceposted
|
||||||
|
|
||||||
|
first fencepost
|
||||||
|
\begin{equation*}
|
||||||
|
\text{first fenceposted \LaTeX\ environment}
|
||||||
|
\end{equation*}
|
||||||
|
second fencepost
|
||||||
|
\begin{equation*}
|
||||||
|
\text{second fenceposted \LaTeX\ environment}
|
||||||
|
\end{equation*}
|
||||||
|
third fencepost
|
||||||
Reference in New Issue
Block a user