Compare commits
9 Commits
main
...
2c755422a7
| Author | SHA1 | Date | |
|---|---|---|---|
| 2c755422a7 | |||
| 777968cac5 | |||
| a35453c2be | |||
| e20dcf591d | |||
| cbfa42bc73 | |||
| b0a3895a18 | |||
| 1ff453262d | |||
| d0840233c9 | |||
| 86db8d0fe2 |
@@ -2,13 +2,17 @@
|
|||||||
|
|
||||||
const { parse } = require ("uniorg-parse/lib/parser.js");
|
const { parse } = require ("uniorg-parse/lib/parser.js");
|
||||||
|
|
||||||
|
const opts = {
|
||||||
|
trackPosition: true
|
||||||
|
}
|
||||||
|
|
||||||
async function main () {
|
async function main () {
|
||||||
const chunks = []
|
const chunks = []
|
||||||
for await (const chunk of process.stdin) {
|
for await (const chunk of process.stdin) {
|
||||||
chunks.push (chunk)
|
chunks.push (chunk)
|
||||||
}
|
}
|
||||||
const orgText = Buffer.concat (chunks).toString ("utf8")
|
const orgText = Buffer.concat (chunks).toString ("utf8")
|
||||||
process.stdout.write (JSON.stringify (parse (orgText)))
|
process.stdout.write (JSON.stringify (parse (orgText, opts)))
|
||||||
}
|
}
|
||||||
|
|
||||||
main ()
|
main ()
|
||||||
|
|||||||
@@ -48,6 +48,12 @@
|
|||||||
\newcommand{\optic}[3]{\opticname{#1}^\prime\;#2\;#3}
|
\newcommand{\optic}[3]{\opticname{#1}^\prime\;#2\;#3}
|
||||||
\newcommand{\Optic}[5]{\opticname{#1}\;#2\;#3\;#4\;#5}
|
\newcommand{\Optic}[5]{\opticname{#1}\;#2\;#3\;#4\;#5}
|
||||||
|
|
||||||
|
% Default uses arrow glyphs from the active font, which are kinda ugly in the
|
||||||
|
% case of Plex.
|
||||||
|
\tikzcdset{
|
||||||
|
arrow style=tikz
|
||||||
|
}
|
||||||
|
|
||||||
\begin{document}
|
\begin{document}
|
||||||
\setlength\abovedisplayskip{0pt} % Remove padding before equation environments.
|
\setlength\abovedisplayskip{0pt} % Remove padding before equation environments.
|
||||||
%% \color[rgb]{0.000,0.000,0.004}\special{dvisvgm:currentcolor on}\setcounter{equation}{0}%
|
%% \color[rgb]{0.000,0.000,0.004}\special{dvisvgm:currentcolor on}\setcounter{equation}{0}%
|
||||||
|
|||||||
@@ -10,7 +10,10 @@
|
|||||||
[spec-dict.main :refer [dict]]
|
[spec-dict.main :refer [dict]]
|
||||||
[net.deertopia.doerg.config :as cfg]
|
[net.deertopia.doerg.config :as cfg]
|
||||||
[com.rpl.specter :as sp]
|
[com.rpl.specter :as sp]
|
||||||
[clojure.tools.logging.readable :as lr])
|
[clojure.tools.logging.readable :as lr]
|
||||||
|
[clojure.zip :as z]
|
||||||
|
[com.rpl.specter.zipper :as sz]
|
||||||
|
[clojure.core.match :refer [match]])
|
||||||
(:import (java.util UUID))
|
(:import (java.util UUID))
|
||||||
(:refer-clojure :exclude [read-string]))
|
(:refer-clojure :exclude [read-string]))
|
||||||
|
|
||||||
@@ -34,14 +37,16 @@
|
|||||||
(if (zero? (:exit r))
|
(if (zero? (:exit r))
|
||||||
(-> r :out (json/parse-string (comp keyword camel->kebab))))))
|
(-> r :out (json/parse-string (comp keyword camel->kebab))))))
|
||||||
|
|
||||||
(declare gather-first-section)
|
(declare gather-first-section gather-latex-paragraphs)
|
||||||
|
|
||||||
(defn read-string [s & {:keys [post-processors]
|
(defn read-string
|
||||||
:or {post-processors [gather-first-section]}}]
|
[s & {:keys [post-processors]
|
||||||
|
:or {post-processors [gather-first-section
|
||||||
|
gather-latex-paragraphs]}}]
|
||||||
(let [apply-post-processors (apply comp (reverse post-processors))]
|
(let [apply-post-processors (apply comp (reverse post-processors))]
|
||||||
(with-in-str s
|
(with-in-str s
|
||||||
(-> (uniorg :in *in*)
|
(-> (uniorg :in *in*)
|
||||||
apply-post-processors))))
|
apply-post-processors))))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -208,3 +213,70 @@
|
|||||||
:children first-section-nodes})
|
:children first-section-nodes})
|
||||||
rest)]
|
rest)]
|
||||||
(assoc node :children new-children)))
|
(assoc node :children new-children)))
|
||||||
|
|
||||||
|
(defn separated-by-explicit-paragraph-break?
|
||||||
|
"Return truthy if each successive pair of elements is separated by
|
||||||
|
at least one explicit paragraph break; i.e. a blank line."
|
||||||
|
[& elements]
|
||||||
|
(match elements
|
||||||
|
[e₁ e₂ & es]
|
||||||
|
(and (< (-> e₁ :position :end :line)
|
||||||
|
(-> e₂ :position :start :line))
|
||||||
|
(recur es))
|
||||||
|
:else true))
|
||||||
|
|
||||||
|
(defn swallow
|
||||||
|
([predator prey]
|
||||||
|
(assert (greater-element? predator))
|
||||||
|
(-> predator
|
||||||
|
(update :children #(conj % prey))
|
||||||
|
(assoc-in [:position :end] (-> prey :position :end))))
|
||||||
|
([predator prey & more-prey]
|
||||||
|
(reduce swallow predator (cons prey more-prey))))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(-> [1 2 3 4]
|
||||||
|
(neighbourly-mapcat prn) )
|
||||||
|
(def doc (read-string (slurp some-org-file)))
|
||||||
|
|
||||||
|
(let [r (atom [])
|
||||||
|
blah]
|
||||||
|
@r))
|
||||||
|
|
||||||
|
(defn gather-latex-paragraphs [node]
|
||||||
|
(->> node
|
||||||
|
(sp/transform
|
||||||
|
[postorder-walker (sp/must :children)]
|
||||||
|
(fn [children]
|
||||||
|
(loop [acc []
|
||||||
|
cs (vec children)]
|
||||||
|
(match cs
|
||||||
|
;; CASE: A paragraph followed by a LaTeX environment
|
||||||
|
;; followed by a paragraph. If there are no blank lines
|
||||||
|
;; separating the three elements, absorb them into a
|
||||||
|
;; single paragraph spanning the sum of their parts.
|
||||||
|
([(para₁ :guard #(of-type? % "paragraph"))
|
||||||
|
(tex :guard #(of-type? % "latex-environment"))
|
||||||
|
(para₂ :guard #(of-type? % "paragraph"))
|
||||||
|
& rest]
|
||||||
|
:guard #(apply separated-by-explicit-paragraph-break? %))
|
||||||
|
(recur (conj acc
|
||||||
|
;; Swallow para₂'s /children/,
|
||||||
|
;; not para₂ itself. Nested
|
||||||
|
;; paragraphs are not supported
|
||||||
|
;; by HTML.
|
||||||
|
(apply swallow para₁ tex (:children para₂)))
|
||||||
|
rest)
|
||||||
|
;; CASE: A paragraph followed by a LaTeX environment.
|
||||||
|
;; If there are no blank lines separating the paragraph
|
||||||
|
;; from the LaTeX environment, the LaTeX environment
|
||||||
|
;; shall become a child of the paragraph.
|
||||||
|
([(para :guard #(of-type? % "paragraph"))
|
||||||
|
(tex :guard #(of-type? % "latex-environment"))
|
||||||
|
& rest]
|
||||||
|
:guard #(apply separated-by-explicit-paragraph-break? %))
|
||||||
|
(recur (conj acc (swallow para tex)) rest)
|
||||||
|
;; CASE: Irrelevant or empty!
|
||||||
|
[c & rest]
|
||||||
|
(recur (conj acc c) rest)
|
||||||
|
[] acc))))))
|
||||||
|
|||||||
@@ -49,3 +49,44 @@
|
|||||||
first-paragraph-belongs-to-first-section?))
|
first-paragraph-belongs-to-first-section?))
|
||||||
(t/is (not (-> (parse-resource "first-paragraph-under-heading.org")
|
(t/is (not (-> (parse-resource "first-paragraph-under-heading.org")
|
||||||
first-paragraph-belongs-to-first-section?)))))
|
first-paragraph-belongs-to-first-section?)))))
|
||||||
|
|
||||||
|
(defn walk-types [type & types]
|
||||||
|
[sut/postorder-walker #(apply sut/of-type? % type types)])
|
||||||
|
|
||||||
|
(defn headline-matches? [re]
|
||||||
|
())
|
||||||
|
|
||||||
|
(defn- paragraph-ends-with-latex? [doc]
|
||||||
|
(-> (sp/select-first [(walk-types "paragraph")
|
||||||
|
(sp/must :children)
|
||||||
|
sp/LAST]
|
||||||
|
doc)
|
||||||
|
(sut/of-type? "latex-environment")))
|
||||||
|
|
||||||
|
(defn- paragraph-has-latex? [doc]
|
||||||
|
(sp/select-first [(walk-types "paragraph")
|
||||||
|
(sp/must :children)
|
||||||
|
sp/ALL
|
||||||
|
#(sut/of-type? % "latex-environment")]
|
||||||
|
doc))
|
||||||
|
|
||||||
|
(defn- paragraph-has-multiple-latex? [doc]
|
||||||
|
(let [[interleaved fenceposted]
|
||||||
|
(sp/select [(walk-types "section")
|
||||||
|
(sp/must :children)
|
||||||
|
#(some-> % first (sut/of-type? "headline"))
|
||||||
|
(sp/subselect (sp/view #(drop 1 %))
|
||||||
|
sp/ALL (sp/must :children) sp/ALL
|
||||||
|
(sp/must :type))]
|
||||||
|
doc)]))
|
||||||
|
|
||||||
|
(t/deftest paragraph-separation
|
||||||
|
(t/testing "paragraph ending with latex"
|
||||||
|
(-> (parse-resource "paragraph-ending-with-latex.org")
|
||||||
|
paragraph-ends-with-latex?))
|
||||||
|
(t/testing "paragraph surrounding latex"
|
||||||
|
(-> (parse-resource "paragraph-surrounding-latex.org")
|
||||||
|
paragraph-has-latex?))
|
||||||
|
(t/testing "paragraph with interleaved latex"
|
||||||
|
(let [(parse-resource "paragraph-with-multiple-latex.org")])
|
||||||
|
(t/is (-))))
|
||||||
|
|||||||
@@ -0,0 +1,7 @@
|
|||||||
|
#+title: paragraph ending with latex
|
||||||
|
|
||||||
|
here is the paragraph,
|
||||||
|
\begin{align*}
|
||||||
|
\text{and here} &
|
||||||
|
\\ & \text{is the \LaTeX}
|
||||||
|
\end{align*}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
#+title: paragraph surrounding latex
|
||||||
|
|
||||||
|
first part of paragraph
|
||||||
|
\begin{equation*}
|
||||||
|
\text{some \LaTeX \}:)}
|
||||||
|
\end{equation*}
|
||||||
|
last part of paragraph
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
#+title: paragraph with multiple latex environments
|
||||||
|
|
||||||
|
* interleaved
|
||||||
|
|
||||||
|
first part of paragraph
|
||||||
|
\begin{equation*}
|
||||||
|
\text{first \LaTeX\ environment}
|
||||||
|
\end{equation*}
|
||||||
|
second part of paragraph
|
||||||
|
\begin{equation*}
|
||||||
|
\text{second \LaTeX\ environment}
|
||||||
|
\end{equation*}
|
||||||
|
|
||||||
|
* fenceposted
|
||||||
|
|
||||||
|
first fencepost
|
||||||
|
\begin{equation*}
|
||||||
|
\text{first fenceposted \LaTeX\ environment}
|
||||||
|
\end{equation*}
|
||||||
|
second fencepost
|
||||||
|
\begin{equation*}
|
||||||
|
\text{second fenceposted \LaTeX\ environment}
|
||||||
|
\end{equation*}
|
||||||
|
third fencepost
|
||||||
Reference in New Issue
Block a user