# Patch summary (preamble text before the first "diff --git" header).
#
# Files touched:
#   * doerg/src/net/deertopia/doerg/element.clj
#   * doerg/test/net/deertopia/doerg/element_test.clj
#   * doerg/test/net/deertopia/doerg/element_test/paragraph-ending-with-latex.org   (new)
#   * doerg/test/net/deertopia/doerg/element_test/paragraph-surrounding-latex.org  (new)
#
# element.clj
#   * `gather-latex-paragraphs` is forward-declared next to
#     `gather-first-section` and added to the default `:post-processors`
#     of `read-string`.  The processors are combined with
#     `(apply comp (reverse post-processors))`, so they run in the order
#     listed: `gather-first-section` first, then `gather-latex-paragraphs`.
#   * The private helpers `neighbourly-mapcat` and `=>>` are deleted.
#   * `gather-latex-paragraphs` was a stub returning `()`.  It now holds
#     the `sp/transform` / `match` loop that previously lived in a `let`
#     binding named `blah`.  The loop walks every `:children` vector via
#     `postorder-walker` and rewrites it with three cases:
#       1. paragraph, latex-environment, paragraph -> one `swallow` call
#          on the first paragraph, the LaTeX environment and the
#          *children* of the second paragraph (the old code passed the
#          second paragraph itself; the in-code comment says nested
#          paragraphs are not supported by HTML);
#       2. paragraph, latex-environment -> `(swallow para tex)`;
#       3. anything else is copied through unchanged; an empty vector
#          ends the loop and returns the accumulator.
#
# element_test.clj
#   * `first-paragraph-ends-with-latex?` selects the last child of the
#     first paragraph found by the walker and checks that it is of type
#     "latex-environment".
#   * `first-paragraph-has-latex?` selects the first "latex-environment"
#     child of a paragraph.
#   * The `paragraph-separation` deftest runs one predicate against each
#     of the two new fixtures.  In both fixtures the text and the LaTeX
#     environment are written with no blank line between them.
#
# NOTE(review): the docstring of `separated-by-explicit-paragraph-break?`
#   (context line below) says it is truthy when elements ARE separated by
#   a blank line, while the new CASE comments say the merge happens when
#   there are NO blank lines.  The function body is not part of this
#   patch -- confirm the guard polarity against the implementation.
# NOTE(review): the outer `:guard` wraps the whole vector pattern, so the
#   guard function presumably receives every remaining child (including
#   the `& rest` tail), not only the two or three elements being merged.
#   Verify that this is intended.
# NOTE(review): after this patch the leftover form reads
#   `(let [r (atom []) blah] @r)`, i.e. an odd number of binding forms.
#   Presumably it sits inside a `(comment ...)` scratch form and is never
#   compiled -- confirm, or remove the dangling `blah`.
# NOTE(review): "Returh truthy" in the docstring context line is a typo
#   in the target file.  It is left untouched here because changing a
#   context line would stop the hunk from matching.
# NOTE(review): the hunks below appear with their line breaks collapsed
#   into spaces; the original line structure would need to be restored
#   before the patch could be applied.
#
diff --git a/doerg/src/net/deertopia/doerg/element.clj b/doerg/src/net/deertopia/doerg/element.clj index 82e6dd8..75e9604 100644 --- a/doerg/src/net/deertopia/doerg/element.clj +++ b/doerg/src/net/deertopia/doerg/element.clj @@ -37,14 +37,16 @@ (if (zero? (:exit r)) (-> r :out (json/parse-string (comp keyword camel->kebab)))))) -(declare gather-first-section) +(declare gather-first-section gather-latex-paragraphs) -(defn read-string [s & {:keys [post-processors] - :or {post-processors [gather-first-section]}}] +(defn read-string + [s & {:keys [post-processors] + :or {post-processors [gather-first-section + gather-latex-paragraphs]}}] (let [apply-post-processors (apply comp (reverse post-processors))] (with-in-str s - (-> (uniorg :in *in*) - apply-post-processors)))) + (-> (uniorg :in *in*) + apply-post-processors)))) @@ -212,25 +214,6 @@ rest)] (assoc node :children new-children))) -(defn- neighbourly-mapcat [coll f] - (let [rest-coll (rest coll)] - (map f - coll - rest-coll - (concat (rest rest-coll) [nil])))) - -(defn- =>> [wa-loc wa-loc->b] - (if-some [loc₀ (z/down wa-loc)] - (loop [loc-read loc₀ - loc-write loc₀] - (let [loc-write' (z/replace loc-write (wa-loc->b loc-read)) - next-loc-read (z/next loc-read) - next-loc-write (z/next loc-write')] - (if (z/end? next-loc-read) - (z/up loc-write') - (recur next-loc-read next-loc-write)))) - wa-loc)) - (defn separated-by-explicit-paragraph-break? "Returh truthy if each successive pair of elements is separated by at least one explicit paragraph break; i.e. a blank line." @@ -257,30 +240,43 @@ (def doc (read-string (slurp some-org-file))) (let [r (atom []) - blah - (sp/transform - [postorder-walker - (sp/must :children)] - (fn [children] - (loop [acc [] - cs (vec children)] - (match cs - ([(para₁ :guard #(of-type? % "paragraph")) - (tex :guard #(of-type? % "latex-environment")) - (para₂ :guard #(of-type? % "paragraph")) - & rest] - :guard #(apply separated-by-explicit-paragraph-break? 
%)) - (recur (conj acc (swallow para₁ tex para₂)) rest) - ([(para :guard #(of-type? % "paragraph")) - (tex :guard #(of-type? % "latex-environment")) - & rest] - :guard #(apply separated-by-explicit-paragraph-break? %)) - (recur (conj acc (swallow para tex)) rest) - [c & rest] - (recur (conj acc c) rest) - [] acc))) - doc)] + blah] @r)) (defn gather-latex-paragraphs [node] - ()) + (->> node + (sp/transform + [postorder-walker (sp/must :children)] + (fn [children] + (loop [acc [] + cs (vec children)] + (match cs + ;; CASE: A paragraph followed by a LaTeX environment + ;; followed by a paragraph. If there are no blank lines + ;; separating the three elements, absorb them into a + ;; single paragraph spanning the sum of their parts. + ([(para₁ :guard #(of-type? % "paragraph")) + (tex :guard #(of-type? % "latex-environment")) + (para₂ :guard #(of-type? % "paragraph")) + & rest] + :guard #(apply separated-by-explicit-paragraph-break? %)) + (recur (conj acc + ;; Swallow para₂'s /children/, + ;; not para₂ itself. Nested + ;; paragraphs are not supported + ;; by HTML. + (apply swallow para₁ tex (:children para₂))) + rest) + ;; CASE: A paragraph followed by a LaTeX environment. + ;; If there are no blank lines separating the paragraph + ;; from the LaTeX environment, the LaTeX environment + ;; shall become a child of the paragraph. + ([(para :guard #(of-type? % "paragraph")) + (tex :guard #(of-type? % "latex-environment")) + & rest] + :guard #(apply separated-by-explicit-paragraph-break? %)) + (recur (conj acc (swallow para tex)) rest) + ;; CASE: Irrelevant or empty! 
+ [c & rest] + (recur (conj acc c) rest) + [] acc)))))) diff --git a/doerg/test/net/deertopia/doerg/element_test.clj b/doerg/test/net/deertopia/doerg/element_test.clj index 3406bb1..a4f732b 100644 --- a/doerg/test/net/deertopia/doerg/element_test.clj +++ b/doerg/test/net/deertopia/doerg/element_test.clj @@ -49,3 +49,26 @@ first-paragraph-belongs-to-first-section?)) (t/is (not (-> (parse-resource "first-paragraph-under-heading.org") first-paragraph-belongs-to-first-section?))))) + +(defn- first-paragraph-ends-with-latex? [doc] + (-> (sp/select-first [sut/postorder-walker + #(sut/of-type? % "paragraph") + (sp/must :children) + sp/LAST] + doc) + (sut/of-type? "latex-environment"))) + +(defn- first-paragraph-has-latex? [doc] + (sp/select-first [sut/postorder-walker + #(sut/of-type? % "paragraph") + (sp/must :children) + #(sut/of-type? % "latex-environment")] + doc)) + +(t/deftest paragraph-separation + (t/testing "paragraph ending with latex" + (t/is (->> (parse-resource "paragraph-ending-with-latex.org") + first-paragraph-ends-with-latex?))) + (t/testing "paragraph surrounding latex" + (t/is (->> (parse-resource "paragraph-surrounding-latex.org") + first-paragraph-has-latex?)))) diff --git a/doerg/test/net/deertopia/doerg/element_test/paragraph-ending-with-latex.org b/doerg/test/net/deertopia/doerg/element_test/paragraph-ending-with-latex.org new file mode 100644 index 0000000..8f1eebb --- /dev/null +++ b/doerg/test/net/deertopia/doerg/element_test/paragraph-ending-with-latex.org @@ -0,0 +1,7 @@ +#+title: paragraph ending with latex + +here is the paragraph, +\begin{align*} +\text{and here} & +\\ & \text{is the \LaTeX} +\end{align*} diff --git a/doerg/test/net/deertopia/doerg/element_test/paragraph-surrounding-latex.org b/doerg/test/net/deertopia/doerg/element_test/paragraph-surrounding-latex.org new file mode 100644 index 0000000..cdffa8f --- /dev/null +++ b/doerg/test/net/deertopia/doerg/element_test/paragraph-surrounding-latex.org @@ -0,0 +1,7 @@ +#+title: 
paragraph surrounding latex + +first part of paragraph +\begin{equation*} +\text{some \LaTeX \}:)} +\end{equation*} +last part of paragraph