Files
net-deertopia/doerg/src/net/deertopia/doerg/element.clj

213 lines
6.8 KiB
Clojure
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
(ns net.deertopia.doerg.element
(:require [babashka.process :as p]
[net.deertopia.doerg.common :as common]
[clojure.string :as str]
[clojure.zip]
[babashka.fs :as fs]
[clojure.java.io :as io]
[cheshire.core :as json]
[clojure.spec.alpha :as s]
[spec-dict.main :refer [dict]]
[net.deertopia.doerg.config :as cfg]
[com.rpl.specter :as sp]
[clojure.tools.logging.readable :as lr])
(:import (java.util UUID))
(:refer-clojure :exclude [read-string]))
(defonce ^:private uniorg-script-path-atom (atom nil))
(def ^:dynamic *uniorg-timeout-duration*
"Number of milliseconds to wait before killing the external Uniorg
process."
(* 10 1000))
(defn- camel->kebab [s]
(->> (str/split s #"(?<=[a-z])(?=[A-Z])")
(map str/lower-case)
(str/join "-")))
(defn uniorg [& {:keys [in]
:or {in *in*}}]
(let [r (-> (p/process
{:in in :out :string}
"doerg-parser")
(common/deref-with-timeout *uniorg-timeout-duration*))]
(if (zero? (:exit r))
(-> r :out (json/parse-string (comp keyword camel->kebab))))))
(declare gather-first-section)
(defn read-string [s & {:keys [post-processors]
:or {post-processors [gather-first-section]}}]
(let [apply-post-processors (apply comp (reverse post-processors))]
(with-in-str s
(-> (uniorg :in *in*)
apply-post-processors))))
(defn greater-element?
"Return truthy if `e` is a greater org-element; i.e. one that can
have children."
[e]
;; Not 100% sure if this is a valid definition. It seems that
;; Uniorg sets `:children` to an empty vector when a great element
;; lacks children.
(and (map? e) (contains? e :children)))
(defn org-element? [element]
#_
(s/valid? ::org-element element)
(and (map? element)
(contains? element :type)))
(defn of-type?
"Return truthy if the Org node `element` is of type `type`. In the
vararg case, return truthy if `element` is of any of the types
listed."
([element type]
(= (:type element) type))
([element type & types]
(contains? (into #{} (cons type types))
(:type element))))
(defn of-keyword-type? [element key]
(and (of-type? element "keyword")
(= (:key element) key)))
(defn footnotes-section? [element]
(and (of-type? element "section")
(when-some [footnotes-headline (first (:children element))]
(= "Footnotes" (:raw-value footnotes-headline)))))
(defn display-math?
"Return truthy if `element` should be considered display math."
[element]
(or (of-type? element "latex-environment")
(and (of-type? element "latex-fragment")
(-> element :contents (str/starts-with? "\\[")))))
;;; Spec
(s/def ::org-element
(dict {:type string?}
^:opt {:contents-begin nat-int?
:contents-end nat-int?
:children (s/coll-of ::org-element
:kind seq?)}))
;;; Zipper
(defn doerg-zip [document]
(clojure.zip/zipper greater-element?
:children
#(assoc %1 :children %2)
document))
(def children-walker
"Walk each child of an Org element."
[:children sp/ALL])
(def postorder-walker
"Recursively walk each node of an Org element in post-order."
(sp/recursive-path
[] p
(sp/if-path greater-element?
(sp/continue-then-stay children-walker p)
sp/STAY)))
(def postorder-walker*
(sp/recursive-path
[] p
(sp/if-path greater-element?
(sp/continue-then-stay :children p)
sp/STAY)))
;;; Post-processing
(def property-handlers
"A map of node-property keys to functions. The functions will be
called with two arguments: the pre-existing top-level document data
(a map), and the node-property value. The function is expected to
return the document data map with the new property merged in."
{"ID" (fn [data id]
(let [new-id (UUID/fromString id)]
(when (contains? data :id)
(lr/warnf (str "Found multiple :ID: definitions."
" Replacing %s with %s.")
(:id data) new-id))
(assoc data :id new-id)))
"DeertopiaVisibility"
(fn [data visibility]
(let [v (case visibility
"public" :public
"private" :private
"graphonly" :graph-only
(do (lr/warn "Unknown visibility: %s" visibility)
:private))]
(assoc data :net.deertopia/visibility v)))})
(def keyword-handlers
"Like `property-handlers`, but for top-level keywords."
{"TITLE" #(assoc %1 :title %2)})
(defn- apply-handlers [handlers values]
(reduce (fn [data {:keys [key value]}]
(let [f (get handlers key)]
(f data value)))
{}
values))
(defn handle-properties [doc]
(let [props (some-> doc :children first)]
(when (of-type? props "property-drawer")
(->> props
(sp/select [children-walker
#(contains? property-handlers (:key %))])
(apply-handlers property-handlers)))))
(defn handle-keywords [doc]
(->> doc
(sp/select [children-walker
#(and (of-type? % "keyword")
(contains? keyword-handlers (:key %)))])
(apply-handlers keyword-handlers)))
(defn gather-doerg-data [doc]
(assoc doc :net.deertopia.doerg/data
(merge (handle-properties doc)
(handle-keywords doc))))
(defn- split-sections
"Given a list of top-level nodes as spat out by the `uniorg`
parser, return a map with the following keys
• :top-level-nodes The nodes that /should/ be at the top-level.
• :first-section-nodes The nodes that should be wrapped in a new
section node.
• :rest Everything else."
[nodes]
(let [[of-top-level remaining-nodes]
(->> nodes (split-with #(of-type? % "property-drawer" "keyword")))
[of-first-section remaining-nodes*]
(->> remaining-nodes (split-with #(not (of-type? % "section"))))]
{:top-level-nodes of-top-level
:first-section-nodes of-first-section
:rest remaining-nodes*}))
(defn gather-first-section [node]
(assert (of-type? node "org-data")
"`gather-doerg-data` should be applied to the document root.")
(let [{:keys [top-level-nodes first-section-nodes rest]}
(split-sections (:children node))
;; TODO: Construct `:contents-begin` and `:contents-end` data
;; by spanning the children.
new-children (concat top-level-nodes
(list {:type "section"
:children first-section-nodes})
rest)]
(assoc node :children new-children)))