Files
comp-syntax-gu-mlt/scripts/syllables-to-jamo

45 lines
1.3 KiB
Clojure
Executable File

#!/usr/bin/env bb
(require '[babashka.fs :as fs]
'[clojure.string :as str])
(defn syllables->jamo
  "Convert precomposed Hangul syllables in string `s` to conjoining jamo.

  Every UTF-16 code unit of `s` in the range U+AC00..U+D7A3 is replaced by
  its leading consonant (U+1100 + L), its vowel (U+1161 + V) and, when the
  syllable has one, its trailing consonant (U+11A7 + T). All other code
  units are copied through unchanged. Returns a string."
  [s]
  (->> s
       (map int)
       (mapcat
        (fn [c]
          (if (<= 0xAC00 c 0xD7A3)
            ;; Syllable index = (L * 21 + V) * 28 + T: 21 vowels and 28
            ;; trailing slots, where slot 0 means "no trailing consonant".
            (let [code   (- c 0xAC00)
                  lead   (quot code (* 21 28))
                  medial (quot (mod code (* 21 28)) 28)
                  final  (mod code 28)]
              (cond-> [(+ 0x1100 lead) (+ 0x1161 medial)]
                ;; Emit a trailing jamo only when the syllable has one.
                (pos? final) (conj (+ 0x11A7 final))))
            ;; Not a Hangul syllable: keep the code unit as-is.
            [c])))
       (map char)
       (apply str)))
(defn -main
  "Decompose Hangul syllables to jamo.

  With arguments, each one is treated as a file path: the file is read,
  every run of Hangul-syllable-block characters is passed through
  `syllables->jamo`, and the result is written back to the same path.
  Without arguments, lines are read from standard input until end of
  input and each converted line is printed to standard output."
  [& args]
  (let [convert (fn [text]
                  (str/replace text
                               #"\p{block=HangulSyllables}+"
                               syllables->jamo))]
    (if (empty? args)
      ;; Filter mode: stdin -> stdout, one line at a time.
      (loop []
        (when-some [line (read-line)]
          (println (convert line))
          (recur)))
      ;; In-place mode: rewrite every named file.
      (doseq [path args]
        (spit path (convert (slurp path)))))))
;; Call -main with the command-line arguments only when this file is the
;; script Babashka was asked to run (its path matches the `babashka.file`
;; system property), not when it is loaded from another file.
(let [invoked-script (System/getProperty "babashka.file")]
  (when (= invoked-script *file*)
    (apply -main *command-line-args*)))