forked from GitHub/comp-syntax-gu-mlt
hangul syllables
This commit is contained in:
44
scripts/syllables-to-jamo
Executable file
44
scripts/syllables-to-jamo
Executable file
@@ -0,0 +1,44 @@
|
||||
#!/usr/bin/env bb
|
||||
|
||||
(require '[babashka.fs :as fs]
|
||||
'[clojure.string :as str])
|
||||
|
||||
(defn syllables->jamo
  "Convert precomposed Hangul syllables in string S to their jamo components.

  Every character in the range U+AC00..U+D7A3 is replaced by its leading
  consonant (U+1100 + lead index), its vowel (U+1161 + medial index) and,
  when it has one, its trailing consonant (U+11A7 + final index). All other
  characters are passed through unchanged. Returns a new string."
  [s]
  (->> s
       (map int)
       (mapcat
        (fn [c]
          (if (<= 0xAC00 c 0xD7A3)
            ;; Hangul syllable - decompose.
            ;; code = (lead * 21 + medial) * 28 + final
            (let [code   (- c 0xAC00)
                  lead   (quot code (* 21 28))
                  medial (quot (mod code (* 21 28)) 28)
                  final  (mod code 28)]
              (cond-> [(+ 0x1100 lead) (+ 0x1161 medial)]
                ;; A final index of 0 means the syllable has no trailing
                ;; consonant, so nothing is appended in that case.
                (pos? final) (conj (+ 0x11A7 final))))
            ;; Not a Hangul syllable - keep the code point as is.
            [c])))
       (map char)
       (apply str)))
|
||||
|
||||
|
||||
(defn -main
  "Entry point.

  With file arguments, rewrites each file in place, replacing every run of
  characters from the Hangul Syllables block with the result of
  `syllables->jamo`. Without arguments, does the same line by line from
  standard input, printing each converted line."
  [& args]
  (let [convert (fn [text]
                  (str/replace text
                               #"\p{block=HangulSyllables}+"
                               syllables->jamo))]
    (if (empty? args)
      ;; Filter mode: read lines until read-line returns nil.
      (loop []
        (when-let [line (read-line)]
          (println (convert line))
          (recur)))
      ;; File mode: read each file, convert it, and write it back.
      (run! (fn [path]
              (let [converted (convert (slurp path))]
                (spit path converted)))
            args))))
|
||||
|
||||
;; Call -main with the command-line arguments only when the file being
;; evaluated is the one named by the "babashka.file" system property
;; (presumably set by babashka to the invoked script, so that merely loading
;; this file does not run it).
(let [invoked-script (System/getProperty "babashka.file")]
  (when (= *file* invoked-script)
    (apply -main *command-line-args*)))
|
||||
Reference in New Issue
Block a user