forked from GitHub/comp-syntax-gu-mlt
hangul syllables
This commit is contained in:
@@ -33,6 +33,7 @@
|
|||||||
inputs.gf.packages.${system}.gf-with-rgl
|
inputs.gf.packages.${system}.gf-with-rgl
|
||||||
pkgs.graphviz
|
pkgs.graphviz
|
||||||
gf-lsp.gf-lsp
|
gf-lsp.gf-lsp
|
||||||
|
pkgs.babashka
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|||||||
6
lab1/grammar/korean/HangulCoding.gf
Normal file
6
lab1/grammar/korean/HangulCoding.gf
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
resource HangulCoding = open HangulCodingRes in {
|
||||||
|
flags coding=utf8 ;
|
||||||
|
oper
|
||||||
|
this_uses_syllables : Str = coding "가" ;
|
||||||
|
this_uses_jamo : Str = coding "가" ;
|
||||||
|
}
|
||||||
9
lab1/grammar/korean/HangulCodingRes.gf
Normal file
9
lab1/grammar/korean/HangulCodingRes.gf
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
resource HangulCodingRes = {
|
||||||
|
flags coding=utf8 ;
|
||||||
|
oper
|
||||||
|
coding : Str -> Str
|
||||||
|
= \s -> case s of {
|
||||||
|
"가" => "SYLLABLES" ;
|
||||||
|
"가" => "JAMO"
|
||||||
|
} ;
|
||||||
|
}
|
||||||
58
lab1/grammar/korean/HangulJamo.gf
Normal file
58
lab1/grammar/korean/HangulJamo.gf
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
resource HangulJamo = open Prelude in {
|
||||||
|
flags coding=utf8 ;
|
||||||
|
oper
|
||||||
|
lemmaToStem : (lemma : Str) -> Str
|
||||||
|
= \lemma -> case lemma of {
|
||||||
|
stem + "다" => stem ;
|
||||||
|
_ => Predef.error ("lemmaToStem was applied to a non-lemma," ++ lemma)
|
||||||
|
} ;
|
||||||
|
|
||||||
|
-- infinitive: build the 아/어 ("infinitive") form of a verb stem.
--
--   stem : verb stem, i.e. the lemma without its final 다. The vowel and
--          batchim patterns referenced below are conjoining jamo, so the
--          stem is presumably expected in decomposed (jamo) spelling
--          -- TODO confirm against callers.
--
-- Branches are tried top to bottom and the first match wins:
--   1. 하-stems contract to 해.
--   2. last vowel ᅡ/ᅩ followed by a batchim: append 아.
--   3. stem already ends in ᅡ or ᅥ: the ending is absorbed, stem unchanged.
--   4. final ᅩ contracts to ᅪ, ᅳ to ᅥ, ᅵ to ᅧ, ᅮ to ᅯ.
--   5. everything else: append 어.
infinitive : (stem : Str) -> Str
  = \stem -> case stem of {
      -- Must precede the bare-vowel rule: a 하 written in conjoining jamo
      -- ends in ᅡ and would otherwise be returned unchanged, so the 해
      -- contraction would never be produced.
      init + #ha          => init + "해" ;
      _ + #a_o + #batchim => stem + "아" ;
      _ + (#a | #eo)      => stem ;
      init + #o           => init + "ᅪ" ;
      init + #eu          => init + "ᅥ" ;
      -- Conjoining jungseong ᅧ (U+1167), not the compatibility letter
      -- U+3155, so that it combines with the preceding initial consonant
      -- like the other contracted vowels in this table.
      init + #i           => init + "ᅧ" ;
      init + #u           => init + "ᅯ" ;
      _                   => stem + "어"
    } ;
|
||||||
|
|
||||||
|
present_haeyo : (stem : Str) -> Str
|
||||||
|
= \stem -> infinitive stem + "요" ;
|
||||||
|
|
||||||
|
past_haeyo : (stem : Str) -> Str
|
||||||
|
= \stem -> infinitive stem + "ᆻ어요" ;
|
||||||
|
|
||||||
|
oper
|
||||||
|
a_o : pattern Str = #("ᅡ" | "ᅩ") ;
|
||||||
|
a : pattern Str = #"ᅡ" ;
|
||||||
|
o : pattern Str = #"ᅩ" ;
|
||||||
|
eo : pattern Str = #"ᅥ" ;
|
||||||
|
eu : pattern Str = #"ᅳ" ;
|
||||||
|
i : pattern Str = #"ᅵ" ;
|
||||||
|
u : pattern Str = #"ᅮ" ;
|
||||||
|
ha : pattern Str = #"하" ;
|
||||||
|
|
||||||
|
oper
|
||||||
|
consonant : pattern Str =
|
||||||
|
#("ᄀ" | "ᄁ" | "ᄂ" | "ᄃ" | "ᄄ" | "ᄅ" | "ᄆ" | "ᄇ"
|
||||||
|
| "ᄈ" | "ᄉ" | "ᄊ" | "ᄋ" | "ᄌ" | "ᄍ" | "ᄎ" | "ᄏ"
|
||||||
|
| "ᄐ" | "ᄑ" | "ᄒ" | "ᆨ" | "ᆩ" | "ᆪ" | "ᆫ" | "ᆬ"
|
||||||
|
| "ᆭ" | "ᆮ" | "ᆯ" | "ᆰ" | "ᆱ" | "ᆲ" | "ᆳ" | "ᆴ"
|
||||||
|
| "ᆵ" | "ᆶ" | "ᆷ" | "ᆸ" | "ᆹ" | "ᆺ" | "ᆻ" | "ᆼ"
|
||||||
|
| "ᆽ" | "ᆾ" | "ᆿ" | "ᇀ" | "ᇁ" | "ᇂ") ;
|
||||||
|
batchim : pattern Str =
|
||||||
|
#("ᆨ" | "ᆩ" | "ᆪ" | "ᆫ" | "ᆬ" | "ᆭ" | "ᆮ" | "ᆯ" | "ᆰ"
|
||||||
|
| "ᆱ" | "ᆲ" | "ᆳ" | "ᆴ" | "ᆵ" | "ᆶ" | "ᆷ" | "ᆸ" | "ᆹ"
|
||||||
|
| "ᆺ" | "ᆻ" | "ᆼ" | "ᆽ" | "ᆾ" | "ᆿ" | "ᇀ" | "ᇁ" | "ᇂ") ;
|
||||||
|
choseong : pattern Str =
|
||||||
|
#("ᄀ" | "ᄁ" | "ᄂ" | "ᄃ" | "ᄄ" | "ᄅ" | "ᄆ" | "ᄇ" | "ᄈ"
|
||||||
|
| "ᄉ" | "ᄊ" | "ᄋ" | "ᄌ" | "ᄍ" | "ᄎ" | "ᄏ" | "ᄐ" | "ᄑ"
|
||||||
|
| "ᄒ") ;
|
||||||
|
vowel : pattern Str =
|
||||||
|
#("ᅡ" | "ᅢ" | "ᅣ" | "ᅤ" | "ᅥ" | "ᅦ" | "ᅧ" | "ᅨ" | "ᅩ"
|
||||||
|
| "ᅪ" | "ᅫ" | "ᅬ" | "ᅭ" | "ᅮ" | "ᅯ" | "ᅰ" | "ᅱ" | "ᅲ"
|
||||||
|
| "ᅳ" | "ᅴ" | "ᅵ") ;
|
||||||
|
}
|
||||||
@@ -1,6 +1,8 @@
|
|||||||
--# -path=.:../abstract
|
--# -path=.:../abstract
|
||||||
concrete MicroLangKor of MicroLang = open MicroResKor, Prelude in {
|
concrete MicroLangKor of MicroLang = open MicroResKor, Prelude in {
|
||||||
|
|
||||||
|
flags coding=utf8 ;
|
||||||
|
|
||||||
-----------------------------------------------------
|
-----------------------------------------------------
|
||||||
---------------- Grammar part -----------------------
|
---------------- Grammar part -----------------------
|
||||||
-----------------------------------------------------
|
-----------------------------------------------------
|
||||||
@@ -35,101 +37,102 @@ concrete MicroLangKor of MicroLang = open MicroResKor, Prelude in {
|
|||||||
aPl_Det = {s = []} ;
|
aPl_Det = {s = []} ;
|
||||||
the_Det = {s = []} ;
|
the_Det = {s = []} ;
|
||||||
thePl_Det = {s = []} ;
|
thePl_Det = {s = []} ;
|
||||||
this_Det = {s = "이"} ;
|
this_Det = {s = "이"} ;
|
||||||
thisPl_Det = {s = "이"} ;
|
thisPl_Det = {s = "이"} ;
|
||||||
that_Det = {s = "그"} ;
|
that_Det = {s = "그"} ;
|
||||||
thatPl_Det = {s = "그"} ;
|
thatPl_Det = {s = "그"} ;
|
||||||
|
|
||||||
-----------------------------------------------------
|
-----------------------------------------------------
|
||||||
---------------- Lexicon part -----------------------
|
---------------- Lexicon part -----------------------
|
||||||
-----------------------------------------------------
|
-----------------------------------------------------
|
||||||
|
|
||||||
-- lin already_Adv = mkAdv "벌써" ;
|
-- lin already_Adv = mkAdv "벌써" ;
|
||||||
lin animal_N = mkN "동물" ;
|
lin animal_N = mkN "동물" ;
|
||||||
lin apple_N = mkN "사과" ;
|
lin apple_N = mkN "사과" ;
|
||||||
lin baby_N = mkN "아기" ;
|
lin baby_N = mkN "아기" ;
|
||||||
-- lin bad_A = mkA "나쁜" ;
|
-- lin bad_A = mkA "나쁜" ;
|
||||||
lin beer_N = mkN "beer" ;
|
-- lin beer_N = mkN "beer" ;
|
||||||
-- lin big_A = mkA "큰" ;
|
-- lin big_A = mkA "큰" ;
|
||||||
lin bike_N = mkN "bike" ;
|
-- lin bike_N = mkN "bike" ;
|
||||||
lin bird_N = mkN "bird" ;
|
-- lin bird_N = mkN "bird" ;
|
||||||
-- lin black_A = mkA "black" ;
|
-- lin black_A = mkA "black" ;
|
||||||
lin blood_N = mkN "피" ;
|
lin blood_N = mkN "피" ;
|
||||||
-- lin blue_A = mkA "blue" ;
|
-- lin blue_A = mkA "blue" ;
|
||||||
lin boat_N = mkN "boat" ;
|
-- lin boat_N = mkN "boat" ;
|
||||||
lin book_N = mkN "책" ;
|
lin book_N = mkN "책" ;
|
||||||
lin boy_N = mkN "소녁" ;
|
lin boy_N = mkN "소년" ; -- "boy" is 소년; the previous spelling 소녁 is not a Korean word
|
||||||
lin bread_N = mkN "bread" ;
|
lin bread_N = mkN "빵" ;
|
||||||
-- lin break_V2 = mkV2 (mkV "break" "broke" "broken") ;
|
-- lin break_V2 = mkV2 (mkV "break" "broke" "broken") ;
|
||||||
-- lin buy_V2 = mkV2 (mkV "buy" "bought" "bought") ;
|
-- lin buy_V2 = mkV2 (mkV "buy" "bought" "bought") ;
|
||||||
lin car_N = mkN "자동차" ;
|
lin car_N = mkN "자동차" ;
|
||||||
lin cat_N = mkN "고양이" ;
|
lin cat_N = mkN "고양이" ;
|
||||||
lin child_N = mkN "어린이" ;
|
lin child_N = mkN "어린이" ;
|
||||||
lin city_N = mkN "city" ;
|
-- lin city_N = mkN "city" ;
|
||||||
-- lin clean_A = mkA "정소한" ;
|
-- lin clean_A = mkA "정소한" ;
|
||||||
-- lin clever_A = mkA "똑똑한" ;
|
-- lin clever_A = mkA "똑똑한" ;
|
||||||
lin cloud_N = mkN "cloud" ;
|
-- lin cloud_N = mkN "cloud" ;
|
||||||
-- lin cold_A = mkA "차가운" ;
|
-- lin cold_A = mkA "차가운" ;
|
||||||
lin come_V = regVerb_a_o "오다" ;
|
-- lin come_V = regVerb "오다" ; -- JAMO
|
||||||
lin computer_N = mkN "컴퓨터" ;
|
lin come_V = regVerb "오다" ; -- SYLLABLES
|
||||||
lin cow_N = mkN "cow" ;
|
lin computer_N = mkN "컴퓨터" ;
|
||||||
-- lin dirty_A = mkA "더러운" ;
|
-- lin cow_N = mkN "cow" ;
|
||||||
lin dog_N = mkN "개" ;
|
-- lin dirty_A = mkA "더러운" ;
|
||||||
|
lin dog_N = mkN "개" ;
|
||||||
-- lin drink_V2 = mkV2 (mkV "drink" "drank" "drunk") ;
|
-- lin drink_V2 = mkV2 (mkV "drink" "drank" "drunk") ;
|
||||||
-- lin eat_V2 = mkV2 (mkV "eat" "ate" "eaten") ;
|
-- lin eat_V2 = mkV2 (mkV "eat" "ate" "eaten") ;
|
||||||
-- lin find_V2 = mkV2 (mkV "find" "found" "found") ;
|
-- lin find_V2 = mkV2 (mkV "find" "found" "found") ;
|
||||||
lin fire_N = mkN "fire" ;
|
-- lin fire_N = mkN "fire" ;
|
||||||
lin fish_N = mkN "생선" ;
|
lin fish_N = mkN "생선" ;
|
||||||
lin flower_N = mkN "flower" ;
|
-- lin flower_N = mkN "flower" ;
|
||||||
lin friend_N = mkN "진구" ;
|
lin friend_N = mkN "친구" ; -- "friend" is 친구; the previous spelling 진구 was a typo
|
||||||
lin girl_N = mkN "소녀" ;
|
lin girl_N = mkN "소녀" ;
|
||||||
-- lin good_A = mkA "좋은" ;
|
-- lin good_A = mkA "좋은" ;
|
||||||
-- lin go_V = mkV "go" "went" "gone" ;
|
-- lin go_V = mkV "go" "went" "gone" ;
|
||||||
lin grammar_N = mkN "grammar" ;
|
-- lin grammar_N = mkN "grammar" ;
|
||||||
-- lin green_A = mkA "green" ;
|
-- lin green_A = mkA "green" ;
|
||||||
-- lin heavy_A = mkA "heavy" ;
|
-- lin heavy_A = mkA "heavy" ;
|
||||||
lin horse_N = mkN "horse" ;
|
-- lin horse_N = mkN "horse" ;
|
||||||
-- lin hot_A = mkA "hot" ;
|
-- lin hot_A = mkA "hot" ;
|
||||||
lin house_N = mkN "집" ;
|
lin house_N = mkN "집" ;
|
||||||
-- lin john_PN = mkPN "John" ;
|
-- lin john_PN = mkPN "John" ;
|
||||||
-- lin jump_V = mkV "jump" ;
|
-- lin jump_V = mkV "jump" ;
|
||||||
-- lin kill_V2 = mkV2 "kill" ;
|
-- lin kill_V2 = mkV2 "kill" ;
|
||||||
-- lin know_VS = mkVS (mkV "know" "knew" "known") ;
|
-- lin know_VS = mkVS (mkV "know" "knew" "known") ;
|
||||||
lin language_N = mkN "언어" ;
|
lin language_N = mkN "언어" ;
|
||||||
-- lin live_V = mkV "live" ;
|
-- lin live_V = mkV "live" ;
|
||||||
-- lin love_V2 = mkV2 (mkV "love") ;
|
-- lin love_V2 = mkV2 (mkV "love") ;
|
||||||
lin man_N = mkN "남자" ;
|
lin man_N = mkN "남자" ;
|
||||||
lin milk_N = mkN "우유" ;
|
lin milk_N = mkN "우유" ;
|
||||||
lin music_N = mkN "음악" ;
|
lin music_N = mkN "음악" ;
|
||||||
-- lin new_A = mkA "new" ;
|
-- lin new_A = mkA "new" ;
|
||||||
-- lin now_Adv = mkAdv "지금" ;
|
-- lin now_Adv = mkAdv "지금" ;
|
||||||
-- lin old_A = mkA "낡안" ;
|
-- lin old_A = mkA "낡안" ;
|
||||||
-- lin paris_PN = mkPN "Paris" ;
|
-- lin paris_PN = mkPN "Paris" ;
|
||||||
-- lin play_V = mkV "놀" ;
|
-- lin play_V = mkV "놀" ;
|
||||||
-- lin read_V2 = mkV2 (mkV "read" "read" "read") ;
|
-- lin read_V2 = mkV2 (mkV "read" "read" "read") ;
|
||||||
-- lin ready_A = mkA "ready" ;
|
-- lin ready_A = mkA "ready" ;
|
||||||
-- lin red_A = mkA "red" ;
|
-- lin red_A = mkA "red" ;
|
||||||
lin river_N = mkN "river" ;
|
lin river_N = mkN "river" ;
|
||||||
-- lin run_V = mkV "run" "ran" "run" ;
|
-- lin run_V = mkV "run" "ran" "run" ;
|
||||||
lin sea_N = mkN "바다" ;
|
lin sea_N = mkN "바다" ;
|
||||||
-- lin see_V2 = mkV2 (mkV "see" "saw" "seen") ;
|
-- lin see_V2 = mkV2 (mkV "see" "saw" "seen") ;
|
||||||
lin ship_N = mkN "ship" ;
|
-- lin ship_N = mkN "ship" ;
|
||||||
lin sleep_V = regVerb_a_o "자다" ;
|
-- lin sleep_V = regVerb "자다" ;
|
||||||
-- lin small_A = mkA "작은" ;
|
-- lin small_A = mkA "작은" ;
|
||||||
lin star_N = mkN "별" ;
|
lin star_N = mkN "별" ;
|
||||||
-- lin swim_V = mkV "swim" "swam" "swum" ;
|
-- lin swim_V = mkV "swim" "swam" "swum" ;
|
||||||
-- lin teach_V2 = mkV2 (mkV "teach" "taught" "taught") ;
|
-- lin teach_V2 = mkV2 (mkV "teach" "taught" "taught") ;
|
||||||
lin train_N = mkN "train" ;
|
-- lin train_N = mkN "train" ;
|
||||||
-- lin travel_V = mkV "travel" ;
|
-- lin travel_V = mkV "travel" ;
|
||||||
lin tree_N = mkN "tree" ;
|
-- lin tree_N = mkN "tree" ;
|
||||||
-- lin understand_V2 = mkV2 (mkV "understand" "understood" "understood") ;
|
-- lin understand_V2 = mkV2 (mkV "understand" "understood" "understood") ;
|
||||||
-- lin wait_V2 = mkV2 "wait" "for" ;
|
-- lin wait_V2 = mkV2 "wait" "for" ;
|
||||||
-- lin walk_V = mkV "walk" ;
|
-- lin walk_V = mkV "walk" ;
|
||||||
-- lin warm_A = mkA "따뜻한" ;
|
-- lin warm_A = mkA "따뜻한" ;
|
||||||
lin water_N = mkN "물" ;
|
lin water_N = mkN "물" ;
|
||||||
-- lin white_A = mkA "하얗은" ;
|
-- lin white_A = mkA "하얗은" ;
|
||||||
lin wine_N = mkN "wine" ;
|
-- lin wine_N = mkN "wine" ;
|
||||||
lin woman_N = mkN "여자" ;
|
lin woman_N = mkN "여자" ;
|
||||||
-- lin yellow_A = mkA "yellow" ;
|
-- lin yellow_A = mkA "yellow" ;
|
||||||
-- lin young_A = mkA "young" ;
|
-- lin young_A = mkA "young" ;
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
resource MicroResKor = open Prelude, Hangul in {
|
resource MicroResKor = open Prelude, HangulJamo in {
|
||||||
param
|
param
|
||||||
VForm = Lemma | VPresent | VPast ;
|
VForm = Lemma | VPresent | VPast ;
|
||||||
|
|
||||||
@@ -10,35 +10,18 @@ resource MicroResKor = open Prelude, Hangul in {
|
|||||||
|
|
||||||
lemmaToStem : (lemma : Str) -> Str
|
lemmaToStem : (lemma : Str) -> Str
|
||||||
= \lemma -> case lemma of {
|
= \lemma -> case lemma of {
|
||||||
stem + "다" => stem ;
|
stem + "다" => stem ;
|
||||||
_ => Predef.error ("lemmaToStem was applied to a non-lemma," ++ lemma)
|
_ => Predef.error ("lemmaToStem was applied to a non-lemma," ++ lemma)
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
regVerb_eo : (lemma : Str) -> Verb
|
regVerb : (lemma : Str) -> Verb
|
||||||
= \lemma ->
|
= \lemma ->
|
||||||
let stem = lemmaToStem lemma ;
|
let stem = lemmaToStem lemma ;
|
||||||
in {
|
in {
|
||||||
s = table {
|
s = table {
|
||||||
Lemma => lemma ;
|
Lemma => lemma ;
|
||||||
VPresent => stem + "어요" ;
|
VPresent => present_haeyo stem ;
|
||||||
VPast => stem + "었어요"
|
VPast => past_haeyo stem
|
||||||
}
|
}
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
regVerb_a_o : (lemma : Str) -> Verb
|
|
||||||
= \lemma ->
|
|
||||||
let stem = lemmaToStem lemma ;
|
|
||||||
in {
|
|
||||||
s = table {
|
|
||||||
Lemma => lemma ;
|
|
||||||
VPresent => stem + "아요" ;
|
|
||||||
VPast => stem + "았어요"
|
|
||||||
}
|
|
||||||
} ;
|
|
||||||
|
|
||||||
blah : Str -> Str
|
|
||||||
= \s -> case s of {
|
|
||||||
_ + "ᅡ" => "아-final" ;
|
|
||||||
_ => "not 아-final"
|
|
||||||
} ;
|
|
||||||
}
|
}
|
||||||
|
|||||||
12
scripts/search-hangul
Executable file
12
scripts/search-hangul
Executable file
@@ -0,0 +1,12 @@
|
|||||||
|
#!/usr/bin/env bb
|
||||||
|
|
||||||
|
(require '[babashka.fs :as fs]
|
||||||
|
'[clojure.string :as str])
|
||||||
|
|
||||||
|
;; Scan every .gf file under lab1/grammar/korean and print each line that
;; contains a character from the Hangul Jamo block ("JAMO: ...") and/or from
;; the Hangul Syllables block ("SYLLABLE: ..."). A line containing both kinds
;; is reported twice, once per category.
(doseq [f (fs/glob "lab1/grammar/korean" "**.gf")
        ;; map-indexed counts from 0; report 1-based line numbers so the
        ;; file:line output matches what editors and grep show.
        [line-number line] (map-indexed (fn [i x] [(inc i) x])
                                        (-> f str slurp str/split-lines))]
  (when (re-find #"\p{block=HangulJamo}" line)
    (printf "JAMO: %s:%d: %s\n" (str f) line-number line))
  (when (re-find #"\p{block=HangulSyllables}" line)
    (printf "SYLLABLE: %s:%d: %s\n" (str f) line-number line)))
|
||||||
44
scripts/syllables-to-jamo
Executable file
44
scripts/syllables-to-jamo
Executable file
@@ -0,0 +1,44 @@
|
|||||||
|
#!/usr/bin/env bb
|
||||||
|
|
||||||
|
(require '[babashka.fs :as fs]
|
||||||
|
'[clojure.string :as str])
|
||||||
|
|
||||||
|
(defn syllables->jamo
  "Convert Hangul syllables in string S to their jamo components.

  Each precomposed syllable (U+AC00..U+D7A3) is replaced by its leading
  consonant (from U+1100), vowel (from U+1161) and, when present, trailing
  consonant (from U+11A8). Every other character is passed through unchanged."
  [s]
  (->> s
       (map int)
       (mapcat
        (fn [c]
          (if (<= 0xAC00 c 0xD7A3)
            ;; Hangul syllable - decompose arithmetically:
            ;; index = (lead * 21 + medial) * 28 + final
            (let [code   (- c 0xAC00)
                  lead   (quot code (* 21 28))
                  medial (quot (mod code (* 21 28)) 28)
                  final  (mod code 28)]
              ;; final index 0 means the syllable has no trailing consonant
              (cond-> [(+ 0x1100 lead) (+ 0x1161 medial)]
                (pos? final) (conj (+ 0x11A7 final))))
            ;; Not a Hangul syllable
            [c])))
       (map char)
       (apply str)))
|
||||||
|
|
||||||
|
|
||||||
|
(defn -main
  "Entry point. With file arguments, rewrite each file in place with every
  run of Hangul syllables decomposed into jamo. Without arguments, act as a
  filter: read lines from standard input and print the converted lines."
  [& args]
  (let [convert (fn [text]
                  (str/replace text
                               #"\p{block=HangulSyllables}+"
                               syllables->jamo))]
    (if-not (seq args)
      ;; Filter mode: stream stdin to stdout line by line until EOF.
      (loop []
        (when-let [line (read-line)]
          (println (convert line))
          (recur)))
      ;; File mode: read, convert and overwrite each named file.
      (doseq [path args]
        (spit path (convert (slurp path)))))))

;; Run -main only when this file is executed as the script, not when loaded.
(when (= *file* (System/getProperty "babashka.file"))
  (apply -main *command-line-args*))
|
||||||
Reference in New Issue
Block a user