diff --git a/flake.nix b/flake.nix
index 44758b8..8163c0c 100644
--- a/flake.nix
+++ b/flake.nix
@@ -33,6 +33,7 @@
             inputs.gf.packages.${system}.gf-with-rgl
             pkgs.graphviz
             gf-lsp.gf-lsp
+            pkgs.babashka
           ];
         };
       });
diff --git a/lab1/grammar/korean/HangulCoding.gf b/lab1/grammar/korean/HangulCoding.gf
new file mode 100644
index 0000000..0835535
--- /dev/null
+++ b/lab1/grammar/korean/HangulCoding.gf
@@ -0,0 +1,6 @@
+resource HangulCoding = open HangulCodingRes in {
+  flags coding=utf8 ;
+  oper
+    this_uses_syllables : Str = coding "가" ;
+    this_uses_jamo : Str = coding "가" ;
+}
diff --git a/lab1/grammar/korean/HangulCodingRes.gf b/lab1/grammar/korean/HangulCodingRes.gf
new file mode 100644
index 0000000..38502a8
--- /dev/null
+++ b/lab1/grammar/korean/HangulCodingRes.gf
@@ -0,0 +1,9 @@
+resource HangulCodingRes = {
+  flags coding=utf8 ;
+  oper
+    coding : Str -> Str
+      = \s -> case s of {
+        "가" => "SYLLABLES" ;
+        "가" => "JAMO"
+      } ;
+}
diff --git a/lab1/grammar/korean/HangulJamo.gf b/lab1/grammar/korean/HangulJamo.gf
new file mode 100644
index 0000000..38205e0
--- /dev/null
+++ b/lab1/grammar/korean/HangulJamo.gf
@@ -0,0 +1,62 @@
+resource HangulJamo = open Prelude in {
+  flags coding=utf8 ;
+  oper
+    lemmaToStem : (lemma : Str) -> Str
+      = \lemma -> case lemma of {
+        stem + "다" => stem ;
+        _ => Predef.error ("lemmaToStem was applied to a non-lemma," ++ lemma)
+      } ;
+
+    -- Adds the a/eo infinitive ending to a verb stem; the branches below
+    -- contract it with a stem-final vowel. Branches are tried top to bottom.
+    infinitive : (stem : Str) -> Str
+      = \stem -> case stem of {
+        _ + #a_o + #batchim => stem + "아" ;
+        -- `ha` comes before the bare a/eo branch, which would otherwise also
+        -- match a jamo-encoded stem ending in ha and leave this branch dead.
+        init + #ha => init + "해" ;
+        _ + (#a | #eo) => stem ;
+        init + #o => init + "ᅪ" ;
+        init + #eu => init + "ᅥ" ;
+        -- Conjoining medial U+1167, not the compatibility letter U+3155.
+        init + #i => init + "ᅧ" ;
+        init + #u => init + "ᅯ" ;
+        _ => stem + "어"
+      } ;
+
+    present_haeyo : (stem : Str) -> Str
+      = \stem -> infinitive stem + "요" ;
+
+    past_haeyo : (stem : Str) -> Str
+      = \stem -> infinitive stem + "ᆻ어요" ;
+
+  oper
+    a_o : pattern Str = #("ᅡ" | "ᅩ") ;
+    a : pattern Str = #"ᅡ" ;
+    o : pattern Str = #"ᅩ" ;
+    eo : pattern Str = #"ᅥ" ;
+    eu : pattern Str = #"ᅳ" ;
+    i : pattern Str
= #"ᅵ" ;
+    u : pattern Str = #"ᅮ" ;
+    ha : pattern Str = #"하" ;
+
+  oper
+    consonant : pattern Str =
+      #("ᄀ" | "ᄁ" | "ᄂ" | "ᄃ" | "ᄄ" | "ᄅ" | "ᄆ" | "ᄇ"
+      | "ᄈ" | "ᄉ" | "ᄊ" | "ᄋ" | "ᄌ" | "ᄍ" | "ᄎ" | "ᄏ"
+      | "ᄐ" | "ᄑ" | "ᄒ" | "ᆨ" | "ᆩ" | "ᆪ" | "ᆫ" | "ᆬ"
+      | "ᆭ" | "ᆮ" | "ᆯ" | "ᆰ" | "ᆱ" | "ᆲ" | "ᆳ" | "ᆴ"
+      | "ᆵ" | "ᆶ" | "ᆷ" | "ᆸ" | "ᆹ" | "ᆺ" | "ᆻ" | "ᆼ"
+      | "ᆽ" | "ᆾ" | "ᆿ" | "ᇀ" | "ᇁ" | "ᇂ") ;
+    batchim : pattern Str =
+      #("ᆨ" | "ᆩ" | "ᆪ" | "ᆫ" | "ᆬ" | "ᆭ" | "ᆮ" | "ᆯ" | "ᆰ"
+      | "ᆱ" | "ᆲ" | "ᆳ" | "ᆴ" | "ᆵ" | "ᆶ" | "ᆷ" | "ᆸ" | "ᆹ"
+      | "ᆺ" | "ᆻ" | "ᆼ" | "ᆽ" | "ᆾ" | "ᆿ" | "ᇀ" | "ᇁ" | "ᇂ") ;
+    choseong : pattern Str =
+      #("ᄀ" | "ᄁ" | "ᄂ" | "ᄃ" | "ᄄ" | "ᄅ" | "ᄆ" | "ᄇ" | "ᄈ"
+      | "ᄉ" | "ᄊ" | "ᄋ" | "ᄌ" | "ᄍ" | "ᄎ" | "ᄏ" | "ᄐ" | "ᄑ"
+      | "ᄒ") ;
+    vowel : pattern Str =
+      #("ᅡ" | "ᅢ" | "ᅣ" | "ᅤ" | "ᅥ" | "ᅦ" | "ᅧ" | "ᅨ" | "ᅩ"
+      | "ᅪ" | "ᅫ" | "ᅬ" | "ᅭ" | "ᅮ" | "ᅯ" | "ᅰ" | "ᅱ" | "ᅲ"
+      | "ᅳ" | "ᅴ" | "ᅵ") ;
+}
diff --git a/lab1/grammar/korean/MicroLangKor.gf b/lab1/grammar/korean/MicroLangKor.gf
index 1357c6a..53cd88a 100644
--- a/lab1/grammar/korean/MicroLangKor.gf
+++ b/lab1/grammar/korean/MicroLangKor.gf
@@ -1,6 +1,8 @@
 --# -path=.:../abstract
 concrete MicroLangKor of MicroLang = open MicroResKor, Prelude in {
 
+  flags coding=utf8 ;
+
 -----------------------------------------------------
 ---------------- Grammar part -----------------------
 -----------------------------------------------------
@@ -35,101 +37,102 @@ concrete MicroLangKor of MicroLang = open MicroResKor, Prelude in {
     aPl_Det = {s = []} ;
     the_Det = {s = []} ;
     thePl_Det = {s = []} ;
-    this_Det = {s = "이"} ;
-    thisPl_Det = {s = "이"} ;
-    that_Det = {s = "그"} ;
-    thatPl_Det = {s = "그"} ;
+    this_Det = {s = "이"} ;
+    thisPl_Det = {s = "이"} ;
+    that_Det = {s = "그"} ;
+    thatPl_Det = {s = "그"} ;
 
 -----------------------------------------------------
 ---------------- Lexicon part -----------------------
 -----------------------------------------------------
 
--- lin already_Adv = mkAdv "벌써" ;
-lin animal_N = mkN "동물" ;
-lin apple_N = mkN "사과" ;
-lin baby_N = mkN "아기" ;
--- lin bad_A = mkA "나쁜" ;
-lin beer_N = mkN "beer" ;
--- lin big_A = mkA "큰" ;
-lin bike_N = mkN "bike" ;
-lin bird_N = mkN "bird" ;
+-- lin already_Adv = mkAdv "벌써" ;
+lin animal_N = mkN "동물" ;
+lin apple_N = mkN "사과" ;
+lin baby_N = mkN "아기" ;
+-- lin bad_A = mkA "나쁜" ;
+-- lin beer_N = mkN "beer" ;
+-- lin big_A = mkA "큰" ;
+-- lin bike_N = mkN "bike" ;
+-- lin bird_N = mkN "bird" ;
 -- lin black_A = mkA "black" ;
-lin blood_N = mkN "피" ;
+lin blood_N = mkN "피" ;
 -- lin blue_A = mkA "blue" ;
-lin boat_N = mkN "boat" ;
-lin book_N = mkN "책" ;
-lin boy_N = mkN "소녁" ;
-lin bread_N = mkN "bread" ;
+-- lin boat_N = mkN "boat" ;
+lin book_N = mkN "책" ;
+lin boy_N = mkN "소년" ;
+lin bread_N = mkN "빵" ;
 -- lin break_V2 = mkV2 (mkV "break" "broke" "broken") ;
 -- lin buy_V2 = mkV2 (mkV "buy" "bought" "bought") ;
-lin car_N = mkN "자동차" ;
-lin cat_N = mkN "고양이" ;
-lin child_N = mkN "어린이" ;
-lin city_N = mkN "city" ;
--- lin clean_A = mkA "정소한" ;
--- lin clever_A = mkA "똑똑한" ;
-lin cloud_N = mkN "cloud" ;
--- lin cold_A = mkA "차가운" ;
-lin come_V = regVerb_a_o "오다" ;
-lin computer_N = mkN "컴퓨터" ;
-lin cow_N = mkN "cow" ;
--- lin dirty_A = mkA "더러운" ;
-lin dog_N = mkN "개" ;
+lin car_N = mkN "자동차" ;
+lin cat_N = mkN "고양이" ;
+lin child_N = mkN "어린이" ;
+-- lin city_N = mkN "city" ;
+-- lin clean_A = mkA "정소한" ;
+-- lin clever_A = mkA "똑똑한" ;
+-- lin cloud_N = mkN "cloud" ;
+-- lin cold_A = mkA "차가운" ;
+-- lin come_V = regVerb "오다" ; -- JAMO
+lin come_V = regVerb "오다" ; -- SYLLABLES
+lin computer_N = mkN "컴퓨터" ;
+-- lin cow_N = mkN "cow" ;
+-- lin dirty_A = mkA "더러운" ;
+lin dog_N = mkN "개" ;
 -- lin drink_V2 = mkV2 (mkV "drink" "drank" "drunk") ;
 -- lin eat_V2 = mkV2 (mkV "eat" "ate" "eaten") ;
 -- lin find_V2 = mkV2 (mkV "find" "found" "found") ;
-lin fire_N = mkN "fire" ;
-lin fish_N = mkN "생선" ;
-lin flower_N = mkN "flower" ;
-lin friend_N = mkN "진구" ;
-lin girl_N = mkN "소녀" ;
--- lin good_A = mkA "좋은" ;
+-- lin fire_N = mkN "fire" ;
+lin fish_N = mkN "생선" ;
+-- lin flower_N = mkN "flower" ;
+lin friend_N = mkN "친구" ;
+lin girl_N = mkN "소녀" ;
+-- lin good_A = mkA "좋은" ;
 -- lin go_V = mkV "go" "went" "gone" ;
-lin grammar_N = mkN "grammar" ;
+-- lin grammar_N = mkN "grammar" ;
 -- lin green_A = mkA "green" ;
 -- lin heavy_A = mkA "heavy" ;
-lin horse_N = mkN "horse" ;
+-- lin horse_N = mkN "horse" ;
 -- lin hot_A = mkA "hot" ;
-lin house_N = mkN "집" ;
+lin house_N = mkN "집" ;
 -- lin john_PN = mkPN "John" ;
 -- lin jump_V = mkV "jump" ;
 -- lin kill_V2 = mkV2 "kill" ;
 -- lin know_VS = mkVS (mkV "know" "knew" "known") ;
-lin language_N = mkN "언어" ;
+lin language_N = mkN "언어" ;
 -- lin live_V = mkV "live" ;
 -- lin love_V2 = mkV2 (mkV "love") ;
-lin man_N = mkN "남자" ;
-lin milk_N = mkN "우유" ;
-lin music_N = mkN "음악" ;
+lin man_N = mkN "남자" ;
+lin milk_N = mkN "우유" ;
+lin music_N = mkN "음악" ;
 -- lin new_A = mkA "new" ;
--- lin now_Adv = mkAdv "지금" ;
--- lin old_A = mkA "낡안" ;
+-- lin now_Adv = mkAdv "지금" ;
+-- lin old_A = mkA "낡안" ;
 -- lin paris_PN = mkPN "Paris" ;
--- lin play_V = mkV "놀" ;
+-- lin play_V = mkV "놀" ;
 -- lin read_V2 = mkV2 (mkV "read" "read" "read") ;
 -- lin ready_A = mkA "ready" ;
 -- lin red_A = mkA "red" ;
 lin river_N = mkN "river" ;
 -- lin run_V = mkV "run" "ran" "run" ;
-lin sea_N = mkN "바다" ;
+lin sea_N = mkN "바다" ;
 -- lin see_V2 = mkV2 (mkV "see" "saw" "seen") ;
-lin ship_N = mkN "ship" ;
-lin sleep_V = regVerb_a_o "자다" ;
--- lin small_A = mkA "작은" ;
-lin star_N = mkN "별" ;
+-- lin ship_N = mkN "ship" ;
+-- lin sleep_V = regVerb "자다" ;
+-- lin small_A = mkA "작은" ;
+lin star_N = mkN "별" ;
 -- lin swim_V = mkV "swim" "swam" "swum" ;
 -- lin teach_V2 = mkV2 (mkV "teach" "taught" "taught") ;
-lin train_N = mkN "train" ;
+-- lin train_N = mkN "train" ;
 -- lin travel_V = mkV "travel" ;
-lin tree_N = mkN "tree" ;
+-- lin tree_N = mkN "tree" ;
 -- lin understand_V2 = mkV2 (mkV "understand" "understood" "understood") ;
 -- lin wait_V2 = mkV2 "wait" "for" ;
 -- lin walk_V = mkV "walk" ;
--- lin warm_A = mkA
"따뜻한" ;
-lin water_N = mkN "물" ;
--- lin white_A = mkA "하얗은" ;
-lin wine_N = mkN "wine" ;
-lin woman_N = mkN "여자" ;
+-- lin warm_A = mkA "따뜻한" ;
+lin water_N = mkN "물" ;
+-- lin white_A = mkA "하얗은" ;
+-- lin wine_N = mkN "wine" ;
+lin woman_N = mkN "여자" ;
 -- lin yellow_A = mkA "yellow" ;
 -- lin young_A = mkA "young" ;
 
diff --git a/lab1/grammar/korean/MicroResKor.gf b/lab1/grammar/korean/MicroResKor.gf
index fae4bed..0f6cf98 100644
--- a/lab1/grammar/korean/MicroResKor.gf
+++ b/lab1/grammar/korean/MicroResKor.gf
@@ -1,4 +1,4 @@
-resource MicroResKor = open Prelude, Hangul in {
+resource MicroResKor = open Prelude, HangulJamo in {
 
 param
   VForm = Lemma | VPresent | VPast ;
@@ -10,35 +10,18 @@ resource MicroResKor = open Prelude, Hangul in {
 
   lemmaToStem : (lemma : Str) -> Str
     = \lemma -> case lemma of {
-      stem + "다" => stem ;
+      stem + "다" => stem ;
       _ => Predef.error ("lemmaToStem was applied to a non-lemma," ++ lemma)
     } ;
 
-  regVerb_eo : (lemma : Str) -> Verb
+  regVerb : (lemma : Str) -> Verb
     = \lemma ->
       let stem = lemmaToStem lemma ;
       in {
         s = table {
           Lemma => lemma ;
-          VPresent => stem + "어요" ;
-          VPast => stem + "었어요"
+          VPresent => present_haeyo stem ;
+          VPast => past_haeyo stem
         }
       } ;
-
-  regVerb_a_o : (lemma : Str) -> Verb
-    = \lemma ->
-      let stem = lemmaToStem lemma ;
-      in {
-        s = table {
-          Lemma => lemma ;
-          VPresent => stem + "아요" ;
-          VPast => stem + "았어요"
-        }
-      } ;
-
-  blah : Str -> Str
-    = \s -> case s of {
-      _ + "ᅡ" => "아-final" ;
-      _ => "not 아-final"
-    } ;
 }
diff --git a/scripts/search-hangul b/scripts/search-hangul
new file mode 100755
index 0000000..66e732f
--- /dev/null
+++ b/scripts/search-hangul
@@ -0,0 +1,12 @@
+#!/usr/bin/env bb
+
+(require '[babashka.fs :as fs]
+         '[clojure.string :as str])
+
+(doseq [f (fs/glob "lab1/grammar/korean" "**.gf")
+        [line-number line] (map-indexed (fn [i x] [(inc i) x]) ; 1-based, to match the file:line: output convention
+                                        (-> f str slurp str/split-lines))]
+  (when (re-find #"\p{block=HangulJamo}" line)
+    (printf "JAMO: %s:%d: %s\n" (str f) line-number line))
+  (when (re-find
#"\p{block=HangulSyllables}" line)
+    (printf "SYLLABLE: %s:%d: %s\n" (str f) line-number line)))
diff --git a/scripts/syllables-to-jamo b/scripts/syllables-to-jamo
new file mode 100755
index 0000000..071842b
--- /dev/null
+++ b/scripts/syllables-to-jamo
@@ -0,0 +1,46 @@
+#!/usr/bin/env bb
+
+(require '[babashka.fs :as fs]
+         '[clojure.string :as str])
+
+(defn syllables->jamo
+  "Convert Hangul syllables in string S to their jamo components."
+  [s]
+  (->> s
+       (map int)
+       (mapcat
+        (fn [c]
+          (if (<= 0xAC00 c 0xD7A3)
+            ;; Hangul syllable - decompose
+            (let [code (- c 0xAC00)
+                  lead (quot code (* 21 28))
+                  medial (quot (mod code (* 21 28)) 28)
+                  final (mod code 28)
+                  lead-jamo (+ 0x1100 lead)
+                  medial-jamo (+ 0x1161 medial)
+                  final-jamo (if (> final 0) (+ 0x11A7 final) nil)]
+              (remove nil? [lead-jamo medial-jamo final-jamo]))
+            ;; Not a Hangul syllable
+            [c])))
+       (map char)
+       (apply str)))
+
+
+(defn -main
+  "With file ARGS, rewrite each file in place; otherwise filter stdin to stdout line by line."
+  [& args]
+  (if (seq args)
+    (doseq [f args]
+      (let [x (-> (slurp f)
+                  (str/replace #"\p{block=HangulSyllables}+"
+                               syllables->jamo))]
+        (spit f x)))
+    (loop [line (read-line)]
+      (when line
+        (-> line
+            (str/replace #"\p{block=HangulSyllables}+"
+                         syllables->jamo)
+            println)
+        (recur (read-line))))))
+
+(when (= *file* (System/getProperty "babashka.file"))
+  (apply -main *command-line-args*))