Compare commits

...

18 Commits

Author SHA1 Message Date
54c02b4be4 wip: noun morphology 2026-02-15 15:24:44 -07:00
16fb8b3645 fix: make convert-to-jamo hook buffer-local 2026-02-15 15:24:44 -07:00
533d2e7e9f adjectives 2026-02-12 16:19:46 -07:00
8999d3e38c jamo comments 2026-02-12 15:53:06 -07:00
23af7c0ea6 basic korean grammar 2026-02-12 15:35:32 -07:00
000dfc756c cleanup 2026-02-12 14:38:34 -07:00
109fc3547e syllables->jamo on save 2026-02-12 14:38:34 -07:00
fd36692419 hangul syllables 2026-02-12 14:24:49 -07:00
0ba6518630 2026-02-07 21:32:31 -07:00
99a260a729 2026-02-07 19:27:51 -07:00
c481320ed6 2026-02-07 19:26:24 -07:00
f6293e2834 2026-02-07 19:20:34 -07:00
9f59998ef2 2026-02-07 18:57:59 -07:00
8f07d8c73d korean determiners 2026-01-29 09:30:53 -07:00
997b50276f c 2026-01-28 14:19:47 -07:00
f85bb5ef54 b 2026-01-28 14:19:47 -07:00
65a023a1f1 a 2026-01-28 07:42:03 -07:00
51df41ed8d dev shell 2026-01-27 19:15:43 -07:00
13 changed files with 591 additions and 3 deletions

6
.dir-locals.el Normal file
View File

@@ -0,0 +1,6 @@
;; Directory-local settings, applied in every major mode (the `nil' key).
;; The `eval' form makes helpers.el loadable from the project root and
;; converts Hangul syllables to jamo whenever a buffer of this project
;; is saved.
((nil
  . ((eval
      . (progn
          (add-to-list 'load-path (project-root (project-current)))
          (require 'helpers)
          ;; `add-hook' takes (HOOK FUNCTION &optional DEPTH LOCAL).  DEPTH
          ;; is nil and LOCAL is t so that the hook is installed only in
          ;; the buffer these directory-local variables are applied to,
          ;; instead of globally for every buffer in the Emacs session.
          (add-hook 'before-save-hook
                    #'hangul-convert-buffer-to-jamo nil t))))))

1
.envrc Normal file
View File

@@ -0,0 +1 @@
use flake

4
.gitignore vendored
View File

@@ -1 +1,3 @@
*.gfo
*.gfo
.gf-lsp
.direnv

133
flake.lock generated Normal file
View File

@@ -0,0 +1,133 @@
{
"nodes": {
"bnfc": {
"flake": false,
"locked": {
"lastModified": 1694438405,
"narHash": "sha256-UmrJlHrMlB4tOxQEnBA1blh3aUV28dJHFZs5LNUbNLU=",
"owner": "BNFC",
"repo": "bnfc",
"rev": "1ead871febe45b8adecad286a90650414e24d8a4",
"type": "github"
},
"original": {
"owner": "BNFC",
"ref": "master",
"repo": "bnfc",
"type": "github"
}
},
"gf": {
"inputs": {
"bnfc": "bnfc",
"gf-core": "gf-core",
"gf-rgl": "gf-rgl",
"gf-wordnet": "gf-wordnet",
"nixpkgs": "nixpkgs"
},
"locked": {
"lastModified": 1695852170,
"narHash": "sha256-xgA9ltioXjh5gYdgmzWACMFeFJu3w4ytMqQlb649oH8=",
"owner": "anka-213",
"repo": "cclaw-nix-stuff",
"rev": "bb591a7d0b6e81f5ae053d2e99a0f8dd9fb5d2a9",
"type": "github"
},
"original": {
"owner": "anka-213",
"ref": "nix-flakes",
"repo": "cclaw-nix-stuff",
"type": "github"
}
},
"gf-core": {
"flake": false,
"locked": {
"lastModified": 1695655790,
"narHash": "sha256-de5Fk5TK5aUL1YQphoYNBrpJj8GRuPJis7komT95+q8=",
"owner": "GrammaticalFramework",
"repo": "gf-core",
"rev": "7d9015e2e159b376cf2ba8332093c9623375557e",
"type": "github"
},
"original": {
"owner": "GrammaticalFramework",
"ref": "master",
"repo": "gf-core",
"type": "github"
}
},
"gf-rgl": {
"flake": false,
"locked": {
"lastModified": 1695810223,
"narHash": "sha256-deTXlcYreUl/pHnFZbjSrZIq8L/XunLTODm7aE9LKSA=",
"owner": "GrammaticalFramework",
"repo": "gf-rgl",
"rev": "f19dcc01f99252feb79823830863389e6cf0fc7f",
"type": "github"
},
"original": {
"owner": "GrammaticalFramework",
"ref": "master",
"repo": "gf-rgl",
"type": "github"
}
},
"gf-wordnet": {
"flake": false,
"locked": {
"lastModified": 1695803720,
"narHash": "sha256-LG5NVsB81Any5P/2WgEpELJKZQmySloHk1F42E7wD1k=",
"owner": "GrammaticalFramework",
"repo": "gf-wordnet",
"rev": "39efb2f91ccb9575c8d96bc272bd2d9f90c1eb23",
"type": "github"
},
"original": {
"owner": "GrammaticalFramework",
"ref": "master",
"repo": "gf-wordnet",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1665056165,
"narHash": "sha256-2C7VfNphJa0FxPoT+suMOmUDVrQ5RIE+NKfDWqElvE4=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "27a89ba43b0fb735ce867e8ab3d2442f8cc61dad",
"type": "github"
},
"original": {
"id": "nixpkgs",
"type": "indirect"
}
},
"nixpkgs_2": {
"locked": {
"lastModified": 1769170682,
"narHash": "sha256-oMmN1lVQU0F0W2k6OI3bgdzp2YOHWYUAw79qzDSjenU=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "c5296fdd05cfa2c187990dd909864da9658df755",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"gf": "gf",
"nixpkgs": "nixpkgs_2"
}
}
},
"root": "root",
"version": 7
}

41
flake.nix Normal file
View File

@@ -0,0 +1,41 @@
# Development environment for the grammars: provides a `devShells.default`
# for each system listed in `supportedSystems`.
{
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
# Flake that exposes the `gf-with-rgl` package used below.
gf.url = "github:anka-213/cclaw-nix-stuff/nix-flakes";
};
outputs = { self, nixpkgs, ... }@inputs:
let
supportedSystems = [
"aarch64-darwin"
"aarch64-linux"
"x86_64-darwin"
"x86_64-linux"
];
# Build an attribute set keyed by system name; `f` is called once per
# system with that system's `pkgs`, `lib` and the `system` string itself.
each-system = f: nixpkgs.lib.genAttrs supportedSystems (system: f rec {
pkgs = import nixpkgs { inherit system; };
inherit (pkgs) lib;
inherit system;
});
in {
devShells =
each-system ({ pkgs, system, ... }:
let
# gf-lsp is not taken from a flake input: the 1.0.6.0 release tarball
# is fetched (pinned by hash) and its default.nix is imported.
gf-lsp = import
(pkgs.fetchzip {
url = "https://github.com/anka-213/gf-lsp/archive/refs/tags/1.0.6.0.tar.gz";
hash = "sha256-UAI2qUslzLOWYjTirZJ0y4DZbkPZnVXTY0XtFO8+Rks=";
}) {inherit system;};
in {
default = pkgs.mkShell {
# Tools available inside the shell.  babashka provides the `bb`
# interpreter named in the shebang of the files under scripts/.
packages = [
inputs.gf.packages.${system}.gf-with-rgl
pkgs.graphviz
gf-lsp.gf-lsp
pkgs.babashka
];
};
});
};
}

56
helpers.el Normal file
View File

@@ -0,0 +1,56 @@
(defun hangul-syllables-to-jamo (str)
  "Return a copy of STR with Hangul syllables decomposed into Hangul jamo.

Every character in the range U+AC00..U+D7A3 is replaced by its
conjoining jamo: a leading consonant counted from U+1100, a vowel
counted from U+1161 and, when the syllable has one, a trailing
consonant counted from U+11A8.  All other characters are copied
unchanged."
  ;; `mapconcat' builds the result in one pass; growing a string with
  ;; repeated `concat' is quadratic, which matters because this runs over
  ;; whole buffers on save.
  (mapconcat
   (lambda (char)
     (if (<= #xAC00 char #xD7A3)
         ;; The offset into the block encodes (lead, vowel, final) in
         ;; mixed radix with 21 vowels and 28 finals per lead; final
         ;; index 0 means "no trailing consonant".
         (let* ((code (- char #xAC00))
                (lead (/ code (* 21 28)))
                (medial (/ (% code (* 21 28)) 28))
                (final (% code 28)))
           (if (> final 0)
               (string (+ #x1100 lead) (+ #x1161 medial) (+ #x11A7 final))
             (string (+ #x1100 lead) (+ #x1161 medial))))
       ;; Not a Hangul syllable.
       (char-to-string char)))
   str
   ""))
(defun hangul-convert-region-to-jamo (beg end)
  "Replace the text between BEG and END with its jamo-decomposed form.
Interactively, BEG and END are the bounds of the region.  The new
text is computed by `hangul-syllables-to-jamo'."
  (interactive "r")
  (replace-region-contents
   beg end
   ;; `replace-region-contents' calls this function with the buffer
   ;; narrowed to BEG..END, so the accessible text is exactly the
   ;; region being converted.
   (lambda ()
     (hangul-syllables-to-jamo (buffer-string)))))
(defun hangul-convert-buffer-to-jamo ()
  "Convert Hangul syllables to jamo between `point-min' and `point-max'.
Delegates to `hangul-convert-region-to-jamo' and then reports
completion in the echo area."
  (interactive)
  (let ((start (point-min))
        (stop (point-max)))
    (hangul-convert-region-to-jamo start stop))
  (message "Converted Hangul Syllables in buffer to Jamo."))
(require 'dash)
;; `number-sequence' is always available, so these constants do not depend
;; on cl-lib (which this file never requires) having been loaded already.
(defconst gf-hangul/choseong
  (number-sequence #x1100 #x1112)
  "Character codes U+1100..U+1112 (Hangul leading consonants), in order.")
(defconst gf-hangul/jungseong
  (number-sequence #x1161 #x1175)
  "Character codes U+1161..U+1175 (Hangul vowels), in order.")
(defconst gf-hangul/batchim
  (number-sequence #x11a8 #x11c2)
  "Character codes U+11A8..U+11C2 (Hangul trailing consonants), in order.")
(defun gf-hangul/make-pattern (name seq)
  "Return GF source text that defines NAME as a string pattern over SEQ.

SEQ is a list whose elements are characters (for example the codes
in `gf-hangul/choseong') or strings.  Each element becomes one
double-quoted alternative, and the alternatives are joined with
\" | \" inside a #(...) pattern expression."
  (format "'%s' : pattern Str = #(%s) ;"
          name
          (mapconcat
           (lambda (item)
             ;; `concat' does not accept a bare character code, so turn
             ;; characters into one-character strings first.
             (concat "\""
                     (if (characterp item) (char-to-string item) item)
                     "\""))
           seq
           " | ")))
(provide 'helpers)

View File

@@ -49,6 +49,11 @@ abstract MicroLang = {
aPl_Det : Det ; -- indefinite plural ---s
the_Det : Det ; -- definite singular ---s
thePl_Det : Det ; -- definite plural ---s
this_Det : Det ;
thisPl_Det: Det ;
that_Det : Det ;
thatPl_Det: Det ;
UseN : N -> CN ; -- house
AdjCN : AP -> CN -> CN ; -- big house
@@ -162,4 +167,4 @@ fun
yellow_A : A ;
young_A : A ;
}
}

View File

@@ -30,7 +30,7 @@ concrete MicroLangEng of MicroLang = open MicroResEng, Prelude in {
PredVPS np vp = {
s = np.s ! Nom ++ vp.verb.s ! agr2vform np.a ++ vp.compl
} ;
UseV v = {
verb = v ;
compl = [] ;
@@ -62,6 +62,10 @@ concrete MicroLangEng of MicroLang = open MicroResEng, Prelude in {
aPl_Det = {s = "" ; n = Pl} ;
the_Det = {s = "the" ; n = Sg} ;
thePl_Det = {s = "the" ; n = Pl} ;
this_Det = {s = "this"; n = Sg} ;
thisPl_Det = {s = "these"; n = Pl} ;
that_Det = {s = "that"; n = Sg} ;
thatPl_Det = {s = "those"; n = Pl} ;
UseN n = n ;

View File

@@ -0,0 +1,72 @@
resource HangulJamo = open Prelude in {
flags coding=utf8 ;
oper
-- Strip the dictionary-form ending "다" from a lemma and return what is
-- left; any string without that ending is reported as an error.
lemmaToStem : (lemma : Str) -> Str
  = \dictForm -> case dictForm of {
      root + "다" => root ;
      _ => Predef.error ("lemmaToStem was applied to a non-lemma," ++ dictForm)
    } ;
-- Build the 아/어 form of a stem (the base of the 해요-style endings).
-- The cases are tried from top to bottom, so the more specific stem
-- shapes must stay above the more general ones.
-- Not covered: 르-irregular stems (모르다 ⇒ 몰라) and other irregular
-- classes; such stems fall through to the regular rules below.
infinitive : (stem : Str) -> Str
  = \stem -> case stem of {
      -- 하다 ⇒ 해
      init + #ha => init + "해" ;
      -- 찾다 ⇒ 찾아, 좁다 ⇒ 좁아
      _ + #a_o + #batchim => stem + "아" ;
      -- 가다 ⇒ 가, 서다 ⇒ 서
      _ + (#a | #eo) => stem ;
      -- 오다 ⇒ 와
      init + #o => init + "ᅪ" ;
      -- ㅡ is dropped; the new vowel is ㅏ when the preceding syllable
      -- has ㅏ or ㅗ: 따르다 ⇒ 따라, 나쁘다 ⇒ 나빠
      init@(_ + #a_o + #choseong) + #eu => init + "ᅡ" ;
      -- same, with a final consonant in the preceding syllable: 잠그다 ⇒ 잠가
      init@(_ + #a_o + #batchim + #choseong) + #eu => init + "ᅡ" ;
      -- otherwise ㅡ is replaced by ㅓ: 크다 ⇒ 커, 슬프다 ⇒ 슬퍼
      init + #eu => init + "ᅥ" ;
      -- 기다리다 ⇒ 기다려
      init + #i => init + "ᅧ" ;
      -- 주다 ⇒ 줘, 배우다 ⇒ 배워
      init + #u => init + "ᅯ" ;
      -- 맛있다 ⇒ 맛있어
      _ => stem + "어"
    } ;
-- Present tense in the 해요 style: the 아/어 form of the stem followed by "요".
present_haeyo : (stem : Str) -> Str
  = \root -> infinitive root + "요" ;
-- Past tense in the 해요 style: the 아/어 form of the stem followed by the
-- final consonant ㅆ and "어요".
past_haeyo : (stem : Str) -> Str
  = \root -> infinitive root + "ᆻ어요" ;
-- Choose between two allomorphs by the last jamo of a word: the second
-- argument when the word ends in a vowel, the third argument otherwise.
vc_allomorph : (s,vowel,consonant : Str) -> Str
  = \word,afterVowel,afterConsonant -> case word of {
      _ + #vowel => afterVowel ;
      _ => afterConsonant
    } ;
-- Pattern macros for individual vowel jamo, referenced as #name in the
-- case expressions of this module.
oper
-- either of the two vowels ㅏ / ㅗ
a_o : pattern Str = #("ᅡ" | "ᅩ") ;
a : pattern Str = #"ᅡ" ;
o : pattern Str = #"ᅩ" ;
eo : pattern Str = #"ᅥ" ;
eu : pattern Str = #"ᅳ" ;
i : pattern Str = #"ᅵ" ;
u : pattern Str = #"ᅮ" ;
-- the syllable 하 (stems of 하다 verbs end in it)
ha : pattern Str = #"하" ;
-- Pattern macros for whole classes of jamo.
oper
-- any consonant jamo: the alternatives of `choseong` followed by the
-- alternatives of `batchim`
consonant : pattern Str =
#("ᄀ" | "ᄁ" | "ᄂ" | "ᄃ" | "ᄄ" | "ᄅ" | "ᄆ" | "ᄇ"
| "ᄈ" | "ᄉ" | "ᄊ" | "ᄋ" | "ᄌ" | "ᄍ" | "ᄎ" | "ᄏ"
| "ᄐ" | "ᄑ" | "ᄒ" | "ᆨ" | "ᆩ" | "ᆪ" | "ᆫ" | "ᆬ"
| "ᆭ" | "ᆮ" | "ᆯ" | "ᆰ" | "ᆱ" | "ᆲ" | "ᆳ" | "ᆴ"
| "ᆵ" | "ᆶ" | "ᆷ" | "ᆸ" | "ᆹ" | "ᆺ" | "ᆻ" | "ᆼ"
| "ᆽ" | "ᆾ" | "ᆿ" | "ᇀ" | "ᇁ" | "ᇂ") ;
-- syllable-final consonants
batchim : pattern Str =
#("ᆨ" | "ᆩ" | "ᆪ" | "ᆫ" | "ᆬ" | "ᆭ" | "ᆮ" | "ᆯ" | "ᆰ"
| "ᆱ" | "ᆲ" | "ᆳ" | "ᆴ" | "ᆵ" | "ᆶ" | "ᆷ" | "ᆸ" | "ᆹ"
| "ᆺ" | "ᆻ" | "ᆼ" | "ᆽ" | "ᆾ" | "ᆿ" | "ᇀ" | "ᇁ" | "ᇂ") ;
-- syllable-initial consonants
choseong : pattern Str =
#("ᄀ" | "ᄁ" | "ᄂ" | "ᄃ" | "ᄄ" | "ᄅ" | "ᄆ" | "ᄇ" | "ᄈ"
| "ᄉ" | "ᄊ" | "ᄋ" | "ᄌ" | "ᄍ" | "ᄎ" | "ᄏ" | "ᄐ" | "ᄑ"
| "ᄒ") ;
-- vowel jamo; `vc_allomorph` uses this to test how a word ends
vowel : pattern Str =
#("ᅡ" | "ᅢ" | "ᅣ" | "ᅤ" | "ᅥ" | "ᅦ" | "ᅧ" | "ᅨ" | "ᅩ"
| "ᅪ" | "ᅫ" | "ᅬ" | "ᅭ" | "ᅮ" | "ᅯ" | "ᅰ" | "ᅱ" | "ᅲ"
| "ᅳ" | "ᅴ" | "ᅵ") ;
}

View File

@@ -0,0 +1,165 @@
--# -path=.:../abstract
concrete MicroLangKor of MicroLang = open MicroResKor, Prelude in {
flags coding=utf8 ;
-----------------------------------------------------
---------------- Grammar part -----------------------
-----------------------------------------------------
lincat
  Utt = {s : Str} ;
  S = {s : Str} ;
  -- The complement is stored as a whole Noun record so that PredVPS can
  -- select its object-marked form.
  VP = {verb : Verb ; compl : Noun} ; ---s special case of Mini
  Comp = Noun ;
  AP = Adjective ;
  CN = Noun ;
  NP = Noun ;
  Pron = {s : Str} ;
  Det = {s : Str} ;
  Prep = {s : Str} ;
  V = Verb ;
  V2 = Verb2 ;
  A = Adjective ;
  N = Noun ;
  Adv = {s : Str} ;
lin
  UttS s = {s = s.s} ;
  UttNP np = {s = np.s ! NBare} ;
  -- subject, then object, then verb
  PredVPS np vp = {s = np.s ! NSubject ++ vp.compl.s ! NObject ++ vp.verb.s ! VPresent} ;
  -- An intransitive verb has an empty complement in every noun form.
  UseV v = {verb = v ; compl = {s = \\_ => []}} ;
  ComplV2 v2 np = {verb = v2 ; compl = np} ;
  UseN n = n ;
  -- The determiner is prefixed to every form of the noun, so the result
  -- is again a full NForm table as the lincat of NP requires.
  DetCN det cn = {s = \\nf => det.s ++ cn.s ! nf} ;
  a_Det = {s = []} ;
  aPl_Det = {s = []} ;
  the_Det = {s = []} ;
  thePl_Det = {s = []} ;
  this_Det = {s = "이"} ;
  thisPl_Det = {s = "이"} ;
  that_Det = {s = "그"} ;
  thatPl_Det = {s = "그"} ;
  PositA a = a ;
  -- The adnominal form of the adjective precedes every form of the noun.
  AdjCN ap cn = {s = \\nf => ap.s ! VAdnomial ++ cn.s ! nf} ;
-----------------------------------------------------
---------------- Lexicon part -----------------------
-----------------------------------------------------
-- Entries that are still commented out have no Korean lemma yet or need
-- a paradigm that does not exist yet.  Verb and adjective entries take
-- the dictionary form (ending in 다), as required by lemmaToStem.
-- lin already_Adv = mkAdv "벌써" ;
lin animal_N = mkN "동물" ;
lin apple_N = mkN "사과" ;
lin baby_N = mkN "아기" ;
lin bad_A = mkA "나쁘다" ;
-- lin beer_N = mkN "beer" ;
lin big_A = mkA "크다" ;
-- lin bike_N = mkN "bike" ;
-- lin bird_N = mkN "bird" ;
-- lin black_A = mkA "black" ;
lin blood_N = mkN "피" ;
-- lin blue_A = mkA "blue" ;
-- lin boat_N = mkN "boat" ;
lin book_N = mkN "책" ;
lin boy_N = mkN "소년" ;
lin bread_N = mkN "빵" ;
-- lin break_V2 = mkV2 (mkV "break" "broke" "broken") ;
lin buy_V2 = mkV2 "사다" ;
lin car_N = mkN "자동차" ;
lin cat_N = mkN "고양이" ;
lin child_N = mkN "어린이" ;
-- lin city_N = mkN "city" ;
lin clean_A = mkA "깨끗하다" ;
lin clever_A = mkA "똑똑하다" ;
-- lin cloud_N = mkN "cloud" ;
-- lin cold_A = mkA "차가운" ;
lin come_V = mkV "오다" ;
lin computer_N = mkN "컴퓨터" ;
-- lin cow_N = mkN "cow" ;
-- lin dirty_A = mkA "더러운" ;
lin dog_N = mkN "개" ;
lin drink_V2 = mkV2 "마시다" ;
lin eat_V2 = mkV2 "먹다" ;
lin find_V2 = mkV2 "찾다" ;
-- lin fire_N = mkN "fire" ;
lin fish_N = mkN "생선" ;
-- lin flower_N = mkN "flower" ;
lin friend_N = mkN "친구" ;
lin girl_N = mkN "소녀" ;
-- lin good_A = mkA "좋은" ;
lin go_V = mkV "가다" ;
-- lin grammar_N = mkN "grammar" ;
-- lin green_A = mkA "green" ;
-- lin heavy_A = mkA "heavy" ;
-- lin horse_N = mkN "horse" ;
-- lin hot_A = mkA "hot" ;
lin house_N = mkN "집" ;
-- lin john_PN = mkPN "John" ;
-- lin jump_V = mkV "jump" ;
lin kill_V2 = mkV2 "죽이다" ;
-- lin know_V2 = mkV "알다" ;
lin language_N = mkN "언어" ;
-- lin live_V = mkV "live" ;
-- lin love_V2 = mkV2 (mkV "love") ;
lin man_N = mkN "남자" ;
lin milk_N = mkN "우유" ;
lin music_N = mkN "음악" ;
-- lin new_A = mkA "new" ;
lin now_Adv = mkAdv "지금" ;
-- lin old_A = mkA "낡안" ;
-- lin paris_PN = mkPN "Paris" ;
lin play_V = mkV "놀다" ;
lin read_V2 = mkV2 "읽다" ;
-- lin ready_A = mkA "ready" ;
-- lin red_A = mkA "red" ;
lin river_N = mkN "강" ;
-- lin run_V = mkV "run" "ran" "run" ;
lin sea_N = mkN "바다" ;
lin see_V2 = mkV2 "보다" ;
-- lin ship_N = mkN "ship" ;
lin sleep_V = mkV "자다" ;
-- lin small_A = mkA "작은" ;
lin star_N = mkN "별" ;
lin swim_V = mkV "수영하다" ;
lin teach_V2 = mkV2 "가르치다" ;
-- lin train_N = mkN "train" ;
-- lin travel_V = mkV "travel" ;
-- lin tree_N = mkN "tree" ;
-- lin understand_V2 = mkV2 (mkV "understand" "understood" "understood") ;
lin wait_V2 = mkV2 "기다리다" ;
lin walk_V = mkV "걷다" ;
-- lin warm_A = mkA "따뜻한" ;
lin water_N = mkN "물" ;
-- lin white_A = mkA "하얗은" ;
-- lin wine_N = mkN "wine" ;
lin woman_N = mkN "여자" ;
-- lin yellow_A = mkA "yellow" ;
-- lin young_A = mkA "young" ;
---------------------------
-- Paradigms part ---------
---------------------------
-- Lexical constructors used by the lexicon entries of this module.
oper
  -- noun from its bare (unmarked) form
  mkN : Str -> Noun
    = \bare -> lin N (noun bare) ;

  -- verb from its dictionary form
  mkV : Str -> V
    = \dictForm -> lin V (regVerb dictForm) ;

  -- two-place verb, either from a dictionary form or from an existing V
  mkV2 = overload {
    mkV2 : Str -> V2
      = \dictForm -> lin V2 (mkV dictForm) ;
    mkV2 : V -> V2
      = \vb -> lin V2 vb ;
  } ;

  mkAdv : Str -> Adv
    = \word -> lin Adv {s = word} ;

  mkPrep : Str -> Prep
    = \word -> lin Prep {s = word} ;

  -- adjective from its dictionary form
  mkA : Str -> A
    = \dictForm -> lin A (regAdjective dictForm) ;
}

View File

@@ -0,0 +1,47 @@
resource MicroResKor = open Prelude, HangulJamo in {
-- Inflection parameters.  The strings for each VForm are built in `reg`
-- and the strings for each NForm in `noun`, both later in this module.
param
VForm = VLemma | VPresent | VPast | VAdnomial ;
NForm = NBare | NTopic | NSubject | NObject | NPluralSubject | NPluralTopic ;
-- Record types shared by the concrete syntax.  Two-place verbs and
-- adjectives have the same type as plain verbs: one string per VForm.
oper
Noun : Type = {s : NForm => Str} ;
Verb : Type = {s : VForm => Str} ;
Verb2 : Type = Verb ;
Adjective : Type = Verb ;
-- Strip the dictionary-form ending "다" from a lemma and return what is
-- left; any string without that ending is reported as an error.
lemmaToStem : (lemma : Str) -> Str
  = \dictForm -> case dictForm of {
      root + "다" => root ;
      _ => Predef.error ("lemmaToStem was applied to a non-lemma," ++ dictForm)
    } ;
-- Regular paradigm shared by verbs and adjectives.  The Bool argument
-- only affects the adnominal form: descriptive words take ㄴ after a
-- vowel and 은 after a consonant, all other words take 는.
reg : (descriptive : Bool) -> (lemma : Str) -> Verb
  = \isDescriptive,dictForm ->
    let root = lemmaToStem dictForm ;
    in {
      s = table {
        VLemma => dictForm ;
        VPresent => present_haeyo root ;
        VPast => past_haeyo root ;
        VAdnomial => case isDescriptive of {
          True => root + vc_allomorph root "ᆫ" "은" ;
          False => root + "는"
        }
      }
    } ;
-- `reg` specialised to non-descriptive words (verbs).
regVerb : (lemma : Str) -> Verb
  = \dictForm -> reg False dictForm ;
-- `reg` specialised to descriptive words (adjectives).
regAdjective : (lemma : Str) -> Adjective
  = \dictForm -> reg True dictForm ;
-- Noun paradigm: attach the case particles to the bare form.
-- `vc_allomorph w x y` yields x when w ends in a vowel and y otherwise,
-- so every call lists the post-vowel particle first.
noun : (bare : Str) -> Noun
  = \bare -> {
      s = table {
        NBare => bare ;
        -- 는 after a vowel, 은 after a consonant
        NTopic => bare + vc_allomorph bare "는" "은" ;
        -- 가 after a vowel, 이 after a consonant: 사과가, 책이
        NSubject => bare + vc_allomorph bare "가" "이" ;
        -- 를 after a vowel, 을 after a consonant
        NObject => bare + vc_allomorph bare "를" "을" ;
        -- the plural suffix 들 ends in a consonant, so the particles
        -- that follow it are fixed
        NPluralTopic => bare + "들은" ;
        NPluralSubject => bare + "들이"
      }
    } ;
}

12
scripts/search-hangul Executable file
View File

@@ -0,0 +1,12 @@
#!/usr/bin/env bb
(require '[babashka.fs :as fs]
'[clojure.string :as str])
;; Print every line of the .gf files under lab1/grammar/korean that contains
;; a character from the Hangul Jamo block and/or the Hangul Syllables block.
;; A line containing both kinds is printed twice, once per label.
;; Output has the form "LABEL: file:line: text"; line numbers are 1-based,
;; like grep -n and editor "goto line", so they can be jumped to directly.
(doseq [f (fs/glob "lab1/grammar/korean" "**.gf")
        [line-number line] (map-indexed (fn [i x] [(inc i) x])
                                        (-> f str slurp str/split-lines))]
  (when (re-find #"\p{block=HangulJamo}" line)
    (printf "JAMO: %s:%d: %s\n" (str f) line-number line))
  (when (re-find #"\p{block=HangulSyllables}" line)
    (printf "SYLLABLE: %s:%d: %s\n" (str f) line-number line)))

44
scripts/syllables-to-jamo Executable file
View File

@@ -0,0 +1,44 @@
#!/usr/bin/env bb
(require '[babashka.fs :as fs]
'[clojure.string :as str])
(defn syllables->jamo
  "Convert Hangul syllables in string S to their jamo components.

  Each character in U+AC00..U+D7A3 becomes a leading consonant (counted
  from U+1100), a vowel (counted from U+1161) and, if the syllable has
  one, a trailing consonant (counted from U+11A8).  Other characters are
  kept as they are."
  ;; The docstring has to come before the parameter vector; placed after it,
  ;; the string is only an expression whose value is thrown away.
  [s]
  (->> s
       (map int)
       (mapcat
        (fn [c]
          (if (<= 0xAC00 c 0xD7A3)
            ;; Hangul syllable - decompose.  The offset encodes
            ;; (lead, vowel, final) with 21 vowels and 28 finals per lead;
            ;; final index 0 means there is no trailing consonant.
            (let [code (- c 0xAC00)
                  lead (quot code (* 21 28))
                  medial (quot (mod code (* 21 28)) 28)
                  final (mod code 28)
                  lead-jamo (+ 0x1100 lead)
                  medial-jamo (+ 0x1161 medial)
                  final-jamo (if (> final 0) (+ 0x11A7 final) nil)]
              (remove nil? [lead-jamo medial-jamo final-jamo]))
            ;; Not a Hangul syllable
            [c])))
       (map char)
       (apply str)))
(defn -main
  "With file arguments, rewrite each file in place with its Hangul
  syllables decomposed to jamo.  Without arguments, act as a filter:
  read lines from standard input and print each converted line."
  [& args]
  (let [convert (fn [text]
                  (str/replace text
                               #"\p{block=HangulSyllables}+"
                               syllables->jamo))]
    (if (seq args)
      (doseq [path args]
        (spit path (convert (slurp path))))
      (loop [line (read-line)]
        (when line
          (println (convert line))
          (recur (read-line)))))))

;; Call -main only when this file is the script being executed, not when
;; it is loaded from another file.
(when (= *file* (System/getProperty "babashka.file"))
  (apply -main *command-line-args*))