From 6bbc7e804fd6d78a01a045fca549b8472451a096 Mon Sep 17 00:00:00 2001 From: aarne Date: Sun, 21 Jan 2007 21:07:35 +0000 Subject: [PATCH] pronunciation variant of LangThai (still buggy) --- lib/resource-1.0/thai/LexiconTha.gf | 6 +- lib/resource-1.0/thai/Makefile | 2 + lib/resource-1.0/thai/NounTha.gf | 6 +- lib/resource-1.0/thai/README | 21 +++---- lib/resource-1.0/thai/StringsTha.gf | 3 + lib/resource-1.0/thai/StringsThai.gf | 3 + .../thai/pronunciation/StringsTha.gf | 62 +++++++++++++++++++ 7 files changed, 84 insertions(+), 19 deletions(-) create mode 100644 lib/resource-1.0/thai/pronunciation/StringsTha.gf diff --git a/lib/resource-1.0/thai/LexiconTha.gf b/lib/resource-1.0/thai/LexiconTha.gf index 6c29e33bb..695b5f1f3 100644 --- a/lib/resource-1.0/thai/LexiconTha.gf +++ b/lib/resource-1.0/thai/LexiconTha.gf @@ -18,7 +18,7 @@ lin -- bank_N = regN "bank" ; -- beautiful_A = regADeg "beautiful" ; -- become_VA = mkVA (irregV "become" "became" "become") ; --- beer_N = regN "beer" ; + beer_N = mkN biar_s kew_s ; -- beg_V2V = mkV2V (regDuplV "beg") noPrep toP ; big_A = ss yay_s ; -- bike_N = regN "bike" ; @@ -64,7 +64,7 @@ lin -- doctor_N = regN "doctor" ; -- dog_N = regN "dog" ; -- door_N = regN "door" ; --- drink_V2 = dirV2 (irregV "drink" "drank" "drunk") ; + drink_V2 = dirV2 (regV dvm_s) ; -- easy_A2V = mkA2V (regA "easy") forP ; -- eat_V2 = dirV2 (irregV "eat" "ate" "eaten") ; -- empty_A = regADeg "empty" ; @@ -206,7 +206,7 @@ lin -- window_N = regN "window" ; -- wine_N = regN "wine" ; -- win_V2 = dirV2 (irregDuplV "win" "won" "won") ; - woman_N = mkN yig_s khon_s ; + woman_N = mkN (puu_s ++ yig_s) khon_s ; -- wonder_VQ = mkVQ (regV "wonder") ; -- wood_N = regN "wood" ; -- write_V2 = dirV2 (irregV "write" "wrote" "written") ; diff --git a/lib/resource-1.0/thai/Makefile b/lib/resource-1.0/thai/Makefile index cf72fd95a..654dcdaff 100644 --- a/lib/resource-1.0/thai/Makefile +++ b/lib/resource-1.0/thai/Makefile @@ -1,2 +1,4 @@ strings: runghc -i../../../src 
FileThai.hs StringsThai.gf >StringsTha.gf +pronstrings: + runghc -i../../../src FileThai.hs -p StringsThai.gf >pronunciation/StringsTha.gf diff --git a/lib/resource-1.0/thai/NounTha.gf b/lib/resource-1.0/thai/NounTha.gf index b1bf2c072..9085bbf6c 100644 --- a/lib/resource-1.0/thai/NounTha.gf +++ b/lib/resource-1.0/thai/NounTha.gf @@ -30,7 +30,7 @@ concrete NounTha of Noun = CatTha ** open StringsTha, ResTha, Prelude in { hasC = quant.hasC ; } ; DetPl quant num ord = { - s1 = [] ; + s1 = num.s ; s2 = quant.s ++ ord.s ; hasC = orB num.hasC quant.hasC ; } ; @@ -59,8 +59,8 @@ concrete NounTha of Noun = CatTha ** open StringsTha, ResTha, Prelude in { DefArt = {s = [] ; hasC = False} ; IndefArt = {s = [] ; hasC = False} ; --- MassDet = {s = [] ; n = Sg} ; --- + MassDet = {s = [] ; hasC = False} ; + UseN n = n ; -- UseN2 n = n ; -- UseN3 n = n ; diff --git a/lib/resource-1.0/thai/README b/lib/resource-1.0/thai/README index 58a282585..8735bc33d 100644 --- a/lib/resource-1.0/thai/README +++ b/lib/resource-1.0/thai/README @@ -9,22 +9,17 @@ Literal strings are defined as constants, all in the generated file StringsTha.gf -This file is obtained from +This file is obtained by 'make strings' from StringsThai.gf -by - - make strings - -using the transliteration defined in - - GF/src/GF/Text/Thai.hs - -and documented in - - thai.html +using the transliteration defined in GF/src/GF/Text/Thai.hs +and documented in thai.html. From this, also approximate pronunciation can be derived. -We also plan to use this unique word list for segmenting +Then run 'make pronstrings' and use the main file + + LangThaiPron.gf + +We also plan to use the unique word list for segmenting Thai strings. 
diff --git a/lib/resource-1.0/thai/StringsTha.gf b/lib/resource-1.0/thai/StringsTha.gf index 4d2722f54..28ca06cf5 100644 --- a/lib/resource-1.0/thai/StringsTha.gf +++ b/lib/resource-1.0/thai/StringsTha.gf @@ -7,15 +7,18 @@ resource StringsTha = { oper baan_s = "บ้าน" ; -- house +biar_s = "เบีอร์" ; ca_s = "จะ" ; -- Modal cet_s = "เจ็ด" ; -- seven chan_s = "ฉัน" ; -- I chay_s = "ไช่" ; -- be-not +dvm_s = "ดึม" ; -- drink et_s = "เอ็ด" ; -- one' haa_s = "ห้า" ; -- five hay_s = "ให้" ; -- give hok_s = "หก" ; -- six kaaw_s = "เกา" ; -- nine +kew_s = "แก้ว" ; -- glass (drink Classif) khaw_s = "เขว" ; -- he khon_s = "คน" ; -- people Classif khoog_s = "ของ" ; -- Possessive diff --git a/lib/resource-1.0/thai/StringsThai.gf b/lib/resource-1.0/thai/StringsThai.gf index e0b4bcc32..4544fc634 100644 --- a/lib/resource-1.0/thai/StringsThai.gf +++ b/lib/resource-1.0/thai/StringsThai.gf @@ -7,15 +7,18 @@ resource StringsTha = { oper baan_s = "bT2a:n" ; -- house +biar_s = "ebi:OrK" ; ca_s = "ca." ; -- Modal cet_s = "ecSd" ; -- seven chan_s = "c1an" ; -- I chay_s = "a&c2T1" ; -- be-not +dvm_s = "dvm" ; -- drink et_s = "eOSd" ; -- one' haa_s = "hT2a:" ; -- five hay_s = "a%hT2" ; -- give hok_s = "ho?k" ; -- six kaaw_s = "eka:" ; -- nine +kew_s = "e'kT2w" ; -- glass (drink Classif) khaw_s = "ek1w" ; -- he khon_s = "k2n" ; -- people Classif khoog_s = "k1Og" ; -- Possessive diff --git a/lib/resource-1.0/thai/pronunciation/StringsTha.gf b/lib/resource-1.0/thai/pronunciation/StringsTha.gf new file mode 100644 index 000000000..eb9c99905 --- /dev/null +++ b/lib/resource-1.0/thai/pronunciation/StringsTha.gf @@ -0,0 +1,62 @@ +-- The only place where literal Thai strings are defined +-- (except for Lexicon and Structural). 
+-- Generated from StringsThai.gf by 'make pronstrings' (approximate pronunciation variant) + +resource StringsTha = { + +oper + +baan_s = "b^aan" ; -- house +biar_s = "b-eiiO" ; -- beer +ca_s = "c`a" ; -- Modal +cet_s = "c`et" ; -- seven +chan_s = "ch~an" ; -- I +chay_s = "ch^ay" ; -- be-not +dvm_s = "d-vm" ; -- drink +et_s = "O`et" ; -- one' +haa_s = "h^aa" ; -- five +hay_s = "h^ay" ; -- give +hok_s = "k`o" ; -- six; NOTE(review): onset "h" looks dropped compared with the hok_s key - confirm against FileThai.hs -p output +kaaw_s = "k-eaa" ; -- nine +kew_s = "k^äw" ; -- glass (drink Classif) +khaw_s = "kh`ew" ; -- he +khon_s = "kh-on" ; -- people Classif +khoog_s = "kh~Og" ; -- Possessive +khun_s = "kh-un" ; -- you +lag_s = "l~ag" ; -- houses Classif +lap_s = "l`ap" ; -- sleep2 +lem_s = "l^em" ; -- books Classif +may_s = "m-ay" ; -- not +m'ay_s = "m~ay" ; -- Question +mvvn_s = "m`vvn" ; -- ten thousand +nag_s = "n~ag" ; -- book1 +nan_s = "n~an" ; -- that +noon_s = "n-On" ; -- sleep1 +nvg_s = "n~vg" ; -- one +pay_s = "p-ay" ; -- go +peet_s = "p`ät" ; -- eight +pen_s = "p-en" ; -- be, can-know +phan_s = "ph-an" ; -- thousand +puu_s = "ph^uu" ; -- woman1 +rak_s = "r'ak" ; -- love +raw_s = "r-eaa" ; -- we +rooy_s = "r~Oy" ; -- hundred +saam_s = "s~aam" ; -- three +seen_s = "s~än" ; -- hundred thousand +si_s = "s'i" ; -- Imperative +sii_s = "s`ii" ; -- four +sip_s = "s`ip" ; -- ten +soog_s = "s~Og" ; -- two +svv_s = "s~vvO" ; -- book2 +thii_s = "th^ii" ; -- Ord +tog_s = "t^Og" ; -- must +waa_s = "w^aa" ; -- that Conj +way_s = "w~ay" ; -- can-potent +yaa_s = "O`aay" ; -- Neg Imper +yaak_s = "O`aayk" ; -- want +yay_s = "y~ay" ; -- big +yig_s = "y~ig" ; -- woman2 +yii_s = "y^ii" ; -- two' + + +}