mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-24 03:52:50 -06:00
refactored Thai pronunciation; not perfect yet
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
module ThaiScript where
|
||||
|
||||
import Data.Char
|
||||
import Data.List
|
||||
import qualified Data.Map as Map
|
||||
|
||||
test = do
|
||||
@@ -53,27 +54,27 @@ thai2uni = map fromEnum
|
||||
uni2thai :: [Int] -> String
|
||||
uni2thai = map toEnum
|
||||
|
||||
{-
|
||||
uni2pron :: [Int] -> String
|
||||
uni2pron is = case is of
|
||||
0xe40:c:0xe32 :cs -> pron c ++ tone c cs "aw" ++ uni2pron cs
|
||||
0xe40:c:0xe34 :cs -> pron c ++ tone c cs "\601\601" ++ uni2pron cs
|
||||
0xe40:c:0xe35:0xe22:cs -> pron c ++ tone c cs "iia" ++ uni2pron cs
|
||||
0xe40:c:0xe37:0xe2d:cs -> pron c ++ tone c cs "\649\649" ++ uni2pron cs
|
||||
0xe40:c:0xe47 :cs -> pron c ++ tone c cs "e" ++ uni2pron cs
|
||||
0xe41:c:0xe47 :cs -> pron c ++ tone c cs "\x25b" ++ uni2pron cs
|
||||
|
||||
v:0xe2b:c:cs | isConsonant c && bvow v
|
||||
v:0xe2b:c:cs | bvow v && isConsonant c
|
||||
-> pron c ++ tone 0xe2b cs (pron v) ++ uni2pron cs -- h-
|
||||
v:b:c:cs | clust b c && bvow v -- kr- etc
|
||||
-> pron b ++ pron c ++ tone c cs (pron v) ++ uni2pron cs
|
||||
-> pron b ++ pron c ++ tone b (c:cs) (pron v) ++ uni2pron cs
|
||||
v:c:cs | bvow v -> pron c ++ tone c cs (pron v) ++ uni2pron cs -- e .. ay
|
||||
|
||||
c:0xe31:0xe27:cs -> pron c ++ tone c cs "uua" ++ uni2pron cs
|
||||
c:0xe31:0xe27:cs -> pron c ++ tone c cs "ua" ++ uni2pron cs
|
||||
|
||||
0xe2b:c:v:cs | isConsonant c && cvow v
|
||||
-> pron c ++ tone 0xe2b cs (pron v) ++ uni2pron cs -- h-
|
||||
b:c:v:cs | clust b c && cvow v -- kr- etc
|
||||
-> pron b ++ pron c ++ tone c cs (pron v) ++ uni2pron cs
|
||||
-> pron b ++ pron c ++ tone b (c:cs) (pron v) ++ uni2pron cs
|
||||
0xe2d:v:cs | cvow v -> tone 0xe2d cs (pron v) ++ uni2pron cs -- O-
|
||||
c:v:cs | cvow v -> pron c ++ tone c cs (pron v) ++ uni2pron cs -- a .. u:
|
||||
|
||||
@@ -85,10 +86,82 @@ uni2pron is = case is of
|
||||
pron c = lookThai [] pronunc c
|
||||
cvow v = (0xe30 <= v && v <= 0xe39) || v == 0xe2d -- central vowels
|
||||
bvow v = 0xe40 <= v && v <= 0xe44 -- begin vowels
|
||||
clust b c = isConsonant b && (elem c [0xe23, 0xe25])
|
||||
clust b c = isConsonant b && (elem c [0xe23, 0xe25, 0xe27])
|
||||
-}
|
||||
|
||||
tone :: Int -> [Int] -> String -> String
|
||||
tone c cs v = case (lookThai Low cclass c, isLive cs, toneMark (c:cs)) of
|
||||
|
||||
uni2pron :: [Int] -> String
|
||||
uni2pron is = case getSyllable is of
|
||||
|
||||
-- diph order as in Smyth, p. 15
|
||||
[0xe40] : cc : [] : d : cs | kill d -> prons cc ++ tone cc d cs "e" ++ endWith cs -- e-8 -> e
|
||||
[0xe41] : cc : [] : d : cs | kill d -> prons cc ++ tone cc d cs "\x25b" ++ endWith cs -- ä-8 -> ä
|
||||
[v] : cc : [] : d : cs -> prons cc ++ tone cc d cs (pron v) ++ endWith cs -- e/ä/o/ay/ay
|
||||
[0xe40] : cc : [0xe22] : d : cs -> prons cc ++ tone cc d cs "\601\601y" ++ endWith cs -- e-y -> ööy
|
||||
[0xe40] : cc : [0xe2d] : d : cs -> prons cc ++ tone cc d cs "\601\601" ++ endWith cs -- e-O -> öö
|
||||
[0xe40] : cc : [0xe2d,0xe30] : d : cs -> prons cc ++ tone cc d cs "\601" ++ endWith cs -- e-Oa -> ö
|
||||
[0xe40] : cc : [0xe30] : d : cs -> prons cc ++ tone cc d cs "e" ++ endWith cs -- e-a
|
||||
[0xe40] : cc : [0xe32] : d : cs -> prons cc ++ tone cc d cs "aw" ++ endWith cs -- e-a
|
||||
[0xe40] : cc : [0xe32,0xe30] : d : cs -> prons cc ++ tone cc d cs "\596" ++ endWith cs -- e-Aa -> O
|
||||
[0xe40] : cc : [0xe34] : d : cs -> prons cc ++ tone cc d cs "\601\601" ++ endWith cs -- e-i -> öö
|
||||
[0xe40] : cc : [0xe35,0xe22] : d : cs -> prons cc ++ tone cc d cs "ia" ++ endWith cs -- e-iiy-> ia
|
||||
[0xe40] : cc : [0xe34,0xe22,0xe30] : d : cs -> prons cc ++ tone cc d cs "ia" ++ endWith cs -- e-iya-> ia
|
||||
[0xe40] : cc : [0xe35,0xe2d] : d : cs -> prons cc ++ tone cc d cs "\x289a" ++ endWith cs -- e-iiO-> üa
|
||||
--[0xe40] : cc : [0xe37,0xe2d] : d : cs -> prons cc ++ tone cc d cs "\649\649" ++ endWith cs
|
||||
|
||||
[0xe41] : cc : [0xe30] : d : cs -> prons cc ++ tone cc d cs "\x25b" ++ endWith cs -- ä-a -> ä
|
||||
[0xe42] : cc : [0xe30] : d : cs -> prons cc ++ tone cc d cs "o" ++ endWith cs -- o-a -> o
|
||||
[] : [0xe2d] : v : d : cs -> tone [0xe2d] d cs (prons v) ++ endWith cs -- Ov -> v
|
||||
[] : cc : [0xe31,0xe27] : d : cs -> prons cc ++ tone cc d cs "ua" ++ endWith cs -- Caw -> Cua
|
||||
[] : bb : [] : cc : [] : d : cs -> prons bb ++ "a" ++ prons cc ++ tone cc [] cs "o" ++ endWith cs -- CaCoC
|
||||
[] : bb : [] : cc : [] -> prons bb ++ "o" ++ endWith [cc] -- CoC
|
||||
[] : cc : v : d : cs -> prons cc ++ tone cc d cs (prons v) ++ endWith cs -- Cv- (normal)
|
||||
|
||||
_ -> prons is --- shouldn't happen
|
||||
|
||||
where
|
||||
prons cc = case cc of
|
||||
c:0xe4c:cs -> prons cs
|
||||
0xe2b:c:cs | isConsonant c -> concatMap pron (c:cs) ---- only some conss
|
||||
_ -> concatMap pron cc
|
||||
endWith ss = case ss of
|
||||
(c:cs) -> encs c ++ uni2pron (concat cs)
|
||||
_ -> []
|
||||
encs cs = case cs of
|
||||
[] -> []
|
||||
_ -> prons (init cs) ++ enc (last cs)
|
||||
|
||||
enc c = lookThai [] pronunc_end c
|
||||
pron c = lookThai [] pronunc c
|
||||
kill d = elem 0xe47 d
|
||||
|
||||
getSyllable :: [Int] -> [[Int]] -- (V?),(C|CC|hC),(V*),(D*),(C*),[[],C]?
|
||||
getSyllable s = case s of
|
||||
v:cs | preVowel v -> [v]:getCons v cs
|
||||
[] -> []
|
||||
c:_ -> []:getCons c s
|
||||
where
|
||||
getCons v s = case s of
|
||||
b:c:cs | b == 0xe2b && isConsonant c -> [b,c]:getVow v cs -- hC
|
||||
b:cs | b == 0xe2d -> [b] :getVow v cs -- O
|
||||
b:c:cs | isConsonant b && (elem c [0xe23, 0xe25, 0xe27]) -> [b,c]:getVow v cs -- C(l|r|w) cluster
|
||||
b:c:d:[] | isConsonant b && isConsonant c && isConsonant d -> [b] :[]:[c]:[]:[d]:[] -- CaCoC
|
||||
b:c:[] | isConsonant b && isConsonant c -> [b] :[]:[c]:[] -- CoC
|
||||
b:cs | isConsonant b -> [b] :getVow v cs -- C
|
||||
_ -> [s] --- shouldn't happen ??
|
||||
getVow v0 s = case span (\x -> inVow v0 x || diacritic x) s of
|
||||
(v,c:cs) -> let (d,w) = partition diacritic v in w:d:[c]:getSyllable cs
|
||||
(v,_) -> let (d,w) = partition diacritic v in [w,d]
|
||||
inVow v0 x = inVowel x || case v0 of
|
||||
0xe40 -> elem x [0xe22] -- after e-, also y is a part of a vowel
|
||||
_ -> False
|
||||
|
||||
inVowel v = (0xe30 <= v && v <= 0xe39) || v == 0xe2d -- infix vowels
|
||||
preVowel v = 0xe40 <= v && v <= 0xe44 -- prefix vowels
|
||||
diacritic x = 0xe47 <= x && x <= 0xe4d -- tones, killers
|
||||
|
||||
tone :: [Int] -> [Int] -> [[Int]] -> String -> String
|
||||
tone cc@(c:_) d cs v = case (lookThai Low cclass c, isLive cs1, toneMark d) of
|
||||
(_,_,3) -> high v
|
||||
(_,_,4) -> rising v
|
||||
(Low,_,1) -> falling v
|
||||
@@ -103,6 +176,8 @@ tone c cs v = case (lookThai Low cclass c, isLive cs, toneMark (c:cs)) of
|
||||
(Mid,False,_) -> low v
|
||||
(High,True,_) -> rising v
|
||||
(High,False,_) -> low v
|
||||
where
|
||||
cs1 = concat (take 1 cs)
|
||||
|
||||
toneMark :: [Int] -> Int
|
||||
toneMark is = case is of
|
||||
@@ -115,6 +190,9 @@ toneMark is = case is of
|
||||
|
||||
isLong :: String -> Bool
|
||||
isLong s = case s of
|
||||
'i':'a':_ -> True
|
||||
'u':'a':_ -> True
|
||||
'\x289':a:_ -> True
|
||||
c:d:_ | c == d -> True --- must be vowels
|
||||
_:cs -> isLong cs
|
||||
_ -> False
|
||||
@@ -215,18 +293,18 @@ allThaiChars = [
|
||||
TC {unicode = 3626, translit = "s", cclass = High, liveness = False, pronunc = "s", pronunc_end = "t"},
|
||||
TC {unicode = 3627, translit = "h", cclass = High, liveness = True, pronunc = "h", pronunc_end = ""},
|
||||
TC {unicode = 3628, translit = "l'", cclass = Low, liveness = True, pronunc = "l", pronunc_end = "n"},
|
||||
TC {unicode = 3629, translit = "O", cclass = Mid, liveness = True, pronunc = "\596", pronunc_end = "\596"},
|
||||
TC {unicode = 3629, translit = "O", cclass = Mid, liveness = True, pronunc = "\596\596", pronunc_end = "\596\596"},
|
||||
TC {unicode = 3630, translit = "h'", cclass = Low, liveness = True, pronunc = "h", pronunc_end = ""},
|
||||
|
||||
TC {unicode = 3632, translit = "a.", cclass = Low, liveness = True, pronunc = "a", pronunc_end = "a"},
|
||||
TC {unicode = 3633, translit = "a", cclass = Low, liveness = True, pronunc = "a", pronunc_end = "a"},
|
||||
TC {unicode = 3632, translit = "a.", cclass = Low, liveness = False, pronunc = "a", pronunc_end = "a"},
|
||||
TC {unicode = 3633, translit = "a", cclass = Low, liveness = False, pronunc = "a", pronunc_end = "a"},
|
||||
TC {unicode = 3634, translit = "a:", cclass = Low, liveness = True, pronunc = "aa", pronunc_end = "aa"},
|
||||
TC {unicode = 3635, translit = "a+", cclass = Low, liveness = True, pronunc = "am", pronunc_end = "am"},
|
||||
TC {unicode = 3636, translit = "i", cclass = Low, liveness = True, pronunc = "i", pronunc_end = "i"},
|
||||
TC {unicode = 3636, translit = "i", cclass = Low, liveness = False, pronunc = "i", pronunc_end = "i"},
|
||||
TC {unicode = 3637, translit = "i:", cclass = Low, liveness = True, pronunc = "ii", pronunc_end = "ii"},
|
||||
TC {unicode = 3638, translit = "v", cclass = Low, liveness = True, pronunc = "\x289", pronunc_end = "\x289"},
|
||||
TC {unicode = 3638, translit = "v", cclass = Low, liveness = False, pronunc = "\x289", pronunc_end = "\x289"},
|
||||
TC {unicode = 3639, translit = "v:", cclass = Low, liveness = True, pronunc = "\x289\x289", pronunc_end = "\x289\x289"},
|
||||
TC {unicode = 3640, translit = "u", cclass = Low, liveness = True, pronunc = "u", pronunc_end = "u"},
|
||||
TC {unicode = 3640, translit = "u", cclass = Low, liveness = False, pronunc = "u", pronunc_end = "u"},
|
||||
TC {unicode = 3641, translit = "u:", cclass = Low, liveness = True, pronunc = "uu", pronunc_end = "uu"},
|
||||
TC {unicode = 3648, translit = "e", cclass = Low, liveness = True, pronunc = "ee", pronunc_end = "ee"},
|
||||
TC {unicode = 3649, translit = "e'", cclass = Low, liveness = True, pronunc = "\x25b\x25b", pronunc_end = "0x25b\x25b"},
|
||||
|
||||
Reference in New Issue
Block a user