mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-20 10:19:32 -06:00
exception cases added to ThaiScript
This commit is contained in:
@@ -6,6 +6,7 @@ import qualified Data.Map as Map
|
||||
import System
|
||||
|
||||
-- convert all files *Tha.gf into *Thp.gf with "t" changed to (thpron "t" "p")
|
||||
|
||||
main = allThpron
|
||||
|
||||
allThpron = do
|
||||
@@ -21,39 +22,17 @@ fileThpron file = do
|
||||
|
||||
appThpron s = case s of
|
||||
'"':cs -> let (w,_:rest) = break (=='"') cs in mkThpron w ++ appThpron rest
|
||||
'T':'h':'a':'A':rest -> "ThpA" ++ appThpron rest -- AllThaAbs
|
||||
'T':'h':'a':c:rest | isAlpha c -> "Tha" ++ c : appThpron rest -- Thank
|
||||
'T':'h':'a':rest -> "Thp" ++ appThpron rest
|
||||
c:cs -> c:appThpron cs
|
||||
_ -> s
|
||||
|
||||
mkThpron s = "(thpron \"" ++ s ++ "\" \"" ++ thai2pron s ++ "\")"
|
||||
|
||||
-- heuristics for finding syllables
|
||||
uniSyllables :: [ThaiDat] -> [[ThaiDat]]
|
||||
uniSyllables = reverse . (syll [] []) where
|
||||
syll sys sy is = case is of
|
||||
c:cs | isPreVowel c -> new [] is
|
||||
c:d:cs | isConsonant c && isConsonant d -> new [c] (d:cs) ---- no consonant clusters
|
||||
c:cs -> continue [c] cs ---- more rules to follow
|
||||
_ -> sy:sys
|
||||
where
|
||||
new old = syll ((sy ++ old) : sys) []
|
||||
continue old = syll sys (sy ++ old)
|
||||
|
||||
|
||||
isPreVowel :: ThaiDat -> Bool
|
||||
isPreVowel i = Ce <= i && i <= CaE
|
||||
|
||||
isVowel :: ThaiDat -> Bool
|
||||
isVowel i = CaP <= i && i <= CaE
|
||||
|
||||
isConsonant :: ThaiDat -> Bool
|
||||
isConsonant i = Ck <= i && i <= Ch' && i /= CO
|
||||
|
||||
isMark :: ThaiDat -> Bool
|
||||
isMark i = CS <= i && i <= CK
|
||||
|
||||
|
||||
-- the following functions involving pron (=pronunciation) work on syllables
|
||||
|
||||
thai2pron = uni2pron . thai2uni
|
||||
trans2pron = uni2pron . trans2uni
|
||||
trans2thai = uni2thai . trans2uni
|
||||
@@ -70,10 +49,31 @@ uni2pron = dat2pron . uni2dat
|
||||
|
||||
uni2dat = map (maybe CR dat . (\u -> Map.lookup u uniMap)) --- CR as exception value
|
||||
|
||||
dat2pron :: [ThaiDat] -> String
|
||||
dat2pron is = case getSyllable is of
|
||||
--high = accent '\x301'
|
||||
--low = accent '\x300'
|
||||
--rising = accent '\x306'
|
||||
--falling = accent '\x302'
|
||||
|
||||
-- diph order as in Smyth, p. 15
|
||||
dat2pron :: [ThaiDat] -> String
|
||||
dat2pron is = case is of
|
||||
|
||||
-- exceptional words
|
||||
[Ce,Ck1,CaL] -> "kha\x301w"
|
||||
[Cc1,Ca,Cn] -> "cha\x301n"
|
||||
[CaE,Ch,Cm] -> "m" ++ high "ay"
|
||||
[Ct,CT2,CO,Cg] -> "t" ++ falling i_O ++ i_ng
|
||||
[Ce,Cg,Ci,Cn] -> i_ng ++ i_oe ++ "n"
|
||||
[CaE,Cd,CT2] -> "d" ++ falling "aay"
|
||||
[Ce,Ck,CT2,CaL] -> "k" ++ falling "aaw"
|
||||
[CaE,Cm,CT2] -> "m" ++ high "aay"
|
||||
[CO,Cy,CaL,Ck] -> "y" ++ low "aak"
|
||||
[CO,Cy,CT1,CaL] -> "y" ++ low "aa"
|
||||
[CO,Cy,CT1,CaL,Cg] -> "y" ++ low "aa" ++ i_ng
|
||||
[CO,Cy,CT1,CuL] -> "y" ++ low "uu"
|
||||
[Cp3,Cr] -> "phan" --- not in Smyth
|
||||
|
||||
-- words following the rules (mostly from Smyth's Essential Grammar)
|
||||
_ -> case getSyllable is of
|
||||
[Ce] : cc : [] : d : cs | brev d -> prons cc ++ tone cc d cs "e" ++ endWith cs -- e-8 -> e
|
||||
[Ce'] : cc : [] : d : cs | brev d -> prons cc ++ tone cc d cs i_ae ++ endWith cs -- ä-8 -> ä
|
||||
[v] :[CO]: [] : d : cs -> tone[CO]d cs (pron v) ++ endWith cs -- e/ä/o/ay/ay
|
||||
@@ -93,23 +93,22 @@ dat2pron is = case getSyllable is of
|
||||
[CoL] : cc : [CaP] : d : cs -> prons cc ++ tone cc d cs "o" ++ endWith cs -- o-a -> o
|
||||
[] :[CO]: v : d : cs -> tone[CO]d cs (prons v) ++ endWith cs -- Ov -> v
|
||||
[] : cc : [Ca,Cw] : d : cs -> prons cc ++ tone cc d cs "ua" ++ endWith cs -- Caw -> Cua
|
||||
[] : bb : [] : cc : []: d : cs -> prons bb ++ "a" ++ prons cc ++ tone cc [] cs "o" ++ endWith cs -- CaCoC
|
||||
[] : bb : [] : cc : [] -> prons bb ++ "o" ++ endWith [cc] -- CoC
|
||||
[] : bb : [] : cc : []: d : cs -> prons bb ++ "a" ++ prons cc ++ tone cc [] cs "o" ++ endWith (d:cs) -- CaCoC
|
||||
[] : bb : [] : cc : [] -> prons bb ++ tone bb [] [cc] "o" ++ endWith [cc] -- CoC
|
||||
[] : bb : [] : cc : v : d : cs -> prons bb ++ "a" ++ prons cc ++ tone cc [] cs (prons v) ++ endWith cs -- CaCvC
|
||||
[] : cc : v : d : cs -> prons cc ++ tone cc d cs (prons v) ++ endWith cs -- Cv- (normal)
|
||||
|
||||
_ -> prons is --- shouldn't happen
|
||||
|
||||
where
|
||||
prons cc = case cc of
|
||||
c :CK:cs -> prons cs -- killer
|
||||
_ :CK:cs -> prons cs -- killer
|
||||
Ch:c:cs | isConsonant c -> concatMap pron (c:cs) -- hC, ---- only some conss
|
||||
_ -> concatMap pron cc
|
||||
endWith ss = case ss of
|
||||
(c:cs) -> encs c ++ dat2pron (concat cs)
|
||||
_ -> []
|
||||
encs cs = case cs of
|
||||
[] -> []
|
||||
_ -> prons (init cs) ++ enc (last cs)
|
||||
endWith ss = case concat ss of
|
||||
_:CK:cs -> dat2pron cs
|
||||
c :cs -> enc c ++ dat2pron cs
|
||||
_ -> []
|
||||
|
||||
enc c = lookThai [] enp c
|
||||
pron c = lookThai [] pro c
|
||||
@@ -117,25 +116,31 @@ dat2pron is = case getSyllable is of
|
||||
|
||||
getSyllable :: [ThaiDat] -> [[ThaiDat]] -- (V?),(C|CC|hC),(V*),(D*),(C*),[[],C]?
|
||||
getSyllable s = case s of
|
||||
v:cs | preVowel v -> [v]:getCons v cs
|
||||
v:cs | isPreVowel v -> [v]:getCons v cs
|
||||
[] -> []
|
||||
c:_ -> []:getCons c s
|
||||
where
|
||||
getCons v s = case s of
|
||||
b:c:cs | b == Ch && isConsonant c -> [b,c]:getVow v cs -- hC
|
||||
b:cs | b == CO -> [b] :getVow v cs -- O
|
||||
b:c:cs | isCluster b c -> [b,c]:getVow v cs -- C(l|r|w) cluster
|
||||
b:cs | preVowel v -> [b] :getVow v cs
|
||||
b:c:d:[] | all isConsonant [b,c,d] -> [b] :[]:[c]:[]:[d]:[] -- CaCoC
|
||||
b:c:[] | all isConsonant [b,c] -> [b] :[]:[c]:[] -- CoC
|
||||
b:cs | isConsonant b -> [b] :getVow v cs -- C
|
||||
Ch:c:cs | isConsonant c && isLow c -> let (cc:ccs) = getCons v (c:cs) in (Ch:cc):ccs -- hC
|
||||
CO:cs -> [CO] :getVow v cs -- O
|
||||
Cs:Cr:cs -> [Cs] :getVow v cs -- O
|
||||
b:Cr:Cr:[] | isConsonant b -> [b] :[Ca]:[]:[Cr]:[] -- Crr -> Can
|
||||
b:Cr:Cr:[c] | all isConsonant [b,c] -> [b] :[Ca]:[]:[c]:[] -- CrrC -> CaC
|
||||
b:c:cs | isCluster b c -> [b,c] :getVow v cs -- C(l|r|w) cluster
|
||||
b:cs | isPreVowel v -> [b] :getVow v cs
|
||||
b:Cw:Cy:[] | isConsonant b -> [b] :[Cu,Ca]:[]:[Cy]:[] -- Cuay
|
||||
b:c:d:[] | all isConsonant [b,c,d] -> [b] :[]:[c]:[]:[d]:[] -- CaCoC
|
||||
b:c:[] | all isConsonant [b,c] -> [b] :[]:[c]:[] -- CoC
|
||||
b:c:cs | all isConsonant [b,c] -> [b] :[]:[c]:getVow c cs -- CaCvC
|
||||
b:cs | isConsonant b -> [b] :getVow v cs -- C
|
||||
_ -> [s] --- shouldn't happen ??
|
||||
getVow v0 s = case span (\x -> inVow v0 x || diacritic x) s of
|
||||
(v,c:cs) -> let (d,w) = partition diacritic v in w:d:[c]:getSyllable cs
|
||||
(v,_) -> let (d,w) = partition diacritic v in [w,d]
|
||||
inVow v0 x = inVowel x || case v0 of
|
||||
getVow v0 s = case span (\x -> inVow v0 x || isDiacritic x) s of
|
||||
(v,c:cs) -> let (d,w) = partition isDiacritic v in w:d:[c]:getSyllable cs
|
||||
(v,_) -> let (d,w) = partition isDiacritic v in [w,d]
|
||||
inVow v0 x = isInVowel x || case v0 of
|
||||
Ce -> elem x [Cy] -- after e-, also y is a part of a vowel
|
||||
_ -> False
|
||||
_ -> False --- elem x [Cw]
|
||||
isLow c = lookThai Low ccl c == Low
|
||||
|
||||
isCluster b c =
|
||||
(pronTha c == "r" && elem (pronTha b) ["k","kh","p","ph","t"])
|
||||
@@ -144,9 +149,20 @@ isCluster b c =
|
||||
|
||||
pronTha c = lookThai [] pro c
|
||||
|
||||
inVowel v = (CaP <= v && v <= CuL) || v == CO -- infix vowels
|
||||
preVowel v = Ce <= v && v <= CaE -- prefix vowels
|
||||
diacritic x = CS <= x && x <= CK -- tones, killers
|
||||
isInVowel :: ThaiDat -> Bool
|
||||
isInVowel v = (CaP <= v && v <= CuL) || v == CO -- infix vowels
|
||||
|
||||
isPreVowel :: ThaiDat -> Bool
|
||||
isPreVowel i = Ce <= i && i <= CaE
|
||||
|
||||
isVowel :: ThaiDat -> Bool
|
||||
isVowel i = CaP <= i && i <= CaE
|
||||
|
||||
isConsonant :: ThaiDat -> Bool
|
||||
isConsonant i = Ck <= i && i <= Ch' && i /= CO
|
||||
|
||||
isDiacritic :: ThaiDat -> Bool
|
||||
isDiacritic i = CS <= i && i <= CK
|
||||
|
||||
tone :: [ThaiDat] -> [ThaiDat] -> [[ThaiDat]] -> String -> String
|
||||
tone cc@(c:_) d cs v = case (lookThai Low ccl c, isLive cs1, toneMark d) of
|
||||
@@ -166,6 +182,7 @@ tone cc@(c:_) d cs v = case (lookThai Low ccl c, isLive cs1, toneMark d) of
|
||||
(High,False,_) -> low v
|
||||
where
|
||||
cs1 = concat (take 1 cs)
|
||||
tone _ _ _ v = mid v
|
||||
|
||||
toneMark :: [ThaiDat] -> Int
|
||||
toneMark is = case is of
|
||||
@@ -378,3 +395,16 @@ tabs s = case break (=='\t') s of
|
||||
_ -> [s]
|
||||
|
||||
|
||||
-- heuristics for finding syllables - unreliable, unfinished
|
||||
uniSyllables :: [ThaiDat] -> [[ThaiDat]]
|
||||
uniSyllables = reverse . (syll [] []) where
|
||||
syll sys sy is = case is of
|
||||
c:cs | isPreVowel c -> new [] is
|
||||
c:d:cs | isConsonant c && isConsonant d -> new [c] (d:cs) ---- no consonant clusters
|
||||
c:cs -> continue [c] cs ---- more rules to follow
|
||||
_ -> sy:sys
|
||||
where
|
||||
new old = syll ((sy ++ old) : sys) []
|
||||
continue old = syll sys (sy ++ old)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user