mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-24 03:52:50 -06:00
thai pronunciation better now
This commit is contained in:
@@ -20,6 +20,8 @@ import Data.Char
|
|||||||
import GF.Text.UTF8
|
import GF.Text.UTF8
|
||||||
import Data.List
|
import Data.List
|
||||||
|
|
||||||
|
import Debug.Trace
|
||||||
|
|
||||||
|
|
||||||
mkThai :: String -> String
|
mkThai :: String -> String
|
||||||
mkThai = concat . map mkThaiWord . words
|
mkThai = concat . map mkThaiWord . words
|
||||||
@@ -79,59 +81,101 @@ allThaiCodes :: [Int]
|
|||||||
allThaiCodes = [0x0e00 .. 0x0e7f]
|
allThaiCodes = [0x0e00 .. 0x0e7f]
|
||||||
|
|
||||||
|
|
||||||
-- derive the pronunciation of a syllable
|
---------------------
|
||||||
|
-- heuristic pronunciation of codes
|
||||||
|
---------------------
|
||||||
|
|
||||||
pronSyll :: [Int] -> String
|
-- this works for one syllable
|
||||||
pronSyll s = cons1 ++ voc ++ cons2 where
|
|
||||||
voc = toned tone $ pronThaiChar vo
|
|
||||||
|
|
||||||
cons1 = concatMap pronThaiChar co1 ----
|
mkPronSyllable s = pronSyllable $ getSyllable $ map mkThaiChar $ unchar s
|
||||||
cons2 = mkThaiPron $ unwords $ map recodeThai co2 -- takes care of final ----
|
|
||||||
|
|
||||||
(vo,cc@(co1,co2)) = case s of
|
data Syllable = Syll {
|
||||||
c:cs | initVowel c -> (c,getCons cs)
|
initv :: [Int],
|
||||||
c1:c:c2 | internVowel c -> (c,([c1],getFinal c2))
|
initc :: [Int],
|
||||||
c1:0x0e2d:c2 -> (0x0e42,([c1],getFinal c2))
|
midv :: [Int],
|
||||||
c0:c1:c:c2 | cluster c0 c1 && internVowel c -> (c,([c0,c1],getFinal c2))
|
finalc :: [Int],
|
||||||
c0:c1:0x0e2d:c2 | cluster c0 c1 -> (0x0e42,([c0,c1],getFinal c2))
|
finalv :: [Int],
|
||||||
_ -> (0x0e42,getCons s) ---- "o"
|
tone :: [Int],
|
||||||
|
shorten :: Bool,
|
||||||
|
kill :: Bool
|
||||||
|
}
|
||||||
|
deriving Show
|
||||||
|
|
||||||
getCons cs = case cs of
|
data Tone = TMid | TLow | THigh | TRise | TFall
|
||||||
c0:c1:c2 | cluster c1 c1 -> ([c0,c1],getFinal c2)
|
deriving Show
|
||||||
c:c2 -> ([c],getFinal c2)
|
|
||||||
|
|
||||||
getFinal = snd . getToneFinal
|
data CClass = CLow | CMid | CHigh
|
||||||
toneMark = fst . getToneFinal
|
deriving Show
|
||||||
|
|
||||||
getToneFinal c = case c of
|
pronSyllable :: Syllable -> String
|
||||||
[ _,0x0e4c] -> ([], []) -- killer
|
pronSyllable s =
|
||||||
[t,_,0x0e4c] -> ([t],[]) -- killer
|
concatMap pronThaiChar (reverse $ initc s) ++
|
||||||
_ -> splitAt (length c - 1) c
|
tonem ++
|
||||||
|
vowel ++
|
||||||
|
finalCons
|
||||||
|
-- concatMap pronThaiChar (reverse $ finalc s)
|
||||||
|
|
||||||
initVowel c = 0x0e40 <= c && c <= 0x0e44
|
where
|
||||||
internVowel c = 0x0e30 <= c && c <= 0x0e39
|
|
||||||
|
|
||||||
cluster c0 c1 =
|
vowel = case (initv s, midv s, finalv s, shorten s, tone s) of
|
||||||
c0 == 0x0e2b -- h
|
(i,m,f,_,_) -> concatMap pronThaiChar (reverse $ f ++ m ++ i) ----
|
||||||
|| c1 == 0x0e23 -- r
|
|
||||||
|| c1 == 0x0e25 -- l
|
|
||||||
|| c1 == 0x0e27 -- w
|
|
||||||
|
|
||||||
classC = case co1 of
|
finalCons =
|
||||||
_ -> "L" ----
|
let (c,cs) = splitAt 1 $ finalc s
|
||||||
|
in
|
||||||
|
case c of
|
||||||
|
[] -> []
|
||||||
|
[k] -> concatMap pronThaiChar (reverse cs) ++ finalThai k
|
||||||
|
|
||||||
lengthV = case vo of
|
iclass = case take 1 (reverse $ initc s) of
|
||||||
_ -> False ----
|
[c] -> classThai c
|
||||||
|
[] -> CMid -- O
|
||||||
|
|
||||||
liveness = case co2 of
|
isLong = not (shorten s) && case vowel of
|
||||||
_ -> False ----
|
_:_:_ -> True ----
|
||||||
|
_ -> False
|
||||||
|
|
||||||
tone = case (classC,lengthV,liveness,toneMark) of
|
isLive = case finalCons of
|
||||||
_ -> ""
|
c | elem c ["n","m","g"] -> True
|
||||||
|
"" -> isLong
|
||||||
|
_ -> False
|
||||||
|
|
||||||
toned t v = t ++ v ----
|
tonem = case (iclass,isLive,isLong,tone s) of
|
||||||
|
(_,_,_, [0x0e4a]) -> tHigh
|
||||||
|
(_,_,_, [0x0e4b]) -> tRise
|
||||||
|
(CLow,_,_,[0x0e49]) -> tRise
|
||||||
|
(_,_,_, [0x0e49]) -> tFall
|
||||||
|
(CLow,_,_,[0x0e48]) -> tFall
|
||||||
|
(_, _,_,[0x0e48]) -> tLow
|
||||||
|
(CHigh,True,_,_) -> tRise
|
||||||
|
(_, True,_,_) -> tMid
|
||||||
|
(CLow,False,False,_) -> tHigh
|
||||||
|
(CLow,False,_,_) -> tFall
|
||||||
|
_ -> tLow
|
||||||
|
|
||||||
-- [0x0e00 .. 0x0e7f]
|
(tMid,tHigh,tLow,tRise,tFall) = ("-","'","`","~","^")
|
||||||
|
|
||||||
|
isVowel c = 0x0e30 <= c && c <= 0x0e44 ----
|
||||||
|
isCons c = 0x0e01 <= c && c <= 0x0e2f ----
|
||||||
|
isTone c = 0x0e48 <= c && c <= 0x0e4b
|
||||||
|
|
||||||
|
getSyllable :: [Int] -> Syllable
|
||||||
|
getSyllable = foldl get (Syll [] [] [] [] [] [] False False) where
|
||||||
|
get syll c = case c of
|
||||||
|
0x0e47 -> syll {shorten = True}
|
||||||
|
0x0e4c -> syll {kill = True, finalc = tail (finalc syll)} --- always last
|
||||||
|
0x0e2d
|
||||||
|
| null (initc syll) -> syll {initc = [c]} -- "O"
|
||||||
|
| otherwise -> syll {midv = c : midv syll}
|
||||||
|
_
|
||||||
|
| isVowel c -> if null (initc syll)
|
||||||
|
then syll {initv = c : initv syll}
|
||||||
|
else syll {midv = c : midv syll}
|
||||||
|
| isCons c -> if null (midv syll)
|
||||||
|
then syll {initc = c : initc syll}
|
||||||
|
else syll {finalc = c : finalc syll}
|
||||||
|
| isTone c -> syll {tone = [c]}
|
||||||
|
_ -> syll ---- check this
|
||||||
|
|
||||||
|
|
||||||
-- to test
|
-- to test
|
||||||
@@ -147,7 +191,12 @@ test3 = do
|
|||||||
testThai :: String -> IO ()
|
testThai :: String -> IO ()
|
||||||
testThai s = do
|
testThai s = do
|
||||||
putStrLn $ encodeUTF8 $ mkThai s
|
putStrLn $ encodeUTF8 $ mkThai s
|
||||||
putStrLn $ unwords $ map mkThaiPron $ words s
|
putStrLn $ unwords $ map mkPronSyllable $ words s
|
||||||
|
|
||||||
|
testSyllable s =
|
||||||
|
let y = getSyllable $ map mkThaiChar $ unchar s
|
||||||
|
in
|
||||||
|
putStrLn $ pronSyllable $ trace (show y) y
|
||||||
|
|
||||||
thaiFile :: FilePath -> Maybe FilePath -> IO ()
|
thaiFile :: FilePath -> Maybe FilePath -> IO ()
|
||||||
thaiFile f mo = do
|
thaiFile f mo = do
|
||||||
@@ -155,20 +204,16 @@ thaiFile f mo = do
|
|||||||
let put = maybe putStr writeFile mo
|
let put = maybe putStr writeFile mo
|
||||||
put $ encodeUTF8 $ thaiStrings s
|
put $ encodeUTF8 $ thaiStrings s
|
||||||
|
|
||||||
mkThaiPron s = case fst $ pronAndOrth s of
|
finalThai c = maybe "" return (Map.lookup c thaiFinalMap)
|
||||||
Just p -> p
|
thaiFinalMap = Map.fromList $ zip allThaiCodes finals
|
||||||
_ -> concat $ render $ unchar s
|
|
||||||
where
|
|
||||||
render s = case s of
|
|
||||||
[c] -> finalThai c : []
|
|
||||||
c:cs -> pronThai c : render cs
|
|
||||||
_ -> []
|
|
||||||
|
|
||||||
finalThai c = maybe c return (Map.lookup c thaiFinalMap)
|
classThai c = maybe CLow readClass (Map.lookup c thaiClassMap)
|
||||||
thaiFinalMap = Map.fromList $ zip allThaiTrans finals
|
thaiClassMap = Map.fromList $ zip allThaiCodes heights
|
||||||
|
|
||||||
classThai c = maybe c return (Map.lookup c thaiClassMap)
|
readClass s = case s of
|
||||||
thaiClassMap = Map.fromList $ zip allThaiTrans heights
|
'L' -> CLow
|
||||||
|
'M' -> CMid
|
||||||
|
'H' -> CHigh
|
||||||
|
|
||||||
|
|
||||||
thaiTable :: String
|
thaiTable :: String
|
||||||
|
|||||||
Reference in New Issue
Block a user