thai pronunciation better now

This commit is contained in:
aarne
2007-01-08 16:12:27 +00:00
parent 3e9d2b4a2f
commit ec2b12cf65

View File

@@ -20,6 +20,8 @@ import Data.Char
import GF.Text.UTF8 import GF.Text.UTF8
import Data.List import Data.List
import Debug.Trace
mkThai :: String -> String mkThai :: String -> String
mkThai = concat . map mkThaiWord . words mkThai = concat . map mkThaiWord . words
@@ -79,59 +81,101 @@ allThaiCodes :: [Int]
allThaiCodes = [0x0e00 .. 0x0e7f] allThaiCodes = [0x0e00 .. 0x0e7f]
-- derive the pronunciation of a syllable ---------------------
-- heuristic pronunciation of codes
---------------------
pronSyll :: [Int] -> String -- this works for one syllable
pronSyll s = cons1 ++ voc ++ cons2 where
voc = toned tone $ pronThaiChar vo
cons1 = concatMap pronThaiChar co1 ---- mkPronSyllable s = pronSyllable $ getSyllable $ map mkThaiChar $ unchar s
cons2 = mkThaiPron $ unwords $ map recodeThai co2 -- takes care of final ----
(vo,cc@(co1,co2)) = case s of data Syllable = Syll {
c:cs | initVowel c -> (c,getCons cs) initv :: [Int],
c1:c:c2 | internVowel c -> (c,([c1],getFinal c2)) initc :: [Int],
c1:0x0e2d:c2 -> (0x0e42,([c1],getFinal c2)) midv :: [Int],
c0:c1:c:c2 | cluster c0 c1 && internVowel c -> (c,([c0,c1],getFinal c2)) finalc :: [Int],
c0:c1:0x0e2d:c2 | cluster c0 c1 -> (0x0e42,([c0,c1],getFinal c2)) finalv :: [Int],
_ -> (0x0e42,getCons s) ---- "o" tone :: [Int],
shorten :: Bool,
kill :: Bool
}
deriving Show
getCons cs = case cs of data Tone = TMid | TLow | THigh | TRise | TFall
c0:c1:c2 | cluster c1 c1 -> ([c0,c1],getFinal c2) deriving Show
c:c2 -> ([c],getFinal c2)
getFinal = snd . getToneFinal data CClass = CLow | CMid | CHigh
toneMark = fst . getToneFinal deriving Show
getToneFinal c = case c of pronSyllable :: Syllable -> String
[ _,0x0e4c] -> ([], []) -- killer pronSyllable s =
[t,_,0x0e4c] -> ([t],[]) -- killer concatMap pronThaiChar (reverse $ initc s) ++
_ -> splitAt (length c - 1) c tonem ++
vowel ++
finalCons
-- concatMap pronThaiChar (reverse $ finalc s)
initVowel c = 0x0e40 <= c && c <= 0x0e44 where
internVowel c = 0x0e30 <= c && c <= 0x0e39
cluster c0 c1 = vowel = case (initv s, midv s, finalv s, shorten s, tone s) of
c0 == 0x0e2b -- h (i,m,f,_,_) -> concatMap pronThaiChar (reverse $ f ++ m ++ i) ----
|| c1 == 0x0e23 -- r
|| c1 == 0x0e25 -- l
|| c1 == 0x0e27 -- w
classC = case co1 of finalCons =
_ -> "L" ---- let (c,cs) = splitAt 1 $ finalc s
in
case c of
[] -> []
[k] -> concatMap pronThaiChar (reverse cs) ++ finalThai k
lengthV = case vo of iclass = case take 1 (reverse $ initc s) of
_ -> False ---- [c] -> classThai c
[] -> CMid -- O
liveness = case co2 of isLong = not (shorten s) && case vowel of
_ -> False ---- _:_:_ -> True ----
_ -> False
tone = case (classC,lengthV,liveness,toneMark) of isLive = case finalCons of
_ -> "" c | elem c ["n","m","g"] -> True
"" -> isLong
_ -> False
toned t v = t ++ v ---- tonem = case (iclass,isLive,isLong,tone s) of
(_,_,_, [0x0e4a]) -> tHigh
(_,_,_, [0x0e4b]) -> tRise
(CLow,_,_,[0x0e49]) -> tRise
(_,_,_, [0x0e49]) -> tFall
(CLow,_,_,[0x0e48]) -> tFall
(_, _,_,[0x0e48]) -> tLow
(CHigh,True,_,_) -> tRise
(_, True,_,_) -> tMid
(CLow,False,False,_) -> tHigh
(CLow,False,_,_) -> tFall
_ -> tLow
-- [0x0e00 .. 0x0e7f] (tMid,tHigh,tLow,tRise,tFall) = ("-","'","`","~","^")
isVowel c = 0x0e30 <= c && c <= 0x0e44 ----
isCons c = 0x0e01 <= c && c <= 0x0e2f ----
isTone c = 0x0e48 <= c && c <= 0x0e4b
getSyllable :: [Int] -> Syllable
getSyllable = foldl get (Syll [] [] [] [] [] [] False False) where
get syll c = case c of
0x0e47 -> syll {shorten = True}
0x0e4c -> syll {kill = True, finalc = tail (finalc syll)} --- always last
0x0e2d
| null (initc syll) -> syll {initc = [c]} -- "O"
| otherwise -> syll {midv = c : midv syll}
_
| isVowel c -> if null (initc syll)
then syll {initv = c : initv syll}
else syll {midv = c : midv syll}
| isCons c -> if null (midv syll)
then syll {initc = c : initc syll}
else syll {finalc = c : finalc syll}
| isTone c -> syll {tone = [c]}
_ -> syll ---- check this
-- to test -- to test
@@ -147,7 +191,12 @@ test3 = do
testThai :: String -> IO () testThai :: String -> IO ()
testThai s = do testThai s = do
putStrLn $ encodeUTF8 $ mkThai s putStrLn $ encodeUTF8 $ mkThai s
putStrLn $ unwords $ map mkThaiPron $ words s putStrLn $ unwords $ map mkPronSyllable $ words s
testSyllable s =
let y = getSyllable $ map mkThaiChar $ unchar s
in
putStrLn $ pronSyllable $ trace (show y) y
thaiFile :: FilePath -> Maybe FilePath -> IO () thaiFile :: FilePath -> Maybe FilePath -> IO ()
thaiFile f mo = do thaiFile f mo = do
@@ -155,20 +204,16 @@ thaiFile f mo = do
let put = maybe putStr writeFile mo let put = maybe putStr writeFile mo
put $ encodeUTF8 $ thaiStrings s put $ encodeUTF8 $ thaiStrings s
mkThaiPron s = case fst $ pronAndOrth s of finalThai c = maybe "" return (Map.lookup c thaiFinalMap)
Just p -> p thaiFinalMap = Map.fromList $ zip allThaiCodes finals
_ -> concat $ render $ unchar s
where
render s = case s of
[c] -> finalThai c : []
c:cs -> pronThai c : render cs
_ -> []
finalThai c = maybe c return (Map.lookup c thaiFinalMap) classThai c = maybe CLow readClass (Map.lookup c thaiClassMap)
thaiFinalMap = Map.fromList $ zip allThaiTrans finals thaiClassMap = Map.fromList $ zip allThaiCodes heights
classThai c = maybe c return (Map.lookup c thaiClassMap) readClass s = case s of
thaiClassMap = Map.fromList $ zip allThaiTrans heights 'L' -> CLow
'M' -> CMid
'H' -> CHigh
thaiTable :: String thaiTable :: String