diff --git a/lib/src/Make.hs b/lib/src/Make.hs index 48ea24b3a..bfcc21475 100644 --- a/lib/src/Make.hs +++ b/lib/src/Make.hs @@ -54,6 +54,7 @@ langsCoding = [ (("spanish", "Spa"),"Romance"), (("swedish", "Swe"),"Scand"), (("thai", "Tha"),""), + (("thai", "Thp"),""), -- Thai pronunciation (("turkish", "Tur"),""), (("urdu", "Urd"),"Hindustani") ] @@ -66,7 +67,7 @@ langs = map fst langsCoding langsLangAll = langs -- languagues that are almost complete and for which Lang is normally compiled -langsLang = langs `except` langsIncomplete ---- [] +langsLang = langs `except` (langsIncomplete ++ ["Thp"]) -- languagues that have notpresent marked langsPresent = langsLang `except` ["Lav","Nep","Pes","Tha"] diff --git a/lib/src/thai/StringsTha.gf b/lib/src/thai/StringsTha.gf index 7a0664eb6..2f11cde1d 100644 --- a/lib/src/thai/StringsTha.gf +++ b/lib/src/thai/StringsTha.gf @@ -1,9 +1,4 @@ --- The only place where literal Thai strings are defined --- (except for Lexicon and Structural). --- Convert this into StringsThai by --- gf --- > rf -file=thai/src/StringsTha.gf | ps -env=quotes -to_thai | wf -file=thai/StringsTha.gf --- สุคสันต์วันเกิด! 
+-- a repository of literal Thai strings resource StringsTha = { @@ -11,6 +6,9 @@ flags coding = utf8 ; oper +-- if Thai is paired with Pronunciation, return the latter +thpron : Str -> Str -> Str = \t,p -> p ; + aphai_s = "อภัย" ; -- excuse2 baan_s = "บ้าน" ; -- house biar_s = "เบียร์" ; -- beer @@ -30,7 +28,7 @@ haa_s = "ห้า" ; -- five hay_s = "ให้" ; -- give hoog_s = "ห้อง" ; -- room hok_s = "หก" ; -- six -jai_s = "ใj" ; -- understand2 +jai_s = "ใจ" ; -- understand2 kaaw_s = "เกา" ; -- nine kam_s = "กำ" ; -- Progr1 kew_s = "แก้ว" ; -- glass (drink Classif) diff --git a/lib/src/thai/TextTha.gf b/lib/src/thai/TextTha.gf index c39715d46..04cbb28b7 100644 --- a/lib/src/thai/TextTha.gf +++ b/lib/src/thai/TextTha.gf @@ -1,4 +1,4 @@ -concrete TextTha of Text = CommonX ** { +concrete TextTha of Text = CommonX ** open ResTha in { -- No punctuation - but make sure to leave spaces between sentences! diff --git a/lib/src/thai/ThaiScript.hs b/lib/src/thai/ThaiScript.hs index 7900cf2d1..79100772d 100644 --- a/lib/src/thai/ThaiScript.hs +++ b/lib/src/thai/ThaiScript.hs @@ -1,37 +1,31 @@ -module ThaiScript where +module Main where import Data.Char import Data.List import qualified Data.Map as Map +import System -testFile = "src/test.txt" -resultFile = "src/results.txt" +-- convert all files *Tha.gf into *Thp.gf with "t" changed to (thpron "t" "p") +main = allThpron -test = do - s <- readFile testFile - writeFile resultFile [] - mapM_ (testOne . tabs) $ lines s +allThpron = do + System.system "ls *Tha*.gf ../api/*Tha*.gf >srcThai.txt" + files <- readFile "srcThai.txt" >>= return . 
lines + mapM_ fileThpron files -testOne ws = case ws of - m:t:p:r:_ -> appendFile resultFile $ concat [mn,"\t",t,"\t",p,"\t",r,"\t",result,"\n"] where - result = unwords (intersperse "," (map thai2pron (filter (/=",") (words t)))) - mn = if result == r - then m - else if result == p then (m ++ "+") else (m ++ "-") - _ -> return () +fileThpron file = do + s <- readFile file + let tgt = appThpron file + writeFile tgt (appThpron s) + putStrLn ("wrote " ++ tgt) -testOneS ws = case ws of - m:t:p:r:_ -> appendFile resultFile $ concat [m,"\t",t,"\t",pn,"\t",r,"\n"] where - result = unwords (intersperse "," (map thai2pron (filter (/=",") (words t)))) - pn = if m == "+" - then r - else p - _ -> return () +appThpron s = case s of + '"':cs -> let (w,rest) = break (=='"') cs in mkThpron w ++ appThpron (drop 1 rest) -- drop 1 skips the closing quote; total even when the literal is unterminated + 'T':'h':'a':rest -> "Thp" ++ appThpron rest + c:cs -> c:appThpron cs + _ -> s -tabs s = case break (=='\t') s of - ([], _:ws) -> tabs ws - (w , _:ws) -> w:tabs ws - _ -> [s] +mkThpron s = "(thpron \"" ++ s ++ "\" \"" ++ thai2pron s ++ "\")" -- heuristics for finding syllables uniSyllables :: [Int] -> [[Int]] @@ -311,6 +305,36 @@ allThaiChars = [ TC {unicode = 3673, translit = "N9", cclass = Low, liveness = False, pronunc = "9", pronunc_end = "9"} ] +-- testing with Wikipedia Swadesh list + +testFile = "src/test.txt" +resultFile = "src/results.txt" + +test = do + s <- readFile testFile + writeFile resultFile [] + mapM_ (testOne . 
tabs) $ lines s + +testOne ws = case ws of + m:t:p:r:_ -> appendFile resultFile $ concat [mn,"\t",t,"\t",p,"\t",r,"\t",result,"\n"] where + result = unwords (intersperse "," (map thai2pron (filter (/=",") (words t)))) + mn = if result == r + then m + else if result == p then (m ++ "+") else (m ++ "-") + _ -> return () + +testOneS ws = case ws of + m:t:p:r:_ -> appendFile resultFile $ concat [m,"\t",t,"\t",pn,"\t",r,"\n"] where + result = unwords (intersperse "," (map thai2pron (filter (/=",") (words t)))) + pn = if m == "+" + then r + else p + _ -> return () + +tabs s = case break (=='\t') s of + ([], _:ws) -> tabs ws + (w , _:ws) -> w:tabs ws + _ -> [s] {-