mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-13 14:59:32 -06:00
a script for generating Thai files with pronunciation
This commit is contained in:
@@ -54,6 +54,7 @@ langsCoding = [
|
||||
(("spanish", "Spa"),"Romance"),
|
||||
(("swedish", "Swe"),"Scand"),
|
||||
(("thai", "Tha"),""),
|
||||
(("thai", "Thp"),""), -- Thai pronunciation
|
||||
(("turkish", "Tur"),""),
|
||||
(("urdu", "Urd"),"Hindustani")
|
||||
]
|
||||
@@ -66,7 +67,7 @@ langs = map fst langsCoding
|
||||
langsLangAll = langs
|
||||
|
||||
-- languages that are almost complete and for which Lang is normally compiled
|
||||
langsLang = langs `except` langsIncomplete ---- []
|
||||
langsLang = langs `except` (langsIncomplete ++ ["Thp"])
|
||||
|
||||
-- languages that have notpresent marked
|
||||
langsPresent = langsLang `except` ["Lav","Nep","Pes","Tha"]
|
||||
|
||||
@@ -1,9 +1,4 @@
|
||||
-- The only place where literal Thai strings are defined
|
||||
-- (except for Lexicon and Structural).
|
||||
-- Convert this into StringsThai by
|
||||
-- gf
|
||||
-- > rf -file=thai/src/StringsTha.gf | ps -env=quotes -to_thai | wf -file=thai/StringsTha.gf
|
||||
-- สุคสันต์วันเกิด!
|
||||
-- a repository of literal Thai strings
|
||||
|
||||
resource StringsTha = {
|
||||
|
||||
@@ -11,6 +6,9 @@ flags coding = utf8 ;
|
||||
|
||||
oper
|
||||
|
||||
-- if Thai is paired with Pronunciation, return the latter
|
||||
thpron : Str -> Str -> Str = \t,p -> p ;
|
||||
|
||||
aphai_s = "อภัย" ; -- excuse2
|
||||
baan_s = "บ้าน" ; -- house
|
||||
biar_s = "เบียร์" ; -- beer
|
||||
@@ -30,7 +28,7 @@ haa_s = "ห้า" ; -- five
|
||||
hay_s = "ให้" ; -- give
|
||||
hoog_s = "ห้อง" ; -- room
|
||||
hok_s = "หก" ; -- six
|
||||
jai_s = "ใj" ; -- understand2
|
||||
jai_s = "ใจ" ; -- understand2
|
||||
kaaw_s = "เกา" ; -- nine
|
||||
kam_s = "กำ" ; -- Progr1
|
||||
kew_s = "แก้ว" ; -- glass (drink Classif)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
concrete TextTha of Text = CommonX ** {
|
||||
concrete TextTha of Text = CommonX ** open ResTha in {
|
||||
|
||||
-- No punctuation - but make sure to leave spaces between sentences!
|
||||
|
||||
|
||||
@@ -1,37 +1,31 @@
|
||||
module ThaiScript where
|
||||
module Main where
|
||||
|
||||
import Data.Char
|
||||
import Data.List
|
||||
import qualified Data.Map as Map
|
||||
import System
|
||||
|
||||
testFile = "src/test.txt"
|
||||
resultFile = "src/results.txt"
|
||||
-- convert all files *Tha.gf into *Thp.gf with "t" changed to (thpron "t" "p")
|
||||
main = allThpron
|
||||
|
||||
test = do
|
||||
s <- readFile testFile
|
||||
writeFile resultFile []
|
||||
mapM_ (testOne . tabs) $ lines s
|
||||
allThpron = do
|
||||
System.system "ls *Tha*.gf ../api/*Tha*.gf >srcThai.txt"
|
||||
files <- readFile "srcThai.txt" >>= return . lines
|
||||
mapM_ fileThpron files
|
||||
|
||||
testOne ws = case ws of
|
||||
m:t:p:r:_ -> appendFile resultFile $ concat [mn,"\t",t,"\t",p,"\t",r,"\t",result,"\n"] where
|
||||
result = unwords (intersperse "," (map thai2pron (filter (/=",") (words t))))
|
||||
mn = if result == r
|
||||
then m
|
||||
else if result == p then (m ++ "+") else (m ++ "-")
|
||||
_ -> return ()
|
||||
fileThpron file = do
|
||||
s <- readFile file
|
||||
let tgt = appThpron file
|
||||
writeFile tgt (appThpron s)
|
||||
putStrLn ("wrote " ++ tgt)
|
||||
|
||||
testOneS ws = case ws of
|
||||
m:t:p:r:_ -> appendFile resultFile $ concat [m,"\t",t,"\t",pn,"\t",r,"\n"] where
|
||||
result = unwords (intersperse "," (map thai2pron (filter (/=",") (words t))))
|
||||
pn = if m == "+"
|
||||
then r
|
||||
else p
|
||||
_ -> return ()
|
||||
appThpron s = case s of
|
||||
'"':cs -> let (w,_:rest) = break (=='"') cs in mkThpron w ++ appThpron rest
|
||||
'T':'h':'a':rest -> "Thp" ++ appThpron rest
|
||||
c:cs -> c:appThpron cs
|
||||
_ -> s
|
||||
|
||||
tabs s = case break (=='\t') s of
|
||||
([], _:ws) -> tabs ws
|
||||
(w , _:ws) -> w:tabs ws
|
||||
_ -> [s]
|
||||
mkThpron s = "(thpron \"" ++ s ++ "\" \"" ++ thai2pron s ++ "\")"
|
||||
|
||||
-- heuristics for finding syllables
|
||||
uniSyllables :: [Int] -> [[Int]]
|
||||
@@ -311,6 +305,36 @@ allThaiChars = [
|
||||
TC {unicode = 3673, translit = "N9", cclass = Low, liveness = False, pronunc = "9", pronunc_end = "9"}
|
||||
]
|
||||
|
||||
-- testing with Wikipedia Swadesh list
|
||||
|
||||
testFile = "src/test.txt"
|
||||
resultFile = "src/results.txt"
|
||||
|
||||
test = do
|
||||
s <- readFile testFile
|
||||
writeFile resultFile []
|
||||
mapM_ (testOne . tabs) $ lines s
|
||||
|
||||
testOne ws = case ws of
|
||||
m:t:p:r:_ -> appendFile resultFile $ concat [mn,"\t",t,"\t",p,"\t",r,"\t",result,"\n"] where
|
||||
result = unwords (intersperse "," (map thai2pron (filter (/=",") (words t))))
|
||||
mn = if result == r
|
||||
then m
|
||||
else if result == p then (m ++ "+") else (m ++ "-")
|
||||
_ -> return ()
|
||||
|
||||
testOneS ws = case ws of
|
||||
m:t:p:r:_ -> appendFile resultFile $ concat [m,"\t",t,"\t",pn,"\t",r,"\n"] where
|
||||
result = unwords (intersperse "," (map thai2pron (filter (/=",") (words t))))
|
||||
pn = if m == "+"
|
||||
then r
|
||||
else p
|
||||
_ -> return ()
|
||||
|
||||
tabs s = case break (=='\t') s of
|
||||
([], _:ws) -> tabs ws
|
||||
(w , _:ws) -> w:tabs ws
|
||||
_ -> [s]
|
||||
|
||||
|
||||
{-
|
||||
|
||||
Reference in New Issue
Block a user