mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-24 18:28:55 -06:00
a script for generating Thai files with pronunciation
This commit is contained in:
@@ -54,6 +54,7 @@ langsCoding = [
|
|||||||
(("spanish", "Spa"),"Romance"),
|
(("spanish", "Spa"),"Romance"),
|
||||||
(("swedish", "Swe"),"Scand"),
|
(("swedish", "Swe"),"Scand"),
|
||||||
(("thai", "Tha"),""),
|
(("thai", "Tha"),""),
|
||||||
|
(("thai", "Thp"),""), -- Thai pronunciation
|
||||||
(("turkish", "Tur"),""),
|
(("turkish", "Tur"),""),
|
||||||
(("urdu", "Urd"),"Hindustani")
|
(("urdu", "Urd"),"Hindustani")
|
||||||
]
|
]
|
||||||
@@ -66,7 +67,7 @@ langs = map fst langsCoding
|
|||||||
langsLangAll = langs
|
langsLangAll = langs
|
||||||
|
|
||||||
-- languages that are almost complete and for which Lang is normally compiled
|
-- languages that are almost complete and for which Lang is normally compiled
|
||||||
langsLang = langs `except` langsIncomplete ---- []
|
langsLang = langs `except` (langsIncomplete ++ ["Thp"])
|
||||||
|
|
||||||
-- languages that have notpresent marked
|
-- languages that have notpresent marked
|
||||||
langsPresent = langsLang `except` ["Lav","Nep","Pes","Tha"]
|
langsPresent = langsLang `except` ["Lav","Nep","Pes","Tha"]
|
||||||
|
|||||||
@@ -1,9 +1,4 @@
|
|||||||
-- The only place where literal Thai strings are defined
|
-- a repository of literal Thai strings
|
||||||
-- (except for Lexicon and Structural).
|
|
||||||
-- Convert this into StringsThai by
|
|
||||||
-- gf
|
|
||||||
-- > rf -file=thai/src/StringsTha.gf | ps -env=quotes -to_thai | wf -file=thai/StringsTha.gf
|
|
||||||
-- สุคสันต์วันเกิด!
|
|
||||||
|
|
||||||
resource StringsTha = {
|
resource StringsTha = {
|
||||||
|
|
||||||
@@ -11,6 +6,9 @@ flags coding = utf8 ;
|
|||||||
|
|
||||||
oper
|
oper
|
||||||
|
|
||||||
|
-- if Thai is paired with Pronunciation, return the latter
|
||||||
|
thpron : Str -> Str -> Str = \t,p -> p ;
|
||||||
|
|
||||||
aphai_s = "อภัย" ; -- excuse2
|
aphai_s = "อภัย" ; -- excuse2
|
||||||
baan_s = "บ้าน" ; -- house
|
baan_s = "บ้าน" ; -- house
|
||||||
biar_s = "เบียร์" ; -- beer
|
biar_s = "เบียร์" ; -- beer
|
||||||
@@ -30,7 +28,7 @@ haa_s = "ห้า" ; -- five
|
|||||||
hay_s = "ให้" ; -- give
|
hay_s = "ให้" ; -- give
|
||||||
hoog_s = "ห้อง" ; -- room
|
hoog_s = "ห้อง" ; -- room
|
||||||
hok_s = "หก" ; -- six
|
hok_s = "หก" ; -- six
|
||||||
jai_s = "ใj" ; -- understand2
|
jai_s = "ใจ" ; -- understand2
|
||||||
kaaw_s = "เกา" ; -- nine
|
kaaw_s = "เกา" ; -- nine
|
||||||
kam_s = "กำ" ; -- Progr1
|
kam_s = "กำ" ; -- Progr1
|
||||||
kew_s = "แก้ว" ; -- glass (drink Classif)
|
kew_s = "แก้ว" ; -- glass (drink Classif)
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
concrete TextTha of Text = CommonX ** {
|
concrete TextTha of Text = CommonX ** open ResTha in {
|
||||||
|
|
||||||
-- No punctuation - but make sure to leave spaces between sentences!
|
-- No punctuation - but make sure to leave spaces between sentences!
|
||||||
|
|
||||||
|
|||||||
@@ -1,37 +1,31 @@
|
|||||||
module ThaiScript where
|
module Main where
|
||||||
|
|
||||||
import Data.Char
|
import Data.Char
|
||||||
import Data.List
|
import Data.List
|
||||||
import qualified Data.Map as Map
|
import qualified Data.Map as Map
|
||||||
|
import System
|
||||||
|
|
||||||
testFile = "src/test.txt"
|
-- convert all files *Tha.gf into *Thp.gf with "t" changed to (thpron "t" "p")
|
||||||
resultFile = "src/results.txt"
|
main = allThpron
|
||||||
|
|
||||||
test = do
|
allThpron = do
|
||||||
s <- readFile testFile
|
System.system "ls *Tha*.gf ../api/*Tha*.gf >srcThai.txt"
|
||||||
writeFile resultFile []
|
files <- readFile "srcThai.txt" >>= return . lines
|
||||||
mapM_ (testOne . tabs) $ lines s
|
mapM_ fileThpron files
|
||||||
|
|
||||||
testOne ws = case ws of
|
fileThpron file = do
|
||||||
m:t:p:r:_ -> appendFile resultFile $ concat [mn,"\t",t,"\t",p,"\t",r,"\t",result,"\n"] where
|
s <- readFile file
|
||||||
result = unwords (intersperse "," (map thai2pron (filter (/=",") (words t))))
|
let tgt = appThpron file
|
||||||
mn = if result == r
|
writeFile tgt (appThpron s)
|
||||||
then m
|
putStrLn ("wrote " ++ tgt)
|
||||||
else if result == p then (m ++ "+") else (m ++ "-")
|
|
||||||
_ -> return ()
|
|
||||||
|
|
||||||
testOneS ws = case ws of
|
appThpron s = case s of
|
||||||
m:t:p:r:_ -> appendFile resultFile $ concat [m,"\t",t,"\t",pn,"\t",r,"\n"] where
|
'"':cs -> let (w,_:rest) = break (=='"') cs in mkThpron w ++ appThpron rest
|
||||||
result = unwords (intersperse "," (map thai2pron (filter (/=",") (words t))))
|
'T':'h':'a':rest -> "Thp" ++ appThpron rest
|
||||||
pn = if m == "+"
|
c:cs -> c:appThpron cs
|
||||||
then r
|
_ -> s
|
||||||
else p
|
|
||||||
_ -> return ()
|
|
||||||
|
|
||||||
tabs s = case break (=='\t') s of
|
-- render a Thai literal as the GF expression (thpron "<thai>" "<pron>"),
-- where the second string is whatever thai2pron returns for the first
mkThpron thai = concat ["(thpron \"", thai, "\" \"", thai2pron thai, "\")"]
|
||||||
([], _:ws) -> tabs ws
|
|
||||||
(w , _:ws) -> w:tabs ws
|
|
||||||
_ -> [s]
|
|
||||||
|
|
||||||
-- heuristics for finding syllables
|
-- heuristics for finding syllables
|
||||||
uniSyllables :: [Int] -> [[Int]]
|
uniSyllables :: [Int] -> [[Int]]
|
||||||
@@ -311,6 +305,36 @@ allThaiChars = [
|
|||||||
TC {unicode = 3673, translit = "N9", cclass = Low, liveness = False, pronunc = "9", pronunc_end = "9"}
|
TC {unicode = 3673, translit = "N9", cclass = Low, liveness = False, pronunc = "9", pronunc_end = "9"}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
-- testing with Wikipedia Swadesh list
|
||||||
|
|
||||||
|
testFile = "src/test.txt"
|
||||||
|
resultFile = "src/results.txt"
|
||||||
|
|
||||||
|
test = do
|
||||||
|
s <- readFile testFile
|
||||||
|
writeFile resultFile []
|
||||||
|
mapM_ (testOne . tabs) $ lines s
|
||||||
|
|
||||||
|
testOne ws = case ws of
|
||||||
|
m:t:p:r:_ -> appendFile resultFile $ concat [mn,"\t",t,"\t",p,"\t",r,"\t",result,"\n"] where
|
||||||
|
result = unwords (intersperse "," (map thai2pron (filter (/=",") (words t))))
|
||||||
|
mn = if result == r
|
||||||
|
then m
|
||||||
|
else if result == p then (m ++ "+") else (m ++ "-")
|
||||||
|
_ -> return ()
|
||||||
|
|
||||||
|
testOneS ws = case ws of
|
||||||
|
m:t:p:r:_ -> appendFile resultFile $ concat [m,"\t",t,"\t",pn,"\t",r,"\n"] where
|
||||||
|
result = unwords (intersperse "," (map thai2pron (filter (/=",") (words t))))
|
||||||
|
pn = if m == "+"
|
||||||
|
then r
|
||||||
|
else p
|
||||||
|
_ -> return ()
|
||||||
|
|
||||||
|
tabs s = case break (=='\t') s of
|
||||||
|
([], _:ws) -> tabs ws
|
||||||
|
(w , _:ws) -> w:tabs ws
|
||||||
|
_ -> [s]
|
||||||
|
|
||||||
|
|
||||||
{-
|
{-
|
||||||
|
|||||||
Reference in New Issue
Block a user