a script for generating Thai files with pronunciation

2026-05-24 18:28:55 -06:00 · 2011-12-03 12:07:46 +00:00
parent 9e1a3a5d60
commit dfce0ef551
4 changed files with 57 additions and 34 deletions
--- a/lib/src/Make.hs
+++ b/lib/src/Make.hs
@@ -54,6 +54,7 @@ langsCoding = [
  (("spanish",  "Spa"),"Romance"),
  (("swedish",  "Swe"),"Scand"), 
  (("thai",     "Tha"),""),
  (("thai",     "Thp"),""),  -- Thai pronunciation
  (("turkish",  "Tur"),""),
  (("urdu",     "Urd"),"Hindustani")
  ]
@@ -66,7 +67,7 @@ langs = map fst langsCoding
 langsLangAll = langs
 -- languages that are almost complete and for which Lang is normally compiled
-langsLang = langs `except` langsIncomplete ---- []
+langsLang = langs `except` (langsIncomplete ++ ["Thp"])
 -- languages that have notpresent marked
 langsPresent = langsLang `except` ["Lav","Nep","Pes","Tha"]
--- a/lib/src/thai/StringsTha.gf
+++ b/lib/src/thai/StringsTha.gf
@@ -1,9 +1,4 @@
-- The only place where literal Thai strings are defined 
+-- a repository of literal Thai strings
 -- (except for Lexicon and Structural).
 -- Convert this into StringsThai by 
 -- gf
 -- > rf -file=thai/src/StringsTha.gf | ps -env=quotes -to_thai | wf -file=thai/StringsTha.gf
 -- สุขสันต์วันเกิด!
 resource StringsTha = {
@@ -11,6 +6,9 @@ flags coding = utf8 ;
 oper
 -- if Thai is paired with Pronunciation, return the latter
 thpron : Str -> Str -> Str = \t,p -> p ;
 aphai_s = "อภัย" ; -- excuse2
 baan_s = "บ้าน" ; -- house
 biar_s = "เบียร์" ; -- beer
@@ -30,7 +28,7 @@ haa_s = "ห้า" ; -- five
 hay_s = "ให้" ; -- give
 hoog_s = "ห้อง" ; -- room
 hok_s = "หก" ; -- six
-jai_s = "ใj" ; -- understand2
+jai_s = "ใจ" ; -- understand2
 kaaw_s = "เก้า" ; -- nine (written with the tone mark, like "ห้า" for five)
 kam_s = "กำ" ; -- Progr1
 kew_s = "แก้ว" ; -- glass (drink Classif)
--- a/lib/src/thai/TextTha.gf
+++ b/lib/src/thai/TextTha.gf
@@ -1,4 +1,4 @@
-concrete TextTha of Text = CommonX ** {
+concrete TextTha of Text = CommonX ** open ResTha in {
 -- No punctuation - but make sure to leave spaces between sentences!
--- a/lib/src/thai/ThaiScript.hs
+++ b/lib/src/thai/ThaiScript.hs
@@ -1,37 +1,31 @@
-module ThaiScript where
+module Main where
 import Data.Char
 import Data.List
 import qualified Data.Map as Map
 import System
-testFile   = "src/test.txt"
+-- convert all files *Tha.gf into *Thp.gf with "t" changed to (thpron "t" "p")
-resultFile = "src/results.txt"
+main = allThpron
-test = do
+allThpron = do
-  s <- readFile testFile
+  System.system "ls *Tha*.gf ../api/*Tha*.gf >srcThai.txt"
-  writeFile resultFile []
+  files <- readFile "srcThai.txt" >>= return . lines
-  mapM_ (testOne . tabs) $ lines s
+  mapM_ fileThpron files
-testOne ws = case ws of
+fileThpron file = do
-  m:t:p:r:_ -> appendFile resultFile $ concat [mn,"\t",t,"\t",p,"\t",r,"\t",result,"\n"] where
+  s <- readFile file
-                   result = unwords (intersperse "," (map thai2pron (filter (/=",") (words t))))
+  let tgt = appThpron file
-                   mn = if result == r 
+  writeFile tgt (appThpron s)
-                      then m
+  putStrLn ("wrote " ++ tgt)
                      else if result == p then (m ++ "+") else (m ++ "-") 
  _ -> return ()
-testOneS ws = case ws of
+appThpron s = case s of
-  m:t:p:r:_ -> appendFile resultFile $ concat [m,"\t",t,"\t",pn,"\t",r,"\n"] where
+  '"':cs -> let (w,_:rest) = break (=='"') cs in mkThpron w ++ appThpron rest
-                   result = unwords (intersperse "," (map thai2pron (filter (/=",") (words t))))
+  'T':'h':'a':rest -> "Thp" ++ appThpron rest
-                   pn = if m == "+" 
+  c:cs -> c:appThpron cs
-                      then r
+  _ -> s
                      else p
  _ -> return ()
-tabs s = case break (=='\t') s of
+mkThpron s = "(thpron \"" ++ s ++ "\" \"" ++ thai2pron s ++ "\")"
  ([], _:ws) -> tabs ws
  (w , _:ws) -> w:tabs ws
  _ -> [s]
 -- heuristics for finding syllables
 uniSyllables :: [Int] -> [[Int]]
@@ -311,6 +305,36 @@ allThaiChars = [
  TC {unicode = 3673, translit = "N9", cclass = Low, liveness = False, pronunc = "9", pronunc_end = "9"}
 ]
 -- testing with Wikipedia Swadesh list
 -- | Input of 'test': read as a whole and processed line by line.
 testFile :: FilePath
 testFile = "src/test.txt"
 -- | Output of 'test': emptied when a run starts, then appended to per record.
 resultFile :: FilePath
 resultFile = "src/results.txt"
 -- | Run the pronunciation test: read 'testFile', empty 'resultFile', then
 -- split every input line into tab-separated fields with 'tabs' and hand the
 -- fields to 'testOne'.
 test :: IO ()
 test =
  readFile testFile >>= \contents -> do
   writeFile resultFile ""
   mapM_ (testOne . tabs) (lines contents)
 -- | Check one record and append a tab-separated result line to 'resultFile'.
 -- A record needs at least four fields @m@, @t@, @p@, @r@; shorter records are
 -- silently ignored.  The output line is the (possibly re-marked) @m@, then
 -- @t@, @p@, @r@ and the computed pronunciation.
 testOne :: [String] -> IO ()
 testOne (m:t:p:r:_) =
   appendFile resultFile (concat (intersperse "\t" [mark, t, p, r, result]) ++ "\n")
  where
   -- every word of @t@ except a bare "," goes through thai2pron; the
   -- converted words are rejoined with " , " between them
   result = unwords (intersperse "," [thai2pron w | w <- words t, w /= ","])
   -- keep the mark when the result equals @r@; otherwise flag it with "+"
   -- (result equals @p@) or "-" (result equals neither)
   mark
    | result == r = m
    | result == p = m ++ "+"
    | otherwise   = m ++ "-"
 testOne _ = return ()
 -- | Append one record to 'resultFile' without running the converter.
 -- A record needs at least four fields @m@, @t@, @p@, @r@; shorter records are
 -- silently ignored.  The output line is @m@, @t@, the selected string and @r@,
 -- separated by tabs.
 testOneS :: [String] -> IO ()
 testOneS (m:t:p:r:_) =
   appendFile resultFile (concat [m, "\t", t, "\t", pn, "\t", r, "\n"])
  where
   -- a record marked exactly "+" takes @r@ as its third column, any other
   -- mark keeps @p@
   pn = if m == "+" then r else p
 testOneS _ = return ()
 -- | Split a line at tab characters.  Empty fields that are followed by a tab
 -- are dropped; whatever follows the last tab is always kept, even when it is
 -- empty, so the result is never the empty list.
 tabs :: String -> [String]
 tabs s
  | null rest  = [s]                 -- no tab left: the remainder is the last field
  | null field = tabs (drop 1 rest)  -- empty field before a tab: skip it
  | otherwise  = field : tabs (drop 1 rest)
  where
   (field, rest) = span (/= '\t') s
 {-
`@@ -1,4 +1,4 @@`
	`concrete TextTha of Text = CommonX ** {`	`concrete TextTha of Text = CommonX ** open ResTha in {`

	`-- No punctuation - but make sure to leave spaces between sentences!`	`-- No punctuation - but make sure to leave spaces between sentences!`