a script for generating Thai files with pronunciation

2026-05-29 12:48:54 -06:00 · 2011-12-03 12:07:46 +00:00
parent 9e1a3a5d60
commit dfce0ef551
4 changed files with 57 additions and 34 deletions
--- a/lib/src/Make.hs
+++ b/lib/src/Make.hs
@@ -54,6 +54,7 @@ langsCoding = [
  (("spanish",  "Spa"),"Romance"),
  (("swedish",  "Swe"),"Scand"), 
  (("thai",     "Tha"),""),
+  (("thai",     "Thp"),""),  -- Thai pronunciation
  (("turkish",  "Tur"),""),
  (("urdu",     "Urd"),"Hindustani")
  ]
@@ -66,7 +67,7 @@ langs = map fst langsCoding
 langsLangAll = langs

 -- languagues that are almost complete and for which Lang is normally compiled
-langsLang = langs `except` langsIncomplete ---- []
+langsLang = langs `except` (langsIncomplete ++ ["Thp"])

 -- languagues that have notpresent marked
 langsPresent = langsLang `except` ["Lav","Nep","Pes","Tha"]
--- a/lib/src/thai/StringsTha.gf
+++ b/lib/src/thai/StringsTha.gf
@@ -1,9 +1,4 @@
-- The only place where literal Thai strings are defined 
-- (except for Lexicon and Structural).
-- Convert this into StringsThai by 
-- gf
-- > rf -file=thai/src/StringsTha.gf | ps -env=quotes -to_thai | wf -file=thai/StringsTha.gf
-- สุคสันต์วันเกิด!
+-- a repository of literal Thai strings

 resource StringsTha = {

@@ -11,6 +6,9 @@ flags coding = utf8 ;

 oper

+-- thpron t p: given a Thai string t and its pronunciation p, return the pronunciation p
+thpron : Str -> Str -> Str = \t,p -> p ;
+
 aphai_s = "อภัย" ; -- excuse2
 baan_s = "บ้าน" ; -- house
 biar_s = "เบียร์" ; -- beer
@@ -30,7 +28,7 @@ haa_s = "ห้า" ; -- five
 hay_s = "ให้" ; -- give
 hoog_s = "ห้อง" ; -- room
 hok_s = "หก" ; -- six
-jai_s = "ใj" ; -- understand2
+jai_s = "ใจ" ; -- understand2
 kaaw_s = "เกา" ; -- nine
 kam_s = "กำ" ; -- Progr1
 kew_s = "แก้ว" ; -- glass (drink Classif)
--- a/lib/src/thai/TextTha.gf
+++ b/lib/src/thai/TextTha.gf
@@ -1,4 +1,4 @@
-concrete TextTha of Text = CommonX ** {
+concrete TextTha of Text = CommonX ** open ResTha in {

 -- No punctuation - but make sure to leave spaces between sentences!

--- a/lib/src/thai/ThaiScript.hs
+++ b/lib/src/thai/ThaiScript.hs
@@ -1,37 +1,31 @@
-module ThaiScript where
+module Main where

 import Data.Char
 import Data.List
 import qualified Data.Map as Map
+import System

-testFile   = "src/test.txt"
-resultFile = "src/results.txt"
+-- convert every *Tha*.gf file (here and in ../api) into its *Thp*.gf counterpart, rewriting each string literal "t" as (thpron "t" "p") with p = thai2pron t
+main = allThpron

-test = do
-  s <- readFile testFile
-  writeFile resultFile []
-  mapM_ (testOne . tabs) $ lines s
+allThpron = do  -- run fileThpron on every file named in the shell listing
+  _ <- System.system "ls *Tha*.gf ../api/*Tha*.gf >srcThai.txt"  -- the exit code is discarded
+  listing <- readFile "srcThai.txt"
+  mapM_ fileThpron (lines listing)  -- each line of the listing is taken as one file path

-testOne ws = case ws of
-  m:t:p:r:_ -> appendFile resultFile $ concat [mn,"\t",t,"\t",p,"\t",r,"\t",result,"\n"] where
-                   result = unwords (intersperse "," (map thai2pron (filter (/=",") (words t))))
-                   mn = if result == r 
-                      then m
-                      else if result == p then (m ++ "+") else (m ++ "-") 
-  _ -> return ()
+fileThpron file = do  -- read file, rewrite it with appThpron, write the result, report it
+  let target = appThpron file          -- the output path gets the same rewrite as the contents
+      notice = "wrote " ++ target
+  readFile file >>= writeFile target . appThpron
+  putStrLn notice

-testOneS ws = case ws of
-  m:t:p:r:_ -> appendFile resultFile $ concat [m,"\t",t,"\t",pn,"\t",r,"\n"] where
-                   result = unwords (intersperse "," (map thai2pron (filter (/=",") (words t))))
-                   pn = if m == "+" 
-                      then r
-                      else p
-  _ -> return ()
+appThpron s = case s of  -- rewrites quoted literals via mkThpron and every other "Tha" to "Thp"
+  '"':cs | (w,_:rest) <- break (=='"') cs -> mkThpron w ++ appThpron rest  -- taken only when a closing quote exists
+  'T':'h':'a':rest -> "Thp" ++ appThpron rest  -- terminated literals were already consumed by the branch above
+  c:cs -> c:appThpron cs                        -- also reached by an unterminated '"', which is copied as is instead of crashing
+  _ -> s

-tabs s = case break (=='\t') s of
-  ([], _:ws) -> tabs ws
-  (w , _:ws) -> w:tabs ws
-  _ -> [s]
+mkThpron s = concat ["(thpron \"", s, "\" \"", thai2pron s, "\")"]  -- yields (thpron "s" "<thai2pron s>")

 -- heuristics for finding syllables
 uniSyllables :: [Int] -> [[Int]]
@@ -311,6 +305,36 @@ allThaiChars = [
  TC {unicode = 3673, translit = "N9", cclass = Low, liveness = False, pronunc = "9", pronunc_end = "9"}
 ]

+-- testing with Wikipedia Swadesh list
+
+testFile   = "src/test.txt"
+resultFile = "src/results.txt"
+
+test = do                             -- runs testOne over every line of testFile
+  s <- readFile testFile
+  writeFile resultFile []             -- empty the results file; testOne appends to it
+  mapM_ (testOne . tabs) $ lines s    -- each line is split into tab-separated fields first
+
+testOne ws = case ws of               -- ws: the fields of one input line; appends one line to resultFile
+  m:t:p:r:_ -> appendFile resultFile $ concat [mn,"\t",t,"\t",p,"\t",r,"\t",result,"\n"] where
+                   result = unwords (intersperse "," (map thai2pron (filter (/=",") (words t))))
+                   mn = if result == r          -- marker column: m unchanged when result equals r,
+                      then m
+                      else if result == p then (m ++ "+") else (m ++ "-")  -- m ++ "+" when it equals p, m ++ "-" otherwise
+  _ -> return ()                       -- fewer than four fields: nothing is written
+
+testOneS ws = case ws of              -- ws: a list of fields; appends one line to resultFile
+  m:t:p:r:_ -> appendFile resultFile $ concat [m,"\t",t,"\t",pn,"\t",r,"\n"] where
+                   result = unwords (intersperse "," (map thai2pron (filter (/=",") (words t))))  -- NOTE(review): result is not referenced anywhere in testOneS
+                   pn = if m == "+"             -- third output column: r when the marker is "+", else p
+                      then r
+                      else p
+  _ -> return ()                       -- fewer than four fields: nothing is written
+
+tabs s = case break (=='\t') s of     -- split s at tab characters
+  ([], _:ws) -> tabs ws               -- nothing before the tab: no field is emitted for it
+  (w , _:ws) -> w:tabs ws
+  _ -> [s]                            -- no tab left: the remainder is the final field (possibly empty)


 {-