From 21ad6f214c2298a23415ed67bd424c5bb36550d2 Mon Sep 17 00:00:00 2001 From: aarne Date: Thu, 25 Jan 2007 17:49:39 +0000 Subject: [PATCH] produce a readable word list --- lib/resource-1.0/thai/FileThai.hs | 17 +++++++++++++++++ lib/resource-1.0/thai/Makefile | 3 +++ src/GF/Text/Thai.hs | 18 +++++++++++------- 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/lib/resource-1.0/thai/FileThai.hs b/lib/resource-1.0/thai/FileThai.hs index 58936180c..13c94325f 100644 --- a/lib/resource-1.0/thai/FileThai.hs +++ b/lib/resource-1.0/thai/FileThai.hs @@ -14,6 +14,7 @@ module Main (main) where import GF.Text.Thai +import GF.Text.UTF8 import Data.List import System @@ -22,5 +23,21 @@ main = do xx <- getArgs case xx of "-p":f:[] -> thaiPronFile f Nothing + "-w":f:[] -> thaiWordList f f :[] -> thaiFile f Nothing _ -> putStrLn "usage: filethai (-p) File" + + +-- adapted to the format of StringsThai + +thaiWordList :: FilePath -> IO () +thaiWordList f = do + ss <- readFile f >>= return . lines + mapM_ mkLine ss + where + mkLine s = case words s of + o : "=" : s : ";" : "--" : es -> + putStrLn $ thai s ++ "\t" ++ pron s ++ "\t" ++ unwords es + _ -> return () + thai = encodeUTF8 . mkThaiWord . init . tail + pron = mkThaiPron . init . tail diff --git a/lib/resource-1.0/thai/Makefile b/lib/resource-1.0/thai/Makefile index 654dcdaff..54bd73510 100644 --- a/lib/resource-1.0/thai/Makefile +++ b/lib/resource-1.0/thai/Makefile @@ -2,3 +2,6 @@ strings: runghc -i../../../src FileThai.hs StringsThai.gf >StringsTha.gf pronstrings: runghc -i../../../src FileThai.hs -p StringsThai.gf >pronunciation/StringsTha.gf +wordlist: + runghc -i../../../src FileThai.hs -w StringsThai.gf + diff --git a/src/GF/Text/Thai.hs b/src/GF/Text/Thai.hs index 93b22b0d6..8fc25213a 100644 --- a/src/GF/Text/Thai.hs +++ b/src/GF/Text/Thai.hs @@ -11,7 +11,7 @@ -- AR 27/12/2006. Execute test2 to see the transliteration table. -module GF.Text.Thai (mkThai,thaiFile,thaiPronFile) where +module GF.Text.Thai (mkThai,mkThaiWord,mkThaiPron,thaiFile,thaiPronFile) where import qualified Data.Map as Map import Data.Char @@ -93,7 +93,9 @@ allThaiCodes = [0x0e00 .. 0x0e7f] -- this works for one syllable -mkPronSyllable s = pronSyllable $ getSyllable $ map mkThaiChar $ unchar s +mkPronSyllable s = case fst $ pronAndOrth s of + Just p -> p + _ -> pronSyllable $ getSyllable $ map mkThaiChar $ unchar s data Syllable = Syll { initv :: [Int], @@ -118,11 +120,13 @@ pronSyllable s = initCons ++ tonem ++ vowel ++ finalCons where - vowel = case (initv s, midv s, finalv s, shorten s, tone s) of - ([0x0e40],[0x0e30,0x0e2d],_,_,_) -> "ö" -- eOa - ([0x0e40],[0x0e30,0x0e32],_,_,_) -> "o" -- ea:a - ([],[],[],_,_) -> "o" - (i,m,f,_,_) -> concatMap pronThaiChar (reverse $ f ++ m ++ i) ---- + vowel = case (initv s, midv s, finalv s, finalc s, shorten s, tone s) of + ([0x0e40],[0x0e35],[0x0e2d],[0x0e22],_,_) -> "ia" -- ei:-ya. + ([0x0e40],[0x0e35],_,[0x0e22],_,_) -> "ia" -- ei:-y + ([0x0e40],[0x0e30,0x0e2d],_,_,_,_) -> "ö" -- eOa. + ([0x0e40],[0x0e30,0x0e32],_,_,_,_) -> "o" -- ea:a. + ([],[],[],_,_,_) -> "o" + (i,m,f,_,_,_) -> concatMap pronThaiChar (reverse $ f ++ m ++ i) ---- initCons = concatMap pronThaiChar $ case (reverse $ initc s) of 0x0e2b:cs@(_:_) -> cs -- high h