forked from GitHub/gf-core
produce a readable word list
This commit is contained in:
@@ -14,6 +14,7 @@
|
||||
module Main (main) where
|
||||
|
||||
import GF.Text.Thai
|
||||
import GF.Text.UTF8
|
||||
import Data.List
|
||||
import System
|
||||
|
||||
@@ -22,5 +23,21 @@ main = do
|
||||
xx <- getArgs
|
||||
case xx of
|
||||
"-p":f:[] -> thaiPronFile f Nothing
|
||||
"-w":f:[] -> thaiWordList f
|
||||
f :[] -> thaiFile f Nothing
|
||||
-- any other argument shape: print a usage line listing both optional flags
_ -> putStrLn "usage: filethai (-p | -w) File"
|
||||
|
||||
|
||||
-- adapted to the format of StringsThai
|
||||
|
||||
-- | Print a tab-separated word list for the file @f@.
--
-- A line is used only when its whitespace-separated tokens have the shape
--
-- > name = token ; -- gloss words ...
--
-- For such a line one output line is written with three tab-separated
-- fields: @encodeUTF8 (mkThaiWord t)@, @mkThaiPron t@ and the gloss words,
-- where @t@ is @token@ without its first and last character (presumably
-- the surrounding double quotes -- confirm against StringsThai.gf).
-- Lines of any other shape are skipped silently.
thaiWordList :: FilePath -> IO ()
thaiWordList f = do
  ss <- fmap lines (readFile f)
  mapM_ mkLine ss
 where
  -- the token is bound to its own name so it does not shadow the whole line
  mkLine l = case words l of
    _ : "=" : w : ";" : "--" : es ->
      putStrLn $ thai w ++ "\t" ++ pron w ++ "\t" ++ unwords es
    _ -> return ()
  thai = encodeUTF8 . mkThaiWord . unquote
  pron = mkThaiPron . unquote
  -- Drop the first and the last character. Unlike @init . tail@ this is
  -- total: a token shorter than two characters yields the empty string
  -- instead of raising "Prelude.init: empty list".
  unquote w = drop 1 (take (length w - 1) w)
|
||||
|
||||
@@ -2,3 +2,6 @@ strings:
|
||||
runghc -i../../../src FileThai.hs StringsThai.gf >StringsTha.gf
|
||||
pronstrings:
|
||||
runghc -i../../../src FileThai.hs -p StringsThai.gf >pronunciation/StringsTha.gf
|
||||
wordlist:
|
||||
runghc -i../../../src FileThai.hs -w StringsThai.gf
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
-- AR 27/12/2006. Execute test2 to see the transliteration table.
|
||||
|
||||
module GF.Text.Thai (mkThai,thaiFile,thaiPronFile) where
|
||||
module GF.Text.Thai (mkThai,mkThaiWord,mkThaiPron,thaiFile,thaiPronFile) where
|
||||
|
||||
import qualified Data.Map as Map
|
||||
import Data.Char
|
||||
@@ -93,7 +93,9 @@ allThaiCodes = [0x0e00 .. 0x0e7f]
|
||||
|
||||
-- this works for one syllable
|
||||
|
||||
mkPronSyllable s = pronSyllable $ getSyllable $ map mkThaiChar $ unchar s
|
||||
-- Use the pronunciation found in the first component of pronAndOrth when
-- there is one; otherwise compute it from the characters of the syllable.
mkPronSyllable s = maybe fromChars id (fst (pronAndOrth s))
  where
    fromChars = pronSyllable (getSyllable (map mkThaiChar (unchar s)))
|
||||
|
||||
data Syllable = Syll {
|
||||
initv :: [Int],
|
||||
@@ -118,11 +120,13 @@ pronSyllable s =
|
||||
initCons ++ tonem ++ vowel ++ finalCons
|
||||
where
|
||||
|
||||
vowel = case (initv s, midv s, finalv s, shorten s, tone s) of
|
||||
([0x0e40],[0x0e30,0x0e2d],_,_,_) -> "ö" -- eOa
|
||||
([0x0e40],[0x0e30,0x0e32],_,_,_) -> "o" -- ea:a
|
||||
([],[],[],_,_) -> "o"
|
||||
(i,m,f,_,_) -> concatMap pronThaiChar (reverse $ f ++ m ++ i) ----
|
||||
vowel = case (initv s, midv s, finalv s, finalc s, shorten s, tone s) of
|
||||
([0x0e40],[0x0e35],[0x0e2d],[0x0e22],_,_) -> "ia" -- ei:-ya.
|
||||
([0x0e40],[0x0e35],_,[0x0e22],_,_) -> "ia" -- ei:-y
|
||||
([0x0e40],[0x0e30,0x0e2d],_,_,_,_) -> "ö" -- eOa.
|
||||
([0x0e40],[0x0e30,0x0e32],_,_,_,_) -> "o" -- ea:a.
|
||||
([],[],[],_,_,_) -> "o"
|
||||
(i,m,f,_,_,_) -> concatMap pronThaiChar (reverse $ f ++ m ++ i) ----
|
||||
|
||||
initCons = concatMap pronThaiChar $ case (reverse $ initc s) of
|
||||
0x0e2b:cs@(_:_) -> cs -- high h
|
||||
|
||||
Reference in New Issue
Block a user