mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-20 16:42:51 -06:00
produce a readable word list
This commit is contained in:
@@ -14,6 +14,7 @@
|
|||||||
module Main (main) where
|
module Main (main) where
|
||||||
|
|
||||||
import GF.Text.Thai
|
import GF.Text.Thai
|
||||||
|
import GF.Text.UTF8
|
||||||
import Data.List
|
import Data.List
|
||||||
import System
|
import System
|
||||||
|
|
||||||
@@ -22,5 +23,21 @@ main = do
|
|||||||
xx <- getArgs
|
xx <- getArgs
|
||||||
case xx of
|
case xx of
|
||||||
"-p":f:[] -> thaiPronFile f Nothing
|
"-p":f:[] -> thaiPronFile f Nothing
|
||||||
|
"-w":f:[] -> thaiWordList f
|
||||||
f :[] -> thaiFile f Nothing
|
f :[] -> thaiFile f Nothing
|
||||||
_ -> putStrLn "usage: filethai (-p) File"
|
_ -> putStrLn "usage: filethai (-p) File"
|
||||||
|
|
||||||
|
|
||||||
|
-- adapted to the format of StringsThai
|
||||||
|
|
||||||
|
-- | Print a tab-separated word list for a file in the StringsThai format.
--
-- Every input line whose whitespace-separated tokens have the shape
--
-- > name = "word" ; -- gloss words ...
--
-- produces one output line with three tab-separated fields: the result of
-- 'mkThaiWord' on the word (passed through 'encodeUTF8'), the result of
-- 'mkThaiPron' on the word, and the gloss words joined by single spaces.
-- Lines of any other shape are skipped silently.
thaiWordList :: FilePath -> IO ()
thaiWordList f = do
  entries <- fmap lines (readFile f)
  mapM_ mkLine entries
 where
  mkLine l = case words l of
    _ : "=" : quoted : ";" : "--" : gloss -> case unquote quoted of
      Just w  -> putStrLn $ thai w ++ "\t" ++ pron w ++ "\t" ++ unwords gloss
      -- token too short to carry both delimiters: treat like any other
      -- malformed line instead of crashing on 'init []'
      Nothing -> return ()
    _ -> return ()

  thai = encodeUTF8 . mkThaiWord
  pron = mkThaiPron

  -- Drop the first and last character of the token (the surrounding
  -- quotes in the expected input). Total: a token shorter than two
  -- characters yields Nothing. 'init' is safe here because the pattern
  -- guarantees that 'rest' is non-empty.
  unquote (_ : rest@(_ : _)) = Just (init rest)
  unquote _                  = Nothing
|
||||||
|
|||||||
@@ -2,3 +2,6 @@ strings:
|
|||||||
runghc -i../../../src FileThai.hs StringsThai.gf >StringsTha.gf
|
runghc -i../../../src FileThai.hs StringsThai.gf >StringsTha.gf
|
||||||
pronstrings:
|
pronstrings:
|
||||||
runghc -i../../../src FileThai.hs -p StringsThai.gf >pronunciation/StringsTha.gf
|
runghc -i../../../src FileThai.hs -p StringsThai.gf >pronunciation/StringsTha.gf
|
||||||
|
wordlist:
|
||||||
|
runghc -i../../../src FileThai.hs -w StringsThai.gf
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,7 @@
|
|||||||
|
|
||||||
-- AR 27/12/2006. Execute test2 to see the transliteration table.
|
-- AR 27/12/2006. Execute test2 to see the transliteration table.
|
||||||
|
|
||||||
module GF.Text.Thai (mkThai,thaiFile,thaiPronFile) where
|
module GF.Text.Thai (mkThai,mkThaiWord,mkThaiPron,thaiFile,thaiPronFile) where
|
||||||
|
|
||||||
import qualified Data.Map as Map
|
import qualified Data.Map as Map
|
||||||
import Data.Char
|
import Data.Char
|
||||||
@@ -93,7 +93,9 @@ allThaiCodes = [0x0e00 .. 0x0e7f]
|
|||||||
|
|
||||||
-- this works for one syllable
|
-- this works for one syllable
|
||||||
|
|
||||||
mkPronSyllable s = pronSyllable $ getSyllable $ map mkThaiChar $ unchar s
|
-- Pronunciation of a single syllable.
-- If the first component of 'pronAndOrth s' is a 'Just', that value is
-- returned unchanged (presumably a pronunciation given explicitly in the
-- input -- confirm against the definition of 'pronAndOrth').
-- Otherwise the pronunciation is derived from the spelling: the input is
-- split with 'unchar', mapped through 'mkThaiChar', assembled by
-- 'getSyllable' and rendered by 'pronSyllable'.
mkPronSyllable s = maybe derived id (fst (pronAndOrth s))
 where
  derived = pronSyllable (getSyllable (map mkThaiChar (unchar s)))
|
||||||
|
|
||||||
data Syllable = Syll {
|
data Syllable = Syll {
|
||||||
initv :: [Int],
|
initv :: [Int],
|
||||||
@@ -118,11 +120,13 @@ pronSyllable s =
|
|||||||
initCons ++ tonem ++ vowel ++ finalCons
|
initCons ++ tonem ++ vowel ++ finalCons
|
||||||
where
|
where
|
||||||
|
|
||||||
vowel = case (initv s, midv s, finalv s, shorten s, tone s) of
|
vowel = case (initv s, midv s, finalv s, finalc s, shorten s, tone s) of
|
||||||
([0x0e40],[0x0e30,0x0e2d],_,_,_) -> "ö" -- eOa
|
([0x0e40],[0x0e35],[0x0e2d],[0x0e22],_,_) -> "ia" -- ei:-ya.
|
||||||
([0x0e40],[0x0e30,0x0e32],_,_,_) -> "o" -- ea:a
|
([0x0e40],[0x0e35],_,[0x0e22],_,_) -> "ia" -- ei:-y
|
||||||
([],[],[],_,_) -> "o"
|
([0x0e40],[0x0e30,0x0e2d],_,_,_,_) -> "ö" -- eOa.
|
||||||
(i,m,f,_,_) -> concatMap pronThaiChar (reverse $ f ++ m ++ i) ----
|
([0x0e40],[0x0e30,0x0e32],_,_,_,_) -> "o" -- ea:a.
|
||||||
|
([],[],[],_,_,_) -> "o"
|
||||||
|
(i,m,f,_,_,_) -> concatMap pronThaiChar (reverse $ f ++ m ++ i) ----
|
||||||
|
|
||||||
initCons = concatMap pronThaiChar $ case (reverse $ initc s) of
|
initCons = concatMap pronThaiChar $ case (reverse $ initc s) of
|
||||||
0x0e2b:cs@(_:_) -> cs -- high h
|
0x0e2b:cs@(_:_) -> cs -- high h
|
||||||
|
|||||||
Reference in New Issue
Block a user