forked from GitHub/gf-core
more thai transliteration; numeral thai grammars
This commit is contained in:
5
examples/numerals/mkThai.gfs
Normal file
5
examples/numerals/mkThai.gfs
Normal file
@@ -0,0 +1,5 @@
|
||||
i -old -optimize=none -abs=Numerals -cnc=decimal decimal.gf
|
||||
i -old -optimize=none -abs=Numerals -cnc=thai_decimal thaiDU.gf
|
||||
i -old -optimize=none -abs=Numerals -cnc=thai thaiU.gf
|
||||
i -old -optimize=none -abs=Numerals -cnc=thai_pronounce thaiP.gf
|
||||
sf -unlexer=unwords
|
||||
67
examples/numerals/thai.gf
Normal file
67
examples/numerals/thai.gf
Normal file
@@ -0,0 +1,67 @@
|
||||
include numerals.Abs.gf ;
|
||||
|
||||
-- Thai numerals in ASCII transliteration; thaiU.gf is generated from this file by GF/Text/Thai.hs
|
||||
-- AR 28/12/2006
|
||||
|
||||
-- Linearization types. Digits vary over DForm, numbers below 1000 vary over
-- NForm; the top-level Numeral and Sub1000000 are plain strings.
lincat Numeral    = {s : Str} ;
lincat Digit      = {s : DForm => Str} ;
lincat Sub10      = {s : DForm => Str} ;
lincat Sub100     = {s : NForm => Str} ;
lincat Sub1000    = {s : NForm => Str} ;
lincat Sub1000000 = {s : Str} ;
|
||||
|
||||
-- Linearization rules, written in the ASCII transliteration of Thai script.
-- Sub100 and Sub1000 carry two forms: Unit is the number itself, Thousand is
-- the same number counted in thousands (the form picked by pot3 / pot3plus).
lin
  num x = x ;

  -- mkNum takes the Indep, ModTen and After forms in that order.
  pot01 = mkNum "hnvg" "hnvg" "eOMd'" ;

  n2 = mkNum "s-Og" "y'i:E" "s-Og" ;
  n3 = regNum "s-a:m" ;
  n4 = regNum "s-i:E" ;
  n5 = regNum "ha:E" ;
  n6 = regNum "ho?k" ;
  n7 = regNum "ecMd'" ;
  n8 = regNum "e:pd'" ;
  n9 = regNum "eka:" ;

  pot0 d = d ;

  -- 10 and 10,000. The Thousand form spells out the leading "hnvg" (one),
  -- exactly as pot111 and pot1to19 do.
  pot110 = {s = table {
      Unit => sip ! Unit ;
      Thousand => ["hnvg hmv:En"]
    }
  } ;

  -- 11 and 11,000. The final "one" of 11 is the After form given in pot01.
  pot111 = {s = table {
      Unit => ["s'ib eOMd'"] ;
      Thousand => ["hnvg hmv:En hnvg p2an"]
    }
  } ;

  -- 12..19 and 12,000..19,000.
  pot1to19 d = {s = table {
      Unit => "s'ib" ++ d.s ! After ;
      Thousand => ["hnvg hmv:En"] ++ d.s ! Indep ++ "p2an"
    }
  } ;

  pot0as1 d = {s = \\n => d.s ! Indep ++ phan ! n} ;

  -- The ModTen and After digit forms belong to the tens-and-units reading
  -- only; when counting ten-thousands and thousands the digits keep their
  -- Indep form (20,000 is "s-Og hmv:En", not "y'i:E hmv:En").
  pot1 d = {s = table {
      Unit => d.s ! ModTen ++ sip ! Unit ;
      Thousand => d.s ! Indep ++ sip ! Thousand
    }
  } ;
  pot1plus d e = {s = table {
      Unit => d.s ! ModTen ++ sip ! Unit ++ e.s ! After ;
      Thousand => d.s ! Indep ++ sip ! Thousand ++ e.s ! Indep ++ phan ! Thousand
    }
  } ;

  pot1as2 n = n ;
  pot2 d = {s = \\n => d.s ! Indep ++ roy ! n} ;
  pot2plus d e = {s = \\n => d.s ! Indep ++ roy ! n ++ e.s ! n} ;

  -- Below one thousand only the Unit form is needed at the top level.
  pot2as3 n = {s = n.s ! Unit} ;
  pot3 n = {s = n.s ! Thousand} ;
  pot3plus n m = {s = n.s ! Thousand ++ m.s ! Unit} ;
|
||||
|
||||
-- DForm: the three forms every digit supplies through mkNum.
param DForm = Indep | ModTen | After ;

-- NForm: Unit is selected by pot2as3 / pot3plus, Thousand by pot3 / pot3plus.
param NForm = Unit | Thousand ;
|
||||
|
||||
-- Helper operations shared by the lin rules.
oper
  -- Build a digit from its Indep, ModTen and After forms (in that order).
  mkNum : Str -> Str -> Str -> {s : DForm => Str} =
    \indep, modten, after -> {
      s = table {
        Indep  => indep ;
        ModTen => modten ;
        After  => after
      }
    } ;

  -- A digit whose three forms are the same word.
  regNum : Str -> {s : DForm => Str} =
    \w -> mkNum w w w ;

  -- Magnitude words, indexed by NForm.
  sip : NForm => Str = table {Unit => "s'ib" ; Thousand => "hmv:En"} ;
  roy : NForm => Str = table {Unit => "rEOy'" ; Thousand => "se:n"} ;
  phan : NForm => Str = table {Unit => [] ; Thousand => "p2an"} ;
|
||||
48
examples/numerals/thaiDU.gf
Normal file
48
examples/numerals/thaiDU.gf
Normal file
@@ -0,0 +1,48 @@
|
||||
include numerals.Abs.gf ;
|
||||
|
||||
-- Thai digits. AR 28/12/2006
|
||||
|
||||
flags lexer=chars ; unlexer=concat ; flags coding=utf8 ;
|
||||
|
||||
-- Padding switch: under zz the *as* coercions below prepend a "๐" digit,
-- under noz they do not.
param
  Zeros = noz | zz ;
|
||||
|
||||
-- Linearization types: single digits and the final numeral are plain strings,
-- everything in between depends on the Zeros padding switch.
lincat
  Numeral    = {s : Str} ;
  Digit      = {s : Str} ;
  Sub10      = {s : Str} ;
  Sub100     = {s : Zeros => Str} ;
  Sub1000    = {s : Zeros => Str} ;
  Sub1000000 = {s : Zeros => Str} ;
|
||||
|
||||
oper
  -- Wrap a string in a one-field record.
  ss : Str -> {s : Str} =
    \str -> {s = str} ;

  -- Wrap a string so that both Zeros values yield the same text.
  mkz : Str -> {s : Zeros => Str} =
    \str -> {s = \\_ => str} ;
|
||||
|
||||
-- Linearization rules producing Thai digit strings.
lin
  -- The complete numeral is printed without leading zeros.
  num n = {s = n.s ! noz} ;

  n2 = ss "๒" ;
  n3 = ss "๓" ;
  n4 = ss "๔" ;
  n5 = ss "๕" ;
  n6 = ss "๖" ;
  n7 = ss "๗" ;
  n8 = ss "๘" ;
  n9 = ss "๙" ;

  pot01 = ss "๑" ;
  pot0 d = d ;

  -- 10, 11 and 12..19 always have two digits, so padding never applies.
  pot110 = mkz ("๑" ++ "๐") ;
  pot111 = mkz ("๑" ++ "๑") ;
  pot1to19 d = mkz ("๑" ++ d.s) ;

  -- A single digit used as a two-digit number: padded with one zero under zz.
  pot0as1 n = {s = table {noz => n.s ; zz => "๐" ++ n.s}} ;

  pot1 d = mkz (d.s ++ "๐") ;
  pot1plus d e = mkz (d.s ++ e.s) ;

  -- A two-digit number used as a three-digit one: one more zero under zz.
  pot1as2 n = {s = table {noz => n.s ! noz ; zz => "๐" ++ n.s ! zz}} ;
  pot2 d = mkz (d.s ++ "๐" ++ "๐") ;
  pot2plus d e = mkz (d.s ++ e.s ! zz) ;

  pot2as3 n = {s = table {noz => n.s ! noz ; zz => "๐" ++ n.s ! zz}} ;

  pot3 n = mkz (n.s ! noz ++ "๐" ++ "๐" ++ "๐") ;
  -- The part after the thousands is always taken in its padded (zz) form.
  pot3plus n m = {s = \\z => n.s ! z ++ m.s ! zz} ;
|
||||
69
examples/numerals/thaiP.gf
Normal file
69
examples/numerals/thaiP.gf
Normal file
@@ -0,0 +1,69 @@
|
||||
include numerals.Abs.gf ;
|
||||
|
||||
-- Thai pronunciation (mostly following Smyth's Essential Grammar)
|
||||
-- AR 28/12/2006
|
||||
|
||||
flags coding=utf8 ;
|
||||
|
||||
-- Linearization types, grouped by shape.
lincat
  -- plain strings
  Numeral = {s : Str} ;
  Sub1000000 = {s : Str} ;
  -- digits: one string per DForm
  Digit = {s : DForm => Str} ;
  Sub10 = {s : DForm => Str} ;
  -- numbers below 1000: one string per NForm
  Sub100 = {s : NForm => Str} ;
  Sub1000 = {s : NForm => Str} ;
|
||||
|
||||
-- Linearization rules giving the pronunciation of each numeral.
-- Sub100 and Sub1000 carry two forms: Unit is the number itself, Thousand is
-- the same number counted in thousands (the form picked by pot3 / pot3plus).
lin
  num x = x ;

  -- mkNum takes the Indep, ModTen and After forms in that order.
  pot01 = mkNum "nỳng" "nỳng" "èt" ;

  n2 = mkNum "söong" "yîi" "söong" ;
  n3 = regNum "säam" ;
  n4 = regNum "sìi" ;
  n5 = regNum "hâa" ;
  n6 = regNum "hòk" ;
  n7 = regNum "cèt" ;
  n8 = regNum "pèet" ;
  n9 = regNum "kâaw" ;

  pot0 d = d ;

  -- 10 and 10,000. The Thousand form spells out the leading "nỳng" (one),
  -- exactly as pot111 and pot1to19 do.
  pot110 = {s = table {
      Unit => sip ! Unit ;
      Thousand => ["nỳng mỳyn"]
    }
  } ;

  -- 11 and 11,000.
  pot111 = {s = table {
      Unit => ["sìp èt"] ;
      Thousand => ["nỳng mỳyn nỳng phan"]
    }
  } ;

  -- 12..19 and 12,000..19,000.
  pot1to19 d = {s = table {
      Unit => "sìp" ++ d.s ! After ;
      Thousand => ["nỳng mỳyn"] ++ d.s ! Indep ++ "phan"
    }
  } ;

  pot0as1 d = {s = \\n => d.s ! Indep ++ phan ! n} ;

  -- "yîi" (ModTen) and "èt" (After) are used only in the tens-and-units
  -- reading; when counting ten-thousands and thousands the digits keep their
  -- Indep form (20,000 is "söong mỳyn", not "yîi mỳyn").
  pot1 d = {s = table {
      Unit => d.s ! ModTen ++ sip ! Unit ;
      Thousand => d.s ! Indep ++ sip ! Thousand
    }
  } ;
  pot1plus d e = {s = table {
      Unit => d.s ! ModTen ++ sip ! Unit ++ e.s ! After ;
      Thousand => d.s ! Indep ++ sip ! Thousand ++ e.s ! Indep ++ phan ! Thousand
    }
  } ;

  pot1as2 n = n ;
  pot2 d = {s = \\n => d.s ! Indep ++ roy ! n} ;
  pot2plus d e = {s = \\n => d.s ! Indep ++ roy ! n ++ e.s ! n} ;

  -- Below one thousand only the Unit form is needed at the top level.
  pot2as3 n = {s = n.s ! Unit} ;
  pot3 n = {s = n.s ! Thousand} ;
  pot3plus n m = {s = n.s ! Thousand ++ m.s ! Unit} ;
|
||||
|
||||
-- Parameter types used by the lincats above.
param
  -- Forms of a single digit, in the argument order of mkNum.
  DForm
    = Indep
    | ModTen
    | After ;
  -- Plain value versus the value counted in thousands.
  NForm
    = Unit
    | Thousand ;
|
||||
|
||||
-- Auxiliary operations for the lin rules.
oper
  -- Digit with explicit Indep, ModTen and After pronunciations.
  mkNum : Str -> Str -> Str -> {s : DForm => Str} = \alone, beforeTen, afterTen ->
    {s = table {
       Indep => alone ;
       ModTen => beforeTen ;
       After => afterTen
       }
    } ;

  -- Digit pronounced the same way in all three positions.
  regNum : Str -> {s : DForm => Str} = \word ->
    mkNum word word word ;

  -- Magnitude words, indexed by NForm.
  sip : NForm => Str = table {
    Unit => "sìp" ;
    Thousand => "mỳyn"
    } ;
  roy : NForm => Str = table {
    Unit => "róoy" ;
    Thousand => "sëen"
    } ;
  phan : NForm => Str = table {
    Unit => [] ;
    Thousand => "phan"
    } ;
|
||||
68
examples/numerals/thaiU.gf
Normal file
68
examples/numerals/thaiU.gf
Normal file
@@ -0,0 +1,68 @@
|
||||
include numerals.Abs.gf ;
|
||||
|
||||
-- Thai UTF8, produced from thai.gf by GF/Text/Thai.hs. AR 28/12/2006
|
||||
|
||||
flags coding=utf8 ;
|
||||
|
||||
-- Linearization types: one lincat group per record shape.
lincat
  Numeral = {s : Str} ;
  Sub1000000 = {s : Str} ;
lincat
  Digit = {s : DForm => Str} ;
  Sub10 = {s : DForm => Str} ;
lincat
  Sub100 = {s : NForm => Str} ;
  Sub1000 = {s : NForm => Str} ;
|
||||
|
||||
-- Linearization rules in Thai script. The header of this file says it is
-- generated from thai.gf, so any change made here has to be made in thai.gf
-- as well or it will be lost when the file is regenerated.
-- Sub100 and Sub1000 carry two forms: Unit is the number itself, Thousand is
-- the same number counted in thousands (the form picked by pot3 / pot3plus).
lin
  num x = x ;

  -- mkNum takes the Indep, ModTen and After forms in that order.
  pot01 = mkNum "หนึง" "หนึง" "เอ็ด" ;

  n2 = mkNum "สอง" "ยี่" "สอง" ;
  n3 = regNum "สาม" ;
  n4 = regNum "สี่" ;
  n5 = regNum "หา่" ;
  n6 = regNum "หก" ;
  n7 = regNum "เจ็ด" ;
  n8 = regNum "แปด" ;
  n9 = regNum "เกา" ;

  pot0 d = d ;

  -- 10 and 10,000. The Thousand form spells out the leading "one", exactly
  -- as pot111 and pot1to19 do.
  pot110 = {s = table {
      Unit => sip ! Unit ;
      Thousand => ["หนึงหมื่น"]
    }
  } ;

  -- 11 and 11,000. The final "one" of 11 is the After form given in pot01.
  pot111 = {s = table {
      Unit => ["ศิบเอ็ด"] ;
      Thousand => ["หนึงหมื่นหนึงพะน"]
    }
  } ;

  -- 12..19 and 12,000..19,000.
  pot1to19 d = {s = table {
      Unit => "ศิบ" ++ d.s ! After ;
      Thousand => ["หนึงหมื่น"] ++ d.s ! Indep ++ "พะน"
    }
  } ;

  pot0as1 d = {s = \\n => d.s ! Indep ++ phan ! n} ;

  -- The ModTen and After digit forms belong to the tens-and-units reading
  -- only; when counting ten-thousands and thousands the digits keep their
  -- Indep form.
  pot1 d = {s = table {
      Unit => d.s ! ModTen ++ sip ! Unit ;
      Thousand => d.s ! Indep ++ sip ! Thousand
    }
  } ;
  pot1plus d e = {s = table {
      Unit => d.s ! ModTen ++ sip ! Unit ++ e.s ! After ;
      Thousand => d.s ! Indep ++ sip ! Thousand ++ e.s ! Indep ++ phan ! Thousand
    }
  } ;

  pot1as2 n = n ;
  pot2 d = {s = \\n => d.s ! Indep ++ roy ! n} ;
  pot2plus d e = {s = \\n => d.s ! Indep ++ roy ! n ++ e.s ! n} ;

  -- Below one thousand only the Unit form is needed at the top level.
  pot2as3 n = {s = n.s ! Unit} ;
  pot3 n = {s = n.s ! Thousand} ;
  pot3plus n m = {s = n.s ! Thousand ++ m.s ! Unit} ;
|
||||
|
||||
-- DForm lists the digit forms in the order mkNum expects them.
param DForm = Indep | ModTen | After ;
-- NForm: the number itself (Unit) or that many thousands (Thousand).
param NForm = Unit | Thousand ;
|
||||
|
||||
-- Helper operations used by the lin rules.
oper
  -- Digit given by its Indep, ModTen and After spellings.
  mkNum : Str -> Str -> Str -> {s : DForm => Str} =
    \a, b, c -> {s = table {Indep => a ; ModTen => b ; After => c}} ;

  -- Digit spelled the same way in every position.
  regNum : Str -> {s : DForm => Str} =
    \form -> mkNum form form form ;

  -- Magnitude words, indexed by NForm.
  sip : NForm => Str =
    table {Unit => "ศิบ" ; Thousand => "หมื่น"} ;
  roy : NForm => Str =
    table {Unit => "ร่อย" ; Thousand => "ซแน"} ;
  phan : NForm => Str =
    table {Unit => [] ; Thousand => "พะน"} ;
|
||||
@@ -22,7 +22,7 @@ import Data.List
|
||||
|
||||
|
||||
-- | Transliterate a whole string: split it into whitespace-separated words,
-- convert each one with 'mkThaiWord', and join the results directly, with no
-- separator between them.
mkThai :: String -> String
mkThai = concatMap mkThaiWord . words
|
||||
|
||||
type ThaiChar = Char
|
||||
|
||||
@@ -35,6 +35,15 @@ mkThaiChar c = maybe 0 id $ Map.lookup c thaiMap
|
||||
-- | Lookup table from transliteration token to code point, built by pairing
-- 'allThaiTrans' with 'allThaiCodes' position by position. When a token
-- occurs more than once in 'allThaiTrans', 'Map.fromList' keeps the code of
-- its last occurrence.
thaiMap :: Map.Map String Int
thaiMap = Map.fromList (zipWith (,) allThaiTrans allThaiCodes)
|
||||
|
||||
-- convert all string literals in a text
|
||||
|
||||
-- | Run 'mkThai' over the contents of every double-quoted literal in a text
-- and copy everything outside the quotes unchanged.
--
-- A literal ends at the next @"@ character; backslash escapes are not
-- recognised. If an opening quote has no closing partner, the rest of the
-- input is returned as it is instead of failing on the pattern match.
thaiStrings :: String -> String
thaiStrings s = case s of
  '"' : cs -> case span (/= '"') cs of
    (t, _ : r) -> '"' : mkThai t ++ "\"" ++ thaiStrings r
    (_, [])    -> s -- unterminated literal: leave the remainder untouched
  c : cs -> c : thaiStrings cs
  []     -> []
|
||||
|
||||
|
||||
-- each character is either [letter] or [letter+nonletter]
|
||||
|
||||
@@ -42,6 +51,7 @@ unchar :: String -> [String]
|
||||
-- Split a transliterated word into tokens. A character followed by a letter
-- is a token by itself; a character followed by a non-letter forms a
-- two-character token with it. The pair "x?" is dropped altogether, so "o?"
-- can be written for an implicit 'o' that has no written form.
unchar [] = []
unchar [c] = [[c]]
unchar (c : d : rest)
  | isAlpha d = [c] : unchar (d : rest)
  | d == '?'  = unchar rest
  | otherwise = [c, d] : unchar rest
|
||||
@@ -52,7 +62,9 @@ allThaiTrans = words $
|
||||
"t1 t2 t3 n d' t' t4 t5 t6 n b p p1 f p2 f' " ++
|
||||
"p3 m y' r - l - w s' r' s- h l' O h' - " ++
|
||||
"a a. a: a+ i i: v v: u u: - - - - - - " ++
|
||||
"e e: o: a% a& "
|
||||
"e e: o: a% a& L R M E T - - - - - - " ++
|
||||
"N0 N1 N2 N3 N4 N5 N6 N7 N8 N9 - - - - - - "
|
||||
|
||||
|
||||
-- | The code points U+0E00 through U+0E7F (the Unicode Thai block), in
-- ascending order.
allThaiCodes :: [Int]
allThaiCodes = enumFromTo 0x0e00 0x0e7f
|
||||
@@ -73,6 +85,12 @@ testThai s = do
|
||||
putStrLn $ encodeUTF8 $ mkThai s
|
||||
putStrLn $ unwords $ map mkThaiPron $ words s
|
||||
|
||||
-- | Read file @f@, convert its string literals with 'thaiStrings', encode
-- the result with 'encodeUTF8', and write it to the file named by @mo@, or
-- to standard output when @mo@ is 'Nothing'.
thaiFile :: FilePath -> Maybe FilePath -> IO ()
thaiFile f mo = readFile f >>= emit . encodeUTF8 . thaiStrings
  where
    emit = case mo of
      Nothing  -> putStr
      Just out -> writeFile out
|
||||
|
||||
mkThaiPron = concat . render . unchar where
|
||||
render s = case s of
|
||||
[c] -> maybe c return (Map.lookup c thaiFinalMap): []
|
||||
@@ -101,12 +119,13 @@ showThai s = case s of
|
||||
|
||||
-- | Turn one transliteration token into its pronunciation.
--
-- Two-character tokens are decided by their second character:
--
-- * upper-case letter + digit (e.g. @N5@): the digit itself
-- * other character + digit (e.g. @p2@): the character followed by @h@
-- * @:@ doubles the character, @%@ or @&@ appends @y@, @+@ appends @m@
-- * anything else: the first character alone
--
-- A single upper-case letter (this includes @O@) is silent; every other
-- token is returned unchanged.
pronThai :: String -> String
pronThai s = case s of
  [c, p]
    -- [p], not show p: show on a Char would add quote marks ("'5'").
    | isUpper c && isDigit p -> [p]
    | isDigit p              -> c : "h"
    | p == ':'               -> [c, c]
    | p `elem` "%&"          -> c : "y"
    | p == '+'               -> c : "m"
    | otherwise              -> [c]
  [c] | isUpper c -> ""
  _ -> s
|
||||
|
||||
hex = map hx . reverse . digs where
|
||||
@@ -116,5 +135,7 @@ hex = map hx . reverse . digs where
|
||||
|
||||
-- | Per-character table of the letters M, H, L and '-'.
-- NOTE(review): presumably indexed in parallel with 'allThaiCodes', giving
-- the consonant class of each code point — confirm against its consumer.
-- The 99 trailing blanks pad the table beyond the listed entries.
heights :: String
heights =
  " MHHLLLLMHLLLLMMHLLLMMHLLLMMHHLLLLLL-L-LHHHHLML" ++ replicate 99 ' '

-- | Per-character table of final-position sounds, with '-' where none is
-- listed. It has the same length as 'heights'.
-- NOTE(review): presumably the source of thaiFinalMap — confirm.
finals :: String
finals =
  " kkkkkkgt-tt-ntttttntttttnpp--pppmyn-n-wttt-n--" ++ replicate 99 ' '
|
||||
|
||||
Reference in New Issue
Block a user