diff --git a/examples/numerals/mkThai.gfs b/examples/numerals/mkThai.gfs
new file mode 100644
index 000000000..8f29d4e24
--- /dev/null
+++ b/examples/numerals/mkThai.gfs
@@ -0,0 +1,5 @@
+i -old -optimize=none -abs=Numerals -cnc=decimal decimal.gf
+i -old -optimize=none -abs=Numerals -cnc=thai_decimal thaiDU.gf
+i -old -optimize=none -abs=Numerals -cnc=thai thaiU.gf
+i -old -optimize=none -abs=Numerals -cnc=thai_pronounce thaiP.gf
+sf -unlexer=unwords
diff --git a/examples/numerals/thai.gf b/examples/numerals/thai.gf
new file mode 100644
index 000000000..da6648a42
--- /dev/null
+++ b/examples/numerals/thai.gf
@@ -0,0 +1,67 @@
+include numerals.Abs.gf ;
+
+-- Thai transliteration, produces thaiU.gf by GF/Text/Thai.hs
+-- AR 28/12/2006
+
+lincat
+  Numeral = {s : Str} ;
+  Digit = {s : DForm => Str} ;
+  Sub10 = {s : DForm => Str} ;
+  Sub100 = {s : NForm => Str} ;
+  Sub1000 = {s : NForm => Str} ;
+  Sub1000000 = {s : Str} ;
+
+lin
+  num x = x ;
+
+  pot01 = mkNum "hnvg" "hnvg" "eOMd'" ;
+
+  n2 = mkNum "s-Og" "y'i:E" "s-Og" ;
+  n3 = regNum "s-a:m" ;
+  n4 = regNum "s-i:E" ;
+  n5 = regNum "ha:E" ;
+  n6 = regNum "ho?k" ;
+  n7 = regNum "ecMd'" ;
+  n8 = regNum "e:pd'" ;
+  n9 = regNum "eka:" ;
+
+
+  pot0 d = d ;
+
+  pot110 = {s = sip} ;
+  pot111 = {s = table {
+      Unit => ["s'ib et"] ;
+      Thousand => ["hnvg hmv:En hnvg p2an"]
+      }
+    } ;
+  pot1to19 d = {s = table {
+      Unit => "s'ib" ++ d.s ! After ;
+      Thousand => ["hnvg hmv:En"] ++ d.s ! Indep ++ "p2an"
+      }
+    } ;
+  pot0as1 d = {s = \\n => d.s ! Indep ++ phan ! n} ;
+  pot1 d = {s = \\n => d.s ! ModTen ++ sip ! n} ;
+  pot1plus d e = {
+    s = \\n => d.s ! ModTen ++ sip ! n ++ e.s ! After ++ phan ! n
+    } ;
+  pot1as2 n = n ;
+  pot2 d = {s = \\n => d.s ! Indep ++ roy ! n} ;
+  pot2plus d e = {s = \\n => d.s ! Indep ++ roy ! n ++ e.s ! n} ;
+  pot2as3 n = {s = n.s ! Unit} ;
+  pot3 n = {s = n.s ! Thousand} ;
+  pot3plus n m = {s = n.s ! Thousand ++ m.s ! Unit} ;
+
+param
+  DForm = Indep | ModTen | After ;
+  NForm = Unit | Thousand ;
+
+oper
+  mkNum : Str -> Str -> Str -> {s : DForm => Str} = \x,y,z ->
+    {s = table {Indep => x ; ModTen => y ; After => z}} ;
+  regNum : Str -> {s : DForm => Str} = \x ->
+    mkNum x x x ;
+
+
+  sip = table {Unit => "s'ib" ; Thousand => "hmv:En"} ;
+  roy = table {Unit => "rEOy'" ; Thousand => "se:n"} ;
+  phan = table {Unit => [] ; Thousand => "p2an"} ;
diff --git a/examples/numerals/thaiDU.gf b/examples/numerals/thaiDU.gf
new file mode 100644
index 000000000..1cbf81fbc
--- /dev/null
+++ b/examples/numerals/thaiDU.gf
@@ -0,0 +1,48 @@
+include numerals.Abs.gf ;
+
+-- Thai digits. AR 28/12/2006
+
+flags lexer=chars ; unlexer=concat ; flags coding=utf8 ;
+
+param Zeros = noz | zz ;
+
+lincat Numeral = { s : Str } ;
+lincat Digit = { s : Str } ;
+lincat Sub10 = { s : Str } ;
+lincat Sub100 = {s : Zeros => Str} ;
+lincat Sub1000 = {s : Zeros => Str} ;
+lincat Sub1000000 = {s : Zeros => Str} ;
+
+oper ss : Str -> {s : Str} = \s -> {s = s} ;
+oper mkz : Str -> {s : Zeros => Str} = \s -> {s = table {_ => s}} ;
+
+lin num n = {s = n.s ! noz} ;
+lin n2 = ss "๒" ;
+lin n3 = ss "๓" ;
+lin n4 = ss "๔" ;
+lin n5 = ss "๕" ;
+lin n6 = ss "๖" ;
+lin n7 = ss "๗" ;
+lin n8 = ss "๘" ;
+lin n9 = ss "๙" ;
+
+lin pot01 = ss "๑" ;
+lin pot0 d = d ;
+
+lin pot110 = mkz ("๑" ++ "๐") ;
+lin pot111 = mkz ("๑" ++ "๑") ;
+lin pot1to19 d = mkz ("๑" ++ d.s) ;
+
+lin pot0as1 n = {s = table {noz => n.s ; zz => "๐" ++ n.s}} ;
+
+lin pot1 d = mkz (d.s ++ "๐") ;
+lin pot1plus d e = mkz (d.s ++ e.s) ;
+
+lin pot1as2 n = {s = table {noz => n.s ! noz ; zz => "๐" ++ n.s ! zz}} ;
+lin pot2 d = mkz (d.s ++ "๐" ++ "๐") ;
+lin pot2plus d e = mkz (d.s ++ e.s ! zz) ;
+
+lin pot2as3 n = {s = table {noz => n.s ! noz ; zz => "๐" ++ n.s ! zz}} ;
+
+lin pot3 n = mkz (n.s ! noz ++ "๐" ++ "๐" ++ "๐") ;
+lin pot3plus n m = {s = table {z => n.s ! z ++ m.s ! zz}} ;
diff --git a/examples/numerals/thaiP.gf b/examples/numerals/thaiP.gf
new file mode 100644
index 000000000..c35b2dfbf
--- /dev/null
+++ b/examples/numerals/thaiP.gf
@@ -0,0 +1,69 @@
+include numerals.Abs.gf ;
+
+-- Thai pronunciation (mostly following Smyth's Essential Grammar)
+-- AR 28/12/2006
+
+flags coding=utf8 ;
+
+lincat
+  Numeral = {s : Str} ;
+  Digit = {s : DForm => Str} ;
+  Sub10 = {s : DForm => Str} ;
+  Sub100 = {s : NForm => Str} ;
+  Sub1000 = {s : NForm => Str} ;
+  Sub1000000 = {s : Str} ;
+
+lin
+  num x = x ;
+
+  pot01 = mkNum "nỳng" "nỳng" "èt" ;
+
+  n2 = mkNum "söong" "yîi" "söong" ;
+  n3 = regNum "säam" ;
+  n4 = regNum "sìi" ;
+  n5 = regNum "hâa" ;
+  n6 = regNum "hòk" ;
+  n7 = regNum "cèt" ;
+  n8 = regNum "pèet" ;
+  n9 = regNum "kâaw" ;
+
+
+  pot0 d = d ;
+
+  pot110 = {s = sip} ;
+  pot111 = {s = table {
+      Unit => ["sìp èt"] ;
+      Thousand => ["nỳng mỳyn nỳng phan"]
+      }
+    } ;
+  pot1to19 d = {s = table {
+      Unit => "sìp" ++ d.s ! After ;
+      Thousand => ["nỳng mỳyn"] ++ d.s ! Indep ++ "phan"
+      }
+    } ;
+  pot0as1 d = {s = \\n => d.s ! Indep ++ phan ! n} ;
+  pot1 d = {s = \\n => d.s ! ModTen ++ sip ! n} ;
+  pot1plus d e = {
+    s = \\n => d.s ! ModTen ++ sip ! n ++ e.s ! After ++ phan ! n
+    } ;
+  pot1as2 n = n ;
+  pot2 d = {s = \\n => d.s ! Indep ++ roy ! n} ;
+  pot2plus d e = {s = \\n => d.s ! Indep ++ roy ! n ++ e.s ! n} ;
+  pot2as3 n = {s = n.s ! Unit} ;
+  pot3 n = {s = n.s ! Thousand} ;
+  pot3plus n m = {s = n.s ! Thousand ++ m.s ! Unit} ;
+
+param
+  DForm = Indep | ModTen | After ;
+  NForm = Unit | Thousand ;
+
+oper
+  mkNum : Str -> Str -> Str -> {s : DForm => Str} = \x,y,z ->
+    {s = table {Indep => x ; ModTen => y ; After => z}} ;
+  regNum : Str -> {s : DForm => Str} = \x ->
+    mkNum x x x ;
+
+
+  sip = table {Unit => "sìp" ; Thousand => "mỳyn"} ;
+  roy = table {Unit => "róoy" ; Thousand => "sëen"} ;
+  phan = table {Unit => [] ; Thousand => "phan"} ;
diff --git a/examples/numerals/thaiU.gf b/examples/numerals/thaiU.gf
new file mode 100644
index 000000000..61777416d
--- /dev/null
+++ b/examples/numerals/thaiU.gf
@@ -0,0 +1,68 @@
+include numerals.Abs.gf ;
+
+-- Thai UTF8, produced from thai.gf by GF/Text/Thai.hs. AR 28/12/2006
+
+flags coding=utf8 ;
+
+lincat
+  Numeral = {s : Str} ;
+  Digit = {s : DForm => Str} ;
+  Sub10 = {s : DForm => Str} ;
+  Sub100 = {s : NForm => Str} ;
+  Sub1000 = {s : NForm => Str} ;
+  Sub1000000 = {s : Str} ;
+
+lin
+  num x = x ;
+
+  pot01 = mkNum "หนึง" "หนึง" "เอ็ด" ;
+
+  n2 = mkNum "สอง" "ยี่" "สอง" ;
+  n3 = regNum "สาม" ;
+  n4 = regNum "สี่" ;
+  n5 = regNum "หา่" ;
+  n6 = regNum "หก" ;
+  n7 = regNum "เจ็ด" ;
+  n8 = regNum "แปด" ;
+  n9 = regNum "เกา" ;
+
+
+  pot0 d = d ;
+
+  pot110 = {s = sip} ;
+  pot111 = {s = table {
+      Unit => ["ศิบเฏ"] ;
+      Thousand => ["หนึงหมื่นหนึงพะน"]
+      }
+    } ;
+  pot1to19 d = {s = table {
+      Unit => "ศิบ" ++ d.s ! After ;
+      Thousand => ["หนึงหมื่น"] ++ d.s ! Indep ++ "พะน"
+      }
+    } ;
+  pot0as1 d = {s = \\n => d.s ! Indep ++ phan ! n} ;
+  pot1 d = {s = \\n => d.s ! ModTen ++ sip ! n} ;
+  pot1plus d e = {
+    s = \\n => d.s ! ModTen ++ sip ! n ++ e.s ! After ++ phan ! n
+    } ;
+  pot1as2 n = n ;
+  pot2 d = {s = \\n => d.s ! Indep ++ roy ! n} ;
+  pot2plus d e = {s = \\n => d.s ! Indep ++ roy ! n ++ e.s ! n} ;
+  pot2as3 n = {s = n.s ! Unit} ;
+  pot3 n = {s = n.s ! Thousand} ;
+  pot3plus n m = {s = n.s ! Thousand ++ m.s ! Unit} ;
+
+param
+  DForm = Indep | ModTen | After ;
+  NForm = Unit | Thousand ;
+
+oper
+  mkNum : Str -> Str -> Str -> {s : DForm => Str} = \x,y,z ->
+    {s = table {Indep => x ; ModTen => y ; After => z}} ;
+  regNum : Str -> {s : DForm => Str} = \x ->
+    mkNum x x x ;
+
+
+  sip = table {Unit => "ศิบ" ; Thousand => "หมื่น"} ;
+  roy = table {Unit => "ร่อย" ; Thousand => "ซแน"} ;
+  phan = table {Unit => [] ; Thousand => "พะน"} ;
diff --git a/src/GF/Text/Thai.hs b/src/GF/Text/Thai.hs
index 8e344cc06..7fede0676 100644
--- a/src/GF/Text/Thai.hs
+++ b/src/GF/Text/Thai.hs
@@ -22,7 +22,7 @@ import Data.List
 
 
 mkThai :: String -> String
-mkThai = unwords . map mkThaiWord . words
+mkThai = concat . map mkThaiWord . words
 
 type ThaiChar = Char
 
@@ -35,6 +35,17 @@ mkThaiChar c = maybe 0 id $ Map.lookup c thaiMap
 thaiMap :: Map.Map String Int
 thaiMap = Map.fromList $ zip allThaiTrans allThaiCodes
 
+-- | Transliterate the contents of every double-quoted string literal in a
+-- text with 'mkThai', copying everything outside the quotes unchanged.
+-- An unterminated literal is converted up to the end of the input instead
+-- of failing on a pattern match; no closing quote is invented for it.
+thaiStrings :: String -> String
+thaiStrings s = case s of
+  '"':cs -> let (t,r) = span (/='"') cs in
+            '"':mkThai t ++ take 1 r ++ thaiStrings (drop 1 r)
+  c:cs -> c:thaiStrings cs
+  _ -> s
+
 -- each character is either [letter] or [letter+nonletter]
 
 
@@ -42,6 +53,7 @@ unchar :: String -> [String]
 unchar s = case s of
   c:d:cs
    | isAlpha d -> [c] : unchar (d:cs)
+   | d == '?'  -> unchar cs -- use "o?" to represent implicit 'o'
    | otherwise -> [c,d] : unchar cs
   [_] -> [s]
   _ -> []
@@ -52,7 +64,9 @@ allThaiTrans = words $
   "t1 t2 t3 n d' t' t4 t5 t6 n b p p1 f p2 f' " ++
   "p3 m y' r - l - w s' r' s- h l' O h' - " ++
   "a a. a: a+ i i: v v: u u: - - - - - - " ++
-  "e e: o: a% a& "
+  "e e: o: a% a& L R M E T - - - - - - " ++
+  "N0 N1 N2 N3 N4 N5 N6 N7 N8 N9 - - - - - - "
+
 
 allThaiCodes :: [Int]
 allThaiCodes = [0x0e00 .. 0x0e7f]
@@ -73,6 +87,15 @@ testThai s = do
   putStrLn $ encodeUTF8 $ mkThai s
   putStrLn $ unwords $ map mkThaiPron $ words s
 
+-- | Read file @f@, convert its string literals with 'thaiStrings', and emit
+-- the 'encodeUTF8'-encoded result: to the file @mo@ when given, else stdout.
+-- NOTE(review): if 'readFile' is the lazy Prelude one, @mo@ must not be @f@.
+thaiFile :: FilePath -> Maybe FilePath -> IO ()
+thaiFile f mo = do
+  s <- readFile f
+  let put = maybe putStr writeFile mo
+  put $ encodeUTF8 $ thaiStrings s
+
 mkThaiPron = concat . render . unchar where
   render s = case s of
     [c] -> maybe c return (Map.lookup c thaiFinalMap): []
@@ -101,12 +124,13 @@ showThai s = case s of
 
 pronThai s = case s of
   [c,p]
+    | isUpper c && isDigit p -> [p] -- digit codes N0..N9: the digit itself
     | isDigit p -> c:"h"
     | p==':' -> c:[c]
     | elem p "%&" -> c:"y"
     | p=='+' -> c:"m"
     | otherwise -> [c]
-  "O" -> ""
+  [c] | isUpper c -> "" --- O
   _ -> s
 
 hex = map hx . reverse . digs where
@@ -116,5 +140,7 @@ hex = map hx . reverse . digs where
 
 heights :: String
 finals :: String
-heights = " MHHLLLLMHLLLLMMHLLLMMHLLLMMHHLLLLLL-L-LHHHHLML "
-finals = " kkkkkkgt-tt-ntttttntttttnpp--pppmyn-n-wttt-n-- "
+heights =
+  " MHHLLLLMHLLLLMMHLLLMMHLLLMMHHLLLLLL-L-LHHHHLML" ++ replicate 99 ' '
+finals =
+  " kkkkkkgt-tt-ntttttntttttnpp--pppmyn-n-wttt-n--" ++ replicate 99 ' '