commands for displaying transliteration tables

This commit is contained in:
aarne
2008-06-15 15:24:11 +00:00
parent 15c9fada0a
commit caa0e37c56
5 changed files with 135 additions and 28 deletions

View File

@@ -1,5 +1,6 @@
module GF.Text.Lexing (stringOp) where
import GF.Text.Transliterations
import GF.Text.UTF8
import Data.Char
@@ -19,7 +20,7 @@ stringOp name = case name of
"unwords" -> Just $ appUnlexer unwords
"to_utf8" -> Just encodeUTF8
"from_utf8" -> Just decodeUTF8
_ -> Nothing
_ -> transliterate name
appLexer :: (String -> [String]) -> String -> String
appLexer f = unwords . filter (not . null) . f

View File

@@ -0,0 +1,87 @@
module GF.Text.Transliterations (transliterate,transliteration,characterTable) where
import GF.Text.UTF8
import Data.Char
import qualified Data.Map as Map
transliterate :: String -> Maybe (String -> String)
transliterate s = case s of
'f':'r':'o':'m':'_':t -> fmap appTransFromUnicode $ transliteration t
't':'o':'_':t -> fmap appTransToUnicode $ transliteration t
_ -> Nothing
transliteration :: String -> Maybe Transliteration
transliteration s = case s of
"devanagari" -> Just transDevanagari
"thai" -> Just transThai
_ -> Nothing
characterTable :: Transliteration -> String
characterTable = unlines . map prOne . Map.assocs . trans_from_unicode where
prOne (i,s) = unwords ["|", show i, "|", encodeUTF8 [toEnum i], "|", s, "|"]
data Transliteration = Trans {
trans_to_unicode :: Map.Map String Int,
trans_from_unicode :: Map.Map Int String
}
appTransToUnicode :: Transliteration -> String -> String
appTransToUnicode trans =
concat .
map (\c -> maybe c (return . toEnum) $
Map.lookup c (trans_to_unicode trans)
) .
unchar
appTransFromUnicode :: Transliteration -> String -> String
appTransFromUnicode trans =
concat .
map (maybe "?" id .
flip Map.lookup (trans_from_unicode trans)
) .
map fromEnum
-- conventions:
-- each character is either [letter] or [letter+nonletter]
-- when using a sparse range of unicodes, mark missing codes as "-" in transliterations
mkTransliteration :: [String] -> [Int] -> Transliteration
mkTransliteration ts us = Trans (Map.fromList (tzip ts us)) (Map.fromList (uzip us ts))
where
tzip ts us = [(t,u) | (t,u) <- zip ts us, t /= "-"]
uzip us ts = [(u,t) | (u,t) <- zip us ts, t /= "-"]
unchar :: String -> [String]
unchar s = case s of
c:d:cs
| isAlpha d -> [c] : unchar (d:cs)
| isSpace d -> [c] : unchar cs
| otherwise -> [c,d] : unchar cs
[_] -> [s]
_ -> []
transThai :: Transliteration
transThai = mkTransliteration allTrans allCodes where
allTrans = words $
"- k k1 - k2 - k3 g c c1 c2 s' c3 y' d' t' " ++
"t1 t2 t3 n' d t t4 t5 t6 n b p p1 f p2 f' " ++
"p3 m y r - l - w s- s. s h l' O h' - " ++
"a. a a: a+ i i: v v: u u: - - - - - - " ++
"e e' o: a% a& L R S T1 T2 T3 T4 K - - - " ++
"N0 N1 N2 N3 N4 N5 N6 N7 N8 N9 - - - - - - "
allCodes = [0x0e00 .. 0x0e7f]
transDevanagari :: Transliteration
transDevanagari = mkTransliteration allTrans allCodes where
allTrans = words $
"~ * - - " ++
"a- A- i- I- u- U- R- - - - e- E- - - o- O- " ++
"k K g G N: c C j J n: t. T. d. D. n. t " ++
"T d D n - p P b B m y r - l - - v " ++
"S s. s h - - r. - A i I u U R - - " ++
"- e E o O "
allCodes = [0x0901 .. 0x094c]