mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-23 01:52:50 -06:00
converted Hindi to the revised encoding
This commit is contained in:
@@ -23,7 +23,8 @@ characterTable = unlines . map prOne . Map.assocs . trans_from_unicode where
|
|||||||
|
|
||||||
data Transliteration = Trans {
|
data Transliteration = Trans {
|
||||||
trans_to_unicode :: Map.Map String Int,
|
trans_to_unicode :: Map.Map String Int,
|
||||||
trans_from_unicode :: Map.Map Int String
|
trans_from_unicode :: Map.Map Int String,
|
||||||
|
invisible_chars :: [String]
|
||||||
}
|
}
|
||||||
|
|
||||||
appTransToUnicode :: Transliteration -> String -> String
|
appTransToUnicode :: Transliteration -> String -> String
|
||||||
@@ -32,6 +33,7 @@ appTransToUnicode trans =
|
|||||||
map (\c -> maybe c (return . toEnum) $
|
map (\c -> maybe c (return . toEnum) $
|
||||||
Map.lookup c (trans_to_unicode trans)
|
Map.lookup c (trans_to_unicode trans)
|
||||||
) .
|
) .
|
||||||
|
filter (flip notElem (invisible_chars trans)) .
|
||||||
unchar
|
unchar
|
||||||
|
|
||||||
appTransFromUnicode :: Transliteration -> String -> String
|
appTransFromUnicode :: Transliteration -> String -> String
|
||||||
@@ -46,9 +48,10 @@ appTransFromUnicode trans =
|
|||||||
-- conventions:
|
-- conventions:
|
||||||
-- each character is either [letter] or [letter+nonletter]
|
-- each character is either [letter] or [letter+nonletter]
|
||||||
-- when using a sparse range of unicodes, mark missing codes as "-" in transliterations
|
-- when using a sparse range of unicodes, mark missing codes as "-" in transliterations
|
||||||
|
-- characters can be invisible: ignored in translation to unicode
|
||||||
|
|
||||||
mkTransliteration :: [String] -> [Int] -> Transliteration
|
mkTransliteration :: [String] -> [Int] -> Transliteration
|
||||||
mkTransliteration ts us = Trans (Map.fromList (tzip ts us)) (Map.fromList (uzip us ts))
|
mkTransliteration ts us = Trans (Map.fromList (tzip ts us)) (Map.fromList (uzip us ts)) []
|
||||||
where
|
where
|
||||||
tzip ts us = [(t,u) | (t,u) <- zip ts us, t /= "-"]
|
tzip ts us = [(t,u) | (t,u) <- zip ts us, t /= "-"]
|
||||||
uzip us ts = [(u,t) | (u,t) <- zip us ts, t /= "-"]
|
uzip us ts = [(u,t) | (u,t) <- zip us ts, t /= "-"]
|
||||||
@@ -75,9 +78,9 @@ transThai = mkTransliteration allTrans allCodes where
|
|||||||
allCodes = [0x0e00 .. 0x0e7f]
|
allCodes = [0x0e00 .. 0x0e7f]
|
||||||
|
|
||||||
transDevanagari :: Transliteration
|
transDevanagari :: Transliteration
|
||||||
transDevanagari = mkTransliteration allTrans allCodes where
|
transDevanagari = (mkTransliteration allTrans allCodes){invisible_chars = ["a"]} where
|
||||||
allTrans = words $
|
allTrans = words $
|
||||||
"~ * - - " ++
|
"M N - - " ++
|
||||||
"a- A- i- I- u- U- R- - - - e- E- - - o- O- " ++
|
"a- A- i- I- u- U- R- - - - e- E- - - o- O- " ++
|
||||||
"k K g G N: c C j J n: t. T. d. D. n. t " ++
|
"k K g G N: c C j J n: t. T. d. D. n. t " ++
|
||||||
"T d D n - p P b B m y r - l - - v " ++
|
"T d D n - p P b B m y r - l - - v " ++
|
||||||
|
|||||||
Reference in New Issue
Block a user