1
0
forked from GitHub/gf-core

bronzeage grammars restored, except for three that have issues; Arabic transliteration added

This commit is contained in:
aarne
2008-08-16 18:55:28 +00:00
parent ddbeff3028
commit 66c927937a
25 changed files with 697 additions and 2 deletions

View File

@@ -445,6 +445,7 @@ allCommands enc pgf = Map.fromList [
let out = maybe "no such transliteration" characterTable $ transliteration t
return $ fromString out,
options = [
("arabic", "Arabic"),
("devanagari","Devanagari"),
("thai", "Thai")
]
@@ -584,6 +585,7 @@ stringOpOptions = [
("bind","bind tokens separated by Prelude.BIND, i.e. &+"),
("chars","lexer that makes every non-space character a token"),
("from_cp1251","decode from cp1251 (Cyrillic used in Bulgarian resource)"),
("from_arabic","from unicode to GF Arabic transliteration"),
("from_devanagari","from unicode to GF Devanagari transliteration"),
("from_thai","from unicode to GF Thai transliteration"),
("from_utf8","decode from utf8"),
@@ -591,6 +593,7 @@ stringOpOptions = [
("lexcode","code-like lexer"),
("lexmixed","mixture of text and code (code between $...$)"),
("to_cp1251","encode to cp1251 (Cyrillic used in Bulgarian resource)"),
("to_arabic","from GF Arabic transliteration to unicode"),
("to_devanagari","from GF Devanagari transliteration to unicode"),
("to_html","wrap in a html file with linebreaks"),
("to_thai","from GF Thai transliteration to unicode"),

View File

@@ -25,9 +25,10 @@ transliterate s = case s of
-- | Look up a transliteration scheme by its name.
-- Yields 'Nothing' when the name matches none of the cases listed below.
transliteration :: String -> Maybe Transliteration
transliteration s = case s of
"arabic" -> Just transArabic
"devanagari" -> Just transDevanagari
"thai" -> Just transThai
"urdu" -> Just transUrdu
-- NOTE(review): the next line is the same "urdu" case, commented out.
-- Presumably one of the two is the pre-change line and the other its
-- replacement; confirm which is intended and keep only that one.
---- "urdu" -> Just transUrdu
_ -> Nothing
characterTable :: Transliteration -> String
@@ -101,5 +102,15 @@ allTransUrduHindi = words $
-- | Transliteration assembled by 'mkTransliteration' from
-- 'allTransUrduHindi' and the local @allCodes@ range, with the
-- @invisible_chars@ field overridden to @["a"]@.
transUrdu :: Transliteration
transUrdu =
(mkTransliteration allTransUrduHindi allCodes){invisible_chars = ["a"]} where
-- NOTE(review): allCodes is bound twice below with the same range, once
-- without and once with the TODO remark. Presumably these are the old and
-- new versions of a single line; only one binding should remain.
allCodes = [0x0901 .. 0x094c]
allCodes = [0x0901 .. 0x094c] ---- TODO: this is devanagari
-- | Transliteration table for code points in the range 0x0621 - 0x0657.
--
-- Each row of @asciiTokens@ holds the whitespace-separated ASCII tokens for
-- one contiguous run of code points; tokens and code points are listed in
-- the same order and handed to 'mkTransliteration' as two parallel lists.
transArabic :: Transliteration
transArabic = mkTransliteration asciiTokens codePoints
  where
    -- One token per code point, row by row.
    asciiTokens = concatMap words
      [ "V A: A? w? A- y? A b t. t v g H K d"  -- 0621 - 062f
      , "W r z s C S D T Z c G"                -- 0630 - 063a
      , "f q k l m n h w y. y a. u. i. a u"    -- 0641 - 064f
      , "i v2 o a: V+ V- i: a+"                -- 0650 - 0657
      ]
    -- 0x063b - 0x0640 are deliberately left out: no tokens are listed
    -- for them above.
    codePoints = [0x0621 .. 0x063a] ++ [0x0641 .. 0x0657]