mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-22 03:09:33 -06:00
bronzeage grammar restored except for three ones with issues ; arabic transliteration added
This commit is contained in:
@@ -445,6 +445,7 @@ allCommands enc pgf = Map.fromList [
|
||||
let out = maybe "no such transliteration" characterTable $ transliteration t
|
||||
return $ fromString out,
|
||||
options = [
|
||||
("arabic", "Arabic"),
|
||||
("devanagari","Devanagari"),
|
||||
("thai", "Thai")
|
||||
]
|
||||
@@ -584,6 +585,7 @@ stringOpOptions = [
|
||||
("bind","bind tokens separated by Prelude.BIND, i.e. &+"),
|
||||
("chars","lexer that makes every non-space character a token"),
|
||||
("from_cp1251","decode from cp1251 (Cyrillic used in Bulgarian resource)"),
|
||||
("from_arabic","from unicode to GF Arabic transliteration"),
|
||||
("from_devanagari","from unicode to GF Devanagari transliteration"),
|
||||
("from_thai","from unicode to GF Thai transliteration"),
|
||||
("from_utf8","decode from utf8"),
|
||||
@@ -591,6 +593,7 @@ stringOpOptions = [
|
||||
("lexcode","code-like lexer"),
|
||||
("lexmixed","mixture of text and code (code between $...$)"),
|
||||
("to_cp1251","encode to cp1251 (Cyrillic used in Bulgarian resource)"),
|
||||
("to_arabic","from GF Arabic transliteration to unicode"),
|
||||
("to_devanagari","from GF Devanagari transliteration to unicode"),
|
||||
("to_html","wrap in a html file with linebreaks"),
|
||||
("to_thai","from GF Thai transliteration to unicode"),
|
||||
|
||||
@@ -25,9 +25,10 @@ transliterate s = case s of
|
||||
|
||||
-- | Look up a transliteration scheme by name.
--
-- Returns 'Nothing' for any name that is not listed below; the match is
-- on the exact string, so names are case-sensitive.
transliteration :: String -> Maybe Transliteration
transliteration s = case s of
  "arabic"     -> Just transArabic
  "devanagari" -> Just transDevanagari
  "thai"       -> Just transThai
  -- Disabled: the code-point table of 'transUrdu' is still marked TODO.
  ---- "urdu"  -> Just transUrdu
  _            -> Nothing
|
||||
|
||||
characterTable :: Transliteration -> String
|
||||
@@ -101,5 +102,15 @@ allTransUrduHindi = words $
|
||||
-- | Transliteration table for Urdu, built from the shared
-- 'allTransUrduHindi' symbol list.
--
-- The record field @invisible_chars@ is overridden to @["a"]@
-- (NOTE(review): its exact effect is defined with 'Transliteration',
-- outside this view).
transUrdu :: Transliteration
transUrdu =
  (mkTransliteration allTransUrduHindi allCodes) {invisible_chars = ["a"]}
  where
    -- A single binding: declaring 'allCodes' twice in one where-clause is a
    -- conflicting-definitions error.
    allCodes = [0x0901 .. 0x094c] ---- TODO: this is devanagari
|
||||
|
||||
-- | Transliteration table for Arabic script.
--
-- @symbols@ and @codePoints@ are written row by row in the same order, one
-- row per contiguous code-point range, and each row has as many symbols as
-- its range has code points (presumably 'mkTransliteration' pairs the two
-- lists positionally -- confirm against its definition).
-- Code points 0x063b - 0x0640 are not covered by any row.
transArabic :: Transliteration
transArabic = mkTransliteration symbols codePoints
  where
    -- ASCII-side symbols, whitespace-separated within each row.
    symbols = concatMap words
      [ " V A: A? w? A- y? A b t. t v g H K d " -- 0621 - 062f
      , "W r z s C S D T Z c G "                -- 0630 - 063a
      , " f q k l m n h w y. y a. u. i. a u "   -- 0641 - 064f
      , "i v2 o a: V+ V- i: a+"                 -- 0650 - 0657
      ]
    -- Unicode code points, in the same row order as the symbols above.
    codePoints = concat
      [ [0x0621 .. 0x062f]
      , [0x0630 .. 0x063a]
      , [0x0641 .. 0x064f]
      , [0x0650 .. 0x0657]
      ]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user