forked from GitHub/gf-core
reintroduce the compiler API
This commit is contained in:
40
src/compiler/api/GF/Text/Clitics.hs
Normal file
40
src/compiler/api/GF/Text/Clitics.hs
Normal file
@@ -0,0 +1,40 @@
|
||||
module GF.Text.Clitics (getClitics,getCliticsText) where
|
||||
|
||||
import Data.List
|
||||
|
||||
-- AR 6/2/2011
|
||||
-- Analyse word as stem+clitic whenever
|
||||
-- (1) clitic is in clitic list
|
||||
-- (2) either
|
||||
-- (a) stem is in Lexicon
|
||||
-- (b) stem can be analysed as stem0+clitic0
|
||||
--
|
||||
-- Examples:
|
||||
-- Italian amarmi = amar+mi
|
||||
-- Finnish autossanikohan = autossa+ni+kohan
|
||||
--
|
||||
-- The analysis gives all results, including the case where the whole word is in Lexicon.
|
||||
--
|
||||
-- The clitics in the list are expected to be reversed.
|
||||
|
||||
-- | Analyse a word as stem plus clitics.  @getClitics isLex rclitics w@
-- returns every decomposition of @w@ into a lexicon stem followed by
-- clitics from @rclitics@ (which are given REVERSED, see module header),
-- including the case where the whole word itself is in the lexicon.
getClitics :: (String -> Bool) -> [String] -> String -> [[String]]
getClitics isLex rclitics word = map (reverse . map reverse) (analyses (reverse word))
  where
    -- All analyses of a reversed word: the word itself if it is in the
    -- lexicon, plus every clitic that can be peeled off the end.
    analyses rw = withLexEntry rw (concatMap (peel rw) rclitics)
    peel rw rclit = case stripPrefix rclit rw of
                      Nothing   -> []
                      Just stem -> [rclit : rest | rest <- analyses stem]
    withLexEntry rw more = if isLex (reverse rw) then [rw] : more else more
||||
-- | Clitic analysis of a whole text: each input word may have several
-- analyses, so the result is the cross product of the per-word analyses,
-- each rendered as one string with stem and clitics joined by \"&+\".
getCliticsText :: (String -> Bool) -> [String] -> [String] -> [String]
getCliticsText isLex rclitics ws = map unwords (sequence analysesPerWord)
  where
    analysesPerWord = [map showAnalysis (getClitics isLex rclitics w) | w <- ws]
    showAnalysis    = unwords . intersperse "&+"
-- example
|
||||
|
||||
--getClitics1 = getClitics exlex1 exclits1
|
||||
--exlex1 = flip elem ["auto", "naise", "rahan","maa","maahan","maahankaan"]
|
||||
--exclits1 = map reverse ["ni","ko","han","pas","nsa","kin","kaan"]
|
||||
75
src/compiler/api/GF/Text/Coding.hs
Normal file
75
src/compiler/api/GF/Text/Coding.hs
Normal file
@@ -0,0 +1,75 @@
|
||||
module GF.Text.Coding where
|
||||
|
||||
import qualified Data.ByteString as BS
|
||||
import Data.ByteString.Internal
|
||||
import GHC.IO
|
||||
import GHC.IO.Buffer
|
||||
import GHC.IO.Encoding
|
||||
import GHC.IO.Exception
|
||||
import Control.Monad
|
||||
|
||||
-- | Encode a 'String' into a 'ByteString' with the given 'TextEncoding'.
-- The string is written into a GHC char buffer and pushed through the
-- encoder; 'unsafePerformIO' is used because the buffer machinery lives
-- in IO, although the result is determined by the arguments alone.
encodeUnicode :: TextEncoding -> String -> ByteString
encodeUnicode enc s =
  unsafePerformIO $ do
    let len = length s
    -- len*4 buffer elements: generous upper bound for the input.
    cbuf0 <- newCharBuffer (len*4) ReadBuffer
    foldM (\i c -> writeCharBuf (bufRaw cbuf0) i c) 0 s
    -- NOTE(review): bufR is set to 'len', i.e. one buffer element per
    -- Char is assumed, and the final index returned by writeCharBuf is
    -- discarded — confirm this holds for the char-buffer representation
    -- (it would break if a Char ever occupies more than one element).
    let cbuf = cbuf0{bufR=len}
    case enc of
      TextEncoding {mkTextEncoder=mk} -> do encoder <- mk
                                            bss <- translate (encode encoder) cbuf
                                            close encoder
                                            return (BS.concat bss)
  where
    -- Repeatedly run the encoder over the remaining chars, turning each
    -- filled 128-byte buffer into one strict chunk, until bufL == bufR.
    translate cod cbuf
      | i < w = do bbuf <- newByteBuffer 128 WriteBuffer
                   (_,cbuf,bbuf) <- cod cbuf bbuf
                   if isEmptyBuffer bbuf
                     -- No progress at all: the input cannot be encoded.
                     then ioe_invalidCharacter1
                     else do let bs = PS (bufRaw bbuf) (bufL bbuf) (bufR bbuf-bufL bbuf)
                             bss <- translate cod cbuf
                             return (bs:bss)
      | otherwise = return []
      where
        i = bufL cbuf
        w = bufR cbuf
-- | Decode a 'ByteString' into a 'String' with the given 'TextEncoding'.
-- Pure wrapper around 'decodeUnicodeIO' via 'unsafePerformIO'.
decodeUnicode :: TextEncoding -> ByteString -> String
decodeUnicode enc bs = unsafePerformIO $ decodeUnicodeIO enc bs

-- | IO worker: wraps the ByteString's storage directly as a read buffer
-- (no copy) and pulls decoded chars out of a 128-element char buffer.
decodeUnicodeIO enc (PS fptr l len) = do
  let bbuf = (emptyBuffer fptr len ReadBuffer) { bufL=l, bufR=l+len }
  cbuf <- newCharBuffer 128 WriteBuffer
  case enc of
    TextEncoding {mkTextDecoder=mk} -> do decoder <- mk
                                          s <- translate (encode decoder) bbuf cbuf
                                          close decoder
                                          return s
  where
    -- Run the decoder once over the remaining bytes; an empty output
    -- buffer means no progress, i.e. an invalid byte sequence.
    translate cod bbuf cbuf
      | i < w = do
          (_,bbuf,cbuf) <- cod bbuf cbuf
          if isEmptyBuffer cbuf
            then ioe_invalidCharacter2
            else unpack cod bbuf cbuf
      | otherwise = return []
      where
        i = bufL bbuf
        w = bufR bbuf

    -- Drain the char buffer into the result list, then reset it and go
    -- back to 'translate' for the next batch of input bytes.
    unpack cod bbuf cbuf
      | i < w = do (c,i') <- readCharBuf (bufRaw cbuf) i
                   cs <- unpack cod bbuf cbuf{bufL=i'}
                   return (c:cs)
      | otherwise = translate cod bbuf cbuf{bufL=0,bufR=0}
      where
        i = bufL cbuf
        w = bufR cbuf
-- | Raised by 'encodeUnicode' when the encoder cannot make progress.
ioe_invalidCharacter1 = ioException
  (IOError Nothing InvalidArgument ""
           ("invalid byte sequence for this encoding") Nothing Nothing)

-- | Raised by 'decodeUnicodeIO' when the decoder cannot make progress.
ioe_invalidCharacter2 = ioException
  (IOError Nothing InvalidArgument ""
           ("invalid byte sequence for this decoding") Nothing Nothing)
448
src/compiler/api/GF/Text/Lexing.hs
Normal file
448
src/compiler/api/GF/Text/Lexing.hs
Normal file
@@ -0,0 +1,448 @@
|
||||
-- | Lexers and unlexers - they work on space-separated word strings
|
||||
module GF.Text.Lexing (stringOp,opInEnv,bindTok) where
|
||||
|
||||
import GF.Text.Transliterations
|
||||
|
||||
import Data.Char (isSpace,toUpper,toLower)
|
||||
import Data.List (intersperse)
|
||||
|
||||
-- | Look up a named lexer/unlexer operation.  The 'good' predicate tells
-- which words are known to the lexicon (used by the text lexers for
-- capitalization decisions).  Names that are not listed here fall
-- through to 'transliterate'.
stringOp :: (String -> Bool) -> String -> Maybe (String -> String)
stringOp good name = case name of
  "chars" -> Just $ appLexer (filter (not . all isSpace) . map return) -- one token per character
  "lextext" -> Just $ appLexer (lexText good)
  "lexcode" -> Just $ appLexer lexCode
  "lexmixed" -> Just $ appLexer (lexMixed good)
  "lexgreek" -> Just $ appLexer lexAGreek
  "lexgreek2" -> Just $ appLexer lexAGreek2
  "words" -> Just $ appLexer words
  "unchars" -> Just $ appUnlexer concat
  "unlextext" -> Just $ appUnlexer (unlexText . unquote . bindTok)
  "unlexcode" -> Just $ appUnlexer unlexCode
  "unlexmixed" -> Just $ appUnlexer (unlexMixed good . unquote . bindTok)
  "unlexgreek" -> Just $ appUnlexer unlexAGreek
  "unlexnone" -> Just id
  "unlexid" -> Just id -- synonym of "unlexnone"
  "unwords" -> Just $ appUnlexer unwords
  "to_html" -> Just wrapHTML
  _ -> transliterate name
|
||||
-- perform op in environments beg--end, e.g. between "--"
--- suboptimal implementation
-- | Apply 'op' only to the segments of the input that lie between the
-- 'beg' and 'end' delimiter strings; everything else (including the
-- delimiters themselves) is passed through unchanged.
opInEnv :: String -> String -> (String -> String) -> (String -> String)
opInEnv beg end op = concat . altern False . chop (lbeg, beg) [] where
  -- Split the input into segments at alternating beg/end markers.
  -- mk is the marker currently searched for (with its length); s0 is
  -- the reversed accumulator of the current segment.
  chop mk@(lg, mark) s0 s =
    let (tag,rest) = splitAt lg s in
    if tag==mark then (reverse s0) : mark : chop (switch mk) [] rest
    else case s of
      c:cs -> chop mk (c:s0) cs
      [] -> [reverse s0]
  switch (lg,mark) = if mark==beg then (lend,end) else (lbeg,beg)
  (lbeg,lend) = (length beg, length end)
  -- Walk the segments, applying op inside environments (m = inside).
  altern m ts = case ts of
    t:ws | not m && t==beg -> t : altern True ws
    t:ws | m && t==end -> t : altern False ws
    t:ws -> (if m then op t else t) : altern m ws
    [] -> []
|
||||
-- | Run a lexer on a string and join the resulting tokens with single
-- spaces, discarding any empty tokens the lexer produced.
appLexer :: (String -> [String]) -> String -> String
appLexer lexer s = unwords [t | t <- lexer s, not (null t)]
|
||||
-- | Run an unlexer on the whitespace-separated tokens of the input.
appUnlexer :: ([String] -> String) -> String -> String
----appUnlexer f = unlines . map (f . words) . lines
appUnlexer unlexer s = unlexer (words s)
|
||||
-- | Wrap plain text in a minimal UTF-8 HTML page, turning line breaks
-- into @\<br\>@ elements.
wrapHTML :: String -> String
wrapHTML input = unlines (header ++ intersperse "<br>" (lines input) ++ footer)
  where
    header = [ "<html>"
             , "<head>"
             , "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />"
             , "</head>"
             , "<body>" ]
    footer = ["</body>","</html>"]
|
||||
|
||||
-- * Text lexing

-- | Text lexing with standard word capitalization of the first word of every sentence
lexText :: (String -> Bool) -> String -> [String]
lexText good = lexText' (uncapitInit good)
|
||||
-- | Text lexing with custom treatment of the first word of every sentence.
-- Punctuation becomes its own token; after major punctuation (.?!) the
-- next word is again treated as sentence-initial via 'uncap1'.
lexText' :: (String->String) -> String -> [String]
lexText' uncap1 = uncap . lext where
  lext s = case s of
    c:cs | isMajorPunct c -> [c] : uncap (lext cs)
    c:cs | isMinorPunct c -> [c] : lext cs
    c:cs | isSpace c -> lext cs
    _:_ -> let (w,cs) = break (\x -> isSpace x || isPunct x) s in w : lext cs
    -- End of input: yields [""] for empty input; 'appLexer' filters
    -- empty tokens out again.
    _ -> [s]
  -- Apply the sentence-initial transformation to the next word only.
  uncap s = case s of
    w:ws -> uncap1 w:ws
    _ -> s
|
||||
-- | Join tokens back into text: punctuation tokens are glued to the
-- preceding word, the first word and the word after each major
-- punctuation mark are capitalized.
unlexText :: [String] -> String
unlexText = capitInit . unlext where
  unlext s = case s of
    w:[] -> w
    w:[c]:[] | isPunct c -> w ++ [c]
    w:[c]:cs | isMajorPunct c -> w ++ [c] ++ " " ++ capitInit (unlext cs)
    w:[c]:cs | isMinorPunct c -> w ++ [c] ++ " " ++ unlext cs
    w:ws -> w ++ " " ++ unlext ws
    _ -> []
|
||||
-- | Bind tokens separated by Prelude.BIND, i.e. &+ — gluing the two
-- neighbours together — and capitalize the token after the &| marker.
-- Dangling markers are simply dropped.
bindTok :: [String] -> [String]
bindTok (w1 : "&+" : w2 : rest) = bindTok ((w1 ++ w2) : rest)
bindTok ("&+" : rest)           = bindTok rest
bindTok ("&|" : (c:cs) : rest)  = bindTok ((toUpper c : cs) : rest)
bindTok ("&|" : rest)           = bindTok rest
bindTok (w : rest)              = w : bindTok rest
bindTok []                      = []
|
||||
-- * Code lexing
|
||||
|
||||
-- | Haskell lexer, usable for much code: tokenizes with the Prelude's
-- 'lex' until the input is exhausted or fails to lex.
lexCode :: String -> [String]
lexCode input =
  case lex input of
    [(tok, rest)] | not (null tok) -> tok : lexCode rest
    _                              -> []
|
||||
|
||||
-- * Ancient Greek lexing

-- | Tokenize transliterated Ancient Greek text: punctuation (see
-- 'isAGreekPunct') becomes its own token, whitespace separates words.
lexTextAGreek :: String -> [String]
lexTextAGreek s = lext s where
  lext s = case s of
    c:cs | isAGreekPunct c -> [c] : (lext cs)
    c:cs | isSpace c -> lext cs
    _:_ -> let (w,cs) = break (\x -> isSpace x || isAGreekPunct x) s
           in w : lext cs
    [] -> []
|
||||
-- Philological greek text may use vowel length indicators. Then '.' is not a sentence
-- separator, nor is 'v. ' for vowel v. Sentence ends at 'v..' or 'c. ' with non-vowel c.

-- | Like 'lexTextAGreek', but interprets '.' according to the
-- philological convention above: a '.' may belong to the word (vowel
-- length mark) rather than end the sentence.
lexTextAGreek2 :: String -> [String]
lexTextAGreek2 s = lext s where
  lext s = case s of
    c:cs | isAGreekPunct c -> [c] : (lext cs)
    c:cs | isSpace c -> lext cs
    _:_ -> let (w,cs) = break (\x -> isSpace x || isAGreekPunct x) s
           in case cs of
                -- 'v..': first dot is a length mark, second ends the sentence
                '.':'.':d:ds | isSpace d
                  -> (w++['.']) : lext ('.':d:ds)
                -- dot followed by punctuation or space: keep dot on the word
                '.':d:ds | isAGreekPunct d || isSpace d
                  -> (w++['.']) : lext (d:ds)
                -- dot inside a word: glue the dot and the next token on
                '.':d:ds | not (isSpace d)
                  -> case lext (d:ds) of
                       e:es -> (w++['.']++e) : es
                       es -> (w++['.']) : es
                '.':[] -> (w++['.']) : []
                _ -> w : lext cs
    [] -> []
|
||||
-- | Join Ancient Greek tokens back into text; punctuation tokens are
-- glued to the preceding word, no capitalization is applied.
unlexTextAGreek :: [String] -> String
unlexTextAGreek = unlext where
  unlext s = case s of
    w:[] -> w
    w:[c]:[] | isAGreekPunct c -> w ++ [c]
    w:[c]:cs | isAGreekPunct c -> w ++ [c] ++ " " ++ unlext cs
    w:ws -> w ++ " " ++ unlext ws
    [] -> []
|
||||
-- | Punctuation characters of the Ancient Greek transliteration.
isAGreekPunct = flip elem ".,;··" -- colon: first version · not in charset,
                                  -- second version · = 00B7 standard code point
|
||||
-- * Text lexing and unlexing for Ancient Greek:
-- 1. no capitalization of initial word,
-- 2. grave/acute accent switch on final syllables of words not followed by punctuation,
-- 3. accent move from/to support word to/from following clitic words (iterated).

-- | Lex Ancient Greek text and normalize accents ('fromAGreek').
lexAGreek :: String -> [String]
lexAGreek = fromAGreek . lexTextAGreek

-- | As 'lexAGreek', but with the philological lexer 'lexTextAGreek2'.
lexAGreek2 :: String -> [String]
lexAGreek2 = fromAGreek . lexTextAGreek2

-- | De-normalize accents ('toAGreek') and join tokens back to text.
unlexAGreek :: [String] -> String
unlexAGreek = unlexTextAGreek . toAGreek

-- Note: unlexAGreek does not glue punctuation with the previous word, so that short
-- vowel indication (like a.) differs from sentence end (a .).
|
||||
-- | normalize = change grave accent on sentence internal words to acute,
-- and shift inherited acutes to the following enclitic (where they are
-- visible only as shown in the list of enclitics above)

normalize :: String -> String
normalize = (unlexTextAGreek . fromAGreek . lexTextAGreek)
|
||||
-- | Accent normalization over a token stream: undo grave accents and
-- move accents from support words back to their enclitics.  The guard
-- order is significant; the BR references are to Bornemann-Risch
-- paragraph 11 (as in 'toAGreek') — TODO confirm.
fromAGreek :: [String] -> [String]
fromAGreek s = case s of
  w:[]:vs -> w:[]:(fromAGreek vs)
  -- NOTE(review): 'head v' is partial; v == "" is handled by the
  -- previous pattern, so this assumes no other empty tokens occur.
  w:(v:vs) | isAGreekPunct (head v) -> w:v:(fromAGreek vs)
  w:v:vs | wasEnclitic v && wasEnclitic w ->
    getEnclitic w : fromAGreek (v:vs)
  w:v:vs | wasEnclitic v && wasProclitic w -> -- "ei)' tines*"
    getProclitic w : getEnclitic v : fromAGreek vs
  w:v:vs | wasEnclitic v && (hasEndCircum w ||
                             (hasEndAcute w && hasSingleAccent w)) ->
    w : getEnclitic v : fromAGreek vs -- ok "sofoi' tines*"
  w:v:vs | wasEnclitic v && hasPrefinalAcute w ->
    w : getEnclitic v : fromAGreek vs
  w:v:vs | wasEnclitic v && hasEndAcute w -> -- ok "a)'nvrwpoi' tines*"
    dropLastAccent w : getEnclitic v : fromAGreek vs
  w:v:vs | wasEnclitic w ->
    getEnclitic w : fromAGreek (v:vs)
  w:ws -> (toAcute w) : (fromAGreek ws)
  ws -> ws
||||
|
||||
-- | de-normalize = change acute accent of end syllables in sentence internal
-- (non-enclitic) words to grave accent, and move accents of enclitics to the
-- previous word to produce ordinary ancient greek

denormalize :: String -> String
denormalize = (unlexTextAGreek . toAGreek . lexTextAGreek)
|
||||
-- | Accent de-normalization over a token stream (inverse direction of
-- 'fromAGreek'): move enclitic accents onto the support word and turn
-- sentence-internal end-syllable acutes into graves.  BR references are
-- to the grammar paragraphs cited in the inline comments.
toAGreek :: [String] -> [String]
toAGreek s = case s of
  w:[]:vs -> w:[]:(toAGreek vs)
  -- NOTE(review): 'head v' is partial; relies on "" being caught above.
  w:v:vs | isAGreekPunct (head v) -> w:[]:v:(toAGreek vs) -- w:[] for following -to_ancientgreek
  w:v:vs | isEnclitic v && isEnclitic w ->
    addAcute w : toAGreek (dropAccent v:vs) -- BR 11 Anm.2
  w:v:vs | isEnclitic v && isProclitic w -> -- BR 11 a.beta
    addAcute w: (toAGreek (dropAccent v:vs))
  w:v:vs | isEnclitic v && (hasEndCircum w || hasEndAcute w) ->
    w:(toAGreek (dropAccent v:vs)) -- BR 11 a.alpha,beta
  w:v:vs | isEnclitic v && hasPrefinalAcute w ->
    w:v: toAGreek vs -- bisyllabic v keeps its accent BR 11 b.
  w:v:vs | isEnclitic v ->
    (addAcute w):(toAGreek (dropAccent v:vs)) -- BR 11 a.gamma
  w:v:vs | isEnclitic w -> w:(toAGreek (v:vs))
  w:ws -> (toGrave w) : (toAGreek ws)
  ws -> ws
||||
|
||||
-- | Change accent on the final syllable of a word

-- | Acute -> grave on the last vowel; a word whose last vowel carries
-- no acute is returned unchanged.
toGrave :: String -> String
toGrave = reverse . grave . reverse where
  grave s = case s of
    '\'':cs -> '`':cs
    -- stop at the first (i.e. last) vowel: nothing to change
    c:cs | isAGreekVowel c -> c:cs
    c:cs -> c: grave cs
    _ -> s
||||
|
||||
-- | Grave -> acute on the last vowel; inverse of 'toGrave'.
toAcute :: String -> String
toAcute = reverse . acute . reverse where
  acute s = case s of
    '`':cs -> '\'':cs
    -- stop at the first (i.e. last) vowel: nothing to change
    c:cs | isAGreekVowel c -> c:cs
    c:cs -> c: acute cs
    _ -> s
||||
|
||||
-- | Vowels of the Ancient Greek transliteration scheme.
isAGreekVowel = flip elem "aeioyhw"
||||
|
||||
-- | Accent moves for enclitics and proclitics (atona)

-- | Enclitic word forms (in the transliteration alphabet); incomplete,
-- see the trailing comment.
enclitics = [
  "moy","moi","me", -- personal pronouns
  "soy","soi","se",
  "oy(","oi(","e(",
  "tis*","ti","tina'", -- indefinite pronoun
  "tino's*","tini'",
  "tine's*","tina's*",
  "tinw~n","tisi'","tisi'n",
  "poy","poi", -- indefinite adverbs
  "pove'n","pws*",
  "ph|","pote'",
  "ge","te","toi", -- particles
  "nyn","per","pw"
  -- suffix -"de"
  -- praes.indik. of fhmi', ei)mi' (except fh's*, ei)~)
  ] -- and more, BR 11
||||
|
||||
-- | Proclitic word forms (atona) in the transliteration alphabet.
proclitics = [
  "o(","h(","oi(","ai(", -- articles
  "e)n","ei)s*","e)x","e)k", -- prepositions
  "ei)","w(s*", -- conjunctions
  "oy)","oy)k","oy)c" -- negation
  ]
||||
|
||||
-- | Exact membership in the clitic tables (dictionary forms only).
isEnclitic = flip elem enclitics
isProclitic = flip elem proclitics
||||
|
||||
-- Check if a word is an enclitic or accented enclitic and extract the enclitic

-- | True for any surface form of an enclitic: dictionary form, its
-- accent-stripped variant, or its acute-added variant.
wasEnclitic = let unaccented = (filter (not . hasAccent) enclitics)
                               ++ (map dropAccent (filter hasAccent enclitics))
                  accented = (filter hasAccent enclitics)
                             ++ map addAcute (filter (not . hasAccent) enclitics)
              in flip elem (accented ++ unaccented)
||||
|
||||
-- | True for a proclitic that has received an (inherited) acute accent.
wasProclitic = flip elem (map addAcute proclitics)
||||
|
||||
-- | Map any surface form of an enclitic (dictionary, accent-stripped,
-- or acute-added) back to its dictionary form; anything not in the
-- table is returned unchanged.
getEnclitic :: String -> String
getEnclitic v = case lookup v table of
                  Just dict -> dict
                  Nothing   -> v
  where
    accented   = filter hasAccent enclitics
    unaccented = filter (not . hasAccent) enclitics
    table = zip (enclitics ++ map dropAccent accented ++ map addAcute unaccented)
                (enclitics ++ accented ++ unaccented)
||||
-- | Map an acute-accented proclitic back to its dictionary form;
-- anything else is returned unchanged.
getProclitic :: String -> String
getProclitic v =
  case lookup v (zip (map addAcute proclitics) proclitics) of
    Just dict -> dict
    Nothing   -> v
|
||||
-- | Accent manipulation

-- | Remove the accent mark (acute, grave or circumflex) closest to the
-- end of the word; a word without accents is returned unchanged.
-- NOTE(review): the local 'drop' shadows Prelude.drop.
dropAccent = reverse . drop . reverse where
  drop s = case s of
    [] -> []
    '\'':cs -> cs
    '`':cs -> cs
    '~':cs -> cs
    c:cs -> c:drop cs
|
||||
-- | Remove the last (rightmost) accent mark of a word.
-- NOTE(review): currently byte-identical in behavior to 'dropAccent';
-- kept separate, presumably for intended future divergence — confirm.
dropLastAccent = reverse . drop . reverse where
  drop s = case s of
    [] -> []
    '\'':cs -> cs
    '`':cs -> cs
    '~':cs -> cs
    c:cs -> c:drop cs
|
||||
-- | Put an acute accent on the last vowel of a word (placed before a
-- trailing breathing mark '(' or ')').  A word already ending in an
-- acute, or containing no vowel, is returned unchanged.
addAcute :: String -> String
addAcute = reverse . place . reverse
  where
    place w@('\'':_) = w                        -- already acute at the end
    place (c:cs)
      | c == '('        = '\'' : c : cs         -- accent goes before breathing
      | c == ')'        = '\'' : c : cs
      | isAGreekVowel c = '\'' : c : cs
    place (c:cs)        = c : place cs
    place w             = w
|
||||
-- | Accent checking on end syllables

-- | Does the last vowel of the word carry an acute accent?
hasEndAcute = find . reverse where
  find s = case s of
    [] -> False
    '\'':cs -> True
    '`':cs -> False
    '~':cs -> False
    -- reached the last vowel without seeing an accent mark
    c:cs | isAGreekVowel c -> False
    _:cs -> find cs
|
||||
-- | Does the last vowel of the word carry a circumflex accent?
hasEndCircum = find . reverse where
  find s = case s of
    [] -> False
    '\'':cs -> False
    '`':cs -> False
    '~':cs -> True
    c:cs | isAGreekVowel c -> False
    _:cs -> find cs
||||
|
||||
-- | Does the word carry an acute on the syllable BEFORE the final one?
-- Scans backwards past the last vowel (a final diphthong counts as one
-- syllable) and then looks for an acute before the next vowel.
hasPrefinalAcute = find . reverse where
  find s = case s of
    [] -> False
    '\'':cs -> False -- final acute
    '`':cs -> False
    '~':cs -> False
    c:d:cs | isAGreekVowel c && isAGreekVowel d -> findNext cs
    c:cs | isAGreekVowel c -> findNext cs
    _:cs -> find cs
    where
     -- look for an acute belonging to the prefinal syllable
     findNext s = case s of
       [] -> False
       '\'':cs -> True -- prefinal acute
       '`':cs -> False
       '~':cs -> False
       c:cs | isAGreekVowel c -> False
       _:cs -> findNext cs
|
||||
-- | Exactly one accent mark: accented, but no longer so after dropping
-- the last accent.
hasSingleAccent v =
  hasAccent v && not (hasAccent (dropLastAccent v))
||||
|
||||
-- | Does the word contain any accent mark (acute, grave or circumflex)?
hasAccent :: String -> Bool
hasAccent = any (`elem` "'`~")
||||
|
||||
{- Tests:

-- denormalization. Examples in BR 11 work:
-}
-- | Normalized example phrases (BR 11) for eyeballing 'denormalize'.
enclitics_expls = -- normalized
  "sofw~n tis*":"sofw~n tine's*":"sof~n tinw~n": -- a.alpha
  "sofo's tis*":"sofoi' tine's*": -- a.beta
  "ei) tis*":"ei) tine's*":
  "a)'nvrwpos* tis*":"a)'nvrwpoi tine's*": -- a.gamma
  "doy~los* tis*":"doy~loi tine's*":
  "lo'gos* tis*":"lo'goi tine's*":"lo'gwn tinw~n": -- b.
  "ei) poy tis* tina' i)'doi": -- Anm. 2.
  []
||||
|
||||
|
||||
unlexCode :: [String] -> String
|
||||
unlexCode s = case s of
|
||||
w:[] -> w
|
||||
[c]:cs | isParen c -> [c] ++ unlexCode cs
|
||||
w:cs@([c]:_) | isClosing c -> w ++ unlexCode cs
|
||||
w:ws -> w ++ " " ++ unlexCode ws
|
||||
_ -> []
|
||||
|
||||
|
||||
-- | LaTeX lexer in the math mode: \ should not be separated from the next word

lexLatexCode :: String -> [String]
lexLatexCode = restoreBackslash . lexCode where --- quick hack: postprocess Haskell's lex
  restoreBackslash ws = case ws of
    "\\":w:ww -> ("\\" ++ w) : restoreBackslash ww
    w:ww -> w:restoreBackslash ww
    _ -> ws
||||
|
||||
-- * Mixed lexing

-- | LaTeX style lexer, with "math" environment using Code between $...$
-- (also \( \) and \[ \] delimiters toggle the environment).
lexMixed :: (String -> Bool) -> String -> [String]
lexMixed good = concat . alternate False [] where
  -- env: currently inside math; t: reversed accumulator of current segment
  alternate env t s = case s of
    '$':cs -> lex env (reverse t) : ["$"] : alternate (not env) [] cs
    '\\':c:cs | elem c "()[]" -> lex env (reverse t) : [['\\',c]] : alternate (not env) [] cs
    c:cs -> alternate env (c:t) cs
    _ -> [lex env (reverse t)]
  -- NOTE(review): local 'lex' shadows Prelude.lex.
  lex env = if env then lexLatexCode else lexText good
|
||||
-- | Unlex mixed text/math token streams: segments between math
-- delimiters are unlexed as code, the rest as text; spacing around the
-- delimiters is chosen by 'sep'.
unlexMixed :: (String -> Bool) -> [String] -> String
unlexMixed good = capitInit . concat . alternate False where
  alternate env s = case s of
    _:_ -> case break (flip elem ["$","\\[","\\]","\\(","\\)"]) s of
      (t,[]) -> unlex env t : []
      (t,c:m) -> unlex env t : sep env c m : alternate (not env) m
    _ -> []
  unlex env = if env then unlexCode else (uncapitInit good . unlexText)
  sep env c m = case (m,env) of
    ([p]:_,True) | isPunct p -> c -- closing $ glued to next punct
    (_, True) -> c ++ " " -- closing $ otherwise separated by space from what follows
    _ -> " " ++ c -- put space before opening $
|
||||
-- * Additional lexing utilities

-- | Capitalize the first letter of a string (identity on "").
capitInit :: String -> String
capitInit (c:cs) = toUpper c : cs
capitInit s      = s
||||
|
||||
-- | Uncapitalize the first letter, unless the whole word satisfies the
-- 'good' predicate (e.g. it is a known lexicon word kept as-is).
uncapitInit :: (String -> Bool) -> String -> String
uncapitInit good s@(c:cs)
  | good s    = s
  | otherwise = toLower c : cs
uncapitInit _ s = s
||||
|
||||
-- | Strip one pair of surrounding double quotes from each token; tokens
-- that are not fully quoted (including a lone @\"@) are left unchanged.
unquote :: [String] -> [String]
unquote = map strip
  where
    strip ('"':cs) | not (null cs) && last cs == '"' = init cs
    strip s = s
||||
|
||||
-- Character classes used by the lexers/unlexers above.
isPunct = flip elem ".?!,:;"
-- | Sentence-ending punctuation (triggers re-capitalization).
isMajorPunct = flip elem ".?!"
-- | Sentence-internal punctuation.
isMinorPunct = flip elem ",:;"
isParen = flip elem "()[]{}"
isClosing = flip elem ")]}"
|
||||
48
src/compiler/api/GF/Text/Pretty.hs
Normal file
48
src/compiler/api/GF/Text/Pretty.hs
Normal file
@@ -0,0 +1,48 @@
|
||||
-- | Pretty printing with class
|
||||
module GF.Text.Pretty(module GF.Text.Pretty,module PP) where
|
||||
import qualified Text.PrettyPrint as PP
|
||||
import Text.PrettyPrint as PP(Doc,Style(..),Mode(..),style,empty,isEmpty)
|
||||
|
||||
-- | Overloaded conversion into pretty-printing 'Doc's.  'ppList' exists
-- so that @String = [Char]@ can render as plain text rather than as a
-- list of characters.
class Pretty a where
  pp :: a -> Doc
  ppList :: [a] -> Doc
  ppList = fsep . map pp -- hmm

instance Pretty Doc where pp = id
instance Pretty Int where pp = PP.int
instance Pretty Integer where pp = PP.integer
instance Pretty Float where pp = PP.float
instance Pretty Double where pp = PP.double
-- Strings render via ppList = PP.text, i.e. unquoted.
instance Pretty Char where pp = PP.char; ppList = PP.text

instance Pretty a => Pretty [a] where
  pp = ppList
  ppList = fsep . map pp -- hmm
||||
|
||||
-- | Render any 'Pretty' value to a String.
render x = PP.render (pp x)
-- | Render with 80-character lines and one ribbon per line.
render80 x = renderStyle style{lineLength=80,ribbonsPerLine=1} x
renderStyle s x = PP.renderStyle s (pp x)

-- Same fixities as the underlying Text.PrettyPrint operators.
infixl 5 $$,$+$
infixl 6 <>,<+>

-- 'Pretty'-overloaded versions of the Text.PrettyPrint combinators;
-- note that <> intentionally shadows the Prelude operator here.
x $$ y = pp x PP.$$ pp y
x $+$ y = pp x PP.$+$ pp y
x <+> y = pp x PP.<+> pp y
x <> y = pp x PP.<> pp y

braces x = PP.braces (pp x)
brackets x = PP.brackets (pp x)
cat xs = PP.cat (map pp xs)
doubleQuotes x = PP.doubleQuotes (pp x)
fcat xs = PP.fcat (map pp xs)
fsep xs = PP.fsep (map pp xs)
hang x d y = PP.hang (pp x) d (pp y)
hcat xs = PP.hcat (map pp xs)
hsep xs = PP.hsep (map pp xs)
nest d x = PP.nest d (pp x)
parens x = PP.parens (pp x)
punctuate x ys = PP.punctuate (pp x) (map pp ys)
quotes x = PP.quotes (pp x)
sep xs = PP.sep (map pp xs)
vcat xs = PP.vcat (map pp xs)
331
src/compiler/api/GF/Text/Transliterations.hs
Normal file
331
src/compiler/api/GF/Text/Transliterations.hs
Normal file
@@ -0,0 +1,331 @@
|
||||
module GF.Text.Transliterations (
|
||||
transliterate,
|
||||
transliterateWithFile,
|
||||
transliteration,
|
||||
characterTable,
|
||||
transliterationPrintNames
|
||||
) where
|
||||
|
||||
import Data.Char
|
||||
import Numeric
|
||||
import qualified Data.Map as Map
|
||||
|
||||
-- transliterations between ASCII and a Unicode character set
|
||||
|
||||
-- current transliterations: devanagari, thai
|
||||
|
||||
-- to add a new one: define the Unicode range and the corresponding ASCII strings,
|
||||
-- which may be one or more characters long
|
||||
|
||||
-- conventions to be followed:
|
||||
-- each character is either [letter] or [letter+nonletters]
|
||||
-- when using a sparse range of unicodes, mark missing codes as "-" in transliterations
|
||||
-- characters can be invisible: ignored in translation to unicode
|
||||
|
||||
-- | Select a transliteration direction by name: @to_X@ maps ASCII to
-- Unicode, @from_X@ maps Unicode back to ASCII; 'Nothing' if @X@ is not
-- a known transliteration.
transliterate :: String -> Maybe (String -> String)
transliterate s = case s of
  'f':'r':'o':'m':'_':t -> fmap appTransFromUnicode $ transliteration t
  't':'o':'_':t -> fmap appTransToUnicode $ transliteration t
  _ -> Nothing
|
||||
-- | Build a transliteration function from file contents 'src' (parsed
-- by 'getTransliterationFile'); 'isFrom' selects the Unicode-to-ASCII
-- direction.
transliterateWithFile :: String -> String -> Bool -> (String -> String)
transliterateWithFile name src isFrom =
  (if isFrom then appTransFromUnicode else appTransToUnicode) (getTransliterationFile name src)
|
||||
-- | Look up a built-in transliteration by name.
transliteration :: String -> Maybe Transliteration
transliteration s = Map.lookup s allTransliterations
||||
|
||||
-- | The table of built-in transliterations, keyed by the name used in
-- @to_X@ / @from_X@ operation names.
allTransliterations = Map.fromList [
  ("amharic",transAmharic),
  ("ancientgreek", transAncientGreek),
  ("arabic", transArabic),
  ("devanagari", transDevanagari),
  ("greek", transGreek),
  ("hebrew", transHebrew),
  ("persian", transPersian),
  ("sanskrit", transSanskrit),
  ("sindhi", transSindhi),
  ("nepali", transNepali),
  ("telugu", transTelugu),
  ("thai", transThai),
  ("urdu", transUrdu)
  ]
||||
|
||||
-- used in command options and help
-- | (name, human-readable print name) pairs of all transliterations.
transliterationPrintNames = [(t,printname p) | (t,p) <- Map.toList allTransliterations]
||||
|
||||
-- | Human-readable table of a transliteration: one row per code point,
-- showing hex code, the Unicode character, and the ASCII token.
characterTable :: Transliteration -> String
characterTable = unlines . map prOne . Map.assocs . trans_from_unicode where
  prOne (i,s) = unwords ["|", showHex i "", "|", [toEnum i], "|", s, "|"]
||||
|
||||
-- | A bidirectional mapping between ASCII tokens and Unicode code points.
data Transliteration = Trans {
  trans_to_unicode :: Map.Map String Int,   -- ASCII token -> code point
  trans_from_unicode :: Map.Map Int String, -- code point -> ASCII token
  invisible_chars :: [String],              -- tokens dropped when going to Unicode
  printname :: String                       -- human-readable name
  }
||||
|
||||
-- | Apply a transliteration in the ASCII-to-Unicode direction.  Input
-- is tokenized with 'unchar'; invisible tokens are dropped; unknown
-- tokens pass through unchanged.
appTransToUnicode :: Transliteration -> String -> String
appTransToUnicode trans =
  concat .
  map (\c -> maybe c (return . toEnum) $
             Map.lookup c (trans_to_unicode trans)
      ) .
  filter (flip notElem (invisible_chars trans)) .
  unchar
||||
|
||||
-- | Apply a transliteration in the Unicode-to-ASCII direction; code
-- points not in the table pass through as the original character.
appTransFromUnicode :: Transliteration -> String -> String
appTransFromUnicode trans =
  concat .
  map (\c -> maybe [toEnum c] id $
             Map.lookup c (trans_from_unicode trans)
      ) .
  map fromEnum
||||
|
||||
|
||||
-- | Build a 'Transliteration' from parallel lists of ASCII tokens and
-- Unicode code points; "-" entries mark unused code points and are
-- omitted from both maps.
mkTransliteration :: String -> [String] -> [Int] -> Transliteration
mkTransliteration name ts us =
    Trans (Map.fromList (tzip ts us)) (Map.fromList (uzip us ts)) [] name
  where
    tzip ts us = [(t,u) | (t,u) <- zip ts us, t /= "-"]
    uzip us ts = [(u,t) | (u,t) <- zip us ts, t /= "-"]
||||
|
||||
-- | Parse a transliteration table from file contents: one mapping per
-- non-blank line, either "<char> <token>" or "<code> <token>" where the
-- code is read by 'read' (decimal or 0x-prefixed hex).
getTransliterationFile :: String -> String -> Transliteration
getTransliterationFile name = uncurry (mkTransliteration name) . codes
  where
    codes = unzip . map (mkOne . words) . filter (not . all isSpace) . lines
    mkOne ws = case ws of
      [c]:t:_ -> (t,fromEnum c) -- ä a:
      u:t:_ -> (t,read u) -- 228 a: OR 0xe4
      _ -> error $ "not a valid transliteration:" ++ unwords ws
||||
|
||||
-- | Split an ASCII transliteration string into character tokens: each
-- token is a letter optionally followed by non-letter, non-space marks;
-- whitespace characters are kept as their own tokens.
unchar :: String -> [String]
unchar []  = []
unchar [c] = [[c]]
unchar (c:d:rest)
  | isAlpha d = [c] : unchar (d:rest)
  | isSpace d = [c] : [d] : unchar rest
  | otherwise = (c : d : marks) : unchar rest'
  where
    (marks, rest') = break (\x -> isAlpha x || isSpace x) rest
|
||||
|
||||
transThai :: Transliteration
|
||||
transThai = mkTransliteration "Thai" allTrans allCodes where
|
||||
allTrans = words $
|
||||
"- k k1 - k2 - k3 g c c1 c2 s' c3 y' d' t' " ++
|
||||
"t1 t2 t3 n' d t t4 t5 t6 n b p p1 f p2 f' " ++
|
||||
"p3 m y r - l - w s- s. s h l' O h' - " ++
|
||||
"a. a a: a+ i i: v v: u u: - - - - - - " ++
|
||||
"e e' o: a% a& L R S T1 T2 T3 T4 K - - - " ++
|
||||
"N0 N1 N2 N3 N4 N5 N6 N7 N8 N9 - - - - - - "
|
||||
allCodes = [0x0e00 .. 0x0e7f]
|
||||
|
||||
transDevanagari :: Transliteration
|
||||
transDevanagari =
|
||||
(mkTransliteration "Devanagari"
|
||||
allTransUrduHindi allCodes){invisible_chars = ["a"]} where
|
||||
allCodes = [0x0900 .. 0x095f] ++ [0x0966 .. 0x096f]
|
||||
|
||||
allTransUrduHindi = words $
|
||||
"- n~ m. h. - A A: I I: U U: r.- l.- - - E: " ++
|
||||
"E+ - - O: O+ k k' g g' n- c c' j j' n* T " ++
|
||||
"T' D D' N t t' d d' n - p p' b b' m y " ++
|
||||
"r - l L - v s* S s h - - X~ - a: i " ++
|
||||
"i: u u: r. l. - - e: e+ - - o: o+ X, - - " ++
|
||||
"- - - - - - - - q x g. z R R' f - " ++
|
||||
"N0 N1 N2 N3 N4 N5 N6 N7 N8 N9 "
|
||||
|
||||
|
||||
transUrdu :: Transliteration
|
||||
transUrdu =
|
||||
(mkTransliteration "Urdu" allTrans allCodes) where
|
||||
allCodes = [0x0622 .. 0x062f] ++ [0x0630 .. 0x063a] ++ [0x0641,0x0642] ++ [0x06A9] ++ [0x0644 .. 0x0648] ++
|
||||
[0x0654,0x0658,0x0679,0x067e,0x0686,0x0688,0x0691,0x0698,0x06af,0x06c1,0x06c3,0x06cc,0x06ba,0x06be,0x06d2] ++
|
||||
[0x06f0 .. 0x06f9] ++ [0x061f,0x06D4]
|
||||
allTrans = words $
|
||||
"A - w^ - y^ a b - t C j H K d " ++ -- 0622 - 062f
|
||||
"Z r z s X S Z- t- z- e G " ++ -- 0630 - 063a
|
||||
"f q k l m n - w " ++ -- 0641, 0642, 0643 - 0648
|
||||
"$ n- T p c D R x g h t: y N h' E " ++ -- 0654,658,679,67e,686,688,698,6af,6c1,6c3,6cc,6ba,6be,6d2
|
||||
"N0 N1 N2 N3 N4 N5 N6 N7 N8 N9 " ++ "? ."
|
||||
|
||||
transSindhi :: Transliteration
|
||||
transSindhi =
|
||||
(mkTransliteration "Sindhi" allTrans allCodes) where
|
||||
allCodes = [0x062e] ++ [0x0627 .. 0x062f] ++ [0x0630 .. 0x063a] ++ [0x0641 .. 0x0648] ++
|
||||
[0x067a,0x067b,0x067d,0x067e,0x067f] ++ [0x0680 .. 0x068f] ++
|
||||
[0x0699,0x0918,0x06a6,0x061d,0x06a9,0x06af,0x06b3,0x06bb,0x06be,0x06f6,0x064a,0x06b1, 0x06aa, 0x06fd, 0x06fe] ++
|
||||
[0x06f0 .. 0x06f9] ++ [0x061f,0x06D4]
|
||||
allTrans = words $
|
||||
"K a b - t C j H - d " ++ -- 0626 - 062f
|
||||
"Z r z s X S Z- t- z- e G " ++ -- 0630 - 063a
|
||||
"f q - L m n - W " ++ -- 0641 - 0648
|
||||
"T! B T p T' " ++ -- 067a,067b,067d,067e,067f
|
||||
"B' - - Y' J' - c c' - - d! - d' D - D' " ++ -- 0680 - 068f
|
||||
"R - F' - k' g G' t' h' e' y c! k A M " ++ -- 0699, 0918, 06a6, 061d, 06a9,06af,06b3,06bb,06be,06f6,06cc,06b1
|
||||
"N0 N1 N2 N3 N4 N5 N6 N7 N8 N9 " ++ "? ."
|
||||
|
||||
|
||||
|
||||
transArabic :: Transliteration
|
||||
transArabic = mkTransliteration "Arabic" allTrans allCodes where
|
||||
allTrans = words $
|
||||
" V A: A? w? A- y? A b t. t v g H K d " ++ -- 0621 - 062f
|
||||
"W r z s C S D T Z c G " ++ -- 0630 - 063a
|
||||
" f q k l m n h w y. y a. u. i. a u " ++ -- 0641 - 064f
|
||||
"i v2 o a: V+ V- i: a+ " ++ -- 0650 - 0657
|
||||
"A* q?" -- 0671 (used by AED)
|
||||
allCodes = [0x0621..0x062f] ++ [0x0630..0x063a] ++
|
||||
[0x0641..0x064f] ++ [0x0650..0x0657] ++ [0x0671,0x061f]
|
||||
|
||||
transPersian :: Transliteration
|
||||
transPersian = (mkTransliteration "Persian/Farsi" allTrans allCodes)
|
||||
{invisible_chars = ["a","u","i"]} where
|
||||
allTrans = words $
|
||||
" V A: A? w? A- y? A b t. t t- j H K d " ++ -- 0621 - 062f
|
||||
"W r z s C S D T Z c G " ++ -- 0630 - 063a
|
||||
" f q - l m n h v - y. a. u. i. a u " ++ -- 0640 - 064f
|
||||
"i v2 o a: V+ V- i: a+ " ++ -- 0650 - 0657
|
||||
"p c^ J k g y q? Z0"
|
||||
allCodes = [0x0621..0x062f] ++ [0x0630..0x063a] ++
|
||||
[0x0641..0x064f] ++ [0x0650..0x0657] ++
|
||||
[0x067e,0x0686,0x0698,0x06a9,0x06af,0x06cc,0x061f,0x200c]
|
||||
|
||||
transNepali :: Transliteration
|
||||
transNepali = mkTransliteration "Nepali" allTrans allCodes where
|
||||
allTrans = words $
|
||||
"z+ z= " ++
|
||||
"- V M h: - H A i: I: f F Z - - - e: " ++
|
||||
"E: - - O W k K g G n: C c j J Y q " ++
|
||||
"Q x X N t T d D n - p P b B m y " ++
|
||||
"r - l L - v S z s h - - ~ ` a i " ++
|
||||
"I u U R - - - e E - - o w x: - - " ++
|
||||
"O: - _ - - - - - - - - - - - - - " ++
|
||||
"- - - - . > 0 1 2 3 4 5 6 7 8 9 " ++
|
||||
"- - - - - - - - - - - - - - - - "
|
||||
allCodes = [0x200c,0x200d] ++ [0x0900 .. 0x097f]
|
||||
|
||||
|
||||
-- | Transliteration table for unvocalized Hebrew, U+05D0-05F4
-- (letters, final forms, and the ligature/punctuation tail of the
-- block); "-" marks unmapped positions.
transHebrew :: Transliteration
transHebrew = mkTransliteration "unvocalized Hebrew" ts cs
  where
    ts = words $
      "A b g d h w z H T y K k l M m N " ++
      "n S O P p Z. Z q r s t - - - - - " ++
      "w2 w3 y2 g1 g2"
    cs = [0x05d0..0x05f4]
-- | Transliteration table for Telugu, U+0C00-0C7F.
-- NOTE(review): ts holds 7 rows of 16 symbols (112) while cs spans
-- 128 code points, so the last 16 positions depend on how
-- mkTransliteration pairs a shorter symbol list -- confirm intended.
transTelugu :: Transliteration
transTelugu = mkTransliteration "Telugu" ts cs
  where
    ts = words $
      "- c1 c2 c3 - A A: I I: U U: R_ L_ - E E: " ++
      "A' - O O: A_ k k. g g. n. c c. j j. n' T " ++
      "T. d d. N t t. d d. n - p p. b b. m y " ++
      "r R l L - v s' S s h - - - c5 a: i " ++
      "i: u u: r_ r. - e e: a' - o o: a_ c6 - - " ++
      "- - - - - c7 c8 z Z - - - - - - - " ++
      "R+ L+ l+ l* - - n0 n1 n2 n3 n4 n5 n6 n7 n8 n9 "
    cs = [0x0c00 .. 0x0c7f]
-- | Transliteration table for modern Greek, U+0380-03CF
-- (accented capitals, the basic alphabet in both cases, final sigma
-- @s*@, and accented lowercase vowels); "-" marks unmapped positions.
transGreek :: Transliteration
transGreek = mkTransliteration "modern Greek" ts cs
  where
    ts = words $
      "- - - - - - A' - E' H' I' - O' - Y' W' " ++
      "i= A B G D E Z H V I K L M N X O " ++
      "P R - S T Y F C Q W I- Y- a' e' h' i' " ++
      "y= a b g d e z h v i k l m n x o " ++
      "p r s* s t y f c q w i- y- o' y' w' - "
    cs = [0x0380 .. 0x03cf]
-- | Transliteration table for ancient (polytonic) Greek.
-- Covers U+0380-03CF, the Greek Extended block U+1F00-1FFF (breathings,
-- accents, iota subscripts), and several Private Use Area ranges used by
-- polytonic fonts (New Athena Unicode, Cardo, ALPHABETUM, Antioch).
-- See: http://apagreekkeys.org/technicalDetails.html
-- (GreekKeys support by Donald Mastronarde.)
transAncientGreek :: Transliteration
transAncientGreek = mkTransliteration "ancient Greek" ts cs
  where
    ts = words $
      -- "- - - - - - - c: - - - - - - - - " ++ -- standard code point for colon: 00B7
      "- - - - - - - - - - - - - - - - " ++
      "i= A B G D E Z H V I K L M N X O " ++
      "P R - S T Y F C Q W I- Y- - - - - " ++
      "y= a b g d e z h v i k l m n x o " ++
      "p r s* s t y f c q w i- y- - - - - " ++
      "a) a( a)` a(` a)' a(' a)~ a(~ A) A( A)` A(` A)' A(' A)~ A(~ " ++ -- 1f00-1f09,1f0a-1f0f
      "e) e( e)` e(` e)' e(' - - E) E( E)` E(` E)' E(' - - " ++
      "h) h( h)` h(` h)' h(' h)~ h(~ H) H( H)` H(` H)' H(' H)~ H(~ " ++
      "i) i( i)` i(` i)' i(' i)~ i(~ I) I( I)` I(` I)' I(' I)~ I(~ " ++
      "o) o( o)` o(` o)' o(' - - O) O( O)` O(` O)' O(' - - " ++
      "y) y( y)` y(` y)' y(' y)~ y(~ - Y( - Y(` - Y(' - Y(~ " ++
      "w) w( w)` w(` w)' w(' w)~ w(~ W) W( W)` W(` W)' W(' W)~ W(~ " ++
      "a` a' e` e' h` h' i` i' o` o' y` y' w` w' - - " ++
      "a|) a|( a|)` a|(` a|)' a|(' a|)~ a|(~ - - - - - - - - " ++ -- 1f80-
      "h|) h|( h|)` h|(` h|)' h|(' h|)~ h|(~ - - - - - - - - " ++ -- 1f90-
      "w|) w|( w|)` w|(` w|)' w|(' w|)~ w|(~ - - - - - - - - " ++ -- 1fa0-
      "a. a_ a|` a| a|' - a~ a|~ - - - - - - - - " ++ -- 1fb0-
      "- - h|` h| h|' - h~ h|~ - - - - - - - - " ++ -- 1fc0-
      "i. i_ i=` i=' - - i~ i=~ - - - - - - - - " ++ -- 1fd0-
      "y. y_ y=` y=' r) r( y~ y=~ - - - - - - - - " ++ -- 1fe0-
      "- - w|` w| w|' - w~ w|~ - - - - - - - - " ++ -- 1ff0-
      -- Private Use Area code points follow; same row-of-16 layout
      -- as the cs ranges below.
      "- - - - - - - - - e. o. R) Y) Y)` Y)' Y)~ " ++ -- e1a0-e1af
      "e~ e)~ e(~ e_ e_' e_` e_) e_( e_)` e_(` e_)' e_(' E)~ E(~ E_ E. " ++ -- e1b0-e1bf
      "o~ o)~ o(~ o_ o_' o_` o_) o_( o_)` o_(` o_)' o_(' O)~ O(~ O_ O. " ++ -- e1c0-e1cf
      "a_` - a_~ a_)` a_(` a_)~ a_(~ - a.` a.) a.)` a.(' a.(` - - - " ++ -- eaf0-eaff
      "a_' - - - a_) a_( - a_)' - a_(' a.' a.( a.)' - - - " ++ -- eb00-eb0f
      "e_)~ e_(~ - - - - - e_~ - - - - - - - - " ++ -- eb20-eb2f
      "- - - - - - i_~ - i_` i_' - - i_) i_)' i_( i_(' " ++ -- eb30-eb3f
      "i.' i.) i.)' i.( i.` i.)` - i.(' i.(` - - - - - - - " ++ -- eb40-eb4f
      "- - - - i_)` i_(` - i_)~ i_(~ - o_~ o_)~ o_(~ - - - " ++ -- eb50-eb5f
      "y_` " ++ -- eb6f
      "y_~ y_)` - - - y_(` - y_)~ y_(~ - y_' - - y_) y_( y_)' " ++ -- eb70-eb7f
      "y_(' y.' y.( y.` y.) y.)' - - y.)` y.(' y.(` - - - - - " -- eb80-eb8f
    cs = -- [0x00B0 .. 0x00Bf]
         [0x0380 .. 0x03cf] ++ [0x1f00 .. 0x1fff]
      ++ [0xe1a0 .. 0xe1af]
      ++ [0xe1b0 .. 0xe1bf]
      ++ [0xe1c0 .. 0xe1cf]
      ++ [0xeaf0 .. 0xeaff]
      ++ [0xeb00 .. 0xeb0f]
      ++ [0xeb20 .. 0xeb2f]
      ++ [0xeb30 .. 0xeb3f]
      ++ [0xeb40 .. 0xeb4f]
      ++ [0xeb50 .. 0xeb5f] ++ [0xeb6f]
      ++ [0xeb70 .. 0xeb7f]
      ++ [0xeb80 .. 0xeb8f]
-- | Transliteration table for Amharic (Ethiopic block, U+1200-1357).
-- Each row of 16 covers one consonant series in its seven vowel
-- orders plus the labialized form; "-" marks unmapped positions.
transAmharic :: Transliteration
transAmharic = mkTransliteration "Amharic" ts cs
  where
    ts = words $
      " h. h- h' h( h) h h? h* l. l- l' l( l) l l? l* "++
      " H. H- H' H( H) H H? H* m. m- m' m( m) m m? m* "++
      " s. s- s' s( s) s s? s* r. r- r' r( r) r r? r* "++
      " - - - - - - - - x. x- x' x( x) x x? x* "++
      " q. q- q' q( q) q q? q* - - - - - - - - "++
      " - - - - - - - - - - - - - - - - "++
      " b. b- b' b( b) b b? b* v. v- v' v( v) v v? v* "++
      " t. t- t' t( t) t t? t* c. c- c' c( c) c c? c* "++
      " X. X- X' X( X) X X? - - - - X* - - - - "++
      " n. n- n' n( n) n n? n* N. N- N' N( N) N N? N* "++
      " a u i A E e o e* k. k- k' k( k) k k? - "++
      " - - - k* - - - - - - - - - - - - "++
      " - - - - - - - - w. w- w' w( w) w w? w* "++
      " - - - - - - - - z. z- z' z( z) z z? z* "++
      " Z. Z- Z' Z( Z) Z Z? Z* y. y- y' y( y) y y? y* "++
      " d. d- d' d( d) d d? d* - - - - - - - - "++
      " j. j- j' j( j) j j? j* g. g- g' g( g) g g? - "++
      " - - - g* - - - - - - - - - - - - "++
      " T. T- T' T( T) T T? T* C. C- C' C( C) C C? C* "++
      " P. P- P' P( P) P P? P* S. S- S' S( S) S S? S* "++
      " - - - - - - - - f. f- f' f( f) f f? f*"++
      " p. p- p' p( p) p p? p*"
    cs = [0x1200..0x1357]
-- by Prasad 31/5/2013
-- | Transliteration table for Sanskrit (Devanagari, U+0900-097F),
-- with the inherent vowel "a" declared invisible.
-- NOTE(review): ts holds 7 rows of 16 symbols (112) while cs spans
-- 128 code points, so the tail of the range depends on how
-- mkTransliteration pairs a shorter symbol list -- confirm intended.
transSanskrit :: Transliteration
transSanskrit =
  (mkTransliteration "Sanskrit" ts cs) {invisible_chars = ["a"]}
  where
    ts = words $
      "- n~ m. h. - A A: I I: U U: R. L. - - E: " ++
      "E+ - O O: O+ k k' g g' n- c c' j j' n* T " ++
      "T' D D' N t t' d d' n - p p' b b' m y " ++
      "r - l L - v s* S s h - - - v- a: i " ++
      "i: u u: r. r.: - e e: e+ - o o: o+ a_ - - " ++
      "o~ - - - - - - - q x G z R R' f - " ++
      "R.: L.: l. l.: p, p. N0 N1 N2 N3 N4 N5 N6 N7 N8 N9 "
    cs = [0x0900 .. 0x097f]
Reference in New Issue
Block a user