forked from GitHub/gf-core
transliteration via configuration file: ps -to=file or ps -from=file
This commit is contained in:
@@ -628,14 +628,18 @@ allCommands env@(pgf, mos) = Map.fromList [
|
||||
"gr -cat=QCl | l | ps -bind -- linearization output from LangFin",
|
||||
"ps -to_devanagari \"A-p\" -- show Devanagari in UTF8 terminal",
|
||||
"rf -file=Hin.gf | ps -env=quotes -to_devanagari -- convert translit to UTF8",
|
||||
"rf -file=Ara.gf | ps -from_utf8 -env=quotes -from_arabic -- convert UTF8 to transliteration"
|
||||
"rf -file=Ara.gf | ps -from_utf8 -env=quotes -from_arabic -- convert UTF8 to transliteration",
|
||||
"ps -to=chinese.trans \"abc\" -- apply transliteration defined in file chinese.trans"
|
||||
],
|
||||
exec = \opts ->
|
||||
let (os,fs) = optsAndFlags opts in
|
||||
return . fromString . stringOps (envFlag fs) (map prOpt os) . toString,
|
||||
exec = \opts x -> do
|
||||
let (os,fs) = optsAndFlags opts
|
||||
trans <- optTranslit opts
|
||||
return ((fromString . trans . stringOps (envFlag fs) (map prOpt os) . toString) x),
|
||||
options = stringOpOptions,
|
||||
flags = [
|
||||
("env","apply in this environment only")
|
||||
("env","apply in this environment only"),
|
||||
("from","backward-apply transliteration defined in this file (format 'unicode translit' per line)"),
|
||||
("to", "forward-apply transliteration defined in this file")
|
||||
]
|
||||
}),
|
||||
("pt", emptyCommandInfo {
|
||||
@@ -1100,6 +1104,15 @@ allCommands env@(pgf, mos) = Map.fromList [
|
||||
probs <- readProbabilitiesFromFile file pgf
|
||||
return (setProbabilities probs pgf)
|
||||
|
||||
-- Pick the file-based transliteration requested by the "to" / "from" flags.
-- With neither flag set the result is the identity function. With only "to"
-- set, the named file is read and applied in the forward direction. Otherwise
-- the "from" file is read and applied backwards, so "from" wins when both
-- flags are present.
optTranslit opts
  | null toFile && null fromFile = return id
  | null fromFile                = loadTranslit toFile False
  | otherwise                    = loadTranslit fromFile True
  where
    toFile   = valStrOpts "to" "" opts
    fromFile = valStrOpts "from" "" opts
    -- Read the definition file and turn it into a string transformer;
    -- the Bool is passed on as the isFrom argument of transliterateWithFile.
    loadTranslit path backwards = do
      src <- readFile path
      return $ transliterateWithFile path src backwards
|
||||
|
||||
-- Value of the "file" flag, with "_gftmp" passed to valStrOpts as the default.
optFile opts = valStrOpts "file" fallbackName opts
  where
    fallbackName = "_gftmp"
|
||||
|
||||
optType opts =
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
module GF.Text.Transliterations (
|
||||
transliterate,
|
||||
transliterateWithFile,
|
||||
transliteration,
|
||||
characterTable,
|
||||
transliterationPrintNames
|
||||
@@ -27,6 +28,10 @@ transliterate s = case s of
|
||||
't':'o':'_':t -> fmap appTransToUnicode $ transliteration t
|
||||
_ -> Nothing
|
||||
|
||||
-- | Turn the contents of a transliteration definition file into a string
-- transformer.
--
-- @name@ and @src@ are handed to 'getTransliterationFile' to build the table.
-- When @isFrom@ is 'True' the table is applied with 'appTransFromUnicode',
-- otherwise with 'appTransToUnicode'.
transliterateWithFile :: String -> String -> Bool -> (String -> String)
transliterateWithFile name src isFrom = applyTable table
  where
    table = getTransliterationFile name src
    applyTable
      | isFrom    = appTransFromUnicode
      | otherwise = appTransToUnicode
|
||||
|
||||
-- | Find the transliteration stored under the given name in
-- 'allTransliterations'; 'Nothing' when there is no such entry.
transliteration :: String -> Maybe Transliteration
transliteration name = name `Map.lookup` allTransliterations
|
||||
|
||||
@@ -82,6 +87,14 @@ mkTransliteration name ts us =
|
||||
tzip ts us = [(t,u) | (t,u) <- zip ts us, t /= "-"]
|
||||
uzip us ts = [(u,t) | (u,t) <- zip us ts, t /= "-"]
|
||||
|
||||
-- | Build a 'Transliteration' called @name@ from the text of a definition file.
--
-- Every non-blank line holds two whitespace-separated words: first the
-- Unicode side, then its transliteration. Anything after the second word is
-- ignored. The Unicode side is either a single literal character (@ä a:@) or
-- a numeric code that parses as an 'Int' (@228 a:@ or @0xe4 a:@).
--
-- Blank lines are skipped. Any other line that does not fit the format
-- raises an 'error' quoting the offending line.
getTransliterationFile :: String -> String -> Transliteration
getTransliterationFile name = uncurry (mkTransliteration name) . codes
  where
    -- Drop blank lines before parsing so that a trailing newline or visual
    -- spacing in the file does not abort the whole load.
    codes = unzip . map mkOne . filter (not . null) . map words . lines
    mkOne ws = case ws of
      [c]:t:_ -> (t, fromEnum c)        -- ä a:
      u:t:_   -> case reads u of        -- 228 a: OR 0xe4
        [(code, "")] -> (t, code :: Int)
        -- Report the bad line instead of failing inside a partial 'read'.
        _            -> invalid ws
      _       -> invalid ws
    invalid ws = error $ "not a valid transliteration:" ++ unwords ws
|
||||
|
||||
unchar :: String -> [String]
|
||||
unchar s = case s of
|
||||
|
||||
Reference in New Issue
Block a user