1
0
forked from GitHub/gf-core

transliteration via configuration file: ps -to=file or ps -from=file

This commit is contained in:
aarne
2011-05-02 14:53:46 +00:00
parent fd0fb48493
commit 4ec34bdbb6
2 changed files with 31 additions and 5 deletions

View File

@@ -628,14 +628,18 @@ allCommands env@(pgf, mos) = Map.fromList [
"gr -cat=QCl | l | ps -bind -- linearization output from LangFin", "gr -cat=QCl | l | ps -bind -- linearization output from LangFin",
"ps -to_devanagari \"A-p\" -- show Devanagari in UTF8 terminal", "ps -to_devanagari \"A-p\" -- show Devanagari in UTF8 terminal",
"rf -file=Hin.gf | ps -env=quotes -to_devanagari -- convert translit to UTF8", "rf -file=Hin.gf | ps -env=quotes -to_devanagari -- convert translit to UTF8",
"rf -file=Ara.gf | ps -from_utf8 -env=quotes -from_arabic -- convert UTF8 to transliteration" "rf -file=Ara.gf | ps -from_utf8 -env=quotes -from_arabic -- convert UTF8 to transliteration",
"ps -to=chinese.trans \"abc\" -- apply transliteration defined in file chinese.trans"
], ],
exec = \opts -> exec = \opts x -> do
let (os,fs) = optsAndFlags opts in let (os,fs) = optsAndFlags opts
return . fromString . stringOps (envFlag fs) (map prOpt os) . toString, trans <- optTranslit opts
return ((fromString . trans . stringOps (envFlag fs) (map prOpt os) . toString) x),
options = stringOpOptions, options = stringOpOptions,
flags = [ flags = [
("env","apply in this environment only") ("env","apply in this environment only"),
("from","backward-apply transliteration defined in this file (format 'unicode translit' per line)"),
("to", "forward-apply transliteration defined in this file")
] ]
}), }),
("pt", emptyCommandInfo { ("pt", emptyCommandInfo {
@@ -1100,6 +1104,15 @@ allCommands env@(pgf, mos) = Map.fromList [
probs <- readProbabilitiesFromFile file pgf probs <- readProbabilitiesFromFile file pgf
return (setProbabilities probs pgf) return (setProbabilities probs pgf)
-- Select the file-based transliteration requested through the "to" and
-- "from" flags. With neither flag set the result is the identity function.
-- A non-empty "from" flag is used even when "to" is also given. The file
-- path doubles as the name handed to transliterateWithFile.
optTranslit opts =
    case (valStrOpts "to" "" opts, valStrOpts "from" "" opts) of
      ("", "")       -> return id
      (toFile, "")   -> loadTranslit toFile False
      (_, fromFile)  -> loadTranslit fromFile True
  where
    -- Read the definition file and build the string converter from it;
    -- the flag is True for the "from" direction.
    loadTranslit path isFrom = do
      contents <- readFile path
      return (transliterateWithFile path contents isFrom)
optFile opts = valStrOpts "file" "_gftmp" opts optFile opts = valStrOpts "file" "_gftmp" opts
optType opts = optType opts =

View File

@@ -1,5 +1,6 @@
module GF.Text.Transliterations ( module GF.Text.Transliterations (
transliterate, transliterate,
transliterateWithFile,
transliteration, transliteration,
characterTable, characterTable,
transliterationPrintNames transliterationPrintNames
@@ -27,6 +28,10 @@ transliterate s = case s of
't':'o':'_':t -> fmap appTransToUnicode $ transliteration t 't':'o':'_':t -> fmap appTransToUnicode $ transliteration t
_ -> Nothing _ -> Nothing
-- | Build a string converter from the contents of a transliteration file.
--
-- @name@ is the name given to the resulting table and @src@ is the text of
-- the definition file. When @isFrom@ is 'True' the table is applied with
-- 'appTransFromUnicode', otherwise with 'appTransToUnicode'.
transliterateWithFile :: String -> String -> Bool -> (String -> String)
transliterateWithFile name src isFrom
  | isFrom    = appTransFromUnicode table
  | otherwise = appTransToUnicode table
  where
    table = getTransliterationFile name src
transliteration :: String -> Maybe Transliteration transliteration :: String -> Maybe Transliteration
transliteration s = Map.lookup s allTransliterations transliteration s = Map.lookup s allTransliterations
@@ -82,6 +87,14 @@ mkTransliteration name ts us =
tzip ts us = [(t,u) | (t,u) <- zip ts us, t /= "-"] tzip ts us = [(t,u) | (t,u) <- zip ts us, t /= "-"]
uzip us ts = [(u,t) | (u,t) <- zip us ts, t /= "-"] uzip us ts = [(u,t) | (u,t) <- zip us ts, t /= "-"]
-- | Build a 'Transliteration' called @name@ from the text of a definition
-- file.
--
-- Every non-blank line holds whitespace-separated fields: first the Unicode
-- character, then its transliteration; any further fields are ignored.
-- The character is written either literally (@ä@) or as a number in Haskell
-- integer syntax (@228@, @0xe4@). A one-character first field is always
-- taken as the literal character, so a single digit stands for itself.
--
-- Blank lines are skipped. Any other malformed line raises an 'error'
-- naming the offending line; since the table is built lazily, the error
-- surfaces when the transliteration is used.
getTransliterationFile :: String -> String -> Transliteration
getTransliterationFile name = uncurry (mkTransliteration name) . codes
  where
    -- Drop blank lines before parsing so that stray empty lines in the
    -- file do not invalidate the whole table.
    codes = unzip . map mkOne . filter (not . null) . map words . lines

    mkOne ws = case ws of
      [c] : t : _ -> (t, fromEnum c)       -- ä a:
      u : t : _   -> (t, codePoint ws u)   -- 228 a: OR 0xe4
      _           -> invalid ws

    -- Total replacement for 'read': a field that is not a complete integer
    -- literal is reported together with its line.
    codePoint :: [String] -> String -> Int
    codePoint ws u = case reads u of
      [(n, "")] -> n
      _         -> invalid ws

    invalid ws = error $ "not a valid transliteration:" ++ unwords ws
unchar :: String -> [String] unchar :: String -> [String]
unchar s = case s of unchar s = case s of