transliteration via configuration file: ps -to=file or ps -from=file

This commit is contained in:
aarne
2011-05-02 14:53:46 +00:00
parent fd0fb48493
commit 4ec34bdbb6
2 changed files with 31 additions and 5 deletions

View File

@@ -628,14 +628,18 @@ allCommands env@(pgf, mos) = Map.fromList [
"gr -cat=QCl | l | ps -bind -- linearization output from LangFin",
"ps -to_devanagari \"A-p\" -- show Devanagari in UTF8 terminal",
"rf -file=Hin.gf | ps -env=quotes -to_devanagari -- convert translit to UTF8",
"rf -file=Ara.gf | ps -from_utf8 -env=quotes -from_arabic -- convert UTF8 to transliteration"
"rf -file=Ara.gf | ps -from_utf8 -env=quotes -from_arabic -- convert UTF8 to transliteration",
"ps -to=chinese.trans \"abc\" -- apply transliteration defined in file chinese.trans"
],
exec = \opts ->
let (os,fs) = optsAndFlags opts in
return . fromString . stringOps (envFlag fs) (map prOpt os) . toString,
exec = \opts x -> do
let (os,fs) = optsAndFlags opts
trans <- optTranslit opts
return ((fromString . trans . stringOps (envFlag fs) (map prOpt os) . toString) x),
options = stringOpOptions,
flags = [
("env","apply in this environment only")
("env","apply in this environment only"),
("from","backward-apply transliteration defined in this file (format 'unicode translit' per line)"),
("to", "forward-apply transliteration defined in this file")
]
}),
("pt", emptyCommandInfo {
@@ -1100,6 +1104,15 @@ allCommands env@(pgf, mos) = Map.fromList [
probs <- readProbabilitiesFromFile file pgf
return (setProbabilities probs pgf)
-- Build the string transformation requested by the "to" / "from" flags.
-- With neither flag set the result is the identity function. Otherwise the
-- named file is read and handed to transliterateWithFile; a non-empty
-- "from" flag takes precedence over "to" when both are given.
optTranslit opts =
  case (valStrOpts "to" "" opts, valStrOpts "from" "" opts) of
    ("", "")      -> return id
    (toFile, "")  -> fromConfig toFile False
    (_, fromFile) -> fromConfig fromFile True
  where
    -- Read the configuration file and turn it into a transliteration
    -- function; the Bool is passed on as transliterateWithFile's last argument.
    fromConfig path isFrom =
      readFile path >>= \contents ->
        return (transliterateWithFile path contents isFrom)
optFile opts = valStrOpts "file" "_gftmp" opts
optType opts =

View File

@@ -1,5 +1,6 @@
module GF.Text.Transliterations (
transliterate,
transliterateWithFile,
transliteration,
characterTable,
transliterationPrintNames
@@ -27,6 +28,10 @@ transliterate s = case s of
't':'o':'_':t -> fmap appTransToUnicode $ transliteration t
_ -> Nothing
-- Turn the contents of a transliteration file into a string conversion.
--   name   : name given to the transliteration built from the file
--   src    : the contents of the file
--   isFrom : True selects appTransFromUnicode, False selects appTransToUnicode
transliterateWithFile :: String -> String -> Bool -> (String -> String)
transliterateWithFile name src isFrom = apply (getTransliterationFile name src)
  where
    apply
      | isFrom    = appTransFromUnicode
      | otherwise = appTransToUnicode
-- Look up a transliteration by its name in allTransliterations;
-- Nothing if there is no entry under that name.
transliteration :: String -> Maybe Transliteration
transliteration name = name `Map.lookup` allTransliterations
@@ -82,6 +87,14 @@ mkTransliteration name ts us =
tzip ts us = [(t,u) | (t,u) <- zip ts us, t /= "-"]
uzip us ts = [(u,t) | (u,t) <- zip us ts, t /= "-"]
-- Build a transliteration from the contents of a configuration file.
--   first argument  : name passed on to mkTransliteration
--   second argument : the contents of the file
-- Every non-blank line must hold at least two whitespace-separated fields:
-- a Unicode character followed by its transliteration. Further fields on
-- the line are ignored. The character is given either literally (a field
-- of exactly one character) or as a numeric code point in any notation
-- accepted by the Int reader (e.g. 228 or 0xe4). Note that a one-digit
-- field such as "5" is therefore taken as the literal character '5'.
-- A line that does not fit this format raises an error naming the line.
getTransliterationFile :: String -> String -> Transliteration
getTransliterationFile name = uncurry (mkTransliteration name) . codes
  where
    -- Blank lines carry no entry, so they are dropped rather than rejected.
    codes = unzip . map mkOne . filter (not . null) . map words . lines
    mkOne ws = case ws of
      [c]:t:_ -> (t, fromEnum c)      -- literal character:  ä a:
      u:t:_   -> case reads u of      -- numeric code point: 228 a: OR 0xe4
                   [(n, "")] -> (t, n)
                   -- 'reads' instead of 'read': a malformed code point is
                   -- reported with the offending line, not "no parse".
                   _         -> invalid ws
      _       -> invalid ws
    invalid ws = error $ "not a valid transliteration:" ++ unwords ws
unchar :: String -> [String]
unchar s = case s of