mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-23 19:42:50 -06:00
transliteration via configuration file: ps -to=file or ps -from=file
This commit is contained in:
@@ -628,14 +628,18 @@ allCommands env@(pgf, mos) = Map.fromList [
|
|||||||
"gr -cat=QCl | l | ps -bind -- linearization output from LangFin",
|
"gr -cat=QCl | l | ps -bind -- linearization output from LangFin",
|
||||||
"ps -to_devanagari \"A-p\" -- show Devanagari in UTF8 terminal",
|
"ps -to_devanagari \"A-p\" -- show Devanagari in UTF8 terminal",
|
||||||
"rf -file=Hin.gf | ps -env=quotes -to_devanagari -- convert translit to UTF8",
|
"rf -file=Hin.gf | ps -env=quotes -to_devanagari -- convert translit to UTF8",
|
||||||
"rf -file=Ara.gf | ps -from_utf8 -env=quotes -from_arabic -- convert UTF8 to transliteration"
|
"rf -file=Ara.gf | ps -from_utf8 -env=quotes -from_arabic -- convert UTF8 to transliteration",
|
||||||
|
"ps -to=chinese.trans \"abc\" -- apply transliteration defined in file chinese.trans"
|
||||||
],
|
],
|
||||||
exec = \opts ->
|
exec = \opts x -> do
|
||||||
let (os,fs) = optsAndFlags opts in
|
let (os,fs) = optsAndFlags opts
|
||||||
return . fromString . stringOps (envFlag fs) (map prOpt os) . toString,
|
trans <- optTranslit opts
|
||||||
|
return ((fromString . trans . stringOps (envFlag fs) (map prOpt os) . toString) x),
|
||||||
options = stringOpOptions,
|
options = stringOpOptions,
|
||||||
flags = [
|
flags = [
|
||||||
("env","apply in this environment only")
|
("env","apply in this environment only"),
|
||||||
|
("from","backward-apply transliteration defined in this file (format 'unicode translit' per line)"),
|
||||||
|
("to", "forward-apply transliteration defined in this file")
|
||||||
]
|
]
|
||||||
}),
|
}),
|
||||||
("pt", emptyCommandInfo {
|
("pt", emptyCommandInfo {
|
||||||
@@ -1100,6 +1104,15 @@ allCommands env@(pgf, mos) = Map.fromList [
|
|||||||
probs <- readProbabilitiesFromFile file pgf
|
probs <- readProbabilitiesFromFile file pgf
|
||||||
return (setProbabilities probs pgf)
|
return (setProbabilities probs pgf)
|
||||||
|
|
||||||
|
-- | Build the string transformation selected by the @to@ / @from@ flags.
--
-- With neither flag set the result is 'id'. With @to@ set (and @from@
-- empty) the named file is read and applied in the forward direction;
-- otherwise the file named by @from@ is read and applied backwards.
-- When both flags are given, @from@ takes precedence.
optTranslit opts = case (valStrOpts "to" "" opts, valStrOpts "from" "" opts) of
  ("", "")   -> return id
  (path, "") -> load path False
  (_, path)  -> load path True
 where
  -- Read the transliteration file and turn it into a string function;
  -- the Bool is passed on as the "from" direction flag.
  load path isFrom = do
    src <- readFile path
    return (transliterateWithFile path src isFrom)
|
||||||
|
|
||||||
optFile opts = valStrOpts "file" "_gftmp" opts
|
optFile opts = valStrOpts "file" "_gftmp" opts
|
||||||
|
|
||||||
optType opts =
|
optType opts =
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
module GF.Text.Transliterations (
|
module GF.Text.Transliterations (
|
||||||
transliterate,
|
transliterate,
|
||||||
|
transliterateWithFile,
|
||||||
transliteration,
|
transliteration,
|
||||||
characterTable,
|
characterTable,
|
||||||
transliterationPrintNames
|
transliterationPrintNames
|
||||||
@@ -27,6 +28,10 @@ transliterate s = case s of
|
|||||||
't':'o':'_':t -> fmap appTransToUnicode $ transliteration t
|
't':'o':'_':t -> fmap appTransToUnicode $ transliteration t
|
||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
|
|
||||||
|
-- | Turn the contents of a transliteration file into a string converter.
--
-- The first argument is the name handed to 'getTransliterationFile',
-- the second is the text of the file, and the third selects the
-- direction: 'True' applies 'appTransFromUnicode', 'False' applies
-- 'appTransToUnicode'.
transliterateWithFile :: String -> String -> Bool -> (String -> String)
transliterateWithFile name src isFrom = convert table
 where
  table = getTransliterationFile name src
  convert
    | isFrom    = appTransFromUnicode
    | otherwise = appTransToUnicode
|
||||||
|
|
||||||
transliteration :: String -> Maybe Transliteration
|
transliteration :: String -> Maybe Transliteration
|
||||||
transliteration s = Map.lookup s allTransliterations
|
transliteration s = Map.lookup s allTransliterations
|
||||||
|
|
||||||
@@ -82,6 +87,14 @@ mkTransliteration name ts us =
|
|||||||
tzip ts us = [(t,u) | (t,u) <- zip ts us, t /= "-"]
|
tzip ts us = [(t,u) | (t,u) <- zip ts us, t /= "-"]
|
||||||
uzip us ts = [(u,t) | (u,t) <- zip us ts, t /= "-"]
|
uzip us ts = [(u,t) | (u,t) <- zip us ts, t /= "-"]
|
||||||
|
|
||||||
|
-- | Build a 'Transliteration' from the text of a configuration file.
--
-- Each non-blank line holds two whitespace-separated fields, the Unicode
-- side followed by its transliteration; anything after the second field
-- is ignored. The Unicode side is either a single character (@ä a:@) or
-- a numeric code point readable as an 'Int' (@228 a:@ or @0xe4 a:@).
-- Blank and whitespace-only lines are skipped.
--
-- A line with fewer than two fields, or whose first field is neither a
-- single character nor a number, raises an 'error' quoting that line.
getTransliterationFile :: String -> String -> Transliteration
getTransliterationFile name = uncurry (mkTransliteration name) . codes
 where
  -- Drop empty lines up front so that spacing in the file is harmless.
  codes = unzip . map mkOne . filter (not . null) . map words . lines
  mkOne ws = case ws of
    [c]:t:_ -> (t, fromEnum c)       -- ä a:
    u:t:_   -> case reads u of       -- 228 a: OR 0xe4
      -- Accept only a complete parse, so a malformed code point is
      -- reported with the offending line rather than a bare read failure.
      [(n, "")] -> (t, n)
      _         -> invalid ws
    _       -> invalid ws
  invalid ws = error $ "not a valid transliteration:" ++ unwords ws
|
||||||
|
|
||||||
unchar :: String -> [String]
|
unchar :: String -> [String]
|
||||||
unchar s = case s of
|
unchar s = case s of
|
||||||
|
|||||||
Reference in New Issue
Block a user