threaded string operations to strings if given as options to linearize

This commit is contained in:
aarne
2008-06-22 19:02:09 +00:00
parent f283528094
commit fc69b962bc
2 changed files with 41 additions and 36 deletions

View File

@@ -48,10 +48,10 @@ data CommandInfo = CommandInfo {
emptyCommandInfo :: CommandInfo emptyCommandInfo :: CommandInfo
emptyCommandInfo = CommandInfo { emptyCommandInfo = CommandInfo {
exec = \_ ts -> return (ts,[]), ---- exec = \_ ts -> return (ts,[]), ----
synopsis = "synopsis", synopsis = "",
syntax = "syntax", syntax = "",
explanation = "explanation", explanation = "",
longname = "longname", longname = "",
options = [], options = [],
flags = [], flags = [],
examples = [] examples = []
@@ -224,10 +224,13 @@ allCommands pgf = Map.fromList [
explanation = unlines [ explanation = unlines [
"Shows the linearization of a Tree by the grammars in scope.", "Shows the linearization of a Tree by the grammars in scope.",
"The -lang flag can be used to restrict this to fewer languages.", "The -lang flag can be used to restrict this to fewer languages.",
"See also the ps command for unlexing and character encoding." "A sequence of string operations (see command ps) can be given",
"as options, and works then like a pipe to the ps command, except",
"that it only affect the strings, not e.g. the table labels."
], ],
examples = [ examples = [
"l -langs=LangSwe,LangNor no_Utt -- linearize to LangSwe and LangNor" "l -langs=LangSwe,LangNor no_Utt -- linearize tree to LangSwe and LangNor",
"gr -lang=LangHin -cat=Cl | l -table -to_devanagari -to_utf8 -- hindi table"
], ],
exec = \opts -> return . fromStrings . map (optLin opts), exec = \opts -> return . fromStrings . map (optLin opts),
options = [ options = [
@@ -236,7 +239,7 @@ allCommands pgf = Map.fromList [
("table","show all forms labelled by parameters"), ("table","show all forms labelled by parameters"),
("term", "show PGF term"), ("term", "show PGF term"),
("treebank","show the tree and tag linearizations with language names") ("treebank","show the tree and tag linearizations with language names")
], ] ++ stringOpOptions,
flags = [ flags = [
("lang","the languages of linearization (comma-separated, no spaces)") ("lang","the languages of linearization (comma-separated, no spaces)")
] ]
@@ -341,25 +344,7 @@ allCommands pgf = Map.fromList [
"ps -to_devanagari -to_utf8 \"A-p\" -- show Devanagari in UTF8 terminal" "ps -to_devanagari -to_utf8 \"A-p\" -- show Devanagari in UTF8 terminal"
], ],
exec = \opts -> return . fromString . stringOps opts . toString, exec = \opts -> return . fromString . stringOps opts . toString,
options = [ options = stringOpOptions
("bind","bind tokens separated by Prelude.BIND, i.e. &+"),
("chars","lexer that makes every non-space character a token"),
("from_devanagari","from unicode to GF Devanagari transliteration"),
("from_thai","from unicode to GF Thai transliteration"),
("from_utf8","decode from utf8"),
("lextext","text-like lexer"),
("lexcode","code-like lexer"),
("lexmixed","mixture of text and code (code between $...$)"),
("to_devanagari","from GF Devanagari transliteration to unicode"),
("to_thai","from GF Thai transliteration to unicode"),
("to_utf8","encode to utf8"),
("unlextext","text-like unlexer"),
("unlexcode","code-like unlexer"),
("unlexmixed","mixture of text and code (code between $...$)"),
("unchars","unlexer that puts no spaces between tokens"),
("unwords","unlexer that puts a single space between tokens (default)"),
("words","lexer that assumes tokens separated by spaces (default)")
]
}), }),
("q", emptyCommandInfo { ("q", emptyCommandInfo {
longname = "quit", longname = "quit",
@@ -501,12 +486,12 @@ allCommands pgf = Map.fromList [
_ | isOpt "treebank" opts -> treebank opts t _ | isOpt "treebank" opts -> treebank opts t
_ -> unlines [linear opts lang t | lang <- optLangs opts] _ -> unlines [linear opts lang t | lang <- optLangs opts]
linear opts lang = unlex opts lang . case opts of linear opts lang = let unl = unlex opts lang in case opts of
_ | isOpt "all" opts -> allLinearize pgf (mkCId lang) _ | isOpt "all" opts -> allLinearize unl pgf (mkCId lang)
_ | isOpt "table" opts -> tableLinearize pgf (mkCId lang) _ | isOpt "table" opts -> tableLinearize unl pgf (mkCId lang)
_ | isOpt "term" opts -> termLinearize pgf (mkCId lang) _ | isOpt "term" opts -> termLinearize pgf (mkCId lang)
_ | isOpt "record" opts -> recordLinearize pgf (mkCId lang) _ | isOpt "record" opts -> recordLinearize pgf (mkCId lang)
_ -> linearize pgf lang _ -> unl . linearize pgf lang
treebank opts t = unlines $ treebank opts t = unlines $
(abstractName pgf ++ ": " ++ showTree t) : (abstractName pgf ++ ": " ++ showTree t) :
@@ -550,6 +535,26 @@ allCommands pgf = Map.fromList [
stringOps opts s = foldr app s (reverse (map prOpt opts)) where stringOps opts s = foldr app s (reverse (map prOpt opts)) where
app f = maybe id id (stringOp f) app f = maybe id id (stringOp f)
stringOpOptions = [
("bind","bind tokens separated by Prelude.BIND, i.e. &+"),
("chars","lexer that makes every non-space character a token"),
("from_devanagari","from unicode to GF Devanagari transliteration"),
("from_thai","from unicode to GF Thai transliteration"),
("from_utf8","decode from utf8"),
("lextext","text-like lexer"),
("lexcode","code-like lexer"),
("lexmixed","mixture of text and code (code between $...$)"),
("to_devanagari","from GF Devanagari transliteration to unicode"),
("to_thai","from GF Thai transliteration to unicode"),
("to_utf8","encode to utf8"),
("unlextext","text-like unlexer"),
("unlexcode","code-like unlexer"),
("unlexmixed","mixture of text and code (code between $...$)"),
("unchars","unlexer that puts no spaces between tokens"),
("unwords","unlexer that puts a single space between tokens (default)"),
("words","lexer that assumes tokens separated by spaces (default)")
]
translationQuiz :: PGF -> Language -> Language -> Category -> IO () translationQuiz :: PGF -> Language -> Language -> Category -> IO ()
translationQuiz pgf ig og cat = do translationQuiz pgf ig og cat = do
tts <- translationList pgf ig og cat infinity tts <- translationList pgf ig og cat infinity

View File

@@ -54,20 +54,20 @@ mkRecord typ trm = case (typ,trm) of
str = realize str = realize
-- show all branches, without labels and params -- show all branches, without labels and params
allLinearize :: PGF -> CId -> Tree -> String allLinearize :: (String -> String) -> PGF -> CId -> Tree -> String
allLinearize pgf lang = concat . map pr . tabularLinearize pgf lang where allLinearize unlex pgf lang = concat . map (unlex . pr) . tabularLinearize pgf lang where
pr (p,vs) = unlines vs pr (p,vs) = unlines vs
-- show all branches, with labels and params -- show all branches, with labels and params
tableLinearize :: PGF -> CId -> Tree -> String tableLinearize :: (String -> String) -> PGF -> CId -> Tree -> String
tableLinearize pgf lang = unlines . map pr . tabularLinearize pgf lang where tableLinearize unlex pgf lang = unlines . map pr . tabularLinearize pgf lang where
pr (p,vs) = p +++ ":" +++ unwords (intersperse "|" vs) pr (p,vs) = p +++ ":" +++ unwords (intersperse "|" (map unlex vs))
-- create a table from labels+params to variants -- create a table from labels+params to variants
tabularLinearize :: PGF -> CId -> Tree -> [(String,[String])] tabularLinearize :: PGF -> CId -> Tree -> [(String,[String])]
tabularLinearize pgf lang = branches . recLinearize pgf lang where tabularLinearize pgf lang = branches . recLinearize pgf lang where
branches r = case r of branches r = case r of
RR fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t] RR fs -> [( b,s) | (lab,t) <- fs, (b,s) <- branches t]
RT fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t] RT fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t]
RFV rs -> [([], ss) | (_,ss) <- concatMap branches rs] RFV rs -> [([], ss) | (_,ss) <- concatMap branches rs]
RS s -> [([], [s])] RS s -> [([], [s])]