mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-22 19:22:50 -06:00
GF shell: change parse & linearize to obtain useful results from p|l and l|p in more cases
These changes are inspired by the gf -cshell implementation of these commands.
The output of the linearize command has been changed to remove superfluous
blank lines and commas, and deliver the result as a list of strings instead of
a single multi-line string. This makes it possible to use -all and pipe the
results to the parse command. This also means that with -treebank -all,
the language tag will be repeated for each result from the same language.
The parse command, when trying to parse with more than one language, would
"forget" other results after a failed parse, and thus not send all
successful parses through the pipe. For example, if English is not the first
language in the grammar,
p "hello" | l
would output nothing, instead of translations of "hello" to all languages,
forcing the user to write
p -lang=Eng "hello" | l
instead, to get the expected result. The cause of this behaviour was in the
function fromParse, which was rather messy, so I assume it is not intentional,
but the result of a programming mistake at some point.
The fromParse function has now been refactored from a big recursive function
into
fromParse opts = foldr (joinPiped . fromParse1 opts) void
where the helper functions fromParse1 deals with a single parse result and
joinPiped combines multiple parse results.
This commit is contained in:
@@ -365,7 +365,7 @@ pgfCommands = Map.fromList [
|
|||||||
"will accept unknown adjectives, nouns and verbs with the resource grammar."
|
"will accept unknown adjectives, nouns and verbs with the resource grammar."
|
||||||
],
|
],
|
||||||
exec = getEnv $ \ opts ts (Env pgf mos) ->
|
exec = getEnv $ \ opts ts (Env pgf mos) ->
|
||||||
return . Piped $ fromParse opts (concat [map ((,) s) (par pgf opts s) | s <- toStrings ts]),
|
return $ fromParse opts (concat [map ((,) s) (par pgf opts s) | s <- toStrings ts]),
|
||||||
flags = [
|
flags = [
|
||||||
("cat","target category of parsing"),
|
("cat","target category of parsing"),
|
||||||
("lang","the languages of parsing (comma-separated, no spaces)"),
|
("lang","the languages of parsing (comma-separated, no spaces)"),
|
||||||
@@ -742,34 +742,53 @@ pgfCommands = Map.fromList [
|
|||||||
where
|
where
|
||||||
dp = valIntOpts "depth" 4 opts
|
dp = valIntOpts "depth" 4 opts
|
||||||
|
|
||||||
|
fromParse opts = foldr (joinPiped . fromParse1 opts) void
|
||||||
|
|
||||||
|
joinPiped (Piped (es1,ms1)) (Piped (es2,ms2)) = Piped (es1++es2,ms1+++-ms2)
|
||||||
|
|
||||||
|
fromParse1 opts (s,(po,bs))
|
||||||
|
| isOpt "bracket" opts = pipeMessage (showBracketedString bs)
|
||||||
|
| otherwise =
|
||||||
|
case po of
|
||||||
|
ParseOk ts -> fromExprs ts
|
||||||
|
ParseFailed i -> pipeMessage $ "The parser failed at token "
|
||||||
|
++ show i ++": "
|
||||||
|
++ show (words s !! max 0 (i-1))
|
||||||
|
-- ++ " in " ++ show s
|
||||||
|
ParseIncomplete -> pipeMessage "The sentence is not complete"
|
||||||
|
TypeError errs ->
|
||||||
|
pipeMessage . render $
|
||||||
|
"The parsing is successful but the type checking failed with error(s):"
|
||||||
|
$$ nest 2 (vcat (map (ppTcError . snd) errs))
|
||||||
|
|
||||||
optLins pgf opts ts = case opts of
|
optLins pgf opts ts = case opts of
|
||||||
_ | isOpt "groups" opts ->
|
_ | isOpt "groups" opts ->
|
||||||
map (unlines . snd) $ groupResults
|
concatMap snd $ groupResults
|
||||||
[[(lang, linear pgf opts lang t) | lang <- optLangs pgf opts] | t <- ts]
|
[[(lang, s) | lang <- optLangs pgf opts,s <- linear pgf opts lang t] | t <- ts]
|
||||||
_ -> map (optLin pgf opts) ts
|
_ -> concatMap (optLin pgf opts) ts
|
||||||
optLin pgf opts t = unlines $
|
optLin pgf opts t =
|
||||||
case opts of
|
case opts of
|
||||||
_ | isOpt "treebank" opts && isOpt "chunks" opts ->
|
_ | isOpt "treebank" opts && isOpt "chunks" opts ->
|
||||||
(showCId (abstractName pgf) ++ ": " ++ showExpr [] t) :
|
(showCId (abstractName pgf) ++ ": " ++ showExpr [] t) :
|
||||||
[showCId lang ++ ": " ++ li | (lang,li) <- linChunks pgf opts t] --linear pgf opts lang t | lang <- optLangs pgf opts]
|
[showCId lang ++ ": " ++ li | (lang,li) <- linChunks pgf opts t] --linear pgf opts lang t | lang <- optLangs pgf opts]
|
||||||
_ | isOpt "treebank" opts ->
|
_ | isOpt "treebank" opts ->
|
||||||
(showCId (abstractName pgf) ++ ": " ++ showExpr [] t) :
|
(showCId (abstractName pgf) ++ ": " ++ showExpr [] t) :
|
||||||
[showCId lang ++ ": " ++ linear pgf opts lang t | lang <- optLangs pgf opts]
|
[showCId lang ++ ": " ++ s | lang <- optLangs pgf opts, s<-linear pgf opts lang t]
|
||||||
_ | isOpt "chunks" opts -> map snd $ linChunks pgf opts t
|
_ | isOpt "chunks" opts -> map snd $ linChunks pgf opts t
|
||||||
_ -> [linear pgf opts lang t | lang <- optLangs pgf opts]
|
_ -> [s | lang <- optLangs pgf opts, s<-linear pgf opts lang t]
|
||||||
linChunks pgf opts t =
|
linChunks pgf opts t =
|
||||||
[(lang, unwords (intersperse "<+>" (map (linear pgf opts lang) (treeChunks t)))) | lang <- optLangs pgf opts]
|
[(lang, unwords (intersperse "<+>" (map (unlines . linear pgf opts lang) (treeChunks t)))) | lang <- optLangs pgf opts]
|
||||||
|
|
||||||
linear :: PGF -> [Option] -> CId -> Expr -> String
|
linear :: PGF -> [Option] -> CId -> Expr -> [String]
|
||||||
linear pgf opts lang = let unl = unlex opts lang in case opts of
|
linear pgf opts lang = let unl = unlex opts lang in case opts of
|
||||||
_ | isOpt "all" opts -> unlines . concat . intersperse [[]] .
|
_ | isOpt "all" opts -> concat . -- intersperse [[]] .
|
||||||
map (map (unl . snd)) . tabularLinearizes pgf lang
|
map (map (unl . snd)) . tabularLinearizes pgf lang
|
||||||
_ | isOpt "list" opts -> commaList . concat . intersperse [[]] .
|
_ | isOpt "list" opts -> (:[]) . commaList . concat .
|
||||||
map (map (unl . snd)) . tabularLinearizes pgf lang
|
map (map (unl . snd)) . tabularLinearizes pgf lang
|
||||||
_ | isOpt "table" opts -> unlines . concat . intersperse [[]] .
|
_ | isOpt "table" opts -> concat . -- intersperse [[]] .
|
||||||
map (map (\(p,v) -> p+++":"+++unl v)) . tabularLinearizes pgf lang
|
map (map (\(p,v) -> p+++":"+++unl v)) . tabularLinearizes pgf lang
|
||||||
_ | isOpt "bracket" opts -> unwords . map showBracketedString . bracketedLinearize pgf lang
|
_ | isOpt "bracket" opts -> (:[]) . unwords . map showBracketedString . bracketedLinearize pgf lang
|
||||||
_ -> unl . linearize pgf lang
|
_ -> (:[]) . unl . linearize pgf lang
|
||||||
|
|
||||||
-- replace each non-atomic constructor with mkC, where C is the val cat
|
-- replace each non-atomic constructor with mkC, where C is the val cat
|
||||||
tree2mk pgf = showExpr [] . t2m where
|
tree2mk pgf = showExpr [] . t2m where
|
||||||
@@ -844,22 +863,6 @@ pgfCommands = Map.fromList [
|
|||||||
optNumInf opts = valIntOpts "number" 1000000000 opts ---- 10^9
|
optNumInf opts = valIntOpts "number" 1000000000 opts ---- 10^9
|
||||||
takeOptNum opts = take (optNumInf opts)
|
takeOptNum opts = take (optNumInf opts)
|
||||||
|
|
||||||
fromParse opts [] = ([],[])
|
|
||||||
fromParse opts ((s,(po,bs)):ps)
|
|
||||||
| isOpt "bracket" opts = (es, showBracketedString bs
|
|
||||||
++ "\n" ++ msg)
|
|
||||||
| otherwise = case po of
|
|
||||||
ParseOk ts -> let Piped (es',msg') = fromExprs ts
|
|
||||||
in (es'++es,msg'++msg)
|
|
||||||
TypeError errs -> ([], render ("The parsing is successful but the type checking failed with error(s):" $$
|
|
||||||
nest 2 (vcat (map (ppTcError . snd) errs)))
|
|
||||||
++ "\n" ++ msg)
|
|
||||||
ParseFailed i -> ([], "The parser failed at token " ++ show (words s !! max 0 (i-1))
|
|
||||||
++ "\n" ++ msg)
|
|
||||||
ParseIncomplete-> ([], "The sentence is not complete")
|
|
||||||
where
|
|
||||||
(es,msg) = fromParse opts ps
|
|
||||||
|
|
||||||
returnFromExprs es = return $ case es of
|
returnFromExprs es = return $ case es of
|
||||||
[] -> pipeMessage "no trees found"
|
[] -> pipeMessage "no trees found"
|
||||||
_ -> fromExprs es
|
_ -> fromExprs es
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ module GF.Data.Operations (
|
|||||||
tree2list,
|
tree2list,
|
||||||
|
|
||||||
-- ** Printing
|
-- ** Printing
|
||||||
indent, (+++), (++-), (++++), (+++++),
|
indent, (+++), (++-), (++++), (+++-), (+++++),
|
||||||
prUpper, prReplicate, prTList, prQuotedString, prParenth, prCurly,
|
prUpper, prReplicate, prTList, prQuotedString, prParenth, prCurly,
|
||||||
prBracket, prArgList, prSemicList, prCurlyList, restoreEscapes,
|
prBracket, prArgList, prSemicList, prCurlyList, restoreEscapes,
|
||||||
numberedParagraphs, prConjList, prIfEmpty, wrapLines,
|
numberedParagraphs, prConjList, prIfEmpty, wrapLines,
|
||||||
@@ -160,13 +160,20 @@ tree2list = Map.toList
|
|||||||
indent :: Int -> String -> String
|
indent :: Int -> String -> String
|
||||||
indent i s = replicate i ' ' ++ s
|
indent i s = replicate i ' ' ++ s
|
||||||
|
|
||||||
(+++), (++-), (++++), (+++++) :: String -> String -> String
|
(+++), (++-), (++++), (+++-), (+++++) :: String -> String -> String
|
||||||
a +++ b = a ++ " " ++ b
|
a +++ b = a ++ " " ++ b
|
||||||
|
|
||||||
a ++- "" = a
|
a ++- "" = a
|
||||||
a ++- b = a +++ b
|
a ++- b = a +++ b
|
||||||
|
|
||||||
a ++++ b = a ++ "\n" ++ b
|
a ++++ b = a ++ "\n" ++ b
|
||||||
|
|
||||||
|
a +++- "" = a
|
||||||
|
a +++- b = a ++++ b
|
||||||
|
|
||||||
a +++++ b = a ++ "\n\n" ++ b
|
a +++++ b = a ++ "\n\n" ++ b
|
||||||
|
|
||||||
|
|
||||||
prUpper :: String -> String
|
prUpper :: String -> String
|
||||||
prUpper s = s1 ++ s2' where
|
prUpper s = s1 ++ s2' where
|
||||||
(s1,s2) = span isSpace s
|
(s1,s2) = span isSpace s
|
||||||
|
|||||||
Reference in New Issue
Block a user