mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-23 19:42:50 -06:00
Malt parser .conll format by vd -output=malt
This commit is contained in:
@@ -569,7 +569,8 @@ allCommands cod env@(pgf, mos) = Map.fromList [
|
|||||||
longname = "visualize_dependency",
|
longname = "visualize_dependency",
|
||||||
synopsis = "show word dependency tree graphically",
|
synopsis = "show word dependency tree graphically",
|
||||||
explanation = unlines [
|
explanation = unlines [
|
||||||
"Prints a dependency tree the .dot format (the graphviz format).",
|
"Prints a dependency tree in the .dot format (the graphviz format, default)",
|
||||||
|
"or the MaltParser/CoNLL format (flag -output=malt)",
|
||||||
"By default, the last argument is the head of every abstract syntax",
|
"By default, the last argument is the head of every abstract syntax",
|
||||||
"function; moreover, the head depends on the head of the function above.",
|
"function; moreover, the head depends on the head of the function above.",
|
||||||
"The graph can be saved in a file by the wf command as usual.",
|
"The graph can be saved in a file by the wf command as usual.",
|
||||||
@@ -581,21 +582,21 @@ allCommands cod env@(pgf, mos) = Map.fromList [
|
|||||||
exec = \opts es -> do
|
exec = \opts es -> do
|
||||||
let debug = isOpt "v" opts
|
let debug = isOpt "v" opts
|
||||||
let file = valStrOpts "file" "" opts
|
let file = valStrOpts "file" "" opts
|
||||||
|
let outp = valStrOpts "output" "dot" opts
|
||||||
mlab <- case file of
|
mlab <- case file of
|
||||||
"" -> return Nothing
|
"" -> return Nothing
|
||||||
_ -> readFile file >>= return . Just . getDepLabels . lines
|
_ -> readFile file >>= return . Just . getDepLabels . lines
|
||||||
let lang = optLang opts
|
let lang = optLang opts
|
||||||
let grph = if null es then [] else
|
let grphs = unlines $ map (dependencyTree outp debug mlab Nothing pgf lang) es
|
||||||
dependencyTree debug mlab Nothing pgf lang (head es)
|
|
||||||
if isFlag "view" opts || isFlag "format" opts then do
|
if isFlag "view" opts || isFlag "format" opts then do
|
||||||
let file s = "_grph." ++ s
|
let file s = "_grphd." ++ s
|
||||||
let view = optViewGraph opts ++ " "
|
let view = optViewGraph opts ++ " "
|
||||||
let format = optViewFormat opts
|
let format = optViewFormat opts
|
||||||
writeFile (file "dot") (enc grph)
|
writeFile (file "dot") (enc grphs)
|
||||||
system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++
|
system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++
|
||||||
" ; " ++ view ++ file format
|
" ; " ++ view ++ file format
|
||||||
return void
|
return void
|
||||||
else return $ fromString grph,
|
else return $ fromString grphs,
|
||||||
examples = [
|
examples = [
|
||||||
"gr | aw -- generate a tree and show word alignment as graph script",
|
"gr | aw -- generate a tree and show word alignment as graph script",
|
||||||
"gr | vt -view=\"open\" -- generate a tree and display alignment on a Mac"
|
"gr | vt -view=\"open\" -- generate a tree and display alignment on a Mac"
|
||||||
@@ -606,6 +607,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
|
|||||||
flags = [
|
flags = [
|
||||||
("file","configuration file for labels per fun, format 'fun l1 ... label ... l2'"),
|
("file","configuration file for labels per fun, format 'fun l1 ... label ... l2'"),
|
||||||
("format","format of the visualization file (default \"png\")"),
|
("format","format of the visualization file (default \"png\")"),
|
||||||
|
("output","output format of graph source (default \"dot\")"),
|
||||||
("view","program to open the resulting file (default \"open\")")
|
("view","program to open the resulting file (default \"open\")")
|
||||||
]
|
]
|
||||||
}),
|
}),
|
||||||
|
|||||||
@@ -60,10 +60,16 @@ prGraph digr ns = concat $ map (++"\n") $ [graph ++ "{\n"] ++ ns ++ ["}"] where
|
|||||||
|
|
||||||
-- dependency trees from Linearize.linearizeMark
|
-- dependency trees from Linearize.linearizeMark
|
||||||
|
|
||||||
dependencyTree :: Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String
|
dependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String
|
||||||
dependencyTree debug mlab ms pgf lang exp = prGraph True lin2dep where
|
dependencyTree format debug mlab ms pgf lang exp = case format of
|
||||||
|
"malt" -> unlines (lin2dep format)
|
||||||
|
_ -> prGraph True (lin2dep format)
|
||||||
|
|
||||||
lin2dep = trace (ifd (show sortedNodes ++ show nodeWords)) $ prelude ++ nodes ++ links
|
where
|
||||||
|
|
||||||
|
lin2dep format = trace (ifd (show sortedNodes ++ show nodeWords)) $ case format of
|
||||||
|
"malt" -> map (concat . intersperse "\t") wnodes
|
||||||
|
_ -> prelude ++ nodes ++ links
|
||||||
|
|
||||||
ifd s = if debug then s else []
|
ifd s = if debug then s else []
|
||||||
|
|
||||||
@@ -78,8 +84,8 @@ dependencyTree debug mlab ms pgf lang exp = prGraph True lin2dep where
|
|||||||
nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)|
|
nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)|
|
||||||
((Just f,p),w) <- wlins pot]
|
((Just f,p),w) <- wlins pot]
|
||||||
|
|
||||||
links = map mkLink
|
links = map mkLink thelinks
|
||||||
[(word y, x, label tr y x) |
|
thelinks = [(word y, x, label tr y x) |
|
||||||
(_,((f,x),_)) <- tail nodeWords,
|
(_,((f,x),_)) <- tail nodeWords,
|
||||||
let y = dominant x]
|
let y = dominant x]
|
||||||
mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;"
|
mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;"
|
||||||
@@ -120,13 +126,32 @@ dependencyTree debug mlab ms pgf lang exp = prGraph True lin2dep where
|
|||||||
Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i
|
Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i
|
||||||
_ -> showCId f ++ "#" ++ show i
|
_ -> showCId f ++ "#" ++ show i
|
||||||
|
|
||||||
|
-- to generate CoNLL format for MaltParser
|
||||||
|
nodeMap :: Map.Map [Int] Int
|
||||||
|
nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords]
|
||||||
|
|
||||||
|
arcMap :: Map.Map [Int] ([Int],String)
|
||||||
|
arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks]
|
||||||
|
|
||||||
|
lookDomLab p = case Map.lookup p arcMap of
|
||||||
|
Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then "_" else l)
|
||||||
|
_ -> (0,unspec)
|
||||||
|
|
||||||
|
wnodes = [[show i, unwords ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] |
|
||||||
|
(i, ((fun,p),ws)) <- tail nodeWords,
|
||||||
|
let pos = showCId $ lookValCat pgf fun,
|
||||||
|
let morph = unspec,
|
||||||
|
let (dom,lab) = lookDomLab p
|
||||||
|
]
|
||||||
|
|
||||||
|
unspec = "_"
|
||||||
|
|
||||||
type Labels = Map.Map CId [String]
|
type Labels = Map.Map CId [String]
|
||||||
|
|
||||||
getDepLabels :: [String] -> Labels
|
getDepLabels :: [String] -> Labels
|
||||||
getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss]
|
getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
-- parse trees from Linearize.linearizeMark
|
-- parse trees from Linearize.linearizeMark
|
||||||
---- nubrec and domins are quadratic, but could be (n log n)
|
---- nubrec and domins are quadratic, but could be (n log n)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user