Malt parser .conll format by vd -output=malt

This commit is contained in:
aarne
2009-10-20 13:04:33 +00:00
parent 499ac428d3
commit e82b6e02e9
2 changed files with 39 additions and 12 deletions

View File

@@ -569,7 +569,8 @@ allCommands cod env@(pgf, mos) = Map.fromList [
longname = "visualize_dependency",
synopsis = "show word dependency tree graphically",
explanation = unlines [
"Prints a dependency tree the .dot format (the graphviz format).",
"Prints a dependency tree in the .dot format (the graphviz format, default)",
"or the MaltParser/CoNLL format (flag -output=malt)",
"By default, the last argument is the head of every abstract syntax",
"function; moreover, the head depends on the head of the function above.",
"The graph can be saved in a file by the wf command as usual.",
@@ -581,21 +582,21 @@ allCommands cod env@(pgf, mos) = Map.fromList [
exec = \opts es -> do
let debug = isOpt "v" opts
let file = valStrOpts "file" "" opts
let outp = valStrOpts "output" "dot" opts
mlab <- case file of
"" -> return Nothing
_ -> readFile file >>= return . Just . getDepLabels . lines
let lang = optLang opts
let grph = if null es then [] else
dependencyTree debug mlab Nothing pgf lang (head es)
let grphs = unlines $ map (dependencyTree outp debug mlab Nothing pgf lang) es
if isFlag "view" opts || isFlag "format" opts then do
let file s = "_grph." ++ s
let file s = "_grphd." ++ s
let view = optViewGraph opts ++ " "
let format = optViewFormat opts
writeFile (file "dot") (enc grph)
writeFile (file "dot") (enc grphs)
system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++
" ; " ++ view ++ file format
return void
else return $ fromString grph,
else return $ fromString grphs,
examples = [
"gr | aw -- generate a tree and show word alignment as graph script",
"gr | vt -view=\"open\" -- generate a tree and display alignment on a Mac"
@@ -606,6 +607,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
flags = [
("file","configuration file for labels per fun, format 'fun l1 ... label ... l2'"),
("format","format of the visualization file (default \"png\")"),
("output","output format of graph source (default \"dot\")"),
("view","program to open the resulting file (default \"open\")")
]
}),

View File

@@ -60,10 +60,16 @@ prGraph digr ns = concat $ map (++"\n") $ [graph ++ "{\n"] ++ ns ++ ["}"] where
-- dependency trees from Linearize.linearizeMark
dependencyTree :: Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String
dependencyTree debug mlab ms pgf lang exp = prGraph True lin2dep where
dependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String
dependencyTree format debug mlab ms pgf lang exp = case format of
"malt" -> unlines (lin2dep format)
_ -> prGraph True (lin2dep format)
lin2dep = trace (ifd (show sortedNodes ++ show nodeWords)) $ prelude ++ nodes ++ links
where
lin2dep format = trace (ifd (show sortedNodes ++ show nodeWords)) $ case format of
"malt" -> map (concat . intersperse "\t") wnodes
_ -> prelude ++ nodes ++ links
ifd s = if debug then s else []
@@ -78,8 +84,8 @@ dependencyTree debug mlab ms pgf lang exp = prGraph True lin2dep where
nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)|
((Just f,p),w) <- wlins pot]
links = map mkLink
[(word y, x, label tr y x) |
links = map mkLink thelinks
thelinks = [(word y, x, label tr y x) |
(_,((f,x),_)) <- tail nodeWords,
let y = dominant x]
mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;"
@@ -120,13 +126,32 @@ dependencyTree debug mlab ms pgf lang exp = prGraph True lin2dep where
Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i
_ -> showCId f ++ "#" ++ show i
-- to generate CoNLL format for MaltParser
nodeMap :: Map.Map [Int] Int
nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords]
arcMap :: Map.Map [Int] ([Int],String)
arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks]
lookDomLab p = case Map.lookup p arcMap of
Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then "_" else l)
_ -> (0,unspec)
wnodes = [[show i, unwords ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] |
(i, ((fun,p),ws)) <- tail nodeWords,
let pos = showCId $ lookValCat pgf fun,
let morph = unspec,
let (dom,lab) = lookDomLab p
]
unspec = "_"
type Labels = Map.Map CId [String]
getDepLabels :: [String] -> Labels
getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss]
-- parse trees from Linearize.linearizeMark
---- nubrec and domins are quadratic, but could be (n log n)