---------------------------------------------------------------------- -- | -- Module : VisualizeTree -- Maintainer : AR -- Stability : (stable) -- Portability : (portable) -- -- > CVS $Date: -- > CVS $Author: -- > CVS $Revision: -- -- Print a graph of an abstract syntax tree in Graphviz DOT format -- Based on BB's VisualizeGrammar -- FIXME: change this to use GF.Visualization.Graphviz, -- instead of rolling its own. ----------------------------------------------------------------------------- module PGF.VisualizeTree ( GraphvizOptions(..) , graphvizDefaults , graphvizAbstractTree , graphvizParseTree , graphvizParseTreeOld , graphvizDependencyTree , graphvizBracketedString , graphvizAlignment , gizaAlignment , getDepLabels ) where import PGF.CId (CId,wildCId,showCId,ppCId,pCId,mkCId) import PGF.Data import PGF.Expr (showExpr, Tree) import PGF.Linearize import PGF.Macros (lookValCat, lookMap, BracketedString(..), BracketedTokn(..), flattenBracketedString) import qualified Data.Map as Map import qualified Data.IntMap as IntMap import Data.List (intersperse,nub,mapAccumL) import Data.Char (isDigit) import Data.Maybe (fromMaybe) import Text.PrettyPrint import Data.Array.IArray import Control.Monad import qualified Data.Set as Set import qualified Text.ParserCombinators.ReadP as RP data GraphvizOptions = GraphvizOptions {noLeaves :: Bool, noFun :: Bool, noCat :: Bool, nodeFont :: String, leafFont :: String, nodeColor :: String, leafColor :: String, nodeEdgeStyle :: String, leafEdgeStyle :: String } graphvizDefaults = GraphvizOptions False False False "" "" "" "" "" "" -- | Renders abstract syntax tree in Graphviz format graphvizAbstractTree :: PGF -> (Bool,Bool) -> Tree -> String graphvizAbstractTree pgf (funs,cats) = render . tree2graph where tree2graph t = text "graph {" $$ ppGraph [] [] 0 t $$ text "}" getAbs xs (EAbs _ x e) = getAbs (x:xs) e getAbs xs (ETyped e _) = getAbs xs e getAbs xs e = (xs,e) getApp (EApp x (EImplArg y)) es = getApp x es getApp (EApp x y) es = getApp x (y:es) getApp (ETyped e _) es = getApp e es getApp e es = (e,es) getLbl scope (EFun f) = let fun = if funs then ppCId f else empty cat = if cats then ppCId (lookValCat (abstract pgf) f) else empty sep = if funs && cats then colon else empty in fun <+> sep <+> cat getLbl scope (ELit l) = text (escapeStr (render (ppLit l))) getLbl scope (EMeta i) = ppMeta i getLbl scope (EVar i) = ppCId (scope !! i) getLbl scope (ETyped e _) = getLbl scope e getLbl scope (EImplArg e) = getLbl scope e ppGraph scope ps i e0 = let (xs, e1) = getAbs [] e0 (e2,args) = getApp e1 [] binds = if null xs then empty else text "\\\\" <> hcat (punctuate comma (map ppCId xs)) <+> text "->" (lbl,eargs) = case e2 of EAbs _ _ _ -> (char '@', e2:args) -- eta-redexes are rendered with artificial "@" node _ -> (getLbl scope' e2, args) scope' = xs ++ scope in ppNode (i:ps) <> text "[label =" <+> doubleQuotes (binds <+> lbl) <> text ", style = \"solid\", shape = \"plaintext\"] ;" $$ (if null ps then empty else ppNode ps <+> text "--" <+> ppNode (i:ps) <+> text "[style = \"solid\"];") $$ vcat (zipWith (ppGraph scope' (i:ps)) [0..] eargs) ppNode ps = char 'n' <> hcat (punctuate (char '_') (map int ps)) escapeStr [] = [] escapeStr ('\\':cs) = '\\':'\\':escapeStr cs escapeStr ('"' :cs) = '\\':'"' :escapeStr cs escapeStr (c :cs) = c :escapeStr cs type Labels = Map.Map CId [String] graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Tree -> String graphvizDependencyTree format debug mlab ms pgf lang t = render $ case format of "conll" -> vcat (map (hcat . intersperse (char '\t') ) wnodes) "malt_input" -> vcat (map (hcat . intersperse (char '\t') . take 6) wnodes) _ -> text "digraph {" $$ space $$ nest 2 (text "rankdir=RL ;" $$ text "node [shape = plaintext] ;" $$ vcat nodes $$ vcat links) $$ text "}" where nodes = map mkNode leaves links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun),_,w) <- tail leaves] wnodes = [[int i, maltws ws, ppCId fun, ppCId cat, ppCId cat, unspec, int parent, text lab, unspec, unspec] | ((cat,fid,fun),i,ws) <- tail leaves, let (lab,parent) = maybe (dep_lbl,0) (\(lbl,fid) -> (lbl,head [i | ((_,fid1,_),i,_) <- leaves, fid == fid1])) (lookup fid deps) ] maltws = text . concat . intersperse "+" . words -- no spaces in column 2 nil = -1 bs = bracketedLinearize pgf lang t root = (wildCId,nil,wildCId) leaves = (root,0,root_lbl) : (groupAndIndexIt 1 . getLeaves root) bs deps = let (_,(h,deps)) = getDeps 0 [] t [] in (h,(dep_lbl,nil)):deps groupAndIndexIt id [] = [] groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1 where collect pws@((p1,w):pws1) | p == p1 = let (ws,pws2) = collect pws1 in (w:ws,pws2) collect pws = ([],pws) getLeaves parent bs = case bs of Leaf w -> [(parent,w)] Bracket cat fid _ fun _ bss -> concatMap (getLeaves (cat,fid,fun)) bss mkNode ((_,p,_),i,w) = tag p <+> brackets (text "label = " <> doubleQuotes (int i <> char '.' <+> text w)) <+> semi mkLink (x,(lbl,y)) = tag x <+> text "->" <+> tag y <+> text "[label = " <> doubleQuotes (text lbl) <> text "] ;" labels = maybe Map.empty id mlab getDeps n_fid xs (EAbs _ x e) es = getDeps n_fid (x:xs) e es getDeps n_fid xs (EApp e1 e2) es = getDeps n_fid xs e1 (e2:es) getDeps n_fid xs (EImplArg e) es = getDeps n_fid xs e es getDeps n_fid xs (ETyped e _) es = getDeps n_fid xs e es getDeps n_fid xs (EFun f) es = let (n_fid_1,ds) = descend n_fid xs es (mb_h, deps) = selectHead f ds in case mb_h of Just (fid,deps0) -> (n_fid_1+1,(fid,deps0++ [(n_fid_1,(dep_lbl,fid))]++ concat [(m,(lbl,fid)):ds | (lbl,(m,ds)) <- deps])) Nothing -> (n_fid_1+1,(n_fid_1,concat [(m,(lbl,n_fid_1)):ds | (lbl,(m,ds)) <- deps])) getDeps n_fid xs (EMeta i) es = (n_fid+2,(n_fid,[])) getDeps n_fid xs (EVar i) _ = (n_fid+2,(n_fid,[])) getDeps n_fid xs (ELit l) [] = (n_fid+1,(n_fid,[])) descend n_fid xs es = mapAccumL (\n_fid e -> getDeps n_fid xs e []) n_fid es selectHead f ds = case Map.lookup f labels of Just lbls -> extractHead (zip lbls ds) Nothing -> extractLast ds where extractHead [] = (Nothing, []) extractHead (ld@(l,d):lds) | l == head_lbl = (Just d,lds) | otherwise = let (mb_h,deps) = extractHead lds in (mb_h,ld:deps) extractLast [] = (Nothing, []) extractLast (d:ds) | null ds = (Just d,[]) | otherwise = let (mb_h,deps) = extractLast ds in (mb_h,(dep_lbl,d):deps) dep_lbl = "dep" head_lbl = "head" root_lbl = "ROOT" unspec = text "_" getDepLabels :: [String] -> Labels getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss] graphvizParseTree :: PGF -> Language -> GraphvizOptions -> Tree -> String graphvizParseTree pgf lang opts = graphvizBracketedString opts . bracketedLinearize pgf lang graphvizBracketedString :: GraphvizOptions -> BracketedString -> String graphvizBracketedString opts bs = render graphviz_code where graphviz_code = text "graph {" $$ text node_style $$ vcat internal_nodes $$ (if noLeaves opts then empty else text leaf_style $$ leaf_nodes ) $$ text "}" leaf_style = mkOption "edge" "style" (leafEdgeStyle opts) ++ mkOption "edge" "color" (leafColor opts) ++ mkOption "node" "fontcolor" (leafColor opts) ++ mkOption "node" "fontname" (leafFont opts) ++ mkOption "node" "shape" "plaintext" node_style = mkOption "edge" "style" (nodeEdgeStyle opts) ++ mkOption "edge" "color" (nodeColor opts) ++ mkOption "node" "fontcolor" (nodeColor opts) ++ mkOption "node" "fontname" (nodeFont opts) ++ mkOption "node" "shape" nodeshape where nodeshape | noFun opts && noCat opts = "point" | otherwise = "plaintext" mkOption object optname optvalue | null optvalue = "" | otherwise = object ++ "[" ++ optname ++ "=\"" ++ optvalue ++ "\"]; " mkNode fun cat | noFun opts = showCId cat | noCat opts = showCId fun | otherwise = showCId fun ++ " : " ++ showCId cat nil = -1 internal_nodes = [mkLevel internals | internals <- getInternals [(nil, bs)], not (null internals)] leaf_nodes = mkLevel [(parent, id, word) | (id, (parent, word)) <- zip [100000..] (getLeaves nil bs)] getInternals [] = [] getInternals nodes = nub [(parent, fid, mkNode fun cat) | (parent, Bracket cat fid _ fun _ _) <- nodes] : getInternals [(fid, child) | (_, Bracket _ fid _ _ _ children) <- nodes, child <- children] getLeaves parent (Leaf word) = [(parent, word)] getLeaves parent (Bracket _ fid i _ _ children) = concatMap (getLeaves fid) children mkLevel nodes = text "subgraph {rank=same;" $$ nest 2 (-- the following gives the name of the node and its label: vcat [tag id <> text (mkOption "" "label" lbl) | (_, id, lbl) <- nodes] $$ -- the following is for fixing the order between the children: (if length nodes > 1 then text (mkOption "edge" "style" "invis") $$ hsep (intersperse (text " -- ") [tag id | (_, id, _) <- nodes]) <+> semi else empty) ) $$ text "}" $$ -- the following is for the edges between parent and children: vcat [tag pid <> text " -- " <> tag id <> semi | (pid, id, _) <- nodes, pid /= nil] $$ space graphvizParseTreeOld :: PGF -> Language -> Tree -> String graphvizParseTreeOld pgf lang = graphvizBracketedStringOld . bracketedLinearize pgf lang graphvizBracketedStringOld :: BracketedString -> String graphvizBracketedStringOld = render . lin2tree where lin2tree bs = text "graph {" $$ space $$ nest 2 (text "rankdir=BU ;" $$ text "node [shape = record, color = white] ;" $$ space $$ vcat (nodes bs)) $$ text "}" where nodes bs = zipWith mkStruct [0..] (interns ++ [zipWith (\i (l,p,w) -> (l,p,i,w)) [99990..] leaves]) nil = -1 leaves = getLeaves 0 nil bs interns = getInterns 0 [(nil,bs)] getLeaves level parent bs = case bs of Leaf w -> [(level-1,parent,w)] Bracket _ fid i _ _ bss -> concatMap (getLeaves (level+1) fid) bss getInterns level [] = [] getInterns level nodes = nub [(level-1,parent,fid,showCId cat) | (parent,Bracket cat fid _ _ _ _) <- nodes] : getInterns (level+1) [(fid,child) | (_,Bracket _ fid _ _ _ children) <- nodes, child <- children] mkStruct l cs = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$ vcat [link pl pid l id | (pl,pid,id,_) <- cs] link pl pid l id | pl < 0 = empty | otherwise = struct pl <> colon <> tag pid <> colon <> char 's' <+> text "--" <+> struct l <> colon <> tag id <> colon <> char 'n' <+> semi fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text c | (_,_,id,c) <- cs]) type Rel = (Int,[Int]) -- possibly needs changes after clearing about many-to-many on this level type IndexedSeq = (Int,[String]) type LangSeq = [IndexedSeq] data PreAlign = PreAlign [LangSeq] [[Rel]] deriving Show -- alignment structure for a phrase in 2 languages, along with the -- many-to-many relations genPreAlignment :: PGF -> [Language] -> Expr -> PreAlign genPreAlignment pgf langs = lin2align . linsBracketed where linsBracketed t = [bracketedLinearize pgf lang t | lang <- langs] lin2align :: [BracketedString] -> PreAlign lin2align bss = PreAlign langSeqs langRels where (langSeqs,langRels) = mkLayers leaves nil = -1 leaves = map (groupAndIndexIt 0 . getLeaves nil) bss groupAndIndexIt id [] = [] groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1 where collect pws@((p1,w):pws1) | p == p1 = let (ws,pws2) = collect pws1 in (w:ws,pws2) collect pws = ([],pws) getLeaves parent bs = case bs of Leaf w -> [(parent,w)] Bracket _ fid _ _ _ bss -> concatMap (getLeaves fid) bss mkLayers (cs:css:rest) = let (lrest, rrest) = mkLayers (css:rest) in ((fields cs) : lrest, (map (mkLinks css) cs) : rrest) mkLayers [cs] = ([fields cs], []) mkLayers _ = ([],[]) mkLinks cs (p0,id0,_) = (id0,indices) where indices = [id1 | (p1,id1,_) <- cs, p1 == p0] fields cs = [(id, [w]) | (_,id,w) <- cs] -- we assume we have 2 languages - source and target gizaAlignment :: PGF -> (Language,Language) -> Expr -> (String,String,String) gizaAlignment pgf (l1,l2) e = let PreAlign [rl1,rl2] rels = genPreAlignment pgf [l1,l2] e in (unwords (map showIndSeq rl1), unwords (concat $ map snd rl2), unwords $ words $ showRels rl2 (concat rels)) showIndSeq (_,l) = let ww = map words l w_ = map (intersperse "_") ww in concat $ concat w_ showRels inds2 [] = [] showRels inds2 ((ind,is):rest) = let lOffs = computeOffset inds2 0 ltemp = [(i,getOffsetIndex i lOffs) | i <- is] lcurr = concat $ map (\(offset,ncomp) -> [show ind ++ "-" ++ show (-1 + offset + ii) ++ " "| ii <- [1..ncomp]]) (map snd ltemp) lrest = showRels inds2 rest in (unwords lcurr) ++ lrest getOffsetIndex i lst = let ll = filter (\(x,_) -> x == i) lst in snd $ head ll computeOffset [] transp = [] computeOffset ((i,l):rest) transp = let nw = (length $ words $ concat l) in (i,(transp,nw)) : (computeOffset rest (transp + nw)) -- alignment in the Graphviz format from the intermediate structure -- same effect as the old direct function graphvizAlignment :: PGF -> [Language] -> Expr -> String graphvizAlignment pgf langs exp = render (text "digraph {" $$ space $$ nest 2 (text "rankdir=LR ;" $$ text "node [shape = record] ;" $$ space $$ renderList 0 lrels rrels) $$ text "}") where (PreAlign lrels rrels) = genPreAlignment pgf langs exp renderList ii (l:ls) (r:rs) = struct ii <> text "[label = \"" <> fields l <> text "\"] ;" $$ (case ls of [] -> empty _ -> vcat [struct ii <> colon <> tag id0 <> colon <> char 'e' <+> text "->" <+> struct (ii+1) <> colon <> tag id1 <> colon <> char 'w' <+> semi | (id0,ids) <- r, id1 <- ids] $$ renderList (ii + 1) ls rs) renderList ii [] _ = empty renderList ii [l] [] = struct ii <> text "[label = \"" <> fields l <> text "\"] ;" fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text w | (id,ws) <- cs, w <- ws]) -- auxiliaries for graphviz syntax struct l = text ("struct" ++ show l) tbrackets d = char '<' <> d <> char '>' tag i | i < 0 = char 'r' <> int (negate i) | otherwise = char 'n' <> int i