mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-10 13:29:32 -06:00
458 lines
18 KiB
Haskell
458 lines
18 KiB
Haskell
----------------------------------------------------------------------
|
|
-- |
|
|
-- Module : VisualizeTree
|
|
-- Maintainer : AR
|
|
-- Stability : (stable)
|
|
-- Portability : (portable)
|
|
--
|
|
-- > CVS $Date:
|
|
-- > CVS $Author:
|
|
-- > CVS $Revision:
|
|
--
|
|
-- Print a graph of an abstract syntax tree in Graphviz DOT format
|
|
-- Based on BB's VisualizeGrammar
|
|
-- FIXME: change this to use GF.Visualization.Graphviz,
|
|
-- instead of rolling its own.
|
|
-----------------------------------------------------------------------------
|
|
|
|
module PGF.VisualizeTree
|
|
( GraphvizOptions(..)
|
|
, graphvizDefaults
|
|
, graphvizAbstractTree
|
|
, graphvizParseTree
|
|
, graphvizParseTreeOld
|
|
, graphvizDependencyTree
|
|
, graphvizBracketedString
|
|
, graphvizAlignment
|
|
, gizaAlignment
|
|
, getDepLabels
|
|
) where
|
|
|
|
import PGF.CId (CId,wildCId,showCId,ppCId,pCId,mkCId)
|
|
import PGF.Data
|
|
import PGF.Expr (showExpr, Tree)
|
|
import PGF.Linearize
|
|
import PGF.Macros (lookValCat, lookMap,
|
|
BracketedString(..), BracketedTokn(..), flattenBracketedString)
|
|
|
|
import qualified Data.Map as Map
|
|
import qualified Data.IntMap as IntMap
|
|
import Data.List (intersperse,nub,mapAccumL)
|
|
import Data.Char (isDigit)
|
|
import Data.Maybe (fromMaybe)
|
|
import Text.PrettyPrint
|
|
|
|
import Data.Array.IArray
|
|
import Control.Monad
|
|
import qualified Data.Set as Set
|
|
import qualified Text.ParserCombinators.ReadP as RP
|
|
|
|
|
|
data GraphvizOptions = GraphvizOptions {noLeaves :: Bool,
|
|
noFun :: Bool,
|
|
noCat :: Bool,
|
|
nodeFont :: String,
|
|
leafFont :: String,
|
|
nodeColor :: String,
|
|
leafColor :: String,
|
|
nodeEdgeStyle :: String,
|
|
leafEdgeStyle :: String
|
|
}
|
|
|
|
graphvizDefaults = GraphvizOptions False False False "" "" "" "" "" ""
|
|
|
|
|
|
-- | Renders abstract syntax tree in Graphviz format
|
|
graphvizAbstractTree :: PGF -> (Bool,Bool) -> Tree -> String
|
|
graphvizAbstractTree pgf (funs,cats) = render . tree2graph
|
|
where
|
|
tree2graph t =
|
|
text "graph {" $$
|
|
ppGraph [] [] 0 t $$
|
|
text "}"
|
|
|
|
getAbs xs (EAbs _ x e) = getAbs (x:xs) e
|
|
getAbs xs (ETyped e _) = getAbs xs e
|
|
getAbs xs e = (xs,e)
|
|
|
|
getApp (EApp x (EImplArg y)) es = getApp x es
|
|
getApp (EApp x y) es = getApp x (y:es)
|
|
getApp (ETyped e _) es = getApp e es
|
|
getApp e es = (e,es)
|
|
|
|
getLbl scope (EFun f) = let fun = if funs then ppCId f else empty
|
|
cat = if cats then ppCId (lookValCat (abstract pgf) f) else empty
|
|
sep = if funs && cats then colon else empty
|
|
in fun <+> sep <+> cat
|
|
getLbl scope (ELit l) = text (escapeStr (render (ppLit l)))
|
|
getLbl scope (EMeta i) = ppMeta i
|
|
getLbl scope (EVar i) = ppCId (scope !! i)
|
|
getLbl scope (ETyped e _) = getLbl scope e
|
|
getLbl scope (EImplArg e) = getLbl scope e
|
|
|
|
ppGraph scope ps i e0 =
|
|
let (xs, e1) = getAbs [] e0
|
|
(e2,args) = getApp e1 []
|
|
binds = if null xs
|
|
then empty
|
|
else text "\\\\" <> hcat (punctuate comma (map ppCId xs)) <+> text "->"
|
|
(lbl,eargs) = case e2 of
|
|
EAbs _ _ _ -> (char '@', e2:args) -- eta-redexes are rendered with artificial "@" node
|
|
_ -> (getLbl scope' e2, args)
|
|
scope' = xs ++ scope
|
|
in ppNode (i:ps) <> text "[label =" <+> doubleQuotes (binds <+> lbl) <> text ", style = \"solid\", shape = \"plaintext\"] ;" $$
|
|
(if null ps
|
|
then empty
|
|
else ppNode ps <+> text "--" <+> ppNode (i:ps) <+> text "[style = \"solid\"];") $$
|
|
vcat (zipWith (ppGraph scope' (i:ps)) [0..] eargs)
|
|
|
|
ppNode ps = char 'n' <> hcat (punctuate (char '_') (map int ps))
|
|
|
|
escapeStr [] = []
|
|
escapeStr ('\\':cs) = '\\':'\\':escapeStr cs
|
|
escapeStr ('"' :cs) = '\\':'"' :escapeStr cs
|
|
escapeStr (c :cs) = c :escapeStr cs
|
|
|
|
|
|
type Labels = Map.Map CId [String]
|
|
|
|
graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Tree -> String
|
|
graphvizDependencyTree format debug mlab ms pgf lang t = render $
|
|
case format of
|
|
"conll" -> vcat (map (hcat . intersperse (char '\t') ) wnodes)
|
|
"malt_input" -> vcat (map (hcat . intersperse (char '\t') . take 6) wnodes)
|
|
_ -> text "digraph {" $$
|
|
space $$
|
|
nest 2 (text "rankdir=RL ;" $$
|
|
text "node [shape = plaintext] ;" $$
|
|
vcat nodes $$
|
|
vcat links) $$
|
|
text "}"
|
|
where
|
|
nodes = map mkNode leaves
|
|
links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun),_,w) <- tail leaves]
|
|
|
|
wnodes = [[int i, maltws ws, ppCId fun, ppCId cat, ppCId cat, unspec, int parent, text lab, unspec, unspec] |
|
|
((cat,fid,fun),i,ws) <- tail leaves,
|
|
let (lab,parent) = maybe (dep_lbl,0)
|
|
(\(lbl,fid) -> (lbl,head [i | ((_,fid1,_),i,_) <- leaves, fid == fid1]))
|
|
(lookup fid deps)
|
|
]
|
|
maltws = text . concat . intersperse "+" . words -- no spaces in column 2
|
|
|
|
nil = -1
|
|
|
|
bs = bracketedLinearize pgf lang t
|
|
|
|
root = (wildCId,nil,wildCId)
|
|
|
|
leaves = (root,0,root_lbl) : (groupAndIndexIt 1 . getLeaves root) bs
|
|
deps = let (_,(h,deps)) = getDeps 0 [] t []
|
|
in (h,(dep_lbl,nil)):deps
|
|
|
|
groupAndIndexIt id [] = []
|
|
groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
|
|
in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1
|
|
where
|
|
collect pws@((p1,w):pws1)
|
|
| p == p1 = let (ws,pws2) = collect pws1
|
|
in (w:ws,pws2)
|
|
collect pws = ([],pws)
|
|
|
|
getLeaves parent bs =
|
|
case bs of
|
|
Leaf w -> [(parent,w)]
|
|
Bracket cat fid _ fun _ bss -> concatMap (getLeaves (cat,fid,fun)) bss
|
|
|
|
mkNode ((_,p,_),i,w) =
|
|
tag p <+> brackets (text "label = " <> doubleQuotes (int i <> char '.' <+> text w)) <+> semi
|
|
|
|
mkLink (x,(lbl,y)) = tag x <+> text "->" <+> tag y <+> text "[label = " <> doubleQuotes (text lbl) <> text "] ;"
|
|
|
|
labels = maybe Map.empty id mlab
|
|
|
|
getDeps n_fid xs (EAbs _ x e) es = getDeps n_fid (x:xs) e es
|
|
getDeps n_fid xs (EApp e1 e2) es = getDeps n_fid xs e1 (e2:es)
|
|
getDeps n_fid xs (EImplArg e) es = getDeps n_fid xs e es
|
|
getDeps n_fid xs (ETyped e _) es = getDeps n_fid xs e es
|
|
getDeps n_fid xs (EFun f) es = let (n_fid_1,ds) = descend n_fid xs es
|
|
(mb_h, deps) = selectHead f ds
|
|
in case mb_h of
|
|
Just (fid,deps0) -> (n_fid_1+1,(fid,deps0++
|
|
[(n_fid_1,(dep_lbl,fid))]++
|
|
concat [(m,(lbl,fid)):ds | (lbl,(m,ds)) <- deps]))
|
|
Nothing -> (n_fid_1+1,(n_fid_1,concat [(m,(lbl,n_fid_1)):ds | (lbl,(m,ds)) <- deps]))
|
|
getDeps n_fid xs (EMeta i) es = (n_fid+2,(n_fid,[]))
|
|
getDeps n_fid xs (EVar i) _ = (n_fid+2,(n_fid,[]))
|
|
getDeps n_fid xs (ELit l) [] = (n_fid+1,(n_fid,[]))
|
|
|
|
descend n_fid xs es = mapAccumL (\n_fid e -> getDeps n_fid xs e []) n_fid es
|
|
|
|
selectHead f ds =
|
|
case Map.lookup f labels of
|
|
Just lbls -> extractHead (zip lbls ds)
|
|
Nothing -> extractLast ds
|
|
where
|
|
extractHead [] = (Nothing, [])
|
|
extractHead (ld@(l,d):lds)
|
|
| l == head_lbl = (Just d,lds)
|
|
| otherwise = let (mb_h,deps) = extractHead lds
|
|
in (mb_h,ld:deps)
|
|
|
|
extractLast [] = (Nothing, [])
|
|
extractLast (d:ds)
|
|
| null ds = (Just d,[])
|
|
| otherwise = let (mb_h,deps) = extractLast ds
|
|
in (mb_h,(dep_lbl,d):deps)
|
|
|
|
dep_lbl = "dep"
|
|
head_lbl = "head"
|
|
root_lbl = "ROOT"
|
|
unspec = text "_"
|
|
|
|
getDepLabels :: [String] -> Labels
|
|
getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss]
|
|
|
|
|
|
graphvizParseTree :: PGF -> Language -> GraphvizOptions -> Tree -> String
|
|
graphvizParseTree pgf lang opts = graphvizBracketedString opts . bracketedLinearize pgf lang
|
|
|
|
|
|
graphvizBracketedString :: GraphvizOptions -> BracketedString -> String
|
|
graphvizBracketedString opts bs = render graphviz_code
|
|
where
|
|
graphviz_code
|
|
= text "graph {" $$
|
|
text node_style $$
|
|
vcat internal_nodes $$
|
|
(if noLeaves opts then empty
|
|
else text leaf_style $$
|
|
leaf_nodes
|
|
) $$ text "}"
|
|
|
|
leaf_style = mkOption "edge" "style" (leafEdgeStyle opts) ++
|
|
mkOption "edge" "color" (leafColor opts) ++
|
|
mkOption "node" "fontcolor" (leafColor opts) ++
|
|
mkOption "node" "fontname" (leafFont opts) ++
|
|
mkOption "node" "shape" "plaintext"
|
|
|
|
node_style = mkOption "edge" "style" (nodeEdgeStyle opts) ++
|
|
mkOption "edge" "color" (nodeColor opts) ++
|
|
mkOption "node" "fontcolor" (nodeColor opts) ++
|
|
mkOption "node" "fontname" (nodeFont opts) ++
|
|
mkOption "node" "shape" nodeshape
|
|
where nodeshape | noFun opts && noCat opts = "point"
|
|
| otherwise = "plaintext"
|
|
|
|
mkOption object optname optvalue
|
|
| null optvalue = ""
|
|
| otherwise = object ++ "[" ++ optname ++ "=\"" ++ optvalue ++ "\"]; "
|
|
|
|
mkNode fun cat
|
|
| noFun opts = showCId cat
|
|
| noCat opts = showCId fun
|
|
| otherwise = showCId fun ++ " : " ++ showCId cat
|
|
|
|
nil = -1
|
|
internal_nodes = [mkLevel internals |
|
|
internals <- getInternals [(nil, bs)],
|
|
not (null internals)]
|
|
leaf_nodes = mkLevel [(parent, id, word) |
|
|
(id, (parent, word)) <- zip [100000..] (getLeaves nil bs)]
|
|
|
|
getInternals [] = []
|
|
getInternals nodes
|
|
= nub [(parent, fid, mkNode fun cat) |
|
|
(parent, Bracket cat fid _ fun _ _) <- nodes]
|
|
: getInternals [(fid, child) |
|
|
(_, Bracket _ fid _ _ _ children) <- nodes,
|
|
child <- children]
|
|
|
|
getLeaves parent (Leaf word) = [(parent, word)]
|
|
getLeaves parent (Bracket _ fid i _ _ children)
|
|
= concatMap (getLeaves fid) children
|
|
|
|
mkLevel nodes
|
|
= text "subgraph {rank=same;" $$
|
|
nest 2 (-- the following gives the name of the node and its label:
|
|
vcat [tag id <> text (mkOption "" "label" lbl) | (_, id, lbl) <- nodes] $$
|
|
-- the following is for fixing the order between the children:
|
|
(if length nodes > 1 then
|
|
text (mkOption "edge" "style" "invis") $$
|
|
hsep (intersperse (text " -- ") [tag id | (_, id, _) <- nodes]) <+> semi
|
|
else empty)
|
|
) $$
|
|
text "}" $$
|
|
-- the following is for the edges between parent and children:
|
|
vcat [tag pid <> text " -- " <> tag id <> semi | (pid, id, _) <- nodes, pid /= nil] $$
|
|
space
|
|
|
|
|
|
|
|
graphvizParseTreeOld :: PGF -> Language -> Tree -> String
|
|
graphvizParseTreeOld pgf lang = graphvizBracketedStringOld . bracketedLinearize pgf lang
|
|
|
|
|
|
graphvizBracketedStringOld :: BracketedString -> String
|
|
graphvizBracketedStringOld = render . lin2tree
|
|
where
|
|
lin2tree bs =
|
|
text "graph {" $$
|
|
space $$
|
|
nest 2 (text "rankdir=BU ;" $$
|
|
text "node [shape = record, color = white] ;" $$
|
|
space $$
|
|
vcat (nodes bs)) $$
|
|
text "}"
|
|
where
|
|
nodes bs = zipWith mkStruct [0..] (interns ++ [zipWith (\i (l,p,w) -> (l,p,i,w)) [99990..] leaves])
|
|
|
|
nil = -1
|
|
|
|
leaves = getLeaves 0 nil bs
|
|
interns = getInterns 0 [(nil,bs)]
|
|
|
|
getLeaves level parent bs =
|
|
case bs of
|
|
Leaf w -> [(level-1,parent,w)]
|
|
Bracket _ fid i _ _ bss -> concatMap (getLeaves (level+1) fid) bss
|
|
|
|
getInterns level [] = []
|
|
getInterns level nodes =
|
|
nub [(level-1,parent,fid,showCId cat) | (parent,Bracket cat fid _ _ _ _) <- nodes] :
|
|
getInterns (level+1) [(fid,child) | (_,Bracket _ fid _ _ _ children) <- nodes, child <- children]
|
|
|
|
mkStruct l cs = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$
|
|
vcat [link pl pid l id | (pl,pid,id,_) <- cs]
|
|
link pl pid l id
|
|
| pl < 0 = empty
|
|
| otherwise = struct pl <> colon <> tag pid <> colon <> char 's' <+>
|
|
text "--" <+>
|
|
struct l <> colon <> tag id <> colon <> char 'n' <+> semi
|
|
fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text c | (_,_,id,c) <- cs])
|
|
|
|
|
|
type Rel = (Int,[Int])
|
|
-- possibly needs changes after clearing about many-to-many on this level
|
|
|
|
type IndexedSeq = (Int,[String])
|
|
type LangSeq = [IndexedSeq]
|
|
|
|
data PreAlign = PreAlign [LangSeq] [[Rel]]
|
|
deriving Show
|
|
-- alignment structure for a phrase in 2 languages, along with the
|
|
-- many-to-many relations
|
|
|
|
|
|
genPreAlignment :: PGF -> [Language] -> Expr -> PreAlign
|
|
genPreAlignment pgf langs = lin2align . linsBracketed
|
|
where
|
|
linsBracketed t = [bracketedLinearize pgf lang t | lang <- langs]
|
|
|
|
lin2align :: [BracketedString] -> PreAlign
|
|
lin2align bss = PreAlign langSeqs langRels
|
|
where
|
|
(langSeqs,langRels) = mkLayers leaves
|
|
nil = -1
|
|
leaves = map (groupAndIndexIt 0 . getLeaves nil) bss
|
|
|
|
groupAndIndexIt id [] = []
|
|
groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
|
|
in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1
|
|
where
|
|
collect pws@((p1,w):pws1)
|
|
| p == p1 = let (ws,pws2) = collect pws1
|
|
in (w:ws,pws2)
|
|
collect pws = ([],pws)
|
|
|
|
getLeaves parent bs =
|
|
case bs of
|
|
Leaf w -> [(parent,w)]
|
|
Bracket _ fid _ _ _ bss -> concatMap (getLeaves fid) bss
|
|
|
|
mkLayers (cs:css:rest) = let (lrest, rrest) = mkLayers (css:rest)
|
|
in ((fields cs) : lrest, (map (mkLinks css) cs) : rrest)
|
|
mkLayers [cs] = ([fields cs], [])
|
|
mkLayers _ = ([],[])
|
|
|
|
mkLinks cs (p0,id0,_) = (id0,indices)
|
|
where
|
|
indices = [id1 | (p1,id1,_) <- cs, p1 == p0]
|
|
|
|
fields cs = [(id, [w]) | (_,id,w) <- cs]
|
|
|
|
|
|
-- we assume we have 2 languages - source and target
|
|
gizaAlignment :: PGF -> (Language,Language) -> Expr -> (String,String,String)
|
|
gizaAlignment pgf (l1,l2) e = let PreAlign [rl1,rl2] rels = genPreAlignment pgf [l1,l2] e
|
|
in
|
|
(unwords (map showIndSeq rl1), unwords (concat $ map snd rl2),
|
|
unwords $ words $ showRels rl2 (concat rels))
|
|
|
|
|
|
showIndSeq (_,l) = let ww = map words l
|
|
w_ = map (intersperse "_") ww
|
|
in
|
|
concat $ concat w_
|
|
|
|
showRels inds2 [] = []
|
|
showRels inds2 ((ind,is):rest) =
|
|
let lOffs = computeOffset inds2 0
|
|
ltemp = [(i,getOffsetIndex i lOffs) | i <- is]
|
|
lcurr = concat $ map (\(offset,ncomp) -> [show ind ++ "-" ++ show (-1 + offset + ii) ++ " "| ii <- [1..ncomp]]) (map snd ltemp)
|
|
lrest = showRels inds2 rest
|
|
in
|
|
(unwords lcurr) ++ lrest
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
getOffsetIndex i lst = let ll = filter (\(x,_) -> x == i) lst
|
|
in
|
|
snd $ head ll
|
|
|
|
computeOffset [] transp = []
|
|
computeOffset ((i,l):rest) transp = let nw = (length $ words $ concat l)
|
|
in (i,(transp,nw)) : (computeOffset rest (transp + nw))
|
|
|
|
|
|
|
|
-- alignment in the Graphviz format from the intermediate structure
|
|
-- same effect as the old direct function
|
|
graphvizAlignment :: PGF -> [Language] -> Expr -> String
|
|
graphvizAlignment pgf langs exp =
|
|
render (text "digraph {" $$
|
|
space $$
|
|
nest 2 (text "rankdir=LR ;" $$
|
|
text "node [shape = record] ;" $$
|
|
space $$
|
|
renderList 0 lrels rrels) $$
|
|
text "}")
|
|
where
|
|
(PreAlign lrels rrels) = genPreAlignment pgf langs exp
|
|
|
|
|
|
renderList ii (l:ls) (r:rs) = struct ii <> text "[label = \"" <> fields l <> text "\"] ;" $$
|
|
(case ls of
|
|
[] -> empty
|
|
_ -> vcat [struct ii <> colon <> tag id0
|
|
<> colon <> char 'e' <+> text "->" <+> struct (ii+1)
|
|
<> colon <> tag id1 <> colon <> char 'w' <+> semi
|
|
| (id0,ids) <- r, id1 <- ids] $$ renderList (ii + 1) ls rs)
|
|
renderList ii [] _ = empty
|
|
renderList ii [l] [] = struct ii <> text "[label = \"" <> fields l <> text "\"] ;"
|
|
|
|
fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text w | (id,ws) <- cs, w <- ws])
|
|
|
|
|
|
|
|
-- auxiliaries for graphviz syntax
|
|
struct l = text ("struct" ++ show l)
|
|
tbrackets d = char '<' <> d <> char '>'
|
|
tag i
|
|
| i < 0 = char 'r' <> int (negate i)
|
|
| otherwise = char 'n' <> int i
|