parse tree visualization command vp

This commit is contained in:
aarne
2009-10-07 16:18:33 +00:00
parent f607cd0108
commit deb905bbdb
3 changed files with 109 additions and 23 deletions

View File

@@ -564,6 +564,42 @@ allCommands cod env@(pgf, mos) = Map.fromList [
return $ fromString out,
options = transliterationPrintNames
}),
("vp", emptyCommandInfo {
longname = "visualize_parse",
synopsis = "show parse tree graphically",
explanation = unlines [
"Prints a parse tree the .dot format (the graphviz format).",
"The graph can be saved in a file by the wf command as usual.",
"If the -view flag is defined, the graph is saved in a temporary file",
"which is processed by graphviz and displayed by the program indicated",
"by the flag. The target format is png, unless overridden by the",
"flag -format."
],
exec = \opts es -> do
let lang = optLang opts
let grph = if null es then [] else parseTree Nothing pgf lang (head es)
if isFlag "view" opts || isFlag "format" opts then do
let file s = "_grph." ++ s
let view = optViewGraph opts ++ " "
let format = optViewFormat opts
writeFile (file "dot") (enc grph)
system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++
" ; " ++ view ++ file format
return void
else return $ fromString grph,
examples = [
"gr | aw -- generate a tree and show word alignment as graph script",
"gr | vt -view=\"open\" -- generate a tree and display alignment on a Mac"
],
options = [
],
flags = [
("format","format of the visualization file (default \"png\")"),
("view","program to open the resulting file (default \"open\")")
]
}),
("vt", emptyCommandInfo {
longname = "visualize_tree",
synopsis = "show a set of trees graphically",

View File

@@ -129,12 +129,14 @@ linearizesMark pgf lang = realizes . linTreeMark pgf lang
linTreeMark :: PGF -> CId -> Expr -> Term
linTreeMark pgf lang = lin [] . expr2tree
where
lin p (Abs xs e ) = case lin p e of
R ts -> R $ ts ++ (Data.List.map (kks . showCId . snd) xs)
TM s -> R $ (TM s) : (Data.List.map (kks . showCId . snd) xs)
lin p (Fun fun es) = let argVariants =
mapM (\ (i,e) -> liftVariants $ lin (sub p i) e) (zip [0..] es)
in variants [mark p $ compute pgf lang args $ look fun | args <- argVariants]
lin p (Abs xs e ) = case lin p e of
R ts -> R $ ts ++ (Data.List.map (kks . showCId . snd) xs)
TM s -> R $ (TM s) : (Data.List.map (kks . showCId . snd) xs)
lin p (Fun fun es) =
let argVariants =
mapM (\ (i,e) -> liftVariants $ lin (sub p i) e) (zip [0..] es)
in variants [mark (fun,p) $ compute pgf lang args $ look fun |
args <- argVariants]
lin p (Lit (LStr s)) = mark p $ R [kks (show s)] -- quoted
lin p (Lit (LInt i)) = mark p $ R [kks (show i)]
lin p (Lit (LFlt d)) = mark p $ R [kks (show d)]
@@ -143,6 +145,7 @@ linTreeMark pgf lang = lin [] . expr2tree
look = lookLin pgf lang
mark :: Show a => a -> Term -> Term
mark p t = case t of
R ts -> R $ map (mark p) ts
FV ts -> R $ map (mark p) ts

View File

@@ -15,11 +15,11 @@
-- instead of rolling its own.
-----------------------------------------------------------------------------
module PGF.VisualizeTree ( visualizeTrees, alignLinearize
module PGF.VisualizeTree ( visualizeTrees, parseTree, alignLinearize
,PosText(..),readPosText
) where
import PGF.CId (showCId)
import PGF.CId (CId,showCId,pCId)
import PGF.Data
import PGF.Tree
import PGF.Linearize
@@ -29,6 +29,8 @@ import Data.List (intersperse,nub)
import Data.Char (isDigit)
import qualified Text.ParserCombinators.ReadP as RP
import Debug.Trace
visualizeTrees :: PGF -> (Bool,Bool) -> [Expr] -> String
visualizeTrees pgf funscats = unlines . map (prGraph False . tree2graph pgf funscats . expr2tree)
@@ -55,6 +57,57 @@ prGraph digr ns = concat $ map (++"\n") $ [graph ++ "{\n"] ++ ns ++ ["}"] where
graph = if digr then "digraph" else "graph"
-- parse trees from Linearize.linearizeMark
---- nubrec and domins are quadratic, but could be (n log n)
parseTree :: Maybe String -> PGF -> CId -> Expr -> String
parseTree ms pgf lang = prGraph False . lin2tree pgf . linMark where
linMark = head . linearizesMark pgf lang
---- use Just str if you have str to match against
lin2tree pgf s = trace s $ prelude ++ nodes ++ links where
prelude = ["rankdir=BU ;", "node [shape = record, color = white] ;"]
nodeRecs = zip [0..]
(nub (filter (not . null) (nlins [postext] ++ [leaves postext])))
nlins pts =
nubrec [] $ [(p,cat f) | T (Just f, p) _ <- pts] :
concatMap nlins [ts | T _ ts <- pts]
leaves pt = [(p++[j],s) | (j,(p,s)) <-
zip [9990..] [(p,s) | (p,ss) <- wlins pt, s <- ss]]
nubrec es rs = case rs of
r:rr -> let r' = filter (not . flip elem es) (nub r)
in r' : nubrec (r' ++ es) rr
_ -> rs
nodes = map mkStruct nodeRecs
mkStruct (i,cs) = struct i ++ "[label = \"" ++ fields cs ++ "\"] ;"
cat = showCId . lookValCat pgf
fields cs = concat (intersperse "|" [ mtag (showp p) ++ c | (p,c) <- cs])
struct i = "struct" ++ show i
links = map mkEdge domins
domins = nub [((i,x),(j,y)) |
(i,xs) <- nodeRecs, (j,ys) <- nodeRecs,
x <- xs, y <- ys, dominates x y]
dominates (p,x) (q,y) = not (null q) && p == init q
mkEdge ((i,x),(j,y)) =
struct i ++ ":n" ++ uncommas (showp (fst x)) ++ ":s -- " ++
struct j ++ ":n" ++ uncommas (showp (fst y)) ++ ":n ;"
postext = readPosText s
-- auxiliaries for graphviz syntax
struct i = "struct" ++ show i
mark (j,n) = "n" ++ show j ++ "a" ++ uncommas n
uncommas = map (\c -> if c==',' then 'c' else c)
tag s = "<" ++ s ++ ">"
showp = init . tail . show
mtag = tag . ('n':) . uncommas
-- word alignments from Linearize.linearizesMark
-- words are chunks like {[0,1,1,0] old}
@@ -63,7 +116,7 @@ alignLinearize pgf = prGraph True . lin2graph . linsMark where
linsMark t = [s | la <- cncnames pgf, s <- take 1 (linearizesMark pgf la t)]
lin2graph :: [String] -> [String]
lin2graph ss = prelude ++ nodes ++ links
lin2graph ss = trace (show ss) $ prelude ++ nodes ++ links
where
@@ -71,10 +124,9 @@ lin2graph ss = prelude ++ nodes ++ links
nlins :: [(Int,[((Int,String),String)])]
nlins = [(i, [((j,showp p),unw ws) | (j,(p,ws)) <- zip [0..] ws]) |
(i,ws) <- zip [0..] (map (wlins . readPosText) ss)]
(i,ws) <- zip [0..] (map (wlins . readPosText) ss)]
unw = concat . intersperse "\\ " -- space escape in graphviz
showp = init . tail . show
nodes = map mkStruct nlins
@@ -82,14 +134,6 @@ lin2graph ss = prelude ++ nodes ++ links
fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws])
struct i = "struct" ++ show i
mark (j,n) = "n" ++ show j ++ "a" ++ uncommas n
uncommas = map (\c -> if c==',' then 'c' else c)
tag s = "<" ++ s ++ ">"
links = nub $ concatMap mkEdge (init nlins)
mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list
@@ -100,15 +144,15 @@ lin2graph ss = prelude ++ nodes ++ links
wlins :: PosText -> [([Int],[String])]
wlins pt = case pt of
T p pts -> concatMap (lins p) pts
T (_,p) pts -> concatMap (lins p) pts
M ws -> if null ws then [] else [([],ws)]
where
lins p pt = case pt of
T q pts -> concatMap (lins q) pts
T (_,q) pts -> concatMap (lins q) pts
M ws -> if null ws then [] else [(p,ws)]
data PosText =
T [Int] [PosText]
T (Maybe CId,[Int]) [PosText]
| M [String]
deriving Show
@@ -125,10 +169,13 @@ readPosText = fst . head . (RP.readP_to_S pPosText) where
ws <- RP.sepBy1 (RP.munch1 (flip notElem "()")) (RP.char ' ')
return (M ws)
pPos = do
fun <- (RP.char '(' >> pCId >>= \f -> RP.char ',' >> (return $ Just f))
RP.<++ (return Nothing)
RP.char '[' >> RP.skipSpaces
is <- RP.sepBy (RP.munch1 isDigit) (RP.char ',')
RP.char ']' >> RP.skipSpaces
return (map read is)
RP.char ')' RP.<++ return ' '
return (fun,map read is)
{-