From d5666aebd02e4883c949915c15f072f2bec3e9de Mon Sep 17 00:00:00 2001 From: "kr.angelov" Date: Tue, 16 Apr 2013 13:10:48 +0000 Subject: [PATCH] the generation of dependency trees in the Haskell runtime is now finally working with bracketed strings. This also fixes some errors in the old implementation --- src/runtime/haskell/PGF/VisualizeTree.hs | 308 +++++------------------ 1 file changed, 63 insertions(+), 245 deletions(-) diff --git a/src/runtime/haskell/PGF/VisualizeTree.hs b/src/runtime/haskell/PGF/VisualizeTree.hs index 0cb323b92..48d86cf26 100644 --- a/src/runtime/haskell/PGF/VisualizeTree.hs +++ b/src/runtime/haskell/PGF/VisualizeTree.hs @@ -28,7 +28,7 @@ module PGF.VisualizeTree , getDepLabels ) where -import PGF.CId (CId,showCId,ppCId,pCId,mkCId) +import PGF.CId (CId,wildCId,showCId,ppCId,pCId,mkCId) import PGF.Data import PGF.Expr (showExpr, Tree) import PGF.Linearize @@ -37,7 +37,7 @@ import PGF.Macros (lookValCat, lookMap, import qualified Data.Map as Map import qualified Data.IntMap as IntMap -import Data.List (intersperse,nub,isPrefixOf,sort,sortBy) +import Data.List (intersperse,nub,mapAccumL) import Data.Char (isDigit) import Data.Maybe (fromMaybe) import Text.PrettyPrint @@ -116,33 +116,39 @@ graphvizAbstractTree pgf (funs,cats) = render . tree2graph type Labels = Map.Map CId [String] -{- This is an attempt to build the dependency tree from the bracketed string. - Unfortunately it doesn't quite work. See the actual implementation at - the end of this module. - graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Tree -> String graphvizDependencyTree format debug mlab ms pgf lang t = render $ case format of - "malt" -> vcat (map (hcat . intersperse (char '\t') ) wnodes) + "conll" -> vcat (map (hcat . intersperse (char '\t') ) wnodes) "malt_input" -> vcat (map (hcat . intersperse (char '\t') . take 6) wnodes) _ -> text "digraph {" $$ space $$ - nest 2 (text "rankdir=LR ;" $$ + nest 2 (text "rankdir=RL ;" $$ text "node [shape = plaintext] ;" $$ vcat nodes $$ vcat links) $$ text "}" where nodes = map mkNode leaves - links = map mkLink [(fid, fromMaybe nil (lookup fid deps)) | (fid,_,w) <- tail leaves] - wnodes = undefined + links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun),_,w) <- tail leaves] + + wnodes = [[int i, maltws ws, ppCId fun, ppCId cat, ppCId cat, unspec, int parent, text lab, unspec, unspec] | + ((cat,fid,fun),i,ws) <- tail leaves, + let (lab,parent) = maybe (dep_lbl,0) + (\(lbl,fid) -> (lbl,head [i | ((_,fid1,_),i,_) <- leaves, fid == fid1])) + (lookup fid deps) + ] + maltws = text . concat . intersperse "+" . words -- no spaces in column 2 nil = -1 bs = bracketedLinearize pgf lang t - leaves = (nil,0,"ROOT") : (groupAndIndexIt 1 . getLeaves nil) bs - deps = getDeps nil [bs] + root = (wildCId,nil,wildCId) + + leaves = (root,0,root_lbl) : (groupAndIndexIt 1 . getLeaves root) bs + deps = let (_,(h,deps)) = getDeps 0 [] t [] + in (h,(dep_lbl,nil)):deps groupAndIndexIt id [] = [] groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws @@ -155,32 +161,54 @@ graphvizDependencyTree format debug mlab ms pgf lang t = render $ getLeaves parent bs = case bs of - Leaf w -> [(parent,w)] - Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss + Leaf w -> [(parent,w)] + Bracket cat fid _ fun _ bss -> concatMap (getLeaves (cat,fid,fun)) bss - getDeps out_head bss = - case selectHead (children bss) of - Just ((head, bss'), deps) -> concat (descend out_head head bss' : [descend (headOf head bss') fid bss | (fid,bss) <- IntMap.toList deps]) - Nothing -> [] - where - descend head fid bss = (fid,head) : getDeps head bss - - headOf head bss - | null [() | Leaf _ <- bss] = - case selectHead (children bss) of - Just ((head, bss), deps) -> headOf head bss - Nothing -> head - | otherwise = head - - children bss = IntMap.fromListWith (++) [(fid,bss) | Bracket _ fid _ _ bss <- bss] - - selectHead children = IntMap.maxViewWithKey children - - mkNode (p,i,w) = + mkNode ((_,p,_),i,w) = tag p <+> brackets (text "label = " <> doubleQuotes (int i <> char '.' <+> text w)) <+> semi - mkLink (x,y) = tag y <+> text "->" <+> tag x -- ++ " [label = \"" ++ l ++ "\"] ;" --} + mkLink (x,(lbl,y)) = tag x <+> text "->" <+> tag y <+> text "[label = " <> doubleQuotes (text lbl) <> text "] ;" + + labels = maybe Map.empty id mlab + + getDeps n_fid xs (EAbs _ x e) es = getDeps n_fid (x:xs) e es + getDeps n_fid xs (EApp e1 e2) es = getDeps n_fid xs e1 (e2:es) + getDeps n_fid xs (EImplArg e) es = getDeps n_fid xs e es + getDeps n_fid xs (ETyped e _) es = getDeps n_fid xs e es + getDeps n_fid xs (EFun f) es = let (n_fid_1,ds) = descend n_fid xs es + (mb_h, deps) = selectHead f ds + in case mb_h of + Just (fid,deps0) -> (n_fid_1+1,(fid,deps0++ + [(n_fid_1,(dep_lbl,fid))]++ + concat [(m,(lbl,fid)):ds | (lbl,(m,ds)) <- deps])) + Nothing -> (n_fid_1+1,(n_fid_1,concat [(m,(lbl,n_fid_1)):ds | (lbl,(m,ds)) <- deps])) + getDeps n_fid xs (EMeta i) es = (n_fid+2,(n_fid,[])) + getDeps n_fid xs (EVar i) _ = (n_fid+2,(n_fid,[])) + getDeps n_fid xs (ELit l) [] = (n_fid+1,(n_fid,[])) + + descend n_fid xs es = mapAccumL (\n_fid e -> getDeps n_fid xs e []) n_fid es + + selectHead f ds = + case Map.lookup f labels of + Just lbls -> extractHead (zip lbls ds) + Nothing -> extractLast ds + where + extractHead [] = (Nothing, []) + extractHead (ld@(l,d):lds) + | l == head_lbl = (Just d,lds) + | otherwise = let (mb_h,deps) = extractHead lds + in (mb_h,ld:deps) + + extractLast [] = (Nothing, []) + extractLast (d:ds) + | null ds = (Just d,[]) + | otherwise = let (mb_h,deps) = extractLast ds + in (mb_h,(dep_lbl,d):deps) + + dep_lbl = "dep" + head_lbl = "head" + root_lbl = "ROOT" + unspec = text "_" getDepLabels :: [String] -> Labels getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss] @@ -427,213 +455,3 @@ tbrackets d = char '<' <> d <> char '>' tag i | i < 0 = char 'r' <> int (negate i) | otherwise = char 'n' <> int i - - - --------------------------------------------------------------------- --- The linearization code bellow is needed just in order to --- produce the dependency tree. Unfortunately the bracketed string --- doesn't give us an easy way to find which part of the string --- corresponds to which argument of the parent function. --- --- Uuuuugly!!! I hope that this code will be removed one day. - -type LinTable = Array LIndex [BracketedTokn] - - -linTree :: PGF -> Language -> (Maybe CId -> [Int] -> LinTable -> LinTable) -> Expr -> [LinTable] -linTree pgf lang mark e = lin0 [] [] [] Nothing e - where - cnc = lookMap (error "no lang") lang (concretes pgf) - lp = lproductions cnc - - lin0 path xs ys mb_fid (EAbs _ x e) = lin0 path (showCId x:xs) ys mb_fid e - lin0 path xs ys mb_fid (ETyped e _) = lin0 path xs ys mb_fid e - lin0 path xs ys mb_fid e = lin path ys mb_fid e [] - - lin path xs mb_fid (EApp e1 e2) es = lin path xs mb_fid e1 (e2:es) - lin path xs mb_fid (ELit l) [] = case l of - LStr s -> return (mark Nothing path (ss s)) - LInt n -> return (mark Nothing path (ss (show n))) - LFlt f -> return (mark Nothing path (ss (show f))) - lin path xs mb_fid (EFun f) es = map (mark (Just f) path) (apply path xs mb_fid f es) - lin path xs mb_fid (ETyped e _) es = lin path xs mb_fid e es - lin path xs mb_fid (EImplArg e) es = lin path xs mb_fid e es - - ss s = listArray (0,0) [[LeafKS [s]]] - - apply path xs mb_fid f es = - case Map.lookup f lp of - Just prods -> case lookupProds mb_fid prods of - Just set -> do prod <- Set.toList set - case prod of - PApply funid fids -> do guard (length fids == length es) - args <- sequence (zipWith3 (\i (PArg _ fid) e -> lin0 (sub i path) [] xs (Just fid) e) [0..] fids es) - let (CncFun _ lins) = cncfuns cnc ! funid - return (listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins]) - PCoerce fid -> apply path xs (Just fid) f es - Nothing -> mzero - where - lookupProds (Just fid) prods = IntMap.lookup fid prods - lookupProds Nothing prods = Just (Set.filter isApp (Set.unions (IntMap.elems prods))) - - sub i path = i:path - - isApp (PApply _ _) = True - isApp _ = False - - computeSeq seqid args = concatMap compute (elems seq) - where - seq = sequences cnc ! seqid - - compute (SymCat d r) = (args !! d) ! r - compute (SymLit d r) = (args !! d) ! r - compute (SymKS ts) = [LeafKS ts] - compute (SymKP ts alts) = [LeafKP ts alts] - -untokn :: [BracketedTokn] -> [String] -untokn ts = case ts of - LeafKP d _ : [] -> d - LeafKP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss - LeafKS s : ws -> s ++ untokn ws - [] -> [] - where - sel d vs w = case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of - v:_ -> v - _ -> d - - --- show bracketed markup with references to tree structure -markLinearizes :: PGF -> CId -> Expr -> [String] -markLinearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang mark - where - mark mb_f path lint = amap (bracket mb_f path) lint - - bracket Nothing path ts = [LeafKS ["("++show (reverse path)]] ++ ts ++ [LeafKS [")"]] - bracket (Just f) path ts = [LeafKS ["(("++showCId f++","++show (reverse path)++")"]] ++ ts ++ [LeafKS [")"]] - - -graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String -graphvizDependencyTree format debug mlab ms pgf lang tr = case format of - "malt" -> unlines (lin2dep format) - "malt_input" -> unlines (lin2dep format) - _ -> concat $ map (++"\n") $ ["digraph {\n"] ++ lin2dep format ++ ["}"] - where - - lin2dep format = -- trace (ifd (show sortedNodes ++ show nodeWords)) $ - case format of - "malt" -> map (concat . intersperse "\t") wnodes - "malt_input" -> map (concat . intersperse "\t" . take 6) wnodes - _ -> prelude ++ nodes ++ links - - ifd s = if debug then s else [] - - pot = readPosText $ concat $ take 1 $ markLinearizes pgf lang tr - ---- use Just str if you have str to match against - - prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"] - - nodes = map mkNode nodeWords - mkNode (i,((_,p),ss)) = - node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;" - nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)| - ((Just f,p),w) <- wlins pot] - - links = map mkLink thelinks - thelinks = [(word y, x, label tr y x) | - (_,((f,x),_)) <- tail nodeWords, - let y = dominant x] - mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;" - node = show . show - - dominant x = case x of - [] -> x - _ | not (x == hx) -> hx - _ -> dominant (init x) - where - hx = headArg (init x) tr x - - headArg x0 tr x = case (unApp tr,x) of - (Just (f,[]),[_]) -> x0 ---- ?? - (Just (f,ts),[_]) -> x0 ++ [getHead (length ts - 1) f] - (Just (f,ts),i:y) -> headArg x0 (ts !! i) y - _ -> x0 ---- - - label tr y x = case span (uncurry (==)) (zip y x) of - (xys,(_,i):_) -> getLabel i (funAt tr (map fst xys)) - _ -> "" ---- - - funAt tr x = case (unApp tr,x) of - (Just (f,_) ,[]) -> f - (Just (f,ts),i:y) -> funAt (ts !! i) y - _ -> mkCId (render (ppExpr 0 [] tr)) ---- - - word x = if elem x sortedNodes then x else - let x' = headArg x tr (x ++[0]) in - if x' == x then [] else word x' - - sortedNodes = [p | (_,((_,p),_)) <- nodeWords] - - labels = maybe Map.empty id mlab - getHead i f = case Map.lookup f labels of - Just ls -> length $ takeWhile (/= "head") ls - _ -> i - getLabel i f = case Map.lookup f labels of - Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i - _ -> showCId f ++ "#" ++ show i - --- to generate CoNLL format for MaltParser - nodeMap :: Map.Map [Int] Int - nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords] - - arcMap :: Map.Map [Int] ([Int],String) - arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks] - - lookDomLab p = case Map.lookup p arcMap of - Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l) - _ -> (0,rootlabel) - - wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] | - (i, ((fun,p),ws)) <- tail nodeWords, - let pos = showCId $ lookValCat (abstract pgf) fun, - let morph = unspec, - let (dom,lab) = lookDomLab p - ] - maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2 - unspec = "_" - rootlabel = "ROOT" - -wlins :: PosText -> [((Maybe CId,[Int]),[String])] -wlins pt = case pt of - T p pts -> concatMap (lins p) pts - M ws -> if null ws then [] else [((Nothing,[]),ws)] - where - lins p pt = case pt of - T q pts -> concatMap (lins q) pts - M ws -> if null ws then [] else [(p,ws)] - -data PosText = - T (Maybe CId,[Int]) [PosText] - | M [String] - deriving Show - -readPosText :: String -> PosText -readPosText = fst . head . (RP.readP_to_S pPosText) where - pPosText = do - RP.char '(' >> RP.skipSpaces - p <- pPos - RP.skipSpaces - ts <- RP.many pPosText - RP.char ')' >> RP.skipSpaces - return (T p ts) - RP.<++ do - ws <- RP.sepBy1 (RP.munch1 (flip notElem "()")) (RP.char ' ') - return (M ws) - pPos = do - fun <- (RP.char '(' >> pCId >>= \f -> RP.char ',' >> (return $ Just f)) - RP.<++ (return Nothing) - RP.char '[' >> RP.skipSpaces - is <- RP.sepBy (RP.munch1 isDigit) (RP.char ',') - RP.char ']' >> RP.skipSpaces - RP.char ')' RP.<++ return ' ' - return (fun,map read is)