1
0
forked from GitHub/gf-core

the generation of dependency trees in the Haskell runtime is now finally working with bracketed strings. This also fixes some errors in the old implementation

This commit is contained in:
kr.angelov
2013-04-16 13:10:48 +00:00
parent 2f35964871
commit f6d675c34b

View File

@@ -28,7 +28,7 @@ module PGF.VisualizeTree
, getDepLabels
) where
import PGF.CId (CId,showCId,ppCId,pCId,mkCId)
import PGF.CId (CId,wildCId,showCId,ppCId,pCId,mkCId)
import PGF.Data
import PGF.Expr (showExpr, Tree)
import PGF.Linearize
@@ -37,7 +37,7 @@ import PGF.Macros (lookValCat, lookMap,
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import Data.List (intersperse,nub,isPrefixOf,sort,sortBy)
import Data.List (intersperse,nub,mapAccumL)
import Data.Char (isDigit)
import Data.Maybe (fromMaybe)
import Text.PrettyPrint
@@ -116,33 +116,39 @@ graphvizAbstractTree pgf (funs,cats) = render . tree2graph
type Labels = Map.Map CId [String]
{- This is an attempt to build the dependency tree from the bracketed string.
Unfortunately it doesn't quite work. See the actual implementation at
the end of this module.
graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Tree -> String
graphvizDependencyTree format debug mlab ms pgf lang t = render $
case format of
"malt" -> vcat (map (hcat . intersperse (char '\t') ) wnodes)
"conll" -> vcat (map (hcat . intersperse (char '\t') ) wnodes)
"malt_input" -> vcat (map (hcat . intersperse (char '\t') . take 6) wnodes)
_ -> text "digraph {" $$
space $$
nest 2 (text "rankdir=LR ;" $$
nest 2 (text "rankdir=RL ;" $$
text "node [shape = plaintext] ;" $$
vcat nodes $$
vcat links) $$
text "}"
where
nodes = map mkNode leaves
links = map mkLink [(fid, fromMaybe nil (lookup fid deps)) | (fid,_,w) <- tail leaves]
wnodes = undefined
links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun),_,w) <- tail leaves]
wnodes = [[int i, maltws ws, ppCId fun, ppCId cat, ppCId cat, unspec, int parent, text lab, unspec, unspec] |
((cat,fid,fun),i,ws) <- tail leaves,
let (lab,parent) = maybe (dep_lbl,0)
(\(lbl,fid) -> (lbl,head [i | ((_,fid1,_),i,_) <- leaves, fid == fid1]))
(lookup fid deps)
]
maltws = text . concat . intersperse "+" . words -- no spaces in column 2
nil = -1
bs = bracketedLinearize pgf lang t
leaves = (nil,0,"ROOT") : (groupAndIndexIt 1 . getLeaves nil) bs
deps = getDeps nil [bs]
root = (wildCId,nil,wildCId)
leaves = (root,0,root_lbl) : (groupAndIndexIt 1 . getLeaves root) bs
deps = let (_,(h,deps)) = getDeps 0 [] t []
in (h,(dep_lbl,nil)):deps
groupAndIndexIt id [] = []
groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
@@ -155,32 +161,54 @@ graphvizDependencyTree format debug mlab ms pgf lang t = render $
getLeaves parent bs =
case bs of
Leaf w -> [(parent,w)]
Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss
Leaf w -> [(parent,w)]
Bracket cat fid _ fun _ bss -> concatMap (getLeaves (cat,fid,fun)) bss
getDeps out_head bss =
case selectHead (children bss) of
Just ((head, bss'), deps) -> concat (descend out_head head bss' : [descend (headOf head bss') fid bss | (fid,bss) <- IntMap.toList deps])
Nothing -> []
where
descend head fid bss = (fid,head) : getDeps head bss
headOf head bss
| null [() | Leaf _ <- bss] =
case selectHead (children bss) of
Just ((head, bss), deps) -> headOf head bss
Nothing -> head
| otherwise = head
children bss = IntMap.fromListWith (++) [(fid,bss) | Bracket _ fid _ _ bss <- bss]
selectHead children = IntMap.maxViewWithKey children
mkNode (p,i,w) =
mkNode ((_,p,_),i,w) =
tag p <+> brackets (text "label = " <> doubleQuotes (int i <> char '.' <+> text w)) <+> semi
mkLink (x,y) = tag y <+> text "->" <+> tag x -- ++ " [label = \"" ++ l ++ "\"] ;"
-}
mkLink (x,(lbl,y)) = tag x <+> text "->" <+> tag y <+> text "[label = " <> doubleQuotes (text lbl) <> text "] ;"
labels = maybe Map.empty id mlab
getDeps n_fid xs (EAbs _ x e) es = getDeps n_fid (x:xs) e es
getDeps n_fid xs (EApp e1 e2) es = getDeps n_fid xs e1 (e2:es)
getDeps n_fid xs (EImplArg e) es = getDeps n_fid xs e es
getDeps n_fid xs (ETyped e _) es = getDeps n_fid xs e es
getDeps n_fid xs (EFun f) es = let (n_fid_1,ds) = descend n_fid xs es
(mb_h, deps) = selectHead f ds
in case mb_h of
Just (fid,deps0) -> (n_fid_1+1,(fid,deps0++
[(n_fid_1,(dep_lbl,fid))]++
concat [(m,(lbl,fid)):ds | (lbl,(m,ds)) <- deps]))
Nothing -> (n_fid_1+1,(n_fid_1,concat [(m,(lbl,n_fid_1)):ds | (lbl,(m,ds)) <- deps]))
getDeps n_fid xs (EMeta i) es = (n_fid+2,(n_fid,[]))
getDeps n_fid xs (EVar i) _ = (n_fid+2,(n_fid,[]))
getDeps n_fid xs (ELit l) [] = (n_fid+1,(n_fid,[]))
descend n_fid xs es = mapAccumL (\n_fid e -> getDeps n_fid xs e []) n_fid es
selectHead f ds =
case Map.lookup f labels of
Just lbls -> extractHead (zip lbls ds)
Nothing -> extractLast ds
where
extractHead [] = (Nothing, [])
extractHead (ld@(l,d):lds)
| l == head_lbl = (Just d,lds)
| otherwise = let (mb_h,deps) = extractHead lds
in (mb_h,ld:deps)
extractLast [] = (Nothing, [])
extractLast (d:ds)
| null ds = (Just d,[])
| otherwise = let (mb_h,deps) = extractLast ds
in (mb_h,(dep_lbl,d):deps)
dep_lbl = "dep"
head_lbl = "head"
root_lbl = "ROOT"
unspec = text "_"
getDepLabels :: [String] -> Labels
getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss]
@@ -427,213 +455,3 @@ tbrackets d = char '<' <> d <> char '>'
tag i
| i < 0 = char 'r' <> int (negate i)
| otherwise = char 'n' <> int i
--------------------------------------------------------------------
-- The linearization code bellow is needed just in order to
-- produce the dependency tree. Unfortunately the bracketed string
-- doesn't give us an easy way to find which part of the string
-- corresponds to which argument of the parent function.
--
-- Uuuuugly!!! I hope that this code will be removed one day.
type LinTable = Array LIndex [BracketedTokn]
linTree :: PGF -> Language -> (Maybe CId -> [Int] -> LinTable -> LinTable) -> Expr -> [LinTable]
linTree pgf lang mark e = lin0 [] [] [] Nothing e
where
cnc = lookMap (error "no lang") lang (concretes pgf)
lp = lproductions cnc
lin0 path xs ys mb_fid (EAbs _ x e) = lin0 path (showCId x:xs) ys mb_fid e
lin0 path xs ys mb_fid (ETyped e _) = lin0 path xs ys mb_fid e
lin0 path xs ys mb_fid e = lin path ys mb_fid e []
lin path xs mb_fid (EApp e1 e2) es = lin path xs mb_fid e1 (e2:es)
lin path xs mb_fid (ELit l) [] = case l of
LStr s -> return (mark Nothing path (ss s))
LInt n -> return (mark Nothing path (ss (show n)))
LFlt f -> return (mark Nothing path (ss (show f)))
lin path xs mb_fid (EFun f) es = map (mark (Just f) path) (apply path xs mb_fid f es)
lin path xs mb_fid (ETyped e _) es = lin path xs mb_fid e es
lin path xs mb_fid (EImplArg e) es = lin path xs mb_fid e es
ss s = listArray (0,0) [[LeafKS [s]]]
apply path xs mb_fid f es =
case Map.lookup f lp of
Just prods -> case lookupProds mb_fid prods of
Just set -> do prod <- Set.toList set
case prod of
PApply funid fids -> do guard (length fids == length es)
args <- sequence (zipWith3 (\i (PArg _ fid) e -> lin0 (sub i path) [] xs (Just fid) e) [0..] fids es)
let (CncFun _ lins) = cncfuns cnc ! funid
return (listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins])
PCoerce fid -> apply path xs (Just fid) f es
Nothing -> mzero
where
lookupProds (Just fid) prods = IntMap.lookup fid prods
lookupProds Nothing prods = Just (Set.filter isApp (Set.unions (IntMap.elems prods)))
sub i path = i:path
isApp (PApply _ _) = True
isApp _ = False
computeSeq seqid args = concatMap compute (elems seq)
where
seq = sequences cnc ! seqid
compute (SymCat d r) = (args !! d) ! r
compute (SymLit d r) = (args !! d) ! r
compute (SymKS ts) = [LeafKS ts]
compute (SymKP ts alts) = [LeafKP ts alts]
untokn :: [BracketedTokn] -> [String]
untokn ts = case ts of
LeafKP d _ : [] -> d
LeafKP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss
LeafKS s : ws -> s ++ untokn ws
[] -> []
where
sel d vs w = case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of
v:_ -> v
_ -> d
-- show bracketed markup with references to tree structure
markLinearizes :: PGF -> CId -> Expr -> [String]
markLinearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang mark
where
mark mb_f path lint = amap (bracket mb_f path) lint
bracket Nothing path ts = [LeafKS ["("++show (reverse path)]] ++ ts ++ [LeafKS [")"]]
bracket (Just f) path ts = [LeafKS ["(("++showCId f++","++show (reverse path)++")"]] ++ ts ++ [LeafKS [")"]]
graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String
graphvizDependencyTree format debug mlab ms pgf lang tr = case format of
"malt" -> unlines (lin2dep format)
"malt_input" -> unlines (lin2dep format)
_ -> concat $ map (++"\n") $ ["digraph {\n"] ++ lin2dep format ++ ["}"]
where
lin2dep format = -- trace (ifd (show sortedNodes ++ show nodeWords)) $
case format of
"malt" -> map (concat . intersperse "\t") wnodes
"malt_input" -> map (concat . intersperse "\t" . take 6) wnodes
_ -> prelude ++ nodes ++ links
ifd s = if debug then s else []
pot = readPosText $ concat $ take 1 $ markLinearizes pgf lang tr
---- use Just str if you have str to match against
prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"]
nodes = map mkNode nodeWords
mkNode (i,((_,p),ss)) =
node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;"
nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)|
((Just f,p),w) <- wlins pot]
links = map mkLink thelinks
thelinks = [(word y, x, label tr y x) |
(_,((f,x),_)) <- tail nodeWords,
let y = dominant x]
mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;"
node = show . show
dominant x = case x of
[] -> x
_ | not (x == hx) -> hx
_ -> dominant (init x)
where
hx = headArg (init x) tr x
headArg x0 tr x = case (unApp tr,x) of
(Just (f,[]),[_]) -> x0 ---- ??
(Just (f,ts),[_]) -> x0 ++ [getHead (length ts - 1) f]
(Just (f,ts),i:y) -> headArg x0 (ts !! i) y
_ -> x0 ----
label tr y x = case span (uncurry (==)) (zip y x) of
(xys,(_,i):_) -> getLabel i (funAt tr (map fst xys))
_ -> "" ----
funAt tr x = case (unApp tr,x) of
(Just (f,_) ,[]) -> f
(Just (f,ts),i:y) -> funAt (ts !! i) y
_ -> mkCId (render (ppExpr 0 [] tr)) ----
word x = if elem x sortedNodes then x else
let x' = headArg x tr (x ++[0]) in
if x' == x then [] else word x'
sortedNodes = [p | (_,((_,p),_)) <- nodeWords]
labels = maybe Map.empty id mlab
getHead i f = case Map.lookup f labels of
Just ls -> length $ takeWhile (/= "head") ls
_ -> i
getLabel i f = case Map.lookup f labels of
Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i
_ -> showCId f ++ "#" ++ show i
-- to generate CoNLL format for MaltParser
nodeMap :: Map.Map [Int] Int
nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords]
arcMap :: Map.Map [Int] ([Int],String)
arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks]
lookDomLab p = case Map.lookup p arcMap of
Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l)
_ -> (0,rootlabel)
wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] |
(i, ((fun,p),ws)) <- tail nodeWords,
let pos = showCId $ lookValCat (abstract pgf) fun,
let morph = unspec,
let (dom,lab) = lookDomLab p
]
maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2
unspec = "_"
rootlabel = "ROOT"
wlins :: PosText -> [((Maybe CId,[Int]),[String])]
wlins pt = case pt of
T p pts -> concatMap (lins p) pts
M ws -> if null ws then [] else [((Nothing,[]),ws)]
where
lins p pt = case pt of
T q pts -> concatMap (lins q) pts
M ws -> if null ws then [] else [(p,ws)]
data PosText =
T (Maybe CId,[Int]) [PosText]
| M [String]
deriving Show
readPosText :: String -> PosText
readPosText = fst . head . (RP.readP_to_S pPosText) where
pPosText = do
RP.char '(' >> RP.skipSpaces
p <- pPos
RP.skipSpaces
ts <- RP.many pPosText
RP.char ')' >> RP.skipSpaces
return (T p ts)
RP.<++ do
ws <- RP.sepBy1 (RP.munch1 (flip notElem "()")) (RP.char ' ')
return (M ws)
pPos = do
fun <- (RP.char '(' >> pCId >>= \f -> RP.char ',' >> (return $ Just f))
RP.<++ (return Nothing)
RP.char '[' >> RP.skipSpaces
is <- RP.sepBy (RP.munch1 isDigit) (RP.char ',')
RP.char ']' >> RP.skipSpaces
RP.char ')' RP.<++ return ' '
return (fun,map read is)