forked from GitHub/gf-core
the generation of dependency trees in the Haskell runtime is now finally working with bracketed strings. This also fixes some errors in the old implementation
This commit is contained in:
@@ -28,7 +28,7 @@ module PGF.VisualizeTree
|
||||
, getDepLabels
|
||||
) where
|
||||
|
||||
import PGF.CId (CId,showCId,ppCId,pCId,mkCId)
|
||||
import PGF.CId (CId,wildCId,showCId,ppCId,pCId,mkCId)
|
||||
import PGF.Data
|
||||
import PGF.Expr (showExpr, Tree)
|
||||
import PGF.Linearize
|
||||
@@ -37,7 +37,7 @@ import PGF.Macros (lookValCat, lookMap,
|
||||
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.IntMap as IntMap
|
||||
import Data.List (intersperse,nub,isPrefixOf,sort,sortBy)
|
||||
import Data.List (intersperse,nub,mapAccumL)
|
||||
import Data.Char (isDigit)
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Text.PrettyPrint
|
||||
@@ -116,33 +116,39 @@ graphvizAbstractTree pgf (funs,cats) = render . tree2graph
|
||||
|
||||
type Labels = Map.Map CId [String]
|
||||
|
||||
{- This is an attempt to build the dependency tree from the bracketed string.
|
||||
Unfortunately it doesn't quite work. See the actual implementation at
|
||||
the end of this module.
|
||||
|
||||
graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Tree -> String
|
||||
graphvizDependencyTree format debug mlab ms pgf lang t = render $
|
||||
case format of
|
||||
"malt" -> vcat (map (hcat . intersperse (char '\t') ) wnodes)
|
||||
"conll" -> vcat (map (hcat . intersperse (char '\t') ) wnodes)
|
||||
"malt_input" -> vcat (map (hcat . intersperse (char '\t') . take 6) wnodes)
|
||||
_ -> text "digraph {" $$
|
||||
space $$
|
||||
nest 2 (text "rankdir=LR ;" $$
|
||||
nest 2 (text "rankdir=RL ;" $$
|
||||
text "node [shape = plaintext] ;" $$
|
||||
vcat nodes $$
|
||||
vcat links) $$
|
||||
text "}"
|
||||
where
|
||||
nodes = map mkNode leaves
|
||||
links = map mkLink [(fid, fromMaybe nil (lookup fid deps)) | (fid,_,w) <- tail leaves]
|
||||
wnodes = undefined
|
||||
links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun),_,w) <- tail leaves]
|
||||
|
||||
wnodes = [[int i, maltws ws, ppCId fun, ppCId cat, ppCId cat, unspec, int parent, text lab, unspec, unspec] |
|
||||
((cat,fid,fun),i,ws) <- tail leaves,
|
||||
let (lab,parent) = maybe (dep_lbl,0)
|
||||
(\(lbl,fid) -> (lbl,head [i | ((_,fid1,_),i,_) <- leaves, fid == fid1]))
|
||||
(lookup fid deps)
|
||||
]
|
||||
maltws = text . concat . intersperse "+" . words -- no spaces in column 2
|
||||
|
||||
nil = -1
|
||||
|
||||
bs = bracketedLinearize pgf lang t
|
||||
|
||||
leaves = (nil,0,"ROOT") : (groupAndIndexIt 1 . getLeaves nil) bs
|
||||
deps = getDeps nil [bs]
|
||||
root = (wildCId,nil,wildCId)
|
||||
|
||||
leaves = (root,0,root_lbl) : (groupAndIndexIt 1 . getLeaves root) bs
|
||||
deps = let (_,(h,deps)) = getDeps 0 [] t []
|
||||
in (h,(dep_lbl,nil)):deps
|
||||
|
||||
groupAndIndexIt id [] = []
|
||||
groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
|
||||
@@ -155,32 +161,54 @@ graphvizDependencyTree format debug mlab ms pgf lang t = render $
|
||||
|
||||
getLeaves parent bs =
|
||||
case bs of
|
||||
Leaf w -> [(parent,w)]
|
||||
Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss
|
||||
Leaf w -> [(parent,w)]
|
||||
Bracket cat fid _ fun _ bss -> concatMap (getLeaves (cat,fid,fun)) bss
|
||||
|
||||
getDeps out_head bss =
|
||||
case selectHead (children bss) of
|
||||
Just ((head, bss'), deps) -> concat (descend out_head head bss' : [descend (headOf head bss') fid bss | (fid,bss) <- IntMap.toList deps])
|
||||
Nothing -> []
|
||||
where
|
||||
descend head fid bss = (fid,head) : getDeps head bss
|
||||
|
||||
headOf head bss
|
||||
| null [() | Leaf _ <- bss] =
|
||||
case selectHead (children bss) of
|
||||
Just ((head, bss), deps) -> headOf head bss
|
||||
Nothing -> head
|
||||
| otherwise = head
|
||||
|
||||
children bss = IntMap.fromListWith (++) [(fid,bss) | Bracket _ fid _ _ bss <- bss]
|
||||
|
||||
selectHead children = IntMap.maxViewWithKey children
|
||||
|
||||
mkNode (p,i,w) =
|
||||
mkNode ((_,p,_),i,w) =
|
||||
tag p <+> brackets (text "label = " <> doubleQuotes (int i <> char '.' <+> text w)) <+> semi
|
||||
|
||||
mkLink (x,y) = tag y <+> text "->" <+> tag x -- ++ " [label = \"" ++ l ++ "\"] ;"
|
||||
-}
|
||||
mkLink (x,(lbl,y)) = tag x <+> text "->" <+> tag y <+> text "[label = " <> doubleQuotes (text lbl) <> text "] ;"
|
||||
|
||||
labels = maybe Map.empty id mlab
|
||||
|
||||
getDeps n_fid xs (EAbs _ x e) es = getDeps n_fid (x:xs) e es
|
||||
getDeps n_fid xs (EApp e1 e2) es = getDeps n_fid xs e1 (e2:es)
|
||||
getDeps n_fid xs (EImplArg e) es = getDeps n_fid xs e es
|
||||
getDeps n_fid xs (ETyped e _) es = getDeps n_fid xs e es
|
||||
getDeps n_fid xs (EFun f) es = let (n_fid_1,ds) = descend n_fid xs es
|
||||
(mb_h, deps) = selectHead f ds
|
||||
in case mb_h of
|
||||
Just (fid,deps0) -> (n_fid_1+1,(fid,deps0++
|
||||
[(n_fid_1,(dep_lbl,fid))]++
|
||||
concat [(m,(lbl,fid)):ds | (lbl,(m,ds)) <- deps]))
|
||||
Nothing -> (n_fid_1+1,(n_fid_1,concat [(m,(lbl,n_fid_1)):ds | (lbl,(m,ds)) <- deps]))
|
||||
getDeps n_fid xs (EMeta i) es = (n_fid+2,(n_fid,[]))
|
||||
getDeps n_fid xs (EVar i) _ = (n_fid+2,(n_fid,[]))
|
||||
getDeps n_fid xs (ELit l) [] = (n_fid+1,(n_fid,[]))
|
||||
|
||||
descend n_fid xs es = mapAccumL (\n_fid e -> getDeps n_fid xs e []) n_fid es
|
||||
|
||||
selectHead f ds =
|
||||
case Map.lookup f labels of
|
||||
Just lbls -> extractHead (zip lbls ds)
|
||||
Nothing -> extractLast ds
|
||||
where
|
||||
extractHead [] = (Nothing, [])
|
||||
extractHead (ld@(l,d):lds)
|
||||
| l == head_lbl = (Just d,lds)
|
||||
| otherwise = let (mb_h,deps) = extractHead lds
|
||||
in (mb_h,ld:deps)
|
||||
|
||||
extractLast [] = (Nothing, [])
|
||||
extractLast (d:ds)
|
||||
| null ds = (Just d,[])
|
||||
| otherwise = let (mb_h,deps) = extractLast ds
|
||||
in (mb_h,(dep_lbl,d):deps)
|
||||
|
||||
dep_lbl = "dep"
|
||||
head_lbl = "head"
|
||||
root_lbl = "ROOT"
|
||||
unspec = text "_"
|
||||
|
||||
getDepLabels :: [String] -> Labels
|
||||
getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss]
|
||||
@@ -427,213 +455,3 @@ tbrackets d = char '<' <> d <> char '>'
|
||||
tag i
|
||||
| i < 0 = char 'r' <> int (negate i)
|
||||
| otherwise = char 'n' <> int i
|
||||
|
||||
|
||||
|
||||
--------------------------------------------------------------------
|
||||
-- The linearization code bellow is needed just in order to
|
||||
-- produce the dependency tree. Unfortunately the bracketed string
|
||||
-- doesn't give us an easy way to find which part of the string
|
||||
-- corresponds to which argument of the parent function.
|
||||
--
|
||||
-- Uuuuugly!!! I hope that this code will be removed one day.
|
||||
|
||||
type LinTable = Array LIndex [BracketedTokn]
|
||||
|
||||
|
||||
linTree :: PGF -> Language -> (Maybe CId -> [Int] -> LinTable -> LinTable) -> Expr -> [LinTable]
|
||||
linTree pgf lang mark e = lin0 [] [] [] Nothing e
|
||||
where
|
||||
cnc = lookMap (error "no lang") lang (concretes pgf)
|
||||
lp = lproductions cnc
|
||||
|
||||
lin0 path xs ys mb_fid (EAbs _ x e) = lin0 path (showCId x:xs) ys mb_fid e
|
||||
lin0 path xs ys mb_fid (ETyped e _) = lin0 path xs ys mb_fid e
|
||||
lin0 path xs ys mb_fid e = lin path ys mb_fid e []
|
||||
|
||||
lin path xs mb_fid (EApp e1 e2) es = lin path xs mb_fid e1 (e2:es)
|
||||
lin path xs mb_fid (ELit l) [] = case l of
|
||||
LStr s -> return (mark Nothing path (ss s))
|
||||
LInt n -> return (mark Nothing path (ss (show n)))
|
||||
LFlt f -> return (mark Nothing path (ss (show f)))
|
||||
lin path xs mb_fid (EFun f) es = map (mark (Just f) path) (apply path xs mb_fid f es)
|
||||
lin path xs mb_fid (ETyped e _) es = lin path xs mb_fid e es
|
||||
lin path xs mb_fid (EImplArg e) es = lin path xs mb_fid e es
|
||||
|
||||
ss s = listArray (0,0) [[LeafKS [s]]]
|
||||
|
||||
apply path xs mb_fid f es =
|
||||
case Map.lookup f lp of
|
||||
Just prods -> case lookupProds mb_fid prods of
|
||||
Just set -> do prod <- Set.toList set
|
||||
case prod of
|
||||
PApply funid fids -> do guard (length fids == length es)
|
||||
args <- sequence (zipWith3 (\i (PArg _ fid) e -> lin0 (sub i path) [] xs (Just fid) e) [0..] fids es)
|
||||
let (CncFun _ lins) = cncfuns cnc ! funid
|
||||
return (listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins])
|
||||
PCoerce fid -> apply path xs (Just fid) f es
|
||||
Nothing -> mzero
|
||||
where
|
||||
lookupProds (Just fid) prods = IntMap.lookup fid prods
|
||||
lookupProds Nothing prods = Just (Set.filter isApp (Set.unions (IntMap.elems prods)))
|
||||
|
||||
sub i path = i:path
|
||||
|
||||
isApp (PApply _ _) = True
|
||||
isApp _ = False
|
||||
|
||||
computeSeq seqid args = concatMap compute (elems seq)
|
||||
where
|
||||
seq = sequences cnc ! seqid
|
||||
|
||||
compute (SymCat d r) = (args !! d) ! r
|
||||
compute (SymLit d r) = (args !! d) ! r
|
||||
compute (SymKS ts) = [LeafKS ts]
|
||||
compute (SymKP ts alts) = [LeafKP ts alts]
|
||||
|
||||
untokn :: [BracketedTokn] -> [String]
|
||||
untokn ts = case ts of
|
||||
LeafKP d _ : [] -> d
|
||||
LeafKP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss
|
||||
LeafKS s : ws -> s ++ untokn ws
|
||||
[] -> []
|
||||
where
|
||||
sel d vs w = case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of
|
||||
v:_ -> v
|
||||
_ -> d
|
||||
|
||||
|
||||
-- show bracketed markup with references to tree structure
|
||||
markLinearizes :: PGF -> CId -> Expr -> [String]
|
||||
markLinearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang mark
|
||||
where
|
||||
mark mb_f path lint = amap (bracket mb_f path) lint
|
||||
|
||||
bracket Nothing path ts = [LeafKS ["("++show (reverse path)]] ++ ts ++ [LeafKS [")"]]
|
||||
bracket (Just f) path ts = [LeafKS ["(("++showCId f++","++show (reverse path)++")"]] ++ ts ++ [LeafKS [")"]]
|
||||
|
||||
|
||||
graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String
|
||||
graphvizDependencyTree format debug mlab ms pgf lang tr = case format of
|
||||
"malt" -> unlines (lin2dep format)
|
||||
"malt_input" -> unlines (lin2dep format)
|
||||
_ -> concat $ map (++"\n") $ ["digraph {\n"] ++ lin2dep format ++ ["}"]
|
||||
where
|
||||
|
||||
lin2dep format = -- trace (ifd (show sortedNodes ++ show nodeWords)) $
|
||||
case format of
|
||||
"malt" -> map (concat . intersperse "\t") wnodes
|
||||
"malt_input" -> map (concat . intersperse "\t" . take 6) wnodes
|
||||
_ -> prelude ++ nodes ++ links
|
||||
|
||||
ifd s = if debug then s else []
|
||||
|
||||
pot = readPosText $ concat $ take 1 $ markLinearizes pgf lang tr
|
||||
---- use Just str if you have str to match against
|
||||
|
||||
prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"]
|
||||
|
||||
nodes = map mkNode nodeWords
|
||||
mkNode (i,((_,p),ss)) =
|
||||
node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;"
|
||||
nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)|
|
||||
((Just f,p),w) <- wlins pot]
|
||||
|
||||
links = map mkLink thelinks
|
||||
thelinks = [(word y, x, label tr y x) |
|
||||
(_,((f,x),_)) <- tail nodeWords,
|
||||
let y = dominant x]
|
||||
mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;"
|
||||
node = show . show
|
||||
|
||||
dominant x = case x of
|
||||
[] -> x
|
||||
_ | not (x == hx) -> hx
|
||||
_ -> dominant (init x)
|
||||
where
|
||||
hx = headArg (init x) tr x
|
||||
|
||||
headArg x0 tr x = case (unApp tr,x) of
|
||||
(Just (f,[]),[_]) -> x0 ---- ??
|
||||
(Just (f,ts),[_]) -> x0 ++ [getHead (length ts - 1) f]
|
||||
(Just (f,ts),i:y) -> headArg x0 (ts !! i) y
|
||||
_ -> x0 ----
|
||||
|
||||
label tr y x = case span (uncurry (==)) (zip y x) of
|
||||
(xys,(_,i):_) -> getLabel i (funAt tr (map fst xys))
|
||||
_ -> "" ----
|
||||
|
||||
funAt tr x = case (unApp tr,x) of
|
||||
(Just (f,_) ,[]) -> f
|
||||
(Just (f,ts),i:y) -> funAt (ts !! i) y
|
||||
_ -> mkCId (render (ppExpr 0 [] tr)) ----
|
||||
|
||||
word x = if elem x sortedNodes then x else
|
||||
let x' = headArg x tr (x ++[0]) in
|
||||
if x' == x then [] else word x'
|
||||
|
||||
sortedNodes = [p | (_,((_,p),_)) <- nodeWords]
|
||||
|
||||
labels = maybe Map.empty id mlab
|
||||
getHead i f = case Map.lookup f labels of
|
||||
Just ls -> length $ takeWhile (/= "head") ls
|
||||
_ -> i
|
||||
getLabel i f = case Map.lookup f labels of
|
||||
Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i
|
||||
_ -> showCId f ++ "#" ++ show i
|
||||
|
||||
-- to generate CoNLL format for MaltParser
|
||||
nodeMap :: Map.Map [Int] Int
|
||||
nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords]
|
||||
|
||||
arcMap :: Map.Map [Int] ([Int],String)
|
||||
arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks]
|
||||
|
||||
lookDomLab p = case Map.lookup p arcMap of
|
||||
Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l)
|
||||
_ -> (0,rootlabel)
|
||||
|
||||
wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] |
|
||||
(i, ((fun,p),ws)) <- tail nodeWords,
|
||||
let pos = showCId $ lookValCat (abstract pgf) fun,
|
||||
let morph = unspec,
|
||||
let (dom,lab) = lookDomLab p
|
||||
]
|
||||
maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2
|
||||
unspec = "_"
|
||||
rootlabel = "ROOT"
|
||||
|
||||
wlins :: PosText -> [((Maybe CId,[Int]),[String])]
|
||||
wlins pt = case pt of
|
||||
T p pts -> concatMap (lins p) pts
|
||||
M ws -> if null ws then [] else [((Nothing,[]),ws)]
|
||||
where
|
||||
lins p pt = case pt of
|
||||
T q pts -> concatMap (lins q) pts
|
||||
M ws -> if null ws then [] else [(p,ws)]
|
||||
|
||||
data PosText =
|
||||
T (Maybe CId,[Int]) [PosText]
|
||||
| M [String]
|
||||
deriving Show
|
||||
|
||||
readPosText :: String -> PosText
|
||||
readPosText = fst . head . (RP.readP_to_S pPosText) where
|
||||
pPosText = do
|
||||
RP.char '(' >> RP.skipSpaces
|
||||
p <- pPos
|
||||
RP.skipSpaces
|
||||
ts <- RP.many pPosText
|
||||
RP.char ')' >> RP.skipSpaces
|
||||
return (T p ts)
|
||||
RP.<++ do
|
||||
ws <- RP.sepBy1 (RP.munch1 (flip notElem "()")) (RP.char ' ')
|
||||
return (M ws)
|
||||
pPos = do
|
||||
fun <- (RP.char '(' >> pCId >>= \f -> RP.char ',' >> (return $ Just f))
|
||||
RP.<++ (return Nothing)
|
||||
RP.char '[' >> RP.skipSpaces
|
||||
is <- RP.sepBy (RP.munch1 isDigit) (RP.char ',')
|
||||
RP.char ']' >> RP.skipSpaces
|
||||
RP.char ')' RP.<++ return ' '
|
||||
return (fun,map read is)
|
||||
|
||||
Reference in New Issue
Block a user