mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-23 03:32:51 -06:00
the generation of dependency trees in the Haskell runtime is now finally working with bracketed strings. This also fixes some errors in the old implementation
This commit is contained in:
@@ -28,7 +28,7 @@ module PGF.VisualizeTree
|
|||||||
, getDepLabels
|
, getDepLabels
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import PGF.CId (CId,showCId,ppCId,pCId,mkCId)
|
import PGF.CId (CId,wildCId,showCId,ppCId,pCId,mkCId)
|
||||||
import PGF.Data
|
import PGF.Data
|
||||||
import PGF.Expr (showExpr, Tree)
|
import PGF.Expr (showExpr, Tree)
|
||||||
import PGF.Linearize
|
import PGF.Linearize
|
||||||
@@ -37,7 +37,7 @@ import PGF.Macros (lookValCat, lookMap,
|
|||||||
|
|
||||||
import qualified Data.Map as Map
|
import qualified Data.Map as Map
|
||||||
import qualified Data.IntMap as IntMap
|
import qualified Data.IntMap as IntMap
|
||||||
import Data.List (intersperse,nub,isPrefixOf,sort,sortBy)
|
import Data.List (intersperse,nub,mapAccumL)
|
||||||
import Data.Char (isDigit)
|
import Data.Char (isDigit)
|
||||||
import Data.Maybe (fromMaybe)
|
import Data.Maybe (fromMaybe)
|
||||||
import Text.PrettyPrint
|
import Text.PrettyPrint
|
||||||
@@ -116,33 +116,39 @@ graphvizAbstractTree pgf (funs,cats) = render . tree2graph
|
|||||||
|
|
||||||
type Labels = Map.Map CId [String]
|
type Labels = Map.Map CId [String]
|
||||||
|
|
||||||
{- This is an attempt to build the dependency tree from the bracketed string.
|
|
||||||
Unfortunately it doesn't quite work. See the actual implementation at
|
|
||||||
the end of this module.
|
|
||||||
|
|
||||||
graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Tree -> String
|
graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Tree -> String
|
||||||
graphvizDependencyTree format debug mlab ms pgf lang t = render $
|
graphvizDependencyTree format debug mlab ms pgf lang t = render $
|
||||||
case format of
|
case format of
|
||||||
"malt" -> vcat (map (hcat . intersperse (char '\t') ) wnodes)
|
"conll" -> vcat (map (hcat . intersperse (char '\t') ) wnodes)
|
||||||
"malt_input" -> vcat (map (hcat . intersperse (char '\t') . take 6) wnodes)
|
"malt_input" -> vcat (map (hcat . intersperse (char '\t') . take 6) wnodes)
|
||||||
_ -> text "digraph {" $$
|
_ -> text "digraph {" $$
|
||||||
space $$
|
space $$
|
||||||
nest 2 (text "rankdir=LR ;" $$
|
nest 2 (text "rankdir=RL ;" $$
|
||||||
text "node [shape = plaintext] ;" $$
|
text "node [shape = plaintext] ;" $$
|
||||||
vcat nodes $$
|
vcat nodes $$
|
||||||
vcat links) $$
|
vcat links) $$
|
||||||
text "}"
|
text "}"
|
||||||
where
|
where
|
||||||
nodes = map mkNode leaves
|
nodes = map mkNode leaves
|
||||||
links = map mkLink [(fid, fromMaybe nil (lookup fid deps)) | (fid,_,w) <- tail leaves]
|
links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun),_,w) <- tail leaves]
|
||||||
wnodes = undefined
|
|
||||||
|
wnodes = [[int i, maltws ws, ppCId fun, ppCId cat, ppCId cat, unspec, int parent, text lab, unspec, unspec] |
|
||||||
|
((cat,fid,fun),i,ws) <- tail leaves,
|
||||||
|
let (lab,parent) = maybe (dep_lbl,0)
|
||||||
|
(\(lbl,fid) -> (lbl,head [i | ((_,fid1,_),i,_) <- leaves, fid == fid1]))
|
||||||
|
(lookup fid deps)
|
||||||
|
]
|
||||||
|
maltws = text . concat . intersperse "+" . words -- no spaces in column 2
|
||||||
|
|
||||||
nil = -1
|
nil = -1
|
||||||
|
|
||||||
bs = bracketedLinearize pgf lang t
|
bs = bracketedLinearize pgf lang t
|
||||||
|
|
||||||
leaves = (nil,0,"ROOT") : (groupAndIndexIt 1 . getLeaves nil) bs
|
root = (wildCId,nil,wildCId)
|
||||||
deps = getDeps nil [bs]
|
|
||||||
|
leaves = (root,0,root_lbl) : (groupAndIndexIt 1 . getLeaves root) bs
|
||||||
|
deps = let (_,(h,deps)) = getDeps 0 [] t []
|
||||||
|
in (h,(dep_lbl,nil)):deps
|
||||||
|
|
||||||
groupAndIndexIt id [] = []
|
groupAndIndexIt id [] = []
|
||||||
groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
|
groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
|
||||||
@@ -155,32 +161,54 @@ graphvizDependencyTree format debug mlab ms pgf lang t = render $
|
|||||||
|
|
||||||
getLeaves parent bs =
|
getLeaves parent bs =
|
||||||
case bs of
|
case bs of
|
||||||
Leaf w -> [(parent,w)]
|
Leaf w -> [(parent,w)]
|
||||||
Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss
|
Bracket cat fid _ fun _ bss -> concatMap (getLeaves (cat,fid,fun)) bss
|
||||||
|
|
||||||
getDeps out_head bss =
|
mkNode ((_,p,_),i,w) =
|
||||||
case selectHead (children bss) of
|
|
||||||
Just ((head, bss'), deps) -> concat (descend out_head head bss' : [descend (headOf head bss') fid bss | (fid,bss) <- IntMap.toList deps])
|
|
||||||
Nothing -> []
|
|
||||||
where
|
|
||||||
descend head fid bss = (fid,head) : getDeps head bss
|
|
||||||
|
|
||||||
headOf head bss
|
|
||||||
| null [() | Leaf _ <- bss] =
|
|
||||||
case selectHead (children bss) of
|
|
||||||
Just ((head, bss), deps) -> headOf head bss
|
|
||||||
Nothing -> head
|
|
||||||
| otherwise = head
|
|
||||||
|
|
||||||
children bss = IntMap.fromListWith (++) [(fid,bss) | Bracket _ fid _ _ bss <- bss]
|
|
||||||
|
|
||||||
selectHead children = IntMap.maxViewWithKey children
|
|
||||||
|
|
||||||
mkNode (p,i,w) =
|
|
||||||
tag p <+> brackets (text "label = " <> doubleQuotes (int i <> char '.' <+> text w)) <+> semi
|
tag p <+> brackets (text "label = " <> doubleQuotes (int i <> char '.' <+> text w)) <+> semi
|
||||||
|
|
||||||
mkLink (x,y) = tag y <+> text "->" <+> tag x -- ++ " [label = \"" ++ l ++ "\"] ;"
|
mkLink (x,(lbl,y)) = tag x <+> text "->" <+> tag y <+> text "[label = " <> doubleQuotes (text lbl) <> text "] ;"
|
||||||
-}
|
|
||||||
|
labels = maybe Map.empty id mlab
|
||||||
|
|
||||||
|
getDeps n_fid xs (EAbs _ x e) es = getDeps n_fid (x:xs) e es
|
||||||
|
getDeps n_fid xs (EApp e1 e2) es = getDeps n_fid xs e1 (e2:es)
|
||||||
|
getDeps n_fid xs (EImplArg e) es = getDeps n_fid xs e es
|
||||||
|
getDeps n_fid xs (ETyped e _) es = getDeps n_fid xs e es
|
||||||
|
getDeps n_fid xs (EFun f) es = let (n_fid_1,ds) = descend n_fid xs es
|
||||||
|
(mb_h, deps) = selectHead f ds
|
||||||
|
in case mb_h of
|
||||||
|
Just (fid,deps0) -> (n_fid_1+1,(fid,deps0++
|
||||||
|
[(n_fid_1,(dep_lbl,fid))]++
|
||||||
|
concat [(m,(lbl,fid)):ds | (lbl,(m,ds)) <- deps]))
|
||||||
|
Nothing -> (n_fid_1+1,(n_fid_1,concat [(m,(lbl,n_fid_1)):ds | (lbl,(m,ds)) <- deps]))
|
||||||
|
getDeps n_fid xs (EMeta i) es = (n_fid+2,(n_fid,[]))
|
||||||
|
getDeps n_fid xs (EVar i) _ = (n_fid+2,(n_fid,[]))
|
||||||
|
getDeps n_fid xs (ELit l) [] = (n_fid+1,(n_fid,[]))
|
||||||
|
|
||||||
|
descend n_fid xs es = mapAccumL (\n_fid e -> getDeps n_fid xs e []) n_fid es
|
||||||
|
|
||||||
|
selectHead f ds =
|
||||||
|
case Map.lookup f labels of
|
||||||
|
Just lbls -> extractHead (zip lbls ds)
|
||||||
|
Nothing -> extractLast ds
|
||||||
|
where
|
||||||
|
extractHead [] = (Nothing, [])
|
||||||
|
extractHead (ld@(l,d):lds)
|
||||||
|
| l == head_lbl = (Just d,lds)
|
||||||
|
| otherwise = let (mb_h,deps) = extractHead lds
|
||||||
|
in (mb_h,ld:deps)
|
||||||
|
|
||||||
|
extractLast [] = (Nothing, [])
|
||||||
|
extractLast (d:ds)
|
||||||
|
| null ds = (Just d,[])
|
||||||
|
| otherwise = let (mb_h,deps) = extractLast ds
|
||||||
|
in (mb_h,(dep_lbl,d):deps)
|
||||||
|
|
||||||
|
dep_lbl = "dep"
|
||||||
|
head_lbl = "head"
|
||||||
|
root_lbl = "ROOT"
|
||||||
|
unspec = text "_"
|
||||||
|
|
||||||
getDepLabels :: [String] -> Labels
|
getDepLabels :: [String] -> Labels
|
||||||
getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss]
|
getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss]
|
||||||
@@ -427,213 +455,3 @@ tbrackets d = char '<' <> d <> char '>'
|
|||||||
tag i
|
tag i
|
||||||
| i < 0 = char 'r' <> int (negate i)
|
| i < 0 = char 'r' <> int (negate i)
|
||||||
| otherwise = char 'n' <> int i
|
| otherwise = char 'n' <> int i
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
--------------------------------------------------------------------
|
|
||||||
-- The linearization code bellow is needed just in order to
|
|
||||||
-- produce the dependency tree. Unfortunately the bracketed string
|
|
||||||
-- doesn't give us an easy way to find which part of the string
|
|
||||||
-- corresponds to which argument of the parent function.
|
|
||||||
--
|
|
||||||
-- Uuuuugly!!! I hope that this code will be removed one day.
|
|
||||||
|
|
||||||
type LinTable = Array LIndex [BracketedTokn]
|
|
||||||
|
|
||||||
|
|
||||||
linTree :: PGF -> Language -> (Maybe CId -> [Int] -> LinTable -> LinTable) -> Expr -> [LinTable]
|
|
||||||
linTree pgf lang mark e = lin0 [] [] [] Nothing e
|
|
||||||
where
|
|
||||||
cnc = lookMap (error "no lang") lang (concretes pgf)
|
|
||||||
lp = lproductions cnc
|
|
||||||
|
|
||||||
lin0 path xs ys mb_fid (EAbs _ x e) = lin0 path (showCId x:xs) ys mb_fid e
|
|
||||||
lin0 path xs ys mb_fid (ETyped e _) = lin0 path xs ys mb_fid e
|
|
||||||
lin0 path xs ys mb_fid e = lin path ys mb_fid e []
|
|
||||||
|
|
||||||
lin path xs mb_fid (EApp e1 e2) es = lin path xs mb_fid e1 (e2:es)
|
|
||||||
lin path xs mb_fid (ELit l) [] = case l of
|
|
||||||
LStr s -> return (mark Nothing path (ss s))
|
|
||||||
LInt n -> return (mark Nothing path (ss (show n)))
|
|
||||||
LFlt f -> return (mark Nothing path (ss (show f)))
|
|
||||||
lin path xs mb_fid (EFun f) es = map (mark (Just f) path) (apply path xs mb_fid f es)
|
|
||||||
lin path xs mb_fid (ETyped e _) es = lin path xs mb_fid e es
|
|
||||||
lin path xs mb_fid (EImplArg e) es = lin path xs mb_fid e es
|
|
||||||
|
|
||||||
ss s = listArray (0,0) [[LeafKS [s]]]
|
|
||||||
|
|
||||||
apply path xs mb_fid f es =
|
|
||||||
case Map.lookup f lp of
|
|
||||||
Just prods -> case lookupProds mb_fid prods of
|
|
||||||
Just set -> do prod <- Set.toList set
|
|
||||||
case prod of
|
|
||||||
PApply funid fids -> do guard (length fids == length es)
|
|
||||||
args <- sequence (zipWith3 (\i (PArg _ fid) e -> lin0 (sub i path) [] xs (Just fid) e) [0..] fids es)
|
|
||||||
let (CncFun _ lins) = cncfuns cnc ! funid
|
|
||||||
return (listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins])
|
|
||||||
PCoerce fid -> apply path xs (Just fid) f es
|
|
||||||
Nothing -> mzero
|
|
||||||
where
|
|
||||||
lookupProds (Just fid) prods = IntMap.lookup fid prods
|
|
||||||
lookupProds Nothing prods = Just (Set.filter isApp (Set.unions (IntMap.elems prods)))
|
|
||||||
|
|
||||||
sub i path = i:path
|
|
||||||
|
|
||||||
isApp (PApply _ _) = True
|
|
||||||
isApp _ = False
|
|
||||||
|
|
||||||
computeSeq seqid args = concatMap compute (elems seq)
|
|
||||||
where
|
|
||||||
seq = sequences cnc ! seqid
|
|
||||||
|
|
||||||
compute (SymCat d r) = (args !! d) ! r
|
|
||||||
compute (SymLit d r) = (args !! d) ! r
|
|
||||||
compute (SymKS ts) = [LeafKS ts]
|
|
||||||
compute (SymKP ts alts) = [LeafKP ts alts]
|
|
||||||
|
|
||||||
untokn :: [BracketedTokn] -> [String]
|
|
||||||
untokn ts = case ts of
|
|
||||||
LeafKP d _ : [] -> d
|
|
||||||
LeafKP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss
|
|
||||||
LeafKS s : ws -> s ++ untokn ws
|
|
||||||
[] -> []
|
|
||||||
where
|
|
||||||
sel d vs w = case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of
|
|
||||||
v:_ -> v
|
|
||||||
_ -> d
|
|
||||||
|
|
||||||
|
|
||||||
-- show bracketed markup with references to tree structure
|
|
||||||
markLinearizes :: PGF -> CId -> Expr -> [String]
|
|
||||||
markLinearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang mark
|
|
||||||
where
|
|
||||||
mark mb_f path lint = amap (bracket mb_f path) lint
|
|
||||||
|
|
||||||
bracket Nothing path ts = [LeafKS ["("++show (reverse path)]] ++ ts ++ [LeafKS [")"]]
|
|
||||||
bracket (Just f) path ts = [LeafKS ["(("++showCId f++","++show (reverse path)++")"]] ++ ts ++ [LeafKS [")"]]
|
|
||||||
|
|
||||||
|
|
||||||
graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String
|
|
||||||
graphvizDependencyTree format debug mlab ms pgf lang tr = case format of
|
|
||||||
"malt" -> unlines (lin2dep format)
|
|
||||||
"malt_input" -> unlines (lin2dep format)
|
|
||||||
_ -> concat $ map (++"\n") $ ["digraph {\n"] ++ lin2dep format ++ ["}"]
|
|
||||||
where
|
|
||||||
|
|
||||||
lin2dep format = -- trace (ifd (show sortedNodes ++ show nodeWords)) $
|
|
||||||
case format of
|
|
||||||
"malt" -> map (concat . intersperse "\t") wnodes
|
|
||||||
"malt_input" -> map (concat . intersperse "\t" . take 6) wnodes
|
|
||||||
_ -> prelude ++ nodes ++ links
|
|
||||||
|
|
||||||
ifd s = if debug then s else []
|
|
||||||
|
|
||||||
pot = readPosText $ concat $ take 1 $ markLinearizes pgf lang tr
|
|
||||||
---- use Just str if you have str to match against
|
|
||||||
|
|
||||||
prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"]
|
|
||||||
|
|
||||||
nodes = map mkNode nodeWords
|
|
||||||
mkNode (i,((_,p),ss)) =
|
|
||||||
node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;"
|
|
||||||
nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)|
|
|
||||||
((Just f,p),w) <- wlins pot]
|
|
||||||
|
|
||||||
links = map mkLink thelinks
|
|
||||||
thelinks = [(word y, x, label tr y x) |
|
|
||||||
(_,((f,x),_)) <- tail nodeWords,
|
|
||||||
let y = dominant x]
|
|
||||||
mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;"
|
|
||||||
node = show . show
|
|
||||||
|
|
||||||
dominant x = case x of
|
|
||||||
[] -> x
|
|
||||||
_ | not (x == hx) -> hx
|
|
||||||
_ -> dominant (init x)
|
|
||||||
where
|
|
||||||
hx = headArg (init x) tr x
|
|
||||||
|
|
||||||
headArg x0 tr x = case (unApp tr,x) of
|
|
||||||
(Just (f,[]),[_]) -> x0 ---- ??
|
|
||||||
(Just (f,ts),[_]) -> x0 ++ [getHead (length ts - 1) f]
|
|
||||||
(Just (f,ts),i:y) -> headArg x0 (ts !! i) y
|
|
||||||
_ -> x0 ----
|
|
||||||
|
|
||||||
label tr y x = case span (uncurry (==)) (zip y x) of
|
|
||||||
(xys,(_,i):_) -> getLabel i (funAt tr (map fst xys))
|
|
||||||
_ -> "" ----
|
|
||||||
|
|
||||||
funAt tr x = case (unApp tr,x) of
|
|
||||||
(Just (f,_) ,[]) -> f
|
|
||||||
(Just (f,ts),i:y) -> funAt (ts !! i) y
|
|
||||||
_ -> mkCId (render (ppExpr 0 [] tr)) ----
|
|
||||||
|
|
||||||
word x = if elem x sortedNodes then x else
|
|
||||||
let x' = headArg x tr (x ++[0]) in
|
|
||||||
if x' == x then [] else word x'
|
|
||||||
|
|
||||||
sortedNodes = [p | (_,((_,p),_)) <- nodeWords]
|
|
||||||
|
|
||||||
labels = maybe Map.empty id mlab
|
|
||||||
getHead i f = case Map.lookup f labels of
|
|
||||||
Just ls -> length $ takeWhile (/= "head") ls
|
|
||||||
_ -> i
|
|
||||||
getLabel i f = case Map.lookup f labels of
|
|
||||||
Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i
|
|
||||||
_ -> showCId f ++ "#" ++ show i
|
|
||||||
|
|
||||||
-- to generate CoNLL format for MaltParser
|
|
||||||
nodeMap :: Map.Map [Int] Int
|
|
||||||
nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords]
|
|
||||||
|
|
||||||
arcMap :: Map.Map [Int] ([Int],String)
|
|
||||||
arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks]
|
|
||||||
|
|
||||||
lookDomLab p = case Map.lookup p arcMap of
|
|
||||||
Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l)
|
|
||||||
_ -> (0,rootlabel)
|
|
||||||
|
|
||||||
wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] |
|
|
||||||
(i, ((fun,p),ws)) <- tail nodeWords,
|
|
||||||
let pos = showCId $ lookValCat (abstract pgf) fun,
|
|
||||||
let morph = unspec,
|
|
||||||
let (dom,lab) = lookDomLab p
|
|
||||||
]
|
|
||||||
maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2
|
|
||||||
unspec = "_"
|
|
||||||
rootlabel = "ROOT"
|
|
||||||
|
|
||||||
wlins :: PosText -> [((Maybe CId,[Int]),[String])]
|
|
||||||
wlins pt = case pt of
|
|
||||||
T p pts -> concatMap (lins p) pts
|
|
||||||
M ws -> if null ws then [] else [((Nothing,[]),ws)]
|
|
||||||
where
|
|
||||||
lins p pt = case pt of
|
|
||||||
T q pts -> concatMap (lins q) pts
|
|
||||||
M ws -> if null ws then [] else [(p,ws)]
|
|
||||||
|
|
||||||
data PosText =
|
|
||||||
T (Maybe CId,[Int]) [PosText]
|
|
||||||
| M [String]
|
|
||||||
deriving Show
|
|
||||||
|
|
||||||
readPosText :: String -> PosText
|
|
||||||
readPosText = fst . head . (RP.readP_to_S pPosText) where
|
|
||||||
pPosText = do
|
|
||||||
RP.char '(' >> RP.skipSpaces
|
|
||||||
p <- pPos
|
|
||||||
RP.skipSpaces
|
|
||||||
ts <- RP.many pPosText
|
|
||||||
RP.char ')' >> RP.skipSpaces
|
|
||||||
return (T p ts)
|
|
||||||
RP.<++ do
|
|
||||||
ws <- RP.sepBy1 (RP.munch1 (flip notElem "()")) (RP.char ' ')
|
|
||||||
return (M ws)
|
|
||||||
pPos = do
|
|
||||||
fun <- (RP.char '(' >> pCId >>= \f -> RP.char ',' >> (return $ Just f))
|
|
||||||
RP.<++ (return Nothing)
|
|
||||||
RP.char '[' >> RP.skipSpaces
|
|
||||||
is <- RP.sepBy (RP.munch1 isDigit) (RP.char ',')
|
|
||||||
RP.char ']' >> RP.skipSpaces
|
|
||||||
RP.char ')' RP.<++ return ' '
|
|
||||||
return (fun,map read is)
|
|
||||||
|
|||||||
Reference in New Issue
Block a user