1
0
forked from GitHub/gf-core

I switched back to the old algorithm for generating dependency trees. This required an ugly hack but there is no easy and quick other way :-(

This commit is contained in:
krasimir
2010-05-29 12:49:54 +00:00
parent 8b8f9ed1c2
commit 540ca4224e

View File

@@ -24,11 +24,11 @@ module PGF.VisualizeTree
, getDepLabels , getDepLabels
) where ) where
import PGF.CId (CId,showCId,ppCId,mkCId) import PGF.CId (CId,showCId,ppCId,pCId,mkCId)
import PGF.Data import PGF.Data
import PGF.Expr (showExpr, Tree) import PGF.Expr (showExpr, Tree)
import PGF.Linearize import PGF.Linearize
import PGF.Macros (lookValCat, BracketedString(..), flattenBracketedString) import PGF.Macros (lookValCat, lookMap, _B, _V, BracketedString(..), flattenBracketedString)
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.IntMap as IntMap import qualified Data.IntMap as IntMap
@@ -37,6 +37,12 @@ import Data.Char (isDigit)
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import Text.PrettyPrint import Text.PrettyPrint
import Data.Array.IArray
import Control.Monad
import qualified Data.Set as Set
import qualified Text.ParserCombinators.ReadP as RP
-- | Renders abstract syntax tree in Graphviz format -- | Renders abstract syntax tree in Graphviz format
graphvizAbstractTree :: PGF -> (Bool,Bool) -> Tree -> String graphvizAbstractTree :: PGF -> (Bool,Bool) -> Tree -> String
graphvizAbstractTree pgf (funs,cats) = render . tree2graph graphvizAbstractTree pgf (funs,cats) = render . tree2graph
@@ -90,6 +96,10 @@ graphvizAbstractTree pgf (funs,cats) = render . tree2graph
type Labels = Map.Map CId [String] type Labels = Map.Map CId [String]
{- This is an attempt to build the dependency tree from the bracketed string.
Unfortunately it doesn't quite work. See the actual implementation at
the end of this module.
graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Tree -> String graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Tree -> String
graphvizDependencyTree format debug mlab ms pgf lang t = render $ graphvizDependencyTree format debug mlab ms pgf lang t = render $
case format of case format of
@@ -129,27 +139,28 @@ graphvizDependencyTree format debug mlab ms pgf lang t = render $
Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss
getDeps out_head bss = getDeps out_head bss =
case IntMap.maxViewWithKey children of case selectHead (children bss) of
Just ((head, bss'), deps) -> concat (descend out_head head bss' : [descend (headOf head bss') fid bss | (fid,bss) <- IntMap.toList deps]) Just ((head, bss'), deps) -> concat (descend out_head head bss' : [descend (headOf head bss') fid bss | (fid,bss) <- IntMap.toList deps])
Nothing -> [] Nothing -> []
where where
children = IntMap.fromListWith (++) [(fid,bss) | Bracket _ fid _ _ bss <- bss]
descend head fid bss = (fid,head) : getDeps head bss descend head fid bss = (fid,head) : getDeps head bss
headOf head bss headOf head bss
| null [() | Leaf _ <- bss] = | null [() | Leaf _ <- bss] =
case IntMap.maxViewWithKey children of case selectHead (children bss) of
Just ((head, bss), deps) -> headOf head bss Just ((head, bss), deps) -> headOf head bss
Nothing -> head Nothing -> head
| otherwise = head | otherwise = head
where
children = IntMap.fromListWith (++) [(fid,bss) | Bracket _ fid _ _ bss <- bss] children bss = IntMap.fromListWith (++) [(fid,bss) | Bracket _ fid _ _ bss <- bss]
selectHead children = IntMap.maxViewWithKey children
mkNode (p,i,w) = mkNode (p,i,w) =
tag p <> text " [label = " <> doubleQuotes (int i <> char '.' <+> text w) <> text "] ;" tag p <+> brackets (text "label = " <> doubleQuotes (int i <> char '.' <+> text w)) <+> semi
mkLink (x,y) = tag y <+> text "->" <+> tag x -- ++ " [label = \"" ++ l ++ "\"] ;" mkLink (x,y) = tag y <+> text "->" <+> tag x -- ++ " [label = \"" ++ l ++ "\"] ;"
-}
getDepLabels :: [String] -> Labels getDepLabels :: [String] -> Labels
getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss] getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss]
@@ -253,3 +264,220 @@ tbrackets d = char '<' <> d <> char '>'
tag i tag i
| i < 0 = char 'r' <> int (negate i) | i < 0 = char 'r' <> int (negate i)
| otherwise = char 'n' <> int i | otherwise = char 'n' <> int i
--------------------------------------------------------------------
-- The linearization code bellow is needed just in order to
-- produce the dependency tree. Unfortunately the bracketed string
-- doesn't give us an easy way to find which part of the string
-- corresponds to which argument of the parent function.
--
-- Uuuuugly!!! I hope that this code will be removed one day.
type LinTable = Array LIndex [Tokn]
linTree :: PGF -> Language -> (Maybe CId -> [Int] -> LinTable -> LinTable) -> Expr -> [LinTable]
linTree pgf lang mark e = lin0 [] [] [] Nothing e
where
cnc = lookMap (error "no lang") lang (concretes pgf)
lp = lproductions cnc
lin0 path xs ys mb_fid (EAbs _ x e) = lin0 path (showCId x:xs) ys mb_fid e
lin0 path xs ys mb_fid (ETyped e _) = lin0 path xs ys mb_fid e
lin0 path xs ys mb_fid e | null xs = lin path ys mb_fid e []
| otherwise = apply path (xs ++ ys) mb_fid _B (e:[ELit (LStr x) | x <- xs])
lin path xs mb_fid (EApp e1 e2) es = lin path xs mb_fid e1 (e2:es)
lin path xs mb_fid (ELit l) [] = case l of
LStr s -> return (mark Nothing path (ss s))
LInt n -> return (mark Nothing path (ss (show n)))
LFlt f -> return (mark Nothing path (ss (show f)))
lin path xs mb_fid (EMeta i) es = apply path xs mb_fid _V (ELit (LStr ('?':show i)):es)
lin path xs mb_fid (EFun f) es = map (mark (Just f) path) (apply path xs mb_fid f es)
lin path xs mb_fid (EVar i) es = apply path xs mb_fid _V (ELit (LStr (xs !! i)) :es)
lin path xs mb_fid (ETyped e _) es = lin path xs mb_fid e es
lin path xs mb_fid (EImplArg e) es = lin path xs mb_fid e es
ss s = listArray (0,0) [[KS s]]
apply path xs mb_fid f es =
case Map.lookup f lp of
Just prods -> case lookupProds mb_fid prods of
Just set -> do prod <- Set.toList set
case prod of
PApply funid fids -> do guard (length fids == length es)
args <- sequence (zipWith3 (\i fid e -> lin0 (sub i path) [] xs (Just fid) e) [0..] fids es)
let (CncFun _ lins) = cncfuns cnc ! funid
return (listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins])
PCoerce fid -> apply path xs (Just fid) f es
Nothing -> mzero
Nothing -> apply path xs mb_fid _V [ELit (LStr ("[" ++ showCId f ++ "]"))] -- fun without lin
where
lookupProds (Just fid) prods = IntMap.lookup fid prods
lookupProds Nothing prods
| f == _B || f == _V = Nothing
| otherwise = Just (Set.filter isApp (Set.unions (IntMap.elems prods)))
sub i path
| f == _B || f == _V = path
| otherwise = i:path
isApp (PApply _ _) = True
isApp _ = False
computeSeq seqid args = concatMap compute (elems seq)
where
seq = sequences cnc ! seqid
compute (SymCat d r) = (args !! d) ! r
compute (SymLit d r) = (args !! d) ! r
compute (SymKS ts) = map KS ts
compute (SymKP ts alts) = [KP ts alts]
untokn :: [Tokn] -> [String]
untokn ts = case ts of
KP d _ : [] -> d
KP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss
KS s : ws -> s : untokn ws
[] -> []
where
sel d vs w = case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of
v:_ -> v
_ -> d
-- show bracketed markup with references to tree structure
markLinearizes :: PGF -> CId -> Expr -> [String]
markLinearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang mark
where
mark mb_f path lint = amap (bracket mb_f path) lint
bracket Nothing path ts = [KS ("("++show (reverse path))] ++ ts ++ [KS ")"]
bracket (Just f) path ts = [KS ("(("++showCId f++","++show (reverse path)++")")] ++ ts ++ [KS ")"]
graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String
graphvizDependencyTree format debug mlab ms pgf lang tr = case format of
"malt" -> unlines (lin2dep format)
"malt_input" -> unlines (lin2dep format)
_ -> concat $ map (++"\n") $ ["digraph {\n"] ++ lin2dep format ++ ["}"]
where
lin2dep format = -- trace (ifd (show sortedNodes ++ show nodeWords)) $
case format of
"malt" -> map (concat . intersperse "\t") wnodes
"malt_input" -> map (concat . intersperse "\t" . take 6) wnodes
_ -> prelude ++ nodes ++ links
ifd s = if debug then s else []
pot = readPosText $ concat $ take 1 $ markLinearizes pgf lang tr
---- use Just str if you have str to match against
prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"]
nodes = map mkNode nodeWords
mkNode (i,((_,p),ss)) =
node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;"
nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)|
((Just f,p),w) <- wlins pot]
links = map mkLink thelinks
thelinks = [(word y, x, label tr y x) |
(_,((f,x),_)) <- tail nodeWords,
let y = dominant x]
mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;"
node = show . show
dominant x = case x of
[] -> x
_ | not (x == hx) -> hx
_ -> dominant (init x)
where
hx = headArg (init x) tr x
headArg x0 tr x = case (unApp tr,x) of
(Just (f,[]),[_]) -> x0 ---- ??
(Just (f,ts),[_]) -> x0 ++ [getHead (length ts - 1) f]
(Just (f,ts),i:y) -> headArg x0 (ts !! i) y
_ -> x0 ----
label tr y x = case span (uncurry (==)) (zip y x) of
(xys,(_,i):_) -> getLabel i (funAt tr (map fst xys))
_ -> "" ----
funAt tr x = case (unApp tr,x) of
(Just (f,_) ,[]) -> f
(Just (f,ts),i:y) -> funAt (ts !! i) y
_ -> mkCId (render (ppExpr 0 [] tr)) ----
word x = if elem x sortedNodes then x else
let x' = headArg x tr (x ++[0]) in
if x' == x then [] else word x'
sortedNodes = [p | (_,((_,p),_)) <- nodeWords]
labels = maybe Map.empty id mlab
getHead i f = case Map.lookup f labels of
Just ls -> length $ takeWhile (/= "head") ls
_ -> i
getLabel i f = case Map.lookup f labels of
Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i
_ -> showCId f ++ "#" ++ show i
-- to generate CoNLL format for MaltParser
nodeMap :: Map.Map [Int] Int
nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords]
arcMap :: Map.Map [Int] ([Int],String)
arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks]
lookDomLab p = case Map.lookup p arcMap of
Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l)
_ -> (0,rootlabel)
wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] |
(i, ((fun,p),ws)) <- tail nodeWords,
let pos = showCId $ lookValCat pgf fun,
let morph = unspec,
let (dom,lab) = lookDomLab p
]
maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2
unspec = "_"
rootlabel = "ROOT"
wlins :: PosText -> [((Maybe CId,[Int]),[String])]
wlins pt = case pt of
T p pts -> concatMap (lins p) pts
M ws -> if null ws then [] else [((Nothing,[]),ws)]
where
lins p pt = case pt of
T q pts -> concatMap (lins q) pts
M ws -> if null ws then [] else [(p,ws)]
data PosText =
T (Maybe CId,[Int]) [PosText]
| M [String]
deriving Show
readPosText :: String -> PosText
readPosText = fst . head . (RP.readP_to_S pPosText) where
pPosText = do
RP.char '(' >> RP.skipSpaces
p <- pPos
RP.skipSpaces
ts <- RP.many pPosText
RP.char ')' >> RP.skipSpaces
return (T p ts)
RP.<++ do
ws <- RP.sepBy1 (RP.munch1 (flip notElem "()")) (RP.char ' ')
return (M ws)
pPos = do
fun <- (RP.char '(' >> pCId >>= \f -> RP.char ',' >> (return $ Just f))
RP.<++ (return Nothing)
RP.char '[' >> RP.skipSpaces
is <- RP.sepBy (RP.munch1 isDigit) (RP.char ',')
RP.char ']' >> RP.skipSpaces
RP.char ')' RP.<++ return ' '
return (fun,map read is)