now every BracketedString also has reference to the source expression(s)

This commit is contained in:
krasimir
2010-05-19 13:32:39 +00:00
parent 1743e88192
commit e0dc9c80a6
5 changed files with 88 additions and 52 deletions

View File

@@ -14,6 +14,7 @@
module PGF.Forest( Forest(..)
, BracketedString, showBracketedString, lengthBracketedString
, linearizeWithBrackets
, foldForest
) where
import PGF.CId
@@ -26,6 +27,7 @@ import qualified Data.Map as Map
import qualified Data.IntSet as IntSet
import qualified Data.IntMap as IntMap
import Control.Monad
import GF.Data.SortedList
data Forest
= Forest
@@ -48,11 +50,11 @@ linearizeWithBrackets = head . snd . untokn "" . bracketedTokn
--
bracketedTokn :: Forest -> BracketedTokn
bracketedTokn (Forest abs cnc forest root) =
bracketedTokn f@(Forest abs cnc forest root) =
case [computeSeq seq (map (render IntMap.empty) args) | (seq,args) <- root] of
([bs@(Bracket_ cat fid label lin)]:_) -> bs
(bss:_) -> Bracket_ wildCId 0 0 bss
[] -> Bracket_ wildCId 0 0 []
([bs@(Bracket_ _ _ _ _ _)]:_) -> bs
(bss:_) -> Bracket_ wildCId 0 0 [] bss
[] -> Bracket_ wildCId 0 0 [] []
where
trusted = foldl1 IntSet.intersection [IntSet.unions (map (trustedSpots IntSet.empty) args) | (_,args) <- root]
@@ -97,8 +99,56 @@ bracketedTokn (Forest abs cnc forest root) =
getArg d r
| not (null arg_lin) &&
IntSet.member fid trusted
= [Bracket_ cat fid r arg_lin]
= [Bracket_ cat fid r es arg_lin]
| otherwise = arg_lin
where
arg_lin = lin ! r
(fid,cat,lin) = args !! d
es = getAbsTrees f fid
-- | This function extracts the list of all completed parse trees
-- that spans the whole input consumed so far. The trees are also
-- limited by the category specified, which is usually
-- the same as the startup category.
getAbsTrees :: Forest -> FId -> [Expr]
getAbsTrees (Forest abs cnc forest root) fid =
nubsort $ do (fvs,e) <- go Set.empty 0 (0,fid)
guard (Set.null fvs)
return e
where
go rec fcat' (d,fcat)
| fcat < totalCats cnc = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
| Set.member fcat rec = mzero
| otherwise = foldForest (\funid args trees ->
do let CncFun fn lins = cncfuns cnc ! funid
args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args)
check_ho_fun fn args
`mplus`
trees)
(\const _ trees ->
return (freeVar const,const)
`mplus`
trees)
[] fcat forest
check_ho_fun fun args
| fun == _V = return (head args)
| fun == _B = return (foldl1 Set.difference (map fst args), foldr (\x e -> EAbs Explicit (mkVar (snd x)) e) (snd (head args)) (tail args))
| otherwise = return (Set.unions (map fst args),foldl (\e x -> EApp e (snd x)) (EFun fun) args)
mkVar (EFun v) = v
mkVar (EMeta _) = wildCId
freeVar (EFun v) = Set.singleton v
freeVar _ = Set.empty
foldForest :: (FunId -> [FId] -> b -> b) -> (Expr -> [String] -> b -> b) -> b -> FId -> IntMap.IntMap (Set.Set Production) -> b
foldForest f g b fcat forest =
case IntMap.lookup fcat forest of
Nothing -> b
Just set -> Set.fold foldProd b set
where
foldProd (PCoerce fcat) b = foldForest f g b fcat forest
foldProd (PApply funid args) b = f funid args b
foldProd (PConst _ const toks) b = g const toks b

View File

@@ -63,7 +63,7 @@ type CncType = (CId, FId) -- concrete type is the abstract type (the category
linTree :: PGF -> Language -> Expr -> [Array LIndex BracketedTokn]
linTree pgf lang e =
[amapWithIndex (\label -> Bracket_ cat fid label) lin | (_,((cat,fid),lin)) <- lin0 [] [] Nothing 0 e]
[amapWithIndex (\label -> Bracket_ cat fid label [e]) lin | (_,((cat,fid),e,lin)) <- lin0 [] [] Nothing 0 e]
where
cnc = lookMap (error "no lang") lang (concretes pgf)
lp = lproductions cnc
@@ -74,26 +74,26 @@ linTree pgf lang e =
| otherwise = apply (xs ++ ys) mb_cty n_fid _B (e:[ELit (LStr x) | x <- xs])
lin xs mb_cty n_fid (EApp e1 e2) es = lin xs mb_cty n_fid e1 (e2:es)
lin xs mb_cty n_fid (ELit l) [] = case l of
LStr s -> return (n_fid+1,((cidString,n_fid),ss s))
LInt n -> return (n_fid+1,((cidInt, n_fid),ss (show n)))
LFlt f -> return (n_fid+1,((cidFloat, n_fid),ss (show f)))
lin xs mb_cty n_fid e@(ELit l) [] = case l of
LStr s -> return (n_fid+1,((cidString,n_fid),e,ss s))
LInt n -> return (n_fid+1,((cidInt, n_fid),e,ss (show n)))
LFlt f -> return (n_fid+1,((cidFloat, n_fid),e,ss (show f)))
lin xs mb_cty n_fid (EMeta i) es = apply xs mb_cty n_fid _V (ELit (LStr ('?':show i)):es)
lin xs mb_cty n_fid (EFun f) es = apply xs mb_cty n_fid f es
lin xs mb_cty n_fid (EVar i) es = apply xs mb_cty n_fid _V (ELit (LStr (xs !! i)) :es)
lin xs mb_cty n_fid (ETyped e _) es = lin xs mb_cty n_fid e es
lin xs mb_cty n_fid (EImplArg e) es = lin xs mb_cty n_fid e es
lin xs mb_cty n_fid (ETyped e _) es = lin xs mb_cty n_fid e es
lin xs mb_cty n_fid (EImplArg e) es = lin xs mb_cty n_fid e es
ss s = listArray (0,0) [[LeafKS [s]]]
apply :: [String] -> Maybe CncType -> FId -> CId -> [Expr] -> [(FId,(CncType, LinTable))]
apply :: [String] -> Maybe CncType -> FId -> CId -> [Expr] -> [(FId,(CncType, Expr, LinTable))]
apply xs mb_cty n_fid f es =
case Map.lookup f lp of
Just prods -> do (funid,(cat,fid),ctys) <- getApps prods
guard (length ctys == length es)
(n_fid,args) <- descend n_fid (zip ctys es)
let (CncFun _ lins) = cncfuns cnc ! funid
return (n_fid+1,((cat,n_fid),listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins]))
return (n_fid+1,((cat,n_fid),undefined,listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins]))
Nothing -> apply xs mb_cty n_fid _V [ELit (LStr ("[" ++ showCId f ++ "]"))] -- fun without lin
where
getApps prods =
@@ -116,7 +116,7 @@ linTree pgf lang e =
(n_fid,args) <- descend n_fid fes
return (n_fid,arg:args)
computeSeq :: SeqId -> [(CncType,LinTable)] -> [BracketedTokn]
computeSeq :: SeqId -> [(CncType,Expr,LinTable)] -> [BracketedTokn]
computeSeq seqid args = concatMap compute (elems seq)
where
seq = sequences cnc ! seqid
@@ -127,11 +127,11 @@ linTree pgf lang e =
compute (SymKP ts alts) = [LeafKP ts alts]
getArg d r
| not (null arg_lin) = [Bracket_ cat fid r arg_lin]
| not (null arg_lin) = [Bracket_ cat fid r [e] arg_lin]
| otherwise = arg_lin
where
arg_lin = lin ! r
((cat,fid),lin) = args !! d
arg_lin = lin ! r
((cat,fid),e,lin) = args !! d
amapWithIndex :: (IArray a e1, IArray a e2, Ix i) => (i -> e1 -> e2) -> a i e1 -> a i e2
amapWithIndex f arr = listArray (bounds arr) (map (uncurry f) (assocs arr))

View File

@@ -212,7 +212,8 @@ updateProductionIndices pgf = pgf{ concretes = fmap updateConcrete (concretes pg
-- mark the beginning and the end of each constituent.
data BracketedString
= Leaf String -- ^ this is the leaf i.e. a single token
| Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [BracketedString] -- ^ this is a bracket. The 'CId' is the category of
| Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [Expr] [BracketedString]
-- ^ this is a bracket. The 'CId' is the category of
-- the phrase. The 'FId' is an unique identifier for
-- every phrase in the sentence. For context-free grammars
-- i.e. without discontinuous constituents this identifier
@@ -227,7 +228,7 @@ data BracketedString
data BracketedTokn
= LeafKS [String]
| LeafKP [String] [Alternative]
| Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [BracketedTokn] -- Invariant: the list is not empty
| Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [Expr] [BracketedTokn] -- Invariant: the list is not empty
type LinTable = Array.Array LIndex [BracketedTokn]
@@ -238,12 +239,12 @@ showBracketedString :: BracketedString -> String
showBracketedString = render . ppBracketedString
ppBracketedString (Leaf t) = text t
ppBracketedString (Bracket cat fcat index bss) = parens (ppCId cat <+> hsep (map ppBracketedString bss))
ppBracketedString (Bracket cat fcat index _ bss) = parens (ppCId cat <+> hsep (map ppBracketedString bss))
-- | The length of the bracketed string in number of tokens.
lengthBracketedString :: BracketedString -> Int
lengthBracketedString (Leaf _) = 1
lengthBracketedString (Bracket _ _ _ bss) = sum (map lengthBracketedString bss)
lengthBracketedString (Leaf _) = 1
lengthBracketedString (Bracket _ _ _ _ bss) = sum (map lengthBracketedString bss)
untokn :: String -> BracketedTokn -> (String,[BracketedString])
untokn nw (LeafKS ts) = (head ts,map Leaf ts)
@@ -254,10 +255,10 @@ untokn nw (LeafKP d vs) = let ts = sel d vs nw
case [v | Alt v cs <- vs, any (\c -> isPrefixOf c nw) cs] of
v:_ -> v
_ -> d
untokn nw (Bracket_ cat fid index bss) =
untokn nw (Bracket_ cat fid index es bss) =
let (nw',bss') = mapAccumR untokn nw bss
in (nw',[Bracket cat fid index (concat bss')])
in (nw',[Bracket cat fid index es (concat bss')])
flattenBracketedString :: BracketedString -> [String]
flattenBracketedString (Leaf w) = [w]
flattenBracketedString (Bracket _ _ _ bss) = concatMap flattenBracketedString bss
flattenBracketedString (Leaf w) = [w]
flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString bss

View File

@@ -27,7 +27,7 @@ import PGF.Data
import PGF.Expr(Tree)
import PGF.Macros
import PGF.TypeCheck
import PGF.Forest(Forest(Forest), linearizeWithBrackets)
import PGF.Forest(Forest(Forest), linearizeWithBrackets, foldForest)
-- | This data type encodes the different outcomes which you could get from the parser.
data ParseResult
@@ -379,21 +379,6 @@ insertPC :: PassiveKey -> FId -> PassiveChart -> PassiveChart
insertPC key fcat chart = Map.insert key fcat chart
----------------------------------------------------------------
-- Forest
----------------------------------------------------------------
foldForest :: (FunId -> [FId] -> b -> b) -> (Expr -> [String] -> b -> b) -> b -> FId -> IntMap.IntMap (Set.Set Production) -> b
foldForest f g b fcat forest =
case IntMap.lookup fcat forest of
Nothing -> b
Just set -> Set.fold foldProd b set
where
foldProd (PCoerce fcat) b = foldForest f g b fcat forest
foldProd (PApply funid args) b = f funid args b
foldProd (PConst _ const toks) b = g const toks b
----------------------------------------------------------------
-- Parse State
----------------------------------------------------------------

View File

@@ -22,7 +22,7 @@ module PGF.VisualizeTree
, graphvizBracketedString
, graphvizAlignment
, getDepLabels
) where
) where
import PGF.CId (CId,showCId,ppCId,mkCId)
import PGF.Data
@@ -122,8 +122,8 @@ graphvizDependencyTree format debug mlab ms pgf lang t = render $
getLeaves parent bs =
case bs of
Leaf w -> [(parent,w)]
Bracket _ fid _ bss -> concatMap (getLeaves fid) bss
Leaf w -> [(parent,w)]
Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss
mkNode (p,i,w) =
tag p <> text " [label = " <> doubleQuotes (int i <> char '.' <+> text w) <> text "] ;"
@@ -234,13 +234,13 @@ graphvizBracketedString = render . lin2tree
getLeaves level parent bs =
case bs of
Leaf w -> [(level-1,parent,w)]
Bracket _ fid i bss -> concatMap (getLeaves (level+1) fid) bss
Leaf w -> [(level-1,parent,w)]
Bracket _ fid i _ bss -> concatMap (getLeaves (level+1) fid) bss
getInterns level [] = []
getInterns level nodes =
nub [(level-1,parent,fid,showCId cat) | (parent,Bracket cat fid _ _) <- nodes] :
getInterns (level+1) [(fid,child) | (_,Bracket _ fid _ children) <- nodes, child <- children]
nub [(level-1,parent,fid,showCId cat) | (parent,Bracket cat fid _ _ _) <- nodes] :
getInterns (level+1) [(fid,child) | (_,Bracket _ fid _ _ children) <- nodes, child <- children]
mkStruct l cs = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$
vcat [link pl pid l id | (pl,pid,id,_) <- cs]
@@ -290,8 +290,8 @@ graphvizAlignment pgf langs = render . lin2graph . linsBracketed
getLeaves parent bs =
case bs of
Leaf w -> [(parent,w)]
Bracket _ fid _ bss -> concatMap (getLeaves fid) bss
Leaf w -> [(parent,w)]
Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss
mkLayers l [] = empty
mkLayers l (cs:css) = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$