now every BracketedString also has reference to the source expression(s)

This commit is contained in:
krasimir
2010-05-19 13:32:39 +00:00
parent 1743e88192
commit e0dc9c80a6
5 changed files with 88 additions and 52 deletions

View File

@@ -14,6 +14,7 @@
module PGF.Forest( Forest(..) module PGF.Forest( Forest(..)
, BracketedString, showBracketedString, lengthBracketedString , BracketedString, showBracketedString, lengthBracketedString
, linearizeWithBrackets , linearizeWithBrackets
, foldForest
) where ) where
import PGF.CId import PGF.CId
@@ -26,6 +27,7 @@ import qualified Data.Map as Map
import qualified Data.IntSet as IntSet import qualified Data.IntSet as IntSet
import qualified Data.IntMap as IntMap import qualified Data.IntMap as IntMap
import Control.Monad import Control.Monad
import GF.Data.SortedList
data Forest data Forest
= Forest = Forest
@@ -48,11 +50,11 @@ linearizeWithBrackets = head . snd . untokn "" . bracketedTokn
-- --
bracketedTokn :: Forest -> BracketedTokn bracketedTokn :: Forest -> BracketedTokn
bracketedTokn (Forest abs cnc forest root) = bracketedTokn f@(Forest abs cnc forest root) =
case [computeSeq seq (map (render IntMap.empty) args) | (seq,args) <- root] of case [computeSeq seq (map (render IntMap.empty) args) | (seq,args) <- root] of
([bs@(Bracket_ cat fid label lin)]:_) -> bs ([bs@(Bracket_ _ _ _ _ _)]:_) -> bs
(bss:_) -> Bracket_ wildCId 0 0 bss (bss:_) -> Bracket_ wildCId 0 0 [] bss
[] -> Bracket_ wildCId 0 0 [] [] -> Bracket_ wildCId 0 0 [] []
where where
trusted = foldl1 IntSet.intersection [IntSet.unions (map (trustedSpots IntSet.empty) args) | (_,args) <- root] trusted = foldl1 IntSet.intersection [IntSet.unions (map (trustedSpots IntSet.empty) args) | (_,args) <- root]
@@ -97,8 +99,56 @@ bracketedTokn (Forest abs cnc forest root) =
getArg d r getArg d r
| not (null arg_lin) && | not (null arg_lin) &&
IntSet.member fid trusted IntSet.member fid trusted
= [Bracket_ cat fid r arg_lin] = [Bracket_ cat fid r es arg_lin]
| otherwise = arg_lin | otherwise = arg_lin
where where
arg_lin = lin ! r arg_lin = lin ! r
(fid,cat,lin) = args !! d (fid,cat,lin) = args !! d
es = getAbsTrees f fid
-- | This function extracts the list of all completed parse trees
-- that spans the whole input consumed so far. The trees are also
-- limited by the category specified, which is usually
-- the same as the startup category.
getAbsTrees :: Forest -> FId -> [Expr]
getAbsTrees (Forest abs cnc forest root) fid =
nubsort $ do (fvs,e) <- go Set.empty 0 (0,fid)
guard (Set.null fvs)
return e
where
go rec fcat' (d,fcat)
| fcat < totalCats cnc = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
| Set.member fcat rec = mzero
| otherwise = foldForest (\funid args trees ->
do let CncFun fn lins = cncfuns cnc ! funid
args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args)
check_ho_fun fn args
`mplus`
trees)
(\const _ trees ->
return (freeVar const,const)
`mplus`
trees)
[] fcat forest
check_ho_fun fun args
| fun == _V = return (head args)
| fun == _B = return (foldl1 Set.difference (map fst args), foldr (\x e -> EAbs Explicit (mkVar (snd x)) e) (snd (head args)) (tail args))
| otherwise = return (Set.unions (map fst args),foldl (\e x -> EApp e (snd x)) (EFun fun) args)
mkVar (EFun v) = v
mkVar (EMeta _) = wildCId
freeVar (EFun v) = Set.singleton v
freeVar _ = Set.empty
foldForest :: (FunId -> [FId] -> b -> b) -> (Expr -> [String] -> b -> b) -> b -> FId -> IntMap.IntMap (Set.Set Production) -> b
foldForest f g b fcat forest =
case IntMap.lookup fcat forest of
Nothing -> b
Just set -> Set.fold foldProd b set
where
foldProd (PCoerce fcat) b = foldForest f g b fcat forest
foldProd (PApply funid args) b = f funid args b
foldProd (PConst _ const toks) b = g const toks b

View File

@@ -63,7 +63,7 @@ type CncType = (CId, FId) -- concrete type is the abstract type (the category
linTree :: PGF -> Language -> Expr -> [Array LIndex BracketedTokn] linTree :: PGF -> Language -> Expr -> [Array LIndex BracketedTokn]
linTree pgf lang e = linTree pgf lang e =
[amapWithIndex (\label -> Bracket_ cat fid label) lin | (_,((cat,fid),lin)) <- lin0 [] [] Nothing 0 e] [amapWithIndex (\label -> Bracket_ cat fid label [e]) lin | (_,((cat,fid),e,lin)) <- lin0 [] [] Nothing 0 e]
where where
cnc = lookMap (error "no lang") lang (concretes pgf) cnc = lookMap (error "no lang") lang (concretes pgf)
lp = lproductions cnc lp = lproductions cnc
@@ -74,26 +74,26 @@ linTree pgf lang e =
| otherwise = apply (xs ++ ys) mb_cty n_fid _B (e:[ELit (LStr x) | x <- xs]) | otherwise = apply (xs ++ ys) mb_cty n_fid _B (e:[ELit (LStr x) | x <- xs])
lin xs mb_cty n_fid (EApp e1 e2) es = lin xs mb_cty n_fid e1 (e2:es) lin xs mb_cty n_fid (EApp e1 e2) es = lin xs mb_cty n_fid e1 (e2:es)
lin xs mb_cty n_fid (ELit l) [] = case l of lin xs mb_cty n_fid e@(ELit l) [] = case l of
LStr s -> return (n_fid+1,((cidString,n_fid),ss s)) LStr s -> return (n_fid+1,((cidString,n_fid),e,ss s))
LInt n -> return (n_fid+1,((cidInt, n_fid),ss (show n))) LInt n -> return (n_fid+1,((cidInt, n_fid),e,ss (show n)))
LFlt f -> return (n_fid+1,((cidFloat, n_fid),ss (show f))) LFlt f -> return (n_fid+1,((cidFloat, n_fid),e,ss (show f)))
lin xs mb_cty n_fid (EMeta i) es = apply xs mb_cty n_fid _V (ELit (LStr ('?':show i)):es) lin xs mb_cty n_fid (EMeta i) es = apply xs mb_cty n_fid _V (ELit (LStr ('?':show i)):es)
lin xs mb_cty n_fid (EFun f) es = apply xs mb_cty n_fid f es lin xs mb_cty n_fid (EFun f) es = apply xs mb_cty n_fid f es
lin xs mb_cty n_fid (EVar i) es = apply xs mb_cty n_fid _V (ELit (LStr (xs !! i)) :es) lin xs mb_cty n_fid (EVar i) es = apply xs mb_cty n_fid _V (ELit (LStr (xs !! i)) :es)
lin xs mb_cty n_fid (ETyped e _) es = lin xs mb_cty n_fid e es lin xs mb_cty n_fid (ETyped e _) es = lin xs mb_cty n_fid e es
lin xs mb_cty n_fid (EImplArg e) es = lin xs mb_cty n_fid e es lin xs mb_cty n_fid (EImplArg e) es = lin xs mb_cty n_fid e es
ss s = listArray (0,0) [[LeafKS [s]]] ss s = listArray (0,0) [[LeafKS [s]]]
apply :: [String] -> Maybe CncType -> FId -> CId -> [Expr] -> [(FId,(CncType, LinTable))] apply :: [String] -> Maybe CncType -> FId -> CId -> [Expr] -> [(FId,(CncType, Expr, LinTable))]
apply xs mb_cty n_fid f es = apply xs mb_cty n_fid f es =
case Map.lookup f lp of case Map.lookup f lp of
Just prods -> do (funid,(cat,fid),ctys) <- getApps prods Just prods -> do (funid,(cat,fid),ctys) <- getApps prods
guard (length ctys == length es) guard (length ctys == length es)
(n_fid,args) <- descend n_fid (zip ctys es) (n_fid,args) <- descend n_fid (zip ctys es)
let (CncFun _ lins) = cncfuns cnc ! funid let (CncFun _ lins) = cncfuns cnc ! funid
return (n_fid+1,((cat,n_fid),listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins])) return (n_fid+1,((cat,n_fid),undefined,listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins]))
Nothing -> apply xs mb_cty n_fid _V [ELit (LStr ("[" ++ showCId f ++ "]"))] -- fun without lin Nothing -> apply xs mb_cty n_fid _V [ELit (LStr ("[" ++ showCId f ++ "]"))] -- fun without lin
where where
getApps prods = getApps prods =
@@ -116,7 +116,7 @@ linTree pgf lang e =
(n_fid,args) <- descend n_fid fes (n_fid,args) <- descend n_fid fes
return (n_fid,arg:args) return (n_fid,arg:args)
computeSeq :: SeqId -> [(CncType,LinTable)] -> [BracketedTokn] computeSeq :: SeqId -> [(CncType,Expr,LinTable)] -> [BracketedTokn]
computeSeq seqid args = concatMap compute (elems seq) computeSeq seqid args = concatMap compute (elems seq)
where where
seq = sequences cnc ! seqid seq = sequences cnc ! seqid
@@ -127,11 +127,11 @@ linTree pgf lang e =
compute (SymKP ts alts) = [LeafKP ts alts] compute (SymKP ts alts) = [LeafKP ts alts]
getArg d r getArg d r
| not (null arg_lin) = [Bracket_ cat fid r arg_lin] | not (null arg_lin) = [Bracket_ cat fid r [e] arg_lin]
| otherwise = arg_lin | otherwise = arg_lin
where where
arg_lin = lin ! r arg_lin = lin ! r
((cat,fid),lin) = args !! d ((cat,fid),e,lin) = args !! d
amapWithIndex :: (IArray a e1, IArray a e2, Ix i) => (i -> e1 -> e2) -> a i e1 -> a i e2 amapWithIndex :: (IArray a e1, IArray a e2, Ix i) => (i -> e1 -> e2) -> a i e1 -> a i e2
amapWithIndex f arr = listArray (bounds arr) (map (uncurry f) (assocs arr)) amapWithIndex f arr = listArray (bounds arr) (map (uncurry f) (assocs arr))

View File

@@ -212,7 +212,8 @@ updateProductionIndices pgf = pgf{ concretes = fmap updateConcrete (concretes pg
-- mark the beginning and the end of each constituent. -- mark the beginning and the end of each constituent.
data BracketedString data BracketedString
= Leaf String -- ^ this is the leaf i.e. a single token = Leaf String -- ^ this is the leaf i.e. a single token
| Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [BracketedString] -- ^ this is a bracket. The 'CId' is the category of | Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [Expr] [BracketedString]
-- ^ this is a bracket. The 'CId' is the category of
-- the phrase. The 'FId' is an unique identifier for -- the phrase. The 'FId' is an unique identifier for
-- every phrase in the sentence. For context-free grammars -- every phrase in the sentence. For context-free grammars
-- i.e. without discontinuous constituents this identifier -- i.e. without discontinuous constituents this identifier
@@ -227,7 +228,7 @@ data BracketedString
data BracketedTokn data BracketedTokn
= LeafKS [String] = LeafKS [String]
| LeafKP [String] [Alternative] | LeafKP [String] [Alternative]
| Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [BracketedTokn] -- Invariant: the list is not empty | Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [Expr] [BracketedTokn] -- Invariant: the list is not empty
type LinTable = Array.Array LIndex [BracketedTokn] type LinTable = Array.Array LIndex [BracketedTokn]
@@ -238,12 +239,12 @@ showBracketedString :: BracketedString -> String
showBracketedString = render . ppBracketedString showBracketedString = render . ppBracketedString
ppBracketedString (Leaf t) = text t ppBracketedString (Leaf t) = text t
ppBracketedString (Bracket cat fcat index bss) = parens (ppCId cat <+> hsep (map ppBracketedString bss)) ppBracketedString (Bracket cat fcat index _ bss) = parens (ppCId cat <+> hsep (map ppBracketedString bss))
-- | The length of the bracketed string in number of tokens. -- | The length of the bracketed string in number of tokens.
lengthBracketedString :: BracketedString -> Int lengthBracketedString :: BracketedString -> Int
lengthBracketedString (Leaf _) = 1 lengthBracketedString (Leaf _) = 1
lengthBracketedString (Bracket _ _ _ bss) = sum (map lengthBracketedString bss) lengthBracketedString (Bracket _ _ _ _ bss) = sum (map lengthBracketedString bss)
untokn :: String -> BracketedTokn -> (String,[BracketedString]) untokn :: String -> BracketedTokn -> (String,[BracketedString])
untokn nw (LeafKS ts) = (head ts,map Leaf ts) untokn nw (LeafKS ts) = (head ts,map Leaf ts)
@@ -254,10 +255,10 @@ untokn nw (LeafKP d vs) = let ts = sel d vs nw
case [v | Alt v cs <- vs, any (\c -> isPrefixOf c nw) cs] of case [v | Alt v cs <- vs, any (\c -> isPrefixOf c nw) cs] of
v:_ -> v v:_ -> v
_ -> d _ -> d
untokn nw (Bracket_ cat fid index bss) = untokn nw (Bracket_ cat fid index es bss) =
let (nw',bss') = mapAccumR untokn nw bss let (nw',bss') = mapAccumR untokn nw bss
in (nw',[Bracket cat fid index (concat bss')]) in (nw',[Bracket cat fid index es (concat bss')])
flattenBracketedString :: BracketedString -> [String] flattenBracketedString :: BracketedString -> [String]
flattenBracketedString (Leaf w) = [w] flattenBracketedString (Leaf w) = [w]
flattenBracketedString (Bracket _ _ _ bss) = concatMap flattenBracketedString bss flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString bss

View File

@@ -27,7 +27,7 @@ import PGF.Data
import PGF.Expr(Tree) import PGF.Expr(Tree)
import PGF.Macros import PGF.Macros
import PGF.TypeCheck import PGF.TypeCheck
import PGF.Forest(Forest(Forest), linearizeWithBrackets) import PGF.Forest(Forest(Forest), linearizeWithBrackets, foldForest)
-- | This data type encodes the different outcomes which you could get from the parser. -- | This data type encodes the different outcomes which you could get from the parser.
data ParseResult data ParseResult
@@ -379,21 +379,6 @@ insertPC :: PassiveKey -> FId -> PassiveChart -> PassiveChart
insertPC key fcat chart = Map.insert key fcat chart insertPC key fcat chart = Map.insert key fcat chart
----------------------------------------------------------------
-- Forest
----------------------------------------------------------------
foldForest :: (FunId -> [FId] -> b -> b) -> (Expr -> [String] -> b -> b) -> b -> FId -> IntMap.IntMap (Set.Set Production) -> b
foldForest f g b fcat forest =
case IntMap.lookup fcat forest of
Nothing -> b
Just set -> Set.fold foldProd b set
where
foldProd (PCoerce fcat) b = foldForest f g b fcat forest
foldProd (PApply funid args) b = f funid args b
foldProd (PConst _ const toks) b = g const toks b
---------------------------------------------------------------- ----------------------------------------------------------------
-- Parse State -- Parse State
---------------------------------------------------------------- ----------------------------------------------------------------

View File

@@ -22,7 +22,7 @@ module PGF.VisualizeTree
, graphvizBracketedString , graphvizBracketedString
, graphvizAlignment , graphvizAlignment
, getDepLabels , getDepLabels
) where ) where
import PGF.CId (CId,showCId,ppCId,mkCId) import PGF.CId (CId,showCId,ppCId,mkCId)
import PGF.Data import PGF.Data
@@ -122,8 +122,8 @@ graphvizDependencyTree format debug mlab ms pgf lang t = render $
getLeaves parent bs = getLeaves parent bs =
case bs of case bs of
Leaf w -> [(parent,w)] Leaf w -> [(parent,w)]
Bracket _ fid _ bss -> concatMap (getLeaves fid) bss Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss
mkNode (p,i,w) = mkNode (p,i,w) =
tag p <> text " [label = " <> doubleQuotes (int i <> char '.' <+> text w) <> text "] ;" tag p <> text " [label = " <> doubleQuotes (int i <> char '.' <+> text w) <> text "] ;"
@@ -234,13 +234,13 @@ graphvizBracketedString = render . lin2tree
getLeaves level parent bs = getLeaves level parent bs =
case bs of case bs of
Leaf w -> [(level-1,parent,w)] Leaf w -> [(level-1,parent,w)]
Bracket _ fid i bss -> concatMap (getLeaves (level+1) fid) bss Bracket _ fid i _ bss -> concatMap (getLeaves (level+1) fid) bss
getInterns level [] = [] getInterns level [] = []
getInterns level nodes = getInterns level nodes =
nub [(level-1,parent,fid,showCId cat) | (parent,Bracket cat fid _ _) <- nodes] : nub [(level-1,parent,fid,showCId cat) | (parent,Bracket cat fid _ _ _) <- nodes] :
getInterns (level+1) [(fid,child) | (_,Bracket _ fid _ children) <- nodes, child <- children] getInterns (level+1) [(fid,child) | (_,Bracket _ fid _ _ children) <- nodes, child <- children]
mkStruct l cs = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$ mkStruct l cs = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$
vcat [link pl pid l id | (pl,pid,id,_) <- cs] vcat [link pl pid l id | (pl,pid,id,_) <- cs]
@@ -290,8 +290,8 @@ graphvizAlignment pgf langs = render . lin2graph . linsBracketed
getLeaves parent bs = getLeaves parent bs =
case bs of case bs of
Leaf w -> [(parent,w)] Leaf w -> [(parent,w)]
Bracket _ fid _ bss -> concatMap (getLeaves fid) bss Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss
mkLayers l [] = empty mkLayers l [] = empty
mkLayers l (cs:css) = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$ mkLayers l (cs:css) = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$