forked from GitHub/gf-core
now every BracketedString also has reference to the source expression(s)
This commit is contained in:
@@ -14,6 +14,7 @@
|
||||
module PGF.Forest( Forest(..)
|
||||
, BracketedString, showBracketedString, lengthBracketedString
|
||||
, linearizeWithBrackets
|
||||
, foldForest
|
||||
) where
|
||||
|
||||
import PGF.CId
|
||||
@@ -26,6 +27,7 @@ import qualified Data.Map as Map
|
||||
import qualified Data.IntSet as IntSet
|
||||
import qualified Data.IntMap as IntMap
|
||||
import Control.Monad
|
||||
import GF.Data.SortedList
|
||||
|
||||
data Forest
|
||||
= Forest
|
||||
@@ -48,11 +50,11 @@ linearizeWithBrackets = head . snd . untokn "" . bracketedTokn
|
||||
--
|
||||
|
||||
bracketedTokn :: Forest -> BracketedTokn
|
||||
bracketedTokn (Forest abs cnc forest root) =
|
||||
bracketedTokn f@(Forest abs cnc forest root) =
|
||||
case [computeSeq seq (map (render IntMap.empty) args) | (seq,args) <- root] of
|
||||
([bs@(Bracket_ cat fid label lin)]:_) -> bs
|
||||
(bss:_) -> Bracket_ wildCId 0 0 bss
|
||||
[] -> Bracket_ wildCId 0 0 []
|
||||
([bs@(Bracket_ _ _ _ _ _)]:_) -> bs
|
||||
(bss:_) -> Bracket_ wildCId 0 0 [] bss
|
||||
[] -> Bracket_ wildCId 0 0 [] []
|
||||
where
|
||||
trusted = foldl1 IntSet.intersection [IntSet.unions (map (trustedSpots IntSet.empty) args) | (_,args) <- root]
|
||||
|
||||
@@ -97,8 +99,56 @@ bracketedTokn (Forest abs cnc forest root) =
|
||||
getArg d r
|
||||
| not (null arg_lin) &&
|
||||
IntSet.member fid trusted
|
||||
= [Bracket_ cat fid r arg_lin]
|
||||
= [Bracket_ cat fid r es arg_lin]
|
||||
| otherwise = arg_lin
|
||||
where
|
||||
arg_lin = lin ! r
|
||||
(fid,cat,lin) = args !! d
|
||||
es = getAbsTrees f fid
|
||||
|
||||
-- | This function extracts the list of all completed parse trees
|
||||
-- that spans the whole input consumed so far. The trees are also
|
||||
-- limited by the category specified, which is usually
|
||||
-- the same as the startup category.
|
||||
getAbsTrees :: Forest -> FId -> [Expr]
|
||||
getAbsTrees (Forest abs cnc forest root) fid =
|
||||
nubsort $ do (fvs,e) <- go Set.empty 0 (0,fid)
|
||||
guard (Set.null fvs)
|
||||
return e
|
||||
where
|
||||
go rec fcat' (d,fcat)
|
||||
| fcat < totalCats cnc = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
|
||||
| Set.member fcat rec = mzero
|
||||
| otherwise = foldForest (\funid args trees ->
|
||||
do let CncFun fn lins = cncfuns cnc ! funid
|
||||
args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args)
|
||||
check_ho_fun fn args
|
||||
`mplus`
|
||||
trees)
|
||||
(\const _ trees ->
|
||||
return (freeVar const,const)
|
||||
`mplus`
|
||||
trees)
|
||||
[] fcat forest
|
||||
|
||||
check_ho_fun fun args
|
||||
| fun == _V = return (head args)
|
||||
| fun == _B = return (foldl1 Set.difference (map fst args), foldr (\x e -> EAbs Explicit (mkVar (snd x)) e) (snd (head args)) (tail args))
|
||||
| otherwise = return (Set.unions (map fst args),foldl (\e x -> EApp e (snd x)) (EFun fun) args)
|
||||
|
||||
mkVar (EFun v) = v
|
||||
mkVar (EMeta _) = wildCId
|
||||
|
||||
freeVar (EFun v) = Set.singleton v
|
||||
freeVar _ = Set.empty
|
||||
|
||||
|
||||
foldForest :: (FunId -> [FId] -> b -> b) -> (Expr -> [String] -> b -> b) -> b -> FId -> IntMap.IntMap (Set.Set Production) -> b
|
||||
foldForest f g b fcat forest =
|
||||
case IntMap.lookup fcat forest of
|
||||
Nothing -> b
|
||||
Just set -> Set.fold foldProd b set
|
||||
where
|
||||
foldProd (PCoerce fcat) b = foldForest f g b fcat forest
|
||||
foldProd (PApply funid args) b = f funid args b
|
||||
foldProd (PConst _ const toks) b = g const toks b
|
||||
|
||||
@@ -63,7 +63,7 @@ type CncType = (CId, FId) -- concrete type is the abstract type (the category
|
||||
|
||||
linTree :: PGF -> Language -> Expr -> [Array LIndex BracketedTokn]
|
||||
linTree pgf lang e =
|
||||
[amapWithIndex (\label -> Bracket_ cat fid label) lin | (_,((cat,fid),lin)) <- lin0 [] [] Nothing 0 e]
|
||||
[amapWithIndex (\label -> Bracket_ cat fid label [e]) lin | (_,((cat,fid),e,lin)) <- lin0 [] [] Nothing 0 e]
|
||||
where
|
||||
cnc = lookMap (error "no lang") lang (concretes pgf)
|
||||
lp = lproductions cnc
|
||||
@@ -74,26 +74,26 @@ linTree pgf lang e =
|
||||
| otherwise = apply (xs ++ ys) mb_cty n_fid _B (e:[ELit (LStr x) | x <- xs])
|
||||
|
||||
lin xs mb_cty n_fid (EApp e1 e2) es = lin xs mb_cty n_fid e1 (e2:es)
|
||||
lin xs mb_cty n_fid (ELit l) [] = case l of
|
||||
LStr s -> return (n_fid+1,((cidString,n_fid),ss s))
|
||||
LInt n -> return (n_fid+1,((cidInt, n_fid),ss (show n)))
|
||||
LFlt f -> return (n_fid+1,((cidFloat, n_fid),ss (show f)))
|
||||
lin xs mb_cty n_fid e@(ELit l) [] = case l of
|
||||
LStr s -> return (n_fid+1,((cidString,n_fid),e,ss s))
|
||||
LInt n -> return (n_fid+1,((cidInt, n_fid),e,ss (show n)))
|
||||
LFlt f -> return (n_fid+1,((cidFloat, n_fid),e,ss (show f)))
|
||||
lin xs mb_cty n_fid (EMeta i) es = apply xs mb_cty n_fid _V (ELit (LStr ('?':show i)):es)
|
||||
lin xs mb_cty n_fid (EFun f) es = apply xs mb_cty n_fid f es
|
||||
lin xs mb_cty n_fid (EVar i) es = apply xs mb_cty n_fid _V (ELit (LStr (xs !! i)) :es)
|
||||
lin xs mb_cty n_fid (ETyped e _) es = lin xs mb_cty n_fid e es
|
||||
lin xs mb_cty n_fid (EImplArg e) es = lin xs mb_cty n_fid e es
|
||||
lin xs mb_cty n_fid (ETyped e _) es = lin xs mb_cty n_fid e es
|
||||
lin xs mb_cty n_fid (EImplArg e) es = lin xs mb_cty n_fid e es
|
||||
|
||||
ss s = listArray (0,0) [[LeafKS [s]]]
|
||||
|
||||
apply :: [String] -> Maybe CncType -> FId -> CId -> [Expr] -> [(FId,(CncType, LinTable))]
|
||||
apply :: [String] -> Maybe CncType -> FId -> CId -> [Expr] -> [(FId,(CncType, Expr, LinTable))]
|
||||
apply xs mb_cty n_fid f es =
|
||||
case Map.lookup f lp of
|
||||
Just prods -> do (funid,(cat,fid),ctys) <- getApps prods
|
||||
guard (length ctys == length es)
|
||||
(n_fid,args) <- descend n_fid (zip ctys es)
|
||||
let (CncFun _ lins) = cncfuns cnc ! funid
|
||||
return (n_fid+1,((cat,n_fid),listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins]))
|
||||
return (n_fid+1,((cat,n_fid),undefined,listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins]))
|
||||
Nothing -> apply xs mb_cty n_fid _V [ELit (LStr ("[" ++ showCId f ++ "]"))] -- fun without lin
|
||||
where
|
||||
getApps prods =
|
||||
@@ -116,7 +116,7 @@ linTree pgf lang e =
|
||||
(n_fid,args) <- descend n_fid fes
|
||||
return (n_fid,arg:args)
|
||||
|
||||
computeSeq :: SeqId -> [(CncType,LinTable)] -> [BracketedTokn]
|
||||
computeSeq :: SeqId -> [(CncType,Expr,LinTable)] -> [BracketedTokn]
|
||||
computeSeq seqid args = concatMap compute (elems seq)
|
||||
where
|
||||
seq = sequences cnc ! seqid
|
||||
@@ -127,11 +127,11 @@ linTree pgf lang e =
|
||||
compute (SymKP ts alts) = [LeafKP ts alts]
|
||||
|
||||
getArg d r
|
||||
| not (null arg_lin) = [Bracket_ cat fid r arg_lin]
|
||||
| not (null arg_lin) = [Bracket_ cat fid r [e] arg_lin]
|
||||
| otherwise = arg_lin
|
||||
where
|
||||
arg_lin = lin ! r
|
||||
((cat,fid),lin) = args !! d
|
||||
arg_lin = lin ! r
|
||||
((cat,fid),e,lin) = args !! d
|
||||
|
||||
amapWithIndex :: (IArray a e1, IArray a e2, Ix i) => (i -> e1 -> e2) -> a i e1 -> a i e2
|
||||
amapWithIndex f arr = listArray (bounds arr) (map (uncurry f) (assocs arr))
|
||||
|
||||
@@ -212,7 +212,8 @@ updateProductionIndices pgf = pgf{ concretes = fmap updateConcrete (concretes pg
|
||||
-- mark the beginning and the end of each constituent.
|
||||
data BracketedString
|
||||
= Leaf String -- ^ this is the leaf i.e. a single token
|
||||
| Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [BracketedString] -- ^ this is a bracket. The 'CId' is the category of
|
||||
| Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [Expr] [BracketedString]
|
||||
-- ^ this is a bracket. The 'CId' is the category of
|
||||
-- the phrase. The 'FId' is an unique identifier for
|
||||
-- every phrase in the sentence. For context-free grammars
|
||||
-- i.e. without discontinuous constituents this identifier
|
||||
@@ -227,7 +228,7 @@ data BracketedString
|
||||
data BracketedTokn
|
||||
= LeafKS [String]
|
||||
| LeafKP [String] [Alternative]
|
||||
| Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [BracketedTokn] -- Invariant: the list is not empty
|
||||
| Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [Expr] [BracketedTokn] -- Invariant: the list is not empty
|
||||
|
||||
type LinTable = Array.Array LIndex [BracketedTokn]
|
||||
|
||||
@@ -238,12 +239,12 @@ showBracketedString :: BracketedString -> String
|
||||
showBracketedString = render . ppBracketedString
|
||||
|
||||
ppBracketedString (Leaf t) = text t
|
||||
ppBracketedString (Bracket cat fcat index bss) = parens (ppCId cat <+> hsep (map ppBracketedString bss))
|
||||
ppBracketedString (Bracket cat fcat index _ bss) = parens (ppCId cat <+> hsep (map ppBracketedString bss))
|
||||
|
||||
-- | The length of the bracketed string in number of tokens.
|
||||
lengthBracketedString :: BracketedString -> Int
|
||||
lengthBracketedString (Leaf _) = 1
|
||||
lengthBracketedString (Bracket _ _ _ bss) = sum (map lengthBracketedString bss)
|
||||
lengthBracketedString (Leaf _) = 1
|
||||
lengthBracketedString (Bracket _ _ _ _ bss) = sum (map lengthBracketedString bss)
|
||||
|
||||
untokn :: String -> BracketedTokn -> (String,[BracketedString])
|
||||
untokn nw (LeafKS ts) = (head ts,map Leaf ts)
|
||||
@@ -254,10 +255,10 @@ untokn nw (LeafKP d vs) = let ts = sel d vs nw
|
||||
case [v | Alt v cs <- vs, any (\c -> isPrefixOf c nw) cs] of
|
||||
v:_ -> v
|
||||
_ -> d
|
||||
untokn nw (Bracket_ cat fid index bss) =
|
||||
untokn nw (Bracket_ cat fid index es bss) =
|
||||
let (nw',bss') = mapAccumR untokn nw bss
|
||||
in (nw',[Bracket cat fid index (concat bss')])
|
||||
in (nw',[Bracket cat fid index es (concat bss')])
|
||||
|
||||
flattenBracketedString :: BracketedString -> [String]
|
||||
flattenBracketedString (Leaf w) = [w]
|
||||
flattenBracketedString (Bracket _ _ _ bss) = concatMap flattenBracketedString bss
|
||||
flattenBracketedString (Leaf w) = [w]
|
||||
flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString bss
|
||||
|
||||
@@ -27,7 +27,7 @@ import PGF.Data
|
||||
import PGF.Expr(Tree)
|
||||
import PGF.Macros
|
||||
import PGF.TypeCheck
|
||||
import PGF.Forest(Forest(Forest), linearizeWithBrackets)
|
||||
import PGF.Forest(Forest(Forest), linearizeWithBrackets, foldForest)
|
||||
|
||||
-- | This data type encodes the different outcomes which you could get from the parser.
|
||||
data ParseResult
|
||||
@@ -379,21 +379,6 @@ insertPC :: PassiveKey -> FId -> PassiveChart -> PassiveChart
|
||||
insertPC key fcat chart = Map.insert key fcat chart
|
||||
|
||||
|
||||
----------------------------------------------------------------
|
||||
-- Forest
|
||||
----------------------------------------------------------------
|
||||
|
||||
foldForest :: (FunId -> [FId] -> b -> b) -> (Expr -> [String] -> b -> b) -> b -> FId -> IntMap.IntMap (Set.Set Production) -> b
|
||||
foldForest f g b fcat forest =
|
||||
case IntMap.lookup fcat forest of
|
||||
Nothing -> b
|
||||
Just set -> Set.fold foldProd b set
|
||||
where
|
||||
foldProd (PCoerce fcat) b = foldForest f g b fcat forest
|
||||
foldProd (PApply funid args) b = f funid args b
|
||||
foldProd (PConst _ const toks) b = g const toks b
|
||||
|
||||
|
||||
----------------------------------------------------------------
|
||||
-- Parse State
|
||||
----------------------------------------------------------------
|
||||
|
||||
@@ -22,7 +22,7 @@ module PGF.VisualizeTree
|
||||
, graphvizBracketedString
|
||||
, graphvizAlignment
|
||||
, getDepLabels
|
||||
) where
|
||||
) where
|
||||
|
||||
import PGF.CId (CId,showCId,ppCId,mkCId)
|
||||
import PGF.Data
|
||||
@@ -122,8 +122,8 @@ graphvizDependencyTree format debug mlab ms pgf lang t = render $
|
||||
|
||||
getLeaves parent bs =
|
||||
case bs of
|
||||
Leaf w -> [(parent,w)]
|
||||
Bracket _ fid _ bss -> concatMap (getLeaves fid) bss
|
||||
Leaf w -> [(parent,w)]
|
||||
Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss
|
||||
|
||||
mkNode (p,i,w) =
|
||||
tag p <> text " [label = " <> doubleQuotes (int i <> char '.' <+> text w) <> text "] ;"
|
||||
@@ -234,13 +234,13 @@ graphvizBracketedString = render . lin2tree
|
||||
|
||||
getLeaves level parent bs =
|
||||
case bs of
|
||||
Leaf w -> [(level-1,parent,w)]
|
||||
Bracket _ fid i bss -> concatMap (getLeaves (level+1) fid) bss
|
||||
Leaf w -> [(level-1,parent,w)]
|
||||
Bracket _ fid i _ bss -> concatMap (getLeaves (level+1) fid) bss
|
||||
|
||||
getInterns level [] = []
|
||||
getInterns level nodes =
|
||||
nub [(level-1,parent,fid,showCId cat) | (parent,Bracket cat fid _ _) <- nodes] :
|
||||
getInterns (level+1) [(fid,child) | (_,Bracket _ fid _ children) <- nodes, child <- children]
|
||||
nub [(level-1,parent,fid,showCId cat) | (parent,Bracket cat fid _ _ _) <- nodes] :
|
||||
getInterns (level+1) [(fid,child) | (_,Bracket _ fid _ _ children) <- nodes, child <- children]
|
||||
|
||||
mkStruct l cs = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$
|
||||
vcat [link pl pid l id | (pl,pid,id,_) <- cs]
|
||||
@@ -290,8 +290,8 @@ graphvizAlignment pgf langs = render . lin2graph . linsBracketed
|
||||
|
||||
getLeaves parent bs =
|
||||
case bs of
|
||||
Leaf w -> [(parent,w)]
|
||||
Bracket _ fid _ bss -> concatMap (getLeaves fid) bss
|
||||
Leaf w -> [(parent,w)]
|
||||
Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss
|
||||
|
||||
mkLayers l [] = empty
|
||||
mkLayers l (cs:css) = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$
|
||||
|
||||
Reference in New Issue
Block a user