diff --git a/src/runtime/haskell/PGF/Forest.hs b/src/runtime/haskell/PGF/Forest.hs index 4cc3dd908..ece6a8000 100644 --- a/src/runtime/haskell/PGF/Forest.hs +++ b/src/runtime/haskell/PGF/Forest.hs @@ -14,6 +14,7 @@ module PGF.Forest( Forest(..) , BracketedString, showBracketedString, lengthBracketedString , linearizeWithBrackets + , foldForest ) where import PGF.CId @@ -26,6 +27,7 @@ import qualified Data.Map as Map import qualified Data.IntSet as IntSet import qualified Data.IntMap as IntMap import Control.Monad +import GF.Data.SortedList data Forest = Forest @@ -48,11 +50,11 @@ linearizeWithBrackets = head . snd . untokn "" . bracketedTokn -- bracketedTokn :: Forest -> BracketedTokn -bracketedTokn (Forest abs cnc forest root) = +bracketedTokn f@(Forest abs cnc forest root) = case [computeSeq seq (map (render IntMap.empty) args) | (seq,args) <- root] of - ([bs@(Bracket_ cat fid label lin)]:_) -> bs - (bss:_) -> Bracket_ wildCId 0 0 bss - [] -> Bracket_ wildCId 0 0 [] + ([bs@(Bracket_ _ _ _ _ _)]:_) -> bs + (bss:_) -> Bracket_ wildCId 0 0 [] bss + [] -> Bracket_ wildCId 0 0 [] [] where trusted = foldl1 IntSet.intersection [IntSet.unions (map (trustedSpots IntSet.empty) args) | (_,args) <- root] @@ -97,8 +99,56 @@ bracketedTokn (Forest abs cnc forest root) = getArg d r | not (null arg_lin) && IntSet.member fid trusted - = [Bracket_ cat fid r arg_lin] + = [Bracket_ cat fid r es arg_lin] | otherwise = arg_lin where arg_lin = lin ! r (fid,cat,lin) = args !! d + es = getAbsTrees f fid + +-- | This function extracts the list of all completed parse trees +-- that spans the whole input consumed so far. The trees are also +-- limited by the category specified, which is usually +-- the same as the startup category. +getAbsTrees :: Forest -> FId -> [Expr] +getAbsTrees (Forest abs cnc forest root) fid = + nubsort $ do (fvs,e) <- go Set.empty 0 (0,fid) + guard (Set.null fvs) + return e + where + go rec fcat' (d,fcat) + | fcat < totalCats cnc = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments + | Set.member fcat rec = mzero + | otherwise = foldForest (\funid args trees -> + do let CncFun fn lins = cncfuns cnc ! funid + args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args) + check_ho_fun fn args + `mplus` + trees) + (\const _ trees -> + return (freeVar const,const) + `mplus` + trees) + [] fcat forest + + check_ho_fun fun args + | fun == _V = return (head args) + | fun == _B = return (foldl1 Set.difference (map fst args), foldr (\x e -> EAbs Explicit (mkVar (snd x)) e) (snd (head args)) (tail args)) + | otherwise = return (Set.unions (map fst args),foldl (\e x -> EApp e (snd x)) (EFun fun) args) + + mkVar (EFun v) = v + mkVar (EMeta _) = wildCId + + freeVar (EFun v) = Set.singleton v + freeVar _ = Set.empty + + +foldForest :: (FunId -> [FId] -> b -> b) -> (Expr -> [String] -> b -> b) -> b -> FId -> IntMap.IntMap (Set.Set Production) -> b +foldForest f g b fcat forest = + case IntMap.lookup fcat forest of + Nothing -> b + Just set -> Set.fold foldProd b set + where + foldProd (PCoerce fcat) b = foldForest f g b fcat forest + foldProd (PApply funid args) b = f funid args b + foldProd (PConst _ const toks) b = g const toks b diff --git a/src/runtime/haskell/PGF/Linearize.hs b/src/runtime/haskell/PGF/Linearize.hs index 503b98d7b..cf70c1efb 100644 --- a/src/runtime/haskell/PGF/Linearize.hs +++ b/src/runtime/haskell/PGF/Linearize.hs @@ -63,7 +63,7 @@ type CncType = (CId, FId) -- concrete type is the abstract type (the category linTree :: PGF -> Language -> Expr -> [Array LIndex BracketedTokn] linTree pgf lang e = - [amapWithIndex (\label -> Bracket_ cat fid label) lin | (_,((cat,fid),lin)) <- lin0 [] [] Nothing 0 e] + [amapWithIndex (\label -> Bracket_ cat fid label [e]) lin | (_,((cat,fid),e,lin)) <- lin0 [] [] Nothing 0 e] where cnc = lookMap (error "no lang") lang (concretes pgf) lp = lproductions cnc @@ -74,26 +74,26 @@ linTree pgf lang e = | otherwise = apply (xs ++ ys) mb_cty n_fid _B (e:[ELit (LStr x) | x <- xs]) lin xs mb_cty n_fid (EApp e1 e2) es = lin xs mb_cty n_fid e1 (e2:es) - lin xs mb_cty n_fid (ELit l) [] = case l of - LStr s -> return (n_fid+1,((cidString,n_fid),ss s)) - LInt n -> return (n_fid+1,((cidInt, n_fid),ss (show n))) - LFlt f -> return (n_fid+1,((cidFloat, n_fid),ss (show f))) + lin xs mb_cty n_fid e@(ELit l) [] = case l of + LStr s -> return (n_fid+1,((cidString,n_fid),e,ss s)) + LInt n -> return (n_fid+1,((cidInt, n_fid),e,ss (show n))) + LFlt f -> return (n_fid+1,((cidFloat, n_fid),e,ss (show f))) lin xs mb_cty n_fid (EMeta i) es = apply xs mb_cty n_fid _V (ELit (LStr ('?':show i)):es) lin xs mb_cty n_fid (EFun f) es = apply xs mb_cty n_fid f es lin xs mb_cty n_fid (EVar i) es = apply xs mb_cty n_fid _V (ELit (LStr (xs !! i)) :es) - lin xs mb_cty n_fid (ETyped e _) es = lin xs mb_cty n_fid e es - lin xs mb_cty n_fid (EImplArg e) es = lin xs mb_cty n_fid e es + lin xs mb_cty n_fid (ETyped e _) es = lin xs mb_cty n_fid e es + lin xs mb_cty n_fid (EImplArg e) es = lin xs mb_cty n_fid e es ss s = listArray (0,0) [[LeafKS [s]]] - apply :: [String] -> Maybe CncType -> FId -> CId -> [Expr] -> [(FId,(CncType, LinTable))] + apply :: [String] -> Maybe CncType -> FId -> CId -> [Expr] -> [(FId,(CncType, Expr, LinTable))] apply xs mb_cty n_fid f es = case Map.lookup f lp of Just prods -> do (funid,(cat,fid),ctys) <- getApps prods guard (length ctys == length es) (n_fid,args) <- descend n_fid (zip ctys es) let (CncFun _ lins) = cncfuns cnc ! funid - return (n_fid+1,((cat,n_fid),listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins])) + return (n_fid+1,((cat,n_fid),undefined,listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins])) Nothing -> apply xs mb_cty n_fid _V [ELit (LStr ("[" ++ showCId f ++ "]"))] -- fun without lin where getApps prods = @@ -116,7 +116,7 @@ linTree pgf lang e = (n_fid,args) <- descend n_fid fes return (n_fid,arg:args) - computeSeq :: SeqId -> [(CncType,LinTable)] -> [BracketedTokn] + computeSeq :: SeqId -> [(CncType,Expr,LinTable)] -> [BracketedTokn] computeSeq seqid args = concatMap compute (elems seq) where seq = sequences cnc ! seqid @@ -127,11 +127,11 @@ linTree pgf lang e = compute (SymKP ts alts) = [LeafKP ts alts] getArg d r - | not (null arg_lin) = [Bracket_ cat fid r arg_lin] + | not (null arg_lin) = [Bracket_ cat fid r [e] arg_lin] | otherwise = arg_lin where - arg_lin = lin ! r - ((cat,fid),lin) = args !! d + arg_lin = lin ! r + ((cat,fid),e,lin) = args !! d amapWithIndex :: (IArray a e1, IArray a e2, Ix i) => (i -> e1 -> e2) -> a i e1 -> a i e2 amapWithIndex f arr = listArray (bounds arr) (map (uncurry f) (assocs arr)) diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs index f4bfae646..328bf369d 100644 --- a/src/runtime/haskell/PGF/Macros.hs +++ b/src/runtime/haskell/PGF/Macros.hs @@ -212,7 +212,8 @@ updateProductionIndices pgf = pgf{ concretes = fmap updateConcrete (concretes pg -- mark the beginning and the end of each constituent. data BracketedString = Leaf String -- ^ this is the leaf i.e. a single token - | Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [BracketedString] -- ^ this is a bracket. The 'CId' is the category of + | Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [Expr] [BracketedString] + -- ^ this is a bracket. The 'CId' is the category of -- the phrase. The 'FId' is an unique identifier for -- every phrase in the sentence. For context-free grammars -- i.e. without discontinuous constituents this identifier @@ -227,7 +228,7 @@ data BracketedString data BracketedTokn = LeafKS [String] | LeafKP [String] [Alternative] - | Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [BracketedTokn] -- Invariant: the list is not empty + | Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [Expr] [BracketedTokn] -- Invariant: the list is not empty type LinTable = Array.Array LIndex [BracketedTokn] @@ -238,12 +239,12 @@ showBracketedString :: BracketedString -> String showBracketedString = render . ppBracketedString ppBracketedString (Leaf t) = text t -ppBracketedString (Bracket cat fcat index bss) = parens (ppCId cat <+> hsep (map ppBracketedString bss)) +ppBracketedString (Bracket cat fcat index _ bss) = parens (ppCId cat <+> hsep (map ppBracketedString bss)) -- | The length of the bracketed string in number of tokens. lengthBracketedString :: BracketedString -> Int -lengthBracketedString (Leaf _) = 1 -lengthBracketedString (Bracket _ _ _ bss) = sum (map lengthBracketedString bss) +lengthBracketedString (Leaf _) = 1 +lengthBracketedString (Bracket _ _ _ _ bss) = sum (map lengthBracketedString bss) untokn :: String -> BracketedTokn -> (String,[BracketedString]) untokn nw (LeafKS ts) = (head ts,map Leaf ts) @@ -254,10 +255,10 @@ untokn nw (LeafKP d vs) = let ts = sel d vs nw case [v | Alt v cs <- vs, any (\c -> isPrefixOf c nw) cs] of v:_ -> v _ -> d -untokn nw (Bracket_ cat fid index bss) = +untokn nw (Bracket_ cat fid index es bss) = let (nw',bss') = mapAccumR untokn nw bss - in (nw',[Bracket cat fid index (concat bss')]) + in (nw',[Bracket cat fid index es (concat bss')]) flattenBracketedString :: BracketedString -> [String] -flattenBracketedString (Leaf w) = [w] -flattenBracketedString (Bracket _ _ _ bss) = concatMap flattenBracketedString bss +flattenBracketedString (Leaf w) = [w] +flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString bss diff --git a/src/runtime/haskell/PGF/Parse.hs b/src/runtime/haskell/PGF/Parse.hs index ce195f752..f48fab097 100644 --- a/src/runtime/haskell/PGF/Parse.hs +++ b/src/runtime/haskell/PGF/Parse.hs @@ -27,7 +27,7 @@ import PGF.Data import PGF.Expr(Tree) import PGF.Macros import PGF.TypeCheck -import PGF.Forest(Forest(Forest), linearizeWithBrackets) +import PGF.Forest(Forest(Forest), linearizeWithBrackets, foldForest) -- | This data type encodes the different outcomes which you could get from the parser. data ParseResult @@ -379,21 +379,6 @@ insertPC :: PassiveKey -> FId -> PassiveChart -> PassiveChart insertPC key fcat chart = Map.insert key fcat chart ----------------------------------------------------------------- --- Forest ----------------------------------------------------------------- - -foldForest :: (FunId -> [FId] -> b -> b) -> (Expr -> [String] -> b -> b) -> b -> FId -> IntMap.IntMap (Set.Set Production) -> b -foldForest f g b fcat forest = - case IntMap.lookup fcat forest of - Nothing -> b - Just set -> Set.fold foldProd b set - where - foldProd (PCoerce fcat) b = foldForest f g b fcat forest - foldProd (PApply funid args) b = f funid args b - foldProd (PConst _ const toks) b = g const toks b - - ---------------------------------------------------------------- -- Parse State ---------------------------------------------------------------- diff --git a/src/runtime/haskell/PGF/VisualizeTree.hs b/src/runtime/haskell/PGF/VisualizeTree.hs index 098d6a07f..542044b2d 100644 --- a/src/runtime/haskell/PGF/VisualizeTree.hs +++ b/src/runtime/haskell/PGF/VisualizeTree.hs @@ -22,7 +22,7 @@ module PGF.VisualizeTree , graphvizBracketedString , graphvizAlignment , getDepLabels - ) where + ) where import PGF.CId (CId,showCId,ppCId,mkCId) import PGF.Data @@ -122,8 +122,8 @@ graphvizDependencyTree format debug mlab ms pgf lang t = render $ getLeaves parent bs = case bs of - Leaf w -> [(parent,w)] - Bracket _ fid _ bss -> concatMap (getLeaves fid) bss + Leaf w -> [(parent,w)] + Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss mkNode (p,i,w) = tag p <> text " [label = " <> doubleQuotes (int i <> char '.' <+> text w) <> text "] ;" @@ -234,13 +234,13 @@ graphvizBracketedString = render . lin2tree getLeaves level parent bs = case bs of - Leaf w -> [(level-1,parent,w)] - Bracket _ fid i bss -> concatMap (getLeaves (level+1) fid) bss + Leaf w -> [(level-1,parent,w)] + Bracket _ fid i _ bss -> concatMap (getLeaves (level+1) fid) bss getInterns level [] = [] getInterns level nodes = - nub [(level-1,parent,fid,showCId cat) | (parent,Bracket cat fid _ _) <- nodes] : - getInterns (level+1) [(fid,child) | (_,Bracket _ fid _ children) <- nodes, child <- children] + nub [(level-1,parent,fid,showCId cat) | (parent,Bracket cat fid _ _ _) <- nodes] : + getInterns (level+1) [(fid,child) | (_,Bracket _ fid _ _ children) <- nodes, child <- children] mkStruct l cs = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$ vcat [link pl pid l id | (pl,pid,id,_) <- cs] @@ -290,8 +290,8 @@ graphvizAlignment pgf langs = render . lin2graph . linsBracketed getLeaves parent bs = case bs of - Leaf w -> [(parent,w)] - Bracket _ fid _ bss -> concatMap (getLeaves fid) bss + Leaf w -> [(parent,w)] + Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss mkLayers l [] = empty mkLayers l (cs:css) = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$