From 8a0b3b8ba8034074f4489e041b74e05a3bc8c544 Mon Sep 17 00:00:00 2001 From: krasimir Date: Tue, 14 Dec 2010 08:46:22 +0000 Subject: [PATCH] optimization in the parser for large lexicons. Now, the parser is slightly slower for grammars with a small lexicon but 3-4 times faster for the English Resource Grammar used in combination with Oxford Advanced Learners Dictionary --- src/runtime/haskell/PGF/Binary.hs | 1 + src/runtime/haskell/PGF/Data.hs | 3 + src/runtime/haskell/PGF/Forest.hs | 1 - src/runtime/haskell/PGF/Morphology.hs | 2 +- src/runtime/haskell/PGF/Optimize.hs | 38 ++++- src/runtime/haskell/PGF/Parse.hs | 236 +++++++++++++++----------- 6 files changed, 177 insertions(+), 104 deletions(-) diff --git a/src/runtime/haskell/PGF/Binary.hs b/src/runtime/haskell/PGF/Binary.hs index 26f994797..32b751159 100644 --- a/src/runtime/haskell/PGF/Binary.hs +++ b/src/runtime/haskell/PGF/Binary.hs @@ -68,6 +68,7 @@ instance Binary Concr where , productions=productions , pproductions = IntMap.empty , lproductions = Map.empty + , lexicon = IntMap.empty , cnccats=cnccats, totalCats=totalCats }) diff --git a/src/runtime/haskell/PGF/Data.hs b/src/runtime/haskell/PGF/Data.hs index f82d33644..3e26cbd98 100644 --- a/src/runtime/haskell/PGF/Data.hs +++ b/src/runtime/haskell/PGF/Data.hs @@ -7,6 +7,8 @@ import PGF.Type import qualified Data.Map as Map import qualified Data.Set as Set import qualified Data.IntMap as IntMap +import qualified Data.IntSet as IntSet +import qualified GF.Data.TrieMap as TMap import Data.Array.IArray import Data.Array.Unboxed import Data.List @@ -42,6 +44,7 @@ data Concr = Concr { pproductions :: IntMap.IntMap (Set.Set Production), -- productions needed for parsing lproductions :: Map.Map CId (IntMap.IntMap (Set.Set Production)), -- productions needed for linearization cnccats :: Map.Map CId CncCat, + lexicon :: IntMap.IntMap (IntMap.IntMap (TMap.TrieMap Token IntSet.IntSet)), totalCats :: {-# UNPACK #-} !FId } diff --git a/src/runtime/haskell/PGF/Forest.hs b/src/runtime/haskell/PGF/Forest.hs index a4a9266f7..be96ac0f3 100644 --- a/src/runtime/haskell/PGF/Forest.hs +++ b/src/runtime/haskell/PGF/Forest.hs @@ -17,7 +17,6 @@ module PGF.Forest( Forest(..) , BracketedString, showBracketedString, lengthBracketedString , linearizeWithBrackets , getAbsTrees - , foldForest ) where import PGF.CId diff --git a/src/runtime/haskell/PGF/Morphology.hs b/src/runtime/haskell/PGF/Morphology.hs index 711f9c01d..d5a2d28bc 100644 --- a/src/runtime/haskell/PGF/Morphology.hs +++ b/src/runtime/haskell/PGF/Morphology.hs @@ -29,7 +29,7 @@ buildMorpho pgf lang = Morpho $ collectWords pinfo = Map.fromListWith (++) [(t, [(fun,lbls ! l)]) | (CncCat s e lbls) <- Map.elems (cnccats pinfo) , fid <- [s..e] - , PApply funid _ <- maybe [] Set.toList (IntMap.lookup fid (pproductions pinfo)) + , PApply funid _ <- maybe [] Set.toList (IntMap.lookup fid (productions pinfo)) , let CncFun fun lins = cncfuns pinfo ! funid , (l,seqid) <- assocs lins , sym <- elems (sequences pinfo ! seqid) diff --git a/src/runtime/haskell/PGF/Optimize.hs b/src/runtime/haskell/PGF/Optimize.hs index d5b9230b4..f7fb79779 100644 --- a/src/runtime/haskell/PGF/Optimize.hs +++ b/src/runtime/haskell/PGF/Optimize.hs @@ -17,6 +17,7 @@ import qualified Data.Map as Map import qualified Data.Set as Set import qualified Data.IntSet as IntSet import qualified Data.IntMap as IntMap +import qualified GF.Data.TrieMap as TrieMap import qualified Data.List as List import Control.Monad.ST import GF.Data.Utilities(sortNub) @@ -195,10 +196,41 @@ filterProductions prods0 hoc0 prods accumHOC (PApply funid args) hoc = List.foldl' (\hoc (PArg hypos _) -> List.foldl' (\hoc (_,fid) -> IntSet.insert fid hoc) hoc hypos) hoc args accumHOC _ hoc = hoc +splitLexicalRules cnc p_prods = + IntMap.foldWithKey split (IntMap.empty,IntMap.empty) p_prods + where + split fid set (lex,syn) = + let (lex0,syn0) = Set.partition isLexical set + !lex' = if Set.null lex0 + then lex + else let !mp = IntMap.unionsWith (TrieMap.unionWith IntSet.union) + [words funid | PApply funid [] <- Set.toList lex0] + in IntMap.insert fid mp lex + !syn' = if Set.null syn0 + then syn + else IntMap.insert fid syn0 syn + in (lex', syn') + + + isLexical (PApply _ []) = True + isLexical _ = False + + words funid = IntMap.fromList [(lbl,seq2prefix (elems (sequences cnc ! seqid))) + | (lbl,seqid) <- assocs lins] + where + CncFun _ lins = cncfuns cnc ! funid + + wf ts = (ts,IntSet.singleton funid) + + seq2prefix [] = TrieMap.fromList [wf []] + seq2prefix (SymKS ts :syms) = TrieMap.fromList [wf ts] + seq2prefix (SymKP ts alts:syms) = TrieMap.fromList (wf ts : [wf ts | Alt ts ps <- alts]) + updateConcrete abs cnc = - let p_prods = filterProductions IntMap.empty IntSet.empty (productions cnc) - l_prods = linIndex cnc p_prods - in cnc{pproductions = p_prods, lproductions = l_prods} + let p_prods0 = filterProductions IntMap.empty IntSet.empty (productions cnc) + (lex,p_prods) = splitLexicalRules cnc p_prods0 + l_prods = linIndex cnc p_prods0 + in cnc{pproductions = p_prods, lproductions = l_prods, lexicon = lex} where linIndex cnc productions = Map.fromListWith (IntMap.unionWith Set.union) diff --git a/src/runtime/haskell/PGF/Parse.hs b/src/runtime/haskell/PGF/Parse.hs index 295c579ed..9f6d85515 100644 --- a/src/runtime/haskell/PGF/Parse.hs +++ b/src/runtime/haskell/PGF/Parse.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE BangPatterns, RankNTypes #-} module PGF.Parse ( ParseState , ErrorState @@ -17,8 +17,9 @@ import Data.Array.Base (unsafeAt) import Data.List (isPrefixOf, foldl') import Data.Maybe (fromMaybe, maybe, maybeToList) import qualified Data.Map as Map -import qualified GF.Data.TrieMap as TMap +import qualified GF.Data.TrieMap as TrieMap import qualified Data.IntMap as IntMap +import qualified Data.IntSet as IntSet import qualified Data.Set as Set import Control.Monad @@ -28,16 +29,16 @@ import PGF.Data import PGF.Expr(Tree) import PGF.Macros import PGF.TypeCheck -import PGF.Forest(Forest(Forest), linearizeWithBrackets, getAbsTrees, foldForest) +import PGF.Forest(Forest(Forest), linearizeWithBrackets, getAbsTrees) -- | The input to the parser is a pair of predicates. The first one --- 'piToken' checks that a given token, suggested by the grammar, +-- 'piToken' selects a token from a list of suggestions from the grammar, -- actually appears at the current position in the input string. -- The second one 'piLiteral' recognizes whether a literal with forest id 'FId' -- could be matched at the current position. data ParseInput = ParseInput - { piToken :: Token -> Bool + { piToken :: forall a . Map.Map Token a -> Maybe a , piLiteral :: FId -> Maybe (CId,Tree,[Token]) } @@ -80,28 +81,36 @@ parseWithRecovery pgf lang typ open_typs dp toks = accept (initState pgf lang ty -- startup category. initState :: PGF -> Language -> Type -> ParseState initState pgf lang (DTyp _ start _) = - let items = case Map.lookup start (cnccats cnc) of - Just (CncCat s e labels) -> do fid <- range (s,e) - (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args) - [] fid (pproductions cnc) - let CncFun fn lins = cncfuns cnc ! funid - (lbl,seqid) <- assocs lins - return (Active 0 0 funid seqid args (AK fid lbl)) - Nothing -> mzero - - cnc = lookConcrComplete pgf lang - + let (acc,items) = case Map.lookup start (cnccats cnc) of + Just (CncCat s e labels) -> + let keys = do fid <- range (s,e) + lbl <- indices labels + return (AK fid lbl) + in foldl' (\(acc,items) key -> predict flit ftok cnc + (pproductions cnc) + key key 0 + acc items) + (Map.empty,[]) + keys + Nothing -> (Map.empty,[]) in PState pgf cnc (Chart emptyAC [] emptyPC (pproductions cnc) (totalCats cnc) 0) - (TMap.singleton [] (Set.fromList items)) + (TrieMap.compose (Just (Set.fromList items)) acc) + where + cnc = lookConcrComplete pgf lang + + flit _ = Nothing + + ftok = Map.unionWith (TrieMap.unionWith Set.union) + -- | This function constructs the simplest possible parser input. -- It checks the tokens for exact matching and recognizes only @String@, @Int@ and @Float@ literals. -- The @Int@ and @Float@ literals match only if the token passed is some number. -- The @String@ literal always match but the length of the literal could be only one token. simpleParseInput :: Token -> ParseInput -simpleParseInput t = ParseInput (==t) (matchLit t) +simpleParseInput t = ParseInput (Map.lookup t) (matchLit t) where matchLit t fid | fid == fidString = Just (cidString,ELit (LStr t),[t]) @@ -112,7 +121,10 @@ simpleParseInput t = ParseInput (==t) (matchLit t) | fid == fidVar = Just (wildCId,EFun (mkCId t),[t]) | otherwise = Nothing -mkParseInput :: PGF -> Language -> (a -> Token -> Bool) -> [(CId,a -> Maybe (Tree,[Token]))] -> a -> ParseInput +mkParseInput :: PGF -> Language + -> (forall a . b -> Map.Map Token a -> Maybe a) + -> [(CId,b -> Maybe (Tree,[Token]))] + -> (b -> ParseInput) mkParseInput pgf lang ftok flits = \x -> ParseInput (ftok x) (flit x) where flit = mk flits @@ -120,16 +132,10 @@ mkParseInput pgf lang ftok flits = \x -> ParseInput (ftok x) (flit x) cnc = lookConcr pgf lang mk [] = \x fid -> Nothing - mk ((c,flit):flits) = \x fid -> if match fid - then fmap (\(tree,toks) -> (c,tree,toks)) (flit x) - else flit' x fid - where - flit' = mk flits - - match fid = - case Map.lookup c (cnccats cnc) of - Just (CncCat s e _) -> inRange (s,e) fid - Nothing -> False + mk ((c,flit):flits) = \x fid -> case Map.lookup c (cnccats cnc) of + Just (CncCat s e _) | inRange (s,e) fid + -> fmap (\(tree,toks) -> (c,tree,toks)) (flit x) + _ -> mk flits x fid -- | From the current state and the next token -- 'nextState' computes a new state, where the token @@ -137,37 +143,37 @@ mkParseInput pgf lang ftok flits = \x -> ParseInput (ftok x) (flit x) -- If the new token cannot be accepted then an error state -- is returned. nextState :: ParseState -> ParseInput -> Either ErrorState ParseState -nextState (PState pgf cnc chart items) input = - let (mb_agenda,map_items) = TMap.decompose items +nextState (PState pgf cnc chart cnt0) input = + let (mb_agenda,map_items) = TrieMap.decompose cnt0 agenda = maybe [] Set.toList mb_agenda - acc = TMap.unions [tmap | (t,tmap) <- Map.toList map_items, piToken input t] - (acc1,chart1) = process flit ftok (sequences cnc) (cncfuns cnc) (lindefs cnc) agenda acc chart + cnt = fromMaybe TrieMap.empty (piToken input map_items) + (cnt1,chart1) = process flit ftok cnc agenda cnt chart chart2 = chart1{ active =emptyAC , actives=active chart1 : actives chart1 , passive=emptyPC , offset =offset chart1+1 } - in if TMap.null acc1 + in if TrieMap.null cnt1 then Left (EState pgf cnc chart2) - else Right (PState pgf cnc chart2 acc1) + else Right (PState pgf cnc chart2 cnt1) where flit = piLiteral input - ftok (tok:toks) item acc - | piToken input tok = TMap.insertWith Set.union toks (Set.singleton item) acc - ftok _ item acc = acc - + ftok choices cnt = + case piToken input choices of + Just cnt' -> TrieMap.unionWith Set.union cnt' cnt + Nothing -> cnt -- | If the next token is not known but only its prefix (possible empty prefix) -- then the 'getCompletions' function can be used to calculate the possible -- next words and the consequent states. This is used for word completions in -- the GF interpreter. getCompletions :: ParseState -> String -> Map.Map Token ParseState -getCompletions (PState pgf cnc chart items) w = - let (mb_agenda,map_items) = TMap.decompose items +getCompletions (PState pgf cnc chart cnt0) w = + let (mb_agenda,map_items) = TrieMap.decompose cnt0 agenda = maybe [] Set.toList mb_agenda acc = Map.filterWithKey (\tok _ -> isPrefixOf w tok) map_items - (acc',chart1) = process flit ftok (sequences cnc) (cncfuns cnc) (lindefs cnc) agenda acc chart + (acc',chart1) = process flit ftok cnc agenda acc chart chart2 = chart1{ active =emptyAC , actives=active chart1 : actives chart1 , passive=emptyPC @@ -177,21 +183,21 @@ getCompletions (PState pgf cnc chart items) w = where flit _ = Nothing - ftok (tok:toks) item acc - | isPrefixOf w tok = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc - ftok _ item acc = acc + ftok choices = + Map.unionWith (TrieMap.unionWith Set.union) + (Map.filterWithKey (\tok _ -> isPrefixOf w tok) choices) recoveryStates :: [Type] -> ErrorState -> (ParseState, Map.Map Token ParseState) recoveryStates open_types (EState pgf cnc chart) = let open_fcats = concatMap type2fcats open_types agenda = foldl (complete open_fcats) [] (actives chart) - (acc,chart1) = process flit ftok (sequences cnc) (cncfuns cnc) (lindefs cnc) agenda Map.empty chart + (acc,chart1) = process flit ftok cnc agenda Map.empty chart chart2 = chart1{ active =emptyAC , actives=active chart1 : actives chart1 , passive=emptyPC , offset =offset chart1+1 } - in (PState pgf cnc chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf cnc chart2) acc) + in (PState pgf cnc chart (TrieMap.singleton [] (Set.fromList agenda)), fmap (PState pgf cnc chart2) acc) where type2fcats (DTyp _ cat _) = case Map.lookup cat (cnccats cnc) of Just (CncCat s e labels) -> range (s,e) @@ -204,14 +210,14 @@ recoveryStates open_types (EState pgf cnc chart) = [set | fcat <- open_fcats, (set,_) <- lookupACByFCat fcat ac] flit _ = Nothing - ftok (tok:toks) item acc = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc + ftok toks = Map.unionWith (TrieMap.unionWith Set.union) toks -- | This function extracts the list of all completed parse trees -- that spans the whole input consumed so far. The trees are also -- limited by the category specified, which is usually -- the same as the startup category. getParseOutput :: ParseState -> Type -> Maybe Int -> (ParseOutput,BracketedString) -getParseOutput (PState pgf cnc chart items) ty@(DTyp _ start _) dp = +getParseOutput (PState pgf cnc chart cnt) ty@(DTyp _ start _) dp = let froots | null roots = getPartialSeq (sequences cnc) (reverse (active chart1 : actives chart1)) seq | otherwise = [([SymCat 0 lbl],[PArg [] fid]) | AK fid lbl <- roots] @@ -228,13 +234,14 @@ getParseOutput (PState pgf cnc chart items) ty@(DTyp _ start _) dp = in (res,bs) where - (mb_agenda,acc) = TMap.decompose items + (mb_agenda,acc) = TrieMap.decompose cnt agenda = maybe [] Set.toList mb_agenda - (acc',chart1) = process flit ftok (sequences cnc) (cncfuns cnc) (lindefs cnc) agenda (TMap.compose Nothing acc) chart - seq = [(j,cutAt ppos toks seqid,args,key) | (toks,set) <- TMap.toList acc', Active j ppos funid seqid args key <- Set.toList set] + (acc',chart1) = process flit ftok cnc agenda (TrieMap.compose Nothing acc) chart + seq = [(j,cutAt ppos toks seqid,args,key) | (toks,set) <- TrieMap.toList acc' + , Active j ppos funid seqid args key <- Set.toList set] - flit _ = Nothing - ftok toks item acc = TMap.insertWith Set.union toks (Set.singleton item) acc + flit _ = Nothing + ftok toks = TrieMap.unionWith Set.union (TrieMap.compose Nothing toks) cutAt ppos toks seqid = let seq = unsafeAt (sequences cnc) seqid @@ -275,8 +282,8 @@ getPartialSeq seqs actives = expand Set.empty inc n (SymLit d r) = SymLit (n+d) r inc n s = s -process flit ftok !seqs !funs defs [] acc chart = (acc,chart) -process flit ftok !seqs !funs defs (item@(Active j ppos funid seqid args key0):items) acc chart +process flit ftok cnc [] acc chart = (acc,chart) +process flit ftok cnc (item@(Active j ppos funid seqid args key0):items) acc chart | inRange (bounds lin) ppos = case unsafeAt lin ppos of SymCat d r -> let PArg hypos !fid = args !! d @@ -285,9 +292,10 @@ process flit ftok !seqs !funs defs (item@(Active j ppos funid seqid args key0):i items2 = case lookupPC (mkPK key k) (passive chart) of Nothing -> items Just id -> (Active j (ppos+1) funid seqid (updateAt d (PArg hypos id) args) key0) : items - items3 = foldForest (\funid args items -> Active k 0 funid (rhs funid r) args key : items) - (\_ _ items -> items) - items2 fid (IntMap.unionWith Set.union new_sc (forest chart)) + (acc',items4) = predict flit ftok cnc + (IntMap.unionWith Set.union new_sc (forest chart)) + key key k + acc items2 new_sc = foldl uu parent_sc hypos parent_sc = case lookupAC key0 ((active chart : actives chart) !! (k-j)) of @@ -295,15 +303,15 @@ process flit ftok !seqs !funs defs (item@(Active j ppos funid seqid args key0):i Just (set,sc) -> sc in case lookupAC key (active chart) of - Nothing -> process flit ftok seqs funs defs items3 acc chart{active=insertAC key (Set.singleton item,new_sc) (active chart)} - Just (set,sc) | Set.member item set -> process flit ftok seqs funs defs items acc chart - | otherwise -> process flit ftok seqs funs defs items2 acc chart{active=insertAC key (Set.insert item set,IntMap.unionWith Set.union new_sc sc) (active chart)} - SymKS toks -> let !acc' = ftok toks (Active j (ppos+1) funid seqid args key0) acc - in process flit ftok seqs funs defs items acc' chart + Nothing -> process flit ftok cnc items4 acc' chart{active=insertAC key (Set.singleton item,new_sc) (active chart)} + Just (set,sc) | Set.member item set -> process flit ftok cnc items acc chart + | otherwise -> process flit ftok cnc items2 acc chart{active=insertAC key (Set.insert item set,IntMap.unionWith Set.union new_sc sc) (active chart)} + SymKS toks -> let !acc' = ftok_ toks (Active j (ppos+1) funid seqid args key0) acc + in process flit ftok cnc items acc' chart SymKP strs vars - -> let !acc' = foldl (\acc toks -> ftok toks (Active j (ppos+1) funid seqid args key0) acc) acc + -> let !acc' = foldl (\acc toks -> ftok_ toks (Active j (ppos+1) funid seqid args key0) acc) acc (strs:[strs' | Alt strs' _ <- vars]) - in process flit ftok seqs funs defs items acc' chart + in process flit ftok cnc items acc' chart SymLit d r -> let PArg hypos fid = args !! d key = AK fid r !fid' = case lookupPC (mkPK key k) (passive chart) of @@ -311,17 +319,17 @@ process flit ftok !seqs !funs defs (item@(Active j ppos funid seqid args key0):i Just fid -> fid in case [ts | PConst _ _ ts <- maybe [] Set.toList (IntMap.lookup fid' (forest chart))] of - (toks:_) -> let !acc' = ftok toks (Active j (ppos+1) funid seqid (updateAt d (PArg hypos fid') args) key0) acc - in process flit ftok seqs funs defs items acc' chart + (toks:_) -> let !acc' = ftok_ toks (Active j (ppos+1) funid seqid (updateAt d (PArg hypos fid') args) key0) acc + in process flit ftok cnc items acc' chart [] -> case flit fid of Just (cat,lit,toks) -> let fid' = nextId chart - !acc' = ftok toks (Active j (ppos+1) funid seqid (updateAt d (PArg hypos fid') args) key0) acc - in process flit ftok seqs funs defs items acc' chart{passive=insertPC (mkPK key k) fid' (passive chart) - ,forest =IntMap.insert fid' (Set.singleton (PConst cat lit toks)) (forest chart) - ,nextId =nextId chart+1 - } - Nothing -> process flit ftok seqs funs defs items acc chart + !acc' = ftok_ toks (Active j (ppos+1) funid seqid (updateAt d (PArg hypos fid') args) key0) acc + in process flit ftok cnc items acc' chart{passive=insertPC (mkPK key k) fid' (passive chart) + ,forest =IntMap.insert fid' (Set.singleton (PConst cat lit toks)) (forest chart) + ,nextId =nextId chart+1 + } + Nothing -> process flit ftok cnc items acc chart SymVar d r -> let PArg hypos fid0 = args !! d (fid1,fid2) = hypos !! r key = AK fid1 0 @@ -330,17 +338,17 @@ process flit ftok !seqs !funs defs (item@(Active j ppos funid seqid args key0):i Just fid -> fid in case [ts | PConst _ _ ts <- maybe [] Set.toList (IntMap.lookup fid' (forest chart))] of - (toks:_) -> let !acc' = ftok toks (Active j (ppos+1) funid seqid (updateAt d (PArg (updateAt r (fid',fid2) hypos) fid0) args) key0) acc - in process flit ftok seqs funs defs items acc' chart + (toks:_) -> let !acc' = ftok_ toks (Active j (ppos+1) funid seqid (updateAt d (PArg (updateAt r (fid',fid2) hypos) fid0) args) key0) acc + in process flit ftok cnc items acc' chart [] -> case flit fid1 of Just (cat,lit,toks) -> let fid' = nextId chart - !acc' = ftok toks (Active j (ppos+1) funid seqid (updateAt d (PArg (updateAt r (fid',fid2) hypos) fid0) args) key0) acc - in process flit ftok seqs funs defs items acc' chart{passive=insertPC (mkPK key k) fid' (passive chart) - ,forest =IntMap.insert fid' (Set.singleton (PConst cat lit toks)) (forest chart) - ,nextId =nextId chart+1 - } - Nothing -> process flit ftok seqs funs defs items acc chart + !acc' = ftok_ toks (Active j (ppos+1) funid seqid (updateAt d (PArg (updateAt r (fid',fid2) hypos) fid0) args) key0) acc + in process flit ftok cnc items acc' chart{passive=insertPC (mkPK key k) fid' (passive chart) + ,forest =IntMap.insert fid' (Set.singleton (PConst cat lit toks)) (forest chart) + ,nextId =nextId chart+1 + } + Nothing -> process flit ftok cnc items acc chart | otherwise = case lookupPC (mkPK key0 j) (passive chart) of Nothing -> let fid = nextId chart @@ -348,34 +356,61 @@ process flit ftok !seqs !funs defs (item@(Active j ppos funid seqid args key0):i items2 = case lookupAC key0 ((active chart:actives chart) !! (k-j)) of Nothing -> items Just (set,sc) -> Set.fold (\(Active j' ppos funid seqid args keyc) -> - let SymCat d _ = unsafeAt (unsafeAt seqs seqid) ppos + let SymCat d _ = unsafeAt (unsafeAt (sequences cnc) seqid) ppos PArg hypos _ = args !! d in (:) (Active j' (ppos+1) funid seqid (updateAt d (PArg hypos fid) args) keyc)) items set - in process flit ftok seqs funs defs items2 acc chart{passive=insertPC (mkPK key0 j) fid (passive chart) - ,forest =IntMap.insert fid (Set.singleton (PApply funid args)) (forest chart) - ,nextId =nextId chart+1 - } + in process flit ftok cnc items2 acc chart{passive=insertPC (mkPK key0 j) fid (passive chart) + ,forest =IntMap.insert fid (Set.singleton (PApply funid args)) (forest chart) + ,nextId =nextId chart+1 + } Just id -> let items2 = [Active k 0 funid (rhs funid r) args (AK id r) | r <- labelsAC id (active chart)] ++ items - in process flit ftok seqs funs defs items2 acc chart{forest = IntMap.insertWith Set.union id (Set.singleton (PApply funid args)) (forest chart)} + in process flit ftok cnc items2 acc chart{forest = IntMap.insertWith Set.union id (Set.singleton (PApply funid args)) (forest chart)} where - !lin = unsafeAt seqs seqid + !lin = unsafeAt (sequences cnc) seqid !k = offset chart mkPK (AK fid lbl) j = PK fid lbl j rhs funid lbl = unsafeAt lins lbl where - CncFun _ lins = unsafeAt funs funid + CncFun _ lins = unsafeAt (cncfuns cnc) funid uu forest (fid1,fid2) = - case IntMap.lookup fid2 defs of + case IntMap.lookup fid2 (lindefs cnc) of Just funs -> foldl (\forest funid -> IntMap.insertWith Set.union fid2 (Set.singleton (PApply funid [PArg [] fid1])) forest) forest funs Nothing -> forest + + ftok_ (tok:toks) item cnt = + ftok (Map.singleton tok (TrieMap.singleton toks (Set.singleton item))) cnt + +predict flit ftok cnc forest key0 key@(AK fid lbl) k acc items = + let (acc1,items1) = case IntMap.lookup fid forest of + Nothing -> (acc,items) + Just set -> Set.fold foldProd (acc,items) set + + (acc2,items2) = case IntMap.lookup fid (lexicon cnc) >>= IntMap.lookup lbl of + Just tmap -> let (mb_v,toks) = TrieMap.decompose (TrieMap.map (toItems key0 k) tmap) + acc1' = ftok toks acc1 + items1' = maybe [] Set.toList mb_v ++ items1 + in (acc1',items1') + Nothing -> (acc1,items1) + in (acc2,items2) + where + foldProd (PCoerce fid) (acc,items) = predict flit ftok cnc forest key0 (AK fid lbl) k acc items + foldProd (PApply funid args) (acc,items) = (acc,Active k 0 funid (rhs funid lbl) args key0 : items) + foldProd (PConst _ const toks) (acc,items) = (acc,items) + + rhs funid lbl = unsafeAt lins lbl + where + CncFun _ lins = unsafeAt (cncfuns cnc) funid + + toItems key@(AK fid lbl) k funids = + Set.fromList [Active k 1 funid (rhs funid lbl) [] key | funid <- IntSet.toList funids] + updateAt :: Int -> a -> [a] -> [a] updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs] - ---------------------------------------------------------------- -- Active Chart ---------------------------------------------------------------- @@ -385,22 +420,23 @@ data Active {-# UNPACK #-} !DotPos {-# UNPACK #-} !FunId {-# UNPACK #-} !SeqId - [PArg] + [PArg] {-# UNPACK #-} !ActiveKey deriving (Eq,Show,Ord) data ActiveKey = AK {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex deriving (Eq,Ord,Show) -type ActiveChart = IntMap.IntMap (IntMap.IntMap (Set.Set Active, IntMap.IntMap (Set.Set Production))) +type ActiveSet = Set.Set Active +type ActiveChart = IntMap.IntMap (IntMap.IntMap (ActiveSet, IntMap.IntMap (Set.Set Production))) emptyAC :: ActiveChart emptyAC = IntMap.empty -lookupAC :: ActiveKey -> ActiveChart -> Maybe (Set.Set Active, IntMap.IntMap (Set.Set Production)) -lookupAC (AK fcat l) chart = IntMap.lookup fcat chart >>= IntMap.lookup l +lookupAC :: ActiveKey -> ActiveChart -> Maybe (ActiveSet, IntMap.IntMap (Set.Set Production)) +lookupAC (AK fid lbl) chart = IntMap.lookup fid chart >>= IntMap.lookup lbl -lookupACByFCat :: FId -> ActiveChart -> [(Set.Set Active, IntMap.IntMap (Set.Set Production))] +lookupACByFCat :: FId -> ActiveChart -> [(ActiveSet, IntMap.IntMap (Set.Set Production))] lookupACByFCat fcat chart = case IntMap.lookup fcat chart of Nothing -> [] @@ -412,7 +448,7 @@ labelsAC fcat chart = Nothing -> [] Just map -> IntMap.keys map -insertAC :: ActiveKey -> (Set.Set Active, IntMap.IntMap (Set.Set Production)) -> ActiveChart -> ActiveChart +insertAC :: ActiveKey -> (ActiveSet, IntMap.IntMap (Set.Set Production)) -> ActiveChart -> ActiveChart insertAC (AK fcat l) set chart = IntMap.insertWith IntMap.union fcat (IntMap.singleton l set) chart @@ -444,7 +480,7 @@ insertPC key fcat chart = Map.insert key fcat chart -- | An abstract data type whose values represent -- the current state in an incremental parser. -data ParseState = PState PGF Concr Chart (TMap.TrieMap String (Set.Set Active)) +data ParseState = PState PGF Concr Chart Continuation data Chart = Chart @@ -457,6 +493,8 @@ data Chart } deriving Show +type Continuation = TrieMap.TrieMap Token ActiveSet + ---------------------------------------------------------------- -- Error State ----------------------------------------------------------------