1
0
forked from GitHub/gf-core

optimization in the parser for large lexicons. Now, the parser is slightly slower for grammars with a small lexicon but 3-4 times faster for the English Resource Grammar used in combination with Oxford Advanced Learners Dictionary

This commit is contained in:
krasimir
2010-12-14 08:46:22 +00:00
parent 0c4f5030c7
commit 7d58efb660
6 changed files with 177 additions and 104 deletions

View File

@@ -68,6 +68,7 @@ instance Binary Concr where
, productions=productions , productions=productions
, pproductions = IntMap.empty , pproductions = IntMap.empty
, lproductions = Map.empty , lproductions = Map.empty
, lexicon = IntMap.empty
, cnccats=cnccats, totalCats=totalCats , cnccats=cnccats, totalCats=totalCats
}) })

View File

@@ -7,6 +7,8 @@ import PGF.Type
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.IntMap as IntMap import qualified Data.IntMap as IntMap
import qualified Data.IntSet as IntSet
import qualified GF.Data.TrieMap as TMap
import Data.Array.IArray import Data.Array.IArray
import Data.Array.Unboxed import Data.Array.Unboxed
import Data.List import Data.List
@@ -42,6 +44,7 @@ data Concr = Concr {
pproductions :: IntMap.IntMap (Set.Set Production), -- productions needed for parsing pproductions :: IntMap.IntMap (Set.Set Production), -- productions needed for parsing
lproductions :: Map.Map CId (IntMap.IntMap (Set.Set Production)), -- productions needed for linearization lproductions :: Map.Map CId (IntMap.IntMap (Set.Set Production)), -- productions needed for linearization
cnccats :: Map.Map CId CncCat, cnccats :: Map.Map CId CncCat,
lexicon :: IntMap.IntMap (IntMap.IntMap (TMap.TrieMap Token IntSet.IntSet)),
totalCats :: {-# UNPACK #-} !FId totalCats :: {-# UNPACK #-} !FId
} }

View File

@@ -17,7 +17,6 @@ module PGF.Forest( Forest(..)
, BracketedString, showBracketedString, lengthBracketedString , BracketedString, showBracketedString, lengthBracketedString
, linearizeWithBrackets , linearizeWithBrackets
, getAbsTrees , getAbsTrees
, foldForest
) where ) where
import PGF.CId import PGF.CId

View File

@@ -29,7 +29,7 @@ buildMorpho pgf lang = Morpho $
collectWords pinfo = Map.fromListWith (++) collectWords pinfo = Map.fromListWith (++)
[(t, [(fun,lbls ! l)]) | (CncCat s e lbls) <- Map.elems (cnccats pinfo) [(t, [(fun,lbls ! l)]) | (CncCat s e lbls) <- Map.elems (cnccats pinfo)
, fid <- [s..e] , fid <- [s..e]
, PApply funid _ <- maybe [] Set.toList (IntMap.lookup fid (pproductions pinfo)) , PApply funid _ <- maybe [] Set.toList (IntMap.lookup fid (productions pinfo))
, let CncFun fun lins = cncfuns pinfo ! funid , let CncFun fun lins = cncfuns pinfo ! funid
, (l,seqid) <- assocs lins , (l,seqid) <- assocs lins
, sym <- elems (sequences pinfo ! seqid) , sym <- elems (sequences pinfo ! seqid)

View File

@@ -17,6 +17,7 @@ import qualified Data.Map as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.IntSet as IntSet import qualified Data.IntSet as IntSet
import qualified Data.IntMap as IntMap import qualified Data.IntMap as IntMap
import qualified GF.Data.TrieMap as TrieMap
import qualified Data.List as List import qualified Data.List as List
import Control.Monad.ST import Control.Monad.ST
import GF.Data.Utilities(sortNub) import GF.Data.Utilities(sortNub)
@@ -195,10 +196,41 @@ filterProductions prods0 hoc0 prods
accumHOC (PApply funid args) hoc = List.foldl' (\hoc (PArg hypos _) -> List.foldl' (\hoc (_,fid) -> IntSet.insert fid hoc) hoc hypos) hoc args accumHOC (PApply funid args) hoc = List.foldl' (\hoc (PArg hypos _) -> List.foldl' (\hoc (_,fid) -> IntSet.insert fid hoc) hoc hypos) hoc args
accumHOC _ hoc = hoc accumHOC _ hoc = hoc
splitLexicalRules cnc p_prods =
IntMap.foldWithKey split (IntMap.empty,IntMap.empty) p_prods
where
split fid set (lex,syn) =
let (lex0,syn0) = Set.partition isLexical set
!lex' = if Set.null lex0
then lex
else let !mp = IntMap.unionsWith (TrieMap.unionWith IntSet.union)
[words funid | PApply funid [] <- Set.toList lex0]
in IntMap.insert fid mp lex
!syn' = if Set.null syn0
then syn
else IntMap.insert fid syn0 syn
in (lex', syn')
isLexical (PApply _ []) = True
isLexical _ = False
words funid = IntMap.fromList [(lbl,seq2prefix (elems (sequences cnc ! seqid)))
| (lbl,seqid) <- assocs lins]
where
CncFun _ lins = cncfuns cnc ! funid
wf ts = (ts,IntSet.singleton funid)
seq2prefix [] = TrieMap.fromList [wf []]
seq2prefix (SymKS ts :syms) = TrieMap.fromList [wf ts]
seq2prefix (SymKP ts alts:syms) = TrieMap.fromList (wf ts : [wf ts | Alt ts ps <- alts])
updateConcrete abs cnc = updateConcrete abs cnc =
let p_prods = filterProductions IntMap.empty IntSet.empty (productions cnc) let p_prods0 = filterProductions IntMap.empty IntSet.empty (productions cnc)
l_prods = linIndex cnc p_prods (lex,p_prods) = splitLexicalRules cnc p_prods0
in cnc{pproductions = p_prods, lproductions = l_prods} l_prods = linIndex cnc p_prods0
in cnc{pproductions = p_prods, lproductions = l_prods, lexicon = lex}
where where
linIndex cnc productions = linIndex cnc productions =
Map.fromListWith (IntMap.unionWith Set.union) Map.fromListWith (IntMap.unionWith Set.union)

View File

@@ -1,4 +1,4 @@
{-# LANGUAGE BangPatterns #-} {-# LANGUAGE BangPatterns, RankNTypes #-}
module PGF.Parse module PGF.Parse
( ParseState ( ParseState
, ErrorState , ErrorState
@@ -17,8 +17,9 @@ import Data.Array.Base (unsafeAt)
import Data.List (isPrefixOf, foldl') import Data.List (isPrefixOf, foldl')
import Data.Maybe (fromMaybe, maybe, maybeToList) import Data.Maybe (fromMaybe, maybe, maybeToList)
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified GF.Data.TrieMap as TMap import qualified GF.Data.TrieMap as TrieMap
import qualified Data.IntMap as IntMap import qualified Data.IntMap as IntMap
import qualified Data.IntSet as IntSet
import qualified Data.Set as Set import qualified Data.Set as Set
import Control.Monad import Control.Monad
@@ -28,16 +29,16 @@ import PGF.Data
import PGF.Expr(Tree) import PGF.Expr(Tree)
import PGF.Macros import PGF.Macros
import PGF.TypeCheck import PGF.TypeCheck
import PGF.Forest(Forest(Forest), linearizeWithBrackets, getAbsTrees, foldForest) import PGF.Forest(Forest(Forest), linearizeWithBrackets, getAbsTrees)
-- | The input to the parser is a pair of predicates. The first one -- | The input to the parser is a pair of predicates. The first one
-- 'piToken' checks that a given token, suggested by the grammar, -- 'piToken' selects a token from a list of suggestions from the grammar,
-- actually appears at the current position in the input string. -- actually appears at the current position in the input string.
-- The second one 'piLiteral' recognizes whether a literal with forest id 'FId' -- The second one 'piLiteral' recognizes whether a literal with forest id 'FId'
-- could be matched at the current position. -- could be matched at the current position.
data ParseInput data ParseInput
= ParseInput = ParseInput
{ piToken :: Token -> Bool { piToken :: forall a . Map.Map Token a -> Maybe a
, piLiteral :: FId -> Maybe (CId,Tree,[Token]) , piLiteral :: FId -> Maybe (CId,Tree,[Token])
} }
@@ -80,28 +81,36 @@ parseWithRecovery pgf lang typ open_typs dp toks = accept (initState pgf lang ty
-- startup category. -- startup category.
initState :: PGF -> Language -> Type -> ParseState initState :: PGF -> Language -> Type -> ParseState
initState pgf lang (DTyp _ start _) = initState pgf lang (DTyp _ start _) =
let items = case Map.lookup start (cnccats cnc) of let (acc,items) = case Map.lookup start (cnccats cnc) of
Just (CncCat s e labels) -> do fid <- range (s,e) Just (CncCat s e labels) ->
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args) let keys = do fid <- range (s,e)
[] fid (pproductions cnc) lbl <- indices labels
let CncFun fn lins = cncfuns cnc ! funid return (AK fid lbl)
(lbl,seqid) <- assocs lins in foldl' (\(acc,items) key -> predict flit ftok cnc
return (Active 0 0 funid seqid args (AK fid lbl)) (pproductions cnc)
Nothing -> mzero key key 0
acc items)
cnc = lookConcrComplete pgf lang (Map.empty,[])
keys
Nothing -> (Map.empty,[])
in PState pgf in PState pgf
cnc cnc
(Chart emptyAC [] emptyPC (pproductions cnc) (totalCats cnc) 0) (Chart emptyAC [] emptyPC (pproductions cnc) (totalCats cnc) 0)
(TMap.singleton [] (Set.fromList items)) (TrieMap.compose (Just (Set.fromList items)) acc)
where
cnc = lookConcrComplete pgf lang
flit _ = Nothing
ftok = Map.unionWith (TrieMap.unionWith Set.union)
-- | This function constructs the simplest possible parser input. -- | This function constructs the simplest possible parser input.
-- It checks the tokens for exact matching and recognizes only @String@, @Int@ and @Float@ literals. -- It checks the tokens for exact matching and recognizes only @String@, @Int@ and @Float@ literals.
-- The @Int@ and @Float@ literals match only if the token passed is some number. -- The @Int@ and @Float@ literals match only if the token passed is some number.
-- The @String@ literal always match but the length of the literal could be only one token. -- The @String@ literal always match but the length of the literal could be only one token.
simpleParseInput :: Token -> ParseInput simpleParseInput :: Token -> ParseInput
simpleParseInput t = ParseInput (==t) (matchLit t) simpleParseInput t = ParseInput (Map.lookup t) (matchLit t)
where where
matchLit t fid matchLit t fid
| fid == fidString = Just (cidString,ELit (LStr t),[t]) | fid == fidString = Just (cidString,ELit (LStr t),[t])
@@ -112,7 +121,10 @@ simpleParseInput t = ParseInput (==t) (matchLit t)
| fid == fidVar = Just (wildCId,EFun (mkCId t),[t]) | fid == fidVar = Just (wildCId,EFun (mkCId t),[t])
| otherwise = Nothing | otherwise = Nothing
mkParseInput :: PGF -> Language -> (a -> Token -> Bool) -> [(CId,a -> Maybe (Tree,[Token]))] -> a -> ParseInput mkParseInput :: PGF -> Language
-> (forall a . b -> Map.Map Token a -> Maybe a)
-> [(CId,b -> Maybe (Tree,[Token]))]
-> (b -> ParseInput)
mkParseInput pgf lang ftok flits = \x -> ParseInput (ftok x) (flit x) mkParseInput pgf lang ftok flits = \x -> ParseInput (ftok x) (flit x)
where where
flit = mk flits flit = mk flits
@@ -120,16 +132,10 @@ mkParseInput pgf lang ftok flits = \x -> ParseInput (ftok x) (flit x)
cnc = lookConcr pgf lang cnc = lookConcr pgf lang
mk [] = \x fid -> Nothing mk [] = \x fid -> Nothing
mk ((c,flit):flits) = \x fid -> if match fid mk ((c,flit):flits) = \x fid -> case Map.lookup c (cnccats cnc) of
then fmap (\(tree,toks) -> (c,tree,toks)) (flit x) Just (CncCat s e _) | inRange (s,e) fid
else flit' x fid -> fmap (\(tree,toks) -> (c,tree,toks)) (flit x)
where _ -> mk flits x fid
flit' = mk flits
match fid =
case Map.lookup c (cnccats cnc) of
Just (CncCat s e _) -> inRange (s,e) fid
Nothing -> False
-- | From the current state and the next token -- | From the current state and the next token
-- 'nextState' computes a new state, where the token -- 'nextState' computes a new state, where the token
@@ -137,37 +143,37 @@ mkParseInput pgf lang ftok flits = \x -> ParseInput (ftok x) (flit x)
-- If the new token cannot be accepted then an error state -- If the new token cannot be accepted then an error state
-- is returned. -- is returned.
nextState :: ParseState -> ParseInput -> Either ErrorState ParseState nextState :: ParseState -> ParseInput -> Either ErrorState ParseState
nextState (PState pgf cnc chart items) input = nextState (PState pgf cnc chart cnt0) input =
let (mb_agenda,map_items) = TMap.decompose items let (mb_agenda,map_items) = TrieMap.decompose cnt0
agenda = maybe [] Set.toList mb_agenda agenda = maybe [] Set.toList mb_agenda
acc = TMap.unions [tmap | (t,tmap) <- Map.toList map_items, piToken input t] cnt = fromMaybe TrieMap.empty (piToken input map_items)
(acc1,chart1) = process flit ftok (sequences cnc) (cncfuns cnc) (lindefs cnc) agenda acc chart (cnt1,chart1) = process flit ftok cnc agenda cnt chart
chart2 = chart1{ active =emptyAC chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1 , actives=active chart1 : actives chart1
, passive=emptyPC , passive=emptyPC
, offset =offset chart1+1 , offset =offset chart1+1
} }
in if TMap.null acc1 in if TrieMap.null cnt1
then Left (EState pgf cnc chart2) then Left (EState pgf cnc chart2)
else Right (PState pgf cnc chart2 acc1) else Right (PState pgf cnc chart2 cnt1)
where where
flit = piLiteral input flit = piLiteral input
ftok (tok:toks) item acc ftok choices cnt =
| piToken input tok = TMap.insertWith Set.union toks (Set.singleton item) acc case piToken input choices of
ftok _ item acc = acc Just cnt' -> TrieMap.unionWith Set.union cnt' cnt
Nothing -> cnt
-- | If the next token is not known but only its prefix (possible empty prefix) -- | If the next token is not known but only its prefix (possible empty prefix)
-- then the 'getCompletions' function can be used to calculate the possible -- then the 'getCompletions' function can be used to calculate the possible
-- next words and the consequent states. This is used for word completions in -- next words and the consequent states. This is used for word completions in
-- the GF interpreter. -- the GF interpreter.
getCompletions :: ParseState -> String -> Map.Map Token ParseState getCompletions :: ParseState -> String -> Map.Map Token ParseState
getCompletions (PState pgf cnc chart items) w = getCompletions (PState pgf cnc chart cnt0) w =
let (mb_agenda,map_items) = TMap.decompose items let (mb_agenda,map_items) = TrieMap.decompose cnt0
agenda = maybe [] Set.toList mb_agenda agenda = maybe [] Set.toList mb_agenda
acc = Map.filterWithKey (\tok _ -> isPrefixOf w tok) map_items acc = Map.filterWithKey (\tok _ -> isPrefixOf w tok) map_items
(acc',chart1) = process flit ftok (sequences cnc) (cncfuns cnc) (lindefs cnc) agenda acc chart (acc',chart1) = process flit ftok cnc agenda acc chart
chart2 = chart1{ active =emptyAC chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1 , actives=active chart1 : actives chart1
, passive=emptyPC , passive=emptyPC
@@ -177,21 +183,21 @@ getCompletions (PState pgf cnc chart items) w =
where where
flit _ = Nothing flit _ = Nothing
ftok (tok:toks) item acc ftok choices =
| isPrefixOf w tok = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc Map.unionWith (TrieMap.unionWith Set.union)
ftok _ item acc = acc (Map.filterWithKey (\tok _ -> isPrefixOf w tok) choices)
recoveryStates :: [Type] -> ErrorState -> (ParseState, Map.Map Token ParseState) recoveryStates :: [Type] -> ErrorState -> (ParseState, Map.Map Token ParseState)
recoveryStates open_types (EState pgf cnc chart) = recoveryStates open_types (EState pgf cnc chart) =
let open_fcats = concatMap type2fcats open_types let open_fcats = concatMap type2fcats open_types
agenda = foldl (complete open_fcats) [] (actives chart) agenda = foldl (complete open_fcats) [] (actives chart)
(acc,chart1) = process flit ftok (sequences cnc) (cncfuns cnc) (lindefs cnc) agenda Map.empty chart (acc,chart1) = process flit ftok cnc agenda Map.empty chart
chart2 = chart1{ active =emptyAC chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1 , actives=active chart1 : actives chart1
, passive=emptyPC , passive=emptyPC
, offset =offset chart1+1 , offset =offset chart1+1
} }
in (PState pgf cnc chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf cnc chart2) acc) in (PState pgf cnc chart (TrieMap.singleton [] (Set.fromList agenda)), fmap (PState pgf cnc chart2) acc)
where where
type2fcats (DTyp _ cat _) = case Map.lookup cat (cnccats cnc) of type2fcats (DTyp _ cat _) = case Map.lookup cat (cnccats cnc) of
Just (CncCat s e labels) -> range (s,e) Just (CncCat s e labels) -> range (s,e)
@@ -204,14 +210,14 @@ recoveryStates open_types (EState pgf cnc chart) =
[set | fcat <- open_fcats, (set,_) <- lookupACByFCat fcat ac] [set | fcat <- open_fcats, (set,_) <- lookupACByFCat fcat ac]
flit _ = Nothing flit _ = Nothing
ftok (tok:toks) item acc = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc ftok toks = Map.unionWith (TrieMap.unionWith Set.union) toks
-- | This function extracts the list of all completed parse trees -- | This function extracts the list of all completed parse trees
-- that spans the whole input consumed so far. The trees are also -- that spans the whole input consumed so far. The trees are also
-- limited by the category specified, which is usually -- limited by the category specified, which is usually
-- the same as the startup category. -- the same as the startup category.
getParseOutput :: ParseState -> Type -> Maybe Int -> (ParseOutput,BracketedString) getParseOutput :: ParseState -> Type -> Maybe Int -> (ParseOutput,BracketedString)
getParseOutput (PState pgf cnc chart items) ty@(DTyp _ start _) dp = getParseOutput (PState pgf cnc chart cnt) ty@(DTyp _ start _) dp =
let froots | null roots = getPartialSeq (sequences cnc) (reverse (active chart1 : actives chart1)) seq let froots | null roots = getPartialSeq (sequences cnc) (reverse (active chart1 : actives chart1)) seq
| otherwise = [([SymCat 0 lbl],[PArg [] fid]) | AK fid lbl <- roots] | otherwise = [([SymCat 0 lbl],[PArg [] fid]) | AK fid lbl <- roots]
@@ -228,13 +234,14 @@ getParseOutput (PState pgf cnc chart items) ty@(DTyp _ start _) dp =
in (res,bs) in (res,bs)
where where
(mb_agenda,acc) = TMap.decompose items (mb_agenda,acc) = TrieMap.decompose cnt
agenda = maybe [] Set.toList mb_agenda agenda = maybe [] Set.toList mb_agenda
(acc',chart1) = process flit ftok (sequences cnc) (cncfuns cnc) (lindefs cnc) agenda (TMap.compose Nothing acc) chart (acc',chart1) = process flit ftok cnc agenda (TrieMap.compose Nothing acc) chart
seq = [(j,cutAt ppos toks seqid,args,key) | (toks,set) <- TMap.toList acc', Active j ppos funid seqid args key <- Set.toList set] seq = [(j,cutAt ppos toks seqid,args,key) | (toks,set) <- TrieMap.toList acc'
, Active j ppos funid seqid args key <- Set.toList set]
flit _ = Nothing flit _ = Nothing
ftok toks item acc = TMap.insertWith Set.union toks (Set.singleton item) acc ftok toks = TrieMap.unionWith Set.union (TrieMap.compose Nothing toks)
cutAt ppos toks seqid = cutAt ppos toks seqid =
let seq = unsafeAt (sequences cnc) seqid let seq = unsafeAt (sequences cnc) seqid
@@ -275,8 +282,8 @@ getPartialSeq seqs actives = expand Set.empty
inc n (SymLit d r) = SymLit (n+d) r inc n (SymLit d r) = SymLit (n+d) r
inc n s = s inc n s = s
process flit ftok !seqs !funs defs [] acc chart = (acc,chart) process flit ftok cnc [] acc chart = (acc,chart)
process flit ftok !seqs !funs defs (item@(Active j ppos funid seqid args key0):items) acc chart process flit ftok cnc (item@(Active j ppos funid seqid args key0):items) acc chart
| inRange (bounds lin) ppos = | inRange (bounds lin) ppos =
case unsafeAt lin ppos of case unsafeAt lin ppos of
SymCat d r -> let PArg hypos !fid = args !! d SymCat d r -> let PArg hypos !fid = args !! d
@@ -285,9 +292,10 @@ process flit ftok !seqs !funs defs (item@(Active j ppos funid seqid args key0):i
items2 = case lookupPC (mkPK key k) (passive chart) of items2 = case lookupPC (mkPK key k) (passive chart) of
Nothing -> items Nothing -> items
Just id -> (Active j (ppos+1) funid seqid (updateAt d (PArg hypos id) args) key0) : items Just id -> (Active j (ppos+1) funid seqid (updateAt d (PArg hypos id) args) key0) : items
items3 = foldForest (\funid args items -> Active k 0 funid (rhs funid r) args key : items) (acc',items4) = predict flit ftok cnc
(\_ _ items -> items) (IntMap.unionWith Set.union new_sc (forest chart))
items2 fid (IntMap.unionWith Set.union new_sc (forest chart)) key key k
acc items2
new_sc = foldl uu parent_sc hypos new_sc = foldl uu parent_sc hypos
parent_sc = case lookupAC key0 ((active chart : actives chart) !! (k-j)) of parent_sc = case lookupAC key0 ((active chart : actives chart) !! (k-j)) of
@@ -295,15 +303,15 @@ process flit ftok !seqs !funs defs (item@(Active j ppos funid seqid args key0):i
Just (set,sc) -> sc Just (set,sc) -> sc
in case lookupAC key (active chart) of in case lookupAC key (active chart) of
Nothing -> process flit ftok seqs funs defs items3 acc chart{active=insertAC key (Set.singleton item,new_sc) (active chart)} Nothing -> process flit ftok cnc items4 acc' chart{active=insertAC key (Set.singleton item,new_sc) (active chart)}
Just (set,sc) | Set.member item set -> process flit ftok seqs funs defs items acc chart Just (set,sc) | Set.member item set -> process flit ftok cnc items acc chart
| otherwise -> process flit ftok seqs funs defs items2 acc chart{active=insertAC key (Set.insert item set,IntMap.unionWith Set.union new_sc sc) (active chart)} | otherwise -> process flit ftok cnc items2 acc chart{active=insertAC key (Set.insert item set,IntMap.unionWith Set.union new_sc sc) (active chart)}
SymKS toks -> let !acc' = ftok toks (Active j (ppos+1) funid seqid args key0) acc SymKS toks -> let !acc' = ftok_ toks (Active j (ppos+1) funid seqid args key0) acc
in process flit ftok seqs funs defs items acc' chart in process flit ftok cnc items acc' chart
SymKP strs vars SymKP strs vars
-> let !acc' = foldl (\acc toks -> ftok toks (Active j (ppos+1) funid seqid args key0) acc) acc -> let !acc' = foldl (\acc toks -> ftok_ toks (Active j (ppos+1) funid seqid args key0) acc) acc
(strs:[strs' | Alt strs' _ <- vars]) (strs:[strs' | Alt strs' _ <- vars])
in process flit ftok seqs funs defs items acc' chart in process flit ftok cnc items acc' chart
SymLit d r -> let PArg hypos fid = args !! d SymLit d r -> let PArg hypos fid = args !! d
key = AK fid r key = AK fid r
!fid' = case lookupPC (mkPK key k) (passive chart) of !fid' = case lookupPC (mkPK key k) (passive chart) of
@@ -311,17 +319,17 @@ process flit ftok !seqs !funs defs (item@(Active j ppos funid seqid args key0):i
Just fid -> fid Just fid -> fid
in case [ts | PConst _ _ ts <- maybe [] Set.toList (IntMap.lookup fid' (forest chart))] of in case [ts | PConst _ _ ts <- maybe [] Set.toList (IntMap.lookup fid' (forest chart))] of
(toks:_) -> let !acc' = ftok toks (Active j (ppos+1) funid seqid (updateAt d (PArg hypos fid') args) key0) acc (toks:_) -> let !acc' = ftok_ toks (Active j (ppos+1) funid seqid (updateAt d (PArg hypos fid') args) key0) acc
in process flit ftok seqs funs defs items acc' chart in process flit ftok cnc items acc' chart
[] -> case flit fid of [] -> case flit fid of
Just (cat,lit,toks) Just (cat,lit,toks)
-> let fid' = nextId chart -> let fid' = nextId chart
!acc' = ftok toks (Active j (ppos+1) funid seqid (updateAt d (PArg hypos fid') args) key0) acc !acc' = ftok_ toks (Active j (ppos+1) funid seqid (updateAt d (PArg hypos fid') args) key0) acc
in process flit ftok seqs funs defs items acc' chart{passive=insertPC (mkPK key k) fid' (passive chart) in process flit ftok cnc items acc' chart{passive=insertPC (mkPK key k) fid' (passive chart)
,forest =IntMap.insert fid' (Set.singleton (PConst cat lit toks)) (forest chart) ,forest =IntMap.insert fid' (Set.singleton (PConst cat lit toks)) (forest chart)
,nextId =nextId chart+1 ,nextId =nextId chart+1
} }
Nothing -> process flit ftok seqs funs defs items acc chart Nothing -> process flit ftok cnc items acc chart
SymVar d r -> let PArg hypos fid0 = args !! d SymVar d r -> let PArg hypos fid0 = args !! d
(fid1,fid2) = hypos !! r (fid1,fid2) = hypos !! r
key = AK fid1 0 key = AK fid1 0
@@ -330,17 +338,17 @@ process flit ftok !seqs !funs defs (item@(Active j ppos funid seqid args key0):i
Just fid -> fid Just fid -> fid
in case [ts | PConst _ _ ts <- maybe [] Set.toList (IntMap.lookup fid' (forest chart))] of in case [ts | PConst _ _ ts <- maybe [] Set.toList (IntMap.lookup fid' (forest chart))] of
(toks:_) -> let !acc' = ftok toks (Active j (ppos+1) funid seqid (updateAt d (PArg (updateAt r (fid',fid2) hypos) fid0) args) key0) acc (toks:_) -> let !acc' = ftok_ toks (Active j (ppos+1) funid seqid (updateAt d (PArg (updateAt r (fid',fid2) hypos) fid0) args) key0) acc
in process flit ftok seqs funs defs items acc' chart in process flit ftok cnc items acc' chart
[] -> case flit fid1 of [] -> case flit fid1 of
Just (cat,lit,toks) Just (cat,lit,toks)
-> let fid' = nextId chart -> let fid' = nextId chart
!acc' = ftok toks (Active j (ppos+1) funid seqid (updateAt d (PArg (updateAt r (fid',fid2) hypos) fid0) args) key0) acc !acc' = ftok_ toks (Active j (ppos+1) funid seqid (updateAt d (PArg (updateAt r (fid',fid2) hypos) fid0) args) key0) acc
in process flit ftok seqs funs defs items acc' chart{passive=insertPC (mkPK key k) fid' (passive chart) in process flit ftok cnc items acc' chart{passive=insertPC (mkPK key k) fid' (passive chart)
,forest =IntMap.insert fid' (Set.singleton (PConst cat lit toks)) (forest chart) ,forest =IntMap.insert fid' (Set.singleton (PConst cat lit toks)) (forest chart)
,nextId =nextId chart+1 ,nextId =nextId chart+1
} }
Nothing -> process flit ftok seqs funs defs items acc chart Nothing -> process flit ftok cnc items acc chart
| otherwise = | otherwise =
case lookupPC (mkPK key0 j) (passive chart) of case lookupPC (mkPK key0 j) (passive chart) of
Nothing -> let fid = nextId chart Nothing -> let fid = nextId chart
@@ -348,34 +356,61 @@ process flit ftok !seqs !funs defs (item@(Active j ppos funid seqid args key0):i
items2 = case lookupAC key0 ((active chart:actives chart) !! (k-j)) of items2 = case lookupAC key0 ((active chart:actives chart) !! (k-j)) of
Nothing -> items Nothing -> items
Just (set,sc) -> Set.fold (\(Active j' ppos funid seqid args keyc) -> Just (set,sc) -> Set.fold (\(Active j' ppos funid seqid args keyc) ->
let SymCat d _ = unsafeAt (unsafeAt seqs seqid) ppos let SymCat d _ = unsafeAt (unsafeAt (sequences cnc) seqid) ppos
PArg hypos _ = args !! d PArg hypos _ = args !! d
in (:) (Active j' (ppos+1) funid seqid (updateAt d (PArg hypos fid) args) keyc)) items set in (:) (Active j' (ppos+1) funid seqid (updateAt d (PArg hypos fid) args) keyc)) items set
in process flit ftok seqs funs defs items2 acc chart{passive=insertPC (mkPK key0 j) fid (passive chart) in process flit ftok cnc items2 acc chart{passive=insertPC (mkPK key0 j) fid (passive chart)
,forest =IntMap.insert fid (Set.singleton (PApply funid args)) (forest chart) ,forest =IntMap.insert fid (Set.singleton (PApply funid args)) (forest chart)
,nextId =nextId chart+1 ,nextId =nextId chart+1
} }
Just id -> let items2 = [Active k 0 funid (rhs funid r) args (AK id r) | r <- labelsAC id (active chart)] ++ items Just id -> let items2 = [Active k 0 funid (rhs funid r) args (AK id r) | r <- labelsAC id (active chart)] ++ items
in process flit ftok seqs funs defs items2 acc chart{forest = IntMap.insertWith Set.union id (Set.singleton (PApply funid args)) (forest chart)} in process flit ftok cnc items2 acc chart{forest = IntMap.insertWith Set.union id (Set.singleton (PApply funid args)) (forest chart)}
where where
!lin = unsafeAt seqs seqid !lin = unsafeAt (sequences cnc) seqid
!k = offset chart !k = offset chart
mkPK (AK fid lbl) j = PK fid lbl j mkPK (AK fid lbl) j = PK fid lbl j
rhs funid lbl = unsafeAt lins lbl rhs funid lbl = unsafeAt lins lbl
where where
CncFun _ lins = unsafeAt funs funid CncFun _ lins = unsafeAt (cncfuns cnc) funid
uu forest (fid1,fid2) = uu forest (fid1,fid2) =
case IntMap.lookup fid2 defs of case IntMap.lookup fid2 (lindefs cnc) of
Just funs -> foldl (\forest funid -> IntMap.insertWith Set.union fid2 (Set.singleton (PApply funid [PArg [] fid1])) forest) forest funs Just funs -> foldl (\forest funid -> IntMap.insertWith Set.union fid2 (Set.singleton (PApply funid [PArg [] fid1])) forest) forest funs
Nothing -> forest Nothing -> forest
ftok_ (tok:toks) item cnt =
ftok (Map.singleton tok (TrieMap.singleton toks (Set.singleton item))) cnt
predict flit ftok cnc forest key0 key@(AK fid lbl) k acc items =
let (acc1,items1) = case IntMap.lookup fid forest of
Nothing -> (acc,items)
Just set -> Set.fold foldProd (acc,items) set
(acc2,items2) = case IntMap.lookup fid (lexicon cnc) >>= IntMap.lookup lbl of
Just tmap -> let (mb_v,toks) = TrieMap.decompose (TrieMap.map (toItems key0 k) tmap)
acc1' = ftok toks acc1
items1' = maybe [] Set.toList mb_v ++ items1
in (acc1',items1')
Nothing -> (acc1,items1)
in (acc2,items2)
where
foldProd (PCoerce fid) (acc,items) = predict flit ftok cnc forest key0 (AK fid lbl) k acc items
foldProd (PApply funid args) (acc,items) = (acc,Active k 0 funid (rhs funid lbl) args key0 : items)
foldProd (PConst _ const toks) (acc,items) = (acc,items)
rhs funid lbl = unsafeAt lins lbl
where
CncFun _ lins = unsafeAt (cncfuns cnc) funid
toItems key@(AK fid lbl) k funids =
Set.fromList [Active k 1 funid (rhs funid lbl) [] key | funid <- IntSet.toList funids]
updateAt :: Int -> a -> [a] -> [a] updateAt :: Int -> a -> [a] -> [a]
updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs] updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
---------------------------------------------------------------- ----------------------------------------------------------------
-- Active Chart -- Active Chart
---------------------------------------------------------------- ----------------------------------------------------------------
@@ -385,22 +420,23 @@ data Active
{-# UNPACK #-} !DotPos {-# UNPACK #-} !DotPos
{-# UNPACK #-} !FunId {-# UNPACK #-} !FunId
{-# UNPACK #-} !SeqId {-# UNPACK #-} !SeqId
[PArg] [PArg]
{-# UNPACK #-} !ActiveKey {-# UNPACK #-} !ActiveKey
deriving (Eq,Show,Ord) deriving (Eq,Show,Ord)
data ActiveKey data ActiveKey
= AK {-# UNPACK #-} !FId = AK {-# UNPACK #-} !FId
{-# UNPACK #-} !LIndex {-# UNPACK #-} !LIndex
deriving (Eq,Ord,Show) deriving (Eq,Ord,Show)
type ActiveChart = IntMap.IntMap (IntMap.IntMap (Set.Set Active, IntMap.IntMap (Set.Set Production))) type ActiveSet = Set.Set Active
type ActiveChart = IntMap.IntMap (IntMap.IntMap (ActiveSet, IntMap.IntMap (Set.Set Production)))
emptyAC :: ActiveChart emptyAC :: ActiveChart
emptyAC = IntMap.empty emptyAC = IntMap.empty
lookupAC :: ActiveKey -> ActiveChart -> Maybe (Set.Set Active, IntMap.IntMap (Set.Set Production)) lookupAC :: ActiveKey -> ActiveChart -> Maybe (ActiveSet, IntMap.IntMap (Set.Set Production))
lookupAC (AK fcat l) chart = IntMap.lookup fcat chart >>= IntMap.lookup l lookupAC (AK fid lbl) chart = IntMap.lookup fid chart >>= IntMap.lookup lbl
lookupACByFCat :: FId -> ActiveChart -> [(Set.Set Active, IntMap.IntMap (Set.Set Production))] lookupACByFCat :: FId -> ActiveChart -> [(ActiveSet, IntMap.IntMap (Set.Set Production))]
lookupACByFCat fcat chart = lookupACByFCat fcat chart =
case IntMap.lookup fcat chart of case IntMap.lookup fcat chart of
Nothing -> [] Nothing -> []
@@ -412,7 +448,7 @@ labelsAC fcat chart =
Nothing -> [] Nothing -> []
Just map -> IntMap.keys map Just map -> IntMap.keys map
insertAC :: ActiveKey -> (Set.Set Active, IntMap.IntMap (Set.Set Production)) -> ActiveChart -> ActiveChart insertAC :: ActiveKey -> (ActiveSet, IntMap.IntMap (Set.Set Production)) -> ActiveChart -> ActiveChart
insertAC (AK fcat l) set chart = IntMap.insertWith IntMap.union fcat (IntMap.singleton l set) chart insertAC (AK fcat l) set chart = IntMap.insertWith IntMap.union fcat (IntMap.singleton l set) chart
@@ -444,7 +480,7 @@ insertPC key fcat chart = Map.insert key fcat chart
-- | An abstract data type whose values represent -- | An abstract data type whose values represent
-- the current state in an incremental parser. -- the current state in an incremental parser.
data ParseState = PState PGF Concr Chart (TMap.TrieMap String (Set.Set Active)) data ParseState = PState PGF Concr Chart Continuation
data Chart data Chart
= Chart = Chart
@@ -457,6 +493,8 @@ data Chart
} }
deriving Show deriving Show
type Continuation = TrieMap.TrieMap Token ActiveSet
---------------------------------------------------------------- ----------------------------------------------------------------
-- Error State -- Error State
---------------------------------------------------------------- ----------------------------------------------------------------