1
0
forked from GitHub/gf-core
Files
gf-core/src-3.0/PGF/Parsing/FCFG/Incremental.hs
2008-06-19 12:48:29 +00:00

188 lines
7.8 KiB
Haskell

{-# LANGUAGE BangPatterns #-}
module PGF.Parsing.FCFG.Incremental
( ParseState
, initState
, nextState
, getCompletions
, extractExps
, parse
) where
import Data.Array
import Data.Array.Base (unsafeAt)
import Data.List (isPrefixOf, foldl')
import Data.Maybe (fromMaybe)
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import Control.Monad
import GF.Data.Assoc
import GF.Data.SortedList
import qualified GF.Data.MultiMap as MM
import PGF.CId
import PGF.Data
import PGF.Parsing.FCFG.Utilities
import Debug.Trace
parse :: ParserInfo -> CId -> [FToken] -> [Tree]
parse pinfo start toks = extractExps (foldl' nextState (initState pinfo start) toks) start
initState :: ParserInfo -> CId -> ParseState
initState pinfo start =
let items = do
c <- Map.findWithDefault [] start (startupCats pinfo)
ruleid <- topdownRules pinfo ? c
let (FRule fn _ args cat lins) = allRules pinfo ! ruleid
lbl <- indices lins
return (Active 0 lbl 0 ruleid args cat)
forest = IntMap.fromListWith Set.union [(cat, Set.singleton (Passive ruleid args)) | (ruleid, FRule _ _ args cat _) <- assocs (allRules pinfo)]
max_fid = case IntMap.maxViewWithKey forest of
Just ((fid,_), _) -> fid+1
Nothing -> 0
in State pinfo
(Chart MM.empty [] Map.empty forest max_fid 0)
(Set.fromList items)
-- | From the current state and the next token
-- 'nextState' computes a new state where the token
-- is consumed and the current position shifted by one.
nextState :: ParseState -> String -> ParseState
nextState (State pinfo chart items) t =
let (items1,chart1) = process add (allRules pinfo) (Set.toList items) (Set.empty,chart)
chart2 = chart1{ active =MM.empty
, actives=active chart1 : actives chart1
, passive=Map.empty
, offset =offset chart1+1
}
in State pinfo chart2 items1
where
add tok item set
| tok == t = Set.insert item set
| otherwise = set
-- | If the next token is not known but only its prefix (possible empty prefix)
-- then the 'getCompletions' function can be used to calculate the possible
-- next words and the consequent states. This is used for word completions in
-- the GF interpreter.
getCompletions :: ParseState -> String -> Map.Map String ParseState
getCompletions (State pinfo chart items) w =
let (map',chart1) = process add (allRules pinfo) (Set.toList items) (MM.empty,chart)
chart2 = chart1{ active =MM.empty
, actives=active chart1 : actives chart1
, passive=Map.empty
, offset =offset chart1+1
}
in fmap (State pinfo chart2) map'
where
add tok item map
| isPrefixOf w tok = fromMaybe map (MM.insert' tok item map)
| otherwise = map
extractExps :: ParseState -> CId -> [Tree]
extractExps (State pinfo chart items) start = exps
where
(_,st) = process (\_ _ -> id) (allRules pinfo) (Set.toList items) ((),chart)
exps = nubsort $ do
c <- Map.findWithDefault [] start (startupCats pinfo)
ruleid <- topdownRules pinfo ? c
let (FRule fn _ args cat lins) = allRules pinfo ! ruleid
lbl <- indices lins
fid <- Map.lookup (PK c lbl 0) (passive st)
go Set.empty fid
go rec fid
| Set.member fid rec = mzero
| otherwise = do set <- IntMap.lookup fid (forest st)
Passive ruleid args <- Set.toList set
let (FRule fn _ _ cat lins) = allRules pinfo ! ruleid
if fn == wildCId
then go (Set.insert fid rec) (head args)
else do args <- mapM (go (Set.insert fid rec)) args
return (Fun fn args)
process fn !rules [] acc_chart = acc_chart
process fn !rules (item:items) acc_chart = univRule item acc_chart
where
univRule (Active j lbl ppos ruleid args fid0) acc_chart@(acc,chart)
| inRange (bounds lin) ppos =
case unsafeAt lin ppos of
FSymCat r d -> let !fid = args !! d
in case MM.insert' (AK fid r) item (active chart) of
Nothing -> process fn rules items $ acc_chart
Just actCat -> (case Map.lookup (PK fid r k) (passive chart) of
Nothing -> id
Just id -> process fn rules [Active j lbl (ppos+1) ruleid (updateAt d id args) fid0]) $
(case IntMap.lookup fid (forest chart) of
Nothing -> id
Just set -> process fn rules (Set.fold (\(Passive ruleid args) -> (:) (Active k r 0 ruleid args fid)) [] set)) $
process fn rules items $
(acc,chart{active=actCat})
FSymTok tok -> process fn rules items $
(fn tok (Active j lbl (ppos+1) ruleid args fid0) acc,chart)
| otherwise = case Map.lookup (PK fid0 lbl j) (passive chart) of
Nothing -> let fid = nextId chart
in process fn rules [Active j' lbl (ppos+1) ruleid (updateAt d fid args) fidc
| Active j' lbl ppos ruleid args fidc <- ((active chart:actives chart) !! (k-j)) MM.! (AK fid0 lbl),
let FSymCat _ d = unsafeAt (rhs ruleid lbl) ppos] $
process fn rules items $
(acc,chart{passive=Map.insert (PK fid0 lbl j) fid (passive chart)
,forest =IntMap.insert fid (Set.singleton (Passive ruleid args)) (forest chart)
,nextId =nextId chart+1
})
Just id -> process fn rules items $
(acc,chart{forest = IntMap.insertWith Set.union id (Set.singleton (Passive ruleid args)) (forest chart)})
where
!lin = rhs ruleid lbl
!k = offset chart
rhs ruleid lbl = unsafeAt lins lbl
where
(FRule _ _ _ cat lins) = unsafeAt rules ruleid
updateAt :: Int -> a -> [a] -> [a]
updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
data Active
= Active {-# UNPACK #-} !Int
{-# UNPACK #-} !FIndex
{-# UNPACK #-} !FPointPos
{-# UNPACK #-} !RuleId
[FCat]
{-# UNPACK #-} !FCat
deriving (Eq,Show,Ord)
data Passive
= Passive {-# UNPACK #-} !RuleId
[FCat]
deriving (Eq,Ord,Show)
data ActiveKey
= AK {-# UNPACK #-} !FCat
{-# UNPACK #-} !FIndex
deriving (Eq,Ord,Show)
data PassiveKey
= PK {-# UNPACK #-} !FCat
{-# UNPACK #-} !FIndex
{-# UNPACK #-} !Int
deriving (Eq,Ord,Show)
-- | An abstract data type whose values represent
-- the current state in an incremental parser.
data ParseState = State ParserInfo Chart (Set.Set Active)
data Chart
= Chart
{ active :: MM.MultiMap ActiveKey Active
, actives :: [MM.MultiMap ActiveKey Active]
, passive :: Map.Map PassiveKey FCat
, forest :: IntMap.IntMap (Set.Set Passive)
, nextId :: {-# UNPACK #-} !FCat
, offset :: {-# UNPACK #-} !Int
}