Hackish version of the incremental parser

This commit is contained in:
krasimir
2008-06-03 06:59:44 +00:00
parent fe2d34f9e1
commit 1647026506
6 changed files with 212 additions and 35 deletions

View File

@@ -35,6 +35,7 @@ library
PGF.BuildParser
PGF.Parsing.FCFG.Utilities
PGF.Parsing.FCFG.Active
PGF.Parsing.FCFG.Incremental
PGF.Parsing.FCFG
PGF.Raw.Parse
PGF.Raw.Print

View File

@@ -17,11 +17,14 @@ import PGF.CId
import PGF.ShowLinearize
import PGF.Macros
import PGF.Data ----
import qualified PGF.Parsing.FCFG.Incremental as Incremental
import GF.Compile.Export
import GF.Infra.UseIO
import GF.Data.ErrM ----
import Data.Maybe
import qualified Data.Map as Map
import System.CPUTime
type CommandOutput = ([Exp],String) ---- errors, etc
@@ -124,6 +127,10 @@ allCommands pgf = Map.fromAscList [
("pg", emptyCommandInfo {
exec = \opts _ -> return $ fromString $ prGrammar opts,
flags = ["cat","lang","printer"]
}),
("wc", emptyCommandInfo {
exec = \opts _ -> wordCompletion opts >> return ([],[]),
flags = ["cat","lang"]
})
]
where
@@ -153,5 +160,27 @@ allCommands pgf = Map.fromAscList [
prGrammar opts = case valIdOpts "printer" "" opts of
"cats" -> unwords $ categories pgf
v -> prPGF (read v) pgf (prCId (absname pgf))
v -> prPGF (read v) pgf
wordCompletion opts = do
let lang = head (optLangs opts)
cat = optCat opts
pinfo = fromMaybe (error ("Unknown language: " ++ lang)) (lookParser pgf (mkCId lang))
state0 = Incremental.initState pinfo (mkCId cat)
putStrFlush ">> "
s <- getLine
if null s
then return ()
else do cpu1 <- getCPUTime
st <- parse pinfo state0 (words s)
let exps = Incremental.extractExps pinfo (mkCId cat) st
mapM_ (putStrLn . showExp) exps
cpu2 <- getCPUTime
putStrLn (show ((cpu2 - cpu1) `div` 1000000000) ++ " msec")
wordCompletion opts
where
parse pinfo st [] = do putStrLnFlush ""
return st
parse pinfo st (t:ts) = do putStrFlush "."
st1 <- return $! (Incremental.nextState pinfo t st)
parse pinfo st1 ts

View File

@@ -170,7 +170,7 @@ linearize pgf lang = PGF.Linearize.linearize pgf (mkCId lang)
parse pgf lang cat s =
case lookParser pgf (mkCId lang) of
Nothing -> error ("Unknown language: " ++ lang)
Just pinfo -> case parseFCF "bottomup" pinfo (mkCId cat) (words s) of
Just pinfo -> case parseFCFG "bottomup" pinfo (mkCId cat) (words s) of
Ok x -> x
Bad s -> error s

View File

@@ -8,7 +8,7 @@
-----------------------------------------------------------------------------
module PGF.Parsing.FCFG
(parseFCF,buildParserInfo,ParserInfo(..),makeFinalEdge) where
(buildParserInfo,ParserInfo,parseFCFG) where
import GF.Data.ErrM
import GF.Data.Assoc
@@ -19,7 +19,8 @@ import PGF.Data
import PGF.Macros
import PGF.BuildParser
import PGF.Parsing.FCFG.Utilities
import PGF.Parsing.FCFG.Active
import qualified PGF.Parsing.FCFG.Active as Active
import qualified PGF.Parsing.FCFG.Incremental as Incremental
import qualified Data.Map as Map
@@ -28,25 +29,12 @@ import qualified Data.Map as Map
-- main parsing function
parseFCF :: String -- ^ parsing strategy
parseFCFG :: String -- ^ parsing strategy
-> ParserInfo -- ^ compiled grammar (fcfg)
-> CId -- ^ starting category
-> [String] -- ^ input tokens
-> Err [Exp] -- ^ resulting GF terms
parseFCF strategy pinfo startCat inString =
do let inTokens = input inString
startCats <- case Map.lookup startCat (startupCats pinfo) of
Just cats -> return cats
Nothing -> fail $ "Unknown startup category: " ++ prCId startCat
fcfParser <- parseFCF strategy
let chart = fcfParser pinfo startCats inTokens
(i,j) = inputBounds inTokens
finalEdges = [makeFinalEdge cat i j | cat <- startCats]
forests = chart2forests chart (const False) finalEdges
filteredForests = forests >>= applyProfileToForest
return $ nubsort $ filteredForests >>= forest2exps
where
parseFCF :: String -> Err (FCFParser)
parseFCF "bottomup" = return $ parse "b"
parseFCF "topdown" = return $ parse "t"
parseFCF strat = fail $ "FCFG parsing strategy not defined: " ++ strat
parseFCFG "bottomup" pinfo start toks = return $ Active.parse "b" pinfo start toks
parseFCFG "topdown" pinfo start toks = return $ Active.parse "t" pinfo start toks
parseFCFG "incremental" pinfo start toks = return $ Incremental.parse pinfo start toks
parseFCFG strat pinfo start toks = fail $ "FCFG parsing strategy not defined: " ++ strat

View File

@@ -7,7 +7,7 @@
-- MCFG parsing, the active algorithm
-----------------------------------------------------------------------------
module PGF.Parsing.FCFG.Active (FCFParser, parse, makeFinalEdge) where
module PGF.Parsing.FCFG.Active (parse) where
import GF.Data.Assoc
import GF.Data.SortedList
@@ -32,17 +32,20 @@ makeFinalEdge cat 0 0 = (cat, [EmptyRange])
makeFinalEdge cat i j = (cat, [makeRange i j])
-- | the list of categories = possible starting categories
type FCFParser = ParserInfo
-> [FCat]
-> Input FToken
-> SyntaxChart (CId,[Profile]) (FCat,RangeRec)
parse :: String -> FCFParser
parse strategy pinfo starts toks = xchart2syntaxchart chart pinfo
where chart = process strategy pinfo toks axioms emptyXChart
axioms | isBU strategy = literals pinfo toks ++ initialBU pinfo toks
| isTD strategy = literals pinfo toks ++ initialTD pinfo starts toks
parse :: String -> ParserInfo -> CId -> [FToken] -> [Exp]
parse strategy pinfo start toks = nubsort $ filteredForests >>= forest2exps
where
inTokens = input toks
starts = Map.findWithDefault [] start (startupCats pinfo)
schart = xchart2syntaxchart chart pinfo
(i,j) = inputBounds inTokens
finalEdges = [makeFinalEdge cat i j | cat <- starts]
forests = chart2forests schart (const False) finalEdges
filteredForests = forests >>= applyProfileToForest
chart = process strategy pinfo inTokens axioms emptyXChart
axioms | isBU strategy = literals pinfo inTokens ++ initialBU pinfo inTokens
| isTD strategy = literals pinfo inTokens ++ initialTD pinfo starts inTokens
isBU s = s=="b"
isTD s = s=="t"

View File

@@ -0,0 +1,156 @@
{-# OPTIONS -fbang-patterns #-}
module PGF.Parsing.FCFG.Incremental
( State
, initState
, nextState
, getCompletions
, extractExps
, parse
) where
import Data.Array
import Data.Array.Base (unsafeAt)
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import Control.Monad
import GF.Data.Assoc
import GF.Data.SortedList
import qualified GF.Data.MultiMap as MM
import PGF.CId
import PGF.Data
import PGF.Parsing.FCFG.Utilities
import Debug.Trace
parse :: ParserInfo -> CId -> [FToken] -> [Exp]
parse pinfo start toks = go (initState pinfo start) toks
where
go st [] = extractExps pinfo start st
go st (t:ts) = go (nextState pinfo t st) ts
initState :: ParserInfo -> CId -> State
initState pinfo start =
let items = do
c <- Map.findWithDefault [] start (startupCats pinfo)
ruleid <- topdownRules pinfo ? c
let (FRule fn _ args cat lins) = allRules pinfo ! ruleid
lbl <- indices lins
return (Active 0 lbl 0 ruleid args cat)
forest = IntMap.fromListWith Set.union [(cat, Set.singleton (Passive ruleid args)) | (ruleid, FRule _ _ args cat _) <- assocs (allRules pinfo)]
max_fid = case IntMap.maxViewWithKey forest of
Just ((fid,_), _) -> fid+1
Nothing -> 0
in process (allRules pinfo) items (State MM.empty [] MM.empty Map.empty forest max_fid 0)
nextState :: ParserInfo -> FToken -> State -> State
nextState pinfo t state =
process (allRules pinfo) (tokens state MM.! t) state{ chart=MM.empty
, charts=chart state : charts state
, tokens=MM.empty
, passive=Map.empty
, currOffset=currOffset state+1
}
getCompletions :: State -> FToken -> [FToken]
getCompletions state w =
[t | t <- MM.keys (tokens state), take (length w) t == w]
extractExps :: ParserInfo -> CId -> State -> [Exp]
extractExps pinfo start st = exps
where
exps = nubsort $ do
c <- Map.findWithDefault [] start (startupCats pinfo)
ruleid <- topdownRules pinfo ? c
let (FRule fn _ args cat lins) = allRules pinfo ! ruleid
lbl <- indices lins
fid <- Map.lookup (PK c lbl 0) (passive st)
go fid
go fid = do
set <- IntMap.lookup fid (forest st)
Passive ruleid args <- Set.toList set
let (FRule fn _ _ cat lins) = allRules pinfo ! ruleid
args <- mapM go args
return (EApp fn args)
process !rules [] state = state
process !rules (item:items) state = process rules items $! univRule item state
where
univRule (Active j lbl ppos ruleid args fid0) state
| inRange (bounds lin) ppos =
case unsafeAt lin ppos of
FSymCat r d -> {-# SCC "COND11" #-}
let !fid = args !! d
in case MM.insert' (AK fid r) item (chart state) of
Nothing -> state
Just actCat -> (case Map.lookup (PK fid r k) (passive state) of
Nothing -> id
Just id -> process rules [Active j lbl (ppos+1) ruleid (updateAt d id args) fid0]) $
(case IntMap.lookup fid (forest state) of
Nothing -> id
Just set -> process rules (Set.fold (\(Passive ruleid args) -> (:) (Active k r 0 ruleid args fid)) [] set)) $
state{chart=actCat}
FSymTok tok -> {-# SCC "COND12" #-}
case MM.insert' tok (Active j lbl (ppos+1) ruleid args fid0) (tokens state) of
Nothing -> state
Just actTok -> state{tokens=actTok}
| otherwise = {-# SCC "COND2" #-}
case Map.lookup (PK fid0 lbl j) (passive state) of
Nothing -> let fid = nextId state
in process rules [Active j' lbl (ppos+1) ruleid (updateAt d fid args) fidc
| Active j' lbl ppos ruleid args fidc <- ((chart state:charts state) !! (k-j)) MM.! (AK fid0 lbl),
let FSymCat _ d = unsafeAt (rhs ruleid lbl) ppos] $
state{passive=Map.insert (PK fid0 lbl j) fid (passive state)
,forest =IntMap.insert fid (Set.singleton (Passive ruleid args)) (forest state)
,nextId =nextId state+1
}
Just id -> state{forest = IntMap.insertWith Set.union id (Set.singleton (Passive ruleid args)) (forest state)}
where
!lin = rhs ruleid lbl
!k = currOffset state
rhs ruleid lbl = unsafeAt lins lbl
where
(FRule _ _ _ cat lins) = unsafeAt rules ruleid
updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
data Active
= Active {-# UNPACK #-} !Int
{-# UNPACK #-} !FIndex
{-# UNPACK #-} !FPointPos
{-# UNPACK #-} !RuleId
[FCat]
{-# UNPACK #-} !FCat
deriving (Eq,Show,Ord)
data Passive
= Passive {-# UNPACK #-} !RuleId
[FCat]
deriving (Eq,Ord,Show)
data ActiveKey
= AK {-# UNPACK #-} !FCat
{-# UNPACK #-} !FIndex
deriving (Eq,Ord,Show)
data PassiveKey
= PK {-# UNPACK #-} !FCat
{-# UNPACK #-} !FIndex
{-# UNPACK #-} !Int
deriving (Eq,Ord,Show)
data State
= State
{ chart :: MM.MultiMap ActiveKey Active
, charts :: [MM.MultiMap ActiveKey Active]
, tokens :: MM.MultiMap FToken Active
, passive :: Map.Map PassiveKey FCat
, forest :: IntMap.IntMap (Set.Set Passive)
, nextId :: {-# UNPACK #-} !FCat
, currOffset :: {-# UNPACK #-} !Int
}
deriving Show