forked from GitHub/gf-core
Hackish version of the incremental parser
This commit is contained in:
1
GF.cabal
1
GF.cabal
@@ -35,6 +35,7 @@ library
|
||||
PGF.BuildParser
|
||||
PGF.Parsing.FCFG.Utilities
|
||||
PGF.Parsing.FCFG.Active
|
||||
PGF.Parsing.FCFG.Incremental
|
||||
PGF.Parsing.FCFG
|
||||
PGF.Raw.Parse
|
||||
PGF.Raw.Print
|
||||
|
||||
@@ -17,11 +17,14 @@ import PGF.CId
|
||||
import PGF.ShowLinearize
|
||||
import PGF.Macros
|
||||
import PGF.Data ----
|
||||
import qualified PGF.Parsing.FCFG.Incremental as Incremental
|
||||
import GF.Compile.Export
|
||||
|
||||
import GF.Infra.UseIO
|
||||
import GF.Data.ErrM ----
|
||||
|
||||
import Data.Maybe
|
||||
import qualified Data.Map as Map
|
||||
import System.CPUTime
|
||||
|
||||
type CommandOutput = ([Exp],String) ---- errors, etc
|
||||
|
||||
@@ -124,6 +127,10 @@ allCommands pgf = Map.fromAscList [
|
||||
("pg", emptyCommandInfo {
|
||||
exec = \opts _ -> return $ fromString $ prGrammar opts,
|
||||
flags = ["cat","lang","printer"]
|
||||
}),
|
||||
("wc", emptyCommandInfo {
|
||||
exec = \opts _ -> wordCompletion opts >> return ([],[]),
|
||||
flags = ["cat","lang"]
|
||||
})
|
||||
]
|
||||
where
|
||||
@@ -153,5 +160,27 @@ allCommands pgf = Map.fromAscList [
|
||||
|
||||
prGrammar opts = case valIdOpts "printer" "" opts of
|
||||
"cats" -> unwords $ categories pgf
|
||||
v -> prPGF (read v) pgf (prCId (absname pgf))
|
||||
v -> prPGF (read v) pgf
|
||||
|
||||
wordCompletion opts = do
|
||||
let lang = head (optLangs opts)
|
||||
cat = optCat opts
|
||||
pinfo = fromMaybe (error ("Unknown language: " ++ lang)) (lookParser pgf (mkCId lang))
|
||||
state0 = Incremental.initState pinfo (mkCId cat)
|
||||
putStrFlush ">> "
|
||||
s <- getLine
|
||||
if null s
|
||||
then return ()
|
||||
else do cpu1 <- getCPUTime
|
||||
st <- parse pinfo state0 (words s)
|
||||
let exps = Incremental.extractExps pinfo (mkCId cat) st
|
||||
mapM_ (putStrLn . showExp) exps
|
||||
cpu2 <- getCPUTime
|
||||
putStrLn (show ((cpu2 - cpu1) `div` 1000000000) ++ " msec")
|
||||
wordCompletion opts
|
||||
where
|
||||
parse pinfo st [] = do putStrLnFlush ""
|
||||
return st
|
||||
parse pinfo st (t:ts) = do putStrFlush "."
|
||||
st1 <- return $! (Incremental.nextState pinfo t st)
|
||||
parse pinfo st1 ts
|
||||
|
||||
@@ -170,7 +170,7 @@ linearize pgf lang = PGF.Linearize.linearize pgf (mkCId lang)
|
||||
parse pgf lang cat s =
|
||||
case lookParser pgf (mkCId lang) of
|
||||
Nothing -> error ("Unknown language: " ++ lang)
|
||||
Just pinfo -> case parseFCF "bottomup" pinfo (mkCId cat) (words s) of
|
||||
Just pinfo -> case parseFCFG "bottomup" pinfo (mkCId cat) (words s) of
|
||||
Ok x -> x
|
||||
Bad s -> error s
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module PGF.Parsing.FCFG
|
||||
(parseFCF,buildParserInfo,ParserInfo(..),makeFinalEdge) where
|
||||
(buildParserInfo,ParserInfo,parseFCFG) where
|
||||
|
||||
import GF.Data.ErrM
|
||||
import GF.Data.Assoc
|
||||
@@ -19,7 +19,8 @@ import PGF.Data
|
||||
import PGF.Macros
|
||||
import PGF.BuildParser
|
||||
import PGF.Parsing.FCFG.Utilities
|
||||
import PGF.Parsing.FCFG.Active
|
||||
import qualified PGF.Parsing.FCFG.Active as Active
|
||||
import qualified PGF.Parsing.FCFG.Incremental as Incremental
|
||||
|
||||
import qualified Data.Map as Map
|
||||
|
||||
@@ -28,25 +29,12 @@ import qualified Data.Map as Map
|
||||
|
||||
-- main parsing function
|
||||
|
||||
parseFCF :: String -- ^ parsing strategy
|
||||
parseFCFG :: String -- ^ parsing strategy
|
||||
-> ParserInfo -- ^ compiled grammar (fcfg)
|
||||
-> CId -- ^ starting category
|
||||
-> [String] -- ^ input tokens
|
||||
-> Err [Exp] -- ^ resulting GF terms
|
||||
parseFCF strategy pinfo startCat inString =
|
||||
do let inTokens = input inString
|
||||
startCats <- case Map.lookup startCat (startupCats pinfo) of
|
||||
Just cats -> return cats
|
||||
Nothing -> fail $ "Unknown startup category: " ++ prCId startCat
|
||||
fcfParser <- parseFCF strategy
|
||||
let chart = fcfParser pinfo startCats inTokens
|
||||
(i,j) = inputBounds inTokens
|
||||
finalEdges = [makeFinalEdge cat i j | cat <- startCats]
|
||||
forests = chart2forests chart (const False) finalEdges
|
||||
filteredForests = forests >>= applyProfileToForest
|
||||
return $ nubsort $ filteredForests >>= forest2exps
|
||||
where
|
||||
parseFCF :: String -> Err (FCFParser)
|
||||
parseFCF "bottomup" = return $ parse "b"
|
||||
parseFCF "topdown" = return $ parse "t"
|
||||
parseFCF strat = fail $ "FCFG parsing strategy not defined: " ++ strat
|
||||
parseFCFG "bottomup" pinfo start toks = return $ Active.parse "b" pinfo start toks
|
||||
parseFCFG "topdown" pinfo start toks = return $ Active.parse "t" pinfo start toks
|
||||
parseFCFG "incremental" pinfo start toks = return $ Incremental.parse pinfo start toks
|
||||
parseFCFG strat pinfo start toks = fail $ "FCFG parsing strategy not defined: " ++ strat
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
-- MCFG parsing, the active algorithm
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module PGF.Parsing.FCFG.Active (FCFParser, parse, makeFinalEdge) where
|
||||
module PGF.Parsing.FCFG.Active (parse) where
|
||||
|
||||
import GF.Data.Assoc
|
||||
import GF.Data.SortedList
|
||||
@@ -32,17 +32,20 @@ makeFinalEdge cat 0 0 = (cat, [EmptyRange])
|
||||
makeFinalEdge cat i j = (cat, [makeRange i j])
|
||||
|
||||
-- | the list of categories = possible starting categories
|
||||
type FCFParser = ParserInfo
|
||||
-> [FCat]
|
||||
-> Input FToken
|
||||
-> SyntaxChart (CId,[Profile]) (FCat,RangeRec)
|
||||
|
||||
|
||||
parse :: String -> FCFParser
|
||||
parse strategy pinfo starts toks = xchart2syntaxchart chart pinfo
|
||||
where chart = process strategy pinfo toks axioms emptyXChart
|
||||
axioms | isBU strategy = literals pinfo toks ++ initialBU pinfo toks
|
||||
| isTD strategy = literals pinfo toks ++ initialTD pinfo starts toks
|
||||
parse :: String -> ParserInfo -> CId -> [FToken] -> [Exp]
|
||||
parse strategy pinfo start toks = nubsort $ filteredForests >>= forest2exps
|
||||
where
|
||||
inTokens = input toks
|
||||
starts = Map.findWithDefault [] start (startupCats pinfo)
|
||||
schart = xchart2syntaxchart chart pinfo
|
||||
(i,j) = inputBounds inTokens
|
||||
finalEdges = [makeFinalEdge cat i j | cat <- starts]
|
||||
forests = chart2forests schart (const False) finalEdges
|
||||
filteredForests = forests >>= applyProfileToForest
|
||||
|
||||
chart = process strategy pinfo inTokens axioms emptyXChart
|
||||
axioms | isBU strategy = literals pinfo inTokens ++ initialBU pinfo inTokens
|
||||
| isTD strategy = literals pinfo inTokens ++ initialTD pinfo starts inTokens
|
||||
|
||||
isBU s = s=="b"
|
||||
isTD s = s=="t"
|
||||
|
||||
156
src-3.0/PGF/Parsing/FCFG/Incremental.hs
Normal file
156
src-3.0/PGF/Parsing/FCFG/Incremental.hs
Normal file
@@ -0,0 +1,156 @@
|
||||
{-# OPTIONS -fbang-patterns #-}
|
||||
module PGF.Parsing.FCFG.Incremental
|
||||
( State
|
||||
, initState
|
||||
, nextState
|
||||
, getCompletions
|
||||
, extractExps
|
||||
, parse
|
||||
) where
|
||||
|
||||
import Data.Array
|
||||
import Data.Array.Base (unsafeAt)
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.IntMap as IntMap
|
||||
import qualified Data.Set as Set
|
||||
import Control.Monad
|
||||
|
||||
import GF.Data.Assoc
|
||||
import GF.Data.SortedList
|
||||
import qualified GF.Data.MultiMap as MM
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Parsing.FCFG.Utilities
|
||||
import Debug.Trace
|
||||
|
||||
parse :: ParserInfo -> CId -> [FToken] -> [Exp]
|
||||
parse pinfo start toks = go (initState pinfo start) toks
|
||||
where
|
||||
go st [] = extractExps pinfo start st
|
||||
go st (t:ts) = go (nextState pinfo t st) ts
|
||||
|
||||
initState :: ParserInfo -> CId -> State
|
||||
initState pinfo start =
|
||||
let items = do
|
||||
c <- Map.findWithDefault [] start (startupCats pinfo)
|
||||
ruleid <- topdownRules pinfo ? c
|
||||
let (FRule fn _ args cat lins) = allRules pinfo ! ruleid
|
||||
lbl <- indices lins
|
||||
return (Active 0 lbl 0 ruleid args cat)
|
||||
|
||||
forest = IntMap.fromListWith Set.union [(cat, Set.singleton (Passive ruleid args)) | (ruleid, FRule _ _ args cat _) <- assocs (allRules pinfo)]
|
||||
|
||||
max_fid = case IntMap.maxViewWithKey forest of
|
||||
Just ((fid,_), _) -> fid+1
|
||||
Nothing -> 0
|
||||
|
||||
in process (allRules pinfo) items (State MM.empty [] MM.empty Map.empty forest max_fid 0)
|
||||
|
||||
nextState :: ParserInfo -> FToken -> State -> State
|
||||
nextState pinfo t state =
|
||||
process (allRules pinfo) (tokens state MM.! t) state{ chart=MM.empty
|
||||
, charts=chart state : charts state
|
||||
, tokens=MM.empty
|
||||
, passive=Map.empty
|
||||
, currOffset=currOffset state+1
|
||||
}
|
||||
|
||||
getCompletions :: State -> FToken -> [FToken]
|
||||
getCompletions state w =
|
||||
[t | t <- MM.keys (tokens state), take (length w) t == w]
|
||||
|
||||
extractExps :: ParserInfo -> CId -> State -> [Exp]
|
||||
extractExps pinfo start st = exps
|
||||
where
|
||||
exps = nubsort $ do
|
||||
c <- Map.findWithDefault [] start (startupCats pinfo)
|
||||
ruleid <- topdownRules pinfo ? c
|
||||
let (FRule fn _ args cat lins) = allRules pinfo ! ruleid
|
||||
lbl <- indices lins
|
||||
fid <- Map.lookup (PK c lbl 0) (passive st)
|
||||
go fid
|
||||
|
||||
go fid = do
|
||||
set <- IntMap.lookup fid (forest st)
|
||||
Passive ruleid args <- Set.toList set
|
||||
let (FRule fn _ _ cat lins) = allRules pinfo ! ruleid
|
||||
args <- mapM go args
|
||||
return (EApp fn args)
|
||||
|
||||
process !rules [] state = state
|
||||
process !rules (item:items) state = process rules items $! univRule item state
|
||||
where
|
||||
univRule (Active j lbl ppos ruleid args fid0) state
|
||||
| inRange (bounds lin) ppos =
|
||||
case unsafeAt lin ppos of
|
||||
FSymCat r d -> {-# SCC "COND11" #-}
|
||||
let !fid = args !! d
|
||||
in case MM.insert' (AK fid r) item (chart state) of
|
||||
Nothing -> state
|
||||
Just actCat -> (case Map.lookup (PK fid r k) (passive state) of
|
||||
Nothing -> id
|
||||
Just id -> process rules [Active j lbl (ppos+1) ruleid (updateAt d id args) fid0]) $
|
||||
(case IntMap.lookup fid (forest state) of
|
||||
Nothing -> id
|
||||
Just set -> process rules (Set.fold (\(Passive ruleid args) -> (:) (Active k r 0 ruleid args fid)) [] set)) $
|
||||
state{chart=actCat}
|
||||
FSymTok tok -> {-# SCC "COND12" #-}
|
||||
case MM.insert' tok (Active j lbl (ppos+1) ruleid args fid0) (tokens state) of
|
||||
Nothing -> state
|
||||
Just actTok -> state{tokens=actTok}
|
||||
| otherwise = {-# SCC "COND2" #-}
|
||||
case Map.lookup (PK fid0 lbl j) (passive state) of
|
||||
Nothing -> let fid = nextId state
|
||||
in process rules [Active j' lbl (ppos+1) ruleid (updateAt d fid args) fidc
|
||||
| Active j' lbl ppos ruleid args fidc <- ((chart state:charts state) !! (k-j)) MM.! (AK fid0 lbl),
|
||||
let FSymCat _ d = unsafeAt (rhs ruleid lbl) ppos] $
|
||||
state{passive=Map.insert (PK fid0 lbl j) fid (passive state)
|
||||
,forest =IntMap.insert fid (Set.singleton (Passive ruleid args)) (forest state)
|
||||
,nextId =nextId state+1
|
||||
}
|
||||
Just id -> state{forest = IntMap.insertWith Set.union id (Set.singleton (Passive ruleid args)) (forest state)}
|
||||
where
|
||||
!lin = rhs ruleid lbl
|
||||
!k = currOffset state
|
||||
|
||||
rhs ruleid lbl = unsafeAt lins lbl
|
||||
where
|
||||
(FRule _ _ _ cat lins) = unsafeAt rules ruleid
|
||||
|
||||
updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
|
||||
|
||||
|
||||
data Active
|
||||
= Active {-# UNPACK #-} !Int
|
||||
{-# UNPACK #-} !FIndex
|
||||
{-# UNPACK #-} !FPointPos
|
||||
{-# UNPACK #-} !RuleId
|
||||
[FCat]
|
||||
{-# UNPACK #-} !FCat
|
||||
deriving (Eq,Show,Ord)
|
||||
data Passive
|
||||
= Passive {-# UNPACK #-} !RuleId
|
||||
[FCat]
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
data ActiveKey
|
||||
= AK {-# UNPACK #-} !FCat
|
||||
{-# UNPACK #-} !FIndex
|
||||
deriving (Eq,Ord,Show)
|
||||
data PassiveKey
|
||||
= PK {-# UNPACK #-} !FCat
|
||||
{-# UNPACK #-} !FIndex
|
||||
{-# UNPACK #-} !Int
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
data State
|
||||
= State
|
||||
{ chart :: MM.MultiMap ActiveKey Active
|
||||
, charts :: [MM.MultiMap ActiveKey Active]
|
||||
, tokens :: MM.MultiMap FToken Active
|
||||
, passive :: Map.Map PassiveKey FCat
|
||||
, forest :: IntMap.IntMap (Set.Set Passive)
|
||||
, nextId :: {-# UNPACK #-} !FCat
|
||||
, currOffset :: {-# UNPACK #-} !Int
|
||||
}
|
||||
deriving Show
|
||||
Reference in New Issue
Block a user