mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-24 03:52:50 -06:00
Hackish version of the incremental parser
This commit is contained in:
1
GF.cabal
1
GF.cabal
@@ -35,6 +35,7 @@ library
|
|||||||
PGF.BuildParser
|
PGF.BuildParser
|
||||||
PGF.Parsing.FCFG.Utilities
|
PGF.Parsing.FCFG.Utilities
|
||||||
PGF.Parsing.FCFG.Active
|
PGF.Parsing.FCFG.Active
|
||||||
|
PGF.Parsing.FCFG.Incremental
|
||||||
PGF.Parsing.FCFG
|
PGF.Parsing.FCFG
|
||||||
PGF.Raw.Parse
|
PGF.Raw.Parse
|
||||||
PGF.Raw.Print
|
PGF.Raw.Print
|
||||||
|
|||||||
@@ -17,11 +17,14 @@ import PGF.CId
|
|||||||
import PGF.ShowLinearize
|
import PGF.ShowLinearize
|
||||||
import PGF.Macros
|
import PGF.Macros
|
||||||
import PGF.Data ----
|
import PGF.Data ----
|
||||||
|
import qualified PGF.Parsing.FCFG.Incremental as Incremental
|
||||||
import GF.Compile.Export
|
import GF.Compile.Export
|
||||||
|
import GF.Infra.UseIO
|
||||||
import GF.Data.ErrM ----
|
import GF.Data.ErrM ----
|
||||||
|
|
||||||
|
import Data.Maybe
|
||||||
import qualified Data.Map as Map
|
import qualified Data.Map as Map
|
||||||
|
import System.CPUTime
|
||||||
|
|
||||||
type CommandOutput = ([Exp],String) ---- errors, etc
|
type CommandOutput = ([Exp],String) ---- errors, etc
|
||||||
|
|
||||||
@@ -124,6 +127,10 @@ allCommands pgf = Map.fromAscList [
|
|||||||
("pg", emptyCommandInfo {
|
("pg", emptyCommandInfo {
|
||||||
exec = \opts _ -> return $ fromString $ prGrammar opts,
|
exec = \opts _ -> return $ fromString $ prGrammar opts,
|
||||||
flags = ["cat","lang","printer"]
|
flags = ["cat","lang","printer"]
|
||||||
|
}),
|
||||||
|
("wc", emptyCommandInfo {
|
||||||
|
exec = \opts _ -> wordCompletion opts >> return ([],[]),
|
||||||
|
flags = ["cat","lang"]
|
||||||
})
|
})
|
||||||
]
|
]
|
||||||
where
|
where
|
||||||
@@ -153,5 +160,27 @@ allCommands pgf = Map.fromAscList [
|
|||||||
|
|
||||||
prGrammar opts = case valIdOpts "printer" "" opts of
|
prGrammar opts = case valIdOpts "printer" "" opts of
|
||||||
"cats" -> unwords $ categories pgf
|
"cats" -> unwords $ categories pgf
|
||||||
v -> prPGF (read v) pgf (prCId (absname pgf))
|
v -> prPGF (read v) pgf
|
||||||
|
|
||||||
|
wordCompletion opts = do
|
||||||
|
let lang = head (optLangs opts)
|
||||||
|
cat = optCat opts
|
||||||
|
pinfo = fromMaybe (error ("Unknown language: " ++ lang)) (lookParser pgf (mkCId lang))
|
||||||
|
state0 = Incremental.initState pinfo (mkCId cat)
|
||||||
|
putStrFlush ">> "
|
||||||
|
s <- getLine
|
||||||
|
if null s
|
||||||
|
then return ()
|
||||||
|
else do cpu1 <- getCPUTime
|
||||||
|
st <- parse pinfo state0 (words s)
|
||||||
|
let exps = Incremental.extractExps pinfo (mkCId cat) st
|
||||||
|
mapM_ (putStrLn . showExp) exps
|
||||||
|
cpu2 <- getCPUTime
|
||||||
|
putStrLn (show ((cpu2 - cpu1) `div` 1000000000) ++ " msec")
|
||||||
|
wordCompletion opts
|
||||||
|
where
|
||||||
|
parse pinfo st [] = do putStrLnFlush ""
|
||||||
|
return st
|
||||||
|
parse pinfo st (t:ts) = do putStrFlush "."
|
||||||
|
st1 <- return $! (Incremental.nextState pinfo t st)
|
||||||
|
parse pinfo st1 ts
|
||||||
|
|||||||
@@ -170,7 +170,7 @@ linearize pgf lang = PGF.Linearize.linearize pgf (mkCId lang)
|
|||||||
parse pgf lang cat s =
|
parse pgf lang cat s =
|
||||||
case lookParser pgf (mkCId lang) of
|
case lookParser pgf (mkCId lang) of
|
||||||
Nothing -> error ("Unknown language: " ++ lang)
|
Nothing -> error ("Unknown language: " ++ lang)
|
||||||
Just pinfo -> case parseFCF "bottomup" pinfo (mkCId cat) (words s) of
|
Just pinfo -> case parseFCFG "bottomup" pinfo (mkCId cat) (words s) of
|
||||||
Ok x -> x
|
Ok x -> x
|
||||||
Bad s -> error s
|
Bad s -> error s
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
module PGF.Parsing.FCFG
|
module PGF.Parsing.FCFG
|
||||||
(parseFCF,buildParserInfo,ParserInfo(..),makeFinalEdge) where
|
(buildParserInfo,ParserInfo,parseFCFG) where
|
||||||
|
|
||||||
import GF.Data.ErrM
|
import GF.Data.ErrM
|
||||||
import GF.Data.Assoc
|
import GF.Data.Assoc
|
||||||
@@ -19,7 +19,8 @@ import PGF.Data
|
|||||||
import PGF.Macros
|
import PGF.Macros
|
||||||
import PGF.BuildParser
|
import PGF.BuildParser
|
||||||
import PGF.Parsing.FCFG.Utilities
|
import PGF.Parsing.FCFG.Utilities
|
||||||
import PGF.Parsing.FCFG.Active
|
import qualified PGF.Parsing.FCFG.Active as Active
|
||||||
|
import qualified PGF.Parsing.FCFG.Incremental as Incremental
|
||||||
|
|
||||||
import qualified Data.Map as Map
|
import qualified Data.Map as Map
|
||||||
|
|
||||||
@@ -28,25 +29,12 @@ import qualified Data.Map as Map
|
|||||||
|
|
||||||
-- main parsing function
|
-- main parsing function
|
||||||
|
|
||||||
parseFCF :: String -- ^ parsing strategy
|
parseFCFG :: String -- ^ parsing strategy
|
||||||
-> ParserInfo -- ^ compiled grammar (fcfg)
|
-> ParserInfo -- ^ compiled grammar (fcfg)
|
||||||
-> CId -- ^ starting category
|
-> CId -- ^ starting category
|
||||||
-> [String] -- ^ input tokens
|
-> [String] -- ^ input tokens
|
||||||
-> Err [Exp] -- ^ resulting GF terms
|
-> Err [Exp] -- ^ resulting GF terms
|
||||||
parseFCF strategy pinfo startCat inString =
|
parseFCFG "bottomup" pinfo start toks = return $ Active.parse "b" pinfo start toks
|
||||||
do let inTokens = input inString
|
parseFCFG "topdown" pinfo start toks = return $ Active.parse "t" pinfo start toks
|
||||||
startCats <- case Map.lookup startCat (startupCats pinfo) of
|
parseFCFG "incremental" pinfo start toks = return $ Incremental.parse pinfo start toks
|
||||||
Just cats -> return cats
|
parseFCFG strat pinfo start toks = fail $ "FCFG parsing strategy not defined: " ++ strat
|
||||||
Nothing -> fail $ "Unknown startup category: " ++ prCId startCat
|
|
||||||
fcfParser <- parseFCF strategy
|
|
||||||
let chart = fcfParser pinfo startCats inTokens
|
|
||||||
(i,j) = inputBounds inTokens
|
|
||||||
finalEdges = [makeFinalEdge cat i j | cat <- startCats]
|
|
||||||
forests = chart2forests chart (const False) finalEdges
|
|
||||||
filteredForests = forests >>= applyProfileToForest
|
|
||||||
return $ nubsort $ filteredForests >>= forest2exps
|
|
||||||
where
|
|
||||||
parseFCF :: String -> Err (FCFParser)
|
|
||||||
parseFCF "bottomup" = return $ parse "b"
|
|
||||||
parseFCF "topdown" = return $ parse "t"
|
|
||||||
parseFCF strat = fail $ "FCFG parsing strategy not defined: " ++ strat
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
-- MCFG parsing, the active algorithm
|
-- MCFG parsing, the active algorithm
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
module PGF.Parsing.FCFG.Active (FCFParser, parse, makeFinalEdge) where
|
module PGF.Parsing.FCFG.Active (parse) where
|
||||||
|
|
||||||
import GF.Data.Assoc
|
import GF.Data.Assoc
|
||||||
import GF.Data.SortedList
|
import GF.Data.SortedList
|
||||||
@@ -32,17 +32,20 @@ makeFinalEdge cat 0 0 = (cat, [EmptyRange])
|
|||||||
makeFinalEdge cat i j = (cat, [makeRange i j])
|
makeFinalEdge cat i j = (cat, [makeRange i j])
|
||||||
|
|
||||||
-- | the list of categories = possible starting categories
|
-- | the list of categories = possible starting categories
|
||||||
type FCFParser = ParserInfo
|
parse :: String -> ParserInfo -> CId -> [FToken] -> [Exp]
|
||||||
-> [FCat]
|
parse strategy pinfo start toks = nubsort $ filteredForests >>= forest2exps
|
||||||
-> Input FToken
|
where
|
||||||
-> SyntaxChart (CId,[Profile]) (FCat,RangeRec)
|
inTokens = input toks
|
||||||
|
starts = Map.findWithDefault [] start (startupCats pinfo)
|
||||||
|
schart = xchart2syntaxchart chart pinfo
|
||||||
parse :: String -> FCFParser
|
(i,j) = inputBounds inTokens
|
||||||
parse strategy pinfo starts toks = xchart2syntaxchart chart pinfo
|
finalEdges = [makeFinalEdge cat i j | cat <- starts]
|
||||||
where chart = process strategy pinfo toks axioms emptyXChart
|
forests = chart2forests schart (const False) finalEdges
|
||||||
axioms | isBU strategy = literals pinfo toks ++ initialBU pinfo toks
|
filteredForests = forests >>= applyProfileToForest
|
||||||
| isTD strategy = literals pinfo toks ++ initialTD pinfo starts toks
|
|
||||||
|
chart = process strategy pinfo inTokens axioms emptyXChart
|
||||||
|
axioms | isBU strategy = literals pinfo inTokens ++ initialBU pinfo inTokens
|
||||||
|
| isTD strategy = literals pinfo inTokens ++ initialTD pinfo starts inTokens
|
||||||
|
|
||||||
isBU s = s=="b"
|
isBU s = s=="b"
|
||||||
isTD s = s=="t"
|
isTD s = s=="t"
|
||||||
|
|||||||
156
src-3.0/PGF/Parsing/FCFG/Incremental.hs
Normal file
156
src-3.0/PGF/Parsing/FCFG/Incremental.hs
Normal file
@@ -0,0 +1,156 @@
|
|||||||
|
{-# OPTIONS -fbang-patterns #-}
|
||||||
|
module PGF.Parsing.FCFG.Incremental
|
||||||
|
( State
|
||||||
|
, initState
|
||||||
|
, nextState
|
||||||
|
, getCompletions
|
||||||
|
, extractExps
|
||||||
|
, parse
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Data.Array
|
||||||
|
import Data.Array.Base (unsafeAt)
|
||||||
|
import qualified Data.Map as Map
|
||||||
|
import qualified Data.IntMap as IntMap
|
||||||
|
import qualified Data.Set as Set
|
||||||
|
import Control.Monad
|
||||||
|
|
||||||
|
import GF.Data.Assoc
|
||||||
|
import GF.Data.SortedList
|
||||||
|
import qualified GF.Data.MultiMap as MM
|
||||||
|
import PGF.CId
|
||||||
|
import PGF.Data
|
||||||
|
import PGF.Parsing.FCFG.Utilities
|
||||||
|
import Debug.Trace
|
||||||
|
|
||||||
|
parse :: ParserInfo -> CId -> [FToken] -> [Exp]
|
||||||
|
parse pinfo start toks = go (initState pinfo start) toks
|
||||||
|
where
|
||||||
|
go st [] = extractExps pinfo start st
|
||||||
|
go st (t:ts) = go (nextState pinfo t st) ts
|
||||||
|
|
||||||
|
initState :: ParserInfo -> CId -> State
|
||||||
|
initState pinfo start =
|
||||||
|
let items = do
|
||||||
|
c <- Map.findWithDefault [] start (startupCats pinfo)
|
||||||
|
ruleid <- topdownRules pinfo ? c
|
||||||
|
let (FRule fn _ args cat lins) = allRules pinfo ! ruleid
|
||||||
|
lbl <- indices lins
|
||||||
|
return (Active 0 lbl 0 ruleid args cat)
|
||||||
|
|
||||||
|
forest = IntMap.fromListWith Set.union [(cat, Set.singleton (Passive ruleid args)) | (ruleid, FRule _ _ args cat _) <- assocs (allRules pinfo)]
|
||||||
|
|
||||||
|
max_fid = case IntMap.maxViewWithKey forest of
|
||||||
|
Just ((fid,_), _) -> fid+1
|
||||||
|
Nothing -> 0
|
||||||
|
|
||||||
|
in process (allRules pinfo) items (State MM.empty [] MM.empty Map.empty forest max_fid 0)
|
||||||
|
|
||||||
|
nextState :: ParserInfo -> FToken -> State -> State
|
||||||
|
nextState pinfo t state =
|
||||||
|
process (allRules pinfo) (tokens state MM.! t) state{ chart=MM.empty
|
||||||
|
, charts=chart state : charts state
|
||||||
|
, tokens=MM.empty
|
||||||
|
, passive=Map.empty
|
||||||
|
, currOffset=currOffset state+1
|
||||||
|
}
|
||||||
|
|
||||||
|
getCompletions :: State -> FToken -> [FToken]
|
||||||
|
getCompletions state w =
|
||||||
|
[t | t <- MM.keys (tokens state), take (length w) t == w]
|
||||||
|
|
||||||
|
extractExps :: ParserInfo -> CId -> State -> [Exp]
|
||||||
|
extractExps pinfo start st = exps
|
||||||
|
where
|
||||||
|
exps = nubsort $ do
|
||||||
|
c <- Map.findWithDefault [] start (startupCats pinfo)
|
||||||
|
ruleid <- topdownRules pinfo ? c
|
||||||
|
let (FRule fn _ args cat lins) = allRules pinfo ! ruleid
|
||||||
|
lbl <- indices lins
|
||||||
|
fid <- Map.lookup (PK c lbl 0) (passive st)
|
||||||
|
go fid
|
||||||
|
|
||||||
|
go fid = do
|
||||||
|
set <- IntMap.lookup fid (forest st)
|
||||||
|
Passive ruleid args <- Set.toList set
|
||||||
|
let (FRule fn _ _ cat lins) = allRules pinfo ! ruleid
|
||||||
|
args <- mapM go args
|
||||||
|
return (EApp fn args)
|
||||||
|
|
||||||
|
process !rules [] state = state
|
||||||
|
process !rules (item:items) state = process rules items $! univRule item state
|
||||||
|
where
|
||||||
|
univRule (Active j lbl ppos ruleid args fid0) state
|
||||||
|
| inRange (bounds lin) ppos =
|
||||||
|
case unsafeAt lin ppos of
|
||||||
|
FSymCat r d -> {-# SCC "COND11" #-}
|
||||||
|
let !fid = args !! d
|
||||||
|
in case MM.insert' (AK fid r) item (chart state) of
|
||||||
|
Nothing -> state
|
||||||
|
Just actCat -> (case Map.lookup (PK fid r k) (passive state) of
|
||||||
|
Nothing -> id
|
||||||
|
Just id -> process rules [Active j lbl (ppos+1) ruleid (updateAt d id args) fid0]) $
|
||||||
|
(case IntMap.lookup fid (forest state) of
|
||||||
|
Nothing -> id
|
||||||
|
Just set -> process rules (Set.fold (\(Passive ruleid args) -> (:) (Active k r 0 ruleid args fid)) [] set)) $
|
||||||
|
state{chart=actCat}
|
||||||
|
FSymTok tok -> {-# SCC "COND12" #-}
|
||||||
|
case MM.insert' tok (Active j lbl (ppos+1) ruleid args fid0) (tokens state) of
|
||||||
|
Nothing -> state
|
||||||
|
Just actTok -> state{tokens=actTok}
|
||||||
|
| otherwise = {-# SCC "COND2" #-}
|
||||||
|
case Map.lookup (PK fid0 lbl j) (passive state) of
|
||||||
|
Nothing -> let fid = nextId state
|
||||||
|
in process rules [Active j' lbl (ppos+1) ruleid (updateAt d fid args) fidc
|
||||||
|
| Active j' lbl ppos ruleid args fidc <- ((chart state:charts state) !! (k-j)) MM.! (AK fid0 lbl),
|
||||||
|
let FSymCat _ d = unsafeAt (rhs ruleid lbl) ppos] $
|
||||||
|
state{passive=Map.insert (PK fid0 lbl j) fid (passive state)
|
||||||
|
,forest =IntMap.insert fid (Set.singleton (Passive ruleid args)) (forest state)
|
||||||
|
,nextId =nextId state+1
|
||||||
|
}
|
||||||
|
Just id -> state{forest = IntMap.insertWith Set.union id (Set.singleton (Passive ruleid args)) (forest state)}
|
||||||
|
where
|
||||||
|
!lin = rhs ruleid lbl
|
||||||
|
!k = currOffset state
|
||||||
|
|
||||||
|
rhs ruleid lbl = unsafeAt lins lbl
|
||||||
|
where
|
||||||
|
(FRule _ _ _ cat lins) = unsafeAt rules ruleid
|
||||||
|
|
||||||
|
updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
|
||||||
|
|
||||||
|
|
||||||
|
data Active
|
||||||
|
= Active {-# UNPACK #-} !Int
|
||||||
|
{-# UNPACK #-} !FIndex
|
||||||
|
{-# UNPACK #-} !FPointPos
|
||||||
|
{-# UNPACK #-} !RuleId
|
||||||
|
[FCat]
|
||||||
|
{-# UNPACK #-} !FCat
|
||||||
|
deriving (Eq,Show,Ord)
|
||||||
|
data Passive
|
||||||
|
= Passive {-# UNPACK #-} !RuleId
|
||||||
|
[FCat]
|
||||||
|
deriving (Eq,Ord,Show)
|
||||||
|
|
||||||
|
data ActiveKey
|
||||||
|
= AK {-# UNPACK #-} !FCat
|
||||||
|
{-# UNPACK #-} !FIndex
|
||||||
|
deriving (Eq,Ord,Show)
|
||||||
|
data PassiveKey
|
||||||
|
= PK {-# UNPACK #-} !FCat
|
||||||
|
{-# UNPACK #-} !FIndex
|
||||||
|
{-# UNPACK #-} !Int
|
||||||
|
deriving (Eq,Ord,Show)
|
||||||
|
|
||||||
|
data State
|
||||||
|
= State
|
||||||
|
{ chart :: MM.MultiMap ActiveKey Active
|
||||||
|
, charts :: [MM.MultiMap ActiveKey Active]
|
||||||
|
, tokens :: MM.MultiMap FToken Active
|
||||||
|
, passive :: Map.Map PassiveKey FCat
|
||||||
|
, forest :: IntMap.IntMap (Set.Set Passive)
|
||||||
|
, nextId :: {-# UNPACK #-} !FCat
|
||||||
|
, currOffset :: {-# UNPACK #-} !Int
|
||||||
|
}
|
||||||
|
deriving Show
|
||||||
Reference in New Issue
Block a user