forked from GitHub/gf-core
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
This commit is contained in:
179
src-3.0/GF/Parsing/FCFG/Active.hs
Normal file
179
src-3.0/GF/Parsing/FCFG/Active.hs
Normal file
@@ -0,0 +1,179 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : Krasimir Angelov
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- MCFG parsing, the active algorithm
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Parsing.FCFG.Active (parse) where
|
||||
|
||||
import GF.Data.GeneralDeduction
|
||||
import GF.Data.Assoc
|
||||
import GF.Data.SortedList
|
||||
import GF.Data.Utilities
|
||||
|
||||
import GF.Formalism.FCFG
|
||||
import GF.Formalism.Utilities
|
||||
|
||||
import GF.Infra.PrintClass
|
||||
|
||||
import GF.Parsing.FCFG.Range
|
||||
import GF.Parsing.FCFG.PInfo
|
||||
|
||||
import Control.Monad (guard)
|
||||
|
||||
import qualified Data.List as List
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Set as Set
|
||||
import Data.Array
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * parsing
|
||||
|
||||
parse :: String -> FCFParser
|
||||
parse strategy pinfo starts toks = xchart2syntaxchart chart pinfo
|
||||
where chart = process strategy pinfo toks axioms emptyXChart
|
||||
axioms | isBU strategy = literals pinfo toks ++ initialBU pinfo toks
|
||||
| isTD strategy = literals pinfo toks ++ initialTD pinfo starts toks
|
||||
|
||||
isBU s = s=="b"
|
||||
isTD s = s=="t"
|
||||
|
||||
-- used in prediction
|
||||
emptyChildren :: RuleId -> FCFPInfo -> SyntaxNode RuleId RangeRec
|
||||
emptyChildren ruleid pinfo = SNode ruleid (replicate (length rhs) [])
|
||||
where
|
||||
FRule _ rhs _ _ = allRules pinfo ! ruleid
|
||||
|
||||
process :: String -> FCFPInfo -> Input FToken -> [(FCat,Item)] -> XChart FCat -> XChart FCat
|
||||
process strategy pinfo toks [] chart = chart
|
||||
process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks items $! univRule c item chart
|
||||
where
|
||||
univRule cat item@(Active found rng lbl ppos node@(SNode ruleid recs)) chart
|
||||
| inRange (bounds lin) ppos =
|
||||
case lin ! ppos of
|
||||
FSymCat c r d -> case recs !! d of
|
||||
[] -> case insertXChart chart item c of
|
||||
Nothing -> chart
|
||||
Just chart -> let items = do item@(Final found' _) <- lookupXChartFinal chart c
|
||||
rng <- concatRange rng (found' !! r)
|
||||
return (c, Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)))
|
||||
++
|
||||
do guard (isTD strategy)
|
||||
ruleid <- topdownRules pinfo ? c
|
||||
return (c, Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo))
|
||||
in process strategy pinfo toks items chart
|
||||
found' -> let items = do rng <- concatRange rng (found' !! r)
|
||||
return (c, Active found rng lbl (ppos+1) node)
|
||||
in process strategy pinfo toks items chart
|
||||
FSymTok tok -> let items = do (i,j) <- inputToken toks ? tok
|
||||
rng' <- concatRange rng (makeRange i j)
|
||||
return (cat, Active found rng' lbl (ppos+1) node)
|
||||
in process strategy pinfo toks items chart
|
||||
| otherwise =
|
||||
if inRange (bounds lins) (lbl+1)
|
||||
then univRule cat (Active (rng:found) EmptyRange (lbl+1) 0 node) chart
|
||||
else univRule cat (Final (reverse (rng:found)) node) chart
|
||||
where
|
||||
(FRule fn _ cat lins) = allRules pinfo ! ruleid
|
||||
lin = lins ! lbl
|
||||
univRule cat item@(Final found' node) chart =
|
||||
case insertXChart chart item cat of
|
||||
Nothing -> chart
|
||||
Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _)) <- lookupXChartAct chart cat
|
||||
let FRule _ _ _ lins = allRules pinfo ! ruleid
|
||||
FSymCat cat r d = lins ! l ! ppos
|
||||
rng <- concatRange rng (found' !! r)
|
||||
return (cat, Active found rng l (ppos+1) (updateChildren node d found'))
|
||||
++
|
||||
do guard (isBU strategy)
|
||||
ruleid <- leftcornerCats pinfo ? cat
|
||||
let FRule _ _ _ lins = allRules pinfo ! ruleid
|
||||
FSymCat cat r d = lins ! 0 ! 0
|
||||
return (cat, Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid pinfo) d found'))
|
||||
|
||||
updateChildren :: SyntaxNode RuleId RangeRec -> Int -> RangeRec -> SyntaxNode RuleId RangeRec
|
||||
updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs
|
||||
in process strategy pinfo toks items chart
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * XChart
|
||||
|
||||
data Item
|
||||
= Active RangeRec
|
||||
Range
|
||||
{-# UNPACK #-} !FIndex
|
||||
{-# UNPACK #-} !FPointPos
|
||||
(SyntaxNode RuleId RangeRec)
|
||||
| Final RangeRec (SyntaxNode RuleId RangeRec)
|
||||
deriving (Eq, Ord)
|
||||
|
||||
data XChart c = XChart !(ParseChart Item c) !(ParseChart Item c)
|
||||
|
||||
emptyXChart :: Ord c => XChart c
|
||||
emptyXChart = XChart emptyChart emptyChart
|
||||
|
||||
insertXChart (XChart actives finals) item@(Active _ _ _ _ _) c =
|
||||
case chartInsert actives item c of
|
||||
Nothing -> Nothing
|
||||
Just actives -> Just (XChart actives finals)
|
||||
|
||||
insertXChart (XChart actives finals) item@(Final _ _) c =
|
||||
case chartInsert finals item c of
|
||||
Nothing -> Nothing
|
||||
Just finals -> Just (XChart actives finals)
|
||||
|
||||
lookupXChartAct (XChart actives finals) c = chartLookup actives c
|
||||
lookupXChartFinal (XChart actives finals) c = chartLookup finals c
|
||||
|
||||
xchart2syntaxchart :: XChart FCat -> FCFPInfo -> SyntaxChart FName (FCat,RangeRec)
|
||||
xchart2syntaxchart (XChart actives finals) pinfo =
|
||||
accumAssoc groupSyntaxNodes $
|
||||
[ case node of
|
||||
SNode ruleid rrecs -> let FRule fun rhs cat _ = allRules pinfo ! ruleid
|
||||
in ((cat,found), SNode fun (zip rhs rrecs))
|
||||
SString s -> ((cat,found), SString s)
|
||||
SInt n -> ((cat,found), SInt n)
|
||||
SFloat f -> ((cat,found), SFloat f)
|
||||
| (cat, Final found node) <- chartAssocs finals
|
||||
]
|
||||
|
||||
literals :: FCFPInfo -> Input FToken -> [(FCat,Item)]
|
||||
literals pinfo toks =
|
||||
[let (c,node) = lexer t in (c,Final [makeRange i j] node) | Edge i j t <- inputEdges toks, not (t `elem` grammarToks pinfo)]
|
||||
where
|
||||
lexer t =
|
||||
case reads t of
|
||||
[(n,"")] -> (fcatInt, SInt (n::Integer))
|
||||
_ -> case reads t of
|
||||
[(f,"")] -> (fcatFloat, SFloat (f::Double))
|
||||
_ -> (fcatString,SString t)
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- Earley --
|
||||
|
||||
-- called with all starting categories
|
||||
initialTD :: FCFPInfo -> [FCat] -> Input FToken -> [(FCat,Item)]
|
||||
initialTD pinfo starts toks =
|
||||
do cat <- starts
|
||||
ruleid <- topdownRules pinfo ? cat
|
||||
return (cat,Active [] (Range 0 0) 0 0 (emptyChildren ruleid pinfo))
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- Kilbury --
|
||||
|
||||
initialBU :: FCFPInfo -> Input FToken -> [(FCat,Item)]
|
||||
initialBU pinfo toks =
|
||||
do (tok,rngs) <- aAssocs (inputToken toks)
|
||||
ruleid <- leftcornerTokens pinfo ? tok
|
||||
let FRule _ _ cat _ = allRules pinfo ! ruleid
|
||||
(i,j) <- rngs
|
||||
return (cat,Active [] (makeRange i j) 0 1 (emptyChildren ruleid pinfo))
|
||||
++
|
||||
do ruleid <- epsilonRules pinfo
|
||||
let FRule _ _ cat _ = allRules pinfo ! ruleid
|
||||
return (cat,Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo))
|
||||
107
src-3.0/GF/Parsing/FCFG/Incremental.hs
Normal file
107
src-3.0/GF/Parsing/FCFG/Incremental.hs
Normal file
@@ -0,0 +1,107 @@
|
||||
module GF.Parsing.FCFG.Incremental where
|
||||
|
||||
import Data.Array
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.IntMap as IntMap
|
||||
import qualified Data.Set as Set
|
||||
import Control.Monad
|
||||
|
||||
import GF.Data.Assoc
|
||||
import GF.Data.GeneralDeduction
|
||||
import GF.Formalism.FCFG
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Parsing.FCFG.PInfo
|
||||
import GF.Parsing.FCFG.Range
|
||||
import GF.GFCC.CId
|
||||
import Debug.Trace
|
||||
|
||||
initState :: FCFPInfo -> CId -> State
|
||||
initState pinfo start =
|
||||
let items = do
|
||||
starts <- Map.lookup start (startupCats pinfo)
|
||||
c <- starts
|
||||
ruleid <- topdownRules pinfo ? c
|
||||
let (FRule fn args cat lins) = allRules pinfo ! ruleid
|
||||
lbl <- indices lins
|
||||
return (Active 0 lbl 0 ruleid args cat)
|
||||
|
||||
forest = IntMap.fromListWith Set.union [(cat, Set.singleton (Passive ruleid args)) | (ruleid, FRule _ args cat _) <- assocs (allRules pinfo)]
|
||||
|
||||
max_fid = case IntMap.maxViewWithKey forest of
|
||||
Just ((fid,_), _) -> fid+1
|
||||
Nothing -> 0
|
||||
|
||||
in process pinfo items (State emptyChart [] emptyChart Map.empty forest max_fid 0)
|
||||
|
||||
nextState :: FCFPInfo -> FToken -> State -> State
|
||||
nextState pinfo t state =
|
||||
process pinfo (chartLookup (tokens state) t) state{ chart=emptyChart
|
||||
, charts=chart state : charts state
|
||||
, tokens=emptyChart
|
||||
, passive=Map.empty
|
||||
, currOffset=currOffset state+1
|
||||
}
|
||||
|
||||
getCompletions :: State -> FToken -> [FToken]
|
||||
getCompletions state w =
|
||||
[t | t <- chartKeys (tokens state), take (length w) t == w]
|
||||
|
||||
process pinfo [] state = state
|
||||
process pinfo (item@(Active j lbl ppos ruleid args fid0):xitems) state
|
||||
| inRange (bounds lin) ppos =
|
||||
case lin ! ppos of
|
||||
FSymCat _ r d -> let fid = args !! d
|
||||
in case chartInsert (chart state) item (fid,r) of
|
||||
Nothing -> process pinfo xitems state
|
||||
Just actCat -> let items = do exprs <- IntMap.lookup fid (forest state)
|
||||
(Passive ruleid args) <- Set.toList exprs
|
||||
return (Active k r 0 ruleid args fid)
|
||||
`mplus`
|
||||
do id <- Map.lookup (fid,r,k) (passive state)
|
||||
return (Active j lbl (ppos+1) ruleid (updateAt d id args) fid0)
|
||||
in process pinfo (xitems++items) state{chart=actCat}
|
||||
FSymTok tok -> case chartInsert (tokens state) (Active j lbl (ppos+1) ruleid args fid0) tok of
|
||||
Nothing -> process pinfo xitems state
|
||||
Just actTok -> process pinfo xitems state{tokens=actTok}
|
||||
| otherwise = case Map.lookup (fid0, lbl, j) (passive state) of
|
||||
Nothing -> let fid = nextId state
|
||||
items = do Active j' lbl ppos ruleid args fidc <- chartLookup ((chart state:charts state) !! (k-j)) (fid0,lbl)
|
||||
let FSymCat _ _ d = rhs ruleid lbl ! ppos
|
||||
return (Active j' lbl (ppos+1) ruleid (updateAt d fid args) fidc)
|
||||
in process pinfo (xitems++items) state{passive=Map.insert (fid0, lbl, j) fid (passive state)
|
||||
,forest =IntMap.insert fid (Set.singleton (Passive ruleid args)) (forest state)
|
||||
,nextId =nextId state+1
|
||||
}
|
||||
Just id -> process pinfo xitems state{forest = IntMap.insertWith Set.union id (Set.singleton (Passive ruleid args)) (forest state)}
|
||||
where
|
||||
lin = rhs ruleid lbl
|
||||
k = currOffset state
|
||||
|
||||
rhs ruleid lbl = lins ! lbl
|
||||
where
|
||||
(FRule _ _ cat lins) = allRules pinfo ! ruleid
|
||||
|
||||
updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
|
||||
|
||||
|
||||
data Active
|
||||
= Active Int FIndex FPointPos RuleId [FCat] FCat
|
||||
deriving (Eq,Show,Ord)
|
||||
data Passive
|
||||
= Passive RuleId [FCat]
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
|
||||
data State
|
||||
= State
|
||||
{ chart :: Chart
|
||||
, charts :: [Chart]
|
||||
, tokens :: ParseChart Active FToken
|
||||
, passive :: Map.Map (FCat, FIndex, Int) FCat
|
||||
, forest :: IntMap.IntMap (Set.Set Passive)
|
||||
, nextId :: FCat
|
||||
, currOffset :: Int
|
||||
}
|
||||
deriving Show
|
||||
|
||||
type Chart = ParseChart Active (FCat, FIndex)
|
||||
121
src-3.0/GF/Parsing/FCFG/PInfo.hs
Normal file
121
src-3.0/GF/Parsing/FCFG/PInfo.hs
Normal file
@@ -0,0 +1,121 @@
|
||||
---------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : Krasimir Angelov
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- FCFG parsing, parser information
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Parsing.FCFG.PInfo where
|
||||
|
||||
import GF.Infra.PrintClass
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Formalism.FCFG
|
||||
import GF.Data.SortedList
|
||||
import GF.Data.Assoc
|
||||
import GF.Parsing.FCFG.Range
|
||||
import qualified GF.GFCC.CId as AbsGFCC
|
||||
|
||||
import Data.Array
|
||||
import Data.Maybe
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Set as Set
|
||||
import Debug.Trace
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- type declarations
|
||||
|
||||
-- | the list of categories = possible starting categories
|
||||
type FCFParser = FCFPInfo
|
||||
-> [FCat]
|
||||
-> Input FToken
|
||||
-> SyntaxChart FName (FCat,RangeRec)
|
||||
|
||||
makeFinalEdge cat 0 0 = (cat, [EmptyRange])
|
||||
makeFinalEdge cat i j = (cat, [makeRange i j])
|
||||
|
||||
------------------------------------------------------------
|
||||
-- parser information
|
||||
|
||||
type RuleId = Int
|
||||
|
||||
data FCFPInfo
|
||||
= FCFPInfo { allRules :: Array RuleId FRule
|
||||
, topdownRules :: Assoc FCat (SList RuleId)
|
||||
-- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
|
||||
-- , emptyRules :: [RuleId]
|
||||
, epsilonRules :: [RuleId]
|
||||
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
|
||||
, leftcornerCats :: Assoc FCat (SList RuleId)
|
||||
, leftcornerTokens :: Assoc FToken (SList RuleId)
|
||||
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
|
||||
, grammarCats :: SList FCat
|
||||
, grammarToks :: SList FToken
|
||||
, startupCats :: Map.Map AbsGFCC.CId [FCat]
|
||||
}
|
||||
|
||||
|
||||
getLeftCornerTok lins
|
||||
| inRange (bounds syms) 0 = case syms ! 0 of
|
||||
FSymTok tok -> Just tok
|
||||
_ -> Nothing
|
||||
| otherwise = Nothing
|
||||
where
|
||||
syms = lins ! 0
|
||||
|
||||
getLeftCornerCat lins
|
||||
| inRange (bounds syms) 0 = case syms ! 0 of
|
||||
FSymCat c _ _ -> Just c
|
||||
_ -> Nothing
|
||||
| otherwise = Nothing
|
||||
where
|
||||
syms = lins ! 0
|
||||
|
||||
buildFCFPInfo :: FGrammar -> FCFPInfo
|
||||
buildFCFPInfo (grammar,startup) = -- trace (unlines [prt (x,Set.toList set) | (x,set) <- Map.toList leftcornFilter]) $
|
||||
FCFPInfo { allRules = allrules
|
||||
, topdownRules = topdownrules
|
||||
-- , emptyRules = emptyrules
|
||||
, epsilonRules = epsilonrules
|
||||
, leftcornerCats = leftcorncats
|
||||
, leftcornerTokens = leftcorntoks
|
||||
, grammarCats = grammarcats
|
||||
, grammarToks = grammartoks
|
||||
, startupCats = startup
|
||||
}
|
||||
|
||||
where allrules = listArray (0,length grammar-1) grammar
|
||||
topdownrules = accumAssoc id [(cat, ruleid) | (ruleid, FRule _ _ cat _) <- assocs allrules]
|
||||
-- emptyrules = [ruleid | (ruleid, FRule _ [] _ _) <- assocs allrules]
|
||||
epsilonrules = [ ruleid | (ruleid, FRule _ _ _ lins) <- assocs allrules,
|
||||
not (inRange (bounds (lins ! 0)) 0) ]
|
||||
leftcorncats = accumAssoc id
|
||||
[ (fromJust (getLeftCornerCat lins), ruleid) |
|
||||
(ruleid, FRule _ _ _ lins) <- assocs allrules, isJust (getLeftCornerCat lins) ]
|
||||
leftcorntoks = accumAssoc id
|
||||
[ (fromJust (getLeftCornerTok lins), ruleid) |
|
||||
(ruleid, FRule _ _ _ lins) <- assocs allrules, isJust (getLeftCornerTok lins) ]
|
||||
grammarcats = aElems topdownrules
|
||||
grammartoks = nubsort [t | (FRule _ _ _ lins) <- grammar, lin <- elems lins, FSymTok t <- elems lin]
|
||||
|
||||
fcfPInfoToFGrammar :: FCFPInfo -> FGrammar
|
||||
fcfPInfoToFGrammar pinfo = (elems (allRules pinfo), startupCats pinfo)
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- pretty-printing of statistics
|
||||
|
||||
instance Print FCFPInfo where
|
||||
prt pI = "[ allRules=" ++ sl (elems . allRules) ++
|
||||
"; tdRules=" ++ sla topdownRules ++
|
||||
-- "; emptyRules=" ++ sl emptyRules ++
|
||||
"; epsilonRules=" ++ sl epsilonRules ++
|
||||
"; lcCats=" ++ sla leftcornerCats ++
|
||||
"; lcTokens=" ++ sla leftcornerTokens ++
|
||||
"; categories=" ++ sl grammarCats ++
|
||||
" ]"
|
||||
|
||||
where sl f = show $ length $ f pI
|
||||
sla f = let (as, bs) = unzip $ aAssocs $ f pI
|
||||
in show (length as) ++ "/" ++ show (length (concat bs))
|
||||
|
||||
50
src-3.0/GF/Parsing/FCFG/Range.hs
Normal file
50
src-3.0/GF/Parsing/FCFG/Range.hs
Normal file
@@ -0,0 +1,50 @@
|
||||
---------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : Krasimir Angelov
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- Definitions of ranges, and operations on ranges
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Parsing.FCFG.Range
|
||||
( RangeRec, Range(..), makeRange, concatRange, rangeEdge, edgeRange, minRange, maxRange,
|
||||
) where
|
||||
|
||||
|
||||
-- GF modules
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Infra.PrintClass
|
||||
|
||||
------------------------------------------------------------
|
||||
-- ranges as single pairs
|
||||
|
||||
type RangeRec = [Range]
|
||||
|
||||
data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int
|
||||
| EmptyRange
|
||||
deriving (Eq, Ord)
|
||||
|
||||
makeRange :: Int -> Int -> Range
|
||||
makeRange = Range
|
||||
|
||||
concatRange :: Range -> Range -> [Range]
|
||||
concatRange EmptyRange rng = return rng
|
||||
concatRange rng EmptyRange = return rng
|
||||
concatRange (Range i j) (Range j' k) = [Range i k | j==j']
|
||||
|
||||
rangeEdge :: a -> Range -> Edge a
|
||||
rangeEdge a (Range i j) = Edge i j a
|
||||
|
||||
edgeRange :: Edge a -> Range
|
||||
edgeRange (Edge i j _) = Range i j
|
||||
|
||||
minRange :: Range -> Int
|
||||
minRange (Range i j) = i
|
||||
|
||||
maxRange :: Range -> Int
|
||||
maxRange (Range i j) = j
|
||||
|
||||
instance Print Range where
|
||||
prt (Range i j) = "(" ++ show i ++ "-" ++ show j ++ ")"
|
||||
prt (EmptyRange) = "(?)"
|
||||
Reference in New Issue
Block a user