corrected bottomup MCFG parsing, added GF.Parsing.MCFG.FastActive

This commit is contained in:
peb
2006-06-07 16:18:28 +00:00
parent 55cb508a0f
commit da3a5d27e3
4 changed files with 214 additions and 30 deletions

View File

@@ -23,9 +23,10 @@ import GF.Parsing.MCFG.PInfo
import qualified GF.Parsing.MCFG.Naive as Naive
import qualified GF.Parsing.MCFG.Active as Active
import qualified GF.Parsing.MCFG.Active2 as Active2
import qualified GF.Parsing.MCFG.FastActive as FastActive
-- import qualified GF.Parsing.MCFG.Active2 as Active2
import qualified GF.Parsing.MCFG.Incremental as Incremental
import qualified GF.Parsing.MCFG.Incremental2 as Incremental2
-- import qualified GF.Parsing.MCFG.Incremental2 as Incremental2
----------------------------------------------------------------------
-- parsing
@@ -35,13 +36,13 @@ parseMCF prs | prs `elem` strategies = Ok $ parseMCF' prs
| otherwise = Bad $ "MCFG parsing strategy not defined: " ++ prs
strategies = words "bottomup topdown n an ab at i an2 ab2 at2 i2 rn ran rab rat ri"
strategies = words "bottomup topdown n an ab at i rn ran rab rat ri ft fb"
parseMCF' :: (Ord c, Ord n, Ord l, Ord t) => String -> MCFParser c n l t
parseMCF' "bottomup" pinfo starts toks = Active.parse "b" pinfo starts toks
parseMCF' "topdown" pinfo starts toks = Active.parse "t" pinfo starts toks
parseMCF' "bottomup" pinfo starts toks = parseMCF' "fb" pinfo starts toks
parseMCF' "topdown" pinfo starts toks = parseMCF' "ft" pinfo starts toks
parseMCF' "n" pinfo starts toks = Naive.parse pinfo starts toks
parseMCF' "an" pinfo starts toks = Active.parse "n" pinfo starts toks
@@ -49,10 +50,10 @@ parseMCF' "ab" pinfo starts toks = Active.parse "b" pinfo starts toks
parseMCF' "at" pinfo starts toks = Active.parse "t" pinfo starts toks
parseMCF' "i" pinfo starts toks = Incremental.parse pinfo starts toks
parseMCF' "an2" pinfo starts toks = Active2.parse "n" pinfo starts toks
parseMCF' "ab2" pinfo starts toks = Active2.parse "b" pinfo starts toks
parseMCF' "at2" pinfo starts toks = Active2.parse "t" pinfo starts toks
parseMCF' "i2" pinfo starts toks = Incremental2.parse pinfo starts toks
-- parseMCF' "an2" pinfo starts toks = Active2.parse "n" pinfo starts toks
-- parseMCF' "ab2" pinfo starts toks = Active2.parse "b" pinfo starts toks
-- parseMCF' "at2" pinfo starts toks = Active2.parse "t" pinfo starts toks
-- parseMCF' "i2" pinfo starts toks = Incremental2.parse pinfo starts toks
parseMCF' "rn" pinfo starts toks = Naive.parseR (rrP pinfo toks) starts
parseMCF' "ran" pinfo starts toks = Active.parseR "n" (rrP pinfo toks) starts
@@ -61,4 +62,7 @@ parseMCF' "rat" pinfo starts toks = Active.parseR "t" (rrP pinfo toks) starts
parseMCF' "ri" pinfo starts toks = Incremental.parseR (rrP pinfo toks) starts ntoks
where ntoks = snd (inputBounds toks)
parseMCF' "fb" pinfo starts toks = FastActive.parse "b" (rrP pinfo toks) starts
parseMCF' "ft" pinfo starts toks = FastActive.parse "t" (rrP pinfo toks) starts
rrP pi = rangeRestrictPInfo pi

View File

@@ -57,7 +57,7 @@ process strategy pinfo starts toks
| isBU strategy = [scan, predictKilbury pinfo toks]
| isTD strategy = [scan, predictEarley pinfo toks]
axioms | isNil strategy = predict pinfo toks
| isBU strategy = terminal pinfo toks ++ initialScan pinfo toks
| isBU strategy = {- terminal pinfo toks ++ -} initialScan pinfo toks
| isTD strategy = initial pinfo starts toks
--processR :: (Ord n, Ord c, Ord l) =>
@@ -70,7 +70,7 @@ processR strategy pinfo starts
| isBU strategy = [scan, predictKilburyR pinfo]
| isTD strategy = [scan, predictEarleyR pinfo]
axioms | isNil strategy = predictR pinfo
| isBU strategy = terminalR pinfo ++ initialScanR pinfo
| isBU strategy = {- terminalR pinfo ++ -} initialScanR pinfo
| isTD strategy = initialR pinfo starts
isNil s = s=="n"
@@ -200,18 +200,20 @@ predictEarleyR2 rng (Rule abs (Cnc _ _ (lin : lins))) =
----------------------------------------------------------------------
-- Kilbury --
terminal :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l]
terminal pinfo toks =
tracePrt "MCFG.Active (Kilbury) - initial terminal rules" (prt . length) $
do Rule abs (Cnc _ _ lins) <- emptyRules pinfo
lins' <- rangeRestRec toks lins
return $ Final abs (makeRangeRec lins') []
-- terminal :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l]
-- terminal pinfo toks =
-- tracePrt "MCFG.Active (Kilbury) - initial terminal rules" (prt . length) $
-- do Rule abs (Cnc _ _ lins) <- emptyRules pinfo
-- lins' <- rangeRestRec toks lins
-- return $ Final abs (makeRangeRec lins') []
initialScan :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l]
initialScan pinfo toks =
tracePrt "MCFG.Active (Kilbury) - initial scanned rules" (prt . length) $
tracePrt "MCFG.Active (Kilbury) - initial scanned rules + epsilon rules" (prt . length) $
do tok <- aElems (inputToken toks)
Rule abs (Cnc _ _ lins) <- leftcornerTokens pinfo ? tok
Rule abs (Cnc _ _ lins) <-
leftcornerTokens pinfo ? tok ++
epsilonRules pinfo
lin' : lins' <- rangeRestRec toks lins
return $ Active abs [] EmptyRange lin' lins' (emptyChildren abs)
@@ -230,16 +232,18 @@ predictKilbury _ _ _ _ = []
----------------------------------------------------------------------
-- KilburyR --
terminalR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l]
terminalR pinfo =
tracePrt "MCFG.Active (Kilbury Range) - initial terminal rules" (prt . length) $
do Rule abs (Cnc _ _ lins) <- emptyRules pinfo
return $ Final abs (makeRangeRec lins) []
-- terminalR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l]
-- terminalR pinfo =
-- tracePrt "MCFG.Active (Kilbury Range) - initial terminal rules" (prt . length) $
-- do Rule abs (Cnc _ _ lins) <- emptyRules pinfo
-- return $ Final abs (makeRangeRec lins) []
initialScanR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l]
initialScanR pinfo =
tracePrt "MCFG.Active (Kilbury Range) - initial scanned rules" (prt . length) $
do Rule abs (Cnc _ _ (lin : lins)) <- concatMap snd (aAssocs (leftcornerTokens pinfo))
do Rule abs (Cnc _ _ (lin : lins)) <-
concatMap snd (aAssocs (leftcornerTokens pinfo)) ++
epsilonRules pinfo
return $ Active abs [] EmptyRange lin lins (emptyChildren abs)
predictKilburyR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range

View File

@@ -0,0 +1,175 @@
----------------------------------------------------------------------
-- |
-- Maintainer : Peter Ljunglöf
-- Stability : (stable)
-- Portability : (portable)
--
-- MCFG parsing, the active algorithm, optimized version
-- structure stolen from Krasimir Angelov's GF.Parsing.FCFG.Active
-----------------------------------------------------------------------------
module GF.Parsing.MCFG.FastActive (parse) where
import GF.Data.GeneralDeduction
import GF.Data.Assoc
import GF.Data.Utilities
import GF.Formalism.GCFG
import GF.Formalism.MCFG
import GF.Formalism.Utilities
import GF.Infra.Ident
import GF.Parsing.MCFG.Range
import GF.Parsing.MCFG.PInfo
import GF.System.Tracing
import Control.Monad (guard)
import GF.Infra.Print
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
import Data.Array
----------------------------------------------------------------------
-- * parsing
-- parse :: (Ord c, Ord n, Ord l, Ord t) => String -> MCFParser c n l t
parse strategy pinfo starts =
[ Abs (cat, found) (zip rhs rrecs) fun |
Final (Abs cat rhs fun) found rrecs <- listXChartFinal chart ]
where chart = process strategy pinfo axioms emptyXChart
-- axioms | isBU strategy = terminal pinfo toks ++ initialScan pinfo toks
axioms | isBU strategy = initialBU pinfo
| isTD strategy = initialTD pinfo starts
isBU s = s=="b"
isTD s = s=="t"
-- used in prediction
emptyChildren :: Abstract c n -> [RangeRec l]
emptyChildren (Abs _ rhs _) = replicate (length rhs) []
updateChildren :: Eq l => [RangeRec l] -> Int -> RangeRec l -> [[RangeRec l]]
updateChildren recs i rec = updateNthM update i recs
where update rec' = do guard (null rec' || rec' == rec)
return rec
process :: (Ord c, Ord n, Ord l) => String -> MCFPInfo c n l Range -> [Item c n l] -> XChart c n l -> XChart c n l
process strategy pinfo [] chart = chart
process strategy pinfo (item:items) chart = process strategy pinfo items $! univRule item chart
where
univRule item@(Active abs found rng (Lin l syms) lins recs) chart
= case syms of
Cat(c,r,d) : syms' ->
case insertXChart chart item c of
Nothing -> chart
Just chart ->
let items = -- predict topdown
[ Active abs [] EmptyRange lin lins (emptyChildren abs) |
isTD strategy,
Rule abs (Cnc _ _ (lin:lins)) <- topdownRules pinfo ? c ] ++
-- combine
[ Active abs found rng'' (Lin l syms') lins recs' |
Final _ found' _ <- lookupXChartFinal chart c,
rng' <- projection r found',
rng'' <- concatRange rng rng',
recs' <- updateChildren recs d found' ]
in process strategy pinfo items chart
-- scan
Tok rng' : syms' ->
let items = [ Active abs found rng'' (Lin l syms') lins recs |
rng'' <- concatRange rng rng' ]
in process strategy pinfo items chart
-- complete
[] -> case lins of
(lin':lins') -> univRule (Active abs ((l,rng):found) EmptyRange lin' lins' recs) chart
[] -> univRule (Final abs (reverse ((l,rng):found)) recs) chart
univRule item@(Final abs@(Abs cat _ _) found' recs) chart =
case insertXChart chart item cat of
Nothing -> chart
Just chart ->
let items = -- predict bottomup
[ Active abs [] rng (Lin l syms') lins children |
isBU strategy,
Rule abs (Cnc _ _ (Lin l (Cat(c,r,d):syms') : lins)) <- leftcornerCats pinfo ? cat,
-- lin' : lins' <- rangeRestRec toks (Lin l syms' : lins),
rng <- projection r found',
children <- unifyRec (emptyChildren abs) d found' ] ++
-- combine
[ Active abs found rng'' (Lin l syms') lins recs' |
Active abs found rng (Lin l (Cat(c,r,d):syms')) lins recs <- lookupXChartAct chart cat,
rng' <- projection r found',
rng'' <- concatRange rng rng',
recs' <- updateChildren recs d found' ]
in process strategy pinfo items chart
----------------------------------------------------------------------
-- * XChart
data XChart c n l = XChart !(AChart c n l) !(AChart c n l)
type AChart c n l = ParseChart (Item c n l) c
data Item c n l = Active (Abstract c n)
(RangeRec l)
Range
(Lin c l Range)
(LinRec c l Range)
[RangeRec l]
| Final (Abstract c n) (RangeRec l) [RangeRec l]
-- | Passive c (RangeRec l)
deriving (Eq, Ord, Show)
emptyXChart :: (Ord c, Ord n, Ord l) => XChart c n l
emptyXChart = XChart emptyChart emptyChart
insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _) c =
case chartInsert actives item c of
Nothing -> Nothing
Just actives -> Just (XChart actives finals)
insertXChart (XChart actives finals) item@(Final _ _ _) c =
case chartInsert finals item c of
Nothing -> Nothing
Just finals -> Just (XChart actives finals)
lookupXChartAct (XChart actives finals) c = chartLookup actives c
lookupXChartFinal (XChart actives finals) c = chartLookup finals c
listXChartAct (XChart actives finals) = chartList actives
listXChartFinal (XChart actives finals) = chartList finals
----------------------------------------------------------------------
-- Earley --
-- called with all starting categories
initialTD :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [c] -> [Item c n l]
initialTD pinfo starts =
[ Active abs [] (Range (0, 0)) lin' lins' (emptyChildren abs) |
cat <- starts,
Rule abs (Cnc _ _ (lin':lins')) <- topdownRules pinfo ? cat ]
-- lin' : lins' <- rangeRestRec toks lins
----------------------------------------------------------------------
-- Kilbury --
initialBU :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l]
initialBU pinfo =
[ Active abs [] EmptyRange lin' lins' (emptyChildren abs) |
-- do tok <- aElems (inputToken toks)
Rule abs (Cnc _ _ (lin':lins')) <-
concatMap snd (aAssocs (leftcornerTokens pinfo)) ++
-- leftcornerTokens pinfo ? tok ++
epsilonRules pinfo ]
-- lin' : lins' <- rangeRestRec toks lins

View File

@@ -46,7 +46,8 @@ data MCFPInfo c n l t
, nameRules :: Assoc n (SList (MCFRule c n l t))
, topdownRules :: Assoc c (SList (MCFRule c n l t))
-- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
, emptyRules :: [MCFRule c n l t]
, epsilonRules :: [MCFRule c n l t]
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
, leftcornerCats :: Assoc c (SList (MCFRule c n l t))
, leftcornerTokens :: Assoc t (SList (MCFRule c n l t))
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
@@ -71,7 +72,7 @@ rangeRestrictPInfo (pinfo{-::MCFPInfo c n l t-}) inp =
MCFPInfo { grammarTokens = nubsort (map edgeRange (inputEdges inp))
, nameRules = rrAssoc (nameRules pinfo)
, topdownRules = rrAssoc (topdownRules pinfo)
, emptyRules = rrRules (emptyRules pinfo)
, epsilonRules = rrRules (epsilonRules pinfo)
, leftcornerCats = rrAssoc (leftcornerCats pinfo)
, leftcornerTokens = lctokens
, grammarCats = grammarCats pinfo
@@ -100,7 +101,7 @@ buildMCFPInfo grammar =
MCFPInfo { grammarTokens = grammartokens
, nameRules = namerules
, topdownRules = topdownrules
, emptyRules = emptyrules
, epsilonRules = epsilonrules
, leftcornerCats = leftcorncats
, leftcornerTokens = leftcorntoks
, grammarCats = grammarcats
@@ -115,7 +116,7 @@ buildMCFPInfo grammar =
[ (name, rule) | rule@(Rule (Abs _ _ name) _) <- allrules ]
topdownrules = accumAssoc id
[ (cat, rule) | rule@(Rule (Abs cat _ _) _) <- allrules ]
emptyrules = [ rule | rule@(Rule (Abs _ [] _) _) <- allrules ]
epsilonrules = [ rule | rule@(Rule _ (Cnc _ _ (Lin _ [] : _))) <- allrules ]
leftcorncats = accumAssoc id
[ (cat, rule) |
rule@(Rule _ (Cnc _ _ (Lin _ (Cat(cat,_,_):_) : _))) <- allrules ]
@@ -149,7 +150,7 @@ instance (Ord c, Ord n, Ord l, Ord t) => Print (MCFPInfo c n l t) where
"; categories=" ++ sl grammarCats ++
"; nameRules=" ++ sla nameRules ++
"; tdRules=" ++ sla topdownRules ++
"; emptyRules=" ++ sl emptyRules ++
"; epsilonRules=" ++ sl epsilonRules ++
"; lcCats=" ++ sla leftcornerCats ++
"; lcTokens=" ++ sla leftcornerTokens ++
"; byToken=" ++ sla rulesByToken ++