mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
corrected bottomup MCFG parsing, added GF.Parsing.MCFG.FastActive
This commit is contained in:
@@ -23,9 +23,10 @@ import GF.Parsing.MCFG.PInfo
|
||||
|
||||
import qualified GF.Parsing.MCFG.Naive as Naive
|
||||
import qualified GF.Parsing.MCFG.Active as Active
|
||||
import qualified GF.Parsing.MCFG.Active2 as Active2
|
||||
import qualified GF.Parsing.MCFG.FastActive as FastActive
|
||||
-- import qualified GF.Parsing.MCFG.Active2 as Active2
|
||||
import qualified GF.Parsing.MCFG.Incremental as Incremental
|
||||
import qualified GF.Parsing.MCFG.Incremental2 as Incremental2
|
||||
-- import qualified GF.Parsing.MCFG.Incremental2 as Incremental2
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- parsing
|
||||
@@ -35,13 +36,13 @@ parseMCF prs | prs `elem` strategies = Ok $ parseMCF' prs
|
||||
| otherwise = Bad $ "MCFG parsing strategy not defined: " ++ prs
|
||||
|
||||
|
||||
strategies = words "bottomup topdown n an ab at i an2 ab2 at2 i2 rn ran rab rat ri"
|
||||
strategies = words "bottomup topdown n an ab at i rn ran rab rat ri ft fb"
|
||||
|
||||
|
||||
parseMCF' :: (Ord c, Ord n, Ord l, Ord t) => String -> MCFParser c n l t
|
||||
|
||||
parseMCF' "bottomup" pinfo starts toks = Active.parse "b" pinfo starts toks
|
||||
parseMCF' "topdown" pinfo starts toks = Active.parse "t" pinfo starts toks
|
||||
parseMCF' "bottomup" pinfo starts toks = parseMCF' "fb" pinfo starts toks
|
||||
parseMCF' "topdown" pinfo starts toks = parseMCF' "ft" pinfo starts toks
|
||||
|
||||
parseMCF' "n" pinfo starts toks = Naive.parse pinfo starts toks
|
||||
parseMCF' "an" pinfo starts toks = Active.parse "n" pinfo starts toks
|
||||
@@ -49,10 +50,10 @@ parseMCF' "ab" pinfo starts toks = Active.parse "b" pinfo starts toks
|
||||
parseMCF' "at" pinfo starts toks = Active.parse "t" pinfo starts toks
|
||||
parseMCF' "i" pinfo starts toks = Incremental.parse pinfo starts toks
|
||||
|
||||
parseMCF' "an2" pinfo starts toks = Active2.parse "n" pinfo starts toks
|
||||
parseMCF' "ab2" pinfo starts toks = Active2.parse "b" pinfo starts toks
|
||||
parseMCF' "at2" pinfo starts toks = Active2.parse "t" pinfo starts toks
|
||||
parseMCF' "i2" pinfo starts toks = Incremental2.parse pinfo starts toks
|
||||
-- parseMCF' "an2" pinfo starts toks = Active2.parse "n" pinfo starts toks
|
||||
-- parseMCF' "ab2" pinfo starts toks = Active2.parse "b" pinfo starts toks
|
||||
-- parseMCF' "at2" pinfo starts toks = Active2.parse "t" pinfo starts toks
|
||||
-- parseMCF' "i2" pinfo starts toks = Incremental2.parse pinfo starts toks
|
||||
|
||||
parseMCF' "rn" pinfo starts toks = Naive.parseR (rrP pinfo toks) starts
|
||||
parseMCF' "ran" pinfo starts toks = Active.parseR "n" (rrP pinfo toks) starts
|
||||
@@ -61,4 +62,7 @@ parseMCF' "rat" pinfo starts toks = Active.parseR "t" (rrP pinfo toks) starts
|
||||
parseMCF' "ri" pinfo starts toks = Incremental.parseR (rrP pinfo toks) starts ntoks
|
||||
where ntoks = snd (inputBounds toks)
|
||||
|
||||
parseMCF' "fb" pinfo starts toks = FastActive.parse "b" (rrP pinfo toks) starts
|
||||
parseMCF' "ft" pinfo starts toks = FastActive.parse "t" (rrP pinfo toks) starts
|
||||
|
||||
rrP pi = rangeRestrictPInfo pi
|
||||
|
||||
@@ -57,7 +57,7 @@ process strategy pinfo starts toks
|
||||
| isBU strategy = [scan, predictKilbury pinfo toks]
|
||||
| isTD strategy = [scan, predictEarley pinfo toks]
|
||||
axioms | isNil strategy = predict pinfo toks
|
||||
| isBU strategy = terminal pinfo toks ++ initialScan pinfo toks
|
||||
| isBU strategy = {- terminal pinfo toks ++ -} initialScan pinfo toks
|
||||
| isTD strategy = initial pinfo starts toks
|
||||
|
||||
--processR :: (Ord n, Ord c, Ord l) =>
|
||||
@@ -70,7 +70,7 @@ processR strategy pinfo starts
|
||||
| isBU strategy = [scan, predictKilburyR pinfo]
|
||||
| isTD strategy = [scan, predictEarleyR pinfo]
|
||||
axioms | isNil strategy = predictR pinfo
|
||||
| isBU strategy = terminalR pinfo ++ initialScanR pinfo
|
||||
| isBU strategy = {- terminalR pinfo ++ -} initialScanR pinfo
|
||||
| isTD strategy = initialR pinfo starts
|
||||
|
||||
isNil s = s=="n"
|
||||
@@ -200,18 +200,20 @@ predictEarleyR2 rng (Rule abs (Cnc _ _ (lin : lins))) =
|
||||
----------------------------------------------------------------------
|
||||
-- Kilbury --
|
||||
|
||||
terminal :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l]
|
||||
terminal pinfo toks =
|
||||
tracePrt "MCFG.Active (Kilbury) - initial terminal rules" (prt . length) $
|
||||
do Rule abs (Cnc _ _ lins) <- emptyRules pinfo
|
||||
lins' <- rangeRestRec toks lins
|
||||
return $ Final abs (makeRangeRec lins') []
|
||||
-- terminal :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l]
|
||||
-- terminal pinfo toks =
|
||||
-- tracePrt "MCFG.Active (Kilbury) - initial terminal rules" (prt . length) $
|
||||
-- do Rule abs (Cnc _ _ lins) <- emptyRules pinfo
|
||||
-- lins' <- rangeRestRec toks lins
|
||||
-- return $ Final abs (makeRangeRec lins') []
|
||||
|
||||
initialScan :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l]
|
||||
initialScan pinfo toks =
|
||||
tracePrt "MCFG.Active (Kilbury) - initial scanned rules" (prt . length) $
|
||||
tracePrt "MCFG.Active (Kilbury) - initial scanned rules + epsilon rules" (prt . length) $
|
||||
do tok <- aElems (inputToken toks)
|
||||
Rule abs (Cnc _ _ lins) <- leftcornerTokens pinfo ? tok
|
||||
Rule abs (Cnc _ _ lins) <-
|
||||
leftcornerTokens pinfo ? tok ++
|
||||
epsilonRules pinfo
|
||||
lin' : lins' <- rangeRestRec toks lins
|
||||
return $ Active abs [] EmptyRange lin' lins' (emptyChildren abs)
|
||||
|
||||
@@ -230,16 +232,18 @@ predictKilbury _ _ _ _ = []
|
||||
----------------------------------------------------------------------
|
||||
-- KilburyR --
|
||||
|
||||
terminalR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l]
|
||||
terminalR pinfo =
|
||||
tracePrt "MCFG.Active (Kilbury Range) - initial terminal rules" (prt . length) $
|
||||
do Rule abs (Cnc _ _ lins) <- emptyRules pinfo
|
||||
return $ Final abs (makeRangeRec lins) []
|
||||
-- terminalR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l]
|
||||
-- terminalR pinfo =
|
||||
-- tracePrt "MCFG.Active (Kilbury Range) - initial terminal rules" (prt . length) $
|
||||
-- do Rule abs (Cnc _ _ lins) <- emptyRules pinfo
|
||||
-- return $ Final abs (makeRangeRec lins) []
|
||||
|
||||
initialScanR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l]
|
||||
initialScanR pinfo =
|
||||
tracePrt "MCFG.Active (Kilbury Range) - initial scanned rules" (prt . length) $
|
||||
do Rule abs (Cnc _ _ (lin : lins)) <- concatMap snd (aAssocs (leftcornerTokens pinfo))
|
||||
do Rule abs (Cnc _ _ (lin : lins)) <-
|
||||
concatMap snd (aAssocs (leftcornerTokens pinfo)) ++
|
||||
epsilonRules pinfo
|
||||
return $ Active abs [] EmptyRange lin lins (emptyChildren abs)
|
||||
|
||||
predictKilburyR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range
|
||||
|
||||
175
src/GF/Parsing/MCFG/FastActive.hs
Normal file
175
src/GF/Parsing/MCFG/FastActive.hs
Normal file
@@ -0,0 +1,175 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : Peter Ljunglöf
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- MCFG parsing, the active algorithm, optimized version
|
||||
-- structure stolen from Krasimir Angelov's GF.Parsing.FCFG.Active
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Parsing.MCFG.FastActive (parse) where
|
||||
|
||||
import GF.Data.GeneralDeduction
|
||||
import GF.Data.Assoc
|
||||
import GF.Data.Utilities
|
||||
|
||||
import GF.Formalism.GCFG
|
||||
import GF.Formalism.MCFG
|
||||
import GF.Formalism.Utilities
|
||||
|
||||
import GF.Infra.Ident
|
||||
|
||||
import GF.Parsing.MCFG.Range
|
||||
import GF.Parsing.MCFG.PInfo
|
||||
|
||||
import GF.System.Tracing
|
||||
|
||||
import Control.Monad (guard)
|
||||
|
||||
import GF.Infra.Print
|
||||
|
||||
import qualified Data.List as List
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Set as Set
|
||||
import Data.Array
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * parsing
|
||||
|
||||
-- parse :: (Ord c, Ord n, Ord l, Ord t) => String -> MCFParser c n l t
|
||||
parse strategy pinfo starts =
|
||||
[ Abs (cat, found) (zip rhs rrecs) fun |
|
||||
Final (Abs cat rhs fun) found rrecs <- listXChartFinal chart ]
|
||||
where chart = process strategy pinfo axioms emptyXChart
|
||||
|
||||
-- axioms | isBU strategy = terminal pinfo toks ++ initialScan pinfo toks
|
||||
axioms | isBU strategy = initialBU pinfo
|
||||
| isTD strategy = initialTD pinfo starts
|
||||
|
||||
isBU s = s=="b"
|
||||
isTD s = s=="t"
|
||||
|
||||
-- used in prediction
|
||||
emptyChildren :: Abstract c n -> [RangeRec l]
|
||||
emptyChildren (Abs _ rhs _) = replicate (length rhs) []
|
||||
|
||||
updateChildren :: Eq l => [RangeRec l] -> Int -> RangeRec l -> [[RangeRec l]]
|
||||
updateChildren recs i rec = updateNthM update i recs
|
||||
where update rec' = do guard (null rec' || rec' == rec)
|
||||
return rec
|
||||
|
||||
process :: (Ord c, Ord n, Ord l) => String -> MCFPInfo c n l Range -> [Item c n l] -> XChart c n l -> XChart c n l
|
||||
process strategy pinfo [] chart = chart
|
||||
process strategy pinfo (item:items) chart = process strategy pinfo items $! univRule item chart
|
||||
where
|
||||
univRule item@(Active abs found rng (Lin l syms) lins recs) chart
|
||||
= case syms of
|
||||
Cat(c,r,d) : syms' ->
|
||||
case insertXChart chart item c of
|
||||
Nothing -> chart
|
||||
Just chart ->
|
||||
let items = -- predict topdown
|
||||
[ Active abs [] EmptyRange lin lins (emptyChildren abs) |
|
||||
isTD strategy,
|
||||
Rule abs (Cnc _ _ (lin:lins)) <- topdownRules pinfo ? c ] ++
|
||||
|
||||
-- combine
|
||||
[ Active abs found rng'' (Lin l syms') lins recs' |
|
||||
Final _ found' _ <- lookupXChartFinal chart c,
|
||||
rng' <- projection r found',
|
||||
rng'' <- concatRange rng rng',
|
||||
recs' <- updateChildren recs d found' ]
|
||||
in process strategy pinfo items chart
|
||||
|
||||
-- scan
|
||||
Tok rng' : syms' ->
|
||||
let items = [ Active abs found rng'' (Lin l syms') lins recs |
|
||||
rng'' <- concatRange rng rng' ]
|
||||
in process strategy pinfo items chart
|
||||
|
||||
-- complete
|
||||
[] -> case lins of
|
||||
(lin':lins') -> univRule (Active abs ((l,rng):found) EmptyRange lin' lins' recs) chart
|
||||
[] -> univRule (Final abs (reverse ((l,rng):found)) recs) chart
|
||||
|
||||
univRule item@(Final abs@(Abs cat _ _) found' recs) chart =
|
||||
case insertXChart chart item cat of
|
||||
Nothing -> chart
|
||||
Just chart ->
|
||||
let items = -- predict bottomup
|
||||
[ Active abs [] rng (Lin l syms') lins children |
|
||||
isBU strategy,
|
||||
Rule abs (Cnc _ _ (Lin l (Cat(c,r,d):syms') : lins)) <- leftcornerCats pinfo ? cat,
|
||||
-- lin' : lins' <- rangeRestRec toks (Lin l syms' : lins),
|
||||
rng <- projection r found',
|
||||
children <- unifyRec (emptyChildren abs) d found' ] ++
|
||||
|
||||
-- combine
|
||||
[ Active abs found rng'' (Lin l syms') lins recs' |
|
||||
Active abs found rng (Lin l (Cat(c,r,d):syms')) lins recs <- lookupXChartAct chart cat,
|
||||
rng' <- projection r found',
|
||||
rng'' <- concatRange rng rng',
|
||||
recs' <- updateChildren recs d found' ]
|
||||
in process strategy pinfo items chart
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * XChart
|
||||
|
||||
data XChart c n l = XChart !(AChart c n l) !(AChart c n l)
|
||||
type AChart c n l = ParseChart (Item c n l) c
|
||||
|
||||
data Item c n l = Active (Abstract c n)
|
||||
(RangeRec l)
|
||||
Range
|
||||
(Lin c l Range)
|
||||
(LinRec c l Range)
|
||||
[RangeRec l]
|
||||
| Final (Abstract c n) (RangeRec l) [RangeRec l]
|
||||
-- | Passive c (RangeRec l)
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
emptyXChart :: (Ord c, Ord n, Ord l) => XChart c n l
|
||||
emptyXChart = XChart emptyChart emptyChart
|
||||
|
||||
insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _) c =
|
||||
case chartInsert actives item c of
|
||||
Nothing -> Nothing
|
||||
Just actives -> Just (XChart actives finals)
|
||||
|
||||
insertXChart (XChart actives finals) item@(Final _ _ _) c =
|
||||
case chartInsert finals item c of
|
||||
Nothing -> Nothing
|
||||
Just finals -> Just (XChart actives finals)
|
||||
|
||||
lookupXChartAct (XChart actives finals) c = chartLookup actives c
|
||||
lookupXChartFinal (XChart actives finals) c = chartLookup finals c
|
||||
|
||||
listXChartAct (XChart actives finals) = chartList actives
|
||||
listXChartFinal (XChart actives finals) = chartList finals
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- Earley --
|
||||
|
||||
-- called with all starting categories
|
||||
initialTD :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [c] -> [Item c n l]
|
||||
initialTD pinfo starts =
|
||||
[ Active abs [] (Range (0, 0)) lin' lins' (emptyChildren abs) |
|
||||
cat <- starts,
|
||||
Rule abs (Cnc _ _ (lin':lins')) <- topdownRules pinfo ? cat ]
|
||||
-- lin' : lins' <- rangeRestRec toks lins
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- Kilbury --
|
||||
|
||||
initialBU :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l]
|
||||
initialBU pinfo =
|
||||
[ Active abs [] EmptyRange lin' lins' (emptyChildren abs) |
|
||||
-- do tok <- aElems (inputToken toks)
|
||||
Rule abs (Cnc _ _ (lin':lins')) <-
|
||||
concatMap snd (aAssocs (leftcornerTokens pinfo)) ++
|
||||
-- leftcornerTokens pinfo ? tok ++
|
||||
epsilonRules pinfo ]
|
||||
-- lin' : lins' <- rangeRestRec toks lins
|
||||
@@ -46,7 +46,8 @@ data MCFPInfo c n l t
|
||||
, nameRules :: Assoc n (SList (MCFRule c n l t))
|
||||
, topdownRules :: Assoc c (SList (MCFRule c n l t))
|
||||
-- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
|
||||
, emptyRules :: [MCFRule c n l t]
|
||||
, epsilonRules :: [MCFRule c n l t]
|
||||
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
|
||||
, leftcornerCats :: Assoc c (SList (MCFRule c n l t))
|
||||
, leftcornerTokens :: Assoc t (SList (MCFRule c n l t))
|
||||
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
|
||||
@@ -71,7 +72,7 @@ rangeRestrictPInfo (pinfo{-::MCFPInfo c n l t-}) inp =
|
||||
MCFPInfo { grammarTokens = nubsort (map edgeRange (inputEdges inp))
|
||||
, nameRules = rrAssoc (nameRules pinfo)
|
||||
, topdownRules = rrAssoc (topdownRules pinfo)
|
||||
, emptyRules = rrRules (emptyRules pinfo)
|
||||
, epsilonRules = rrRules (epsilonRules pinfo)
|
||||
, leftcornerCats = rrAssoc (leftcornerCats pinfo)
|
||||
, leftcornerTokens = lctokens
|
||||
, grammarCats = grammarCats pinfo
|
||||
@@ -100,7 +101,7 @@ buildMCFPInfo grammar =
|
||||
MCFPInfo { grammarTokens = grammartokens
|
||||
, nameRules = namerules
|
||||
, topdownRules = topdownrules
|
||||
, emptyRules = emptyrules
|
||||
, epsilonRules = epsilonrules
|
||||
, leftcornerCats = leftcorncats
|
||||
, leftcornerTokens = leftcorntoks
|
||||
, grammarCats = grammarcats
|
||||
@@ -115,7 +116,7 @@ buildMCFPInfo grammar =
|
||||
[ (name, rule) | rule@(Rule (Abs _ _ name) _) <- allrules ]
|
||||
topdownrules = accumAssoc id
|
||||
[ (cat, rule) | rule@(Rule (Abs cat _ _) _) <- allrules ]
|
||||
emptyrules = [ rule | rule@(Rule (Abs _ [] _) _) <- allrules ]
|
||||
epsilonrules = [ rule | rule@(Rule _ (Cnc _ _ (Lin _ [] : _))) <- allrules ]
|
||||
leftcorncats = accumAssoc id
|
||||
[ (cat, rule) |
|
||||
rule@(Rule _ (Cnc _ _ (Lin _ (Cat(cat,_,_):_) : _))) <- allrules ]
|
||||
@@ -149,7 +150,7 @@ instance (Ord c, Ord n, Ord l, Ord t) => Print (MCFPInfo c n l t) where
|
||||
"; categories=" ++ sl grammarCats ++
|
||||
"; nameRules=" ++ sla nameRules ++
|
||||
"; tdRules=" ++ sla topdownRules ++
|
||||
"; emptyRules=" ++ sl emptyRules ++
|
||||
"; epsilonRules=" ++ sl epsilonRules ++
|
||||
"; lcCats=" ++ sla leftcornerCats ++
|
||||
"; lcTokens=" ++ sla leftcornerTokens ++
|
||||
"; byToken=" ++ sla rulesByToken ++
|
||||
|
||||
Reference in New Issue
Block a user