From da3a5d27e319dd7812cbc2ec1c2843a05838d767 Mon Sep 17 00:00:00 2001 From: peb Date: Wed, 7 Jun 2006 16:18:28 +0000 Subject: [PATCH] corrected bottomup MCFG parsing, added GF.Parsing.MCFG.FastActive --- src/GF/Parsing/MCFG.hs | 22 ++-- src/GF/Parsing/MCFG/Active.hs | 36 +++--- src/GF/Parsing/MCFG/FastActive.hs | 175 ++++++++++++++++++++++++++++++ src/GF/Parsing/MCFG/PInfo.hs | 11 +- 4 files changed, 214 insertions(+), 30 deletions(-) create mode 100644 src/GF/Parsing/MCFG/FastActive.hs diff --git a/src/GF/Parsing/MCFG.hs b/src/GF/Parsing/MCFG.hs index 6aec811de..bda3af675 100644 --- a/src/GF/Parsing/MCFG.hs +++ b/src/GF/Parsing/MCFG.hs @@ -23,9 +23,10 @@ import GF.Parsing.MCFG.PInfo import qualified GF.Parsing.MCFG.Naive as Naive import qualified GF.Parsing.MCFG.Active as Active -import qualified GF.Parsing.MCFG.Active2 as Active2 +import qualified GF.Parsing.MCFG.FastActive as FastActive +-- import qualified GF.Parsing.MCFG.Active2 as Active2 import qualified GF.Parsing.MCFG.Incremental as Incremental -import qualified GF.Parsing.MCFG.Incremental2 as Incremental2 +-- import qualified GF.Parsing.MCFG.Incremental2 as Incremental2 ---------------------------------------------------------------------- -- parsing @@ -35,13 +36,13 @@ parseMCF prs | prs `elem` strategies = Ok $ parseMCF' prs | otherwise = Bad $ "MCFG parsing strategy not defined: " ++ prs -strategies = words "bottomup topdown n an ab at i an2 ab2 at2 i2 rn ran rab rat ri" +strategies = words "bottomup topdown n an ab at i rn ran rab rat ri ft fb" parseMCF' :: (Ord c, Ord n, Ord l, Ord t) => String -> MCFParser c n l t -parseMCF' "bottomup" pinfo starts toks = Active.parse "b" pinfo starts toks -parseMCF' "topdown" pinfo starts toks = Active.parse "t" pinfo starts toks +parseMCF' "bottomup" pinfo starts toks = parseMCF' "fb" pinfo starts toks +parseMCF' "topdown" pinfo starts toks = parseMCF' "ft" pinfo starts toks parseMCF' "n" pinfo starts toks = Naive.parse pinfo starts toks parseMCF' "an" pinfo starts toks = Active.parse "n" pinfo starts toks @@ -49,10 +50,10 @@ parseMCF' "ab" pinfo starts toks = Active.parse "b" pinfo starts toks parseMCF' "at" pinfo starts toks = Active.parse "t" pinfo starts toks parseMCF' "i" pinfo starts toks = Incremental.parse pinfo starts toks -parseMCF' "an2" pinfo starts toks = Active2.parse "n" pinfo starts toks -parseMCF' "ab2" pinfo starts toks = Active2.parse "b" pinfo starts toks -parseMCF' "at2" pinfo starts toks = Active2.parse "t" pinfo starts toks -parseMCF' "i2" pinfo starts toks = Incremental2.parse pinfo starts toks +-- parseMCF' "an2" pinfo starts toks = Active2.parse "n" pinfo starts toks +-- parseMCF' "ab2" pinfo starts toks = Active2.parse "b" pinfo starts toks +-- parseMCF' "at2" pinfo starts toks = Active2.parse "t" pinfo starts toks +-- parseMCF' "i2" pinfo starts toks = Incremental2.parse pinfo starts toks parseMCF' "rn" pinfo starts toks = Naive.parseR (rrP pinfo toks) starts parseMCF' "ran" pinfo starts toks = Active.parseR "n" (rrP pinfo toks) starts @@ -61,4 +62,7 @@ parseMCF' "rat" pinfo starts toks = Active.parseR "t" (rrP pinfo toks) starts parseMCF' "ri" pinfo starts toks = Incremental.parseR (rrP pinfo toks) starts ntoks where ntoks = snd (inputBounds toks) +parseMCF' "fb" pinfo starts toks = FastActive.parse "b" (rrP pinfo toks) starts +parseMCF' "ft" pinfo starts toks = FastActive.parse "t" (rrP pinfo toks) starts + rrP pi = rangeRestrictPInfo pi diff --git a/src/GF/Parsing/MCFG/Active.hs b/src/GF/Parsing/MCFG/Active.hs index a422f7e10..5ccd43398 100644 --- a/src/GF/Parsing/MCFG/Active.hs +++ b/src/GF/Parsing/MCFG/Active.hs @@ -57,7 +57,7 @@ process strategy pinfo starts toks | isBU strategy = [scan, predictKilbury pinfo toks] | isTD strategy = [scan, predictEarley pinfo toks] axioms | isNil strategy = predict pinfo toks - | isBU strategy = terminal pinfo toks ++ initialScan pinfo toks + | isBU strategy = {- terminal pinfo toks ++ -} initialScan pinfo toks | isTD strategy = initial pinfo starts toks --processR :: (Ord n, Ord c, Ord l) => @@ -70,7 +70,7 @@ processR strategy pinfo starts | isBU strategy = [scan, predictKilburyR pinfo] | isTD strategy = [scan, predictEarleyR pinfo] axioms | isNil strategy = predictR pinfo - | isBU strategy = terminalR pinfo ++ initialScanR pinfo + | isBU strategy = {- terminalR pinfo ++ -} initialScanR pinfo | isTD strategy = initialR pinfo starts isNil s = s=="n" @@ -200,18 +200,20 @@ predictEarleyR2 rng (Rule abs (Cnc _ _ (lin : lins))) = ---------------------------------------------------------------------- -- Kilbury -- -terminal :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l] -terminal pinfo toks = - tracePrt "MCFG.Active (Kilbury) - initial terminal rules" (prt . length) $ - do Rule abs (Cnc _ _ lins) <- emptyRules pinfo - lins' <- rangeRestRec toks lins - return $ Final abs (makeRangeRec lins') [] +-- terminal :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l] +-- terminal pinfo toks = +-- tracePrt "MCFG.Active (Kilbury) - initial terminal rules" (prt . length) $ +-- do Rule abs (Cnc _ _ lins) <- emptyRules pinfo +-- lins' <- rangeRestRec toks lins +-- return $ Final abs (makeRangeRec lins') [] initialScan :: (Ord c, Ord n, Ord l, Ord t) => MCFPInfo c n l t -> Input t -> [Item c n l] initialScan pinfo toks = - tracePrt "MCFG.Active (Kilbury) - initial scanned rules" (prt . length) $ + tracePrt "MCFG.Active (Kilbury) - initial scanned rules + epsilon rules" (prt . length) $ do tok <- aElems (inputToken toks) - Rule abs (Cnc _ _ lins) <- leftcornerTokens pinfo ? tok + Rule abs (Cnc _ _ lins) <- + leftcornerTokens pinfo ? tok ++ + epsilonRules pinfo lin' : lins' <- rangeRestRec toks lins return $ Active abs [] EmptyRange lin' lins' (emptyChildren abs) @@ -230,16 +232,18 @@ predictKilbury _ _ _ _ = [] ---------------------------------------------------------------------- -- KilburyR -- -terminalR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l] -terminalR pinfo = - tracePrt "MCFG.Active (Kilbury Range) - initial terminal rules" (prt . length) $ - do Rule abs (Cnc _ _ lins) <- emptyRules pinfo - return $ Final abs (makeRangeRec lins) [] +-- terminalR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l] +-- terminalR pinfo = +-- tracePrt "MCFG.Active (Kilbury Range) - initial terminal rules" (prt . length) $ +-- do Rule abs (Cnc _ _ lins) <- emptyRules pinfo +-- return $ Final abs (makeRangeRec lins) [] initialScanR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l] initialScanR pinfo = tracePrt "MCFG.Active (Kilbury Range) - initial scanned rules" (prt . length) $ - do Rule abs (Cnc _ _ (lin : lins)) <- concatMap snd (aAssocs (leftcornerTokens pinfo)) + do Rule abs (Cnc _ _ (lin : lins)) <- + concatMap snd (aAssocs (leftcornerTokens pinfo)) ++ + epsilonRules pinfo return $ Active abs [] EmptyRange lin lins (emptyChildren abs) predictKilburyR :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range diff --git a/src/GF/Parsing/MCFG/FastActive.hs b/src/GF/Parsing/MCFG/FastActive.hs new file mode 100644 index 000000000..de33172d2 --- /dev/null +++ b/src/GF/Parsing/MCFG/FastActive.hs @@ -0,0 +1,175 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : Peter Ljunglöf +-- Stability : (stable) +-- Portability : (portable) +-- +-- MCFG parsing, the active algorithm, optimized version +-- structure stolen from Krasimir Angelov's GF.Parsing.FCFG.Active +----------------------------------------------------------------------------- + +module GF.Parsing.MCFG.FastActive (parse) where + +import GF.Data.GeneralDeduction +import GF.Data.Assoc +import GF.Data.Utilities + +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Formalism.Utilities + +import GF.Infra.Ident + +import GF.Parsing.MCFG.Range +import GF.Parsing.MCFG.PInfo + +import GF.System.Tracing + +import Control.Monad (guard) + +import GF.Infra.Print + +import qualified Data.List as List +import qualified Data.Map as Map +import qualified Data.Set as Set +import Data.Array + +---------------------------------------------------------------------- +-- * parsing + +-- parse :: (Ord c, Ord n, Ord l, Ord t) => String -> MCFParser c n l t +parse strategy pinfo starts = + [ Abs (cat, found) (zip rhs rrecs) fun | + Final (Abs cat rhs fun) found rrecs <- listXChartFinal chart ] + where chart = process strategy pinfo axioms emptyXChart + + -- axioms | isBU strategy = terminal pinfo toks ++ initialScan pinfo toks + axioms | isBU strategy = initialBU pinfo + | isTD strategy = initialTD pinfo starts + +isBU s = s=="b" +isTD s = s=="t" + +-- used in prediction +emptyChildren :: Abstract c n -> [RangeRec l] +emptyChildren (Abs _ rhs _) = replicate (length rhs) [] + +updateChildren :: Eq l => [RangeRec l] -> Int -> RangeRec l -> [[RangeRec l]] +updateChildren recs i rec = updateNthM update i recs + where update rec' = do guard (null rec' || rec' == rec) + return rec + +process :: (Ord c, Ord n, Ord l) => String -> MCFPInfo c n l Range -> [Item c n l] -> XChart c n l -> XChart c n l +process strategy pinfo [] chart = chart +process strategy pinfo (item:items) chart = process strategy pinfo items $! univRule item chart + where + univRule item@(Active abs found rng (Lin l syms) lins recs) chart + = case syms of + Cat(c,r,d) : syms' -> + case insertXChart chart item c of + Nothing -> chart + Just chart -> + let items = -- predict topdown + [ Active abs [] EmptyRange lin lins (emptyChildren abs) | + isTD strategy, + Rule abs (Cnc _ _ (lin:lins)) <- topdownRules pinfo ? c ] ++ + + -- combine + [ Active abs found rng'' (Lin l syms') lins recs' | + Final _ found' _ <- lookupXChartFinal chart c, + rng' <- projection r found', + rng'' <- concatRange rng rng', + recs' <- updateChildren recs d found' ] + in process strategy pinfo items chart + + -- scan + Tok rng' : syms' -> + let items = [ Active abs found rng'' (Lin l syms') lins recs | + rng'' <- concatRange rng rng' ] + in process strategy pinfo items chart + + -- complete + [] -> case lins of + (lin':lins') -> univRule (Active abs ((l,rng):found) EmptyRange lin' lins' recs) chart + [] -> univRule (Final abs (reverse ((l,rng):found)) recs) chart + + univRule item@(Final abs@(Abs cat _ _) found' recs) chart = + case insertXChart chart item cat of + Nothing -> chart + Just chart -> + let items = -- predict bottomup + [ Active abs [] rng (Lin l syms') lins children | + isBU strategy, + Rule abs (Cnc _ _ (Lin l (Cat(c,r,d):syms') : lins)) <- leftcornerCats pinfo ? cat, + -- lin' : lins' <- rangeRestRec toks (Lin l syms' : lins), + rng <- projection r found', + children <- unifyRec (emptyChildren abs) d found' ] ++ + + -- combine + [ Active abs found rng'' (Lin l syms') lins recs' | + Active abs found rng (Lin l (Cat(c,r,d):syms')) lins recs <- lookupXChartAct chart cat, + rng' <- projection r found', + rng'' <- concatRange rng rng', + recs' <- updateChildren recs d found' ] + in process strategy pinfo items chart + +---------------------------------------------------------------------- +-- * XChart + +data XChart c n l = XChart !(AChart c n l) !(AChart c n l) +type AChart c n l = ParseChart (Item c n l) c + +data Item c n l = Active (Abstract c n) + (RangeRec l) + Range + (Lin c l Range) + (LinRec c l Range) + [RangeRec l] + | Final (Abstract c n) (RangeRec l) [RangeRec l] +-- | Passive c (RangeRec l) + deriving (Eq, Ord, Show) + +emptyXChart :: (Ord c, Ord n, Ord l) => XChart c n l +emptyXChart = XChart emptyChart emptyChart + +insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _) c = + case chartInsert actives item c of + Nothing -> Nothing + Just actives -> Just (XChart actives finals) + +insertXChart (XChart actives finals) item@(Final _ _ _) c = + case chartInsert finals item c of + Nothing -> Nothing + Just finals -> Just (XChart actives finals) + +lookupXChartAct (XChart actives finals) c = chartLookup actives c +lookupXChartFinal (XChart actives finals) c = chartLookup finals c + +listXChartAct (XChart actives finals) = chartList actives +listXChartFinal (XChart actives finals) = chartList finals + + +---------------------------------------------------------------------- +-- Earley -- + +-- called with all starting categories +initialTD :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [c] -> [Item c n l] +initialTD pinfo starts = + [ Active abs [] (Range (0, 0)) lin' lins' (emptyChildren abs) | + cat <- starts, + Rule abs (Cnc _ _ (lin':lins')) <- topdownRules pinfo ? cat ] + -- lin' : lins' <- rangeRestRec toks lins + + +---------------------------------------------------------------------- +-- Kilbury -- + +initialBU :: (Ord c, Ord n, Ord l) => MCFPInfo c n l Range -> [Item c n l] +initialBU pinfo = + [ Active abs [] EmptyRange lin' lins' (emptyChildren abs) | + -- do tok <- aElems (inputToken toks) + Rule abs (Cnc _ _ (lin':lins')) <- + concatMap snd (aAssocs (leftcornerTokens pinfo)) ++ + -- leftcornerTokens pinfo ? tok ++ + epsilonRules pinfo ] + -- lin' : lins' <- rangeRestRec toks lins diff --git a/src/GF/Parsing/MCFG/PInfo.hs b/src/GF/Parsing/MCFG/PInfo.hs index 4fbe3e736..5a61a4edf 100644 --- a/src/GF/Parsing/MCFG/PInfo.hs +++ b/src/GF/Parsing/MCFG/PInfo.hs @@ -46,7 +46,8 @@ data MCFPInfo c n l t , nameRules :: Assoc n (SList (MCFRule c n l t)) , topdownRules :: Assoc c (SList (MCFRule c n l t)) -- ^ used in 'GF.Parsing.MCFG.Active' (Earley): - , emptyRules :: [MCFRule c n l t] + , epsilonRules :: [MCFRule c n l t] + -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): , leftcornerCats :: Assoc c (SList (MCFRule c n l t)) , leftcornerTokens :: Assoc t (SList (MCFRule c n l t)) -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): @@ -71,7 +72,7 @@ rangeRestrictPInfo (pinfo{-::MCFPInfo c n l t-}) inp = MCFPInfo { grammarTokens = nubsort (map edgeRange (inputEdges inp)) , nameRules = rrAssoc (nameRules pinfo) , topdownRules = rrAssoc (topdownRules pinfo) - , emptyRules = rrRules (emptyRules pinfo) + , epsilonRules = rrRules (epsilonRules pinfo) , leftcornerCats = rrAssoc (leftcornerCats pinfo) , leftcornerTokens = lctokens , grammarCats = grammarCats pinfo @@ -100,7 +101,7 @@ buildMCFPInfo grammar = MCFPInfo { grammarTokens = grammartokens , nameRules = namerules , topdownRules = topdownrules - , emptyRules = emptyrules + , epsilonRules = epsilonrules , leftcornerCats = leftcorncats , leftcornerTokens = leftcorntoks , grammarCats = grammarcats @@ -115,7 +116,7 @@ buildMCFPInfo grammar = [ (name, rule) | rule@(Rule (Abs _ _ name) _) <- allrules ] topdownrules = accumAssoc id [ (cat, rule) | rule@(Rule (Abs cat _ _) _) <- allrules ] - emptyrules = [ rule | rule@(Rule (Abs _ [] _) _) <- allrules ] + epsilonrules = [ rule | rule@(Rule _ (Cnc _ _ (Lin _ [] : _))) <- allrules ] leftcorncats = accumAssoc id [ (cat, rule) | rule@(Rule _ (Cnc _ _ (Lin _ (Cat(cat,_,_):_) : _))) <- allrules ] @@ -149,7 +150,7 @@ instance (Ord c, Ord n, Ord l, Ord t) => Print (MCFPInfo c n l t) where "; categories=" ++ sl grammarCats ++ "; nameRules=" ++ sla nameRules ++ "; tdRules=" ++ sla topdownRules ++ - "; emptyRules=" ++ sl emptyRules ++ + "; epsilonRules=" ++ sl epsilonRules ++ "; lcCats=" ++ sla leftcornerCats ++ "; lcTokens=" ++ sla leftcornerTokens ++ "; byToken=" ++ sla rulesByToken ++