1
0
forked from GitHub/gf-core
Files
gf-core/src-3.0/GF/Parsing/MCFG/PInfo.hs

163 lines
6.0 KiB
Haskell

---------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/05/13 12:40:19 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.5 $
--
-- MCFG parsing, parser information
-----------------------------------------------------------------------------
module GF.Parsing.MCFG.PInfo where
import GF.System.Tracing
import GF.Infra.Print
import GF.Formalism.Utilities
import GF.Formalism.GCFG
import GF.Formalism.MCFG
import GF.Data.SortedList
import GF.Data.Assoc
import GF.Parsing.MCFG.Range
----------------------------------------------------------------------
-- type declarations
-- | the list of categories = possible starting categories
type MCFParser c n l t = MCFPInfo c n l t
-> [c]
-> Input t
-> SyntaxChart n (c, RangeRec l)
makeFinalEdge :: c -> l -> (Int, Int) -> (c, RangeRec l)
makeFinalEdge cat lbl bnds = (cat, [(lbl, makeRange bnds)])
------------------------------------------------------------
-- parser information
data MCFPInfo c n l t
= MCFPInfo { grammarTokens :: SList t
, nameRules :: Assoc n (SList (MCFRule c n l t))
, topdownRules :: Assoc c (SList (MCFRule c n l t))
-- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
, epsilonRules :: [MCFRule c n l t]
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
, leftcornerCats :: Assoc c (SList (MCFRule c n l t))
, leftcornerTokens :: Assoc t (SList (MCFRule c n l t))
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
, grammarCats :: SList c
-- ^ used when calculating starting categories
, rulesByToken :: Assoc t (SList (MCFRule c n l t, SList t))
, rulesWithoutTokens :: SList (MCFRule c n l t)
-- ^ used by 'rulesMatchingInput'
, allRules :: MCFGrammar c n l t
-- ^ used by any unoptimized algorithm
--bottomupRules :: Assoc (Symbol c t) (SList (CFRule c n t)),
--emptyLeftcornerRules :: Assoc c (SList (CFRule c n t)),
--emptyCategories :: Set c,
}
rangeRestrictPInfo :: (Ord c, Ord n, Ord l, Ord t) =>
MCFPInfo c n l t -> Input t -> MCFPInfo c n l Range
rangeRestrictPInfo (pinfo{-::MCFPInfo c n l t-}) inp =
tracePrt "MCFG.PInfo - Restricting the parser information" (prt . grammarTokens)
MCFPInfo { grammarTokens = nubsort (map edgeRange (inputEdges inp))
, nameRules = rrAssoc (nameRules pinfo)
, topdownRules = rrAssoc (topdownRules pinfo)
, epsilonRules = rrRules (epsilonRules pinfo)
, leftcornerCats = rrAssoc (leftcornerCats pinfo)
, leftcornerTokens = lctokens
, grammarCats = grammarCats pinfo
, rulesByToken = emptyAssoc -- error "MCFG.PInfo.rulesByToken - no range restriction"
, rulesWithoutTokens = [] -- error "MCFG.PInfo.rulesByToken - no range restriction"
, allRules = allrules -- rrRules (allRules pinfo)
}
where lctokens = accumAssoc id
[ (rng, rule) | (tok, rules) <- aAssocs (leftcornerTokens pinfo),
inputToken inp ?= tok,
rule@(Rule _ (Cnc _ _ (Lin _ (Tok rng:_) : _)))
<- concatMap (rangeRestrictRule inp) rules ]
allrules = rrRules $ rulesMatchingInput pinfo inp
rrAssoc assoc = filterNull $ fmap rrRules assoc
filterNull assoc = assocFilter (not . null) assoc
rrRules rules = concatMap (rangeRestrictRule inp) rules
buildMCFPInfo :: (Ord c, Ord n, Ord l, Ord t) => MCFGrammar c n l t -> MCFPInfo c n l t
buildMCFPInfo grammar =
traceCalcFirst grammar $
tracePrt "MCFG.PInfo - parser info" (prt) $
MCFPInfo { grammarTokens = grammartokens
, nameRules = namerules
, topdownRules = topdownrules
, epsilonRules = epsilonrules
, leftcornerCats = leftcorncats
, leftcornerTokens = leftcorntoks
, grammarCats = grammarcats
, rulesByToken = rulesbytoken
, rulesWithoutTokens = ruleswithouttokens
, allRules = allrules
}
where allrules = concatMap expandVariants grammar
grammartokens = union (map fst ruletokens)
namerules = accumAssoc id
[ (name, rule) | rule@(Rule (Abs _ _ name) _) <- allrules ]
topdownrules = accumAssoc id
[ (cat, rule) | rule@(Rule (Abs cat _ _) _) <- allrules ]
epsilonrules = [ rule | rule@(Rule _ (Cnc _ _ (Lin _ [] : _))) <- allrules ]
leftcorncats = accumAssoc id
[ (cat, rule) |
rule@(Rule _ (Cnc _ _ (Lin _ (Cat(cat,_,_):_) : _))) <- allrules ]
leftcorntoks = accumAssoc id
[ (tok, rule) |
rule@(Rule _ (Cnc _ _ (Lin _ (Tok tok:_) : _))) <- allrules ]
grammarcats = aElems topdownrules
ruletokens = [ (toksoflins lins, rule) |
rule@(Rule _ (Cnc _ _ lins)) <- allrules ]
toksoflins lins = nubsort [ tok | Lin _ syms <- lins, Tok tok <- syms ]
rulesbytoken = accumAssoc id
[ (tok, (rule, toks)) | (tok:toks, rule) <- ruletokens ]
ruleswithouttokens = nubsort [ rule | ([], rule) <- ruletokens ]
-- | return only the rules for which all tokens are in the input string
rulesMatchingInput :: Ord t => MCFPInfo c n l t -> Input t -> [MCFRule c n l t]
rulesMatchingInput pinfo inp =
[ rule | tok <- toks,
(rule, ruletoks) <- rulesByToken pinfo ? tok,
ruletoks `subset` toks ]
++ rulesWithoutTokens pinfo
where toks = aElems (inputToken inp)
----------------------------------------------------------------------
-- pretty-printing of statistics
instance (Ord c, Ord n, Ord l, Ord t) => Print (MCFPInfo c n l t) where
prt pI = "[ tokens=" ++ sl grammarTokens ++
"; categories=" ++ sl grammarCats ++
"; nameRules=" ++ sla nameRules ++
"; tdRules=" ++ sla topdownRules ++
"; epsilonRules=" ++ sl epsilonRules ++
"; lcCats=" ++ sla leftcornerCats ++
"; lcTokens=" ++ sla leftcornerTokens ++
"; byToken=" ++ sla rulesByToken ++
"; noTokens=" ++ sl rulesWithoutTokens ++
"; allRules=" ++ sl allRules ++
" ]"
where sl f = show $ length $ f pI
sla f = let (as, bs) = unzip $ aAssocs $ f pI
in show (length as) ++ "/" ++ show (length (concat bs))