forked from GitHub/gf-core
163 lines
6.0 KiB
Haskell
163 lines
6.0 KiB
Haskell
---------------------------------------------------------------------
|
|
-- |
|
|
-- Maintainer : PL
|
|
-- Stability : (stable)
|
|
-- Portability : (portable)
|
|
--
|
|
-- > CVS $Date: 2005/05/13 12:40:19 $
|
|
-- > CVS $Author: peb $
|
|
-- > CVS $Revision: 1.5 $
|
|
--
|
|
-- MCFG parsing, parser information
|
|
-----------------------------------------------------------------------------
|
|
|
|
module GF.Parsing.MCFG.PInfo where
|
|
|
|
import GF.System.Tracing
|
|
import GF.Infra.Print
|
|
|
|
import GF.Formalism.Utilities
|
|
import GF.Formalism.GCFG
|
|
import GF.Formalism.MCFG
|
|
import GF.Data.SortedList
|
|
import GF.Data.Assoc
|
|
import GF.Parsing.MCFG.Range
|
|
|
|
----------------------------------------------------------------------
|
|
-- type declarations
|
|
|
|
-- | the list of categories = possible starting categories
|
|
type MCFParser c n l t = MCFPInfo c n l t
|
|
-> [c]
|
|
-> Input t
|
|
-> SyntaxChart n (c, RangeRec l)
|
|
|
|
makeFinalEdge :: c -> l -> (Int, Int) -> (c, RangeRec l)
|
|
makeFinalEdge cat lbl bnds = (cat, [(lbl, makeRange bnds)])
|
|
|
|
|
|
------------------------------------------------------------
|
|
-- parser information
|
|
|
|
data MCFPInfo c n l t
|
|
= MCFPInfo { grammarTokens :: SList t
|
|
, nameRules :: Assoc n (SList (MCFRule c n l t))
|
|
, topdownRules :: Assoc c (SList (MCFRule c n l t))
|
|
-- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
|
|
, epsilonRules :: [MCFRule c n l t]
|
|
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
|
|
, leftcornerCats :: Assoc c (SList (MCFRule c n l t))
|
|
, leftcornerTokens :: Assoc t (SList (MCFRule c n l t))
|
|
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
|
|
, grammarCats :: SList c
|
|
-- ^ used when calculating starting categories
|
|
, rulesByToken :: Assoc t (SList (MCFRule c n l t, SList t))
|
|
, rulesWithoutTokens :: SList (MCFRule c n l t)
|
|
-- ^ used by 'rulesMatchingInput'
|
|
, allRules :: MCFGrammar c n l t
|
|
-- ^ used by any unoptimized algorithm
|
|
|
|
--bottomupRules :: Assoc (Symbol c t) (SList (CFRule c n t)),
|
|
--emptyLeftcornerRules :: Assoc c (SList (CFRule c n t)),
|
|
--emptyCategories :: Set c,
|
|
}
|
|
|
|
|
|
rangeRestrictPInfo :: (Ord c, Ord n, Ord l, Ord t) =>
|
|
MCFPInfo c n l t -> Input t -> MCFPInfo c n l Range
|
|
rangeRestrictPInfo (pinfo{-::MCFPInfo c n l t-}) inp =
|
|
tracePrt "MCFG.PInfo - Restricting the parser information" (prt . grammarTokens)
|
|
MCFPInfo { grammarTokens = nubsort (map edgeRange (inputEdges inp))
|
|
, nameRules = rrAssoc (nameRules pinfo)
|
|
, topdownRules = rrAssoc (topdownRules pinfo)
|
|
, epsilonRules = rrRules (epsilonRules pinfo)
|
|
, leftcornerCats = rrAssoc (leftcornerCats pinfo)
|
|
, leftcornerTokens = lctokens
|
|
, grammarCats = grammarCats pinfo
|
|
, rulesByToken = emptyAssoc -- error "MCFG.PInfo.rulesByToken - no range restriction"
|
|
, rulesWithoutTokens = [] -- error "MCFG.PInfo.rulesByToken - no range restriction"
|
|
, allRules = allrules -- rrRules (allRules pinfo)
|
|
}
|
|
|
|
where lctokens = accumAssoc id
|
|
[ (rng, rule) | (tok, rules) <- aAssocs (leftcornerTokens pinfo),
|
|
inputToken inp ?= tok,
|
|
rule@(Rule _ (Cnc _ _ (Lin _ (Tok rng:_) : _)))
|
|
<- concatMap (rangeRestrictRule inp) rules ]
|
|
|
|
allrules = rrRules $ rulesMatchingInput pinfo inp
|
|
|
|
rrAssoc assoc = filterNull $ fmap rrRules assoc
|
|
filterNull assoc = assocFilter (not . null) assoc
|
|
rrRules rules = concatMap (rangeRestrictRule inp) rules
|
|
|
|
|
|
buildMCFPInfo :: (Ord c, Ord n, Ord l, Ord t) => MCFGrammar c n l t -> MCFPInfo c n l t
|
|
buildMCFPInfo grammar =
|
|
traceCalcFirst grammar $
|
|
tracePrt "MCFG.PInfo - parser info" (prt) $
|
|
MCFPInfo { grammarTokens = grammartokens
|
|
, nameRules = namerules
|
|
, topdownRules = topdownrules
|
|
, epsilonRules = epsilonrules
|
|
, leftcornerCats = leftcorncats
|
|
, leftcornerTokens = leftcorntoks
|
|
, grammarCats = grammarcats
|
|
, rulesByToken = rulesbytoken
|
|
, rulesWithoutTokens = ruleswithouttokens
|
|
, allRules = allrules
|
|
}
|
|
|
|
where allrules = concatMap expandVariants grammar
|
|
grammartokens = union (map fst ruletokens)
|
|
namerules = accumAssoc id
|
|
[ (name, rule) | rule@(Rule (Abs _ _ name) _) <- allrules ]
|
|
topdownrules = accumAssoc id
|
|
[ (cat, rule) | rule@(Rule (Abs cat _ _) _) <- allrules ]
|
|
epsilonrules = [ rule | rule@(Rule _ (Cnc _ _ (Lin _ [] : _))) <- allrules ]
|
|
leftcorncats = accumAssoc id
|
|
[ (cat, rule) |
|
|
rule@(Rule _ (Cnc _ _ (Lin _ (Cat(cat,_,_):_) : _))) <- allrules ]
|
|
leftcorntoks = accumAssoc id
|
|
[ (tok, rule) |
|
|
rule@(Rule _ (Cnc _ _ (Lin _ (Tok tok:_) : _))) <- allrules ]
|
|
grammarcats = aElems topdownrules
|
|
ruletokens = [ (toksoflins lins, rule) |
|
|
rule@(Rule _ (Cnc _ _ lins)) <- allrules ]
|
|
toksoflins lins = nubsort [ tok | Lin _ syms <- lins, Tok tok <- syms ]
|
|
rulesbytoken = accumAssoc id
|
|
[ (tok, (rule, toks)) | (tok:toks, rule) <- ruletokens ]
|
|
ruleswithouttokens = nubsort [ rule | ([], rule) <- ruletokens ]
|
|
|
|
|
|
-- | return only the rules for which all tokens are in the input string
|
|
rulesMatchingInput :: Ord t => MCFPInfo c n l t -> Input t -> [MCFRule c n l t]
|
|
rulesMatchingInput pinfo inp =
|
|
[ rule | tok <- toks,
|
|
(rule, ruletoks) <- rulesByToken pinfo ? tok,
|
|
ruletoks `subset` toks ]
|
|
++ rulesWithoutTokens pinfo
|
|
where toks = aElems (inputToken inp)
|
|
|
|
|
|
----------------------------------------------------------------------
|
|
-- pretty-printing of statistics
|
|
|
|
instance (Ord c, Ord n, Ord l, Ord t) => Print (MCFPInfo c n l t) where
|
|
prt pI = "[ tokens=" ++ sl grammarTokens ++
|
|
"; categories=" ++ sl grammarCats ++
|
|
"; nameRules=" ++ sla nameRules ++
|
|
"; tdRules=" ++ sla topdownRules ++
|
|
"; epsilonRules=" ++ sl epsilonRules ++
|
|
"; lcCats=" ++ sla leftcornerCats ++
|
|
"; lcTokens=" ++ sla leftcornerTokens ++
|
|
"; byToken=" ++ sla rulesByToken ++
|
|
"; noTokens=" ++ sl rulesWithoutTokens ++
|
|
"; allRules=" ++ sl allRules ++
|
|
" ]"
|
|
|
|
where sl f = show $ length $ f pI
|
|
sla f = let (as, bs) = unzip $ aAssocs $ f pI
|
|
in show (length as) ++ "/" ++ show (length (concat bs))
|
|
|