forked from GitHub/gf-core
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
This commit is contained in:
162
src-3.0/GF/Parsing/MCFG/PInfo.hs
Normal file
162
src-3.0/GF/Parsing/MCFG/PInfo.hs
Normal file
@@ -0,0 +1,162 @@
|
||||
---------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : PL
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/05/13 12:40:19 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.5 $
|
||||
--
|
||||
-- MCFG parsing, parser information
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Parsing.MCFG.PInfo where
|
||||
|
||||
import GF.System.Tracing
|
||||
import GF.Infra.Print
|
||||
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Formalism.GCFG
|
||||
import GF.Formalism.MCFG
|
||||
import GF.Data.SortedList
|
||||
import GF.Data.Assoc
|
||||
import GF.Parsing.MCFG.Range
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- type declarations
|
||||
|
||||
-- | the list of categories = possible starting categories
|
||||
type MCFParser c n l t = MCFPInfo c n l t
|
||||
-> [c]
|
||||
-> Input t
|
||||
-> SyntaxChart n (c, RangeRec l)
|
||||
|
||||
makeFinalEdge :: c -> l -> (Int, Int) -> (c, RangeRec l)
|
||||
makeFinalEdge cat lbl bnds = (cat, [(lbl, makeRange bnds)])
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- parser information
|
||||
|
||||
data MCFPInfo c n l t
|
||||
= MCFPInfo { grammarTokens :: SList t
|
||||
, nameRules :: Assoc n (SList (MCFRule c n l t))
|
||||
, topdownRules :: Assoc c (SList (MCFRule c n l t))
|
||||
-- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
|
||||
, epsilonRules :: [MCFRule c n l t]
|
||||
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
|
||||
, leftcornerCats :: Assoc c (SList (MCFRule c n l t))
|
||||
, leftcornerTokens :: Assoc t (SList (MCFRule c n l t))
|
||||
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
|
||||
, grammarCats :: SList c
|
||||
-- ^ used when calculating starting categories
|
||||
, rulesByToken :: Assoc t (SList (MCFRule c n l t, SList t))
|
||||
, rulesWithoutTokens :: SList (MCFRule c n l t)
|
||||
-- ^ used by 'rulesMatchingInput'
|
||||
, allRules :: MCFGrammar c n l t
|
||||
-- ^ used by any unoptimized algorithm
|
||||
|
||||
--bottomupRules :: Assoc (Symbol c t) (SList (CFRule c n t)),
|
||||
--emptyLeftcornerRules :: Assoc c (SList (CFRule c n t)),
|
||||
--emptyCategories :: Set c,
|
||||
}
|
||||
|
||||
|
||||
rangeRestrictPInfo :: (Ord c, Ord n, Ord l, Ord t) =>
|
||||
MCFPInfo c n l t -> Input t -> MCFPInfo c n l Range
|
||||
rangeRestrictPInfo (pinfo{-::MCFPInfo c n l t-}) inp =
|
||||
tracePrt "MCFG.PInfo - Restricting the parser information" (prt . grammarTokens)
|
||||
MCFPInfo { grammarTokens = nubsort (map edgeRange (inputEdges inp))
|
||||
, nameRules = rrAssoc (nameRules pinfo)
|
||||
, topdownRules = rrAssoc (topdownRules pinfo)
|
||||
, epsilonRules = rrRules (epsilonRules pinfo)
|
||||
, leftcornerCats = rrAssoc (leftcornerCats pinfo)
|
||||
, leftcornerTokens = lctokens
|
||||
, grammarCats = grammarCats pinfo
|
||||
, rulesByToken = emptyAssoc -- error "MCFG.PInfo.rulesByToken - no range restriction"
|
||||
, rulesWithoutTokens = [] -- error "MCFG.PInfo.rulesByToken - no range restriction"
|
||||
, allRules = allrules -- rrRules (allRules pinfo)
|
||||
}
|
||||
|
||||
where lctokens = accumAssoc id
|
||||
[ (rng, rule) | (tok, rules) <- aAssocs (leftcornerTokens pinfo),
|
||||
inputToken inp ?= tok,
|
||||
rule@(Rule _ (Cnc _ _ (Lin _ (Tok rng:_) : _)))
|
||||
<- concatMap (rangeRestrictRule inp) rules ]
|
||||
|
||||
allrules = rrRules $ rulesMatchingInput pinfo inp
|
||||
|
||||
rrAssoc assoc = filterNull $ fmap rrRules assoc
|
||||
filterNull assoc = assocFilter (not . null) assoc
|
||||
rrRules rules = concatMap (rangeRestrictRule inp) rules
|
||||
|
||||
|
||||
buildMCFPInfo :: (Ord c, Ord n, Ord l, Ord t) => MCFGrammar c n l t -> MCFPInfo c n l t
|
||||
buildMCFPInfo grammar =
|
||||
traceCalcFirst grammar $
|
||||
tracePrt "MCFG.PInfo - parser info" (prt) $
|
||||
MCFPInfo { grammarTokens = grammartokens
|
||||
, nameRules = namerules
|
||||
, topdownRules = topdownrules
|
||||
, epsilonRules = epsilonrules
|
||||
, leftcornerCats = leftcorncats
|
||||
, leftcornerTokens = leftcorntoks
|
||||
, grammarCats = grammarcats
|
||||
, rulesByToken = rulesbytoken
|
||||
, rulesWithoutTokens = ruleswithouttokens
|
||||
, allRules = allrules
|
||||
}
|
||||
|
||||
where allrules = concatMap expandVariants grammar
|
||||
grammartokens = union (map fst ruletokens)
|
||||
namerules = accumAssoc id
|
||||
[ (name, rule) | rule@(Rule (Abs _ _ name) _) <- allrules ]
|
||||
topdownrules = accumAssoc id
|
||||
[ (cat, rule) | rule@(Rule (Abs cat _ _) _) <- allrules ]
|
||||
epsilonrules = [ rule | rule@(Rule _ (Cnc _ _ (Lin _ [] : _))) <- allrules ]
|
||||
leftcorncats = accumAssoc id
|
||||
[ (cat, rule) |
|
||||
rule@(Rule _ (Cnc _ _ (Lin _ (Cat(cat,_,_):_) : _))) <- allrules ]
|
||||
leftcorntoks = accumAssoc id
|
||||
[ (tok, rule) |
|
||||
rule@(Rule _ (Cnc _ _ (Lin _ (Tok tok:_) : _))) <- allrules ]
|
||||
grammarcats = aElems topdownrules
|
||||
ruletokens = [ (toksoflins lins, rule) |
|
||||
rule@(Rule _ (Cnc _ _ lins)) <- allrules ]
|
||||
toksoflins lins = nubsort [ tok | Lin _ syms <- lins, Tok tok <- syms ]
|
||||
rulesbytoken = accumAssoc id
|
||||
[ (tok, (rule, toks)) | (tok:toks, rule) <- ruletokens ]
|
||||
ruleswithouttokens = nubsort [ rule | ([], rule) <- ruletokens ]
|
||||
|
||||
|
||||
-- | return only the rules for which all tokens are in the input string
|
||||
rulesMatchingInput :: Ord t => MCFPInfo c n l t -> Input t -> [MCFRule c n l t]
|
||||
rulesMatchingInput pinfo inp =
|
||||
[ rule | tok <- toks,
|
||||
(rule, ruletoks) <- rulesByToken pinfo ? tok,
|
||||
ruletoks `subset` toks ]
|
||||
++ rulesWithoutTokens pinfo
|
||||
where toks = aElems (inputToken inp)
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- pretty-printing of statistics
|
||||
|
||||
instance (Ord c, Ord n, Ord l, Ord t) => Print (MCFPInfo c n l t) where
|
||||
prt pI = "[ tokens=" ++ sl grammarTokens ++
|
||||
"; categories=" ++ sl grammarCats ++
|
||||
"; nameRules=" ++ sla nameRules ++
|
||||
"; tdRules=" ++ sla topdownRules ++
|
||||
"; epsilonRules=" ++ sl epsilonRules ++
|
||||
"; lcCats=" ++ sla leftcornerCats ++
|
||||
"; lcTokens=" ++ sla leftcornerTokens ++
|
||||
"; byToken=" ++ sla rulesByToken ++
|
||||
"; noTokens=" ++ sl rulesWithoutTokens ++
|
||||
"; allRules=" ++ sl allRules ++
|
||||
" ]"
|
||||
|
||||
where sl f = show $ length $ f pI
|
||||
sla f = let (as, bs) = unzip $ aAssocs $ f pI
|
||||
in show (length as) ++ "/" ++ show (length (concat bs))
|
||||
|
||||
Reference in New Issue
Block a user