added filtering for useless productions in PMCFG

This commit is contained in:
krasimir
2009-05-18 15:52:10 +00:00
parent 7508fa5785
commit 5f986f5992
4 changed files with 22 additions and 6 deletions

View File

@@ -329,6 +329,7 @@ getParserInfo :: GrammarEnv -> ParserInfo
getParserInfo (GrammarEnv last_id catSet seqSet funSet prodSet) =
ParserInfo { functions = mkArray funSet
, sequences = mkArray seqSet
, productions0= prodSet
, productions = prodSet
, startCats = Map.map getFCatList catSet
, totalCats = last_id+1

View File

@@ -405,13 +405,15 @@ getParserInfo :: GrammarEnv -> ParserInfo
getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
ParserInfo { functions = mkArray funSet
, sequences = mkArray seqSet
, productions = IntMap.union prodSet coercions
, productions0= productions0
, productions = filterProductions productions0
, startCats = maybe Map.empty (Map.map (\(start,end,_) -> range (start,end))) (IntMap.lookup 0 catSet)
, totalCats = last_id+1
}
where
mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
productions0 = IntMap.union prodSet coercions
coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet]
getFCats :: GrammarEnv -> ProtoFCat -> [FCat]

View File

@@ -7,6 +7,8 @@ import Data.Binary.Put
import Data.Binary.Get
import qualified Data.ByteString as BS
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import Control.Monad
pgfMajorVersion, pgfMinorVersion :: Word16
@@ -159,13 +161,15 @@ instance Binary Production where
_ -> decodingError
instance Binary ParserInfo where
put p = put (functions p, sequences p, productions p, totalCats p, startCats p)
put p = put (functions p, sequences p, productions0 p, totalCats p, startCats p)
get = do functions <- get
sequences <- get
productions <- get
productions0<- get
totalCats <- get
startCats <- get
return (ParserInfo{functions=functions,sequences=sequences,productions=productions
return (ParserInfo{functions=functions,sequences=sequences
,productions0=productions0
,productions =filterProductions productions0
,totalCats=totalCats,startCats=startCats})
decodingError = fail "This PGF file was compiled with different version of GF"

View File

@@ -41,7 +41,8 @@ data Alternative =
data ParserInfo
= ParserInfo { functions :: Array FunId FFun
, sequences :: Array SeqId FSeq
, productions :: IntMap.IntMap (Set.Set Production)
, productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file
, productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions
, startCats :: Map.Map CId [FCat]
, totalCats :: {-# UNPACK #-} !FCat
}
@@ -57,7 +58,7 @@ fcatVar = (-4)
ppPMCFG :: ParserInfo -> Doc
ppPMCFG pinfo =
text "productions" $$
nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions pinfo), prod <- Set.toList set]) $$
nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions0 pinfo), prod <- Set.toList set]) $$
text "functions" $$
nest 2 (vcat (map ppFun (assocs (functions pinfo)))) $$
text "sequences" $$
@@ -101,3 +102,11 @@ ppFCat fcat
ppFunId funid = char 'F' <> int funid
ppSeqId seqid = char 'S' <> int seqid
filterProductions prods =
fmap (Set.filter filterRule) prods
where
filterRule (FApply funid args) = all (\fcat -> IntMap.member fcat prods) args
filterRule (FCoerce _) = True
filterRule _ = True