added filtering for useless productions in PMCFG

This commit is contained in:
krasimir
2009-05-18 15:52:10 +00:00
parent 7508fa5785
commit 5f986f5992
4 changed files with 22 additions and 6 deletions

View File

@@ -329,6 +329,7 @@ getParserInfo :: GrammarEnv -> ParserInfo
getParserInfo (GrammarEnv last_id catSet seqSet funSet prodSet) = getParserInfo (GrammarEnv last_id catSet seqSet funSet prodSet) =
ParserInfo { functions = mkArray funSet ParserInfo { functions = mkArray funSet
, sequences = mkArray seqSet , sequences = mkArray seqSet
, productions0= prodSet
, productions = prodSet , productions = prodSet
, startCats = Map.map getFCatList catSet , startCats = Map.map getFCatList catSet
, totalCats = last_id+1 , totalCats = last_id+1

View File

@@ -405,13 +405,15 @@ getParserInfo :: GrammarEnv -> ParserInfo
getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
ParserInfo { functions = mkArray funSet ParserInfo { functions = mkArray funSet
, sequences = mkArray seqSet , sequences = mkArray seqSet
, productions = IntMap.union prodSet coercions , productions0= productions0
, productions = filterProductions productions0
, startCats = maybe Map.empty (Map.map (\(start,end,_) -> range (start,end))) (IntMap.lookup 0 catSet) , startCats = maybe Map.empty (Map.map (\(start,end,_) -> range (start,end))) (IntMap.lookup 0 catSet)
, totalCats = last_id+1 , totalCats = last_id+1
} }
where where
mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
productions0 = IntMap.union prodSet coercions
coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet] coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet]
getFCats :: GrammarEnv -> ProtoFCat -> [FCat] getFCats :: GrammarEnv -> ProtoFCat -> [FCat]

View File

@@ -7,6 +7,8 @@ import Data.Binary.Put
import Data.Binary.Get import Data.Binary.Get
import qualified Data.ByteString as BS import qualified Data.ByteString as BS
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import Control.Monad import Control.Monad
pgfMajorVersion, pgfMinorVersion :: Word16 pgfMajorVersion, pgfMinorVersion :: Word16
@@ -159,13 +161,15 @@ instance Binary Production where
_ -> decodingError _ -> decodingError
instance Binary ParserInfo where instance Binary ParserInfo where
put p = put (functions p, sequences p, productions p, totalCats p, startCats p) put p = put (functions p, sequences p, productions0 p, totalCats p, startCats p)
get = do functions <- get get = do functions <- get
sequences <- get sequences <- get
productions <- get productions0<- get
totalCats <- get totalCats <- get
startCats <- get startCats <- get
return (ParserInfo{functions=functions,sequences=sequences,productions=productions return (ParserInfo{functions=functions,sequences=sequences
,productions0=productions0
,productions =filterProductions productions0
,totalCats=totalCats,startCats=startCats}) ,totalCats=totalCats,startCats=startCats})
decodingError = fail "This PGF file was compiled with different version of GF" decodingError = fail "This PGF file was compiled with different version of GF"

View File

@@ -41,7 +41,8 @@ data Alternative =
data ParserInfo data ParserInfo
= ParserInfo { functions :: Array FunId FFun = ParserInfo { functions :: Array FunId FFun
, sequences :: Array SeqId FSeq , sequences :: Array SeqId FSeq
, productions :: IntMap.IntMap (Set.Set Production) , productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file
, productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions
, startCats :: Map.Map CId [FCat] , startCats :: Map.Map CId [FCat]
, totalCats :: {-# UNPACK #-} !FCat , totalCats :: {-# UNPACK #-} !FCat
} }
@@ -57,7 +58,7 @@ fcatVar = (-4)
ppPMCFG :: ParserInfo -> Doc ppPMCFG :: ParserInfo -> Doc
ppPMCFG pinfo = ppPMCFG pinfo =
text "productions" $$ text "productions" $$
nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions pinfo), prod <- Set.toList set]) $$ nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions0 pinfo), prod <- Set.toList set]) $$
text "functions" $$ text "functions" $$
nest 2 (vcat (map ppFun (assocs (functions pinfo)))) $$ nest 2 (vcat (map ppFun (assocs (functions pinfo)))) $$
text "sequences" $$ text "sequences" $$
@@ -101,3 +102,11 @@ ppFCat fcat
ppFunId funid = char 'F' <> int funid ppFunId funid = char 'F' <> int funid
ppSeqId seqid = char 'S' <> int seqid ppSeqId seqid = char 'S' <> int seqid
filterProductions prods =
fmap (Set.filter filterRule) prods
where
filterRule (FApply funid args) = all (\fcat -> IntMap.member fcat prods) args
filterRule (FCoerce _) = True
filterRule _ = True