mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-24 03:52:50 -06:00
added filtering for useless productions in PMCFG
This commit is contained in:
@@ -329,6 +329,7 @@ getParserInfo :: GrammarEnv -> ParserInfo
|
|||||||
getParserInfo (GrammarEnv last_id catSet seqSet funSet prodSet) =
|
getParserInfo (GrammarEnv last_id catSet seqSet funSet prodSet) =
|
||||||
ParserInfo { functions = mkArray funSet
|
ParserInfo { functions = mkArray funSet
|
||||||
, sequences = mkArray seqSet
|
, sequences = mkArray seqSet
|
||||||
|
, productions0= prodSet
|
||||||
, productions = prodSet
|
, productions = prodSet
|
||||||
, startCats = Map.map getFCatList catSet
|
, startCats = Map.map getFCatList catSet
|
||||||
, totalCats = last_id+1
|
, totalCats = last_id+1
|
||||||
|
|||||||
@@ -405,13 +405,15 @@ getParserInfo :: GrammarEnv -> ParserInfo
|
|||||||
getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
|
getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
|
||||||
ParserInfo { functions = mkArray funSet
|
ParserInfo { functions = mkArray funSet
|
||||||
, sequences = mkArray seqSet
|
, sequences = mkArray seqSet
|
||||||
, productions = IntMap.union prodSet coercions
|
, productions0= productions0
|
||||||
|
, productions = filterProductions productions0
|
||||||
, startCats = maybe Map.empty (Map.map (\(start,end,_) -> range (start,end))) (IntMap.lookup 0 catSet)
|
, startCats = maybe Map.empty (Map.map (\(start,end,_) -> range (start,end))) (IntMap.lookup 0 catSet)
|
||||||
, totalCats = last_id+1
|
, totalCats = last_id+1
|
||||||
}
|
}
|
||||||
where
|
where
|
||||||
mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
|
mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
|
||||||
|
|
||||||
|
productions0 = IntMap.union prodSet coercions
|
||||||
coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet]
|
coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet]
|
||||||
|
|
||||||
getFCats :: GrammarEnv -> ProtoFCat -> [FCat]
|
getFCats :: GrammarEnv -> ProtoFCat -> [FCat]
|
||||||
|
|||||||
@@ -7,6 +7,8 @@ import Data.Binary.Put
|
|||||||
import Data.Binary.Get
|
import Data.Binary.Get
|
||||||
import qualified Data.ByteString as BS
|
import qualified Data.ByteString as BS
|
||||||
import qualified Data.Map as Map
|
import qualified Data.Map as Map
|
||||||
|
import qualified Data.IntMap as IntMap
|
||||||
|
import qualified Data.Set as Set
|
||||||
import Control.Monad
|
import Control.Monad
|
||||||
|
|
||||||
pgfMajorVersion, pgfMinorVersion :: Word16
|
pgfMajorVersion, pgfMinorVersion :: Word16
|
||||||
@@ -159,13 +161,15 @@ instance Binary Production where
|
|||||||
_ -> decodingError
|
_ -> decodingError
|
||||||
|
|
||||||
instance Binary ParserInfo where
|
instance Binary ParserInfo where
|
||||||
put p = put (functions p, sequences p, productions p, totalCats p, startCats p)
|
put p = put (functions p, sequences p, productions0 p, totalCats p, startCats p)
|
||||||
get = do functions <- get
|
get = do functions <- get
|
||||||
sequences <- get
|
sequences <- get
|
||||||
productions <- get
|
productions0<- get
|
||||||
totalCats <- get
|
totalCats <- get
|
||||||
startCats <- get
|
startCats <- get
|
||||||
return (ParserInfo{functions=functions,sequences=sequences,productions=productions
|
return (ParserInfo{functions=functions,sequences=sequences
|
||||||
|
,productions0=productions0
|
||||||
|
,productions =filterProductions productions0
|
||||||
,totalCats=totalCats,startCats=startCats})
|
,totalCats=totalCats,startCats=startCats})
|
||||||
|
|
||||||
decodingError = fail "This PGF file was compiled with different version of GF"
|
decodingError = fail "This PGF file was compiled with different version of GF"
|
||||||
|
|||||||
@@ -41,7 +41,8 @@ data Alternative =
|
|||||||
data ParserInfo
|
data ParserInfo
|
||||||
= ParserInfo { functions :: Array FunId FFun
|
= ParserInfo { functions :: Array FunId FFun
|
||||||
, sequences :: Array SeqId FSeq
|
, sequences :: Array SeqId FSeq
|
||||||
, productions :: IntMap.IntMap (Set.Set Production)
|
, productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file
|
||||||
|
, productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions
|
||||||
, startCats :: Map.Map CId [FCat]
|
, startCats :: Map.Map CId [FCat]
|
||||||
, totalCats :: {-# UNPACK #-} !FCat
|
, totalCats :: {-# UNPACK #-} !FCat
|
||||||
}
|
}
|
||||||
@@ -57,7 +58,7 @@ fcatVar = (-4)
|
|||||||
ppPMCFG :: ParserInfo -> Doc
|
ppPMCFG :: ParserInfo -> Doc
|
||||||
ppPMCFG pinfo =
|
ppPMCFG pinfo =
|
||||||
text "productions" $$
|
text "productions" $$
|
||||||
nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions pinfo), prod <- Set.toList set]) $$
|
nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions0 pinfo), prod <- Set.toList set]) $$
|
||||||
text "functions" $$
|
text "functions" $$
|
||||||
nest 2 (vcat (map ppFun (assocs (functions pinfo)))) $$
|
nest 2 (vcat (map ppFun (assocs (functions pinfo)))) $$
|
||||||
text "sequences" $$
|
text "sequences" $$
|
||||||
@@ -101,3 +102,11 @@ ppFCat fcat
|
|||||||
|
|
||||||
ppFunId funid = char 'F' <> int funid
|
ppFunId funid = char 'F' <> int funid
|
||||||
ppSeqId seqid = char 'S' <> int seqid
|
ppSeqId seqid = char 'S' <> int seqid
|
||||||
|
|
||||||
|
|
||||||
|
filterProductions prods =
|
||||||
|
fmap (Set.filter filterRule) prods
|
||||||
|
where
|
||||||
|
filterRule (FApply funid args) = all (\fcat -> IntMap.member fcat prods) args
|
||||||
|
filterRule (FCoerce _) = True
|
||||||
|
filterRule _ = True
|
||||||
|
|||||||
Reference in New Issue
Block a user