dead code elimination for PGF. Note: the produced grammars will not work well with metavariables and high-order abstract syntax

This commit is contained in:
krasimir
2010-06-09 11:32:59 +00:00
parent 4e35f7e5ec
commit d6f32b3bcd
8 changed files with 229 additions and 61 deletions

View File

@@ -43,6 +43,7 @@ library
PGF.Printer
PGF.Probabilistic
PGF.Forest
PGF.Optimize
GF.Data.TrieMap
GF.Data.Utilities
GF.Data.SortedList

View File

@@ -41,6 +41,7 @@ import PGF.Check
import PGF.CId
import PGF.Data
import PGF.Macros
import PGF.Optimize
-- | Compiles a number of source files and builds a 'PGF' structure for them.
@@ -60,7 +61,7 @@ link opts cnc gr = do
(True, True) -> ioeIO $ putStrLn "OK"
(False,True) -> return ()
_ -> ioeIO $ putStrLn $ "Corrupted PGF"
return gc
return $ if flag optOptimizePGF opts then optimizePGF gc else gc
Bad s -> fail s
batchCompile :: Options -> [FilePath] -> IOE SourceGrammar

View File

@@ -5,7 +5,7 @@ import GF.Compile.Export
import GF.Compile.GeneratePMCFG
import PGF.CId
import PGF.Macros(updateProductionIndices)
import PGF.Optimize(updateProductionIndices)
import PGF.Check(checkLin)
import qualified PGF.Macros as CM
import qualified PGF.Data as C

View File

@@ -161,6 +161,7 @@ data Flags = Flags {
optPreprocessors :: [String],
optEncoding :: String,
optOptimizations :: Set Optimization,
optOptimizePGF :: Bool,
optCFGTransforms :: Set CFGTransform,
optLibraryPath :: [FilePath],
optStartCat :: Maybe String,
@@ -260,6 +261,7 @@ defaultFlags = Flags {
optPreprocessors = [],
optEncoding = "latin1",
optOptimizations = Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize],
optOptimizePGF = False,
optCFGTransforms = Set.fromList [CFGRemoveCycles, CFGBottomUpFilter,
CFGTopDownFilter, CFGMergeIdentical],
optLibraryPath = [],
@@ -348,6 +350,8 @@ optDescr =
Option [] ["unlexer"] (ReqArg unlexer "UNLEXER") "Use unlexer UNLEXER.",
Option [] ["optimize"] (ReqArg optimize "OPT")
"Select an optimization package. OPT = all | values | parametrize | none",
Option [] ["optimize-pgf"] (NoArg (optimize_pgf True))
"Enable or disable global grammar optimization. This could significantly reduce the size of the final PGF file",
Option [] ["stem"] (onOff (toggleOptimize OptStem) True) "Perform stem-suffix analysis (default on).",
Option [] ["cse"] (onOff (toggleOptimize OptCSE) True) "Perform common sub-expression elimination (default on).",
Option [] ["cfg"] (ReqArg cfgTransform "TRANS") "Enable or disable specific CFG transformations. TRANS = merge, no-merge, bottomup, no-bottomup, ...",
@@ -406,6 +410,8 @@ optDescr =
optimize x = case lookup x optimizationPackages of
Just p -> set $ \o -> o { optOptimizations = p }
Nothing -> fail $ "Unknown optimization package: " ++ x
optimize_pgf x = set $ \o -> o { optOptimizePGF = x }
toggleOptimize x b = set $ setOptimization' x b

View File

@@ -4,6 +4,7 @@ module GFC (mainGFC) where
import PGF
import PGF.CId
import PGF.Data
import PGF.Optimize
import GF.Compile
import GF.Compile.Export
@@ -55,7 +56,8 @@ compileCFFiles opts fs =
unionPGFFiles :: Options -> [FilePath] -> IOE ()
unionPGFFiles opts fs =
do pgfs <- mapM readPGFVerbose fs
let pgf = foldl1 unionPGF pgfs
let pgf0 = foldl1 unionPGF pgfs
pgf = if flag optOptimizePGF opts then optimizePGF pgf0 else pgf0
pgfFile = grammarName opts pgf <.> "pgf"
if pgfFile `elem` fs
then putStrLnE $ "Refusing to overwrite " ++ pgfFile

View File

@@ -2,7 +2,7 @@ module PGF.Binary where
import PGF.CId
import PGF.Data
import PGF.Macros
import PGF.Optimize
import Data.Binary
import Data.Binary.Put
import Data.Binary.Get

View File

@@ -10,7 +10,6 @@ import qualified Data.IntSet as IntSet
import qualified Data.Array as Array
import Data.Maybe
import Data.List
import GF.Data.Utilities(sortNub)
import Text.PrettyPrint
-- operations for manipulating PGF grammars and objects
@@ -148,62 +147,6 @@ cidVar = mkCId "__gfVar"
_B = mkCId "__gfB"
_V = mkCId "__gfV"
updateProductionIndices :: PGF -> PGF
updateProductionIndices pgf = pgf{ concretes = fmap updateConcrete (concretes pgf) }
where
updateConcrete cnc =
let p_prods = (filterProductions IntMap.empty . parseIndex cnc) (productions cnc)
l_prods = (linIndex cnc . filterProductions IntMap.empty) (productions cnc)
in cnc{pproductions = p_prods, lproductions = l_prods}
filterProductions prods0 prods
| prods0 == prods1 = prods0
| otherwise = filterProductions prods1 prods
where
prods1 = IntMap.unionWith Set.union prods0 (IntMap.mapMaybe (filterProdSet prods0) prods)
filterProdSet prods0 set
| Set.null set1 = Nothing
| otherwise = Just set1
where
set1 = Set.filter (filterRule prods0) set
filterRule prods0 (PApply funid args) = all (\fcat -> isLiteralFCat fcat || IntMap.member fcat prods0) args
filterRule prods0 (PCoerce fcat) = isLiteralFCat fcat || IntMap.member fcat prods0
filterRule prods0 _ = True
parseIndex cnc = IntMap.mapMaybeWithKey filterProdSet
where
filterProdSet fid prods
| fid `IntSet.member` ho_fids = Just prods
| otherwise = let prods' = Set.filter (not . is_ho_prod) prods
in if Set.null prods'
then Nothing
else Just prods'
is_ho_prod (PApply _ [fid]) | fid == fcatVar = True
is_ho_prod _ = False
ho_fids :: IntSet.IntSet
ho_fids = IntSet.fromList [fid | cat <- ho_cats
, fid <- maybe [] (\(CncCat s e _) -> [s..e]) (Map.lookup cat (cnccats cnc))]
ho_cats :: [CId]
ho_cats = sortNub [c | (ty,_,_) <- Map.elems (funs (abstract pgf))
, h <- case ty of {DTyp hyps val _ -> hyps}
, c <- fst (catSkeleton (typeOfHypo h))]
linIndex cnc productions =
Map.fromListWith (IntMap.unionWith Set.union)
[(fun,IntMap.singleton res (Set.singleton prod)) | (res,prods) <- IntMap.toList productions
, prod <- Set.toList prods
, fun <- getFunctions prod]
where
getFunctions (PApply funid args) = let CncFun fun _ = cncfuns cnc Array.! funid in [fun]
getFunctions (PCoerce fid) = case IntMap.lookup fid productions of
Nothing -> []
Just prods -> [fun | prod <- Set.toList prods, fun <- getFunctions prod]
-- Utilities for doing linearization

View File

@@ -0,0 +1,215 @@
module PGF.Optimize
( optimizePGF
, updateProductionIndices
) where
import PGF.CId
import PGF.Data
import PGF.Macros
import Data.Maybe
import Data.List (mapAccumL, nub)
import Data.Array.IArray
import Data.Array.MArray
import Data.Array.ST
import Data.Array.Unboxed
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntSet as IntSet
import qualified Data.IntMap as IntMap
import Control.Monad.ST
import GF.Data.Utilities(sortNub)
optimizePGF :: PGF -> PGF
optimizePGF pgf = pgf{concretes=fmap (updateConcrete (abstract pgf) .
topDownFilter (lookStartCat pgf) .
bottomUpFilter ) (concretes pgf)}
updateProductionIndices :: PGF -> PGF
updateProductionIndices pgf = pgf{concretes = fmap (updateConcrete (abstract pgf)) (concretes pgf)}
topDownFilter :: CId -> Concr -> Concr
topDownFilter startCat cnc =
let ((seqs,funs),prods) = IntMap.mapAccumWithKey (\env res set -> mapAccumLSet (optimize res) env set)
(Map.empty,Map.empty)
(productions cnc)
cats = Map.mapWithKey filterCatLabels (cnccats cnc)
in cnc{ sequences = mkSetArray seqs
, cncfuns = mkSetArray funs
, productions = prods
, cnccats = cats
}
where
fid2cat fid =
case IntMap.lookup fid fid2catMap of
Just cat -> cat
Nothing -> case [fid | Just set <- [IntMap.lookup fid (productions cnc)], PCoerce fid <- Set.toList set] of
(fid:_) -> fid2cat fid
_ -> error "unknown forest id"
where
fid2catMap = IntMap.fromList [(fid,cat) | (cat,CncCat start end lbls) <- Map.toList (cnccats cnc),
fid <- [start..end]]
starts =
case Map.lookup startCat (cnccats cnc) of
Just (CncCat _ _ lbls) -> [(startCat,lbl) | lbl <- indices lbls]
Nothing -> []
allRelations =
Map.unionsWith Set.union
[rel fid prod | (fid,set) <- IntMap.toList (productions cnc),
prod <- Set.toList set]
where
rel fid (PApply funid args) = Map.fromList [((fid2cat fid,lbl),deps args seqid) | (lbl,seqid) <- assocs lin]
where
CncFun _ lin = cncfuns cnc ! funid
rel fid _ = Map.empty
deps args seqid = Set.fromList [(fid2cat (args !! r),d) | SymCat r d <- elems seq]
where
seq = sequences cnc ! seqid
-- here we create a mapping from category to an array of indices.
-- An element of the array is equal to -1 if the corresponding index
-- is not going to be used in the optimized grammar, or the new index
-- if it will be used
closure :: Map.Map CId (UArray LIndex LIndex)
closure = runST $ do
set <- initSet
addLitCat cidString set
addLitCat cidInt set
addLitCat cidFloat set
addLitCat cidVar set
closureSet set starts
doneSet set
where
initSet :: ST s (Map.Map CId (STUArray s LIndex LIndex))
initSet =
fmap Map.fromAscList $ sequence
[fmap ((,) cat) (newArray (bounds lbls) (-1))
| (cat,CncCat _ _ lbls) <- Map.toAscList (cnccats cnc)]
addLitCat cat set =
case Map.lookup cat set of
Just indices -> writeArray indices 0 0
Nothing -> return ()
closureSet set [] = return ()
closureSet set (x@(cat,index):xs) =
case Map.lookup cat set of
Just indices -> do v <- readArray indices index
writeArray indices index 0
if v < 0
then case Map.lookup x allRelations of
Just ys -> closureSet set (Set.toList ys++xs)
Nothing -> closureSet set xs
else closureSet set xs
Nothing -> error "unknown cat"
doneSet set =
fmap Map.fromAscList $ mapM done (Map.toAscList set)
where
done (cat,indices) = do
(s,e) <- getBounds indices
reindex indices s e 0
indices <- unsafeFreeze indices
return (cat,indices)
reindex indices i j k
| i <= j = do v <- readArray indices i
if v < 0
then reindex indices (i+1) j k
else writeArray indices i k >>
reindex indices (i+1) j (k+1)
| otherwise = return ()
optimize res (seqs,funs) (PApply funid args) =
let (seqs',lin') = mapAccumL addUnique seqs [amap updateSymbol (sequences cnc ! seqid) |
(lbl,seqid) <- assocs lin, indicesOf res ! lbl >= 0]
(funs',funid') = addUnique funs (CncFun fun (mkArray lin'))
in ((seqs',funs'), PApply funid' args)
where
CncFun fun lin = cncfuns cnc ! funid
indicesOf fid =
case Map.lookup (fid2cat fid) closure of
Just indices -> indices
Nothing -> error "unknown category"
addUnique seqs seq =
case Map.lookup seq seqs of
Just seqid -> (seqs,seqid)
Nothing -> let seqid = Map.size seqs
in (Map.insert seq seqid seqs, seqid)
updateSymbol (SymCat r d) = SymCat r (indicesOf (args !! r) ! d)
updateSymbol s = s
optimize res env prod = (env,prod)
filterCatLabels cat (CncCat start end lbls) =
case Map.lookup cat closure of
Just indices -> let lbls' = mkArray [lbl | (i,lbl) <- assocs lbls, indices ! i >= 0]
in CncCat start end lbls'
Nothing -> error "unknown category"
mkSetArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
mkArray lst = listArray (0,length lst-1) lst
mapAccumLSet f b set = let (b',lst) = mapAccumL f b (Set.toList set)
in (b',Set.fromList lst)
bottomUpFilter :: Concr -> Concr
bottomUpFilter cnc = cnc{productions=filterProductions IntMap.empty (productions cnc)}
filterProductions prods0 prods
| prods0 == prods1 = prods0
| otherwise = filterProductions prods1 prods
where
prods1 = IntMap.unionWith Set.union prods0 (IntMap.mapMaybe (filterProdSet prods0) prods)
filterProdSet prods0 set
| Set.null set1 = Nothing
| otherwise = Just set1
where
set1 = Set.filter (filterRule prods0) set
filterRule prods0 (PApply funid args) = all (\fcat -> isLiteralFCat fcat || IntMap.member fcat prods0) args
filterRule prods0 (PCoerce fcat) = isLiteralFCat fcat || IntMap.member fcat prods0
filterRule prods0 _ = True
updateConcrete abs cnc =
let p_prods = (filterProductions IntMap.empty . parseIndex cnc) (productions cnc)
l_prods = (linIndex cnc . filterProductions IntMap.empty) (productions cnc)
in cnc{pproductions = p_prods, lproductions = l_prods}
where
parseIndex cnc = IntMap.mapMaybeWithKey filterProdSet
where
filterProdSet fid prods
| fid `IntSet.member` ho_fids = Just prods
| otherwise = let prods' = Set.filter (not . is_ho_prod) prods
in if Set.null prods'
then Nothing
else Just prods'
is_ho_prod (PApply _ [fid]) | fid == fcatVar = True
is_ho_prod _ = False
ho_fids :: IntSet.IntSet
ho_fids = IntSet.fromList [fid | cat <- ho_cats
, fid <- maybe [] (\(CncCat s e _) -> [s..e]) (Map.lookup cat (cnccats cnc))]
ho_cats :: [CId]
ho_cats = sortNub [c | (ty,_,_) <- Map.elems (funs abs)
, h <- case ty of {DTyp hyps val _ -> hyps}
, c <- fst (catSkeleton (typeOfHypo h))]
linIndex cnc productions =
Map.fromListWith (IntMap.unionWith Set.union)
[(fun,IntMap.singleton res (Set.singleton prod)) | (res,prods) <- IntMap.toList productions
, prod <- Set.toList prods
, fun <- getFunctions prod]
where
getFunctions (PApply funid args) = let CncFun fun _ = cncfuns cnc ! funid in [fun]
getFunctions (PCoerce fid) = case IntMap.lookup fid productions of
Nothing -> []
Just prods -> [fun | prod <- Set.toList prods, fun <- getFunctions prod]