mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
dead code elimination for PGF. Note: the produced grammars will not work well with metavariables and high-order abstract syntax
This commit is contained in:
1
gf.cabal
1
gf.cabal
@@ -43,6 +43,7 @@ library
|
||||
PGF.Printer
|
||||
PGF.Probabilistic
|
||||
PGF.Forest
|
||||
PGF.Optimize
|
||||
GF.Data.TrieMap
|
||||
GF.Data.Utilities
|
||||
GF.Data.SortedList
|
||||
|
||||
@@ -41,6 +41,7 @@ import PGF.Check
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Macros
|
||||
import PGF.Optimize
|
||||
|
||||
|
||||
-- | Compiles a number of source files and builds a 'PGF' structure for them.
|
||||
@@ -60,7 +61,7 @@ link opts cnc gr = do
|
||||
(True, True) -> ioeIO $ putStrLn "OK"
|
||||
(False,True) -> return ()
|
||||
_ -> ioeIO $ putStrLn $ "Corrupted PGF"
|
||||
return gc
|
||||
return $ if flag optOptimizePGF opts then optimizePGF gc else gc
|
||||
Bad s -> fail s
|
||||
|
||||
batchCompile :: Options -> [FilePath] -> IOE SourceGrammar
|
||||
|
||||
@@ -5,7 +5,7 @@ import GF.Compile.Export
|
||||
import GF.Compile.GeneratePMCFG
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Macros(updateProductionIndices)
|
||||
import PGF.Optimize(updateProductionIndices)
|
||||
import PGF.Check(checkLin)
|
||||
import qualified PGF.Macros as CM
|
||||
import qualified PGF.Data as C
|
||||
|
||||
@@ -161,6 +161,7 @@ data Flags = Flags {
|
||||
optPreprocessors :: [String],
|
||||
optEncoding :: String,
|
||||
optOptimizations :: Set Optimization,
|
||||
optOptimizePGF :: Bool,
|
||||
optCFGTransforms :: Set CFGTransform,
|
||||
optLibraryPath :: [FilePath],
|
||||
optStartCat :: Maybe String,
|
||||
@@ -260,6 +261,7 @@ defaultFlags = Flags {
|
||||
optPreprocessors = [],
|
||||
optEncoding = "latin1",
|
||||
optOptimizations = Set.fromList [OptStem,OptCSE,OptExpand,OptParametrize],
|
||||
optOptimizePGF = False,
|
||||
optCFGTransforms = Set.fromList [CFGRemoveCycles, CFGBottomUpFilter,
|
||||
CFGTopDownFilter, CFGMergeIdentical],
|
||||
optLibraryPath = [],
|
||||
@@ -348,6 +350,8 @@ optDescr =
|
||||
Option [] ["unlexer"] (ReqArg unlexer "UNLEXER") "Use unlexer UNLEXER.",
|
||||
Option [] ["optimize"] (ReqArg optimize "OPT")
|
||||
"Select an optimization package. OPT = all | values | parametrize | none",
|
||||
Option [] ["optimize-pgf"] (NoArg (optimize_pgf True))
|
||||
"Enable or disable global grammar optimization. This could significantly reduce the size of the final PGF file",
|
||||
Option [] ["stem"] (onOff (toggleOptimize OptStem) True) "Perform stem-suffix analysis (default on).",
|
||||
Option [] ["cse"] (onOff (toggleOptimize OptCSE) True) "Perform common sub-expression elimination (default on).",
|
||||
Option [] ["cfg"] (ReqArg cfgTransform "TRANS") "Enable or disable specific CFG transformations. TRANS = merge, no-merge, bottomup, no-bottomup, ...",
|
||||
@@ -406,6 +410,8 @@ optDescr =
|
||||
optimize x = case lookup x optimizationPackages of
|
||||
Just p -> set $ \o -> o { optOptimizations = p }
|
||||
Nothing -> fail $ "Unknown optimization package: " ++ x
|
||||
|
||||
optimize_pgf x = set $ \o -> o { optOptimizePGF = x }
|
||||
|
||||
toggleOptimize x b = set $ setOptimization' x b
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ module GFC (mainGFC) where
|
||||
import PGF
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Optimize
|
||||
import GF.Compile
|
||||
import GF.Compile.Export
|
||||
|
||||
@@ -55,7 +56,8 @@ compileCFFiles opts fs =
|
||||
unionPGFFiles :: Options -> [FilePath] -> IOE ()
|
||||
unionPGFFiles opts fs =
|
||||
do pgfs <- mapM readPGFVerbose fs
|
||||
let pgf = foldl1 unionPGF pgfs
|
||||
let pgf0 = foldl1 unionPGF pgfs
|
||||
pgf = if flag optOptimizePGF opts then optimizePGF pgf0 else pgf0
|
||||
pgfFile = grammarName opts pgf <.> "pgf"
|
||||
if pgfFile `elem` fs
|
||||
then putStrLnE $ "Refusing to overwrite " ++ pgfFile
|
||||
|
||||
@@ -2,7 +2,7 @@ module PGF.Binary where
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Macros
|
||||
import PGF.Optimize
|
||||
import Data.Binary
|
||||
import Data.Binary.Put
|
||||
import Data.Binary.Get
|
||||
|
||||
@@ -10,7 +10,6 @@ import qualified Data.IntSet as IntSet
|
||||
import qualified Data.Array as Array
|
||||
import Data.Maybe
|
||||
import Data.List
|
||||
import GF.Data.Utilities(sortNub)
|
||||
import Text.PrettyPrint
|
||||
|
||||
-- operations for manipulating PGF grammars and objects
|
||||
@@ -148,62 +147,6 @@ cidVar = mkCId "__gfVar"
|
||||
_B = mkCId "__gfB"
|
||||
_V = mkCId "__gfV"
|
||||
|
||||
updateProductionIndices :: PGF -> PGF
|
||||
updateProductionIndices pgf = pgf{ concretes = fmap updateConcrete (concretes pgf) }
|
||||
where
|
||||
updateConcrete cnc =
|
||||
let p_prods = (filterProductions IntMap.empty . parseIndex cnc) (productions cnc)
|
||||
l_prods = (linIndex cnc . filterProductions IntMap.empty) (productions cnc)
|
||||
in cnc{pproductions = p_prods, lproductions = l_prods}
|
||||
|
||||
filterProductions prods0 prods
|
||||
| prods0 == prods1 = prods0
|
||||
| otherwise = filterProductions prods1 prods
|
||||
where
|
||||
prods1 = IntMap.unionWith Set.union prods0 (IntMap.mapMaybe (filterProdSet prods0) prods)
|
||||
|
||||
filterProdSet prods0 set
|
||||
| Set.null set1 = Nothing
|
||||
| otherwise = Just set1
|
||||
where
|
||||
set1 = Set.filter (filterRule prods0) set
|
||||
|
||||
filterRule prods0 (PApply funid args) = all (\fcat -> isLiteralFCat fcat || IntMap.member fcat prods0) args
|
||||
filterRule prods0 (PCoerce fcat) = isLiteralFCat fcat || IntMap.member fcat prods0
|
||||
filterRule prods0 _ = True
|
||||
|
||||
parseIndex cnc = IntMap.mapMaybeWithKey filterProdSet
|
||||
where
|
||||
filterProdSet fid prods
|
||||
| fid `IntSet.member` ho_fids = Just prods
|
||||
| otherwise = let prods' = Set.filter (not . is_ho_prod) prods
|
||||
in if Set.null prods'
|
||||
then Nothing
|
||||
else Just prods'
|
||||
|
||||
is_ho_prod (PApply _ [fid]) | fid == fcatVar = True
|
||||
is_ho_prod _ = False
|
||||
|
||||
ho_fids :: IntSet.IntSet
|
||||
ho_fids = IntSet.fromList [fid | cat <- ho_cats
|
||||
, fid <- maybe [] (\(CncCat s e _) -> [s..e]) (Map.lookup cat (cnccats cnc))]
|
||||
|
||||
ho_cats :: [CId]
|
||||
ho_cats = sortNub [c | (ty,_,_) <- Map.elems (funs (abstract pgf))
|
||||
, h <- case ty of {DTyp hyps val _ -> hyps}
|
||||
, c <- fst (catSkeleton (typeOfHypo h))]
|
||||
|
||||
linIndex cnc productions =
|
||||
Map.fromListWith (IntMap.unionWith Set.union)
|
||||
[(fun,IntMap.singleton res (Set.singleton prod)) | (res,prods) <- IntMap.toList productions
|
||||
, prod <- Set.toList prods
|
||||
, fun <- getFunctions prod]
|
||||
where
|
||||
getFunctions (PApply funid args) = let CncFun fun _ = cncfuns cnc Array.! funid in [fun]
|
||||
getFunctions (PCoerce fid) = case IntMap.lookup fid productions of
|
||||
Nothing -> []
|
||||
Just prods -> [fun | prod <- Set.toList prods, fun <- getFunctions prod]
|
||||
|
||||
|
||||
-- Utilities for doing linearization
|
||||
|
||||
|
||||
215
src/runtime/haskell/PGF/Optimize.hs
Normal file
215
src/runtime/haskell/PGF/Optimize.hs
Normal file
@@ -0,0 +1,215 @@
|
||||
module PGF.Optimize
|
||||
( optimizePGF
|
||||
, updateProductionIndices
|
||||
) where
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Macros
|
||||
import Data.Maybe
|
||||
import Data.List (mapAccumL, nub)
|
||||
import Data.Array.IArray
|
||||
import Data.Array.MArray
|
||||
import Data.Array.ST
|
||||
import Data.Array.Unboxed
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.IntSet as IntSet
|
||||
import qualified Data.IntMap as IntMap
|
||||
import Control.Monad.ST
|
||||
import GF.Data.Utilities(sortNub)
|
||||
|
||||
optimizePGF :: PGF -> PGF
|
||||
optimizePGF pgf = pgf{concretes=fmap (updateConcrete (abstract pgf) .
|
||||
topDownFilter (lookStartCat pgf) .
|
||||
bottomUpFilter ) (concretes pgf)}
|
||||
|
||||
updateProductionIndices :: PGF -> PGF
|
||||
updateProductionIndices pgf = pgf{concretes = fmap (updateConcrete (abstract pgf)) (concretes pgf)}
|
||||
|
||||
topDownFilter :: CId -> Concr -> Concr
|
||||
topDownFilter startCat cnc =
|
||||
let ((seqs,funs),prods) = IntMap.mapAccumWithKey (\env res set -> mapAccumLSet (optimize res) env set)
|
||||
(Map.empty,Map.empty)
|
||||
(productions cnc)
|
||||
cats = Map.mapWithKey filterCatLabels (cnccats cnc)
|
||||
in cnc{ sequences = mkSetArray seqs
|
||||
, cncfuns = mkSetArray funs
|
||||
, productions = prods
|
||||
, cnccats = cats
|
||||
}
|
||||
where
|
||||
fid2cat fid =
|
||||
case IntMap.lookup fid fid2catMap of
|
||||
Just cat -> cat
|
||||
Nothing -> case [fid | Just set <- [IntMap.lookup fid (productions cnc)], PCoerce fid <- Set.toList set] of
|
||||
(fid:_) -> fid2cat fid
|
||||
_ -> error "unknown forest id"
|
||||
where
|
||||
fid2catMap = IntMap.fromList [(fid,cat) | (cat,CncCat start end lbls) <- Map.toList (cnccats cnc),
|
||||
fid <- [start..end]]
|
||||
|
||||
starts =
|
||||
case Map.lookup startCat (cnccats cnc) of
|
||||
Just (CncCat _ _ lbls) -> [(startCat,lbl) | lbl <- indices lbls]
|
||||
Nothing -> []
|
||||
|
||||
allRelations =
|
||||
Map.unionsWith Set.union
|
||||
[rel fid prod | (fid,set) <- IntMap.toList (productions cnc),
|
||||
prod <- Set.toList set]
|
||||
where
|
||||
rel fid (PApply funid args) = Map.fromList [((fid2cat fid,lbl),deps args seqid) | (lbl,seqid) <- assocs lin]
|
||||
where
|
||||
CncFun _ lin = cncfuns cnc ! funid
|
||||
rel fid _ = Map.empty
|
||||
|
||||
deps args seqid = Set.fromList [(fid2cat (args !! r),d) | SymCat r d <- elems seq]
|
||||
where
|
||||
seq = sequences cnc ! seqid
|
||||
|
||||
-- here we create a mapping from category to an array of indices.
|
||||
-- An element of the array is equal to -1 if the corresponding index
|
||||
-- is not going to be used in the optimized grammar, or the new index
|
||||
-- if it will be used
|
||||
closure :: Map.Map CId (UArray LIndex LIndex)
|
||||
closure = runST $ do
|
||||
set <- initSet
|
||||
addLitCat cidString set
|
||||
addLitCat cidInt set
|
||||
addLitCat cidFloat set
|
||||
addLitCat cidVar set
|
||||
closureSet set starts
|
||||
doneSet set
|
||||
where
|
||||
initSet :: ST s (Map.Map CId (STUArray s LIndex LIndex))
|
||||
initSet =
|
||||
fmap Map.fromAscList $ sequence
|
||||
[fmap ((,) cat) (newArray (bounds lbls) (-1))
|
||||
| (cat,CncCat _ _ lbls) <- Map.toAscList (cnccats cnc)]
|
||||
|
||||
addLitCat cat set =
|
||||
case Map.lookup cat set of
|
||||
Just indices -> writeArray indices 0 0
|
||||
Nothing -> return ()
|
||||
|
||||
closureSet set [] = return ()
|
||||
closureSet set (x@(cat,index):xs) =
|
||||
case Map.lookup cat set of
|
||||
Just indices -> do v <- readArray indices index
|
||||
writeArray indices index 0
|
||||
if v < 0
|
||||
then case Map.lookup x allRelations of
|
||||
Just ys -> closureSet set (Set.toList ys++xs)
|
||||
Nothing -> closureSet set xs
|
||||
else closureSet set xs
|
||||
Nothing -> error "unknown cat"
|
||||
|
||||
doneSet set =
|
||||
fmap Map.fromAscList $ mapM done (Map.toAscList set)
|
||||
where
|
||||
done (cat,indices) = do
|
||||
(s,e) <- getBounds indices
|
||||
reindex indices s e 0
|
||||
indices <- unsafeFreeze indices
|
||||
return (cat,indices)
|
||||
|
||||
reindex indices i j k
|
||||
| i <= j = do v <- readArray indices i
|
||||
if v < 0
|
||||
then reindex indices (i+1) j k
|
||||
else writeArray indices i k >>
|
||||
reindex indices (i+1) j (k+1)
|
||||
| otherwise = return ()
|
||||
|
||||
optimize res (seqs,funs) (PApply funid args) =
|
||||
let (seqs',lin') = mapAccumL addUnique seqs [amap updateSymbol (sequences cnc ! seqid) |
|
||||
(lbl,seqid) <- assocs lin, indicesOf res ! lbl >= 0]
|
||||
(funs',funid') = addUnique funs (CncFun fun (mkArray lin'))
|
||||
in ((seqs',funs'), PApply funid' args)
|
||||
where
|
||||
CncFun fun lin = cncfuns cnc ! funid
|
||||
|
||||
indicesOf fid =
|
||||
case Map.lookup (fid2cat fid) closure of
|
||||
Just indices -> indices
|
||||
Nothing -> error "unknown category"
|
||||
|
||||
addUnique seqs seq =
|
||||
case Map.lookup seq seqs of
|
||||
Just seqid -> (seqs,seqid)
|
||||
Nothing -> let seqid = Map.size seqs
|
||||
in (Map.insert seq seqid seqs, seqid)
|
||||
|
||||
updateSymbol (SymCat r d) = SymCat r (indicesOf (args !! r) ! d)
|
||||
updateSymbol s = s
|
||||
optimize res env prod = (env,prod)
|
||||
|
||||
filterCatLabels cat (CncCat start end lbls) =
|
||||
case Map.lookup cat closure of
|
||||
Just indices -> let lbls' = mkArray [lbl | (i,lbl) <- assocs lbls, indices ! i >= 0]
|
||||
in CncCat start end lbls'
|
||||
Nothing -> error "unknown category"
|
||||
|
||||
mkSetArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
|
||||
mkArray lst = listArray (0,length lst-1) lst
|
||||
|
||||
mapAccumLSet f b set = let (b',lst) = mapAccumL f b (Set.toList set)
|
||||
in (b',Set.fromList lst)
|
||||
|
||||
|
||||
bottomUpFilter :: Concr -> Concr
|
||||
bottomUpFilter cnc = cnc{productions=filterProductions IntMap.empty (productions cnc)}
|
||||
|
||||
filterProductions prods0 prods
|
||||
| prods0 == prods1 = prods0
|
||||
| otherwise = filterProductions prods1 prods
|
||||
where
|
||||
prods1 = IntMap.unionWith Set.union prods0 (IntMap.mapMaybe (filterProdSet prods0) prods)
|
||||
|
||||
filterProdSet prods0 set
|
||||
| Set.null set1 = Nothing
|
||||
| otherwise = Just set1
|
||||
where
|
||||
set1 = Set.filter (filterRule prods0) set
|
||||
|
||||
filterRule prods0 (PApply funid args) = all (\fcat -> isLiteralFCat fcat || IntMap.member fcat prods0) args
|
||||
filterRule prods0 (PCoerce fcat) = isLiteralFCat fcat || IntMap.member fcat prods0
|
||||
filterRule prods0 _ = True
|
||||
|
||||
updateConcrete abs cnc =
|
||||
let p_prods = (filterProductions IntMap.empty . parseIndex cnc) (productions cnc)
|
||||
l_prods = (linIndex cnc . filterProductions IntMap.empty) (productions cnc)
|
||||
in cnc{pproductions = p_prods, lproductions = l_prods}
|
||||
where
|
||||
parseIndex cnc = IntMap.mapMaybeWithKey filterProdSet
|
||||
where
|
||||
filterProdSet fid prods
|
||||
| fid `IntSet.member` ho_fids = Just prods
|
||||
| otherwise = let prods' = Set.filter (not . is_ho_prod) prods
|
||||
in if Set.null prods'
|
||||
then Nothing
|
||||
else Just prods'
|
||||
|
||||
is_ho_prod (PApply _ [fid]) | fid == fcatVar = True
|
||||
is_ho_prod _ = False
|
||||
|
||||
ho_fids :: IntSet.IntSet
|
||||
ho_fids = IntSet.fromList [fid | cat <- ho_cats
|
||||
, fid <- maybe [] (\(CncCat s e _) -> [s..e]) (Map.lookup cat (cnccats cnc))]
|
||||
|
||||
ho_cats :: [CId]
|
||||
ho_cats = sortNub [c | (ty,_,_) <- Map.elems (funs abs)
|
||||
, h <- case ty of {DTyp hyps val _ -> hyps}
|
||||
, c <- fst (catSkeleton (typeOfHypo h))]
|
||||
|
||||
linIndex cnc productions =
|
||||
Map.fromListWith (IntMap.unionWith Set.union)
|
||||
[(fun,IntMap.singleton res (Set.singleton prod)) | (res,prods) <- IntMap.toList productions
|
||||
, prod <- Set.toList prods
|
||||
, fun <- getFunctions prod]
|
||||
where
|
||||
getFunctions (PApply funid args) = let CncFun fun _ = cncfuns cnc ! funid in [fun]
|
||||
getFunctions (PCoerce fid) = case IntMap.lookup fid productions of
|
||||
Nothing -> []
|
||||
Just prods -> [fun | prod <- Set.toList prods, fun <- getFunctions prod]
|
||||
Reference in New Issue
Block a user