diff --git a/src/GF/Canon/Subexpressions.hs b/src/GF/Canon/Subexpressions.hs index 0bc8d546d..6d351a0b2 100644 --- a/src/GF/Canon/Subexpressions.hs +++ b/src/GF/Canon/Subexpressions.hs @@ -5,15 +5,17 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/09/18 22:55:46 $ +-- > CVS $Date: 2005/09/19 10:05:48 $ -- > CVS $Author: aarne $ --- > CVS $Revision: 1.1 $ +-- > CVS $Revision: 1.2 $ -- -- Common subexpression elimination. -- all tables. AR 18\/9\/2005. ----------------------------------------------------------------------------- -module GF.Canon.Subexpressions where -- (subelimCanon) where +module GF.Canon.Subexpressions ( + elimSubtermsMod, prSubtermStat, unSubelimCanon + ) where import GF.Canon.AbsGFC import GF.Infra.Ident @@ -28,82 +30,118 @@ import Control.Monad import Data.FiniteMap import Data.List -type TermList = FiniteMap Term (Int,Int) -- number of occs, id +{- +This module implements a simple common subexpression elimination + for gfc grammars, to factor out shared subterms in lin rules. +It works in three phases: -type TermM a = STM (TermList,Int) a + (1) collectSubterms collects recursively all subterms of forms table and (P x..y) + from lin definitions (experience shows that only these forms + tend to get shared) and counts how many times they occur + (2) addSubexpConsts takes those subterms t that occur more than once + and creates definitions of form "oper A''n = t" where n is a + fresh number; notice that we assume no ids of this form are in + scope otherwise + (3) elimSubtermsMod goes through lins and the created opers by replacing largest + possible subterms by the newly created identifiers + +The optimization is invoked in gf by the flag i -subs. + +If an application does not support GFC opers, the effect of this +optimization can be undone by the function unSubelimCanon. + +The function unSubelimCanon can be used to diagnostisize how much +cse is possible in the grammar. It is used by the flag pg -printer=subs. + +-} + +-- exported functions + +elimSubtermsMod :: (Ident,CanonModInfo) -> Err (Ident, CanonModInfo) +elimSubtermsMod (mo,m) = case m of + M.ModMod (M.Module mt st fs me ops js) -> do + (tree,_) <- appSTM (getSubtermsMod mo (tree2list js)) (emptyFM,0) + js2 <- liftM buildTree $ addSubexpConsts mo tree $ tree2list js + return (mo,M.ModMod (M.Module mt st fs me ops js2)) + _ -> return (mo,m) prSubtermStat :: CanonGrammar -> String prSubtermStat gr = unlines [prt mo ++++ expsIn mo js | (mo,js) <- mos] where mos = [(i, tree2list (M.jments m)) | (i, M.ModMod m) <- M.modules gr, M.isModCnc m] expsIn mo js = err id id $ do - (js', (tree,nu)) <- appSTM (getSubtermsMod mo js) (emptyFM,0) - let list0 = filter ((>1) . fst . snd) $ fmToList tree - let list1 = sortBy (\ (_,(m,_)) (_,(n,_)) -> compare n m) list0 - return $ unlines [show n ++ "\t" ++ prt trm | (trm,(n,_)) <- list1] + (tree,_) <- appSTM (getSubtermsMod mo js) (emptyFM,0) + let list0 = fmToList tree + let list1 = sortBy (\ (_,(m,_)) (_,(n,_)) -> compare n m) list0 + return $ unlines [show n ++ "\t" ++ prt trm | (trm,(n,_)) <- list1] -elimSubtermsMod :: (Ident,CanonModInfo) -> Err (Ident, CanonModInfo) -elimSubtermsMod (mo,m) = case m of - M.ModMod (M.Module mt st fs me ops js) -> do - (js',(tree,_)) <- appSTM (getSubtermsMod mo (tree2list js)) (emptyFM,0) - js2 <- liftM buildTree $ addSubexpConsts tree js' - return (mo,M.ModMod (M.Module mt st fs me ops js2)) - _ -> return (mo,m) +unSubelimCanon :: CanonGrammar -> CanonGrammar +unSubelimCanon gr@(M.MGrammar modules) = + M.MGrammar $ map unparModule modules where + unparModule (i,m) = case m of + M.ModMod (M.Module mt@(M.MTConcrete _) st fs me ops js) -> + (i, M.ModMod (M.Module mt st fs me ops (mapTree unparInfo js))) + _ -> (i,m) + unparInfo (c,info) = case info of + CncFun k xs t m -> (c, CncFun k xs (unparTerm t) m) + _ -> (c,info) + unparTerm t = case t of + I c -> errVal t $ liftM unparTerm $ lookupGlobal gr c + _ -> C.composSafeOp unparTerm t -addSubexpConsts :: FiniteMap Term (Int,Int) -> [(Ident,Info)] -> Err [(Ident,Info)] -addSubexpConsts tree lins = do - let opers = [oper id trm | (trm,(nu,id)) <- list, nu > 1] - mapM filterOne $ opers ++ lins +-- implementation + +type TermList = FiniteMap Term (Int,Int) -- number of occs, id +type TermM a = STM (TermList,Int) a + +addSubexpConsts :: Ident -> FiniteMap Term (Int,Int) -> [(Ident,Info)] -> Err [(Ident,Info)] +addSubexpConsts mo tree lins = do + let opers = [oper id trm | (trm,(_,id)) <- list] + mapM mkOne $ opers ++ lins where - filterOne (f,def) = case def of + mkOne (f,def) = case def of CncFun ci xs trm pn -> do - trm' <- recomp trm + trm' <- recomp f trm return (f,CncFun ci xs trm' pn) ResOper ty trm -> do - trm' <- recomp trm + trm' <- recomp f trm return (f,ResOper ty trm') _ -> return (f,def) - recomp t = case t of - I (CIQ _ e) -> do - (nu,exp) <- getCount e - if nu > 1 then return t else recomp exp - _ -> composOp recomp t + recomp f t = case lookupFM tree t of + Just (_,id) | ident id /= f -> return $ I $ cident mo id + _ -> composOp (recomp f) t + list = fmToList tree - tree' = listToFM $ map (\ (e, (nu,id)) -> (ident id,(nu,e))) $ list - getCount e = case lookupFM tree' e of - Just v -> return v - _ -> return (2,undefined) --- global from elsewhere: keep + oper id trm = (ident id, ResOper TStr trm) --- type TStr does not matter -getSubtermsMod :: Ident -> [(Ident,Info)] -> TermM [(Ident,Info)] +getSubtermsMod :: Ident -> [(Ident,Info)] -> TermM (FiniteMap Term (Int,Int)) getSubtermsMod mo js = do - js' <- mapM getInfo js - tryAgain - tryAgain --- do twice instead of fixpoint iteration - return js' ---- + mapM (getInfo (collectSubterms mo)) js + (tree0,_) <- readSTM + return $ filterFM (\_ (nu,_) -> nu > 1) tree0 where - getInfo fi@(f,i) = case i of + getInfo get fi@(f,i) = case i of CncFun ci xs trm pn -> do - trm' <- getSubterms mo trm - return $ (f,CncFun ci xs trm' pn) + get trm + return $ fi + ResOper ty trm -> do + get trm + return $ fi _ -> return fi - tryAgain = do - (ts,i) <- readSTM - let trms = map fst $ fmToList ts - mapM (getSubtermsAgain mo) trms - (ts',i') <- readSTM - if False ---- i' > i || count ts' > count ts - then tryAgain - else return () - count = sum . map (fst . snd) . fmToList -- how many subterms there are -getSubterms :: Ident -> Term -> TermM Term -getSubterms mo t = case t of +collectSubterms :: Ident -> Term -> TermM Term +collectSubterms mo t = case t of Par _ (_:_) -> add t - T _ cs -> add t - V _ ts -> add t + T ty cs -> do + let (ps,ts) = unzip [(p,t) | Cas p t <- cs] + mapM (collectSubterms mo) ts + add t + V ty ts -> do + mapM (collectSubterms mo) ts + add t K (KP _ _) -> add t - _ -> composOp (getSubterms mo) t + _ -> composOp (collectSubterms mo) t where add t = do (ts,i) <- readSTM @@ -112,41 +150,10 @@ getSubterms mo t = case t of Just (nu,id) -> ((nu+1,id), i) _ -> ((1, i ), i+1) writeSTM (addToFM ts t (count,id), next) - return $ I $ cident mo id - --- this is used in later phases of iteration -getSubtermsAgain :: Ident -> Term -> TermM Term -getSubtermsAgain mo t = case t of - T ty cs -> do - let (ps,ts) = unzip [(p,t) | Cas p t <- cs] - ts' <- mapM (getSubterms mo) ts - return $ T ty $ [Cas p t | (p,t) <- zip ps ts'] - V ty ts -> do - liftM (V ty) $ mapM (getSubterms mo) ts - Par _ _ -> return t - K _ -> return t - _ -> getSubterms mo t + return t --- only because of composOp ident :: Int -> Ident ident i = identC ("A''" ++ show i) --- cident :: Ident -> Int -> CIdent cident mo = CIQ mo . ident - - -unSubelimCanon :: CanonGrammar -> CanonGrammar -unSubelimCanon gr@(M.MGrammar modules) = - M.MGrammar $ map unparModule modules where - - unparModule (i,m) = case m of - M.ModMod (M.Module mt@(M.MTConcrete _) st fs me ops js) -> - (i, M.ModMod (M.Module mt st fs me ops (mapTree unparInfo js))) - _ -> (i,m) - - unparInfo (c,info) = case info of - CncFun k xs t m -> (c, CncFun k xs (unparTerm t) m) - _ -> (c,info) - - unparTerm t = case t of - I c -> errVal t $ liftM unparTerm $ lookupGlobal gr c - _ -> C.composSafeOp unparTerm t