Compare commits

..

2 Commits

Author SHA1 Message Date
John J. Camilleri
3e2673de3b Use modify instead of insert: results mildly better but not significantly 2021-03-16 16:45:57 +01:00
John J. Camilleri
6c6a201d96 Introduce state with Map for caching compilation, but results are worse 2021-03-12 13:39:56 +01:00
5 changed files with 419 additions and 236 deletions

View File

@@ -15,14 +15,14 @@ import GF.Infra.UseIO (IOE)
import GF.Text.Pretty (pp, render)
import Control.Applicative ((<|>))
import Control.Monad (when, unless, forM, forM_)
import qualified Control.Monad.State as CMS
import Data.Either (lefts, rights)
import qualified Data.IntMap as IntMap
import Control.Monad (when, forM, forM_)
import qualified Control.Monad.State.Strict as CMS
import Data.List (elemIndex)
import qualified Data.List as L
import qualified Data.Map.Strict as Map
import Data.Maybe (fromJust, isJust)
import Data.Text (Text)
import qualified Data.Text as T
import System.Environment (lookupEnv)
import System.FilePath ((</>), (<.>))
import Text.Printf (printf)
@@ -37,7 +37,7 @@ mkCanon2lpgf opts gr am = do
ppCanonical debugDir canon
dumpCanonical debugDir canon
(an,abs) <- mkAbstract ab
cncs <- mapM (mkConcrete debug) cncs
cncs <- mapM (mkConcrete debug ab) cncs
let lpgf = LPGF {
L.absname = an,
L.abstract = abs,
@@ -48,247 +48,268 @@ mkCanon2lpgf opts gr am = do
where
canon@(C.Grammar ab cncs) = grammar2canonical opts am gr
mkAbstract :: (ErrorMonad err) => C.Abstract -> err (CId, L.Abstract)
mkAbstract (C.Abstract modId flags cats funs) = return (mdi2i modId, L.Abstract {})
mkAbstract :: (ErrorMonad err) => C.Abstract -> err (CId, L.Abstract)
mkAbstract (C.Abstract modId flags cats funs) = return (mdi2i modId, L.Abstract {})
mkConcrete :: (ErrorMonad err) => Bool -> C.Concrete -> err (CId, L.Concrete)
mkConcrete debug (C.Concrete modId absModId flags params' lincats lindefs) = do
let
(C.Abstract _ _ _ funs) = ab
params = inlineParamAliases params'
mkConcrete :: (ErrorMonad err) => Bool -> C.Abstract -> C.Concrete -> err (CId, L.Concrete)
mkConcrete debug (C.Abstract _ _ _ funs) (C.Concrete modId absModId flags params0 lincats lindefs0) = do
let
-- Some transformations on canonical grammar
-- Builds maps for lookups
params :: [C.ParamDef]
params = inlineParamAliases params0
paramValueMap :: Map.Map C.ParamId C.ParamDef -- constructor -> definition
paramValueMap = Map.fromList [ (v,d) | d@(C.ParamDef _ vs) <- params, (C.Param v _) <- vs ]
lindefs :: [C.LinDef]
lindefs =
[ C.LinDef funId varIds linValue'
| (C.LinDef funId varIds linValue) <- lindefs0
, let Right linType = lookupLinType funId
, let linValue' = cleanupRecordFields linValue linType
]
lincatMap :: Map.Map C.CatId C.LincatDef
lincatMap = Map.fromList [ (cid,d) | d@(C.LincatDef cid _) <- lincats ]
funMap :: Map.Map C.FunId C.FunDef
funMap = Map.fromList [ (fid,d) | d@(C.FunDef fid _) <- funs ]
-- | Lookup paramdef, providing dummy fallback when not found
-- Workaround for https://github.com/GrammaticalFramework/gf-core/issues/100
lookupParamDef :: C.ParamId -> Either String C.ParamDef
lookupParamDef pid = case Map.lookup pid paramValueMap of
Just d -> Right d
Nothing ->
-- Left $ printf "Cannot find param definition: %s" (show pid)
Right $ C.ParamDef (C.ParamId (C.Unqual "DUMMY")) [C.Param pid []]
-- | Lookup lintype for a function
lookupLinType :: C.FunId -> Either String C.LinType
lookupLinType funId = do
fun <- m2e (printf "Cannot find type for: %s" (show funId)) (Map.lookup funId funMap)
let (C.FunDef _ (C.Type _ (C.TypeApp catId _))) = fun
lincat <- m2e (printf "Cannot find lincat for: %s" (show catId)) (Map.lookup catId lincatMap)
let (C.LincatDef _ lt) = lincat
return lt
-- | Lookup lintype for a function's argument
lookupLinTypeArg :: C.FunId -> Int -> Either String C.LinType
lookupLinTypeArg funId argIx = do
fun <- m2e (printf "Cannot find type for: %s" (show funId)) (Map.lookup funId funMap)
let (C.FunDef _ (C.Type args _)) = fun
let (C.TypeBinding _ (C.Type _ (C.TypeApp catId _))) = args !! argIx
lincat <- m2e (printf "Cannot find lincat for: %s" (show catId)) (Map.lookup catId lincatMap)
let (C.LincatDef _ lt) = lincat
return lt
-- Filter out record fields from definitions which don't appear in lincat.
-- Workaround for https://github.com/GrammaticalFramework/gf-core/issues/101
cleanupRecordFields :: C.LinValue -> C.LinType -> C.LinValue
cleanupRecordFields (C.RecordValue rrvs) (C.RecordType rrs) =
let defnFields = Map.fromList [ (lid, lt) | (C.RecordRow lid lt) <- rrs ]
in C.RecordValue
[ C.RecordRow lid lv'
| C.RecordRow lid lv <- rrvs
, Map.member lid defnFields
, let Just lt = Map.lookup lid defnFields
, let lv' = cleanupRecordFields lv lt
]
cleanupRecordFields lv _ = lv
lindefs' =
[ C.LinDef funId varIds linValue'
| (C.LinDef funId varIds linValue) <- lindefs
, let Right linType = lookupLinType funId
, let linValue' = cleanupRecordFields linValue linType
-- Filter out record fields from definitions which don't appear in lincat.
-- Workaround for https://github.com/GrammaticalFramework/gf-core/issues/101
cleanupRecordFields :: C.LinValue -> C.LinType -> C.LinValue
cleanupRecordFields (C.RecordValue rrvs) (C.RecordType rrs) =
let defnFields = Map.fromList [ (lid, lt) | (C.RecordRow lid lt) <- rrs ]
in C.RecordValue
[ C.RecordRow lid lv'
| C.RecordRow lid lv <- rrvs
, Map.member lid defnFields
, let Just lt = Map.lookup lid defnFields
, let lv' = cleanupRecordFields lv lt
]
es = map mkLin lindefs'
lins = Map.fromList $ rights es
cleanupRecordFields lv _ = lv
-- | Main code generation function
mkLin :: C.LinDef -> Either String (CId, L.LinFun)
mkLin (C.LinDef funId varIds linValue) = do
-- when debug $ trace funId
(lf, _) <- val2lin linValue
return (fi2i funId, lf)
where
val2lin :: C.LinValue -> Either String (L.LinFun, Maybe C.LinType)
val2lin lv = case lv of
-- Builds maps for lookups
C.ConcatValue v1 v2 -> do
(v1',t1) <- val2lin v1
(v2',t2) <- val2lin v2
return (L.Concat v1' v2', t1 <|> t2) -- t1 else t2
paramValueMap :: Map.Map C.ParamId C.ParamDef -- constructor -> definition
paramValueMap = Map.fromList [ (v,d) | d@(C.ParamDef _ vs) <- params, (C.Param v _) <- vs ]
C.LiteralValue ll -> case ll of
C.FloatConstant f -> return (L.Token $ show f, Just C.FloatType)
C.IntConstant i -> return (L.Token $ show i, Just C.IntType)
C.StrConstant s -> return (L.Token s, Just C.StrType)
lincatMap :: Map.Map C.CatId C.LincatDef
lincatMap = Map.fromList [ (cid,d) | d@(C.LincatDef cid _) <- lincats ]
C.ErrorValue err -> return (L.Error err, Nothing)
funMap :: Map.Map C.FunId C.FunDef
funMap = Map.fromList [ (fid,d) | d@(C.FunDef fid _) <- funs ]
C.ParamConstant (C.Param pid lvs) -> do
let
collectProjections :: C.LinValue -> Either String [L.LinFun]
collectProjections (C.ParamConstant (C.Param pid lvs)) = do
def <- lookupParamDef pid
let (C.ParamDef tpid defpids) = def
pidIx <- eitherElemIndex pid [ p | C.Param p _ <- defpids ]
rest <- mapM collectProjections lvs
return $ L.Ix (pidIx+1) : concat rest
collectProjections lv = do
(lf,_) <- val2lin lv
return [lf]
lfs <- collectProjections lv
let term = L.Tuple lfs
def <- lookupParamDef pid
let (C.ParamDef tpid _) = def
return (term, Just $ C.ParamType (C.ParamTypeId tpid))
-- | Lookup paramdef, providing dummy fallback when not found
-- Workaround for https://github.com/GrammaticalFramework/gf-core/issues/100
lookupParamDef :: C.ParamId -> Either String C.ParamDef
lookupParamDef pid = case Map.lookup pid paramValueMap of
Just d -> Right d
Nothing ->
-- Left $ printf "Cannot find param definition: %s" (show pid)
Right $ C.ParamDef (C.ParamId (C.Unqual "DUMMY")) [C.Param pid []]
C.PredefValue (C.PredefId pid) -> case pid of
"BIND" -> return (L.Bind, Nothing)
"SOFT_BIND" -> return (L.Bind, Nothing)
"SOFT_SPACE" -> return (L.Space, Nothing)
"CAPIT" -> return (L.Capit, Nothing)
"ALL_CAPIT" -> return (L.AllCapit, Nothing)
_ -> Left $ printf "Unknown predef function: %s" pid
-- | Lookup lintype for a function
lookupLinType :: C.FunId -> Either String C.LinType
lookupLinType funId = do
fun <- m2e (printf "Cannot find type for: %s" (show funId)) (Map.lookup funId funMap)
let (C.FunDef _ (C.Type _ (C.TypeApp catId _))) = fun
lincat <- m2e (printf "Cannot find lincat for: %s" (show catId)) (Map.lookup catId lincatMap)
let (C.LincatDef _ lt) = lincat
return lt
C.RecordValue rrvs -> do
let rrvs' = sortRecordRows rrvs
ts <- sequence [ val2lin lv | C.RecordRow lid lv <- rrvs' ]
return (L.Tuple (map fst ts), Just $ C.RecordType [ C.RecordRow lid lt | (C.RecordRow lid _, (_, Just lt)) <- zip rrvs' ts])
-- | Lookup lintype for a function's argument
lookupLinTypeArg :: C.FunId -> Int -> Either String C.LinType
lookupLinTypeArg funId argIx = do
fun <- m2e (printf "Cannot find type for: %s" (show funId)) (Map.lookup funId funMap)
let (C.FunDef _ (C.Type args _)) = fun
let (C.TypeBinding _ (C.Type _ (C.TypeApp catId _))) = args !! argIx
lincat <- m2e (printf "Cannot find lincat for: %s" (show catId)) (Map.lookup catId lincatMap)
let (C.LincatDef _ lt) = lincat
return lt
C.TableValue lt trvs -> do
-- group the rows by "left-most" value
let
groupRow :: C.TableRowValue -> C.TableRowValue -> Bool
groupRow (C.TableRow p1 _) (C.TableRow p2 _) = groupPattern p1 p2
-- Code generation
groupPattern :: C.LinPattern -> C.LinPattern -> Bool
groupPattern p1 p2 = case (p1,p2) of
(C.ParamPattern (C.Param pid1 _), C.ParamPattern (C.Param pid2 _)) -> pid1 == pid2 -- compare only constructors
(C.RecordPattern (C.RecordRow lid1 patt1:_), C.RecordPattern (C.RecordRow lid2 patt2:_)) -> groupPattern patt1 patt2 -- lid1 == lid2 necessarily
_ -> error $ printf "Mismatched patterns in grouping:\n%s\n%s" (show p1) (show p2)
-- | Main code generation function
mkLin :: C.LinDef -> CodeGen (CId, L.LinFun)
mkLin (C.LinDef funId varIds linValue) = do
-- when debug $ trace funId
(lf, _) <- val2lin' linValue --skip memoisation at top level
return (fi2i funId, lf)
where
val2lin :: C.LinValue -> CodeGen (L.LinFun, Maybe C.LinType)
val2lin lv@(C.TableValue _ _) = do
-- val2lin lv@(C.ParamConstant _) = do
m <- CMS.get
case Map.lookup lv m of
Just r -> return r
Nothing -> do
r <- val2lin' lv
CMS.modify (Map.insert lv r)
return r
val2lin lv = val2lin' lv
grps :: [[C.TableRowValue]]
grps = L.groupBy groupRow trvs
val2lin' :: C.LinValue -> CodeGen (L.LinFun, Maybe C.LinType)
val2lin' lv = case lv of
-- remove one level of depth and recurse
let
handleGroup :: [C.TableRowValue] -> Either String (L.LinFun, Maybe C.LinType)
handleGroup [C.TableRow patt lv] =
C.ConcatValue v1 v2 -> do
(v1',t1) <- val2lin v1
(v2',t2) <- val2lin v2
return (L.Concat v1' v2', t1 <|> t2) -- t1 else t2
C.LiteralValue ll -> case ll of
C.FloatConstant f -> return (L.Token $ T.pack $ show f, Just C.FloatType)
C.IntConstant i -> return (L.Token $ T.pack $ show i, Just C.IntType)
C.StrConstant s -> return (L.Token $ T.pack s, Just C.StrType)
C.ErrorValue err -> return (L.Error err, Nothing)
C.ParamConstant (C.Param pid lvs) -> do
let
collectProjections :: C.LinValue -> CodeGen [L.LinFun]
collectProjections (C.ParamConstant (C.Param pid lvs)) = do
def <- CMS.lift $ lookupParamDef pid
let (C.ParamDef tpid defpids) = def
pidIx <- CMS.lift $ eitherElemIndex pid [ p | C.Param p _ <- defpids ]
rest <- mapM collectProjections lvs
return $ L.Ix (pidIx+1) : concat rest
collectProjections lv = do
(lf,_) <- val2lin lv
return [lf]
lfs <- collectProjections lv
let term = L.Tuple lfs
def <- CMS.lift $ lookupParamDef pid
let (C.ParamDef tpid _) = def
return (term, Just $ C.ParamType (C.ParamTypeId tpid))
C.PredefValue (C.PredefId pid) -> case pid of
"BIND" -> return (L.Bind, Nothing)
"SOFT_BIND" -> return (L.Bind, Nothing)
"SOFT_SPACE" -> return (L.Space, Nothing)
"CAPIT" -> return (L.Capit, Nothing)
"ALL_CAPIT" -> return (L.AllCapit, Nothing)
_ -> CMS.lift $ Left $ printf "Unknown predef function: %s" pid
C.RecordValue rrvs -> do
let rrvs' = sortRecordRows rrvs
ts <- sequence [ val2lin lv | C.RecordRow lid lv <- rrvs' ]
return (L.Tuple (map fst ts), Just $ C.RecordType [ C.RecordRow lid lt | (C.RecordRow lid _, (_, Just lt)) <- zip rrvs' ts])
C.TableValue lt trvs -> do
-- group the rows by "left-most" value
let
groupRow :: C.TableRowValue -> C.TableRowValue -> Bool
groupRow (C.TableRow p1 _) (C.TableRow p2 _) = groupPattern p1 p2
groupPattern :: C.LinPattern -> C.LinPattern -> Bool
groupPattern p1 p2 = case (p1,p2) of
(C.ParamPattern (C.Param pid1 _), C.ParamPattern (C.Param pid2 _)) -> pid1 == pid2 -- compare only constructors
(C.RecordPattern (C.RecordRow lid1 patt1:_), C.RecordPattern (C.RecordRow lid2 patt2:_)) -> groupPattern patt1 patt2 -- lid1 == lid2 necessarily
_ -> error $ printf "Mismatched patterns in grouping:\n%s\n%s" (show p1) (show p2)
grps :: [[C.TableRowValue]]
grps = L.groupBy groupRow trvs
-- remove one level of depth and recurse
let
handleGroup :: [C.TableRowValue] -> CodeGen (L.LinFun, Maybe C.LinType)
handleGroup [C.TableRow patt lv] =
case reducePattern patt of
Just patt' -> do
(lf,lt) <- handleGroup [C.TableRow patt' lv]
return (L.Tuple [lf],lt)
Nothing -> val2lin lv
handleGroup rows = do
let rows' = map reduceRow rows
val2lin (C.TableValue lt rows') -- lt is wrong here, but is unused
reducePattern :: C.LinPattern -> Maybe C.LinPattern
reducePattern patt =
case patt of
C.ParamPattern (C.Param _ []) -> Nothing
C.ParamPattern (C.Param _ patts) -> Just $ C.ParamPattern (C.Param pid' patts')
where
C.ParamPattern (C.Param pid1 patts1) = head patts
pid' = pid1
patts' = patts1 ++ tail patts
C.RecordPattern [] -> Nothing
C.RecordPattern (C.RecordRow lid patt:rrs) ->
case reducePattern patt of
Just patt' -> do
(lf,lt) <- handleGroup [C.TableRow patt' lv]
return (L.Tuple [lf],lt)
Nothing -> val2lin lv
handleGroup rows = do
let rows' = map reduceRow rows
val2lin (C.TableValue lt rows') -- lt is wrong here, but is unused
Just patt' -> Just $ C.RecordPattern (C.RecordRow lid patt':rrs)
Nothing -> if null rrs then Nothing else Just $ C.RecordPattern rrs
reducePattern :: C.LinPattern -> Maybe C.LinPattern
reducePattern patt =
case patt of
C.ParamPattern (C.Param _ []) -> Nothing
C.ParamPattern (C.Param _ patts) -> Just $ C.ParamPattern (C.Param pid' patts')
where
C.ParamPattern (C.Param pid1 patts1) = head patts
pid' = pid1
patts' = patts1 ++ tail patts
_ -> error $ printf "Unhandled pattern in reducing: %s" (show patt)
C.RecordPattern [] -> Nothing
C.RecordPattern (C.RecordRow lid patt:rrs) ->
case reducePattern patt of
Just patt' -> Just $ C.RecordPattern (C.RecordRow lid patt':rrs)
Nothing -> if null rrs then Nothing else Just $ C.RecordPattern rrs
reduceRow :: C.TableRowValue -> C.TableRowValue
reduceRow (C.TableRow patt lv) =
let Just patt' = reducePattern patt
in C.TableRow patt' lv
_ -> error $ printf "Unhandled pattern in reducing: %s" (show patt)
-- ts :: [(L.LinFun, Maybe C.LinType)]
ts <- mapM handleGroup grps
reduceRow :: C.TableRowValue -> C.TableRowValue
reduceRow (C.TableRow patt lv) =
let Just patt' = reducePattern patt
in C.TableRow patt' lv
-- return
let typ = case ts of
(_, Just tst):_ -> Just $ C.TableType lt tst
_ -> Nothing
return (L.Tuple (map fst ts), typ)
-- ts :: [(L.LinFun, Maybe C.LinType)]
ts <- mapM handleGroup grps
-- TODO TuplePattern, WildPattern?
-- return
let typ = case ts of
(_, Just tst):_ -> Just $ C.TableType lt tst
_ -> Nothing
return (L.Tuple (map fst ts), typ)
C.TupleValue lvs -> do
ts <- mapM val2lin lvs
return (L.Tuple (map fst ts), Just $ C.TupleType (map (fromJust.snd) ts))
-- TODO TuplePattern, WildPattern?
C.VariantValue [] -> return (L.Empty, Nothing) -- TODO Just C.StrType ?
C.VariantValue (vr:_) -> val2lin vr -- NOTE variants not supported, just pick first
C.TupleValue lvs -> do
ts <- mapM val2lin lvs
return (L.Tuple (map fst ts), Just $ C.TupleType (map (fromJust.snd) ts))
C.VarValue (C.VarValueId (C.Unqual v)) -> do
ix <- CMS.lift $ eitherElemIndex (C.VarId v) varIds
lt <- CMS.lift $ lookupLinTypeArg funId ix
return (L.Argument (ix+1), Just lt)
C.VariantValue [] -> return (L.Empty, Nothing) -- TODO Just C.StrType ?
C.VariantValue (vr:_) -> val2lin vr -- NOTE variants not supported, just pick first
C.PreValue pts df -> do
pts' <- forM pts $ \(pfxs, lv) -> do
(lv', _) <- val2lin lv
return (map T.pack pfxs, lv')
(df', lt) <- val2lin df
return (L.Pre pts' df', lt)
C.VarValue (C.VarValueId (C.Unqual v)) -> do
ix <- eitherElemIndex (C.VarId v) varIds
lt <- lookupLinTypeArg funId ix
return (L.Argument (ix+1), Just lt)
C.Projection v1 lblId -> do
(v1', mtyp) <- val2lin v1
-- find label index in argument type
let Just (C.RecordType rrs) = mtyp
let rrs' = [ lid | C.RecordRow lid _ <- rrs ]
-- lblIx <- eitherElemIndex lblId rrs'
let
lblIx = case eitherElemIndex lblId rrs' of
Right x -> x
Left _ -> 0 -- corresponds to Prelude.False
-- lookup lintype for record row
let C.RecordRow _ lt = rrs !! lblIx
return (L.Projection v1' (L.Ix (lblIx+1)), Just lt)
C.PreValue pts df -> do
pts' <- forM pts $ \(pfxs, lv) -> do
(lv', _) <- val2lin lv
return (pfxs, lv')
(df', lt) <- val2lin df
return (L.Pre pts' df', lt)
C.Selection v1 v2 -> do
(v1', t1) <- val2lin v1
(v2', t2) <- val2lin v2
let Just (C.TableType t11 t12) = t1 -- t11 == t2
return (L.Projection v1' v2', Just t12)
C.Projection v1 lblId -> do
(v1', mtyp) <- val2lin v1
-- find label index in argument type
let Just (C.RecordType rrs) = mtyp
let rrs' = [ lid | C.RecordRow lid _ <- rrs ]
-- lblIx <- eitherElemIndex lblId rrs'
let
lblIx = case eitherElemIndex lblId rrs' of
Right x -> x
Left _ -> 0 -- corresponds to Prelude.False
-- lookup lintype for record row
let C.RecordRow _ lt = rrs !! lblIx
return (L.Projection v1' (L.Ix (lblIx+1)), Just lt)
-- C.CommentedValue cmnt lv -> val2lin lv
C.CommentedValue cmnt lv -> case cmnt of
"impossible" -> val2lin lv >>= \(_, typ) -> return (L.Empty, typ)
_ -> val2lin lv
C.Selection v1 v2 -> do
(v1', t1) <- val2lin v1
(v2', t2) <- val2lin v2
let Just (C.TableType t11 t12) = t1 -- t11 == t2
return (L.Projection v1' v2', Just t12)
v -> CMS.lift $ Left $ printf "val2lin not implemented for: %s" (show v)
-- C.CommentedValue cmnt lv -> val2lin lv
C.CommentedValue cmnt lv -> case cmnt of
"impossible" -> val2lin lv >>= \(_, typ) -> return (L.Empty, typ)
_ -> val2lin lv
v -> Left $ printf "val2lin not implemented for: %s" (show v)
unless (null $ lefts es) (raise $ unlines (lefts es))
-- Invoke code generation
let e = flip CMS.evalStateT Map.empty $ mapM mkLin lindefs
case e of
Left err -> raise err
Right lins -> do
let maybeOptimise = if debug then id else extractStrings
let concr = maybeOptimise $ L.Concrete {
L.toks = IntMap.empty,
L.lins = lins
L.toks = IntMapBuilder.emptyIntMap,
L.lins = Map.fromList lins
}
return (mdi2i modId, concr)
type CodeGen a = CMS.StateT (Map.Map C.LinValue (L.LinFun, Maybe C.LinType)) (Either String) a
-- | Remove ParamAliasDefs by inlining their definitions
inlineParamAliases :: [C.ParamDef] -> [C.ParamDef]
inlineParamAliases defs = if null aliases then defs else map rp' pdefs
@@ -343,12 +364,12 @@ extractStrings concr = L.Concrete { L.toks = toks', L.lins = lins' }
(lins',imb') = CMS.runState (go0 (L.lins concr)) imb
toks' = IntMapBuilder.toIntMap imb'
go0 :: Map.Map CId L.LinFun -> CMS.State (IntMapBuilder.IMB String) (Map.Map CId L.LinFun)
go0 :: Map.Map CId L.LinFun -> CMS.State (IntMapBuilder.IMB Text) (Map.Map CId L.LinFun)
go0 mp = do
xs <- mapM (\(cid,lin) -> go lin >>= \lin' -> return (cid,lin')) (Map.toList mp)
return $ Map.fromList xs
go :: L.LinFun -> CMS.State (IntMapBuilder.IMB String) L.LinFun
go :: L.LinFun -> CMS.State (IntMapBuilder.IMB Text) L.LinFun
go lf = case lf of
L.Token str -> do
imb <- CMS.get
@@ -360,7 +381,7 @@ extractStrings concr = L.Concrete { L.toks = toks', L.lins = lins' }
-- pts' <- mapM (\(pfxs,lv) -> go lv >>= \lv' -> return (pfxs,lv')) pts
pts' <- forM pts $ \(pfxs,lv) -> do
imb <- CMS.get
let str = show pfxs
let str = T.pack $ show pfxs
let (ix,imb') = IntMapBuilder.insert' str imb
CMS.put imb'
lv' <- go lv

View File

@@ -23,6 +23,10 @@ empty = IMB {
valMap = HashMap.empty
}
-- | An empty IntMap
emptyIntMap :: IntMap a
emptyIntMap = IntMap.empty
-- | Lookup a value
lookup :: (Eq a, Hashable a) => a -> IMB a -> Maybe Int
lookup a IMB { valMap = vm } = HashMap.lookup a vm

View File

@@ -16,12 +16,13 @@ import PGF.Tree (Tree (..), expr2tree, prTree)
import qualified Control.Exception as EX
import Control.Monad (liftM, liftM2, forM_)
import qualified Control.Monad.Writer as CMW
import Data.Char (toUpper)
import Data.Binary (Binary, put, get, putWord8, getWord8, encodeFile, decodeFile)
import Data.Either (isLeft)
import qualified Data.IntMap as IntMap
import Data.List (isPrefixOf)
import qualified Data.Map.Strict as Map
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Text.Printf (printf)
import Prelude hiding ((!!))
@@ -40,7 +41,7 @@ data Abstract = Abstract {
-- | Concrete syntax
data Concrete = Concrete {
toks :: IntMap.IntMap String, -- ^ all strings are stored exactly once here
toks :: IntMap.IntMap Text, -- ^ all strings are stored exactly once here
-- lincats :: Map.Map CId LinType, -- ^ a linearization type for each category
lins :: Map.Map CId LinFun -- ^ a linearization function for each function
} deriving (Show)
@@ -64,12 +65,12 @@ data LinFun =
| Space -- ^ space between adjacent tokens
| Capit -- ^ capitalise next character
| AllCapit -- ^ capitalise next word
| Pre [([String], LinFun)] LinFun
| Pre [([Text], LinFun)] LinFun
| Missing CId -- ^ missing definition (inserted at runtime)
-- From original definition in paper
| Empty
| Token String
| Token Text
| Concat LinFun LinFun
| Ix Int
| Tuple [LinFun]
@@ -157,6 +158,10 @@ instance Binary LinFun where
14 -> liftM TokenIx get
_ -> fail "Failed to decode LPGF binary format"
instance Binary Text where
put = put . TE.encodeUtf8
get = liftM TE.decodeUtf8 get
abstractName :: LPGF -> CId
abstractName = absname
@@ -168,14 +173,22 @@ readLPGF = Data.Binary.decodeFile
-- | Main linearize function, to 'String'
linearize :: LPGF -> Language -> Expr -> String
linearize lpgf lang =
linearize lpgf lang expr = T.unpack $ linearizeText lpgf lang expr
-- | Main linearize function, to 'Data.Text.Text'
linearizeText :: LPGF -> Language -> Expr -> Text
linearizeText lpgf lang =
case Map.lookup lang (concretes lpgf) of
Just concr -> linearizeConcrete concr
Just concr -> linearizeConcreteText concr
Nothing -> error $ printf "Unknown language: %s" (showCId lang)
-- | Language-specific linearize function, to 'String'
linearizeConcrete :: Concrete -> Expr -> String
linearizeConcrete concr expr = lin2string $ lin (expr2tree expr)
linearizeConcrete concr expr = T.unpack $ linearizeConcreteText concr expr
-- | Language-specific linearize function, to 'Data.Text.Text'
linearizeConcreteText :: Concrete -> Expr -> Text
linearizeConcreteText concr expr = lin2string $ lin (expr2tree expr)
where
lin :: Tree -> LinFun
lin tree = case tree of
@@ -196,7 +209,7 @@ try comp = do
-- | Evaluation context
data Context = Context {
cxArgs :: [LinFun], -- ^ is a sequence of terms
cxToks :: IntMap.IntMap String -- ^ token map
cxToks :: IntMap.IntMap Text -- ^ token map
}
-- | Operational semantics
@@ -225,7 +238,7 @@ eval cxt t = case t of
PreIx pts df -> Pre pts' df'
where
pts' = [(pfxs, eval cxt t) | (ix, t) <- pts, let pfxs = maybe [] read $ IntMap.lookup ix (cxToks cxt)]
pts' = [(pfxs, eval cxt t) | (ix, t) <- pts, let pfxs = maybe [] (read . T.unpack) $ IntMap.lookup ix (cxToks cxt)]
df' = eval cxt df
TokenIx i -> maybe Empty Token $ IntMap.lookup i (cxToks cxt)
@@ -239,32 +252,32 @@ flattenTuple = \case
-- | Turn concrete syntax terms into an actual string.
-- This is done in two passes, first to flatten concats & evaluate pre's, then to
-- apply BIND and other predefs.
lin2string :: LinFun -> String
lin2string lf = unwords $ join $ flatten [lf]
lin2string :: LinFun -> Text
lin2string lf = T.unwords $ join $ flatten [lf]
where
-- Process bind et al into final token list
join :: [Either LinFun String] -> [String]
join :: [Either LinFun Text] -> [Text]
join elt = case elt of
Right tok:Left Bind:ls ->
case join ls of
next:ls' -> tok : next : ls'
next:ls' -> tok `T.append` next : ls'
_ -> []
Right tok:ls -> tok : join ls
Left Space:ls -> join ls
Left Capit:ls ->
case join ls of
next:ls' -> (toUpper (head next) : tail next) : ls'
next:ls' -> T.toUpper (T.take 1 next) `T.append` T.drop 1 next : ls'
_ -> []
Left AllCapit:ls ->
case join ls of
next:ls' -> map toUpper next : ls'
next:ls' -> T.toUpper next : ls'
_ -> []
Left (Missing cid):ls -> join (Right (printf "[%s]" (show cid)) : ls)
Left (Missing cid):ls -> join (Right (T.pack (printf "[%s]" (show cid))) : ls)
[] -> []
x -> error $ printf "Unhandled term in lin2string: %s" (show x)
-- Process concats, tuples, pre into flat list
flatten :: [LinFun] -> [Either LinFun String]
flatten :: [LinFun] -> [Either LinFun Text]
flatten [] = []
flatten (l:ls) = case l of
Empty -> flatten ls
@@ -278,7 +291,7 @@ lin2string lf = unwords $ join $ flatten [lf]
f = flatten ls
ch = case dropWhile isLeft f of
Right next:_ ->
let matches = [ l | (pfxs, l) <- pts, any (`isPrefixOf` next) pfxs ]
let matches = [ l | (pfxs, l) <- pts, any (`T.isPrefixOf` next) pfxs ]
in if null matches then df else head matches
_ -> df
in flatten (ch:ls)
@@ -323,7 +336,7 @@ instance PP LPGF where
instance PP Concrete where
pp (Concrete toks lins) = do
forM_ (IntMap.toList toks) $ \(i,tok) ->
CMW.tell [show i ++ " " ++ tok]
CMW.tell [show i ++ " " ++ T.unpack tok]
CMW.tell [""]
forM_ (Map.toList lins) $ \(cid,lin) -> do
CMW.tell ["# " ++ showCId cid]

View File

@@ -86,6 +86,150 @@ stack exec -- hp2ps -c lpgf-bench.hp && open lpgf-bench.ps
- http://book.realworldhaskell.org/read/profiling-and-optimization.html
- https://wiki.haskell.org/Performance
### Honing in
```
stack build --test --bench --no-run-tests --no-run-benchmarks &&
stack bench --benchmark-arguments "compile lpgf testsuite/lpgf/phrasebook/PhrasebookFre.gf +RTS -T -RTS"
```
**Baseline PGF**
- compile: 1.600776s
- size: 2.88 MB Phrasebook.pgf
Max memory: 328.20 MB
**Baseline LPGF = B**
- compile: 12.401099s
- size: 3.01 MB Phrasebook.lpgf
Max memory: 1.33 GB
**Baseline LPGF String**
- compile: 12.124689s
- size: 3.01 MB Phrasebook.lpgf
Max memory: 1.34 GB
**B -extractStrings**
- compile: 13.822735s
- size: 5.78 MB Phrasebook.lpgf
Max memory: 1.39 GB
**B -cleanupRecordFields**
- compile: 13.670776s
- size: 3.01 MB Phrasebook.lpgf
Max memory: 1.48 GB
**No generation at all = E**
- compile: 0.521001s
- size: 3.27 KB Phrasebook.lpgf
Max memory: 230.69 MB
**+ Concat, Literal, Error, Predef, Tuple, Variant, Commented**
- compile: 1.503594s
- size: 3.27 KB Phrasebook.lpgf
Max memory: 395.31 MB
**+ Var, Pre, Selection**
- compile: 1.260184s
- size: 3.28 KB Phrasebook.lpgf
Max memory: 392.17 MB
**+ Record**
- compile: 1.659233s
- size: 7.07 KB Phrasebook.lpgf
Max memory: 397.41 MB
**+ Projection = X**
- compile: 1.446217s
- size: 7.94 KB Phrasebook.lpgf
Max memory: 423.62 MB
**X + Param**
- compile: 2.073838s
- size: 10.82 KB Phrasebook.lpgf
Max memory: 619.71 MB
**X + Table**
- compile: 11.26558s
- size: 2.48 MB Phrasebook.lpgf
Max memory: 1.15 GB
### Repeated terms in compilation
**Param and Table**
| Concr | Total | Unique | Perc |
|:--------------|-------:|-------:|-----:|
| PhrasebookEng | 8673 | 1724 | 20% |
| PhrasebookSwe | 14802 | 2257 | 15% |
| PhrasebookFin | 526225 | 4866 | 1% |
**Param**
| Concr | Total | Unique | Perc |
|:--------------|-------:|-------:|-----:|
| PhrasebookEng | 3211 | 78 | 2% |
| PhrasebookSwe | 7567 | 69 | 1% |
| PhrasebookFin | 316355 | 310 | 0.1% |
**Table**
| Concr | Total | Unique | Perc |
|:--------------|-------:|-------:|-----:|
| PhrasebookEng | 5470 | 1654 | 30% |
| PhrasebookSwe | 7243 | 2196 | 30% |
| PhrasebookFin | 209878 | 4564 | 2% |
### After impelementing state monad for table memoisation
**worse!**
- compile: 12.55848s
- size: 3.01 MB Phrasebook.lpgf
Max memory: 2.25 GB
**Params**
| Concr | Total | Misses | Perc |
|:--------------|-------:|-------:|------:|
| PhrasebookEng | 3211 | 72 | 2% |
| PhrasebookSwe | 7526 | 61 | 1% |
| PhrasebookFin | 135268 | 333 | 0.2% |
| PhrasebookFre | 337102 | 76 | 0.02% |
_modify instead of insert_
| Concr | Total | Misses | Perc |
|:--------------|-------:|-------:|------:|
| PhrasebookEng | 3211 | 70 | 2% |
| PhrasebookSwe | 7526 | 61 | 1% |
| PhrasebookFin | 135268 | 302 | 0.2% |
| PhrasebookFre | 337102 | 72 | 0.02% |
**Tables**
| Concr | Total | Misses | Perc |
|:--------------|------:|-------:|-----:|
| PhrasebookEng | 3719 | 3170 | 85% |
| PhrasebookSwe | 4031 | 3019 | 75% |
| PhrasebookFin | 36875 | 21730 | 59% |
| PhrasebookFre | 41397 | 32967 | 80% |
_modify instead of insert_
| Concr | Total | Misses | Perc |
|:--------------|------:|-------:|-----:|
| PhrasebookEng | 2415 | 1646 | 68% |
| PhrasebookSwe | 3228 | 2188 | 68% |
| PhrasebookFin | 8793 | 4556 | 52% |
| PhrasebookFre | 12490 | 5793 | 46% |
Conclusions:
- map itself requires more memory than acual compilation
- lookup is also as slow as actual compilation
Tried HashMap (deriving Hashable for LinValue), no inprovement.
Using show on LinValue for keys is incredibly slow.
# Notes on compilation
## 1 (see unittests/Params4)

View File

@@ -14,6 +14,7 @@ import Data.Either (isLeft)
import qualified Data.List as L
import Data.Maybe (fromJust, isJust, isNothing)
import qualified Data.Map as Map
import Data.Text (Text)
import Data.Time.Clock (getCurrentTime, diffUTCTime)
import System.Console.ANSI
import System.Directory (listDirectory, getFileSize)
@@ -154,13 +155,13 @@ linPGF2 :: PGF2.PGF -> [PGF2.Expr] -> [[String]]
linPGF2 pgf trees =
[ map (PGF2.linearize concr) trees | (_, concr) <- Map.toList (PGF2.languages pgf) ]
linLPGF :: LPGF.LPGF -> [PGF.Expr] -> [[String]]
linLPGF :: LPGF.LPGF -> [PGF.Expr] -> [[Text]]
linLPGF lpgf trees =
[ map (LPGF.linearizeConcrete concr) trees | (_,concr) <- Map.toList (LPGF.concretes lpgf) ]
[ map (LPGF.linearizeConcreteText concr) trees | (_,concr) <- Map.toList (LPGF.concretes lpgf) ]
linLPGF' :: LPGF.LPGF -> [PGF.Expr] -> IO [[Either String String]]
linLPGF' :: LPGF.LPGF -> [PGF.Expr] -> IO [[Either String Text]]
linLPGF' lpgf trees =
forM (Map.toList (LPGF.concretes lpgf)) $ \(_,concr) -> mapM (LPGF.try . LPGF.linearizeConcrete concr) trees
forM (Map.toList (LPGF.concretes lpgf)) $ \(_,concr) -> mapM (LPGF.try . LPGF.linearizeConcreteText concr) trees
-- | Produce human readable file size
-- Adapted from https://hackage.haskell.org/package/hrfsize