1 Commits

Author SHA1 Message Date
John J. Camilleri
c058457337 Change Data.Text to String as a test, seemingly makes no difference. 2021-03-10 16:50:26 +01:00
5 changed files with 236 additions and 419 deletions

View File

@@ -15,14 +15,14 @@ import GF.Infra.UseIO (IOE)
import GF.Text.Pretty (pp, render) import GF.Text.Pretty (pp, render)
import Control.Applicative ((<|>)) import Control.Applicative ((<|>))
import Control.Monad (when, forM, forM_) import Control.Monad (when, unless, forM, forM_)
import qualified Control.Monad.State.Strict as CMS import qualified Control.Monad.State as CMS
import Data.Either (lefts, rights)
import qualified Data.IntMap as IntMap
import Data.List (elemIndex) import Data.List (elemIndex)
import qualified Data.List as L import qualified Data.List as L
import qualified Data.Map.Strict as Map import qualified Data.Map.Strict as Map
import Data.Maybe (fromJust, isJust) import Data.Maybe (fromJust, isJust)
import Data.Text (Text)
import qualified Data.Text as T
import System.Environment (lookupEnv) import System.Environment (lookupEnv)
import System.FilePath ((</>), (<.>)) import System.FilePath ((</>), (<.>))
import Text.Printf (printf) import Text.Printf (printf)
@@ -37,7 +37,7 @@ mkCanon2lpgf opts gr am = do
ppCanonical debugDir canon ppCanonical debugDir canon
dumpCanonical debugDir canon dumpCanonical debugDir canon
(an,abs) <- mkAbstract ab (an,abs) <- mkAbstract ab
cncs <- mapM (mkConcrete debug ab) cncs cncs <- mapM (mkConcrete debug) cncs
let lpgf = LPGF { let lpgf = LPGF {
L.absname = an, L.absname = an,
L.abstract = abs, L.abstract = abs,
@@ -48,38 +48,14 @@ mkCanon2lpgf opts gr am = do
where where
canon@(C.Grammar ab cncs) = grammar2canonical opts am gr canon@(C.Grammar ab cncs) = grammar2canonical opts am gr
mkAbstract :: (ErrorMonad err) => C.Abstract -> err (CId, L.Abstract) mkAbstract :: (ErrorMonad err) => C.Abstract -> err (CId, L.Abstract)
mkAbstract (C.Abstract modId flags cats funs) = return (mdi2i modId, L.Abstract {}) mkAbstract (C.Abstract modId flags cats funs) = return (mdi2i modId, L.Abstract {})
mkConcrete :: (ErrorMonad err) => Bool -> C.Abstract -> C.Concrete -> err (CId, L.Concrete) mkConcrete :: (ErrorMonad err) => Bool -> C.Concrete -> err (CId, L.Concrete)
mkConcrete debug (C.Abstract _ _ _ funs) (C.Concrete modId absModId flags params0 lincats lindefs0) = do mkConcrete debug (C.Concrete modId absModId flags params' lincats lindefs) = do
let let
-- Some transformations on canonical grammar (C.Abstract _ _ _ funs) = ab
params = inlineParamAliases params'
params :: [C.ParamDef]
params = inlineParamAliases params0
lindefs :: [C.LinDef]
lindefs =
[ C.LinDef funId varIds linValue'
| (C.LinDef funId varIds linValue) <- lindefs0
, let Right linType = lookupLinType funId
, let linValue' = cleanupRecordFields linValue linType
]
-- Filter out record fields from definitions which don't appear in lincat.
-- Workaround for https://github.com/GrammaticalFramework/gf-core/issues/101
cleanupRecordFields :: C.LinValue -> C.LinType -> C.LinValue
cleanupRecordFields (C.RecordValue rrvs) (C.RecordType rrs) =
let defnFields = Map.fromList [ (lid, lt) | (C.RecordRow lid lt) <- rrs ]
in C.RecordValue
[ C.RecordRow lid lv'
| C.RecordRow lid lv <- rrvs
, Map.member lid defnFields
, let Just lt = Map.lookup lid defnFields
, let lv' = cleanupRecordFields lv lt
]
cleanupRecordFields lv _ = lv
-- Builds maps for lookups -- Builds maps for lookups
@@ -120,29 +96,38 @@ mkConcrete debug (C.Abstract _ _ _ funs) (C.Concrete modId absModId flags params
let (C.LincatDef _ lt) = lincat let (C.LincatDef _ lt) = lincat
return lt return lt
-- Code generation -- Filter out record fields from definitions which don't appear in lincat.
-- Workaround for https://github.com/GrammaticalFramework/gf-core/issues/101
cleanupRecordFields :: C.LinValue -> C.LinType -> C.LinValue
cleanupRecordFields (C.RecordValue rrvs) (C.RecordType rrs) =
let defnFields = Map.fromList [ (lid, lt) | (C.RecordRow lid lt) <- rrs ]
in C.RecordValue
[ C.RecordRow lid lv'
| C.RecordRow lid lv <- rrvs
, Map.member lid defnFields
, let Just lt = Map.lookup lid defnFields
, let lv' = cleanupRecordFields lv lt
]
cleanupRecordFields lv _ = lv
lindefs' =
[ C.LinDef funId varIds linValue'
| (C.LinDef funId varIds linValue) <- lindefs
, let Right linType = lookupLinType funId
, let linValue' = cleanupRecordFields linValue linType
]
es = map mkLin lindefs'
lins = Map.fromList $ rights es
-- | Main code generation function -- | Main code generation function
mkLin :: C.LinDef -> CodeGen (CId, L.LinFun) mkLin :: C.LinDef -> Either String (CId, L.LinFun)
mkLin (C.LinDef funId varIds linValue) = do mkLin (C.LinDef funId varIds linValue) = do
-- when debug $ trace funId -- when debug $ trace funId
(lf, _) <- val2lin' linValue --skip memoisation at top level (lf, _) <- val2lin linValue
return (fi2i funId, lf) return (fi2i funId, lf)
where where
val2lin :: C.LinValue -> CodeGen (L.LinFun, Maybe C.LinType) val2lin :: C.LinValue -> Either String (L.LinFun, Maybe C.LinType)
val2lin lv@(C.TableValue _ _) = do val2lin lv = case lv of
-- val2lin lv@(C.ParamConstant _) = do
m <- CMS.get
case Map.lookup lv m of
Just r -> return r
Nothing -> do
r <- val2lin' lv
CMS.modify (Map.insert lv r)
return r
val2lin lv = val2lin' lv
val2lin' :: C.LinValue -> CodeGen (L.LinFun, Maybe C.LinType)
val2lin' lv = case lv of
C.ConcatValue v1 v2 -> do C.ConcatValue v1 v2 -> do
(v1',t1) <- val2lin v1 (v1',t1) <- val2lin v1
@@ -150,19 +135,19 @@ mkConcrete debug (C.Abstract _ _ _ funs) (C.Concrete modId absModId flags params
return (L.Concat v1' v2', t1 <|> t2) -- t1 else t2 return (L.Concat v1' v2', t1 <|> t2) -- t1 else t2
C.LiteralValue ll -> case ll of C.LiteralValue ll -> case ll of
C.FloatConstant f -> return (L.Token $ T.pack $ show f, Just C.FloatType) C.FloatConstant f -> return (L.Token $ show f, Just C.FloatType)
C.IntConstant i -> return (L.Token $ T.pack $ show i, Just C.IntType) C.IntConstant i -> return (L.Token $ show i, Just C.IntType)
C.StrConstant s -> return (L.Token $ T.pack s, Just C.StrType) C.StrConstant s -> return (L.Token s, Just C.StrType)
C.ErrorValue err -> return (L.Error err, Nothing) C.ErrorValue err -> return (L.Error err, Nothing)
C.ParamConstant (C.Param pid lvs) -> do C.ParamConstant (C.Param pid lvs) -> do
let let
collectProjections :: C.LinValue -> CodeGen [L.LinFun] collectProjections :: C.LinValue -> Either String [L.LinFun]
collectProjections (C.ParamConstant (C.Param pid lvs)) = do collectProjections (C.ParamConstant (C.Param pid lvs)) = do
def <- CMS.lift $ lookupParamDef pid def <- lookupParamDef pid
let (C.ParamDef tpid defpids) = def let (C.ParamDef tpid defpids) = def
pidIx <- CMS.lift $ eitherElemIndex pid [ p | C.Param p _ <- defpids ] pidIx <- eitherElemIndex pid [ p | C.Param p _ <- defpids ]
rest <- mapM collectProjections lvs rest <- mapM collectProjections lvs
return $ L.Ix (pidIx+1) : concat rest return $ L.Ix (pidIx+1) : concat rest
collectProjections lv = do collectProjections lv = do
@@ -170,7 +155,7 @@ mkConcrete debug (C.Abstract _ _ _ funs) (C.Concrete modId absModId flags params
return [lf] return [lf]
lfs <- collectProjections lv lfs <- collectProjections lv
let term = L.Tuple lfs let term = L.Tuple lfs
def <- CMS.lift $ lookupParamDef pid def <- lookupParamDef pid
let (C.ParamDef tpid _) = def let (C.ParamDef tpid _) = def
return (term, Just $ C.ParamType (C.ParamTypeId tpid)) return (term, Just $ C.ParamType (C.ParamTypeId tpid))
@@ -180,7 +165,7 @@ mkConcrete debug (C.Abstract _ _ _ funs) (C.Concrete modId absModId flags params
"SOFT_SPACE" -> return (L.Space, Nothing) "SOFT_SPACE" -> return (L.Space, Nothing)
"CAPIT" -> return (L.Capit, Nothing) "CAPIT" -> return (L.Capit, Nothing)
"ALL_CAPIT" -> return (L.AllCapit, Nothing) "ALL_CAPIT" -> return (L.AllCapit, Nothing)
_ -> CMS.lift $ Left $ printf "Unknown predef function: %s" pid _ -> Left $ printf "Unknown predef function: %s" pid
C.RecordValue rrvs -> do C.RecordValue rrvs -> do
let rrvs' = sortRecordRows rrvs let rrvs' = sortRecordRows rrvs
@@ -204,7 +189,7 @@ mkConcrete debug (C.Abstract _ _ _ funs) (C.Concrete modId absModId flags params
-- remove one level of depth and recurse -- remove one level of depth and recurse
let let
handleGroup :: [C.TableRowValue] -> CodeGen (L.LinFun, Maybe C.LinType) handleGroup :: [C.TableRowValue] -> Either String (L.LinFun, Maybe C.LinType)
handleGroup [C.TableRow patt lv] = handleGroup [C.TableRow patt lv] =
case reducePattern patt of case reducePattern patt of
Just patt' -> do Just patt' -> do
@@ -257,14 +242,14 @@ mkConcrete debug (C.Abstract _ _ _ funs) (C.Concrete modId absModId flags params
C.VariantValue (vr:_) -> val2lin vr -- NOTE variants not supported, just pick first C.VariantValue (vr:_) -> val2lin vr -- NOTE variants not supported, just pick first
C.VarValue (C.VarValueId (C.Unqual v)) -> do C.VarValue (C.VarValueId (C.Unqual v)) -> do
ix <- CMS.lift $ eitherElemIndex (C.VarId v) varIds ix <- eitherElemIndex (C.VarId v) varIds
lt <- CMS.lift $ lookupLinTypeArg funId ix lt <- lookupLinTypeArg funId ix
return (L.Argument (ix+1), Just lt) return (L.Argument (ix+1), Just lt)
C.PreValue pts df -> do C.PreValue pts df -> do
pts' <- forM pts $ \(pfxs, lv) -> do pts' <- forM pts $ \(pfxs, lv) -> do
(lv', _) <- val2lin lv (lv', _) <- val2lin lv
return (map T.pack pfxs, lv') return (pfxs, lv')
(df', lt) <- val2lin df (df', lt) <- val2lin df
return (L.Pre pts' df', lt) return (L.Pre pts' df', lt)
@@ -293,23 +278,17 @@ mkConcrete debug (C.Abstract _ _ _ funs) (C.Concrete modId absModId flags params
"impossible" -> val2lin lv >>= \(_, typ) -> return (L.Empty, typ) "impossible" -> val2lin lv >>= \(_, typ) -> return (L.Empty, typ)
_ -> val2lin lv _ -> val2lin lv
v -> CMS.lift $ Left $ printf "val2lin not implemented for: %s" (show v) v -> Left $ printf "val2lin not implemented for: %s" (show v)
-- Invoke code generation unless (null $ lefts es) (raise $ unlines (lefts es))
let e = flip CMS.evalStateT Map.empty $ mapM mkLin lindefs
case e of
Left err -> raise err
Right lins -> do
let maybeOptimise = if debug then id else extractStrings let maybeOptimise = if debug then id else extractStrings
let concr = maybeOptimise $ L.Concrete { let concr = maybeOptimise $ L.Concrete {
L.toks = IntMapBuilder.emptyIntMap, L.toks = IntMap.empty,
L.lins = Map.fromList lins L.lins = lins
} }
return (mdi2i modId, concr) return (mdi2i modId, concr)
type CodeGen a = CMS.StateT (Map.Map C.LinValue (L.LinFun, Maybe C.LinType)) (Either String) a
-- | Remove ParamAliasDefs by inlining their definitions -- | Remove ParamAliasDefs by inlining their definitions
inlineParamAliases :: [C.ParamDef] -> [C.ParamDef] inlineParamAliases :: [C.ParamDef] -> [C.ParamDef]
inlineParamAliases defs = if null aliases then defs else map rp' pdefs inlineParamAliases defs = if null aliases then defs else map rp' pdefs
@@ -364,12 +343,12 @@ extractStrings concr = L.Concrete { L.toks = toks', L.lins = lins' }
(lins',imb') = CMS.runState (go0 (L.lins concr)) imb (lins',imb') = CMS.runState (go0 (L.lins concr)) imb
toks' = IntMapBuilder.toIntMap imb' toks' = IntMapBuilder.toIntMap imb'
go0 :: Map.Map CId L.LinFun -> CMS.State (IntMapBuilder.IMB Text) (Map.Map CId L.LinFun) go0 :: Map.Map CId L.LinFun -> CMS.State (IntMapBuilder.IMB String) (Map.Map CId L.LinFun)
go0 mp = do go0 mp = do
xs <- mapM (\(cid,lin) -> go lin >>= \lin' -> return (cid,lin')) (Map.toList mp) xs <- mapM (\(cid,lin) -> go lin >>= \lin' -> return (cid,lin')) (Map.toList mp)
return $ Map.fromList xs return $ Map.fromList xs
go :: L.LinFun -> CMS.State (IntMapBuilder.IMB Text) L.LinFun go :: L.LinFun -> CMS.State (IntMapBuilder.IMB String) L.LinFun
go lf = case lf of go lf = case lf of
L.Token str -> do L.Token str -> do
imb <- CMS.get imb <- CMS.get
@@ -381,7 +360,7 @@ extractStrings concr = L.Concrete { L.toks = toks', L.lins = lins' }
-- pts' <- mapM (\(pfxs,lv) -> go lv >>= \lv' -> return (pfxs,lv')) pts -- pts' <- mapM (\(pfxs,lv) -> go lv >>= \lv' -> return (pfxs,lv')) pts
pts' <- forM pts $ \(pfxs,lv) -> do pts' <- forM pts $ \(pfxs,lv) -> do
imb <- CMS.get imb <- CMS.get
let str = T.pack $ show pfxs let str = show pfxs
let (ix,imb') = IntMapBuilder.insert' str imb let (ix,imb') = IntMapBuilder.insert' str imb
CMS.put imb' CMS.put imb'
lv' <- go lv lv' <- go lv

View File

@@ -23,10 +23,6 @@ empty = IMB {
valMap = HashMap.empty valMap = HashMap.empty
} }
-- | An empty IntMap
emptyIntMap :: IntMap a
emptyIntMap = IntMap.empty
-- | Lookup a value -- | Lookup a value
lookup :: (Eq a, Hashable a) => a -> IMB a -> Maybe Int lookup :: (Eq a, Hashable a) => a -> IMB a -> Maybe Int
lookup a IMB { valMap = vm } = HashMap.lookup a vm lookup a IMB { valMap = vm } = HashMap.lookup a vm

View File

@@ -16,13 +16,12 @@ import PGF.Tree (Tree (..), expr2tree, prTree)
import qualified Control.Exception as EX import qualified Control.Exception as EX
import Control.Monad (liftM, liftM2, forM_) import Control.Monad (liftM, liftM2, forM_)
import qualified Control.Monad.Writer as CMW import qualified Control.Monad.Writer as CMW
import Data.Char (toUpper)
import Data.Binary (Binary, put, get, putWord8, getWord8, encodeFile, decodeFile) import Data.Binary (Binary, put, get, putWord8, getWord8, encodeFile, decodeFile)
import Data.Either (isLeft) import Data.Either (isLeft)
import qualified Data.IntMap as IntMap import qualified Data.IntMap as IntMap
import Data.List (isPrefixOf)
import qualified Data.Map.Strict as Map import qualified Data.Map.Strict as Map
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Text.Printf (printf) import Text.Printf (printf)
import Prelude hiding ((!!)) import Prelude hiding ((!!))
@@ -41,7 +40,7 @@ data Abstract = Abstract {
-- | Concrete syntax -- | Concrete syntax
data Concrete = Concrete { data Concrete = Concrete {
toks :: IntMap.IntMap Text, -- ^ all strings are stored exactly once here toks :: IntMap.IntMap String, -- ^ all strings are stored exactly once here
-- lincats :: Map.Map CId LinType, -- ^ a linearization type for each category -- lincats :: Map.Map CId LinType, -- ^ a linearization type for each category
lins :: Map.Map CId LinFun -- ^ a linearization function for each function lins :: Map.Map CId LinFun -- ^ a linearization function for each function
} deriving (Show) } deriving (Show)
@@ -65,12 +64,12 @@ data LinFun =
| Space -- ^ space between adjacent tokens | Space -- ^ space between adjacent tokens
| Capit -- ^ capitalise next character | Capit -- ^ capitalise next character
| AllCapit -- ^ capitalise next word | AllCapit -- ^ capitalise next word
| Pre [([Text], LinFun)] LinFun | Pre [([String], LinFun)] LinFun
| Missing CId -- ^ missing definition (inserted at runtime) | Missing CId -- ^ missing definition (inserted at runtime)
-- From original definition in paper -- From original definition in paper
| Empty | Empty
| Token Text | Token String
| Concat LinFun LinFun | Concat LinFun LinFun
| Ix Int | Ix Int
| Tuple [LinFun] | Tuple [LinFun]
@@ -158,10 +157,6 @@ instance Binary LinFun where
14 -> liftM TokenIx get 14 -> liftM TokenIx get
_ -> fail "Failed to decode LPGF binary format" _ -> fail "Failed to decode LPGF binary format"
instance Binary Text where
put = put . TE.encodeUtf8
get = liftM TE.decodeUtf8 get
abstractName :: LPGF -> CId abstractName :: LPGF -> CId
abstractName = absname abstractName = absname
@@ -173,22 +168,14 @@ readLPGF = Data.Binary.decodeFile
-- | Main linearize function, to 'String' -- | Main linearize function, to 'String'
linearize :: LPGF -> Language -> Expr -> String linearize :: LPGF -> Language -> Expr -> String
linearize lpgf lang expr = T.unpack $ linearizeText lpgf lang expr linearize lpgf lang =
-- | Main linearize function, to 'Data.Text.Text'
linearizeText :: LPGF -> Language -> Expr -> Text
linearizeText lpgf lang =
case Map.lookup lang (concretes lpgf) of case Map.lookup lang (concretes lpgf) of
Just concr -> linearizeConcreteText concr Just concr -> linearizeConcrete concr
Nothing -> error $ printf "Unknown language: %s" (showCId lang) Nothing -> error $ printf "Unknown language: %s" (showCId lang)
-- | Language-specific linearize function, to 'String' -- | Language-specific linearize function, to 'String'
linearizeConcrete :: Concrete -> Expr -> String linearizeConcrete :: Concrete -> Expr -> String
linearizeConcrete concr expr = T.unpack $ linearizeConcreteText concr expr linearizeConcrete concr expr = lin2string $ lin (expr2tree expr)
-- | Language-specific linearize function, to 'Data.Text.Text'
linearizeConcreteText :: Concrete -> Expr -> Text
linearizeConcreteText concr expr = lin2string $ lin (expr2tree expr)
where where
lin :: Tree -> LinFun lin :: Tree -> LinFun
lin tree = case tree of lin tree = case tree of
@@ -209,7 +196,7 @@ try comp = do
-- | Evaluation context -- | Evaluation context
data Context = Context { data Context = Context {
cxArgs :: [LinFun], -- ^ is a sequence of terms cxArgs :: [LinFun], -- ^ is a sequence of terms
cxToks :: IntMap.IntMap Text -- ^ token map cxToks :: IntMap.IntMap String -- ^ token map
} }
-- | Operational semantics -- | Operational semantics
@@ -238,7 +225,7 @@ eval cxt t = case t of
PreIx pts df -> Pre pts' df' PreIx pts df -> Pre pts' df'
where where
pts' = [(pfxs, eval cxt t) | (ix, t) <- pts, let pfxs = maybe [] (read . T.unpack) $ IntMap.lookup ix (cxToks cxt)] pts' = [(pfxs, eval cxt t) | (ix, t) <- pts, let pfxs = maybe [] read $ IntMap.lookup ix (cxToks cxt)]
df' = eval cxt df df' = eval cxt df
TokenIx i -> maybe Empty Token $ IntMap.lookup i (cxToks cxt) TokenIx i -> maybe Empty Token $ IntMap.lookup i (cxToks cxt)
@@ -252,32 +239,32 @@ flattenTuple = \case
-- | Turn concrete syntax terms into an actual string. -- | Turn concrete syntax terms into an actual string.
-- This is done in two passes, first to flatten concats & evaluate pre's, then to -- This is done in two passes, first to flatten concats & evaluate pre's, then to
-- apply BIND and other predefs. -- apply BIND and other predefs.
lin2string :: LinFun -> Text lin2string :: LinFun -> String
lin2string lf = T.unwords $ join $ flatten [lf] lin2string lf = unwords $ join $ flatten [lf]
where where
-- Process bind et al into final token list -- Process bind et al into final token list
join :: [Either LinFun Text] -> [Text] join :: [Either LinFun String] -> [String]
join elt = case elt of join elt = case elt of
Right tok:Left Bind:ls -> Right tok:Left Bind:ls ->
case join ls of case join ls of
next:ls' -> tok `T.append` next : ls' next:ls' -> tok : next : ls'
_ -> [] _ -> []
Right tok:ls -> tok : join ls Right tok:ls -> tok : join ls
Left Space:ls -> join ls Left Space:ls -> join ls
Left Capit:ls -> Left Capit:ls ->
case join ls of case join ls of
next:ls' -> T.toUpper (T.take 1 next) `T.append` T.drop 1 next : ls' next:ls' -> (toUpper (head next) : tail next) : ls'
_ -> [] _ -> []
Left AllCapit:ls -> Left AllCapit:ls ->
case join ls of case join ls of
next:ls' -> T.toUpper next : ls' next:ls' -> map toUpper next : ls'
_ -> [] _ -> []
Left (Missing cid):ls -> join (Right (T.pack (printf "[%s]" (show cid))) : ls) Left (Missing cid):ls -> join (Right (printf "[%s]" (show cid)) : ls)
[] -> [] [] -> []
x -> error $ printf "Unhandled term in lin2string: %s" (show x) x -> error $ printf "Unhandled term in lin2string: %s" (show x)
-- Process concats, tuples, pre into flat list -- Process concats, tuples, pre into flat list
flatten :: [LinFun] -> [Either LinFun Text] flatten :: [LinFun] -> [Either LinFun String]
flatten [] = [] flatten [] = []
flatten (l:ls) = case l of flatten (l:ls) = case l of
Empty -> flatten ls Empty -> flatten ls
@@ -291,7 +278,7 @@ lin2string lf = T.unwords $ join $ flatten [lf]
f = flatten ls f = flatten ls
ch = case dropWhile isLeft f of ch = case dropWhile isLeft f of
Right next:_ -> Right next:_ ->
let matches = [ l | (pfxs, l) <- pts, any (`T.isPrefixOf` next) pfxs ] let matches = [ l | (pfxs, l) <- pts, any (`isPrefixOf` next) pfxs ]
in if null matches then df else head matches in if null matches then df else head matches
_ -> df _ -> df
in flatten (ch:ls) in flatten (ch:ls)
@@ -336,7 +323,7 @@ instance PP LPGF where
instance PP Concrete where instance PP Concrete where
pp (Concrete toks lins) = do pp (Concrete toks lins) = do
forM_ (IntMap.toList toks) $ \(i,tok) -> forM_ (IntMap.toList toks) $ \(i,tok) ->
CMW.tell [show i ++ " " ++ T.unpack tok] CMW.tell [show i ++ " " ++ tok]
CMW.tell [""] CMW.tell [""]
forM_ (Map.toList lins) $ \(cid,lin) -> do forM_ (Map.toList lins) $ \(cid,lin) -> do
CMW.tell ["# " ++ showCId cid] CMW.tell ["# " ++ showCId cid]

View File

@@ -86,150 +86,6 @@ stack exec -- hp2ps -c lpgf-bench.hp && open lpgf-bench.ps
- http://book.realworldhaskell.org/read/profiling-and-optimization.html - http://book.realworldhaskell.org/read/profiling-and-optimization.html
- https://wiki.haskell.org/Performance - https://wiki.haskell.org/Performance
### Honing in
```
stack build --test --bench --no-run-tests --no-run-benchmarks &&
stack bench --benchmark-arguments "compile lpgf testsuite/lpgf/phrasebook/PhrasebookFre.gf +RTS -T -RTS"
```
**Baseline PGF**
- compile: 1.600776s
- size: 2.88 MB Phrasebook.pgf
Max memory: 328.20 MB
**Baseline LPGF = B**
- compile: 12.401099s
- size: 3.01 MB Phrasebook.lpgf
Max memory: 1.33 GB
**Baseline LPGF String**
- compile: 12.124689s
- size: 3.01 MB Phrasebook.lpgf
Max memory: 1.34 GB
**B -extractStrings**
- compile: 13.822735s
- size: 5.78 MB Phrasebook.lpgf
Max memory: 1.39 GB
**B -cleanupRecordFields**
- compile: 13.670776s
- size: 3.01 MB Phrasebook.lpgf
Max memory: 1.48 GB
**No generation at all = E**
- compile: 0.521001s
- size: 3.27 KB Phrasebook.lpgf
Max memory: 230.69 MB
**+ Concat, Literal, Error, Predef, Tuple, Variant, Commented**
- compile: 1.503594s
- size: 3.27 KB Phrasebook.lpgf
Max memory: 395.31 MB
**+ Var, Pre, Selection**
- compile: 1.260184s
- size: 3.28 KB Phrasebook.lpgf
Max memory: 392.17 MB
**+ Record**
- compile: 1.659233s
- size: 7.07 KB Phrasebook.lpgf
Max memory: 397.41 MB
**+ Projection = X**
- compile: 1.446217s
- size: 7.94 KB Phrasebook.lpgf
Max memory: 423.62 MB
**X + Param**
- compile: 2.073838s
- size: 10.82 KB Phrasebook.lpgf
Max memory: 619.71 MB
**X + Table**
- compile: 11.26558s
- size: 2.48 MB Phrasebook.lpgf
Max memory: 1.15 GB
### Repeated terms in compilation
**Param and Table**
| Concr | Total | Unique | Perc |
|:--------------|-------:|-------:|-----:|
| PhrasebookEng | 8673 | 1724 | 20% |
| PhrasebookSwe | 14802 | 2257 | 15% |
| PhrasebookFin | 526225 | 4866 | 1% |
**Param**
| Concr | Total | Unique | Perc |
|:--------------|-------:|-------:|-----:|
| PhrasebookEng | 3211 | 78 | 2% |
| PhrasebookSwe | 7567 | 69 | 1% |
| PhrasebookFin | 316355 | 310 | 0.1% |
**Table**
| Concr | Total | Unique | Perc |
|:--------------|-------:|-------:|-----:|
| PhrasebookEng | 5470 | 1654 | 30% |
| PhrasebookSwe | 7243 | 2196 | 30% |
| PhrasebookFin | 209878 | 4564 | 2% |
### After impelementing state monad for table memoisation
**worse!**
- compile: 12.55848s
- size: 3.01 MB Phrasebook.lpgf
Max memory: 2.25 GB
**Params**
| Concr | Total | Misses | Perc |
|:--------------|-------:|-------:|------:|
| PhrasebookEng | 3211 | 72 | 2% |
| PhrasebookSwe | 7526 | 61 | 1% |
| PhrasebookFin | 135268 | 333 | 0.2% |
| PhrasebookFre | 337102 | 76 | 0.02% |
_modify instead of insert_
| Concr | Total | Misses | Perc |
|:--------------|-------:|-------:|------:|
| PhrasebookEng | 3211 | 70 | 2% |
| PhrasebookSwe | 7526 | 61 | 1% |
| PhrasebookFin | 135268 | 302 | 0.2% |
| PhrasebookFre | 337102 | 72 | 0.02% |
**Tables**
| Concr | Total | Misses | Perc |
|:--------------|------:|-------:|-----:|
| PhrasebookEng | 3719 | 3170 | 85% |
| PhrasebookSwe | 4031 | 3019 | 75% |
| PhrasebookFin | 36875 | 21730 | 59% |
| PhrasebookFre | 41397 | 32967 | 80% |
_modify instead of insert_
| Concr | Total | Misses | Perc |
|:--------------|------:|-------:|-----:|
| PhrasebookEng | 2415 | 1646 | 68% |
| PhrasebookSwe | 3228 | 2188 | 68% |
| PhrasebookFin | 8793 | 4556 | 52% |
| PhrasebookFre | 12490 | 5793 | 46% |
Conclusions:
- map itself requires more memory than acual compilation
- lookup is also as slow as actual compilation
Tried HashMap (deriving Hashable for LinValue), no inprovement.
Using show on LinValue for keys is incredibly slow.
# Notes on compilation # Notes on compilation
## 1 (see unittests/Params4) ## 1 (see unittests/Params4)

View File

@@ -14,7 +14,6 @@ import Data.Either (isLeft)
import qualified Data.List as L import qualified Data.List as L
import Data.Maybe (fromJust, isJust, isNothing) import Data.Maybe (fromJust, isJust, isNothing)
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.Text (Text)
import Data.Time.Clock (getCurrentTime, diffUTCTime) import Data.Time.Clock (getCurrentTime, diffUTCTime)
import System.Console.ANSI import System.Console.ANSI
import System.Directory (listDirectory, getFileSize) import System.Directory (listDirectory, getFileSize)
@@ -155,13 +154,13 @@ linPGF2 :: PGF2.PGF -> [PGF2.Expr] -> [[String]]
linPGF2 pgf trees = linPGF2 pgf trees =
[ map (PGF2.linearize concr) trees | (_, concr) <- Map.toList (PGF2.languages pgf) ] [ map (PGF2.linearize concr) trees | (_, concr) <- Map.toList (PGF2.languages pgf) ]
linLPGF :: LPGF.LPGF -> [PGF.Expr] -> [[Text]] linLPGF :: LPGF.LPGF -> [PGF.Expr] -> [[String]]
linLPGF lpgf trees = linLPGF lpgf trees =
[ map (LPGF.linearizeConcreteText concr) trees | (_,concr) <- Map.toList (LPGF.concretes lpgf) ] [ map (LPGF.linearizeConcrete concr) trees | (_,concr) <- Map.toList (LPGF.concretes lpgf) ]
linLPGF' :: LPGF.LPGF -> [PGF.Expr] -> IO [[Either String Text]] linLPGF' :: LPGF.LPGF -> [PGF.Expr] -> IO [[Either String String]]
linLPGF' lpgf trees = linLPGF' lpgf trees =
forM (Map.toList (LPGF.concretes lpgf)) $ \(_,concr) -> mapM (LPGF.try . LPGF.linearizeConcreteText concr) trees forM (Map.toList (LPGF.concretes lpgf)) $ \(_,concr) -> mapM (LPGF.try . LPGF.linearizeConcrete concr) trees
-- | Produce human readable file size -- | Produce human readable file size
-- Adapted from https://hackage.haskell.org/package/hrfsize -- Adapted from https://hackage.haskell.org/package/hrfsize