forked from GitHub/gf-core
Compare commits
1 Commits
lpgf-memo
...
lpgf-strin
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c058457337 |
@@ -15,14 +15,14 @@ import GF.Infra.UseIO (IOE)
|
||||
import GF.Text.Pretty (pp, render)
|
||||
|
||||
import Control.Applicative ((<|>))
|
||||
import Control.Monad (when, forM, forM_)
|
||||
import qualified Control.Monad.State.Strict as CMS
|
||||
import Control.Monad (when, unless, forM, forM_)
|
||||
import qualified Control.Monad.State as CMS
|
||||
import Data.Either (lefts, rights)
|
||||
import qualified Data.IntMap as IntMap
|
||||
import Data.List (elemIndex)
|
||||
import qualified Data.List as L
|
||||
import qualified Data.Map.Strict as Map
|
||||
import Data.Maybe (fromJust, isJust)
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import System.Environment (lookupEnv)
|
||||
import System.FilePath ((</>), (<.>))
|
||||
import Text.Printf (printf)
|
||||
@@ -37,7 +37,7 @@ mkCanon2lpgf opts gr am = do
|
||||
ppCanonical debugDir canon
|
||||
dumpCanonical debugDir canon
|
||||
(an,abs) <- mkAbstract ab
|
||||
cncs <- mapM (mkConcrete debug ab) cncs
|
||||
cncs <- mapM (mkConcrete debug) cncs
|
||||
let lpgf = LPGF {
|
||||
L.absname = an,
|
||||
L.abstract = abs,
|
||||
@@ -48,268 +48,247 @@ mkCanon2lpgf opts gr am = do
|
||||
where
|
||||
canon@(C.Grammar ab cncs) = grammar2canonical opts am gr
|
||||
|
||||
mkAbstract :: (ErrorMonad err) => C.Abstract -> err (CId, L.Abstract)
|
||||
mkAbstract (C.Abstract modId flags cats funs) = return (mdi2i modId, L.Abstract {})
|
||||
mkAbstract :: (ErrorMonad err) => C.Abstract -> err (CId, L.Abstract)
|
||||
mkAbstract (C.Abstract modId flags cats funs) = return (mdi2i modId, L.Abstract {})
|
||||
|
||||
mkConcrete :: (ErrorMonad err) => Bool -> C.Abstract -> C.Concrete -> err (CId, L.Concrete)
|
||||
mkConcrete debug (C.Abstract _ _ _ funs) (C.Concrete modId absModId flags params0 lincats lindefs0) = do
|
||||
let
|
||||
-- Some transformations on canonical grammar
|
||||
mkConcrete :: (ErrorMonad err) => Bool -> C.Concrete -> err (CId, L.Concrete)
|
||||
mkConcrete debug (C.Concrete modId absModId flags params' lincats lindefs) = do
|
||||
let
|
||||
(C.Abstract _ _ _ funs) = ab
|
||||
params = inlineParamAliases params'
|
||||
|
||||
params :: [C.ParamDef]
|
||||
params = inlineParamAliases params0
|
||||
-- Builds maps for lookups
|
||||
|
||||
lindefs :: [C.LinDef]
|
||||
lindefs =
|
||||
[ C.LinDef funId varIds linValue'
|
||||
| (C.LinDef funId varIds linValue) <- lindefs0
|
||||
, let Right linType = lookupLinType funId
|
||||
, let linValue' = cleanupRecordFields linValue linType
|
||||
]
|
||||
paramValueMap :: Map.Map C.ParamId C.ParamDef -- constructor -> definition
|
||||
paramValueMap = Map.fromList [ (v,d) | d@(C.ParamDef _ vs) <- params, (C.Param v _) <- vs ]
|
||||
|
||||
-- Filter out record fields from definitions which don't appear in lincat.
|
||||
-- Workaround for https://github.com/GrammaticalFramework/gf-core/issues/101
|
||||
cleanupRecordFields :: C.LinValue -> C.LinType -> C.LinValue
|
||||
cleanupRecordFields (C.RecordValue rrvs) (C.RecordType rrs) =
|
||||
let defnFields = Map.fromList [ (lid, lt) | (C.RecordRow lid lt) <- rrs ]
|
||||
in C.RecordValue
|
||||
[ C.RecordRow lid lv'
|
||||
| C.RecordRow lid lv <- rrvs
|
||||
, Map.member lid defnFields
|
||||
, let Just lt = Map.lookup lid defnFields
|
||||
, let lv' = cleanupRecordFields lv lt
|
||||
lincatMap :: Map.Map C.CatId C.LincatDef
|
||||
lincatMap = Map.fromList [ (cid,d) | d@(C.LincatDef cid _) <- lincats ]
|
||||
|
||||
funMap :: Map.Map C.FunId C.FunDef
|
||||
funMap = Map.fromList [ (fid,d) | d@(C.FunDef fid _) <- funs ]
|
||||
|
||||
-- | Lookup paramdef, providing dummy fallback when not found
|
||||
-- Workaround for https://github.com/GrammaticalFramework/gf-core/issues/100
|
||||
lookupParamDef :: C.ParamId -> Either String C.ParamDef
|
||||
lookupParamDef pid = case Map.lookup pid paramValueMap of
|
||||
Just d -> Right d
|
||||
Nothing ->
|
||||
-- Left $ printf "Cannot find param definition: %s" (show pid)
|
||||
Right $ C.ParamDef (C.ParamId (C.Unqual "DUMMY")) [C.Param pid []]
|
||||
|
||||
-- | Lookup lintype for a function
|
||||
lookupLinType :: C.FunId -> Either String C.LinType
|
||||
lookupLinType funId = do
|
||||
fun <- m2e (printf "Cannot find type for: %s" (show funId)) (Map.lookup funId funMap)
|
||||
let (C.FunDef _ (C.Type _ (C.TypeApp catId _))) = fun
|
||||
lincat <- m2e (printf "Cannot find lincat for: %s" (show catId)) (Map.lookup catId lincatMap)
|
||||
let (C.LincatDef _ lt) = lincat
|
||||
return lt
|
||||
|
||||
-- | Lookup lintype for a function's argument
|
||||
lookupLinTypeArg :: C.FunId -> Int -> Either String C.LinType
|
||||
lookupLinTypeArg funId argIx = do
|
||||
fun <- m2e (printf "Cannot find type for: %s" (show funId)) (Map.lookup funId funMap)
|
||||
let (C.FunDef _ (C.Type args _)) = fun
|
||||
let (C.TypeBinding _ (C.Type _ (C.TypeApp catId _))) = args !! argIx
|
||||
lincat <- m2e (printf "Cannot find lincat for: %s" (show catId)) (Map.lookup catId lincatMap)
|
||||
let (C.LincatDef _ lt) = lincat
|
||||
return lt
|
||||
|
||||
-- Filter out record fields from definitions which don't appear in lincat.
|
||||
-- Workaround for https://github.com/GrammaticalFramework/gf-core/issues/101
|
||||
cleanupRecordFields :: C.LinValue -> C.LinType -> C.LinValue
|
||||
cleanupRecordFields (C.RecordValue rrvs) (C.RecordType rrs) =
|
||||
let defnFields = Map.fromList [ (lid, lt) | (C.RecordRow lid lt) <- rrs ]
|
||||
in C.RecordValue
|
||||
[ C.RecordRow lid lv'
|
||||
| C.RecordRow lid lv <- rrvs
|
||||
, Map.member lid defnFields
|
||||
, let Just lt = Map.lookup lid defnFields
|
||||
, let lv' = cleanupRecordFields lv lt
|
||||
]
|
||||
cleanupRecordFields lv _ = lv
|
||||
|
||||
lindefs' =
|
||||
[ C.LinDef funId varIds linValue'
|
||||
| (C.LinDef funId varIds linValue) <- lindefs
|
||||
, let Right linType = lookupLinType funId
|
||||
, let linValue' = cleanupRecordFields linValue linType
|
||||
]
|
||||
cleanupRecordFields lv _ = lv
|
||||
es = map mkLin lindefs'
|
||||
lins = Map.fromList $ rights es
|
||||
|
||||
-- Builds maps for lookups
|
||||
-- | Main code generation function
|
||||
mkLin :: C.LinDef -> Either String (CId, L.LinFun)
|
||||
mkLin (C.LinDef funId varIds linValue) = do
|
||||
-- when debug $ trace funId
|
||||
(lf, _) <- val2lin linValue
|
||||
return (fi2i funId, lf)
|
||||
where
|
||||
val2lin :: C.LinValue -> Either String (L.LinFun, Maybe C.LinType)
|
||||
val2lin lv = case lv of
|
||||
|
||||
paramValueMap :: Map.Map C.ParamId C.ParamDef -- constructor -> definition
|
||||
paramValueMap = Map.fromList [ (v,d) | d@(C.ParamDef _ vs) <- params, (C.Param v _) <- vs ]
|
||||
C.ConcatValue v1 v2 -> do
|
||||
(v1',t1) <- val2lin v1
|
||||
(v2',t2) <- val2lin v2
|
||||
return (L.Concat v1' v2', t1 <|> t2) -- t1 else t2
|
||||
|
||||
lincatMap :: Map.Map C.CatId C.LincatDef
|
||||
lincatMap = Map.fromList [ (cid,d) | d@(C.LincatDef cid _) <- lincats ]
|
||||
C.LiteralValue ll -> case ll of
|
||||
C.FloatConstant f -> return (L.Token $ show f, Just C.FloatType)
|
||||
C.IntConstant i -> return (L.Token $ show i, Just C.IntType)
|
||||
C.StrConstant s -> return (L.Token s, Just C.StrType)
|
||||
|
||||
funMap :: Map.Map C.FunId C.FunDef
|
||||
funMap = Map.fromList [ (fid,d) | d@(C.FunDef fid _) <- funs ]
|
||||
C.ErrorValue err -> return (L.Error err, Nothing)
|
||||
|
||||
-- | Lookup paramdef, providing dummy fallback when not found
|
||||
-- Workaround for https://github.com/GrammaticalFramework/gf-core/issues/100
|
||||
lookupParamDef :: C.ParamId -> Either String C.ParamDef
|
||||
lookupParamDef pid = case Map.lookup pid paramValueMap of
|
||||
Just d -> Right d
|
||||
Nothing ->
|
||||
-- Left $ printf "Cannot find param definition: %s" (show pid)
|
||||
Right $ C.ParamDef (C.ParamId (C.Unqual "DUMMY")) [C.Param pid []]
|
||||
C.ParamConstant (C.Param pid lvs) -> do
|
||||
let
|
||||
collectProjections :: C.LinValue -> Either String [L.LinFun]
|
||||
collectProjections (C.ParamConstant (C.Param pid lvs)) = do
|
||||
def <- lookupParamDef pid
|
||||
let (C.ParamDef tpid defpids) = def
|
||||
pidIx <- eitherElemIndex pid [ p | C.Param p _ <- defpids ]
|
||||
rest <- mapM collectProjections lvs
|
||||
return $ L.Ix (pidIx+1) : concat rest
|
||||
collectProjections lv = do
|
||||
(lf,_) <- val2lin lv
|
||||
return [lf]
|
||||
lfs <- collectProjections lv
|
||||
let term = L.Tuple lfs
|
||||
def <- lookupParamDef pid
|
||||
let (C.ParamDef tpid _) = def
|
||||
return (term, Just $ C.ParamType (C.ParamTypeId tpid))
|
||||
|
||||
-- | Lookup lintype for a function
|
||||
lookupLinType :: C.FunId -> Either String C.LinType
|
||||
lookupLinType funId = do
|
||||
fun <- m2e (printf "Cannot find type for: %s" (show funId)) (Map.lookup funId funMap)
|
||||
let (C.FunDef _ (C.Type _ (C.TypeApp catId _))) = fun
|
||||
lincat <- m2e (printf "Cannot find lincat for: %s" (show catId)) (Map.lookup catId lincatMap)
|
||||
let (C.LincatDef _ lt) = lincat
|
||||
return lt
|
||||
C.PredefValue (C.PredefId pid) -> case pid of
|
||||
"BIND" -> return (L.Bind, Nothing)
|
||||
"SOFT_BIND" -> return (L.Bind, Nothing)
|
||||
"SOFT_SPACE" -> return (L.Space, Nothing)
|
||||
"CAPIT" -> return (L.Capit, Nothing)
|
||||
"ALL_CAPIT" -> return (L.AllCapit, Nothing)
|
||||
_ -> Left $ printf "Unknown predef function: %s" pid
|
||||
|
||||
-- | Lookup lintype for a function's argument
|
||||
lookupLinTypeArg :: C.FunId -> Int -> Either String C.LinType
|
||||
lookupLinTypeArg funId argIx = do
|
||||
fun <- m2e (printf "Cannot find type for: %s" (show funId)) (Map.lookup funId funMap)
|
||||
let (C.FunDef _ (C.Type args _)) = fun
|
||||
let (C.TypeBinding _ (C.Type _ (C.TypeApp catId _))) = args !! argIx
|
||||
lincat <- m2e (printf "Cannot find lincat for: %s" (show catId)) (Map.lookup catId lincatMap)
|
||||
let (C.LincatDef _ lt) = lincat
|
||||
return lt
|
||||
C.RecordValue rrvs -> do
|
||||
let rrvs' = sortRecordRows rrvs
|
||||
ts <- sequence [ val2lin lv | C.RecordRow lid lv <- rrvs' ]
|
||||
return (L.Tuple (map fst ts), Just $ C.RecordType [ C.RecordRow lid lt | (C.RecordRow lid _, (_, Just lt)) <- zip rrvs' ts])
|
||||
|
||||
-- Code generation
|
||||
C.TableValue lt trvs -> do
|
||||
-- group the rows by "left-most" value
|
||||
let
|
||||
groupRow :: C.TableRowValue -> C.TableRowValue -> Bool
|
||||
groupRow (C.TableRow p1 _) (C.TableRow p2 _) = groupPattern p1 p2
|
||||
|
||||
-- | Main code generation function
|
||||
mkLin :: C.LinDef -> CodeGen (CId, L.LinFun)
|
||||
mkLin (C.LinDef funId varIds linValue) = do
|
||||
-- when debug $ trace funId
|
||||
(lf, _) <- val2lin' linValue --skip memoisation at top level
|
||||
return (fi2i funId, lf)
|
||||
where
|
||||
val2lin :: C.LinValue -> CodeGen (L.LinFun, Maybe C.LinType)
|
||||
val2lin lv@(C.TableValue _ _) = do
|
||||
-- val2lin lv@(C.ParamConstant _) = do
|
||||
m <- CMS.get
|
||||
case Map.lookup lv m of
|
||||
Just r -> return r
|
||||
Nothing -> do
|
||||
r <- val2lin' lv
|
||||
CMS.modify (Map.insert lv r)
|
||||
return r
|
||||
val2lin lv = val2lin' lv
|
||||
groupPattern :: C.LinPattern -> C.LinPattern -> Bool
|
||||
groupPattern p1 p2 = case (p1,p2) of
|
||||
(C.ParamPattern (C.Param pid1 _), C.ParamPattern (C.Param pid2 _)) -> pid1 == pid2 -- compare only constructors
|
||||
(C.RecordPattern (C.RecordRow lid1 patt1:_), C.RecordPattern (C.RecordRow lid2 patt2:_)) -> groupPattern patt1 patt2 -- lid1 == lid2 necessarily
|
||||
_ -> error $ printf "Mismatched patterns in grouping:\n%s\n%s" (show p1) (show p2)
|
||||
|
||||
val2lin' :: C.LinValue -> CodeGen (L.LinFun, Maybe C.LinType)
|
||||
val2lin' lv = case lv of
|
||||
grps :: [[C.TableRowValue]]
|
||||
grps = L.groupBy groupRow trvs
|
||||
|
||||
C.ConcatValue v1 v2 -> do
|
||||
(v1',t1) <- val2lin v1
|
||||
(v2',t2) <- val2lin v2
|
||||
return (L.Concat v1' v2', t1 <|> t2) -- t1 else t2
|
||||
|
||||
C.LiteralValue ll -> case ll of
|
||||
C.FloatConstant f -> return (L.Token $ T.pack $ show f, Just C.FloatType)
|
||||
C.IntConstant i -> return (L.Token $ T.pack $ show i, Just C.IntType)
|
||||
C.StrConstant s -> return (L.Token $ T.pack s, Just C.StrType)
|
||||
|
||||
C.ErrorValue err -> return (L.Error err, Nothing)
|
||||
|
||||
C.ParamConstant (C.Param pid lvs) -> do
|
||||
let
|
||||
collectProjections :: C.LinValue -> CodeGen [L.LinFun]
|
||||
collectProjections (C.ParamConstant (C.Param pid lvs)) = do
|
||||
def <- CMS.lift $ lookupParamDef pid
|
||||
let (C.ParamDef tpid defpids) = def
|
||||
pidIx <- CMS.lift $ eitherElemIndex pid [ p | C.Param p _ <- defpids ]
|
||||
rest <- mapM collectProjections lvs
|
||||
return $ L.Ix (pidIx+1) : concat rest
|
||||
collectProjections lv = do
|
||||
(lf,_) <- val2lin lv
|
||||
return [lf]
|
||||
lfs <- collectProjections lv
|
||||
let term = L.Tuple lfs
|
||||
def <- CMS.lift $ lookupParamDef pid
|
||||
let (C.ParamDef tpid _) = def
|
||||
return (term, Just $ C.ParamType (C.ParamTypeId tpid))
|
||||
|
||||
C.PredefValue (C.PredefId pid) -> case pid of
|
||||
"BIND" -> return (L.Bind, Nothing)
|
||||
"SOFT_BIND" -> return (L.Bind, Nothing)
|
||||
"SOFT_SPACE" -> return (L.Space, Nothing)
|
||||
"CAPIT" -> return (L.Capit, Nothing)
|
||||
"ALL_CAPIT" -> return (L.AllCapit, Nothing)
|
||||
_ -> CMS.lift $ Left $ printf "Unknown predef function: %s" pid
|
||||
|
||||
C.RecordValue rrvs -> do
|
||||
let rrvs' = sortRecordRows rrvs
|
||||
ts <- sequence [ val2lin lv | C.RecordRow lid lv <- rrvs' ]
|
||||
return (L.Tuple (map fst ts), Just $ C.RecordType [ C.RecordRow lid lt | (C.RecordRow lid _, (_, Just lt)) <- zip rrvs' ts])
|
||||
|
||||
C.TableValue lt trvs -> do
|
||||
-- group the rows by "left-most" value
|
||||
let
|
||||
groupRow :: C.TableRowValue -> C.TableRowValue -> Bool
|
||||
groupRow (C.TableRow p1 _) (C.TableRow p2 _) = groupPattern p1 p2
|
||||
|
||||
groupPattern :: C.LinPattern -> C.LinPattern -> Bool
|
||||
groupPattern p1 p2 = case (p1,p2) of
|
||||
(C.ParamPattern (C.Param pid1 _), C.ParamPattern (C.Param pid2 _)) -> pid1 == pid2 -- compare only constructors
|
||||
(C.RecordPattern (C.RecordRow lid1 patt1:_), C.RecordPattern (C.RecordRow lid2 patt2:_)) -> groupPattern patt1 patt2 -- lid1 == lid2 necessarily
|
||||
_ -> error $ printf "Mismatched patterns in grouping:\n%s\n%s" (show p1) (show p2)
|
||||
|
||||
grps :: [[C.TableRowValue]]
|
||||
grps = L.groupBy groupRow trvs
|
||||
|
||||
-- remove one level of depth and recurse
|
||||
let
|
||||
handleGroup :: [C.TableRowValue] -> CodeGen (L.LinFun, Maybe C.LinType)
|
||||
handleGroup [C.TableRow patt lv] =
|
||||
case reducePattern patt of
|
||||
Just patt' -> do
|
||||
(lf,lt) <- handleGroup [C.TableRow patt' lv]
|
||||
return (L.Tuple [lf],lt)
|
||||
Nothing -> val2lin lv
|
||||
handleGroup rows = do
|
||||
let rows' = map reduceRow rows
|
||||
val2lin (C.TableValue lt rows') -- lt is wrong here, but is unused
|
||||
|
||||
reducePattern :: C.LinPattern -> Maybe C.LinPattern
|
||||
reducePattern patt =
|
||||
case patt of
|
||||
C.ParamPattern (C.Param _ []) -> Nothing
|
||||
C.ParamPattern (C.Param _ patts) -> Just $ C.ParamPattern (C.Param pid' patts')
|
||||
where
|
||||
C.ParamPattern (C.Param pid1 patts1) = head patts
|
||||
pid' = pid1
|
||||
patts' = patts1 ++ tail patts
|
||||
|
||||
C.RecordPattern [] -> Nothing
|
||||
C.RecordPattern (C.RecordRow lid patt:rrs) ->
|
||||
-- remove one level of depth and recurse
|
||||
let
|
||||
handleGroup :: [C.TableRowValue] -> Either String (L.LinFun, Maybe C.LinType)
|
||||
handleGroup [C.TableRow patt lv] =
|
||||
case reducePattern patt of
|
||||
Just patt' -> Just $ C.RecordPattern (C.RecordRow lid patt':rrs)
|
||||
Nothing -> if null rrs then Nothing else Just $ C.RecordPattern rrs
|
||||
Just patt' -> do
|
||||
(lf,lt) <- handleGroup [C.TableRow patt' lv]
|
||||
return (L.Tuple [lf],lt)
|
||||
Nothing -> val2lin lv
|
||||
handleGroup rows = do
|
||||
let rows' = map reduceRow rows
|
||||
val2lin (C.TableValue lt rows') -- lt is wrong here, but is unused
|
||||
|
||||
_ -> error $ printf "Unhandled pattern in reducing: %s" (show patt)
|
||||
reducePattern :: C.LinPattern -> Maybe C.LinPattern
|
||||
reducePattern patt =
|
||||
case patt of
|
||||
C.ParamPattern (C.Param _ []) -> Nothing
|
||||
C.ParamPattern (C.Param _ patts) -> Just $ C.ParamPattern (C.Param pid' patts')
|
||||
where
|
||||
C.ParamPattern (C.Param pid1 patts1) = head patts
|
||||
pid' = pid1
|
||||
patts' = patts1 ++ tail patts
|
||||
|
||||
reduceRow :: C.TableRowValue -> C.TableRowValue
|
||||
reduceRow (C.TableRow patt lv) =
|
||||
let Just patt' = reducePattern patt
|
||||
in C.TableRow patt' lv
|
||||
C.RecordPattern [] -> Nothing
|
||||
C.RecordPattern (C.RecordRow lid patt:rrs) ->
|
||||
case reducePattern patt of
|
||||
Just patt' -> Just $ C.RecordPattern (C.RecordRow lid patt':rrs)
|
||||
Nothing -> if null rrs then Nothing else Just $ C.RecordPattern rrs
|
||||
|
||||
-- ts :: [(L.LinFun, Maybe C.LinType)]
|
||||
ts <- mapM handleGroup grps
|
||||
_ -> error $ printf "Unhandled pattern in reducing: %s" (show patt)
|
||||
|
||||
-- return
|
||||
let typ = case ts of
|
||||
(_, Just tst):_ -> Just $ C.TableType lt tst
|
||||
_ -> Nothing
|
||||
return (L.Tuple (map fst ts), typ)
|
||||
reduceRow :: C.TableRowValue -> C.TableRowValue
|
||||
reduceRow (C.TableRow patt lv) =
|
||||
let Just patt' = reducePattern patt
|
||||
in C.TableRow patt' lv
|
||||
|
||||
-- TODO TuplePattern, WildPattern?
|
||||
-- ts :: [(L.LinFun, Maybe C.LinType)]
|
||||
ts <- mapM handleGroup grps
|
||||
|
||||
C.TupleValue lvs -> do
|
||||
ts <- mapM val2lin lvs
|
||||
return (L.Tuple (map fst ts), Just $ C.TupleType (map (fromJust.snd) ts))
|
||||
-- return
|
||||
let typ = case ts of
|
||||
(_, Just tst):_ -> Just $ C.TableType lt tst
|
||||
_ -> Nothing
|
||||
return (L.Tuple (map fst ts), typ)
|
||||
|
||||
C.VariantValue [] -> return (L.Empty, Nothing) -- TODO Just C.StrType ?
|
||||
C.VariantValue (vr:_) -> val2lin vr -- NOTE variants not supported, just pick first
|
||||
-- TODO TuplePattern, WildPattern?
|
||||
|
||||
C.VarValue (C.VarValueId (C.Unqual v)) -> do
|
||||
ix <- CMS.lift $ eitherElemIndex (C.VarId v) varIds
|
||||
lt <- CMS.lift $ lookupLinTypeArg funId ix
|
||||
return (L.Argument (ix+1), Just lt)
|
||||
C.TupleValue lvs -> do
|
||||
ts <- mapM val2lin lvs
|
||||
return (L.Tuple (map fst ts), Just $ C.TupleType (map (fromJust.snd) ts))
|
||||
|
||||
C.PreValue pts df -> do
|
||||
pts' <- forM pts $ \(pfxs, lv) -> do
|
||||
(lv', _) <- val2lin lv
|
||||
return (map T.pack pfxs, lv')
|
||||
(df', lt) <- val2lin df
|
||||
return (L.Pre pts' df', lt)
|
||||
C.VariantValue [] -> return (L.Empty, Nothing) -- TODO Just C.StrType ?
|
||||
C.VariantValue (vr:_) -> val2lin vr -- NOTE variants not supported, just pick first
|
||||
|
||||
C.Projection v1 lblId -> do
|
||||
(v1', mtyp) <- val2lin v1
|
||||
-- find label index in argument type
|
||||
let Just (C.RecordType rrs) = mtyp
|
||||
let rrs' = [ lid | C.RecordRow lid _ <- rrs ]
|
||||
-- lblIx <- eitherElemIndex lblId rrs'
|
||||
let
|
||||
lblIx = case eitherElemIndex lblId rrs' of
|
||||
Right x -> x
|
||||
Left _ -> 0 -- corresponds to Prelude.False
|
||||
-- lookup lintype for record row
|
||||
let C.RecordRow _ lt = rrs !! lblIx
|
||||
return (L.Projection v1' (L.Ix (lblIx+1)), Just lt)
|
||||
C.VarValue (C.VarValueId (C.Unqual v)) -> do
|
||||
ix <- eitherElemIndex (C.VarId v) varIds
|
||||
lt <- lookupLinTypeArg funId ix
|
||||
return (L.Argument (ix+1), Just lt)
|
||||
|
||||
C.Selection v1 v2 -> do
|
||||
(v1', t1) <- val2lin v1
|
||||
(v2', t2) <- val2lin v2
|
||||
let Just (C.TableType t11 t12) = t1 -- t11 == t2
|
||||
return (L.Projection v1' v2', Just t12)
|
||||
C.PreValue pts df -> do
|
||||
pts' <- forM pts $ \(pfxs, lv) -> do
|
||||
(lv', _) <- val2lin lv
|
||||
return (pfxs, lv')
|
||||
(df', lt) <- val2lin df
|
||||
return (L.Pre pts' df', lt)
|
||||
|
||||
-- C.CommentedValue cmnt lv -> val2lin lv
|
||||
C.CommentedValue cmnt lv -> case cmnt of
|
||||
"impossible" -> val2lin lv >>= \(_, typ) -> return (L.Empty, typ)
|
||||
_ -> val2lin lv
|
||||
C.Projection v1 lblId -> do
|
||||
(v1', mtyp) <- val2lin v1
|
||||
-- find label index in argument type
|
||||
let Just (C.RecordType rrs) = mtyp
|
||||
let rrs' = [ lid | C.RecordRow lid _ <- rrs ]
|
||||
-- lblIx <- eitherElemIndex lblId rrs'
|
||||
let
|
||||
lblIx = case eitherElemIndex lblId rrs' of
|
||||
Right x -> x
|
||||
Left _ -> 0 -- corresponds to Prelude.False
|
||||
-- lookup lintype for record row
|
||||
let C.RecordRow _ lt = rrs !! lblIx
|
||||
return (L.Projection v1' (L.Ix (lblIx+1)), Just lt)
|
||||
|
||||
v -> CMS.lift $ Left $ printf "val2lin not implemented for: %s" (show v)
|
||||
C.Selection v1 v2 -> do
|
||||
(v1', t1) <- val2lin v1
|
||||
(v2', t2) <- val2lin v2
|
||||
let Just (C.TableType t11 t12) = t1 -- t11 == t2
|
||||
return (L.Projection v1' v2', Just t12)
|
||||
|
||||
-- Invoke code generation
|
||||
-- C.CommentedValue cmnt lv -> val2lin lv
|
||||
C.CommentedValue cmnt lv -> case cmnt of
|
||||
"impossible" -> val2lin lv >>= \(_, typ) -> return (L.Empty, typ)
|
||||
_ -> val2lin lv
|
||||
|
||||
v -> Left $ printf "val2lin not implemented for: %s" (show v)
|
||||
|
||||
unless (null $ lefts es) (raise $ unlines (lefts es))
|
||||
|
||||
let e = flip CMS.evalStateT Map.empty $ mapM mkLin lindefs
|
||||
case e of
|
||||
Left err -> raise err
|
||||
Right lins -> do
|
||||
let maybeOptimise = if debug then id else extractStrings
|
||||
let concr = maybeOptimise $ L.Concrete {
|
||||
L.toks = IntMapBuilder.emptyIntMap,
|
||||
L.lins = Map.fromList lins
|
||||
L.toks = IntMap.empty,
|
||||
L.lins = lins
|
||||
}
|
||||
return (mdi2i modId, concr)
|
||||
|
||||
type CodeGen a = CMS.StateT (Map.Map C.LinValue (L.LinFun, Maybe C.LinType)) (Either String) a
|
||||
|
||||
-- | Remove ParamAliasDefs by inlining their definitions
|
||||
inlineParamAliases :: [C.ParamDef] -> [C.ParamDef]
|
||||
inlineParamAliases defs = if null aliases then defs else map rp' pdefs
|
||||
@@ -364,12 +343,12 @@ extractStrings concr = L.Concrete { L.toks = toks', L.lins = lins' }
|
||||
(lins',imb') = CMS.runState (go0 (L.lins concr)) imb
|
||||
toks' = IntMapBuilder.toIntMap imb'
|
||||
|
||||
go0 :: Map.Map CId L.LinFun -> CMS.State (IntMapBuilder.IMB Text) (Map.Map CId L.LinFun)
|
||||
go0 :: Map.Map CId L.LinFun -> CMS.State (IntMapBuilder.IMB String) (Map.Map CId L.LinFun)
|
||||
go0 mp = do
|
||||
xs <- mapM (\(cid,lin) -> go lin >>= \lin' -> return (cid,lin')) (Map.toList mp)
|
||||
return $ Map.fromList xs
|
||||
|
||||
go :: L.LinFun -> CMS.State (IntMapBuilder.IMB Text) L.LinFun
|
||||
go :: L.LinFun -> CMS.State (IntMapBuilder.IMB String) L.LinFun
|
||||
go lf = case lf of
|
||||
L.Token str -> do
|
||||
imb <- CMS.get
|
||||
@@ -381,7 +360,7 @@ extractStrings concr = L.Concrete { L.toks = toks', L.lins = lins' }
|
||||
-- pts' <- mapM (\(pfxs,lv) -> go lv >>= \lv' -> return (pfxs,lv')) pts
|
||||
pts' <- forM pts $ \(pfxs,lv) -> do
|
||||
imb <- CMS.get
|
||||
let str = T.pack $ show pfxs
|
||||
let str = show pfxs
|
||||
let (ix,imb') = IntMapBuilder.insert' str imb
|
||||
CMS.put imb'
|
||||
lv' <- go lv
|
||||
|
||||
@@ -23,10 +23,6 @@ empty = IMB {
|
||||
valMap = HashMap.empty
|
||||
}
|
||||
|
||||
-- | An empty IntMap
|
||||
emptyIntMap :: IntMap a
|
||||
emptyIntMap = IntMap.empty
|
||||
|
||||
-- | Lookup a value
|
||||
lookup :: (Eq a, Hashable a) => a -> IMB a -> Maybe Int
|
||||
lookup a IMB { valMap = vm } = HashMap.lookup a vm
|
||||
|
||||
@@ -16,13 +16,12 @@ import PGF.Tree (Tree (..), expr2tree, prTree)
|
||||
import qualified Control.Exception as EX
|
||||
import Control.Monad (liftM, liftM2, forM_)
|
||||
import qualified Control.Monad.Writer as CMW
|
||||
import Data.Char (toUpper)
|
||||
import Data.Binary (Binary, put, get, putWord8, getWord8, encodeFile, decodeFile)
|
||||
import Data.Either (isLeft)
|
||||
import qualified Data.IntMap as IntMap
|
||||
import Data.List (isPrefixOf)
|
||||
import qualified Data.Map.Strict as Map
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as TE
|
||||
import Text.Printf (printf)
|
||||
|
||||
import Prelude hiding ((!!))
|
||||
@@ -41,7 +40,7 @@ data Abstract = Abstract {
|
||||
|
||||
-- | Concrete syntax
|
||||
data Concrete = Concrete {
|
||||
toks :: IntMap.IntMap Text, -- ^ all strings are stored exactly once here
|
||||
toks :: IntMap.IntMap String, -- ^ all strings are stored exactly once here
|
||||
-- lincats :: Map.Map CId LinType, -- ^ a linearization type for each category
|
||||
lins :: Map.Map CId LinFun -- ^ a linearization function for each function
|
||||
} deriving (Show)
|
||||
@@ -65,12 +64,12 @@ data LinFun =
|
||||
| Space -- ^ space between adjacent tokens
|
||||
| Capit -- ^ capitalise next character
|
||||
| AllCapit -- ^ capitalise next word
|
||||
| Pre [([Text], LinFun)] LinFun
|
||||
| Pre [([String], LinFun)] LinFun
|
||||
| Missing CId -- ^ missing definition (inserted at runtime)
|
||||
|
||||
-- From original definition in paper
|
||||
| Empty
|
||||
| Token Text
|
||||
| Token String
|
||||
| Concat LinFun LinFun
|
||||
| Ix Int
|
||||
| Tuple [LinFun]
|
||||
@@ -158,10 +157,6 @@ instance Binary LinFun where
|
||||
14 -> liftM TokenIx get
|
||||
_ -> fail "Failed to decode LPGF binary format"
|
||||
|
||||
instance Binary Text where
|
||||
put = put . TE.encodeUtf8
|
||||
get = liftM TE.decodeUtf8 get
|
||||
|
||||
abstractName :: LPGF -> CId
|
||||
abstractName = absname
|
||||
|
||||
@@ -173,22 +168,14 @@ readLPGF = Data.Binary.decodeFile
|
||||
|
||||
-- | Main linearize function, to 'String'
|
||||
linearize :: LPGF -> Language -> Expr -> String
|
||||
linearize lpgf lang expr = T.unpack $ linearizeText lpgf lang expr
|
||||
|
||||
-- | Main linearize function, to 'Data.Text.Text'
|
||||
linearizeText :: LPGF -> Language -> Expr -> Text
|
||||
linearizeText lpgf lang =
|
||||
linearize lpgf lang =
|
||||
case Map.lookup lang (concretes lpgf) of
|
||||
Just concr -> linearizeConcreteText concr
|
||||
Just concr -> linearizeConcrete concr
|
||||
Nothing -> error $ printf "Unknown language: %s" (showCId lang)
|
||||
|
||||
-- | Language-specific linearize function, to 'String'
|
||||
linearizeConcrete :: Concrete -> Expr -> String
|
||||
linearizeConcrete concr expr = T.unpack $ linearizeConcreteText concr expr
|
||||
|
||||
-- | Language-specific linearize function, to 'Data.Text.Text'
|
||||
linearizeConcreteText :: Concrete -> Expr -> Text
|
||||
linearizeConcreteText concr expr = lin2string $ lin (expr2tree expr)
|
||||
linearizeConcrete concr expr = lin2string $ lin (expr2tree expr)
|
||||
where
|
||||
lin :: Tree -> LinFun
|
||||
lin tree = case tree of
|
||||
@@ -209,7 +196,7 @@ try comp = do
|
||||
-- | Evaluation context
|
||||
data Context = Context {
|
||||
cxArgs :: [LinFun], -- ^ is a sequence of terms
|
||||
cxToks :: IntMap.IntMap Text -- ^ token map
|
||||
cxToks :: IntMap.IntMap String -- ^ token map
|
||||
}
|
||||
|
||||
-- | Operational semantics
|
||||
@@ -238,7 +225,7 @@ eval cxt t = case t of
|
||||
|
||||
PreIx pts df -> Pre pts' df'
|
||||
where
|
||||
pts' = [(pfxs, eval cxt t) | (ix, t) <- pts, let pfxs = maybe [] (read . T.unpack) $ IntMap.lookup ix (cxToks cxt)]
|
||||
pts' = [(pfxs, eval cxt t) | (ix, t) <- pts, let pfxs = maybe [] read $ IntMap.lookup ix (cxToks cxt)]
|
||||
df' = eval cxt df
|
||||
TokenIx i -> maybe Empty Token $ IntMap.lookup i (cxToks cxt)
|
||||
|
||||
@@ -252,32 +239,32 @@ flattenTuple = \case
|
||||
-- | Turn concrete syntax terms into an actual string.
|
||||
-- This is done in two passes, first to flatten concats & evaluate pre's, then to
|
||||
-- apply BIND and other predefs.
|
||||
lin2string :: LinFun -> Text
|
||||
lin2string lf = T.unwords $ join $ flatten [lf]
|
||||
lin2string :: LinFun -> String
|
||||
lin2string lf = unwords $ join $ flatten [lf]
|
||||
where
|
||||
-- Process bind et al into final token list
|
||||
join :: [Either LinFun Text] -> [Text]
|
||||
join :: [Either LinFun String] -> [String]
|
||||
join elt = case elt of
|
||||
Right tok:Left Bind:ls ->
|
||||
case join ls of
|
||||
next:ls' -> tok `T.append` next : ls'
|
||||
next:ls' -> tok : next : ls'
|
||||
_ -> []
|
||||
Right tok:ls -> tok : join ls
|
||||
Left Space:ls -> join ls
|
||||
Left Capit:ls ->
|
||||
case join ls of
|
||||
next:ls' -> T.toUpper (T.take 1 next) `T.append` T.drop 1 next : ls'
|
||||
next:ls' -> (toUpper (head next) : tail next) : ls'
|
||||
_ -> []
|
||||
Left AllCapit:ls ->
|
||||
case join ls of
|
||||
next:ls' -> T.toUpper next : ls'
|
||||
next:ls' -> map toUpper next : ls'
|
||||
_ -> []
|
||||
Left (Missing cid):ls -> join (Right (T.pack (printf "[%s]" (show cid))) : ls)
|
||||
Left (Missing cid):ls -> join (Right (printf "[%s]" (show cid)) : ls)
|
||||
[] -> []
|
||||
x -> error $ printf "Unhandled term in lin2string: %s" (show x)
|
||||
|
||||
-- Process concats, tuples, pre into flat list
|
||||
flatten :: [LinFun] -> [Either LinFun Text]
|
||||
flatten :: [LinFun] -> [Either LinFun String]
|
||||
flatten [] = []
|
||||
flatten (l:ls) = case l of
|
||||
Empty -> flatten ls
|
||||
@@ -291,7 +278,7 @@ lin2string lf = T.unwords $ join $ flatten [lf]
|
||||
f = flatten ls
|
||||
ch = case dropWhile isLeft f of
|
||||
Right next:_ ->
|
||||
let matches = [ l | (pfxs, l) <- pts, any (`T.isPrefixOf` next) pfxs ]
|
||||
let matches = [ l | (pfxs, l) <- pts, any (`isPrefixOf` next) pfxs ]
|
||||
in if null matches then df else head matches
|
||||
_ -> df
|
||||
in flatten (ch:ls)
|
||||
@@ -336,7 +323,7 @@ instance PP LPGF where
|
||||
instance PP Concrete where
|
||||
pp (Concrete toks lins) = do
|
||||
forM_ (IntMap.toList toks) $ \(i,tok) ->
|
||||
CMW.tell [show i ++ " " ++ T.unpack tok]
|
||||
CMW.tell [show i ++ " " ++ tok]
|
||||
CMW.tell [""]
|
||||
forM_ (Map.toList lins) $ \(cid,lin) -> do
|
||||
CMW.tell ["# " ++ showCId cid]
|
||||
|
||||
@@ -86,150 +86,6 @@ stack exec -- hp2ps -c lpgf-bench.hp && open lpgf-bench.ps
|
||||
- http://book.realworldhaskell.org/read/profiling-and-optimization.html
|
||||
- https://wiki.haskell.org/Performance
|
||||
|
||||
|
||||
### Honing in
|
||||
|
||||
```
|
||||
stack build --test --bench --no-run-tests --no-run-benchmarks &&
|
||||
stack bench --benchmark-arguments "compile lpgf testsuite/lpgf/phrasebook/PhrasebookFre.gf +RTS -T -RTS"
|
||||
```
|
||||
|
||||
**Baseline PGF**
|
||||
- compile: 1.600776s
|
||||
- size: 2.88 MB Phrasebook.pgf
|
||||
Max memory: 328.20 MB
|
||||
|
||||
**Baseline LPGF = B**
|
||||
- compile: 12.401099s
|
||||
- size: 3.01 MB Phrasebook.lpgf
|
||||
Max memory: 1.33 GB
|
||||
|
||||
**Baseline LPGF String**
|
||||
- compile: 12.124689s
|
||||
- size: 3.01 MB Phrasebook.lpgf
|
||||
Max memory: 1.34 GB
|
||||
|
||||
**B -extractStrings**
|
||||
- compile: 13.822735s
|
||||
- size: 5.78 MB Phrasebook.lpgf
|
||||
Max memory: 1.39 GB
|
||||
|
||||
**B -cleanupRecordFields**
|
||||
- compile: 13.670776s
|
||||
- size: 3.01 MB Phrasebook.lpgf
|
||||
Max memory: 1.48 GB
|
||||
|
||||
**No generation at all = E**
|
||||
- compile: 0.521001s
|
||||
- size: 3.27 KB Phrasebook.lpgf
|
||||
Max memory: 230.69 MB
|
||||
|
||||
**+ Concat, Literal, Error, Predef, Tuple, Variant, Commented**
|
||||
- compile: 1.503594s
|
||||
- size: 3.27 KB Phrasebook.lpgf
|
||||
Max memory: 395.31 MB
|
||||
|
||||
**+ Var, Pre, Selection**
|
||||
- compile: 1.260184s
|
||||
- size: 3.28 KB Phrasebook.lpgf
|
||||
Max memory: 392.17 MB
|
||||
|
||||
**+ Record**
|
||||
- compile: 1.659233s
|
||||
- size: 7.07 KB Phrasebook.lpgf
|
||||
Max memory: 397.41 MB
|
||||
|
||||
**+ Projection = X**
|
||||
- compile: 1.446217s
|
||||
- size: 7.94 KB Phrasebook.lpgf
|
||||
Max memory: 423.62 MB
|
||||
|
||||
**X + Param**
|
||||
- compile: 2.073838s
|
||||
- size: 10.82 KB Phrasebook.lpgf
|
||||
Max memory: 619.71 MB
|
||||
|
||||
**X + Table**
|
||||
- compile: 11.26558s
|
||||
- size: 2.48 MB Phrasebook.lpgf
|
||||
Max memory: 1.15 GB
|
||||
|
||||
### Repeated terms in compilation
|
||||
|
||||
**Param and Table**
|
||||
|
||||
| Concr | Total | Unique | Perc |
|
||||
|:--------------|-------:|-------:|-----:|
|
||||
| PhrasebookEng | 8673 | 1724 | 20% |
|
||||
| PhrasebookSwe | 14802 | 2257 | 15% |
|
||||
| PhrasebookFin | 526225 | 4866 | 1% |
|
||||
|
||||
**Param**
|
||||
|
||||
| Concr | Total | Unique | Perc |
|
||||
|:--------------|-------:|-------:|-----:|
|
||||
| PhrasebookEng | 3211 | 78 | 2% |
|
||||
| PhrasebookSwe | 7567 | 69 | 1% |
|
||||
| PhrasebookFin | 316355 | 310 | 0.1% |
|
||||
|
||||
**Table**
|
||||
|
||||
| Concr | Total | Unique | Perc |
|
||||
|:--------------|-------:|-------:|-----:|
|
||||
| PhrasebookEng | 5470 | 1654 | 30% |
|
||||
| PhrasebookSwe | 7243 | 2196 | 30% |
|
||||
| PhrasebookFin | 209878 | 4564 | 2% |
|
||||
|
||||
### After impelementing state monad for table memoisation
|
||||
|
||||
**worse!**
|
||||
- compile: 12.55848s
|
||||
- size: 3.01 MB Phrasebook.lpgf
|
||||
Max memory: 2.25 GB
|
||||
|
||||
**Params**
|
||||
|
||||
| Concr | Total | Misses | Perc |
|
||||
|:--------------|-------:|-------:|------:|
|
||||
| PhrasebookEng | 3211 | 72 | 2% |
|
||||
| PhrasebookSwe | 7526 | 61 | 1% |
|
||||
| PhrasebookFin | 135268 | 333 | 0.2% |
|
||||
| PhrasebookFre | 337102 | 76 | 0.02% |
|
||||
|
||||
_modify instead of insert_
|
||||
|
||||
| Concr | Total | Misses | Perc |
|
||||
|:--------------|-------:|-------:|------:|
|
||||
| PhrasebookEng | 3211 | 70 | 2% |
|
||||
| PhrasebookSwe | 7526 | 61 | 1% |
|
||||
| PhrasebookFin | 135268 | 302 | 0.2% |
|
||||
| PhrasebookFre | 337102 | 72 | 0.02% |
|
||||
|
||||
**Tables**
|
||||
|
||||
| Concr | Total | Misses | Perc |
|
||||
|:--------------|------:|-------:|-----:|
|
||||
| PhrasebookEng | 3719 | 3170 | 85% |
|
||||
| PhrasebookSwe | 4031 | 3019 | 75% |
|
||||
| PhrasebookFin | 36875 | 21730 | 59% |
|
||||
| PhrasebookFre | 41397 | 32967 | 80% |
|
||||
|
||||
_modify instead of insert_
|
||||
|
||||
| Concr | Total | Misses | Perc |
|
||||
|:--------------|------:|-------:|-----:|
|
||||
| PhrasebookEng | 2415 | 1646 | 68% |
|
||||
| PhrasebookSwe | 3228 | 2188 | 68% |
|
||||
| PhrasebookFin | 8793 | 4556 | 52% |
|
||||
| PhrasebookFre | 12490 | 5793 | 46% |
|
||||
|
||||
Conclusions:
|
||||
- map itself requires more memory than acual compilation
|
||||
- lookup is also as slow as actual compilation
|
||||
|
||||
Tried HashMap (deriving Hashable for LinValue), no inprovement.
|
||||
Using show on LinValue for keys is incredibly slow.
|
||||
|
||||
# Notes on compilation
|
||||
|
||||
## 1 (see unittests/Params4)
|
||||
|
||||
@@ -14,7 +14,6 @@ import Data.Either (isLeft)
|
||||
import qualified Data.List as L
|
||||
import Data.Maybe (fromJust, isJust, isNothing)
|
||||
import qualified Data.Map as Map
|
||||
import Data.Text (Text)
|
||||
import Data.Time.Clock (getCurrentTime, diffUTCTime)
|
||||
import System.Console.ANSI
|
||||
import System.Directory (listDirectory, getFileSize)
|
||||
@@ -155,13 +154,13 @@ linPGF2 :: PGF2.PGF -> [PGF2.Expr] -> [[String]]
|
||||
linPGF2 pgf trees =
|
||||
[ map (PGF2.linearize concr) trees | (_, concr) <- Map.toList (PGF2.languages pgf) ]
|
||||
|
||||
linLPGF :: LPGF.LPGF -> [PGF.Expr] -> [[Text]]
|
||||
linLPGF :: LPGF.LPGF -> [PGF.Expr] -> [[String]]
|
||||
linLPGF lpgf trees =
|
||||
[ map (LPGF.linearizeConcreteText concr) trees | (_,concr) <- Map.toList (LPGF.concretes lpgf) ]
|
||||
[ map (LPGF.linearizeConcrete concr) trees | (_,concr) <- Map.toList (LPGF.concretes lpgf) ]
|
||||
|
||||
linLPGF' :: LPGF.LPGF -> [PGF.Expr] -> IO [[Either String Text]]
|
||||
linLPGF' :: LPGF.LPGF -> [PGF.Expr] -> IO [[Either String String]]
|
||||
linLPGF' lpgf trees =
|
||||
forM (Map.toList (LPGF.concretes lpgf)) $ \(_,concr) -> mapM (LPGF.try . LPGF.linearizeConcreteText concr) trees
|
||||
forM (Map.toList (LPGF.concretes lpgf)) $ \(_,concr) -> mapM (LPGF.try . LPGF.linearizeConcrete concr) trees
|
||||
|
||||
-- | Produce human readable file size
|
||||
-- Adapted from https://hackage.haskell.org/package/hrfsize
|
||||
|
||||
Reference in New Issue
Block a user