forked from GitHub/gf-core
448 lines
18 KiB
Haskell
448 lines
18 KiB
Haskell
module GF.Compile.GrammarToLPGF (mkCanon2lpgf) where
|
|
|
|
import LPGF (LPGF (..))
|
|
import qualified LPGF as L
|
|
|
|
import PGF.CId
|
|
import GF.Grammar.Grammar
|
|
import qualified GF.Grammar.Canonical as C
|
|
import GF.Compile.GrammarToCanonical (grammar2canonical)
|
|
|
|
import GF.Data.Operations (ErrorMonad (..))
|
|
import qualified GF.Data.IntMapBuilder as IntMapBuilder
|
|
import GF.Infra.Option (Options)
|
|
import GF.Infra.UseIO (IOE)
|
|
import GF.Text.Pretty (pp, render)
|
|
|
|
import Control.Applicative ((<|>))
|
|
import Control.Monad (when, unless, forM, forM_)
|
|
import qualified Control.Monad.State as CMS
|
|
import Data.Either (lefts, rights)
|
|
import qualified Data.IntMap as IntMap
|
|
import Data.List (elemIndex)
|
|
import qualified Data.List as L
|
|
import qualified Data.Map.Strict as Map
|
|
import Data.Maybe (fromJust, isJust)
|
|
import System.Environment (lookupEnv)
|
|
import System.FilePath ((</>), (<.>))
|
|
import Text.Printf (printf)
|
|
|
|
import qualified Debug.Trace
|
|
trace x = Debug.Trace.trace ("> " ++ show x) (return ())
|
|
|
|
mkCanon2lpgf :: Options -> SourceGrammar -> ModuleName -> IOE LPGF
|
|
mkCanon2lpgf opts gr am = do
|
|
debug <- isJust <$> lookupEnv "DEBUG"
|
|
when debug $ do
|
|
ppCanonical debugDir canon
|
|
dumpCanonical debugDir canon
|
|
(an,abs) <- mkAbstract ab
|
|
cncs <- mapM (mkConcrete debug) cncs
|
|
let lpgf = LPGF {
|
|
L.absname = an,
|
|
L.abstract = abs,
|
|
L.concretes = Map.fromList cncs
|
|
}
|
|
when debug $ ppLPGF debugDir lpgf
|
|
return lpgf
|
|
where
|
|
canon@(C.Grammar ab cncs) = grammar2canonical opts am gr
|
|
|
|
mkAbstract :: (ErrorMonad err) => C.Abstract -> err (CId, L.Abstract)
|
|
mkAbstract (C.Abstract modId flags cats funs) = return (mdi2i modId, L.Abstract {})
|
|
|
|
mkConcrete :: (ErrorMonad err) => Bool -> C.Concrete -> err (CId, L.Concrete)
|
|
mkConcrete debug (C.Concrete modId absModId flags params' lincats lindefs) = do
|
|
let
|
|
(C.Abstract _ _ _ funs) = ab
|
|
params = inlineParamAliases params'
|
|
|
|
-- Builds maps for lookups
|
|
|
|
paramValueMap :: Map.Map C.ParamId C.ParamDef -- constructor -> definition
|
|
paramValueMap = Map.fromList [ (v,d) | d@(C.ParamDef _ vs) <- params, (C.Param v _) <- vs ]
|
|
|
|
lincatMap :: Map.Map C.CatId C.LincatDef
|
|
lincatMap = Map.fromList [ (cid,d) | d@(C.LincatDef cid _) <- lincats ]
|
|
|
|
funMap :: Map.Map C.FunId C.FunDef
|
|
funMap = Map.fromList [ (fid,d) | d@(C.FunDef fid _) <- funs ]
|
|
|
|
-- | Lookup paramdef, providing dummy fallback when not found
|
|
-- Workaround for https://github.com/GrammaticalFramework/gf-core/issues/100
|
|
lookupParamDef :: C.ParamId -> Either String C.ParamDef
|
|
lookupParamDef pid = case Map.lookup pid paramValueMap of
|
|
Just d -> Right d
|
|
Nothing ->
|
|
-- Left $ printf "Cannot find param definition: %s" (show pid)
|
|
Right $ C.ParamDef (C.ParamId (C.Unqual "DUMMY")) [C.Param pid []]
|
|
|
|
-- | Lookup lintype for a function
|
|
lookupLinType :: C.FunId -> Either String C.LinType
|
|
lookupLinType funId = do
|
|
fun <- m2e (printf "Cannot find type for: %s" (show funId)) (Map.lookup funId funMap)
|
|
let (C.FunDef _ (C.Type _ (C.TypeApp catId _))) = fun
|
|
lincat <- m2e (printf "Cannot find lincat for: %s" (show catId)) (Map.lookup catId lincatMap)
|
|
let (C.LincatDef _ lt) = lincat
|
|
return lt
|
|
|
|
-- | Lookup lintype for a function's argument
|
|
lookupLinTypeArg :: C.FunId -> Int -> Either String C.LinType
|
|
lookupLinTypeArg funId argIx = do
|
|
fun <- m2e (printf "Cannot find type for: %s" (show funId)) (Map.lookup funId funMap)
|
|
let (C.FunDef _ (C.Type args _)) = fun
|
|
let (C.TypeBinding _ (C.Type _ (C.TypeApp catId _))) = args !! argIx
|
|
lincat <- m2e (printf "Cannot find lincat for: %s" (show catId)) (Map.lookup catId lincatMap)
|
|
let (C.LincatDef _ lt) = lincat
|
|
return lt
|
|
|
|
-- Filter out record fields from definitions which don't appear in lincat.
|
|
-- Workaround for https://github.com/GrammaticalFramework/gf-core/issues/101
|
|
cleanupRecordFields :: C.LinValue -> C.LinType -> C.LinValue
|
|
cleanupRecordFields (C.RecordValue rrvs) (C.RecordType rrs) =
|
|
let defnFields = Map.fromList [ (lid, lt) | (C.RecordRow lid lt) <- rrs ]
|
|
in C.RecordValue
|
|
[ C.RecordRow lid lv'
|
|
| C.RecordRow lid lv <- rrvs
|
|
, Map.member lid defnFields
|
|
, let Just lt = Map.lookup lid defnFields
|
|
, let lv' = cleanupRecordFields lv lt
|
|
]
|
|
cleanupRecordFields lv _ = lv
|
|
|
|
lindefs' =
|
|
[ C.LinDef funId varIds linValue'
|
|
| (C.LinDef funId varIds linValue) <- lindefs
|
|
, let Right linType = lookupLinType funId
|
|
, let linValue' = cleanupRecordFields linValue linType
|
|
]
|
|
es = map mkLin lindefs'
|
|
lins = Map.fromList $ rights es
|
|
|
|
-- | Main code generation function
|
|
mkLin :: C.LinDef -> Either String (CId, L.LinFun)
|
|
mkLin (C.LinDef funId varIds linValue) = do
|
|
-- when debug $ trace funId
|
|
(lf, _) <- val2lin linValue
|
|
return (fi2i funId, lf)
|
|
where
|
|
val2lin :: C.LinValue -> Either String (L.LinFun, Maybe C.LinType)
|
|
val2lin lv = case lv of
|
|
|
|
C.ConcatValue v1 v2 -> do
|
|
(v1',t1) <- val2lin v1
|
|
(v2',t2) <- val2lin v2
|
|
return (L.Concat v1' v2', t1 <|> t2) -- t1 else t2
|
|
|
|
C.LiteralValue ll -> case ll of
|
|
C.FloatConstant f -> return (L.Token $ show f, Just C.FloatType)
|
|
C.IntConstant i -> return (L.Token $ show i, Just C.IntType)
|
|
C.StrConstant s -> return (L.Token s, Just C.StrType)
|
|
|
|
C.ErrorValue err -> return (L.Error err, Nothing)
|
|
|
|
C.ParamConstant (C.Param pid lvs) -> do
|
|
let
|
|
collectProjections :: C.LinValue -> Either String [L.LinFun]
|
|
collectProjections (C.ParamConstant (C.Param pid lvs)) = do
|
|
def <- lookupParamDef pid
|
|
let (C.ParamDef tpid defpids) = def
|
|
pidIx <- eitherElemIndex pid [ p | C.Param p _ <- defpids ]
|
|
rest <- mapM collectProjections lvs
|
|
return $ L.Ix (pidIx+1) : concat rest
|
|
collectProjections lv = do
|
|
(lf,_) <- val2lin lv
|
|
return [lf]
|
|
lfs <- collectProjections lv
|
|
let term = L.Tuple lfs
|
|
def <- lookupParamDef pid
|
|
let (C.ParamDef tpid _) = def
|
|
return (term, Just $ C.ParamType (C.ParamTypeId tpid))
|
|
|
|
C.PredefValue (C.PredefId pid) -> case pid of
|
|
"BIND" -> return (L.Bind, Nothing)
|
|
"SOFT_BIND" -> return (L.Bind, Nothing)
|
|
"SOFT_SPACE" -> return (L.Space, Nothing)
|
|
"CAPIT" -> return (L.Capit, Nothing)
|
|
"ALL_CAPIT" -> return (L.AllCapit, Nothing)
|
|
_ -> Left $ printf "Unknown predef function: %s" pid
|
|
|
|
C.RecordValue rrvs -> do
|
|
let rrvs' = sortRecordRows rrvs
|
|
ts <- sequence [ val2lin lv | C.RecordRow lid lv <- rrvs' ]
|
|
return (L.Tuple (map fst ts), Just $ C.RecordType [ C.RecordRow lid lt | (C.RecordRow lid _, (_, Just lt)) <- zip rrvs' ts])
|
|
|
|
C.TableValue lt trvs -> do
|
|
-- group the rows by "left-most" value
|
|
let
|
|
groupRow :: C.TableRowValue -> C.TableRowValue -> Bool
|
|
groupRow (C.TableRow p1 _) (C.TableRow p2 _) = groupPattern p1 p2
|
|
|
|
groupPattern :: C.LinPattern -> C.LinPattern -> Bool
|
|
groupPattern p1 p2 = case (p1,p2) of
|
|
(C.ParamPattern (C.Param pid1 _), C.ParamPattern (C.Param pid2 _)) -> pid1 == pid2 -- compare only constructors
|
|
(C.RecordPattern (C.RecordRow lid1 patt1:_), C.RecordPattern (C.RecordRow lid2 patt2:_)) -> groupPattern patt1 patt2 -- lid1 == lid2 necessarily
|
|
_ -> error $ printf "Mismatched patterns in grouping:\n%s\n%s" (show p1) (show p2)
|
|
|
|
grps :: [[C.TableRowValue]]
|
|
grps = L.groupBy groupRow trvs
|
|
|
|
-- remove one level of depth and recurse
|
|
let
|
|
handleGroup :: [C.TableRowValue] -> Either String (L.LinFun, Maybe C.LinType)
|
|
handleGroup [C.TableRow patt lv] =
|
|
case reducePattern patt of
|
|
Just patt' -> do
|
|
(lf,lt) <- handleGroup [C.TableRow patt' lv]
|
|
return (L.Tuple [lf],lt)
|
|
Nothing -> val2lin lv
|
|
handleGroup rows = do
|
|
let rows' = map reduceRow rows
|
|
val2lin (C.TableValue lt rows') -- lt is wrong here, but is unused
|
|
|
|
reducePattern :: C.LinPattern -> Maybe C.LinPattern
|
|
reducePattern patt =
|
|
case patt of
|
|
C.ParamPattern (C.Param _ []) -> Nothing
|
|
C.ParamPattern (C.Param _ patts) -> Just $ C.ParamPattern (C.Param pid' patts')
|
|
where
|
|
C.ParamPattern (C.Param pid1 patts1) = head patts
|
|
pid' = pid1
|
|
patts' = patts1 ++ tail patts
|
|
|
|
C.RecordPattern [] -> Nothing
|
|
C.RecordPattern (C.RecordRow lid patt:rrs) ->
|
|
case reducePattern patt of
|
|
Just patt' -> Just $ C.RecordPattern (C.RecordRow lid patt':rrs)
|
|
Nothing -> if null rrs then Nothing else Just $ C.RecordPattern rrs
|
|
|
|
_ -> error $ printf "Unhandled pattern in reducing: %s" (show patt)
|
|
|
|
reduceRow :: C.TableRowValue -> C.TableRowValue
|
|
reduceRow (C.TableRow patt lv) =
|
|
let Just patt' = reducePattern patt
|
|
in C.TableRow patt' lv
|
|
|
|
-- ts :: [(L.LinFun, Maybe C.LinType)]
|
|
ts <- mapM handleGroup grps
|
|
|
|
-- return
|
|
let typ = case ts of
|
|
(_, Just tst):_ -> Just $ C.TableType lt tst
|
|
_ -> Nothing
|
|
return (L.Tuple (map fst ts), typ)
|
|
|
|
-- TODO TuplePattern, WildPattern?
|
|
|
|
C.TupleValue lvs -> do
|
|
ts <- mapM val2lin lvs
|
|
return (L.Tuple (map fst ts), Just $ C.TupleType (map (fromJust.snd) ts))
|
|
|
|
C.VariantValue [] -> return (L.Empty, Nothing) -- TODO Just C.StrType ?
|
|
C.VariantValue (vr:_) -> val2lin vr -- NOTE variants not supported, just pick first
|
|
|
|
C.VarValue (C.VarValueId (C.Unqual v)) -> do
|
|
ix <- eitherElemIndex (C.VarId v) varIds
|
|
lt <- lookupLinTypeArg funId ix
|
|
return (L.Argument (ix+1), Just lt)
|
|
|
|
C.PreValue pts df -> do
|
|
pts' <- forM pts $ \(pfxs, lv) -> do
|
|
(lv', _) <- val2lin lv
|
|
return (pfxs, lv')
|
|
(df', lt) <- val2lin df
|
|
return (L.Pre pts' df', lt)
|
|
|
|
C.Projection v1 lblId -> do
|
|
(v1', mtyp) <- val2lin v1
|
|
-- find label index in argument type
|
|
let Just (C.RecordType rrs) = mtyp
|
|
let rrs' = [ lid | C.RecordRow lid _ <- rrs ]
|
|
-- lblIx <- eitherElemIndex lblId rrs'
|
|
let
|
|
lblIx = case eitherElemIndex lblId rrs' of
|
|
Right x -> x
|
|
Left _ -> 0 -- corresponds to Prelude.False
|
|
-- lookup lintype for record row
|
|
let C.RecordRow _ lt = rrs !! lblIx
|
|
return (L.Projection v1' (L.Ix (lblIx+1)), Just lt)
|
|
|
|
C.Selection v1 v2 -> do
|
|
(v1', t1) <- val2lin v1
|
|
(v2', t2) <- val2lin v2
|
|
let Just (C.TableType t11 t12) = t1 -- t11 == t2
|
|
return (L.Projection v1' v2', Just t12)
|
|
|
|
-- C.CommentedValue cmnt lv -> val2lin lv
|
|
C.CommentedValue cmnt lv -> case cmnt of
|
|
"impossible" -> val2lin lv >>= \(_, typ) -> return (L.Empty, typ)
|
|
_ -> val2lin lv
|
|
|
|
v -> Left $ printf "val2lin not implemented for: %s" (show v)
|
|
|
|
unless (null $ lefts es) (raise $ unlines (lefts es))
|
|
|
|
let maybeOptimise = if debug then id else extractStrings
|
|
let concr = maybeOptimise $ L.Concrete {
|
|
L.toks = IntMap.empty,
|
|
L.lins = lins
|
|
}
|
|
return (mdi2i modId, concr)
|
|
|
|
-- | Remove ParamAliasDefs by inlining their definitions
|
|
inlineParamAliases :: [C.ParamDef] -> [C.ParamDef]
|
|
inlineParamAliases defs = if null aliases then defs else map rp' pdefs
|
|
where
|
|
(aliases,pdefs) = L.partition isParamAliasDef defs
|
|
|
|
rp' :: C.ParamDef -> C.ParamDef
|
|
rp' (C.ParamDef pid pids) = C.ParamDef pid (map rp'' pids)
|
|
rp' (C.ParamAliasDef _ _) = error "inlineParamAliases called on ParamAliasDef" -- impossible
|
|
|
|
rp'' :: C.ParamValueDef -> C.ParamValueDef
|
|
rp'' (C.Param pid pids) = C.Param pid (map rp''' pids)
|
|
|
|
rp''' :: C.ParamId -> C.ParamId
|
|
rp''' pid = case L.find (\(C.ParamAliasDef p _) -> p == pid) aliases of
|
|
Just (C.ParamAliasDef _ (C.ParamType (C.ParamTypeId p))) -> p
|
|
_ -> pid
|
|
|
|
-- | Always put 's' reocord field first, then sort alphabetically.
|
|
-- Workaround for https://github.com/GrammaticalFramework/gf-core/issues/102
|
|
-- Based on GF.Granmar.Macros.sortRec
|
|
sortRecordRows :: [C.RecordRowValue] -> [C.RecordRowValue]
|
|
sortRecordRows = L.sortBy ordLabel
|
|
where
|
|
ordLabel (C.RecordRow (C.LabelId l1) _) (C.RecordRow (C.LabelId l2) _) =
|
|
case (l1,l2) of
|
|
("s",_) -> LT
|
|
(_,"s") -> GT
|
|
(s1,s2) -> compare s1 s2
|
|
|
|
-- sortRecord :: C.LinValue -> C.LinValue
|
|
-- sortRecord (C.RecordValue rrvs) = C.RecordValue (sortRecordRows rrvs)
|
|
-- sortRecord lv = lv
|
|
|
|
isParamAliasDef :: C.ParamDef -> Bool
|
|
isParamAliasDef (C.ParamAliasDef _ _) = True
|
|
isParamAliasDef _ = False
|
|
|
|
isParamType :: C.LinType -> Bool
|
|
isParamType (C.ParamType _) = True
|
|
isParamType _ = False
|
|
|
|
isRecordType :: C.LinType -> Bool
|
|
isRecordType (C.RecordType _) = True
|
|
isRecordType _ = False
|
|
|
|
-- | Find all token strings, put them in a map and replace with token indexes
|
|
extractStrings :: L.Concrete -> L.Concrete
|
|
extractStrings concr = L.Concrete { L.toks = toks', L.lins = lins' }
|
|
where
|
|
imb = IntMapBuilder.fromIntMap (L.toks concr)
|
|
(lins',imb') = CMS.runState (go0 (L.lins concr)) imb
|
|
toks' = IntMapBuilder.toIntMap imb'
|
|
|
|
go0 :: Map.Map CId L.LinFun -> CMS.State (IntMapBuilder.IMB String) (Map.Map CId L.LinFun)
|
|
go0 mp = do
|
|
xs <- mapM (\(cid,lin) -> go lin >>= \lin' -> return (cid,lin')) (Map.toList mp)
|
|
return $ Map.fromList xs
|
|
|
|
go :: L.LinFun -> CMS.State (IntMapBuilder.IMB String) L.LinFun
|
|
go lf = case lf of
|
|
L.Token str -> do
|
|
imb <- CMS.get
|
|
let (ix,imb') = IntMapBuilder.insert' str imb
|
|
CMS.put imb'
|
|
return $ L.TokenIx ix
|
|
|
|
L.Pre pts df -> do
|
|
-- pts' <- mapM (\(pfxs,lv) -> go lv >>= \lv' -> return (pfxs,lv')) pts
|
|
pts' <- forM pts $ \(pfxs,lv) -> do
|
|
imb <- CMS.get
|
|
let str = show pfxs
|
|
let (ix,imb') = IntMapBuilder.insert' str imb
|
|
CMS.put imb'
|
|
lv' <- go lv
|
|
return (ix,lv')
|
|
df' <- go df
|
|
return $ L.PreIx pts' df'
|
|
L.Concat s t -> do
|
|
s' <- go s
|
|
t' <- go t
|
|
return $ L.Concat s' t'
|
|
L.Tuple ts -> do
|
|
ts' <- mapM go ts
|
|
return $ L.Tuple ts'
|
|
L.Projection t u -> do
|
|
t' <- go t
|
|
u' <- go u
|
|
return $ L.Projection t' u'
|
|
t -> return t
|
|
|
|
-- | Convert Maybe to Either value with error
|
|
m2e :: String -> Maybe a -> Either String a
|
|
m2e err = maybe (Left err) Right
|
|
|
|
-- | Wrap elemIndex into Either value
|
|
eitherElemIndex :: (Eq a, Show a) => a -> [a] -> Either String Int
|
|
eitherElemIndex x xs = m2e (printf "Cannot find: %s in %s" (show x) (show xs)) (elemIndex x xs)
|
|
|
|
mdi2s :: C.ModId -> String
|
|
mdi2s (C.ModId i) = i
|
|
|
|
mdi2i :: C.ModId -> CId
|
|
mdi2i (C.ModId i) = mkCId i
|
|
|
|
fi2i :: C.FunId -> CId
|
|
fi2i (C.FunId i) = mkCId i
|
|
|
|
-- Debugging
|
|
|
|
debugDir :: FilePath
|
|
debugDir = "DEBUG"
|
|
|
|
-- | Pretty-print canonical grammars to file
|
|
ppCanonical :: FilePath -> C.Grammar -> IO ()
|
|
ppCanonical path (C.Grammar ab cncs) = do
|
|
let (C.Abstract modId flags cats funs) = ab
|
|
writeFile (path </> mdi2s modId <.> "canonical.gf") (render $ pp ab)
|
|
forM_ cncs $ \cnc@(C.Concrete modId absModId flags params lincats lindefs) ->
|
|
writeFile' (path </> mdi2s modId <.> "canonical.gf") (render $ pp cnc)
|
|
|
|
-- | Dump canonical grammars to file
|
|
dumpCanonical :: FilePath -> C.Grammar -> IO ()
|
|
dumpCanonical path (C.Grammar ab cncs) = do
|
|
let (C.Abstract modId flags cats funs) = ab
|
|
let body = unlines $ map show cats ++ [""] ++ map show funs
|
|
writeFile' (path </> mdi2s modId <.> "canonical.dump") body
|
|
|
|
forM_ cncs $ \(C.Concrete modId absModId flags params lincats lindefs) -> do
|
|
let body = unlines $ concat [
|
|
map show params,
|
|
[""],
|
|
map show lincats,
|
|
[""],
|
|
map show lindefs
|
|
]
|
|
writeFile' (path </> mdi2s modId <.> "canonical.dump") body
|
|
|
|
-- | Pretty-print LPGF to file
|
|
ppLPGF :: FilePath -> LPGF -> IO ()
|
|
ppLPGF path lpgf =
|
|
forM_ (Map.toList $ L.concretes lpgf) $ \(cid,concr) ->
|
|
writeFile' (path </> showCId cid <.> "lpgf.txt") (L.render $ L.pp concr)
|
|
|
|
-- | Dump LPGF to file
|
|
dumpLPGF :: FilePath -> LPGF -> IO ()
|
|
dumpLPGF path lpgf =
|
|
forM_ (Map.toList $ L.concretes lpgf) $ \(cid,concr) -> do
|
|
let body = unlines $ map show (Map.toList $ L.lins concr)
|
|
writeFile' (path </> showCId cid <.> "lpgf.dump") body
|
|
|
|
-- | Write a file and report it to console
|
|
writeFile' :: FilePath -> String -> IO ()
|
|
writeFile' p b = do
|
|
writeFile p b
|
|
putStrLn $ "Wrote " ++ p
|