Refactoring in GF.Compile and GF.ReadFiles with an eye to parallel compilation

In particular, the function compileOne has been moved to the new module
GF.CompileOne and its type has been changed from

    compileOne :: ... -> CompileEnv -> FilePath -> IOE CompileEnv

to

    compileOne :: ... -> SourceGrammar -> FilePath -> IOE OneCompiledModule

making it more suitable for use in a parallel compiler.
This commit is contained in:
hallgren
2014-08-13 16:46:11 +00:00
parent 70051a375b
commit 6215fc941f
4 changed files with 250 additions and 204 deletions

View File

@@ -1,41 +1,29 @@
module GF.Compile (batchCompile, link, srcAbsName, compileToPGF) where
import Prelude hiding (catch)
import GF.System.Catch
-- the main compiler passes
import GF.Compile.GetGrammar
import GF.Compile.Rename
import GF.Compile.CheckGrammar
import GF.Compile.Optimize(optimizeModule)
import GF.Compile.SubExOpt
import GF.Compile.GeneratePMCFG
import GF.Compile.GrammarToPGF
import GF.Compile.ReadFiles
import GF.Compile.Update
--import GF.Compile.Refresh
import GF.Compile.Tags
import GF.Compile.GrammarToPGF(mkCanon2pgf)
import GF.Compile.ReadFiles(ModEnv,getOptionsFromFile,getAllFiles,
importsOfModule)
import GF.CompileOne(compileOne)
import GF.Grammar.Grammar
import GF.Grammar.Printer
import GF.Grammar.Binary
import GF.Grammar.Grammar(SourceGrammar,msrc,modules,emptySourceGrammar,
abstractOfConcrete,prependModule)
import GF.Infra.Ident
import GF.Infra.Ident(Ident,identS,showIdent)
import GF.Infra.Option
import GF.Infra.UseIO
import GF.Infra.CheckM
import GF.Data.Operations
import GF.Infra.UseIO(IOE,FullPath,liftIO,getLibraryDirectory,putIfVerb,
justModuleName,extendPathEnv,putStrE,putPointE)
import GF.Data.Operations(raise,(+++),err)
import Control.Monad
import GF.System.Directory
import System.FilePath
import qualified Data.Map as Map
--import qualified Data.Set as Set
import Control.Monad(foldM,when)
import GF.System.Directory(doesFileExist,getModificationTime)
import System.FilePath((</>),isRelative,dropFileName)
import qualified Data.Map as Map(empty,insert,lookup,elems)
import Data.List(nub)
import Data.Time(UTCTime)
import GF.Text.Pretty
import GF.Text.Pretty(render,($$),(<+>),nest)
import PGF.Internal(optimizePGF)
import PGF
import PGF(PGF,defaultProbabilities,setProbabilities,readProbabilitiesFromFile)
-- | Compiles a number of source files and builds a 'PGF' structure for them.
compileToPGF :: Options -> [FilePath] -> IOE PGF
@@ -70,21 +58,6 @@ compileSourceGrammar opts gr = do
return gr'
-}
-- to output an intermediate stage
intermOut :: Options -> Dump -> Doc -> IOE ()
intermOut opts d doc
| dump opts d = ePutStrLn (render ("\n\n--#" <+> show d $$ doc))
| otherwise = return ()
warnOut opts warnings
| null warnings = return ()
| otherwise = liftIO $ ePutStrLn ws `catch` oops
where
oops _ = ePutStrLn "" -- prevent crash on character encoding problem
ws = if flag optVerbosity opts == Normal
then '\n':warnings
else warnings
-- | compile with one module as starting point
-- command-line options override options (marked by --#) in the file
-- As for path: if it is read from file, the file path is prepended to each name.
@@ -92,21 +65,20 @@ warnOut opts warnings
compileModule :: Options -- ^ Options from program command line and shell command.
-> CompileEnv -> FilePath -> IOE CompileEnv
compileModule opts1 env file = do
file <- getRealFile file
opts0 <- getOptionsFromFile file
let curr_dir = dropFileName file
lib_dir <- liftIO $ getLibraryDirectory (addOptions opts0 opts1)
let opts = addOptions (fixRelativeLibPaths curr_dir lib_dir opts0) opts1
ps0 <- extendPathEnv opts
let ps = nub (curr_dir : ps0)
putIfVerb opts $ "module search path:" +++ show ps ----
let (sgr,rfs) = env
files <- getAllFiles opts ps rfs file
putIfVerb opts $ "files to read:" +++ show files ----
let names = map justModuleName files
putIfVerb opts $ "modules to include:" +++ show names ----
foldM (compileOne opts) (sgr,rfs) files
compileModule opts1 env@(_,rfs) file =
do file <- getRealFile file
opts0 <- getOptionsFromFile file
let curr_dir = dropFileName file
lib_dir <- liftIO $ getLibraryDirectory (addOptions opts0 opts1)
let opts = addOptions (fixRelativeLibPaths curr_dir lib_dir opts0) opts1
ps0 <- extendPathEnv opts
let ps = nub (curr_dir : ps0)
putIfVerb opts $ "module search path:" +++ show ps ----
files <- getAllFiles opts ps rfs file
putIfVerb opts $ "files to read:" +++ show files ----
let names = map justModuleName files
putIfVerb opts $ "modules to include:" +++ show names ----
foldM (compileOne' opts) env files
where
getRealFile file = do
exists <- liftIO $ doesFileExist file
@@ -121,112 +93,9 @@ compileModule opts1 env file = do
else raise (render ("None of these files exists:" $$ nest 2 (file $$ file1)))
else raise (render ("File" <+> file <+> "does not exist."))
compileOne :: Options -> CompileEnv -> FullPath -> IOE CompileEnv
compileOne opts env@(srcgr,_) file = do
let putpOpt v m act
| verbAtLeast opts Verbose = putPointE Normal opts v act
| verbAtLeast opts Normal = putStrE m >> act
| otherwise = putPointE Verbose opts v act
let path = dropFileName file
let name = dropExtension file
cwd <- liftIO getCurrentDirectory
case takeExtensions file of
-- for compiled gf, read the file and update environment
-- also undo common subexp optimization, to enable normal computations
".gfo" -> do
sm00 <- putPointE Verbose opts ("+ reading" +++ file) $ liftIO (decodeModule file)
let sm0 = (fst sm00, (snd sm00) {mflags = mflags (snd sm00) `addOptions` opts})
intermOut opts (Dump Source) (ppModule Internal sm0)
let sm1 = unsubexpModule sm0
(sm,warnings) <- {- putPointE Normal opts "creating indirections" $ -}
runCheck $ extendModule cwd srcgr sm1
warnOut opts warnings
if flag optTagsOnly opts
then writeTags opts srcgr (gf2gftags opts file) sm1
else return ()
extendCompileEnv env (Just file) sm
-- for gf source, do full compilation and generate code
_ -> do
b1 <- liftIO $ doesFileExist file
if not b1
then compileOne opts env $ (gf2gfo opts file)
else do
sm <- putpOpt ("- parsing" +++ file) ("- compiling" +++ file ++ "... ")
$ getSourceModule opts file
intermOut opts (Dump Source) (ppModule Internal sm)
compileSourceModule opts cwd env (Just file) sm
where
isConcr (_,m) = isModCnc m && mstatus m /= MSIncomplete
compileSourceModule :: Options -> FilePath -> CompileEnv -> Maybe FilePath -> SourceModule -> IOE CompileEnv
compileSourceModule opts cwd env@(gr,_) mb_gfFile mo0 = do
mo1 <- runPass Extend "" . extendModule cwd gr
=<< runPass Rebuild "" (rebuildModule cwd gr mo0)
case mo1 of
(_,n) | not (isCompleteModule n) -> generateTagsOr generateGFO mo1
_ -> do
mo2 <- runPass Rename "renaming" $ renameModule cwd gr mo1
mo3 <- runPass TypeCheck "type checking" $ checkModule opts cwd gr mo2
generateTagsOr compileCompleteModule mo3
where
compileCompleteModule mo3 = do
mo4 <- runPass2 id Optimize "optimizing" $ optimizeModule opts gr mo3
mo5 <- if isModCnc (snd mo4) && flag optPMCFG opts
then runPass2' "generating PMCFG" $ generatePMCFG opts gr mb_gfFile mo4
else runPass2' "" $ return mo4
generateGFO mo5
------------------------------
generateTagsOr compile =
if flag optTagsOnly opts then generateTags else compile
generateGFO mo =
do let mb_gfo = fmap (gf2gfo opts) mb_gfFile
maybeM (flip (writeGFO opts) mo) mb_gfo
extendCompileEnv env mb_gfo mo
generateTags mo =
do maybeM (flip (writeTags opts gr) mo . gf2gftags opts) mb_gfFile
extendCompileEnv env Nothing mo
putpp s = if null s then id else putPointE Verbose opts (" "++s++" ")
idump pass = intermOut opts (Dump pass) . ppModule Internal
-- * Impedance matching
runPass = runPass' fst fst snd (liftErr . runCheck)
runPass2 = runPass2e liftErr
runPass2' = runPass2e id id Canon
runPass2e lift f = runPass' id f (const "") lift
runPass' ret dump warn lift pass pp m =
do out <- putpp pp $ lift m
warnOut opts (warn out)
idump pass (dump out)
return (ret out)
maybeM f = maybe (return ()) f
writeGFO :: Options -> FilePath -> SourceModule -> IOE ()
writeGFO opts file mo = do
let mo1 = subexpModule mo
mo2 = case mo1 of
(m,mi) -> (m,mi{jments=Map.filter (\x -> case x of {AnyInd _ _ -> False; _ -> True}) (jments mi)})
putPointE Normal opts (" write file" +++ file) $ liftIO $ encodeModule file mo2
compileOne' :: Options -> CompileEnv -> FullPath -> IOE CompileEnv
compileOne' opts env@(srcgr,_) file =
extendCompileEnv env =<< compileOne opts srcgr file
-- auxiliaries
@@ -238,7 +107,7 @@ type CompileEnv = (SourceGrammar,ModEnv)
emptyCompileEnv :: CompileEnv
emptyCompileEnv = (emptySourceGrammar,Map.empty)
extendCompileEnv (gr,menv) mfile mo = do
extendCompileEnv (gr,menv) (mfile,mo) = do
menv2 <- case mfile of
Just file -> do
let (mod,imps) = importsOfModule mo

View File

@@ -18,9 +18,8 @@
-- and @file.gfo@ otherwise.
-----------------------------------------------------------------------------
module GF.Compile.ReadFiles
module GF.Compile.ReadFiles
( getAllFiles,ModName,ModEnv,importsOfModule,
gfoFile,gfFile,isGFO,gf2gfo,
parseSource,lift,
getOptionsFromFile,getPragmas) where
@@ -44,7 +43,7 @@ import Data.Maybe(isJust)
import qualified Data.ByteString.Char8 as BS
import qualified Data.Map as Map
import Data.Time(UTCTime)
import GF.System.Directory
import GF.System.Directory(getModificationTime,doesFileExist,canonicalizePath)
import System.FilePath
import GF.Text.Pretty
@@ -91,58 +90,62 @@ getAllFiles opts ps env file = do
| otherwise = (st0,t0)
return ((name,st,t,has_src,imps,p):ds)
gfoDir = flag optGFODir opts
-- searches for module in the search path and if it is found
-- returns 'ModuleInfo'. It fails if there is no such module
--findModule :: ModName -> IOE ModuleInfo
findModule name = do
(file,gfTime,gfoTime) <- do
mb_gfFile <- getFilePath ps (gfFile name)
case mb_gfFile of
Just gfFile -> do gfTime <- modtime gfFile
mb_gfoTime <- maybeIO $ modtime (gf2gfo opts gfFile)
return (gfFile, Just gfTime, mb_gfoTime)
Nothing -> do mb_gfoFile <- getFilePath (maybe id (:) (flag optGFODir opts) ps) (gfoFile name)
case mb_gfoFile of
Just gfoFile -> do gfoTime <- modtime gfoFile
return (gfoFile, Nothing, Just gfoTime)
Nothing -> raise (render ("File" <+> gfFile name <+> "does not exist." $$
"searched in:" <+> vcat ps))
(file,gfTime,gfoTime) <- findFile gfoDir ps name
let mb_envmod = Map.lookup name env
(st,t) = selectFormat opts (fmap fst mb_envmod) gfTime gfoTime
(st,(mname,imps)) <-
case st of
CSEnv -> return (st, (name, maybe [] snd mb_envmod))
CSRead -> do mb_mo <- liftIO $ decodeModuleHeader ((if isGFO file then id else gf2gfo opts) file)
case mb_mo of
Just mo -> return (st,importsOfModule mo)
Nothing
| isGFO file -> raise (file ++ " is compiled with different GF version and I can't find the source file")
| otherwise -> do mo <- parseModHeader opts file
return (CSComp,importsOfModule mo)
CSComp -> do mo <- parseModHeader opts file
return (st,importsOfModule mo)
case st of
CSEnv -> return (st, (name, maybe [] snd mb_envmod))
CSRead -> do let gfo = if isGFO file then file else gf2gfo opts file
mb_imps <- gfoImports gfo
case mb_imps of
Just imps -> return (st,imps)
Nothing
| isGFO file -> raise (file ++ " is compiled with different GF version and I can't find the source file")
| otherwise -> do imps <- gfImports opts file
return (CSComp,imps)
CSComp -> do imps <- gfImports opts file
return (st,imps)
testErr (mname == name)
("module name" +++ mname +++ "differs from file name" +++ name)
return (name,st,t,isJust gfTime,imps,dropFileName file)
--------------------------------------------------------------------------------
findFile gfoDir ps name =
maybe noSource haveSource =<< getFilePath ps (gfFile name)
where
haveSource gfFile =
do gfTime <- modtime gfFile
mb_gfoTime <- maybeIO $ modtime (gf2gfo' gfoDir gfFile)
return (gfFile, Just gfTime, mb_gfoTime)
noSource =
maybe noGFO haveGFO =<< getFilePath gfoPath (gfoFile name)
where
gfoPath = maybe id (:) gfoDir ps
haveGFO gfoFile =
do gfoTime <- modtime gfoFile
return (gfoFile, Nothing, Just gfoTime)
noGFO = raise (render ("File" <+> gfFile name <+> "does not exist." $$
"searched in:" <+> vcat ps))
modtime path = liftIO $ getModificationTime path
isGFO :: FilePath -> Bool
isGFO = (== ".gfo") . takeExtensions
gfImports opts file = importsOfModule `fmap` parseModHeader opts file
gfoFile :: FilePath -> FilePath
gfoFile f = addExtension f "gfo"
gfoImports gfo = fmap importsOfModule `fmap` liftIO (decodeModuleHeader gfo)
gfFile :: FilePath -> FilePath
gfFile f = addExtension f "gf"
gf2gfo :: Options -> FilePath -> FilePath
gf2gfo opts file = maybe (gfoFile (dropExtension file))
(\dir -> dir </> gfoFile (dropExtension (takeFileName file)))
(flag optGFODir opts)
--------------------------------------------------------------------------------
-- From the given Options and the time stamps computes
-- whether the module have to be computed, read from .gfo or
@@ -255,7 +258,7 @@ getPragmas = parseModuleOptions .
map (BS.unpack . BS.unwords . BS.words . BS.drop 3) .
filter (BS.isPrefixOf (BS.pack "--#")) . BS.lines
--getFilePath :: MonadIO m => [FilePath] -> String -> m (Maybe FilePath)
getFilePath :: MonadIO m => [FilePath] -> String -> m (Maybe FilePath)
getFilePath paths file =
liftIO $ do --ePutStrLn $ "getFilePath "++show paths++" "++show file
get paths

View File

@@ -0,0 +1,157 @@
module GF.CompileOne(OneOutput,CompiledModule,
compileOne --, compileSourceModule
) where
import Prelude hiding (catch)
import GF.System.Catch
-- The main compiler passes
import GF.Compile.GetGrammar(getSourceModule)
import GF.Compile.Rename(renameModule)
import GF.Compile.CheckGrammar(checkModule)
import GF.Compile.Optimize(optimizeModule)
import GF.Compile.SubExOpt(subexpModule,unsubexpModule)
import GF.Compile.GeneratePMCFG(generatePMCFG)
import GF.Compile.Update(extendModule,rebuildModule)
import GF.Compile.Tags(writeTags,gf2gftags)
import GF.Grammar.Grammar
import GF.Grammar.Printer(ppModule,TermPrintQual(..))
import GF.Grammar.Binary(decodeModule,encodeModule)
import GF.Infra.Option
import GF.Infra.UseIO(FullPath,IOE,gf2gfo,liftIO,ePutStrLn,putPointE,putStrE)
import GF.Infra.CheckM(runCheck)
import GF.Data.Operations(liftErr,(+++))
import GF.System.Directory(doesFileExist,getCurrentDirectory)
import System.FilePath(dropFileName,dropExtension,takeExtensions)
import qualified Data.Map as Map
import GF.Text.Pretty(Doc,render,(<+>),($$))
type OneOutput = (Maybe FullPath,CompiledModule)
type CompiledModule = SourceModule
-- | Compile a given source file (or just load a .gfo file),
-- given a 'SourceGrammar' containing everything it depends on.
compileOne :: Options -> SourceGrammar -> FullPath -> IOE OneOutput
compileOne opts srcgr file = do
let putpOpt v m act
| verbAtLeast opts Verbose = putPointE Normal opts v act
| verbAtLeast opts Normal = putStrE m >> act
| otherwise = putPointE Verbose opts v act
let path = dropFileName file
let name = dropExtension file
case takeExtensions file of
".gfo" -> reuseGFO opts srcgr file
_ -> do
-- for gf source, do full compilation and generate code
b1 <- liftIO $ doesFileExist file
if not b1
then compileOne opts srcgr $ (gf2gfo opts file)
else do
sm <- putpOpt ("- parsing" +++ file) ("- compiling" +++ file ++ "... ")
$ getSourceModule opts file
intermOut opts (Dump Source) (ppModule Internal sm)
compileSourceModule opts srcgr (Just file) sm
-- | For compiled gf, read the file and update environment
-- also undo common subexp optimization, to enable normal computations
reuseGFO opts srcgr file =
do sm00 <- putPointE Verbose opts ("+ reading" +++ file) $
liftIO (decodeModule file)
let sm0 = (fst sm00,(snd sm00){mflags=mflags (snd sm00) `addOptions` opts})
intermOut opts (Dump Source) (ppModule Internal sm0)
let sm1 = unsubexpModule sm0
cwd <- liftIO getCurrentDirectory
(sm,warnings) <- -- putPointE Normal opts "creating indirections" $
runCheck $ extendModule cwd srcgr sm1
warnOut opts warnings
if flag optTagsOnly opts
then writeTags opts srcgr (gf2gftags opts file) sm1
else return ()
return (Just file,sm)
compileSourceModule :: Options -> SourceGrammar -> Maybe FilePath -> SourceModule -> IOE OneOutput
compileSourceModule opts gr mb_gfFile mo0 = do
cwd <- liftIO getCurrentDirectory
mo1 <- runPass Extend "" . extendModule cwd gr
=<< runPass Rebuild "" (rebuildModule cwd gr mo0)
case mo1 of
(_,n) | not (isCompleteModule n) -> generateTagsOr generateGFO mo1
_ -> do
mo2 <- runPass Rename "renaming" $ renameModule cwd gr mo1
mo3 <- runPass TypeCheck "type checking" $ checkModule opts cwd gr mo2
generateTagsOr compileCompleteModule mo3
where
compileCompleteModule mo3 = do
mo4 <- runPass2 id Optimize "optimizing" $ optimizeModule opts gr mo3
mo5 <- if isModCnc (snd mo4) && flag optPMCFG opts
then runPass2' "generating PMCFG" $ generatePMCFG opts gr mb_gfFile mo4
else runPass2' "" $ return mo4
generateGFO mo5
------------------------------
generateTagsOr compile =
if flag optTagsOnly opts then generateTags else compile
generateGFO mo =
do let mb_gfo = fmap (gf2gfo opts) mb_gfFile
maybeM (flip (writeGFO opts) mo) mb_gfo
return (mb_gfo,mo)
generateTags mo =
do maybeM (flip (writeTags opts gr) mo . gf2gftags opts) mb_gfFile
return (Nothing,mo)
putpp s = if null s then id else putPointE Verbose opts (" "++s++" ")
idump pass = intermOut opts (Dump pass) . ppModule Internal
-- * Impedance matching
runPass = runPass' fst fst snd (liftErr . runCheck)
runPass2 = runPass2e liftErr
runPass2' = runPass2e id id Canon
runPass2e lift f = runPass' id f (const "") lift
runPass' ret dump warn lift pass pp m =
do out <- putpp pp $ lift m
warnOut opts (warn out)
idump pass (dump out)
return (ret out)
maybeM f = maybe (return ()) f
writeGFO :: Options -> FilePath -> SourceModule -> IOE ()
writeGFO opts file mo = do
let mo1 = subexpModule mo
mo2 = case mo1 of
(m,mi) -> (m,mi{jments=Map.filter (\x -> case x of {AnyInd _ _ -> False; _ -> True}) (jments mi)})
putPointE Normal opts (" write file" +++ file) $ liftIO $ encodeModule file mo2
-- to output an intermediate stage
intermOut :: Options -> Dump -> Doc -> IOE ()
intermOut opts d doc
| dump opts d = ePutStrLn (render ("\n\n--#" <+> show d $$ doc))
| otherwise = return ()
warnOut opts warnings
| null warnings = return ()
| otherwise = liftIO $ ePutStrLn ws `catch` oops
where
oops _ = ePutStrLn "" -- prevent crash on character encoding problem
ws = if flag optVerbosity opts == Normal
then '\n':warnings
else warnings

View File

@@ -105,9 +105,26 @@ getSubdirs dir = do
then return (fpath:fs)
else return fs ) [] fs
--------------------------------------------------------------------------------
justModuleName :: FilePath -> String
justModuleName = dropExtension . takeFileName
isGFO :: FilePath -> Bool
isGFO = (== ".gfo") . takeExtensions
gfoFile :: FilePath -> FilePath
gfoFile f = addExtension f "gfo"
gfFile :: FilePath -> FilePath
gfFile f = addExtension f "gf"
gf2gfo :: Options -> FilePath -> FilePath
gf2gfo = gf2gfo' . flag optGFODir
gf2gfo' gfoDir file = maybe (gfoFile (dropExtension file))
(\dir -> dir </> gfoFile (takeBaseName file))
gfoDir
--------------------------------------------------------------------------------
splitInModuleSearchPath :: String -> [FilePath]
splitInModuleSearchPath s = case break isPathSep s of
(f,_:cs) -> f : splitInModuleSearchPath cs