Experimental: parallel batch compilation of grammars

On my laptop these changes speed up the full build of the RGL and example
grammars with 'cabal build' from ~95s to ~43s and the zero build from ~18s
to ~5s.

The main change is the introduction of the module GF.CompileInParallel that
replaces GF.Compile and the function GF.Compile.ReadFiles.getAllFiles. At
present, it is activated with the new -j flag, and it is only used when
combined with --make or --batch. In addition, to get parallel computations,
you need to add GHC run-time flags, e.g., +RTS -N -A20M -RTS, to the command
line.

The Setup.hs script has been modified to pass the appropriate flags to GF
for parallel compilation when compiling the RGL and example grammars, but you
need a recent version of Cabal for this to work (probably >=1.20).

Some additonal refactoring were made during this work. A new monad is used to
avoid warnings/error messages from different modules to be intertwined when
compiling in parallel, so some functios that were hardiwred to the IO or IOE
monads have been lifted to work in arbitrary monads that are instances in
the appropriate classes.
This commit is contained in:
hallgren
2014-08-25 09:56:00 +00:00
parent 9253d54b7e
commit d84c5ef171
11 changed files with 420 additions and 178 deletions

View File

@@ -20,8 +20,8 @@
module GF.Compile.ReadFiles
( getAllFiles,ModName,ModEnv,importsOfModule,
parseSource,lift,
getOptionsFromFile,getPragmas) where
findFile,gfImports,gfoImports,
parseSource,getOptionsFromFile,getPragmas) where
import Prelude hiding (catch)
import GF.System.Catch
@@ -32,15 +32,17 @@ import GF.Data.Operations
import GF.Grammar.Lexer
import GF.Grammar.Parser
import GF.Grammar.Grammar
import GF.Grammar.Binary
import GF.Grammar.Binary(decodeModuleHeader)
import System.IO(mkTextEncoding)
import qualified Data.ByteString.UTF8 as UTF8
import GF.Text.Coding(decodeUnicodeIO)
import qualified Data.ByteString.UTF8 as UTF8
import qualified Data.ByteString.Char8 as BS
import Control.Monad
import Data.Maybe(isJust)
import qualified Data.ByteString.Char8 as BS
import Data.Char(isSpace)
import qualified Data.Map as Map
import Data.Time(UTCTime)
import GF.System.Directory(getModificationTime,doesFileExist,canonicalizePath)
@@ -123,8 +125,8 @@ findFile gfoDir ps name =
maybe noSource haveSource =<< getFilePath ps (gfFile name)
where
haveSource gfFile =
do gfTime <- modtime gfFile
mb_gfoTime <- maybeIO $ modtime (gf2gfo' gfoDir gfFile)
do gfTime <- getModificationTime gfFile
mb_gfoTime <- maybeIO $ getModificationTime (gf2gfo' gfoDir gfFile)
return (gfFile, Just gfTime, mb_gfoTime)
noSource =
@@ -133,14 +135,12 @@ findFile gfoDir ps name =
gfoPath = maybe id (:) gfoDir ps
haveGFO gfoFile =
do gfoTime <- modtime gfoFile
do gfoTime <- getModificationTime gfoFile
return (gfoFile, Nothing, Just gfoTime)
noGFO = raise (render ("File" <+> gfFile name <+> "does not exist." $$
"searched in:" <+> vcat ps))
modtime path = getModificationTime path
gfImports opts file = importsOfModule `fmap` parseModHeader opts file
gfoImports gfo = fmap importsOfModule `fmap` liftIO (decodeModuleHeader gfo)
@@ -216,7 +216,7 @@ importsOfModule (m,mi) = (modName m,depModInfo mi [])
parseModHeader opts file =
do --ePutStrLn file
(_,parsed) <- parseSource opts pModHeader =<< lift (BS.readFile file)
(_,parsed) <- parseSource opts pModHeader =<< liftIO (BS.readFile file)
case parsed of
Right mo -> return mo
Left (Pn l c,msg) ->
@@ -234,43 +234,44 @@ toUTF8 opts0 raw =
then return raw
else if coding=="CP1252" -- Latin1
then return . UTF8.fromString $ BS.unpack raw -- faster
else lift $
do --ePutStrLn $ "toUTF8 from "++coding
enc <- mkTextEncoding coding
-- decodeUnicodeIO uses a lot of stack space,
-- so we need to split the file into smaller pieces
ls <- mapM (decodeUnicodeIO enc) (BS.lines raw)
return $ UTF8.fromString (unlines ls)
else do --ePutStrLn $ "toUTF8 from "++coding
recodeToUTF8 coding raw
return (given,utf8)
--lift io = ioe (fmap Ok io `catch` (return . Bad . show))
lift io = liftIO io
recodeToUTF8 coding raw =
liftIO $
do enc <- mkTextEncoding coding
-- decodeUnicodeIO uses a lot of stack space,
-- so we need to split the file into smaller pieces
ls <- mapM (decodeUnicodeIO enc) (BS.lines raw)
return $ UTF8.fromString (unlines ls)
-- | options can be passed to the compiler by comments in @--#@, in the main file
getOptionsFromFile :: (MonadIO m,ErrorMonad m) => FilePath -> m Options
--getOptionsFromFile :: (MonadIO m,ErrorMonad m) => FilePath -> m Options
getOptionsFromFile file = do
s <- either (\_ -> raise $ "File " ++ file ++ " does not exist") return =<<
liftIO (try $ BS.readFile file)
opts <- getPragmas s
opts <- either failed getPragmas =<< (liftIO $ try $ BS.readFile file)
-- The coding flag should not be inherited by other files
return (addOptions opts (modifyFlags $ \ f -> f{optEncoding=Nothing}))
where
failed _ = raise $ "File " ++ file ++ " does not exist"
getPragmas :: (ErrorMonad m) => BS.ByteString -> m Options
getPragmas = parseModuleOptions .
map (BS.unpack . BS.unwords . BS.words . BS.drop 3) .
filter (BS.isPrefixOf (BS.pack "--#")) . BS.lines
filter (BS.isPrefixOf (BS.pack "--#")) .
-- takeWhile (BS.isPrefixOf (BS.pack "--")) .
-- filter (not . BS.null) .
map (BS.dropWhile isSpace) .
BS.lines
getFilePath :: MonadIO m => [FilePath] -> String -> m (Maybe FilePath)
getFilePath paths file =
liftIO $ do --ePutStrLn $ "getFilePath "++show paths++" "++show file
get paths
getFilePath paths file = get paths
where
get [] = return Nothing
get (p:ps) = do
let pfile = p </> file
exist <- doesFileExist pfile
if not exist
then get ps
else do pfile <- canonicalizePath pfile
return (Just pfile)
get (p:ps) = do let pfile = p </> file
exist <- doesFileExist pfile
if not exist
then get ps
else do pfile <- canonicalizePath pfile
return (Just pfile)