From d84c5ef1715c3e4aed4098ee9c847e2dcc86cba4 Mon Sep 17 00:00:00 2001 From: hallgren Date: Mon, 25 Aug 2014 09:56:00 +0000 Subject: [PATCH] Experimental: parallel batch compilation of grammars On my laptop these changes speed up the full build of the RGL and example grammars with 'cabal build' from ~95s to ~43s and the zero build from ~18s to ~5s. The main change is the introduction of the module GF.CompileInParallel that replaces GF.Compile and the function GF.Compile.ReadFiles.getAllFiles. At present, it is activated with the new -j flag, and it is only used when combined with --make or --batch. In addition, to get parallel computations, you need to add GHC run-time flags, e.g., +RTS -N -A20M -RTS, to the command line. The Setup.hs script has been modified to pass the appropriate flags to GF for parallel compilation when compiling the RGL and example grammars, but you need a recent version of Cabal for this to work (probably >=1.20). Some additonal refactoring were made during this work. A new monad is used to avoid warnings/error messages from different modules to be intertwined when compiling in parallel, so some functios that were hardiwred to the IO or IOE monads have been lifted to work in arbitrary monads that are instances in the appropriate classes. --- Setup.hs | 68 ++++--- WebSetup.hs | 59 +++--- src/compiler/GF/Compile/GeneratePMCFG.hs | 4 +- src/compiler/GF/Compile/GetGrammar.hs | 22 +-- src/compiler/GF/Compile/ReadFiles.hs | 71 ++++---- src/compiler/GF/CompileInParallel.hs | 218 +++++++++++++++++++++++ src/compiler/GF/CompileOne.hs | 49 +++-- src/compiler/GF/Infra/Option.hs | 9 +- src/compiler/GF/Infra/UseIO.hs | 76 ++++---- src/compiler/GF/System/Directory.hs | 9 +- src/compiler/GFC.hs | 13 +- 11 files changed, 420 insertions(+), 178 deletions(-) create mode 100644 src/compiler/GF/CompileInParallel.hs diff --git a/Setup.hs b/Setup.hs index 1d3ec7e93..066b86319 100644 --- a/Setup.hs +++ b/Setup.hs @@ -9,11 +9,10 @@ import Data.List(isPrefixOf,intersect) import Data.Maybe(listToMaybe) --import System.IO import qualified Control.Exception as E -import System.Process +import System.Process(readProcess) import System.FilePath import System.Directory -import System.Process -import System.Exit +--import System.Exit --import Control.Concurrent(forkIO) --import Control.Concurrent.Chan(newChan,writeChan,readChan) @@ -42,19 +41,19 @@ main = defaultMainWithHooks simpleUserHooks{ preBuild = gfPreBuild gfPostBuild args flags pkg lbi = do --writeFile "running" "" - buildRGL args flags (pkg,lbi) --- let gf = default_gf (pkg,lbi) + buildRGL args flags (flags,pkg,lbi) +-- let gf = default_gf lbi -- buildWeb gf (pkg,lbi) gfPostInst args flags pkg lbi = do installRGL args flags (pkg,lbi) - let gf = default_gf (pkg,lbi) - installWeb gf args flags (pkg,lbi) + let gf = default_gf lbi + installWeb (pkg,lbi) gfPostCopy args flags pkg lbi = - do copyRGL args flags (pkg,lbi) - let gf = default_gf (pkg,lbi) - copyWeb gf args flags (pkg,lbi) + do let gf = default_gf lbi + copyRGL args flags (pkg,lbi) + copyWeb flags (pkg,lbi) -------------------------------------------------------- -- Commands for building the Resource Grammar Library @@ -71,13 +70,16 @@ data RGLCommand , cmdAction :: [Mode] -> [String] -> Info -> IO () } -type Info = (PackageDescription,LocalBuildInfo) +type Info = (BuildFlags,PackageDescription,LocalBuildInfo) +bf (i,_,_) = i +--pd (_,i,_) = i +lbi (_,_,i) = i rglCommands = [ RGLCommand "prelude" True $ \mode args bi -> do putStrLn $ "Compiling [prelude]" let prelude_src_dir = rgl_src_dir "prelude" - prelude_dst_dir = rgl_dst_dir bi "prelude" + prelude_dst_dir = rgl_dst_dir (lbi bi) "prelude" createDirectoryIfMissing True prelude_dst_dir files <- ls prelude_src_dir run_gfc bi (["-s", "--gfo-dir="++prelude_dst_dir] ++ [prelude_src_dir file | file <- files]) @@ -86,11 +88,11 @@ rglCommands = , RGLCommand "lang" False $ gfcp [l,s] , RGLCommand "api" False $ gfcp [t,sc] , RGLCommand "compat" False $ gfcp [c] - , RGLCommand "web" True $ \ _ _ bi -> buildWeb (default_gf bi) bi + , RGLCommand "web" True $ \ _ _ bi -> buildWeb (default_gf (lbi bi)) bi , RGLCommand "pgf" False $ \modes args bi -> parallel_ [ - do let dir = getRGLBuildDir bi mode + do let dir = getRGLBuildDir (lbi bi) mode createDirectoryIfMissing True dir sequence_ [run_gfc bi ["-s","-make","-name=Lang"++la, dir ++ "/Lang" ++ la ++ ".gfo"] @@ -146,8 +148,8 @@ buildRGL args flags bi = do installRGL args flags bi = do let modes = getOptMode args let inst_gf_lib_dir = datadir (uncurry absoluteInstallDirs bi NoCopyDest) "lib" - copyAll "prelude" (rgl_dst_dir bi "prelude") (inst_gf_lib_dir "prelude") - sequence_ [copyAll (show mode) (getRGLBuildDir bi mode) (inst_gf_lib_dir getRGLBuildSubDir bi mode)|mode<-modes] + copyAll "prelude" (rgl_dst_dir (snd bi) "prelude") (inst_gf_lib_dir "prelude") + sequence_ [copyAll (show mode) (getRGLBuildDir (snd bi) mode) (inst_gf_lib_dir getRGLBuildSubDir mode)|mode<-modes] copyRGL args flags bi = do let modes = getOptMode args @@ -155,8 +157,8 @@ copyRGL args flags bi = do NoFlag -> NoCopyDest Flag d -> d let inst_gf_lib_dir = datadir (uncurry absoluteInstallDirs bi dest) "lib" - copyAll "prelude" (rgl_dst_dir bi "prelude") (inst_gf_lib_dir "prelude") - sequence_ [copyAll (show mode) (getRGLBuildDir bi mode) (inst_gf_lib_dir getRGLBuildSubDir bi mode)|mode<-modes] + copyAll "prelude" (rgl_dst_dir (snd bi) "prelude") (inst_gf_lib_dir "prelude") + sequence_ [copyAll (show mode) (getRGLBuildDir (snd bi) mode) (inst_gf_lib_dir getRGLBuildSubDir mode)|mode<-modes] copyAll s from to = do putStrLn $ "Installing [" ++ s ++ "] " ++ to @@ -181,7 +183,7 @@ sdistRGL pkg mb_lbi hooks flags = do else getRGLFiles path paths rgl_src_dir = "lib" "src" -rgl_dst_dir (_,lbi) = buildDir lbi "rgl" +rgl_dst_dir lbi = buildDir lbi "rgl" -- the languages have long directory names and short ISO codes (3 letters) -- we also give the decodings for postprocessing linearizations, as long as grammars @@ -258,7 +260,7 @@ langsCompat = langsLang `only` ["Cat","Eng","Fin","Fre","Ita","Lav","Spa","Swe"] gfc bi modes summary files = parallel_ [gfcn bi mode summary files | mode<-modes] gfcn bi mode summary files = do - let dir = getRGLBuildDir bi mode + let dir = getRGLBuildDir (lbi bi) mode preproc = case mode of AllTenses -> "" Present -> "-preproc="++({-rgl_src_dir -} "mkPresent") @@ -268,7 +270,7 @@ gfcn bi mode summary files = do gf bi comm files = do putStrLn $ "Reading " ++ unwords files - let gf = default_gf bi + let gf = default_gf (lbi bi) putStrLn ("executing: " ++ comm ++ "\n" ++ "in " ++ gf) out <- readProcess gf ("-s":files) comm @@ -318,13 +320,14 @@ getOptLangs defaultLangs args = then findLangs langs [l]++ls else ls -getRGLBuildSubDir (_,lbi) mode = +getRGLBuildSubDir mode = case mode of AllTenses -> "alltenses" Present -> "present" -getRGLBuildDir bi mode = rgl_dst_dir bi getRGLBuildSubDir bi mode +getRGLBuildDir :: LocalBuildInfo -> Mode -> FilePath +getRGLBuildDir lbi mode = rgl_dst_dir lbi getRGLBuildSubDir mode getRGLCommands args = let cmds0 = [cmd | arg <- args, @@ -350,22 +353,13 @@ unlexer abstr ls = -- | Runs the gf executable in compile mode with the given arguments. run_gfc :: Info -> [String] -> IO () run_gfc bi args = - do let args' = ["-batch","-gf-lib-path="++rgl_src_dir] - ++ ["+RTS","-A20M","-RTS"] + do let args' = numJobs (bf bi)++["-batch","-gf-lib-path="++rgl_src_dir] ++ filter (not . null) args - gf = default_gf bi - gf_cmdline = gf ++ " " ++ unwords (map showArg args') --- putStrLn $ "Running: " ++ gf_cmdline --- appendFile "running" (gf_cmdline++"\n") - e <- rawSystem gf args' - case e of - ExitSuccess -> return () - ExitFailure i -> do putStrLn $ "Ran: " ++ gf_cmdline - die $ "gf exited with exit code: " ++ show i - where - showArg arg = if ' ' `elem` arg then "'" ++ arg ++ "'" else arg + gf = default_gf (lbi bi) + execute gf args' -default_gf (_,lbi) = buildDir lbi exeName' exeNameReal +default_gf :: LocalBuildInfo -> FilePath +default_gf lbi = buildDir lbi exeName' exeNameReal where exeName' = "gf" exeNameReal = exeName' <.> exeExtension diff --git a/WebSetup.hs b/WebSetup.hs index b3007e3c7..80f5fe454 100644 --- a/WebSetup.hs +++ b/WebSetup.hs @@ -1,11 +1,12 @@ -module WebSetup(buildWeb,installWeb,copyWeb) where +module WebSetup(buildWeb,installWeb,copyWeb,numJobs,execute) where -import System.Directory(createDirectoryIfMissing,copyFile,removeFile) -import System.FilePath(()) -import System.Cmd(system) +import System.Directory(createDirectoryIfMissing,copyFile) +import System.FilePath((),dropExtension) +import System.Process(rawSystem) import System.Exit(ExitCode(..)) -import Distribution.Simple.Setup(Flag(..),CopyDest(..),copyDest) +import Distribution.Simple.Setup(BuildFlags(..),Flag(..),CopyDest(..),copyDest) import Distribution.Simple.LocalBuildInfo(datadir,buildDir,absoluteInstallDirs) +import Distribution.Simple.Utils(die) {- To test the GF web services, the minibar and the grammar editor, use @@ -33,7 +34,8 @@ example_grammars = -- :: [(pgf, subdir, src)] letterSrc = ["Letter"++lang++".gf"|lang<-letterLangs] letterLangs = words "Eng Fin Fre Heb Rus Swe" -buildWeb gf (pkg,lbi) = + +buildWeb gf (flags,pkg,lbi) = do --putStrLn "buildWeb" mapM_ build_pgf example_grammars where @@ -42,26 +44,26 @@ buildWeb gf (pkg,lbi) = build_pgf (pgf,subdir,src) = do createDirectoryIfMissing True tmp_dir putStrLn $ "Building "++pgf - execute cmd + execute gf args where tmp_dir = gfo_dirsubdir dir = "examples"subdir - cmd = gf++" -make -s -optimize-pgf --gfo-dir="++tmp_dir++ - " --gf-lib-path="++buildDir lbi "rgl"++ - " --output-dir="++gfo_dir++ - " "++unwords [dirfile|file<-src] + args = numJobs flags++["-make","-s","-optimize-pgf"] + ++["--gfo-dir="++tmp_dir, + "--gf-lib-path="++buildDir lbi "rgl", + "--name="++dropExtension pgf, + "--output-dir="++gfo_dir] + ++[dirfile|file<-src] -installWeb gf args flags = setupWeb gf args dest - where - dest = NoCopyDest +installWeb = setupWeb NoCopyDest -copyWeb gf args flags = setupWeb gf args dest +copyWeb flags = setupWeb dest where dest = case copyDest flags of NoFlag -> NoCopyDest Flag d -> d -setupWeb gf args dest (pkg,lbi) = +setupWeb dest (pkg,lbi) = do mapM_ (createDirectoryIfMissing True) [grammars_dir,cloud_dir] mapM_ copy_pgf example_grammars copyGFLogo @@ -83,10 +85,23 @@ setupWeb gf args dest (pkg,lbi) = do createDirectoryIfMissing True logo_dir copyFile ("doc""Logos"gf_logo) (logo_dirgf_logo) -execute command = - do --putStrLn command - e <- system command +execute command args = + do let cmdline = command ++ " " ++ unwords (map showArg args) +-- putStrLn $ "Running: " ++ cmdline +-- appendFile "running" (cmdline++"\n") + e <- rawSystem command args case e of - ExitSuccess -> return () - _ -> fail "Command failed" - return () + ExitSuccess -> return () + ExitFailure i -> do putStrLn $ "Ran: " ++ cmdline + die $ command++" exited with exit code: " ++ show i + where + showArg arg = if ' ' `elem` arg then "'" ++ arg ++ "'" else arg + +numJobs flags = + if null n + then [] + else ["-j=8"++n,"+RTS","-A20M","-N"++n,"-RTS"] + where + n = case buildNumJobs flags of + Flag mn | mn/=Just 1-> maybe "" show mn + _ -> "" diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs index e6067c854..aa22ea412 100644 --- a/src/compiler/GF/Compile/GeneratePMCFG.hs +++ b/src/compiler/GF/Compile/GeneratePMCFG.hs @@ -45,7 +45,7 @@ import Control.Monad.Identity ---------------------------------------------------------------------- -- main conversion function -generatePMCFG :: Options -> SourceGrammar -> Maybe FilePath -> SourceModule -> IOE SourceModule +--generatePMCFG :: Options -> SourceGrammar -> Maybe FilePath -> SourceModule -> IOE SourceModule generatePMCFG opts sgr opath cmo@(cm,cmi) = do (seqs,js) <- mapAccumWithKeyM (addPMCFG opts gr cenv opath am cm) Map.empty (jments cmi) when (verbAtLeast opts Verbose) $ ePutStrLn "" @@ -67,7 +67,7 @@ mapAccumWithKeyM f a m = do let xs = Map.toAscList m return (a,(k,y):kys) -addPMCFG :: Options -> SourceGrammar -> GlobalEnv -> Maybe FilePath -> Ident -> Ident -> SeqSet -> Ident -> Info -> IOE (SeqSet, Info) +--addPMCFG :: Options -> SourceGrammar -> GlobalEnv -> Maybe FilePath -> Ident -> Ident -> SeqSet -> Ident -> Info -> IOE (SeqSet, Info) addPMCFG opts gr cenv opath am cm seqs id (GF.Grammar.CncFun mty@(Just (cat,cont,val)) mlin@(Just (L loc term)) mprn Nothing) = do --when (verbAtLeast opts Verbose) $ ePutStr ("\n+ "++showIdent id++" ...") let pres = protoFCat gr res val diff --git a/src/compiler/GF/Compile/GetGrammar.hs b/src/compiler/GF/Compile/GetGrammar.hs index e10081cff..b4d2e13ef 100644 --- a/src/compiler/GF/Compile/GetGrammar.hs +++ b/src/compiler/GF/Compile/GetGrammar.hs @@ -25,29 +25,29 @@ import GF.Grammar.Parser import GF.Grammar.Grammar import GF.Grammar.CFG import GF.Grammar.EBNF -import GF.Compile.ReadFiles(parseSource,lift) +import GF.Compile.ReadFiles(parseSource) import qualified Data.ByteString.Char8 as BS import Data.Char(isAscii) import Control.Monad (foldM,when,unless) import System.Process (system) -import System.Directory(removeFile,getCurrentDirectory) +import GF.System.Directory(removeFile,getCurrentDirectory) import System.FilePath(makeRelative) -getSourceModule :: Options -> FilePath -> IOE SourceModule +--getSourceModule :: Options -> FilePath -> IOE SourceModule getSourceModule opts file0 = --errIn file0 $ - do tmp <- lift $ foldM runPreprocessor (Source file0) (flag optPreprocessors opts) - raw <- lift $ keepTemp tmp + do tmp <- liftIO $ foldM runPreprocessor (Source file0) (flag optPreprocessors opts) + raw <- liftIO $ keepTemp tmp --ePutStrLn $ "1 "++file0 (optCoding,parsed) <- parseSource opts pModDef raw case parsed of - Left (Pn l c,msg) -> do file <- lift $ writeTemp tmp - cwd <- lift $ getCurrentDirectory + Left (Pn l c,msg) -> do file <- liftIO $ writeTemp tmp + cwd <- getCurrentDirectory let location = makeRelative cwd file++":"++show l++":"++show c raise (location++":\n "++msg) Right (i,mi0) -> - do lift $ removeTemp tmp + do liftIO $ removeTemp tmp let mi =mi0 {mflags=mflags mi0 `addOptions` opts, msrc=file0} optCoding' = renameEncoding `fmap` flag optEncoding (mflags mi0) case (optCoding,optCoding') of @@ -59,7 +59,7 @@ getSourceModule opts file0 = raise $ "Encoding mismatch: "++coding++" /= "++coding' where coding = maybe defaultEncoding renameEncoding optCoding _ -> return () - --lift $ transcodeModule' (i,mi) -- old lexer + --liftIO $ transcodeModule' (i,mi) -- old lexer return (i,mi) -- new lexer getCFRules :: Options -> FilePath -> IOE [CFRule] @@ -67,7 +67,7 @@ getCFRules opts fpath = do raw <- liftIO (BS.readFile fpath) (optCoding,parsed) <- parseSource opts pCFRules raw case parsed of - Left (Pn l c,msg) -> do cwd <- lift $ getCurrentDirectory + Left (Pn l c,msg) -> do cwd <- getCurrentDirectory let location = makeRelative cwd fpath++":"++show l++":"++show c raise (location++":\n "++msg) Right rules -> return rules @@ -77,7 +77,7 @@ getEBNFRules opts fpath = do raw <- liftIO (BS.readFile fpath) (optCoding,parsed) <- parseSource opts pEBNFRules raw case parsed of - Left (Pn l c,msg) -> do cwd <- lift $ getCurrentDirectory + Left (Pn l c,msg) -> do cwd <- getCurrentDirectory let location = makeRelative cwd fpath++":"++show l++":"++show c raise (location++":\n "++msg) Right rules -> return rules diff --git a/src/compiler/GF/Compile/ReadFiles.hs b/src/compiler/GF/Compile/ReadFiles.hs index 4e57e5ba4..1523e91f1 100644 --- a/src/compiler/GF/Compile/ReadFiles.hs +++ b/src/compiler/GF/Compile/ReadFiles.hs @@ -20,8 +20,8 @@ module GF.Compile.ReadFiles ( getAllFiles,ModName,ModEnv,importsOfModule, - parseSource,lift, - getOptionsFromFile,getPragmas) where + findFile,gfImports,gfoImports, + parseSource,getOptionsFromFile,getPragmas) where import Prelude hiding (catch) import GF.System.Catch @@ -32,15 +32,17 @@ import GF.Data.Operations import GF.Grammar.Lexer import GF.Grammar.Parser import GF.Grammar.Grammar -import GF.Grammar.Binary +import GF.Grammar.Binary(decodeModuleHeader) import System.IO(mkTextEncoding) -import qualified Data.ByteString.UTF8 as UTF8 import GF.Text.Coding(decodeUnicodeIO) +import qualified Data.ByteString.UTF8 as UTF8 +import qualified Data.ByteString.Char8 as BS + import Control.Monad import Data.Maybe(isJust) -import qualified Data.ByteString.Char8 as BS +import Data.Char(isSpace) import qualified Data.Map as Map import Data.Time(UTCTime) import GF.System.Directory(getModificationTime,doesFileExist,canonicalizePath) @@ -123,8 +125,8 @@ findFile gfoDir ps name = maybe noSource haveSource =<< getFilePath ps (gfFile name) where haveSource gfFile = - do gfTime <- modtime gfFile - mb_gfoTime <- maybeIO $ modtime (gf2gfo' gfoDir gfFile) + do gfTime <- getModificationTime gfFile + mb_gfoTime <- maybeIO $ getModificationTime (gf2gfo' gfoDir gfFile) return (gfFile, Just gfTime, mb_gfoTime) noSource = @@ -133,14 +135,12 @@ findFile gfoDir ps name = gfoPath = maybe id (:) gfoDir ps haveGFO gfoFile = - do gfoTime <- modtime gfoFile + do gfoTime <- getModificationTime gfoFile return (gfoFile, Nothing, Just gfoTime) noGFO = raise (render ("File" <+> gfFile name <+> "does not exist." $$ "searched in:" <+> vcat ps)) -modtime path = getModificationTime path - gfImports opts file = importsOfModule `fmap` parseModHeader opts file gfoImports gfo = fmap importsOfModule `fmap` liftIO (decodeModuleHeader gfo) @@ -216,7 +216,7 @@ importsOfModule (m,mi) = (modName m,depModInfo mi []) parseModHeader opts file = do --ePutStrLn file - (_,parsed) <- parseSource opts pModHeader =<< lift (BS.readFile file) + (_,parsed) <- parseSource opts pModHeader =<< liftIO (BS.readFile file) case parsed of Right mo -> return mo Left (Pn l c,msg) -> @@ -234,43 +234,44 @@ toUTF8 opts0 raw = then return raw else if coding=="CP1252" -- Latin1 then return . UTF8.fromString $ BS.unpack raw -- faster - else lift $ - do --ePutStrLn $ "toUTF8 from "++coding - enc <- mkTextEncoding coding - -- decodeUnicodeIO uses a lot of stack space, - -- so we need to split the file into smaller pieces - ls <- mapM (decodeUnicodeIO enc) (BS.lines raw) - return $ UTF8.fromString (unlines ls) + else do --ePutStrLn $ "toUTF8 from "++coding + recodeToUTF8 coding raw return (given,utf8) ---lift io = ioe (fmap Ok io `catch` (return . Bad . show)) -lift io = liftIO io +recodeToUTF8 coding raw = + liftIO $ + do enc <- mkTextEncoding coding + -- decodeUnicodeIO uses a lot of stack space, + -- so we need to split the file into smaller pieces + ls <- mapM (decodeUnicodeIO enc) (BS.lines raw) + return $ UTF8.fromString (unlines ls) -- | options can be passed to the compiler by comments in @--#@, in the main file -getOptionsFromFile :: (MonadIO m,ErrorMonad m) => FilePath -> m Options +--getOptionsFromFile :: (MonadIO m,ErrorMonad m) => FilePath -> m Options getOptionsFromFile file = do - s <- either (\_ -> raise $ "File " ++ file ++ " does not exist") return =<< - liftIO (try $ BS.readFile file) - opts <- getPragmas s + opts <- either failed getPragmas =<< (liftIO $ try $ BS.readFile file) -- The coding flag should not be inherited by other files return (addOptions opts (modifyFlags $ \ f -> f{optEncoding=Nothing})) + where + failed _ = raise $ "File " ++ file ++ " does not exist" getPragmas :: (ErrorMonad m) => BS.ByteString -> m Options getPragmas = parseModuleOptions . map (BS.unpack . BS.unwords . BS.words . BS.drop 3) . - filter (BS.isPrefixOf (BS.pack "--#")) . BS.lines + filter (BS.isPrefixOf (BS.pack "--#")) . +-- takeWhile (BS.isPrefixOf (BS.pack "--")) . +-- filter (not . BS.null) . + map (BS.dropWhile isSpace) . + BS.lines getFilePath :: MonadIO m => [FilePath] -> String -> m (Maybe FilePath) -getFilePath paths file = - liftIO $ do --ePutStrLn $ "getFilePath "++show paths++" "++show file - get paths +getFilePath paths file = get paths where get [] = return Nothing - get (p:ps) = do - let pfile = p file - exist <- doesFileExist pfile - if not exist - then get ps - else do pfile <- canonicalizePath pfile - return (Just pfile) + get (p:ps) = do let pfile = p file + exist <- doesFileExist pfile + if not exist + then get ps + else do pfile <- canonicalizePath pfile + return (Just pfile) diff --git a/src/compiler/GF/CompileInParallel.hs b/src/compiler/GF/CompileInParallel.hs new file mode 100644 index 000000000..ef2d36042 --- /dev/null +++ b/src/compiler/GF/CompileInParallel.hs @@ -0,0 +1,218 @@ +module GF.CompileInParallel where +import Control.Monad(join,ap,when,unless) +import Control.Applicative +import Control.Concurrent +import System.FilePath +import System.IO.Unsafe(unsafeInterleaveIO) +import qualified GF.System.Directory as D +import GF.System.Catch(catch) +import Data.List(nub,isPrefixOf,intercalate,partition) +import qualified Data.Map as M +import GF.Compile.ReadFiles(getOptionsFromFile,findFile,gfImports,gfoImports) +import GF.CompileOne(reuseGFO,useTheSource) +import GF.Infra.Option +import GF.Infra.UseIO +import GF.Data.Operations +import GF.Grammar.Grammar(emptySourceGrammar,prependModule,modules) +import GF.Infra.Ident(identS) +import GF.Text.Pretty +import qualified Data.ByteString.Lazy as BS + +batchCompile jobs opts rootfiles0 = + do rootfiles <- mapM canonical rootfiles0 + lib_dir <- canonical =<< getLibraryDirectory opts + filepaths <- mapM (getPathFromFile lib_dir opts) rootfiles + let groups = groupFiles lib_dir filepaths + n = length groups + when (n>1) $ ePutStrLn "Grammar mixes present and alltenses, dividing modules into two groups" + (ts,sgrs) <- unzip <$> mapM (batchCompile1 lib_dir) groups + return (maximum ts,sgrs) + where + groupFiles lib_dir filepaths = + if length groups>1 then groups else [(opts,filepaths)] + where + groups = filter (not.null.snd) [(opts_p,present),(opts_a,alltenses)] + (present,alltenses) = partition usesPresent filepaths + gfoDir = flag optGFODir opts + gfo = maybe "" id gfoDir + opts_p = setGFO "present" + opts_a = setGFO "alltenses" + setGFO d = addOptions opts + (modifyFlags $ \ f->f{optGFODir=Just (gfod)}) + + usesPresent (_,paths) = take 1 libs==["present"] + where + libs = [p|path<-paths, + let (d,p0) = splitAt n path + p = dropSlash p0, + d==lib_dir,p `elem` all_modes] + n = length lib_dir + + all_modes = ["alltenses","present"] + + dropSlash ('/':p) = p + dropSlash p = p + +batchCompile1 lib_dir (opts,filepaths) = + do cwd <- D.getCurrentDirectory + let rel = relativeTo lib_dir cwd + prelude_dir = lib_dir"prelude" + gfoDir = flag optGFODir opts + maybe (return ()) (D.createDirectoryIfMissing True) gfoDir + prelude_files <- maybe [] id <$> + maybeIO (D.getDirectoryContents prelude_dir) + let fromPrelude f = lib_dir `isPrefixOf` f && + takeFileName f `elem` prelude_files + ppPath ps = "-path="<>intercalate ":" (map rel ps) + logchan <- liftIO newChan + liftIO $ forkIO (mapM_ runIOE =<< getChanContents logchan) + let logStrLn = writeChan logchan . ePutStrLn + ok :: CollectOutput IOE a -> IO a + ok (CO m) = err bad good =<< appIOE m + where + good (o,r) = do writeChan logchan o; return r + bad e = do writeChan logchan (redPutStrLn e); fail "failed" + redPutStrLn s = do ePutStr "\ESC[31m";ePutStr s;ePutStrLn "\ESC[m" + sgr <- liftIO $ newMVar emptySourceGrammar + let extendSgr sgr m = + modifyMVar_ sgr $ \ gr -> + do let gr' = prependModule gr m +-- logStrLn $ "Finished "++show (length (modules gr'))++" modules." + return gr' + fcache <- liftIO $ newIOCache $ \ _ (imp,Hide (f,ps)) -> + do (file,_,_) <- runIOE $ findFile gfoDir ps imp + return (file,(f,ps)) + let find f ps imp = + do (file',(f',ps')) <- liftIO $ readIOCache fcache (imp,Hide (f,ps)) + when (ps'/=ps) $ + do (file,_,_) <- findFile gfoDir ps imp + unless (file==file' || any fromPrelude [file,file']) $ + do eq <- liftIO $ (==) <$> BS.readFile file <*> BS.readFile file' + unless eq $ + fail $ render $ + hang ("Ambiguous import of"<+>imp<>":") 4 + (hang (rel file<+>"from"<+>rel f) 4 (ppPath ps) + $$ + hang (rel file'<+>"from"<+>rel f') 4 (ppPath ps')) + return file' + compile cache (file,paths) = readIOCache cache (file,Hide paths) + compile' cache (f,Hide ps) = + do let compileImport f = compile cache (f,ps) + findImports (f,ps) = mapM (find f ps) . nub . snd + =<< getImports opts f + tis <- parMapM compileImport =<< ok (findImports (f,ps)) + let reuse gfo = do t <- D.getModificationTime gfo + gr <- readMVar sgr + r <- lazyIO $ ok (reuseGFO opts gr gfo) + return (t,snd r) + compileSrc f = + do gr <- readMVar sgr + (Just gfo,mo) <- ok (useTheSource opts gr f) + t <- D.getModificationTime gfo + return (t,mo) + (t,mo) <- if isGFO f + then reuse f + else do ts <- D.getModificationTime f + let gfo = gf2gfo' gfoDir f + to <- maybeIO (D.getModificationTime gfo) + if to>=Just (maximum (ts:tis)) + then reuse gfo + else compileSrc f + extendSgr sgr mo + return (maximum (t:tis)) + cache <- liftIO $ newIOCache compile' + ts <- liftIO $ parMapM (compile cache) filepaths + gr <- liftIO $ readMVar sgr + let cnc = identS (justModuleName (fst (last filepaths))) + return (maximum ts,(cnc,gr)) + +parMapM f xs = + do vs <- mapM (const newEmptyMVar) xs + sequence_ [ forkIO (putMVar v =<< f x) | (v,x) <- zip vs xs] + mapM takeMVar vs + +lazyIO = unsafeInterleaveIO + +canonical path = liftIO $ D.canonicalizePath path `catch` const (return path) + +getPathFromFile lib_dir cmdline_opts file = + do --file <- getRealFile file + file_opts <- getOptionsFromFile file + let file_dir = dropFileName file + opts = addOptions (fixRelativeLibPaths file_dir lib_dir file_opts) + cmdline_opts + paths <- mapM canonical . nub . (file_dir :) =<< extendPathEnv opts + return (file,nub paths) + +getImports opts file = + if isGFO file then gfoImports' file else gfImports opts file + where + gfoImports' file = maybe bad return =<< gfoImports file + where bad = raise $ file++": bad .gfo file" + +relativeTo lib_dir cwd path = + if length librelmakeRelative lib_dir path + cwdrel = makeRelative cwd path + +-------------------------------------------------------------------------------- + +data IOCache arg res + = IOCache { op::arg->IO res, + cache::MVar (M.Map arg (MVar res)) } + +newIOCache op = + do v <- newMVar M.empty + let cache = IOCache (op cache) v + return cache + +readIOCache (IOCache op cacheVar) arg = + join $ modifyMVar cacheVar $ \ cache -> + case M.lookup arg cache of + Nothing -> do v <- newEmptyMVar + let doit = do res <- op arg + putMVar v res + return res + return (M.insert arg v cache,doit) + Just v -> do return (cache,readMVar v) + + +newtype Hide a = Hide {reveal::a} +instance Eq (Hide a) where _ == _ = True +instance Ord (Hide a) where compare _ _ = EQ + +-------------------------------------------------------------------------------- + +newtype CollectOutput m a = CO {unCO::m (m (),a)} +{- +runCO (CO m) = do (o,x) <- m + o + return x +-} +instance Functor m => Functor (CollectOutput m) where + fmap f (CO m) = CO (fmap (fmap f) m) + +instance (Functor m,Monad m) => Applicative (CollectOutput m) where + pure = return + (<*>) = ap + +instance Monad m => Monad (CollectOutput m) where + return x = CO (return (return (),x)) + CO m >>= f = CO $ do (o1,x) <- m + let CO m2 = f x + (o2,y) <- m2 + return (o1>>o2,y) +instance MonadIO m => MonadIO (CollectOutput m) where + liftIO io = CO $ do x <- liftIO io + return (return (),x) + +instance Output m => Output (CollectOutput m) where + ePutStr s = CO (return (ePutStr s,())) + ePutStrLn s = CO (return (ePutStrLn s,())) + putStrLnE s = CO (return (putStrLnE s,())) + putStrE s = CO (return (putStrE s,())) + +instance ErrorMonad m => ErrorMonad (CollectOutput m) where + raise e = CO (raise e) + handle (CO m) h = CO $ handle m (unCO . h) diff --git a/src/compiler/GF/CompileOne.hs b/src/compiler/GF/CompileOne.hs index 5310a7ebb..0a6fcb56a 100644 --- a/src/compiler/GF/CompileOne.hs +++ b/src/compiler/GF/CompileOne.hs @@ -1,8 +1,7 @@ module GF.CompileOne(OneOutput,CompiledModule, - compileOne --, CompileSource, compileSourceModule + compileOne,reuseGFO,useTheSource + --, CompileSource, compileSourceModule ) where -import Prelude hiding (catch) -import GF.System.Catch -- The main compiler passes import GF.Compile.GetGrammar(getSourceModule) @@ -19,7 +18,7 @@ import GF.Grammar.Printer(ppModule,TermPrintQual(..)) import GF.Grammar.Binary(decodeModule,encodeModule) import GF.Infra.Option -import GF.Infra.UseIO(FullPath,IOE,isGFO,gf2gfo,liftIO,ePutStrLn,putPointE,putStrE) +import GF.Infra.UseIO(FullPath,IOE,isGFO,gf2gfo,liftIO,Output(..),putPointE) import GF.Infra.CheckM(runCheck') import GF.Data.Operations(liftErr,(+++)) @@ -33,27 +32,13 @@ type CompiledModule = SourceModule -- | Compile a given source file (or just load a .gfo file), -- given a 'SourceGrammar' containing everything it depends on. -compileOne :: Options -> SourceGrammar -> FullPath -> IOE OneOutput +--compileOne :: Options -> SourceGrammar -> FullPath -> IOE OneOutput compileOne opts srcgr file = if isGFO file then reuseGFO opts srcgr file else do b1 <- doesFileExist file - if b1 then useTheSource + if b1 then useTheSource opts srcgr file else reuseGFO opts srcgr (gf2gfo opts file) - where - -- | For gf source, do full compilation and generate code - useTheSource = - do sm <- putpOpt ("- parsing" +++ file) - ("- compiling" +++ file ++ "... ") - (getSourceModule opts file) - idump opts Source sm - cwd <- getCurrentDirectory - compileSourceModule opts cwd (Just file) srcgr sm - - putpOpt v m act - | verbAtLeast opts Verbose = putPointE Normal opts v act - | verbAtLeast opts Normal = putStrE m >> act - | otherwise = putPointE Verbose opts v act -- | For compiled gf, read the file and update environment -- also undo common subexp optimization, to enable normal computations @@ -76,9 +61,24 @@ reuseGFO opts srcgr file = return (Just file,sm) +-- | For gf source, do full compilation and generate code +--useTheSource :: Options -> SourceGrammar -> FullPath -> IOE OneOutput +useTheSource opts srcgr file = + do sm <- putpOpt ("- parsing" +++ file) + ("- compiling" +++ file ++ "... ") + (getSourceModule opts file) + idump opts Source sm + cwd <- getCurrentDirectory + compileSourceModule opts cwd (Just file) srcgr sm + where + putpOpt v m act + | verbAtLeast opts Verbose = putPointE Normal opts v act + | verbAtLeast opts Normal = putStrE m >> act + | otherwise = putPointE Verbose opts v act + type CompileSource = SourceGrammar -> SourceModule -> IOE OneOutput -compileSourceModule :: Options -> FilePath -> Maybe FilePath -> CompileSource +--compileSourceModule :: Options -> FilePath -> Maybe FilePath -> CompileSource compileSourceModule opts cwd mb_gfFile gr = if flag optTagsOnly opts then generateTags <=< ifComplete middle <=< frontend @@ -128,7 +128,7 @@ compileSourceModule opts cwd mb_gfFile gr = maybeM f = maybe (return ()) f -writeGFO :: Options -> FilePath -> SourceModule -> IOE () +--writeGFO :: Options -> FilePath -> SourceModule -> IOE () writeGFO opts file mo = putPointE Normal opts (" write file" +++ file) $ liftIO $ encodeModule file mo2 @@ -139,7 +139,7 @@ writeGFO opts file mo = notAnyInd x = case x of AnyInd{} -> False; _ -> True -- to output an intermediate stage -intermOut :: Options -> Dump -> Doc -> IOE () +--intermOut :: Options -> Dump -> Doc -> IOE () intermOut opts d doc | dump opts d = ePutStrLn (render ("\n\n--#" <+> show d $$ doc)) | otherwise = return () @@ -148,9 +148,8 @@ idump opts pass = intermOut opts (Dump pass) . ppModule Internal warnOut opts warnings | null warnings = return () - | otherwise = liftIO $ ePutStrLn ws `catch` oops + | otherwise = do ePutStr "\ESC[34m";ePutStr ws;ePutStrLn "\ESC[m" where - oops _ = ePutStrLn "" -- prevent crash on character encoding problem ws = if flag optVerbosity opts == Normal then '\n':warnings else warnings diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs index 88767c72e..15feda1d0 100644 --- a/src/compiler/GF/Infra/Option.hs +++ b/src/compiler/GF/Infra/Option.hs @@ -172,8 +172,8 @@ data Flags = Flags { optTagsOnly :: Bool, optHeuristicFactor :: Maybe Double, optMetaProb :: Maybe Double, - optMetaToknProb :: Maybe Double{-, - optNewComp :: Bool-} + optMetaToknProb :: Maybe Double, + optJobs :: Maybe (Maybe String) } deriving (Show) @@ -284,7 +284,8 @@ defaultFlags = Flags { optTagsOnly = False, optHeuristicFactor = Nothing, optMetaProb = Nothing, - optMetaToknProb = Nothing + optMetaToknProb = Nothing, + optJobs = Nothing } -- | Option descriptions @@ -297,6 +298,7 @@ optDescr = Option ['v'] ["verbose"] (OptArg verbosity "N") "Set verbosity (default 1). -v alone is the same as -v 2.", Option ['q','s'] ["quiet"] (NoArg (verbosity (Just "0"))) "Quiet, same as -v 0.", Option [] ["batch"] (NoArg (mode ModeCompiler)) "Run in batch compiler mode.", + Option ['j'] ["jobs"] (OptArg jobs "N") "Compile N modules in parallel with -batch (default 1).", Option [] ["interactive"] (NoArg (mode ModeInteractive)) "Run in interactive mode (default).", Option [] ["run"] (NoArg (mode ModeRun)) "Run in interactive mode, showing output only (no other messages).", Option [] ["server"] (OptArg modeServer "port") $ @@ -387,6 +389,7 @@ optDescr = ms = mode . ModeServer readPort p = maybe err ms (readMaybe p) where err = fail $ "Bad server port: "++p + jobs mv = set $ \ o -> o { optJobs = Just mv } verbosity mv = case mv of Nothing -> set $ \o -> o { optVerbosity = Verbose } Just v -> case readMaybe v >>= toEnumBounded of diff --git a/src/compiler/GF/Infra/UseIO.hs b/src/compiler/GF/Infra/UseIO.hs index a0a36ad52..6de68bc44 100644 --- a/src/compiler/GF/Infra/UseIO.hs +++ b/src/compiler/GF/Infra/UseIO.hs @@ -1,4 +1,3 @@ -{-# OPTIONS -cpp #-} ---------------------------------------------------------------------- -- | -- Module : UseIO @@ -22,7 +21,7 @@ import GF.Infra.Option import GF.System.Catch import Paths_gf(getDataDir) -import System.Directory +import GF.System.Directory import System.FilePath import System.IO import System.IO.Error(isUserError,ioeGetErrorString) @@ -36,24 +35,9 @@ import Control.Monad import Control.Monad.Trans(MonadIO(..)) import Control.Exception(evaluate) ---putShow' :: Show a => (c -> a) -> c -> IO () ---putShow' f = putStrLn . show . length . show . f +--putIfVerb :: MonadIO io => Options -> String -> io () +putIfVerb opts msg = when (verbAtLeast opts Verbose) $ putStrLnE msg -putIfVerb :: MonadIO io => Options -> String -> io () -putIfVerb opts msg = - when (verbAtLeast opts Verbose) $ liftIO $ putStrLn msg - -putIfVerbW :: MonadIO io => Options -> String -> io () -putIfVerbW opts msg = - when (verbAtLeast opts Verbose) $ liftIO $ putStr (' ' : msg) -{- -errOptIO :: Options -> a -> Err a -> IO a -errOptIO os e m = case m of - Ok x -> return x - Bad k -> do - putIfVerb os k - return e --} type FileName = String type InitPath = String -- ^ the directory portion of a pathname type FullPath = String @@ -68,8 +52,8 @@ getLibraryDirectory opts = Nothing -> liftIO $ catch (getEnv gfLibraryPath) (\ex -> fmap ( "lib") getDataDir) -getGrammarPath :: FilePath -> IO [FilePath] -getGrammarPath lib_dir = do +getGrammarPath :: MonadIO io => FilePath -> io [FilePath] +getGrammarPath lib_dir = liftIO $ do catch (fmap splitSearchPath $ getEnv gfGrammarPathVar) (\_ -> return [lib_dir "alltenses",lib_dir "prelude"]) -- e.g. GF_GRAMMAR_PATH @@ -110,15 +94,14 @@ getSubdirs dir = do justModuleName :: FilePath -> String justModuleName = dropExtension . takeFileName -isGFO :: FilePath -> Bool +isGF,isGFO :: FilePath -> Bool +isGF = (== ".gf") . takeExtensions isGFO = (== ".gfo") . takeExtensions -gfoFile :: FilePath -> FilePath +gfFile,gfoFile :: FilePath -> FilePath +gfFile f = addExtension f "gf" gfoFile f = addExtension f "gfo" -gfFile :: FilePath -> FilePath -gfFile f = addExtension f "gf" - gf2gfo :: Options -> FilePath -> FilePath gf2gfo = gf2gfo' . flag optGFODir @@ -143,6 +126,8 @@ newtype IOE a = IOE { appIOE :: IO (Err a) } ioe :: IO (Err a) -> IOE a ioe = IOE +runIOE m = err fail return =<< appIOE m + instance MonadIO IOE where liftIO io = ioe (io >>= return . return) instance ErrorMonad IOE where @@ -162,11 +147,11 @@ instance Monad IOE where appIOE $ err raise f x -- f :: a -> IOE a fail = raise -maybeIO io = either (const Nothing) Just `fmap` liftIO (try io) - useIOE :: a -> IOE a -> IO a useIOE a ioe = appIOE ioe >>= err (\s -> putStrLn s >> return a) return +maybeIO io = either (const Nothing) Just `fmap` liftIO (try io) + --foldIOE :: (a -> b -> IOE a) -> a -> [b] -> IOE (a, Maybe String) foldIOE f s xs = case xs of [] -> return (s,Nothing) @@ -180,27 +165,42 @@ die :: String -> IO a die s = do hPutStrLn stderr s exitFailure -ePutStr, ePutStrLn, putStrE, putStrLnE :: MonadIO m => String -> m () -ePutStr s = liftIO $ hPutStr stderr s -ePutStrLn s = liftIO $ hPutStrLn stderr s -putStrLnE s = liftIO $ putStrLn s >> hFlush stdout -putStrE s = liftIO $ putStr s >> hFlush stdout +class Monad m => Output m where + ePutStr, ePutStrLn, putStrE, putStrLnE :: String -> m () -putPointE :: MonadIO m => Verbosity -> Options -> String -> m a -> m a +instance Output IO where + ePutStr s = hPutStr stderr s `catch` oops + where oops _ = return () -- prevent crash on character encoding problem + ePutStrLn s = hPutStrLn stderr s `catch` oops + where oops _ = ePutStrLn "" -- prevent crash on character encoding problem + putStrLnE s = putStrLn s >> hFlush stdout + putStrE s = putStr s >> hFlush stdout + +instance Output IOE where + ePutStr = liftIO . ePutStr + ePutStrLn = liftIO . ePutStrLn + putStrLnE = liftIO . putStrLnE + putStrE = liftIO . putStrE + +--putPointE :: Verbosity -> Options -> String -> IO a -> IO a putPointE v opts msg act = do when (verbAtLeast opts v) $ putStrE msg - t1 <- liftIO $ getCPUTime - a <- act >>= liftIO . evaluate - t2 <- liftIO $ getCPUTime + (t,a) <- timeIt act if flag optShowCPUTime opts - then do let msec = (t2 - t1) `div` 1000000000 + then do let msec = t `div` 1000000000 putStrLnE (printf " %5d msec" msec) else when (verbAtLeast opts v) $ putStrLnE "" return a +timeIt act = + do t1 <- liftIO $ getCPUTime + a <- liftIO . evaluate =<< act + t2 <- liftIO $ getCPUTime + return (t2-t1,a) + -- * File IO writeUTF8File :: FilePath -> String -> IO () diff --git a/src/compiler/GF/System/Directory.hs b/src/compiler/GF/System/Directory.hs index 3cd8a8ef6..306c5fbcb 100644 --- a/src/compiler/GF/System/Directory.hs +++ b/src/compiler/GF/System/Directory.hs @@ -4,10 +4,14 @@ module GF.System.Directory(module GF.System.Directory,module D) where import Control.Monad.Trans(MonadIO(..)) import qualified System.Directory as D import System.Directory as D - hiding (doesDirectoryExist,doesFileExist,getModificationTime, - getCurrentDirectory,getDirectoryContents,removeFile) + hiding (canonicalizePath,createDirectoryIfMissing, + doesDirectoryExist,doesFileExist,getModificationTime, + getCurrentDirectory,getDirectoryContents,getPermissions, + removeFile) import Data.Time.Compat +canonicalizePath path = liftIO $ D.canonicalizePath path +createDirectoryIfMissing b = liftIO . D.createDirectoryIfMissing b doesDirectoryExist path = liftIO $ D.doesDirectoryExist path doesFileExist path = liftIO $ D.doesFileExist path getModificationTime path = liftIO $ fmap toUTCTime (D.getModificationTime path) @@ -15,5 +19,6 @@ getDirectoryContents path = liftIO $ D.getDirectoryContents path getCurrentDirectory :: MonadIO io => io FilePath getCurrentDirectory = liftIO D.getCurrentDirectory +getPermissions path = liftIO $ D.getPermissions path removeFile path = liftIO $ D.removeFile path \ No newline at end of file diff --git a/src/compiler/GFC.hs b/src/compiler/GFC.hs index 4b1034faa..6f909b511 100644 --- a/src/compiler/GFC.hs +++ b/src/compiler/GFC.hs @@ -4,7 +4,8 @@ module GFC (mainGFC, writePGF) where import PGF import PGF.Internal(concretes,optimizePGF,unionPGF) import PGF.Internal(putSplitAbs,encodeFile,runPut) -import GF.Compile +import GF.Compile as S(batchCompile,link,srcAbsName) +import qualified GF.CompileInParallel as P(batchCompile) import GF.Compile.Export import GF.Compile.CFGtoPGF import GF.Compile.GetGrammar @@ -41,7 +42,7 @@ mainGFC opts fs = do compileSourceFiles :: Options -> [FilePath] -> IOE () compileSourceFiles opts fs = - do cnc_gr@(cnc,t_src,gr) <- batchCompile opts fs + do (t_src,~cnc_grs@(~(cnc,gr):_)) <- batchCompile opts fs unless (flag optStopAfterPhase opts == Compile) $ do let abs = showIdent (srcAbsName gr cnc) pgfFile = outputPath opts (grammarName' opts abs<.>"pgf") @@ -50,9 +51,15 @@ compileSourceFiles opts fs = else return Nothing if t_pgf >= Just t_src then putIfVerb opts $ pgfFile ++ " is up-to-date." - else do pgf <- link opts cnc_gr + else do pgfs <- mapM (link opts) + [(cnc,t_src,gr)|(cnc,gr)<-cnc_grs] + let pgf = foldl1 unionPGF pgfs writePGF opts pgf writeOutputs opts pgf + where + batchCompile = maybe batchCompile' P.batchCompile (flag optJobs opts) + batchCompile' opts fs = do (cnc,t,gr) <- S.batchCompile opts fs + return (t,[(cnc,gr)]) compileCFFiles :: Options -> [FilePath] -> IOE () compileCFFiles opts fs = do