1
0
forked from GitHub/gf-core
Files
gf-core/src/compiler/GFC.hs
hallgren c0eb79b403 Experimental: parallel batch compilation of grammars
On my laptop these changes speed up the full build of the RGL and example
grammars with 'cabal build' from ~95s to ~43s and the zero build from ~18s
to ~5s.

The main change is the introduction of the module GF.CompileInParallel that
replaces GF.Compile and the function GF.Compile.ReadFiles.getAllFiles. At
present, it is activated with the new -j flag, and it is only used when
combined with --make or --batch. In addition, to get parallel computations,
you need to add GHC run-time flags, e.g., +RTS -N -A20M -RTS, to the command
line.

The Setup.hs script has been modified to pass the appropriate flags to GF
for parallel compilation when compiling the RGL and example grammars, but you
need a recent version of Cabal for this to work (probably >=1.20).

Some additonal refactoring were made during this work. A new monad is used to
avoid warnings/error messages from different modules to be intertwined when
compiling in parallel, so some functios that were hardiwred to the IO or IOE
monads have been lifted to work in arbitrary monads that are instances in
the appropriate classes.
2014-08-25 09:56:00 +00:00

145 lines
5.7 KiB
Haskell

module GFC (mainGFC, writePGF) where
-- module Main where
import PGF
import PGF.Internal(concretes,optimizePGF,unionPGF)
import PGF.Internal(putSplitAbs,encodeFile,runPut)
import GF.Compile as S(batchCompile,link,srcAbsName)
import qualified GF.CompileInParallel as P(batchCompile)
import GF.Compile.Export
import GF.Compile.CFGtoPGF
import GF.Compile.GetGrammar
import GF.Grammar.CFG
import GF.Infra.Ident(showIdent)
import GF.Infra.UseIO
import GF.Infra.Option
import GF.Data.ErrM
import GF.System.Directory
import Data.Maybe
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.ByteString.Lazy as BSL
import System.FilePath
import Control.Monad(unless,forM_)
mainGFC :: Options -> [FilePath] -> IO ()
mainGFC opts fs = do
r <- appIOE (case () of
_ | null fs -> fail $ "No input files."
_ | all (extensionIs ".cf") fs -> compileCFFiles opts fs
_ | all (\f -> extensionIs ".gf" f || extensionIs ".gfo" f) fs -> compileSourceFiles opts fs
_ | all (extensionIs ".pgf") fs -> unionPGFFiles opts fs
_ -> fail $ "Don't know what to do with these input files: " ++ unwords fs)
case r of
Ok x -> return x
Bad msg -> die $ if flag optVerbosity opts == Normal
then ('\n':msg)
else msg
where
extensionIs ext = (== ext) . takeExtension
compileSourceFiles :: Options -> [FilePath] -> IOE ()
compileSourceFiles opts fs =
do (t_src,~cnc_grs@(~(cnc,gr):_)) <- batchCompile opts fs
unless (flag optStopAfterPhase opts == Compile) $
do let abs = showIdent (srcAbsName gr cnc)
pgfFile = outputPath opts (grammarName' opts abs<.>"pgf")
t_pgf <- if outputJustPGF opts
then maybeIO $ getModificationTime pgfFile
else return Nothing
if t_pgf >= Just t_src
then putIfVerb opts $ pgfFile ++ " is up-to-date."
else do pgfs <- mapM (link opts)
[(cnc,t_src,gr)|(cnc,gr)<-cnc_grs]
let pgf = foldl1 unionPGF pgfs
writePGF opts pgf
writeOutputs opts pgf
where
batchCompile = maybe batchCompile' P.batchCompile (flag optJobs opts)
batchCompile' opts fs = do (cnc,t,gr) <- S.batchCompile opts fs
return (t,[(cnc,gr)])
compileCFFiles :: Options -> [FilePath] -> IOE ()
compileCFFiles opts fs = do
rules <- fmap concat $ mapM (getCFRules opts) fs
startCat <- case rules of
(CFRule cat _ _ : _) -> return cat
_ -> fail "empty CFG"
let pgf = cf2pgf (last fs) (uniqueFuns (mkCFG startCat Set.empty rules))
let cnc = justModuleName (last fs)
unless (flag optStopAfterPhase opts == Compile) $
do probs <- liftIO (maybe (return . defaultProbabilities) readProbabilitiesFromFile (flag optProbsFile opts) pgf)
let pgf' = setProbabilities probs $ if flag optOptimizePGF opts then optimizePGF pgf else pgf
writePGF opts pgf'
writeOutputs opts pgf'
unionPGFFiles :: Options -> [FilePath] -> IOE ()
unionPGFFiles opts fs =
if outputJustPGF opts
then maybe doIt checkFirst (flag optName opts)
else doIt
where
checkFirst name =
do let pgfFile = outputPath opts (name <.> "pgf")
sourceTime <- liftIO $ maximum `fmap` mapM getModificationTime fs
targetTime <- maybeIO $ getModificationTime pgfFile
if targetTime >= Just sourceTime
then putIfVerb opts $ pgfFile ++ " is up-to-date."
else doIt
doIt =
do pgfs <- mapM readPGFVerbose fs
let pgf0 = foldl1 unionPGF pgfs
pgf = if flag optOptimizePGF opts then optimizePGF pgf0 else pgf0
pgfFile = outputPath opts (grammarName opts pgf <.> "pgf")
if pgfFile `elem` fs
then putStrLnE $ "Refusing to overwrite " ++ pgfFile
else writePGF opts pgf
writeOutputs opts pgf
readPGFVerbose f =
putPointE Normal opts ("Reading " ++ f ++ "...") $ liftIO $ readPGF f
writeOutputs :: Options -> PGF -> IOE ()
writeOutputs opts pgf = do
sequence_ [writeOutput opts name str
| fmt <- outputFormats opts,
(name,str) <- exportPGF opts fmt pgf]
writePGF :: Options -> PGF -> IOE ()
writePGF opts pgf =
if flag optSplitPGF opts then writeSplitPGF else writeNormalPGF
where
writeNormalPGF =
do let outfile = outputPath opts (grammarName opts pgf <.> "pgf")
writing opts outfile $ encodeFile outfile pgf
writeSplitPGF =
do let outfile = outputPath opts (grammarName opts pgf <.> "pgf")
writing opts outfile $ BSL.writeFile outfile (runPut (putSplitAbs pgf))
--encodeFile_ outfile (putSplitAbs pgf)
forM_ (Map.toList (concretes pgf)) $ \cnc -> do
let outfile = outputPath opts (showCId (fst cnc) <.> "pgf_c")
writing opts outfile $ encodeFile outfile cnc
writeOutput :: Options -> FilePath-> String -> IOE ()
writeOutput opts file str = writing opts path $ writeUTF8File path str
where path = outputPath opts file
-- * Useful helper functions
grammarName :: Options -> PGF -> String
grammarName opts pgf = grammarName' opts (showCId (abstractName pgf))
grammarName' opts abs = fromMaybe abs (flag optName opts)
outputFormats opts = [fmt | fmt <- flag optOutputFormats opts, fmt/=FmtByteCode]
outputJustPGF opts = null (flag optOutputFormats opts) && not (flag optSplitPGF opts)
outputPath opts file = maybe id (</>) (flag optOutputDir opts) file
writing opts path io =
putPointE Normal opts ("Writing " ++ path ++ "...") $ liftIO io