diff --git a/GF.cabal b/GF.cabal index ab709337e..f16c9b298 100644 --- a/GF.cabal +++ b/GF.cabal @@ -57,6 +57,9 @@ library GF.Data.Assoc GF.Data.ErrM -- needed only for the on demand generation of PMCFG + GF.Infra.GetOpt + GF.Infra.Option + GF.Data.ErrM GF.Data.BacktrackM GF.Compile.GenerateFCFG GF.Compile.GeneratePMCFG diff --git a/src/GF/Compile.hs b/src/GF/Compile.hs index c66c2f86a..c23cfe655 100644 --- a/src/GF/Compile.hs +++ b/src/GF/Compile.hs @@ -68,7 +68,7 @@ link opts cnc gr = do _ -> ioeIO $ putStrLn $ "Corrupted PGF" return gc Bad s -> fail s - return $ buildParser opts $ optimize opts gc1 + ioeIO $ buildParser opts $ optimize opts gc1 optimize :: Options -> PGF -> PGF optimize opts = cse . suf @@ -76,12 +76,12 @@ optimize opts = cse . suf cse = if OptCSE `Set.member` os then cseOptimize else id suf = if OptStem `Set.member` os then suffixOptimize else id -buildParser :: Options -> PGF -> PGF +buildParser :: Options -> PGF -> IO PGF buildParser opts = case flag optBuildParser opts of BuildParser -> addParsers opts - DontBuildParser -> id - BuildParserOnDemand -> mapConcretes (\cnc -> cnc { cflags = Map.insert (mkCId "parser") "ondemand" (cflags cnc) }) + DontBuildParser -> return + BuildParserOnDemand -> return . mapConcretes (\cnc -> cnc { cflags = Map.insert (mkCId "parser") "ondemand" (cflags cnc) }) batchCompile :: Options -> [FilePath] -> IOE SourceGrammar batchCompile opts files = do diff --git a/src/GF/Compile/GeneratePMCFG.hs b/src/GF/Compile/GeneratePMCFG.hs index ab79f9b30..bb61a0461 100644 --- a/src/GF/Compile/GeneratePMCFG.hs +++ b/src/GF/Compile/GeneratePMCFG.hs @@ -16,9 +16,11 @@ import PGF.CId import PGF.Data import PGF.Macros +import GF.Infra.Option import GF.Data.BacktrackM import GF.Data.Utilities (updateNthM, updateNth, sortNub) +import System.IO import qualified Data.Map as Map import qualified Data.Set as Set import qualified Data.List as List @@ -27,22 +29,41 @@ import qualified Data.ByteString.Char8 as BS import Data.Array.IArray import Data.Maybe import Control.Monad -import Debug.Trace +import Control.Exception ---------------------------------------------------------------------- -- main conversion function -convertConcrete :: Abstr -> Concr -> ParserInfo -convertConcrete abs cnc = convert abs_defs conc cats - where abs_defs = Map.assocs (funs abs) - conc = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient" - cats = lincats cnc -convert :: [(CId,(Type,Int,[Equation]))] -> TermMap -> TermMap -> ParserInfo -convert abs_defs cnc_defs cat_defs = - let env = expandHOAS abs_defs cnc_defs cat_defs (emptyGrammarEnv cnc_defs cat_defs) - in getParserInfo (List.foldl' (convertRule cnc_defs) env pfrules) +convertConcrete :: Options -> Abstr -> CId -> Concr -> IO ParserInfo +convertConcrete opts abs lang cnc = do + let env0 = emptyGrammarEnv cnc_defs cat_defs + when (flag optProf opts) $ do + let (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = env0 + hPutStrLn stderr "" + hPutStrLn stderr ("Language: " ++ show lang) + hPutStrLn stderr "" + hPutStrLn stderr "Categories Count" + hPutStrLn stderr "--------------------------------" + case IntMap.lookup 0 catSet of + Just cats -> sequence_ [hPutStrLn stderr (lformat 23 cid ++ rformat 9 (fcat2-fcat1+1)) + | (cid,(fcat1,fcat2,_)) <- Map.toList cats] + Nothing -> return () + hPutStrLn stderr "--------------------------------" + let env1 = expandHOAS abs_defs cnc_defs cat_defs env0 + when (flag optProf opts) $ do + hPutStrLn stderr "" + hPutStrLn stderr "Rules Count" + hPutStrLn stderr "--------------------------------" + env2 <- foldM (convertRule opts cnc_defs) env1 pfrules + when (flag optProf opts) $ do + hPutStrLn stderr "--------------------------------" + return $! getParserInfo env2 where + abs_defs = Map.assocs (funs abs) + cnc_defs = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient" + cat_defs = lincats cnc + pfrules = [ (PFRule id args (0,res) (map findLinType args) (findLinType (0,res)) term) | (id, (ty,_,_)) <- abs_defs, let (args,res) = typeSkeleton ty, @@ -50,6 +71,16 @@ convert abs_defs cnc_defs cat_defs = findLinType (_,id) = fromMaybe (error $ "No lincat for " ++ show id) (Map.lookup id cat_defs) +lformat :: Show a => Int -> a -> String +lformat n x = s ++ replicate (n-length s) ' ' + where + s = show x + +rformat :: Show a => Int -> a -> String +rformat n x = replicate (n-length s) ' ' ++ s + where + s = show x + brk :: (GrammarEnv -> GrammarEnv) -> (GrammarEnv -> GrammarEnv) brk f (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = case f (GrammarEnv last_id catSet seqSet funSet crcSet IntMap.empty) of @@ -67,8 +98,8 @@ brk f (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = count = length xs ys = foldr (zipWith Set.insert) (repeat Set.empty) xs -convertRule :: TermMap -> GrammarEnv -> ProtoFRule -> GrammarEnv -convertRule cnc_defs grammarEnv (PFRule fun args res ctypes ctype term) = +convertRule :: Options -> TermMap -> GrammarEnv -> ProtoFRule -> IO GrammarEnv +convertRule opts cnc_defs grammarEnv (PFRule fun args res ctypes ctype term) = do let pres = protoFCat cnc_defs res ctype pargs = zipWith (protoFCat cnc_defs) args ctypes @@ -78,7 +109,13 @@ convertRule cnc_defs grammarEnv (PFRule fun args res ctypes ctype term) = grammarEnv (go' b1 [] []) (pres,pargs) ) grammarEnv1 - in grammarEnv2 + when (flag optProf opts) $ do + hPutStr stderr (lformat 23 fun ++ rformat 9 (product [length xs | PFCat _ _ _ tcs <- pargs, (_,xs) <- tcs])) + hFlush stderr + grammarEnv3 <- evaluate grammarEnv2 + when (flag optProf opts) $ do + hPutStrLn stderr "" + return grammarEnv3 where addRule lins (newCat', newArgs') env0 = let [newCat] = getFCats env0 newCat' diff --git a/src/GF/Compile/GrammarToGFCC.hs b/src/GF/Compile/GrammarToGFCC.hs index 14187f04a..c8bb1c606 100644 --- a/src/GF/Compile/GrammarToGFCC.hs +++ b/src/GF/Compile/GrammarToGFCC.hs @@ -44,14 +44,15 @@ mkCanon2gfcc opts cnc gr = pars = mkParamLincat gr -- Adds parsers for all concretes -addParsers :: Options -> D.PGF -> D.PGF -addParsers opts pgf = CM.mapConcretes conv pgf +addParsers :: Options -> D.PGF -> IO D.PGF +addParsers opts pgf = do cncs <- sequence [conv lang cnc | (lang,cnc) <- Map.toList (D.concretes pgf)] + return pgf { D.concretes = Map.fromList cncs } where - conv cnc = cnc { D.parser = Just pinfo } + conv lang cnc = do pinfo <- if flag optErasing (erasingFromCnc `addOptions` opts) + then PMCFG.convertConcrete opts (D.abstract pgf) lang cnc + else return $ FCFG.convertConcrete (D.abstract pgf) cnc + return (lang,cnc { D.parser = Just pinfo }) where - pinfo - | flag optErasing (erasingFromCnc `addOptions` opts) = PMCFG.convertConcrete (D.abstract pgf) cnc - | otherwise = FCFG.convertConcrete (D.abstract pgf) cnc erasingFromCnc = modifyFlags (\o -> o { optErasing = Map.lookup (mkCId "erasing") (D.cflags cnc) == Just "on"}) -- Generate PGF from GFCM. diff --git a/src/GF/Infra/Option.hs b/src/GF/Infra/Option.hs index 40b8dc434..fc5ddf87c 100644 --- a/src/GF/Infra/Option.hs +++ b/src/GF/Infra/Option.hs @@ -144,6 +144,7 @@ data Flags = Flags { optMode :: Mode, optStopAfterPhase :: Phase, optVerbosity :: Verbosity, + optProf :: Bool, optShowCPUTime :: Bool, optEmitGFO :: Bool, optOutputFormats :: [OutputFormat], @@ -237,6 +238,7 @@ defaultFlags = Flags { optMode = ModeInteractive, optStopAfterPhase = Compile, optVerbosity = Normal, + optProf = False, optShowCPUTime = False, optEmitGFO = True, optOutputFormats = [], @@ -288,6 +290,7 @@ optDescr = Option ['C'] [] (NoArg (phase Convert)) "Stop after conversion to .gf.", Option ['c'] [] (NoArg (phase Compile)) "Stop after compiling to .gfo (default) .", Option [] ["make"] (NoArg (liftM2 addOptions (mode ModeCompiler) (phase Link))) "Build .pgf file and other output files and exit.", + Option [] ["prof"] (NoArg (prof True)) "Dump profiling information when compiling to PMCFG", Option [] ["cpu"] (NoArg (cpu True)) "Show compilation CPU time statistics.", Option [] ["no-cpu"] (NoArg (cpu False)) "Don't show compilation CPU time statistics (default).", Option [] ["emit-gfo"] (NoArg (emitGFO True)) "Create .gfo files (default).", @@ -371,6 +374,7 @@ optDescr = Just v -> case readMaybe v >>= toEnumBounded of Just i -> set $ \o -> o { optVerbosity = i } Nothing -> fail $ "Bad verbosity: " ++ show v + prof x = set $ \o -> o { optProf = x } cpu x = set $ \o -> o { optShowCPUTime = x } emitGFO x = set $ \o -> o { optEmitGFO = x } gfoDir x = set $ \o -> o { optGFODir = Just x } diff --git a/src/PGF.hs b/src/PGF.hs index 4d059fd00..5fd98fa25 100644 --- a/src/PGF.hs +++ b/src/PGF.hs @@ -74,6 +74,7 @@ import PGF.Parsing.FCFG import qualified PGF.Parsing.FCFG.Incremental as Incremental import qualified GF.Compile.GeneratePMCFG as PMCFG +import GF.Infra.Option import GF.Data.ErrM import GF.Data.Utilities (replace) @@ -219,16 +220,17 @@ readLanguage = readCId showLanguage = prCId -readPGF f = do - g <- decodeFile f - return $! addParsers g +readPGF f = decodeFile f >>= addParsers -- Adds parsers for all concretes that don't have a parser and that have parser=ondemand. -addParsers :: PGF -> PGF -addParsers pgf = mapConcretes (\cnc -> if wantsParser cnc then addParser cnc else cnc) pgf +addParsers :: PGF -> IO PGF +addParsers pgf = do cncs <- sequence [if wantsParser cnc then addParser lang cnc else return (lang,cnc) + | (lang,cnc) <- Map.toList (concretes pgf)] + return pgf { concretes = Map.fromList cncs } where wantsParser cnc = isNothing (parser cnc) && Map.lookup (mkCId "parser") (cflags cnc) == Just "ondemand" - addParser cnc = cnc { parser = Just (PMCFG.convertConcrete (abstract pgf) cnc) } + addParser lang cnc = do pinfo <- PMCFG.convertConcrete noOptions (abstract pgf) lang cnc + return (lang,cnc { parser = Just pinfo }) linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang