Refactor the compilation of CFG and EBNF grammars. They are now parsed using GF.Grammar.Parser, just like ordinary GF grammars. Furthermore, GF.Speech.CFG has been moved to GF.Grammar.CFG; the new module is used both by the speech conversion utilities and by the compiler for CFG grammars. The parser for CFG now consumes far less memory and can be used with grammars that have more than 4 000 000 productions.

This commit is contained in:
kr.angelov
2014-03-21 21:25:05 +00:00
parent d816c34986
commit 51a9ef72c7
19 changed files with 236 additions and 413 deletions

View File

@@ -5,23 +5,25 @@ import PGF.Data
import GF.Compile
import GF.Compile.Multi (readMulti)
import GF.Compile.GetGrammar (getCFRules, getEBNFRules)
import GF.Grammar (identS, SourceGrammar) -- for cc command
import GF.Grammar.CF
import GF.Grammar.CFG
import GF.Grammar.EBNF
import GF.Compile.CFGtoPGF
import GF.Infra.UseIO
import GF.Infra.Option
import GF.Data.ErrM
--import Data.List (nubBy)
import System.FilePath
import qualified Data.Set as Set
-- import a grammar in an environment where it extends an existing grammar
importGrammar :: PGF -> Options -> [FilePath] -> IO PGF
importGrammar pgf0 _ [] = return pgf0
importGrammar pgf0 opts files =
case takeExtensions (last files) of
".cf" -> importCF opts files getCF
".ebnf" -> importCF opts files getEBNF
".cf" -> importCF opts files getCFRules id
".ebnf" -> importCF opts files getEBNFRules ebnf2cf
".gfm" -> do
ascss <- mapM readMulti files
let cs = concatMap snd ascss
@@ -52,13 +54,17 @@ importSource src0 opts files = do
return src0
-- for different cf formats
-- Import a grammar written in a context-free format (three-argument form).
--
-- All of 'files' are read and joined with 'unlines'; the resulting text is
-- handed to 'get' together with the name of the last file. The source
-- grammar it returns is compiled and linked under the module name
-- "<last file's module name>Abs".
--
-- NOTE(review): a four-argument definition of the same name follows this
-- one; in this diff view this is presumably the pre-change version -- confirm.
importCF opts files get = do
s <- fmap unlines $ mapM readFile files
-- 'get' parses the combined text; a 'Bad' result aborts via 'error'.
gf <- case get (last files) s of
Ok gf -> return gf
Bad s -> error s ----
-- Only 'Ok' is matched here, so a 'Bad' compilation result makes the
-- pattern bind fail instead of reporting the carried message.
Ok gr <- appIOE $ compileSourceGrammar opts gf
epgf <- appIOE $ link opts (identS (justModuleName (last files) ++ "Abs"), (), gr)
-- Unwrap the linking result; a 'Bad' result aborts via 'error'.
case epgf of
Ok pgf -> return pgf
Bad s -> error s ----
-- | Import a grammar written in one of the context-free formats.
--
-- @get opts file@ is run on every file in @files@; the per-file results are
-- concatenated and passed through @convert@, which must yield a list of
-- 'CFRule's. The category of the first rule becomes the start category.
-- The rules are then turned into a source grammar named after the last
-- file, compiled, and linked under the module name
-- @\<module name of last file\>Abs@.
--
-- Any failure inside the pipeline (including an empty rule list, reported
-- as @"empty CFG"@) is surfaced by calling 'error' with the message.
importCF opts files get convert =
  appIOE buildPGF >>= \outcome ->
    case outcome of
      Ok pgf  -> return pgf
      Bad msg -> error msg
  where
    -- The last file names both the generated grammar and its abstract module.
    grammarFile = last files
    absName     = identS (justModuleName grammarFile ++ "Abs")

    buildPGF = do
      parsed <- mapM (get opts) files
      let rules = convert (concat parsed)
      -- The first rule's left-hand side serves as the start category.
      startCat <- case rules of
        (CFRule cat _ _ : _) -> return cat
        _                    -> fail "empty CFG"
      let cfg = uniqueFuns (mkCFG startCat Set.empty rules)
      gr <- compileSourceGrammar opts (cf2gf grammarFile cfg)
      link opts (absName, (), gr)