1
0
forked from GitHub/gf-core

refactor the compilation of CFG and EBNF grammars. Now they are parsed by using GF.Grammar.Parser just like the ordinary GF grammars. Furthermore now GF.Speech.CFG is moved to GF.Grammar.CFG. The new module is used by both the speech conversion utils and by the compiler for CFG grammars. The parser for CFG now consumes a lot less memory and can be used with grammars with more than 4 000 000 productions.

This commit is contained in:
kr.angelov
2014-03-21 21:25:05 +00:00
parent d816c34986
commit 51a9ef72c7
19 changed files with 236 additions and 413 deletions

View File

@@ -0,0 +1,58 @@
module GF.Compile.CFGtoPGF (cf2gf) where
import GF.Grammar.Grammar hiding (Cat)
import GF.Grammar.Macros
import GF.Grammar.CFG
import GF.Infra.Ident(Ident,identS)
import GF.Infra.Option
import GF.Infra.UseIO
import GF.Data.Operations
import PGF(showCId)
import qualified Data.Set as Set
import qualified Data.Map as Map
--------------------------
-- the compiler ----------
--------------------------
cf2gf :: FilePath -> CFG -> SourceGrammar
cf2gf fpath cf = mGrammar [
(aname, ModInfo MTAbstract MSComplete (modifyFlags (\fs -> fs{optStartCat = Just cat})) [] Nothing [] [] fpath Nothing abs),
(cname, ModInfo (MTConcrete aname) MSComplete noOptions [] Nothing [] [] fpath Nothing cnc)
]
where
name = justModuleName fpath
(abs,cnc,cat) = cf2grammar cf
aname = identS $ name ++ "Abs"
cname = identS name
cf2grammar :: CFG -> (BinTree Ident Info, BinTree Ident Info, String)
cf2grammar cfg = (buildTree abs, buildTree conc, cfgStartCat cfg) where
abs = cats ++ funs
conc = lincats ++ lins
cats = [(identS cat, AbsCat (Just (L NoLoc []))) | cat <- Map.keys (cfgRules cfg)]
lincats = [(cat, CncCat (Just (L loc defLinType)) Nothing Nothing Nothing Nothing) | (cat,AbsCat (Just (L loc _))) <- cats]
(funs,lins) = unzip (map cf2rule (concatMap Set.toList (Map.elems (cfgRules cfg))))
cf2rule :: CFRule -> ((Ident,Info),(Ident,Info))
cf2rule (CFRule cat items (CFObj fun _)) = (def,ldef) where
f = identS (showCId fun)
def = (f, AbsFun (Just (L NoLoc (mkProd args' (Cn (identS cat)) []))) Nothing Nothing (Just True))
args0 = zip (map (identS . ("x" ++) . show) [0..]) items
args = [((Explicit,v), Cn (identS c)) | (v, NonTerminal c) <- args0]
args' = [(Explicit,identS "_", Cn (identS c)) | (_, NonTerminal c) <- args0]
ldef = (f, CncFun
Nothing
(Just (L NoLoc (mkAbs (map fst args)
(mkRecord (const theLinLabel) [foldconcat (map mkIt args0)]))))
Nothing
Nothing)
mkIt (v, NonTerminal _) = P (Vr v) theLinLabel
mkIt (_, Terminal a) = K a
foldconcat [] = K ""
foldconcat tt = foldr1 C tt

View File

@@ -12,27 +12,25 @@
-- this module builds the internal GF grammar that is sent to the type checker
-----------------------------------------------------------------------------
module GF.Compile.GetGrammar (getSourceModule) where
module GF.Compile.GetGrammar (getSourceModule, getCFRules, getEBNFRules) where
import Prelude hiding (catch)
import GF.Data.Operations
--import GF.System.Catch
import GF.Infra.UseIO
import GF.Infra.Option(Options,optPreprocessors,addOptions,renameEncoding,optEncoding,flag,defaultEncoding)
import GF.Grammar.Lexer
import GF.Grammar.Parser
import GF.Grammar.Grammar
--import GF.Compile.Coding
import GF.Grammar.CFG
import GF.Grammar.EBNF
import GF.Compile.ReadFiles(parseSource,lift)
--import GF.Text.Coding(decodeUnicodeIO)
import qualified Data.ByteString.Char8 as BS
import Data.Char(isAscii)
import Control.Monad (foldM,when,unless)
import System.Cmd (system)
--import System.IO(mkTextEncoding) --,utf8
import System.Directory(removeFile,getCurrentDirectory)
import System.FilePath(makeRelative)
@@ -64,17 +62,25 @@ getSourceModule opts file0 =
--lift $ transcodeModule' (i,mi) -- old lexer
return (i,mi) -- new lexer
{-
transcodeModule sm00 =
do enc <- mkTextEncoding (getEncoding (mflags (snd sm00)))
let sm = decodeStringsInModule enc sm00
return sm
getCFRules :: Options -> FilePath -> IOE [CFRule]
getCFRules opts fpath = do
raw <- liftIO (BS.readFile fpath)
(optCoding,parsed) <- parseSource opts pCFRules raw
case parsed of
Left (Pn l c,msg) -> do cwd <- lift $ getCurrentDirectory
let location = makeRelative cwd fpath++":"++show l++":"++show c
raise (location++":\n "++msg)
Right rules -> return rules
transcodeModule' sm00 =
do let enc = utf8
let sm = decodeStringsInModule enc sm00
return sm
-}
getEBNFRules :: Options -> FilePath -> IOE [ERule]
getEBNFRules opts fpath = do
raw <- liftIO (BS.readFile fpath)
(optCoding,parsed) <- parseSource opts pEBNFRules raw
case parsed of
Left (Pn l c,msg) -> do cwd <- lift $ getCurrentDirectory
let location = makeRelative cwd fpath++":"++show l++":"++show c
raise (location++":\n "++msg)
Right rules -> return rules
runPreprocessor :: Temporary -> String -> IO Temporary
runPreprocessor tmp0 p =