forked from GitHub/gf-core
refactor the compilation of CFG and EBNF grammars. Now they are parsed by using GF.Grammar.Parser just like the ordinary GF grammars. Furthermore now GF.Speech.CFG is moved to GF.Grammar.CFG. The new module is used by both the speech conversion utils and by the compiler for CFG grammars. The parser for CFG now consumes a lot less memory and can be used with grammars with more than 4 000 000 productions.
This commit is contained in:
58
src/compiler/GF/Compile/CFGtoPGF.hs
Normal file
58
src/compiler/GF/Compile/CFGtoPGF.hs
Normal file
@@ -0,0 +1,58 @@
|
||||
module GF.Compile.CFGtoPGF (cf2gf) where
|
||||
|
||||
import GF.Grammar.Grammar hiding (Cat)
|
||||
import GF.Grammar.Macros
|
||||
import GF.Grammar.CFG
|
||||
import GF.Infra.Ident(Ident,identS)
|
||||
import GF.Infra.Option
|
||||
import GF.Infra.UseIO
|
||||
|
||||
import GF.Data.Operations
|
||||
|
||||
import PGF(showCId)
|
||||
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.Map as Map
|
||||
|
||||
|
||||
--------------------------
|
||||
-- the compiler ----------
|
||||
--------------------------
|
||||
|
||||
cf2gf :: FilePath -> CFG -> SourceGrammar
|
||||
cf2gf fpath cf = mGrammar [
|
||||
(aname, ModInfo MTAbstract MSComplete (modifyFlags (\fs -> fs{optStartCat = Just cat})) [] Nothing [] [] fpath Nothing abs),
|
||||
(cname, ModInfo (MTConcrete aname) MSComplete noOptions [] Nothing [] [] fpath Nothing cnc)
|
||||
]
|
||||
where
|
||||
name = justModuleName fpath
|
||||
(abs,cnc,cat) = cf2grammar cf
|
||||
aname = identS $ name ++ "Abs"
|
||||
cname = identS name
|
||||
|
||||
|
||||
cf2grammar :: CFG -> (BinTree Ident Info, BinTree Ident Info, String)
|
||||
cf2grammar cfg = (buildTree abs, buildTree conc, cfgStartCat cfg) where
|
||||
abs = cats ++ funs
|
||||
conc = lincats ++ lins
|
||||
cats = [(identS cat, AbsCat (Just (L NoLoc []))) | cat <- Map.keys (cfgRules cfg)]
|
||||
lincats = [(cat, CncCat (Just (L loc defLinType)) Nothing Nothing Nothing Nothing) | (cat,AbsCat (Just (L loc _))) <- cats]
|
||||
(funs,lins) = unzip (map cf2rule (concatMap Set.toList (Map.elems (cfgRules cfg))))
|
||||
|
||||
cf2rule :: CFRule -> ((Ident,Info),(Ident,Info))
|
||||
cf2rule (CFRule cat items (CFObj fun _)) = (def,ldef) where
|
||||
f = identS (showCId fun)
|
||||
def = (f, AbsFun (Just (L NoLoc (mkProd args' (Cn (identS cat)) []))) Nothing Nothing (Just True))
|
||||
args0 = zip (map (identS . ("x" ++) . show) [0..]) items
|
||||
args = [((Explicit,v), Cn (identS c)) | (v, NonTerminal c) <- args0]
|
||||
args' = [(Explicit,identS "_", Cn (identS c)) | (_, NonTerminal c) <- args0]
|
||||
ldef = (f, CncFun
|
||||
Nothing
|
||||
(Just (L NoLoc (mkAbs (map fst args)
|
||||
(mkRecord (const theLinLabel) [foldconcat (map mkIt args0)]))))
|
||||
Nothing
|
||||
Nothing)
|
||||
mkIt (v, NonTerminal _) = P (Vr v) theLinLabel
|
||||
mkIt (_, Terminal a) = K a
|
||||
foldconcat [] = K ""
|
||||
foldconcat tt = foldr1 C tt
|
||||
@@ -12,27 +12,25 @@
|
||||
-- this module builds the internal GF grammar that is sent to the type checker
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Compile.GetGrammar (getSourceModule) where
|
||||
module GF.Compile.GetGrammar (getSourceModule, getCFRules, getEBNFRules) where
|
||||
|
||||
import Prelude hiding (catch)
|
||||
|
||||
import GF.Data.Operations
|
||||
|
||||
--import GF.System.Catch
|
||||
import GF.Infra.UseIO
|
||||
import GF.Infra.Option(Options,optPreprocessors,addOptions,renameEncoding,optEncoding,flag,defaultEncoding)
|
||||
import GF.Grammar.Lexer
|
||||
import GF.Grammar.Parser
|
||||
import GF.Grammar.Grammar
|
||||
--import GF.Compile.Coding
|
||||
import GF.Grammar.CFG
|
||||
import GF.Grammar.EBNF
|
||||
import GF.Compile.ReadFiles(parseSource,lift)
|
||||
--import GF.Text.Coding(decodeUnicodeIO)
|
||||
|
||||
import qualified Data.ByteString.Char8 as BS
|
||||
import Data.Char(isAscii)
|
||||
import Control.Monad (foldM,when,unless)
|
||||
import System.Cmd (system)
|
||||
--import System.IO(mkTextEncoding) --,utf8
|
||||
import System.Directory(removeFile,getCurrentDirectory)
|
||||
import System.FilePath(makeRelative)
|
||||
|
||||
@@ -64,17 +62,25 @@ getSourceModule opts file0 =
|
||||
--lift $ transcodeModule' (i,mi) -- old lexer
|
||||
return (i,mi) -- new lexer
|
||||
|
||||
{-
|
||||
transcodeModule sm00 =
|
||||
do enc <- mkTextEncoding (getEncoding (mflags (snd sm00)))
|
||||
let sm = decodeStringsInModule enc sm00
|
||||
return sm
|
||||
getCFRules :: Options -> FilePath -> IOE [CFRule]
|
||||
getCFRules opts fpath = do
|
||||
raw <- liftIO (BS.readFile fpath)
|
||||
(optCoding,parsed) <- parseSource opts pCFRules raw
|
||||
case parsed of
|
||||
Left (Pn l c,msg) -> do cwd <- lift $ getCurrentDirectory
|
||||
let location = makeRelative cwd fpath++":"++show l++":"++show c
|
||||
raise (location++":\n "++msg)
|
||||
Right rules -> return rules
|
||||
|
||||
transcodeModule' sm00 =
|
||||
do let enc = utf8
|
||||
let sm = decodeStringsInModule enc sm00
|
||||
return sm
|
||||
-}
|
||||
getEBNFRules :: Options -> FilePath -> IOE [ERule]
|
||||
getEBNFRules opts fpath = do
|
||||
raw <- liftIO (BS.readFile fpath)
|
||||
(optCoding,parsed) <- parseSource opts pEBNFRules raw
|
||||
case parsed of
|
||||
Left (Pn l c,msg) -> do cwd <- lift $ getCurrentDirectory
|
||||
let location = makeRelative cwd fpath++":"++show l++":"++show c
|
||||
raise (location++":\n "++msg)
|
||||
Right rules -> return rules
|
||||
|
||||
runPreprocessor :: Temporary -> String -> IO Temporary
|
||||
runPreprocessor tmp0 p =
|
||||
|
||||
Reference in New Issue
Block a user