From c8ebe093150509d9f01d82f0d698ef5df09bd985 Mon Sep 17 00:00:00 2001 From: krasimir Date: Mon, 21 Mar 2016 13:27:44 +0000 Subject: [PATCH] initial support for BNFC syntax in context-free grammars for GF. Not all features are supported yet. Based on contribution from Gleb Lobanov --- gf.cabal | 1 + src/compiler/GF/Command/Importing.hs | 7 ++--- src/compiler/GF/Compile/GetGrammar.hs | 10 +++---- src/compiler/GF/Compiler.hs | 5 ++-- src/compiler/GF/Grammar/Lexer.x | 8 ++++++ src/compiler/GF/Grammar/Parser.y | 39 ++++++++++++++++++--------- 6 files changed, 47 insertions(+), 23 deletions(-) diff --git a/gf.cabal b/gf.cabal index 51db22394..cfe6a4d9c 100644 --- a/gf.cabal +++ b/gf.cabal @@ -219,6 +219,7 @@ Library GF.Grammar.Binary GF.Grammar.CFG GF.Grammar.EBNF + GF.Grammar.BNFC GF.Grammar.Grammar GF.Grammar.Lexer GF.Grammar.Lockfield diff --git a/src/compiler/GF/Command/Importing.hs b/src/compiler/GF/Command/Importing.hs index 1b520cfc8..f3d93d87b 100644 --- a/src/compiler/GF/Command/Importing.hs +++ b/src/compiler/GF/Command/Importing.hs @@ -5,10 +5,11 @@ import PGF.Internal(optimizePGF,unionPGF,msgUnionPGF) import GF.Compile import GF.Compile.Multi (readMulti) -import GF.Compile.GetGrammar (getCFRules, getEBNFRules) +import GF.Compile.GetGrammar (getBNFCRules, getEBNFRules) import GF.Grammar (SourceGrammar) -- for cc command -import GF.Grammar.CFG +import GF.Grammar.BNFC import GF.Grammar.EBNF +import GF.Grammar.CFG import GF.Compile.CFGtoPGF import GF.Infra.UseIO(die,tryIOE) import GF.Infra.Option @@ -22,7 +23,7 @@ importGrammar :: PGF -> Options -> [FilePath] -> IO PGF importGrammar pgf0 _ [] = return pgf0 importGrammar pgf0 opts files = case takeExtensions (last files) of - ".cf" -> importCF opts files getCFRules id + ".cf" -> importCF opts files getBNFCRules bnfc2cf ".ebnf" -> importCF opts files getEBNFRules ebnf2cf ".gfm" -> do ascss <- mapM readMulti files diff --git a/src/compiler/GF/Compile/GetGrammar.hs b/src/compiler/GF/Compile/GetGrammar.hs index 4e2523d0b..c8a32ccc6 100644 --- a/src/compiler/GF/Compile/GetGrammar.hs +++ b/src/compiler/GF/Compile/GetGrammar.hs @@ -12,7 +12,7 @@ -- this module builds the internal GF grammar that is sent to the type checker ----------------------------------------------------------------------------- -module GF.Compile.GetGrammar (getSourceModule, getCFRules, getEBNFRules) where +module GF.Compile.GetGrammar (getSourceModule, getBNFCRules, getEBNFRules) where import Prelude hiding (catch) @@ -23,7 +23,7 @@ import GF.Infra.Option(Options,optPreprocessors,addOptions,renameEncoding,optEnc import GF.Grammar.Lexer import GF.Grammar.Parser import GF.Grammar.Grammar -import GF.Grammar.CFG +import GF.Grammar.BNFC import GF.Grammar.EBNF import GF.Compile.ReadFiles(parseSource) @@ -63,10 +63,10 @@ getSourceModule opts file0 = --liftIO $ transcodeModule' (i,mi) -- old lexer return (i,mi) -- new lexer -getCFRules :: Options -> FilePath -> IOE [CFRule] -getCFRules opts fpath = do +getBNFCRules :: Options -> FilePath -> IOE [BNFCRule] +getBNFCRules opts fpath = do raw <- liftIO (BS.readFile fpath) - (optCoding,parsed) <- parseSource opts pCFRules raw + (optCoding,parsed) <- parseSource opts pBNFCRules raw case parsed of Left (Pn l c,msg) -> do cwd <- getCurrentDirectory let location = makeRelative cwd fpath++":"++show l++":"++show c diff --git a/src/compiler/GF/Compiler.hs b/src/compiler/GF/Compiler.hs index 79ed66e7c..66d88eb69 100644 --- a/src/compiler/GF/Compiler.hs +++ b/src/compiler/GF/Compiler.hs @@ -9,6 +9,7 @@ import GF.Compile.Export import GF.Compile.ConcreteToHaskell(concretes2haskell) import GF.Compile.CFGtoPGF import GF.Compile.GetGrammar +import GF.Grammar.BNFC import GF.Grammar.CFG --import GF.Infra.Ident(showIdent) @@ -85,12 +86,12 @@ linkGrammars opts (t_src,~cnc_grs@(~(cnc,gr):_)) = compileCFFiles :: Options -> [FilePath] -> IOE () compileCFFiles opts fs = do - rules <- fmap concat $ mapM (getCFRules opts) fs + bnfc_rules <- fmap concat $ mapM (getBNFCRules opts) fs + let rules = bnfc2cf bnfc_rules startCat <- case rules of (CFRule cat _ _ : _) -> return cat _ -> fail "empty CFG" let pgf = cf2pgf (last fs) (uniqueFuns (mkCFG startCat Set.empty rules)) ---let cnc = justModuleName (last fs) unless (flag optStopAfterPhase opts == Compile) $ do probs <- liftIO (maybe (return . defaultProbabilities) readProbabilitiesFromFile (flag optProbsFile opts) pgf) let pgf' = setProbabilities probs $ if flag optOptimizePGF opts then optimizePGF pgf else pgf diff --git a/src/compiler/GF/Grammar/Lexer.x b/src/compiler/GF/Grammar/Lexer.x index f073bcdfc..c579b5609 100644 --- a/src/compiler/GF/Grammar/Lexer.x +++ b/src/compiler/GF/Grammar/Lexer.x @@ -124,6 +124,10 @@ data Token | T_variants | T_where | T_with + | T_coercions + | T_terminator + | T_separator + | T_nonempty | T_String String -- string literals | T_Integer Int -- integer literals | T_Double Double -- double precision float literals @@ -212,6 +216,10 @@ resWords = Map.fromList , b "variants" T_variants , b "where" T_where , b "with" T_with + , b "coercions" T_coercions + , b "terminator" T_terminator + , b "separator" T_separator + , b "nonempty" T_nonempty ] where b s t = (identS s, t) diff --git a/src/compiler/GF/Grammar/Parser.y b/src/compiler/GF/Grammar/Parser.y index cf1f667da..9f2e7c95a 100644 --- a/src/compiler/GF/Grammar/Parser.y +++ b/src/compiler/GF/Grammar/Parser.y @@ -7,7 +7,7 @@ module GF.Grammar.Parser , pModHeader , pExp , pTopDef - , pCFRules + , pBNFCRules , pEBNFRules ) where @@ -16,7 +16,7 @@ import GF.Infra.Option import GF.Data.Operations import GF.Grammar.Predef import GF.Grammar.Grammar -import GF.Grammar.CFG +import GF.Grammar.BNFC import GF.Grammar.EBNF import GF.Grammar.Macros import GF.Grammar.Lexer @@ -31,7 +31,7 @@ import PGF(mkCId) %name pTopDef TopDef %partial pModHeader ModHeader %name pExp Exp -%name pCFRules ListCFRule +%name pBNFCRules ListCFRule %name pEBNFRules ListEBNFRule -- no lexer declaration @@ -108,6 +108,10 @@ import PGF(mkCId) 'variants' { T_variants } 'where' { T_where } 'with' { T_with } + 'coercions' { T_coercions } + 'terminator' { T_terminator } + 'separator' { T_separator } + 'nonempty' { T_nonempty } Integer { (T_Integer $$) } Double { (T_Double $$) } @@ -611,14 +615,14 @@ ListDDecl : {- empty -} { [] } | DDecl ListDDecl { $1 ++ $2 } -ListCFRule :: { [CFRule] } +ListCFRule :: { [BNFCRule] } ListCFRule : CFRule { $1 } | CFRule ListCFRule { $1 ++ $2 } -CFRule :: { [CFRule] } +CFRule :: { [BNFCRule] } CFRule - : Ident '.' Ident '::=' ListCFSymbol ';' { [CFRule (showIdent $3) $5 (CFObj (mkCId (showIdent $1)) [])] + : Ident '.' Ident '::=' ListCFSymbol ';' { [BNFCRule (showIdent $3) $5 (CFObj (mkCId (showIdent $1)) [])] } | Ident '::=' ListCFRHS ';' { let { cat = showIdent $1; mkFun cat its = @@ -628,25 +632,34 @@ CFRule }; clean sym = case sym of { - Terminal c -> filter isAlphaNum c; - NonTerminal t -> t + Terminal c -> filter isAlphaNum c; + NonTerminal (t,_) -> t } - } in map (\rhs -> CFRule cat rhs (CFObj (mkCId (mkFun cat rhs)) [])) $3 + } in map (\rhs -> BNFCRule cat rhs (CFObj (mkCId (mkFun cat rhs)) [])) $3 } + | 'coercions' Ident Integer ';' { [BNFCCoercions (showIdent $2) $3]} + | 'terminator' NonEmpty Ident String ';' { [BNFCTerminator $2 (showIdent $3) $4] } + | 'separator' NonEmpty Ident String ';' { [BNFCSeparator $2 (showIdent $3) $4] } -ListCFRHS :: { [[CFSymbol]] } +ListCFRHS :: { [[BNFCSymbol]] } ListCFRHS : ListCFSymbol { [$1] } | ListCFSymbol '|' ListCFRHS { $1 : $3 } -ListCFSymbol :: { [CFSymbol] } +ListCFSymbol :: { [BNFCSymbol] } ListCFSymbol : {- empty -} { [] } | CFSymbol ListCFSymbol { $1 : $2 } -CFSymbol :: { CFSymbol } +CFSymbol :: { BNFCSymbol } : String { Terminal $1 } - | Ident { NonTerminal (showIdent $1) } + | Ident { NonTerminal (showIdent $1, False) } + | '[' Ident ']' { NonTerminal (showIdent $2, True) } + +NonEmpty :: { Bool } +NonEmpty : 'nonempty' { True } + | {-empty-} { False } + ListEBNFRule :: { [ERule] } ListEBNFRule