diff --git a/src/compiler/GF/Command/Importing.hs b/src/compiler/GF/Command/Importing.hs index 2bdc091f8..78c019bd4 100644 --- a/src/compiler/GF/Command/Importing.hs +++ b/src/compiler/GF/Command/Importing.hs @@ -5,23 +5,25 @@ import PGF.Data import GF.Compile import GF.Compile.Multi (readMulti) +import GF.Compile.GetGrammar (getCFRules, getEBNFRules) import GF.Grammar (identS, SourceGrammar) -- for cc command -import GF.Grammar.CF +import GF.Grammar.CFG import GF.Grammar.EBNF +import GF.Compile.CFGtoPGF import GF.Infra.UseIO import GF.Infra.Option import GF.Data.ErrM ---import Data.List (nubBy) import System.FilePath +import qualified Data.Set as Set -- import a grammar in an environment where it extends an existing grammar importGrammar :: PGF -> Options -> [FilePath] -> IO PGF importGrammar pgf0 _ [] = return pgf0 importGrammar pgf0 opts files = case takeExtensions (last files) of - ".cf" -> importCF opts files getCF - ".ebnf" -> importCF opts files getEBNF + ".cf" -> importCF opts files getCFRules id + ".ebnf" -> importCF opts files getEBNFRules ebnf2cf ".gfm" -> do ascss <- mapM readMulti files let cs = concatMap snd ascss @@ -52,13 +54,17 @@ importSource src0 opts files = do return src0 -- for different cf formats -importCF opts files get = do - s <- fmap unlines $ mapM readFile files - gf <- case get (last files) s of - Ok gf -> return gf - Bad s -> error s ---- - Ok gr <- appIOE $ compileSourceGrammar opts gf - epgf <- appIOE $ link opts (identS (justModuleName (last files) ++ "Abs"), (), gr) - case epgf of - Ok pgf -> return pgf - Bad s -> error s ---- +importCF opts files get convert = do + res <- appIOE impCF + case res of + Ok pgf -> return pgf + Bad s -> error s + where + impCF = do + rules <- fmap (convert . concat) $ mapM (get opts) files + startCat <- case rules of + (CFRule cat _ _ : _) -> return cat + _ -> fail "empty CFG" + let gf = cf2gf (last files) (uniqueFuns (mkCFG startCat Set.empty rules)) + gr <- compileSourceGrammar opts gf + link opts (identS (justModuleName (last files) ++ "Abs"), (), gr) diff --git a/src/compiler/GF/Compile/CFGtoPGF.hs b/src/compiler/GF/Compile/CFGtoPGF.hs new file mode 100644 index 000000000..b42c0fbc4 --- /dev/null +++ b/src/compiler/GF/Compile/CFGtoPGF.hs @@ -0,0 +1,58 @@ +module GF.Compile.CFGtoPGF (cf2gf) where + +import GF.Grammar.Grammar hiding (Cat) +import GF.Grammar.Macros +import GF.Grammar.CFG +import GF.Infra.Ident(Ident,identS) +import GF.Infra.Option +import GF.Infra.UseIO + +import GF.Data.Operations + +import PGF(showCId) + +import qualified Data.Set as Set +import qualified Data.Map as Map + + +-------------------------- +-- the compiler ---------- +-------------------------- + +cf2gf :: FilePath -> CFG -> SourceGrammar +cf2gf fpath cf = mGrammar [ + (aname, ModInfo MTAbstract MSComplete (modifyFlags (\fs -> fs{optStartCat = Just cat})) [] Nothing [] [] fpath Nothing abs), + (cname, ModInfo (MTConcrete aname) MSComplete noOptions [] Nothing [] [] fpath Nothing cnc) + ] + where + name = justModuleName fpath + (abs,cnc,cat) = cf2grammar cf + aname = identS $ name ++ "Abs" + cname = identS name + + +cf2grammar :: CFG -> (BinTree Ident Info, BinTree Ident Info, String) +cf2grammar cfg = (buildTree abs, buildTree conc, cfgStartCat cfg) where + abs = cats ++ funs + conc = lincats ++ lins + cats = [(identS cat, AbsCat (Just (L NoLoc []))) | cat <- Map.keys (cfgRules cfg)] + lincats = [(cat, CncCat (Just (L loc defLinType)) Nothing Nothing Nothing Nothing) | (cat,AbsCat (Just (L loc _))) <- cats] + (funs,lins) = unzip (map cf2rule (concatMap Set.toList (Map.elems (cfgRules cfg)))) + +cf2rule :: CFRule -> ((Ident,Info),(Ident,Info)) +cf2rule (CFRule cat items (CFObj fun _)) = (def,ldef) where + f = identS (showCId fun) + def = (f, AbsFun (Just (L NoLoc (mkProd args' (Cn (identS cat)) []))) Nothing Nothing (Just True)) + args0 = zip (map (identS . ("x" ++) . show) [0..]) items + args = [((Explicit,v), Cn (identS c)) | (v, NonTerminal c) <- args0] + args' = [(Explicit,identS "_", Cn (identS c)) | (_, NonTerminal c) <- args0] + ldef = (f, CncFun + Nothing + (Just (L NoLoc (mkAbs (map fst args) + (mkRecord (const theLinLabel) [foldconcat (map mkIt args0)])))) + Nothing + Nothing) + mkIt (v, NonTerminal _) = P (Vr v) theLinLabel + mkIt (_, Terminal a) = K a + foldconcat [] = K "" + foldconcat tt = foldr1 C tt diff --git a/src/compiler/GF/Compile/GetGrammar.hs b/src/compiler/GF/Compile/GetGrammar.hs index 6393d51d2..4647cfcb4 100644 --- a/src/compiler/GF/Compile/GetGrammar.hs +++ b/src/compiler/GF/Compile/GetGrammar.hs @@ -12,27 +12,25 @@ -- this module builds the internal GF grammar that is sent to the type checker ----------------------------------------------------------------------------- -module GF.Compile.GetGrammar (getSourceModule) where +module GF.Compile.GetGrammar (getSourceModule, getCFRules, getEBNFRules) where import Prelude hiding (catch) import GF.Data.Operations ---import GF.System.Catch import GF.Infra.UseIO import GF.Infra.Option(Options,optPreprocessors,addOptions,renameEncoding,optEncoding,flag,defaultEncoding) import GF.Grammar.Lexer import GF.Grammar.Parser import GF.Grammar.Grammar ---import GF.Compile.Coding +import GF.Grammar.CFG +import GF.Grammar.EBNF import GF.Compile.ReadFiles(parseSource,lift) ---import GF.Text.Coding(decodeUnicodeIO) import qualified Data.ByteString.Char8 as BS import Data.Char(isAscii) import Control.Monad (foldM,when,unless) import System.Cmd (system) ---import System.IO(mkTextEncoding) --,utf8 import System.Directory(removeFile,getCurrentDirectory) import System.FilePath(makeRelative) @@ -64,17 +62,25 @@ getSourceModule opts file0 = --lift $ transcodeModule' (i,mi) -- old lexer return (i,mi) -- new lexer -{- -transcodeModule sm00 = - do enc <- mkTextEncoding (getEncoding (mflags (snd sm00))) - let sm = decodeStringsInModule enc sm00 - return sm +getCFRules :: Options -> FilePath -> IOE [CFRule] +getCFRules opts fpath = do + raw <- liftIO (BS.readFile fpath) + (optCoding,parsed) <- parseSource opts pCFRules raw + case parsed of + Left (Pn l c,msg) -> do cwd <- lift $ getCurrentDirectory + let location = makeRelative cwd fpath++":"++show l++":"++show c + raise (location++":\n "++msg) + Right rules -> return rules -transcodeModule' sm00 = - do let enc = utf8 - let sm = decodeStringsInModule enc sm00 - return sm --} +getEBNFRules :: Options -> FilePath -> IOE [ERule] +getEBNFRules opts fpath = do + raw <- liftIO (BS.readFile fpath) + (optCoding,parsed) <- parseSource opts pEBNFRules raw + case parsed of + Left (Pn l c,msg) -> do cwd <- lift $ getCurrentDirectory + let location = makeRelative cwd fpath++":"++show l++":"++show c + raise (location++":\n "++msg) + Right rules -> return rules runPreprocessor :: Temporary -> String -> IO Temporary runPreprocessor tmp0 p = diff --git a/src/compiler/GF/Grammar/CF.hs b/src/compiler/GF/Grammar/CF.hs deleted file mode 100644 index a48238e42..000000000 --- a/src/compiler/GF/Grammar/CF.hs +++ /dev/null @@ -1,143 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : CF --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/11/15 17:56:13 $ --- > CVS $Author: aarne $ --- > CVS $Revision: 1.13 $ --- --- parsing CF grammars and converting them to GF ------------------------------------------------------------------------------ - -module GF.Grammar.CF (getCF,CFItem,CFCat,CFFun,cf2gf,CFRule) where - -import GF.Grammar.Grammar -import GF.Grammar.Macros -import GF.Infra.Ident(Ident,identS) -import GF.Infra.Option -import GF.Infra.UseIO - -import GF.Data.Operations -import GF.Data.Utilities (nub') - -import qualified Data.Set as S -import Data.Char -import Data.List ---import System.FilePath - -getCF :: ErrorMonad m => FilePath -> String -> m SourceGrammar -getCF fpath = fmap (cf2gf fpath . uniqueFuns) . pCF - ---------------------- --- the parser ------- ---------------------- - -pCF :: ErrorMonad m => String -> m CF -pCF s = do - rules <- mapM getCFRule $ filter isRule $ lines s - return $ concat rules - where - isRule line = case dropWhile isSpace line of - '-':'-':_ -> False - _ -> not $ all isSpace line - --- rules have an amazingly easy parser, if we use the format --- fun. C -> item1 item2 ... where unquoted items are treated as cats --- Actually would be nice to add profiles to this. - -getCFRule :: ErrorMonad m => String -> m [CFRule] -getCFRule s = getcf (wrds s) where - getcf ws = case ws of - fun : cat : a : its | isArrow a -> - return [L NoLoc (init fun, (cat, map mkIt its))] - cat : a : its | isArrow a -> - return [L NoLoc (mkFun cat it, (cat, map mkIt it)) | it <- chunk its] - _ -> raise (" invalid rule:" +++ s) - isArrow a = elem a ["->", "::="] - mkIt w = case w of - ('"':w@(_:_)) -> Right (init w) - _ -> Left w - chunk its = case its of - [] -> [[]] - _ -> chunks "|" its - mkFun cat its = case its of - [] -> cat ++ "_" - _ -> concat $ intersperse "_" (cat : map clean its) -- CLE style - clean = filter isAlphaNum -- to form valid identifiers - wrds = takeWhile (/= ";") . words -- to permit semicolon in the end - -type CF = [CFRule] - -type CFRule = L (CFFun, (CFCat, [CFItem])) - -type CFItem = Either CFCat String - -type CFCat = String -type CFFun = String - - --------------------------------- --- make function names unique -- --------------------------------- - -uniqueFuns :: CF -> CF -uniqueFuns = snd . mapAccumL uniqueFun S.empty - where - uniqueFun funs (L l (fun,rule)) = (S.insert fun' funs,L l (fun',rule)) - where - fun' = head [fun'|suffix<-"":map show ([2..]::[Int]), - let fun'=fun++suffix, - not (fun' `S.member` funs)] - - --------------------------- --- the compiler ---------- --------------------------- - -cf2gf :: FilePath -> CF -> SourceGrammar -cf2gf fpath cf = mGrammar [ - (aname, ModInfo MTAbstract MSComplete (modifyFlags (\fs -> fs{optStartCat = Just cat})) [] Nothing [] [] fpath Nothing abs), - (cname, ModInfo (MTConcrete aname) MSComplete noOptions [] Nothing [] [] fpath Nothing cnc) - ] - where - name = justModuleName fpath - (abs,cnc,cat) = cf2grammar cf - aname = identS $ name ++ "Abs" - cname = identS name - - -cf2grammar :: CF -> (BinTree Ident Info, BinTree Ident Info, String) -cf2grammar rules = (buildTree abs, buildTree conc, cat) where - abs = cats ++ funs - conc = lincats ++ lins - cat = case rules of - (L _ (_,(c,_))):_ -> c -- the value category of the first rule - _ -> error "empty CF" - cats = [(cat, AbsCat (Just (L NoLoc []))) | - cat <- nub' (concat (map cf2cat rules))] ----notPredef cat - lincats = [(cat, CncCat (Just (L loc defLinType)) Nothing Nothing Nothing Nothing) | (cat,AbsCat (Just (L loc _))) <- cats] - (funs,lins) = unzip (map cf2rule rules) - -cf2cat :: CFRule -> [Ident] -cf2cat (L loc (_,(cat, items))) = map identS $ cat : [c | Left c <- items] - -cf2rule :: CFRule -> ((Ident,Info),(Ident,Info)) -cf2rule (L loc (fun, (cat, items))) = (def,ldef) where - f = identS fun - def = (f, AbsFun (Just (L loc (mkProd args' (Cn (identS cat)) []))) Nothing Nothing (Just True)) - args0 = zip (map (identS . ("x" ++) . show) [0..]) items - args = [((Explicit,v), Cn (identS c)) | (v, Left c) <- args0] - args' = [(Explicit,identS "_", Cn (identS c)) | (_, Left c) <- args0] - ldef = (f, CncFun - Nothing - (Just (L loc (mkAbs (map fst args) - (mkRecord (const theLinLabel) [foldconcat (map mkIt args0)])))) - Nothing - Nothing) - mkIt (v, Left _) = P (Vr v) theLinLabel - mkIt (_, Right a) = K a - foldconcat [] = K "" - foldconcat tt = foldr1 C tt diff --git a/src/compiler/GF/Speech/CFG.hs b/src/compiler/GF/Grammar/CFG.hs similarity index 94% rename from src/compiler/GF/Speech/CFG.hs rename to src/compiler/GF/Grammar/CFG.hs index 1a252139e..93bce2aad 100644 --- a/src/compiler/GF/Speech/CFG.hs +++ b/src/compiler/GF/Grammar/CFG.hs @@ -4,7 +4,7 @@ -- -- Context-free grammar representation and manipulation. ---------------------------------------------------------------------- -module GF.Speech.CFG where +module GF.Grammar.CFG where import GF.Data.Utilities import PGF @@ -53,6 +53,7 @@ data CFG = CFG { cfgStartCat :: Cat, cfgRules :: Map Cat (Set CFRule) } deriving (Eq, Ord, Show) + -- -- * Grammar filtering -- @@ -222,6 +223,21 @@ mkCFG start ext rs = CFG { cfgStartCat = start, cfgExternalCats = ext, cfgRules groupProds :: [CFRule] -> Map Cat (Set CFRule) groupProds = Map.fromListWith Set.union . map (\r -> (lhsCat r,Set.singleton r)) +uniqueFuns :: CFG -> CFG +uniqueFuns cfg = CFG {cfgStartCat = cfgStartCat cfg + ,cfgExternalCats = cfgExternalCats cfg + ,cfgRules = Map.fromList (snd (mapAccumL uniqueFunSet Set.empty (Map.toList (cfgRules cfg)))) + } + where + uniqueFunSet funs (cat,rules) = + let (funs',rules') = mapAccumL uniqueFun funs (Set.toList rules) + in (funs',(cat,Set.fromList rules')) + uniqueFun funs (CFRule cat items (CFObj fun args)) = (Set.insert fun' funs,CFRule cat items (CFObj fun' args)) + where + fun' = head [fun'|suffix<-"":map show ([2..]::[Int]), + let fun'=mkCId (showCId fun++suffix), + not (fun' `Set.member` funs)] + -- | Gets all rules in a CFG. allRules :: CFG -> [CFRule] allRules = concat . map Set.toList . Map.elems . cfgRules diff --git a/src/compiler/GF/Grammar/EBNF.hs b/src/compiler/GF/Grammar/EBNF.hs index b1854da54..50a5ff90a 100644 --- a/src/compiler/GF/Grammar/EBNF.hs +++ b/src/compiler/GF/Grammar/EBNF.hs @@ -12,34 +12,19 @@ -- (Description of the module) ----------------------------------------------------------------------------- -module GF.Grammar.EBNF (getEBNF) where +module GF.Grammar.EBNF (EBNF, ERule, ERHS(..), ebnf2cf) where import GF.Data.Operations ---import GF.Infra.Comments -import GF.Grammar.CF ---import GF.CF.CFIdent -import GF.Grammar.Grammar ---import GF.Grammar.PrGrammar ---import qualified GF.Source.AbsGF as A +import GF.Grammar.CFG +import PGF (mkCId) -import Data.Char import Data.List ---import System.FilePath - - - --- AR 18/4/2000 - 31/3/2004 - -getEBNF :: FilePath -> String -> Err SourceGrammar -getEBNF fpath = fmap (cf2gf fpath . ebnf2cf) . pEBNF type EBNF = [ERule] type ERule = (ECat, ERHS) type ECat = (String,[Int]) type ETok = String -ebnfID = "EBNF" ---- make this parametric! - data ERHS = ETerm ETok | ENonTerm ECat @@ -50,13 +35,14 @@ data ERHS = | EOpt ERHS | EEmpty -type CFRHS = [CFItem] -type CFJustRule = (CFCat, CFRHS) +type CFRHS = [CFSymbol] +type CFJustRule = (Cat, CFRHS) ebnf2cf :: EBNF -> [CFRule] ebnf2cf ebnf = - [L NoLoc (mkCFF i rule,rule) | (i,rule) <- zip [0..] (normEBNF ebnf)] where - mkCFF i (c, _) = ("Mk" ++ c ++ "_" ++ show i) + [CFRule cat items (mkCFF i cat) | (i,(cat,items)) <- zip [0..] (normEBNF ebnf)] + where + mkCFF i c = CFObj (mkCId ("Mk" ++ c ++ "_" ++ show i)) [] normEBNF :: EBNF -> [CFJustRule] normEBNF erules = let @@ -115,13 +101,13 @@ substERules g (cat,itss) = (cat, map sub itss) where sub (EIPlus r : ii) = EIPlus (substERules g r) : ii sub (EIOpt r : ii) = EIOpt (substERules g r) : ii -eitem2cfitem :: EItem -> CFItem +eitem2cfitem :: EItem -> CFSymbol eitem2cfitem it = case it of - EITerm a -> Right a - EINonTerm cat -> Left (mkCFCatE cat) - EIStar (cat,_) -> Left (mkCFCatE (mkNewECat cat "Star")) - EIPlus (cat,_) -> Left (mkCFCatE (mkNewECat cat "Plus")) - EIOpt (cat,_) -> Left (mkCFCatE (mkNewECat cat "Opt")) + EITerm a -> Terminal a + EINonTerm cat -> NonTerminal (mkCFCatE cat) + EIStar (cat,_) -> NonTerminal (mkCFCatE (mkNewECat cat "Star")) + EIPlus (cat,_) -> NonTerminal (mkCFCatE (mkNewECat cat "Plus")) + EIOpt (cat,_) -> NonTerminal (mkCFCatE (mkNewECat cat "Opt")) type NormERule = (ECat,[[EItem]]) -- disjunction of sequences of items @@ -157,198 +143,10 @@ mkECat ints = ("C", ints) prECat (c,[]) = c prECat (c,ints) = c ++ "_" ++ prTList "_" (map show ints) -mkCFCatE :: ECat -> CFCat +mkCFCatE :: ECat -> Cat mkCFCatE = prECat updECat _ (c,[]) = (c,[]) updECat ii (c,_) = (c,ii) mkNewECat (c,ii) str = (c ++ str,ii) - ------- parser for EBNF grammars - -pEBNF :: String -> Err EBNF -pEBNF = parseResultErr (longestOfMany (pJ pERule)) - -pERule :: Parser Char ERule -pERule = pECat ... pJ (literals ":=" ||| literals "::=") +.. pERHS 0 ..+ jL ";" - -pERHS :: Int -> Parser Char ERHS -pERHS 0 = pTList "|" (pERHS 1) *** foldr1 EAlt -pERHS 1 = longestOfMany (pJ (pERHS 2)) *** foldr ESeq EEmpty -pERHS 2 = pERHS 3 ... pJ pUnaryEOp *** (\ (a,f) -> f a) -pERHS 3 = pQuotedString *** ETerm - ||| pECat *** ENonTerm ||| pParenth (pERHS 0) - -pUnaryEOp :: Parser Char (ERHS -> ERHS) -pUnaryEOp = - lits "*" <<< EStar ||| lits "+" <<< EPlus ||| lits "?" <<< EOpt ||| succeed id - -pECat = pIdent *** (\c -> (c,[])) - - - ----------------------------------------------------------------------- --- Module : Parsers --- some parser combinators a la Wadler and Hutton. --- (only used in module "EBNF") ------------------------------------------------------------------------------ - -infixr 2 |||, +|| -infixr 3 *** -infixr 5 .>. -infixr 5 ... -infixr 5 .... -infixr 5 +.. -infixr 5 ..+ -infixr 6 |> -infixr 3 <<< - - -type Parser a b = [a] -> [(b,[a])] - -parseResults :: Parser a b -> [a] -> [b] -parseResults p s = [x | (x,r) <- p s, null r] - -parseResultErr :: Show a => Parser a b -> [a] -> Err b -parseResultErr p s = case parseResults p s of - [x] -> return x - [] -> case - maximumBy (\x y -> compare (length y) (length x)) (s:[r | (_,r) <- p s]) of - r -> Bad $ "\nno parse; reached" ++++ take 300 (show r) - _ -> Bad "ambiguous" - -(...) :: Parser a b -> Parser a c -> Parser a (b,c) -(p ... q) s = [((x,y),r) | (x,t) <- p s, (y,r) <- q t] - -(.>.) :: Parser a b -> (b -> Parser a c) -> Parser a c -(p .>. f) s = [(c,r) | (x,t) <- p s, (c,r) <- f x t] - -(|||) :: Parser a b -> Parser a b -> Parser a b -(p ||| q) s = p s ++ q s - -(+||) :: Parser a b -> Parser a b -> Parser a b -p1 +|| p2 = take 1 . (p1 ||| p2) - -literal :: (Eq a) => a -> Parser a a -literal x (c:cs) = [(x,cs) | x == c] -literal _ _ = [] - -(***) :: Parser a b -> (b -> c) -> Parser a c -(p *** f) s = [(f x,r) | (x,r) <- p s] - -succeed :: b -> Parser a b -succeed v s = [(v,s)] - -fails :: Parser a b -fails s = [] - -(+..) :: Parser a b -> Parser a c -> Parser a c -p1 +.. p2 = p1 ... p2 *** snd - -(..+) :: Parser a b -> Parser a c -> Parser a b -p1 ..+ p2 = p1 ... p2 *** fst - -(<<<) :: Parser a b -> c -> Parser a c -- return -p <<< v = p *** (\x -> v) - -(|>) :: Parser a b -> (b -> Bool) -> Parser a b -p |> b = p .>. (\x -> if b x then succeed x else fails) - -many :: Parser a b -> Parser a [b] -many p = (p ... many p *** uncurry (:)) +|| succeed [] - -some :: Parser a b -> Parser a [b] -some p = (p ... many p) *** uncurry (:) - -longestOfMany :: Parser a b -> Parser a [b] -longestOfMany p = p .>. (\x -> longestOfMany p *** (x:)) +|| succeed [] - -closure :: (b -> Parser a b) -> (b -> Parser a b) -closure p v = p v .>. closure p ||| succeed v - -pJunk :: Parser Char String -pJunk = longestOfMany (satisfy (\x -> elem x "\n\t ")) - -pJ :: Parser Char a -> Parser Char a -pJ p = pJunk +.. p ..+ pJunk - -pTList :: String -> Parser Char a -> Parser Char [a] -pTList t p = p .... many (jL t +.. p) *** (\ (x,y) -> x:y) -- mod. AR 5/1/1999 - -pTJList :: String -> String -> Parser Char a -> Parser Char [a] -pTJList t1 t2 p = p .... many (literals t1 +.. jL t2 +.. p) *** (uncurry (:)) - -pElem :: [String] -> Parser Char String -pElem l = foldr (+||) fails (map literals l) - -(....) :: Parser Char b -> Parser Char c -> Parser Char (b,c) -p1 .... p2 = p1 ... pJunk +.. p2 - -item :: Parser a a -item (c:cs) = [(c,cs)] -item [] = [] - -satisfy :: (a -> Bool) -> Parser a a -satisfy b = item |> b - -literals :: (Eq a,Show a) => [a] -> Parser a [a] -literals l = case l of - [] -> succeed [] - a:l -> literal a ... literals l *** (\ (x,y) -> x:y) - -lits :: (Eq a,Show a) => [a] -> Parser a [a] -lits ts = literals ts - -jL :: String -> Parser Char String -jL = pJ . lits - -pParenth :: Parser Char a -> Parser Char a -pParenth p = literal '(' +.. pJunk +.. p ..+ pJunk ..+ literal ')' - --- | p,...,p -pCommaList :: Parser Char a -> Parser Char [a] -pCommaList p = pTList "," (pJ p) - --- | the same or nothing -pOptCommaList :: Parser Char a -> Parser Char [a] -pOptCommaList p = pCommaList p ||| succeed [] - --- | (p,...,p), poss. empty -pArgList :: Parser Char a -> Parser Char [a] -pArgList p = pParenth (pCommaList p) ||| succeed [] - --- | min. 2 args -pArgList2 :: Parser Char a -> Parser Char [a] -pArgList2 p = pParenth (p ... jL "," +.. pCommaList p) *** uncurry (:) - -longestOfSome :: Parser a b -> Parser a [b] -longestOfSome p = (p ... longestOfMany p) *** (\ (x,y) -> x:y) - -pIdent :: Parser Char String -pIdent = pLetter ... longestOfMany pAlphaPlusChar *** uncurry (:) - where alphaPlusChar c = isAlphaNum c || c=='_' || c=='\'' - -pLetter, pDigit :: Parser Char Char -pLetter = satisfy (`elem` (['A'..'Z'] ++ ['a'..'z'] ++ - ['\192' .. '\255'])) -- no such in Char -pDigit = satisfy isDigit - -pLetters :: Parser Char String -pLetters = longestOfSome pLetter - -pAlphanum, pAlphaPlusChar :: Parser Char Char -pAlphanum = pDigit ||| pLetter -pAlphaPlusChar = pAlphanum ||| satisfy (`elem` "_'") - -pQuotedString :: Parser Char String -pQuotedString = literal '"' +.. pEndQuoted where - pEndQuoted = - literal '"' *** (const []) - +|| (literal '\\' +.. item .>. \ c -> pEndQuoted *** (c:)) - +|| item .>. \ c -> pEndQuoted *** (c:) - -pIntc :: Parser Char Int -pIntc = some (satisfy numb) *** read - where numb x = elem x ['0'..'9'] - diff --git a/src/compiler/GF/Grammar/Lexer.x b/src/compiler/GF/Grammar/Lexer.x index c4f7159a2..0293d3915 100644 --- a/src/compiler/GF/Grammar/Lexer.x +++ b/src/compiler/GF/Grammar/Lexer.x @@ -26,7 +26,7 @@ $i = [$l $d _ '] -- identifier character $u = [.\n] -- universal: any character @rsyms = -- symbols and non-identifier-like reserved words - \; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/ + \; | \= | \{ | \} | \( | \) | \~ | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/ | \: \= | \: \: \= :- "--" [.]* ; -- Toss single line comments @@ -83,6 +83,7 @@ data Token | T_ccurly | T_underscore | T_at + | T_cfarrow | T_PType | T_Str | T_Strs @@ -169,6 +170,8 @@ resWords = Map.fromList , b "|" T_bar , b "_" T_underscore , b "@" T_at + , b "::=" T_cfarrow + , b ":=" T_cfarrow , b "PType" T_PType , b "Str" T_Str , b "Strs" T_Strs diff --git a/src/compiler/GF/Grammar/Parser.y b/src/compiler/GF/Grammar/Parser.y index 6f7f5854e..387b69dd3 100644 --- a/src/compiler/GF/Grammar/Parser.y +++ b/src/compiler/GF/Grammar/Parser.y @@ -7,6 +7,8 @@ module GF.Grammar.Parser , pModHeader , pExp , pTopDef + , pCFRules + , pEBNFRules ) where import GF.Infra.Ident @@ -14,17 +16,23 @@ import GF.Infra.Option import GF.Data.Operations import GF.Grammar.Predef import GF.Grammar.Grammar +import GF.Grammar.CFG +import GF.Grammar.EBNF import GF.Grammar.Macros import GF.Grammar.Lexer import GF.Compile.Update (buildAnyTree) ---import Codec.Binary.UTF8.String(decodeString) ---import Data.Char(toLower) +import Data.List(intersperse) +import Data.Char(isAlphaNum) +import PGF(mkCId) + } %name pModDef ModDef %name pTopDef TopDef %partial pModHeader ModHeader %name pExp Exp +%name pCFRules ListCFRule +%name pEBNFRules ListEBNFRule -- no lexer declaration %monad { P } { >>= } { return } @@ -64,6 +72,7 @@ import GF.Compile.Update (buildAnyTree) '\\\\' { T_lamlam } '_' { T_underscore} '|' { T_bar } + '::=' { T_cfarrow } 'PType' { T_PType } 'Str' { T_Str } 'Strs' { T_Strs } @@ -602,6 +611,70 @@ ListDDecl : {- empty -} { [] } | DDecl ListDDecl { $1 ++ $2 } +ListCFRule :: { [CFRule] } +ListCFRule + : CFRule { $1 } + | CFRule ListCFRule { $1 ++ $2 } + +CFRule :: { [CFRule] } +CFRule + : Ident '.' Ident '::=' ListCFSymbol ';' { [CFRule (showIdent $3) $5 (CFObj (mkCId (showIdent $1)) [])] + } + | Ident '::=' ListCFRHS ';' { let { cat = showIdent $1; + mkFun cat its = + case its of { + [] -> cat ++ "_"; + _ -> concat $ intersperse "_" (cat : filter (not . null) (map clean its)) -- CLE style + }; + clean sym = + case sym of { + Terminal c -> filter isAlphaNum c; + NonTerminal t -> t + } + } in map (\rhs -> CFRule cat rhs (CFObj (mkCId (mkFun cat rhs)) [])) $3 + } + +ListCFRHS :: { [[CFSymbol]] } +ListCFRHS + : ListCFSymbol { [$1] } + | ListCFSymbol '|' ListCFRHS { $1 : $3 } + +ListCFSymbol :: { [CFSymbol] } +ListCFSymbol + : {- empty -} { [] } + | CFSymbol ListCFSymbol { $1 : $2 } + +CFSymbol :: { CFSymbol } + : String { Terminal $1 } + | Ident { NonTerminal (showIdent $1) } + +ListEBNFRule :: { [ERule] } +ListEBNFRule + : EBNFRule { [$1] } + | EBNFRule ListEBNFRule { $1 : $2 } + +EBNFRule :: { ERule } + : Ident '::=' ERHS0 ';' { ((showIdent $1,[]),$3) } + +ERHS0 :: { ERHS } + : ERHS1 { $1 } + | ERHS1 '|' ERHS0 { EAlt $1 $3 } + +ERHS1 :: { ERHS } + : ERHS2 { $1 } + | ERHS2 ERHS1 { ESeq $1 $2 } + +ERHS2 :: { ERHS } + : ERHS3 '*' { EStar $1 } + | ERHS3 '+' { EPlus $1 } + | ERHS3 '?' { EOpt $1 } + | ERHS3 { $1 } + +ERHS3 :: { ERHS } + : String { ETerm $1 } + | Ident { ENonTerm (showIdent $1,[]) } + | '(' ERHS0 ')' { $2 } + Posn :: { Posn } Posn : {- empty -} {% getPosn } diff --git a/src/compiler/GF/Speech/CFGToFA.hs b/src/compiler/GF/Speech/CFGToFA.hs index 4f5e3621e..330c763e5 100644 --- a/src/compiler/GF/Speech/CFGToFA.hs +++ b/src/compiler/GF/Speech/CFGToFA.hs @@ -17,7 +17,7 @@ import qualified Data.Set as Set --import PGF.CId import PGF.Data import GF.Data.Utilities -import GF.Speech.CFG +import GF.Grammar.CFG --import GF.Speech.PGFToCFG --import GF.Infra.Ident (Ident) diff --git a/src/compiler/GF/Speech/GSL.hs b/src/compiler/GF/Speech/GSL.hs index 3557ff21f..3eb4c20a7 100644 --- a/src/compiler/GF/Speech/GSL.hs +++ b/src/compiler/GF/Speech/GSL.hs @@ -9,7 +9,7 @@ module GF.Speech.GSL (gslPrinter) where --import GF.Data.Utilities -import GF.Speech.CFG +import GF.Grammar.CFG import GF.Speech.SRG import GF.Speech.RegExp import GF.Infra.Option diff --git a/src/compiler/GF/Speech/JSGF.hs b/src/compiler/GF/Speech/JSGF.hs index 921108e11..6a4935a7f 100644 --- a/src/compiler/GF/Speech/JSGF.hs +++ b/src/compiler/GF/Speech/JSGF.hs @@ -14,7 +14,7 @@ module GF.Speech.JSGF (jsgfPrinter) where --import GF.Data.Utilities import GF.Infra.Option -import GF.Speech.CFG +import GF.Grammar.CFG import GF.Speech.RegExp import GF.Speech.SISR import GF.Speech.SRG diff --git a/src/compiler/GF/Speech/PGFToCFG.hs b/src/compiler/GF/Speech/PGFToCFG.hs index 5c13ca471..d70a74fe7 100644 --- a/src/compiler/GF/Speech/PGFToCFG.hs +++ b/src/compiler/GF/Speech/PGFToCFG.hs @@ -10,7 +10,7 @@ import PGF(showCId) import PGF.Data as PGF import PGF.Macros --import GF.Infra.Ident -import GF.Speech.CFG hiding (Symbol) +import GF.Grammar.CFG hiding (Symbol) import Data.Array.IArray as Array --import Data.List diff --git a/src/compiler/GF/Speech/PrRegExp.hs b/src/compiler/GF/Speech/PrRegExp.hs index 0fc35d541..2829839f3 100644 --- a/src/compiler/GF/Speech/PrRegExp.hs +++ b/src/compiler/GF/Speech/PrRegExp.hs @@ -7,7 +7,7 @@ module GF.Speech.PrRegExp (regexpPrinter,multiRegexpPrinter) where -import GF.Speech.CFG +import GF.Grammar.CFG import GF.Speech.CFGToFA import GF.Speech.PGFToCFG import GF.Speech.RegExp diff --git a/src/compiler/GF/Speech/SISR.hs b/src/compiler/GF/Speech/SISR.hs index 8417fb203..5f9161547 100644 --- a/src/compiler/GF/Speech/SISR.hs +++ b/src/compiler/GF/Speech/SISR.hs @@ -13,7 +13,7 @@ import Data.List --import GF.Data.Utilities --import GF.Infra.Ident import GF.Infra.Option (SISRFormat(..)) -import GF.Speech.CFG +import GF.Grammar.CFG import GF.Speech.SRG (SRGNT) import PGF(showCId) diff --git a/src/compiler/GF/Speech/SLF.hs b/src/compiler/GF/Speech/SLF.hs index 7785f2382..d93d1b362 100644 --- a/src/compiler/GF/Speech/SLF.hs +++ b/src/compiler/GF/Speech/SLF.hs @@ -13,7 +13,7 @@ module GF.Speech.SLF (slfPrinter,slfGraphvizPrinter, slfSubPrinter,slfSubGraphvizPrinter) where import GF.Data.Utilities -import GF.Speech.CFG +import GF.Grammar.CFG import GF.Speech.FiniteState --import GF.Speech.CFG import GF.Speech.CFGToFA diff --git a/src/compiler/GF/Speech/SRG.hs b/src/compiler/GF/Speech/SRG.hs index 4e5508de0..d5bedc797 100644 --- a/src/compiler/GF/Speech/SRG.hs +++ b/src/compiler/GF/Speech/SRG.hs @@ -21,7 +21,7 @@ module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..), SRGItem, SRGSymbol import GF.Data.Utilities --import GF.Infra.Ident import GF.Infra.Option -import GF.Speech.CFG +import GF.Grammar.CFG import GF.Speech.PGFToCFG --import GF.Data.Relation --import GF.Speech.FiniteState diff --git a/src/compiler/GF/Speech/SRGS_ABNF.hs b/src/compiler/GF/Speech/SRGS_ABNF.hs index 5d07762bb..a359b2c38 100644 --- a/src/compiler/GF/Speech/SRGS_ABNF.hs +++ b/src/compiler/GF/Speech/SRGS_ABNF.hs @@ -21,7 +21,7 @@ module GF.Speech.SRGS_ABNF (srgsAbnfPrinter, srgsAbnfNonRecursivePrinter) where --import GF.Data.Utilities import GF.Infra.Option -import GF.Speech.CFG +import GF.Grammar.CFG import GF.Speech.SISR as SISR import GF.Speech.SRG import GF.Speech.RegExp diff --git a/src/compiler/GF/Speech/SRGS_XML.hs b/src/compiler/GF/Speech/SRGS_XML.hs index fe973c2e6..397bfb739 100644 --- a/src/compiler/GF/Speech/SRGS_XML.hs +++ b/src/compiler/GF/Speech/SRGS_XML.hs @@ -9,7 +9,7 @@ module GF.Speech.SRGS_XML (srgsXmlPrinter, srgsXmlNonRecursivePrinter) where --import GF.Data.Utilities import GF.Data.XML import GF.Infra.Option -import GF.Speech.CFG +import GF.Grammar.CFG import GF.Speech.RegExp import GF.Speech.SISR as SISR import GF.Speech.SRG diff --git a/src/compiler/GFC.hs b/src/compiler/GFC.hs index acb4e21ab..8d548e449 100644 --- a/src/compiler/GFC.hs +++ b/src/compiler/GFC.hs @@ -8,10 +8,11 @@ import PGF.Optimize import PGF.Binary(putSplitAbs) import GF.Compile import GF.Compile.Export +import GF.Compile.CFGtoPGF +import GF.Compile.GetGrammar +import GF.Grammar.CFG -import GF.Grammar.CF ---- should this be on a deeper level? AR 15/10/2008 import GF.Infra.Ident(identS,showIdent) - import GF.Infra.UseIO import GF.Infra.Option import GF.Data.ErrM @@ -21,6 +22,7 @@ import Data.Maybe import Data.Binary(encode,encodeFile) import Data.Binary.Put(runPut) import qualified Data.Map as Map +import qualified Data.Set as Set import qualified Data.ByteString as BSS import qualified Data.ByteString.Lazy as BSL import System.FilePath @@ -61,14 +63,18 @@ compileSourceFiles opts fs = writeOutputs opts pgf compileCFFiles :: Options -> [FilePath] -> IOE () -compileCFFiles opts fs = - do s <- liftIO $ fmap unlines $ mapM readFile fs - let cnc = justModuleName (last fs) - gr <- compileSourceGrammar opts =<< getCF cnc s - unless (flag optStopAfterPhase opts == Compile) $ - do pgf <- link opts (identS cnc, (), gr) - writePGF opts pgf - writeOutputs opts pgf +compileCFFiles opts fs = do + rules <- fmap concat $ mapM (getCFRules opts) fs + startCat <- case rules of + (CFRule cat _ _ : _) -> return cat + _ -> fail "empty CFG" + let gf = cf2gf (last fs) (uniqueFuns (mkCFG startCat Set.empty rules)) + gr <- compileSourceGrammar opts gf + let cnc = justModuleName (last fs) + unless (flag optStopAfterPhase opts == Compile) $ + do pgf <- link opts (identS cnc, (), gr) + writePGF opts pgf + writeOutputs opts pgf unionPGFFiles :: Options -> [FilePath] -> IOE () unionPGFFiles opts fs =