From 709b0518fa284b41777cbec015a4647303e7a0b3 Mon Sep 17 00:00:00 2001 From: aarne Date: Fri, 24 Sep 2010 12:39:49 +0000 Subject: [PATCH] restored the .ebnf grammar format --- src/compiler/GF/Command/Importing.hs | 27 +- src/compiler/GF/Grammar/CF.hs | 2 +- src/compiler/GF/Grammar/EBNF.hs | 353 +++++++++++++++++++++++++++ 3 files changed, 370 insertions(+), 12 deletions(-) create mode 100644 src/compiler/GF/Grammar/EBNF.hs diff --git a/src/compiler/GF/Command/Importing.hs b/src/compiler/GF/Command/Importing.hs index 194c993ba..80f3833ee 100644 --- a/src/compiler/GF/Command/Importing.hs +++ b/src/compiler/GF/Command/Importing.hs @@ -6,6 +6,7 @@ import PGF.Data import GF.Compile import GF.Grammar (identC, SourceGrammar) -- for cc command import GF.Grammar.CF +import GF.Grammar.EBNF import GF.Infra.UseIO import GF.Infra.Option import GF.Data.ErrM @@ -19,17 +20,8 @@ importGrammar :: PGF -> Options -> [FilePath] -> IO PGF importGrammar pgf0 _ [] = return pgf0 importGrammar pgf0 opts files = case takeExtensions (last files) of - ".cf" -> do - s <- fmap unlines $ mapM readFile files - let cnc = justModuleName (last files) - gf <- case getCF cnc s of - Ok g -> return g - Bad s -> error s ---- - Ok gr <- appIOE $ compileSourceGrammar opts gf - epgf <- appIOE $ link opts (identC (BS.pack (cnc ++ "Abs"))) gr - case epgf of - Ok pgf -> return pgf - Bad s -> error s ---- + ".cf" -> importCF opts files getCF + ".ebnf" -> importCF opts files getEBNF s | elem s [".gf",".gfo"] -> do res <- appIOE $ compileToPGF opts files case res of @@ -49,3 +41,16 @@ importSource src0 opts files = do Bad msg -> do putStrLn msg return src0 + +-- for different cf formats +importCF opts files get = do + s <- fmap unlines $ mapM readFile files + let cnc = justModuleName (last files) + gf <- case get cnc s of + Ok g -> return g + Bad s -> error s ---- + Ok gr <- appIOE $ compileSourceGrammar opts gf + epgf <- appIOE $ link opts (identC (BS.pack (cnc ++ "Abs"))) gr + case epgf of + Ok pgf -> return pgf + Bad s -> error s ---- diff --git a/src/compiler/GF/Grammar/CF.hs b/src/compiler/GF/Grammar/CF.hs index 06f67234b..e45008485 100644 --- a/src/compiler/GF/Grammar/CF.hs +++ b/src/compiler/GF/Grammar/CF.hs @@ -12,7 +12,7 @@ -- parsing CF grammars and converting them to GF ----------------------------------------------------------------------------- -module GF.Grammar.CF (getCF) where +module GF.Grammar.CF (getCF,CFItem,CFCat,CFFun,cf2gf,CFRule) where import GF.Grammar.Grammar import GF.Grammar.Macros diff --git a/src/compiler/GF/Grammar/EBNF.hs b/src/compiler/GF/Grammar/EBNF.hs new file mode 100644 index 000000000..11a2b3c4b --- /dev/null +++ b/src/compiler/GF/Grammar/EBNF.hs @@ -0,0 +1,353 @@ +---------------------------------------------------------------------- +-- | +-- Module : EBNF +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:21:13 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.5 $ +-- +-- (Description of the module) +----------------------------------------------------------------------------- + +module GF.Grammar.EBNF (getEBNF) where + +import GF.Data.Operations +--import GF.Infra.Comments +import GF.Grammar.CF +--import GF.CF.CFIdent +import GF.Grammar.Grammar +--import GF.Grammar.PrGrammar +--import qualified GF.Source.AbsGF as A + +import Data.Char +import Data.List + + + +-- AR 18/4/2000 - 31/3/2004 + +getEBNF :: String -> String -> Err SourceGrammar +getEBNF name = fmap (cf2gf name . ebnf2cf) . pEBNF + +type EBNF = [ERule] +type ERule = (ECat, ERHS) +type ECat = (String,[Int]) +type ETok = String + +ebnfID = "EBNF" ---- make this parametric! + +data ERHS = + ETerm ETok + | ENonTerm ECat + | ESeq ERHS ERHS + | EAlt ERHS ERHS + | EStar ERHS + | EPlus ERHS + | EOpt ERHS + | EEmpty + +type CFRHS = [CFItem] +type CFJustRule = (CFCat, CFRHS) + +ebnf2cf :: EBNF -> [CFRule] +ebnf2cf ebnf = + [L (0,0) (mkCFF i rule,rule) | (i,rule) <- zip [0..] (normEBNF ebnf)] where + mkCFF i (c, _) = ("Mk" ++ c ++ "_" ++ show i) + +normEBNF :: EBNF -> [CFJustRule] +normEBNF erules = let + erules1 = [normERule ([i],r) | (i,r) <- zip [0..] erules] + erules2 = erules1 ---refreshECats erules1 --- this seems to be just bad ! + erules3 = concat (map pickERules erules2) + erules4 = nubERules erules3 + in [(mkCFCatE cat, map eitem2cfitem its) | (cat,itss) <- erules3, its <- itss] + +refreshECats :: [NormERule] -> [NormERule] +refreshECats rules = [recas [i] rule | (i,rule) <- zip [0..] rules] where + recas ii (cat,its) = (updECat ii cat, [recss ii 0 s | s <- its]) + recss ii n [] = [] + recss ii n (s:ss) = recit (ii ++ [n]) s : recss ii (n+1) ss + recit ii it = case it of + EINonTerm cat -> EINonTerm (updECat ii cat) + EIStar (cat,t) -> EIStar (updECat ii cat, [recss ii 0 s | s <- t]) + EIPlus (cat,t) -> EIPlus (updECat ii cat, [recss ii 0 s | s <- t]) + EIOpt (cat,t) -> EIOpt (updECat ii cat, [recss ii 0 s | s <- t]) + _ -> it + +pickERules :: NormERule -> [NormERule] +pickERules rule@(cat,alts) = rule : concat (map pics (concat alts)) where + pics it = case it of + EIStar ru@(cat,t) -> mkEStarRules cat ++ pickERules ru + EIPlus ru@(cat,t) -> mkEPlusRules cat ++ pickERules ru + EIOpt ru@(cat,t) -> mkEOptRules cat ++ pickERules ru + _ -> [] + mkEStarRules cat = [(cat', [[],[EINonTerm cat, EINonTerm cat']])] + where cat' = mkNewECat cat "Star" + mkEPlusRules cat = [(cat', [[EINonTerm cat],[EINonTerm cat, EINonTerm cat']])] + where cat' = mkNewECat cat "Plus" + mkEOptRules cat = [(cat', [[],[EINonTerm cat]])] + where cat' = mkNewECat cat "Opt" + +nubERules :: [NormERule] -> [NormERule] +nubERules rules = nub optim where + optim = map (substERules (map mkSubst replaces)) irreducibles + (replaces,irreducibles) = partition reducible rules + reducible (cat,[items]) = isNewCat cat && all isOldIt items + reducible _ = False + isNewCat (_,ints) = ints == [] + isOldIt (EITerm _) = True + isOldIt (EINonTerm cat) = not (isNewCat cat) + isOldIt _ = False + mkSubst (cat,its) = (cat, head its) -- def of reducible: its must be singleton +--- the optimization assumes each cat has at most one EBNF rule. + +substERules :: [(ECat,[EItem])] -> NormERule -> NormERule +substERules g (cat,itss) = (cat, map sub itss) where + sub [] = [] + sub (i@(EINonTerm cat') : ii) = case lookup cat g of + Just its -> its ++ sub ii + _ -> i : sub ii + sub (EIStar r : ii) = EIStar (substERules g r) : ii + sub (EIPlus r : ii) = EIPlus (substERules g r) : ii + sub (EIOpt r : ii) = EIOpt (substERules g r) : ii + +eitem2cfitem :: EItem -> CFItem +eitem2cfitem it = case it of + EITerm a -> Right a + EINonTerm cat -> Left (mkCFCatE cat) + EIStar (cat,_) -> Left (mkCFCatE (mkNewECat cat "Star")) + EIPlus (cat,_) -> Left (mkCFCatE (mkNewECat cat "Plus")) + EIOpt (cat,_) -> Left (mkCFCatE (mkNewECat cat "Opt")) + +type NormERule = (ECat,[[EItem]]) -- disjunction of sequences of items + +data EItem = + EITerm String + | EINonTerm ECat + | EIStar NormERule + | EIPlus NormERule + | EIOpt NormERule + deriving Eq + +normERule :: ([Int],ERule) -> NormERule +normERule (ii,(cat,rhs)) = + (cat,[map (mkEItem (ii ++ [i])) r' | (i,r') <- zip [0..] (disjNorm rhs)]) where + disjNorm r = case r of + ESeq r1 r2 -> [x ++ y | x <- disjNorm r1, y <- disjNorm r2] + EAlt r1 r2 -> disjNorm r1 ++ disjNorm r2 + EEmpty -> [[]] + _ -> [[r]] + +mkEItem :: [Int] -> ERHS -> EItem +mkEItem ii rhs = case rhs of + ETerm a -> EITerm a + ENonTerm cat -> EINonTerm cat + EStar r -> EIStar (normERule (ii,(mkECat ii, r))) + EPlus r -> EIPlus (normERule (ii,(mkECat ii, r))) + EOpt r -> EIOpt (normERule (ii,(mkECat ii, r))) + _ -> EINonTerm ("?????",[]) +-- _ -> error "should not happen in ebnf" --- + +mkECat ints = ("C", ints) + +prECat (c,[]) = c +prECat (c,ints) = c ++ "_" ++ prTList "_" (map show ints) + +mkCFCatE :: ECat -> CFCat +mkCFCatE = prECat + +updECat _ (c,[]) = (c,[]) +updECat ii (c,_) = (c,ii) + +mkNewECat (c,ii) str = (c ++ str,ii) + +------ parser for EBNF grammars + +pEBNF :: String -> Err EBNF +pEBNF = parseResultErr (longestOfMany (pJ pERule)) + +pERule :: Parser Char ERule +pERule = pECat ... pJ (literals ":=" ||| literals "::=") +.. pERHS 0 ..+ jL ";" + +pERHS :: Int -> Parser Char ERHS +pERHS 0 = pTList "|" (pERHS 1) *** foldr1 EAlt +pERHS 1 = longestOfMany (pJ (pERHS 2)) *** foldr ESeq EEmpty +pERHS 2 = pERHS 3 ... pJ pUnaryEOp *** (\ (a,f) -> f a) +pERHS 3 = pQuotedString *** ETerm + ||| pECat *** ENonTerm ||| pParenth (pERHS 0) + +pUnaryEOp :: Parser Char (ERHS -> ERHS) +pUnaryEOp = + lits "*" <<< EStar ||| lits "+" <<< EPlus ||| lits "?" <<< EOpt ||| succeed id + +pECat = pIdent *** (\c -> (c,[])) + + + +---------------------------------------------------------------------- +-- Module : Parsers +-- some parser combinators a la Wadler and Hutton. +-- (only used in module "EBNF") +----------------------------------------------------------------------------- + +infixr 2 |||, +|| +infixr 3 *** +infixr 5 .>. +infixr 5 ... +infixr 5 .... +infixr 5 +.. +infixr 5 ..+ +infixr 6 |> +infixr 3 <<< + + +type Parser a b = [a] -> [(b,[a])] + +parseResults :: Parser a b -> [a] -> [b] +parseResults p s = [x | (x,r) <- p s, null r] + +parseResultErr :: Show a => Parser a b -> [a] -> Err b +parseResultErr p s = case parseResults p s of + [x] -> return x + [] -> case + maximumBy (\x y -> compare (length y) (length x)) (s:[r | (_,r) <- p s]) of + r -> Bad $ "\nno parse; reached" ++++ take 300 (show r) + _ -> Bad "ambiguous" + +(...) :: Parser a b -> Parser a c -> Parser a (b,c) +(p ... q) s = [((x,y),r) | (x,t) <- p s, (y,r) <- q t] + +(.>.) :: Parser a b -> (b -> Parser a c) -> Parser a c +(p .>. f) s = [(c,r) | (x,t) <- p s, (c,r) <- f x t] + +(|||) :: Parser a b -> Parser a b -> Parser a b +(p ||| q) s = p s ++ q s + +(+||) :: Parser a b -> Parser a b -> Parser a b +p1 +|| p2 = take 1 . (p1 ||| p2) + +literal :: (Eq a) => a -> Parser a a +literal x (c:cs) = [(x,cs) | x == c] +literal _ _ = [] + +(***) :: Parser a b -> (b -> c) -> Parser a c +(p *** f) s = [(f x,r) | (x,r) <- p s] + +succeed :: b -> Parser a b +succeed v s = [(v,s)] + +fails :: Parser a b +fails s = [] + +(+..) :: Parser a b -> Parser a c -> Parser a c +p1 +.. p2 = p1 ... p2 *** snd + +(..+) :: Parser a b -> Parser a c -> Parser a b +p1 ..+ p2 = p1 ... p2 *** fst + +(<<<) :: Parser a b -> c -> Parser a c -- return +p <<< v = p *** (\x -> v) + +(|>) :: Parser a b -> (b -> Bool) -> Parser a b +p |> b = p .>. (\x -> if b x then succeed x else fails) + +many :: Parser a b -> Parser a [b] +many p = (p ... many p *** uncurry (:)) +|| succeed [] + +some :: Parser a b -> Parser a [b] +some p = (p ... many p) *** uncurry (:) + +longestOfMany :: Parser a b -> Parser a [b] +longestOfMany p = p .>. (\x -> longestOfMany p *** (x:)) +|| succeed [] + +closure :: (b -> Parser a b) -> (b -> Parser a b) +closure p v = p v .>. closure p ||| succeed v + +pJunk :: Parser Char String +pJunk = longestOfMany (satisfy (\x -> elem x "\n\t ")) + +pJ :: Parser Char a -> Parser Char a +pJ p = pJunk +.. p ..+ pJunk + +pTList :: String -> Parser Char a -> Parser Char [a] +pTList t p = p .... many (jL t +.. p) *** (\ (x,y) -> x:y) -- mod. AR 5/1/1999 + +pTJList :: String -> String -> Parser Char a -> Parser Char [a] +pTJList t1 t2 p = p .... many (literals t1 +.. jL t2 +.. p) *** (uncurry (:)) + +pElem :: [String] -> Parser Char String +pElem l = foldr (+||) fails (map literals l) + +(....) :: Parser Char b -> Parser Char c -> Parser Char (b,c) +p1 .... p2 = p1 ... pJunk +.. p2 + +item :: Parser a a +item (c:cs) = [(c,cs)] +item [] = [] + +satisfy :: (a -> Bool) -> Parser a a +satisfy b = item |> b + +literals :: (Eq a,Show a) => [a] -> Parser a [a] +literals l = case l of + [] -> succeed [] + a:l -> literal a ... literals l *** (\ (x,y) -> x:y) + +lits :: (Eq a,Show a) => [a] -> Parser a [a] +lits ts = literals ts + +jL :: String -> Parser Char String +jL = pJ . lits + +pParenth :: Parser Char a -> Parser Char a +pParenth p = literal '(' +.. pJunk +.. p ..+ pJunk ..+ literal ')' + +-- | p,...,p +pCommaList :: Parser Char a -> Parser Char [a] +pCommaList p = pTList "," (pJ p) + +-- | the same or nothing +pOptCommaList :: Parser Char a -> Parser Char [a] +pOptCommaList p = pCommaList p ||| succeed [] + +-- | (p,...,p), poss. empty +pArgList :: Parser Char a -> Parser Char [a] +pArgList p = pParenth (pCommaList p) ||| succeed [] + +-- | min. 2 args +pArgList2 :: Parser Char a -> Parser Char [a] +pArgList2 p = pParenth (p ... jL "," +.. pCommaList p) *** uncurry (:) + +longestOfSome :: Parser a b -> Parser a [b] +longestOfSome p = (p ... longestOfMany p) *** (\ (x,y) -> x:y) + +pIdent :: Parser Char String +pIdent = pLetter ... longestOfMany pAlphaPlusChar *** uncurry (:) + where alphaPlusChar c = isAlphaNum c || c=='_' || c=='\'' + +pLetter, pDigit :: Parser Char Char +pLetter = satisfy (`elem` (['A'..'Z'] ++ ['a'..'z'] ++ + ['\192' .. '\255'])) -- no such in Char +pDigit = satisfy isDigit + +pLetters :: Parser Char String +pLetters = longestOfSome pLetter + +pAlphanum, pAlphaPlusChar :: Parser Char Char +pAlphanum = pDigit ||| pLetter +pAlphaPlusChar = pAlphanum ||| satisfy (`elem` "_'") + +pQuotedString :: Parser Char String +pQuotedString = literal '"' +.. pEndQuoted where + pEndQuoted = + literal '"' *** (const []) + +|| (literal '\\' +.. item .>. \ c -> pEndQuoted *** (c:)) + +|| item .>. \ c -> pEndQuoted *** (c:) + +pIntc :: Parser Char Int +pIntc = some (satisfy numb) *** read + where numb x = elem x ['0'..'9'] +