forked from GitHub/gf-core
+ References to modules under src/compiler have been eliminated from the PGF library (under src/runtime/haskell). Only two functions had to be moved (from GF.Data.Utilities to PGF.Utilities) to make this possible, other apparent dependencies turned out to be vacuous. + In gf.cabal, the GF executable no longer directly depends on the PGF library source directory, but only on the exposed library modules. This means that there is less duplication in gf.cabal and that the 30 modules in the PGF library will no longer be compiled twice while building GF. To make this possible, additional PGF library modules have been exposed, even though they should probably be considered for internal use only. They could be collected in a PGF.Internal module, or marked as "unstable", to make this explicit. + Also, by using the -fwarn-unused-imports flag, ~220 redundant imports were found and removed, reducing the total number of imports by ~15%.
355 lines
11 KiB
Haskell
355 lines
11 KiB
Haskell
----------------------------------------------------------------------
|
|
-- |
|
|
-- Module : EBNF
|
|
-- Maintainer : AR
|
|
-- Stability : (stable)
|
|
-- Portability : (portable)
|
|
--
|
|
-- > CVS $Date: 2005/04/21 16:21:13 $
|
|
-- > CVS $Author: bringert $
|
|
-- > CVS $Revision: 1.5 $
|
|
--
|
|
-- (Description of the module)
|
|
-----------------------------------------------------------------------------
|
|
|
|
module GF.Grammar.EBNF (getEBNF) where
|
|
|
|
import GF.Data.Operations
|
|
--import GF.Infra.Comments
|
|
import GF.Grammar.CF
|
|
--import GF.CF.CFIdent
|
|
import GF.Grammar.Grammar
|
|
--import GF.Grammar.PrGrammar
|
|
--import qualified GF.Source.AbsGF as A
|
|
|
|
import Data.Char
|
|
import Data.List
|
|
--import System.FilePath
|
|
|
|
|
|
|
|
-- AR 18/4/2000 - 31/3/2004
|
|
|
|
getEBNF :: FilePath -> String -> Err SourceGrammar
|
|
getEBNF fpath = fmap (cf2gf fpath . ebnf2cf) . pEBNF
|
|
|
|
type EBNF = [ERule]
|
|
type ERule = (ECat, ERHS)
|
|
type ECat = (String,[Int])
|
|
type ETok = String
|
|
|
|
ebnfID = "EBNF" ---- make this parametric!
|
|
|
|
data ERHS =
|
|
ETerm ETok
|
|
| ENonTerm ECat
|
|
| ESeq ERHS ERHS
|
|
| EAlt ERHS ERHS
|
|
| EStar ERHS
|
|
| EPlus ERHS
|
|
| EOpt ERHS
|
|
| EEmpty
|
|
|
|
type CFRHS = [CFItem]
|
|
type CFJustRule = (CFCat, CFRHS)
|
|
|
|
ebnf2cf :: EBNF -> [CFRule]
|
|
ebnf2cf ebnf =
|
|
[L NoLoc (mkCFF i rule,rule) | (i,rule) <- zip [0..] (normEBNF ebnf)] where
|
|
mkCFF i (c, _) = ("Mk" ++ c ++ "_" ++ show i)
|
|
|
|
normEBNF :: EBNF -> [CFJustRule]
|
|
normEBNF erules = let
|
|
erules1 = [normERule ([i],r) | (i,r) <- zip [0..] erules]
|
|
erules2 = erules1 ---refreshECats erules1 --- this seems to be just bad !
|
|
erules3 = concat (map pickERules erules2)
|
|
erules4 = nubERules erules3
|
|
in [(mkCFCatE cat, map eitem2cfitem its) | (cat,itss) <- erules3, its <- itss]
|
|
|
|
refreshECats :: [NormERule] -> [NormERule]
|
|
refreshECats rules = [recas [i] rule | (i,rule) <- zip [0..] rules] where
|
|
recas ii (cat,its) = (updECat ii cat, [recss ii 0 s | s <- its])
|
|
recss ii n [] = []
|
|
recss ii n (s:ss) = recit (ii ++ [n]) s : recss ii (n+1) ss
|
|
recit ii it = case it of
|
|
EINonTerm cat -> EINonTerm (updECat ii cat)
|
|
EIStar (cat,t) -> EIStar (updECat ii cat, [recss ii 0 s | s <- t])
|
|
EIPlus (cat,t) -> EIPlus (updECat ii cat, [recss ii 0 s | s <- t])
|
|
EIOpt (cat,t) -> EIOpt (updECat ii cat, [recss ii 0 s | s <- t])
|
|
_ -> it
|
|
|
|
pickERules :: NormERule -> [NormERule]
|
|
pickERules rule@(cat,alts) = rule : concat (map pics (concat alts)) where
|
|
pics it = case it of
|
|
EIStar ru@(cat,t) -> mkEStarRules cat ++ pickERules ru
|
|
EIPlus ru@(cat,t) -> mkEPlusRules cat ++ pickERules ru
|
|
EIOpt ru@(cat,t) -> mkEOptRules cat ++ pickERules ru
|
|
_ -> []
|
|
mkEStarRules cat = [(cat', [[],[EINonTerm cat, EINonTerm cat']])]
|
|
where cat' = mkNewECat cat "Star"
|
|
mkEPlusRules cat = [(cat', [[EINonTerm cat],[EINonTerm cat, EINonTerm cat']])]
|
|
where cat' = mkNewECat cat "Plus"
|
|
mkEOptRules cat = [(cat', [[],[EINonTerm cat]])]
|
|
where cat' = mkNewECat cat "Opt"
|
|
|
|
nubERules :: [NormERule] -> [NormERule]
|
|
nubERules rules = nub optim where
|
|
optim = map (substERules (map mkSubst replaces)) irreducibles
|
|
(replaces,irreducibles) = partition reducible rules
|
|
reducible (cat,[items]) = isNewCat cat && all isOldIt items
|
|
reducible _ = False
|
|
isNewCat (_,ints) = ints == []
|
|
isOldIt (EITerm _) = True
|
|
isOldIt (EINonTerm cat) = not (isNewCat cat)
|
|
isOldIt _ = False
|
|
mkSubst (cat,its) = (cat, head its) -- def of reducible: its must be singleton
|
|
--- the optimization assumes each cat has at most one EBNF rule.
|
|
|
|
substERules :: [(ECat,[EItem])] -> NormERule -> NormERule
|
|
substERules g (cat,itss) = (cat, map sub itss) where
|
|
sub [] = []
|
|
sub (i@(EINonTerm cat') : ii) = case lookup cat g of
|
|
Just its -> its ++ sub ii
|
|
_ -> i : sub ii
|
|
sub (EIStar r : ii) = EIStar (substERules g r) : ii
|
|
sub (EIPlus r : ii) = EIPlus (substERules g r) : ii
|
|
sub (EIOpt r : ii) = EIOpt (substERules g r) : ii
|
|
|
|
eitem2cfitem :: EItem -> CFItem
|
|
eitem2cfitem it = case it of
|
|
EITerm a -> Right a
|
|
EINonTerm cat -> Left (mkCFCatE cat)
|
|
EIStar (cat,_) -> Left (mkCFCatE (mkNewECat cat "Star"))
|
|
EIPlus (cat,_) -> Left (mkCFCatE (mkNewECat cat "Plus"))
|
|
EIOpt (cat,_) -> Left (mkCFCatE (mkNewECat cat "Opt"))
|
|
|
|
type NormERule = (ECat,[[EItem]]) -- disjunction of sequences of items
|
|
|
|
data EItem =
|
|
EITerm String
|
|
| EINonTerm ECat
|
|
| EIStar NormERule
|
|
| EIPlus NormERule
|
|
| EIOpt NormERule
|
|
deriving Eq
|
|
|
|
normERule :: ([Int],ERule) -> NormERule
|
|
normERule (ii,(cat,rhs)) =
|
|
(cat,[map (mkEItem (ii ++ [i])) r' | (i,r') <- zip [0..] (disjNorm rhs)]) where
|
|
disjNorm r = case r of
|
|
ESeq r1 r2 -> [x ++ y | x <- disjNorm r1, y <- disjNorm r2]
|
|
EAlt r1 r2 -> disjNorm r1 ++ disjNorm r2
|
|
EEmpty -> [[]]
|
|
_ -> [[r]]
|
|
|
|
mkEItem :: [Int] -> ERHS -> EItem
|
|
mkEItem ii rhs = case rhs of
|
|
ETerm a -> EITerm a
|
|
ENonTerm cat -> EINonTerm cat
|
|
EStar r -> EIStar (normERule (ii,(mkECat ii, r)))
|
|
EPlus r -> EIPlus (normERule (ii,(mkECat ii, r)))
|
|
EOpt r -> EIOpt (normERule (ii,(mkECat ii, r)))
|
|
_ -> EINonTerm ("?????",[])
|
|
-- _ -> error "should not happen in ebnf" ---
|
|
|
|
mkECat ints = ("C", ints)
|
|
|
|
prECat (c,[]) = c
|
|
prECat (c,ints) = c ++ "_" ++ prTList "_" (map show ints)
|
|
|
|
mkCFCatE :: ECat -> CFCat
|
|
mkCFCatE = prECat
|
|
|
|
updECat _ (c,[]) = (c,[])
|
|
updECat ii (c,_) = (c,ii)
|
|
|
|
mkNewECat (c,ii) str = (c ++ str,ii)
|
|
|
|
------ parser for EBNF grammars
|
|
|
|
pEBNF :: String -> Err EBNF
|
|
pEBNF = parseResultErr (longestOfMany (pJ pERule))
|
|
|
|
pERule :: Parser Char ERule
|
|
pERule = pECat ... pJ (literals ":=" ||| literals "::=") +.. pERHS 0 ..+ jL ";"
|
|
|
|
pERHS :: Int -> Parser Char ERHS
|
|
pERHS 0 = pTList "|" (pERHS 1) *** foldr1 EAlt
|
|
pERHS 1 = longestOfMany (pJ (pERHS 2)) *** foldr ESeq EEmpty
|
|
pERHS 2 = pERHS 3 ... pJ pUnaryEOp *** (\ (a,f) -> f a)
|
|
pERHS 3 = pQuotedString *** ETerm
|
|
||| pECat *** ENonTerm ||| pParenth (pERHS 0)
|
|
|
|
pUnaryEOp :: Parser Char (ERHS -> ERHS)
|
|
pUnaryEOp =
|
|
lits "*" <<< EStar ||| lits "+" <<< EPlus ||| lits "?" <<< EOpt ||| succeed id
|
|
|
|
pECat = pIdent *** (\c -> (c,[]))
|
|
|
|
|
|
|
|
----------------------------------------------------------------------
|
|
-- Module : Parsers
|
|
-- some parser combinators a la Wadler and Hutton.
|
|
-- (only used in module "EBNF")
|
|
-----------------------------------------------------------------------------
|
|
|
|
infixr 2 |||, +||
|
|
infixr 3 ***
|
|
infixr 5 .>.
|
|
infixr 5 ...
|
|
infixr 5 ....
|
|
infixr 5 +..
|
|
infixr 5 ..+
|
|
infixr 6 |>
|
|
infixr 3 <<<
|
|
|
|
|
|
type Parser a b = [a] -> [(b,[a])]
|
|
|
|
parseResults :: Parser a b -> [a] -> [b]
|
|
parseResults p s = [x | (x,r) <- p s, null r]
|
|
|
|
parseResultErr :: Show a => Parser a b -> [a] -> Err b
|
|
parseResultErr p s = case parseResults p s of
|
|
[x] -> return x
|
|
[] -> case
|
|
maximumBy (\x y -> compare (length y) (length x)) (s:[r | (_,r) <- p s]) of
|
|
r -> Bad $ "\nno parse; reached" ++++ take 300 (show r)
|
|
_ -> Bad "ambiguous"
|
|
|
|
(...) :: Parser a b -> Parser a c -> Parser a (b,c)
|
|
(p ... q) s = [((x,y),r) | (x,t) <- p s, (y,r) <- q t]
|
|
|
|
(.>.) :: Parser a b -> (b -> Parser a c) -> Parser a c
|
|
(p .>. f) s = [(c,r) | (x,t) <- p s, (c,r) <- f x t]
|
|
|
|
(|||) :: Parser a b -> Parser a b -> Parser a b
|
|
(p ||| q) s = p s ++ q s
|
|
|
|
(+||) :: Parser a b -> Parser a b -> Parser a b
|
|
p1 +|| p2 = take 1 . (p1 ||| p2)
|
|
|
|
literal :: (Eq a) => a -> Parser a a
|
|
literal x (c:cs) = [(x,cs) | x == c]
|
|
literal _ _ = []
|
|
|
|
(***) :: Parser a b -> (b -> c) -> Parser a c
|
|
(p *** f) s = [(f x,r) | (x,r) <- p s]
|
|
|
|
succeed :: b -> Parser a b
|
|
succeed v s = [(v,s)]
|
|
|
|
fails :: Parser a b
|
|
fails s = []
|
|
|
|
(+..) :: Parser a b -> Parser a c -> Parser a c
|
|
p1 +.. p2 = p1 ... p2 *** snd
|
|
|
|
(..+) :: Parser a b -> Parser a c -> Parser a b
|
|
p1 ..+ p2 = p1 ... p2 *** fst
|
|
|
|
(<<<) :: Parser a b -> c -> Parser a c -- return
|
|
p <<< v = p *** (\x -> v)
|
|
|
|
(|>) :: Parser a b -> (b -> Bool) -> Parser a b
|
|
p |> b = p .>. (\x -> if b x then succeed x else fails)
|
|
|
|
many :: Parser a b -> Parser a [b]
|
|
many p = (p ... many p *** uncurry (:)) +|| succeed []
|
|
|
|
some :: Parser a b -> Parser a [b]
|
|
some p = (p ... many p) *** uncurry (:)
|
|
|
|
longestOfMany :: Parser a b -> Parser a [b]
|
|
longestOfMany p = p .>. (\x -> longestOfMany p *** (x:)) +|| succeed []
|
|
|
|
closure :: (b -> Parser a b) -> (b -> Parser a b)
|
|
closure p v = p v .>. closure p ||| succeed v
|
|
|
|
pJunk :: Parser Char String
|
|
pJunk = longestOfMany (satisfy (\x -> elem x "\n\t "))
|
|
|
|
pJ :: Parser Char a -> Parser Char a
|
|
pJ p = pJunk +.. p ..+ pJunk
|
|
|
|
pTList :: String -> Parser Char a -> Parser Char [a]
|
|
pTList t p = p .... many (jL t +.. p) *** (\ (x,y) -> x:y) -- mod. AR 5/1/1999
|
|
|
|
pTJList :: String -> String -> Parser Char a -> Parser Char [a]
|
|
pTJList t1 t2 p = p .... many (literals t1 +.. jL t2 +.. p) *** (uncurry (:))
|
|
|
|
pElem :: [String] -> Parser Char String
|
|
pElem l = foldr (+||) fails (map literals l)
|
|
|
|
(....) :: Parser Char b -> Parser Char c -> Parser Char (b,c)
|
|
p1 .... p2 = p1 ... pJunk +.. p2
|
|
|
|
item :: Parser a a
|
|
item (c:cs) = [(c,cs)]
|
|
item [] = []
|
|
|
|
satisfy :: (a -> Bool) -> Parser a a
|
|
satisfy b = item |> b
|
|
|
|
literals :: (Eq a,Show a) => [a] -> Parser a [a]
|
|
literals l = case l of
|
|
[] -> succeed []
|
|
a:l -> literal a ... literals l *** (\ (x,y) -> x:y)
|
|
|
|
lits :: (Eq a,Show a) => [a] -> Parser a [a]
|
|
lits ts = literals ts
|
|
|
|
jL :: String -> Parser Char String
|
|
jL = pJ . lits
|
|
|
|
pParenth :: Parser Char a -> Parser Char a
|
|
pParenth p = literal '(' +.. pJunk +.. p ..+ pJunk ..+ literal ')'
|
|
|
|
-- | p,...,p
|
|
pCommaList :: Parser Char a -> Parser Char [a]
|
|
pCommaList p = pTList "," (pJ p)
|
|
|
|
-- | the same or nothing
|
|
pOptCommaList :: Parser Char a -> Parser Char [a]
|
|
pOptCommaList p = pCommaList p ||| succeed []
|
|
|
|
-- | (p,...,p), poss. empty
|
|
pArgList :: Parser Char a -> Parser Char [a]
|
|
pArgList p = pParenth (pCommaList p) ||| succeed []
|
|
|
|
-- | min. 2 args
|
|
pArgList2 :: Parser Char a -> Parser Char [a]
|
|
pArgList2 p = pParenth (p ... jL "," +.. pCommaList p) *** uncurry (:)
|
|
|
|
longestOfSome :: Parser a b -> Parser a [b]
|
|
longestOfSome p = (p ... longestOfMany p) *** (\ (x,y) -> x:y)
|
|
|
|
pIdent :: Parser Char String
|
|
pIdent = pLetter ... longestOfMany pAlphaPlusChar *** uncurry (:)
|
|
where alphaPlusChar c = isAlphaNum c || c=='_' || c=='\''
|
|
|
|
pLetter, pDigit :: Parser Char Char
|
|
pLetter = satisfy (`elem` (['A'..'Z'] ++ ['a'..'z'] ++
|
|
['\192' .. '\255'])) -- no such in Char
|
|
pDigit = satisfy isDigit
|
|
|
|
pLetters :: Parser Char String
|
|
pLetters = longestOfSome pLetter
|
|
|
|
pAlphanum, pAlphaPlusChar :: Parser Char Char
|
|
pAlphanum = pDigit ||| pLetter
|
|
pAlphaPlusChar = pAlphanum ||| satisfy (`elem` "_'")
|
|
|
|
pQuotedString :: Parser Char String
|
|
pQuotedString = literal '"' +.. pEndQuoted where
|
|
pEndQuoted =
|
|
literal '"' *** (const [])
|
|
+|| (literal '\\' +.. item .>. \ c -> pEndQuoted *** (c:))
|
|
+|| item .>. \ c -> pEndQuoted *** (c:)
|
|
|
|
pIntc :: Parser Char Int
|
|
pIntc = some (satisfy numb) *** read
|
|
where numb x = elem x ['0'..'9']
|
|
|