mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-05 09:12:51 -06:00
refactor the compilation of CFG and EBNF grammars. Now they are parsed by using GF.Grammar.Parser just like the ordinary GF grammars. Furthermore now GF.Speech.CFG is moved to GF.Grammar.CFG. The new module is used by both the speech conversion utils and by the compiler for CFG grammars. The parser for CFG now consumes a lot less memory and can be used with grammars with more than 4 000 000 productions.
This commit is contained in:
@@ -12,34 +12,19 @@
|
||||
-- (Description of the module)
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Grammar.EBNF (getEBNF) where
|
||||
module GF.Grammar.EBNF (EBNF, ERule, ERHS(..), ebnf2cf) where
|
||||
|
||||
import GF.Data.Operations
|
||||
--import GF.Infra.Comments
|
||||
import GF.Grammar.CF
|
||||
--import GF.CF.CFIdent
|
||||
import GF.Grammar.Grammar
|
||||
--import GF.Grammar.PrGrammar
|
||||
--import qualified GF.Source.AbsGF as A
|
||||
import GF.Grammar.CFG
|
||||
import PGF (mkCId)
|
||||
|
||||
import Data.Char
|
||||
import Data.List
|
||||
--import System.FilePath
|
||||
|
||||
|
||||
|
||||
-- AR 18/4/2000 - 31/3/2004
|
||||
|
||||
getEBNF :: FilePath -> String -> Err SourceGrammar
|
||||
getEBNF fpath = fmap (cf2gf fpath . ebnf2cf) . pEBNF
|
||||
|
||||
type EBNF = [ERule]
|
||||
type ERule = (ECat, ERHS)
|
||||
type ECat = (String,[Int])
|
||||
type ETok = String
|
||||
|
||||
ebnfID = "EBNF" ---- make this parametric!
|
||||
|
||||
data ERHS =
|
||||
ETerm ETok
|
||||
| ENonTerm ECat
|
||||
@@ -50,13 +35,14 @@ data ERHS =
|
||||
| EOpt ERHS
|
||||
| EEmpty
|
||||
|
||||
type CFRHS = [CFItem]
|
||||
type CFJustRule = (CFCat, CFRHS)
|
||||
type CFRHS = [CFSymbol]
|
||||
type CFJustRule = (Cat, CFRHS)
|
||||
|
||||
ebnf2cf :: EBNF -> [CFRule]
|
||||
ebnf2cf ebnf =
|
||||
[L NoLoc (mkCFF i rule,rule) | (i,rule) <- zip [0..] (normEBNF ebnf)] where
|
||||
mkCFF i (c, _) = ("Mk" ++ c ++ "_" ++ show i)
|
||||
[CFRule cat items (mkCFF i cat) | (i,(cat,items)) <- zip [0..] (normEBNF ebnf)]
|
||||
where
|
||||
mkCFF i c = CFObj (mkCId ("Mk" ++ c ++ "_" ++ show i)) []
|
||||
|
||||
normEBNF :: EBNF -> [CFJustRule]
|
||||
normEBNF erules = let
|
||||
@@ -115,13 +101,13 @@ substERules g (cat,itss) = (cat, map sub itss) where
|
||||
sub (EIPlus r : ii) = EIPlus (substERules g r) : ii
|
||||
sub (EIOpt r : ii) = EIOpt (substERules g r) : ii
|
||||
|
||||
eitem2cfitem :: EItem -> CFItem
|
||||
eitem2cfitem :: EItem -> CFSymbol
|
||||
eitem2cfitem it = case it of
|
||||
EITerm a -> Right a
|
||||
EINonTerm cat -> Left (mkCFCatE cat)
|
||||
EIStar (cat,_) -> Left (mkCFCatE (mkNewECat cat "Star"))
|
||||
EIPlus (cat,_) -> Left (mkCFCatE (mkNewECat cat "Plus"))
|
||||
EIOpt (cat,_) -> Left (mkCFCatE (mkNewECat cat "Opt"))
|
||||
EITerm a -> Terminal a
|
||||
EINonTerm cat -> NonTerminal (mkCFCatE cat)
|
||||
EIStar (cat,_) -> NonTerminal (mkCFCatE (mkNewECat cat "Star"))
|
||||
EIPlus (cat,_) -> NonTerminal (mkCFCatE (mkNewECat cat "Plus"))
|
||||
EIOpt (cat,_) -> NonTerminal (mkCFCatE (mkNewECat cat "Opt"))
|
||||
|
||||
type NormERule = (ECat,[[EItem]]) -- disjunction of sequences of items
|
||||
|
||||
@@ -157,198 +143,10 @@ mkECat ints = ("C", ints)
|
||||
prECat (c,[]) = c
|
||||
prECat (c,ints) = c ++ "_" ++ prTList "_" (map show ints)
|
||||
|
||||
mkCFCatE :: ECat -> CFCat
|
||||
mkCFCatE :: ECat -> Cat
|
||||
mkCFCatE = prECat
|
||||
|
||||
updECat _ (c,[]) = (c,[])
|
||||
updECat ii (c,_) = (c,ii)
|
||||
|
||||
mkNewECat (c,ii) str = (c ++ str,ii)
|
||||
|
||||
------ parser for EBNF grammars
|
||||
|
||||
pEBNF :: String -> Err EBNF
|
||||
pEBNF = parseResultErr (longestOfMany (pJ pERule))
|
||||
|
||||
pERule :: Parser Char ERule
|
||||
pERule = pECat ... pJ (literals ":=" ||| literals "::=") +.. pERHS 0 ..+ jL ";"
|
||||
|
||||
pERHS :: Int -> Parser Char ERHS
|
||||
pERHS 0 = pTList "|" (pERHS 1) *** foldr1 EAlt
|
||||
pERHS 1 = longestOfMany (pJ (pERHS 2)) *** foldr ESeq EEmpty
|
||||
pERHS 2 = pERHS 3 ... pJ pUnaryEOp *** (\ (a,f) -> f a)
|
||||
pERHS 3 = pQuotedString *** ETerm
|
||||
||| pECat *** ENonTerm ||| pParenth (pERHS 0)
|
||||
|
||||
pUnaryEOp :: Parser Char (ERHS -> ERHS)
|
||||
pUnaryEOp =
|
||||
lits "*" <<< EStar ||| lits "+" <<< EPlus ||| lits "?" <<< EOpt ||| succeed id
|
||||
|
||||
pECat = pIdent *** (\c -> (c,[]))
|
||||
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- Module : Parsers
|
||||
-- some parser combinators a la Wadler and Hutton.
|
||||
-- (only used in module "EBNF")
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
infixr 2 |||, +||
|
||||
infixr 3 ***
|
||||
infixr 5 .>.
|
||||
infixr 5 ...
|
||||
infixr 5 ....
|
||||
infixr 5 +..
|
||||
infixr 5 ..+
|
||||
infixr 6 |>
|
||||
infixr 3 <<<
|
||||
|
||||
|
||||
type Parser a b = [a] -> [(b,[a])]
|
||||
|
||||
parseResults :: Parser a b -> [a] -> [b]
|
||||
parseResults p s = [x | (x,r) <- p s, null r]
|
||||
|
||||
parseResultErr :: Show a => Parser a b -> [a] -> Err b
|
||||
parseResultErr p s = case parseResults p s of
|
||||
[x] -> return x
|
||||
[] -> case
|
||||
maximumBy (\x y -> compare (length y) (length x)) (s:[r | (_,r) <- p s]) of
|
||||
r -> Bad $ "\nno parse; reached" ++++ take 300 (show r)
|
||||
_ -> Bad "ambiguous"
|
||||
|
||||
(...) :: Parser a b -> Parser a c -> Parser a (b,c)
|
||||
(p ... q) s = [((x,y),r) | (x,t) <- p s, (y,r) <- q t]
|
||||
|
||||
(.>.) :: Parser a b -> (b -> Parser a c) -> Parser a c
|
||||
(p .>. f) s = [(c,r) | (x,t) <- p s, (c,r) <- f x t]
|
||||
|
||||
(|||) :: Parser a b -> Parser a b -> Parser a b
|
||||
(p ||| q) s = p s ++ q s
|
||||
|
||||
(+||) :: Parser a b -> Parser a b -> Parser a b
|
||||
p1 +|| p2 = take 1 . (p1 ||| p2)
|
||||
|
||||
literal :: (Eq a) => a -> Parser a a
|
||||
literal x (c:cs) = [(x,cs) | x == c]
|
||||
literal _ _ = []
|
||||
|
||||
(***) :: Parser a b -> (b -> c) -> Parser a c
|
||||
(p *** f) s = [(f x,r) | (x,r) <- p s]
|
||||
|
||||
succeed :: b -> Parser a b
|
||||
succeed v s = [(v,s)]
|
||||
|
||||
fails :: Parser a b
|
||||
fails s = []
|
||||
|
||||
(+..) :: Parser a b -> Parser a c -> Parser a c
|
||||
p1 +.. p2 = p1 ... p2 *** snd
|
||||
|
||||
(..+) :: Parser a b -> Parser a c -> Parser a b
|
||||
p1 ..+ p2 = p1 ... p2 *** fst
|
||||
|
||||
(<<<) :: Parser a b -> c -> Parser a c -- return
|
||||
p <<< v = p *** (\x -> v)
|
||||
|
||||
(|>) :: Parser a b -> (b -> Bool) -> Parser a b
|
||||
p |> b = p .>. (\x -> if b x then succeed x else fails)
|
||||
|
||||
many :: Parser a b -> Parser a [b]
|
||||
many p = (p ... many p *** uncurry (:)) +|| succeed []
|
||||
|
||||
some :: Parser a b -> Parser a [b]
|
||||
some p = (p ... many p) *** uncurry (:)
|
||||
|
||||
longestOfMany :: Parser a b -> Parser a [b]
|
||||
longestOfMany p = p .>. (\x -> longestOfMany p *** (x:)) +|| succeed []
|
||||
|
||||
closure :: (b -> Parser a b) -> (b -> Parser a b)
|
||||
closure p v = p v .>. closure p ||| succeed v
|
||||
|
||||
pJunk :: Parser Char String
|
||||
pJunk = longestOfMany (satisfy (\x -> elem x "\n\t "))
|
||||
|
||||
pJ :: Parser Char a -> Parser Char a
|
||||
pJ p = pJunk +.. p ..+ pJunk
|
||||
|
||||
pTList :: String -> Parser Char a -> Parser Char [a]
|
||||
pTList t p = p .... many (jL t +.. p) *** (\ (x,y) -> x:y) -- mod. AR 5/1/1999
|
||||
|
||||
pTJList :: String -> String -> Parser Char a -> Parser Char [a]
|
||||
pTJList t1 t2 p = p .... many (literals t1 +.. jL t2 +.. p) *** (uncurry (:))
|
||||
|
||||
pElem :: [String] -> Parser Char String
|
||||
pElem l = foldr (+||) fails (map literals l)
|
||||
|
||||
(....) :: Parser Char b -> Parser Char c -> Parser Char (b,c)
|
||||
p1 .... p2 = p1 ... pJunk +.. p2
|
||||
|
||||
item :: Parser a a
|
||||
item (c:cs) = [(c,cs)]
|
||||
item [] = []
|
||||
|
||||
satisfy :: (a -> Bool) -> Parser a a
|
||||
satisfy b = item |> b
|
||||
|
||||
literals :: (Eq a,Show a) => [a] -> Parser a [a]
|
||||
literals l = case l of
|
||||
[] -> succeed []
|
||||
a:l -> literal a ... literals l *** (\ (x,y) -> x:y)
|
||||
|
||||
lits :: (Eq a,Show a) => [a] -> Parser a [a]
|
||||
lits ts = literals ts
|
||||
|
||||
jL :: String -> Parser Char String
|
||||
jL = pJ . lits
|
||||
|
||||
pParenth :: Parser Char a -> Parser Char a
|
||||
pParenth p = literal '(' +.. pJunk +.. p ..+ pJunk ..+ literal ')'
|
||||
|
||||
-- | p,...,p
|
||||
pCommaList :: Parser Char a -> Parser Char [a]
|
||||
pCommaList p = pTList "," (pJ p)
|
||||
|
||||
-- | the same or nothing
|
||||
pOptCommaList :: Parser Char a -> Parser Char [a]
|
||||
pOptCommaList p = pCommaList p ||| succeed []
|
||||
|
||||
-- | (p,...,p), poss. empty
|
||||
pArgList :: Parser Char a -> Parser Char [a]
|
||||
pArgList p = pParenth (pCommaList p) ||| succeed []
|
||||
|
||||
-- | min. 2 args
|
||||
pArgList2 :: Parser Char a -> Parser Char [a]
|
||||
pArgList2 p = pParenth (p ... jL "," +.. pCommaList p) *** uncurry (:)
|
||||
|
||||
longestOfSome :: Parser a b -> Parser a [b]
|
||||
longestOfSome p = (p ... longestOfMany p) *** (\ (x,y) -> x:y)
|
||||
|
||||
pIdent :: Parser Char String
|
||||
pIdent = pLetter ... longestOfMany pAlphaPlusChar *** uncurry (:)
|
||||
where alphaPlusChar c = isAlphaNum c || c=='_' || c=='\''
|
||||
|
||||
pLetter, pDigit :: Parser Char Char
|
||||
pLetter = satisfy (`elem` (['A'..'Z'] ++ ['a'..'z'] ++
|
||||
['\192' .. '\255'])) -- no such in Char
|
||||
pDigit = satisfy isDigit
|
||||
|
||||
pLetters :: Parser Char String
|
||||
pLetters = longestOfSome pLetter
|
||||
|
||||
pAlphanum, pAlphaPlusChar :: Parser Char Char
|
||||
pAlphanum = pDigit ||| pLetter
|
||||
pAlphaPlusChar = pAlphanum ||| satisfy (`elem` "_'")
|
||||
|
||||
pQuotedString :: Parser Char String
|
||||
pQuotedString = literal '"' +.. pEndQuoted where
|
||||
pEndQuoted =
|
||||
literal '"' *** (const [])
|
||||
+|| (literal '\\' +.. item .>. \ c -> pEndQuoted *** (c:))
|
||||
+|| item .>. \ c -> pEndQuoted *** (c:)
|
||||
|
||||
pIntc :: Parser Char Int
|
||||
pIntc = some (satisfy numb) *** read
|
||||
where numb x = elem x ['0'..'9']
|
||||
|
||||
|
||||
Reference in New Issue
Block a user