Fixed bug in SRG EBNF generation. Before it assumed that all variation came from variants, and overgenerated if this was not true.

This commit is contained in:
bringert
2007-01-07 23:16:32 +00:00
parent d18ccbf02e
commit 1b8bc71b28
2 changed files with 46 additions and 10 deletions

View File

@@ -2,7 +2,8 @@ module GF.Speech.RegExp (RE(..),
epsilonRE, nullRE,
isEpsilon, isNull,
unionRE, concatRE, seqRE,
repeatRE,
repeatRE, minimizeRE,
mapRE, joinRE,
dfa2re, prRE) where
import Data.List
@@ -85,6 +86,38 @@ finalRE fa = concatRE [repeatRE r1, r2,
r3 = unionRE $ loops sF fa
r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa
reverseRE :: RE a -> RE a
reverseRE (REConcat xs) = REConcat $ map reverseRE $ reverse xs
reverseRE (REUnion xs) = REUnion (map reverseRE xs)
reverseRE (RERepeat x) = RERepeat (reverseRE x)
reverseRE x = x
minimizeRE :: Ord a => RE a -> RE a
minimizeRE = reverseRE . mergeForward . reverseRE . mergeForward
mergeForward :: Ord a => RE a -> RE a
mergeForward (REUnion xs) =
unionRE [concatRE [mergeForward y,mergeForward (unionRE rs)] | (y,rs) <- buildMultiMap (map firstRE xs)]
mergeForward (REConcat (x:xs)) = concatRE [mergeForward x,mergeForward (REConcat xs)]
mergeForward (RERepeat r) = repeatRE (mergeForward r)
mergeForward r = r
firstRE :: RE a -> (RE a, RE a)
firstRE (REConcat (x:xs)) = (x, REConcat xs)
firstRE r = (r,epsilonRE)
mapRE :: (a -> b) -> RE a -> RE b
mapRE f (REConcat xs) = REConcat (map (mapRE f) xs)
mapRE f (REUnion xs) = REUnion (map (mapRE f) xs)
mapRE f (RERepeat xs) = RERepeat (mapRE f xs)
mapRE f (RESymbol s) = RESymbol (f s)
joinRE :: RE (RE a) -> RE a
joinRE (REConcat xs) = REConcat (map joinRE xs)
joinRE (REUnion xs) = REUnion (map joinRE xs)
joinRE (RERepeat xs) = RERepeat (joinRE xs)
joinRE (RESymbol ss) = ss
-- Debugging
prRE :: Show a => RE a -> String

View File

@@ -190,17 +190,20 @@ ebnfSRGAlts alts = [EBnfSRGAlt p n (ebnfSRGItem sss)
ebnfSRGItem :: [[Symbol SRGNT Token]] -> EBnfSRGItem
ebnfSRGItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
-- ^ Merges a list of right-hand sides which all have the same
-- | Merges a list of right-hand sides which all have the same
-- sequence of non-terminals.
mergeItems :: [[Symbol SRGNT Token]] -> EBnfSRGItem
--mergeItems = unionRE . map seqRE
mergeItems [] = nullRE
mergeItems sss | any null rss = t
| otherwise = concatRE [t,seqRE (head cs), mergeItems nss]
where (tss,rss) = unzip $ map (span isToken) sss
t = unionRE (map seqRE tss)
(cs,nss) = unzip $ map (splitAt 1) rss
isToken = symbol (const False) (const True)
mergeItems = minimizeRE . ungroupTokens . minimizeRE . unionRE . map seqRE . map groupTokens
groupTokens :: [Symbol SRGNT Token] -> [Symbol SRGNT [Token]]
groupTokens [] = []
groupTokens (Tok t:ss) = case groupTokens ss of
Tok ts:ss' -> Tok (t:ts):ss'
ss' -> Tok [t]:ss'
groupTokens (Cat c:ss) = Cat c : groupTokens ss
ungroupTokens :: RE (Symbol SRGNT [Token]) -> RE (Symbol SRGNT Token)
ungroupTokens = joinRE . mapRE (symbol (RESymbol . Cat) (REConcat . map (RESymbol . Tok)))
--
-- * Utilities for building and printing SRGs