Fixed bug in SRG EBNF generation. Before it assumed that all variation came from variants, and overgenerated if this was not true.

This commit is contained in:
bringert
2007-01-07 23:16:32 +00:00
parent f6fe45ac5a
commit 1f3cb2546a
2 changed files with 46 additions and 10 deletions

View File

@@ -2,7 +2,8 @@ module GF.Speech.RegExp (RE(..),
epsilonRE, nullRE, epsilonRE, nullRE,
isEpsilon, isNull, isEpsilon, isNull,
unionRE, concatRE, seqRE, unionRE, concatRE, seqRE,
repeatRE, repeatRE, minimizeRE,
mapRE, joinRE,
dfa2re, prRE) where dfa2re, prRE) where
import Data.List import Data.List
@@ -85,6 +86,38 @@ finalRE fa = concatRE [repeatRE r1, r2,
r3 = unionRE $ loops sF fa r3 = unionRE $ loops sF fa
r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa
reverseRE :: RE a -> RE a
reverseRE (REConcat xs) = REConcat $ map reverseRE $ reverse xs
reverseRE (REUnion xs) = REUnion (map reverseRE xs)
reverseRE (RERepeat x) = RERepeat (reverseRE x)
reverseRE x = x
minimizeRE :: Ord a => RE a -> RE a
minimizeRE = reverseRE . mergeForward . reverseRE . mergeForward
mergeForward :: Ord a => RE a -> RE a
mergeForward (REUnion xs) =
unionRE [concatRE [mergeForward y,mergeForward (unionRE rs)] | (y,rs) <- buildMultiMap (map firstRE xs)]
mergeForward (REConcat (x:xs)) = concatRE [mergeForward x,mergeForward (REConcat xs)]
mergeForward (RERepeat r) = repeatRE (mergeForward r)
mergeForward r = r
firstRE :: RE a -> (RE a, RE a)
firstRE (REConcat (x:xs)) = (x, REConcat xs)
firstRE r = (r,epsilonRE)
mapRE :: (a -> b) -> RE a -> RE b
mapRE f (REConcat xs) = REConcat (map (mapRE f) xs)
mapRE f (REUnion xs) = REUnion (map (mapRE f) xs)
mapRE f (RERepeat xs) = RERepeat (mapRE f xs)
mapRE f (RESymbol s) = RESymbol (f s)
joinRE :: RE (RE a) -> RE a
joinRE (REConcat xs) = REConcat (map joinRE xs)
joinRE (REUnion xs) = REUnion (map joinRE xs)
joinRE (RERepeat xs) = RERepeat (joinRE xs)
joinRE (RESymbol ss) = ss
-- Debugging -- Debugging
prRE :: Show a => RE a -> String prRE :: Show a => RE a -> String

View File

@@ -190,17 +190,20 @@ ebnfSRGAlts alts = [EBnfSRGAlt p n (ebnfSRGItem sss)
ebnfSRGItem :: [[Symbol SRGNT Token]] -> EBnfSRGItem ebnfSRGItem :: [[Symbol SRGNT Token]] -> EBnfSRGItem
ebnfSRGItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats) ebnfSRGItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
-- ^ Merges a list of right-hand sides which all have the same -- | Merges a list of right-hand sides which all have the same
-- sequence of non-terminals. -- sequence of non-terminals.
mergeItems :: [[Symbol SRGNT Token]] -> EBnfSRGItem mergeItems :: [[Symbol SRGNT Token]] -> EBnfSRGItem
--mergeItems = unionRE . map seqRE mergeItems = minimizeRE . ungroupTokens . minimizeRE . unionRE . map seqRE . map groupTokens
mergeItems [] = nullRE
mergeItems sss | any null rss = t groupTokens :: [Symbol SRGNT Token] -> [Symbol SRGNT [Token]]
| otherwise = concatRE [t,seqRE (head cs), mergeItems nss] groupTokens [] = []
where (tss,rss) = unzip $ map (span isToken) sss groupTokens (Tok t:ss) = case groupTokens ss of
t = unionRE (map seqRE tss) Tok ts:ss' -> Tok (t:ts):ss'
(cs,nss) = unzip $ map (splitAt 1) rss ss' -> Tok [t]:ss'
isToken = symbol (const False) (const True) groupTokens (Cat c:ss) = Cat c : groupTokens ss
ungroupTokens :: RE (Symbol SRGNT [Token]) -> RE (Symbol SRGNT Token)
ungroupTokens = joinRE . mapRE (symbol (RESymbol . Cat) (REConcat . map (RESymbol . Tok)))
-- --
-- * Utilities for building and printing SRGs -- * Utilities for building and printing SRGs