From 1b8bc71b28997f7902d2809ce34254ac0168514f Mon Sep 17 00:00:00 2001 From: bringert Date: Sun, 7 Jan 2007 23:16:32 +0000 Subject: [PATCH] Fixed bug in SRG EBNF generation. Before it assumed that all variation came from variants, and overgenerated if this was not true. --- src/GF/Speech/RegExp.hs | 35 ++++++++++++++++++++++++++++++++++- src/GF/Speech/SRG.hs | 21 ++++++++++++--------- 2 files changed, 46 insertions(+), 10 deletions(-) diff --git a/src/GF/Speech/RegExp.hs b/src/GF/Speech/RegExp.hs index 1eb6efa4d..6c787b714 100644 --- a/src/GF/Speech/RegExp.hs +++ b/src/GF/Speech/RegExp.hs @@ -2,7 +2,8 @@ module GF.Speech.RegExp (RE(..), epsilonRE, nullRE, isEpsilon, isNull, unionRE, concatRE, seqRE, - repeatRE, + repeatRE, minimizeRE, + mapRE, joinRE, dfa2re, prRE) where import Data.List @@ -85,6 +86,38 @@ finalRE fa = concatRE [repeatRE r1, r2, r3 = unionRE $ loops sF fa r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa +reverseRE :: RE a -> RE a +reverseRE (REConcat xs) = REConcat $ map reverseRE $ reverse xs +reverseRE (REUnion xs) = REUnion (map reverseRE xs) +reverseRE (RERepeat x) = RERepeat (reverseRE x) +reverseRE x = x + +minimizeRE :: Ord a => RE a -> RE a +minimizeRE = reverseRE . mergeForward . reverseRE . mergeForward + +mergeForward :: Ord a => RE a -> RE a +mergeForward (REUnion xs) = + unionRE [concatRE [mergeForward y,mergeForward (unionRE rs)] | (y,rs) <- buildMultiMap (map firstRE xs)] +mergeForward (REConcat (x:xs)) = concatRE [mergeForward x,mergeForward (REConcat xs)] +mergeForward (RERepeat r) = repeatRE (mergeForward r) +mergeForward r = r + +firstRE :: RE a -> (RE a, RE a) +firstRE (REConcat (x:xs)) = (x, REConcat xs) +firstRE r = (r,epsilonRE) + +mapRE :: (a -> b) -> RE a -> RE b +mapRE f (REConcat xs) = REConcat (map (mapRE f) xs) +mapRE f (REUnion xs) = REUnion (map (mapRE f) xs) +mapRE f (RERepeat xs) = RERepeat (mapRE f xs) +mapRE f (RESymbol s) = RESymbol (f s) + +joinRE :: RE (RE a) -> RE a +joinRE (REConcat xs) = REConcat (map joinRE xs) +joinRE (REUnion xs) = REUnion (map joinRE xs) +joinRE (RERepeat xs) = RERepeat (joinRE xs) +joinRE (RESymbol ss) = ss + -- Debugging prRE :: Show a => RE a -> String diff --git a/src/GF/Speech/SRG.hs b/src/GF/Speech/SRG.hs index cc03cdca6..e0a347480 100644 --- a/src/GF/Speech/SRG.hs +++ b/src/GF/Speech/SRG.hs @@ -190,17 +190,20 @@ ebnfSRGAlts alts = [EBnfSRGAlt p n (ebnfSRGItem sss) ebnfSRGItem :: [[Symbol SRGNT Token]] -> EBnfSRGItem ebnfSRGItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats) --- ^ Merges a list of right-hand sides which all have the same +-- | Merges a list of right-hand sides which all have the same -- sequence of non-terminals. mergeItems :: [[Symbol SRGNT Token]] -> EBnfSRGItem ---mergeItems = unionRE . map seqRE -mergeItems [] = nullRE -mergeItems sss | any null rss = t - | otherwise = concatRE [t,seqRE (head cs), mergeItems nss] - where (tss,rss) = unzip $ map (span isToken) sss - t = unionRE (map seqRE tss) - (cs,nss) = unzip $ map (splitAt 1) rss - isToken = symbol (const False) (const True) +mergeItems = minimizeRE . ungroupTokens . minimizeRE . unionRE . map seqRE . map groupTokens + +groupTokens :: [Symbol SRGNT Token] -> [Symbol SRGNT [Token]] +groupTokens [] = [] +groupTokens (Tok t:ss) = case groupTokens ss of + Tok ts:ss' -> Tok (t:ts):ss' + ss' -> Tok [t]:ss' +groupTokens (Cat c:ss) = Cat c : groupTokens ss + +ungroupTokens :: RE (Symbol SRGNT [Token]) -> RE (Symbol SRGNT Token) +ungroupTokens = joinRE . mapRE (symbol (RESymbol . Cat) (REConcat . map (RESymbol . Tok))) -- -- * Utilities for building and printing SRGs