mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-23 11:42:49 -06:00
Fixed bug in SRG EBNF generation. Before it assumed that all variation came from variants, and overgenerated if this was not true.
This commit is contained in:
@@ -2,7 +2,8 @@ module GF.Speech.RegExp (RE(..),
|
|||||||
epsilonRE, nullRE,
|
epsilonRE, nullRE,
|
||||||
isEpsilon, isNull,
|
isEpsilon, isNull,
|
||||||
unionRE, concatRE, seqRE,
|
unionRE, concatRE, seqRE,
|
||||||
repeatRE,
|
repeatRE, minimizeRE,
|
||||||
|
mapRE, joinRE,
|
||||||
dfa2re, prRE) where
|
dfa2re, prRE) where
|
||||||
|
|
||||||
import Data.List
|
import Data.List
|
||||||
@@ -85,6 +86,38 @@ finalRE fa = concatRE [repeatRE r1, r2,
|
|||||||
r3 = unionRE $ loops sF fa
|
r3 = unionRE $ loops sF fa
|
||||||
r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa
|
r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa
|
||||||
|
|
||||||
|
reverseRE :: RE a -> RE a
|
||||||
|
reverseRE (REConcat xs) = REConcat $ map reverseRE $ reverse xs
|
||||||
|
reverseRE (REUnion xs) = REUnion (map reverseRE xs)
|
||||||
|
reverseRE (RERepeat x) = RERepeat (reverseRE x)
|
||||||
|
reverseRE x = x
|
||||||
|
|
||||||
|
minimizeRE :: Ord a => RE a -> RE a
|
||||||
|
minimizeRE = reverseRE . mergeForward . reverseRE . mergeForward
|
||||||
|
|
||||||
|
mergeForward :: Ord a => RE a -> RE a
|
||||||
|
mergeForward (REUnion xs) =
|
||||||
|
unionRE [concatRE [mergeForward y,mergeForward (unionRE rs)] | (y,rs) <- buildMultiMap (map firstRE xs)]
|
||||||
|
mergeForward (REConcat (x:xs)) = concatRE [mergeForward x,mergeForward (REConcat xs)]
|
||||||
|
mergeForward (RERepeat r) = repeatRE (mergeForward r)
|
||||||
|
mergeForward r = r
|
||||||
|
|
||||||
|
firstRE :: RE a -> (RE a, RE a)
|
||||||
|
firstRE (REConcat (x:xs)) = (x, REConcat xs)
|
||||||
|
firstRE r = (r,epsilonRE)
|
||||||
|
|
||||||
|
mapRE :: (a -> b) -> RE a -> RE b
|
||||||
|
mapRE f (REConcat xs) = REConcat (map (mapRE f) xs)
|
||||||
|
mapRE f (REUnion xs) = REUnion (map (mapRE f) xs)
|
||||||
|
mapRE f (RERepeat xs) = RERepeat (mapRE f xs)
|
||||||
|
mapRE f (RESymbol s) = RESymbol (f s)
|
||||||
|
|
||||||
|
joinRE :: RE (RE a) -> RE a
|
||||||
|
joinRE (REConcat xs) = REConcat (map joinRE xs)
|
||||||
|
joinRE (REUnion xs) = REUnion (map joinRE xs)
|
||||||
|
joinRE (RERepeat xs) = RERepeat (joinRE xs)
|
||||||
|
joinRE (RESymbol ss) = ss
|
||||||
|
|
||||||
-- Debugging
|
-- Debugging
|
||||||
|
|
||||||
prRE :: Show a => RE a -> String
|
prRE :: Show a => RE a -> String
|
||||||
|
|||||||
@@ -190,17 +190,20 @@ ebnfSRGAlts alts = [EBnfSRGAlt p n (ebnfSRGItem sss)
|
|||||||
ebnfSRGItem :: [[Symbol SRGNT Token]] -> EBnfSRGItem
|
ebnfSRGItem :: [[Symbol SRGNT Token]] -> EBnfSRGItem
|
||||||
ebnfSRGItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
|
ebnfSRGItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
|
||||||
|
|
||||||
-- ^ Merges a list of right-hand sides which all have the same
|
-- | Merges a list of right-hand sides which all have the same
|
||||||
-- sequence of non-terminals.
|
-- sequence of non-terminals.
|
||||||
mergeItems :: [[Symbol SRGNT Token]] -> EBnfSRGItem
|
mergeItems :: [[Symbol SRGNT Token]] -> EBnfSRGItem
|
||||||
--mergeItems = unionRE . map seqRE
|
mergeItems = minimizeRE . ungroupTokens . minimizeRE . unionRE . map seqRE . map groupTokens
|
||||||
mergeItems [] = nullRE
|
|
||||||
mergeItems sss | any null rss = t
|
groupTokens :: [Symbol SRGNT Token] -> [Symbol SRGNT [Token]]
|
||||||
| otherwise = concatRE [t,seqRE (head cs), mergeItems nss]
|
groupTokens [] = []
|
||||||
where (tss,rss) = unzip $ map (span isToken) sss
|
groupTokens (Tok t:ss) = case groupTokens ss of
|
||||||
t = unionRE (map seqRE tss)
|
Tok ts:ss' -> Tok (t:ts):ss'
|
||||||
(cs,nss) = unzip $ map (splitAt 1) rss
|
ss' -> Tok [t]:ss'
|
||||||
isToken = symbol (const False) (const True)
|
groupTokens (Cat c:ss) = Cat c : groupTokens ss
|
||||||
|
|
||||||
|
ungroupTokens :: RE (Symbol SRGNT [Token]) -> RE (Symbol SRGNT Token)
|
||||||
|
ungroupTokens = joinRE . mapRE (symbol (RESymbol . Cat) (REConcat . map (RESymbol . Tok)))
|
||||||
|
|
||||||
--
|
--
|
||||||
-- * Utilities for building and printing SRGs
|
-- * Utilities for building and printing SRGs
|
||||||
|
|||||||
Reference in New Issue
Block a user