Some refactorings needed for recursion removal.

This commit is contained in:
bringert
2007-06-25 13:38:40 +00:00
parent 33d7b2567b
commit 815cc5a7eb
4 changed files with 73 additions and 59 deletions

View File

@@ -68,33 +68,6 @@ makeSimpleRegular opts s = makeRegular $ cfgToCFRules s
preprocess = fix (topDownFilter start . bottomUpFilter) preprocess = fix (topDownFilter start . bottomUpFilter)
. removeCycles . removeCycles
--
-- * Approximate context-free grammars with regular grammars.
--
-- Use the transformation algorithm from \"Regular Approximation of Context-free
-- Grammars through Approximation\", Mohri and Nederhof, 2000
-- to create an over-generating regular frammar for a context-free
-- grammar
makeRegular :: CFRules -> CFRules
makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
where trSet cs | allXLinear cs rs = rs
| otherwise = concatMap handleCat csl
where csl = Set.toList cs
rs = catSetRules g cs
handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e
++ concatMap (makeRightLinearRules c) (catRules g c)
where c' = newCat c
makeRightLinearRules b' (CFRule c ss n) =
case ys of
[] -> newRule b' (xs ++ [Cat (newCat c)]) n -- no non-terminals left
(Cat b:zs) -> newRule b' (xs ++ [Cat b]) n
++ makeRightLinearRules (newCat b) (CFRule c zs n)
where (xs,ys) = break (`catElem` cs) ss
-- don't add rules on the form A -> A
newRule c rhs n | rhs == [Cat c] = []
| otherwise = [CFRule c rhs n]
newCat c = c ++ "$"
-- --
-- * Compile strongly regular grammars to NFAs -- * Compile strongly regular grammars to NFAs
@@ -300,26 +273,3 @@ addStatesForCats :: Set Cat_ -> NFA t -> (NFA t, Map Cat_ State)
addStatesForCats cs fa = (fa', m) addStatesForCats cs fa = (fa', m)
where (fa', ns) = newStates (replicate (Set.size cs) ()) fa where (fa', ns) = newStates (replicate (Set.size cs) ()) fa
m = Map.fromList (zip (Set.toList cs) (map fst ns)) m = Map.fromList (zip (Set.toList cs) (map fst ns))
ruleIsNonRecursive :: Set Cat_ -> CFRule_ -> Bool
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
noCatsInSet :: Set Cat_ -> [Symbol Cat_ t] -> Bool
noCatsInSet cs = not . any (`catElem` cs)
-- | Check if all the rules are right-linear, or all the rules are
-- left-linear, with respect to given categories.
allXLinear :: Set Cat_ -> [CFRule_] -> Bool
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
-- | Checks if a context-free rule is right-linear.
isRightLinear :: Set Cat_ -- ^ The categories to consider
-> CFRule_ -- ^ The rule to check for right-linearity
-> Bool
isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
-- | Checks if a context-free rule is left-linear.
isLeftLinear :: Set Cat_ -- ^ The categories to consider
-> CFRule_ -- ^ The rule to check for right-linearity
-> Bool
isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs

View File

@@ -3,7 +3,8 @@ module GF.Speech.RegExp (RE(..),
isEpsilon, isNull, isEpsilon, isNull,
unionRE, concatRE, seqRE, unionRE, concatRE, seqRE,
repeatRE, minimizeRE, repeatRE, minimizeRE,
mapRE, joinRE, mapRE, mapRE', joinRE,
symbolsRE,
dfa2re, prRE) where dfa2re, prRE) where
import Data.List import Data.List
@@ -107,10 +108,13 @@ firstRE (REConcat (x:xs)) = (x, REConcat xs)
firstRE r = (r,epsilonRE) firstRE r = (r,epsilonRE)
mapRE :: (a -> b) -> RE a -> RE b mapRE :: (a -> b) -> RE a -> RE b
mapRE f (REConcat xs) = REConcat (map (mapRE f) xs) mapRE f = mapRE' (RESymbol . f)
mapRE f (REUnion xs) = REUnion (map (mapRE f) xs)
mapRE f (RERepeat xs) = RERepeat (mapRE f xs) mapRE' :: (a -> RE b) -> RE a -> RE b
mapRE f (RESymbol s) = RESymbol (f s) mapRE' f (REConcat xs) = REConcat (map (mapRE' f) xs)
mapRE' f (REUnion xs) = REUnion (map (mapRE' f) xs)
mapRE' f (RERepeat x) = RERepeat (mapRE' f x)
mapRE' f (RESymbol s) = f s
joinRE :: RE (RE a) -> RE a joinRE :: RE (RE a) -> RE a
joinRE (REConcat xs) = REConcat (map joinRE xs) joinRE (REConcat xs) = REConcat (map joinRE xs)
@@ -118,6 +122,12 @@ joinRE (REUnion xs) = REUnion (map joinRE xs)
joinRE (RERepeat xs) = RERepeat (joinRE xs) joinRE (RERepeat xs) = RERepeat (joinRE xs)
joinRE (RESymbol ss) = ss joinRE (RESymbol ss) = ss
symbolsRE :: RE a -> [a]
symbolsRE (REConcat xs) = concatMap symbolsRE xs
symbolsRE (REUnion xs) = concatMap symbolsRE xs
symbolsRE (RERepeat x) = symbolsRE x
symbolsRE (RESymbol x) = [x]
-- Debugging -- Debugging
prRE :: RE String -> String prRE :: RE String -> String

View File

@@ -49,7 +49,7 @@ import Data.Set (Set)
import qualified Data.Set as Set import qualified Data.Set as Set
data SRG = SRG { grammarName :: String -- ^ grammar name data SRG = SRG { grammarName :: String -- ^ grammar name
, startCat :: String -- ^ start category name , startCat :: SRGCat -- ^ start category name
, origStartCat :: String -- ^ original start category name , origStartCat :: String -- ^ original start category name
, grammarLanguage :: Maybe String -- ^ The language for which the grammar , grammarLanguage :: Maybe String -- ^ The language for which the grammar
-- is intended, e.g. en-UK -- is intended, e.g. en-UK
@@ -61,7 +61,7 @@ data SRGRule = SRGRule SRGCat String [SRGAlt] -- ^ SRG category name, original c
-- and productions -- and productions
deriving (Eq,Show) deriving (Eq,Show)
-- | maybe a probability, a rule name and a list of symbols -- | maybe a probability, a rule name and an EBNF right-hand side
data SRGAlt = SRGAlt (Maybe Double) CFTerm SRGItem data SRGAlt = SRGAlt (Maybe Double) CFTerm SRGItem
deriving (Eq,Show) deriving (Eq,Show)
@@ -163,6 +163,8 @@ srgTopCats srg = buildMultiMap [(oc, cat) | SRGRule cat origCat _ <- rules srg,
srgItem :: [[Symbol SRGNT Token]] -> SRGItem srgItem :: [[Symbol SRGNT Token]] -> SRGItem
srgItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats) srgItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
-- non-optimizing version:
--srgItem = unionRE . map seqRE
-- | Merges a list of right-hand sides which all have the same -- | Merges a list of right-hand sides which all have the same
-- sequence of non-terminals. -- sequence of non-terminals.

View File

@@ -257,6 +257,33 @@ mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transit
allCats = map fst g allCats = map fst g
refl = if incAll then reflexiveClosure_ allCats else reflexiveSubrelation refl = if incAll then reflexiveClosure_ allCats else reflexiveSubrelation
--
-- * Approximate context-free grammars with regular grammars.
--
-- Use the transformation algorithm from \"Regular Approximation of Context-free
-- Grammars through Approximation\", Mohri and Nederhof, 2000
-- to create an over-generating regular frammar for a context-free
-- grammar
makeRegular :: CFRules -> CFRules
makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
where trSet cs | allXLinear cs rs = rs
| otherwise = concatMap handleCat csl
where csl = Set.toList cs
rs = catSetRules g cs
handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e
++ concatMap (makeRightLinearRules c) (catRules g c)
where c' = newCat c
makeRightLinearRules b' (CFRule c ss n) =
case ys of
[] -> newRule b' (xs ++ [Cat (newCat c)]) n -- no non-terminals left
(Cat b:zs) -> newRule b' (xs ++ [Cat b]) n
++ makeRightLinearRules (newCat b) (CFRule c zs n)
where (xs,ys) = break (`catElem` cs) ss
-- don't add rules on the form A -> A
newRule c rhs n | rhs == [Cat c] = []
| otherwise = [CFRule c rhs n]
newCat c = c ++ "$"
-- --
-- * CFG rule utilities -- * CFG rule utilities
@@ -292,7 +319,7 @@ ruleFun (CFRule _ _ t) = f t
f _ = IC "" f _ = IC ""
-- | Checks if a symbol is a non-terminal of one of the given categories. -- | Checks if a symbol is a non-terminal of one of the given categories.
catElem :: Symbol Cat_ t -> Set Cat_ -> Bool catElem :: Ord c => Symbol c t -> Set c -> Bool
catElem s cs = symbol (`Set.member` cs) (const False) s catElem s cs = symbol (`Set.member` cs) (const False) s
-- | Check if any of the categories used on the right-hand side -- | Check if any of the categories used on the right-hand side
@@ -301,4 +328,29 @@ anyUsedBy :: Eq c => [c] -> CFRule c n t -> Bool
anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss) anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss)
mkCFTerm :: String -> CFTerm mkCFTerm :: String -> CFTerm
mkCFTerm n = CFObj (IC n) [] mkCFTerm n = CFObj (IC n) []
ruleIsNonRecursive :: Ord c => Set c -> CFRule c n t -> Bool
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
noCatsInSet :: Ord c => Set c -> [Symbol c t] -> Bool
noCatsInSet cs = not . any (`catElem` cs)
-- | Check if all the rules are right-linear, or all the rules are
-- left-linear, with respect to given categories.
allXLinear :: Ord c => Set c -> [CFRule c n t] -> Bool
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
-- | Checks if a context-free rule is right-linear.
isRightLinear :: Ord c =>
Set c -- ^ The categories to consider
-> CFRule c n t -- ^ The rule to check for right-linearity
-> Bool
isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
-- | Checks if a context-free rule is left-linear.
isLeftLinear :: Ord c =>
Set c -- ^ The categories to consider
-> CFRule c n t -- ^ The rule to check for left-linearity
-> Bool
isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs