From 2b63a895690e6f4eb57c0a1b95692b640b9d9e2c Mon Sep 17 00:00:00 2001 From: bringert Date: Mon, 25 Jun 2007 13:38:40 +0000 Subject: [PATCH] Some refactorings needed for recursion removal. --- src/GF/Speech/CFGToFiniteState.hs | 50 --------------------------- src/GF/Speech/RegExp.hs | 20 ++++++++--- src/GF/Speech/SRG.hs | 6 ++-- src/GF/Speech/TransformCFG.hs | 56 +++++++++++++++++++++++++++++-- 4 files changed, 73 insertions(+), 59 deletions(-) diff --git a/src/GF/Speech/CFGToFiniteState.hs b/src/GF/Speech/CFGToFiniteState.hs index efc4c562e..a8eb4e1de 100644 --- a/src/GF/Speech/CFGToFiniteState.hs +++ b/src/GF/Speech/CFGToFiniteState.hs @@ -68,33 +68,6 @@ makeSimpleRegular opts s = makeRegular $ cfgToCFRules s preprocess = fix (topDownFilter start . bottomUpFilter) . removeCycles --- --- * Approximate context-free grammars with regular grammars. --- - --- Use the transformation algorithm from \"Regular Approximation of Context-free --- Grammars through Approximation\", Mohri and Nederhof, 2000 --- to create an over-generating regular frammar for a context-free --- grammar -makeRegular :: CFRules -> CFRules -makeRegular g = groupProds $ concatMap trSet (mutRecCats True g) - where trSet cs | allXLinear cs rs = rs - | otherwise = concatMap handleCat csl - where csl = Set.toList cs - rs = catSetRules g cs - handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e - ++ concatMap (makeRightLinearRules c) (catRules g c) - where c' = newCat c - makeRightLinearRules b' (CFRule c ss n) = - case ys of - [] -> newRule b' (xs ++ [Cat (newCat c)]) n -- no non-terminals left - (Cat b:zs) -> newRule b' (xs ++ [Cat b]) n - ++ makeRightLinearRules (newCat b) (CFRule c zs n) - where (xs,ys) = break (`catElem` cs) ss - -- don't add rules on the form A -> A - newRule c rhs n | rhs == [Cat c] = [] - | otherwise = [CFRule c rhs n] - newCat c = c ++ "$" -- -- * Compile strongly regular grammars to NFAs @@ -300,26 +273,3 @@ addStatesForCats :: Set Cat_ -> NFA t -> (NFA t, Map Cat_ State) addStatesForCats cs fa = (fa', m) where (fa', ns) = newStates (replicate (Set.size cs) ()) fa m = Map.fromList (zip (Set.toList cs) (map fst ns)) - -ruleIsNonRecursive :: Set Cat_ -> CFRule_ -> Bool -ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs - -noCatsInSet :: Set Cat_ -> [Symbol Cat_ t] -> Bool -noCatsInSet cs = not . any (`catElem` cs) - --- | Check if all the rules are right-linear, or all the rules are --- left-linear, with respect to given categories. -allXLinear :: Set Cat_ -> [CFRule_] -> Bool -allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs - --- | Checks if a context-free rule is right-linear. -isRightLinear :: Set Cat_ -- ^ The categories to consider - -> CFRule_ -- ^ The rule to check for right-linearity - -> Bool -isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs - --- | Checks if a context-free rule is left-linear. -isLeftLinear :: Set Cat_ -- ^ The categories to consider - -> CFRule_ -- ^ The rule to check for right-linearity - -> Bool -isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs diff --git a/src/GF/Speech/RegExp.hs b/src/GF/Speech/RegExp.hs index 120a43d26..1842780ee 100644 --- a/src/GF/Speech/RegExp.hs +++ b/src/GF/Speech/RegExp.hs @@ -3,7 +3,8 @@ module GF.Speech.RegExp (RE(..), isEpsilon, isNull, unionRE, concatRE, seqRE, repeatRE, minimizeRE, - mapRE, joinRE, + mapRE, mapRE', joinRE, + symbolsRE, dfa2re, prRE) where import Data.List @@ -107,10 +108,13 @@ firstRE (REConcat (x:xs)) = (x, REConcat xs) firstRE r = (r,epsilonRE) mapRE :: (a -> b) -> RE a -> RE b -mapRE f (REConcat xs) = REConcat (map (mapRE f) xs) -mapRE f (REUnion xs) = REUnion (map (mapRE f) xs) -mapRE f (RERepeat xs) = RERepeat (mapRE f xs) -mapRE f (RESymbol s) = RESymbol (f s) +mapRE f = mapRE' (RESymbol . f) + +mapRE' :: (a -> RE b) -> RE a -> RE b +mapRE' f (REConcat xs) = REConcat (map (mapRE' f) xs) +mapRE' f (REUnion xs) = REUnion (map (mapRE' f) xs) +mapRE' f (RERepeat x) = RERepeat (mapRE' f x) +mapRE' f (RESymbol s) = f s joinRE :: RE (RE a) -> RE a joinRE (REConcat xs) = REConcat (map joinRE xs) @@ -118,6 +122,12 @@ joinRE (REUnion xs) = REUnion (map joinRE xs) joinRE (RERepeat xs) = RERepeat (joinRE xs) joinRE (RESymbol ss) = ss +symbolsRE :: RE a -> [a] +symbolsRE (REConcat xs) = concatMap symbolsRE xs +symbolsRE (REUnion xs) = concatMap symbolsRE xs +symbolsRE (RERepeat x) = symbolsRE x +symbolsRE (RESymbol x) = [x] + -- Debugging prRE :: RE String -> String diff --git a/src/GF/Speech/SRG.hs b/src/GF/Speech/SRG.hs index 8370f130a..43969ab0d 100644 --- a/src/GF/Speech/SRG.hs +++ b/src/GF/Speech/SRG.hs @@ -49,7 +49,7 @@ import Data.Set (Set) import qualified Data.Set as Set data SRG = SRG { grammarName :: String -- ^ grammar name - , startCat :: String -- ^ start category name + , startCat :: SRGCat -- ^ start category name , origStartCat :: String -- ^ original start category name , grammarLanguage :: Maybe String -- ^ The language for which the grammar -- is intended, e.g. en-UK @@ -61,7 +61,7 @@ data SRGRule = SRGRule SRGCat String [SRGAlt] -- ^ SRG category name, original c -- and productions deriving (Eq,Show) --- | maybe a probability, a rule name and a list of symbols +-- | maybe a probability, a rule name and an EBNF right-hand side data SRGAlt = SRGAlt (Maybe Double) CFTerm SRGItem deriving (Eq,Show) @@ -163,6 +163,8 @@ srgTopCats srg = buildMultiMap [(oc, cat) | SRGRule cat origCat _ <- rules srg, srgItem :: [[Symbol SRGNT Token]] -> SRGItem srgItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats) +-- non-optimizing version: +--srgItem = unionRE . map seqRE -- | Merges a list of right-hand sides which all have the same -- sequence of non-terminals. diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs index a94cf3817..c640caa0f 100644 --- a/src/GF/Speech/TransformCFG.hs +++ b/src/GF/Speech/TransformCFG.hs @@ -257,6 +257,33 @@ mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transit allCats = map fst g refl = if incAll then reflexiveClosure_ allCats else reflexiveSubrelation +-- +-- * Approximate context-free grammars with regular grammars. +-- + +-- Use the transformation algorithm from \"Regular Approximation of Context-free +-- Grammars through Approximation\", Mohri and Nederhof, 2000 +-- to create an over-generating regular frammar for a context-free +-- grammar +makeRegular :: CFRules -> CFRules +makeRegular g = groupProds $ concatMap trSet (mutRecCats True g) + where trSet cs | allXLinear cs rs = rs + | otherwise = concatMap handleCat csl + where csl = Set.toList cs + rs = catSetRules g cs + handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e + ++ concatMap (makeRightLinearRules c) (catRules g c) + where c' = newCat c + makeRightLinearRules b' (CFRule c ss n) = + case ys of + [] -> newRule b' (xs ++ [Cat (newCat c)]) n -- no non-terminals left + (Cat b:zs) -> newRule b' (xs ++ [Cat b]) n + ++ makeRightLinearRules (newCat b) (CFRule c zs n) + where (xs,ys) = break (`catElem` cs) ss + -- don't add rules on the form A -> A + newRule c rhs n | rhs == [Cat c] = [] + | otherwise = [CFRule c rhs n] + newCat c = c ++ "$" -- -- * CFG rule utilities @@ -292,7 +319,7 @@ ruleFun (CFRule _ _ t) = f t f _ = IC "" -- | Checks if a symbol is a non-terminal of one of the given categories. -catElem :: Symbol Cat_ t -> Set Cat_ -> Bool +catElem :: Ord c => Symbol c t -> Set c -> Bool catElem s cs = symbol (`Set.member` cs) (const False) s -- | Check if any of the categories used on the right-hand side @@ -301,4 +328,29 @@ anyUsedBy :: Eq c => [c] -> CFRule c n t -> Bool anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss) mkCFTerm :: String -> CFTerm -mkCFTerm n = CFObj (IC n) [] \ No newline at end of file +mkCFTerm n = CFObj (IC n) [] + +ruleIsNonRecursive :: Ord c => Set c -> CFRule c n t -> Bool +ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs + +noCatsInSet :: Ord c => Set c -> [Symbol c t] -> Bool +noCatsInSet cs = not . any (`catElem` cs) + +-- | Check if all the rules are right-linear, or all the rules are +-- left-linear, with respect to given categories. +allXLinear :: Ord c => Set c -> [CFRule c n t] -> Bool +allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs + +-- | Checks if a context-free rule is right-linear. +isRightLinear :: Ord c => + Set c -- ^ The categories to consider + -> CFRule c n t -- ^ The rule to check for right-linearity + -> Bool +isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs + +-- | Checks if a context-free rule is left-linear. +isLeftLinear :: Ord c => + Set c -- ^ The categories to consider + -> CFRule c n t -- ^ The rule to check for left-linearity + -> Bool +isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs