From 29ba681242acd62a28e2facdad733619bf8d994f Mon Sep 17 00:00:00 2001 From: bringert Date: Thu, 22 Dec 2005 18:04:05 +0000 Subject: [PATCH] Some tracing and formatting stuff looking for the stack overflow problem in the FA generation. --- src/GF/Speech/CFGToFiniteState.hs | 128 ++++++++++++++++-------------- src/GF/Speech/PrFA.hs | 5 +- 2 files changed, 71 insertions(+), 62 deletions(-) diff --git a/src/GF/Speech/CFGToFiniteState.hs b/src/GF/Speech/CFGToFiniteState.hs index 405e4324e..89ec88872 100644 --- a/src/GF/Speech/CFGToFiniteState.hs +++ b/src/GF/Speech/CFGToFiniteState.hs @@ -27,9 +27,13 @@ import GF.Speech.FiniteState import GF.Speech.Relation import GF.Speech.TransformCFG +import Debug.Trace + cfgToFA :: Options -> CGrammar -> DFA String cfgToFA opts = minimize . compileAutomaton start . makeSimpleRegular +--cfgToFA opts = trfa "minimal" . minimize . trfa "initial" . compileAutomaton start . makeSimpleRegular where start = getStartCat opts + trfa s fa = trace (s ++ ", states: " ++ show (length (states fa)) ++ ", transitions: " ++ show (length (transitions fa))) fa makeSimpleRegular :: CGrammar -> CFRules makeSimpleRegular = makeRegular . removeIdenticalRules . removeEmptyCats . cfgToCFRules @@ -41,74 +45,76 @@ makeSimpleRegular = makeRegular . removeIdenticalRules . removeEmptyCats . 
cfgTo makeRegular :: CFRules -> CFRules makeRegular g = groupProds $ concatMap trSet (mutRecCats True g) where trSet cs | allXLinear cs rs = rs - | otherwise = concatMap handleCat cs - where rs = catSetRules g cs - handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e - ++ concatMap (makeRightLinearRules c) (catRules g c) - where c' = newCat c - makeRightLinearRules b' (CFRule c ss n) = - case ys of - [] -> [CFRule b' (xs ++ [Cat (newCat c)]) n] -- no non-terminals left - (Cat b:zs) -> CFRule b' (xs ++ [Cat b]) n - : makeRightLinearRules (newCat b) (CFRule c zs n) - where (xs,ys) = break (`catElem` cs) ss - newCat c = c ++ "$" + | otherwise = concatMap handleCat cs + where rs = catSetRules g cs + handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e + ++ concatMap (makeRightLinearRules c) (catRules g c) + where c' = newCat c + makeRightLinearRules b' (CFRule c ss n) = + case ys of + [] -> [CFRule b' (xs ++ [Cat (newCat c)]) n] -- no non-terminals left + (Cat b:zs) -> CFRule b' (xs ++ [Cat b]) n + : makeRightLinearRules (newCat b) (CFRule c zs n) + where (xs,ys) = break (`catElem` cs) ss + newCat c = c ++ "$" -- | Get the sets of mutually recursive non-terminals for a grammar. mutRecCats :: Bool -- ^ If true, all categories will be in some set. -- If false, only recursive categories will be included. - -> CFRules -> [[Cat_]] + -> CFRules -> [[Cat_]] mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r where r = mkRel [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss] - allCats = map fst g - refl = if incAll then reflexiveClosure_ allCats else reflexiveSubrelation + allCats = map fst g + refl = if incAll then reflexiveClosure_ allCats else reflexiveSubrelation -- Convert a strongly regular grammar to a finite automaton. 
compileAutomaton :: Cat_ -- ^ Start category - -> CFRules - -> NFA Token + -> CFRules + -> NFA Token compileAutomaton start g = make_fa s [Cat start] f fa'' - where fa = newFA () - s = startState fa - (fa',f) = newState () fa - fa'' = addFinalState f fa' - ns = mutRecCats False g - -- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\", - -- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997. - make_fa :: State -> [Symbol Cat_ Token] -> State - -> NFA Token -> NFA Token - make_fa q0 alpha q1 fa = - case alpha of - [] -> newTransition q0 q1 Nothing fa - [Tok t] -> newTransition q0 q1 (Just t) fa - [Cat a] -> case findSet a ns of - -- a is recursive - Just ni -> let (fa',ss) = addStatesForCats ni fa - getState x = lookup' x ss - niRules = catSetRules g ni - (nrs,rs) = partition (ruleIsNonRecursive ni) niRules - in if all (isRightLinear ni) niRules then - -- the set Ni is right-recursive or cyclic - let fa'' = foldFuns [make_fa (getState c) xs q1 | CFRule c xs _ <- nrs] fa' - fa''' = foldFuns [make_fa (getState c) xs (getState d) | CFRule c ss _ <- rs, - let (xs,Cat d) = (init ss,last ss)] fa'' - in newTransition q0 (getState a) Nothing fa''' - else - -- the set Ni is left-recursive - let fa'' = foldFuns [make_fa q0 xs (getState c) | CFRule c xs _ <- nrs] fa' - fa''' = foldFuns [make_fa (getState d) xs (getState c) | CFRule c (Cat d:xs) _ <- rs] fa'' - in newTransition (getState a) q1 Nothing fa''' - -- a is not recursive - Nothing -> let rs = catRules g a - in foldl (\fa -> \ (CFRule _ b _) -> make_fa q0 b q1 fa) fa rs - (x:beta) -> let (fa',q) = newState () fa - in make_fa q beta q1 $ make_fa q0 [x] q fa' - addStatesForCats [] fa = (fa,[]) - addStatesForCats (c:cs) fa = let (fa',s) = newState () fa - (fa'',ss) = addStatesForCats cs fa' - in (fa'',(c,s):ss) - ruleIsNonRecursive cs = noCatsInSet cs . 
ruleRhs + where + fa = newFA () + s = startState fa + (fa',f) = newState () fa + fa'' = addFinalState f fa' + ns = mutRecCats False g + -- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\", + -- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997. + make_fa :: State -> [Symbol Cat_ Token] -> State + -> NFA Token -> NFA Token + make_fa q0 alpha q1 fa = + case alpha of + [] -> newTransition q0 q1 Nothing fa + [Tok t] -> newTransition q0 q1 (Just t) fa + [Cat a] -> case findSet a ns of + -- a is recursive + Just ni -> let (fa',ss) = addStatesForCats ni fa + getState x = lookup' x ss + niRules = catSetRules g ni + (nrs,rs) = partition (ruleIsNonRecursive ni) niRules + in if all (isRightLinear ni) niRules + then + -- the set Ni is right-recursive or cyclic + let fa'' = foldFuns [make_fa (getState c) xs q1 | CFRule c xs _ <- nrs] fa' + fa''' = foldFuns [make_fa (getState c) xs (getState d) | CFRule c ss _ <- rs, + let (xs,Cat d) = (init ss,last ss)] fa'' + in newTransition q0 (getState a) Nothing fa''' + else + -- the set Ni is left-recursive + let fa'' = foldFuns [make_fa q0 xs (getState c) | CFRule c xs _ <- nrs] fa' + fa''' = foldFuns [make_fa (getState d) xs (getState c) | CFRule c (Cat d:xs) _ <- rs] fa'' + in newTransition (getState a) q1 Nothing fa''' + -- a is not recursive + Nothing -> let rs = catRules g a + in foldl (\fa -> \ (CFRule _ b _) -> make_fa q0 b q1 fa) fa rs + (x:beta) -> let (fa',q) = newState () fa + in make_fa q beta q1 $ make_fa q0 [x] q fa' + addStatesForCats [] fa = (fa,[]) + addStatesForCats (c:cs) fa = let (fa',s) = newState () fa + (fa'',ss) = addStatesForCats cs fa' + in (fa'',(c,s):ss) + ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs noCatsInSet :: Eq c => [c] -> [Symbol c t] -> Bool @@ -121,12 +127,12 @@ allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs -- | Checks if a context-free rule is right-linear. 
isRightLinear :: Eq c => [c] -- ^ The categories to consider - -> CFRule c n t -- ^ The rule to check for right-linearity - -> Bool + -> CFRule c n t -- ^ The rule to check for right-linearity + -> Bool isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs -- | Checks if a context-free rule is left-linear. isLeftLinear :: Eq c => [c] -- ^ The categories to consider - -> CFRule c n t -- ^ The rule to check for right-linearity - -> Bool + -> CFRule c n t -- ^ The rule to check for right-linearity + -> Bool isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs diff --git a/src/GF/Speech/PrFA.hs b/src/GF/Speech/PrFA.hs index 1dac4fab0..e3c22ef1d 100644 --- a/src/GF/Speech/PrFA.hs +++ b/src/GF/Speech/PrFA.hs @@ -34,10 +34,13 @@ import Data.Char (toUpper,toLower) import Data.List import Data.Maybe (fromMaybe) + + faGraphvizPrinter :: Ident -- ^ Grammar name -> Options -> CGrammar -> String faGraphvizPrinter name opts cfg = - prFAGraphviz $ mapStates (const "") $ cfgToFA opts cfg + prFAGraphviz $ mapStates (const "") fa + where fa = cfgToFA opts cfg -- | Convert the grammar to a regular grammar and print it in BNF