Some performance improvements in the FA generation.

This commit is contained in:
bringert
2005-12-22 23:16:15 +00:00
parent cb9769788e
commit 5c12a73dae
6 changed files with 92 additions and 53 deletions

View File

@@ -15,6 +15,10 @@
module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular) where
import Data.List
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set
import GF.Data.Utilities
import GF.Formalism.CFG
@@ -27,13 +31,19 @@ import GF.Speech.FiniteState
import GF.Speech.Relation
import GF.Speech.TransformCFG
import Debug.Trace
data MutRecSet = MutRecSet {
mrCats :: [Cat_],
mrNonRecRules :: [CFRule_],
mrRecRules :: [CFRule_],
mrIsRightRec :: Bool
}
type MutRecSets = Map Cat_ MutRecSet
cfgToFA :: Options -> CGrammar -> DFA String
cfgToFA opts = minimize . compileAutomaton start . makeSimpleRegular
--cfgToFA opts = trfa "minimal" . minimize . trfa "initial" . compileAutomaton start . makeSimpleRegular
where start = getStartCat opts
trfa s fa = trace (s ++ ", states: " ++ show (length (states fa)) ++ ", transitions: " ++ show (length (transitions fa))) fa
makeSimpleRegular :: CGrammar -> CFRules
makeSimpleRegular = makeRegular . removeIdenticalRules . removeEmptyCats . cfgToCFRules
@@ -45,8 +55,9 @@ makeSimpleRegular = makeRegular . removeIdenticalRules . removeEmptyCats . cfgTo
makeRegular :: CFRules -> CFRules
makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
where trSet cs | allXLinear cs rs = rs
| otherwise = concatMap handleCat cs
where rs = catSetRules g cs
| otherwise = concatMap handleCat csl
where csl = Set.toList cs
rs = catSetRules g csl
handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e
++ concatMap (makeRightLinearRules c) (catRules g c)
where c' = newCat c
@@ -62,7 +73,7 @@ makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
-- | Get the sets of mutually recursive non-terminals for a grammar.
mutRecCats :: Bool -- ^ If true, all categories will be in some set.
-- If false, only recursive categories will be included.
-> CFRules -> [[Cat_]]
-> CFRules -> [Set Cat_]
mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
where r = mkRel [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss]
allCats = map fst g
@@ -72,67 +83,88 @@ mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transit
compileAutomaton :: Cat_ -- ^ Start category
-> CFRules
-> NFA Token
compileAutomaton start g = make_fa s [Cat start] f fa''
compileAutomaton start g = make_fa (g,ns) s [Cat start] f fa''
where
fa = newFA ()
s = startState fa
(fa',f) = newState () fa
fa'' = addFinalState f fa'
ns = mutRecCats False g
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
-- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997.
make_fa :: State -> [Symbol Cat_ Token] -> State
ns = mutRecSets g $ mutRecCats False g
mutRecSets :: CFRules -> [Set Cat_] -> MutRecSets
mutRecSets g = Map.fromList . concatMap mkMutRecSet
where
mkMutRecSet cs = [ (c,ms) | c <- csl ]
where csl = Set.toList cs
rs = catSetRules g csl
(nrs,rrs) = partition (ruleIsNonRecursive cs) rs
ms = MutRecSet {
mrCats = csl,
mrNonRecRules = nrs,
mrRecRules = rrs,
mrIsRightRec = all (isRightLinear cs) rrs
}
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
-- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997.
make_fa :: (CFRules,MutRecSets) -> State -> [Symbol Cat_ Token] -> State
-> NFA Token -> NFA Token
make_fa q0 alpha q1 fa =
make_fa c@(g,ns) q0 alpha q1 fa =
case alpha of
[] -> newTransition q0 q1 Nothing fa
[Tok t] -> newTransition q0 q1 (Just t) fa
[Cat a] -> case findSet a ns of
[Cat a] -> case Map.lookup a ns of
-- a is recursive
Just ni -> let (fa',ss) = addStatesForCats ni fa
getState x = lookup' x ss
niRules = catSetRules g ni
(nrs,rs) = partition (ruleIsNonRecursive ni) niRules
in if all (isRightLinear ni) niRules
then
-- the set Ni is right-recursive or cyclic
let fa'' = foldFuns [make_fa (getState c) xs q1 | CFRule c xs _ <- nrs] fa'
fa''' = foldFuns [make_fa (getState c) xs (getState d) | CFRule c ss _ <- rs,
let (xs,Cat d) = (init ss,last ss)] fa''
in newTransition q0 (getState a) Nothing fa'''
else
-- the set Ni is left-recursive
let fa'' = foldFuns [make_fa q0 xs (getState c) | CFRule c xs _ <- nrs] fa'
fa''' = foldFuns [make_fa (getState d) xs (getState c) | CFRule c (Cat d:xs) _ <- rs] fa''
in newTransition (getState a) q1 Nothing fa'''
Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) ->
if mrIsRightRec n
then
-- the set Ni is right-recursive or cyclic
let fa'' = foldl (\ f (CFRule c xs _) -> make_fa_ (getState c) xs q1 f) fa' nrs
fa''' = foldl (\ f (CFRule c ss _) ->
let (xs,Cat d) = (init ss,last ss)
in make_fa_ (getState c) xs (getState d) f) fa'' rs
in newTransition q0 (getState a) Nothing fa'''
else
-- the set Ni is left-recursive
let fa'' = foldl (\f (CFRule c xs _) -> make_fa_ q0 xs (getState c) f) fa' nrs
fa''' = foldl (\f (CFRule c (Cat d:xs) _) -> make_fa_ (getState d) xs (getState c) f) fa'' rs
in newTransition (getState a) q1 Nothing fa'''
where
(fa',ss) = addStatesForCats ni fa
getState x = lookup' x ss
-- a is not recursive
Nothing -> let rs = catRules g a
in foldl (\fa -> \ (CFRule _ b _) -> make_fa q0 b q1 fa) fa rs
in foldl (\fa -> \ (CFRule _ b _) -> make_fa_ q0 b q1 fa) fa rs
(x:beta) -> let (fa',q) = newState () fa
in make_fa q beta q1 $ make_fa q0 [x] q fa'
addStatesForCats [] fa = (fa,[])
addStatesForCats (c:cs) fa = let (fa',s) = newState () fa
(fa'',ss) = addStatesForCats cs fa'
in (fa'',(c,s):ss)
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
in make_fa_ q beta q1 $! make_fa_ q0 [x] q fa'
where
make_fa_ = make_fa c
addStatesForCats :: [Cat_] -> NFA Token -> (NFA Token, [(Cat_,State)])
addStatesForCats cs fa = (fa', zip cs (map fst ns))
where (fa', ns) = newStates (replicate (length cs) ()) fa
ruleIsNonRecursive :: Set Cat_ -> CFRule_ -> Bool
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
noCatsInSet :: Eq c => [c] -> [Symbol c t] -> Bool
noCatsInSet :: Set Cat_ -> [Symbol Cat_ t] -> Bool
noCatsInSet cs = not . any (`catElem` cs)
-- | Check if all the rules are right-linear, or all the rules are
-- left-linear, with respect to given categories.
allXLinear :: Eq c => [c] -> [CFRule c n t] -> Bool
allXLinear :: Set Cat_ -> [CFRule_] -> Bool
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
-- | Checks if a context-free rule is right-linear.
isRightLinear :: Eq c => [c] -- ^ The categories to consider
-> CFRule c n t -- ^ The rule to check for right-linearity
isRightLinear :: Set Cat_ -- ^ The categories to consider
-> CFRule_ -- ^ The rule to check for right-linearity
-> Bool
isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
-- | Checks if a context-free rule is left-linear.
isLeftLinear :: Eq c => [c] -- ^ The categories to consider
-> CFRule c n t -- ^ The rule to check for right-linearity
isLeftLinear :: Set Cat_ -- ^ The categories to consider
-> CFRule_ -- ^ The rule to check for right-linearity
-> Bool
isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs