1
0
forked from GitHub/gf-core

Some performance improvements in the FA generation.

This commit is contained in:
bringert
2005-12-22 23:16:15 +00:00
parent 29ba681242
commit e32472a9bb
6 changed files with 92 additions and 53 deletions

View File

@@ -34,11 +34,6 @@ lookupList a [] = []
lookupList a (p:ps) | a == fst p = snd p : lookupList a ps lookupList a (p:ps) | a == fst p = snd p : lookupList a ps
| otherwise = lookupList a ps | otherwise = lookupList a ps
-- | Find the first list in a list of lists
-- which contains the argument.
findSet :: Eq c => c -> [[c]] -> Maybe [c]
findSet x = find (x `elem`)
split :: [a] -> ([a], [a]) split :: [a] -> ([a], [a])
split (x : y : as) = (x:xs, y:ys) split (x : y : as) = (x:xs, y:ys)
where (xs, ys) = split as where (xs, ys) = split as

View File

@@ -15,6 +15,10 @@
module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular) where module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular) where
import Data.List import Data.List
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set
import GF.Data.Utilities import GF.Data.Utilities
import GF.Formalism.CFG import GF.Formalism.CFG
@@ -27,13 +31,19 @@ import GF.Speech.FiniteState
import GF.Speech.Relation import GF.Speech.Relation
import GF.Speech.TransformCFG import GF.Speech.TransformCFG
import Debug.Trace data MutRecSet = MutRecSet {
mrCats :: [Cat_],
mrNonRecRules :: [CFRule_],
mrRecRules :: [CFRule_],
mrIsRightRec :: Bool
}
type MutRecSets = Map Cat_ MutRecSet
cfgToFA :: Options -> CGrammar -> DFA String cfgToFA :: Options -> CGrammar -> DFA String
cfgToFA opts = minimize . compileAutomaton start . makeSimpleRegular cfgToFA opts = minimize . compileAutomaton start . makeSimpleRegular
--cfgToFA opts = trfa "minimal" . minimize . trfa "initial" . compileAutomaton start . makeSimpleRegular
where start = getStartCat opts where start = getStartCat opts
trfa s fa = trace (s ++ ", states: " ++ show (length (states fa)) ++ ", transitions: " ++ show (length (transitions fa))) fa
makeSimpleRegular :: CGrammar -> CFRules makeSimpleRegular :: CGrammar -> CFRules
makeSimpleRegular = makeRegular . removeIdenticalRules . removeEmptyCats . cfgToCFRules makeSimpleRegular = makeRegular . removeIdenticalRules . removeEmptyCats . cfgToCFRules
@@ -45,8 +55,9 @@ makeSimpleRegular = makeRegular . removeIdenticalRules . removeEmptyCats . cfgTo
makeRegular :: CFRules -> CFRules makeRegular :: CFRules -> CFRules
makeRegular g = groupProds $ concatMap trSet (mutRecCats True g) makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
where trSet cs | allXLinear cs rs = rs where trSet cs | allXLinear cs rs = rs
| otherwise = concatMap handleCat cs | otherwise = concatMap handleCat csl
where rs = catSetRules g cs where csl = Set.toList cs
rs = catSetRules g csl
handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e
++ concatMap (makeRightLinearRules c) (catRules g c) ++ concatMap (makeRightLinearRules c) (catRules g c)
where c' = newCat c where c' = newCat c
@@ -62,7 +73,7 @@ makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
-- | Get the sets of mutually recursive non-terminals for a grammar. -- | Get the sets of mutually recursive non-terminals for a grammar.
mutRecCats :: Bool -- ^ If true, all categories will be in some set. mutRecCats :: Bool -- ^ If true, all categories will be in some set.
-- If false, only recursive categories will be included. -- If false, only recursive categories will be included.
-> CFRules -> [[Cat_]] -> CFRules -> [Set Cat_]
mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
where r = mkRel [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss] where r = mkRel [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss]
allCats = map fst g allCats = map fst g
@@ -72,67 +83,88 @@ mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transit
compileAutomaton :: Cat_ -- ^ Start category compileAutomaton :: Cat_ -- ^ Start category
-> CFRules -> CFRules
-> NFA Token -> NFA Token
compileAutomaton start g = make_fa s [Cat start] f fa'' compileAutomaton start g = make_fa (g,ns) s [Cat start] f fa''
where where
fa = newFA () fa = newFA ()
s = startState fa s = startState fa
(fa',f) = newState () fa (fa',f) = newState () fa
fa'' = addFinalState f fa' fa'' = addFinalState f fa'
ns = mutRecCats False g ns = mutRecSets g $ mutRecCats False g
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
-- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997. mutRecSets :: CFRules -> [Set Cat_] -> MutRecSets
make_fa :: State -> [Symbol Cat_ Token] -> State mutRecSets g = Map.fromList . concatMap mkMutRecSet
where
mkMutRecSet cs = [ (c,ms) | c <- csl ]
where csl = Set.toList cs
rs = catSetRules g csl
(nrs,rrs) = partition (ruleIsNonRecursive cs) rs
ms = MutRecSet {
mrCats = csl,
mrNonRecRules = nrs,
mrRecRules = rrs,
mrIsRightRec = all (isRightLinear cs) rrs
}
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
-- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997.
make_fa :: (CFRules,MutRecSets) -> State -> [Symbol Cat_ Token] -> State
-> NFA Token -> NFA Token -> NFA Token -> NFA Token
make_fa q0 alpha q1 fa = make_fa c@(g,ns) q0 alpha q1 fa =
case alpha of case alpha of
[] -> newTransition q0 q1 Nothing fa [] -> newTransition q0 q1 Nothing fa
[Tok t] -> newTransition q0 q1 (Just t) fa [Tok t] -> newTransition q0 q1 (Just t) fa
[Cat a] -> case findSet a ns of [Cat a] -> case Map.lookup a ns of
-- a is recursive -- a is recursive
Just ni -> let (fa',ss) = addStatesForCats ni fa Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) ->
getState x = lookup' x ss if mrIsRightRec n
niRules = catSetRules g ni then
(nrs,rs) = partition (ruleIsNonRecursive ni) niRules -- the set Ni is right-recursive or cyclic
in if all (isRightLinear ni) niRules let fa'' = foldl (\ f (CFRule c xs _) -> make_fa_ (getState c) xs q1 f) fa' nrs
then fa''' = foldl (\ f (CFRule c ss _) ->
-- the set Ni is right-recursive or cyclic let (xs,Cat d) = (init ss,last ss)
let fa'' = foldFuns [make_fa (getState c) xs q1 | CFRule c xs _ <- nrs] fa' in make_fa_ (getState c) xs (getState d) f) fa'' rs
fa''' = foldFuns [make_fa (getState c) xs (getState d) | CFRule c ss _ <- rs, in newTransition q0 (getState a) Nothing fa'''
let (xs,Cat d) = (init ss,last ss)] fa'' else
in newTransition q0 (getState a) Nothing fa''' -- the set Ni is left-recursive
else let fa'' = foldl (\f (CFRule c xs _) -> make_fa_ q0 xs (getState c) f) fa' nrs
-- the set Ni is left-recursive fa''' = foldl (\f (CFRule c (Cat d:xs) _) -> make_fa_ (getState d) xs (getState c) f) fa'' rs
let fa'' = foldFuns [make_fa q0 xs (getState c) | CFRule c xs _ <- nrs] fa' in newTransition (getState a) q1 Nothing fa'''
fa''' = foldFuns [make_fa (getState d) xs (getState c) | CFRule c (Cat d:xs) _ <- rs] fa'' where
in newTransition (getState a) q1 Nothing fa''' (fa',ss) = addStatesForCats ni fa
getState x = lookup' x ss
-- a is not recursive -- a is not recursive
Nothing -> let rs = catRules g a Nothing -> let rs = catRules g a
in foldl (\fa -> \ (CFRule _ b _) -> make_fa q0 b q1 fa) fa rs in foldl (\fa -> \ (CFRule _ b _) -> make_fa_ q0 b q1 fa) fa rs
(x:beta) -> let (fa',q) = newState () fa (x:beta) -> let (fa',q) = newState () fa
in make_fa q beta q1 $ make_fa q0 [x] q fa' in make_fa_ q beta q1 $! make_fa_ q0 [x] q fa'
addStatesForCats [] fa = (fa,[]) where
addStatesForCats (c:cs) fa = let (fa',s) = newState () fa make_fa_ = make_fa c
(fa'',ss) = addStatesForCats cs fa'
in (fa'',(c,s):ss) addStatesForCats :: [Cat_] -> NFA Token -> (NFA Token, [(Cat_,State)])
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs addStatesForCats cs fa = (fa', zip cs (map fst ns))
where (fa', ns) = newStates (replicate (length cs) ()) fa
ruleIsNonRecursive :: Set Cat_ -> CFRule_ -> Bool
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
noCatsInSet :: Eq c => [c] -> [Symbol c t] -> Bool
noCatsInSet :: Set Cat_ -> [Symbol Cat_ t] -> Bool
noCatsInSet cs = not . any (`catElem` cs) noCatsInSet cs = not . any (`catElem` cs)
-- | Check if all the rules are right-linear, or all the rules are -- | Check if all the rules are right-linear, or all the rules are
-- left-linear, with respect to given categories. -- left-linear, with respect to given categories.
allXLinear :: Eq c => [c] -> [CFRule c n t] -> Bool allXLinear :: Set Cat_ -> [CFRule_] -> Bool
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
-- | Checks if a context-free rule is right-linear. -- | Checks if a context-free rule is right-linear.
isRightLinear :: Eq c => [c] -- ^ The categories to consider isRightLinear :: Set Cat_ -- ^ The categories to consider
-> CFRule c n t -- ^ The rule to check for right-linearity -> CFRule_ -- ^ The rule to check for right-linearity
-> Bool -> Bool
isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
-- | Checks if a context-free rule is left-linear. -- | Checks if a context-free rule is left-linear.
isLeftLinear :: Eq c => [c] -- ^ The categories to consider isLeftLinear :: Set Cat_ -- ^ The categories to consider
-> CFRule c n t -- ^ The rule to check for right-linearity -> CFRule_ -- ^ The rule to check for right-linearity
-> Bool -> Bool
isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs

View File

@@ -16,7 +16,8 @@ module GF.Speech.FiniteState (FA, State, NFA, DFA,
states, transitions, states, transitions,
newFA, newFA,
addFinalState, addFinalState,
newState, newTransition, newState, newStates,
newTransition,
mapStates, mapTransitions, mapStates, mapTransitions,
oneFinalState, oneFinalState,
moveLabelsToNodes, minimize, moveLabelsToNodes, minimize,
@@ -65,6 +66,10 @@ newState :: a -> FA n a b -> (FA n a b, n)
newState x (FA g s ss) = (FA g' s ss, n) newState x (FA g s ss) = (FA g' s ss, n)
where (g',n) = newNode x g where (g',n) = newNode x g
newStates :: [a] -> FA n a b -> (FA n a b, [(n,a)])
newStates xs (FA g s ss) = (FA g' s ss, ns)
where (g',ns) = newNodes xs g
newTransition :: n -> n -> b -> FA n a b -> FA n a b newTransition :: n -> n -> b -> FA n a b -> FA n a b
newTransition f t l = onGraph (newEdge (f,t,l)) newTransition f t l = onGraph (newEdge (f,t,l))

View File

@@ -13,7 +13,7 @@
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
module GF.Speech.Graph ( Graph(..), Node, Edge, Incoming, Outgoing module GF.Speech.Graph ( Graph(..), Node, Edge, Incoming, Outgoing
, newGraph, nodes, edges , newGraph, nodes, edges
, nmap, emap, newNode, newEdge, newEdges , nmap, emap, newNode, newNodes, newEdge, newEdges
, incoming, outgoing, getOutgoing , incoming, outgoing, getOutgoing
, getFrom, getTo, getLabel , getFrom, getTo, getLabel
, reverseGraph, renameNodes , reverseGraph, renameNodes
@@ -52,6 +52,11 @@ emap f (Graph c ns es) = Graph c ns [(x,y,f l) | (x,y,l) <- es]
newNode :: a -> Graph n a b -> (Graph n a b,n) newNode :: a -> Graph n a b -> (Graph n a b,n)
newNode l (Graph (c:cs) ns es) = (Graph cs ((c,l):ns) es, c) newNode l (Graph (c:cs) ns es) = (Graph cs ((c,l):ns) es, c)
newNodes :: [a] -> Graph n a b -> (Graph n a b,[Node n a])
newNodes ls (Graph cs ns es) = (Graph cs' (ns'++ns) es, ns')
where (xs,cs') = splitAt (length ls) cs
ns' = zip xs ls
newEdge :: Edge n b -> Graph n a b -> Graph n a b newEdge :: Edge n b -> Graph n a b -> Graph n a b
newEdge e (Graph c ns es) = Graph c ns (e:es) newEdge e (Graph c ns es) = Graph c ns (e:es)

View File

@@ -100,10 +100,10 @@ purgeEmpty r = Map.filter (not . Set.null) r
-- | Get the equivalence classes from an equivalence relation. -- | Get the equivalence classes from an equivalence relation.
equivalenceClasses :: Ord a => Rel a -> [[a]] equivalenceClasses :: Ord a => Rel a -> [Set a]
equivalenceClasses r = equivalenceClasses_ (Map.keys r) r equivalenceClasses r = equivalenceClasses_ (Map.keys r) r
where equivalenceClasses_ [] _ = [] where equivalenceClasses_ [] _ = []
equivalenceClasses_ (x:xs) r = Set.toList ys:equivalenceClasses_ zs r equivalenceClasses_ (x:xs) r = ys:equivalenceClasses_ zs r
where ys = allRelated r x where ys = allRelated r x
zs = [x' | x' <- xs, not (x' `Set.member` ys)] zs = [x' | x' <- xs, not (x' `Set.member` ys)]

View File

@@ -37,6 +37,8 @@ import Control.Monad
import Data.FiniteMap import Data.FiniteMap
import Data.List import Data.List
import Data.Maybe (fromJust, fromMaybe) import Data.Maybe (fromJust, fromMaybe)
import Data.Set (Set)
import qualified Data.Set as Set
-- | not very nice to replace the structured CFCat type with a simple string -- | not very nice to replace the structured CFCat type with a simple string
@@ -134,8 +136,8 @@ ruleFun :: CFRule_ -> Fun
ruleFun (CFRule _ _ n) = name2fun n ruleFun (CFRule _ _ n) = name2fun n
-- | Checks if a symbol is a non-terminal of one of the given categories. -- | Checks if a symbol is a non-terminal of one of the given categories.
catElem :: Eq c => Symbol c t -> [c] -> Bool catElem :: Symbol Cat_ t -> Set Cat_ -> Bool
catElem s cs = symbol (`elem` cs) (const False) s catElem s cs = symbol (`Set.member` cs) (const False) s
-- | Check if any of the categories used on the right-hand side -- | Check if any of the categories used on the right-hand side
-- are in the given list of categories. -- are in the given list of categories.