forked from GitHub/gf-core
Some performance improvements in the FA generation.
This commit is contained in:
@@ -34,11 +34,6 @@ lookupList a [] = []
|
|||||||
lookupList a (p:ps) | a == fst p = snd p : lookupList a ps
|
lookupList a (p:ps) | a == fst p = snd p : lookupList a ps
|
||||||
| otherwise = lookupList a ps
|
| otherwise = lookupList a ps
|
||||||
|
|
||||||
-- | Find the first list in a list of lists
|
|
||||||
-- which contains the argument.
|
|
||||||
findSet :: Eq c => c -> [[c]] -> Maybe [c]
|
|
||||||
findSet x = find (x `elem`)
|
|
||||||
|
|
||||||
split :: [a] -> ([a], [a])
|
split :: [a] -> ([a], [a])
|
||||||
split (x : y : as) = (x:xs, y:ys)
|
split (x : y : as) = (x:xs, y:ys)
|
||||||
where (xs, ys) = split as
|
where (xs, ys) = split as
|
||||||
|
|||||||
@@ -15,6 +15,10 @@
|
|||||||
module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular) where
|
module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular) where
|
||||||
|
|
||||||
import Data.List
|
import Data.List
|
||||||
|
import Data.Map (Map)
|
||||||
|
import qualified Data.Map as Map
|
||||||
|
import Data.Set (Set)
|
||||||
|
import qualified Data.Set as Set
|
||||||
|
|
||||||
import GF.Data.Utilities
|
import GF.Data.Utilities
|
||||||
import GF.Formalism.CFG
|
import GF.Formalism.CFG
|
||||||
@@ -27,13 +31,19 @@ import GF.Speech.FiniteState
|
|||||||
import GF.Speech.Relation
|
import GF.Speech.Relation
|
||||||
import GF.Speech.TransformCFG
|
import GF.Speech.TransformCFG
|
||||||
|
|
||||||
import Debug.Trace
|
data MutRecSet = MutRecSet {
|
||||||
|
mrCats :: [Cat_],
|
||||||
|
mrNonRecRules :: [CFRule_],
|
||||||
|
mrRecRules :: [CFRule_],
|
||||||
|
mrIsRightRec :: Bool
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
type MutRecSets = Map Cat_ MutRecSet
|
||||||
|
|
||||||
cfgToFA :: Options -> CGrammar -> DFA String
|
cfgToFA :: Options -> CGrammar -> DFA String
|
||||||
cfgToFA opts = minimize . compileAutomaton start . makeSimpleRegular
|
cfgToFA opts = minimize . compileAutomaton start . makeSimpleRegular
|
||||||
--cfgToFA opts = trfa "minimal" . minimize . trfa "initial" . compileAutomaton start . makeSimpleRegular
|
|
||||||
where start = getStartCat opts
|
where start = getStartCat opts
|
||||||
trfa s fa = trace (s ++ ", states: " ++ show (length (states fa)) ++ ", transitions: " ++ show (length (transitions fa))) fa
|
|
||||||
|
|
||||||
makeSimpleRegular :: CGrammar -> CFRules
|
makeSimpleRegular :: CGrammar -> CFRules
|
||||||
makeSimpleRegular = makeRegular . removeIdenticalRules . removeEmptyCats . cfgToCFRules
|
makeSimpleRegular = makeRegular . removeIdenticalRules . removeEmptyCats . cfgToCFRules
|
||||||
@@ -45,8 +55,9 @@ makeSimpleRegular = makeRegular . removeIdenticalRules . removeEmptyCats . cfgTo
|
|||||||
makeRegular :: CFRules -> CFRules
|
makeRegular :: CFRules -> CFRules
|
||||||
makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
|
makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
|
||||||
where trSet cs | allXLinear cs rs = rs
|
where trSet cs | allXLinear cs rs = rs
|
||||||
| otherwise = concatMap handleCat cs
|
| otherwise = concatMap handleCat csl
|
||||||
where rs = catSetRules g cs
|
where csl = Set.toList cs
|
||||||
|
rs = catSetRules g csl
|
||||||
handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e
|
handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e
|
||||||
++ concatMap (makeRightLinearRules c) (catRules g c)
|
++ concatMap (makeRightLinearRules c) (catRules g c)
|
||||||
where c' = newCat c
|
where c' = newCat c
|
||||||
@@ -62,7 +73,7 @@ makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
|
|||||||
-- | Get the sets of mutually recursive non-terminals for a grammar.
|
-- | Get the sets of mutually recursive non-terminals for a grammar.
|
||||||
mutRecCats :: Bool -- ^ If true, all categories will be in some set.
|
mutRecCats :: Bool -- ^ If true, all categories will be in some set.
|
||||||
-- If false, only recursive categories will be included.
|
-- If false, only recursive categories will be included.
|
||||||
-> CFRules -> [[Cat_]]
|
-> CFRules -> [Set Cat_]
|
||||||
mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
|
mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
|
||||||
where r = mkRel [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss]
|
where r = mkRel [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss]
|
||||||
allCats = map fst g
|
allCats = map fst g
|
||||||
@@ -72,67 +83,88 @@ mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transit
|
|||||||
compileAutomaton :: Cat_ -- ^ Start category
|
compileAutomaton :: Cat_ -- ^ Start category
|
||||||
-> CFRules
|
-> CFRules
|
||||||
-> NFA Token
|
-> NFA Token
|
||||||
compileAutomaton start g = make_fa s [Cat start] f fa''
|
compileAutomaton start g = make_fa (g,ns) s [Cat start] f fa''
|
||||||
where
|
where
|
||||||
fa = newFA ()
|
fa = newFA ()
|
||||||
s = startState fa
|
s = startState fa
|
||||||
(fa',f) = newState () fa
|
(fa',f) = newState () fa
|
||||||
fa'' = addFinalState f fa'
|
fa'' = addFinalState f fa'
|
||||||
ns = mutRecCats False g
|
ns = mutRecSets g $ mutRecCats False g
|
||||||
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
|
|
||||||
-- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997.
|
mutRecSets :: CFRules -> [Set Cat_] -> MutRecSets
|
||||||
make_fa :: State -> [Symbol Cat_ Token] -> State
|
mutRecSets g = Map.fromList . concatMap mkMutRecSet
|
||||||
|
where
|
||||||
|
mkMutRecSet cs = [ (c,ms) | c <- csl ]
|
||||||
|
where csl = Set.toList cs
|
||||||
|
rs = catSetRules g csl
|
||||||
|
(nrs,rrs) = partition (ruleIsNonRecursive cs) rs
|
||||||
|
ms = MutRecSet {
|
||||||
|
mrCats = csl,
|
||||||
|
mrNonRecRules = nrs,
|
||||||
|
mrRecRules = rrs,
|
||||||
|
mrIsRightRec = all (isRightLinear cs) rrs
|
||||||
|
}
|
||||||
|
|
||||||
|
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
|
||||||
|
-- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997.
|
||||||
|
make_fa :: (CFRules,MutRecSets) -> State -> [Symbol Cat_ Token] -> State
|
||||||
-> NFA Token -> NFA Token
|
-> NFA Token -> NFA Token
|
||||||
make_fa q0 alpha q1 fa =
|
make_fa c@(g,ns) q0 alpha q1 fa =
|
||||||
case alpha of
|
case alpha of
|
||||||
[] -> newTransition q0 q1 Nothing fa
|
[] -> newTransition q0 q1 Nothing fa
|
||||||
[Tok t] -> newTransition q0 q1 (Just t) fa
|
[Tok t] -> newTransition q0 q1 (Just t) fa
|
||||||
[Cat a] -> case findSet a ns of
|
[Cat a] -> case Map.lookup a ns of
|
||||||
-- a is recursive
|
-- a is recursive
|
||||||
Just ni -> let (fa',ss) = addStatesForCats ni fa
|
Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) ->
|
||||||
getState x = lookup' x ss
|
if mrIsRightRec n
|
||||||
niRules = catSetRules g ni
|
then
|
||||||
(nrs,rs) = partition (ruleIsNonRecursive ni) niRules
|
-- the set Ni is right-recursive or cyclic
|
||||||
in if all (isRightLinear ni) niRules
|
let fa'' = foldl (\ f (CFRule c xs _) -> make_fa_ (getState c) xs q1 f) fa' nrs
|
||||||
then
|
fa''' = foldl (\ f (CFRule c ss _) ->
|
||||||
-- the set Ni is right-recursive or cyclic
|
let (xs,Cat d) = (init ss,last ss)
|
||||||
let fa'' = foldFuns [make_fa (getState c) xs q1 | CFRule c xs _ <- nrs] fa'
|
in make_fa_ (getState c) xs (getState d) f) fa'' rs
|
||||||
fa''' = foldFuns [make_fa (getState c) xs (getState d) | CFRule c ss _ <- rs,
|
in newTransition q0 (getState a) Nothing fa'''
|
||||||
let (xs,Cat d) = (init ss,last ss)] fa''
|
else
|
||||||
in newTransition q0 (getState a) Nothing fa'''
|
-- the set Ni is left-recursive
|
||||||
else
|
let fa'' = foldl (\f (CFRule c xs _) -> make_fa_ q0 xs (getState c) f) fa' nrs
|
||||||
-- the set Ni is left-recursive
|
fa''' = foldl (\f (CFRule c (Cat d:xs) _) -> make_fa_ (getState d) xs (getState c) f) fa'' rs
|
||||||
let fa'' = foldFuns [make_fa q0 xs (getState c) | CFRule c xs _ <- nrs] fa'
|
in newTransition (getState a) q1 Nothing fa'''
|
||||||
fa''' = foldFuns [make_fa (getState d) xs (getState c) | CFRule c (Cat d:xs) _ <- rs] fa''
|
where
|
||||||
in newTransition (getState a) q1 Nothing fa'''
|
(fa',ss) = addStatesForCats ni fa
|
||||||
|
getState x = lookup' x ss
|
||||||
-- a is not recursive
|
-- a is not recursive
|
||||||
Nothing -> let rs = catRules g a
|
Nothing -> let rs = catRules g a
|
||||||
in foldl (\fa -> \ (CFRule _ b _) -> make_fa q0 b q1 fa) fa rs
|
in foldl (\fa -> \ (CFRule _ b _) -> make_fa_ q0 b q1 fa) fa rs
|
||||||
(x:beta) -> let (fa',q) = newState () fa
|
(x:beta) -> let (fa',q) = newState () fa
|
||||||
in make_fa q beta q1 $ make_fa q0 [x] q fa'
|
in make_fa_ q beta q1 $! make_fa_ q0 [x] q fa'
|
||||||
addStatesForCats [] fa = (fa,[])
|
where
|
||||||
addStatesForCats (c:cs) fa = let (fa',s) = newState () fa
|
make_fa_ = make_fa c
|
||||||
(fa'',ss) = addStatesForCats cs fa'
|
|
||||||
in (fa'',(c,s):ss)
|
addStatesForCats :: [Cat_] -> NFA Token -> (NFA Token, [(Cat_,State)])
|
||||||
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
|
addStatesForCats cs fa = (fa', zip cs (map fst ns))
|
||||||
|
where (fa', ns) = newStates (replicate (length cs) ()) fa
|
||||||
|
|
||||||
|
ruleIsNonRecursive :: Set Cat_ -> CFRule_ -> Bool
|
||||||
|
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
|
||||||
|
|
||||||
|
|
||||||
noCatsInSet :: Eq c => [c] -> [Symbol c t] -> Bool
|
|
||||||
|
noCatsInSet :: Set Cat_ -> [Symbol Cat_ t] -> Bool
|
||||||
noCatsInSet cs = not . any (`catElem` cs)
|
noCatsInSet cs = not . any (`catElem` cs)
|
||||||
|
|
||||||
-- | Check if all the rules are right-linear, or all the rules are
|
-- | Check if all the rules are right-linear, or all the rules are
|
||||||
-- left-linear, with respect to given categories.
|
-- left-linear, with respect to given categories.
|
||||||
allXLinear :: Eq c => [c] -> [CFRule c n t] -> Bool
|
allXLinear :: Set Cat_ -> [CFRule_] -> Bool
|
||||||
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
|
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
|
||||||
|
|
||||||
-- | Checks if a context-free rule is right-linear.
|
-- | Checks if a context-free rule is right-linear.
|
||||||
isRightLinear :: Eq c => [c] -- ^ The categories to consider
|
isRightLinear :: Set Cat_ -- ^ The categories to consider
|
||||||
-> CFRule c n t -- ^ The rule to check for right-linearity
|
-> CFRule_ -- ^ The rule to check for right-linearity
|
||||||
-> Bool
|
-> Bool
|
||||||
isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
|
isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
|
||||||
|
|
||||||
-- | Checks if a context-free rule is left-linear.
|
-- | Checks if a context-free rule is left-linear.
|
||||||
isLeftLinear :: Eq c => [c] -- ^ The categories to consider
|
isLeftLinear :: Set Cat_ -- ^ The categories to consider
|
||||||
-> CFRule c n t -- ^ The rule to check for right-linearity
|
-> CFRule_ -- ^ The rule to check for right-linearity
|
||||||
-> Bool
|
-> Bool
|
||||||
isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs
|
isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs
|
||||||
|
|||||||
@@ -16,7 +16,8 @@ module GF.Speech.FiniteState (FA, State, NFA, DFA,
|
|||||||
states, transitions,
|
states, transitions,
|
||||||
newFA,
|
newFA,
|
||||||
addFinalState,
|
addFinalState,
|
||||||
newState, newTransition,
|
newState, newStates,
|
||||||
|
newTransition,
|
||||||
mapStates, mapTransitions,
|
mapStates, mapTransitions,
|
||||||
oneFinalState,
|
oneFinalState,
|
||||||
moveLabelsToNodes, minimize,
|
moveLabelsToNodes, minimize,
|
||||||
@@ -65,6 +66,10 @@ newState :: a -> FA n a b -> (FA n a b, n)
|
|||||||
newState x (FA g s ss) = (FA g' s ss, n)
|
newState x (FA g s ss) = (FA g' s ss, n)
|
||||||
where (g',n) = newNode x g
|
where (g',n) = newNode x g
|
||||||
|
|
||||||
|
newStates :: [a] -> FA n a b -> (FA n a b, [(n,a)])
|
||||||
|
newStates xs (FA g s ss) = (FA g' s ss, ns)
|
||||||
|
where (g',ns) = newNodes xs g
|
||||||
|
|
||||||
newTransition :: n -> n -> b -> FA n a b -> FA n a b
|
newTransition :: n -> n -> b -> FA n a b -> FA n a b
|
||||||
newTransition f t l = onGraph (newEdge (f,t,l))
|
newTransition f t l = onGraph (newEdge (f,t,l))
|
||||||
|
|
||||||
|
|||||||
@@ -13,7 +13,7 @@
|
|||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
module GF.Speech.Graph ( Graph(..), Node, Edge, Incoming, Outgoing
|
module GF.Speech.Graph ( Graph(..), Node, Edge, Incoming, Outgoing
|
||||||
, newGraph, nodes, edges
|
, newGraph, nodes, edges
|
||||||
, nmap, emap, newNode, newEdge, newEdges
|
, nmap, emap, newNode, newNodes, newEdge, newEdges
|
||||||
, incoming, outgoing, getOutgoing
|
, incoming, outgoing, getOutgoing
|
||||||
, getFrom, getTo, getLabel
|
, getFrom, getTo, getLabel
|
||||||
, reverseGraph, renameNodes
|
, reverseGraph, renameNodes
|
||||||
@@ -52,6 +52,11 @@ emap f (Graph c ns es) = Graph c ns [(x,y,f l) | (x,y,l) <- es]
|
|||||||
newNode :: a -> Graph n a b -> (Graph n a b,n)
|
newNode :: a -> Graph n a b -> (Graph n a b,n)
|
||||||
newNode l (Graph (c:cs) ns es) = (Graph cs ((c,l):ns) es, c)
|
newNode l (Graph (c:cs) ns es) = (Graph cs ((c,l):ns) es, c)
|
||||||
|
|
||||||
|
newNodes :: [a] -> Graph n a b -> (Graph n a b,[Node n a])
|
||||||
|
newNodes ls (Graph cs ns es) = (Graph cs' (ns'++ns) es, ns')
|
||||||
|
where (xs,cs') = splitAt (length ls) cs
|
||||||
|
ns' = zip xs ls
|
||||||
|
|
||||||
newEdge :: Edge n b -> Graph n a b -> Graph n a b
|
newEdge :: Edge n b -> Graph n a b -> Graph n a b
|
||||||
newEdge e (Graph c ns es) = Graph c ns (e:es)
|
newEdge e (Graph c ns es) = Graph c ns (e:es)
|
||||||
|
|
||||||
|
|||||||
@@ -100,10 +100,10 @@ purgeEmpty r = Map.filter (not . Set.null) r
|
|||||||
|
|
||||||
|
|
||||||
-- | Get the equivalence classes from an equivalence relation.
|
-- | Get the equivalence classes from an equivalence relation.
|
||||||
equivalenceClasses :: Ord a => Rel a -> [[a]]
|
equivalenceClasses :: Ord a => Rel a -> [Set a]
|
||||||
equivalenceClasses r = equivalenceClasses_ (Map.keys r) r
|
equivalenceClasses r = equivalenceClasses_ (Map.keys r) r
|
||||||
where equivalenceClasses_ [] _ = []
|
where equivalenceClasses_ [] _ = []
|
||||||
equivalenceClasses_ (x:xs) r = Set.toList ys:equivalenceClasses_ zs r
|
equivalenceClasses_ (x:xs) r = ys:equivalenceClasses_ zs r
|
||||||
where ys = allRelated r x
|
where ys = allRelated r x
|
||||||
zs = [x' | x' <- xs, not (x' `Set.member` ys)]
|
zs = [x' | x' <- xs, not (x' `Set.member` ys)]
|
||||||
|
|
||||||
|
|||||||
@@ -37,6 +37,8 @@ import Control.Monad
|
|||||||
import Data.FiniteMap
|
import Data.FiniteMap
|
||||||
import Data.List
|
import Data.List
|
||||||
import Data.Maybe (fromJust, fromMaybe)
|
import Data.Maybe (fromJust, fromMaybe)
|
||||||
|
import Data.Set (Set)
|
||||||
|
import qualified Data.Set as Set
|
||||||
|
|
||||||
|
|
||||||
-- | not very nice to replace the structured CFCat type with a simple string
|
-- | not very nice to replace the structured CFCat type with a simple string
|
||||||
@@ -134,8 +136,8 @@ ruleFun :: CFRule_ -> Fun
|
|||||||
ruleFun (CFRule _ _ n) = name2fun n
|
ruleFun (CFRule _ _ n) = name2fun n
|
||||||
|
|
||||||
-- | Checks if a symbol is a non-terminal of one of the given categories.
|
-- | Checks if a symbol is a non-terminal of one of the given categories.
|
||||||
catElem :: Eq c => Symbol c t -> [c] -> Bool
|
catElem :: Symbol Cat_ t -> Set Cat_ -> Bool
|
||||||
catElem s cs = symbol (`elem` cs) (const False) s
|
catElem s cs = symbol (`Set.member` cs) (const False) s
|
||||||
|
|
||||||
-- | Check if any of the categories used on the right-hand side
|
-- | Check if any of the categories used on the right-hand side
|
||||||
-- are in the given list of categories.
|
-- are in the given list of categories.
|
||||||
|
|||||||
Reference in New Issue
Block a user