diff --git a/src/GF/Data/Utilities.hs b/src/GF/Data/Utilities.hs index d7e6be2f0..50d1f5195 100644 --- a/src/GF/Data/Utilities.hs +++ b/src/GF/Data/Utilities.hs @@ -34,11 +34,6 @@ lookupList a [] = [] lookupList a (p:ps) | a == fst p = snd p : lookupList a ps | otherwise = lookupList a ps --- | Find the first list in a list of lists --- which contains the argument. -findSet :: Eq c => c -> [[c]] -> Maybe [c] -findSet x = find (x `elem`) - split :: [a] -> ([a], [a]) split (x : y : as) = (x:xs, y:ys) where (xs, ys) = split as diff --git a/src/GF/Speech/CFGToFiniteState.hs b/src/GF/Speech/CFGToFiniteState.hs index 89ec88872..25790786a 100644 --- a/src/GF/Speech/CFGToFiniteState.hs +++ b/src/GF/Speech/CFGToFiniteState.hs @@ -15,6 +15,10 @@ module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular) where import Data.List +import Data.Map (Map) +import qualified Data.Map as Map +import Data.Set (Set) +import qualified Data.Set as Set import GF.Data.Utilities import GF.Formalism.CFG @@ -27,13 +31,19 @@ import GF.Speech.FiniteState import GF.Speech.Relation import GF.Speech.TransformCFG -import Debug.Trace +data MutRecSet = MutRecSet { + mrCats :: [Cat_], + mrNonRecRules :: [CFRule_], + mrRecRules :: [CFRule_], + mrIsRightRec :: Bool + } + + +type MutRecSets = Map Cat_ MutRecSet cfgToFA :: Options -> CGrammar -> DFA String cfgToFA opts = minimize . compileAutomaton start . makeSimpleRegular ---cfgToFA opts = trfa "minimal" . minimize . trfa "initial" . compileAutomaton start . makeSimpleRegular where start = getStartCat opts - trfa s fa = trace (s ++ ", states: " ++ show (length (states fa)) ++ ", transitions: " ++ show (length (transitions fa))) fa makeSimpleRegular :: CGrammar -> CFRules makeSimpleRegular = makeRegular . removeIdenticalRules . removeEmptyCats . cfgToCFRules @@ -45,8 +55,9 @@ makeSimpleRegular = makeRegular . removeIdenticalRules . removeEmptyCats . cfgTo makeRegular :: CFRules -> CFRules makeRegular g = groupProds $ concatMap trSet (mutRecCats True g) where trSet cs | allXLinear cs rs = rs - | otherwise = concatMap handleCat cs - where rs = catSetRules g cs + | otherwise = concatMap handleCat csl + where csl = Set.toList cs + rs = catSetRules g csl handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e ++ concatMap (makeRightLinearRules c) (catRules g c) where c' = newCat c @@ -62,7 +73,7 @@ makeRegular g = groupProds $ concatMap trSet (mutRecCats True g) -- | Get the sets of mutually recursive non-terminals for a grammar. mutRecCats :: Bool -- ^ If true, all categories will be in some set. -- If false, only recursive categories will be included. - -> CFRules -> [[Cat_]] + -> CFRules -> [Set Cat_] mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r where r = mkRel [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss] allCats = map fst g @@ -72,67 +83,88 @@ mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transit compileAutomaton :: Cat_ -- ^ Start category -> CFRules -> NFA Token -compileAutomaton start g = make_fa s [Cat start] f fa'' +compileAutomaton start g = make_fa (g,ns) s [Cat start] f fa'' where fa = newFA () s = startState fa (fa',f) = newState () fa fa'' = addFinalState f fa' - ns = mutRecCats False g - -- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\", - -- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997. - make_fa :: State -> [Symbol Cat_ Token] -> State + ns = mutRecSets g $ mutRecCats False g + +mutRecSets :: CFRules -> [Set Cat_] -> MutRecSets +mutRecSets g = Map.fromList . concatMap mkMutRecSet + where + mkMutRecSet cs = [ (c,ms) | c <- csl ] + where csl = Set.toList cs + rs = catSetRules g csl + (nrs,rrs) = partition (ruleIsNonRecursive cs) rs + ms = MutRecSet { + mrCats = csl, + mrNonRecRules = nrs, + mrRecRules = rrs, + mrIsRightRec = all (isRightLinear cs) rrs + } + +-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\", +-- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997. +make_fa :: (CFRules,MutRecSets) -> State -> [Symbol Cat_ Token] -> State -> NFA Token -> NFA Token - make_fa q0 alpha q1 fa = +make_fa c@(g,ns) q0 alpha q1 fa = case alpha of [] -> newTransition q0 q1 Nothing fa [Tok t] -> newTransition q0 q1 (Just t) fa - [Cat a] -> case findSet a ns of + [Cat a] -> case Map.lookup a ns of -- a is recursive - Just ni -> let (fa',ss) = addStatesForCats ni fa - getState x = lookup' x ss - niRules = catSetRules g ni - (nrs,rs) = partition (ruleIsNonRecursive ni) niRules - in if all (isRightLinear ni) niRules - then - -- the set Ni is right-recursive or cyclic - let fa'' = foldFuns [make_fa (getState c) xs q1 | CFRule c xs _ <- nrs] fa' - fa''' = foldFuns [make_fa (getState c) xs (getState d) | CFRule c ss _ <- rs, - let (xs,Cat d) = (init ss,last ss)] fa'' - in newTransition q0 (getState a) Nothing fa''' - else - -- the set Ni is left-recursive - let fa'' = foldFuns [make_fa q0 xs (getState c) | CFRule c xs _ <- nrs] fa' - fa''' = foldFuns [make_fa (getState d) xs (getState c) | CFRule c (Cat d:xs) _ <- rs] fa'' - in newTransition (getState a) q1 Nothing fa''' + Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) -> + if mrIsRightRec n + then + -- the set Ni is right-recursive or cyclic + let fa'' = foldl (\ f (CFRule c xs _) -> make_fa_ (getState c) xs q1 f) fa' nrs + fa''' = foldl (\ f (CFRule c ss _) -> + let (xs,Cat d) = (init ss,last ss) + in make_fa_ (getState c) xs (getState d) f) fa'' rs + in newTransition q0 (getState a) Nothing fa''' + else + -- the set Ni is left-recursive + let fa'' = foldl (\f (CFRule c xs _) -> make_fa_ q0 xs (getState c) f) fa' nrs + fa''' = foldl (\f (CFRule c (Cat d:xs) _) -> make_fa_ (getState d) xs (getState c) f) fa'' rs + in newTransition (getState a) q1 Nothing fa''' + where + (fa',ss) = addStatesForCats ni fa + getState x = lookup' x ss -- a is not recursive Nothing -> let rs = catRules g a - in foldl (\fa -> \ (CFRule _ b _) -> make_fa q0 b q1 fa) fa rs + in foldl (\fa -> \ (CFRule _ b _) -> make_fa_ q0 b q1 fa) fa rs (x:beta) -> let (fa',q) = newState () fa - in make_fa q beta q1 $ make_fa q0 [x] q fa' - addStatesForCats [] fa = (fa,[]) - addStatesForCats (c:cs) fa = let (fa',s) = newState () fa - (fa'',ss) = addStatesForCats cs fa' - in (fa'',(c,s):ss) - ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs + in make_fa_ q beta q1 $! make_fa_ q0 [x] q fa' + where + make_fa_ = make_fa c + +addStatesForCats :: [Cat_] -> NFA Token -> (NFA Token, [(Cat_,State)]) +addStatesForCats cs fa = (fa', zip cs (map fst ns)) + where (fa', ns) = newStates (replicate (length cs) ()) fa + +ruleIsNonRecursive :: Set Cat_ -> CFRule_ -> Bool +ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs -noCatsInSet :: Eq c => [c] -> [Symbol c t] -> Bool + +noCatsInSet :: Set Cat_ -> [Symbol Cat_ t] -> Bool noCatsInSet cs = not . any (`catElem` cs) -- | Check if all the rules are right-linear, or all the rules are -- left-linear, with respect to given categories. -allXLinear :: Eq c => [c] -> [CFRule c n t] -> Bool +allXLinear :: Set Cat_ -> [CFRule_] -> Bool allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs -- | Checks if a context-free rule is right-linear. -isRightLinear :: Eq c => [c] -- ^ The categories to consider - -> CFRule c n t -- ^ The rule to check for right-linearity +isRightLinear :: Set Cat_ -- ^ The categories to consider + -> CFRule_ -- ^ The rule to check for right-linearity -> Bool isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs -- | Checks if a context-free rule is left-linear. -isLeftLinear :: Eq c => [c] -- ^ The categories to consider - -> CFRule c n t -- ^ The rule to check for right-linearity +isLeftLinear :: Set Cat_ -- ^ The categories to consider + -> CFRule_ -- ^ The rule to check for right-linearity -> Bool isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs diff --git a/src/GF/Speech/FiniteState.hs b/src/GF/Speech/FiniteState.hs index dfd5d941d..a2f8952cb 100644 --- a/src/GF/Speech/FiniteState.hs +++ b/src/GF/Speech/FiniteState.hs @@ -16,7 +16,8 @@ module GF.Speech.FiniteState (FA, State, NFA, DFA, states, transitions, newFA, addFinalState, - newState, newTransition, + newState, newStates, + newTransition, mapStates, mapTransitions, oneFinalState, moveLabelsToNodes, minimize, @@ -65,6 +66,10 @@ newState :: a -> FA n a b -> (FA n a b, n) newState x (FA g s ss) = (FA g' s ss, n) where (g',n) = newNode x g +newStates :: [a] -> FA n a b -> (FA n a b, [(n,a)]) +newStates xs (FA g s ss) = (FA g' s ss, ns) + where (g',ns) = newNodes xs g + newTransition :: n -> n -> b -> FA n a b -> FA n a b newTransition f t l = onGraph (newEdge (f,t,l)) diff --git a/src/GF/Speech/Graph.hs b/src/GF/Speech/Graph.hs index 2f80ef0ca..7ebecdc34 100644 --- a/src/GF/Speech/Graph.hs +++ b/src/GF/Speech/Graph.hs @@ -13,7 +13,7 @@ ----------------------------------------------------------------------------- module GF.Speech.Graph ( Graph(..), Node, Edge, Incoming, Outgoing , newGraph, nodes, edges - , nmap, emap, newNode, newEdge, newEdges + , nmap, emap, newNode, newNodes, newEdge, newEdges , incoming, outgoing, getOutgoing , getFrom, getTo, getLabel , reverseGraph, renameNodes @@ -52,6 +52,11 @@ emap f (Graph c ns es) = Graph c ns [(x,y,f l) | (x,y,l) <- es] newNode :: a -> Graph n a b -> (Graph n a b,n) newNode l (Graph (c:cs) ns es) = (Graph cs ((c,l):ns) es, c) +newNodes :: [a] -> Graph n a b -> (Graph n a b,[Node n a]) +newNodes ls (Graph cs ns es) = (Graph cs' (ns'++ns) es, ns') + where (xs,cs') = splitAt (length ls) cs + ns' = zip xs ls + newEdge :: Edge n b -> Graph n a b -> Graph n a b newEdge e (Graph c ns es) = Graph c ns (e:es) diff --git a/src/GF/Speech/Relation.hs b/src/GF/Speech/Relation.hs index 2ad3faccb..c66a07d10 100644 --- a/src/GF/Speech/Relation.hs +++ b/src/GF/Speech/Relation.hs @@ -100,10 +100,10 @@ purgeEmpty r = Map.filter (not . Set.null) r -- | Get the equivalence classes from an equivalence relation. -equivalenceClasses :: Ord a => Rel a -> [[a]] +equivalenceClasses :: Ord a => Rel a -> [Set a] equivalenceClasses r = equivalenceClasses_ (Map.keys r) r where equivalenceClasses_ [] _ = [] - equivalenceClasses_ (x:xs) r = Set.toList ys:equivalenceClasses_ zs r + equivalenceClasses_ (x:xs) r = ys:equivalenceClasses_ zs r where ys = allRelated r x zs = [x' | x' <- xs, not (x' `Set.member` ys)] diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs index 39060206b..2c920cbda 100644 --- a/src/GF/Speech/TransformCFG.hs +++ b/src/GF/Speech/TransformCFG.hs @@ -37,6 +37,8 @@ import Control.Monad import Data.FiniteMap import Data.List import Data.Maybe (fromJust, fromMaybe) +import Data.Set (Set) +import qualified Data.Set as Set -- | not very nice to replace the structured CFCat type with a simple string @@ -134,8 +136,8 @@ ruleFun :: CFRule_ -> Fun ruleFun (CFRule _ _ n) = name2fun n -- | Checks if a symbol is a non-terminal of one of the given categories. -catElem :: Eq c => Symbol c t -> [c] -> Bool -catElem s cs = symbol (`elem` cs) (const False) s +catElem :: Symbol Cat_ t -> Set Cat_ -> Bool +catElem s cs = symbol (`Set.member` cs) (const False) s -- | Check if any of the categories used on the right-hand side -- are in the given list of categories.