forked from GitHub/gf-core
174 lines
7.4 KiB
Haskell
174 lines
7.4 KiB
Haskell
----------------------------------------------------------------------
|
|
-- |
|
|
-- Module : CFGToFiniteState
|
|
-- Maintainer : BB
|
|
-- Stability : (stable)
|
|
-- Portability : (portable)
|
|
--
|
|
-- > CVS $Date: 2005/11/10 16:43:44 $
|
|
-- > CVS $Author: bringert $
|
|
-- > CVS $Revision: 1.6 $
|
|
--
|
|
-- Approximates CFGs with finite state networks.
|
|
-----------------------------------------------------------------------------
|
|
|
|
module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular) where
|
|
|
|
import Data.List
|
|
import Data.Map (Map)
|
|
import qualified Data.Map as Map
|
|
import Data.Set (Set)
|
|
import qualified Data.Set as Set
|
|
|
|
import GF.Data.Utilities
|
|
import GF.Formalism.CFG
|
|
import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, NameProfile(..))
|
|
import GF.Conversion.Types
|
|
import GF.Infra.Ident (Ident)
|
|
import GF.Infra.Option (Options)
|
|
|
|
import GF.Speech.FiniteState
|
|
import GF.Speech.Relation
|
|
import GF.Speech.TransformCFG
|
|
|
|
data MutRecSet = MutRecSet {
|
|
mrCats :: [Cat_],
|
|
mrNonRecRules :: [CFRule_],
|
|
mrRecRules :: [CFRule_],
|
|
mrIsRightRec :: Bool
|
|
}
|
|
|
|
|
|
type MutRecSets = Map Cat_ MutRecSet
|
|
|
|
cfgToFA :: Options -> CGrammar -> DFA String
|
|
cfgToFA opts = minimize . compileAutomaton start . makeSimpleRegular
|
|
where start = getStartCat opts
|
|
|
|
makeSimpleRegular :: CGrammar -> CFRules
|
|
makeSimpleRegular = makeRegular . removeIdenticalRules . removeEmptyCats . cfgToCFRules
|
|
|
|
-- Use the transformation algorithm from \"Regular Approximation of Context-free
|
|
-- Grammars through Approximation\", Mohri and Nederhof, 2000
|
|
-- to create an over-generating regular frammar for a context-free
|
|
-- grammar
|
|
makeRegular :: CFRules -> CFRules
|
|
makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
|
|
where trSet cs | allXLinear cs rs = rs
|
|
| otherwise = concatMap handleCat csl
|
|
where csl = Set.toList cs
|
|
rs = catSetRules g csl
|
|
handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e
|
|
++ concatMap (makeRightLinearRules c) (catRules g c)
|
|
where c' = newCat c
|
|
makeRightLinearRules b' (CFRule c ss n) =
|
|
case ys of
|
|
[] -> [CFRule b' (xs ++ [Cat (newCat c)]) n] -- no non-terminals left
|
|
(Cat b:zs) -> CFRule b' (xs ++ [Cat b]) n
|
|
: makeRightLinearRules (newCat b) (CFRule c zs n)
|
|
where (xs,ys) = break (`catElem` cs) ss
|
|
newCat c = c ++ "$"
|
|
|
|
|
|
-- | Get the sets of mutually recursive non-terminals for a grammar.
|
|
mutRecCats :: Bool -- ^ If true, all categories will be in some set.
|
|
-- If false, only recursive categories will be included.
|
|
-> CFRules -> [Set Cat_]
|
|
mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
|
|
where r = mkRel [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss]
|
|
allCats = map fst g
|
|
refl = if incAll then reflexiveClosure_ allCats else reflexiveSubrelation
|
|
|
|
-- Convert a strongly regular grammar to a finite automaton.
|
|
compileAutomaton :: Cat_ -- ^ Start category
|
|
-> CFRules
|
|
-> NFA Token
|
|
compileAutomaton start g = make_fa (g,ns) s [Cat start] f fa''
|
|
where
|
|
fa = newFA ()
|
|
s = startState fa
|
|
(fa',f) = newState () fa
|
|
fa'' = addFinalState f fa'
|
|
ns = mutRecSets g $ mutRecCats False g
|
|
|
|
mutRecSets :: CFRules -> [Set Cat_] -> MutRecSets
|
|
mutRecSets g = Map.fromList . concatMap mkMutRecSet
|
|
where
|
|
mkMutRecSet cs = [ (c,ms) | c <- csl ]
|
|
where csl = Set.toList cs
|
|
rs = catSetRules g csl
|
|
(nrs,rrs) = partition (ruleIsNonRecursive cs) rs
|
|
ms = MutRecSet {
|
|
mrCats = csl,
|
|
mrNonRecRules = nrs,
|
|
mrRecRules = rrs,
|
|
mrIsRightRec = all (isRightLinear cs) rrs
|
|
}
|
|
|
|
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
|
|
-- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997.
|
|
make_fa :: (CFRules,MutRecSets) -> State -> [Symbol Cat_ Token] -> State
|
|
-> NFA Token -> NFA Token
|
|
make_fa c@(g,ns) q0 alpha q1 fa =
|
|
case alpha of
|
|
[] -> newTransition q0 q1 Nothing fa
|
|
[Tok t] -> newTransition q0 q1 (Just t) fa
|
|
[Cat a] -> case Map.lookup a ns of
|
|
-- a is recursive
|
|
Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) ->
|
|
if mrIsRightRec n
|
|
then
|
|
-- the set Ni is right-recursive or cyclic
|
|
let fa'' = foldl (\ f (CFRule c xs _) -> make_fa_ (getState c) xs q1 f) fa' nrs
|
|
fa''' = foldl (\ f (CFRule c ss _) ->
|
|
let (xs,Cat d) = (init ss,last ss)
|
|
in make_fa_ (getState c) xs (getState d) f) fa'' rs
|
|
in newTransition q0 (getState a) Nothing fa'''
|
|
else
|
|
-- the set Ni is left-recursive
|
|
let fa'' = foldl (\f (CFRule c xs _) -> make_fa_ q0 xs (getState c) f) fa' nrs
|
|
fa''' = foldl (\f (CFRule c (Cat d:xs) _) -> make_fa_ (getState d) xs (getState c) f) fa'' rs
|
|
in newTransition (getState a) q1 Nothing fa'''
|
|
where
|
|
(fa',stateMap) = addStatesForCats ni fa
|
|
getState x = Map.findWithDefault
|
|
(error $ "CFGToFiniteState: No state for " ++ x)
|
|
x stateMap
|
|
-- a is not recursive
|
|
Nothing -> let rs = catRules g a
|
|
in foldl (\fa -> \ (CFRule _ b _) -> make_fa_ q0 b q1 fa) fa rs
|
|
(x:beta) -> let (fa',q) = newState () fa
|
|
in make_fa_ q beta q1 $! make_fa_ q0 [x] q fa'
|
|
where
|
|
make_fa_ = make_fa c
|
|
|
|
addStatesForCats :: [Cat_] -> NFA Token -> (NFA Token, Map Cat_ State)
|
|
addStatesForCats cs fa = (fa', m)
|
|
where (fa', ns) = newStates (replicate (length cs) ()) fa
|
|
m = Map.fromList (zip cs (map fst ns))
|
|
|
|
ruleIsNonRecursive :: Set Cat_ -> CFRule_ -> Bool
|
|
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
|
|
|
|
|
|
|
|
noCatsInSet :: Set Cat_ -> [Symbol Cat_ t] -> Bool
|
|
noCatsInSet cs = not . any (`catElem` cs)
|
|
|
|
-- | Check if all the rules are right-linear, or all the rules are
|
|
-- left-linear, with respect to given categories.
|
|
allXLinear :: Set Cat_ -> [CFRule_] -> Bool
|
|
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
|
|
|
|
-- | Checks if a context-free rule is right-linear.
|
|
isRightLinear :: Set Cat_ -- ^ The categories to consider
|
|
-> CFRule_ -- ^ The rule to check for right-linearity
|
|
-> Bool
|
|
isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
|
|
|
|
-- | Checks if a context-free rule is left-linear.
|
|
isLeftLinear :: Set Cat_ -- ^ The categories to consider
|
|
-> CFRule_ -- ^ The rule to check for right-linearity
|
|
-> Bool
|
|
isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs
|