mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-29 14:32:51 -06:00
Completed unoptimized SLF generation.
This commit is contained in:
171
src/GF/Speech/CFGToFiniteState.hs
Normal file
171
src/GF/Speech/CFGToFiniteState.hs
Normal file
@@ -0,0 +1,171 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : CFGToFiniteState
|
||||
-- Maintainer : BB
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/09/12 15:46:44 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.1 $
|
||||
--
|
||||
-- Approximates CFGs with finite state networks.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Speech.CFGToFiniteState (cfgToFA) where
|
||||
|
||||
import Data.List
|
||||
|
||||
import GF.Formalism.CFG
|
||||
import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, NameProfile(..))
|
||||
import GF.Conversion.Types
|
||||
import GF.Infra.Ident (Ident)
|
||||
import GF.Infra.Option (Options)
|
||||
|
||||
import GF.Speech.FiniteState
|
||||
import GF.Speech.TransformCFG
|
||||
|
||||
-- | Build a finite automaton from a context-free grammar.
--
-- The grammar is turned into rules with 'cfgToCFRules', passed through
-- 'removeEmptyCats' and 'removeIdenticalRules', made regular with
-- 'makeRegular', compiled with 'compileAutomaton' from the start category
-- given by the options, and finally run through 'minimize'.
-- The grammar name is accepted but not used in the body.
cfgToFA :: Ident -- ^ Grammar name
        -> Options -> CGrammar -> FA () (Maybe String)
cfgToFA name opts cfg = minimize (compileAutomaton startCat regularRules)
  where
    startCat = getStartCat opts
    regularRules =
      makeRegular . removeIdenticalRules . removeEmptyCats . cfgToCFRules $ cfg
|
||||
|
||||
|
||||
-- Use the transformation algorithm from \"Regular Approximation of Context-free
|
||||
-- Grammars through Transformation\", Mohri and Nederhof, 2000
|
||||
-- to create an over-generating regular grammar for a context-free
|
||||
-- grammar
|
||||
makeRegular :: CFRules -> CFRules
makeRegular g = groupProds (concatMap regularize (mutRecCats True g))
  where
    -- Name of the fresh category A' that accompanies a category A.
    primed cat = cat ++ "$"

    -- Handle one set of mutually recursive categories: rules that are
    -- already all right-linear or all left-linear are kept untouched,
    -- otherwise every category in the set gets its rules split up.
    regularize cats
      | allXLinear cats setRules = setRules
      | otherwise = concatMap rulesFor cats
      where
        setRules = catSetRules g cats

        rulesFor cat =
          CFRule (primed cat) [] (mkName (cat ++ "-empty")) -- introduce A' -> e
            : concatMap (split cat) (catRules g cat)

        -- Cut the right-hand side at each symbol matched by 'catElem' for
        -- this set. Each piece becomes its own rule, and the last piece
        -- ends in the primed version of the original left-hand side.
        split lhs (CFRule cat rhs name) =
          case rest of
            [] -> [CFRule lhs (prefix ++ [Cat (primed cat)]) name] -- no non-terminals left
            (Cat next : rest') ->
              CFRule lhs (prefix ++ [Cat next]) name
                : split (primed next) (CFRule cat rest' name)
          where
            (prefix, rest) = break (`catElem` cats) rhs
|
||||
|
||||
|
||||
-- | Get the sets of mutually recursive non-terminals for a grammar.
--
-- The sets are the equivalence classes of the symmetric part of the
-- transitive closure of the relation pairing each rule's left-hand side
-- category with every category on its right-hand side.
mutRecCats :: Bool    -- ^ If true, all categories will be in some set.
                      --   If false, only recursive categories will be included.
           -> CFRules -> [[Cat_]]
mutRecCats incAll g =
  (equivalenceClasses . symmetricSubrelation . transitiveClosure) related
  where
    lhsCats = map fst g
    -- (A, B) whenever some rule for A mentions category B.
    direct = nub [ (lhs, rhsCat) | (_, rules) <- g
                                 , CFRule lhs rhs _ <- rules
                                 , Cat rhsCat <- rhs ]
    related
      | incAll = reflexiveClosure lhsCats direct
      | otherwise = direct
|
||||
|
||||
-- Convert a strongly regular grammar to a finite automaton.
--
-- A new automaton is given one additional state, which is marked as final;
-- the automaton for the start category is then built between the start
-- state and that final state.
compileAutomaton :: Cat_ -- ^ Start category
                 -> CFRules
                 -> FA () (Maybe Token)
compileAutomaton start g = make_fa initial [Cat start] final withFinal
  where
    blank = newFA ()
    initial = startState blank
    (withExtra, final) = newState () blank
    withFinal = addFinalState final withExtra
    -- Sets of mutually recursive categories (non-recursive ones are left out).
    recSets = mutRecCats False g

    -- The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
    -- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997.
    make_fa :: State -> [Symbol Cat_ Token] -> State
            -> FA () (Maybe Token) -> FA () (Maybe Token)
    make_fa q0 [] q1 fa = newTransition q0 q1 Nothing fa
    make_fa q0 [Tok t] q1 fa = newTransition q0 q1 (Just t) fa
    make_fa q0 [Cat a] q1 fa =
      case findSet a recSets of
        -- a is recursive
        Just ni
          | all (isRightLinear ni) setRules ->
              -- the set Ni is right-recursive or cyclic
              let exits = [ make_fa (stateOf c) xs q1 | CFRule c xs _ <- nonRecRules ]
                  steps = [ make_fa (stateOf c) xs (stateOf d)
                          | CFRule c ss _ <- recRules
                          , let (xs, Cat d) = (init ss, last ss) ]
               in newTransition q0 (stateOf a) Nothing
                    (foldFuns steps (foldFuns exits withCatStates))
          | otherwise ->
              -- the set Ni is left-recursive
              let entries = [ make_fa q0 xs (stateOf c) | CFRule c xs _ <- nonRecRules ]
                  steps = [ make_fa (stateOf d) xs (stateOf c)
                          | CFRule c (Cat d : xs) _ <- recRules ]
               in newTransition (stateOf a) q1 Nothing
                    (foldFuns steps (foldFuns entries withCatStates))
          where
            -- One new state per category of the recursive set.
            (withCatStates, catStates) = addStatesForCats ni fa
            stateOf x = lookup' x catStates
            setRules = catSetRules g ni
            (nonRecRules, recRules) = partition (ruleIsNonRecursive ni) setRules
        -- a is not recursive
        Nothing -> foldr (\(CFRule _ rhs _) -> make_fa q0 rhs q1) fa (catRules g a)
    -- Longer sequences: handle the first symbol up to a new intermediate
    -- state, then the remainder from there.
    make_fa q0 (x : beta) q1 fa = make_fa mid beta q1 (make_fa q0 [x] mid withMid)
      where
        (withMid, mid) = newState () fa

    -- Allocate one new state per category, threading the automaton through,
    -- and return the category/state association list.
    addStatesForCats cats fa = mapAccumL addOne fa cats
      where
        addOne acc cat = let (acc', st) = newState () acc in (acc', (cat, st))

    ruleIsNonRecursive cats rule = noCatsInSet cats (ruleRhs rule)
|
||||
|
||||
|
||||
-- | True when no symbol in the list satisfies 'catElem' for the given
-- categories.
noCatsInSet :: Eq c => [c] -> [Symbol c t] -> Bool
noCatsInSet cs syms = all (not . (`catElem` cs)) syms
|
||||
|
||||
-- | Check if all the rules are right-linear, or all the rules are
-- left-linear, with respect to given categories.
allXLinear :: Eq c => [c] -> [CFRule c n t] -> Bool
allXLinear cs rs = everyRule isRightLinear || everyRule isLeftLinear
  where
    everyRule linear = all (linear cs) rs
|
||||
|
||||
-- | Checks if a context-free rule is right-linear.
--
-- Every right-hand-side symbol except the last one must pass
-- 'noCatsInSet' for the given categories.
isRightLinear :: Eq c => [c] -- ^ The categories to consider
              -> CFRule c n t -- ^ The rule to check for right-linearity
              -> Bool
isRightLinear cs rule = noCatsInSet cs (safeInit (ruleRhs rule))
|
||||
|
||||
-- | Checks if a context-free rule is left-linear.
--
-- Every right-hand-side symbol except the first one must pass
-- 'noCatsInSet' for the given categories.
isLeftLinear :: Eq c => [c] -- ^ The categories to consider
             -> CFRule c n t -- ^ The rule to check for left-linearity
             -> Bool
isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs
|
||||
|
||||
|
||||
--
|
||||
-- * Relations
|
||||
--
|
||||
|
||||
-- FIXME: these could use more efficient data structures and algorithms.
|
||||
|
||||
type Rel a = [(a,a)]
|
||||
|
||||
-- | Test whether the pair @(x, y)@ is a member of the relation.
isRelatedTo :: Eq a => Rel a -> a -> a -> Bool
isRelatedTo r x y = any (== (x, y)) r
|
||||
|
||||
-- | Transitive closure of a relation.
--
-- Each step adds every pair @(x, w)@ for which @(x, y)@ and @(y, w)@ are
-- already present. The step is driven by @fix@, which is defined outside
-- this block (presumably it repeats the step until the relation stops
-- changing -- confirm against its definition).
transitiveClosure :: Eq a => Rel a -> Rel a
transitiveClosure r = fix grow r
  where
    grow rel = rel `union` [ (x, w) | (x, y) <- rel, (z, w) <- rel, y == z ]
|
||||
|
||||
-- | Reflexive closure: the pair @(x, x)@ for every element of the given
-- set, followed by the pairs of the relation that are not already present.
reflexiveClosure :: Eq a => [a] -- ^ The set over which the relation is defined.
                 -> Rel a -> Rel a
reflexiveClosure u r = diagonal `union` r
  where
    diagonal = zip u u
|
||||
|
||||
-- | Keep only those pairs whose converse is also in the relation.
symmetricSubrelation :: Eq a => Rel a -> Rel a
symmetricSubrelation r = filter hasConverse r
  where
    hasConverse (x, y) = (y, x) `elem` r
|
||||
|
||||
-- | Get the equivalence classes from an equivalence relation. Since
-- the relation is reflexive, the set can be recovered from the relation.
equivalenceClasses :: Eq a => Rel a -> [[a]]
equivalenceClasses r = classesOf (nub (map fst r))
  where
    -- Take the first remaining element as representative, pull out
    -- everything it is related to, and continue with what is left.
    classesOf [] = []
    classesOf (rep : rest) = (rep : same) : classesOf others
      where
        (same, others) = partition (isRelatedTo r rep) rest
|
||||
|
||||
--
|
||||
-- * Utilities
|
||||
--
|
||||
|
||||
-- | Apply a list of functions to a value. The last function in the list
-- is applied first and the first function is applied last.
foldFuns :: [a -> a] -> a -> a
foldFuns [] x = x
foldFuns (f : fs) x = f (foldFuns fs x)
|
||||
|
||||
-- | Like 'init', but returns the empty list for an empty list.
safeInit :: [a] -> [a]
safeInit xs
  | null xs = []
  | otherwise = init xs
|
||||
Reference in New Issue
Block a user