forked from GitHub/gf-core
Completed unoptimized SLF generation.
This commit is contained in:
171
src/GF/Speech/CFGToFiniteState.hs
Normal file
171
src/GF/Speech/CFGToFiniteState.hs
Normal file
@@ -0,0 +1,171 @@
|
|||||||
|
----------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : CFGToFiniteState
|
||||||
|
-- Maintainer : BB
|
||||||
|
-- Stability : (stable)
|
||||||
|
-- Portability : (portable)
|
||||||
|
--
|
||||||
|
-- > CVS $Date: 2005/09/12 15:46:44 $
|
||||||
|
-- > CVS $Author: bringert $
|
||||||
|
-- > CVS $Revision: 1.1 $
|
||||||
|
--
|
||||||
|
-- Approximates CFGs with finite state networks.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
module GF.Speech.CFGToFiniteState (cfgToFA) where
|
||||||
|
|
||||||
|
import Data.List
|
||||||
|
|
||||||
|
import GF.Formalism.CFG
|
||||||
|
import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, NameProfile(..))
|
||||||
|
import GF.Conversion.Types
|
||||||
|
import GF.Infra.Ident (Ident)
|
||||||
|
import GF.Infra.Option (Options)
|
||||||
|
|
||||||
|
import GF.Speech.FiniteState
|
||||||
|
import GF.Speech.TransformCFG
|
||||||
|
|
||||||
|
cfgToFA :: Ident -- ^ Grammar name
|
||||||
|
-> Options -> CGrammar -> FA () (Maybe String)
|
||||||
|
cfgToFA name opts cfg = minimize $ compileAutomaton start rgr
|
||||||
|
where start = getStartCat opts
|
||||||
|
rgr = makeRegular $ removeIdenticalRules $ removeEmptyCats $ cfgToCFRules cfg
|
||||||
|
|
||||||
|
|
||||||
|
-- Use the transformation algorithm from \"Regular Approximation of Context-free
|
||||||
|
-- Grammars through Approximation\", Mohri and Nederhof, 2000
|
||||||
|
-- to create an over-generating regular frammar for a context-free
|
||||||
|
-- grammar
|
||||||
|
makeRegular :: CFRules -> CFRules
|
||||||
|
makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
|
||||||
|
where trSet cs | allXLinear cs rs = rs
|
||||||
|
| otherwise = concatMap handleCat cs
|
||||||
|
where rs = catSetRules g cs
|
||||||
|
handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e
|
||||||
|
++ concatMap (makeRightLinearRules c) (catRules g c)
|
||||||
|
where c' = newCat c
|
||||||
|
makeRightLinearRules b' (CFRule c ss n) =
|
||||||
|
case ys of
|
||||||
|
[] -> [CFRule b' (xs ++ [Cat (newCat c)]) n] -- no non-terminals left
|
||||||
|
(Cat b:zs) -> CFRule b' (xs ++ [Cat b]) n
|
||||||
|
: makeRightLinearRules (newCat b) (CFRule c zs n)
|
||||||
|
where (xs,ys) = break (`catElem` cs) ss
|
||||||
|
newCat c = c ++ "$"
|
||||||
|
|
||||||
|
|
||||||
|
-- | Get the sets of mutually recursive non-terminals for a grammar.
|
||||||
|
mutRecCats :: Bool -- ^ If true, all categories will be in some set.
|
||||||
|
-- If false, only recursive categories will be included.
|
||||||
|
-> CFRules -> [[Cat_]]
|
||||||
|
mutRecCats incAll g = equivalenceClasses $ symmetricSubrelation $ transitiveClosure r'
|
||||||
|
where r = nub [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss]
|
||||||
|
allCats = map fst g
|
||||||
|
r' = (if incAll then reflexiveClosure allCats else id) r
|
||||||
|
|
||||||
|
-- Convert a strongly regular grammar to a finite automaton.
|
||||||
|
compileAutomaton :: Cat_ -- ^ Start category
|
||||||
|
-> CFRules
|
||||||
|
-> FA () (Maybe Token)
|
||||||
|
compileAutomaton start g = make_fa s [Cat start] f fa''
|
||||||
|
where fa = newFA ()
|
||||||
|
s = startState fa
|
||||||
|
(fa',f) = newState () fa
|
||||||
|
fa'' = addFinalState f fa'
|
||||||
|
ns = mutRecCats False g
|
||||||
|
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
|
||||||
|
-- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997.
|
||||||
|
make_fa :: State -> [Symbol Cat_ Token] -> State
|
||||||
|
-> FA () (Maybe Token) -> FA () (Maybe Token)
|
||||||
|
make_fa q0 alpha q1 fa =
|
||||||
|
case alpha of
|
||||||
|
[] -> newTransition q0 q1 Nothing fa
|
||||||
|
[Tok t] -> newTransition q0 q1 (Just t) fa
|
||||||
|
[Cat a] -> case findSet a ns of
|
||||||
|
-- a is recursive
|
||||||
|
Just ni -> let (fa',ss) = addStatesForCats ni fa
|
||||||
|
getState x = lookup' x ss
|
||||||
|
niRules = catSetRules g ni
|
||||||
|
(nrs,rs) = partition (ruleIsNonRecursive ni) niRules
|
||||||
|
in if all (isRightLinear ni) niRules then
|
||||||
|
-- the set Ni is right-recursive or cyclic
|
||||||
|
let fa'' = foldFuns [make_fa (getState c) xs q1 | CFRule c xs _ <- nrs] fa'
|
||||||
|
fa''' = foldFuns [make_fa (getState c) xs (getState d) | CFRule c ss _ <- rs,
|
||||||
|
let (xs,Cat d) = (init ss,last ss)] fa''
|
||||||
|
in newTransition q0 (getState a) Nothing fa'''
|
||||||
|
else
|
||||||
|
-- the set Ni is left-recursive
|
||||||
|
let fa'' = foldFuns [make_fa q0 xs (getState c) | CFRule c xs _ <- nrs] fa'
|
||||||
|
fa''' = foldFuns [make_fa (getState d) xs (getState c) | CFRule c (Cat d:xs) _ <- rs] fa''
|
||||||
|
in newTransition (getState a) q1 Nothing fa'''
|
||||||
|
-- a is not recursive
|
||||||
|
Nothing -> let rs = catRules g a
|
||||||
|
in foldr (\ (CFRule _ b _) -> make_fa q0 b q1) fa rs
|
||||||
|
(x:beta) -> let (fa',q) = newState () fa
|
||||||
|
in make_fa q beta q1 $ make_fa q0 [x] q fa'
|
||||||
|
addStatesForCats [] fa = (fa,[])
|
||||||
|
addStatesForCats (c:cs) fa = let (fa',s) = newState () fa
|
||||||
|
(fa'',ss) = addStatesForCats cs fa'
|
||||||
|
in (fa'',(c,s):ss)
|
||||||
|
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
|
||||||
|
|
||||||
|
|
||||||
|
noCatsInSet :: Eq c => [c] -> [Symbol c t] -> Bool
|
||||||
|
noCatsInSet cs = not . any (`catElem` cs)
|
||||||
|
|
||||||
|
-- | Check if all the rules are right-linear, or all the rules are
|
||||||
|
-- left-linear, with respect to given categories.
|
||||||
|
allXLinear :: Eq c => [c] -> [CFRule c n t] -> Bool
|
||||||
|
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
|
||||||
|
|
||||||
|
-- | Checks if a context-free rule is right-linear.
|
||||||
|
isRightLinear :: Eq c => [c] -- ^ The categories to consider
|
||||||
|
-> CFRule c n t -- ^ The rule to check for right-linearity
|
||||||
|
-> Bool
|
||||||
|
isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
|
||||||
|
|
||||||
|
-- | Checks if a context-free rule is left-linear.
|
||||||
|
isLeftLinear :: Eq c => [c] -- ^ The categories to consider
|
||||||
|
-> CFRule c n t -- ^ The rule to check for right-linearity
|
||||||
|
-> Bool
|
||||||
|
isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs
|
||||||
|
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Relations
|
||||||
|
--
|
||||||
|
|
||||||
|
-- FIXME: these could use a more efficent data structures and algorithms.
|
||||||
|
|
||||||
|
type Rel a = [(a,a)]
|
||||||
|
|
||||||
|
isRelatedTo :: Eq a => Rel a -> a -> a -> Bool
|
||||||
|
isRelatedTo r x y = (x,y) `elem` r
|
||||||
|
|
||||||
|
transitiveClosure :: Eq a => Rel a -> Rel a
|
||||||
|
transitiveClosure r = fix (\r -> r `union` [ (x,w) | (x,y) <- r, (z,w) <- r, y == z ]) r
|
||||||
|
|
||||||
|
reflexiveClosure :: Eq a => [a] -- ^ The set over which the relation is defined.
|
||||||
|
-> Rel a -> Rel a
|
||||||
|
reflexiveClosure u r = [(x,x) | x <- u] `union` r
|
||||||
|
|
||||||
|
symmetricSubrelation :: Eq a => Rel a -> Rel a
|
||||||
|
symmetricSubrelation r = [p | p@(x,y) <- r, (y,x) `elem` r]
|
||||||
|
|
||||||
|
-- | Get the equivalence classes from an equivalence relation. Since
|
||||||
|
-- the relation is relexive, the set can be recoved from the relation.
|
||||||
|
equivalenceClasses :: Eq a => Rel a -> [[a]]
|
||||||
|
equivalenceClasses r = equivalenceClasses_ (nub (map fst r)) r
|
||||||
|
where equivalenceClasses_ [] _ = []
|
||||||
|
equivalenceClasses_ (x:xs) r = (x:ys):equivalenceClasses_ zs r
|
||||||
|
where (ys,zs) = partition (isRelatedTo r x) xs
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Utilities
|
||||||
|
--
|
||||||
|
|
||||||
|
foldFuns :: [a -> a] -> a -> a
|
||||||
|
foldFuns fs x = foldr ($) x fs
|
||||||
|
|
||||||
|
safeInit :: [a] -> [a]
|
||||||
|
safeInit [] = []
|
||||||
|
safeInit xs = init xs
|
||||||
@@ -1,9 +1,22 @@
|
|||||||
|
----------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : FiniteState
|
||||||
|
-- Maintainer : BB
|
||||||
|
-- Stability : (stable)
|
||||||
|
-- Portability : (portable)
|
||||||
|
--
|
||||||
|
-- > CVS $Date: 2005/09/12 15:46:44 $
|
||||||
|
-- > CVS $Author: bringert $
|
||||||
|
-- > CVS $Revision: 1.3 $
|
||||||
|
--
|
||||||
|
-- A simple finite state network module.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
module GF.Speech.FiniteState (FA, State,
|
module GF.Speech.FiniteState (FA, State,
|
||||||
startState, finalStates,
|
startState, finalStates,
|
||||||
states, transitions,
|
states, transitions,
|
||||||
newFA, addFinalState,
|
newFA, addFinalState,
|
||||||
newState, newTrans,
|
newState, newTransition, newTransitions,
|
||||||
moveLabelsToNodes) where
|
moveLabelsToNodes, minimize, asGraph) where
|
||||||
|
|
||||||
import Data.Graph.Inductive
|
import Data.Graph.Inductive
|
||||||
import Data.List (nub,partition)
|
import Data.List (nub,partition)
|
||||||
@@ -41,8 +54,20 @@ newState :: a -> FA a b -> (FA a b, State)
|
|||||||
newState x (FA g s ss) = (FA g' s ss, n)
|
newState x (FA g s ss) = (FA g' s ss, n)
|
||||||
where (g',n) = addNode x g
|
where (g',n) = addNode x g
|
||||||
|
|
||||||
newTrans :: Node -> Node -> b -> FA a b -> FA a b
|
newTransition :: Node -> Node -> b -> FA a b -> FA a b
|
||||||
newTrans f t l = onGraph (insEdge (f,t,l))
|
newTransition f t l = onGraph (insEdge (f,t,l))
|
||||||
|
|
||||||
|
newTransitions :: [(Node,Node,b)] -> FA a b -> FA a b
|
||||||
|
newTransitions ts = onGraph (insEdges ts)
|
||||||
|
|
||||||
|
mapStates :: (a -> c) -> FA a b -> FA c b
|
||||||
|
mapStates f (FA g s ss) = FA (nmap f g) s ss
|
||||||
|
|
||||||
|
asGraph :: FA a b -> Gr a b
|
||||||
|
asGraph (FA g _ _) = g
|
||||||
|
|
||||||
|
minimize :: FA () (Maybe a) -> FA () (Maybe a)
|
||||||
|
minimize = onGraph mimimizeGr1
|
||||||
|
|
||||||
--
|
--
|
||||||
-- * Graph functions
|
-- * Graph functions
|
||||||
@@ -111,6 +136,17 @@ ledgeToEdge (f,t,_) = (f,t)
|
|||||||
addContexts :: DynGraph gr => [Context a b] -> gr a b -> gr a b
|
addContexts :: DynGraph gr => [Context a b] -> gr a b -> gr a b
|
||||||
addContexts cs gr = foldr (&) gr cs
|
addContexts cs gr = foldr (&) gr cs
|
||||||
|
|
||||||
|
mimimizeGr1 :: DynGraph gr => gr () (Maybe a) -> gr () (Maybe a)
|
||||||
|
mimimizeGr1 = removeEmptyLoops
|
||||||
|
|
||||||
|
removeEmptyLoops :: DynGraph gr => gr () (Maybe a) -> gr () (Maybe a)
|
||||||
|
removeEmptyLoops = gmap (\ (i,n,(),o) -> (filter (r n) i,n,(),filter (r n) o))
|
||||||
|
where r n (Nothing,n') | n' == n = False
|
||||||
|
r _ _ = True
|
||||||
|
|
||||||
|
mimimizeGr2 :: DynGraph gr => gr (Maybe a) () -> gr (Maybe a) ()
|
||||||
|
mimimizeGr2 gr = gr
|
||||||
|
|
||||||
--
|
--
|
||||||
-- * Utilities
|
-- * Utilities
|
||||||
--
|
--
|
||||||
|
|||||||
@@ -5,9 +5,9 @@
|
|||||||
-- Stability : (stable)
|
-- Stability : (stable)
|
||||||
-- Portability : (portable)
|
-- Portability : (portable)
|
||||||
--
|
--
|
||||||
-- > CVS $Date: 2005/09/07 14:21:30 $
|
-- > CVS $Date: 2005/09/12 15:46:44 $
|
||||||
-- > CVS $Author: bringert $
|
-- > CVS $Author: bringert $
|
||||||
-- > CVS $Revision: 1.3 $
|
-- > CVS $Revision: 1.4 $
|
||||||
--
|
--
|
||||||
-- This module converts a CFG to an SLF finite-state network
|
-- This module converts a CFG to an SLF finite-state network
|
||||||
-- for use with the ATK recognizer. The SLF format is described
|
-- for use with the ATK recognizer. The SLF format is described
|
||||||
@@ -18,10 +18,11 @@
|
|||||||
-- categories in the grammar
|
-- categories in the grammar
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
module GF.Speech.PrSLF (slfPrinter) where
|
module GF.Speech.PrSLF (slfPrinter,slfGraphvizPrinter,faGraphvizPrinter) where
|
||||||
|
|
||||||
import GF.Speech.SRG
|
import GF.Speech.SRG
|
||||||
import GF.Speech.TransformCFG
|
import GF.Speech.TransformCFG
|
||||||
|
import GF.Speech.CFGToFiniteState
|
||||||
import GF.Speech.FiniteState
|
import GF.Speech.FiniteState
|
||||||
import GF.Infra.Ident
|
import GF.Infra.Ident
|
||||||
|
|
||||||
@@ -34,6 +35,9 @@ import GF.Infra.Option
|
|||||||
import Data.Char (toUpper,toLower)
|
import Data.Char (toUpper,toLower)
|
||||||
import Data.Maybe (fromMaybe)
|
import Data.Maybe (fromMaybe)
|
||||||
|
|
||||||
|
import Data.Graph.Inductive (emap,nmap)
|
||||||
|
import Data.Graph.Inductive.Graphviz
|
||||||
|
|
||||||
data SLF = SLF { slfNodes :: [SLFNode], slfEdges :: [SLFEdge] }
|
data SLF = SLF { slfNodes :: [SLFNode], slfEdges :: [SLFEdge] }
|
||||||
|
|
||||||
data SLFNode = SLFNode { nId :: Int, nWord :: SLFWord }
|
data SLFNode = SLFNode { nId :: Int, nWord :: SLFWord }
|
||||||
@@ -46,31 +50,35 @@ data SLFEdge = SLFEdge { eId :: Int, eStart :: Int, eEnd :: Int }
|
|||||||
|
|
||||||
slfPrinter :: Ident -- ^ Grammar name
|
slfPrinter :: Ident -- ^ Grammar name
|
||||||
-> Options -> CGrammar -> String
|
-> Options -> CGrammar -> String
|
||||||
slfPrinter name opts cfg = prSLF (regularToSLF start rgr) ""
|
slfPrinter name opts cfg = prSLF (automatonToSLF $ moveLabelsToNodes $ cfgToFA name opts cfg) ""
|
||||||
where start = getStartCat opts
|
|
||||||
rgr = makeRegular $ removeEmptyCats $ cfgToCFRules cfg
|
|
||||||
|
|
||||||
regularToSLF :: String -> CFRules -> SLF
|
slfGraphvizPrinter :: Ident -- ^ Grammar name
|
||||||
regularToSLF s rs = automatonToSLF $ compileAutomaton s rs
|
-> Options -> CGrammar -> String
|
||||||
|
slfGraphvizPrinter name opts cfg =
|
||||||
|
graphviz (nmap (fromMaybe "") $ asGraph $ moveLabelsToNodes $ cfgToFA name opts cfg) (prIdent name) (8.5,11.0) (1,1) Landscape
|
||||||
|
|
||||||
automatonToSLF :: FA () (Maybe String) -> SLF
|
faGraphvizPrinter :: Ident -- ^ Grammar name
|
||||||
|
-> Options -> CGrammar -> String
|
||||||
|
faGraphvizPrinter name opts cfg =
|
||||||
|
graphviz (nmap (const "") $ emap (fromMaybe "") $ asGraph $ cfgToFA name opts cfg) (prIdent name) (8.5,11.0) (1,1) Landscape
|
||||||
|
|
||||||
|
automatonToSLF :: FA (Maybe String) () -> SLF
|
||||||
automatonToSLF fa =
|
automatonToSLF fa =
|
||||||
SLF { slfNodes = map mkSLFNode (states fa'),
|
SLF { slfNodes = map mkSLFNode (states fa),
|
||||||
slfEdges = zipWith mkSLFEdge [0..] (transitions fa') }
|
slfEdges = zipWith mkSLFEdge [0..] (transitions fa) }
|
||||||
where fa' = moveLabelsToNodes fa
|
where mkSLFNode (i,w) = SLFNode { nId = i, nWord = w }
|
||||||
mkSLFNode (i,w) = SLFNode { nId = i, nWord = w }
|
|
||||||
mkSLFEdge i (f,t,()) = SLFEdge { eId = i, eStart = f, eEnd = t }
|
mkSLFEdge i (f,t,()) = SLFEdge { eId = i, eStart = f, eEnd = t }
|
||||||
|
|
||||||
|
|
||||||
prSLF :: SLF -> ShowS
|
prSLF :: SLF -> ShowS
|
||||||
prSLF (SLF { slfNodes = ns, slfEdges = es}) = header . unlinesS (map prNode ns) . unlinesS (map prEdge es)
|
prSLF (SLF { slfNodes = ns, slfEdges = es})
|
||||||
|
= header . unlinesS (map prNode ns) . nl . unlinesS (map prEdge es) . nl
|
||||||
where
|
where
|
||||||
header = showString "VERSION=1.0" . nl
|
header = showString "VERSION=1.0" . nl
|
||||||
. prFields [("N",show (length ns)),("L", show (length es))] . nl
|
. prFields [("N",show (length ns)),("L", show (length es))] . nl
|
||||||
prNode n = prFields [("I",show (nId n)),("W",showWord (nWord n))]
|
prNode n = prFields [("I",show (nId n)),("W",showWord (nWord n))]
|
||||||
prEdge e = prFields [("J",show (eId e)),("S",show (eStart e)),("E",show (eEnd e))]
|
prEdge e = prFields [("J",show (eId e)),("S",show (eStart e)),("E",show (eEnd e))]
|
||||||
|
|
||||||
|
|
||||||
showWord :: SLFWord -> String
|
showWord :: SLFWord -> String
|
||||||
showWord Nothing = "!NULL"
|
showWord Nothing = "!NULL"
|
||||||
showWord (Just w) = w -- FIXME: convert words to upper case
|
showWord (Just w) = w -- FIXME: convert words to upper case
|
||||||
|
|||||||
@@ -5,9 +5,9 @@
|
|||||||
-- Stability : (stable)
|
-- Stability : (stable)
|
||||||
-- Portability : (portable)
|
-- Portability : (portable)
|
||||||
--
|
--
|
||||||
-- > CVS $Date: 2005/09/07 14:21:30 $
|
-- > CVS $Date: 2005/09/12 15:46:44 $
|
||||||
-- > CVS $Author: bringert $
|
-- > CVS $Author: bringert $
|
||||||
-- > CVS $Revision: 1.15 $
|
-- > CVS $Revision: 1.16 $
|
||||||
--
|
--
|
||||||
-- Representation of, conversion to, and utilities for
|
-- Representation of, conversion to, and utilities for
|
||||||
-- printing of a general Speech Recognition Grammar.
|
-- printing of a general Speech Recognition Grammar.
|
||||||
@@ -58,24 +58,18 @@ makeSRG i opts gr = SRG { grammarName = name,
|
|||||||
where
|
where
|
||||||
name = prIdent i
|
name = prIdent i
|
||||||
origStart = getStartCat opts
|
origStart = getStartCat opts
|
||||||
gr' = removeLeftRecursion $ removeEmptyCats $ cfgToCFRules gr
|
gr' = removeLeftRecursion $ removeIdenticalRules $ removeEmptyCats $ cfgToCFRules gr
|
||||||
(cats,cfgRules) = unzip gr'
|
(cats,cfgRules) = unzip gr'
|
||||||
names = mkCatNames name cats
|
names = mkCatNames name cats
|
||||||
|
|
||||||
cfgRulesToSRGRule :: FiniteMap String String -> [CFRule_] -> SRGRule
|
cfgRulesToSRGRule :: FiniteMap String String -> [CFRule_] -> SRGRule
|
||||||
cfgRulesToSRGRule names rs@(r:_) = SRGRule cat origCat rhs
|
cfgRulesToSRGRule names rs@(r:_) = SRGRule cat origCat rhs
|
||||||
where origCat = ruleCat r
|
where origCat = lhsCat r
|
||||||
cat = lookupFM_ names origCat
|
cat = lookupFM_ names origCat
|
||||||
rhs = nub $ map (map renameCat . ruleRhs) rs
|
rhs = nub $ map (map renameCat . ruleRhs) rs
|
||||||
renameCat (Cat c) = Cat (lookupFM_ names c)
|
renameCat (Cat c) = Cat (lookupFM_ names c)
|
||||||
renameCat t = t
|
renameCat t = t
|
||||||
|
|
||||||
ruleCat :: CFRule c n t -> c
|
|
||||||
ruleCat (CFRule c _ _) = c
|
|
||||||
|
|
||||||
ruleRhs :: CFRule c n t -> [Symbol c t]
|
|
||||||
ruleRhs (CFRule _ r _) = r
|
|
||||||
|
|
||||||
mkCatNames :: String -- ^ Category name prefix
|
mkCatNames :: String -- ^ Category name prefix
|
||||||
-> [String] -- ^ Original category names
|
-> [String] -- ^ Original category names
|
||||||
-> FiniteMap String String -- ^ Maps original names to SRG names
|
-> FiniteMap String String -- ^ Maps original names to SRG names
|
||||||
|
|||||||
@@ -5,9 +5,9 @@
|
|||||||
-- Stability : (stable)
|
-- Stability : (stable)
|
||||||
-- Portability : (portable)
|
-- Portability : (portable)
|
||||||
--
|
--
|
||||||
-- > CVS $Date: 2005/09/08 15:45:17 $
|
-- > CVS $Date: 2005/09/12 15:46:44 $
|
||||||
-- > CVS $Author: bringert $
|
-- > CVS $Author: bringert $
|
||||||
-- > CVS $Revision: 1.19 $
|
-- > CVS $Revision: 1.20 $
|
||||||
--
|
--
|
||||||
-- This module does some useful transformations on CFGs.
|
-- This module does some useful transformations on CFGs.
|
||||||
--
|
--
|
||||||
@@ -16,12 +16,12 @@
|
|||||||
-- peb thinks: most of this module should be moved to GF.Conversion...
|
-- peb thinks: most of this module should be moved to GF.Conversion...
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
module GF.Speech.TransformCFG (CFRule_, CFRules,
|
-- FIXME: lots of this stuff is used by CFGToFiniteState, thus
|
||||||
|
-- the missing explicit expot list.
|
||||||
|
module GF.Speech.TransformCFG {- (CFRule_, CFRules,
|
||||||
cfgToCFRules, getStartCat,
|
cfgToCFRules, getStartCat,
|
||||||
removeLeftRecursion,
|
removeLeftRecursion,
|
||||||
removeEmptyCats,
|
removeEmptyCats, removeIdenticalRules) -} where
|
||||||
makeRegular,
|
|
||||||
compileAutomaton) where
|
|
||||||
|
|
||||||
import GF.Infra.Ident
|
import GF.Infra.Ident
|
||||||
import GF.Formalism.CFG
|
import GF.Formalism.CFG
|
||||||
@@ -62,8 +62,6 @@ groupProds = fmToList . addListToFM_C (++) emptyFM . map (\r -> (lhsCat r,[r]))
|
|||||||
ungroupProds :: CFRules -> [CFRule_]
|
ungroupProds :: CFRules -> [CFRule_]
|
||||||
ungroupProds = concat . map snd
|
ungroupProds = concat . map snd
|
||||||
|
|
||||||
catRules :: CFRules -> Cat_ -> [CFRule_]
|
|
||||||
catRules rs c = fromMaybe [] (lookup c rs)
|
|
||||||
|
|
||||||
-- | Remove productions which use categories which have no productions
|
-- | Remove productions which use categories which have no productions
|
||||||
removeEmptyCats :: CFRules -> CFRules
|
removeEmptyCats :: CFRules -> CFRules
|
||||||
@@ -77,13 +75,18 @@ removeEmptyCats = fix removeEmptyCats'
|
|||||||
emptyCats = filter (nothingOrNull . flip lookup rs) allCats
|
emptyCats = filter (nothingOrNull . flip lookup rs) allCats
|
||||||
k' = map (\ (c,xs) -> (c, filter (not . anyUsedBy emptyCats) xs)) keep
|
k' = map (\ (c,xs) -> (c, filter (not . anyUsedBy emptyCats) xs)) keep
|
||||||
|
|
||||||
|
-- | Remove rules which are identical, not caring about the rule names.
|
||||||
|
removeIdenticalRules :: CFRules -> CFRules
|
||||||
|
removeIdenticalRules g = [(c,nubBy sameCatAndRhs rs) | (c,rs) <- g]
|
||||||
|
where sameCatAndRhs (CFRule c1 ss1 _) (CFRule c2 ss2 _) = c1 == c2 && ss1 == ss2
|
||||||
|
|
||||||
removeLeftRecursion :: CFRules -> CFRules
|
removeLeftRecursion :: CFRules -> CFRules
|
||||||
removeLeftRecursion rs = concatMap removeDirectLeftRecursion $ map handleProds rs
|
removeLeftRecursion rs = concatMap removeDirectLeftRecursion $ map handleProds rs
|
||||||
where
|
where
|
||||||
handleProds (c, r) = (c, concatMap handleProd r)
|
handleProds (c, r) = (c, concatMap handleProd r)
|
||||||
handleProd (CFRule ai (Cat aj:alpha) n) | aj < ai =
|
handleProd (CFRule ai (Cat aj:alpha) n) | aj < ai =
|
||||||
-- FIXME: this will give multiple rules with the same name
|
-- FIXME: this will give multiple rules with the same name
|
||||||
[CFRule ai (beta ++ alpha) n | CFRule _ beta _ <- fromJust (lookup aj rs)]
|
[CFRule ai (beta ++ alpha) n | CFRule _ beta _ <- lookup' aj rs]
|
||||||
handleProd r = [r]
|
handleProd r = [r]
|
||||||
|
|
||||||
removeDirectLeftRecursion :: (Cat_,[CFRule_]) -- ^ All productions for a category
|
removeDirectLeftRecursion :: (Cat_,[CFRule_]) -- ^ All productions for a category
|
||||||
@@ -103,92 +106,22 @@ isDirectLeftRecursive (CFRule c (Cat c':_) _) = c == c'
|
|||||||
isDirectLeftRecursive _ = False
|
isDirectLeftRecursive _ = False
|
||||||
|
|
||||||
|
|
||||||
-- Use the transformation algorithm from \"Regular Approximation of Context-free
|
|
||||||
-- Grammars through Approximation\", Mohri and Nederhof, 2000
|
|
||||||
-- to create an over-generating regular frammar for a context-free
|
|
||||||
-- grammar
|
|
||||||
makeRegular :: CFRules -> CFRules
|
|
||||||
makeRegular g = groupProds $ concatMap trSet (mutRecCats g)
|
|
||||||
where trSet cs | allXLinear cs rs = rs
|
|
||||||
| otherwise = concatMap handleCat cs
|
|
||||||
where rs = concatMap (catRules g) cs
|
|
||||||
handleCat c = [CFRule c' [] (mkName (c++"-empty"))] -- introduce A' -> e
|
|
||||||
++ concatMap (makeRightLinearRules c) (catRules g c)
|
|
||||||
where c' = newCat c
|
|
||||||
makeRightLinearRules b' (CFRule c ss n) =
|
|
||||||
case ys of
|
|
||||||
[] -> [CFRule b' (xs ++ [Cat (newCat c)]) n] -- no non-terminals left
|
|
||||||
(Cat b:zs) -> CFRule b' (xs ++ [Cat b]) n
|
|
||||||
: makeRightLinearRules (newCat b) (CFRule c zs n)
|
|
||||||
where (xs,ys) = break (`catElem` cs) ss
|
|
||||||
newCat c = c ++ "$"
|
|
||||||
|
|
||||||
|
|
||||||
-- | Get the sets of mutually recursive non-terminals for a grammar.
|
|
||||||
mutRecCats :: CFRules -> [[Cat_]]
|
|
||||||
mutRecCats g = equivalenceClasses $ symmetricSubrelation $ transitiveClosure $ reflexiveClosure allCats r
|
|
||||||
where r = nub [(c,c') | (_,rs) <- g, CFRule c ss _ <- rs, Cat c' <- ss]
|
|
||||||
allCats = map fst g
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
-- Convert a strongly regular grammar to a finite automaton.
|
|
||||||
compileAutomaton :: Cat_ -- ^ Start category
|
|
||||||
-> CFRules
|
|
||||||
-> FA () (Maybe Token)
|
|
||||||
compileAutomaton start g = make_fa s [Cat start] f g fa''
|
|
||||||
where fa = newFA ()
|
|
||||||
s = startState fa
|
|
||||||
(fa',f) = newState () fa
|
|
||||||
fa'' = addFinalState f fa'
|
|
||||||
|
|
||||||
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
|
|
||||||
-- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997.
|
|
||||||
make_fa :: State -> [Symbol Cat_ Token] -> State
|
|
||||||
-> CFRules -> FA () (Maybe Token) -> FA () (Maybe Token)
|
|
||||||
make_fa q0 a q1 g fa =
|
|
||||||
case a of
|
|
||||||
[] -> newTrans q0 q1 Nothing fa
|
|
||||||
[Tok t] -> newTrans q0 q1 (Just t) fa
|
|
||||||
[Cat c] -> undefined
|
|
||||||
(x:beta) -> let (fa',q) = newState () fa
|
|
||||||
fa'' = make_fa q0 [x] q g fa'
|
|
||||||
fa''' = make_fa q beta q1 g fa''
|
|
||||||
in fa'''
|
|
||||||
|
|
||||||
--
|
--
|
||||||
-- * CFG rule utilities
|
-- * CFG rule utilities
|
||||||
--
|
--
|
||||||
|
|
||||||
{-
|
catRules :: CFRules -> Cat_ -> [CFRule_]
|
||||||
-- | Get all the rules for a given category.
|
catRules rs c = fromMaybe [] (lookup c rs)
|
||||||
catRules :: Eq c => [CFRule c n t] -> c -> [CFRule c n t]
|
|
||||||
catRules rs c = [r | r@(CFRule c' _ _) <- rs, c' == c]
|
|
||||||
-}
|
|
||||||
|
|
||||||
-- | Gets the set of LHS categories of a set of rules.
|
catSetRules :: CFRules -> [Cat_] -> [CFRule_]
|
||||||
lhsCats :: Eq c => [CFRule c n t] -> [c]
|
catSetRules g s = concatMap (catRules g) s
|
||||||
lhsCats = nub . map lhsCat
|
|
||||||
|
|
||||||
lhsCat :: CFRule c n t -> c
|
lhsCat :: CFRule c n t -> c
|
||||||
lhsCat (CFRule c _ _) = c
|
lhsCat (CFRule c _ _) = c
|
||||||
|
|
||||||
-- | Check if all the rules are right-linear, or all the rules are
|
ruleRhs :: CFRule c n t -> [Symbol c t]
|
||||||
-- left-linear, with respect to given categories.
|
ruleRhs (CFRule _ ss _) = ss
|
||||||
allXLinear :: Eq c => [c] -> [CFRule c n t] -> Bool
|
|
||||||
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
|
|
||||||
|
|
||||||
-- | Checks if a context-free rule is right-linear.
|
|
||||||
isRightLinear :: Eq c => [c] -- ^ The categories to consider
|
|
||||||
-> CFRule c n t -- ^ The rule to check for right-linearity
|
|
||||||
-> Bool
|
|
||||||
isRightLinear cs (CFRule _ ss _) = all (not . (`catElem` cs)) (safeInit ss)
|
|
||||||
|
|
||||||
-- | Checks if a context-free rule is left-linear.
|
|
||||||
isLeftLinear :: Eq c => [c] -- ^ The categories to consider
|
|
||||||
-> CFRule c n t -- ^ The rule to check for right-linearity
|
|
||||||
-> Bool
|
|
||||||
isLeftLinear cs (CFRule _ ss _) = all (not . (`catElem` cs)) (drop 1 ss)
|
|
||||||
|
|
||||||
-- | Checks if a symbol is a non-terminal of one of the given categories.
|
-- | Checks if a symbol is a non-terminal of one of the given categories.
|
||||||
catElem :: Eq c => Symbol c t -> [c] -> Bool
|
catElem :: Eq c => Symbol c t -> [c] -> Bool
|
||||||
@@ -202,37 +135,14 @@ anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss)
|
|||||||
mkName :: String -> Name
|
mkName :: String -> Name
|
||||||
mkName n = Name (IC n) []
|
mkName n = Name (IC n) []
|
||||||
|
|
||||||
--
|
|
||||||
-- * Relations
|
|
||||||
--
|
|
||||||
|
|
||||||
-- FIXME: these could use a more efficent data structures and algorithms.
|
|
||||||
|
|
||||||
isRelatedTo :: Eq a => [(a,a)] -> a -> a -> Bool
|
|
||||||
isRelatedTo r x y = (x,y) `elem` r
|
|
||||||
|
|
||||||
transitiveClosure :: Eq a => [(a,a)] -> [(a,a)]
|
|
||||||
transitiveClosure r = fix (\r -> r `union` [ (x,w) | (x,y) <- r, (z,w) <- r, y == z ]) r
|
|
||||||
|
|
||||||
reflexiveClosure :: Eq a => [a] -- ^ The set over which the relation is defined.
|
|
||||||
-> [(a,a)] -> [(a,a)]
|
|
||||||
reflexiveClosure u r = [(x,x) | x <- u] `union` r
|
|
||||||
|
|
||||||
symmetricSubrelation :: Eq a => [(a,a)] -> [(a,a)]
|
|
||||||
symmetricSubrelation r = [p | p@(x,y) <- r, (y,x) `elem` r]
|
|
||||||
|
|
||||||
-- | Get the equivalence classes from an equivalence relation. Since
|
|
||||||
-- the relation is relexive, the set can be recoved from the relation.
|
|
||||||
equivalenceClasses :: Eq a => [(a,a)] -> [[a]]
|
|
||||||
equivalenceClasses r = equivalenceClasses_ (nub (map fst r)) r
|
|
||||||
where equivalenceClasses_ [] _ = []
|
|
||||||
equivalenceClasses_ (x:xs) r = (x:ys):equivalenceClasses_ zs r
|
|
||||||
where (ys,zs) = partition (isRelatedTo r x) xs
|
|
||||||
|
|
||||||
--
|
--
|
||||||
-- * Utilities
|
-- * Utilities
|
||||||
--
|
--
|
||||||
|
|
||||||
|
findSet :: Eq c => c -> [[c]] -> Maybe [c]
|
||||||
|
findSet x = find (x `elem`)
|
||||||
|
|
||||||
fix :: Eq a => (a -> a) -> a -> a
|
fix :: Eq a => (a -> a) -> a -> a
|
||||||
fix f x = let x' = f x in if x' == x then x else fix f x'
|
fix f x = let x' = f x in if x' == x then x else fix f x'
|
||||||
|
|
||||||
@@ -240,26 +150,12 @@ nothingOrNull :: Maybe [a] -> Bool
|
|||||||
nothingOrNull Nothing = True
|
nothingOrNull Nothing = True
|
||||||
nothingOrNull (Just xs) = null xs
|
nothingOrNull (Just xs) = null xs
|
||||||
|
|
||||||
safeInit :: [a] -> [a]
|
|
||||||
safeInit [] = []
|
|
||||||
safeInit xs = init xs
|
|
||||||
|
|
||||||
unionAll :: Eq a => [[a]] -> [a]
|
unionAll :: Eq a => [[a]] -> [a]
|
||||||
unionAll = nub . concat
|
unionAll = nub . concat
|
||||||
|
|
||||||
whenMP :: MonadPlus m => Bool -> a -> m a
|
whenMP :: MonadPlus m => Bool -> a -> m a
|
||||||
whenMP b x = if b then return x else mzero
|
whenMP b x = if b then return x else mzero
|
||||||
|
|
||||||
--
|
lookup' :: Eq a => a -> [(a,b)] -> b
|
||||||
-- * Testing stuff, can be removed
|
lookup' x = fromJust . lookup x
|
||||||
--
|
|
||||||
|
|
||||||
c --> ss = CFRule c ss (mkName "")
|
|
||||||
|
|
||||||
prGr g = putStrLn $ showGr g
|
|
||||||
|
|
||||||
showGr g = unlines $ map showRule g
|
|
||||||
|
|
||||||
showRule (CFRule c ss _) = c ++ " --> " ++ unwords (map showSym ss)
|
|
||||||
|
|
||||||
showSym s = symbol id show s
|
|
||||||
@@ -5,9 +5,9 @@
|
|||||||
-- Stability : (stable)
|
-- Stability : (stable)
|
||||||
-- Portability : (portable)
|
-- Portability : (portable)
|
||||||
--
|
--
|
||||||
-- > CVS $Date: 2005/09/04 11:45:38 $
|
-- > CVS $Date: 2005/09/12 15:46:44 $
|
||||||
-- > CVS $Author: aarne $
|
-- > CVS $Author: bringert $
|
||||||
-- > CVS $Revision: 1.70 $
|
-- > CVS $Revision: 1.71 $
|
||||||
--
|
--
|
||||||
-- A database for customizable GF shell commands.
|
-- A database for customizable GF shell commands.
|
||||||
--
|
--
|
||||||
@@ -57,7 +57,7 @@ import GF.Canon.MkGFC
|
|||||||
import GF.CF.CFtoSRG
|
import GF.CF.CFtoSRG
|
||||||
import GF.Speech.PrGSL (gslPrinter)
|
import GF.Speech.PrGSL (gslPrinter)
|
||||||
import GF.Speech.PrJSGF (jsgfPrinter)
|
import GF.Speech.PrJSGF (jsgfPrinter)
|
||||||
import GF.Speech.PrSLF (slfPrinter)
|
import GF.Speech.PrSLF (slfPrinter,slfGraphvizPrinter,faGraphvizPrinter)
|
||||||
|
|
||||||
import GF.Data.Zipper
|
import GF.Data.Zipper
|
||||||
|
|
||||||
@@ -241,6 +241,12 @@ customGrammarPrinter =
|
|||||||
,(strCI "slf", \s -> let opts = stateOptions s
|
,(strCI "slf", \s -> let opts = stateOptions s
|
||||||
name = cncId s
|
name = cncId s
|
||||||
in slfPrinter name opts $ stateCFG s)
|
in slfPrinter name opts $ stateCFG s)
|
||||||
|
,(strCI "slf_graphviz", \s -> let opts = stateOptions s
|
||||||
|
name = cncId s
|
||||||
|
in slfGraphvizPrinter name opts $ stateCFG s)
|
||||||
|
,(strCI "fa_graphviz", \s -> let opts = stateOptions s
|
||||||
|
name = cncId s
|
||||||
|
in faGraphvizPrinter name opts $ stateCFG s)
|
||||||
,(strCI "plbnf", prLBNF True)
|
,(strCI "plbnf", prLBNF True)
|
||||||
,(strCI "lbnf", prLBNF False)
|
,(strCI "lbnf", prLBNF False)
|
||||||
,(strCI "bnf", prBNF False)
|
,(strCI "bnf", prBNF False)
|
||||||
|
|||||||
Reference in New Issue
Block a user