Build SLF networks with sublattices.

This commit is contained in:
bringert
2006-01-04 21:41:12 +00:00
parent e22275d467
commit a4ba93cc55
6 changed files with 252 additions and 72 deletions

View File

@@ -12,9 +12,11 @@
-- Approximates CFGs with finite state networks.
-----------------------------------------------------------------------------
module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular) where
module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular,
MFALabel(..), MFA(..), cfgToMFA) where
import Data.List
import Data.Maybe
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
@@ -31,11 +33,13 @@ import GF.Speech.FiniteState
import GF.Speech.Relation
import GF.Speech.TransformCFG
data Recursivity = RightR | LeftR | NotR
data MutRecSet = MutRecSet {
mrCats :: [Cat_],
mrCats :: Set Cat_,
mrNonRecRules :: [CFRule_],
mrRecRules :: [CFRule_],
mrIsRightRec :: Bool
mrRec :: Recursivity
}
@@ -48,6 +52,10 @@ cfgToFA opts = minimize . compileAutomaton start . makeSimpleRegular
makeSimpleRegular :: CGrammar -> CFRules
makeSimpleRegular = makeRegular . removeIdenticalRules . removeEmptyCats . cfgToCFRules
--
-- * Approximate context-free grammars with regular grammars.
--
-- Use the transformation algorithm from \"Regular Approximation of Context-free
-- Grammars through Approximation\", Mohri and Nederhof, 2000
-- to create an over-generating regular frammar for a context-free
@@ -63,13 +71,15 @@ makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
where c' = newCat c
makeRightLinearRules b' (CFRule c ss n) =
case ys of
[] -> [CFRule b' (xs ++ [Cat (newCat c)]) n] -- no non-terminals left
(Cat b:zs) -> CFRule b' (xs ++ [Cat b]) n
: makeRightLinearRules (newCat b) (CFRule c zs n)
[] -> newRule b' (xs ++ [Cat (newCat c)]) n -- no non-terminals left
(Cat b:zs) -> newRule b' (xs ++ [Cat b]) n
++ makeRightLinearRules (newCat b) (CFRule c zs n)
where (xs,ys) = break (`catElem` cs) ss
-- don't add rules on the form A -> A
newRule c rhs n | rhs == [Cat c] = []
| otherwise = [CFRule c rhs n]
newCat c = c ++ "$"
-- | Get the sets of mutually recursive non-terminals for a grammar.
mutRecCats :: Bool -- ^ If true, all categories will be in some set.
-- If false, only recursive categories will be included.
@@ -79,32 +89,19 @@ mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transit
allCats = map fst g
refl = if incAll then reflexiveClosure_ allCats else reflexiveSubrelation
--
-- * Compile strongly regular grammars to NFAs
--
-- Convert a strongly regular grammar to a finite automaton.
compileAutomaton :: Cat_ -- ^ Start category
-> CFRules
-> NFA Token
compileAutomaton start g = make_fa (g,ns) s [Cat start] f fa''
compileAutomaton start g = make_fa (g,ns) s [Cat start] f fa
where
fa = newFA ()
s = startState fa
(fa',f) = newState () fa
fa'' = addFinalState f fa'
(fa,s,f) = newFA_
ns = mutRecSets g $ mutRecCats False g
mutRecSets :: CFRules -> [Set Cat_] -> MutRecSets
mutRecSets g = Map.fromList . concatMap mkMutRecSet
where
mkMutRecSet cs = [ (c,ms) | c <- csl ]
where csl = Set.toList cs
rs = catSetRules g csl
(nrs,rrs) = partition (ruleIsNonRecursive cs) rs
ms = MutRecSet {
mrCats = csl,
mrNonRecRules = nrs,
mrRecRules = rrs,
mrIsRightRec = all (isRightLinear cs) rrs
}
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
-- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997.
make_fa :: (CFRules,MutRecSets) -> State -> [Symbol Cat_ Token] -> State
@@ -116,14 +113,14 @@ make_fa c@(g,ns) q0 alpha q1 fa =
[Cat a] -> case Map.lookup a ns of
-- a is recursive
Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) ->
if mrIsRightRec n
then
case mrRec n of
RightR ->
-- the set Ni is right-recursive or cyclic
let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs]
++ [(getState c, xs, getState d) | CFRule c ss _ <- rs,
let (xs,Cat d) = (init ss,last ss)]
in make_fas new $ newTransition q0 (getState a) Nothing fa'
else
LeftR ->
-- the set Ni is left-recursive
let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs]
++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- rs]
@@ -143,16 +140,123 @@ make_fa c@(g,ns) q0 alpha q1 fa =
make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs
addStatesForCats :: [Cat_] -> NFA Token -> (NFA Token, Map Cat_ State)
--
-- * Multiple DFA type
--
data MFALabel a = MFASym a | MFASub String
deriving Eq
data MFA a = MFA (DFA (MFALabel a)) [(String,DFA (MFALabel a))]
--
-- * Compile strongly regular grammars to multiple DFAs
--
cfgToMFA :: Options -> CGrammar -> MFA String
cfgToMFA opts g = MFA startFA [(c, toMFA (minimize fa)) | (c,fa) <- fas]
where start = getStartCat opts
startFA = let (fa,s,f) = newFA_
in newTransition s f (MFASub start) fa
fas = compileAutomata $ makeSimpleRegular g
mkMFALabel (Cat c) = MFASub c
mkMFALabel (Tok t) = MFASym t
toMFA = mapTransitions mkMFALabel
-- | Convert a strongly regular grammar to a number of finite automata,
-- one for each non-terminal.
-- The edges in the automata accept tokens, or name another automaton to use.
compileAutomata :: CFRules
-> [(Cat_,NFA (Symbol Cat_ Token))]
-- ^ A map of non-terminals and their automata.
compileAutomata g = [(c, makeOneFA c) | c <- allCats g]
where
mrs = mutRecSets g $ mutRecCats True g
makeOneFA c = make_fa1 mr s [Cat c] f fa
where (fa,s,f) = newFA_
mr = fromJust (Map.lookup c mrs)
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
-- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997,
-- adapted to build a finite automaton for a single (mutually recursive) set only.
-- Categories not in the set (fromJustMap.lookup c mrs)will result in category-labelled edges.
make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which
-- we are building the automaton.
-> State -- ^ State to come from
-> [Symbol Cat_ Token] -- ^ Symbols to accept
-> State -- ^ State to end up in
-> NFA (Symbol Cat_ Token) -- ^ FA to add to.
-> NFA (Symbol Cat_ Token)
make_fa1 mr q0 alpha q1 fa =
case alpha of
[] -> newTransition q0 q1 Nothing fa
[t@(Tok _)] -> newTransition q0 q1 (Just t) fa
[c@(Cat a)] | not (a `Set.member` mrCats mr) -> newTransition q0 q1 (Just c) fa
[Cat a] ->
case mrRec mr of
NotR -> -- the set is a non-recursive (always singleton) set of categories
-- so the set of category rules is the set of rules for the whole set
make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa
RightR -> -- the set is right-recursive or cyclic
let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr]
++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr,
let (xs,Cat d) = (init ss,last ss)]
in make_fas new $ newTransition q0 (getState a) Nothing fa'
LeftR -> -- the set is left-recursive
let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr]
++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- mrRecRules mr]
in make_fas new $ newTransition (getState a) q1 Nothing fa'
where
(fa',stateMap) = addStatesForCats (mrCats mr) fa
getState x = Map.findWithDefault
(error $ "CFGToFiniteState: No state for " ++ x)
x stateMap
(x:beta) -> let (fa',q) = newState () fa
in make_fas [(q0,[x],q),(q,beta,q1)] fa'
where
make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa1 mr s1 xs s2 f') fa xs
mutRecSets :: CFRules -> [Set Cat_] -> MutRecSets
mutRecSets g = Map.fromList . concatMap mkMutRecSet
where
mkMutRecSet cs = [ (c,ms) | c <- csl ]
where csl = Set.toList cs
rs = catSetRules g csl
(nrs,rrs) = partition (ruleIsNonRecursive cs) rs
ms = MutRecSet {
mrCats = cs,
mrNonRecRules = nrs,
mrRecRules = rrs,
mrRec = rec
}
rec | null rrs = NotR
| all (isRightLinear cs) rrs = RightR
| otherwise = LeftR
--
-- * Utilities
--
-- | Create a new finite automaton with an initial and a final state.
newFA_ :: Enum n => (FA n () b, n, n)
newFA_ = (fa'', s, f)
where fa = newFA ()
s = startState fa
(fa',f) = newState () fa
fa'' = addFinalState f fa'
-- | Add a state for the given NFA for each of the categories
-- in the given set. Returns a map of categories to their
-- corresponding states.
addStatesForCats :: Set Cat_ -> NFA t -> (NFA t, Map Cat_ State)
addStatesForCats cs fa = (fa', m)
where (fa', ns) = newStates (replicate (length cs) ()) fa
m = Map.fromList (zip cs (map fst ns))
where (fa', ns) = newStates (replicate (Set.size cs) ()) fa
m = Map.fromList (zip (Set.toList cs) (map fst ns))
ruleIsNonRecursive :: Set Cat_ -> CFRule_ -> Bool
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
noCatsInSet :: Set Cat_ -> [Symbol Cat_ t] -> Bool
noCatsInSet cs = not . any (`catElem` cs)

View File

@@ -22,7 +22,7 @@ module GF.Speech.FiniteState (FA, State, NFA, DFA,
oneFinalState,
moveLabelsToNodes, minimize,
dfa2nfa,
prFAGraphviz) where
prFAGraphviz, faToGraphviz) where
import Data.List
import Data.Maybe
@@ -213,13 +213,14 @@ dfa2nfa = mapTransitions Just
--
prFAGraphviz :: (Eq n,Show n) => FA n String String -> String
prFAGraphviz = Dot.prGraphviz . toGraphviz
prFAGraphviz = Dot.prGraphviz . faToGraphviz ""
prFAGraphviz_ :: (Eq n,Show n,Show a, Show b) => FA n a b -> String
prFAGraphviz_ = Dot.prGraphviz . toGraphviz . mapStates show . mapTransitions show
prFAGraphviz_ = Dot.prGraphviz . faToGraphviz "" . mapStates show . mapTransitions show
toGraphviz :: (Eq n,Show n) => FA n String String -> Dot.Graph
toGraphviz (FA (Graph _ ns es) s f) = Dot.Graph Dot.Directed [] (map mkNode ns) (map mkEdge es)
faToGraphviz :: (Eq n,Show n) => String -- ^ Graph ID
-> FA n String String -> Dot.Graph
faToGraphviz i (FA (Graph _ ns es) s f) = Dot.Graph Dot.Directed i [] (map mkNode ns) (map mkEdge es) []
where mkNode (n,l) = Dot.Node (show n) attrs
where attrs = [("label",l)]
++ if n == s then [("shape","box")] else []
@@ -231,4 +232,4 @@ toGraphviz (FA (Graph _ ns es) s f) = Dot.Graph Dot.Directed [] (map mkNode ns)
--
lookups :: Ord k => [k] -> Map k a -> [a]
lookups xs m = mapMaybe (flip Map.lookup m) xs
lookups xs m = mapMaybe (flip Map.lookup m) xs

View File

@@ -31,40 +31,70 @@ import GF.Speech.CFGToFiniteState
import GF.Speech.FiniteState
import GF.Speech.SRG
import GF.Speech.TransformCFG
import qualified GF.Visualization.Graphviz as Dot
import Control.Monad
import qualified Control.Monad.State as STM
import Data.Char (toUpper)
import Data.List
import Data.Maybe (fromMaybe)
import Data.Maybe (maybe)
data SLFs = SLFs [(String,SLF)] SLF
data SLF = SLF { slfNodes :: [SLFNode], slfEdges :: [SLFEdge] }
data SLFNode = SLFNode { nId :: Int, nWord :: SLFWord, nTag :: Maybe String }
| SLFSubLat { nId :: Int, nLat :: String }
-- | An SLF word is a word, or the empty string.
type SLFWord = Maybe String
data SLFEdge = SLFEdge { eId :: Int, eStart :: Int, eEnd :: Int }
type SLF_FA = FA State (Maybe (MFALabel String)) ()
-- | Make a network with subnetworks in SLF
slfPrinter :: Ident -- ^ Grammar name
-> Options -> CGrammar -> String
slfPrinter name opts cfg = prSLF (automatonToSLF $ mkSLFFA opts cfg) ""
slfPrinter name opts cfg = prSLFs (mfaToSLFs $ cfgToMFA opts cfg) ""
slfGraphvizPrinter :: Ident -- ^ Grammar name
-> Options -> CGrammar -> String
slfGraphvizPrinter name opts cfg =
prFAGraphviz $ mapStates (fromMaybe "") $ mapTransitions (const "") $ mkSLFFA opts cfg
-> Options -> CGrammar -> String
slfGraphvizPrinter name opts cfg = Dot.prGraphviz g
where MFA main subs = cfgToMFA opts cfg
g = Dot.addSubGraphs (map (uncurry gvSLFFA) subs) $ gvSLFFA "" main
mkSLFFA :: Options -> CGrammar -> FA State (Maybe String) ()
mkSLFFA opts cfg = oneFinalState Nothing () $ moveLabelsToNodes $ dfa2nfa $ cfgToFA opts cfg
gvSLFFA :: String -> DFA (MFALabel String) -> Dot.Graph
gvSLFFA n = faToGraphviz n . mapStates (maybe "" mfaLabelToGv)
. mapTransitions (const "") . slfStyleFA
where mfaLabelToGv (MFASym s) = s
mfaLabelToGv (MFASub s) = "<" ++ s ++ ">"
automatonToSLF :: FA State (Maybe String) () -> SLF
automatonToSLF fa = SLF { slfNodes = map mkSLFNode (states fa),
slfEdges = zipWith mkSLFEdge [0..] (transitions fa) }
mapMFA :: (DFA (MFALabel a) -> b) -> MFA a -> (b,[(String,b)])
mapMFA f (MFA main subs) = (f main, [(c, f fa) | (c,fa) <- subs])
mkSLFNode :: (Int, Maybe String) -> SLFNode
mkSLFNode (i, Nothing) = SLFNode { nId = i, nWord = Nothing, nTag = Nothing }
mkSLFNode (i, Just w)
slfStyleFA :: DFA (MFALabel String) -> SLF_FA
slfStyleFA = oneFinalState Nothing () . moveLabelsToNodes . dfa2nfa
mfaToSLFs :: MFA String -> SLFs
mfaToSLFs (MFA main subs)
= SLFs [(c, dfaToSLF fa) | (c,fa) <- subs] (dfaToSLF main)
where dfaToSLF = automatonToSLF . slfStyleFA
automatonToSLF :: SLF_FA -> SLF
automatonToSLF fa = SLF { slfNodes = ns, slfEdges = es }
where ns = map (uncurry mfaNodeToSLFNode) (states fa)
es = zipWith (\i (f,t,()) -> mkSLFEdge i (f,t)) [0..] (transitions fa)
mfaNodeToSLFNode :: Int -> Maybe (MFALabel String) -> SLFNode
mfaNodeToSLFNode i l = case l of
Nothing -> mkSLFNode i Nothing
Just (MFASym x) -> mkSLFNode i (Just x)
Just (MFASub s) -> mkSLFSubLat i s
mkSLFNode :: Int -> Maybe String -> SLFNode
mkSLFNode i Nothing = SLFNode { nId = i, nWord = Nothing, nTag = Nothing }
mkSLFNode i (Just w)
| isNonWord w = SLFNode { nId = i,
nWord = Nothing,
nTag = Just w }
@@ -72,17 +102,30 @@ mkSLFNode (i, Just w)
nWord = Just (map toUpper w),
nTag = Just w }
mkSLFEdge :: Int -> (Int,Int,()) -> SLFEdge
mkSLFEdge i (f,t,()) = SLFEdge { eId = i, eStart = f, eEnd = t }
mkSLFSubLat :: Int -> String -> SLFNode
mkSLFSubLat i sub = SLFSubLat { nId = i, nLat = sub }
mkSLFEdge :: Int -> (Int,Int) -> SLFEdge
mkSLFEdge i (f,t) = SLFEdge { eId = i, eStart = f, eEnd = t }
prSLFs :: SLFs -> ShowS
prSLFs (SLFs subs main) = unlinesS (map prSub subs) . prOneSLF main
where prSub (n,s) = showString "SUBLAT=" . shows n
. nl . prOneSLF s . showString "." . nl
prSLF :: SLF -> ShowS
prSLF (SLF { slfNodes = ns, slfEdges = es})
prSLF slf = {- showString "VERSION=1.0" . nl . -} prOneSLF slf
prOneSLF :: SLF -> ShowS
prOneSLF (SLF { slfNodes = ns, slfEdges = es})
= header . unlinesS (map prNode ns) . nl . unlinesS (map prEdge es) . nl
where
header = showString "VERSION=1.0" . nl
. prFields [("N",show (length ns)),("L", show (length es))] . nl
prNode n = prFields $ [("I",show (nId n)),("W",showWord (nWord n))]
++ maybe [] (\t -> [("s",t)]) (nTag n)
header = prFields [("N",show (length ns)),("L", show (length es))] . nl
prNode (SLFNode { nId = i, nWord = w, nTag = t })
= prFields $ [("I",show i),("W",showWord w)]
++ maybe [] (\t -> [("s",t)]) t
prNode (SLFSubLat { nId = i, nLat = l })
= prFields [("I",show i),("L",show l)]
prEdge e = prFields [("J",show (eId e)),("S",show (eStart e)),("E",show (eEnd e))]
-- | Check if a word should not correspond to a word in the SLF file.

View File

@@ -120,6 +120,9 @@ isDirectLeftRecursive _ = False
-- * CFG rule utilities
--
allCats :: CFRules -> [Cat_]
allCats = map fst
catRules :: CFRules -> Cat_ -> [CFRule_]
catRules rs c = fromMaybe [] (lookup c rs)

View File

@@ -22,13 +22,14 @@ import GF.Speech.PrSLF
import Speech.ATKRec
import Control.Monad
import Data.Maybe
import Data.IORef
import System.Environment
import System.IO
import System.IO.Unsafe
data ATKLang = ATKLang {
cmndef :: FilePath,
cmndef :: Maybe FilePath,
hmmlist :: FilePath,
mmf0 :: FilePath,
mmf1 :: FilePath,
@@ -50,11 +51,19 @@ getLanguage l =
atk_home <- getEnv_ "ATK_HOME" atk_home_error
let res = atk_home ++ "/Resources"
return $ ATKLang {
cmndef = res ++ "/UK_SI_ZMFCC/cepmean",
cmndef = Just $ res ++ "/UK_SI_ZMFCC/cepmean",
hmmlist = res ++ "/UK_SI_ZMFCC/hmmlistbg",
mmf0 = res ++ "/UK_SI_ZMFCC/WI4",
mmf1 = res ++ "/UK_SI_ZMFCC/BGHMM2",
dict = res ++ "/beep.dct" }
"sv_SE" -> do
let res = "/home/bjorn/projects/atkswe/stoneage-swe"
return $ ATKLang {
cmndef = Nothing,
hmmlist = res ++ "/triphones1",
mmf0 = res ++ "/hmm12/macros",
mmf1 = res ++ "/hmm12/hmmdefs",
dict = res ++ "/dict" }
_ -> fail $ "ATKSpeechInput: language " ++ l ++ " not supported"
-- | List of the languages for which we have already loaded the HMM
@@ -71,8 +80,8 @@ initATK language =
when (null ls) $ do
config <- getEnv_ "GF_ATK_CFG" gf_atk_cfg_error
hPutStrLn stderr $ "Initializing ATK..."
-- FIXME: CMNDEFAULT should be set in the per-language setup
initialize (Just config) [("HPARM:CMNDEFAULT",cmndef l)]
let ps = map ((,) "HPARM:CMNDEFAULT") (maybeToList (cmndef l))
initialize (Just config) ps
when (language `notElem` ls) $
do
let hmmName = "hmm_" ++ language
@@ -88,7 +97,7 @@ recognizeSpeech name opts cfg =
do
let slf = slfPrinter name opts cfg
n = prIdent name
language = "en_UK"
language = "sv_SE"
hmmName = "hmm_" ++ language
dictName = "dict_" ++ language
slfName = "gram_" ++ n

View File

@@ -16,6 +16,7 @@ module GF.Visualization.Graphviz (
Graph(..), GraphType(..),
Node(..), Edge(..),
Attr,
addSubGraphs,
prGraphviz
) where
@@ -23,7 +24,8 @@ import Data.Char
import GF.Data.Utilities
data Graph = Graph GraphType [Attr] [Node] [Edge]
-- | Graph type, graph ID, graph attirbutes, graph nodes, graph edges, subgraphs
data Graph = Graph GraphType String [Attr] [Node] [Edge] [Graph]
deriving (Show)
data GraphType = Directed | Undirected
@@ -37,13 +39,31 @@ data Edge = Edge String String [Attr]
type Attr = (String,String)
--
-- * Graph construction
--
addSubGraphs :: [Graph] -> Graph -> Graph
addSubGraphs nss (Graph t i at ns es ss) = Graph t i at ns es (nss++ss)
--
-- * Pretty-printing
--
prGraphviz :: Graph -> String
prGraphviz (Graph t at ns es) =
unlines $ [graphtype t ++ " {"]
++ map (++";") (map prAttr at
++ map prNode ns
++ map (prEdge t) es)
++ ["}\n"]
prGraphviz g@(Graph t i _ _ _ _) =
graphtype t ++ " " ++ esc i ++ " {\n" ++ prGraph g ++ "}\n"
prSubGraph :: Graph -> String
prSubGraph g@(Graph _ i _ _ _ _) =
"subgraph" ++ " " ++ esc i ++ " {\n" ++ prGraph g ++ "}"
prGraph :: Graph -> String
prGraph (Graph t id at ns es ss) =
unlines $ map (++";") (map prAttr at
++ map prNode ns
++ map (prEdge t) es
++ map prSubGraph ss)
graphtype :: GraphType -> String
graphtype Directed = "digraph"