mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-22 19:22:50 -06:00
Build SLF networks with sublattices.
This commit is contained in:
@@ -12,9 +12,11 @@
|
|||||||
-- Approximates CFGs with finite state networks.
|
-- Approximates CFGs with finite state networks.
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular) where
|
module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular,
|
||||||
|
MFALabel(..), MFA(..), cfgToMFA) where
|
||||||
|
|
||||||
import Data.List
|
import Data.List
|
||||||
|
import Data.Maybe
|
||||||
import Data.Map (Map)
|
import Data.Map (Map)
|
||||||
import qualified Data.Map as Map
|
import qualified Data.Map as Map
|
||||||
import Data.Set (Set)
|
import Data.Set (Set)
|
||||||
@@ -31,11 +33,13 @@ import GF.Speech.FiniteState
|
|||||||
import GF.Speech.Relation
|
import GF.Speech.Relation
|
||||||
import GF.Speech.TransformCFG
|
import GF.Speech.TransformCFG
|
||||||
|
|
||||||
|
data Recursivity = RightR | LeftR | NotR
|
||||||
|
|
||||||
data MutRecSet = MutRecSet {
|
data MutRecSet = MutRecSet {
|
||||||
mrCats :: [Cat_],
|
mrCats :: Set Cat_,
|
||||||
mrNonRecRules :: [CFRule_],
|
mrNonRecRules :: [CFRule_],
|
||||||
mrRecRules :: [CFRule_],
|
mrRecRules :: [CFRule_],
|
||||||
mrIsRightRec :: Bool
|
mrRec :: Recursivity
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -48,6 +52,10 @@ cfgToFA opts = minimize . compileAutomaton start . makeSimpleRegular
|
|||||||
makeSimpleRegular :: CGrammar -> CFRules
|
makeSimpleRegular :: CGrammar -> CFRules
|
||||||
makeSimpleRegular = makeRegular . removeIdenticalRules . removeEmptyCats . cfgToCFRules
|
makeSimpleRegular = makeRegular . removeIdenticalRules . removeEmptyCats . cfgToCFRules
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Approximate context-free grammars with regular grammars.
|
||||||
|
--
|
||||||
|
|
||||||
-- Use the transformation algorithm from \"Regular Approximation of Context-free
|
-- Use the transformation algorithm from \"Regular Approximation of Context-free
|
||||||
-- Grammars through Approximation\", Mohri and Nederhof, 2000
|
-- Grammars through Approximation\", Mohri and Nederhof, 2000
|
||||||
-- to create an over-generating regular frammar for a context-free
|
-- to create an over-generating regular frammar for a context-free
|
||||||
@@ -63,13 +71,15 @@ makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
|
|||||||
where c' = newCat c
|
where c' = newCat c
|
||||||
makeRightLinearRules b' (CFRule c ss n) =
|
makeRightLinearRules b' (CFRule c ss n) =
|
||||||
case ys of
|
case ys of
|
||||||
[] -> [CFRule b' (xs ++ [Cat (newCat c)]) n] -- no non-terminals left
|
[] -> newRule b' (xs ++ [Cat (newCat c)]) n -- no non-terminals left
|
||||||
(Cat b:zs) -> CFRule b' (xs ++ [Cat b]) n
|
(Cat b:zs) -> newRule b' (xs ++ [Cat b]) n
|
||||||
: makeRightLinearRules (newCat b) (CFRule c zs n)
|
++ makeRightLinearRules (newCat b) (CFRule c zs n)
|
||||||
where (xs,ys) = break (`catElem` cs) ss
|
where (xs,ys) = break (`catElem` cs) ss
|
||||||
|
-- don't add rules on the form A -> A
|
||||||
|
newRule c rhs n | rhs == [Cat c] = []
|
||||||
|
| otherwise = [CFRule c rhs n]
|
||||||
newCat c = c ++ "$"
|
newCat c = c ++ "$"
|
||||||
|
|
||||||
|
|
||||||
-- | Get the sets of mutually recursive non-terminals for a grammar.
|
-- | Get the sets of mutually recursive non-terminals for a grammar.
|
||||||
mutRecCats :: Bool -- ^ If true, all categories will be in some set.
|
mutRecCats :: Bool -- ^ If true, all categories will be in some set.
|
||||||
-- If false, only recursive categories will be included.
|
-- If false, only recursive categories will be included.
|
||||||
@@ -79,32 +89,19 @@ mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transit
|
|||||||
allCats = map fst g
|
allCats = map fst g
|
||||||
refl = if incAll then reflexiveClosure_ allCats else reflexiveSubrelation
|
refl = if incAll then reflexiveClosure_ allCats else reflexiveSubrelation
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Compile strongly regular grammars to NFAs
|
||||||
|
--
|
||||||
|
|
||||||
-- Convert a strongly regular grammar to a finite automaton.
|
-- Convert a strongly regular grammar to a finite automaton.
|
||||||
compileAutomaton :: Cat_ -- ^ Start category
|
compileAutomaton :: Cat_ -- ^ Start category
|
||||||
-> CFRules
|
-> CFRules
|
||||||
-> NFA Token
|
-> NFA Token
|
||||||
compileAutomaton start g = make_fa (g,ns) s [Cat start] f fa''
|
compileAutomaton start g = make_fa (g,ns) s [Cat start] f fa
|
||||||
where
|
where
|
||||||
fa = newFA ()
|
(fa,s,f) = newFA_
|
||||||
s = startState fa
|
|
||||||
(fa',f) = newState () fa
|
|
||||||
fa'' = addFinalState f fa'
|
|
||||||
ns = mutRecSets g $ mutRecCats False g
|
ns = mutRecSets g $ mutRecCats False g
|
||||||
|
|
||||||
mutRecSets :: CFRules -> [Set Cat_] -> MutRecSets
|
|
||||||
mutRecSets g = Map.fromList . concatMap mkMutRecSet
|
|
||||||
where
|
|
||||||
mkMutRecSet cs = [ (c,ms) | c <- csl ]
|
|
||||||
where csl = Set.toList cs
|
|
||||||
rs = catSetRules g csl
|
|
||||||
(nrs,rrs) = partition (ruleIsNonRecursive cs) rs
|
|
||||||
ms = MutRecSet {
|
|
||||||
mrCats = csl,
|
|
||||||
mrNonRecRules = nrs,
|
|
||||||
mrRecRules = rrs,
|
|
||||||
mrIsRightRec = all (isRightLinear cs) rrs
|
|
||||||
}
|
|
||||||
|
|
||||||
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
|
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
|
||||||
-- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997.
|
-- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997.
|
||||||
make_fa :: (CFRules,MutRecSets) -> State -> [Symbol Cat_ Token] -> State
|
make_fa :: (CFRules,MutRecSets) -> State -> [Symbol Cat_ Token] -> State
|
||||||
@@ -116,14 +113,14 @@ make_fa c@(g,ns) q0 alpha q1 fa =
|
|||||||
[Cat a] -> case Map.lookup a ns of
|
[Cat a] -> case Map.lookup a ns of
|
||||||
-- a is recursive
|
-- a is recursive
|
||||||
Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) ->
|
Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) ->
|
||||||
if mrIsRightRec n
|
case mrRec n of
|
||||||
then
|
RightR ->
|
||||||
-- the set Ni is right-recursive or cyclic
|
-- the set Ni is right-recursive or cyclic
|
||||||
let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs]
|
let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs]
|
||||||
++ [(getState c, xs, getState d) | CFRule c ss _ <- rs,
|
++ [(getState c, xs, getState d) | CFRule c ss _ <- rs,
|
||||||
let (xs,Cat d) = (init ss,last ss)]
|
let (xs,Cat d) = (init ss,last ss)]
|
||||||
in make_fas new $ newTransition q0 (getState a) Nothing fa'
|
in make_fas new $ newTransition q0 (getState a) Nothing fa'
|
||||||
else
|
LeftR ->
|
||||||
-- the set Ni is left-recursive
|
-- the set Ni is left-recursive
|
||||||
let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs]
|
let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs]
|
||||||
++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- rs]
|
++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- rs]
|
||||||
@@ -143,16 +140,123 @@ make_fa c@(g,ns) q0 alpha q1 fa =
|
|||||||
make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs
|
make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs
|
||||||
|
|
||||||
|
|
||||||
addStatesForCats :: [Cat_] -> NFA Token -> (NFA Token, Map Cat_ State)
|
--
|
||||||
|
-- * Multiple DFA type
|
||||||
|
--
|
||||||
|
|
||||||
|
data MFALabel a = MFASym a | MFASub String
|
||||||
|
deriving Eq
|
||||||
|
|
||||||
|
data MFA a = MFA (DFA (MFALabel a)) [(String,DFA (MFALabel a))]
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Compile strongly regular grammars to multiple DFAs
|
||||||
|
--
|
||||||
|
|
||||||
|
cfgToMFA :: Options -> CGrammar -> MFA String
|
||||||
|
cfgToMFA opts g = MFA startFA [(c, toMFA (minimize fa)) | (c,fa) <- fas]
|
||||||
|
where start = getStartCat opts
|
||||||
|
startFA = let (fa,s,f) = newFA_
|
||||||
|
in newTransition s f (MFASub start) fa
|
||||||
|
fas = compileAutomata $ makeSimpleRegular g
|
||||||
|
mkMFALabel (Cat c) = MFASub c
|
||||||
|
mkMFALabel (Tok t) = MFASym t
|
||||||
|
toMFA = mapTransitions mkMFALabel
|
||||||
|
|
||||||
|
-- | Convert a strongly regular grammar to a number of finite automata,
|
||||||
|
-- one for each non-terminal.
|
||||||
|
-- The edges in the automata accept tokens, or name another automaton to use.
|
||||||
|
compileAutomata :: CFRules
|
||||||
|
-> [(Cat_,NFA (Symbol Cat_ Token))]
|
||||||
|
-- ^ A map of non-terminals and their automata.
|
||||||
|
compileAutomata g = [(c, makeOneFA c) | c <- allCats g]
|
||||||
|
where
|
||||||
|
mrs = mutRecSets g $ mutRecCats True g
|
||||||
|
makeOneFA c = make_fa1 mr s [Cat c] f fa
|
||||||
|
where (fa,s,f) = newFA_
|
||||||
|
mr = fromJust (Map.lookup c mrs)
|
||||||
|
|
||||||
|
|
||||||
|
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
|
||||||
|
-- Mark-Jan Nederhof. International Workshop on Parsing Technologies, 1997,
|
||||||
|
-- adapted to build a finite automaton for a single (mutually recursive) set only.
|
||||||
|
-- Categories not in the set (fromJustMap.lookup c mrs)will result in category-labelled edges.
|
||||||
|
make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which
|
||||||
|
-- we are building the automaton.
|
||||||
|
-> State -- ^ State to come from
|
||||||
|
-> [Symbol Cat_ Token] -- ^ Symbols to accept
|
||||||
|
-> State -- ^ State to end up in
|
||||||
|
-> NFA (Symbol Cat_ Token) -- ^ FA to add to.
|
||||||
|
-> NFA (Symbol Cat_ Token)
|
||||||
|
make_fa1 mr q0 alpha q1 fa =
|
||||||
|
case alpha of
|
||||||
|
[] -> newTransition q0 q1 Nothing fa
|
||||||
|
[t@(Tok _)] -> newTransition q0 q1 (Just t) fa
|
||||||
|
[c@(Cat a)] | not (a `Set.member` mrCats mr) -> newTransition q0 q1 (Just c) fa
|
||||||
|
[Cat a] ->
|
||||||
|
case mrRec mr of
|
||||||
|
NotR -> -- the set is a non-recursive (always singleton) set of categories
|
||||||
|
-- so the set of category rules is the set of rules for the whole set
|
||||||
|
make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa
|
||||||
|
RightR -> -- the set is right-recursive or cyclic
|
||||||
|
let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr]
|
||||||
|
++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr,
|
||||||
|
let (xs,Cat d) = (init ss,last ss)]
|
||||||
|
in make_fas new $ newTransition q0 (getState a) Nothing fa'
|
||||||
|
LeftR -> -- the set is left-recursive
|
||||||
|
let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr]
|
||||||
|
++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- mrRecRules mr]
|
||||||
|
in make_fas new $ newTransition (getState a) q1 Nothing fa'
|
||||||
|
where
|
||||||
|
(fa',stateMap) = addStatesForCats (mrCats mr) fa
|
||||||
|
getState x = Map.findWithDefault
|
||||||
|
(error $ "CFGToFiniteState: No state for " ++ x)
|
||||||
|
x stateMap
|
||||||
|
(x:beta) -> let (fa',q) = newState () fa
|
||||||
|
in make_fas [(q0,[x],q),(q,beta,q1)] fa'
|
||||||
|
where
|
||||||
|
make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa1 mr s1 xs s2 f') fa xs
|
||||||
|
|
||||||
|
mutRecSets :: CFRules -> [Set Cat_] -> MutRecSets
|
||||||
|
mutRecSets g = Map.fromList . concatMap mkMutRecSet
|
||||||
|
where
|
||||||
|
mkMutRecSet cs = [ (c,ms) | c <- csl ]
|
||||||
|
where csl = Set.toList cs
|
||||||
|
rs = catSetRules g csl
|
||||||
|
(nrs,rrs) = partition (ruleIsNonRecursive cs) rs
|
||||||
|
ms = MutRecSet {
|
||||||
|
mrCats = cs,
|
||||||
|
mrNonRecRules = nrs,
|
||||||
|
mrRecRules = rrs,
|
||||||
|
mrRec = rec
|
||||||
|
}
|
||||||
|
rec | null rrs = NotR
|
||||||
|
| all (isRightLinear cs) rrs = RightR
|
||||||
|
| otherwise = LeftR
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Utilities
|
||||||
|
--
|
||||||
|
|
||||||
|
-- | Create a new finite automaton with an initial and a final state.
|
||||||
|
newFA_ :: Enum n => (FA n () b, n, n)
|
||||||
|
newFA_ = (fa'', s, f)
|
||||||
|
where fa = newFA ()
|
||||||
|
s = startState fa
|
||||||
|
(fa',f) = newState () fa
|
||||||
|
fa'' = addFinalState f fa'
|
||||||
|
|
||||||
|
-- | Add a state for the given NFA for each of the categories
|
||||||
|
-- in the given set. Returns a map of categories to their
|
||||||
|
-- corresponding states.
|
||||||
|
addStatesForCats :: Set Cat_ -> NFA t -> (NFA t, Map Cat_ State)
|
||||||
addStatesForCats cs fa = (fa', m)
|
addStatesForCats cs fa = (fa', m)
|
||||||
where (fa', ns) = newStates (replicate (length cs) ()) fa
|
where (fa', ns) = newStates (replicate (Set.size cs) ()) fa
|
||||||
m = Map.fromList (zip cs (map fst ns))
|
m = Map.fromList (zip (Set.toList cs) (map fst ns))
|
||||||
|
|
||||||
ruleIsNonRecursive :: Set Cat_ -> CFRule_ -> Bool
|
ruleIsNonRecursive :: Set Cat_ -> CFRule_ -> Bool
|
||||||
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
|
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
noCatsInSet :: Set Cat_ -> [Symbol Cat_ t] -> Bool
|
noCatsInSet :: Set Cat_ -> [Symbol Cat_ t] -> Bool
|
||||||
noCatsInSet cs = not . any (`catElem` cs)
|
noCatsInSet cs = not . any (`catElem` cs)
|
||||||
|
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ module GF.Speech.FiniteState (FA, State, NFA, DFA,
|
|||||||
oneFinalState,
|
oneFinalState,
|
||||||
moveLabelsToNodes, minimize,
|
moveLabelsToNodes, minimize,
|
||||||
dfa2nfa,
|
dfa2nfa,
|
||||||
prFAGraphviz) where
|
prFAGraphviz, faToGraphviz) where
|
||||||
|
|
||||||
import Data.List
|
import Data.List
|
||||||
import Data.Maybe
|
import Data.Maybe
|
||||||
@@ -213,13 +213,14 @@ dfa2nfa = mapTransitions Just
|
|||||||
--
|
--
|
||||||
|
|
||||||
prFAGraphviz :: (Eq n,Show n) => FA n String String -> String
|
prFAGraphviz :: (Eq n,Show n) => FA n String String -> String
|
||||||
prFAGraphviz = Dot.prGraphviz . toGraphviz
|
prFAGraphviz = Dot.prGraphviz . faToGraphviz ""
|
||||||
|
|
||||||
prFAGraphviz_ :: (Eq n,Show n,Show a, Show b) => FA n a b -> String
|
prFAGraphviz_ :: (Eq n,Show n,Show a, Show b) => FA n a b -> String
|
||||||
prFAGraphviz_ = Dot.prGraphviz . toGraphviz . mapStates show . mapTransitions show
|
prFAGraphviz_ = Dot.prGraphviz . faToGraphviz "" . mapStates show . mapTransitions show
|
||||||
|
|
||||||
toGraphviz :: (Eq n,Show n) => FA n String String -> Dot.Graph
|
faToGraphviz :: (Eq n,Show n) => String -- ^ Graph ID
|
||||||
toGraphviz (FA (Graph _ ns es) s f) = Dot.Graph Dot.Directed [] (map mkNode ns) (map mkEdge es)
|
-> FA n String String -> Dot.Graph
|
||||||
|
faToGraphviz i (FA (Graph _ ns es) s f) = Dot.Graph Dot.Directed i [] (map mkNode ns) (map mkEdge es) []
|
||||||
where mkNode (n,l) = Dot.Node (show n) attrs
|
where mkNode (n,l) = Dot.Node (show n) attrs
|
||||||
where attrs = [("label",l)]
|
where attrs = [("label",l)]
|
||||||
++ if n == s then [("shape","box")] else []
|
++ if n == s then [("shape","box")] else []
|
||||||
@@ -231,4 +232,4 @@ toGraphviz (FA (Graph _ ns es) s f) = Dot.Graph Dot.Directed [] (map mkNode ns)
|
|||||||
--
|
--
|
||||||
|
|
||||||
lookups :: Ord k => [k] -> Map k a -> [a]
|
lookups :: Ord k => [k] -> Map k a -> [a]
|
||||||
lookups xs m = mapMaybe (flip Map.lookup m) xs
|
lookups xs m = mapMaybe (flip Map.lookup m) xs
|
||||||
|
|||||||
@@ -31,40 +31,70 @@ import GF.Speech.CFGToFiniteState
|
|||||||
import GF.Speech.FiniteState
|
import GF.Speech.FiniteState
|
||||||
import GF.Speech.SRG
|
import GF.Speech.SRG
|
||||||
import GF.Speech.TransformCFG
|
import GF.Speech.TransformCFG
|
||||||
|
import qualified GF.Visualization.Graphviz as Dot
|
||||||
|
|
||||||
|
import Control.Monad
|
||||||
|
import qualified Control.Monad.State as STM
|
||||||
import Data.Char (toUpper)
|
import Data.Char (toUpper)
|
||||||
import Data.List
|
import Data.List
|
||||||
import Data.Maybe (fromMaybe)
|
import Data.Maybe (maybe)
|
||||||
|
|
||||||
|
data SLFs = SLFs [(String,SLF)] SLF
|
||||||
|
|
||||||
data SLF = SLF { slfNodes :: [SLFNode], slfEdges :: [SLFEdge] }
|
data SLF = SLF { slfNodes :: [SLFNode], slfEdges :: [SLFEdge] }
|
||||||
|
|
||||||
data SLFNode = SLFNode { nId :: Int, nWord :: SLFWord, nTag :: Maybe String }
|
data SLFNode = SLFNode { nId :: Int, nWord :: SLFWord, nTag :: Maybe String }
|
||||||
|
| SLFSubLat { nId :: Int, nLat :: String }
|
||||||
|
|
||||||
-- | An SLF word is a word, or the empty string.
|
-- | An SLF word is a word, or the empty string.
|
||||||
type SLFWord = Maybe String
|
type SLFWord = Maybe String
|
||||||
|
|
||||||
data SLFEdge = SLFEdge { eId :: Int, eStart :: Int, eEnd :: Int }
|
data SLFEdge = SLFEdge { eId :: Int, eStart :: Int, eEnd :: Int }
|
||||||
|
|
||||||
|
type SLF_FA = FA State (Maybe (MFALabel String)) ()
|
||||||
|
|
||||||
|
-- | Make a network with subnetworks in SLF
|
||||||
slfPrinter :: Ident -- ^ Grammar name
|
slfPrinter :: Ident -- ^ Grammar name
|
||||||
-> Options -> CGrammar -> String
|
-> Options -> CGrammar -> String
|
||||||
slfPrinter name opts cfg = prSLF (automatonToSLF $ mkSLFFA opts cfg) ""
|
slfPrinter name opts cfg = prSLFs (mfaToSLFs $ cfgToMFA opts cfg) ""
|
||||||
|
|
||||||
slfGraphvizPrinter :: Ident -- ^ Grammar name
|
slfGraphvizPrinter :: Ident -- ^ Grammar name
|
||||||
-> Options -> CGrammar -> String
|
-> Options -> CGrammar -> String
|
||||||
slfGraphvizPrinter name opts cfg =
|
slfGraphvizPrinter name opts cfg = Dot.prGraphviz g
|
||||||
prFAGraphviz $ mapStates (fromMaybe "") $ mapTransitions (const "") $ mkSLFFA opts cfg
|
where MFA main subs = cfgToMFA opts cfg
|
||||||
|
g = Dot.addSubGraphs (map (uncurry gvSLFFA) subs) $ gvSLFFA "" main
|
||||||
|
|
||||||
mkSLFFA :: Options -> CGrammar -> FA State (Maybe String) ()
|
gvSLFFA :: String -> DFA (MFALabel String) -> Dot.Graph
|
||||||
mkSLFFA opts cfg = oneFinalState Nothing () $ moveLabelsToNodes $ dfa2nfa $ cfgToFA opts cfg
|
gvSLFFA n = faToGraphviz n . mapStates (maybe "" mfaLabelToGv)
|
||||||
|
. mapTransitions (const "") . slfStyleFA
|
||||||
|
where mfaLabelToGv (MFASym s) = s
|
||||||
|
mfaLabelToGv (MFASub s) = "<" ++ s ++ ">"
|
||||||
|
|
||||||
automatonToSLF :: FA State (Maybe String) () -> SLF
|
mapMFA :: (DFA (MFALabel a) -> b) -> MFA a -> (b,[(String,b)])
|
||||||
automatonToSLF fa = SLF { slfNodes = map mkSLFNode (states fa),
|
mapMFA f (MFA main subs) = (f main, [(c, f fa) | (c,fa) <- subs])
|
||||||
slfEdges = zipWith mkSLFEdge [0..] (transitions fa) }
|
|
||||||
|
|
||||||
mkSLFNode :: (Int, Maybe String) -> SLFNode
|
slfStyleFA :: DFA (MFALabel String) -> SLF_FA
|
||||||
mkSLFNode (i, Nothing) = SLFNode { nId = i, nWord = Nothing, nTag = Nothing }
|
slfStyleFA = oneFinalState Nothing () . moveLabelsToNodes . dfa2nfa
|
||||||
mkSLFNode (i, Just w)
|
|
||||||
|
mfaToSLFs :: MFA String -> SLFs
|
||||||
|
mfaToSLFs (MFA main subs)
|
||||||
|
= SLFs [(c, dfaToSLF fa) | (c,fa) <- subs] (dfaToSLF main)
|
||||||
|
where dfaToSLF = automatonToSLF . slfStyleFA
|
||||||
|
|
||||||
|
automatonToSLF :: SLF_FA -> SLF
|
||||||
|
automatonToSLF fa = SLF { slfNodes = ns, slfEdges = es }
|
||||||
|
where ns = map (uncurry mfaNodeToSLFNode) (states fa)
|
||||||
|
es = zipWith (\i (f,t,()) -> mkSLFEdge i (f,t)) [0..] (transitions fa)
|
||||||
|
|
||||||
|
mfaNodeToSLFNode :: Int -> Maybe (MFALabel String) -> SLFNode
|
||||||
|
mfaNodeToSLFNode i l = case l of
|
||||||
|
Nothing -> mkSLFNode i Nothing
|
||||||
|
Just (MFASym x) -> mkSLFNode i (Just x)
|
||||||
|
Just (MFASub s) -> mkSLFSubLat i s
|
||||||
|
|
||||||
|
mkSLFNode :: Int -> Maybe String -> SLFNode
|
||||||
|
mkSLFNode i Nothing = SLFNode { nId = i, nWord = Nothing, nTag = Nothing }
|
||||||
|
mkSLFNode i (Just w)
|
||||||
| isNonWord w = SLFNode { nId = i,
|
| isNonWord w = SLFNode { nId = i,
|
||||||
nWord = Nothing,
|
nWord = Nothing,
|
||||||
nTag = Just w }
|
nTag = Just w }
|
||||||
@@ -72,17 +102,30 @@ mkSLFNode (i, Just w)
|
|||||||
nWord = Just (map toUpper w),
|
nWord = Just (map toUpper w),
|
||||||
nTag = Just w }
|
nTag = Just w }
|
||||||
|
|
||||||
mkSLFEdge :: Int -> (Int,Int,()) -> SLFEdge
|
mkSLFSubLat :: Int -> String -> SLFNode
|
||||||
mkSLFEdge i (f,t,()) = SLFEdge { eId = i, eStart = f, eEnd = t }
|
mkSLFSubLat i sub = SLFSubLat { nId = i, nLat = sub }
|
||||||
|
|
||||||
|
mkSLFEdge :: Int -> (Int,Int) -> SLFEdge
|
||||||
|
mkSLFEdge i (f,t) = SLFEdge { eId = i, eStart = f, eEnd = t }
|
||||||
|
|
||||||
|
prSLFs :: SLFs -> ShowS
|
||||||
|
prSLFs (SLFs subs main) = unlinesS (map prSub subs) . prOneSLF main
|
||||||
|
where prSub (n,s) = showString "SUBLAT=" . shows n
|
||||||
|
. nl . prOneSLF s . showString "." . nl
|
||||||
|
|
||||||
prSLF :: SLF -> ShowS
|
prSLF :: SLF -> ShowS
|
||||||
prSLF (SLF { slfNodes = ns, slfEdges = es})
|
prSLF slf = {- showString "VERSION=1.0" . nl . -} prOneSLF slf
|
||||||
|
|
||||||
|
prOneSLF :: SLF -> ShowS
|
||||||
|
prOneSLF (SLF { slfNodes = ns, slfEdges = es})
|
||||||
= header . unlinesS (map prNode ns) . nl . unlinesS (map prEdge es) . nl
|
= header . unlinesS (map prNode ns) . nl . unlinesS (map prEdge es) . nl
|
||||||
where
|
where
|
||||||
header = showString "VERSION=1.0" . nl
|
header = prFields [("N",show (length ns)),("L", show (length es))] . nl
|
||||||
. prFields [("N",show (length ns)),("L", show (length es))] . nl
|
prNode (SLFNode { nId = i, nWord = w, nTag = t })
|
||||||
prNode n = prFields $ [("I",show (nId n)),("W",showWord (nWord n))]
|
= prFields $ [("I",show i),("W",showWord w)]
|
||||||
++ maybe [] (\t -> [("s",t)]) (nTag n)
|
++ maybe [] (\t -> [("s",t)]) t
|
||||||
|
prNode (SLFSubLat { nId = i, nLat = l })
|
||||||
|
= prFields [("I",show i),("L",show l)]
|
||||||
prEdge e = prFields [("J",show (eId e)),("S",show (eStart e)),("E",show (eEnd e))]
|
prEdge e = prFields [("J",show (eId e)),("S",show (eStart e)),("E",show (eEnd e))]
|
||||||
|
|
||||||
-- | Check if a word should not correspond to a word in the SLF file.
|
-- | Check if a word should not correspond to a word in the SLF file.
|
||||||
|
|||||||
@@ -120,6 +120,9 @@ isDirectLeftRecursive _ = False
|
|||||||
-- * CFG rule utilities
|
-- * CFG rule utilities
|
||||||
--
|
--
|
||||||
|
|
||||||
|
allCats :: CFRules -> [Cat_]
|
||||||
|
allCats = map fst
|
||||||
|
|
||||||
catRules :: CFRules -> Cat_ -> [CFRule_]
|
catRules :: CFRules -> Cat_ -> [CFRule_]
|
||||||
catRules rs c = fromMaybe [] (lookup c rs)
|
catRules rs c = fromMaybe [] (lookup c rs)
|
||||||
|
|
||||||
|
|||||||
@@ -22,13 +22,14 @@ import GF.Speech.PrSLF
|
|||||||
import Speech.ATKRec
|
import Speech.ATKRec
|
||||||
|
|
||||||
import Control.Monad
|
import Control.Monad
|
||||||
|
import Data.Maybe
|
||||||
import Data.IORef
|
import Data.IORef
|
||||||
import System.Environment
|
import System.Environment
|
||||||
import System.IO
|
import System.IO
|
||||||
import System.IO.Unsafe
|
import System.IO.Unsafe
|
||||||
|
|
||||||
data ATKLang = ATKLang {
|
data ATKLang = ATKLang {
|
||||||
cmndef :: FilePath,
|
cmndef :: Maybe FilePath,
|
||||||
hmmlist :: FilePath,
|
hmmlist :: FilePath,
|
||||||
mmf0 :: FilePath,
|
mmf0 :: FilePath,
|
||||||
mmf1 :: FilePath,
|
mmf1 :: FilePath,
|
||||||
@@ -50,11 +51,19 @@ getLanguage l =
|
|||||||
atk_home <- getEnv_ "ATK_HOME" atk_home_error
|
atk_home <- getEnv_ "ATK_HOME" atk_home_error
|
||||||
let res = atk_home ++ "/Resources"
|
let res = atk_home ++ "/Resources"
|
||||||
return $ ATKLang {
|
return $ ATKLang {
|
||||||
cmndef = res ++ "/UK_SI_ZMFCC/cepmean",
|
cmndef = Just $ res ++ "/UK_SI_ZMFCC/cepmean",
|
||||||
hmmlist = res ++ "/UK_SI_ZMFCC/hmmlistbg",
|
hmmlist = res ++ "/UK_SI_ZMFCC/hmmlistbg",
|
||||||
mmf0 = res ++ "/UK_SI_ZMFCC/WI4",
|
mmf0 = res ++ "/UK_SI_ZMFCC/WI4",
|
||||||
mmf1 = res ++ "/UK_SI_ZMFCC/BGHMM2",
|
mmf1 = res ++ "/UK_SI_ZMFCC/BGHMM2",
|
||||||
dict = res ++ "/beep.dct" }
|
dict = res ++ "/beep.dct" }
|
||||||
|
"sv_SE" -> do
|
||||||
|
let res = "/home/bjorn/projects/atkswe/stoneage-swe"
|
||||||
|
return $ ATKLang {
|
||||||
|
cmndef = Nothing,
|
||||||
|
hmmlist = res ++ "/triphones1",
|
||||||
|
mmf0 = res ++ "/hmm12/macros",
|
||||||
|
mmf1 = res ++ "/hmm12/hmmdefs",
|
||||||
|
dict = res ++ "/dict" }
|
||||||
_ -> fail $ "ATKSpeechInput: language " ++ l ++ " not supported"
|
_ -> fail $ "ATKSpeechInput: language " ++ l ++ " not supported"
|
||||||
|
|
||||||
-- | List of the languages for which we have already loaded the HMM
|
-- | List of the languages for which we have already loaded the HMM
|
||||||
@@ -71,8 +80,8 @@ initATK language =
|
|||||||
when (null ls) $ do
|
when (null ls) $ do
|
||||||
config <- getEnv_ "GF_ATK_CFG" gf_atk_cfg_error
|
config <- getEnv_ "GF_ATK_CFG" gf_atk_cfg_error
|
||||||
hPutStrLn stderr $ "Initializing ATK..."
|
hPutStrLn stderr $ "Initializing ATK..."
|
||||||
-- FIXME: CMNDEFAULT should be set in the per-language setup
|
let ps = map ((,) "HPARM:CMNDEFAULT") (maybeToList (cmndef l))
|
||||||
initialize (Just config) [("HPARM:CMNDEFAULT",cmndef l)]
|
initialize (Just config) ps
|
||||||
when (language `notElem` ls) $
|
when (language `notElem` ls) $
|
||||||
do
|
do
|
||||||
let hmmName = "hmm_" ++ language
|
let hmmName = "hmm_" ++ language
|
||||||
@@ -88,7 +97,7 @@ recognizeSpeech name opts cfg =
|
|||||||
do
|
do
|
||||||
let slf = slfPrinter name opts cfg
|
let slf = slfPrinter name opts cfg
|
||||||
n = prIdent name
|
n = prIdent name
|
||||||
language = "en_UK"
|
language = "sv_SE"
|
||||||
hmmName = "hmm_" ++ language
|
hmmName = "hmm_" ++ language
|
||||||
dictName = "dict_" ++ language
|
dictName = "dict_" ++ language
|
||||||
slfName = "gram_" ++ n
|
slfName = "gram_" ++ n
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ module GF.Visualization.Graphviz (
|
|||||||
Graph(..), GraphType(..),
|
Graph(..), GraphType(..),
|
||||||
Node(..), Edge(..),
|
Node(..), Edge(..),
|
||||||
Attr,
|
Attr,
|
||||||
|
addSubGraphs,
|
||||||
prGraphviz
|
prGraphviz
|
||||||
) where
|
) where
|
||||||
|
|
||||||
@@ -23,7 +24,8 @@ import Data.Char
|
|||||||
|
|
||||||
import GF.Data.Utilities
|
import GF.Data.Utilities
|
||||||
|
|
||||||
data Graph = Graph GraphType [Attr] [Node] [Edge]
|
-- | Graph type, graph ID, graph attirbutes, graph nodes, graph edges, subgraphs
|
||||||
|
data Graph = Graph GraphType String [Attr] [Node] [Edge] [Graph]
|
||||||
deriving (Show)
|
deriving (Show)
|
||||||
|
|
||||||
data GraphType = Directed | Undirected
|
data GraphType = Directed | Undirected
|
||||||
@@ -37,13 +39,31 @@ data Edge = Edge String String [Attr]
|
|||||||
|
|
||||||
type Attr = (String,String)
|
type Attr = (String,String)
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Graph construction
|
||||||
|
--
|
||||||
|
|
||||||
|
addSubGraphs :: [Graph] -> Graph -> Graph
|
||||||
|
addSubGraphs nss (Graph t i at ns es ss) = Graph t i at ns es (nss++ss)
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Pretty-printing
|
||||||
|
--
|
||||||
|
|
||||||
prGraphviz :: Graph -> String
|
prGraphviz :: Graph -> String
|
||||||
prGraphviz (Graph t at ns es) =
|
prGraphviz g@(Graph t i _ _ _ _) =
|
||||||
unlines $ [graphtype t ++ " {"]
|
graphtype t ++ " " ++ esc i ++ " {\n" ++ prGraph g ++ "}\n"
|
||||||
++ map (++";") (map prAttr at
|
|
||||||
++ map prNode ns
|
prSubGraph :: Graph -> String
|
||||||
++ map (prEdge t) es)
|
prSubGraph g@(Graph _ i _ _ _ _) =
|
||||||
++ ["}\n"]
|
"subgraph" ++ " " ++ esc i ++ " {\n" ++ prGraph g ++ "}"
|
||||||
|
|
||||||
|
prGraph :: Graph -> String
|
||||||
|
prGraph (Graph t id at ns es ss) =
|
||||||
|
unlines $ map (++";") (map prAttr at
|
||||||
|
++ map prNode ns
|
||||||
|
++ map (prEdge t) es
|
||||||
|
++ map prSubGraph ss)
|
||||||
|
|
||||||
graphtype :: GraphType -> String
|
graphtype :: GraphType -> String
|
||||||
graphtype Directed = "digraph"
|
graphtype Directed = "digraph"
|
||||||
|
|||||||
Reference in New Issue
Block a user