1
0
forked from GitHub/gf-core

Added speech recognition grammar generation code. There is no way yet to invoke the SRG printer, and only SRGS is included.

This commit is contained in:
bjorn
2008-06-03 18:53:53 +00:00
parent 762a9d16f0
commit 61ccd948d3
11 changed files with 1918 additions and 0 deletions

338
src-3.0/GF/Speech/CFG.hs Normal file
View File

@@ -0,0 +1,338 @@
----------------------------------------------------------------------
-- |
-- Module : GF.Speech.CFG
--
-- Context-free grammar representation and manipulation.
----------------------------------------------------------------------
module GF.Speech.CFG where
import GF.Data.Utilities
import PGF.CId
import GF.Infra.Option
import GF.Infra.PrintClass
import GF.Speech.Relation
import Control.Monad
import Control.Monad.State (State, get, put, evalState)
import qualified Data.ByteString.Char8 as BS
import Data.Map (Map)
import qualified Data.Map as Map
import Data.List
import Data.Maybe (fromMaybe)
import Data.Monoid (mconcat)
import Data.Set (Set)
import qualified Data.Set as Set
--
-- * Types
--
type Cat = String
type Token = String
data Symbol c t = NonTerminal c | Terminal t
deriving (Eq, Ord, Show)
type CFSymbol = Symbol Cat Token
data CFRule = CFRule {
lhsCat :: Cat,
ruleRhs :: [CFSymbol],
ruleName :: CFTerm
}
deriving (Eq, Ord, Show)
data CFTerm
= CFObj CId [CFTerm] -- ^ an abstract syntax function with arguments
| CFAbs Int CFTerm -- ^ A lambda abstraction. The Int is the variable id.
| CFApp CFTerm CFTerm -- ^ Application
| CFRes Int -- ^ The result of the n:th (0-based) non-terminal
| CFVar Int -- ^ A lambda-bound variable
| CFMeta CId -- ^ A metavariable
deriving (Eq, Ord, Show)
data CFG = CFG { cfgStartCat :: Cat,
cfgExternalCats :: Set Cat,
cfgRules :: Map Cat (Set CFRule) }
deriving (Eq, Ord, Show)
--
-- * Grammar filtering
--
-- | Removes all directly and indirectly cyclic productions.
-- FIXME: this may be too aggressive, only one production
-- needs to be removed to break a given cycle. But which
-- one should we pick?
-- FIXME: Does not (yet) remove productions which are cyclic
-- because of empty productions.
removeCycles :: CFG -> CFG
removeCycles = onRules f
where f rs = filter (not . isCycle) rs
where alias = transitiveClosure $ mkRel [(c,c') | CFRule c [NonTerminal c'] _ <- rs]
isCycle (CFRule c [NonTerminal c'] _) = isRelatedTo alias c' c
isCycle _ = False
-- | Better bottom-up filter that also removes categories which contain no finite
-- strings.
bottomUpFilter :: CFG -> CFG
bottomUpFilter gr = fix grow (gr { cfgRules = Map.empty })
where grow g = g `unionCFG` filterCFG (all (okSym g) . ruleRhs) gr
okSym g = symbol (`elem` allCats g) (const True)
-- | Removes categories which are not reachable from the start category.
topDownFilter :: CFG -> CFG
topDownFilter cfg = filterCFGCats (isRelatedTo uses (cfgStartCat cfg)) cfg
where
rhsCats = [ (lhsCat r, c') | r <- allRules cfg, c' <- filterCats (ruleRhs r) ]
uses = reflexiveClosure_ (allCats cfg) $ transitiveClosure $ mkRel rhsCats
-- | Merges categories with identical right-hand-sides.
-- FIXME: handle probabilities
mergeIdentical :: CFG -> CFG
mergeIdentical g = onRules (map subst) g
where
-- maps categories to their replacement
m = Map.fromList [(y,concat (intersperse "+" xs))
| (_,xs) <- buildMultiMap [(rulesKey rs,c) | (c,rs) <- Map.toList (cfgRules g)], y <- xs]
-- build data to compare for each category: a set of name,rhs pairs
rulesKey = Set.map (\ (CFRule _ r n) -> (n,r))
subst (CFRule c r n) = CFRule (substCat c) (map (mapSymbol substCat id) r) n
substCat c = Map.findWithDefault (error $ "mergeIdentical: " ++ c) c m
--
-- * Removing left recursion
--
-- The LC_LR algorithm from
-- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf
removeLeftRecursion :: CFG -> CFG
removeLeftRecursion gr
= gr { cfgRules = groupProds $ concat [scheme1, scheme2, scheme3, scheme4] }
where
scheme1 = [CFRule a [x,NonTerminal a_x] n' |
a <- retainedLeftRecursive,
x <- properLeftCornersOf a,
not (isLeftRecursive x),
let a_x = mkCat (NonTerminal a) x,
-- this is an extension of LC_LR to avoid generating
-- A-X categories for which there are no productions:
a_x `Set.member` newCats,
let n' = symbol (\_ -> CFApp (CFRes 1) (CFRes 0))
(\_ -> CFRes 0) x]
scheme2 = [CFRule a_x (beta++[NonTerminal a_b]) n' |
a <- retainedLeftRecursive,
b@(NonTerminal b') <- properLeftCornersOf a,
isLeftRecursive b,
CFRule _ (x:beta) n <- catRules gr b',
let a_x = mkCat (NonTerminal a) x,
let a_b = mkCat (NonTerminal a) b,
let i = length $ filterCats beta,
let n' = symbol (\_ -> CFAbs 1 (CFApp (CFRes i) (shiftTerm n)))
(\_ -> CFApp (CFRes i) n) x]
scheme3 = [CFRule a_x beta n' |
a <- retainedLeftRecursive,
x <- properLeftCornersOf a,
CFRule _ (x':beta) n <- catRules gr a,
x == x',
let a_x = mkCat (NonTerminal a) x,
let n' = symbol (\_ -> CFAbs 1 (shiftTerm n))
(\_ -> n) x]
scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . NonTerminal) cats
newCats = Set.fromList (map lhsCat (scheme2 ++ scheme3))
shiftTerm :: CFTerm -> CFTerm
shiftTerm (CFObj f ts) = CFObj f (map shiftTerm ts)
shiftTerm (CFRes 0) = CFVar 1
shiftTerm (CFRes n) = CFRes (n-1)
shiftTerm t = t
-- note: the rest don't occur in the original grammar
cats = allCats gr
rules = allRules gr
directLeftCorner = mkRel [(NonTerminal c,t) | CFRule c (t:_) _ <- allRules gr]
leftCorner = reflexiveClosure_ (map NonTerminal cats) $ transitiveClosure directLeftCorner
properLeftCorner = transitiveClosure directLeftCorner
properLeftCornersOf = Set.toList . allRelated properLeftCorner . NonTerminal
isProperLeftCornerOf = flip (isRelatedTo properLeftCorner)
leftRecursive = reflexiveElements properLeftCorner
isLeftRecursive = (`Set.member` leftRecursive)
retained = cfgStartCat gr `Set.insert`
Set.fromList [a | r <- allRules (filterCFGCats (not . isLeftRecursive . NonTerminal) gr),
NonTerminal a <- ruleRhs r]
isRetained = (`Set.member` retained)
retainedLeftRecursive = filter (isLeftRecursive . NonTerminal) $ Set.toList retained
mkCat :: CFSymbol -> CFSymbol -> Cat
mkCat x y = showSymbol x ++ "-" ++ showSymbol y
where showSymbol = symbol id show
-- | Get the sets of mutually recursive non-terminals for a grammar.
mutRecCats :: Bool -- ^ If true, all categories will be in some set.
-- If false, only recursive categories will be included.
-> CFG -> [Set Cat]
mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
where r = mkRel [(c,c') | CFRule c ss _ <- allRules g, NonTerminal c' <- ss]
refl = if incAll then reflexiveClosure_ (allCats g) else reflexiveSubrelation
--
-- * Approximate context-free grammars with regular grammars.
--
makeSimpleRegular :: CFG -> CFG
makeSimpleRegular = makeRegular . topDownFilter . bottomUpFilter . removeCycles
-- Use the transformation algorithm from \"Regular Approximation of Context-free
-- Grammars through Approximation\", Mohri and Nederhof, 2000
-- to create an over-generating regular frammar for a context-free
-- grammar
makeRegular :: CFG -> CFG
makeRegular g = g { cfgRules = groupProds $ concatMap trSet (mutRecCats True g) }
where trSet cs | allXLinear cs rs = rs
| otherwise = concatMap handleCat csl
where csl = Set.toList cs
rs = catSetRules g cs
handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e
++ concatMap (makeRightLinearRules c) (catRules g c)
where c' = newCat c
makeRightLinearRules b' (CFRule c ss n) =
case ys of
[] -> newRule b' (xs ++ [NonTerminal (newCat c)]) n -- no non-terminals left
(NonTerminal b:zs) -> newRule b' (xs ++ [NonTerminal b]) n
++ makeRightLinearRules (newCat b) (CFRule c zs n)
where (xs,ys) = break (`catElem` cs) ss
-- don't add rules on the form A -> A
newRule c rhs n | rhs == [NonTerminal c] = []
| otherwise = [CFRule c rhs n]
newCat c = c ++ "$"
--
-- * CFG Utilities
--
mkCFG :: Cat -> Set Cat -> [CFRule] -> CFG
mkCFG start ext rs = CFG { cfgStartCat = start, cfgExternalCats = ext, cfgRules = groupProds rs }
groupProds :: [CFRule] -> Map Cat (Set CFRule)
groupProds = Map.fromListWith Set.union . map (\r -> (lhsCat r,Set.singleton r))
-- | Gets all rules in a CFG.
allRules :: CFG -> [CFRule]
allRules = concat . map Set.toList . Map.elems . cfgRules
-- | Gets all rules in a CFG, grouped by their LHS categories.
allRulesGrouped :: CFG -> [(Cat,[CFRule])]
allRulesGrouped = Map.toList . Map.map Set.toList . cfgRules
-- | Gets all categories which have rules.
allCats :: CFG -> [Cat]
allCats = Map.keys . cfgRules
-- | Gets all rules for the given category.
catRules :: CFG -> Cat -> [CFRule]
catRules gr c = Set.toList $ Map.findWithDefault Set.empty c (cfgRules gr)
-- | Gets all rules for categories in the given set.
catSetRules :: CFG -> Set Cat -> [CFRule]
catSetRules gr cs = allRules $ filterCFGCats (`Set.member` cs) gr
onCFG :: (Map Cat (Set CFRule) -> Map Cat (Set CFRule)) -> CFG -> CFG
onCFG f cfg = cfg { cfgRules = f (cfgRules cfg) }
onRules :: ([CFRule] -> [CFRule]) -> CFG -> CFG
onRules f cfg = cfg { cfgRules = groupProds $ f $ allRules cfg }
-- | Clean up CFG after rules have been removed.
cleanCFG :: CFG -> CFG
cleanCFG = onCFG (Map.filter (not . Set.null))
-- | Combine two CFGs.
unionCFG :: CFG -> CFG -> CFG
unionCFG x y = onCFG (\rs -> Map.unionWith Set.union rs (cfgRules y)) x
filterCFG :: (CFRule -> Bool) -> CFG -> CFG
filterCFG p = cleanCFG . onCFG (Map.map (Set.filter p))
filterCFGCats :: (Cat -> Bool) -> CFG -> CFG
filterCFGCats p = onCFG (Map.filterWithKey (\c _ -> p c))
countCats :: CFG -> Int
countCats = Map.size . cfgRules . cleanCFG
countRules :: CFG -> Int
countRules = length . allRules
prCFG :: CFG -> String
prCFG = unlines . map prRule . allRules
where
prRule r = lhsCat r ++ " --> " ++ unwords (map prSym (ruleRhs r))
prSym = symbol id (\t -> "\""++ t ++"\"")
--
-- * CFRule Utilities
--
ruleFun :: CFRule -> CId
ruleFun (CFRule _ _ t) = f t
where f (CFObj n _) = n
f (CFApp _ x) = f x
f (CFAbs _ x) = f x
f _ = mkCId ""
-- | Check if any of the categories used on the right-hand side
-- are in the given list of categories.
anyUsedBy :: [Cat] -> CFRule -> Bool
anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss)
mkCFTerm :: String -> CFTerm
mkCFTerm n = CFObj (mkCId n) []
ruleIsNonRecursive :: Set Cat -> CFRule -> Bool
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
-- | Check if all the rules are right-linear, or all the rules are
-- left-linear, with respect to given categories.
allXLinear :: Set Cat -> [CFRule] -> Bool
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
-- | Checks if a context-free rule is right-linear.
isRightLinear :: Set Cat -- ^ The categories to consider
-> CFRule -- ^ The rule to check for right-linearity
-> Bool
isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
-- | Checks if a context-free rule is left-linear.
isLeftLinear :: Set Cat -- ^ The categories to consider
-> CFRule -- ^ The rule to check for left-linearity
-> Bool
isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs
--
-- * Symbol utilities
--
symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a
symbol fc ft (NonTerminal cat) = fc cat
symbol fc ft (Terminal tok) = ft tok
mapSymbol :: (c -> c') -> (t -> t') -> Symbol c t -> Symbol c' t'
mapSymbol fc ft = symbol (NonTerminal . fc) (Terminal . ft)
filterCats :: [Symbol c t] -> [c]
filterCats syms = [ cat | NonTerminal cat <- syms ]
filterToks :: [Symbol c t] -> [t]
filterToks syms = [ tok | Terminal tok <- syms ]
-- | Checks if a symbol is a non-terminal of one of the given categories.
catElem :: Ord c => Symbol c t -> Set c -> Bool
catElem s cs = symbol (`Set.member` cs) (const False) s
noCatsInSet :: Ord c => Set c -> [Symbol c t] -> Bool
noCatsInSet cs = not . any (`catElem` cs)

View File

@@ -0,0 +1,244 @@
----------------------------------------------------------------------
-- |
-- Module : GF.Speech.CFGToFA
--
-- Approximates CFGs with finite state networks.
----------------------------------------------------------------------
module GF.Speech.CFGToFA (cfgToFA, makeSimpleRegular,
MFA(..), cfgToMFA, cfgToFA') where
import Data.List
import Data.Maybe
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set
import PGF.CId
import PGF.Data
import GF.Data.Utilities
import GF.Speech.CFG
import GF.Speech.PGFToCFG
import GF.Infra.Ident (Ident)
import GF.Speech.FiniteState
import GF.Speech.Graph
import GF.Speech.Relation
import GF.Speech.CFG
data Recursivity = RightR | LeftR | NotR
data MutRecSet = MutRecSet {
mrCats :: Set Cat,
mrNonRecRules :: [CFRule],
mrRecRules :: [CFRule],
mrRec :: Recursivity
}
type MutRecSets = Map Cat MutRecSet
--
-- * Multiple DFA type
--
data MFA = MFA Cat [(Cat,DFA CFSymbol)]
cfgToFA :: CFG -> DFA Token
cfgToFA = minimize . compileAutomaton . makeSimpleRegular
--
-- * Compile strongly regular grammars to NFAs
--
-- Convert a strongly regular grammar to a finite automaton.
compileAutomaton :: CFG -> NFA Token
compileAutomaton g = make_fa (g,ns) s [NonTerminal (cfgStartCat g)] f fa
where
(fa,s,f) = newFA_
ns = mutRecSets g $ mutRecCats False g
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000.
make_fa :: (CFG,MutRecSets) -> State -> [CFSymbol] -> State
-> NFA Token -> NFA Token
make_fa c@(g,ns) q0 alpha q1 fa =
case alpha of
[] -> newTransition q0 q1 Nothing fa
[Terminal t] -> newTransition q0 q1 (Just t) fa
[NonTerminal a] ->
case Map.lookup a ns of
-- a is recursive
Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) ->
case mrRec n of
-- the set Ni is right-recursive or cyclic
RightR ->
let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs]
++ [(getState c, xs, getState d) | CFRule c ss _ <- rs,
let (xs,NonTerminal d) = (init ss,last ss)]
in make_fas new $ newTransition q0 (getState a) Nothing fa'
-- the set Ni is left-recursive
LeftR ->
let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs]
++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- rs]
in make_fas new $ newTransition (getState a) q1 Nothing fa'
where
(fa',stateMap) = addStatesForCats ni fa
getState x = Map.findWithDefault
(error $ "CFGToFiniteState: No state for " ++ x)
x stateMap
-- a is not recursive
Nothing -> let rs = catRules g a
in foldl' (\f (CFRule _ b _) -> make_fa_ q0 b q1 f) fa rs
(x:beta) -> let (fa',q) = newState () fa
in make_fa_ q beta q1 $ make_fa_ q0 [x] q fa'
where
make_fa_ = make_fa c
make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs
--
-- * Compile a strongly regular grammar to a DFA with sub-automata
--
cfgToMFA :: CFG -> MFA
cfgToMFA = buildMFA . makeSimpleRegular
-- | Build a DFA by building and expanding an MFA
cfgToFA' :: CFG -> DFA Token
cfgToFA' = mfaToDFA . cfgToMFA
buildMFA :: CFG -> MFA
buildMFA g = sortSubLats $ removeUnusedSubLats mfa
where fas = compileAutomata g
mfa = MFA (cfgStartCat g) [(c, minimize fa) | (c,fa) <- fas]
mfaStartDFA :: MFA -> DFA CFSymbol
mfaStartDFA (MFA start subs) =
fromMaybe (error $ "Bad start MFA: " ++ start) $ lookup start subs
mfaToDFA :: MFA -> DFA Token
mfaToDFA mfa@(MFA _ subs) = minimize $ expand $ dfa2nfa $ mfaStartDFA mfa
where
subs' = Map.fromList [(c, dfa2nfa n) | (c,n) <- subs]
getSub l = fromJust $ Map.lookup l subs'
expand (FA (Graph c ns es) s f)
= foldl' expandEdge (FA (Graph c ns []) s f) es
expandEdge fa (f,t,x) =
case x of
Nothing -> newTransition f t Nothing fa
Just (Terminal s) -> newTransition f t (Just s) fa
Just (NonTerminal l) -> insertNFA fa (f,t) (expand $ getSub l)
removeUnusedSubLats :: MFA -> MFA
removeUnusedSubLats mfa@(MFA start subs) = MFA start [(c,s) | (c,s) <- subs, isUsed c]
where
usedMap = subLatUseMap mfa
used = growUsedSet (Set.singleton start)
isUsed c = c `Set.member` used
growUsedSet = fix (\s -> foldl Set.union s $ mapMaybe (flip Map.lookup usedMap) $ Set.toList s)
subLatUseMap :: MFA -> Map Cat (Set Cat)
subLatUseMap (MFA _ subs) = Map.fromList [(c,usedSubLats n) | (c,n) <- subs]
usedSubLats :: DFA CFSymbol -> Set Cat
usedSubLats fa = Set.fromList [s | (_,_,NonTerminal s) <- transitions fa]
-- | Sort sub-networks topologically.
sortSubLats :: MFA -> MFA
sortSubLats mfa@(MFA main subs) = MFA main (reverse $ sortLats usedByMap subs)
where
usedByMap = revMultiMap (subLatUseMap mfa)
sortLats _ [] = []
sortLats ub ls = xs ++ sortLats ub' ys
where (xs,ys) = partition ((==0) . indeg) ls
ub' = Map.map (Set.\\ Set.fromList (map fst xs)) ub
indeg (c,_) = maybe 0 Set.size $ Map.lookup c ub
-- | Convert a strongly regular grammar to a number of finite automata,
-- one for each non-terminal.
-- The edges in the automata accept tokens, or name another automaton to use.
compileAutomata :: CFG
-> [(Cat,NFA CFSymbol)]
-- ^ A map of non-terminals and their automata.
compileAutomata g = [(c, makeOneFA c) | c <- allCats g]
where
mrs = mutRecSets g $ mutRecCats True g
makeOneFA c = make_fa1 mr s [NonTerminal c] f fa
where (fa,s,f) = newFA_
mr = fromJust (Map.lookup c mrs)
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000,
-- adapted to build a finite automaton for a single (mutually recursive) set only.
-- Categories not in the set will result in category-labelled edges.
make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which
-- we are building the automaton.
-> State -- ^ State to come from
-> [CFSymbol] -- ^ Symbols to accept
-> State -- ^ State to end up in
-> NFA CFSymbol -- ^ FA to add to.
-> NFA CFSymbol
make_fa1 mr q0 alpha q1 fa =
case alpha of
[] -> newTransition q0 q1 Nothing fa
[t@(Terminal _)] -> newTransition q0 q1 (Just t) fa
[c@(NonTerminal a)] | not (a `Set.member` mrCats mr) -> newTransition q0 q1 (Just c) fa
[NonTerminal a] ->
case mrRec mr of
NotR -> -- the set is a non-recursive (always singleton) set of categories
-- so the set of category rules is the set of rules for the whole set
make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa
RightR -> -- the set is right-recursive or cyclic
let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr]
++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr,
let (xs,NonTerminal d) = (init ss,last ss)]
in make_fas new $ newTransition q0 (getState a) Nothing fa'
LeftR -> -- the set is left-recursive
let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr]
++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- mrRecRules mr]
in make_fas new $ newTransition (getState a) q1 Nothing fa'
where
(fa',stateMap) = addStatesForCats (mrCats mr) fa
getState x = Map.findWithDefault
(error $ "CFGToFiniteState: No state for " ++ x)
x stateMap
(x:beta) -> let (fa',q) = newState () fa
in make_fas [(q0,[x],q),(q,beta,q1)] fa'
where
make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa1 mr s1 xs s2 f') fa xs
mutRecSets :: CFG -> [Set Cat] -> MutRecSets
mutRecSets g = Map.fromList . concatMap mkMutRecSet
where
mkMutRecSet cs = [ (c,ms) | c <- csl ]
where csl = Set.toList cs
rs = catSetRules g cs
(nrs,rrs) = partition (ruleIsNonRecursive cs) rs
ms = MutRecSet {
mrCats = cs,
mrNonRecRules = nrs,
mrRecRules = rrs,
mrRec = rec
}
rec | null rrs = NotR
| all (isRightLinear cs) rrs = RightR
| otherwise = LeftR
--
-- * Utilities
--
-- | Add a state for the given NFA for each of the categories
-- in the given set. Returns a map of categories to their
-- corresponding states.
addStatesForCats :: Set Cat -> NFA t -> (NFA t, Map Cat State)
addStatesForCats cs fa = (fa', m)
where (fa', ns) = newStates (replicate (Set.size cs) ()) fa
m = Map.fromList (zip (Set.toList cs) (map fst ns))
revMultiMap :: (Ord a, Ord b) => Map a (Set b) -> Map b (Set a)
revMultiMap m = Map.fromListWith Set.union [ (y,Set.singleton x) | (x,s) <- Map.toList m, y <- Set.toList s]

View File

@@ -0,0 +1,329 @@
----------------------------------------------------------------------
-- |
-- Module : FiniteState
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/10 16:43:44 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.16 $
--
-- A simple finite state network module.
-----------------------------------------------------------------------------
module GF.Speech.FiniteState (FA(..), State, NFA, DFA,
startState, finalStates,
states, transitions,
isInternal,
newFA, newFA_,
addFinalState,
newState, newStates,
newTransition, newTransitions,
insertTransitionWith, insertTransitionsWith,
mapStates, mapTransitions,
modifyTransitions,
nonLoopTransitionsTo, nonLoopTransitionsFrom,
loops,
removeState,
oneFinalState,
insertNFA,
onGraph,
moveLabelsToNodes, removeTrivialEmptyNodes,
minimize,
dfa2nfa,
unusedNames, renameStates,
prFAGraphviz, faToGraphviz) where
import Data.List
import Data.Maybe
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set
import GF.Data.Utilities
import GF.Speech.Graph
import qualified GF.Speech.Graphviz as Dot
type State = Int
-- | Type parameters: node id type, state label type, edge label type
-- Data constructor arguments: nodes and edges, start state, final states
data FA n a b = FA !(Graph n a b) !n ![n]
type NFA a = FA State () (Maybe a)
type DFA a = FA State () a
startState :: FA n a b -> n
startState (FA _ s _) = s
finalStates :: FA n a b -> [n]
finalStates (FA _ _ ss) = ss
states :: FA n a b -> [(n,a)]
states (FA g _ _) = nodes g
transitions :: FA n a b -> [(n,n,b)]
transitions (FA g _ _) = edges g
newFA :: Enum n => a -- ^ Start node label
-> FA n a b
newFA l = FA g s []
where (g,s) = newNode l (newGraph [toEnum 0..])
-- | Create a new finite automaton with an initial and a final state.
newFA_ :: Enum n => (FA n () b, n, n)
newFA_ = (fa'', s, f)
where fa = newFA ()
s = startState fa
(fa',f) = newState () fa
fa'' = addFinalState f fa'
addFinalState :: n -> FA n a b -> FA n a b
addFinalState f (FA g s ss) = FA g s (f:ss)
newState :: a -> FA n a b -> (FA n a b, n)
newState x (FA g s ss) = (FA g' s ss, n)
where (g',n) = newNode x g
newStates :: [a] -> FA n a b -> (FA n a b, [(n,a)])
newStates xs (FA g s ss) = (FA g' s ss, ns)
where (g',ns) = newNodes xs g
newTransition :: n -> n -> b -> FA n a b -> FA n a b
newTransition f t l = onGraph (newEdge (f,t,l))
newTransitions :: [(n, n, b)] -> FA n a b -> FA n a b
newTransitions es = onGraph (newEdges es)
insertTransitionWith :: Eq n =>
(b -> b -> b) -> (n, n, b) -> FA n a b -> FA n a b
insertTransitionWith f t = onGraph (insertEdgeWith f t)
insertTransitionsWith :: Eq n =>
(b -> b -> b) -> [(n, n, b)] -> FA n a b -> FA n a b
insertTransitionsWith f ts fa =
foldl' (flip (insertTransitionWith f)) fa ts
mapStates :: (a -> c) -> FA n a b -> FA n c b
mapStates f = onGraph (nmap f)
mapTransitions :: (b -> c) -> FA n a b -> FA n a c
mapTransitions f = onGraph (emap f)
modifyTransitions :: ([(n,n,b)] -> [(n,n,b)]) -> FA n a b -> FA n a b
modifyTransitions f = onGraph (\ (Graph r ns es) -> Graph r ns (f es))
removeState :: Ord n => n -> FA n a b -> FA n a b
removeState n = onGraph (removeNode n)
minimize :: Ord a => NFA a -> DFA a
minimize = determinize . reverseNFA . dfa2nfa . determinize . reverseNFA
unusedNames :: FA n a b -> [n]
unusedNames (FA (Graph names _ _) _ _) = names
-- | Gets all incoming transitions to a given state, excluding
-- transtions from the state itself.
nonLoopTransitionsTo :: Eq n => n -> FA n a b -> [(n,b)]
nonLoopTransitionsTo s fa =
[(f,l) | (f,t,l) <- transitions fa, t == s && f /= s]
nonLoopTransitionsFrom :: Eq n => n -> FA n a b -> [(n,b)]
nonLoopTransitionsFrom s fa =
[(t,l) | (f,t,l) <- transitions fa, f == s && t /= s]
loops :: Eq n => n -> FA n a b -> [b]
loops s fa = [l | (f,t,l) <- transitions fa, f == s && t == s]
-- | Give new names to all nodes.
renameStates :: Ord x => [y] -- ^ Infinite supply of new names
-> FA x a b
-> FA y a b
renameStates supply (FA g s fs) = FA (renameNodes newName rest g) s' fs'
where (ns,rest) = splitAt (length (nodes g)) supply
newNodes = Map.fromList (zip (map fst (nodes g)) ns)
newName n = Map.findWithDefault (error "FiniteState.newName") n newNodes
s' = newName s
fs' = map newName fs
-- | Insert an NFA into another
insertNFA :: NFA a -- ^ NFA to insert into
-> (State, State) -- ^ States to insert between
-> NFA a -- ^ NFA to insert.
-> NFA a
insertNFA (FA g1 s1 fs1) (f,t) (FA g2 s2 fs2)
= FA (newEdges es g') s1 fs1
where
es = (f,ren s2,Nothing):[(ren f2,t,Nothing) | f2 <- fs2]
(g',ren) = mergeGraphs g1 g2
onGraph :: (Graph n a b -> Graph n c d) -> FA n a b -> FA n c d
onGraph f (FA g s ss) = FA (f g) s ss
-- | Make the finite automaton have a single final state
-- by adding a new final state and adding an edge
-- from the old final states to the new state.
oneFinalState :: a -- ^ Label to give the new node
-> b -- ^ Label to give the new edges
-> FA n a b -- ^ The old network
-> FA n a b -- ^ The new network
oneFinalState nl el fa =
let (FA g s fs,nf) = newState nl fa
es = [ (f,nf,el) | f <- fs ]
in FA (newEdges es g) s [nf]
-- | Transform a standard finite automaton with labelled edges
-- to one where the labels are on the nodes instead. This can add
-- up to one extra node per edge.
moveLabelsToNodes :: (Ord n,Eq a) => FA n () (Maybe a) -> FA n (Maybe a) ()
moveLabelsToNodes = onGraph f
where f g@(Graph c _ _) = Graph c' ns (concat ess)
where is = [ ((n,l),inc) | (n, (l,inc,_)) <- Map.toList (nodeInfo g)]
(c',is') = mapAccumL fixIncoming c is
(ns,ess) = unzip (concat is')
-- | Remove empty nodes which are not start or final, and have
-- exactly one outgoing edge or exactly one incoming edge.
removeTrivialEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) ()
removeTrivialEmptyNodes = pruneUnusable . skipSimpleEmptyNodes
-- | Move edges to empty nodes to point to the next node(s).
-- This is not done if the pointed-to node is a final node.
skipSimpleEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) ()
skipSimpleEmptyNodes fa = onGraph og fa
where
og g@(Graph c ns es) = if es' == es then g else og (Graph c ns es')
where
es' = concatMap changeEdge es
info = nodeInfo g
changeEdge e@(f,t,())
| isNothing (getNodeLabel info t)
-- && (i * o <= i + o)
&& not (isFinal fa t)
= [ (f,t',()) | (_,t',()) <- getOutgoing info t]
| otherwise = [e]
-- where i = inDegree info t
-- o = outDegree info t
isInternal :: Eq n => FA n a b -> n -> Bool
isInternal (FA _ start final) n = n /= start && n `notElem` final
isFinal :: Eq n => FA n a b -> n -> Bool
isFinal (FA _ _ final) n = n `elem` final
-- | Remove all internal nodes with no incoming edges
-- or no outgoing edges.
pruneUnusable :: Ord n => FA n (Maybe a) () -> FA n (Maybe a) ()
pruneUnusable fa = onGraph f fa
where
f g = if Set.null rns then g else f (removeNodes rns g)
where info = nodeInfo g
rns = Set.fromList [ n | (n,_) <- nodes g,
isInternal fa n,
inDegree info n == 0
|| outDegree info n == 0]
fixIncoming :: (Ord n, Eq a) => [n]
-> (Node n (),[Edge n (Maybe a)]) -- ^ A node and its incoming edges
-> ([n],[(Node n (Maybe a),[Edge n ()])]) -- ^ Replacement nodes with their
-- incoming edges.
fixIncoming cs c@((n,()),es) = (cs'', ((n,Nothing),es'):newContexts)
where ls = nub $ map edgeLabel es
(cs',cs'') = splitAt (length ls) cs
newNodes = zip cs' ls
es' = [ (x,n,()) | x <- map fst newNodes ]
-- separate cyclic and non-cyclic edges
(cyc,ncyc) = partition (\ (f,_,_) -> f == n) es
-- keep all incoming non-cyclic edges with the right label
to (x,l) = [ (f,x,()) | (f,_,l') <- ncyc, l == l']
-- for each cyclic edge with the right label,
-- add an edge from each of the new nodes (including this one)
++ [ (y,x,()) | (f,_,l') <- cyc, l == l', (y,_) <- newNodes]
newContexts = [ (v, to v) | v <- newNodes ]
alphabet :: Eq b => Graph n a (Maybe b) -> [b]
alphabet = nub . catMaybes . map edgeLabel . edges
determinize :: Ord a => NFA a -> DFA a
determinize (FA g s f) = let (ns,es) = h (Set.singleton start) Set.empty Set.empty
(ns',es') = (Set.toList ns, Set.toList es)
final = filter isDFAFinal ns'
fa = FA (Graph undefined [(n,()) | n <- ns'] es') start final
in renameStates [0..] fa
where info = nodeInfo g
-- reach = nodesReachable out
start = closure info $ Set.singleton s
isDFAFinal n = not (Set.null (Set.fromList f `Set.intersection` n))
h currentStates oldStates es
| Set.null currentStates = (oldStates,es)
| otherwise = ((h $! uniqueNewStates) $! allOldStates) $! es'
where
allOldStates = oldStates `Set.union` currentStates
(newStates,es') = new (Set.toList currentStates) Set.empty es
uniqueNewStates = newStates Set.\\ allOldStates
-- Get the sets of states reachable from the given states
-- by consuming one symbol, and the associated edges.
new [] rs es = (rs,es)
new (n:ns) rs es = new ns rs' es'
where cs = reachable info n --reachable reach n
rs' = rs `Set.union` Set.fromList (map snd cs)
es' = es `Set.union` Set.fromList [(n,s,c) | (c,s) <- cs]
-- | Get all the nodes reachable from a list of nodes by only empty edges.
closure :: Ord n => NodeInfo n a (Maybe b) -> Set n -> Set n
closure info x = closure_ x x
where closure_ acc check | Set.null check = acc
| otherwise = closure_ acc' check'
where
reach = Set.fromList [y | x <- Set.toList check,
(_,y,Nothing) <- getOutgoing info x]
acc' = acc `Set.union` reach
check' = reach Set.\\ acc
-- | Get a map of labels to sets of all nodes reachable
-- from a the set of nodes by one edge with the given
-- label and then any number of empty edges.
reachable :: (Ord n,Ord b) => NodeInfo n a (Maybe b) -> Set n -> [(b,Set n)]
reachable info ns = Map.toList $ Map.map (closure info . Set.fromList) $ reachable1 info ns
reachable1 info ns = Map.fromListWith (++) [(c, [y]) | n <- Set.toList ns, (_,y,Just c) <- getOutgoing info n]
reverseNFA :: NFA a -> NFA a
reverseNFA (FA g s fs) = FA g''' s' [s]
where g' = reverseGraph g
(g'',s') = newNode () g'
g''' = newEdges [(s',f,Nothing) | f <- fs] g''
dfa2nfa :: DFA a -> NFA a
dfa2nfa = mapTransitions Just
--
-- * Visualization
--
prFAGraphviz :: (Eq n,Show n) => FA n String String -> String
prFAGraphviz = Dot.prGraphviz . faToGraphviz
prFAGraphviz_ :: (Eq n,Show n,Show a, Show b) => FA n a b -> String
prFAGraphviz_ = Dot.prGraphviz . faToGraphviz . mapStates show . mapTransitions show
faToGraphviz :: (Eq n,Show n) => FA n String String -> Dot.Graph
faToGraphviz (FA (Graph _ ns es) s f)
= Dot.Graph Dot.Directed Nothing [] (map mkNode ns) (map mkEdge es) []
where mkNode (n,l) = Dot.Node (show n) attrs
where attrs = [("label",l)]
++ if n == s then [("shape","box")] else []
++ if n `elem` f then [("style","bold")] else []
mkEdge (x,y,l) = Dot.Edge (show x) (show y) [("label",l)]
--
-- * Utilities
--
lookups :: Ord k => [k] -> Map k a -> [a]
lookups xs m = mapMaybe (flip Map.lookup m) xs

178
src-3.0/GF/Speech/Graph.hs Normal file
View File

@@ -0,0 +1,178 @@
----------------------------------------------------------------------
-- |
-- Module : Graph
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/10 16:43:44 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.2 $
--
-- A simple graph module.
-----------------------------------------------------------------------------
module GF.Speech.Graph ( Graph(..), Node, Edge, NodeInfo
, newGraph, nodes, edges
, nmap, emap, newNode, newNodes, newEdge, newEdges
, insertEdgeWith
, removeNode, removeNodes
, nodeInfo
, getIncoming, getOutgoing, getNodeLabel
, inDegree, outDegree
, nodeLabel
, edgeFrom, edgeTo, edgeLabel
, reverseGraph, mergeGraphs, renameNodes
) where
import GF.Data.Utilities
import Data.List
import Data.Maybe
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set
data Graph n a b = Graph [n] ![Node n a] ![Edge n b]
deriving (Eq,Show)
type Node n a = (n,a)
type Edge n b = (n,n,b)
type NodeInfo n a b = Map n (a, [Edge n b], [Edge n b])
-- | Create a new empty graph.
newGraph :: [n] -> Graph n a b
newGraph ns = Graph ns [] []
-- | Get all the nodes in the graph.
nodes :: Graph n a b -> [Node n a]
nodes (Graph _ ns _) = ns
-- | Get all the edges in the graph.
edges :: Graph n a b -> [Edge n b]
edges (Graph _ _ es) = es
-- | Map a function over the node labels.
nmap :: (a -> c) -> Graph n a b -> Graph n c b
nmap f (Graph c ns es) = Graph c [(n,f l) | (n,l) <- ns] es
-- | Map a function over the edge labels.
emap :: (b -> c) -> Graph n a b -> Graph n a c
emap f (Graph c ns es) = Graph c ns [(x,y,f l) | (x,y,l) <- es]
-- | Add a node to the graph.
newNode :: a -- ^ Node label
-> Graph n a b
-> (Graph n a b,n) -- ^ Node graph and name of new node
newNode l (Graph (c:cs) ns es) = (Graph cs ((c,l):ns) es, c)
newNodes :: [a] -> Graph n a b -> (Graph n a b,[Node n a])
newNodes ls g = (g', zip ns ls)
where (g',ns) = mapAccumL (flip newNode) g ls
-- lazy version:
--newNodes ls (Graph cs ns es) = (Graph cs' (ns'++ns) es, ns')
-- where (xs,cs') = splitAt (length ls) cs
-- ns' = zip xs ls
newEdge :: Edge n b -> Graph n a b -> Graph n a b
newEdge e (Graph c ns es) = Graph c ns (e:es)
newEdges :: [Edge n b] -> Graph n a b -> Graph n a b
newEdges es g = foldl' (flip newEdge) g es
-- lazy version:
-- newEdges es' (Graph c ns es) = Graph c ns (es'++es)
insertEdgeWith :: Eq n =>
(b -> b -> b) -> Edge n b -> Graph n a b -> Graph n a b
insertEdgeWith f e@(x,y,l) (Graph c ns es) = Graph c ns (h es)
where h [] = [e]
h (e'@(x',y',l'):es') | x' == x && y' == y = (x',y', f l l'):es'
| otherwise = e':h es'
-- | Remove a node and all edges to and from that node.
removeNode :: Ord n => n -> Graph n a b -> Graph n a b
removeNode n = removeNodes (Set.singleton n)
-- | Remove a set of nodes and all edges to and from those nodes.
removeNodes :: Ord n => Set n -> Graph n a b -> Graph n a b
removeNodes xs (Graph c ns es) = Graph c ns' es'
where
keepNode n = not (Set.member n xs)
ns' = [ x | x@(n,_) <- ns, keepNode n ]
es' = [ e | e@(f,t,_) <- es, keepNode f && keepNode t ]
-- | Get a map of node names to info about each node.
nodeInfo :: Ord n => Graph n a b -> NodeInfo n a b
nodeInfo g = Map.fromList [ (n, (x, fn inc n, fn out n)) | (n,x) <- nodes g ]
where
inc = groupEdgesBy edgeTo g
out = groupEdgesBy edgeFrom g
fn m n = fromMaybe [] (Map.lookup n m)
groupEdgesBy :: (Ord n) => (Edge n b -> n) -- ^ Gets the node to group by
-> Graph n a b -> Map n [Edge n b]
groupEdgesBy f g = Map.fromListWith (++) [(f e, [e]) | e <- edges g]
lookupNode :: Ord n => NodeInfo n a b -> n -> (a, [Edge n b], [Edge n b])
lookupNode i n = fromJust $ Map.lookup n i
getIncoming :: Ord n => NodeInfo n a b -> n -> [Edge n b]
getIncoming i n = let (_,inc,_) = lookupNode i n in inc
getOutgoing :: Ord n => NodeInfo n a b -> n -> [Edge n b]
getOutgoing i n = let (_,_,out) = lookupNode i n in out
inDegree :: Ord n => NodeInfo n a b -> n -> Int
inDegree i n = length $ getIncoming i n
outDegree :: Ord n => NodeInfo n a b -> n -> Int
outDegree i n = length $ getOutgoing i n
getNodeLabel :: Ord n => NodeInfo n a b -> n -> a
getNodeLabel i n = let (l,_,_) = lookupNode i n in l
nodeLabel :: Node n a -> a
nodeLabel = snd
edgeFrom :: Edge n b -> n
edgeFrom (f,_,_) = f
edgeTo :: Edge n b -> n
edgeTo (_,t,_) = t
edgeLabel :: Edge n b -> b
edgeLabel (_,_,l) = l
reverseGraph :: Graph n a b -> Graph n a b
reverseGraph (Graph c ns es) = Graph c ns [ (t,f,l) | (f,t,l) <- es ]
-- | Add the nodes from the second graph to the first graph.
-- The nodes in the second graph will be renamed using the name
-- supply in the first graph.
-- This function is more efficient when the second graph
-- is smaller than the first.
mergeGraphs :: Ord m => Graph n a b -> Graph m a b
-> (Graph n a b, m -> n) -- ^ The new graph and a function translating
-- the old names of nodes in the second graph
-- to names in the new graph.
mergeGraphs (Graph c ns1 es1) g2 = (Graph c' (ns2++ns1) (es2++es1), newName)
where
(xs,c') = splitAt (length (nodes g2)) c
newNames = Map.fromList (zip (map fst (nodes g2)) xs)
newName n = fromJust $ Map.lookup n newNames
Graph _ ns2 es2 = renameNodes newName undefined g2
-- | Rename the nodes in the graph.
renameNodes :: (n -> m) -- ^ renaming function
-> [m] -- ^ infinite supply of fresh node names, to
-- use when adding nodes in the future.
-> Graph n a b -> Graph m a b
renameNodes newName c (Graph _ ns es) = Graph c ns' es'
where ns' = map' (\ (n,x) -> (newName n,x)) ns
es' = map' (\ (f,t,l) -> (newName f, newName t, l)) es
-- | A strict 'map'
map' :: (a -> b) -> [a] -> [b]
map' _ [] = []
map' f (x:xs) = ((:) $! f x) $! map' f xs

View File

@@ -0,0 +1,116 @@
----------------------------------------------------------------------
-- |
-- Module : Graphviz
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/09/15 18:10:44 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.2 $
--
-- Graphviz DOT format representation and printing.
-----------------------------------------------------------------------------
module GF.Speech.Graphviz (
Graph(..), GraphType(..),
Node(..), Edge(..),
Attr,
addSubGraphs,
setName,
setAttr,
prGraphviz
) where
import Data.Char
import GF.Data.Utilities
-- | Graph type, graph ID, graph attirbutes, graph nodes, graph edges, subgraphs
data Graph = Graph {
gType :: GraphType,
gId :: Maybe String,
gAttrs :: [Attr],
gNodes :: [Node],
gEdges :: [Edge],
gSubgraphs :: [Graph]
}
deriving (Show)
data GraphType = Directed | Undirected
deriving (Show)
data Node = Node String [Attr]
deriving Show
data Edge = Edge String String [Attr]
deriving Show
type Attr = (String,String)
--
-- * Graph construction
--
addSubGraphs :: [Graph] -> Graph -> Graph
addSubGraphs gs g = g { gSubgraphs = gs ++ gSubgraphs g }
setName :: String -> Graph -> Graph
setName n g = g { gId = Just n }
setAttr :: String -> String -> Graph -> Graph
setAttr n v g = g { gAttrs = tableSet n v (gAttrs g) }
--
-- * Pretty-printing
--
prGraphviz :: Graph -> String
prGraphviz g@(Graph t i _ _ _ _) =
graphtype t ++ " " ++ maybe "" esc i ++ " {\n" ++ prGraph g ++ "}\n"
prSubGraph :: Graph -> String
prSubGraph g@(Graph _ i _ _ _ _) =
"subgraph" ++ " " ++ maybe "" esc i ++ " {\n" ++ prGraph g ++ "}"
prGraph :: Graph -> String
prGraph (Graph t id at ns es ss) =
unlines $ map (++";") (map prAttr at
++ map prNode ns
++ map (prEdge t) es
++ map prSubGraph ss)
graphtype :: GraphType -> String
graphtype Directed = "digraph"
graphtype Undirected = "graph"
prNode :: Node -> String
prNode (Node n at) = esc n ++ " " ++ prAttrList at
prEdge :: GraphType -> Edge -> String
prEdge t (Edge x y at) = esc x ++ " " ++ edgeop t ++ " " ++ esc y ++ " " ++ prAttrList at
edgeop :: GraphType -> String
edgeop Directed = "->"
edgeop Undirected = "--"
prAttrList :: [Attr] -> String
prAttrList [] = ""
prAttrList at = "[" ++ join "," (map prAttr at) ++ "]"
prAttr :: Attr -> String
prAttr (n,v) = esc n ++ " = " ++ esc v
esc :: String -> String
esc s | needEsc s = "\"" ++ concat [ if shouldEsc c then ['\\',c] else [c] | c <- s ] ++ "\""
| otherwise = s
where shouldEsc = (`elem` ['"', '\\'])
needEsc :: String -> Bool
needEsc [] = True
needEsc xs | all isDigit xs = False
needEsc (x:xs) = not (isIDFirst x && all isIDChar xs)
isIDFirst, isIDChar :: Char -> Bool
isIDFirst c = c `elem` (['_']++['a'..'z']++['A'..'Z'])
isIDChar c = isIDFirst c || isDigit c

View File

@@ -0,0 +1,75 @@
----------------------------------------------------------------------
-- |
-- Module : GF.Speech.PGFToCFG
--
-- Approximates PGF grammars with context-free grammars.
----------------------------------------------------------------------
module GF.Speech.PGFToCFG (pgfToCFG) where
import PGF.CId
import PGF.Data as PGF
import PGF.Macros
import GF.Infra.Ident
import GF.Speech.CFG
import Data.Array as Array
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Maybe
import Data.Set (Set)
import qualified Data.Set as Set
pgfToCFG :: PGF
-> CId -- ^ Concrete syntax name
-> CFG
pgfToCFG pgf lang = mkCFG (lookStartCat pgf) extCats (startRules ++ concatMap fruleToCFRule rules)
where
pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang)
rules :: [FRule]
rules = Array.elems (PGF.allRules pinfo)
fcatGFCats :: Map FCat CId
fcatGFCats = Map.fromList [(fc,c) | (c,fcs) <- Map.toList (startupCats pinfo), fc <- fcs]
fcatGFCat :: FCat -> CId
fcatGFCat c = fromMaybe (mkCId "Unknown") (Map.lookup c fcatGFCats)
fcatToCat :: FCat -> FIndex -> Cat
fcatToCat c l = prCId (fcatGFCat c) ++ "_" ++ show c ++ "_" ++ show l
extCats :: Set Cat
extCats = Set.fromList $ map lhsCat startRules
-- NOTE: this is only correct for cats that have a lincat with exactly one row.
startRules :: [CFRule]
startRules = [CFRule (prCId c) [NonTerminal (fcatToCat fc 0)] (CFRes 0)
| (c,fcs) <- Map.toList (startupCats pinfo), fc <- fcs]
fruleToCFRule :: FRule -> [CFRule]
fruleToCFRule (FRule f ps args c rhs) =
[CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm (map (fixProfile row) ps))
| (l,row) <- Array.assocs rhs]
where
mkRhs :: Array FPointPos FSymbol -> [CFSymbol]
mkRhs = map fsymbolToSymbol . Array.elems
fsymbolToSymbol :: FSymbol -> CFSymbol
fsymbolToSymbol (FSymCat l n) = NonTerminal (fcatToCat (args!!n) l)
fsymbolToSymbol (FSymTok t) = Terminal t
fixProfile :: Array FPointPos FSymbol -> Profile -> Profile
fixProfile row = concatMap positions
where
nts = zip [0..] [nt | nt@(FSymCat _ _) <- Array.elems row ]
positions i = [k | (k,FSymCat _ j) <- nts, j == i]
profilesToTerm :: [Profile] -> CFTerm
profilesToTerm [[n]] | f == wildCId = CFRes n
profilesToTerm ps = CFObj f (zipWith profileToTerm argTypes ps)
where (argTypes,_) = catSkeleton $ lookType pgf f
profileToTerm :: CId -> Profile -> CFTerm
profileToTerm t [] = CFMeta t
profileToTerm _ xs = CFRes (last xs) -- FIXME: unify

143
src-3.0/GF/Speech/RegExp.hs Normal file
View File

@@ -0,0 +1,143 @@
module GF.Speech.RegExp (RE(..),
epsilonRE, nullRE,
isEpsilon, isNull,
unionRE, concatRE, seqRE,
repeatRE, minimizeRE,
mapRE, mapRE', joinRE,
symbolsRE,
dfa2re, prRE) where
import Data.List
import GF.Data.Utilities
import GF.Speech.FiniteState
data RE a =
REUnion [RE a] -- ^ REUnion [] is null
| REConcat [RE a] -- ^ REConcat [] is epsilon
| RERepeat (RE a)
| RESymbol a
deriving (Eq,Ord,Show)
dfa2re :: (Ord a) => DFA a -> RE a
dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops
. oneFinalState () epsilonRE . mapTransitions RESymbol
where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa
merge es = [(f,t,unionRE ls)
| ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]]
elimStates :: (Ord a) => DFA (RE a) -> DFA (RE a)
elimStates fa =
case [s | (s,_) <- states fa, isInternal fa s] of
[] -> fa
sE:_ -> elimStates $ insertTransitionsWith (\x y -> unionRE [x,y]) ts $ removeState sE fa
where sAs = nonLoopTransitionsTo sE fa
sBs = nonLoopTransitionsFrom sE fa
r2 = unionRE $ loops sE fa
ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs]
r r1 r3 = concatRE [r1, repeatRE r2, r3]
epsilonRE :: RE a
epsilonRE = REConcat []
nullRE :: RE a
nullRE = REUnion []
isNull :: RE a -> Bool
isNull (REUnion []) = True
isNull _ = False
isEpsilon :: RE a -> Bool
isEpsilon (REConcat []) = True
isEpsilon _ = False
unionRE :: Ord a => [RE a] -> RE a
unionRE = unionOrId . sortNub . concatMap toList
where
toList (REUnion xs) = xs
toList x = [x]
unionOrId [r] = r
unionOrId rs = REUnion rs
concatRE :: [RE a] -> RE a
concatRE xs | any isNull xs = nullRE
| otherwise = case concatMap toList xs of
[r] -> r
rs -> REConcat rs
where
toList (REConcat xs) = xs
toList x = [x]
seqRE :: [a] -> RE a
seqRE = concatRE . map RESymbol
repeatRE :: RE a -> RE a
repeatRE x | isNull x || isEpsilon x = epsilonRE
| otherwise = RERepeat x
finalRE :: Ord a => DFA (RE a) -> RE a
finalRE fa = concatRE [repeatRE r1, r2,
repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])]
where
s0 = startState fa
[sF] = finalStates fa
r1 = unionRE $ loops s0 fa
r2 = unionRE $ map snd $ nonLoopTransitionsTo sF fa
r3 = unionRE $ loops sF fa
r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa
reverseRE :: RE a -> RE a
reverseRE (REConcat xs) = REConcat $ map reverseRE $ reverse xs
reverseRE (REUnion xs) = REUnion (map reverseRE xs)
reverseRE (RERepeat x) = RERepeat (reverseRE x)
reverseRE x = x
minimizeRE :: Ord a => RE a -> RE a
minimizeRE = reverseRE . mergeForward . reverseRE . mergeForward
mergeForward :: Ord a => RE a -> RE a
mergeForward (REUnion xs) =
unionRE [concatRE [mergeForward y,mergeForward (unionRE rs)] | (y,rs) <- buildMultiMap (map firstRE xs)]
mergeForward (REConcat (x:xs)) = concatRE [mergeForward x,mergeForward (REConcat xs)]
mergeForward (RERepeat r) = repeatRE (mergeForward r)
mergeForward r = r
firstRE :: RE a -> (RE a, RE a)
firstRE (REConcat (x:xs)) = (x, REConcat xs)
firstRE r = (r,epsilonRE)
mapRE :: (a -> b) -> RE a -> RE b
mapRE f = mapRE' (RESymbol . f)
mapRE' :: (a -> RE b) -> RE a -> RE b
mapRE' f (REConcat xs) = REConcat (map (mapRE' f) xs)
mapRE' f (REUnion xs) = REUnion (map (mapRE' f) xs)
mapRE' f (RERepeat x) = RERepeat (mapRE' f x)
mapRE' f (RESymbol s) = f s
joinRE :: RE (RE a) -> RE a
joinRE (REConcat xs) = REConcat (map joinRE xs)
joinRE (REUnion xs) = REUnion (map joinRE xs)
joinRE (RERepeat xs) = RERepeat (joinRE xs)
joinRE (RESymbol ss) = ss
symbolsRE :: RE a -> [a]
symbolsRE (REConcat xs) = concatMap symbolsRE xs
symbolsRE (REUnion xs) = concatMap symbolsRE xs
symbolsRE (RERepeat x) = symbolsRE x
symbolsRE (RESymbol x) = [x]
-- Debugging
prRE :: RE String -> String
prRE = prRE' 0
prRE' _ (REUnion []) = "<NULL>"
prRE' n (REUnion xs) = p n 1 (concat (intersperse " | " (map (prRE' 1) xs)))
prRE' n (REConcat xs) = p n 2 (unwords (map (prRE' 2) xs))
prRE' n (RERepeat x) = p n 3 (prRE' 3 x) ++ "*"
prRE' _ (RESymbol s) = s
p n m s | n >= m = "(" ++ s ++ ")"
| True = s

View File

@@ -0,0 +1,130 @@
----------------------------------------------------------------------
-- |
-- Module : Relation
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/10/26 17:13:13 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.1 $
--
-- A simple module for relations.
-----------------------------------------------------------------------------
module GF.Speech.Relation (Rel, mkRel, mkRel'
, allRelated , isRelatedTo
, transitiveClosure
, reflexiveClosure, reflexiveClosure_
, symmetricClosure
, symmetricSubrelation, reflexiveSubrelation
, reflexiveElements
, equivalenceClasses
, isTransitive, isReflexive, isSymmetric
, isEquivalence
, isSubRelationOf) where
import Data.List
import Data.Maybe
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set
import GF.Data.Utilities
type Rel a = Map a (Set a)
-- | Creates a relation from a list of related pairs.
mkRel :: Ord a => [(a,a)] -> Rel a
mkRel ps = relates ps Map.empty
-- | Creates a relation from a list pairs of elements and the elements
-- related to them.
mkRel' :: Ord a => [(a,[a])] -> Rel a
mkRel' xs = Map.fromListWith Set.union [(x,Set.fromList ys) | (x,ys) <- xs]
relToList :: Rel a -> [(a,a)]
relToList r = [ (x,y) | (x,ys) <- Map.toList r, y <- Set.toList ys ]
-- | Add a pair to the relation.
relate :: Ord a => a -> a -> Rel a -> Rel a
relate x y r = Map.insertWith Set.union x (Set.singleton y) r
-- | Add a list of pairs to the relation.
relates :: Ord a => [(a,a)] -> Rel a -> Rel a
relates ps r = foldl (\r' (x,y) -> relate x y r') r ps
-- | Checks if an element is related to another.
isRelatedTo :: Ord a => Rel a -> a -> a -> Bool
isRelatedTo r x y = maybe False (y `Set.member`) (Map.lookup x r)
-- | Get the set of elements to which a given element is related.
allRelated :: Ord a => Rel a -> a -> Set a
allRelated r x = fromMaybe Set.empty (Map.lookup x r)
-- | Get all elements in the relation.
domain :: Ord a => Rel a -> Set a
domain r = foldl Set.union (Map.keysSet r) (Map.elems r)
-- | Keep only pairs for which both elements are in the given set.
intersectSetRel :: Ord a => Set a -> Rel a -> Rel a
intersectSetRel s = filterRel (\x y -> x `Set.member` s && y `Set.member` s)
transitiveClosure :: Ord a => Rel a -> Rel a
transitiveClosure r = fix (Map.map growSet) r
where growSet ys = foldl Set.union ys (map (allRelated r) $ Set.toList ys)
reflexiveClosure_ :: Ord a => [a] -- ^ The set over which the relation is defined.
-> Rel a -> Rel a
reflexiveClosure_ u r = relates [(x,x) | x <- u] r
-- | Uses 'domain'
reflexiveClosure :: Ord a => Rel a -> Rel a
reflexiveClosure r = reflexiveClosure_ (Set.toList $ domain r) r
symmetricClosure :: Ord a => Rel a -> Rel a
symmetricClosure r = relates [ (y,x) | (x,y) <- relToList r ] r
symmetricSubrelation :: Ord a => Rel a -> Rel a
symmetricSubrelation r = filterRel (flip $ isRelatedTo r) r
reflexiveSubrelation :: Ord a => Rel a -> Rel a
reflexiveSubrelation r = intersectSetRel (reflexiveElements r) r
-- | Get the set of elements which are related to themselves.
reflexiveElements :: Ord a => Rel a -> Set a
reflexiveElements r = Set.fromList [ x | (x,ys) <- Map.toList r, x `Set.member` ys ]
-- | Keep the related pairs for which the predicate is true.
filterRel :: Ord a => (a -> a -> Bool) -> Rel a -> Rel a
filterRel p = purgeEmpty . Map.mapWithKey (Set.filter . p)
-- | Remove keys that map to no elements.
purgeEmpty :: Ord a => Rel a -> Rel a
purgeEmpty r = Map.filter (not . Set.null) r
-- | Get the equivalence classes from an equivalence relation.
equivalenceClasses :: Ord a => Rel a -> [Set a]
equivalenceClasses r = equivalenceClasses_ (Map.keys r) r
where equivalenceClasses_ [] _ = []
equivalenceClasses_ (x:xs) r = ys:equivalenceClasses_ zs r
where ys = allRelated r x
zs = [x' | x' <- xs, not (x' `Set.member` ys)]
isTransitive :: Ord a => Rel a -> Bool
isTransitive r = and [z `Set.member` ys | (x,ys) <- Map.toList r,
y <- Set.toList ys, z <- Set.toList (allRelated r y)]
isReflexive :: Ord a => Rel a -> Bool
isReflexive r = all (\ (x,ys) -> x `Set.member` ys) (Map.toList r)
isSymmetric :: Ord a => Rel a -> Bool
isSymmetric r = and [isRelatedTo r y x | (x,y) <- relToList r]
isEquivalence :: Ord a => Rel a -> Bool
isEquivalence r = isReflexive r && isSymmetric r && isTransitive r
isSubRelationOf :: Ord a => Rel a -> Rel a -> Bool
isSubRelationOf r1 r2 = all (uncurry (isRelatedTo r2)) (relToList r1)

80
src-3.0/GF/Speech/SISR.hs Normal file
View File

@@ -0,0 +1,80 @@
----------------------------------------------------------------------
-- |
-- Module : GF.Speech.SISR
--
-- Abstract syntax and pretty printer for SISR,
-- (Semantic Interpretation for Speech Recognition)
----------------------------------------------------------------------
module GF.Speech.SISR (SISRFormat(..), SISRTag, prSISR,
topCatSISR, profileInitSISR, catSISR, profileFinalSISR) where
import Data.List
import GF.Data.Utilities
import GF.Infra.Ident
import GF.Speech.CFG
import GF.Speech.SRG (SRGNT)
import PGF.CId
import qualified GF.JavaScript.AbsJS as JS
import qualified GF.JavaScript.PrintJS as JS
data SISRFormat =
-- SISR Working draft 1 April 2003
-- http://www.w3.org/TR/2003/WD-semantic-interpretation-20030401/
SISROld
deriving Show
type SISRTag = [JS.DeclOrExpr]
prSISR :: SISRTag -> String
prSISR = JS.printTree
topCatSISR :: String -> SISRFormat -> SISRTag
topCatSISR c fmt = map JS.DExpr [fmtOut fmt `ass` fmtRef fmt c]
profileInitSISR :: CFTerm -> SISRFormat -> SISRTag
profileInitSISR t fmt
| null (usedArgs t) = []
| otherwise = [JS.Decl [JS.DInit args (JS.EArray [])]]
usedArgs :: CFTerm -> [Int]
usedArgs (CFObj _ ts) = foldr union [] (map usedArgs ts)
usedArgs (CFAbs _ x) = usedArgs x
usedArgs (CFApp x y) = usedArgs x `union` usedArgs y
usedArgs (CFRes i) = [i]
usedArgs _ = []
catSISR :: CFTerm -> SRGNT -> SISRFormat -> SISRTag
catSISR t (c,i) fmt
| i `elem` usedArgs t = map JS.DExpr
[JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i)) `ass` fmtRef fmt c]
| otherwise = []
profileFinalSISR :: CFTerm -> SISRFormat -> SISRTag
profileFinalSISR term fmt = [JS.DExpr $ fmtOut fmt `ass` f term]
where
f (CFObj n ts) = tree (prCId n) (map f ts)
f (CFAbs v x) = JS.EFun [var v] [JS.SReturn (f x)]
f (CFApp x y) = JS.ECall (f x) [f y]
f (CFRes i) = JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i))
f (CFVar v) = JS.EVar (var v)
f (CFMeta typ) = obj [("name",JS.EStr "?"), ("type",JS.EStr (prCId typ))]
fmtOut SISROld = JS.EVar (JS.Ident "$")
fmtRef SISROld c = JS.EVar (JS.Ident ("$" ++ c))
args = JS.Ident "a"
var v = JS.Ident ("x" ++ show v)
field x y = JS.EMember x (JS.Ident y)
ass = JS.EAssign
tree n xs = obj [("name", JS.EStr n), ("args", JS.EArray xs)]
obj ps = JS.EObj [JS.Prop (JS.StringPropName x) y | (x,y) <- ps]

175
src-3.0/GF/Speech/SRG.hs Normal file
View File

@@ -0,0 +1,175 @@
----------------------------------------------------------------------
-- |
-- Module : SRG
--
-- Representation of, conversion to, and utilities for
-- printing of a general Speech Recognition Grammar.
--
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
-- categories in the grammar
----------------------------------------------------------------------
module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..), SRGItem
, SRGNT, CFTerm
, makeSRG
, makeSimpleSRG
, makeNonRecursiveSRG
, lookupFM_, prtS
) where
import GF.Data.Operations
import GF.Data.Utilities
import GF.Infra.Ident
import GF.Infra.PrintClass
import GF.Speech.CFG
import GF.Speech.PGFToCFG
import GF.Speech.Relation
import GF.Speech.FiniteState
import GF.Speech.RegExp
import GF.Speech.CFGToFA
import GF.Infra.Option
import PGF.CId
import PGF.Data
import PGF.Macros
import Data.List
import Data.Maybe (fromMaybe, maybeToList)
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set
import Debug.Trace
data SRG = SRG { srgName :: String -- ^ grammar name
, srgStartCat :: Cat -- ^ start category name
, srgExternalCats :: Set Cat
, srgLanguage :: Maybe String -- ^ The language for which the grammar
-- is intended, e.g. en-UK
, srgRules :: [SRGRule]
}
deriving (Eq,Show)
data SRGRule = SRGRule Cat [SRGAlt] -- ^ SRG category name, original category name
-- and productions
deriving (Eq,Show)
-- | maybe a probability, a rule name and an EBNF right-hand side
data SRGAlt = SRGAlt (Maybe Double) CFTerm SRGItem
deriving (Eq,Show)
type SRGItem = RE SRGSymbol
type SRGSymbol = Symbol SRGNT Token
-- | An SRG non-terminal. Category name and its number in the profile.
type SRGNT = (Cat, Int)
-- | Create a non-left-recursive SRG.
-- FIXME: the probabilities in the returned
-- grammar may be meaningless.
makeSimpleSRG :: PGF
-> CId -- ^ Concrete syntax name.
-> SRG
makeSimpleSRG = makeSRG preprocess
where
preprocess = traceStats "After mergeIdentical"
. mergeIdentical
. traceStats "After removeLeftRecursion"
. removeLeftRecursion
. traceStats "After topDownFilter"
. topDownFilter
. traceStats "After bottomUpFilter"
. bottomUpFilter
. traceStats "After removeCycles"
. removeCycles
. traceStats "Inital CFG"
traceStats s g = trace ("---- " ++ s ++ ": " ++ stats g {- ++ "\n" ++ prCFRules g ++ "----" -}) g
stats g = "Categories: " ++ show (countCats g)
++ " Rules: " ++ show (countRules g)
makeNonRecursiveSRG :: PGF
-> CId -- ^ Concrete syntax name.
-> SRG
makeNonRecursiveSRG pgf cnc =
SRG { srgName = prCId cnc,
srgStartCat = start,
srgExternalCats = cfgExternalCats cfg,
srgLanguage = getSpeechLanguage pgf cnc,
srgRules = rs }
where
cfg = pgfToCFG pgf cnc
MFA start dfas = cfgToMFA cfg
rs = [SRGRule l [SRGAlt Nothing dummyCFTerm (dfaToSRGItem dfa)] | (l,dfa) <- dfas]
where dfaToSRGItem = mapRE dummySRGNT . minimizeRE . dfa2re
dummyCFTerm = CFMeta (mkCId "dummy")
dummySRGNT = mapSymbol (\c -> (c,0)) id
makeSRG :: (CFG -> CFG)
-> PGF
-> CId -- ^ Concrete syntax name.
-> SRG
makeSRG preprocess pgf cnc =
SRG { srgName = prCId cnc,
srgStartCat = cfgStartCat cfg,
srgExternalCats = cfgExternalCats cfg,
srgLanguage = getSpeechLanguage pgf cnc,
srgRules = rs }
where
cfg = pgfToCFG pgf cnc
(_,cfgRules) = unzip $ allRulesGrouped $ preprocess cfg
rs = map cfRulesToSRGRule cfgRules
getSpeechLanguage :: PGF -> CId -> Maybe String
getSpeechLanguage pgf cnc = fmap (replace '_' '-') $ lookConcrFlag pgf cnc (mkCId "language")
cfRulesToSRGRule :: [CFRule] -> SRGRule
cfRulesToSRGRule rs@(r:_) = SRGRule (lhsCat r) rhs
where
alts = [((n,Nothing),mkSRGSymbols 0 ss) | CFRule c ss n <- rs]
rhs = [SRGAlt p n (srgItem sss) | ((n,p),sss) <- buildMultiMap alts ]
mkSRGSymbols _ [] = []
mkSRGSymbols i (NonTerminal c:ss) = NonTerminal (c,i) : mkSRGSymbols (i+1) ss
mkSRGSymbols i (Terminal t:ss) = Terminal t : mkSRGSymbols i ss
allSRGCats :: SRG -> [String]
allSRGCats SRG { srgRules = rs } = [c | SRGRule c _ <- rs]
--
-- * Size-optimized EBNF SRGs
--
srgItem :: [[SRGSymbol]] -> SRGItem
srgItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
-- non-optimizing version:
--srgItem = unionRE . map seqRE
-- | Merges a list of right-hand sides which all have the same
-- sequence of non-terminals.
mergeItems :: [[SRGSymbol]] -> SRGItem
mergeItems = minimizeRE . ungroupTokens . minimizeRE . unionRE . map seqRE . map groupTokens
groupTokens :: [SRGSymbol] -> [Symbol SRGNT [Token]]
groupTokens [] = []
groupTokens (Terminal t:ss) = case groupTokens ss of
Terminal ts:ss' -> Terminal (t:ts):ss'
ss' -> Terminal [t]:ss'
groupTokens (NonTerminal c:ss) = NonTerminal c : groupTokens ss
ungroupTokens :: RE (Symbol SRGNT [Token]) -> RE SRGSymbol
ungroupTokens = joinRE . mapRE (symbol (RESymbol . NonTerminal) (REConcat . map (RESymbol . Terminal)))
--
-- * Utilities for building and printing SRGs
--
lookupFM_ :: (Ord key, Show key) => Map key elt -> key -> elt
lookupFM_ fm k = Map.findWithDefault err k fm
where err = error $ "Key not found: " ++ show k
++ "\namong " ++ show (Map.keys fm)
prtS :: Print a => a -> ShowS
prtS = showString . prt

110
src-3.0/GF/Speech/SRGS.hs Normal file
View File

@@ -0,0 +1,110 @@
----------------------------------------------------------------------
-- |
-- Module : SRGS
--
-- Prints an SRGS XML speech recognition grammars.
----------------------------------------------------------------------
module GF.Speech.SRGS (srgsXmlPrinter, srgsXmlNonRecursivePrinter) where
import GF.Data.Utilities
import GF.Data.XML
import GF.Infra.Option
import GF.Speech.CFG
import GF.Speech.RegExp
import GF.Speech.SISR as SISR
import GF.Speech.SRG
import PGF (PGF, CId)
import Control.Monad
import Data.Char (toUpper,toLower)
import Data.List
import Data.Maybe
import qualified Data.Map as Map
import qualified Data.Set as Set
srgsXmlPrinter :: Maybe SISRFormat
-> PGF -> CId -> String
srgsXmlPrinter sisr pgf cnc = prSrgsXml sisr $ makeSimpleSRG pgf cnc
srgsXmlNonRecursivePrinter :: PGF -> CId -> String
srgsXmlNonRecursivePrinter pgf cnc = prSrgsXml Nothing $ makeNonRecursiveSRG pgf cnc
prSrgsXml :: Maybe SISRFormat -> SRG -> String
prSrgsXml sisr srg = showXMLDoc (optimizeSRGS xmlGr)
where
xmlGr = grammar sisr (catFormId (srgStartCat srg)) (srgLanguage srg) $
[meta "description"
("SRGS XML speech recognition grammar for " ++ srgName srg ++ "."),
meta "generator" "Grammatical Framework"]
++ map ruleToXML (srgRules srg)
ruleToXML (SRGRule cat alts) = Tag "rule" ([("id",cat)]++pub) (prRhs alts)
where pub | cat `Set.member` srgExternalCats srg = [("scope","public")]
| otherwise = []
prRhs rhss = [oneOf (map (mkProd sisr) rhss)]
mkProd :: Maybe SISRFormat -> SRGAlt -> XML
mkProd sisr (SRGAlt mp n rhs) = Tag "item" [] (ti ++ [x] ++ tf)
where x = mkItem sisr n rhs
ti = tag sisr (profileInitSISR n)
tf = tag sisr (profileFinalSISR n)
mkItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> XML
mkItem sisr cn = f
where
f (REUnion []) = ETag "ruleref" [("special","VOID")]
f (REUnion xs)
| not (null es) = Tag "item" [("repeat","0-1")] [f (REUnion nes)]
| otherwise = oneOf (map f xs)
where (es,nes) = partition isEpsilon xs
f (REConcat []) = ETag "ruleref" [("special","NULL")]
f (REConcat xs) = Tag "item" [] (map f xs)
f (RERepeat x) = Tag "item" [("repeat","0-")] [f x]
f (RESymbol s) = symItem sisr cn s
symItem :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> XML
symItem sisr cn (NonTerminal n@(c,_)) =
Tag "item" [] $ [ETag "ruleref" [("uri","#" ++ c)]] ++ tag sisr (catSISR cn n)
symItem _ _ (Terminal t) = Tag "item" [] [Data (showToken t)]
tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> [XML]
tag Nothing _ = []
tag (Just fmt) t = case t fmt of
[] -> []
ts -> [Tag "tag" [] [Data (prSISR ts)]]
catFormId :: String -> String
catFormId = (++ "_cat")
showToken :: Token -> String
showToken t = t
oneOf :: [XML] -> XML
oneOf = Tag "one-of" []
grammar :: Maybe SISRFormat
-> String -- ^ root
-> Maybe String -- ^language
-> [XML] -> XML
grammar sisr root ml =
Tag "grammar" $ [("xmlns","http://www.w3.org/2001/06/grammar"),
("version","1.0"),
("mode","voice"),
("root",root)]
++ (if isJust sisr then [("tag-format","semantics/1.0")] else [])
++ maybe [] (\l -> [("xml:lang", l)]) ml
meta :: String -> String -> XML
meta n c = ETag "meta" [("name",n),("content",c)]
optimizeSRGS :: XML -> XML
optimizeSRGS = bottomUpXML f
where f (Tag "item" [] [x@(Tag "item" _ _)]) = x
f (Tag "item" [] [x@(Tag "one-of" _ _)]) = x
f (Tag "item" as [Tag "item" [] xs]) = Tag "item" as xs
f (Tag "item" as xs) = Tag "item" as (map g xs)
where g (Tag "item" [] [x@(ETag "ruleref" _)]) = x
g x = x
f (Tag "one-of" [] [x]) = x
f x = x