reorganize the directories under src, and rescue the JavaScript interpreter from deprecated

This commit is contained in:
krasimir
2009-12-13 18:50:29 +00:00
parent d88a865faf
commit f85232947e
189 changed files with 2 additions and 2 deletions

View File

@@ -0,0 +1,372 @@
----------------------------------------------------------------------
-- |
-- Module : GF.Speech.CFG
--
-- Context-free grammar representation and manipulation.
----------------------------------------------------------------------
module GF.Speech.CFG where
import GF.Data.Utilities
import PGF.CId
import GF.Infra.Option
import GF.Data.Relation
import Control.Monad
import Control.Monad.State (State, get, put, evalState)
import qualified Data.ByteString.Char8 as BS
import Data.Map (Map)
import qualified Data.Map as Map
import Data.List
import Data.Maybe (fromMaybe)
import Data.Monoid (mconcat)
import Data.Set (Set)
import qualified Data.Set as Set
--
-- * Types
--
type Cat = String
type Token = String
data Symbol c t = NonTerminal c | Terminal t
deriving (Eq, Ord, Show)
type CFSymbol = Symbol Cat Token
data CFRule = CFRule {
lhsCat :: Cat,
ruleRhs :: [CFSymbol],
ruleName :: CFTerm
}
deriving (Eq, Ord, Show)
data CFTerm
= CFObj CId [CFTerm] -- ^ an abstract syntax function with arguments
| CFAbs Int CFTerm -- ^ A lambda abstraction. The Int is the variable id.
| CFApp CFTerm CFTerm -- ^ Application
| CFRes Int -- ^ The result of the n:th (0-based) non-terminal
| CFVar Int -- ^ A lambda-bound variable
| CFMeta CId -- ^ A metavariable
deriving (Eq, Ord, Show)
data CFG = CFG { cfgStartCat :: Cat,
cfgExternalCats :: Set Cat,
cfgRules :: Map Cat (Set CFRule) }
deriving (Eq, Ord, Show)
--
-- * Grammar filtering
--
-- | Removes all directly and indirectly cyclic productions.
-- FIXME: this may be too aggressive, only one production
-- needs to be removed to break a given cycle. But which
-- one should we pick?
-- FIXME: Does not (yet) remove productions which are cyclic
-- because of empty productions.
removeCycles :: CFG -> CFG
removeCycles = onRules f
where f rs = filter (not . isCycle) rs
where alias = transitiveClosure $ mkRel [(c,c') | CFRule c [NonTerminal c'] _ <- rs]
isCycle (CFRule c [NonTerminal c'] _) = isRelatedTo alias c' c
isCycle _ = False
-- | Better bottom-up filter that also removes categories which contain no finite
-- strings.
bottomUpFilter :: CFG -> CFG
bottomUpFilter gr = fix grow (gr { cfgRules = Map.empty })
where grow g = g `unionCFG` filterCFG (all (okSym g) . ruleRhs) gr
okSym g = symbol (`elem` allCats g) (const True)
-- | Removes categories which are not reachable from any external category.
topDownFilter :: CFG -> CFG
topDownFilter cfg = filterCFGCats (`Set.member` keep) cfg
where
rhsCats = [ (lhsCat r, c') | r <- allRules cfg, c' <- filterCats (ruleRhs r) ]
uses = reflexiveClosure_ (allCats cfg) $ transitiveClosure $ mkRel rhsCats
keep = Set.unions $ map (allRelated uses) $ Set.toList $ cfgExternalCats cfg
-- | Merges categories with identical right-hand-sides.
-- FIXME: handle probabilities
mergeIdentical :: CFG -> CFG
mergeIdentical g = onRules (map subst) g
where
-- maps categories to their replacement
m = Map.fromList [(y,concat (intersperse "+" xs))
| (_,xs) <- buildMultiMap [(rulesKey rs,c) | (c,rs) <- Map.toList (cfgRules g)], y <- xs]
-- build data to compare for each category: a set of name,rhs pairs
rulesKey = Set.map (\ (CFRule _ r n) -> (n,r))
subst (CFRule c r n) = CFRule (substCat c) (map (mapSymbol substCat id) r) n
substCat c = Map.findWithDefault (error $ "mergeIdentical: " ++ c) c m
-- | Keeps only the start category as an external category.
purgeExternalCats :: CFG -> CFG
purgeExternalCats cfg = cfg { cfgExternalCats = Set.singleton (cfgStartCat cfg) }
--
-- * Removing left recursion
--
-- The LC_LR algorithm from
-- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf
removeLeftRecursion :: CFG -> CFG
removeLeftRecursion gr
= gr { cfgRules = groupProds $ concat [scheme1, scheme2, scheme3, scheme4] }
where
scheme1 = [CFRule a [x,NonTerminal a_x] n' |
a <- retainedLeftRecursive,
x <- properLeftCornersOf a,
not (isLeftRecursive x),
let a_x = mkCat (NonTerminal a) x,
-- this is an extension of LC_LR to avoid generating
-- A-X categories for which there are no productions:
a_x `Set.member` newCats,
let n' = symbol (\_ -> CFApp (CFRes 1) (CFRes 0))
(\_ -> CFRes 0) x]
scheme2 = [CFRule a_x (beta++[NonTerminal a_b]) n' |
a <- retainedLeftRecursive,
b@(NonTerminal b') <- properLeftCornersOf a,
isLeftRecursive b,
CFRule _ (x:beta) n <- catRules gr b',
let a_x = mkCat (NonTerminal a) x,
let a_b = mkCat (NonTerminal a) b,
let i = length $ filterCats beta,
let n' = symbol (\_ -> CFAbs 1 (CFApp (CFRes i) (shiftTerm n)))
(\_ -> CFApp (CFRes i) n) x]
scheme3 = [CFRule a_x beta n' |
a <- retainedLeftRecursive,
x <- properLeftCornersOf a,
CFRule _ (x':beta) n <- catRules gr a,
x == x',
let a_x = mkCat (NonTerminal a) x,
let n' = symbol (\_ -> CFAbs 1 (shiftTerm n))
(\_ -> n) x]
scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . NonTerminal) cats
newCats = Set.fromList (map lhsCat (scheme2 ++ scheme3))
shiftTerm :: CFTerm -> CFTerm
shiftTerm (CFObj f ts) = CFObj f (map shiftTerm ts)
shiftTerm (CFRes 0) = CFVar 1
shiftTerm (CFRes n) = CFRes (n-1)
shiftTerm t = t
-- note: the rest don't occur in the original grammar
cats = allCats gr
rules = allRules gr
directLeftCorner = mkRel [(NonTerminal c,t) | CFRule c (t:_) _ <- allRules gr]
leftCorner = reflexiveClosure_ (map NonTerminal cats) $ transitiveClosure directLeftCorner
properLeftCorner = transitiveClosure directLeftCorner
properLeftCornersOf = Set.toList . allRelated properLeftCorner . NonTerminal
isProperLeftCornerOf = flip (isRelatedTo properLeftCorner)
leftRecursive = reflexiveElements properLeftCorner
isLeftRecursive = (`Set.member` leftRecursive)
retained = cfgStartCat gr `Set.insert`
Set.fromList [a | r <- allRules (filterCFGCats (not . isLeftRecursive . NonTerminal) gr),
NonTerminal a <- ruleRhs r]
isRetained = (`Set.member` retained)
retainedLeftRecursive = filter (isLeftRecursive . NonTerminal) $ Set.toList retained
mkCat :: CFSymbol -> CFSymbol -> Cat
mkCat x y = showSymbol x ++ "-" ++ showSymbol y
where showSymbol = symbol id show
-- | Get the sets of mutually recursive non-terminals for a grammar.
mutRecCats :: Bool -- ^ If true, all categories will be in some set.
-- If false, only recursive categories will be included.
-> CFG -> [Set Cat]
mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
where r = mkRel [(c,c') | CFRule c ss _ <- allRules g, NonTerminal c' <- ss]
refl = if incAll then reflexiveClosure_ (allCats g) else reflexiveSubrelation
--
-- * Approximate context-free grammars with regular grammars.
--
makeSimpleRegular :: CFG -> CFG
makeSimpleRegular = makeRegular . topDownFilter . bottomUpFilter . removeCycles
-- Use the transformation algorithm from \"Regular Approximation of Context-free
-- Grammars through Approximation\", Mohri and Nederhof, 2000
-- to create an over-generating regular grammar for a context-free
-- grammar
makeRegular :: CFG -> CFG
makeRegular g = g { cfgRules = groupProds $ concatMap trSet (mutRecCats True g) }
where trSet cs | allXLinear cs rs = rs
| otherwise = concatMap handleCat (Set.toList cs)
where rs = catSetRules g cs
handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e
++ concatMap (makeRightLinearRules c) (catRules g c)
where c' = newCat c
makeRightLinearRules b' (CFRule c ss n) =
case ys of
[] -> newRule b' (xs ++ [NonTerminal (newCat c)]) n -- no non-terminals left
(NonTerminal b:zs) -> newRule b' (xs ++ [NonTerminal b]) n
++ makeRightLinearRules (newCat b) (CFRule c zs n)
where (xs,ys) = break (`catElem` cs) ss
-- don't add rules on the form A -> A
newRule c rhs n | rhs == [NonTerminal c] = []
| otherwise = [CFRule c rhs n]
newCat c = c ++ "$"
--
-- * CFG Utilities
--
mkCFG :: Cat -> Set Cat -> [CFRule] -> CFG
mkCFG start ext rs = CFG { cfgStartCat = start, cfgExternalCats = ext, cfgRules = groupProds rs }
groupProds :: [CFRule] -> Map Cat (Set CFRule)
groupProds = Map.fromListWith Set.union . map (\r -> (lhsCat r,Set.singleton r))
-- | Gets all rules in a CFG.
allRules :: CFG -> [CFRule]
allRules = concat . map Set.toList . Map.elems . cfgRules
-- | Gets all rules in a CFG, grouped by their LHS categories.
allRulesGrouped :: CFG -> [(Cat,[CFRule])]
allRulesGrouped = Map.toList . Map.map Set.toList . cfgRules
-- | Gets all categories which have rules.
allCats :: CFG -> [Cat]
allCats = Map.keys . cfgRules
-- | Gets all categories which have rules or occur in a RHS.
allCats' :: CFG -> [Cat]
allCats' cfg = Set.toList (Map.keysSet (cfgRules cfg) `Set.union`
Set.fromList [c | rs <- Map.elems (cfgRules cfg),
r <- Set.toList rs,
NonTerminal c <- ruleRhs r])
-- | Gets all rules for the given category.
catRules :: CFG -> Cat -> [CFRule]
catRules gr c = Set.toList $ Map.findWithDefault Set.empty c (cfgRules gr)
-- | Gets all rules for categories in the given set.
catSetRules :: CFG -> Set Cat -> [CFRule]
catSetRules gr cs = allRules $ filterCFGCats (`Set.member` cs) gr
mapCFGCats :: (Cat -> Cat) -> CFG -> CFG
mapCFGCats f cfg = mkCFG (f (cfgStartCat cfg))
(Set.map f (cfgExternalCats cfg))
[CFRule (f lhs) (map (mapSymbol f id) rhs) t | CFRule lhs rhs t <- allRules cfg]
onCFG :: (Map Cat (Set CFRule) -> Map Cat (Set CFRule)) -> CFG -> CFG
onCFG f cfg = cfg { cfgRules = f (cfgRules cfg) }
onRules :: ([CFRule] -> [CFRule]) -> CFG -> CFG
onRules f cfg = cfg { cfgRules = groupProds $ f $ allRules cfg }
-- | Clean up CFG after rules have been removed.
cleanCFG :: CFG -> CFG
cleanCFG = onCFG (Map.filter (not . Set.null))
-- | Combine two CFGs.
unionCFG :: CFG -> CFG -> CFG
unionCFG x y = onCFG (\rs -> Map.unionWith Set.union rs (cfgRules y)) x
filterCFG :: (CFRule -> Bool) -> CFG -> CFG
filterCFG p = cleanCFG . onCFG (Map.map (Set.filter p))
filterCFGCats :: (Cat -> Bool) -> CFG -> CFG
filterCFGCats p = onCFG (Map.filterWithKey (\c _ -> p c))
countCats :: CFG -> Int
countCats = Map.size . cfgRules . cleanCFG
countRules :: CFG -> Int
countRules = length . allRules
prCFG :: CFG -> String
prCFG = prProductions . map prRule . allRules
where
prRule r = (lhsCat r, unwords (map prSym (ruleRhs r)))
prSym = symbol id (\t -> "\""++ t ++"\"")
prProductions :: [(Cat,String)] -> String
prProductions prods =
unlines [rpad maxLHSWidth lhs ++ " ::= " ++ rhs | (lhs,rhs) <- prods]
where
maxLHSWidth = maximum $ 0:(map (length . fst) prods)
rpad n s = s ++ replicate (n - length s) ' '
prCFTerm :: CFTerm -> String
prCFTerm = pr 0
where
pr p (CFObj f args) = paren p (showCId f ++ " (" ++ concat (intersperse "," (map (pr 0) args)) ++ ")")
pr p (CFAbs i t) = paren p ("\\x" ++ show i ++ ". " ++ pr 0 t)
pr p (CFApp t1 t2) = paren p (pr 1 t1 ++ "(" ++ pr 0 t2 ++ ")")
pr _ (CFRes i) = "$" ++ show i
pr _ (CFVar i) = "x" ++ show i
pr _ (CFMeta c) = "?" ++ showCId c
paren 0 x = x
paren 1 x = "(" ++ x ++ ")"
--
-- * CFRule Utilities
--
ruleFun :: CFRule -> CId
ruleFun (CFRule _ _ t) = f t
where f (CFObj n _) = n
f (CFApp _ x) = f x
f (CFAbs _ x) = f x
f _ = mkCId ""
-- | Check if any of the categories used on the right-hand side
-- are in the given list of categories.
anyUsedBy :: [Cat] -> CFRule -> Bool
anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss)
mkCFTerm :: String -> CFTerm
mkCFTerm n = CFObj (mkCId n) []
ruleIsNonRecursive :: Set Cat -> CFRule -> Bool
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
-- | Check if all the rules are right-linear, or all the rules are
-- left-linear, with respect to given categories.
allXLinear :: Set Cat -> [CFRule] -> Bool
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
-- | Checks if a context-free rule is right-linear.
isRightLinear :: Set Cat -- ^ The categories to consider
-> CFRule -- ^ The rule to check for right-linearity
-> Bool
isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
-- | Checks if a context-free rule is left-linear.
isLeftLinear :: Set Cat -- ^ The categories to consider
-> CFRule -- ^ The rule to check for left-linearity
-> Bool
isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs
--
-- * Symbol utilities
--
symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a
symbol fc ft (NonTerminal cat) = fc cat
symbol fc ft (Terminal tok) = ft tok
mapSymbol :: (c -> c') -> (t -> t') -> Symbol c t -> Symbol c' t'
mapSymbol fc ft = symbol (NonTerminal . fc) (Terminal . ft)
filterCats :: [Symbol c t] -> [c]
filterCats syms = [ cat | NonTerminal cat <- syms ]
filterToks :: [Symbol c t] -> [t]
filterToks syms = [ tok | Terminal tok <- syms ]
-- | Checks if a symbol is a non-terminal of one of the given categories.
catElem :: Ord c => Symbol c t -> Set c -> Bool
catElem s cs = symbol (`Set.member` cs) (const False) s
noCatsInSet :: Ord c => Set c -> [Symbol c t] -> Bool
noCatsInSet cs = not . any (`catElem` cs)

View File

@@ -0,0 +1,244 @@
----------------------------------------------------------------------
-- |
-- Module : GF.Speech.CFGToFA
--
-- Approximates CFGs with finite state networks.
----------------------------------------------------------------------
module GF.Speech.CFGToFA (cfgToFA, makeSimpleRegular,
MFA(..), cfgToMFA, cfgToFA') where
import Data.List
import Data.Maybe
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set
import PGF.CId
import PGF.Data
import GF.Data.Utilities
import GF.Speech.CFG
import GF.Speech.PGFToCFG
import GF.Infra.Ident (Ident)
import GF.Data.Graph
import GF.Data.Relation
import GF.Speech.FiniteState
import GF.Speech.CFG
data Recursivity = RightR | LeftR | NotR
data MutRecSet = MutRecSet {
mrCats :: Set Cat,
mrNonRecRules :: [CFRule],
mrRecRules :: [CFRule],
mrRec :: Recursivity
}
type MutRecSets = Map Cat MutRecSet
--
-- * Multiple DFA type
--
data MFA = MFA Cat [(Cat,DFA CFSymbol)]
cfgToFA :: CFG -> DFA Token
cfgToFA = minimize . compileAutomaton . makeSimpleRegular
--
-- * Compile strongly regular grammars to NFAs
--
-- Convert a strongly regular grammar to a finite automaton.
compileAutomaton :: CFG -> NFA Token
compileAutomaton g = make_fa (g,ns) s [NonTerminal (cfgStartCat g)] f fa
where
(fa,s,f) = newFA_
ns = mutRecSets g $ mutRecCats False g
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000.
make_fa :: (CFG,MutRecSets) -> State -> [CFSymbol] -> State
-> NFA Token -> NFA Token
make_fa c@(g,ns) q0 alpha q1 fa =
case alpha of
[] -> newTransition q0 q1 Nothing fa
[Terminal t] -> newTransition q0 q1 (Just t) fa
[NonTerminal a] ->
case Map.lookup a ns of
-- a is recursive
Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) ->
case mrRec n of
-- the set Ni is right-recursive or cyclic
RightR ->
let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs]
++ [(getState c, xs, getState d) | CFRule c ss _ <- rs,
let (xs,NonTerminal d) = (init ss,last ss)]
in make_fas new $ newTransition q0 (getState a) Nothing fa'
-- the set Ni is left-recursive
LeftR ->
let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs]
++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- rs]
in make_fas new $ newTransition (getState a) q1 Nothing fa'
where
(fa',stateMap) = addStatesForCats ni fa
getState x = Map.findWithDefault
(error $ "CFGToFiniteState: No state for " ++ x)
x stateMap
-- a is not recursive
Nothing -> let rs = catRules g a
in foldl' (\f (CFRule _ b _) -> make_fa_ q0 b q1 f) fa rs
(x:beta) -> let (fa',q) = newState () fa
in make_fa_ q beta q1 $ make_fa_ q0 [x] q fa'
where
make_fa_ = make_fa c
make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs
--
-- * Compile a strongly regular grammar to a DFA with sub-automata
--
cfgToMFA :: CFG -> MFA
cfgToMFA = buildMFA . makeSimpleRegular
-- | Build a DFA by building and expanding an MFA
cfgToFA' :: CFG -> DFA Token
cfgToFA' = mfaToDFA . cfgToMFA
buildMFA :: CFG -> MFA
buildMFA g = sortSubLats $ removeUnusedSubLats mfa
where fas = compileAutomata g
mfa = MFA (cfgStartCat g) [(c, minimize fa) | (c,fa) <- fas]
mfaStartDFA :: MFA -> DFA CFSymbol
mfaStartDFA (MFA start subs) =
fromMaybe (error $ "Bad start MFA: " ++ start) $ lookup start subs
mfaToDFA :: MFA -> DFA Token
mfaToDFA mfa@(MFA _ subs) = minimize $ expand $ dfa2nfa $ mfaStartDFA mfa
where
subs' = Map.fromList [(c, dfa2nfa n) | (c,n) <- subs]
getSub l = fromJust $ Map.lookup l subs'
expand (FA (Graph c ns es) s f)
= foldl' expandEdge (FA (Graph c ns []) s f) es
expandEdge fa (f,t,x) =
case x of
Nothing -> newTransition f t Nothing fa
Just (Terminal s) -> newTransition f t (Just s) fa
Just (NonTerminal l) -> insertNFA fa (f,t) (expand $ getSub l)
removeUnusedSubLats :: MFA -> MFA
removeUnusedSubLats mfa@(MFA start subs) = MFA start [(c,s) | (c,s) <- subs, isUsed c]
where
usedMap = subLatUseMap mfa
used = growUsedSet (Set.singleton start)
isUsed c = c `Set.member` used
growUsedSet = fix (\s -> foldl Set.union s $ mapMaybe (flip Map.lookup usedMap) $ Set.toList s)
subLatUseMap :: MFA -> Map Cat (Set Cat)
subLatUseMap (MFA _ subs) = Map.fromList [(c,usedSubLats n) | (c,n) <- subs]
usedSubLats :: DFA CFSymbol -> Set Cat
usedSubLats fa = Set.fromList [s | (_,_,NonTerminal s) <- transitions fa]
-- | Sort sub-networks topologically.
sortSubLats :: MFA -> MFA
sortSubLats mfa@(MFA main subs) = MFA main (reverse $ sortLats usedByMap subs)
where
usedByMap = revMultiMap (subLatUseMap mfa)
sortLats _ [] = []
sortLats ub ls = xs ++ sortLats ub' ys
where (xs,ys) = partition ((==0) . indeg) ls
ub' = Map.map (Set.\\ Set.fromList (map fst xs)) ub
indeg (c,_) = maybe 0 Set.size $ Map.lookup c ub
-- | Convert a strongly regular grammar to a number of finite automata,
-- one for each non-terminal.
-- The edges in the automata accept tokens, or name another automaton to use.
compileAutomata :: CFG
-> [(Cat,NFA CFSymbol)]
-- ^ A map of non-terminals and their automata.
compileAutomata g = [(c, makeOneFA c) | c <- allCats g]
where
mrs = mutRecSets g $ mutRecCats True g
makeOneFA c = make_fa1 mr s [NonTerminal c] f fa
where (fa,s,f) = newFA_
mr = fromJust (Map.lookup c mrs)
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000,
-- adapted to build a finite automaton for a single (mutually recursive) set only.
-- Categories not in the set will result in category-labelled edges.
make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which
-- we are building the automaton.
-> State -- ^ State to come from
-> [CFSymbol] -- ^ Symbols to accept
-> State -- ^ State to end up in
-> NFA CFSymbol -- ^ FA to add to.
-> NFA CFSymbol
make_fa1 mr q0 alpha q1 fa =
case alpha of
[] -> newTransition q0 q1 Nothing fa
[t@(Terminal _)] -> newTransition q0 q1 (Just t) fa
[c@(NonTerminal a)] | not (a `Set.member` mrCats mr) -> newTransition q0 q1 (Just c) fa
[NonTerminal a] ->
case mrRec mr of
NotR -> -- the set is a non-recursive (always singleton) set of categories
-- so the set of category rules is the set of rules for the whole set
make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa
RightR -> -- the set is right-recursive or cyclic
let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr]
++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr,
let (xs,NonTerminal d) = (init ss,last ss)]
in make_fas new $ newTransition q0 (getState a) Nothing fa'
LeftR -> -- the set is left-recursive
let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr]
++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- mrRecRules mr]
in make_fas new $ newTransition (getState a) q1 Nothing fa'
where
(fa',stateMap) = addStatesForCats (mrCats mr) fa
getState x = Map.findWithDefault
(error $ "CFGToFiniteState: No state for " ++ x)
x stateMap
(x:beta) -> let (fa',q) = newState () fa
in make_fas [(q0,[x],q),(q,beta,q1)] fa'
where
make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa1 mr s1 xs s2 f') fa xs
mutRecSets :: CFG -> [Set Cat] -> MutRecSets
mutRecSets g = Map.fromList . concatMap mkMutRecSet
where
mkMutRecSet cs = [ (c,ms) | c <- csl ]
where csl = Set.toList cs
rs = catSetRules g cs
(nrs,rrs) = partition (ruleIsNonRecursive cs) rs
ms = MutRecSet {
mrCats = cs,
mrNonRecRules = nrs,
mrRecRules = rrs,
mrRec = rec
}
rec | null rrs = NotR
| all (isRightLinear cs) rrs = RightR
| otherwise = LeftR
--
-- * Utilities
--
-- | Add a state for the given NFA for each of the categories
-- in the given set. Returns a map of categories to their
-- corresponding states.
addStatesForCats :: Set Cat -> NFA t -> (NFA t, Map Cat State)
addStatesForCats cs fa = (fa', m)
where (fa', ns) = newStates (replicate (Set.size cs) ()) fa
m = Map.fromList (zip (Set.toList cs) (map fst ns))
revMultiMap :: (Ord a, Ord b) => Map a (Set b) -> Map b (Set a)
revMultiMap m = Map.fromListWith Set.union [ (y,Set.singleton x) | (x,s) <- Map.toList m, y <- Set.toList s]

View File

@@ -0,0 +1,329 @@
----------------------------------------------------------------------
-- |
-- Module : FiniteState
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/10 16:43:44 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.16 $
--
-- A simple finite state network module.
-----------------------------------------------------------------------------
module GF.Speech.FiniteState (FA(..), State, NFA, DFA,
startState, finalStates,
states, transitions,
isInternal,
newFA, newFA_,
addFinalState,
newState, newStates,
newTransition, newTransitions,
insertTransitionWith, insertTransitionsWith,
mapStates, mapTransitions,
modifyTransitions,
nonLoopTransitionsTo, nonLoopTransitionsFrom,
loops,
removeState,
oneFinalState,
insertNFA,
onGraph,
moveLabelsToNodes, removeTrivialEmptyNodes,
minimize,
dfa2nfa,
unusedNames, renameStates,
prFAGraphviz, faToGraphviz) where
import Data.List
import Data.Maybe
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set
import GF.Data.Utilities
import GF.Data.Graph
import qualified GF.Data.Graphviz as Dot
type State = Int
-- | Type parameters: node id type, state label type, edge label type
-- Data constructor arguments: nodes and edges, start state, final states
data FA n a b = FA !(Graph n a b) !n ![n]
type NFA a = FA State () (Maybe a)
type DFA a = FA State () a
startState :: FA n a b -> n
startState (FA _ s _) = s
finalStates :: FA n a b -> [n]
finalStates (FA _ _ ss) = ss
states :: FA n a b -> [(n,a)]
states (FA g _ _) = nodes g
transitions :: FA n a b -> [(n,n,b)]
transitions (FA g _ _) = edges g
newFA :: Enum n => a -- ^ Start node label
-> FA n a b
newFA l = FA g s []
where (g,s) = newNode l (newGraph [toEnum 0..])
-- | Create a new finite automaton with an initial and a final state.
newFA_ :: Enum n => (FA n () b, n, n)
newFA_ = (fa'', s, f)
where fa = newFA ()
s = startState fa
(fa',f) = newState () fa
fa'' = addFinalState f fa'
addFinalState :: n -> FA n a b -> FA n a b
addFinalState f (FA g s ss) = FA g s (f:ss)
newState :: a -> FA n a b -> (FA n a b, n)
newState x (FA g s ss) = (FA g' s ss, n)
where (g',n) = newNode x g
newStates :: [a] -> FA n a b -> (FA n a b, [(n,a)])
newStates xs (FA g s ss) = (FA g' s ss, ns)
where (g',ns) = newNodes xs g
newTransition :: n -> n -> b -> FA n a b -> FA n a b
newTransition f t l = onGraph (newEdge (f,t,l))
newTransitions :: [(n, n, b)] -> FA n a b -> FA n a b
newTransitions es = onGraph (newEdges es)
insertTransitionWith :: Eq n =>
(b -> b -> b) -> (n, n, b) -> FA n a b -> FA n a b
insertTransitionWith f t = onGraph (insertEdgeWith f t)
insertTransitionsWith :: Eq n =>
(b -> b -> b) -> [(n, n, b)] -> FA n a b -> FA n a b
insertTransitionsWith f ts fa =
foldl' (flip (insertTransitionWith f)) fa ts
mapStates :: (a -> c) -> FA n a b -> FA n c b
mapStates f = onGraph (nmap f)
mapTransitions :: (b -> c) -> FA n a b -> FA n a c
mapTransitions f = onGraph (emap f)
modifyTransitions :: ([(n,n,b)] -> [(n,n,b)]) -> FA n a b -> FA n a b
modifyTransitions f = onGraph (\ (Graph r ns es) -> Graph r ns (f es))
removeState :: Ord n => n -> FA n a b -> FA n a b
removeState n = onGraph (removeNode n)
minimize :: Ord a => NFA a -> DFA a
minimize = determinize . reverseNFA . dfa2nfa . determinize . reverseNFA
unusedNames :: FA n a b -> [n]
unusedNames (FA (Graph names _ _) _ _) = names
-- | Gets all incoming transitions to a given state, excluding
-- transtions from the state itself.
nonLoopTransitionsTo :: Eq n => n -> FA n a b -> [(n,b)]
nonLoopTransitionsTo s fa =
[(f,l) | (f,t,l) <- transitions fa, t == s && f /= s]
nonLoopTransitionsFrom :: Eq n => n -> FA n a b -> [(n,b)]
nonLoopTransitionsFrom s fa =
[(t,l) | (f,t,l) <- transitions fa, f == s && t /= s]
loops :: Eq n => n -> FA n a b -> [b]
loops s fa = [l | (f,t,l) <- transitions fa, f == s && t == s]
-- | Give new names to all nodes.
renameStates :: Ord x => [y] -- ^ Infinite supply of new names
-> FA x a b
-> FA y a b
renameStates supply (FA g s fs) = FA (renameNodes newName rest g) s' fs'
where (ns,rest) = splitAt (length (nodes g)) supply
newNodes = Map.fromList (zip (map fst (nodes g)) ns)
newName n = Map.findWithDefault (error "FiniteState.newName") n newNodes
s' = newName s
fs' = map newName fs
-- | Insert an NFA into another
insertNFA :: NFA a -- ^ NFA to insert into
-> (State, State) -- ^ States to insert between
-> NFA a -- ^ NFA to insert.
-> NFA a
insertNFA (FA g1 s1 fs1) (f,t) (FA g2 s2 fs2)
= FA (newEdges es g') s1 fs1
where
es = (f,ren s2,Nothing):[(ren f2,t,Nothing) | f2 <- fs2]
(g',ren) = mergeGraphs g1 g2
onGraph :: (Graph n a b -> Graph n c d) -> FA n a b -> FA n c d
onGraph f (FA g s ss) = FA (f g) s ss
-- | Make the finite automaton have a single final state
-- by adding a new final state and adding an edge
-- from the old final states to the new state.
oneFinalState :: a -- ^ Label to give the new node
-> b -- ^ Label to give the new edges
-> FA n a b -- ^ The old network
-> FA n a b -- ^ The new network
oneFinalState nl el fa =
let (FA g s fs,nf) = newState nl fa
es = [ (f,nf,el) | f <- fs ]
in FA (newEdges es g) s [nf]
-- | Transform a standard finite automaton with labelled edges
-- to one where the labels are on the nodes instead. This can add
-- up to one extra node per edge.
moveLabelsToNodes :: (Ord n,Eq a) => FA n () (Maybe a) -> FA n (Maybe a) ()
moveLabelsToNodes = onGraph f
where f g@(Graph c _ _) = Graph c' ns (concat ess)
where is = [ ((n,l),inc) | (n, (l,inc,_)) <- Map.toList (nodeInfo g)]
(c',is') = mapAccumL fixIncoming c is
(ns,ess) = unzip (concat is')
-- | Remove empty nodes which are not start or final, and have
-- exactly one outgoing edge or exactly one incoming edge.
removeTrivialEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) ()
removeTrivialEmptyNodes = pruneUnusable . skipSimpleEmptyNodes
-- | Move edges to empty nodes to point to the next node(s).
-- This is not done if the pointed-to node is a final node.
skipSimpleEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) ()
skipSimpleEmptyNodes fa = onGraph og fa
where
og g@(Graph c ns es) = if es' == es then g else og (Graph c ns es')
where
es' = concatMap changeEdge es
info = nodeInfo g
changeEdge e@(f,t,())
| isNothing (getNodeLabel info t)
-- && (i * o <= i + o)
&& not (isFinal fa t)
= [ (f,t',()) | (_,t',()) <- getOutgoing info t]
| otherwise = [e]
-- where i = inDegree info t
-- o = outDegree info t
isInternal :: Eq n => FA n a b -> n -> Bool
isInternal (FA _ start final) n = n /= start && n `notElem` final
isFinal :: Eq n => FA n a b -> n -> Bool
isFinal (FA _ _ final) n = n `elem` final
-- | Remove all internal nodes with no incoming edges
-- or no outgoing edges.
pruneUnusable :: Ord n => FA n (Maybe a) () -> FA n (Maybe a) ()
pruneUnusable fa = onGraph f fa
where
f g = if Set.null rns then g else f (removeNodes rns g)
where info = nodeInfo g
rns = Set.fromList [ n | (n,_) <- nodes g,
isInternal fa n,
inDegree info n == 0
|| outDegree info n == 0]
fixIncoming :: (Ord n, Eq a) => [n]
-> (Node n (),[Edge n (Maybe a)]) -- ^ A node and its incoming edges
-> ([n],[(Node n (Maybe a),[Edge n ()])]) -- ^ Replacement nodes with their
-- incoming edges.
fixIncoming cs c@((n,()),es) = (cs'', ((n,Nothing),es'):newContexts)
where ls = nub $ map edgeLabel es
(cs',cs'') = splitAt (length ls) cs
newNodes = zip cs' ls
es' = [ (x,n,()) | x <- map fst newNodes ]
-- separate cyclic and non-cyclic edges
(cyc,ncyc) = partition (\ (f,_,_) -> f == n) es
-- keep all incoming non-cyclic edges with the right label
to (x,l) = [ (f,x,()) | (f,_,l') <- ncyc, l == l']
-- for each cyclic edge with the right label,
-- add an edge from each of the new nodes (including this one)
++ [ (y,x,()) | (f,_,l') <- cyc, l == l', (y,_) <- newNodes]
newContexts = [ (v, to v) | v <- newNodes ]
alphabet :: Eq b => Graph n a (Maybe b) -> [b]
alphabet = nub . catMaybes . map edgeLabel . edges
determinize :: Ord a => NFA a -> DFA a
determinize (FA g s f) = let (ns,es) = h (Set.singleton start) Set.empty Set.empty
(ns',es') = (Set.toList ns, Set.toList es)
final = filter isDFAFinal ns'
fa = FA (Graph undefined [(n,()) | n <- ns'] es') start final
in renameStates [0..] fa
where info = nodeInfo g
-- reach = nodesReachable out
start = closure info $ Set.singleton s
isDFAFinal n = not (Set.null (Set.fromList f `Set.intersection` n))
h currentStates oldStates es
| Set.null currentStates = (oldStates,es)
| otherwise = ((h $! uniqueNewStates) $! allOldStates) $! es'
where
allOldStates = oldStates `Set.union` currentStates
(newStates,es') = new (Set.toList currentStates) Set.empty es
uniqueNewStates = newStates Set.\\ allOldStates
-- Get the sets of states reachable from the given states
-- by consuming one symbol, and the associated edges.
new [] rs es = (rs,es)
new (n:ns) rs es = new ns rs' es'
where cs = reachable info n --reachable reach n
rs' = rs `Set.union` Set.fromList (map snd cs)
es' = es `Set.union` Set.fromList [(n,s,c) | (c,s) <- cs]
-- | Get all the nodes reachable from a list of nodes by only empty edges.
closure :: Ord n => NodeInfo n a (Maybe b) -> Set n -> Set n
closure info x = closure_ x x
where closure_ acc check | Set.null check = acc
| otherwise = closure_ acc' check'
where
reach = Set.fromList [y | x <- Set.toList check,
(_,y,Nothing) <- getOutgoing info x]
acc' = acc `Set.union` reach
check' = reach Set.\\ acc
-- | Get a map of labels to sets of all nodes reachable
-- from a the set of nodes by one edge with the given
-- label and then any number of empty edges.
reachable :: (Ord n,Ord b) => NodeInfo n a (Maybe b) -> Set n -> [(b,Set n)]
reachable info ns = Map.toList $ Map.map (closure info . Set.fromList) $ reachable1 info ns
reachable1 info ns = Map.fromListWith (++) [(c, [y]) | n <- Set.toList ns, (_,y,Just c) <- getOutgoing info n]
reverseNFA :: NFA a -> NFA a
reverseNFA (FA g s fs) = FA g''' s' [s]
where g' = reverseGraph g
(g'',s') = newNode () g'
g''' = newEdges [(s',f,Nothing) | f <- fs] g''
dfa2nfa :: DFA a -> NFA a
dfa2nfa = mapTransitions Just
--
-- * Visualization
--
prFAGraphviz :: (Eq n,Show n) => FA n String String -> String
prFAGraphviz = Dot.prGraphviz . faToGraphviz
prFAGraphviz_ :: (Eq n,Show n,Show a, Show b) => FA n a b -> String
prFAGraphviz_ = Dot.prGraphviz . faToGraphviz . mapStates show . mapTransitions show
faToGraphviz :: (Eq n,Show n) => FA n String String -> Dot.Graph
faToGraphviz (FA (Graph _ ns es) s f)
= Dot.Graph Dot.Directed Nothing [] (map mkNode ns) (map mkEdge es) []
where mkNode (n,l) = Dot.Node (show n) attrs
where attrs = [("label",l)]
++ if n == s then [("shape","box")] else []
++ if n `elem` f then [("style","bold")] else []
mkEdge (x,y,l) = Dot.Edge (show x) (show y) [("label",l)]
--
-- * Utilities
--
lookups :: Ord k => [k] -> Map k a -> [a]
lookups xs m = mapMaybe (flip Map.lookup m) xs

View File

@@ -0,0 +1,95 @@
----------------------------------------------------------------------
-- |
-- Module : GF.Speech.GSL
--
-- This module prints a CFG as a Nuance GSL 2.0 grammar.
--
-----------------------------------------------------------------------------
module GF.Speech.GSL (gslPrinter) where
import GF.Data.Utilities
import GF.Speech.CFG
import GF.Speech.SRG
import GF.Speech.RegExp
import GF.Infra.Option
import GF.Infra.Ident
import PGF.CId
import PGF.Data
import Data.Char (toUpper,toLower)
import Data.List (partition)
import Text.PrettyPrint.HughesPJ
width :: Int
width = 75
gslPrinter :: Options -> PGF -> CId -> String
gslPrinter opts pgf cnc = renderStyle st $ prGSL $ makeNonLeftRecursiveSRG opts pgf cnc
where st = style { lineLength = width }
prGSL :: SRG -> Doc
prGSL srg = header $++$ mainCat $++$ foldr ($++$) empty (map prRule (srgRules srg))
where
header = text ";GSL2.0" $$
comment ("Nuance speech recognition grammar for " ++ srgName srg) $$
comment ("Generated by GF")
mainCat = text ".MAIN" <+> prCat (srgStartCat srg)
prRule (SRGRule cat rhs) = prCat cat <+> union (map prAlt rhs)
-- FIXME: use the probability
prAlt (SRGAlt mp _ rhs) = prItem rhs
prItem :: SRGItem -> Doc
prItem = f
where
f (REUnion xs) = (if null es then empty else text "?") <> union (map f nes)
where (es,nes) = partition isEpsilon xs
f (REConcat [x]) = f x
f (REConcat xs) = text "(" <> fsep (map f xs) <> text ")"
f (RERepeat x) = text "*" <> f x
f (RESymbol s) = prSymbol s
union :: [Doc] -> Doc
union [x] = x
union xs = text "[" <> fsep xs <> text "]"
prSymbol :: Symbol SRGNT Token -> Doc
prSymbol = symbol (prCat . fst) (doubleQuotes . showToken)
-- GSL requires an upper case letter in category names
prCat :: Cat -> Doc
prCat = text . firstToUpper
firstToUpper :: String -> String
firstToUpper [] = []
firstToUpper (x:xs) = toUpper x : xs
{-
rmPunctCFG :: CGrammar -> CGrammar
rmPunctCFG g = [CFRule c (filter keepSymbol ss) n | CFRule c ss n <- g]
keepSymbol :: Symbol c Token -> Bool
keepSymbol (Tok t) = not (all isPunct (prt t))
keepSymbol _ = True
-}
-- Nuance does not like upper case characters in tokens
showToken :: Token -> Doc
showToken = text . map toLower
isPunct :: Char -> Bool
isPunct c = c `elem` "-_.:;.,?!()[]{}"
comment :: String -> Doc
comment s = text ";" <+> text s
-- Pretty-printing utilities
emptyLine :: Doc
emptyLine = text ""
($++$) :: Doc -> Doc -> Doc
x $++$ y = x $$ emptyLine $$ y

View File

@@ -0,0 +1,113 @@
----------------------------------------------------------------------
-- |
-- Module : GF.Speech.JSGF
--
-- This module prints a CFG as a JSGF grammar.
--
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
-- categories in the grammar
--
-- FIXME: convert to UTF-8
-----------------------------------------------------------------------------
module GF.Speech.JSGF (jsgfPrinter) where
import GF.Data.Utilities
import GF.Infra.Option
import GF.Speech.CFG
import GF.Speech.RegExp
import GF.Speech.SISR
import GF.Speech.SRG
import PGF.CId
import PGF.Data
import Data.Char
import Data.List
import Data.Maybe
import Text.PrettyPrint.HughesPJ
import Debug.Trace
width :: Int
width = 75
jsgfPrinter :: Options
-> PGF
-> CId -> String
jsgfPrinter opts pgf cnc = renderStyle st $ prJSGF sisr $ makeNonLeftRecursiveSRG opts pgf cnc
where st = style { lineLength = width }
sisr = flag optSISR opts
prJSGF :: Maybe SISRFormat -> SRG -> Doc
prJSGF sisr srg
= header $++$ mainCat $++$ foldr ($++$) empty (map prRule (srgRules srg))
where
header = text "#JSGF" <+> text "V1.0" <+> text "UTF-8" <+> lang <> char ';' $$
comment ("JSGF speech recognition grammar for " ++ srgName srg) $$
comment "Generated by GF" $$
text ("grammar " ++ srgName srg ++ ";")
lang = maybe empty text (srgLanguage srg)
mainCat = rule True "MAIN" [prCat (srgStartCat srg)]
prRule (SRGRule cat rhs) = rule (isExternalCat srg cat) cat (map prAlt rhs)
prAlt (SRGAlt mp n rhs) = sep [initTag, p (prItem sisr n rhs), finalTag]
where initTag | isEmpty t = empty
| otherwise = text "<NULL>" <+> t
where t = tag sisr (profileInitSISR n)
finalTag = tag sisr (profileFinalSISR n)
p = if isEmpty initTag && isEmpty finalTag then id else parens
prCat :: Cat -> Doc
prCat c = char '<' <> text c <> char '>'
prItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> Doc
prItem sisr t = f 0
where
f _ (REUnion []) = text "<VOID>"
f p (REUnion xs)
| not (null es) = brackets (f 0 (REUnion nes))
| otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs))
where (es,nes) = partition isEpsilon xs
f _ (REConcat []) = text "<NULL>"
f p (REConcat xs) = (if p >= 3 then parens else id) (fsep (map (f 2) xs))
f p (RERepeat x) = f 3 x <> char '*'
f _ (RESymbol s) = prSymbol sisr t s
prSymbol :: Maybe SISRFormat -> CFTerm -> SRGSymbol -> Doc
prSymbol sisr cn (NonTerminal n@(c,_)) = prCat c <+> tag sisr (catSISR cn n)
prSymbol _ cn (Terminal t) | all isPunct t = empty -- removes punctuation
| otherwise = text t -- FIXME: quote if there is whitespace or odd chars
tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc
tag Nothing _ = empty
tag (Just fmt) t = case t fmt of
[] -> empty
ts -> char '{' <+> (text (e $ prSISR ts)) <+> char '}'
where e [] = []
e ('}':xs) = '\\':'}':e xs
e ('\n':xs) = ' ' : e (dropWhile isSpace xs)
e (x:xs) = x:e xs
isPunct :: Char -> Bool
isPunct c = c `elem` "-_.;.,?!"
comment :: String -> Doc
comment s = text "//" <+> text s
alts :: [Doc] -> Doc
alts = fsep . prepunctuate (text "| ")
rule :: Bool -> Cat -> [Doc] -> Doc
rule pub c xs = p <+> prCat c <+> char '=' <+> nest 2 (alts xs) <+> char ';'
where p = if pub then text "public" else empty
-- Pretty-printing utilities
emptyLine :: Doc
emptyLine = text ""
prepunctuate :: Doc -> [Doc] -> [Doc]
prepunctuate _ [] = []
prepunctuate p (x:xs) = x : map (p <>) xs
($++$) :: Doc -> Doc -> Doc
x $++$ y = x $$ emptyLine $$ y

View File

@@ -0,0 +1,116 @@
----------------------------------------------------------------------
-- |
-- Module : GF.Speech.PGFToCFG
--
-- Approximates PGF grammars with context-free grammars.
----------------------------------------------------------------------
module GF.Speech.PGFToCFG (bnfPrinter, pgfToCFG) where
import PGF.CId
import PGF.Data as PGF
import PGF.Macros
import GF.Infra.Ident
import GF.Speech.CFG
import Data.Array.IArray as Array
import Data.List
import Data.Map (Map)
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import Data.Maybe
import Data.Set (Set)
import qualified Data.Set as Set
bnfPrinter :: PGF -> CId -> String
bnfPrinter = toBNF id
toBNF :: (CFG -> CFG) -> PGF -> CId -> String
toBNF f pgf cnc = prCFG $ f $ pgfToCFG pgf cnc
pgfToCFG :: PGF
-> CId -- ^ Concrete syntax name
-> CFG
pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ concatMap fruleToCFRule rules)
where
pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang)
rules :: [(FCat,Production)]
rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.productions pinfo)
, prod <- Set.toList set]
fcatCats :: Map FCat Cat
fcatCats = Map.fromList [(fc, showCId c ++ "_" ++ show i)
| (c,fcs) <- Map.toList (startCats pinfo),
(fc,i) <- zip fcs [1..]]
fcatCat :: FCat -> Cat
fcatCat c = Map.findWithDefault ("Unknown_" ++ show c) c fcatCats
fcatToCat :: FCat -> FIndex -> Cat
fcatToCat c l = fcatCat c ++ row
where row = if catLinArity c == 1 then "" else "_" ++ show l
-- gets the number of fields in the lincat for the given category
catLinArity :: FCat -> Int
catLinArity c = maximum (1:[rangeSize (bounds rhs) | (FFun _ _ rhs, _) <- topdownRules c])
topdownRules cat = f cat []
where
f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo))
g (FApply funid args) rules = (functions pinfo ! funid,args) : rules
g (FCoerce cat) rules = f cat rules
extCats :: Set Cat
extCats = Set.fromList $ map lhsCat startRules
startRules :: [CFRule]
startRules = [CFRule (showCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0)
| (c,fcs) <- Map.toList (startCats pinfo),
fc <- fcs, not (isLiteralFCat fc),
r <- [0..catLinArity fc-1]]
fruleToCFRule :: (FCat,Production) -> [CFRule]
fruleToCFRule (c,FApply funid args) =
[CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm (map (fixProfile row) ps))
| (l,seqid) <- Array.assocs rhs
, let row = sequences pinfo ! seqid
, not (containsLiterals row)]
where
FFun f ps rhs = functions pinfo ! funid
mkRhs :: Array FPointPos FSymbol -> [CFSymbol]
mkRhs = concatMap fsymbolToSymbol . Array.elems
containsLiterals :: Array FPointPos FSymbol -> Bool
containsLiterals row = any isLiteralFCat [args!!n | FSymCat n _ <- Array.elems row] ||
not (null [n | FSymLit n _ <- Array.elems row]) -- only this is needed for PMCFG.
-- The first line is for backward compat.
fsymbolToSymbol :: FSymbol -> [CFSymbol]
fsymbolToSymbol (FSymCat n l) = [NonTerminal (fcatToCat (args!!n) l)]
fsymbolToSymbol (FSymLit n l) = [NonTerminal (fcatToCat (args!!n) l)]
fsymbolToSymbol (FSymKS ts) = map Terminal ts
fixProfile :: Array FPointPos FSymbol -> Profile -> Profile
fixProfile row = concatMap positions
where
nts = zip [0..] [j | nt <- Array.elems row, j <- getPos nt]
positions i = [k | (k,j) <- nts, j == i]
getPos (FSymCat j _) = [j]
getPos (FSymLit j _) = [j]
getPos _ = []
profilesToTerm :: [Profile] -> CFTerm
profilesToTerm ps = CFObj f (zipWith profileToTerm argTypes ps)
where (argTypes,_) = catSkeleton $ lookType pgf f
profileToTerm :: CId -> Profile -> CFTerm
profileToTerm t [] = CFMeta t
profileToTerm _ xs = CFRes (last xs) -- FIXME: unify
fruleToCFRule (c,FCoerce c') =
[CFRule (fcatToCat c l) [NonTerminal (fcatToCat c' l)] (CFRes 0)
| l <- [0..catLinArity c-1]]

View File

@@ -0,0 +1,27 @@
----------------------------------------------------------------------
-- |
-- Module : GF.Speech.PrRegExp
--
-- This module prints a grammar as a regular expression.
-----------------------------------------------------------------------------
module GF.Speech.PrRegExp (regexpPrinter,multiRegexpPrinter) where
import GF.Speech.CFG
import GF.Speech.CFGToFA
import GF.Speech.PGFToCFG
import GF.Speech.RegExp
import PGF
regexpPrinter :: PGF -> CId -> String
regexpPrinter pgf cnc = (++"\n") $ prRE id $ dfa2re $ cfgToFA $ pgfToCFG pgf cnc
multiRegexpPrinter :: PGF -> CId -> String
multiRegexpPrinter pgf cnc = prREs $ mfa2res $ cfgToMFA $ pgfToCFG pgf cnc
prREs :: [(String,RE CFSymbol)] -> String
prREs res = unlines [l ++ " = " ++ prRE id (mapRE showLabel re) | (l,re) <- res]
where showLabel = symbol (\l -> "<" ++ l ++ ">") id
mfa2res :: MFA -> [(String,RE CFSymbol)]
mfa2res (MFA _ dfas) = [(l, minimizeRE (dfa2re dfa)) | (l,dfa) <- dfas]

View File

@@ -0,0 +1,144 @@
module GF.Speech.RegExp (RE(..),
epsilonRE, nullRE,
isEpsilon, isNull,
unionRE, concatRE, seqRE,
repeatRE, minimizeRE,
mapRE, mapRE', joinRE,
symbolsRE,
dfa2re, prRE) where
import Data.List
import GF.Data.Utilities
import GF.Speech.FiniteState
data RE a =
REUnion [RE a] -- ^ REUnion [] is null
| REConcat [RE a] -- ^ REConcat [] is epsilon
| RERepeat (RE a)
| RESymbol a
deriving (Eq,Ord,Show)
dfa2re :: (Ord a) => DFA a -> RE a
dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops
. oneFinalState () epsilonRE . mapTransitions RESymbol
where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa
merge es = [(f,t,unionRE ls)
| ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]]
elimStates :: (Ord a) => DFA (RE a) -> DFA (RE a)
elimStates fa =
case [s | (s,_) <- states fa, isInternal fa s] of
[] -> fa
sE:_ -> elimStates $ insertTransitionsWith (\x y -> unionRE [x,y]) ts $ removeState sE fa
where sAs = nonLoopTransitionsTo sE fa
sBs = nonLoopTransitionsFrom sE fa
r2 = unionRE $ loops sE fa
ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs]
r r1 r3 = concatRE [r1, repeatRE r2, r3]
epsilonRE :: RE a
epsilonRE = REConcat []
nullRE :: RE a
nullRE = REUnion []
isNull :: RE a -> Bool
isNull (REUnion []) = True
isNull _ = False
isEpsilon :: RE a -> Bool
isEpsilon (REConcat []) = True
isEpsilon _ = False
unionRE :: Ord a => [RE a] -> RE a
unionRE = unionOrId . sortNub . concatMap toList
where
toList (REUnion xs) = xs
toList x = [x]
unionOrId [r] = r
unionOrId rs = REUnion rs
concatRE :: [RE a] -> RE a
concatRE xs | any isNull xs = nullRE
| otherwise = case concatMap toList xs of
[r] -> r
rs -> REConcat rs
where
toList (REConcat xs) = xs
toList x = [x]
seqRE :: [a] -> RE a
seqRE = concatRE . map RESymbol
repeatRE :: RE a -> RE a
repeatRE x | isNull x || isEpsilon x = epsilonRE
| otherwise = RERepeat x
finalRE :: Ord a => DFA (RE a) -> RE a
finalRE fa = concatRE [repeatRE r1, r2,
repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])]
where
s0 = startState fa
[sF] = finalStates fa
r1 = unionRE $ loops s0 fa
r2 = unionRE $ map snd $ nonLoopTransitionsTo sF fa
r3 = unionRE $ loops sF fa
r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa
reverseRE :: RE a -> RE a
reverseRE (REConcat xs) = REConcat $ map reverseRE $ reverse xs
reverseRE (REUnion xs) = REUnion (map reverseRE xs)
reverseRE (RERepeat x) = RERepeat (reverseRE x)
reverseRE x = x
minimizeRE :: Ord a => RE a -> RE a
minimizeRE = reverseRE . mergeForward . reverseRE . mergeForward
mergeForward :: Ord a => RE a -> RE a
mergeForward (REUnion xs) =
unionRE [concatRE [mergeForward y,mergeForward (unionRE rs)] | (y,rs) <- buildMultiMap (map firstRE xs)]
mergeForward (REConcat (x:xs)) = concatRE [mergeForward x,mergeForward (REConcat xs)]
mergeForward (RERepeat r) = repeatRE (mergeForward r)
mergeForward r = r
firstRE :: RE a -> (RE a, RE a)
firstRE (REConcat (x:xs)) = (x, REConcat xs)
firstRE r = (r,epsilonRE)
mapRE :: (a -> b) -> RE a -> RE b
mapRE f = mapRE' (RESymbol . f)
mapRE' :: (a -> RE b) -> RE a -> RE b
mapRE' f (REConcat xs) = REConcat (map (mapRE' f) xs)
mapRE' f (REUnion xs) = REUnion (map (mapRE' f) xs)
mapRE' f (RERepeat x) = RERepeat (mapRE' f x)
mapRE' f (RESymbol s) = f s
joinRE :: RE (RE a) -> RE a
joinRE (REConcat xs) = REConcat (map joinRE xs)
joinRE (REUnion xs) = REUnion (map joinRE xs)
joinRE (RERepeat xs) = RERepeat (joinRE xs)
joinRE (RESymbol ss) = ss
symbolsRE :: RE a -> [a]
symbolsRE (REConcat xs) = concatMap symbolsRE xs
symbolsRE (REUnion xs) = concatMap symbolsRE xs
symbolsRE (RERepeat x) = symbolsRE x
symbolsRE (RESymbol x) = [x]
-- Debugging
prRE :: (a -> String) -> RE a -> String
prRE = prRE' 0
prRE' :: Int -> (a -> String) -> RE a -> String
prRE' _ _ (REUnion []) = "<NULL>"
prRE' n f (REUnion xs) = p n 1 (concat (intersperse " | " (map (prRE' 1 f) xs)))
prRE' n f (REConcat xs) = p n 2 (unwords (map (prRE' 2 f) xs))
prRE' n f (RERepeat x) = p n 3 (prRE' 3 f x) ++ "*"
prRE' _ f (RESymbol s) = f s
p n m s | n >= m = "(" ++ s ++ ")"
| True = s

View File

@@ -0,0 +1,77 @@
----------------------------------------------------------------------
-- |
-- Module : GF.Speech.SISR
--
-- Abstract syntax and pretty printer for SISR,
-- (Semantic Interpretation for Speech Recognition)
----------------------------------------------------------------------
module GF.Speech.SISR (SISRFormat(..), SISRTag, prSISR,
topCatSISR, profileInitSISR, catSISR, profileFinalSISR) where
import Data.List
import GF.Data.Utilities
import GF.Infra.Ident
import GF.Infra.Option (SISRFormat(..))
import GF.Speech.CFG
import GF.Speech.SRG (SRGNT)
import PGF.CId
import qualified GF.JavaScript.AbsJS as JS
import qualified GF.JavaScript.PrintJS as JS
type SISRTag = [JS.DeclOrExpr]
prSISR :: SISRTag -> String
prSISR = JS.printTree
topCatSISR :: String -> SISRFormat -> SISRTag
topCatSISR c fmt = map JS.DExpr [fmtOut fmt `ass` fmtRef fmt c]
profileInitSISR :: CFTerm -> SISRFormat -> SISRTag
profileInitSISR t fmt
| null (usedArgs t) = []
| otherwise = [JS.Decl [JS.DInit args (JS.EArray [])]]
usedArgs :: CFTerm -> [Int]
usedArgs (CFObj _ ts) = foldr union [] (map usedArgs ts)
usedArgs (CFAbs _ x) = usedArgs x
usedArgs (CFApp x y) = usedArgs x `union` usedArgs y
usedArgs (CFRes i) = [i]
usedArgs _ = []
catSISR :: CFTerm -> SRGNT -> SISRFormat -> SISRTag
catSISR t (c,i) fmt
| i `elem` usedArgs t = map JS.DExpr
[JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i)) `ass` fmtRef fmt c]
| otherwise = []
profileFinalSISR :: CFTerm -> SISRFormat -> SISRTag
profileFinalSISR term fmt = [JS.DExpr $ fmtOut fmt `ass` f term]
where
f (CFObj n ts) = tree (showCId n) (map f ts)
f (CFAbs v x) = JS.EFun [var v] [JS.SReturn (f x)]
f (CFApp x y) = JS.ECall (f x) [f y]
f (CFRes i) = JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i))
f (CFVar v) = JS.EVar (var v)
f (CFMeta typ) = obj [("name",JS.EStr "?"), ("type",JS.EStr (showCId typ))]
fmtOut SISR_WD20030401 = JS.EVar (JS.Ident "$")
fmtOut SISR_1_0 = JS.EVar (JS.Ident "out")
fmtRef SISR_WD20030401 c = JS.EVar (JS.Ident ("$" ++ c))
fmtRef SISR_1_0 c = field (JS.EVar (JS.Ident "rules")) c
args = JS.Ident "a"
var v = JS.Ident ("x" ++ show v)
field x y = JS.EMember x (JS.Ident y)
ass = JS.EAssign
tree n xs = obj [("name", JS.EStr n), ("args", JS.EArray xs)]
obj ps = JS.EObj [JS.Prop (JS.StringPropName x) y | (x,y) <- ps]

View File

@@ -0,0 +1,178 @@
----------------------------------------------------------------------
-- |
-- Module : GF.Speech.SLF
--
-- This module converts a CFG to an SLF finite-state network
-- for use with the ATK recognizer. The SLF format is described
-- in the HTK manual, and an example for use in ATK is shown
-- in the ATK manual.
--
-----------------------------------------------------------------------------
module GF.Speech.SLF (slfPrinter,slfGraphvizPrinter,
slfSubPrinter,slfSubGraphvizPrinter) where
import GF.Data.Utilities
import GF.Speech.CFG
import GF.Speech.FiniteState
import GF.Speech.CFG
import GF.Speech.CFGToFA
import GF.Speech.PGFToCFG
import qualified GF.Data.Graphviz as Dot
import PGF
import PGF.CId
import Control.Monad
import qualified Control.Monad.State as STM
import Data.Char (toUpper)
import Data.List
import Data.Maybe
data SLFs = SLFs [(String,SLF)] SLF
data SLF = SLF { slfNodes :: [SLFNode], slfEdges :: [SLFEdge] }
data SLFNode = SLFNode { nId :: Int, nWord :: SLFWord, nTag :: Maybe String }
| SLFSubLat { nId :: Int, nLat :: String }
-- | An SLF word is a word, or the empty string.
type SLFWord = Maybe String
data SLFEdge = SLFEdge { eId :: Int, eStart :: Int, eEnd :: Int }
type SLF_FA = FA State (Maybe CFSymbol) ()
mkFAs :: PGF -> CId -> (SLF_FA, [(String,SLF_FA)])
mkFAs pgf cnc = (slfStyleFA main, [(c,slfStyleFA n) | (c,n) <- subs])
where MFA start subs = {- renameSubs $ -} cfgToMFA $ pgfToCFG pgf cnc
main = let (fa,s,f) = newFA_ in newTransition s f (NonTerminal start) fa
slfStyleFA :: Eq a => DFA a -> FA State (Maybe a) ()
slfStyleFA = renameStates [0..] . removeTrivialEmptyNodes . oneFinalState Nothing ()
. moveLabelsToNodes . dfa2nfa
-- | Give sequential names to subnetworks.
renameSubs :: MFA -> MFA
renameSubs (MFA start subs) = MFA (newName start) subs'
where newNames = zip (map fst subs) ["sub"++show n | n <- [0..]]
newName s = lookup' s newNames
subs' = [(newName s,renameLabels n) | (s,n) <- subs]
renameLabels = mapTransitions (mapSymbol newName id)
--
-- * SLF graphviz printing (without sub-networks)
--
slfGraphvizPrinter :: PGF -> CId -> String
slfGraphvizPrinter pgf cnc
= prFAGraphviz $ gvFA $ slfStyleFA $ cfgToFA' $ pgfToCFG pgf cnc
where
gvFA = mapStates (fromMaybe "") . mapTransitions (const "")
--
-- * SLF graphviz printing (with sub-networks)
--
slfSubGraphvizPrinter :: PGF -> CId -> String
slfSubGraphvizPrinter pgf cnc = Dot.prGraphviz g
where (main, subs) = mkFAs pgf cnc
g = STM.evalState (liftM2 Dot.addSubGraphs ss m) [0..]
ss = mapM (\ (c,f) -> gvSLFFA (Just c) f) subs
m = gvSLFFA Nothing main
gvSLFFA :: Maybe String -> SLF_FA -> STM.State [State] Dot.Graph
gvSLFFA n fa =
liftM (mkCluster n . faToGraphviz . mapStates (maybe "" mfaLabelToGv)
. mapTransitions (const "")) (rename fa)
where mfaLabelToGv = symbol ("#"++) id
mkCluster Nothing = id
mkCluster (Just x)
= Dot.setName ("cluster_"++x) . Dot.setAttr "label" x
rename fa = do
names <- STM.get
let fa' = renameStates names fa
names' = unusedNames fa'
STM.put names'
return fa'
--
-- * SLF printing (without sub-networks)
--
slfPrinter :: PGF -> CId -> String
slfPrinter pgf cnc
= prSLF $ automatonToSLF mkSLFNode $ slfStyleFA $ cfgToFA' $ pgfToCFG pgf cnc
--
-- * SLF printing (with sub-networks)
--
-- | Make a network with subnetworks in SLF
slfSubPrinter :: PGF -> CId -> String
slfSubPrinter pgf cnc = prSLFs slfs
where
(main,subs) = mkFAs pgf cnc
slfs = SLFs [(c, faToSLF fa) | (c,fa) <- subs] (faToSLF main)
faToSLF = automatonToSLF mfaNodeToSLFNode
automatonToSLF :: (Int -> a -> SLFNode) -> FA State a () -> SLF
automatonToSLF mkNode fa = SLF { slfNodes = ns, slfEdges = es }
where ns = map (uncurry mkNode) (states fa)
es = zipWith (\i (f,t,()) -> mkSLFEdge i (f,t)) [0..] (transitions fa)
mfaNodeToSLFNode :: Int -> Maybe CFSymbol -> SLFNode
mfaNodeToSLFNode i l = case l of
Nothing -> mkSLFNode i Nothing
Just (Terminal x) -> mkSLFNode i (Just x)
Just (NonTerminal s) -> mkSLFSubLat i s
mkSLFNode :: Int -> Maybe String -> SLFNode
mkSLFNode i Nothing = SLFNode { nId = i, nWord = Nothing, nTag = Nothing }
mkSLFNode i (Just w)
| isNonWord w = SLFNode { nId = i,
nWord = Nothing,
nTag = Just w }
| otherwise = SLFNode { nId = i,
nWord = Just (map toUpper w),
nTag = Just w }
mkSLFSubLat :: Int -> String -> SLFNode
mkSLFSubLat i sub = SLFSubLat { nId = i, nLat = sub }
mkSLFEdge :: Int -> (Int,Int) -> SLFEdge
mkSLFEdge i (f,t) = SLFEdge { eId = i, eStart = f, eEnd = t }
prSLFs :: SLFs -> String
prSLFs (SLFs subs main) = unlinesS (map prSub subs ++ [prOneSLF main]) ""
where prSub (n,s) = showString "SUBLAT=" . shows n
. nl . prOneSLF s . showString "." . nl
prSLF :: SLF -> String
prSLF slf = prOneSLF slf ""
prOneSLF :: SLF -> ShowS
prOneSLF (SLF { slfNodes = ns, slfEdges = es})
= header . unlinesS (map prNode ns) . nl . unlinesS (map prEdge es) . nl
where
header = prFields [("N",show (length ns)),("L", show (length es))] . nl
prNode (SLFNode { nId = i, nWord = w, nTag = t })
= prFields $ [("I",show i),("W",showWord w)]
++ maybe [] (\t -> [("s",t)]) t
prNode (SLFSubLat { nId = i, nLat = l })
= prFields [("I",show i),("L",show l)]
prEdge e = prFields [("J",show (eId e)),("S",show (eStart e)),("E",show (eEnd e))]
-- | Check if a word should not correspond to a word in the SLF file.
isNonWord :: String -> Bool
isNonWord = any isPunct
isPunct :: Char -> Bool
isPunct c = c `elem` "-_.;.,?!()[]{}"
showWord :: SLFWord -> String
showWord Nothing = "!NULL"
showWord (Just w) | null w = "!NULL"
| otherwise = w
prFields :: [(String,String)] -> ShowS
prFields fs = unwordsS [ showString l . showChar '=' . showString v | (l,v) <- fs ]

View File

@@ -0,0 +1,205 @@
----------------------------------------------------------------------
-- |
-- Module : SRG
--
-- Representation of, conversion to, and utilities for
-- printing of a general Speech Recognition Grammar.
--
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
-- categories in the grammar
----------------------------------------------------------------------
module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..), SRGItem, SRGSymbol
, SRGNT, CFTerm
, ebnfPrinter
, makeNonLeftRecursiveSRG
, makeNonRecursiveSRG
, getSpeechLanguage
, isExternalCat
, lookupFM_
) where
import GF.Data.Operations
import GF.Data.Utilities
import GF.Infra.Ident
import GF.Infra.Option
import GF.Speech.CFG
import GF.Speech.PGFToCFG
import GF.Data.Relation
import GF.Speech.FiniteState
import GF.Speech.RegExp
import GF.Speech.CFGToFA
import GF.Infra.Option
import PGF.CId
import PGF.Data
import PGF.Macros
import Data.List
import Data.Maybe (fromMaybe, maybeToList)
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set
import Debug.Trace
data SRG = SRG { srgName :: String -- ^ grammar name
, srgStartCat :: Cat -- ^ start category name
, srgExternalCats :: Set Cat
, srgLanguage :: Maybe String -- ^ The language for which the grammar
-- is intended, e.g. en-UK
, srgRules :: [SRGRule]
}
deriving (Eq,Show)
data SRGRule = SRGRule Cat [SRGAlt]
deriving (Eq,Show)
-- | maybe a probability, a rule name and an EBNF right-hand side
data SRGAlt = SRGAlt (Maybe Double) CFTerm SRGItem
deriving (Eq,Show)
type SRGItem = RE SRGSymbol
type SRGSymbol = Symbol SRGNT Token
-- | An SRG non-terminal. Category name and its number in the profile.
type SRGNT = (Cat, Int)
ebnfPrinter :: Options -> PGF -> CId -> String
ebnfPrinter opts pgf cnc = prSRG opts $ makeSRG opts pgf cnc
-- | Create a compact filtered non-left-recursive SRG.
makeNonLeftRecursiveSRG :: Options -> PGF -> CId -> SRG
makeNonLeftRecursiveSRG opts = makeSRG opts'
where
opts' = setDefaultCFGTransform opts CFGNoLR True
makeSRG :: Options -> PGF -> CId -> SRG
makeSRG opts = mkSRG cfgToSRG preprocess
where
cfgToSRG cfg = [cfRulesToSRGRule rs | (_,rs) <- allRulesGrouped cfg]
preprocess = maybeTransform opts CFGMergeIdentical mergeIdentical
. maybeTransform opts CFGNoLR removeLeftRecursion
. maybeTransform opts CFGRegular makeRegular
. maybeTransform opts CFGTopDownFilter topDownFilter
. maybeTransform opts CFGBottomUpFilter bottomUpFilter
. maybeTransform opts CFGRemoveCycles removeCycles
. maybeTransform opts CFGStartCatOnly purgeExternalCats
setDefaultCFGTransform :: Options -> CFGTransform -> Bool -> Options
setDefaultCFGTransform opts t b = setCFGTransform t b `addOptions` opts
maybeTransform :: Options -> CFGTransform -> (CFG -> CFG) -> (CFG -> CFG)
maybeTransform opts t f = if cfgTransform opts t then f else id
traceStats s g = trace ("---- " ++ s ++ ": " ++ stats g {- ++ "\n" ++ prCFRules g ++ "----" -}) g
stats g = "Categories: " ++ show (countCats g)
++ ", External categories: " ++ show (Set.size (cfgExternalCats g))
++ ", Rules: " ++ show (countRules g)
makeNonRecursiveSRG :: Options
-> PGF
-> CId -- ^ Concrete syntax name.
-> SRG
makeNonRecursiveSRG opts = mkSRG cfgToSRG id
where
cfgToSRG cfg = [SRGRule l [SRGAlt Nothing dummyCFTerm (dfaToSRGItem dfa)] | (l,dfa) <- dfas]
where
MFA _ dfas = cfgToMFA cfg
dfaToSRGItem = mapRE dummySRGNT . minimizeRE . dfa2re
dummyCFTerm = CFMeta (mkCId "dummy")
dummySRGNT = mapSymbol (\c -> (c,0)) id
mkSRG :: (CFG -> [SRGRule]) -> (CFG -> CFG) -> PGF -> CId -> SRG
mkSRG mkRules preprocess pgf cnc =
SRG { srgName = showCId cnc,
srgStartCat = cfgStartCat cfg,
srgExternalCats = cfgExternalCats cfg,
srgLanguage = getSpeechLanguage pgf cnc,
srgRules = mkRules cfg }
where cfg = renameCats (showCId cnc) $ preprocess $ pgfToCFG pgf cnc
-- | Renames all external cats C to C_cat, and all internal cats C_X (where X is any string),
-- to C_N where N is an integer.
renameCats :: String -> CFG -> CFG
renameCats prefix cfg = mapCFGCats renameCat cfg
where renameCat c | isExternal c = c ++ "_cat"
| otherwise = Map.findWithDefault (badCat c) c names
isExternal c = c `Set.member` cfgExternalCats cfg
catsByPrefix = buildMultiMap [(takeWhile (/='_') cat, cat) | cat <- allCats' cfg, not (isExternal cat)]
names = Map.fromList [(c,pref++"_"++show i) | (pref,cs) <- catsByPrefix, (c,i) <- zip cs [1..]]
badCat c = error ("GF.Speech.SRG.renameCats: " ++ c ++ "\n" ++ prCFG cfg)
getSpeechLanguage :: PGF -> CId -> Maybe String
getSpeechLanguage pgf cnc = fmap (replace '_' '-') $ lookConcrFlag pgf cnc (mkCId "language")
cfRulesToSRGRule :: [CFRule] -> SRGRule
cfRulesToSRGRule rs@(r:_) = SRGRule (lhsCat r) rhs
where
alts = [((n,Nothing),mkSRGSymbols 0 ss) | CFRule c ss n <- rs]
rhs = [SRGAlt p n (srgItem sss) | ((n,p),sss) <- buildMultiMap alts ]
mkSRGSymbols _ [] = []
mkSRGSymbols i (NonTerminal c:ss) = NonTerminal (c,i) : mkSRGSymbols (i+1) ss
mkSRGSymbols i (Terminal t:ss) = Terminal t : mkSRGSymbols i ss
srgLHSCat :: SRGRule -> Cat
srgLHSCat (SRGRule c _) = c
isExternalCat :: SRG -> Cat -> Bool
isExternalCat srg c = c `Set.member` srgExternalCats srg
--
-- * Size-optimized EBNF SRGs
--
srgItem :: [[SRGSymbol]] -> SRGItem
srgItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
-- non-optimizing version:
--srgItem = unionRE . map seqRE
-- | Merges a list of right-hand sides which all have the same
-- sequence of non-terminals.
mergeItems :: [[SRGSymbol]] -> SRGItem
mergeItems = minimizeRE . ungroupTokens . minimizeRE . unionRE . map seqRE . map groupTokens
groupTokens :: [SRGSymbol] -> [Symbol SRGNT [Token]]
groupTokens [] = []
groupTokens (Terminal t:ss) = case groupTokens ss of
Terminal ts:ss' -> Terminal (t:ts):ss'
ss' -> Terminal [t]:ss'
groupTokens (NonTerminal c:ss) = NonTerminal c : groupTokens ss
ungroupTokens :: RE (Symbol SRGNT [Token]) -> RE SRGSymbol
ungroupTokens = joinRE . mapRE (symbol (RESymbol . NonTerminal) (REConcat . map (RESymbol . Terminal)))
--
-- * Utilities for building and printing SRGs
--
prSRG :: Options -> SRG -> String
prSRG opts srg = prProductions $ map prRule $ ext ++ int
where
sisr = flag optSISR opts
(ext,int) = partition (isExternalCat srg . srgLHSCat) (srgRules srg)
prRule (SRGRule c alts) = (c,unwords (intersperse "|" (concatMap prAlt alts)))
prAlt (SRGAlt _ t rhs) =
-- FIXME: hack: we high-jack the --sisr flag to add
-- a simple lambda calculus format for semantic interpretation
-- Maybe the --sisr flag should be renamed.
case sisr of
Just _ ->
-- copy tags to each part of a top-level union,
-- to get simpler output
case rhs of
REUnion xs -> map prOneAlt xs
_ -> [prOneAlt rhs]
where prOneAlt a = prRE prSym a ++ " { " ++ prCFTerm t ++ " }"
Nothing -> [prRE prSym rhs]
prSym = symbol fst (\t -> "\""++ t ++"\"")
lookupFM_ :: (Ord key, Show key) => Map key elt -> key -> elt
lookupFM_ fm k = Map.findWithDefault err k fm
where err = error $ "Key not found: " ++ show k
++ "\namong " ++ show (Map.keys fm)

View File

@@ -0,0 +1,127 @@
----------------------------------------------------------------------
-- |
-- Module : PrJSRGS_ABNF
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/01 20:09:04 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.16 $
--
-- This module prints a CFG as a JSGF grammar.
--
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
-- categories in the grammar
--
-- FIXME: convert to UTF-8
-----------------------------------------------------------------------------
module GF.Speech.SRGS_ABNF (srgsAbnfPrinter, srgsAbnfNonRecursivePrinter) where
import GF.Data.Utilities
import GF.Infra.Option
import GF.Speech.CFG
import GF.Speech.SISR as SISR
import GF.Speech.SRG
import GF.Speech.RegExp
import PGF (PGF, CId)
import Data.Char
import Data.List
import Data.Maybe
import Text.PrettyPrint.HughesPJ
import Debug.Trace
width :: Int
width = 75
srgsAbnfPrinter :: Options
-> PGF -> CId -> String
srgsAbnfPrinter opts pgf cnc = showDoc $ prABNF sisr $ makeNonLeftRecursiveSRG opts pgf cnc
where sisr = flag optSISR opts
srgsAbnfNonRecursivePrinter :: Options -> PGF -> CId -> String
srgsAbnfNonRecursivePrinter opts pgf cnc = showDoc $ prABNF Nothing $ makeNonRecursiveSRG opts pgf cnc
showDoc = renderStyle (style { lineLength = width })
prABNF :: Maybe SISRFormat -> SRG -> Doc
prABNF sisr srg
= header $++$ foldr ($++$) empty (map prRule (srgRules srg))
where
header = text "#ABNF 1.0 UTF-8;" $$
meta "description" ("Speech recognition grammar for " ++ srgName srg) $$
meta "generator" "Grammatical Framework" $$
language $$ tagFormat $$ mainCat
language = maybe empty (\l -> text "language" <+> text l <> char ';') (srgLanguage srg)
tagFormat | isJust sisr = text "tag-format" <+> text "<semantics/1.0>" <> char ';'
| otherwise = empty
mainCat = text "root" <+> prCat (srgStartCat srg) <> char ';'
prRule (SRGRule cat alts) = rule (isExternalCat srg cat) cat (map prAlt alts)
prAlt (SRGAlt mp n rhs) = sep [initTag, p (prItem sisr n rhs), finalTag]
where initTag = tag sisr (profileInitSISR n)
finalTag = tag sisr (profileFinalSISR n)
p = if isEmpty initTag && isEmpty finalTag then id else parens
prCat :: Cat -> Doc
prCat c = char '$' <> text c
prItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> Doc
prItem sisr t = f 0
where
f _ (REUnion []) = text "$VOID"
f p (REUnion xs)
| not (null es) = brackets (f 0 (REUnion nes))
| otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs))
where (es,nes) = partition isEpsilon xs
f _ (REConcat []) = text "$NULL"
f p (REConcat xs) = (if p >= 3 then parens else id) (fsep (map (f 2) xs))
f p (RERepeat x) = f 3 x <> text "<0->"
f _ (RESymbol s) = prSymbol sisr t s
prSymbol :: Maybe SISRFormat -> CFTerm -> SRGSymbol -> Doc
prSymbol sisr cn (NonTerminal n@(c,_)) = prCat c <+> tag sisr (catSISR cn n)
prSymbol _ cn (Terminal t)
| all isPunct t = empty -- removes punctuation
| otherwise = text t -- FIXME: quote if there is whitespace or odd chars
tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc
tag Nothing _ = empty
tag (Just fmt) t =
case t fmt of
[] -> empty
-- grr, silly SRGS ABNF does not have an escaping mechanism
ts | '{' `elem` x || '}' `elem` x -> text "{!{" <+> text x <+> text "}!}"
| otherwise -> text "{" <+> text x <+> text "}"
where x = prSISR ts
isPunct :: Char -> Bool
isPunct c = c `elem` "-_.;.,?!"
comment :: String -> Doc
comment s = text "//" <+> text s
alts :: [Doc] -> Doc
alts = fsep . prepunctuate (text "| ")
rule :: Bool -> Cat -> [Doc] -> Doc
rule pub c xs = p <+> prCat c <+> char '=' <+> nest 2 (alts xs) <+> char ';'
where p = if pub then text "public" else empty
meta :: String -> String -> Doc
meta n v = text "meta" <+> text (show n) <+> text "is" <+> text (show v) <> char ';'
-- Pretty-printing utilities
emptyLine :: Doc
emptyLine = text ""
prepunctuate :: Doc -> [Doc] -> [Doc]
prepunctuate _ [] = []
prepunctuate p (x:xs) = x : map (p <>) xs
($++$) :: Doc -> Doc -> Doc
x $++$ y = x $$ emptyLine $$ y

View File

@@ -0,0 +1,105 @@
----------------------------------------------------------------------
-- |
-- Module : GF.Speech.SRGS_XML
--
-- Prints an SRGS XML speech recognition grammars.
----------------------------------------------------------------------
module GF.Speech.SRGS_XML (srgsXmlPrinter, srgsXmlNonRecursivePrinter) where
import GF.Data.Utilities
import GF.Data.XML
import GF.Infra.Option
import GF.Speech.CFG
import GF.Speech.RegExp
import GF.Speech.SISR as SISR
import GF.Speech.SRG
import PGF (PGF, CId)
import Control.Monad
import Data.Char (toUpper,toLower)
import Data.List
import Data.Maybe
import qualified Data.Map as Map
srgsXmlPrinter :: Options
-> PGF -> CId -> String
srgsXmlPrinter opts pgf cnc = prSrgsXml sisr $ makeNonLeftRecursiveSRG opts pgf cnc
where sisr = flag optSISR opts
srgsXmlNonRecursivePrinter :: Options -> PGF -> CId -> String
srgsXmlNonRecursivePrinter opts pgf cnc = prSrgsXml Nothing $ makeNonRecursiveSRG opts pgf cnc
prSrgsXml :: Maybe SISRFormat -> SRG -> String
prSrgsXml sisr srg = showXMLDoc (optimizeSRGS xmlGr)
where
xmlGr = grammar sisr (srgStartCat srg) (srgLanguage srg) $
[meta "description"
("SRGS XML speech recognition grammar for " ++ srgName srg ++ "."),
meta "generator" "Grammatical Framework"]
++ map ruleToXML (srgRules srg)
ruleToXML (SRGRule cat alts) = Tag "rule" ([("id",cat)]++pub) (prRhs alts)
where pub = if isExternalCat srg cat then [("scope","public")] else []
prRhs rhss = [oneOf (map (mkProd sisr) rhss)]
mkProd :: Maybe SISRFormat -> SRGAlt -> XML
mkProd sisr (SRGAlt mp n rhs) = Tag "item" [] (ti ++ [x] ++ tf)
where x = mkItem sisr n rhs
ti = tag sisr (profileInitSISR n)
tf = tag sisr (profileFinalSISR n)
mkItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> XML
mkItem sisr cn = f
where
f (REUnion []) = ETag "ruleref" [("special","VOID")]
f (REUnion xs)
| not (null es) = Tag "item" [("repeat","0-1")] [f (REUnion nes)]
| otherwise = oneOf (map f xs)
where (es,nes) = partition isEpsilon xs
f (REConcat []) = ETag "ruleref" [("special","NULL")]
f (REConcat xs) = Tag "item" [] (map f xs)
f (RERepeat x) = Tag "item" [("repeat","0-")] [f x]
f (RESymbol s) = symItem sisr cn s
symItem :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> XML
symItem sisr cn (NonTerminal n@(c,_)) =
Tag "item" [] $ [ETag "ruleref" [("uri","#" ++ c)]] ++ tag sisr (catSISR cn n)
symItem _ _ (Terminal t) = Tag "item" [] [Data (showToken t)]
tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> [XML]
tag Nothing _ = []
tag (Just fmt) t = case t fmt of
[] -> []
ts -> [Tag "tag" [] [Data (prSISR ts)]]
showToken :: Token -> String
showToken t = t
oneOf :: [XML] -> XML
oneOf = Tag "one-of" []
grammar :: Maybe SISRFormat
-> String -- ^ root
-> Maybe String -- ^language
-> [XML] -> XML
grammar sisr root ml =
Tag "grammar" $ [("xmlns","http://www.w3.org/2001/06/grammar"),
("version","1.0"),
("mode","voice"),
("root",root)]
++ (if isJust sisr then [("tag-format","semantics/1.0")] else [])
++ maybe [] (\l -> [("xml:lang", l)]) ml
meta :: String -> String -> XML
meta n c = ETag "meta" [("name",n),("content",c)]
optimizeSRGS :: XML -> XML
optimizeSRGS = bottomUpXML f
where f (Tag "item" [] [x@(Tag "item" _ _)]) = x
f (Tag "item" [] [x@(Tag "one-of" _ _)]) = x
f (Tag "item" as [Tag "item" [] xs]) = Tag "item" as xs
f (Tag "item" as xs) = Tag "item" as (map g xs)
where g (Tag "item" [] [x@(ETag "ruleref" _)]) = x
g x = x
f (Tag "one-of" [] [x]) = x
f x = x

View File

@@ -0,0 +1,243 @@
----------------------------------------------------------------------
-- |
-- Module : GF.Speech.VoiceXML
--
-- Creates VoiceXML dialogue systems from PGF grammars.
-----------------------------------------------------------------------------
module GF.Speech.VoiceXML (grammar2vxml) where
import GF.Data.Operations
import GF.Data.Str (sstrV)
import GF.Data.Utilities
import GF.Data.XML
import GF.Infra.Ident
import GF.Infra.Modules
import GF.Speech.SRG (getSpeechLanguage)
import PGF.CId
import PGF.Data
import PGF.Macros
import PGF.Linearize (realize)
import Control.Monad (liftM)
import Data.List (isPrefixOf, find, intersperse)
import qualified Data.Map as Map
import Data.Maybe (fromMaybe)
import Debug.Trace
-- | the main function
grammar2vxml :: PGF -> CId -> String
grammar2vxml pgf cnc = showsXMLDoc (skel2vxml name language start skel qs) ""
where skel = pgfSkeleton pgf
name = showCId cnc
qs = catQuestions pgf cnc (map fst skel)
language = getSpeechLanguage pgf cnc
start = lookStartCat pgf
--
-- * VSkeleton: a simple description of the abstract syntax.
--
type Skeleton = [(CId, [(CId, [CId])])]
pgfSkeleton :: PGF -> Skeleton
pgfSkeleton pgf = [(c,[(f,fst (catSkeleton (lookType pgf f))) | f <- fs])
| (c,fs) <- Map.toList (catfuns (abstract pgf)),
not (isLiteralCat c)]
--
-- * Questions to ask
--
type CatQuestions = [(CId,String)]
catQuestions :: PGF -> CId -> [CId] -> CatQuestions
catQuestions pgf cnc cats = [(c,catQuestion pgf cnc c) | c <- cats]
catQuestion :: PGF -> CId -> CId -> String
catQuestion pgf cnc cat = realize (lookPrintName pgf cnc cat)
{-
lin :: StateGrammar -> String -> Err String
lin gr fun = do
tree <- string2treeErr gr fun
let ls = map unt $ linTree2strings noMark g c tree
case ls of
[] -> fail $ "No linearization of " ++ fun
l:_ -> return l
where c = cncId gr
g = stateGrammarST gr
unt = formatAsText
-}
getCatQuestion :: CId -> CatQuestions -> String
getCatQuestion c qs =
fromMaybe (error "No question for category " ++ showCId c) (lookup c qs)
--
-- * Generate VoiceXML
--
skel2vxml :: String -> Maybe String -> CId -> Skeleton -> CatQuestions -> XML
skel2vxml name language start skel qs =
vxml language ([startForm] ++ concatMap (uncurry (catForms gr qs)) skel)
where
gr = grammarURI name
startForm = Tag "form" [] [subdialog "sub" [("src", "#"++catFormId start)]
[param "old" "{ name : '?' }"]]
grammarURI :: String -> String
grammarURI name = name ++ ".grxml"
catForms :: String -> CatQuestions -> CId -> [(CId, [CId])] -> [XML]
catForms gr qs cat fs =
comments [showCId cat ++ " category."]
++ [cat2form gr qs cat fs]
cat2form :: String -> CatQuestions -> CId -> [(CId, [CId])] -> XML
cat2form gr qs cat fs =
form (catFormId cat) $
[var "old" Nothing,
blockCond "old.name != '?'" [assign "term" "old"],
field "term" []
[promptString (getCatQuestion cat qs),
vxmlGrammar (gr++"#"++catFormId cat)
]
]
++ concatMap (uncurry (fun2sub gr cat)) fs
++ [block [return_ ["term"]{-]-}]]
fun2sub :: String -> CId -> CId -> [CId] -> [XML]
fun2sub gr cat fun args =
comments [showCId fun ++ " : ("
++ concat (intersperse ", " (map showCId args))
++ ") " ++ showCId cat] ++ ss
where
ss = zipWith mkSub [0..] args
mkSub n t = subdialog s [("src","#"++catFormId t),
("cond","term.name == "++string (showCId fun))]
[param "old" v,
filled [] [assign v (s++".term")]]
where s = showCId fun ++ "_" ++ show n
v = "term.args["++show n++"]"
catFormId :: CId -> String
catFormId c = showCId c ++ "_cat"
--
-- * VoiceXML stuff
--
vxml :: Maybe String -> [XML] -> XML
vxml ml = Tag "vxml" $ [("version","2.0"),
("xmlns","http://www.w3.org/2001/vxml")]
++ maybe [] (\l -> [("xml:lang", l)]) ml
form :: String -> [XML] -> XML
form id xs = Tag "form" [("id", id)] xs
field :: String -> [(String,String)] -> [XML] -> XML
field name attrs = Tag "field" ([("name",name)]++attrs)
subdialog :: String -> [(String,String)] -> [XML] -> XML
subdialog name attrs = Tag "subdialog" ([("name",name)]++attrs)
filled :: [(String,String)] -> [XML] -> XML
filled = Tag "filled"
vxmlGrammar :: String -> XML
vxmlGrammar uri = ETag "grammar" [("src",uri)]
prompt :: [XML] -> XML
prompt = Tag "prompt" []
promptString :: String -> XML
promptString p = prompt [Data p]
reprompt :: XML
reprompt = ETag "reprompt" []
assign :: String -> String -> XML
assign n e = ETag "assign" [("name",n),("expr",e)]
value :: String -> XML
value expr = ETag "value" [("expr",expr)]
if_ :: String -> [XML] -> XML
if_ c b = if_else c b []
if_else :: String -> [XML] -> [XML] -> XML
if_else c t f = cond [(c,t)] f
cond :: [(String,[XML])] -> [XML] -> XML
cond ((c,b):rest) els = Tag "if" [("cond",c)] (b ++ es)
where es = [Tag "elseif" [("cond",c')] b' | (c',b') <- rest]
++ if null els then [] else (Tag "else" [] []:els)
goto_item :: String -> XML
goto_item nextitem = ETag "goto" [("nextitem",nextitem)]
return_ :: [String] -> XML
return_ names = ETag "return" [("namelist", unwords names)]
block :: [XML] -> XML
block = Tag "block" []
blockCond :: String -> [XML] -> XML
blockCond cond = Tag "block" [("cond", cond)]
throw :: String -> String -> XML
throw event msg = Tag "throw" [("event",event),("message",msg)] []
nomatch :: [XML] -> XML
nomatch = Tag "nomatch" []
help :: [XML] -> XML
help = Tag "help" []
param :: String -> String -> XML
param name expr = ETag "param" [("name",name),("expr",expr)]
var :: String -> Maybe String -> XML
var name expr = ETag "var" ([("name",name)]++e)
where e = maybe [] ((:[]) . (,) "expr") expr
script :: String -> XML
script s = Tag "script" [] [CData s]
scriptURI :: String -> XML
scriptURI uri = Tag "script" [("uri", uri)] []
--
-- * ECMAScript stuff
--
string :: String -> String
string s = "'" ++ concatMap esc s ++ "'"
where esc '\'' = "\\'"
esc c = [c]
{-
--
-- * List stuff
--
isListCat :: (CId, [(CId, [CId])]) -> Bool
isListCat (cat,rules) = "List" `isPrefixOf` showIdent cat && length rules == 2
&& ("Base"++c) `elem` fs && ("Cons"++c) `elem` fs
where c = drop 4 (showIdent cat)
fs = map (showIdent . fst) rules
isBaseFun :: CId -> Bool
isBaseFun f = "Base" `isPrefixOf` showIdent f
isConsFun :: CId -> Bool
isConsFun f = "Cons" `isPrefixOf` showIdent f
baseSize :: (CId, [(CId, [CId])]) -> Int
baseSize (_,rules) = length bs
where Just (_,bs) = find (isBaseFun . fst) rules
-}