1
0
forked from GitHub/gf-core

remove all files that aren't used in GF-3.0

This commit is contained in:
kr.angelov
2008-05-22 11:59:31 +00:00
parent d78e8d5469
commit fc42d8ec3b
286 changed files with 21 additions and 53176 deletions

View File

@@ -1,265 +0,0 @@
----------------------------------------------------------------------
-- |
-- Module : CFGToFiniteState
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/10 16:43:44 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.6 $
--
-- Approximates CFGs with finite state networks.
-----------------------------------------------------------------------------
module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular,
MFA(..), MFALabel, cfgToMFA,cfgToFA') where
import Data.List
import Data.Maybe
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set
import GF.Data.Utilities
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, NameProfile(..))
import GF.Conversion.Types
import GF.Infra.Ident (Ident)
import GF.Infra.Option (Options)
import GF.Compile.ShellState (StateGrammar)
import GF.Speech.FiniteState
import GF.Speech.Graph
import GF.Speech.Relation
import GF.Speech.TransformCFG
data Recursivity = RightR | LeftR | NotR
data MutRecSet = MutRecSet {
mrCats :: Set Cat_,
mrNonRecRules :: [CFRule_],
mrRecRules :: [CFRule_],
mrRec :: Recursivity
}
type MutRecSets = Map Cat_ MutRecSet
--
-- * Multiple DFA type
--
type MFALabel a = Symbol String a
data MFA a = MFA String [(String,DFA (MFALabel a))]
cfgToFA :: Options -> StateGrammar -> DFA Token
cfgToFA opts s = minimize $ compileAutomaton start $ makeSimpleRegular opts s
where start = getStartCatCF opts s
makeSimpleRegular :: Options -> StateGrammar -> CFRules
makeSimpleRegular opts s = makeRegular $ preprocess $ cfgToCFRules s
where start = getStartCatCF opts s
preprocess = topDownFilter start . bottomUpFilter
. removeCycles
--
-- * Compile strongly regular grammars to NFAs
--
-- Convert a strongly regular grammar to a finite automaton.
compileAutomaton :: Cat_ -- ^ Start category
-> CFRules
-> NFA Token
compileAutomaton start g = make_fa (g,ns) s [Cat start] f fa
where
(fa,s,f) = newFA_
ns = mutRecSets g $ mutRecCats False g
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000.
make_fa :: (CFRules,MutRecSets) -> State -> [Symbol Cat_ Token] -> State
-> NFA Token -> NFA Token
make_fa c@(g,ns) q0 alpha q1 fa =
case alpha of
[] -> newTransition q0 q1 Nothing fa
[Tok t] -> newTransition q0 q1 (Just t) fa
[Cat a] -> case Map.lookup a ns of
-- a is recursive
Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) ->
case mrRec n of
RightR ->
-- the set Ni is right-recursive or cyclic
let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs]
++ [(getState c, xs, getState d) | CFRule c ss _ <- rs,
let (xs,Cat d) = (init ss,last ss)]
in make_fas new $ newTransition q0 (getState a) Nothing fa'
LeftR ->
-- the set Ni is left-recursive
let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs]
++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- rs]
in make_fas new $ newTransition (getState a) q1 Nothing fa'
where
(fa',stateMap) = addStatesForCats ni fa
getState x = Map.findWithDefault
(error $ "CFGToFiniteState: No state for " ++ x)
x stateMap
-- a is not recursive
Nothing -> let rs = catRules g a
in foldl' (\f (CFRule _ b _) -> make_fa_ q0 b q1 f) fa rs
(x:beta) -> let (fa',q) = newState () fa
in make_fa_ q beta q1 $ make_fa_ q0 [x] q fa'
where
make_fa_ = make_fa c
make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs
--
-- * Compile a strongly regular grammar to a DFA with sub-automata
--
cfgToMFA :: Options -> StateGrammar -> MFA Token
cfgToMFA opts s = buildMFA start $ makeSimpleRegular opts s
where start = getStartCatCF opts s
-- | Build a DFA by building and expanding an MFA
cfgToFA' :: Options -> StateGrammar -> DFA Token
cfgToFA' opts s = mfaToDFA $ cfgToMFA opts s
buildMFA :: Cat_ -- ^ Start category
-> CFRules -> MFA Token
buildMFA start g = sortSubLats $ removeUnusedSubLats mfa
where fas = compileAutomata g
mfa = MFA start [(c, minimize fa) | (c,fa) <- fas]
mfaStartDFA :: MFA a -> DFA (MFALabel a)
mfaStartDFA (MFA start subs) =
fromMaybe (error $ "Bad start MFA: " ++ start) $ lookup start subs
mfaToDFA :: Ord a => MFA a -> DFA a
mfaToDFA mfa@(MFA _ subs) = minimize $ expand $ dfa2nfa $ mfaStartDFA mfa
where
subs' = Map.fromList [(c, dfa2nfa n) | (c,n) <- subs]
getSub l = fromJust $ Map.lookup l subs'
expand (FA (Graph c ns es) s f)
= foldl' expandEdge (FA (Graph c ns []) s f) es
expandEdge fa (f,t,x) =
case x of
Nothing -> newTransition f t Nothing fa
Just (Tok s) -> newTransition f t (Just s) fa
Just (Cat l) -> insertNFA fa (f,t) (expand $ getSub l)
removeUnusedSubLats :: MFA a -> MFA a
removeUnusedSubLats mfa@(MFA start subs) = MFA start [(c,s) | (c,s) <- subs, isUsed c]
where
usedMap = subLatUseMap mfa
used = growUsedSet (Set.singleton start)
isUsed c = c `Set.member` used
growUsedSet = fix (\s -> foldl Set.union s $ mapMaybe (flip Map.lookup usedMap) $ Set.toList s)
subLatUseMap :: MFA a -> Map String (Set String)
subLatUseMap (MFA _ subs) = Map.fromList [(c,usedSubLats n) | (c,n) <- subs]
usedSubLats :: DFA (MFALabel a) -> Set String
usedSubLats fa = Set.fromList [s | (_,_,Cat s) <- transitions fa]
revMultiMap :: (Ord a, Ord b) => Map a (Set b) -> Map b (Set a)
revMultiMap m = Map.fromListWith Set.union [ (y,Set.singleton x) | (x,s) <- Map.toList m, y <- Set.toList s]
-- | Sort sub-networks topologically.
sortSubLats :: MFA a -> MFA a
sortSubLats mfa@(MFA main subs) = MFA main (reverse $ sortLats usedByMap subs)
where
usedByMap = revMultiMap (subLatUseMap mfa)
sortLats _ [] = []
sortLats ub ls = xs ++ sortLats ub' ys
where (xs,ys) = partition ((==0) . indeg) ls
ub' = Map.map (Set.\\ Set.fromList (map fst xs)) ub
indeg (c,_) = maybe 0 Set.size $ Map.lookup c ub
-- | Convert a strongly regular grammar to a number of finite automata,
-- one for each non-terminal.
-- The edges in the automata accept tokens, or name another automaton to use.
compileAutomata :: CFRules
-> [(Cat_,NFA (Symbol Cat_ Token))]
-- ^ A map of non-terminals and their automata.
compileAutomata g = [(c, makeOneFA c) | c <- allCats g]
where
mrs = mutRecSets g $ mutRecCats True g
makeOneFA c = make_fa1 mr s [Cat c] f fa
where (fa,s,f) = newFA_
mr = fromJust (Map.lookup c mrs)
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000,
-- adapted to build a finite automaton for a single (mutually recursive) set only.
-- Categories not in the set will result in category-labelled edges.
make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which
-- we are building the automaton.
-> State -- ^ State to come from
-> [Symbol Cat_ Token] -- ^ Symbols to accept
-> State -- ^ State to end up in
-> NFA (Symbol Cat_ Token) -- ^ FA to add to.
-> NFA (Symbol Cat_ Token)
make_fa1 mr q0 alpha q1 fa =
case alpha of
[] -> newTransition q0 q1 Nothing fa
[t@(Tok _)] -> newTransition q0 q1 (Just t) fa
[c@(Cat a)] | not (a `Set.member` mrCats mr) -> newTransition q0 q1 (Just c) fa
[Cat a] ->
case mrRec mr of
NotR -> -- the set is a non-recursive (always singleton) set of categories
-- so the set of category rules is the set of rules for the whole set
make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa
RightR -> -- the set is right-recursive or cyclic
let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr]
++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr,
let (xs,Cat d) = (init ss,last ss)]
in make_fas new $ newTransition q0 (getState a) Nothing fa'
LeftR -> -- the set is left-recursive
let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr]
++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- mrRecRules mr]
in make_fas new $ newTransition (getState a) q1 Nothing fa'
where
(fa',stateMap) = addStatesForCats (mrCats mr) fa
getState x = Map.findWithDefault
(error $ "CFGToFiniteState: No state for " ++ x)
x stateMap
(x:beta) -> let (fa',q) = newState () fa
in make_fas [(q0,[x],q),(q,beta,q1)] fa'
where
make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa1 mr s1 xs s2 f') fa xs
mutRecSets :: CFRules -> [Set Cat_] -> MutRecSets
mutRecSets g = Map.fromList . concatMap mkMutRecSet
where
mkMutRecSet cs = [ (c,ms) | c <- csl ]
where csl = Set.toList cs
rs = catSetRules g cs
(nrs,rrs) = partition (ruleIsNonRecursive cs) rs
ms = MutRecSet {
mrCats = cs,
mrNonRecRules = nrs,
mrRecRules = rrs,
mrRec = rec
}
rec | null rrs = NotR
| all (isRightLinear cs) rrs = RightR
| otherwise = LeftR
--
-- * Utilities
--
-- | Add a state for the given NFA for each of the categories
-- in the given set. Returns a map of categories to their
-- corresponding states.
addStatesForCats :: Set Cat_ -> NFA t -> (NFA t, Map Cat_ State)
addStatesForCats cs fa = (fa', m)
where (fa', ns) = newStates (replicate (Set.size cs) ()) fa
m = Map.fromList (zip (Set.toList cs) (map fst ns))

View File

@@ -1,329 +0,0 @@
----------------------------------------------------------------------
-- |
-- Module : FiniteState
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/10 16:43:44 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.16 $
--
-- A simple finite state network module.
-----------------------------------------------------------------------------
module GF.Speech.FiniteState (FA(..), State, NFA, DFA,
startState, finalStates,
states, transitions,
isInternal,
newFA, newFA_,
addFinalState,
newState, newStates,
newTransition, newTransitions,
insertTransitionWith, insertTransitionsWith,
mapStates, mapTransitions,
modifyTransitions,
nonLoopTransitionsTo, nonLoopTransitionsFrom,
loops,
removeState,
oneFinalState,
insertNFA,
onGraph,
moveLabelsToNodes, removeTrivialEmptyNodes,
minimize,
dfa2nfa,
unusedNames, renameStates,
prFAGraphviz, faToGraphviz) where
import Data.List
import Data.Maybe
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set
import GF.Data.Utilities
import GF.Speech.Graph
import qualified GF.Visualization.Graphviz as Dot
type State = Int
-- | Type parameters: node id type, state label type, edge label type
-- Data constructor arguments: nodes and edges, start state, final states
data FA n a b = FA !(Graph n a b) !n ![n]
type NFA a = FA State () (Maybe a)
type DFA a = FA State () a
startState :: FA n a b -> n
startState (FA _ s _) = s
finalStates :: FA n a b -> [n]
finalStates (FA _ _ ss) = ss
states :: FA n a b -> [(n,a)]
states (FA g _ _) = nodes g
transitions :: FA n a b -> [(n,n,b)]
transitions (FA g _ _) = edges g
newFA :: Enum n => a -- ^ Start node label
-> FA n a b
newFA l = FA g s []
where (g,s) = newNode l (newGraph [toEnum 0..])
-- | Create a new finite automaton with an initial and a final state.
newFA_ :: Enum n => (FA n () b, n, n)
newFA_ = (fa'', s, f)
where fa = newFA ()
s = startState fa
(fa',f) = newState () fa
fa'' = addFinalState f fa'
addFinalState :: n -> FA n a b -> FA n a b
addFinalState f (FA g s ss) = FA g s (f:ss)
newState :: a -> FA n a b -> (FA n a b, n)
newState x (FA g s ss) = (FA g' s ss, n)
where (g',n) = newNode x g
newStates :: [a] -> FA n a b -> (FA n a b, [(n,a)])
newStates xs (FA g s ss) = (FA g' s ss, ns)
where (g',ns) = newNodes xs g
newTransition :: n -> n -> b -> FA n a b -> FA n a b
newTransition f t l = onGraph (newEdge (f,t,l))
newTransitions :: [(n, n, b)] -> FA n a b -> FA n a b
newTransitions es = onGraph (newEdges es)
insertTransitionWith :: Eq n =>
(b -> b -> b) -> (n, n, b) -> FA n a b -> FA n a b
insertTransitionWith f t = onGraph (insertEdgeWith f t)
insertTransitionsWith :: Eq n =>
(b -> b -> b) -> [(n, n, b)] -> FA n a b -> FA n a b
insertTransitionsWith f ts fa =
foldl' (flip (insertTransitionWith f)) fa ts
mapStates :: (a -> c) -> FA n a b -> FA n c b
mapStates f = onGraph (nmap f)
mapTransitions :: (b -> c) -> FA n a b -> FA n a c
mapTransitions f = onGraph (emap f)
modifyTransitions :: ([(n,n,b)] -> [(n,n,b)]) -> FA n a b -> FA n a b
modifyTransitions f = onGraph (\ (Graph r ns es) -> Graph r ns (f es))
removeState :: Ord n => n -> FA n a b -> FA n a b
removeState n = onGraph (removeNode n)
minimize :: Ord a => NFA a -> DFA a
minimize = determinize . reverseNFA . dfa2nfa . determinize . reverseNFA
unusedNames :: FA n a b -> [n]
unusedNames (FA (Graph names _ _) _ _) = names
-- | Gets all incoming transitions to a given state, excluding
-- transtions from the state itself.
nonLoopTransitionsTo :: Eq n => n -> FA n a b -> [(n,b)]
nonLoopTransitionsTo s fa =
[(f,l) | (f,t,l) <- transitions fa, t == s && f /= s]
nonLoopTransitionsFrom :: Eq n => n -> FA n a b -> [(n,b)]
nonLoopTransitionsFrom s fa =
[(t,l) | (f,t,l) <- transitions fa, f == s && t /= s]
loops :: Eq n => n -> FA n a b -> [b]
loops s fa = [l | (f,t,l) <- transitions fa, f == s && t == s]
-- | Give new names to all nodes.
renameStates :: Ord x => [y] -- ^ Infinite supply of new names
-> FA x a b
-> FA y a b
renameStates supply (FA g s fs) = FA (renameNodes newName rest g) s' fs'
where (ns,rest) = splitAt (length (nodes g)) supply
newNodes = Map.fromList (zip (map fst (nodes g)) ns)
newName n = Map.findWithDefault (error "FiniteState.newName") n newNodes
s' = newName s
fs' = map newName fs
-- | Insert an NFA into another
insertNFA :: NFA a -- ^ NFA to insert into
-> (State, State) -- ^ States to insert between
-> NFA a -- ^ NFA to insert.
-> NFA a
insertNFA (FA g1 s1 fs1) (f,t) (FA g2 s2 fs2)
= FA (newEdges es g') s1 fs1
where
es = (f,ren s2,Nothing):[(ren f2,t,Nothing) | f2 <- fs2]
(g',ren) = mergeGraphs g1 g2
onGraph :: (Graph n a b -> Graph n c d) -> FA n a b -> FA n c d
onGraph f (FA g s ss) = FA (f g) s ss
-- | Make the finite automaton have a single final state
-- by adding a new final state and adding an edge
-- from the old final states to the new state.
oneFinalState :: a -- ^ Label to give the new node
-> b -- ^ Label to give the new edges
-> FA n a b -- ^ The old network
-> FA n a b -- ^ The new network
oneFinalState nl el fa =
let (FA g s fs,nf) = newState nl fa
es = [ (f,nf,el) | f <- fs ]
in FA (newEdges es g) s [nf]
-- | Transform a standard finite automaton with labelled edges
-- to one where the labels are on the nodes instead. This can add
-- up to one extra node per edge.
moveLabelsToNodes :: (Ord n,Eq a) => FA n () (Maybe a) -> FA n (Maybe a) ()
moveLabelsToNodes = onGraph f
where f g@(Graph c _ _) = Graph c' ns (concat ess)
where is = [ ((n,l),inc) | (n, (l,inc,_)) <- Map.toList (nodeInfo g)]
(c',is') = mapAccumL fixIncoming c is
(ns,ess) = unzip (concat is')
-- | Remove empty nodes which are not start or final, and have
-- exactly one outgoing edge or exactly one incoming edge.
removeTrivialEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) ()
removeTrivialEmptyNodes = pruneUnusable . skipSimpleEmptyNodes
-- | Move edges to empty nodes to point to the next node(s).
-- This is not done if the pointed-to node is a final node.
skipSimpleEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) ()
skipSimpleEmptyNodes fa = onGraph og fa
where
og g@(Graph c ns es) = if es' == es then g else og (Graph c ns es')
where
es' = concatMap changeEdge es
info = nodeInfo g
changeEdge e@(f,t,())
| isNothing (getNodeLabel info t)
-- && (i * o <= i + o)
&& not (isFinal fa t)
= [ (f,t',()) | (_,t',()) <- getOutgoing info t]
| otherwise = [e]
-- where i = inDegree info t
-- o = outDegree info t
isInternal :: Eq n => FA n a b -> n -> Bool
isInternal (FA _ start final) n = n /= start && n `notElem` final
isFinal :: Eq n => FA n a b -> n -> Bool
isFinal (FA _ _ final) n = n `elem` final
-- | Remove all internal nodes with no incoming edges
-- or no outgoing edges.
pruneUnusable :: Ord n => FA n (Maybe a) () -> FA n (Maybe a) ()
pruneUnusable fa = onGraph f fa
where
f g = if Set.null rns then g else f (removeNodes rns g)
where info = nodeInfo g
rns = Set.fromList [ n | (n,_) <- nodes g,
isInternal fa n,
inDegree info n == 0
|| outDegree info n == 0]
fixIncoming :: (Ord n, Eq a) => [n]
-> (Node n (),[Edge n (Maybe a)]) -- ^ A node and its incoming edges
-> ([n],[(Node n (Maybe a),[Edge n ()])]) -- ^ Replacement nodes with their
-- incoming edges.
fixIncoming cs c@((n,()),es) = (cs'', ((n,Nothing),es'):newContexts)
where ls = nub $ map edgeLabel es
(cs',cs'') = splitAt (length ls) cs
newNodes = zip cs' ls
es' = [ (x,n,()) | x <- map fst newNodes ]
-- separate cyclic and non-cyclic edges
(cyc,ncyc) = partition (\ (f,_,_) -> f == n) es
-- keep all incoming non-cyclic edges with the right label
to (x,l) = [ (f,x,()) | (f,_,l') <- ncyc, l == l']
-- for each cyclic edge with the right label,
-- add an edge from each of the new nodes (including this one)
++ [ (y,x,()) | (f,_,l') <- cyc, l == l', (y,_) <- newNodes]
newContexts = [ (v, to v) | v <- newNodes ]
alphabet :: Eq b => Graph n a (Maybe b) -> [b]
alphabet = nub . catMaybes . map edgeLabel . edges
determinize :: Ord a => NFA a -> DFA a
determinize (FA g s f) = let (ns,es) = h (Set.singleton start) Set.empty Set.empty
(ns',es') = (Set.toList ns, Set.toList es)
final = filter isDFAFinal ns'
fa = FA (Graph undefined [(n,()) | n <- ns'] es') start final
in renameStates [0..] fa
where info = nodeInfo g
-- reach = nodesReachable out
start = closure info $ Set.singleton s
isDFAFinal n = not (Set.null (Set.fromList f `Set.intersection` n))
h currentStates oldStates es
| Set.null currentStates = (oldStates,es)
| otherwise = ((h $! uniqueNewStates) $! allOldStates) $! es'
where
allOldStates = oldStates `Set.union` currentStates
(newStates,es') = new (Set.toList currentStates) Set.empty es
uniqueNewStates = newStates Set.\\ allOldStates
-- Get the sets of states reachable from the given states
-- by consuming one symbol, and the associated edges.
new [] rs es = (rs,es)
new (n:ns) rs es = new ns rs' es'
where cs = reachable info n --reachable reach n
rs' = rs `Set.union` Set.fromList (map snd cs)
es' = es `Set.union` Set.fromList [(n,s,c) | (c,s) <- cs]
-- | Get all the nodes reachable from a list of nodes by only empty edges.
closure :: Ord n => NodeInfo n a (Maybe b) -> Set n -> Set n
closure info x = closure_ x x
where closure_ acc check | Set.null check = acc
| otherwise = closure_ acc' check'
where
reach = Set.fromList [y | x <- Set.toList check,
(_,y,Nothing) <- getOutgoing info x]
acc' = acc `Set.union` reach
check' = reach Set.\\ acc
-- | Get a map of labels to sets of all nodes reachable
-- from a the set of nodes by one edge with the given
-- label and then any number of empty edges.
reachable :: (Ord n,Ord b) => NodeInfo n a (Maybe b) -> Set n -> [(b,Set n)]
reachable info ns = Map.toList $ Map.map (closure info . Set.fromList) $ reachable1 info ns
reachable1 info ns = Map.fromListWith (++) [(c, [y]) | n <- Set.toList ns, (_,y,Just c) <- getOutgoing info n]
reverseNFA :: NFA a -> NFA a
reverseNFA (FA g s fs) = FA g''' s' [s]
where g' = reverseGraph g
(g'',s') = newNode () g'
g''' = newEdges [(s',f,Nothing) | f <- fs] g''
dfa2nfa :: DFA a -> NFA a
dfa2nfa = mapTransitions Just
--
-- * Visualization
--
prFAGraphviz :: (Eq n,Show n) => FA n String String -> String
prFAGraphviz = Dot.prGraphviz . faToGraphviz
prFAGraphviz_ :: (Eq n,Show n,Show a, Show b) => FA n a b -> String
prFAGraphviz_ = Dot.prGraphviz . faToGraphviz . mapStates show . mapTransitions show
faToGraphviz :: (Eq n,Show n) => FA n String String -> Dot.Graph
faToGraphviz (FA (Graph _ ns es) s f)
= Dot.Graph Dot.Directed Nothing [] (map mkNode ns) (map mkEdge es) []
where mkNode (n,l) = Dot.Node (show n) attrs
where attrs = [("label",l)]
++ if n == s then [("shape","box")] else []
++ if n `elem` f then [("style","bold")] else []
mkEdge (x,y,l) = Dot.Edge (show x) (show y) [("label",l)]
--
-- * Utilities
--
lookups :: Ord k => [k] -> Map k a -> [a]
lookups xs m = mapMaybe (flip Map.lookup m) xs

View File

@@ -1,285 +0,0 @@
----------------------------------------------------------------------
-- |
-- Module : GrammarToVoiceXML
-- Maintainer : Bjorn Bringert
-- Stability : (stable)
-- Portability : (portable)
--
-- Create VoiceXML dialogue system from a GF grammar.
-----------------------------------------------------------------------------
module GF.Speech.GrammarToVoiceXML (grammar2vxml) where
import GF.Canon.CanonToGFCC (canon2gfcc)
import qualified GF.GFCC.CId as C
import GF.GFCC.DataGFCC (GFCC(..), Abstr(..))
import GF.GFCC.Macros
import qualified GF.Canon.GFC as GFC
import GF.Canon.AbsGFC (Term)
import GF.Canon.PrintGFC (printTree)
import GF.Canon.CMacros (noMark, strsFromTerm)
import GF.Canon.Unlex (formatAsText)
import GF.Data.Utilities
import GF.CF.CFIdent (cfCat2Ident)
import GF.Compile.ShellState (StateGrammar,stateGrammarST,cncId,grammar,
startCatStateOpts,stateOptions)
import GF.Data.Str (sstrV)
import GF.Grammar.Macros hiding (assign,strsFromTerm)
import GF.Grammar.Grammar (Fun)
import GF.Grammar.Values (Tree)
import GF.Infra.Option (Options, addOptions, getOptVal, speechLanguage)
import GF.UseGrammar.GetTree (string2treeErr)
import GF.UseGrammar.Linear (linTree2strings)
import GF.Infra.Ident
import GF.Infra.Option (noOptions)
import GF.Infra.Modules
import GF.Data.Operations
import GF.Data.XML
import Control.Monad (liftM)
import Data.List (isPrefixOf, find, intersperse)
import qualified Data.Map as Map
import Data.Maybe (fromMaybe)
import Debug.Trace
-- | the main function
grammar2vxml :: Options -> StateGrammar -> String
grammar2vxml opt s = showsXMLDoc (skel2vxml name language startcat gr' qs) ""
where (_, gr') = vSkeleton (stateGrammarST s)
name = prIdent (cncId s)
qs = catQuestions s (map fst gr')
opts = addOptions opt (stateOptions s)
language = fmap (replace '_' '-') $ getOptVal opts speechLanguage
startcat = C.CId $ prIdent $ cfCat2Ident $ startCatStateOpts opts s
--
-- * VSkeleton: a simple description of the abstract syntax.
--
type VSkeleton = [(VIdent, [(VIdent, [VIdent])])]
type VIdent = C.CId
prid :: VIdent -> String
prid (C.CId x) = x
vSkeleton :: GFC.CanonGrammar -> (VIdent,VSkeleton)
vSkeleton = gfccSkeleton . canon2gfcc noOptions
gfccSkeleton :: GFCC -> (VIdent,VSkeleton)
gfccSkeleton gfcc = (absname gfcc, ts)
where a = abstract gfcc
ts = [(c,[(f,ft f) | f <- fs]) | (c,fs) <- Map.toList (catfuns a)]
ft f = case lookMap (error $ prid f) f (funs a) of
(ty,_) -> fst $ GF.GFCC.Macros.catSkeleton ty
--
-- * Questions to ask
--
type CatQuestions = [(VIdent,String)]
catQuestions :: StateGrammar -> [VIdent] -> CatQuestions
catQuestions gr cats = [(c,catQuestion gr c) | c <- cats]
catQuestion :: StateGrammar -> VIdent -> String
catQuestion gr cat = err errHandler id (getPrintname gr cat >>= term2string)
where -- FIXME: use some better warning facility
errHandler e = trace ("GrammarToVoiceXML: " ++ e) ("quest_"++prid cat)
term2string = liftM sstrV . strsFromTerm
getPrintname :: StateGrammar -> VIdent -> Err Term
getPrintname gr cat =
do m <- lookupModMod (grammar gr) (cncId gr)
i <- lookupInfo m (IC (prid cat))
case i of
GFC.CncCat _ _ p -> return p
_ -> fail $ "getPrintname " ++ prid cat
++ ": Expected CncCat, got " ++ show i
{-
lin :: StateGrammar -> String -> Err String
lin gr fun = do
tree <- string2treeErr gr fun
let ls = map unt $ linTree2strings noMark g c tree
case ls of
[] -> fail $ "No linearization of " ++ fun
l:_ -> return l
where c = cncId gr
g = stateGrammarST gr
unt = formatAsText
-}
getCatQuestion :: VIdent -> CatQuestions -> String
getCatQuestion c qs =
fromMaybe (error "No question for category " ++ prid c) (lookup c qs)
--
-- * Generate VoiceXML
--
skel2vxml :: String -> Maybe String -> VIdent -> VSkeleton -> CatQuestions -> XML
skel2vxml name language start skel qs =
vxml language ([startForm] ++ concatMap (uncurry (catForms gr qs)) skel)
where
gr = grammarURI name
startForm = Tag "form" [] [subdialog "sub" [("src", "#"++catFormId start)]
[param "old" "{ name : '?' }"]]
grammarURI :: String -> String
grammarURI name = name ++ ".grxml"
catForms :: String -> CatQuestions -> VIdent -> [(VIdent, [VIdent])] -> [XML]
catForms gr qs cat fs =
comments [prid cat ++ " category."]
++ [cat2form gr qs cat fs]
cat2form :: String -> CatQuestions -> VIdent -> [(VIdent, [VIdent])] -> XML
cat2form gr qs cat fs =
form (catFormId cat) $
[var "old" Nothing,
blockCond "old.name != '?'" [assign "term" "old"],
field "term" []
[promptString (getCatQuestion cat qs),
vxmlGrammar (gr++"#"++catFormId cat)
]
]
++ concatMap (uncurry (fun2sub gr cat)) fs
++ [block [return_ ["term"]{-]-}]]
fun2sub :: String -> VIdent -> VIdent -> [VIdent] -> [XML]
fun2sub gr cat fun args =
comments [prid fun ++ " : ("
++ concat (intersperse ", " (map prid args))
++ ") " ++ prid cat] ++ ss
where
ss = zipWith mkSub [0..] args
mkSub n t = subdialog s [("src","#"++catFormId t),
("cond","term.name == "++string (prid fun))]
[param "old" v,
filled [] [assign v (s++".term")]]
where s = prid fun ++ "_" ++ show n
v = "term.args["++show n++"]"
catFormId :: VIdent -> String
catFormId c = prid c ++ "_cat"
--
-- * VoiceXML stuff
--
vxml :: Maybe String -> [XML] -> XML
vxml ml = Tag "vxml" $ [("version","2.0"),
("xmlns","http://www.w3.org/2001/vxml")]
++ maybe [] (\l -> [("xml:lang", l)]) ml
form :: String -> [XML] -> XML
form id xs = Tag "form" [("id", id)] xs
field :: String -> [(String,String)] -> [XML] -> XML
field name attrs = Tag "field" ([("name",name)]++attrs)
subdialog :: String -> [(String,String)] -> [XML] -> XML
subdialog name attrs = Tag "subdialog" ([("name",name)]++attrs)
filled :: [(String,String)] -> [XML] -> XML
filled = Tag "filled"
vxmlGrammar :: String -> XML
vxmlGrammar uri = ETag "grammar" [("src",uri)]
prompt :: [XML] -> XML
prompt = Tag "prompt" []
promptString :: String -> XML
promptString p = prompt [Data p]
reprompt :: XML
reprompt = ETag "reprompt" []
assign :: String -> String -> XML
assign n e = ETag "assign" [("name",n),("expr",e)]
value :: String -> XML
value expr = ETag "value" [("expr",expr)]
if_ :: String -> [XML] -> XML
if_ c b = if_else c b []
if_else :: String -> [XML] -> [XML] -> XML
if_else c t f = cond [(c,t)] f
cond :: [(String,[XML])] -> [XML] -> XML
cond ((c,b):rest) els = Tag "if" [("cond",c)] (b ++ es)
where es = [Tag "elseif" [("cond",c')] b' | (c',b') <- rest]
++ if null els then [] else (Tag "else" [] []:els)
goto_item :: String -> XML
goto_item nextitem = ETag "goto" [("nextitem",nextitem)]
return_ :: [String] -> XML
return_ names = ETag "return" [("namelist", unwords names)]
block :: [XML] -> XML
block = Tag "block" []
blockCond :: String -> [XML] -> XML
blockCond cond = Tag "block" [("cond", cond)]
throw :: String -> String -> XML
throw event msg = Tag "throw" [("event",event),("message",msg)] []
nomatch :: [XML] -> XML
nomatch = Tag "nomatch" []
help :: [XML] -> XML
help = Tag "help" []
param :: String -> String -> XML
param name expr = ETag "param" [("name",name),("expr",expr)]
var :: String -> Maybe String -> XML
var name expr = ETag "var" ([("name",name)]++e)
where e = maybe [] ((:[]) . (,) "expr") expr
script :: String -> XML
script s = Tag "script" [] [CData s]
scriptURI :: String -> XML
scriptURI uri = Tag "script" [("uri", uri)] []
--
-- * ECMAScript stuff
--
string :: String -> String
string s = "'" ++ concatMap esc s ++ "'"
where esc '\'' = "\\'"
esc c = [c]
{-
--
-- * List stuff
--
isListCat :: (VIdent, [(VIdent, [VIdent])]) -> Bool
isListCat (cat,rules) = "List" `isPrefixOf` prIdent cat && length rules == 2
&& ("Base"++c) `elem` fs && ("Cons"++c) `elem` fs
where c = drop 4 (prIdent cat)
fs = map (prIdent . fst) rules
isBaseFun :: VIdent -> Bool
isBaseFun f = "Base" `isPrefixOf` prIdent f
isConsFun :: VIdent -> Bool
isConsFun f = "Cons" `isPrefixOf` prIdent f
baseSize :: (VIdent, [(VIdent, [VIdent])]) -> Int
baseSize (_,rules) = length bs
where Just (_,bs) = find (isBaseFun . fst) rules
-}

View File

@@ -1,178 +0,0 @@
----------------------------------------------------------------------
-- |
-- Module : Graph
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/10 16:43:44 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.2 $
--
-- A simple graph module.
-----------------------------------------------------------------------------
module GF.Speech.Graph ( Graph(..), Node, Edge, NodeInfo
, newGraph, nodes, edges
, nmap, emap, newNode, newNodes, newEdge, newEdges
, insertEdgeWith
, removeNode, removeNodes
, nodeInfo
, getIncoming, getOutgoing, getNodeLabel
, inDegree, outDegree
, nodeLabel
, edgeFrom, edgeTo, edgeLabel
, reverseGraph, mergeGraphs, renameNodes
) where
import GF.Data.Utilities
import Data.List
import Data.Maybe
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set
data Graph n a b = Graph [n] ![Node n a] ![Edge n b]
deriving (Eq,Show)
type Node n a = (n,a)
type Edge n b = (n,n,b)
type NodeInfo n a b = Map n (a, [Edge n b], [Edge n b])
-- | Create a new empty graph.
newGraph :: [n] -> Graph n a b
newGraph ns = Graph ns [] []
-- | Get all the nodes in the graph.
nodes :: Graph n a b -> [Node n a]
nodes (Graph _ ns _) = ns
-- | Get all the edges in the graph.
edges :: Graph n a b -> [Edge n b]
edges (Graph _ _ es) = es
-- | Map a function over the node labels.
nmap :: (a -> c) -> Graph n a b -> Graph n c b
nmap f (Graph c ns es) = Graph c [(n,f l) | (n,l) <- ns] es
-- | Map a function over the edge labels.
emap :: (b -> c) -> Graph n a b -> Graph n a c
emap f (Graph c ns es) = Graph c ns [(x,y,f l) | (x,y,l) <- es]
-- | Add a node to the graph.
newNode :: a -- ^ Node label
-> Graph n a b
-> (Graph n a b,n) -- ^ Node graph and name of new node
newNode l (Graph (c:cs) ns es) = (Graph cs ((c,l):ns) es, c)
newNodes :: [a] -> Graph n a b -> (Graph n a b,[Node n a])
newNodes ls g = (g', zip ns ls)
where (g',ns) = mapAccumL (flip newNode) g ls
-- lazy version:
--newNodes ls (Graph cs ns es) = (Graph cs' (ns'++ns) es, ns')
-- where (xs,cs') = splitAt (length ls) cs
-- ns' = zip xs ls
newEdge :: Edge n b -> Graph n a b -> Graph n a b
newEdge e (Graph c ns es) = Graph c ns (e:es)
newEdges :: [Edge n b] -> Graph n a b -> Graph n a b
newEdges es g = foldl' (flip newEdge) g es
-- lazy version:
-- newEdges es' (Graph c ns es) = Graph c ns (es'++es)
insertEdgeWith :: Eq n =>
(b -> b -> b) -> Edge n b -> Graph n a b -> Graph n a b
insertEdgeWith f e@(x,y,l) (Graph c ns es) = Graph c ns (h es)
where h [] = [e]
h (e'@(x',y',l'):es') | x' == x && y' == y = (x',y', f l l'):es'
| otherwise = e':h es'
-- | Remove a node and all edges to and from that node.
removeNode :: Ord n => n -> Graph n a b -> Graph n a b
removeNode n = removeNodes (Set.singleton n)
-- | Remove a set of nodes and all edges to and from those nodes.
removeNodes :: Ord n => Set n -> Graph n a b -> Graph n a b
removeNodes xs (Graph c ns es) = Graph c ns' es'
where
keepNode n = not (Set.member n xs)
ns' = [ x | x@(n,_) <- ns, keepNode n ]
es' = [ e | e@(f,t,_) <- es, keepNode f && keepNode t ]
-- | Get a map of node names to info about each node.
nodeInfo :: Ord n => Graph n a b -> NodeInfo n a b
nodeInfo g = Map.fromList [ (n, (x, fn inc n, fn out n)) | (n,x) <- nodes g ]
where
inc = groupEdgesBy edgeTo g
out = groupEdgesBy edgeFrom g
fn m n = fromMaybe [] (Map.lookup n m)
groupEdgesBy :: (Ord n) => (Edge n b -> n) -- ^ Gets the node to group by
-> Graph n a b -> Map n [Edge n b]
groupEdgesBy f g = Map.fromListWith (++) [(f e, [e]) | e <- edges g]
lookupNode :: Ord n => NodeInfo n a b -> n -> (a, [Edge n b], [Edge n b])
lookupNode i n = fromJust $ Map.lookup n i
getIncoming :: Ord n => NodeInfo n a b -> n -> [Edge n b]
getIncoming i n = let (_,inc,_) = lookupNode i n in inc
getOutgoing :: Ord n => NodeInfo n a b -> n -> [Edge n b]
getOutgoing i n = let (_,_,out) = lookupNode i n in out
inDegree :: Ord n => NodeInfo n a b -> n -> Int
inDegree i n = length $ getIncoming i n
outDegree :: Ord n => NodeInfo n a b -> n -> Int
outDegree i n = length $ getOutgoing i n
getNodeLabel :: Ord n => NodeInfo n a b -> n -> a
getNodeLabel i n = let (l,_,_) = lookupNode i n in l
nodeLabel :: Node n a -> a
nodeLabel = snd
edgeFrom :: Edge n b -> n
edgeFrom (f,_,_) = f
edgeTo :: Edge n b -> n
edgeTo (_,t,_) = t
edgeLabel :: Edge n b -> b
edgeLabel (_,_,l) = l
reverseGraph :: Graph n a b -> Graph n a b
reverseGraph (Graph c ns es) = Graph c ns [ (t,f,l) | (f,t,l) <- es ]
-- | Add the nodes from the second graph to the first graph.
-- The nodes in the second graph will be renamed using the name
-- supply in the first graph.
-- This function is more efficient when the second graph
-- is smaller than the first.
mergeGraphs :: Ord m => Graph n a b -> Graph m a b
-> (Graph n a b, m -> n) -- ^ The new graph and a function translating
-- the old names of nodes in the second graph
-- to names in the new graph.
mergeGraphs (Graph c ns1 es1) g2 = (Graph c' (ns2++ns1) (es2++es1), newName)
where
(xs,c') = splitAt (length (nodes g2)) c
newNames = Map.fromList (zip (map fst (nodes g2)) xs)
newName n = fromJust $ Map.lookup n newNames
Graph _ ns2 es2 = renameNodes newName undefined g2
-- | Rename the nodes in the graph.
renameNodes :: (n -> m) -- ^ renaming function
-> [m] -- ^ infinite supply of fresh node names, to
-- use when adding nodes in the future.
-> Graph n a b -> Graph m a b
renameNodes newName c (Graph _ ns es) = Graph c ns' es'
where ns' = map' (\ (n,x) -> (newName n,x)) ns
es' = map' (\ (f,t,l) -> (newName f, newName t, l)) es
-- | A strict 'map'
map' :: (a -> b) -> [a] -> [b]
map' _ [] = []
map' f (x:xs) = ((:) $! f x) $! map' f xs

View File

@@ -1,56 +0,0 @@
----------------------------------------------------------------------
-- |
-- Module : PrSLF
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/10 16:43:44 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.2 $
--
-- This module prints finite automata and regular grammars
-- for a context-free grammar.
--
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
-- categories in the grammar
-----------------------------------------------------------------------------
module GF.Speech.PrFA (faGraphvizPrinter,regularPrinter,faCPrinter) where
import GF.Data.Utilities
import GF.Conversion.Types
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..),symbol)
import GF.Infra.Ident
import GF.Infra.Option (Options)
import GF.Infra.Print
import GF.Speech.CFGToFiniteState
import GF.Speech.FiniteState
import GF.Speech.TransformCFG
import GF.Compile.ShellState (StateGrammar)
import Data.Char (toUpper,toLower)
import Data.List
import Data.Maybe (fromMaybe)
faGraphvizPrinter :: Options -> StateGrammar -> String
faGraphvizPrinter opts s =
prFAGraphviz $ mapStates (const "") $ cfgToFA opts s
-- | Convert the grammar to a regular grammar and print it in BNF
regularPrinter :: Options -> StateGrammar -> String
regularPrinter opts s = prCFRules $ makeSimpleRegular opts s
where
prCFRules :: CFRules -> String
prCFRules g = unlines [ c ++ " ::= " ++ join " | " (map (showRhs . ruleRhs) rs) | (c,rs) <- allRulesGrouped g]
join g = concat . intersperse g
showRhs = unwords . map (symbol id show)
faCPrinter :: Options -> StateGrammar -> String
faCPrinter opts s = fa2c $ cfgToFA opts s
fa2c :: DFA String -> String
fa2c fa = undefined

View File

@@ -1,113 +0,0 @@
----------------------------------------------------------------------
-- |
-- Module : PrGSL
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/01 20:09:04 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.22 $
--
-- This module prints a CFG as a Nuance GSL 2.0 grammar.
--
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
-- categories in the grammar
-----------------------------------------------------------------------------
module GF.Speech.PrGSL (gslPrinter) where
import GF.Data.Utilities
import GF.Speech.SRG
import GF.Speech.RegExp
import GF.Infra.Ident
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..))
import GF.Conversion.Types
import GF.Infra.Print
import GF.Infra.Option
import GF.Probabilistic.Probabilistic (Probs)
import GF.Compile.ShellState (StateGrammar)
import Data.Char (toUpper,toLower)
import Data.List (partition)
import Text.PrettyPrint.HughesPJ
width :: Int
width = 75
gslPrinter :: Options -> StateGrammar -> String
gslPrinter opts s = renderStyle st $ prGSL $ makeSimpleSRG opts s
where st = style { lineLength = width }
prGSL :: SRG -> Doc
prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
= header $++$ mainCat $++$ foldr ($++$) empty (map prRule rs)
where
header = text ";GSL2.0" $$
comment ("Nuance speech recognition grammar for " ++ name) $$
comment ("Generated by GF")
mainCat = comment ("Start category: " ++ origStart) $$
text ".MAIN" <+> prCat start
prRule (SRGRule cat origCat rhs) =
comment (prt origCat) $$
prCat cat <+> union (map prAlt rhs)
-- FIXME: use the probability
prAlt (SRGAlt mp _ rhs) = prItem rhs
prItem :: SRGItem -> Doc
prItem = f
where
f (REUnion xs) = (if null es then empty else text "?") <> union (map f nes)
where (es,nes) = partition isEpsilon xs
f (REConcat [x]) = f x
f (REConcat xs) = text "(" <> fsep (map f xs) <> text ")"
f (RERepeat x) = text "*" <> f x
f (RESymbol s) = prSymbol s
union :: [Doc] -> Doc
union [x] = x
union xs = text "[" <> fsep xs <> text "]"
prSymbol :: Symbol SRGNT Token -> Doc
prSymbol (Cat (c,_)) = prCat c
prSymbol (Tok t) = doubleQuotes (showToken t)
-- GSL requires an upper case letter in category names
prCat :: SRGCat -> Doc
prCat c = text (firstToUpper c)
firstToUpper :: String -> String
firstToUpper [] = []
firstToUpper (x:xs) = toUpper x : xs
{-
rmPunctCFG :: CGrammar -> CGrammar
rmPunctCFG g = [CFRule c (filter keepSymbol ss) n | CFRule c ss n <- g]
keepSymbol :: Symbol c Token -> Bool
keepSymbol (Tok t) = not (all isPunct (prt t))
keepSymbol _ = True
-}
-- Nuance does not like upper case characters in tokens
showToken :: Token -> Doc
showToken t = text (map toLower (prt t))
isPunct :: Char -> Bool
isPunct c = c `elem` "-_.:;.,?!()[]{}"
comment :: String -> Doc
comment s = text ";" <+> text s
-- Pretty-printing utilities
emptyLine :: Doc
emptyLine = text ""
($++$) :: Doc -> Doc -> Doc
x $++$ y = x $$ emptyLine $$ y

View File

@@ -1,145 +0,0 @@
----------------------------------------------------------------------
-- |
-- Module : PrJSGF
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/01 20:09:04 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.16 $
--
-- This module prints a CFG as a JSGF grammar.
--
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
-- categories in the grammar
--
-- FIXME: convert to UTF-8
-----------------------------------------------------------------------------
module GF.Speech.PrJSGF (jsgfPrinter) where
import GF.Conversion.Types
import GF.Data.Utilities
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), filterCats)
import GF.Infra.Ident
import GF.Infra.Print
import GF.Infra.Option
import GF.Probabilistic.Probabilistic (Probs)
import GF.Speech.SISR
import GF.Speech.SRG
import GF.Speech.RegExp
import GF.Compile.ShellState (StateGrammar)
import Data.Char
import Data.List
import Data.Maybe
import Text.PrettyPrint.HughesPJ
import Debug.Trace
width :: Int
width = 75
jsgfPrinter :: Maybe SISRFormat
-> Options
-> StateGrammar -> String
jsgfPrinter sisr opts s = renderStyle st $ prJSGF sisr $ makeSimpleSRG opts s
where st = style { lineLength = width }
prJSGF :: Maybe SISRFormat -> SRG -> Doc
prJSGF sisr srg@(SRG{grammarName=name,grammarLanguage=ml,
startCat=start,origStartCat=origStart,rules=rs})
= header $++$ mainCat $++$ vcat topCatRules $++$ foldr ($++$) empty (map prRule rs)
where
header = text "#JSGF" <+> text "V1.0" <+> text "UTF-8" <+> lang <> char ';' $$
comment ("JSGF speech recognition grammar for " ++ name) $$
comment "Generated by GF" $$
text ("grammar " ++ name ++ ";")
lang = maybe empty text ml
mainCat = comment ("Start category: " ++ origStart) $$
case cfgCatToGFCat origStart of
Just c -> rule True "MAIN" [prCat (catFormId c)]
Nothing -> empty
prRule (SRGRule cat origCat rhs) =
comment origCat $$
rule False cat (map prAlt rhs)
-- rule False cat (map prAlt rhs)
-- FIXME: use the probability
prAlt (SRGAlt mp n rhs) = sep [initTag, p (prItem sisr n rhs), finalTag]
-- prAlt (SRGAlt mp n rhs) = initTag <+> prItem sisr n rhs <+> finalTag
where initTag | isEmpty t = empty
| otherwise = text "<NULL>" <+> t
where t = tag sisr (profileInitSISR n)
finalTag = tag sisr (profileFinalSISR n)
p = if isEmpty initTag && isEmpty finalTag then id else parens
topCatRules = [rule True (catFormId tc) (map (it tc) cs) | (tc,cs) <- srgTopCats srg]
where it i c = prCat c <+> tag sisr (topCatSISR c)
catFormId :: String -> String
catFormId = (++ "_cat")
prCat :: SRGCat -> Doc
prCat c = char '<' <> text c <> char '>'
prItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> Doc
prItem sisr t = f 0
where
f _ (REUnion []) = text "<VOID>"
f p (REUnion xs)
| not (null es) = brackets (f 0 (REUnion nes))
| otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs))
where (es,nes) = partition isEpsilon xs
f _ (REConcat []) = text "<NULL>"
f p (REConcat xs) = (if p >= 3 then parens else id) (fsep (map (f 2) xs))
f p (RERepeat x) = f 3 x <> char '*'
f _ (RESymbol s) = prSymbol sisr t s
{-
prItem :: Maybe SISRFormat -> CFTerm -> [Symbol SRGNT Token] -> Doc
prItem _ _ [] = text "<NULL>"
prItem sisr cn ss = paren $ hsep $ map (prSymbol sisr cn) ss
where paren = if length ss == 1 then id else parens
-}
prSymbol :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> Doc
prSymbol sisr cn (Cat n@(c,_)) = prCat c <+> tag sisr (catSISR cn n)
prSymbol _ cn (Tok t) | all isPunct (prt t) = empty -- removes punctuation
| otherwise = text (prt t) -- FIXME: quote if there is whitespace or odd chars
tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc
tag Nothing _ = empty
tag (Just fmt) t = case t fmt of
[] -> empty
ts -> char '{' <+> (text (e $ prSISR ts)) <+> char '}'
where e [] = []
e ('}':xs) = '\\':'}':e xs
e ('\n':xs) = ' ' : e (dropWhile isSpace xs)
e (x:xs) = x:e xs
isPunct :: Char -> Bool
isPunct c = c `elem` "-_.;.,?!"
comment :: String -> Doc
comment s = text "//" <+> text s
alts :: [Doc] -> Doc
alts = fsep . prepunctuate (text "| ")
rule :: Bool -> SRGCat -> [Doc] -> Doc
rule pub c xs = p <+> prCat c <+> char '=' <+> nest 2 (alts xs) <+> char ';'
where p = if pub then text "public" else empty
-- Pretty-printing utilities
emptyLine :: Doc
emptyLine = text ""
prepunctuate :: Doc -> [Doc] -> [Doc]
prepunctuate _ [] = []
prepunctuate p (x:xs) = x : map (p <>) xs
($++$) :: Doc -> Doc -> Doc
x $++$ y = x $$ emptyLine $$ y

View File

@@ -1,33 +0,0 @@
----------------------------------------------------------------------
-- |
-- Module : PrSLF
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- This module prints a grammar as a regular expression.
-----------------------------------------------------------------------------
module GF.Speech.PrRegExp (regexpPrinter,multiRegexpPrinter) where
import GF.Conversion.Types
import GF.Formalism.Utilities
import GF.Infra.Ident
import GF.Infra.Option (Options)
import GF.Speech.CFGToFiniteState
import GF.Speech.RegExp
import GF.Compile.ShellState (StateGrammar)
regexpPrinter :: Options -> StateGrammar -> String
regexpPrinter opts s = (++"\n") $ prRE $ dfa2re $ cfgToFA opts s
multiRegexpPrinter :: Options -> StateGrammar -> String
multiRegexpPrinter opts s = prREs $ mfa2res $ cfgToMFA opts s
prREs :: [(String,RE (MFALabel String))] -> String
prREs res = unlines [l ++ " = " ++ prRE (mapRE showLabel re) | (l,re) <- res]
where showLabel = symbol (\l -> "<" ++ l ++ ">") id
mfa2res :: MFA String -> [(String,RE (MFALabel String))]
mfa2res (MFA _ dfas) = [(l, minimizeRE (dfa2re dfa)) | (l,dfa) <- dfas]

View File

@@ -1,190 +0,0 @@
----------------------------------------------------------------------
-- |
-- Module : PrSLF
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/10 16:43:44 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.12 $
--
-- This module converts a CFG to an SLF finite-state network
-- for use with the ATK recognizer. The SLF format is described
-- in the HTK manual, and an example for use in ATK is shown
-- in the ATK manual.
--
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
-- categories in the grammar
-----------------------------------------------------------------------------
module GF.Speech.PrSLF (slfPrinter,slfGraphvizPrinter,
slfSubPrinter,slfSubGraphvizPrinter) where
import GF.Data.Utilities
import GF.Conversion.Types
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..),symbol,mapSymbol)
import GF.Infra.Ident
import GF.Infra.Option (Options)
import GF.Infra.Print
import GF.Speech.CFGToFiniteState
import GF.Speech.FiniteState
import GF.Speech.TransformCFG
import qualified GF.Visualization.Graphviz as Dot
import GF.Compile.ShellState (StateGrammar)
import Control.Monad
import qualified Control.Monad.State as STM
import Data.Char (toUpper)
import Data.List
import Data.Maybe
data SLFs = SLFs [(String,SLF)] SLF
data SLF = SLF { slfNodes :: [SLFNode], slfEdges :: [SLFEdge] }
data SLFNode = SLFNode { nId :: Int, nWord :: SLFWord, nTag :: Maybe String }
| SLFSubLat { nId :: Int, nLat :: String }
-- | An SLF word is a word, or the empty string.
type SLFWord = Maybe String
data SLFEdge = SLFEdge { eId :: Int, eStart :: Int, eEnd :: Int }
type SLF_FA = FA State (Maybe (MFALabel String)) ()
mkFAs :: Options -> StateGrammar -> (SLF_FA, [(String,SLF_FA)])
mkFAs opts s = (slfStyleFA main, [(c,slfStyleFA n) | (c,n) <- subs])
where MFA start subs = {- renameSubs $ -} cfgToMFA opts s
main = let (fa,s,f) = newFA_ in newTransition s f (Cat start) fa
slfStyleFA :: Eq a => DFA a -> FA State (Maybe a) ()
slfStyleFA = renameStates [0..] . removeTrivialEmptyNodes . oneFinalState Nothing ()
. moveLabelsToNodes . dfa2nfa
-- | Give sequential names to subnetworks.
renameSubs :: MFA String -> MFA String
renameSubs (MFA start subs) = MFA (newName start) subs'
where newNames = zip (map fst subs) ["sub"++show n | n <- [0..]]
newName s = lookup' s newNames
subs' = [(newName s,renameLabels n) | (s,n) <- subs]
renameLabels = mapTransitions (mapSymbol newName id)
--
-- * SLF graphviz printing (without sub-networks)
--
slfGraphvizPrinter :: Options -> StateGrammar -> String
slfGraphvizPrinter opts s
= prFAGraphviz $ gvFA $ slfStyleFA $ cfgToFA' opts s
where
gvFA = mapStates (fromMaybe "") . mapTransitions (const "")
--
-- * SLF graphviz printing (with sub-networks)
--
slfSubGraphvizPrinter :: Options -> StateGrammar -> String
slfSubGraphvizPrinter opts s = Dot.prGraphviz g
where (main, subs) = mkFAs opts s
g = STM.evalState (liftM2 Dot.addSubGraphs ss m) [0..]
ss = mapM (\ (c,f) -> gvSLFFA (Just c) f) subs
m = gvSLFFA Nothing main
gvSLFFA :: Maybe String -> SLF_FA -> STM.State [State] Dot.Graph
gvSLFFA n fa =
liftM (mkCluster n . faToGraphviz . mapStates (maybe "" mfaLabelToGv)
. mapTransitions (const "")) (rename fa)
where mfaLabelToGv = symbol ("#"++) id
mkCluster Nothing = id
mkCluster (Just x)
= Dot.setName ("cluster_"++x) . Dot.setAttr "label" x
rename fa = do
names <- STM.get
let fa' = renameStates names fa
names' = unusedNames fa'
STM.put names'
return fa'
--
-- * SLF printing (without sub-networks)
--
slfPrinter :: Options -> StateGrammar -> String
slfPrinter opts s
= prSLF $ automatonToSLF mkSLFNode $ slfStyleFA $ cfgToFA' opts s
--
-- * SLF printing (with sub-networks)
--
-- | Make a network with subnetworks in SLF
slfSubPrinter :: Options -> StateGrammar -> String
slfSubPrinter opts s = prSLFs slfs
where
(main,subs) = mkFAs opts s
slfs = SLFs [(c, faToSLF fa) | (c,fa) <- subs] (faToSLF main)
faToSLF = automatonToSLF mfaNodeToSLFNode
automatonToSLF :: (Int -> a -> SLFNode) -> FA State a () -> SLF
automatonToSLF mkNode fa = SLF { slfNodes = ns, slfEdges = es }
where ns = map (uncurry mkNode) (states fa)
es = zipWith (\i (f,t,()) -> mkSLFEdge i (f,t)) [0..] (transitions fa)
mfaNodeToSLFNode :: Int -> Maybe (MFALabel String) -> SLFNode
mfaNodeToSLFNode i l = case l of
Nothing -> mkSLFNode i Nothing
Just (Tok x) -> mkSLFNode i (Just x)
Just (Cat s) -> mkSLFSubLat i s
mkSLFNode :: Int -> Maybe String -> SLFNode
mkSLFNode i Nothing = SLFNode { nId = i, nWord = Nothing, nTag = Nothing }
mkSLFNode i (Just w)
| isNonWord w = SLFNode { nId = i,
nWord = Nothing,
nTag = Just w }
| otherwise = SLFNode { nId = i,
nWord = Just (map toUpper w),
nTag = Just w }
mkSLFSubLat :: Int -> String -> SLFNode
mkSLFSubLat i sub = SLFSubLat { nId = i, nLat = sub }
mkSLFEdge :: Int -> (Int,Int) -> SLFEdge
mkSLFEdge i (f,t) = SLFEdge { eId = i, eStart = f, eEnd = t }
prSLFs :: SLFs -> String
prSLFs (SLFs subs main) = unlinesS (map prSub subs ++ [prOneSLF main]) ""
where prSub (n,s) = showString "SUBLAT=" . shows n
. nl . prOneSLF s . showString "." . nl
prSLF :: SLF -> String
prSLF slf = prOneSLF slf ""
prOneSLF :: SLF -> ShowS
prOneSLF (SLF { slfNodes = ns, slfEdges = es})
= header . unlinesS (map prNode ns) . nl . unlinesS (map prEdge es) . nl
where
header = prFields [("N",show (length ns)),("L", show (length es))] . nl
prNode (SLFNode { nId = i, nWord = w, nTag = t })
= prFields $ [("I",show i),("W",showWord w)]
++ maybe [] (\t -> [("s",t)]) t
prNode (SLFSubLat { nId = i, nLat = l })
= prFields [("I",show i),("L",show l)]
prEdge e = prFields [("J",show (eId e)),("S",show (eStart e)),("E",show (eEnd e))]
-- | Check if a word should not correspond to a word in the SLF file.
isNonWord :: String -> Bool
isNonWord = any isPunct
isPunct :: Char -> Bool
isPunct c = c `elem` "-_.;.,?!()[]{}"
showWord :: SLFWord -> String
showWord Nothing = "!NULL"
showWord (Just w) | null w = "!NULL"
| otherwise = w
prFields :: [(String,String)] -> ShowS
prFields fs = unwordsS [ showString l . showChar '=' . showString v | (l,v) <- fs ]

View File

@@ -1,153 +0,0 @@
----------------------------------------------------------------------
-- |
-- Module : PrSRGS
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- This module prints a CFG as an SRGS XML grammar.
--
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
-- categories in the grammar
-----------------------------------------------------------------------------
module GF.Speech.PrSRGS (srgsXmlPrinter, srgsXmlNonRecursivePrinter) where
import GF.Data.Utilities
import GF.Data.XML
import GF.Speech.RegExp
import GF.Speech.SISR as SISR
import GF.Speech.SRG
import GF.Infra.Ident
import GF.Today
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), forestName, filterCats)
import GF.Conversion.Types
import GF.Infra.Print
import GF.Infra.Option
import GF.Probabilistic.Probabilistic (Probs)
import GF.Compile.ShellState (StateGrammar)
import Data.Char (toUpper,toLower)
import Data.List
import Data.Maybe
import qualified Data.Map as Map
import qualified Data.Set as Set
srgsXmlPrinter :: Maybe SISRFormat
-> Bool -- ^ Include probabilities
-> Options
-> StateGrammar -> String
srgsXmlPrinter sisr probs opts s = prSrgsXml sisr probs $ makeSimpleSRG opts s
srgsXmlNonRecursivePrinter :: Options -> StateGrammar -> String
srgsXmlNonRecursivePrinter opts s = prSrgsXml Nothing False $ makeNonRecursiveSRG opts s
prSrgsXml :: Maybe SISRFormat -> Bool -> SRG -> String
prSrgsXml sisr probs srg@(SRG{grammarName=name,startCat=start,
origStartCat=origStart,grammarLanguage=l,rules=rs})
= showXMLDoc (optimizeSRGS xmlGr)
where
Just root = cfgCatToGFCat origStart
xmlGr = grammar sisr (catFormId root) l $
[meta "description"
("SRGS XML speech recognition grammar for " ++ name
++ ". " ++ "Original start category: " ++ origStart),
meta "generator" ("Grammatical Framework " ++ version)]
++ topCatRules
++ concatMap ruleToXML rs
ruleToXML (SRGRule cat origCat alts) =
comments ["Category " ++ origCat] ++ [rule cat (prRhs alts)]
prRhs rhss = [oneOf (map (mkProd sisr probs) rhss)]
-- externally visible rules for each of the GF categories
topCatRules = [topRule tc [oneOf (map (it tc) cs)] | (tc,cs) <- srgTopCats srg]
where it i c = Tag "item" [] ([ETag "ruleref" [("uri","#" ++ c)]]
++ tag sisr (topCatSISR c))
topRule i is = Tag "rule" [("id",catFormId i),("scope","public")] is
rule :: String -> [XML] -> XML
rule i = Tag "rule" [("id",i)]
mkProd :: Maybe SISRFormat -> Bool -> SRGAlt -> XML
mkProd sisr probs (SRGAlt mp n rhs) = Tag "item" w (ti ++ [x] ++ tf)
where x = mkItem sisr n rhs
w | probs = maybe [] (\p -> [("weight", show p)]) mp
| otherwise = []
ti = tag sisr (profileInitSISR n)
tf = tag sisr (profileFinalSISR n)
mkItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> XML
mkItem sisr cn = f
where
f (REUnion []) = ETag "ruleref" [("special","VOID")]
f (REUnion xs)
| not (null es) = Tag "item" [("repeat","0-1")] [f (REUnion nes)]
| otherwise = oneOf (map f xs)
where (es,nes) = partition isEpsilon xs
f (REConcat []) = ETag "ruleref" [("special","NULL")]
f (REConcat xs) = Tag "item" [] (map f xs)
f (RERepeat x) = Tag "item" [("repeat","0-")] [f x]
f (RESymbol s) = symItem sisr cn s
{-
mkProd :: Maybe SISRFormat -> Bool -> SRGAlt -> XML
mkProd sisr probs (SRGAlt mp n rhs) = Tag "item" w (ti ++ xs ++ tf)
where xs = mkItem sisr n rhs
w | probs = maybe [] (\p -> [("weight", show p)]) mp
| otherwise = []
ti = [tag sisr (profileInitSISR n)]
tf = [tag sisr (profileFinalSISR n)]
mkItem :: Maybe SISRFormat -> CFTerm -> [Symbol SRGNT Token] -> [XML]
mkItem sisr cn ss = map (symItem sisr cn) ss
-}
symItem :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> XML
symItem sisr cn (Cat n@(c,_)) =
Tag "item" [] $ [ETag "ruleref" [("uri","#" ++ c)]] ++ tag sisr (catSISR cn n)
symItem _ _ (Tok t) = Tag "item" [] [Data (showToken t)]
tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> [XML]
tag Nothing _ = []
tag (Just fmt) t = case t fmt of
[] -> []
ts -> [Tag "tag" [] [Data (prSISR ts)]]
catFormId :: String -> String
catFormId = (++ "_cat")
showToken :: Token -> String
showToken t = t
oneOf :: [XML] -> XML
oneOf = Tag "one-of" []
grammar :: Maybe SISRFormat
-> String -- ^ root
-> Maybe String -- ^language
-> [XML] -> XML
grammar sisr root ml =
Tag "grammar" $ [("xmlns","http://www.w3.org/2001/06/grammar"),
("version","1.0"),
("mode","voice"),
("root",root)]
++ (if isJust sisr then [("tag-format","semantics/1.0")] else [])
++ maybe [] (\l -> [("xml:lang", l)]) ml
meta :: String -> String -> XML
meta n c = ETag "meta" [("name",n),("content",c)]
optimizeSRGS :: XML -> XML
optimizeSRGS = bottomUpXML f
where f (Tag "item" [] [x@(Tag "item" _ _)]) = x
f (Tag "item" [] [x@(Tag "one-of" _ _)]) = x
f (Tag "item" as [Tag "item" [] xs]) = Tag "item" as xs
f (Tag "item" as xs) = Tag "item" as (map g xs)
where g (Tag "item" [] [x@(ETag "ruleref" _)]) = x
g x = x
f (Tag "one-of" [] [x]) = x
f x = x

View File

@@ -1,147 +0,0 @@
----------------------------------------------------------------------
-- |
-- Module : PrJSRGS_ABNF
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/01 20:09:04 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.16 $
--
-- This module prints a CFG as a JSGF grammar.
--
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
-- categories in the grammar
--
-- FIXME: convert to UTF-8
-----------------------------------------------------------------------------
module GF.Speech.PrSRGS_ABNF (srgsAbnfPrinter, srgsAbnfNonRecursivePrinter) where
import GF.Conversion.Types
import GF.Data.Utilities
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), filterCats)
import GF.Infra.Ident
import GF.Infra.Print
import GF.Infra.Option
import GF.Probabilistic.Probabilistic (Probs)
import GF.Speech.SISR
import GF.Speech.SRG
import GF.Speech.RegExp
import GF.Compile.ShellState (StateGrammar)
import GF.Today
import Data.Char
import Data.List
import Data.Maybe
import Text.PrettyPrint.HughesPJ
import Debug.Trace
width :: Int
width = 75
srgsAbnfPrinter :: Maybe SISRFormat
-> Bool -- ^ Include probabilities
-> Options
-> StateGrammar -> String
srgsAbnfPrinter sisr probs opts s = showDoc $ prABNF sisr probs $ makeSimpleSRG opts s
srgsAbnfNonRecursivePrinter :: Options -> StateGrammar -> String
srgsAbnfNonRecursivePrinter opts s = showDoc $ prABNF Nothing False $ makeNonRecursiveSRG opts s
showDoc = renderStyle (style { lineLength = width })
prABNF :: Maybe SISRFormat -> Bool -> SRG -> Doc
prABNF sisr probs srg@(SRG{grammarName=name,grammarLanguage=ml,
startCat=start,origStartCat=origStart,rules=rs})
= header $++$ vcat topCatRules $++$ foldr ($++$) empty (map prRule rs)
where
header = text "#ABNF 1.0 UTF-8;" $$
meta "description"
("Speech recognition grammar for " ++ name
++ ". " ++ "Original start category: " ++ origStart) $$
meta "generator" ("Grammatical Framework " ++ version) $$
language $$ tagFormat $$ mainCat
language = maybe empty (\l -> text "language" <+> text l <> char ';') ml
tagFormat | isJust sisr = text "tag-format" <+> text "<semantics/1.0>" <> char ';'
| otherwise = empty
mainCat = case cfgCatToGFCat origStart of
Just c -> text "root" <+> prCat (catFormId c) <> char ';'
Nothing -> empty
prRule (SRGRule cat origCat rhs) =
comment origCat $$
rule False cat (map prAlt rhs)
-- FIXME: use the probability
prAlt (SRGAlt mp n rhs) = sep [initTag, p (prItem sisr n rhs), finalTag]
where initTag = tag sisr (profileInitSISR n)
finalTag = tag sisr (profileFinalSISR n)
p = if isEmpty initTag && isEmpty finalTag then id else parens
topCatRules = [rule True (catFormId tc) (map (it tc) cs) | (tc,cs) <- srgTopCats srg]
where it i c = prCat c <+> tag sisr (topCatSISR c)
catFormId :: String -> String
catFormId = (++ "_cat")
prCat :: SRGCat -> Doc
prCat c = char '$' <> text c
prItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> Doc
prItem sisr t = f 0
where
f _ (REUnion []) = text "$VOID"
f p (REUnion xs)
| not (null es) = brackets (f 0 (REUnion nes))
| otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs))
where (es,nes) = partition isEpsilon xs
f _ (REConcat []) = text "$NULL"
f p (REConcat xs) = (if p >= 3 then parens else id) (fsep (map (f 2) xs))
f p (RERepeat x) = f 3 x <> text "<0->"
f _ (RESymbol s) = prSymbol sisr t s
prSymbol :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> Doc
prSymbol sisr cn (Cat n@(c,_)) = prCat c <+> tag sisr (catSISR cn n)
prSymbol _ cn (Tok t) | all isPunct (prt t) = empty -- removes punctuation
| otherwise = text (prt t) -- FIXME: quote if there is whitespace or odd chars
tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc
tag Nothing _ = empty
tag (Just fmt) t =
case t fmt of
[] -> empty
-- grr, silly SRGS ABNF does not have an escaping mechanism
ts | '{' `elem` x || '}' `elem` x -> text "{!{" <+> text x <+> text "}!}"
| otherwise -> text "{" <+> text x <+> text "}"
where x = prSISR ts
isPunct :: Char -> Bool
isPunct c = c `elem` "-_.;.,?!"
comment :: String -> Doc
comment s = text "//" <+> text s
alts :: [Doc] -> Doc
alts = fsep . prepunctuate (text "| ")
rule :: Bool -> SRGCat -> [Doc] -> Doc
rule pub c xs = p <+> prCat c <+> char '=' <+> nest 2 (alts xs) <+> char ';'
where p = if pub then text "public" else empty
meta :: String -> String -> Doc
meta n v = text "meta" <+> text (show n) <+> text "is" <+> text (show v) <> char ';'
-- Pretty-printing utilities
emptyLine :: Doc
emptyLine = text ""
prepunctuate :: Doc -> [Doc] -> [Doc]
prepunctuate _ [] = []
prepunctuate p (x:xs) = x : map (p <>) xs
($++$) :: Doc -> Doc -> Doc
x $++$ y = x $$ emptyLine $$ y

View File

@@ -1,143 +0,0 @@
module GF.Speech.RegExp (RE(..),
epsilonRE, nullRE,
isEpsilon, isNull,
unionRE, concatRE, seqRE,
repeatRE, minimizeRE,
mapRE, mapRE', joinRE,
symbolsRE,
dfa2re, prRE) where
import Data.List
import GF.Data.Utilities
import GF.Speech.FiniteState
data RE a =
REUnion [RE a] -- ^ REUnion [] is null
| REConcat [RE a] -- ^ REConcat [] is epsilon
| RERepeat (RE a)
| RESymbol a
deriving (Eq,Ord,Show)
dfa2re :: (Ord a) => DFA a -> RE a
dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops
. oneFinalState () epsilonRE . mapTransitions RESymbol
where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa
merge es = [(f,t,unionRE ls)
| ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]]
elimStates :: (Ord a) => DFA (RE a) -> DFA (RE a)
elimStates fa =
case [s | (s,_) <- states fa, isInternal fa s] of
[] -> fa
sE:_ -> elimStates $ insertTransitionsWith (\x y -> unionRE [x,y]) ts $ removeState sE fa
where sAs = nonLoopTransitionsTo sE fa
sBs = nonLoopTransitionsFrom sE fa
r2 = unionRE $ loops sE fa
ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs]
r r1 r3 = concatRE [r1, repeatRE r2, r3]
epsilonRE :: RE a
epsilonRE = REConcat []
nullRE :: RE a
nullRE = REUnion []
isNull :: RE a -> Bool
isNull (REUnion []) = True
isNull _ = False
isEpsilon :: RE a -> Bool
isEpsilon (REConcat []) = True
isEpsilon _ = False
unionRE :: Ord a => [RE a] -> RE a
unionRE = unionOrId . sortNub . concatMap toList
where
toList (REUnion xs) = xs
toList x = [x]
unionOrId [r] = r
unionOrId rs = REUnion rs
concatRE :: [RE a] -> RE a
concatRE xs | any isNull xs = nullRE
| otherwise = case concatMap toList xs of
[r] -> r
rs -> REConcat rs
where
toList (REConcat xs) = xs
toList x = [x]
seqRE :: [a] -> RE a
seqRE = concatRE . map RESymbol
repeatRE :: RE a -> RE a
repeatRE x | isNull x || isEpsilon x = epsilonRE
| otherwise = RERepeat x
finalRE :: Ord a => DFA (RE a) -> RE a
finalRE fa = concatRE [repeatRE r1, r2,
repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])]
where
s0 = startState fa
[sF] = finalStates fa
r1 = unionRE $ loops s0 fa
r2 = unionRE $ map snd $ nonLoopTransitionsTo sF fa
r3 = unionRE $ loops sF fa
r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa
reverseRE :: RE a -> RE a
reverseRE (REConcat xs) = REConcat $ map reverseRE $ reverse xs
reverseRE (REUnion xs) = REUnion (map reverseRE xs)
reverseRE (RERepeat x) = RERepeat (reverseRE x)
reverseRE x = x
minimizeRE :: Ord a => RE a -> RE a
minimizeRE = reverseRE . mergeForward . reverseRE . mergeForward
mergeForward :: Ord a => RE a -> RE a
mergeForward (REUnion xs) =
unionRE [concatRE [mergeForward y,mergeForward (unionRE rs)] | (y,rs) <- buildMultiMap (map firstRE xs)]
mergeForward (REConcat (x:xs)) = concatRE [mergeForward x,mergeForward (REConcat xs)]
mergeForward (RERepeat r) = repeatRE (mergeForward r)
mergeForward r = r
firstRE :: RE a -> (RE a, RE a)
firstRE (REConcat (x:xs)) = (x, REConcat xs)
firstRE r = (r,epsilonRE)
mapRE :: (a -> b) -> RE a -> RE b
mapRE f = mapRE' (RESymbol . f)
mapRE' :: (a -> RE b) -> RE a -> RE b
mapRE' f (REConcat xs) = REConcat (map (mapRE' f) xs)
mapRE' f (REUnion xs) = REUnion (map (mapRE' f) xs)
mapRE' f (RERepeat x) = RERepeat (mapRE' f x)
mapRE' f (RESymbol s) = f s
joinRE :: RE (RE a) -> RE a
joinRE (REConcat xs) = REConcat (map joinRE xs)
joinRE (REUnion xs) = REUnion (map joinRE xs)
joinRE (RERepeat xs) = RERepeat (joinRE xs)
joinRE (RESymbol ss) = ss
symbolsRE :: RE a -> [a]
symbolsRE (REConcat xs) = concatMap symbolsRE xs
symbolsRE (REUnion xs) = concatMap symbolsRE xs
symbolsRE (RERepeat x) = symbolsRE x
symbolsRE (RESymbol x) = [x]
-- Debugging
prRE :: RE String -> String
prRE = prRE' 0
prRE' _ (REUnion []) = "<NULL>"
prRE' n (REUnion xs) = p n 1 (concat (intersperse " | " (map (prRE' 1) xs)))
prRE' n (REConcat xs) = p n 2 (unwords (map (prRE' 2) xs))
prRE' n (RERepeat x) = p n 3 (prRE' 3 x) ++ "*"
prRE' _ (RESymbol s) = s
p n m s | n >= m = "(" ++ s ++ ")"
| True = s

View File

@@ -1,130 +0,0 @@
----------------------------------------------------------------------
-- |
-- Module : Relation
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/10/26 17:13:13 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.1 $
--
-- A simple module for relations.
-----------------------------------------------------------------------------
module GF.Speech.Relation (Rel, mkRel, mkRel'
, allRelated , isRelatedTo
, transitiveClosure
, reflexiveClosure, reflexiveClosure_
, symmetricClosure
, symmetricSubrelation, reflexiveSubrelation
, reflexiveElements
, equivalenceClasses
, isTransitive, isReflexive, isSymmetric
, isEquivalence
, isSubRelationOf) where
import Data.List
import Data.Maybe
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set
import GF.Data.Utilities
type Rel a = Map a (Set a)
-- | Creates a relation from a list of related pairs.
mkRel :: Ord a => [(a,a)] -> Rel a
mkRel ps = relates ps Map.empty
-- | Creates a relation from a list pairs of elements and the elements
-- related to them.
mkRel' :: Ord a => [(a,[a])] -> Rel a
mkRel' xs = Map.fromListWith Set.union [(x,Set.fromList ys) | (x,ys) <- xs]
relToList :: Rel a -> [(a,a)]
relToList r = [ (x,y) | (x,ys) <- Map.toList r, y <- Set.toList ys ]
-- | Add a pair to the relation.
relate :: Ord a => a -> a -> Rel a -> Rel a
relate x y r = Map.insertWith Set.union x (Set.singleton y) r
-- | Add a list of pairs to the relation.
relates :: Ord a => [(a,a)] -> Rel a -> Rel a
relates ps r = foldl (\r' (x,y) -> relate x y r') r ps
-- | Checks if an element is related to another.
isRelatedTo :: Ord a => Rel a -> a -> a -> Bool
isRelatedTo r x y = maybe False (y `Set.member`) (Map.lookup x r)
-- | Get the set of elements to which a given element is related.
allRelated :: Ord a => Rel a -> a -> Set a
allRelated r x = fromMaybe Set.empty (Map.lookup x r)
-- | Get all elements in the relation.
domain :: Ord a => Rel a -> Set a
domain r = foldl Set.union (Map.keysSet r) (Map.elems r)
-- | Keep only pairs for which both elements are in the given set.
intersectSetRel :: Ord a => Set a -> Rel a -> Rel a
intersectSetRel s = filterRel (\x y -> x `Set.member` s && y `Set.member` s)
transitiveClosure :: Ord a => Rel a -> Rel a
transitiveClosure r = fix (Map.map growSet) r
where growSet ys = foldl Set.union ys (map (allRelated r) $ Set.toList ys)
reflexiveClosure_ :: Ord a => [a] -- ^ The set over which the relation is defined.
-> Rel a -> Rel a
reflexiveClosure_ u r = relates [(x,x) | x <- u] r
-- | Uses 'domain'
reflexiveClosure :: Ord a => Rel a -> Rel a
reflexiveClosure r = reflexiveClosure_ (Set.toList $ domain r) r
symmetricClosure :: Ord a => Rel a -> Rel a
symmetricClosure r = relates [ (y,x) | (x,y) <- relToList r ] r
symmetricSubrelation :: Ord a => Rel a -> Rel a
symmetricSubrelation r = filterRel (flip $ isRelatedTo r) r
reflexiveSubrelation :: Ord a => Rel a -> Rel a
reflexiveSubrelation r = intersectSetRel (reflexiveElements r) r
-- | Get the set of elements which are related to themselves.
reflexiveElements :: Ord a => Rel a -> Set a
reflexiveElements r = Set.fromList [ x | (x,ys) <- Map.toList r, x `Set.member` ys ]
-- | Keep the related pairs for which the predicate is true.
filterRel :: Ord a => (a -> a -> Bool) -> Rel a -> Rel a
filterRel p = purgeEmpty . Map.mapWithKey (Set.filter . p)
-- | Remove keys that map to no elements.
purgeEmpty :: Ord a => Rel a -> Rel a
purgeEmpty r = Map.filter (not . Set.null) r
-- | Get the equivalence classes from an equivalence relation.
equivalenceClasses :: Ord a => Rel a -> [Set a]
equivalenceClasses r = equivalenceClasses_ (Map.keys r) r
where equivalenceClasses_ [] _ = []
equivalenceClasses_ (x:xs) r = ys:equivalenceClasses_ zs r
where ys = allRelated r x
zs = [x' | x' <- xs, not (x' `Set.member` ys)]
isTransitive :: Ord a => Rel a -> Bool
isTransitive r = and [z `Set.member` ys | (x,ys) <- Map.toList r,
y <- Set.toList ys, z <- Set.toList (allRelated r y)]
isReflexive :: Ord a => Rel a -> Bool
isReflexive r = all (\ (x,ys) -> x `Set.member` ys) (Map.toList r)
isSymmetric :: Ord a => Rel a -> Bool
isSymmetric r = and [isRelatedTo r y x | (x,y) <- relToList r]
isEquivalence :: Ord a => Rel a -> Bool
isEquivalence r = isReflexive r && isSymmetric r && isTransitive r
isSubRelationOf :: Ord a => Rel a -> Rel a -> Bool
isSubRelationOf r1 r2 = all (uncurry (isRelatedTo r2)) (relToList r1)

View File

@@ -1,39 +0,0 @@
----------------------------------------------------------------------
-- |
-- Module : RelationQC
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/10/26 17:13:13 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.1 $
--
-- QuickCheck properties for GF.Speech.Relation
-----------------------------------------------------------------------------
module GF.Speech.RelationQC where
import GF.Speech.Relation
import Test.QuickCheck
prop_transitiveClosure_trans :: [(Int,Int)] -> Bool
prop_transitiveClosure_trans ps = isTransitive (transitiveClosure (mkRel ps))
prop_symmetricSubrelation_symm :: [(Int,Int)] -> Bool
prop_symmetricSubrelation_symm ps = isSymmetric (symmetricSubrelation (mkRel ps))
prop_symmetricSubrelation_sub :: [(Int,Int)] -> Bool
prop_symmetricSubrelation_sub ps = symmetricSubrelation r `isSubRelationOf` r
where r = mkRel ps
prop_symmetricClosure_symm :: [(Int,Int)] -> Bool
prop_symmetricClosure_symm ps = isSymmetric (symmetricClosure (mkRel ps))
prop_reflexiveClosure_refl :: [(Int,Int)] -> Bool
prop_reflexiveClosure_refl ps = isReflexive (reflexiveClosure (mkRel ps))
prop_mkEquiv_equiv :: [(Int,Int)] -> Bool
prop_mkEquiv_equiv ps = isEquivalence (mkEquiv ps)
where mkEquiv = transitiveClosure . symmetricClosure . reflexiveClosure . mkRel

View File

@@ -1,87 +0,0 @@
----------------------------------------------------------------------
-- |
-- Module : GF.Speech.SISR
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- Abstract syntax and pretty printer for SISR,
-- (Semantic Interpretation for Speech Recognition)
--
-----------------------------------------------------------------------------
module GF.Speech.SISR (SISRFormat(..), SISRTag, prSISR,
topCatSISR, profileInitSISR, catSISR, profileFinalSISR) where
import Data.List
import GF.Conversion.Types
import GF.Data.Utilities
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), forestName)
import GF.Infra.Ident
import GF.Speech.TransformCFG
import GF.Speech.SRG (SRGNT)
import qualified GF.JavaScript.AbsJS as JS
import qualified GF.JavaScript.PrintJS as JS
data SISRFormat =
-- SISR Working draft 1 April 2003
-- http://www.w3.org/TR/2003/WD-semantic-interpretation-20030401/
SISROld
deriving Show
type SISRTag = [JS.DeclOrExpr]
prSISR :: SISRTag -> String
prSISR = JS.printTree
topCatSISR :: String -> SISRFormat -> SISRTag
topCatSISR c fmt = map JS.DExpr [fmtOut fmt `ass` fmtRef fmt c]
profileInitSISR :: CFTerm -> SISRFormat -> SISRTag
profileInitSISR t fmt
| null (usedArgs t) = []
| otherwise = [JS.Decl [JS.DInit args (JS.EArray [])]]
usedArgs :: CFTerm -> [Int]
usedArgs (CFObj _ ts) = foldr union [] (map usedArgs ts)
usedArgs (CFAbs _ x) = usedArgs x
usedArgs (CFApp x y) = usedArgs x `union` usedArgs y
usedArgs (CFRes i) = [i]
usedArgs _ = []
catSISR :: CFTerm -> SRGNT -> SISRFormat -> SISRTag
catSISR t (c,i) fmt
| i `elem` usedArgs t = map JS.DExpr
[JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i)) `ass` fmtRef fmt c]
| otherwise = []
profileFinalSISR :: CFTerm -> SISRFormat -> SISRTag
profileFinalSISR term fmt = [JS.DExpr $ fmtOut fmt `ass` f term]
where
f (CFObj n ts) = tree (prIdent n) (map f ts)
f (CFAbs v x) = JS.EFun [var v] [JS.SReturn (f x)]
f (CFApp x y) = JS.ECall (f x) [f y]
f (CFRes i) = JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i))
f (CFVar v) = JS.EVar (var v)
f (CFMeta typ) = obj [("name",JS.EStr "?"), ("type",JS.EStr typ)]
fmtOut SISROld = JS.EVar (JS.Ident "$")
fmtRef SISROld c = JS.EVar (JS.Ident ("$" ++ c))
args = JS.Ident "a"
var v = JS.Ident ("x" ++ show v)
field x y = JS.EMember x (JS.Ident y)
ass = JS.EAssign
tree n xs = obj [("name", JS.EStr n), ("args", JS.EArray xs)]
obj ps = JS.EObj [JS.Prop (JS.StringPropName x) y | (x,y) <- ps]

View File

@@ -1,235 +0,0 @@
----------------------------------------------------------------------
-- |
-- Module : SRG
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/01 20:09:04 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.20 $
--
-- Representation of, conversion to, and utilities for
-- printing of a general Speech Recognition Grammar.
--
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
-- categories in the grammar
-----------------------------------------------------------------------------
module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..), SRGItem,
SRGCat, SRGNT, CFTerm
, makeSRG
, makeSimpleSRG
, makeNonRecursiveSRG
, lookupFM_, prtS
, cfgCatToGFCat, srgTopCats
) where
import GF.Data.Operations
import GF.Data.Utilities
import GF.Infra.Ident
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..), NameProfile(..)
, Profile(..), SyntaxForest
, filterCats, mapSymbol, symbol)
import GF.Conversion.Types
import GF.Infra.Print
import GF.Speech.TransformCFG
import GF.Speech.Relation
import GF.Speech.FiniteState
import GF.Speech.RegExp
import GF.Speech.CFGToFiniteState
import GF.Infra.Option
import GF.Probabilistic.Probabilistic (Probs)
import GF.Compile.ShellState (StateGrammar, stateProbs, stateOptions, cncId)
import Data.List
import Data.Maybe (fromMaybe, maybeToList)
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set
import Debug.Trace
data SRG = SRG { grammarName :: String -- ^ grammar name
, startCat :: SRGCat -- ^ start category name
, origStartCat :: String -- ^ original start category name
, grammarLanguage :: Maybe String -- ^ The language for which the grammar
-- is intended, e.g. en-UK
, rules :: [SRGRule]
}
deriving (Eq,Show)
data SRGRule = SRGRule SRGCat String [SRGAlt] -- ^ SRG category name, original category name
-- and productions
deriving (Eq,Show)
-- | maybe a probability, a rule name and an EBNF right-hand side
data SRGAlt = SRGAlt (Maybe Double) CFTerm SRGItem
deriving (Eq,Show)
type SRGItem = RE (Symbol SRGNT Token)
type SRGCat = String
-- | An SRG non-terminal. Category name and its number in the profile.
type SRGNT = (SRGCat, Int)
-- | SRG category name and original name
type CatName = (SRGCat,String)
type CatNames = Map String String
-- | Create a non-left-recursive SRG.
-- FIXME: the probabilities in the returned
-- grammar may be meaningless.
makeSimpleSRG :: Options -- ^ Grammar options
-> StateGrammar
-> SRG
makeSimpleSRG opt s = makeSRG preprocess opt s
where
preprocess origStart = traceStats "After mergeIdentical"
. mergeIdentical
. traceStats "After removeLeftRecursion"
. removeLeftRecursion origStart
. traceStats "After topDownFilter"
. topDownFilter origStart
. traceStats "After bottomUpFilter"
. bottomUpFilter
. traceStats "After removeCycles"
. removeCycles
. traceStats "Inital CFG"
traceStats s g = trace ("---- " ++ s ++ ": " ++ stats g {- ++ "\n" ++ prCFRules g ++ "----" -}) g
stats g = "Categories: " ++ show (countCats g)
++ " Rules: " ++ show (countRules g)
makeNonRecursiveSRG :: Options
-> StateGrammar
-> SRG
makeNonRecursiveSRG opt s = renameSRG $
SRG { grammarName = prIdent (cncId s),
startCat = start,
origStartCat = origStart,
grammarLanguage = getSpeechLanguage opt s,
rules = rs }
where
origStart = getStartCatCF opt s
MFA start dfas = cfgToMFA opt s
rs = [SRGRule l l [SRGAlt Nothing dummyCFTerm (dfaToSRGItem dfa)] | (l,dfa) <- dfas]
where dfaToSRGItem = mapRE dummySRGNT . minimizeRE . dfa2re
dummyCFTerm = CFMeta "dummy"
dummySRGNT = mapSymbol (\c -> (c,0)) id
makeSRG :: (Cat_ -> CFRules -> CFRules)
-> Options -- ^ Grammar options
-> StateGrammar
-> SRG
makeSRG preprocess opt s = renameSRG $
SRG { grammarName = name,
startCat = origStart,
origStartCat = origStart,
grammarLanguage = getSpeechLanguage opt s,
rules = rs }
where
name = prIdent (cncId s)
origStart = getStartCatCF opt s
(_,cfgRules) = unzip $ allRulesGrouped $ preprocess origStart $ cfgToCFRules s
rs = map (cfgRulesToSRGRule (stateProbs s)) cfgRules
-- | Give names on the form NameX to all categories.
renameSRG :: SRG -> SRG
renameSRG srg = srg { startCat = renameCat (startCat srg),
rules = map renameRule (rules srg) }
where
names = mkCatNames (grammarName srg) (allSRGCats srg)
renameRule (SRGRule _ origCat alts) = SRGRule (renameCat origCat) origCat (map renameAlt alts)
renameAlt (SRGAlt mp n rhs) = SRGAlt mp n (mapRE renameSymbol rhs)
renameSymbol = mapSymbol (\ (c,x) -> (renameCat c, x)) id
renameCat = lookupFM_ names
getSpeechLanguage :: Options -> StateGrammar -> Maybe String
getSpeechLanguage opt s =
fmap (replace '_' '-') $ getOptVal (addOptions opt (stateOptions s)) speechLanguage
-- FIXME: merge alternatives with same rhs and profile but different probabilities
cfgRulesToSRGRule :: Probs -> [CFRule_] -> SRGRule
cfgRulesToSRGRule probs rs@(r:_) = SRGRule origCat origCat rhs
where
origCat = lhsCat r
alts = [((n,ruleProb probs r),mkSRGSymbols 0 ss) | CFRule c ss n <- rs]
rhs = [SRGAlt p n (srgItem sss) | ((n,p),sss) <- buildMultiMap alts ]
mkSRGSymbols _ [] = []
mkSRGSymbols i (Cat c:ss) = Cat (c,i) : mkSRGSymbols (i+1) ss
mkSRGSymbols i (Tok t:ss) = Tok t : mkSRGSymbols i ss
ruleProb :: Probs -> CFRule_ -> Maybe Double
ruleProb probs r = lookupProb probs (ruleFun r)
-- FIXME: move to GF.Probabilistic.Probabilistic?
lookupProb :: Probs -> Ident -> Maybe Double
lookupProb probs i = lookupTree prIdent i probs
mkCatNames :: String -- ^ Category name prefix
-> [String] -- ^ Original category names
-> Map String String -- ^ Maps original names to SRG names
mkCatNames prefix origNames = Map.fromList (zip origNames names)
where names = [prefix ++ "_" ++ show x | x <- [0..]]
allSRGCats :: SRG -> [String]
allSRGCats SRG { rules = rs } = [c | SRGRule c _ _ <- rs]
cfgCatToGFCat :: SRGCat -> Maybe String
cfgCatToGFCat c
-- categories introduced by removeLeftRecursion contain dashes
| '-' `elem` c = Nothing
-- some categories introduced by -conversion=finite have the form
-- "{fun:cat}..."
| "{" `isPrefixOf` c = case dropWhile (/=':') $ takeWhile (/='}') $ tail c of
':':c' -> Just c'
_ -> error $ "cfgCatToGFCat: Strange category " ++ show c
| otherwise = Just $ takeWhile (/='{') c
srgTopCats :: SRG -> [(String,[SRGCat])]
srgTopCats srg = buildMultiMap [(oc, cat) | SRGRule cat origCat _ <- rules srg,
oc <- maybeToList $ cfgCatToGFCat origCat]
--
-- * Size-optimized EBNF SRGs
--
srgItem :: [[Symbol SRGNT Token]] -> SRGItem
srgItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
-- non-optimizing version:
--srgItem = unionRE . map seqRE
-- | Merges a list of right-hand sides which all have the same
-- sequence of non-terminals.
mergeItems :: [[Symbol SRGNT Token]] -> SRGItem
mergeItems = minimizeRE . ungroupTokens . minimizeRE . unionRE . map seqRE . map groupTokens
groupTokens :: [Symbol SRGNT Token] -> [Symbol SRGNT [Token]]
groupTokens [] = []
groupTokens (Tok t:ss) = case groupTokens ss of
Tok ts:ss' -> Tok (t:ts):ss'
ss' -> Tok [t]:ss'
groupTokens (Cat c:ss) = Cat c : groupTokens ss
ungroupTokens :: RE (Symbol SRGNT [Token]) -> RE (Symbol SRGNT Token)
ungroupTokens = joinRE . mapRE (symbol (RESymbol . Cat) (REConcat . map (RESymbol . Tok)))
--
-- * Utilities for building and printing SRGs
--
lookupFM_ :: (Ord key, Show key) => Map key elt -> key -> elt
lookupFM_ fm k = Map.findWithDefault err k fm
where err = error $ "Key not found: " ++ show k
++ "\namong " ++ show (Map.keys fm)
prtS :: Print a => a -> ShowS
prtS = showString . prt

View File

@@ -1,378 +0,0 @@
----------------------------------------------------------------------
-- |
-- Module : TransformCFG
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/01 20:09:04 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.24 $
--
-- This module does some useful transformations on CFGs.
--
-- peb thinks: most of this module should be moved to GF.Conversion...
-----------------------------------------------------------------------------
module GF.Speech.TransformCFG where
import GF.Canon.CanonToGFCC (canon2gfcc)
import qualified GF.GFCC.CId as C
import GF.GFCC.Macros (lookType,catSkeleton)
import GF.GFCC.DataGFCC (GFCC)
import GF.Conversion.Types
import GF.CF.PPrCF (prCFCat)
import GF.Data.Utilities
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol,
NameProfile(..), Profile(..), name2fun, forestName)
import GF.Infra.Ident
import GF.Infra.Option
import GF.Infra.Print
import GF.Speech.Relation
import GF.Compile.ShellState (StateGrammar, stateCFG, stateGrammarST, startCatStateOpts, stateOptions)
import Control.Monad
import Control.Monad.State (State, get, put, evalState)
import Data.Map (Map)
import qualified Data.Map as Map
import Data.List
import Data.Maybe (fromMaybe)
import Data.Monoid (mconcat)
import Data.Set (Set)
import qualified Data.Set as Set
-- not very nice to replace the structured CFCat type with a simple string
type CFRule_ = CFRule Cat_ CFTerm Token
data CFTerm
= CFObj Fun [CFTerm] -- ^ an abstract syntax function with arguments
| CFAbs Int CFTerm -- ^ A lambda abstraction. The Int is the variable id.
| CFApp CFTerm CFTerm -- ^ Application
| CFRes Int -- ^ The result of the n:th (0-based) non-terminal
| CFVar Int -- ^ A lambda-bound variable
| CFMeta String -- ^ A metavariable
deriving (Eq,Ord,Show)
type Cat_ = String
type CFSymbol_ = Symbol Cat_ Token
type CFRules = Map Cat_ (Set CFRule_)
cfgToCFRules :: StateGrammar -> CFRules
cfgToCFRules s =
groupProds [CFRule (catToString c) (map symb r) (nameToTerm n)
| CFRule c r n <- cfg]
where cfg = stateCFG s
symb = mapSymbol catToString id
catToString = prt
gfcc = stateGFCC s
nameToTerm (Name IW [Unify [n]]) = CFRes n
nameToTerm (Name f@(IC c) prs) =
CFObj f (zipWith profileToTerm args prs)
where (args,_) = catSkeleton $ lookType gfcc (C.CId c)
nameToTerm n = error $ "cfgToCFRules.nameToTerm" ++ show n
profileToTerm (C.CId t) (Unify []) = CFMeta t
profileToTerm _ (Unify xs) = CFRes (last xs) -- FIXME: unify
profileToTerm (C.CId t) (Constant f) = maybe (CFMeta t) (\x -> CFObj x []) (forestName f)
getStartCat :: Options -> StateGrammar -> String
getStartCat opts sgr = prCFCat (startCatStateOpts opts' sgr)
where opts' = addOptions opts (stateOptions sgr)
getStartCatCF :: Options -> StateGrammar -> String
getStartCatCF opts sgr = getStartCat opts sgr ++ "{}.s"
stateGFCC :: StateGrammar -> GFCC
stateGFCC = canon2gfcc noOptions . stateGrammarST
-- * Grammar filtering
-- | Removes all directly and indirectly cyclic productions.
-- FIXME: this may be too aggressive, only one production
-- needs to be removed to break a given cycle. But which
-- one should we pick?
-- FIXME: Does not (yet) remove productions which are cyclic
-- because of empty productions.
removeCycles :: CFRules -> CFRules
removeCycles = groupProds . f . allRules
where f rs = filter (not . isCycle) rs
where alias = transitiveClosure $ mkRel [(c,c') | CFRule c [Cat c'] _ <- rs]
isCycle (CFRule c [Cat c'] _) = isRelatedTo alias c' c
isCycle _ = False
-- | Better bottom-up filter that also removes categories which contain no finite
-- strings.
bottomUpFilter :: CFRules -> CFRules
bottomUpFilter gr = fix grow Map.empty
where grow g = g `unionCFRules` filterCFRules (all (okSym g) . ruleRhs) gr
okSym g = symbol (`elem` allCats g) (const True)
-- | Removes categories which are not reachable from the start category.
topDownFilter :: Cat_ -> CFRules -> CFRules
topDownFilter start rules = filterCFRulesCats (isRelatedTo uses start) rules
where
rhsCats = [ (lhsCat r, c') | r <- allRules rules, c' <- filterCats (ruleRhs r) ]
uses = reflexiveClosure_ (allCats rules) $ transitiveClosure $ mkRel rhsCats
-- | Merges categories with identical right-hand-sides.
-- FIXME: handle probabilities
mergeIdentical :: CFRules -> CFRules
mergeIdentical g = groupProds $ map subst $ allRules g
where
-- maps categories to their replacement
m = Map.fromList [(y,concat (intersperse "+" xs))
| (_,xs) <- buildMultiMap [(rulesKey rs,c) | (c,rs) <- Map.toList g], y <- xs]
-- build data to compare for each category: a set of name,rhs pairs
rulesKey = Set.map (\ (CFRule _ r n) -> (n,r))
subst (CFRule c r n) = CFRule (substCat c) (map (mapSymbol substCat id) r) n
substCat c = Map.findWithDefault (error $ "mergeIdentical: " ++ c) c m
-- * Removing left recursion
-- The LC_LR algorithm from
-- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf
removeLeftRecursion :: Cat_ -> CFRules -> CFRules
removeLeftRecursion start gr
= groupProds $ concat [scheme1, scheme2, scheme3, scheme4]
where
scheme1 = [CFRule a [x,Cat a_x] n' |
a <- retainedLeftRecursive,
x <- properLeftCornersOf a,
not (isLeftRecursive x),
let a_x = mkCat (Cat a) x,
-- this is an extension of LC_LR to avoid generating
-- A-X categories for which there are no productions:
a_x `Set.member` newCats,
let n' = symbol (\_ -> CFApp (CFRes 1) (CFRes 0))
(\_ -> CFRes 0) x]
scheme2 = [CFRule a_x (beta++[Cat a_b]) n' |
a <- retainedLeftRecursive,
b@(Cat b') <- properLeftCornersOf a,
isLeftRecursive b,
CFRule _ (x:beta) n <- catRules gr b',
let a_x = mkCat (Cat a) x,
let a_b = mkCat (Cat a) b,
let i = length $ filterCats beta,
let n' = symbol (\_ -> CFAbs 1 (CFApp (CFRes i) (shiftTerm n)))
(\_ -> CFApp (CFRes i) n) x]
scheme3 = [CFRule a_x beta n' |
a <- retainedLeftRecursive,
x <- properLeftCornersOf a,
CFRule _ (x':beta) n <- catRules gr a,
x == x',
let a_x = mkCat (Cat a) x,
let n' = symbol (\_ -> CFAbs 1 (shiftTerm n))
(\_ -> n) x]
scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . Cat) cats
newCats = Set.fromList (map lhsCat (scheme2 ++ scheme3))
shiftTerm :: CFTerm -> CFTerm
shiftTerm (CFObj f ts) = CFObj f (map shiftTerm ts)
shiftTerm (CFRes 0) = CFVar 1
shiftTerm (CFRes n) = CFRes (n-1)
shiftTerm t = t
-- note: the rest don't occur in the original grammar
cats = allCats gr
rules = allRules gr
directLeftCorner = mkRel [(Cat c,t) | CFRule c (t:_) _ <- allRules gr]
leftCorner = reflexiveClosure_ (map Cat cats) $ transitiveClosure directLeftCorner
properLeftCorner = transitiveClosure directLeftCorner
properLeftCornersOf = Set.toList . allRelated properLeftCorner . Cat
isProperLeftCornerOf = flip (isRelatedTo properLeftCorner)
leftRecursive = reflexiveElements properLeftCorner
isLeftRecursive = (`Set.member` leftRecursive)
retained = start `Set.insert`
Set.fromList [a | r <- allRules (filterCFRulesCats (not . isLeftRecursive . Cat) gr),
Cat a <- ruleRhs r]
isRetained = (`Set.member` retained)
retainedLeftRecursive = filter (isLeftRecursive . Cat) $ Set.toList retained
mkCat :: CFSymbol_ -> CFSymbol_ -> Cat_
mkCat x y = showSymbol x ++ "-" ++ showSymbol y
where showSymbol = symbol id show
{-
-- Paull's algorithm, see
-- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf
removeLeftRecursion :: Cat_ -> CFRules -> CFRules
removeLeftRecursion start rs = removeDirectLeftRecursions $ map handleProds rs
where
handleProds (c, r) = (c, concatMap handleProd r)
handleProd (CFRule ai (Cat aj:alpha) n) | aj < ai =
-- FIXME: for non-recursive categories, this changes
-- the grammar unneccessarily, maybe we can use mutRecCats
-- to make this less invasive
-- FIXME: this will give multiple rules with the same name,
-- which may mess up the probabilities.
[CFRule ai (beta ++ alpha) n | CFRule _ beta _ <- lookup' aj rs]
handleProd r = [r]
removeDirectLeftRecursions :: CFRules -> CFRules
removeDirectLeftRecursions = concat . flip evalState 0 . mapM removeDirectLeftRecursion
removeDirectLeftRecursion :: (Cat_,[CFRule_]) -- ^ All productions for a category
-> State Int CFRules
removeDirectLeftRecursion (a,rs)
| null dr = return [(a,rs)]
| otherwise =
do
a' <- fresh a
let as = maybeEndWithA' nr
is = [CFRule a' (tail r) n | CFRule _ r n <- dr]
a's = maybeEndWithA' is
-- the not null constraint here avoids creating new
-- left recursive (cyclic) rules.
maybeEndWithA' xs = xs ++ [CFRule c (r++[Cat a']) n | CFRule c r n <- xs,
not (null r)]
return [(a, as), (a', a's)]
where
(dr,nr) = partition isDirectLeftRecursive rs
fresh x = do { n <- get; put (n+1); return $ x ++ "-" ++ show n }
isDirectLeftRecursive :: CFRule_ -> Bool
isDirectLeftRecursive (CFRule c (Cat c':_) _) = c == c'
isDirectLeftRecursive _ = False
-}
-- | Get the sets of mutually recursive non-terminals for a grammar.
mutRecCats :: Bool -- ^ If true, all categories will be in some set.
-- If false, only recursive categories will be included.
-> CFRules -> [Set Cat_]
mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
where r = mkRel [(c,c') | CFRule c ss _ <- allRules g, Cat c' <- ss]
refl = if incAll then reflexiveClosure_ (allCats g) else reflexiveSubrelation
--
-- * Approximate context-free grammars with regular grammars.
--
-- Use the transformation algorithm from \"Regular Approximation of Context-free
-- Grammars through Approximation\", Mohri and Nederhof, 2000
-- to create an over-generating regular frammar for a context-free
-- grammar
makeRegular :: CFRules -> CFRules
makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
where trSet cs | allXLinear cs rs = rs
| otherwise = concatMap handleCat csl
where csl = Set.toList cs
rs = catSetRules g cs
handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e
++ concatMap (makeRightLinearRules c) (catRules g c)
where c' = newCat c
makeRightLinearRules b' (CFRule c ss n) =
case ys of
[] -> newRule b' (xs ++ [Cat (newCat c)]) n -- no non-terminals left
(Cat b:zs) -> newRule b' (xs ++ [Cat b]) n
++ makeRightLinearRules (newCat b) (CFRule c zs n)
where (xs,ys) = break (`catElem` cs) ss
-- don't add rules on the form A -> A
newRule c rhs n | rhs == [Cat c] = []
| otherwise = [CFRule c rhs n]
newCat c = c ++ "$"
--
-- * CFG rule utilities
--
-- | Group productions by their lhs categories
groupProds :: [CFRule_] -> CFRules
groupProds = Map.fromListWith Set.union . map (\r -> (lhsCat r,Set.singleton r))
allRules :: CFRules -> [CFRule_]
allRules = concat . map Set.toList . Map.elems
allRulesGrouped :: CFRules -> [(Cat_,[CFRule_])]
allRulesGrouped = Map.toList . Map.map Set.toList
allCats :: CFRules -> [Cat_]
allCats = Map.keys
catRules :: CFRules -> Cat_ -> [CFRule_]
catRules rs c = Set.toList $ Map.findWithDefault Set.empty c rs
catSetRules :: CFRules -> Set Cat_ -> [CFRule_]
catSetRules g cs = allRules $ Map.filterWithKey (\c _ -> c `Set.member` cs) g
cleanCFRules :: CFRules -> CFRules
cleanCFRules = Map.filter (not . Set.null)
unionCFRules :: CFRules -> CFRules -> CFRules
unionCFRules = Map.unionWith Set.union
filterCFRules :: (CFRule_ -> Bool) -> CFRules -> CFRules
filterCFRules p = cleanCFRules . Map.map (Set.filter p)
filterCFRulesCats :: (Cat_ -> Bool) -> CFRules -> CFRules
filterCFRulesCats p = Map.filterWithKey (\c _ -> p c)
countCats :: CFRules -> Int
countCats = Map.size . cleanCFRules
countRules :: CFRules -> Int
countRules = length . allRules
lhsCat :: CFRule c n t -> c
lhsCat (CFRule c _ _) = c
ruleRhs :: CFRule c n t -> [Symbol c t]
ruleRhs (CFRule _ ss _) = ss
ruleFun :: CFRule_ -> Fun
ruleFun (CFRule _ _ t) = f t
where f (CFObj n _) = n
f (CFApp _ x) = f x
f (CFAbs _ x) = f x
f _ = IC ""
-- | Checks if a symbol is a non-terminal of one of the given categories.
catElem :: Ord c => Symbol c t -> Set c -> Bool
catElem s cs = symbol (`Set.member` cs) (const False) s
-- | Check if any of the categories used on the right-hand side
-- are in the given list of categories.
anyUsedBy :: Eq c => [c] -> CFRule c n t -> Bool
anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss)
mkCFTerm :: String -> CFTerm
mkCFTerm n = CFObj (IC n) []
ruleIsNonRecursive :: Ord c => Set c -> CFRule c n t -> Bool
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
noCatsInSet :: Ord c => Set c -> [Symbol c t] -> Bool
noCatsInSet cs = not . any (`catElem` cs)
-- | Check if all the rules are right-linear, or all the rules are
-- left-linear, with respect to given categories.
allXLinear :: Ord c => Set c -> [CFRule c n t] -> Bool
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
-- | Checks if a context-free rule is right-linear.
isRightLinear :: Ord c =>
Set c -- ^ The categories to consider
-> CFRule c n t -- ^ The rule to check for right-linearity
-> Bool
isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
-- | Checks if a context-free rule is left-linear.
isLeftLinear :: Ord c =>
Set c -- ^ The categories to consider
-> CFRule c n t -- ^ The rule to check for left-linearity
-> Bool
isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs
prCFRules :: CFRules -> String
prCFRules = unlines . map prRule . allRules
where
prRule r = lhsCat r ++ " --> " ++ unwords (map prSym (ruleRhs r))
prSym = symbol id (\t -> "\""++ t ++"\"")