forked from GitHub/gf-core
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
This commit is contained in:
265
src-3.0/GF/Speech/CFGToFiniteState.hs
Normal file
265
src-3.0/GF/Speech/CFGToFiniteState.hs
Normal file
@@ -0,0 +1,265 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : CFGToFiniteState
|
||||
-- Maintainer : BB
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/11/10 16:43:44 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.6 $
|
||||
--
|
||||
-- Approximates CFGs with finite state networks.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Speech.CFGToFiniteState (cfgToFA, makeSimpleRegular,
|
||||
MFA(..), MFALabel, cfgToMFA,cfgToFA') where
|
||||
|
||||
import Data.List
|
||||
import Data.Maybe
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map
|
||||
import Data.Set (Set)
|
||||
import qualified Data.Set as Set
|
||||
|
||||
import GF.Data.Utilities
|
||||
import GF.Formalism.CFG
|
||||
import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, NameProfile(..))
|
||||
import GF.Conversion.Types
|
||||
import GF.Infra.Ident (Ident)
|
||||
import GF.Infra.Option (Options)
|
||||
import GF.Compile.ShellState (StateGrammar)
|
||||
|
||||
import GF.Speech.FiniteState
|
||||
import GF.Speech.Graph
|
||||
import GF.Speech.Relation
|
||||
import GF.Speech.TransformCFG
|
||||
|
||||
data Recursivity = RightR | LeftR | NotR
|
||||
|
||||
data MutRecSet = MutRecSet {
|
||||
mrCats :: Set Cat_,
|
||||
mrNonRecRules :: [CFRule_],
|
||||
mrRecRules :: [CFRule_],
|
||||
mrRec :: Recursivity
|
||||
}
|
||||
|
||||
|
||||
type MutRecSets = Map Cat_ MutRecSet
|
||||
|
||||
--
|
||||
-- * Multiple DFA type
|
||||
--
|
||||
|
||||
type MFALabel a = Symbol String a
|
||||
|
||||
data MFA a = MFA String [(String,DFA (MFALabel a))]
|
||||
|
||||
|
||||
|
||||
cfgToFA :: Options -> StateGrammar -> DFA Token
|
||||
cfgToFA opts s = minimize $ compileAutomaton start $ makeSimpleRegular opts s
|
||||
where start = getStartCatCF opts s
|
||||
|
||||
makeSimpleRegular :: Options -> StateGrammar -> CFRules
|
||||
makeSimpleRegular opts s = makeRegular $ preprocess $ cfgToCFRules s
|
||||
where start = getStartCatCF opts s
|
||||
preprocess = topDownFilter start . bottomUpFilter
|
||||
. removeCycles
|
||||
|
||||
|
||||
--
|
||||
-- * Compile strongly regular grammars to NFAs
|
||||
--
|
||||
|
||||
-- Convert a strongly regular grammar to a finite automaton.
|
||||
compileAutomaton :: Cat_ -- ^ Start category
|
||||
-> CFRules
|
||||
-> NFA Token
|
||||
compileAutomaton start g = make_fa (g,ns) s [Cat start] f fa
|
||||
where
|
||||
(fa,s,f) = newFA_
|
||||
ns = mutRecSets g $ mutRecCats False g
|
||||
|
||||
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
|
||||
-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000.
|
||||
make_fa :: (CFRules,MutRecSets) -> State -> [Symbol Cat_ Token] -> State
|
||||
-> NFA Token -> NFA Token
|
||||
make_fa c@(g,ns) q0 alpha q1 fa =
|
||||
case alpha of
|
||||
[] -> newTransition q0 q1 Nothing fa
|
||||
[Tok t] -> newTransition q0 q1 (Just t) fa
|
||||
[Cat a] -> case Map.lookup a ns of
|
||||
-- a is recursive
|
||||
Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) ->
|
||||
case mrRec n of
|
||||
RightR ->
|
||||
-- the set Ni is right-recursive or cyclic
|
||||
let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs]
|
||||
++ [(getState c, xs, getState d) | CFRule c ss _ <- rs,
|
||||
let (xs,Cat d) = (init ss,last ss)]
|
||||
in make_fas new $ newTransition q0 (getState a) Nothing fa'
|
||||
LeftR ->
|
||||
-- the set Ni is left-recursive
|
||||
let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs]
|
||||
++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- rs]
|
||||
in make_fas new $ newTransition (getState a) q1 Nothing fa'
|
||||
where
|
||||
(fa',stateMap) = addStatesForCats ni fa
|
||||
getState x = Map.findWithDefault
|
||||
(error $ "CFGToFiniteState: No state for " ++ x)
|
||||
x stateMap
|
||||
-- a is not recursive
|
||||
Nothing -> let rs = catRules g a
|
||||
in foldl' (\f (CFRule _ b _) -> make_fa_ q0 b q1 f) fa rs
|
||||
(x:beta) -> let (fa',q) = newState () fa
|
||||
in make_fa_ q beta q1 $ make_fa_ q0 [x] q fa'
|
||||
where
|
||||
make_fa_ = make_fa c
|
||||
make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs
|
||||
|
||||
--
|
||||
-- * Compile a strongly regular grammar to a DFA with sub-automata
|
||||
--
|
||||
|
||||
cfgToMFA :: Options -> StateGrammar -> MFA Token
|
||||
cfgToMFA opts s = buildMFA start $ makeSimpleRegular opts s
|
||||
where start = getStartCatCF opts s
|
||||
|
||||
-- | Build a DFA by building and expanding an MFA
|
||||
cfgToFA' :: Options -> StateGrammar -> DFA Token
|
||||
cfgToFA' opts s = mfaToDFA $ cfgToMFA opts s
|
||||
|
||||
buildMFA :: Cat_ -- ^ Start category
|
||||
-> CFRules -> MFA Token
|
||||
buildMFA start g = sortSubLats $ removeUnusedSubLats mfa
|
||||
where fas = compileAutomata g
|
||||
mfa = MFA start [(c, minimize fa) | (c,fa) <- fas]
|
||||
|
||||
mfaStartDFA :: MFA a -> DFA (MFALabel a)
|
||||
mfaStartDFA (MFA start subs) =
|
||||
fromMaybe (error $ "Bad start MFA: " ++ start) $ lookup start subs
|
||||
|
||||
mfaToDFA :: Ord a => MFA a -> DFA a
|
||||
mfaToDFA mfa@(MFA _ subs) = minimize $ expand $ dfa2nfa $ mfaStartDFA mfa
|
||||
where
|
||||
subs' = Map.fromList [(c, dfa2nfa n) | (c,n) <- subs]
|
||||
getSub l = fromJust $ Map.lookup l subs'
|
||||
expand (FA (Graph c ns es) s f)
|
||||
= foldl' expandEdge (FA (Graph c ns []) s f) es
|
||||
expandEdge fa (f,t,x) =
|
||||
case x of
|
||||
Nothing -> newTransition f t Nothing fa
|
||||
Just (Tok s) -> newTransition f t (Just s) fa
|
||||
Just (Cat l) -> insertNFA fa (f,t) (expand $ getSub l)
|
||||
|
||||
removeUnusedSubLats :: MFA a -> MFA a
|
||||
removeUnusedSubLats mfa@(MFA start subs) = MFA start [(c,s) | (c,s) <- subs, isUsed c]
|
||||
where
|
||||
usedMap = subLatUseMap mfa
|
||||
used = growUsedSet (Set.singleton start)
|
||||
isUsed c = c `Set.member` used
|
||||
growUsedSet = fix (\s -> foldl Set.union s $ mapMaybe (flip Map.lookup usedMap) $ Set.toList s)
|
||||
|
||||
subLatUseMap :: MFA a -> Map String (Set String)
|
||||
subLatUseMap (MFA _ subs) = Map.fromList [(c,usedSubLats n) | (c,n) <- subs]
|
||||
|
||||
usedSubLats :: DFA (MFALabel a) -> Set String
|
||||
usedSubLats fa = Set.fromList [s | (_,_,Cat s) <- transitions fa]
|
||||
|
||||
revMultiMap :: (Ord a, Ord b) => Map a (Set b) -> Map b (Set a)
|
||||
revMultiMap m = Map.fromListWith Set.union [ (y,Set.singleton x) | (x,s) <- Map.toList m, y <- Set.toList s]
|
||||
|
||||
-- | Sort sub-networks topologically.
|
||||
sortSubLats :: MFA a -> MFA a
|
||||
sortSubLats mfa@(MFA main subs) = MFA main (reverse $ sortLats usedByMap subs)
|
||||
where
|
||||
usedByMap = revMultiMap (subLatUseMap mfa)
|
||||
sortLats _ [] = []
|
||||
sortLats ub ls = xs ++ sortLats ub' ys
|
||||
where (xs,ys) = partition ((==0) . indeg) ls
|
||||
ub' = Map.map (Set.\\ Set.fromList (map fst xs)) ub
|
||||
indeg (c,_) = maybe 0 Set.size $ Map.lookup c ub
|
||||
|
||||
-- | Convert a strongly regular grammar to a number of finite automata,
|
||||
-- one for each non-terminal.
|
||||
-- The edges in the automata accept tokens, or name another automaton to use.
|
||||
compileAutomata :: CFRules
|
||||
-> [(Cat_,NFA (Symbol Cat_ Token))]
|
||||
-- ^ A map of non-terminals and their automata.
|
||||
compileAutomata g = [(c, makeOneFA c) | c <- allCats g]
|
||||
where
|
||||
mrs = mutRecSets g $ mutRecCats True g
|
||||
makeOneFA c = make_fa1 mr s [Cat c] f fa
|
||||
where (fa,s,f) = newFA_
|
||||
mr = fromJust (Map.lookup c mrs)
|
||||
|
||||
|
||||
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
|
||||
-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000,
|
||||
-- adapted to build a finite automaton for a single (mutually recursive) set only.
|
||||
-- Categories not in the set will result in category-labelled edges.
|
||||
make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which
|
||||
-- we are building the automaton.
|
||||
-> State -- ^ State to come from
|
||||
-> [Symbol Cat_ Token] -- ^ Symbols to accept
|
||||
-> State -- ^ State to end up in
|
||||
-> NFA (Symbol Cat_ Token) -- ^ FA to add to.
|
||||
-> NFA (Symbol Cat_ Token)
|
||||
make_fa1 mr q0 alpha q1 fa =
|
||||
case alpha of
|
||||
[] -> newTransition q0 q1 Nothing fa
|
||||
[t@(Tok _)] -> newTransition q0 q1 (Just t) fa
|
||||
[c@(Cat a)] | not (a `Set.member` mrCats mr) -> newTransition q0 q1 (Just c) fa
|
||||
[Cat a] ->
|
||||
case mrRec mr of
|
||||
NotR -> -- the set is a non-recursive (always singleton) set of categories
|
||||
-- so the set of category rules is the set of rules for the whole set
|
||||
make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa
|
||||
RightR -> -- the set is right-recursive or cyclic
|
||||
let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr]
|
||||
++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr,
|
||||
let (xs,Cat d) = (init ss,last ss)]
|
||||
in make_fas new $ newTransition q0 (getState a) Nothing fa'
|
||||
LeftR -> -- the set is left-recursive
|
||||
let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr]
|
||||
++ [(getState d, xs, getState c) | CFRule c (Cat d:xs) _ <- mrRecRules mr]
|
||||
in make_fas new $ newTransition (getState a) q1 Nothing fa'
|
||||
where
|
||||
(fa',stateMap) = addStatesForCats (mrCats mr) fa
|
||||
getState x = Map.findWithDefault
|
||||
(error $ "CFGToFiniteState: No state for " ++ x)
|
||||
x stateMap
|
||||
(x:beta) -> let (fa',q) = newState () fa
|
||||
in make_fas [(q0,[x],q),(q,beta,q1)] fa'
|
||||
where
|
||||
make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa1 mr s1 xs s2 f') fa xs
|
||||
|
||||
mutRecSets :: CFRules -> [Set Cat_] -> MutRecSets
|
||||
mutRecSets g = Map.fromList . concatMap mkMutRecSet
|
||||
where
|
||||
mkMutRecSet cs = [ (c,ms) | c <- csl ]
|
||||
where csl = Set.toList cs
|
||||
rs = catSetRules g cs
|
||||
(nrs,rrs) = partition (ruleIsNonRecursive cs) rs
|
||||
ms = MutRecSet {
|
||||
mrCats = cs,
|
||||
mrNonRecRules = nrs,
|
||||
mrRecRules = rrs,
|
||||
mrRec = rec
|
||||
}
|
||||
rec | null rrs = NotR
|
||||
| all (isRightLinear cs) rrs = RightR
|
||||
| otherwise = LeftR
|
||||
|
||||
--
|
||||
-- * Utilities
|
||||
--
|
||||
|
||||
-- | Add a state for the given NFA for each of the categories
|
||||
-- in the given set. Returns a map of categories to their
|
||||
-- corresponding states.
|
||||
addStatesForCats :: Set Cat_ -> NFA t -> (NFA t, Map Cat_ State)
|
||||
addStatesForCats cs fa = (fa', m)
|
||||
where (fa', ns) = newStates (replicate (Set.size cs) ()) fa
|
||||
m = Map.fromList (zip (Set.toList cs) (map fst ns))
|
||||
329
src-3.0/GF/Speech/FiniteState.hs
Normal file
329
src-3.0/GF/Speech/FiniteState.hs
Normal file
@@ -0,0 +1,329 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : FiniteState
|
||||
-- Maintainer : BB
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/11/10 16:43:44 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.16 $
|
||||
--
|
||||
-- A simple finite state network module.
|
||||
-----------------------------------------------------------------------------
|
||||
module GF.Speech.FiniteState (FA(..), State, NFA, DFA,
|
||||
startState, finalStates,
|
||||
states, transitions,
|
||||
isInternal,
|
||||
newFA, newFA_,
|
||||
addFinalState,
|
||||
newState, newStates,
|
||||
newTransition, newTransitions,
|
||||
insertTransitionWith, insertTransitionsWith,
|
||||
mapStates, mapTransitions,
|
||||
modifyTransitions,
|
||||
nonLoopTransitionsTo, nonLoopTransitionsFrom,
|
||||
loops,
|
||||
removeState,
|
||||
oneFinalState,
|
||||
insertNFA,
|
||||
onGraph,
|
||||
moveLabelsToNodes, removeTrivialEmptyNodes,
|
||||
minimize,
|
||||
dfa2nfa,
|
||||
unusedNames, renameStates,
|
||||
prFAGraphviz, faToGraphviz) where
|
||||
|
||||
import Data.List
|
||||
import Data.Maybe
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map
|
||||
import Data.Set (Set)
|
||||
import qualified Data.Set as Set
|
||||
|
||||
import GF.Data.Utilities
|
||||
import GF.Speech.Graph
|
||||
import qualified GF.Visualization.Graphviz as Dot
|
||||
|
||||
type State = Int
|
||||
|
||||
-- | Type parameters: node id type, state label type, edge label type
|
||||
-- Data constructor arguments: nodes and edges, start state, final states
|
||||
data FA n a b = FA !(Graph n a b) !n ![n]
|
||||
|
||||
type NFA a = FA State () (Maybe a)
|
||||
|
||||
type DFA a = FA State () a
|
||||
|
||||
|
||||
startState :: FA n a b -> n
|
||||
startState (FA _ s _) = s
|
||||
|
||||
finalStates :: FA n a b -> [n]
|
||||
finalStates (FA _ _ ss) = ss
|
||||
|
||||
states :: FA n a b -> [(n,a)]
|
||||
states (FA g _ _) = nodes g
|
||||
|
||||
transitions :: FA n a b -> [(n,n,b)]
|
||||
transitions (FA g _ _) = edges g
|
||||
|
||||
newFA :: Enum n => a -- ^ Start node label
|
||||
-> FA n a b
|
||||
newFA l = FA g s []
|
||||
where (g,s) = newNode l (newGraph [toEnum 0..])
|
||||
|
||||
-- | Create a new finite automaton with an initial and a final state.
|
||||
newFA_ :: Enum n => (FA n () b, n, n)
|
||||
newFA_ = (fa'', s, f)
|
||||
where fa = newFA ()
|
||||
s = startState fa
|
||||
(fa',f) = newState () fa
|
||||
fa'' = addFinalState f fa'
|
||||
|
||||
addFinalState :: n -> FA n a b -> FA n a b
|
||||
addFinalState f (FA g s ss) = FA g s (f:ss)
|
||||
|
||||
newState :: a -> FA n a b -> (FA n a b, n)
|
||||
newState x (FA g s ss) = (FA g' s ss, n)
|
||||
where (g',n) = newNode x g
|
||||
|
||||
newStates :: [a] -> FA n a b -> (FA n a b, [(n,a)])
|
||||
newStates xs (FA g s ss) = (FA g' s ss, ns)
|
||||
where (g',ns) = newNodes xs g
|
||||
|
||||
newTransition :: n -> n -> b -> FA n a b -> FA n a b
|
||||
newTransition f t l = onGraph (newEdge (f,t,l))
|
||||
|
||||
newTransitions :: [(n, n, b)] -> FA n a b -> FA n a b
|
||||
newTransitions es = onGraph (newEdges es)
|
||||
|
||||
insertTransitionWith :: Eq n =>
|
||||
(b -> b -> b) -> (n, n, b) -> FA n a b -> FA n a b
|
||||
insertTransitionWith f t = onGraph (insertEdgeWith f t)
|
||||
|
||||
insertTransitionsWith :: Eq n =>
|
||||
(b -> b -> b) -> [(n, n, b)] -> FA n a b -> FA n a b
|
||||
insertTransitionsWith f ts fa =
|
||||
foldl' (flip (insertTransitionWith f)) fa ts
|
||||
|
||||
mapStates :: (a -> c) -> FA n a b -> FA n c b
|
||||
mapStates f = onGraph (nmap f)
|
||||
|
||||
mapTransitions :: (b -> c) -> FA n a b -> FA n a c
|
||||
mapTransitions f = onGraph (emap f)
|
||||
|
||||
modifyTransitions :: ([(n,n,b)] -> [(n,n,b)]) -> FA n a b -> FA n a b
|
||||
modifyTransitions f = onGraph (\ (Graph r ns es) -> Graph r ns (f es))
|
||||
|
||||
removeState :: Ord n => n -> FA n a b -> FA n a b
|
||||
removeState n = onGraph (removeNode n)
|
||||
|
||||
minimize :: Ord a => NFA a -> DFA a
|
||||
minimize = determinize . reverseNFA . dfa2nfa . determinize . reverseNFA
|
||||
|
||||
unusedNames :: FA n a b -> [n]
|
||||
unusedNames (FA (Graph names _ _) _ _) = names
|
||||
|
||||
-- | Gets all incoming transitions to a given state, excluding
|
||||
-- transtions from the state itself.
|
||||
nonLoopTransitionsTo :: Eq n => n -> FA n a b -> [(n,b)]
|
||||
nonLoopTransitionsTo s fa =
|
||||
[(f,l) | (f,t,l) <- transitions fa, t == s && f /= s]
|
||||
|
||||
nonLoopTransitionsFrom :: Eq n => n -> FA n a b -> [(n,b)]
|
||||
nonLoopTransitionsFrom s fa =
|
||||
[(t,l) | (f,t,l) <- transitions fa, f == s && t /= s]
|
||||
|
||||
loops :: Eq n => n -> FA n a b -> [b]
|
||||
loops s fa = [l | (f,t,l) <- transitions fa, f == s && t == s]
|
||||
|
||||
-- | Give new names to all nodes.
|
||||
renameStates :: Ord x => [y] -- ^ Infinite supply of new names
|
||||
-> FA x a b
|
||||
-> FA y a b
|
||||
renameStates supply (FA g s fs) = FA (renameNodes newName rest g) s' fs'
|
||||
where (ns,rest) = splitAt (length (nodes g)) supply
|
||||
newNodes = Map.fromList (zip (map fst (nodes g)) ns)
|
||||
newName n = Map.findWithDefault (error "FiniteState.newName") n newNodes
|
||||
s' = newName s
|
||||
fs' = map newName fs
|
||||
|
||||
-- | Insert an NFA into another
|
||||
insertNFA :: NFA a -- ^ NFA to insert into
|
||||
-> (State, State) -- ^ States to insert between
|
||||
-> NFA a -- ^ NFA to insert.
|
||||
-> NFA a
|
||||
insertNFA (FA g1 s1 fs1) (f,t) (FA g2 s2 fs2)
|
||||
= FA (newEdges es g') s1 fs1
|
||||
where
|
||||
es = (f,ren s2,Nothing):[(ren f2,t,Nothing) | f2 <- fs2]
|
||||
(g',ren) = mergeGraphs g1 g2
|
||||
|
||||
onGraph :: (Graph n a b -> Graph n c d) -> FA n a b -> FA n c d
|
||||
onGraph f (FA g s ss) = FA (f g) s ss
|
||||
|
||||
|
||||
-- | Make the finite automaton have a single final state
|
||||
-- by adding a new final state and adding an edge
|
||||
-- from the old final states to the new state.
|
||||
oneFinalState :: a -- ^ Label to give the new node
|
||||
-> b -- ^ Label to give the new edges
|
||||
-> FA n a b -- ^ The old network
|
||||
-> FA n a b -- ^ The new network
|
||||
oneFinalState nl el fa =
|
||||
let (FA g s fs,nf) = newState nl fa
|
||||
es = [ (f,nf,el) | f <- fs ]
|
||||
in FA (newEdges es g) s [nf]
|
||||
|
||||
-- | Transform a standard finite automaton with labelled edges
|
||||
-- to one where the labels are on the nodes instead. This can add
|
||||
-- up to one extra node per edge.
|
||||
moveLabelsToNodes :: (Ord n,Eq a) => FA n () (Maybe a) -> FA n (Maybe a) ()
|
||||
moveLabelsToNodes = onGraph f
|
||||
where f g@(Graph c _ _) = Graph c' ns (concat ess)
|
||||
where is = [ ((n,l),inc) | (n, (l,inc,_)) <- Map.toList (nodeInfo g)]
|
||||
(c',is') = mapAccumL fixIncoming c is
|
||||
(ns,ess) = unzip (concat is')
|
||||
|
||||
|
||||
-- | Remove empty nodes which are not start or final, and have
|
||||
-- exactly one outgoing edge or exactly one incoming edge.
|
||||
removeTrivialEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) ()
|
||||
removeTrivialEmptyNodes = pruneUnusable . skipSimpleEmptyNodes
|
||||
|
||||
-- | Move edges to empty nodes to point to the next node(s).
|
||||
-- This is not done if the pointed-to node is a final node.
|
||||
skipSimpleEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) ()
|
||||
skipSimpleEmptyNodes fa = onGraph og fa
|
||||
where
|
||||
og g@(Graph c ns es) = if es' == es then g else og (Graph c ns es')
|
||||
where
|
||||
es' = concatMap changeEdge es
|
||||
info = nodeInfo g
|
||||
changeEdge e@(f,t,())
|
||||
| isNothing (getNodeLabel info t)
|
||||
-- && (i * o <= i + o)
|
||||
&& not (isFinal fa t)
|
||||
= [ (f,t',()) | (_,t',()) <- getOutgoing info t]
|
||||
| otherwise = [e]
|
||||
-- where i = inDegree info t
|
||||
-- o = outDegree info t
|
||||
|
||||
isInternal :: Eq n => FA n a b -> n -> Bool
|
||||
isInternal (FA _ start final) n = n /= start && n `notElem` final
|
||||
|
||||
isFinal :: Eq n => FA n a b -> n -> Bool
|
||||
isFinal (FA _ _ final) n = n `elem` final
|
||||
|
||||
-- | Remove all internal nodes with no incoming edges
|
||||
-- or no outgoing edges.
|
||||
pruneUnusable :: Ord n => FA n (Maybe a) () -> FA n (Maybe a) ()
|
||||
pruneUnusable fa = onGraph f fa
|
||||
where
|
||||
f g = if Set.null rns then g else f (removeNodes rns g)
|
||||
where info = nodeInfo g
|
||||
rns = Set.fromList [ n | (n,_) <- nodes g,
|
||||
isInternal fa n,
|
||||
inDegree info n == 0
|
||||
|| outDegree info n == 0]
|
||||
|
||||
fixIncoming :: (Ord n, Eq a) => [n]
|
||||
-> (Node n (),[Edge n (Maybe a)]) -- ^ A node and its incoming edges
|
||||
-> ([n],[(Node n (Maybe a),[Edge n ()])]) -- ^ Replacement nodes with their
|
||||
-- incoming edges.
|
||||
fixIncoming cs c@((n,()),es) = (cs'', ((n,Nothing),es'):newContexts)
|
||||
where ls = nub $ map edgeLabel es
|
||||
(cs',cs'') = splitAt (length ls) cs
|
||||
newNodes = zip cs' ls
|
||||
es' = [ (x,n,()) | x <- map fst newNodes ]
|
||||
-- separate cyclic and non-cyclic edges
|
||||
(cyc,ncyc) = partition (\ (f,_,_) -> f == n) es
|
||||
-- keep all incoming non-cyclic edges with the right label
|
||||
to (x,l) = [ (f,x,()) | (f,_,l') <- ncyc, l == l']
|
||||
-- for each cyclic edge with the right label,
|
||||
-- add an edge from each of the new nodes (including this one)
|
||||
++ [ (y,x,()) | (f,_,l') <- cyc, l == l', (y,_) <- newNodes]
|
||||
newContexts = [ (v, to v) | v <- newNodes ]
|
||||
|
||||
alphabet :: Eq b => Graph n a (Maybe b) -> [b]
|
||||
alphabet = nub . catMaybes . map edgeLabel . edges
|
||||
|
||||
determinize :: Ord a => NFA a -> DFA a
|
||||
determinize (FA g s f) = let (ns,es) = h (Set.singleton start) Set.empty Set.empty
|
||||
(ns',es') = (Set.toList ns, Set.toList es)
|
||||
final = filter isDFAFinal ns'
|
||||
fa = FA (Graph undefined [(n,()) | n <- ns'] es') start final
|
||||
in renameStates [0..] fa
|
||||
where info = nodeInfo g
|
||||
-- reach = nodesReachable out
|
||||
start = closure info $ Set.singleton s
|
||||
isDFAFinal n = not (Set.null (Set.fromList f `Set.intersection` n))
|
||||
h currentStates oldStates es
|
||||
| Set.null currentStates = (oldStates,es)
|
||||
| otherwise = ((h $! uniqueNewStates) $! allOldStates) $! es'
|
||||
where
|
||||
allOldStates = oldStates `Set.union` currentStates
|
||||
(newStates,es') = new (Set.toList currentStates) Set.empty es
|
||||
uniqueNewStates = newStates Set.\\ allOldStates
|
||||
-- Get the sets of states reachable from the given states
|
||||
-- by consuming one symbol, and the associated edges.
|
||||
new [] rs es = (rs,es)
|
||||
new (n:ns) rs es = new ns rs' es'
|
||||
where cs = reachable info n --reachable reach n
|
||||
rs' = rs `Set.union` Set.fromList (map snd cs)
|
||||
es' = es `Set.union` Set.fromList [(n,s,c) | (c,s) <- cs]
|
||||
|
||||
|
||||
-- | Get all the nodes reachable from a list of nodes by only empty edges.
|
||||
closure :: Ord n => NodeInfo n a (Maybe b) -> Set n -> Set n
|
||||
closure info x = closure_ x x
|
||||
where closure_ acc check | Set.null check = acc
|
||||
| otherwise = closure_ acc' check'
|
||||
where
|
||||
reach = Set.fromList [y | x <- Set.toList check,
|
||||
(_,y,Nothing) <- getOutgoing info x]
|
||||
acc' = acc `Set.union` reach
|
||||
check' = reach Set.\\ acc
|
||||
|
||||
-- | Get a map of labels to sets of all nodes reachable
|
||||
-- from a the set of nodes by one edge with the given
|
||||
-- label and then any number of empty edges.
|
||||
reachable :: (Ord n,Ord b) => NodeInfo n a (Maybe b) -> Set n -> [(b,Set n)]
|
||||
reachable info ns = Map.toList $ Map.map (closure info . Set.fromList) $ reachable1 info ns
|
||||
reachable1 info ns = Map.fromListWith (++) [(c, [y]) | n <- Set.toList ns, (_,y,Just c) <- getOutgoing info n]
|
||||
|
||||
reverseNFA :: NFA a -> NFA a
|
||||
reverseNFA (FA g s fs) = FA g''' s' [s]
|
||||
where g' = reverseGraph g
|
||||
(g'',s') = newNode () g'
|
||||
g''' = newEdges [(s',f,Nothing) | f <- fs] g''
|
||||
|
||||
dfa2nfa :: DFA a -> NFA a
|
||||
dfa2nfa = mapTransitions Just
|
||||
|
||||
--
|
||||
-- * Visualization
|
||||
--
|
||||
|
||||
prFAGraphviz :: (Eq n,Show n) => FA n String String -> String
|
||||
prFAGraphviz = Dot.prGraphviz . faToGraphviz
|
||||
|
||||
prFAGraphviz_ :: (Eq n,Show n,Show a, Show b) => FA n a b -> String
|
||||
prFAGraphviz_ = Dot.prGraphviz . faToGraphviz . mapStates show . mapTransitions show
|
||||
|
||||
faToGraphviz :: (Eq n,Show n) => FA n String String -> Dot.Graph
|
||||
faToGraphviz (FA (Graph _ ns es) s f)
|
||||
= Dot.Graph Dot.Directed Nothing [] (map mkNode ns) (map mkEdge es) []
|
||||
where mkNode (n,l) = Dot.Node (show n) attrs
|
||||
where attrs = [("label",l)]
|
||||
++ if n == s then [("shape","box")] else []
|
||||
++ if n `elem` f then [("style","bold")] else []
|
||||
mkEdge (x,y,l) = Dot.Edge (show x) (show y) [("label",l)]
|
||||
|
||||
--
|
||||
-- * Utilities
|
||||
--
|
||||
|
||||
lookups :: Ord k => [k] -> Map k a -> [a]
|
||||
lookups xs m = mapMaybe (flip Map.lookup m) xs
|
||||
285
src-3.0/GF/Speech/GrammarToVoiceXML.hs
Normal file
285
src-3.0/GF/Speech/GrammarToVoiceXML.hs
Normal file
@@ -0,0 +1,285 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : GrammarToVoiceXML
|
||||
-- Maintainer : Bjorn Bringert
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- Create VoiceXML dialogue system from a GF grammar.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Speech.GrammarToVoiceXML (grammar2vxml) where
|
||||
|
||||
import GF.Canon.CanonToGFCC (canon2gfcc)
|
||||
import qualified GF.GFCC.CId as C
|
||||
import GF.GFCC.DataGFCC (GFCC(..), Abstr(..))
|
||||
import GF.GFCC.Macros
|
||||
import qualified GF.Canon.GFC as GFC
|
||||
import GF.Canon.AbsGFC (Term)
|
||||
import GF.Canon.PrintGFC (printTree)
|
||||
import GF.Canon.CMacros (noMark, strsFromTerm)
|
||||
import GF.Canon.Unlex (formatAsText)
|
||||
import GF.Data.Utilities
|
||||
import GF.CF.CFIdent (cfCat2Ident)
|
||||
import GF.Compile.ShellState (StateGrammar,stateGrammarST,cncId,grammar,
|
||||
startCatStateOpts,stateOptions)
|
||||
import GF.Data.Str (sstrV)
|
||||
import GF.Grammar.Macros hiding (assign,strsFromTerm)
|
||||
import GF.Grammar.Grammar (Fun)
|
||||
import GF.Grammar.Values (Tree)
|
||||
import GF.Infra.Option (Options, addOptions, getOptVal, speechLanguage)
|
||||
import GF.UseGrammar.GetTree (string2treeErr)
|
||||
import GF.UseGrammar.Linear (linTree2strings)
|
||||
|
||||
import GF.Infra.Ident
|
||||
import GF.Infra.Option (noOptions)
|
||||
import GF.Infra.Modules
|
||||
import GF.Data.Operations
|
||||
|
||||
import GF.Data.XML
|
||||
|
||||
import Control.Monad (liftM)
|
||||
import Data.List (isPrefixOf, find, intersperse)
|
||||
import qualified Data.Map as Map
|
||||
import Data.Maybe (fromMaybe)
|
||||
|
||||
import Debug.Trace
|
||||
|
||||
-- | the main function
|
||||
grammar2vxml :: Options -> StateGrammar -> String
|
||||
grammar2vxml opt s = showsXMLDoc (skel2vxml name language startcat gr' qs) ""
|
||||
where (_, gr') = vSkeleton (stateGrammarST s)
|
||||
name = prIdent (cncId s)
|
||||
qs = catQuestions s (map fst gr')
|
||||
opts = addOptions opt (stateOptions s)
|
||||
language = fmap (replace '_' '-') $ getOptVal opts speechLanguage
|
||||
startcat = C.CId $ prIdent $ cfCat2Ident $ startCatStateOpts opts s
|
||||
|
||||
--
|
||||
-- * VSkeleton: a simple description of the abstract syntax.
|
||||
--
|
||||
|
||||
type VSkeleton = [(VIdent, [(VIdent, [VIdent])])]
|
||||
type VIdent = C.CId
|
||||
|
||||
prid :: VIdent -> String
|
||||
prid (C.CId x) = x
|
||||
|
||||
vSkeleton :: GFC.CanonGrammar -> (VIdent,VSkeleton)
|
||||
vSkeleton = gfccSkeleton . canon2gfcc noOptions
|
||||
|
||||
gfccSkeleton :: GFCC -> (VIdent,VSkeleton)
|
||||
gfccSkeleton gfcc = (absname gfcc, ts)
|
||||
where a = abstract gfcc
|
||||
ts = [(c,[(f,ft f) | f <- fs]) | (c,fs) <- Map.toList (catfuns a)]
|
||||
ft f = case lookMap (error $ prid f) f (funs a) of
|
||||
(ty,_) -> fst $ GF.GFCC.Macros.catSkeleton ty
|
||||
|
||||
--
|
||||
-- * Questions to ask
|
||||
--
|
||||
|
||||
type CatQuestions = [(VIdent,String)]
|
||||
|
||||
catQuestions :: StateGrammar -> [VIdent] -> CatQuestions
|
||||
catQuestions gr cats = [(c,catQuestion gr c) | c <- cats]
|
||||
|
||||
catQuestion :: StateGrammar -> VIdent -> String
|
||||
catQuestion gr cat = err errHandler id (getPrintname gr cat >>= term2string)
|
||||
where -- FIXME: use some better warning facility
|
||||
errHandler e = trace ("GrammarToVoiceXML: " ++ e) ("quest_"++prid cat)
|
||||
term2string = liftM sstrV . strsFromTerm
|
||||
|
||||
getPrintname :: StateGrammar -> VIdent -> Err Term
|
||||
getPrintname gr cat =
|
||||
do m <- lookupModMod (grammar gr) (cncId gr)
|
||||
i <- lookupInfo m (IC (prid cat))
|
||||
case i of
|
||||
GFC.CncCat _ _ p -> return p
|
||||
_ -> fail $ "getPrintname " ++ prid cat
|
||||
++ ": Expected CncCat, got " ++ show i
|
||||
|
||||
|
||||
{-
|
||||
lin :: StateGrammar -> String -> Err String
|
||||
lin gr fun = do
|
||||
tree <- string2treeErr gr fun
|
||||
let ls = map unt $ linTree2strings noMark g c tree
|
||||
case ls of
|
||||
[] -> fail $ "No linearization of " ++ fun
|
||||
l:_ -> return l
|
||||
where c = cncId gr
|
||||
g = stateGrammarST gr
|
||||
unt = formatAsText
|
||||
-}
|
||||
|
||||
getCatQuestion :: VIdent -> CatQuestions -> String
|
||||
getCatQuestion c qs =
|
||||
fromMaybe (error "No question for category " ++ prid c) (lookup c qs)
|
||||
|
||||
--
|
||||
-- * Generate VoiceXML
|
||||
--
|
||||
|
||||
skel2vxml :: String -> Maybe String -> VIdent -> VSkeleton -> CatQuestions -> XML
|
||||
skel2vxml name language start skel qs =
|
||||
vxml language ([startForm] ++ concatMap (uncurry (catForms gr qs)) skel)
|
||||
where
|
||||
gr = grammarURI name
|
||||
startForm = Tag "form" [] [subdialog "sub" [("src", "#"++catFormId start)]
|
||||
[param "old" "{ name : '?' }"]]
|
||||
|
||||
grammarURI :: String -> String
|
||||
grammarURI name = name ++ ".grxml"
|
||||
|
||||
|
||||
catForms :: String -> CatQuestions -> VIdent -> [(VIdent, [VIdent])] -> [XML]
|
||||
catForms gr qs cat fs =
|
||||
comments [prid cat ++ " category."]
|
||||
++ [cat2form gr qs cat fs]
|
||||
|
||||
cat2form :: String -> CatQuestions -> VIdent -> [(VIdent, [VIdent])] -> XML
|
||||
cat2form gr qs cat fs =
|
||||
form (catFormId cat) $
|
||||
[var "old" Nothing,
|
||||
blockCond "old.name != '?'" [assign "term" "old"],
|
||||
field "term" []
|
||||
[promptString (getCatQuestion cat qs),
|
||||
vxmlGrammar (gr++"#"++catFormId cat)
|
||||
]
|
||||
]
|
||||
++ concatMap (uncurry (fun2sub gr cat)) fs
|
||||
++ [block [return_ ["term"]{-]-}]]
|
||||
|
||||
fun2sub :: String -> VIdent -> VIdent -> [VIdent] -> [XML]
|
||||
fun2sub gr cat fun args =
|
||||
comments [prid fun ++ " : ("
|
||||
++ concat (intersperse ", " (map prid args))
|
||||
++ ") " ++ prid cat] ++ ss
|
||||
where
|
||||
ss = zipWith mkSub [0..] args
|
||||
mkSub n t = subdialog s [("src","#"++catFormId t),
|
||||
("cond","term.name == "++string (prid fun))]
|
||||
[param "old" v,
|
||||
filled [] [assign v (s++".term")]]
|
||||
where s = prid fun ++ "_" ++ show n
|
||||
v = "term.args["++show n++"]"
|
||||
|
||||
catFormId :: VIdent -> String
|
||||
catFormId c = prid c ++ "_cat"
|
||||
|
||||
|
||||
--
|
||||
-- * VoiceXML stuff
|
||||
--
|
||||
|
||||
vxml :: Maybe String -> [XML] -> XML
|
||||
vxml ml = Tag "vxml" $ [("version","2.0"),
|
||||
("xmlns","http://www.w3.org/2001/vxml")]
|
||||
++ maybe [] (\l -> [("xml:lang", l)]) ml
|
||||
|
||||
form :: String -> [XML] -> XML
|
||||
form id xs = Tag "form" [("id", id)] xs
|
||||
|
||||
field :: String -> [(String,String)] -> [XML] -> XML
|
||||
field name attrs = Tag "field" ([("name",name)]++attrs)
|
||||
|
||||
subdialog :: String -> [(String,String)] -> [XML] -> XML
|
||||
subdialog name attrs = Tag "subdialog" ([("name",name)]++attrs)
|
||||
|
||||
filled :: [(String,String)] -> [XML] -> XML
|
||||
filled = Tag "filled"
|
||||
|
||||
vxmlGrammar :: String -> XML
|
||||
vxmlGrammar uri = ETag "grammar" [("src",uri)]
|
||||
|
||||
prompt :: [XML] -> XML
|
||||
prompt = Tag "prompt" []
|
||||
|
||||
promptString :: String -> XML
|
||||
promptString p = prompt [Data p]
|
||||
|
||||
reprompt :: XML
|
||||
reprompt = ETag "reprompt" []
|
||||
|
||||
assign :: String -> String -> XML
|
||||
assign n e = ETag "assign" [("name",n),("expr",e)]
|
||||
|
||||
value :: String -> XML
|
||||
value expr = ETag "value" [("expr",expr)]
|
||||
|
||||
if_ :: String -> [XML] -> XML
|
||||
if_ c b = if_else c b []
|
||||
|
||||
if_else :: String -> [XML] -> [XML] -> XML
|
||||
if_else c t f = cond [(c,t)] f
|
||||
|
||||
cond :: [(String,[XML])] -> [XML] -> XML
|
||||
cond ((c,b):rest) els = Tag "if" [("cond",c)] (b ++ es)
|
||||
where es = [Tag "elseif" [("cond",c')] b' | (c',b') <- rest]
|
||||
++ if null els then [] else (Tag "else" [] []:els)
|
||||
|
||||
goto_item :: String -> XML
|
||||
goto_item nextitem = ETag "goto" [("nextitem",nextitem)]
|
||||
|
||||
return_ :: [String] -> XML
|
||||
return_ names = ETag "return" [("namelist", unwords names)]
|
||||
|
||||
block :: [XML] -> XML
|
||||
block = Tag "block" []
|
||||
|
||||
blockCond :: String -> [XML] -> XML
|
||||
blockCond cond = Tag "block" [("cond", cond)]
|
||||
|
||||
throw :: String -> String -> XML
|
||||
throw event msg = Tag "throw" [("event",event),("message",msg)] []
|
||||
|
||||
nomatch :: [XML] -> XML
|
||||
nomatch = Tag "nomatch" []
|
||||
|
||||
help :: [XML] -> XML
|
||||
help = Tag "help" []
|
||||
|
||||
param :: String -> String -> XML
|
||||
param name expr = ETag "param" [("name",name),("expr",expr)]
|
||||
|
||||
var :: String -> Maybe String -> XML
|
||||
var name expr = ETag "var" ([("name",name)]++e)
|
||||
where e = maybe [] ((:[]) . (,) "expr") expr
|
||||
|
||||
script :: String -> XML
|
||||
script s = Tag "script" [] [CData s]
|
||||
|
||||
scriptURI :: String -> XML
|
||||
scriptURI uri = Tag "script" [("uri", uri)] []
|
||||
|
||||
--
|
||||
-- * ECMAScript stuff
|
||||
--
|
||||
|
||||
string :: String -> String
|
||||
string s = "'" ++ concatMap esc s ++ "'"
|
||||
where esc '\'' = "\\'"
|
||||
esc c = [c]
|
||||
|
||||
{-
|
||||
--
|
||||
-- * List stuff
|
||||
--
|
||||
|
||||
isListCat :: (VIdent, [(VIdent, [VIdent])]) -> Bool
|
||||
isListCat (cat,rules) = "List" `isPrefixOf` prIdent cat && length rules == 2
|
||||
&& ("Base"++c) `elem` fs && ("Cons"++c) `elem` fs
|
||||
where c = drop 4 (prIdent cat)
|
||||
fs = map (prIdent . fst) rules
|
||||
|
||||
isBaseFun :: VIdent -> Bool
|
||||
isBaseFun f = "Base" `isPrefixOf` prIdent f
|
||||
|
||||
isConsFun :: VIdent -> Bool
|
||||
isConsFun f = "Cons" `isPrefixOf` prIdent f
|
||||
|
||||
baseSize :: (VIdent, [(VIdent, [VIdent])]) -> Int
|
||||
baseSize (_,rules) = length bs
|
||||
where Just (_,bs) = find (isBaseFun . fst) rules
|
||||
-}
|
||||
178
src-3.0/GF/Speech/Graph.hs
Normal file
178
src-3.0/GF/Speech/Graph.hs
Normal file
@@ -0,0 +1,178 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Graph
|
||||
-- Maintainer : BB
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/11/10 16:43:44 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.2 $
|
||||
--
|
||||
-- A simple graph module.
|
||||
-----------------------------------------------------------------------------
|
||||
module GF.Speech.Graph ( Graph(..), Node, Edge, NodeInfo
|
||||
, newGraph, nodes, edges
|
||||
, nmap, emap, newNode, newNodes, newEdge, newEdges
|
||||
, insertEdgeWith
|
||||
, removeNode, removeNodes
|
||||
, nodeInfo
|
||||
, getIncoming, getOutgoing, getNodeLabel
|
||||
, inDegree, outDegree
|
||||
, nodeLabel
|
||||
, edgeFrom, edgeTo, edgeLabel
|
||||
, reverseGraph, mergeGraphs, renameNodes
|
||||
) where
|
||||
|
||||
import GF.Data.Utilities
|
||||
|
||||
import Data.List
|
||||
import Data.Maybe
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map
|
||||
import Data.Set (Set)
|
||||
import qualified Data.Set as Set
|
||||
|
||||
data Graph n a b = Graph [n] ![Node n a] ![Edge n b]
|
||||
deriving (Eq,Show)
|
||||
|
||||
type Node n a = (n,a)
|
||||
type Edge n b = (n,n,b)
|
||||
|
||||
type NodeInfo n a b = Map n (a, [Edge n b], [Edge n b])
|
||||
|
||||
-- | Create a new empty graph.
|
||||
newGraph :: [n] -> Graph n a b
|
||||
newGraph ns = Graph ns [] []
|
||||
|
||||
-- | Get all the nodes in the graph.
|
||||
nodes :: Graph n a b -> [Node n a]
|
||||
nodes (Graph _ ns _) = ns
|
||||
|
||||
-- | Get all the edges in the graph.
|
||||
edges :: Graph n a b -> [Edge n b]
|
||||
edges (Graph _ _ es) = es
|
||||
|
||||
-- | Map a function over the node labels.
|
||||
nmap :: (a -> c) -> Graph n a b -> Graph n c b
|
||||
nmap f (Graph c ns es) = Graph c [(n,f l) | (n,l) <- ns] es
|
||||
|
||||
-- | Map a function over the edge labels.
|
||||
emap :: (b -> c) -> Graph n a b -> Graph n a c
|
||||
emap f (Graph c ns es) = Graph c ns [(x,y,f l) | (x,y,l) <- es]
|
||||
|
||||
-- | Add a node to the graph.
|
||||
newNode :: a -- ^ Node label
|
||||
-> Graph n a b
|
||||
-> (Graph n a b,n) -- ^ Node graph and name of new node
|
||||
newNode l (Graph (c:cs) ns es) = (Graph cs ((c,l):ns) es, c)
|
||||
|
||||
newNodes :: [a] -> Graph n a b -> (Graph n a b,[Node n a])
|
||||
newNodes ls g = (g', zip ns ls)
|
||||
where (g',ns) = mapAccumL (flip newNode) g ls
|
||||
-- lazy version:
|
||||
--newNodes ls (Graph cs ns es) = (Graph cs' (ns'++ns) es, ns')
|
||||
-- where (xs,cs') = splitAt (length ls) cs
|
||||
-- ns' = zip xs ls
|
||||
|
||||
newEdge :: Edge n b -> Graph n a b -> Graph n a b
|
||||
newEdge e (Graph c ns es) = Graph c ns (e:es)
|
||||
|
||||
newEdges :: [Edge n b] -> Graph n a b -> Graph n a b
|
||||
newEdges es g = foldl' (flip newEdge) g es
|
||||
-- lazy version:
|
||||
-- newEdges es' (Graph c ns es) = Graph c ns (es'++es)
|
||||
|
||||
insertEdgeWith :: Eq n =>
|
||||
(b -> b -> b) -> Edge n b -> Graph n a b -> Graph n a b
|
||||
insertEdgeWith f e@(x,y,l) (Graph c ns es) = Graph c ns (h es)
|
||||
where h [] = [e]
|
||||
h (e'@(x',y',l'):es') | x' == x && y' == y = (x',y', f l l'):es'
|
||||
| otherwise = e':h es'
|
||||
|
||||
-- | Remove a node and all edges to and from that node.
|
||||
removeNode :: Ord n => n -> Graph n a b -> Graph n a b
|
||||
removeNode n = removeNodes (Set.singleton n)
|
||||
|
||||
-- | Remove a set of nodes and all edges to and from those nodes.
|
||||
removeNodes :: Ord n => Set n -> Graph n a b -> Graph n a b
|
||||
removeNodes xs (Graph c ns es) = Graph c ns' es'
|
||||
where
|
||||
keepNode n = not (Set.member n xs)
|
||||
ns' = [ x | x@(n,_) <- ns, keepNode n ]
|
||||
es' = [ e | e@(f,t,_) <- es, keepNode f && keepNode t ]
|
||||
|
||||
-- | Get a map of node names to info about each node.
|
||||
nodeInfo :: Ord n => Graph n a b -> NodeInfo n a b
|
||||
nodeInfo g = Map.fromList [ (n, (x, fn inc n, fn out n)) | (n,x) <- nodes g ]
|
||||
where
|
||||
inc = groupEdgesBy edgeTo g
|
||||
out = groupEdgesBy edgeFrom g
|
||||
fn m n = fromMaybe [] (Map.lookup n m)
|
||||
|
||||
groupEdgesBy :: (Ord n) => (Edge n b -> n) -- ^ Gets the node to group by
|
||||
-> Graph n a b -> Map n [Edge n b]
|
||||
groupEdgesBy f g = Map.fromListWith (++) [(f e, [e]) | e <- edges g]
|
||||
|
||||
lookupNode :: Ord n => NodeInfo n a b -> n -> (a, [Edge n b], [Edge n b])
|
||||
lookupNode i n = fromJust $ Map.lookup n i
|
||||
|
||||
getIncoming :: Ord n => NodeInfo n a b -> n -> [Edge n b]
|
||||
getIncoming i n = let (_,inc,_) = lookupNode i n in inc
|
||||
|
||||
getOutgoing :: Ord n => NodeInfo n a b -> n -> [Edge n b]
|
||||
getOutgoing i n = let (_,_,out) = lookupNode i n in out
|
||||
|
||||
inDegree :: Ord n => NodeInfo n a b -> n -> Int
|
||||
inDegree i n = length $ getIncoming i n
|
||||
|
||||
outDegree :: Ord n => NodeInfo n a b -> n -> Int
|
||||
outDegree i n = length $ getOutgoing i n
|
||||
|
||||
getNodeLabel :: Ord n => NodeInfo n a b -> n -> a
|
||||
getNodeLabel i n = let (l,_,_) = lookupNode i n in l
|
||||
|
||||
nodeLabel :: Node n a -> a
|
||||
nodeLabel = snd
|
||||
|
||||
edgeFrom :: Edge n b -> n
|
||||
edgeFrom (f,_,_) = f
|
||||
|
||||
edgeTo :: Edge n b -> n
|
||||
edgeTo (_,t,_) = t
|
||||
|
||||
edgeLabel :: Edge n b -> b
|
||||
edgeLabel (_,_,l) = l
|
||||
|
||||
reverseGraph :: Graph n a b -> Graph n a b
|
||||
reverseGraph (Graph c ns es) = Graph c ns [ (t,f,l) | (f,t,l) <- es ]
|
||||
|
||||
-- | Add the nodes from the second graph to the first graph.
|
||||
-- The nodes in the second graph will be renamed using the name
|
||||
-- supply in the first graph.
|
||||
-- This function is more efficient when the second graph
|
||||
-- is smaller than the first.
|
||||
mergeGraphs :: Ord m => Graph n a b -> Graph m a b
|
||||
-> (Graph n a b, m -> n) -- ^ The new graph and a function translating
|
||||
-- the old names of nodes in the second graph
|
||||
-- to names in the new graph.
|
||||
mergeGraphs (Graph c ns1 es1) g2 = (Graph c' (ns2++ns1) (es2++es1), newName)
|
||||
where
|
||||
(xs,c') = splitAt (length (nodes g2)) c
|
||||
newNames = Map.fromList (zip (map fst (nodes g2)) xs)
|
||||
newName n = fromJust $ Map.lookup n newNames
|
||||
Graph _ ns2 es2 = renameNodes newName undefined g2
|
||||
|
||||
-- | Rename the nodes in the graph.
|
||||
renameNodes :: (n -> m) -- ^ renaming function
|
||||
-> [m] -- ^ infinite supply of fresh node names, to
|
||||
-- use when adding nodes in the future.
|
||||
-> Graph n a b -> Graph m a b
|
||||
renameNodes newName c (Graph _ ns es) = Graph c ns' es'
|
||||
where ns' = map' (\ (n,x) -> (newName n,x)) ns
|
||||
es' = map' (\ (f,t,l) -> (newName f, newName t, l)) es
|
||||
|
||||
-- | A strict 'map'
|
||||
map' :: (a -> b) -> [a] -> [b]
|
||||
map' _ [] = []
|
||||
map' f (x:xs) = ((:) $! f x) $! map' f xs
|
||||
56
src-3.0/GF/Speech/PrFA.hs
Normal file
56
src-3.0/GF/Speech/PrFA.hs
Normal file
@@ -0,0 +1,56 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : PrSLF
|
||||
-- Maintainer : BB
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/11/10 16:43:44 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.2 $
|
||||
--
|
||||
-- This module prints finite automata and regular grammars
|
||||
-- for a context-free grammar.
|
||||
--
|
||||
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
|
||||
-- categories in the grammar
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Speech.PrFA (faGraphvizPrinter,regularPrinter,faCPrinter) where
|
||||
|
||||
import GF.Data.Utilities
|
||||
import GF.Conversion.Types
|
||||
import GF.Formalism.CFG
|
||||
import GF.Formalism.Utilities (Symbol(..),symbol)
|
||||
import GF.Infra.Ident
|
||||
import GF.Infra.Option (Options)
|
||||
import GF.Infra.Print
|
||||
import GF.Speech.CFGToFiniteState
|
||||
import GF.Speech.FiniteState
|
||||
import GF.Speech.TransformCFG
|
||||
import GF.Compile.ShellState (StateGrammar)
|
||||
|
||||
import Data.Char (toUpper,toLower)
|
||||
import Data.List
|
||||
import Data.Maybe (fromMaybe)
|
||||
|
||||
|
||||
|
||||
faGraphvizPrinter :: Options -> StateGrammar -> String
|
||||
faGraphvizPrinter opts s =
|
||||
prFAGraphviz $ mapStates (const "") $ cfgToFA opts s
|
||||
|
||||
-- | Convert the grammar to a regular grammar and print it in BNF
|
||||
regularPrinter :: Options -> StateGrammar -> String
|
||||
regularPrinter opts s = prCFRules $ makeSimpleRegular opts s
|
||||
where
|
||||
prCFRules :: CFRules -> String
|
||||
prCFRules g = unlines [ c ++ " ::= " ++ join " | " (map (showRhs . ruleRhs) rs) | (c,rs) <- allRulesGrouped g]
|
||||
join g = concat . intersperse g
|
||||
showRhs = unwords . map (symbol id show)
|
||||
|
||||
faCPrinter :: Options -> StateGrammar -> String
|
||||
faCPrinter opts s = fa2c $ cfgToFA opts s
|
||||
|
||||
fa2c :: DFA String -> String
|
||||
fa2c fa = undefined
|
||||
113
src-3.0/GF/Speech/PrGSL.hs
Normal file
113
src-3.0/GF/Speech/PrGSL.hs
Normal file
@@ -0,0 +1,113 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : PrGSL
|
||||
-- Maintainer : BB
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/11/01 20:09:04 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.22 $
|
||||
--
|
||||
-- This module prints a CFG as a Nuance GSL 2.0 grammar.
|
||||
--
|
||||
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
|
||||
-- categories in the grammar
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Speech.PrGSL (gslPrinter) where
|
||||
|
||||
import GF.Data.Utilities
|
||||
import GF.Speech.SRG
|
||||
import GF.Speech.RegExp
|
||||
import GF.Infra.Ident
|
||||
|
||||
import GF.Formalism.CFG
|
||||
import GF.Formalism.Utilities (Symbol(..))
|
||||
import GF.Conversion.Types
|
||||
import GF.Infra.Print
|
||||
import GF.Infra.Option
|
||||
import GF.Probabilistic.Probabilistic (Probs)
|
||||
import GF.Compile.ShellState (StateGrammar)
|
||||
|
||||
import Data.Char (toUpper,toLower)
|
||||
import Data.List (partition)
|
||||
import Text.PrettyPrint.HughesPJ
|
||||
|
||||
width :: Int
|
||||
width = 75
|
||||
|
||||
gslPrinter :: Options -> StateGrammar -> String
|
||||
gslPrinter opts s = renderStyle st $ prGSL $ makeSimpleSRG opts s
|
||||
where st = style { lineLength = width }
|
||||
|
||||
prGSL :: SRG -> Doc
|
||||
prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
|
||||
= header $++$ mainCat $++$ foldr ($++$) empty (map prRule rs)
|
||||
where
|
||||
header = text ";GSL2.0" $$
|
||||
comment ("Nuance speech recognition grammar for " ++ name) $$
|
||||
comment ("Generated by GF")
|
||||
mainCat = comment ("Start category: " ++ origStart) $$
|
||||
text ".MAIN" <+> prCat start
|
||||
prRule (SRGRule cat origCat rhs) =
|
||||
comment (prt origCat) $$
|
||||
prCat cat <+> union (map prAlt rhs)
|
||||
-- FIXME: use the probability
|
||||
prAlt (SRGAlt mp _ rhs) = prItem rhs
|
||||
|
||||
|
||||
prItem :: SRGItem -> Doc
|
||||
prItem = f
|
||||
where
|
||||
f (REUnion xs) = (if null es then empty else text "?") <> union (map f nes)
|
||||
where (es,nes) = partition isEpsilon xs
|
||||
f (REConcat [x]) = f x
|
||||
f (REConcat xs) = text "(" <> fsep (map f xs) <> text ")"
|
||||
f (RERepeat x) = text "*" <> f x
|
||||
f (RESymbol s) = prSymbol s
|
||||
|
||||
union :: [Doc] -> Doc
|
||||
union [x] = x
|
||||
union xs = text "[" <> fsep xs <> text "]"
|
||||
|
||||
prSymbol :: Symbol SRGNT Token -> Doc
|
||||
prSymbol (Cat (c,_)) = prCat c
|
||||
prSymbol (Tok t) = doubleQuotes (showToken t)
|
||||
|
||||
-- GSL requires an upper case letter in category names
|
||||
prCat :: SRGCat -> Doc
|
||||
prCat c = text (firstToUpper c)
|
||||
|
||||
|
||||
firstToUpper :: String -> String
|
||||
firstToUpper [] = []
|
||||
firstToUpper (x:xs) = toUpper x : xs
|
||||
|
||||
{-
|
||||
rmPunctCFG :: CGrammar -> CGrammar
|
||||
rmPunctCFG g = [CFRule c (filter keepSymbol ss) n | CFRule c ss n <- g]
|
||||
|
||||
keepSymbol :: Symbol c Token -> Bool
|
||||
keepSymbol (Tok t) = not (all isPunct (prt t))
|
||||
keepSymbol _ = True
|
||||
-}
|
||||
|
||||
-- Nuance does not like upper case characters in tokens
|
||||
showToken :: Token -> Doc
|
||||
showToken t = text (map toLower (prt t))
|
||||
|
||||
isPunct :: Char -> Bool
|
||||
isPunct c = c `elem` "-_.:;.,?!()[]{}"
|
||||
|
||||
comment :: String -> Doc
|
||||
comment s = text ";" <+> text s
|
||||
|
||||
|
||||
-- Pretty-printing utilities
|
||||
|
||||
emptyLine :: Doc
|
||||
emptyLine = text ""
|
||||
|
||||
($++$) :: Doc -> Doc -> Doc
|
||||
x $++$ y = x $$ emptyLine $$ y
|
||||
145
src-3.0/GF/Speech/PrJSGF.hs
Normal file
145
src-3.0/GF/Speech/PrJSGF.hs
Normal file
@@ -0,0 +1,145 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : PrJSGF
|
||||
-- Maintainer : BB
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/11/01 20:09:04 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.16 $
|
||||
--
|
||||
-- This module prints a CFG as a JSGF grammar.
|
||||
--
|
||||
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
|
||||
-- categories in the grammar
|
||||
--
|
||||
-- FIXME: convert to UTF-8
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Speech.PrJSGF (jsgfPrinter) where
|
||||
|
||||
import GF.Conversion.Types
|
||||
import GF.Data.Utilities
|
||||
import GF.Formalism.CFG
|
||||
import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), filterCats)
|
||||
import GF.Infra.Ident
|
||||
import GF.Infra.Print
|
||||
import GF.Infra.Option
|
||||
import GF.Probabilistic.Probabilistic (Probs)
|
||||
import GF.Speech.SISR
|
||||
import GF.Speech.SRG
|
||||
import GF.Speech.RegExp
|
||||
import GF.Compile.ShellState (StateGrammar)
|
||||
|
||||
import Data.Char
|
||||
import Data.List
|
||||
import Data.Maybe
|
||||
import Text.PrettyPrint.HughesPJ
|
||||
import Debug.Trace
|
||||
|
||||
width :: Int
|
||||
width = 75
|
||||
|
||||
jsgfPrinter :: Maybe SISRFormat
|
||||
-> Options
|
||||
-> StateGrammar -> String
|
||||
jsgfPrinter sisr opts s = renderStyle st $ prJSGF sisr $ makeSimpleSRG opts s
|
||||
where st = style { lineLength = width }
|
||||
|
||||
prJSGF :: Maybe SISRFormat -> SRG -> Doc
|
||||
prJSGF sisr srg@(SRG{grammarName=name,grammarLanguage=ml,
|
||||
startCat=start,origStartCat=origStart,rules=rs})
|
||||
= header $++$ mainCat $++$ vcat topCatRules $++$ foldr ($++$) empty (map prRule rs)
|
||||
where
|
||||
header = text "#JSGF" <+> text "V1.0" <+> text "UTF-8" <+> lang <> char ';' $$
|
||||
comment ("JSGF speech recognition grammar for " ++ name) $$
|
||||
comment "Generated by GF" $$
|
||||
text ("grammar " ++ name ++ ";")
|
||||
lang = maybe empty text ml
|
||||
mainCat = comment ("Start category: " ++ origStart) $$
|
||||
case cfgCatToGFCat origStart of
|
||||
Just c -> rule True "MAIN" [prCat (catFormId c)]
|
||||
Nothing -> empty
|
||||
prRule (SRGRule cat origCat rhs) =
|
||||
comment origCat $$
|
||||
rule False cat (map prAlt rhs)
|
||||
-- rule False cat (map prAlt rhs)
|
||||
-- FIXME: use the probability
|
||||
prAlt (SRGAlt mp n rhs) = sep [initTag, p (prItem sisr n rhs), finalTag]
|
||||
-- prAlt (SRGAlt mp n rhs) = initTag <+> prItem sisr n rhs <+> finalTag
|
||||
where initTag | isEmpty t = empty
|
||||
| otherwise = text "<NULL>" <+> t
|
||||
where t = tag sisr (profileInitSISR n)
|
||||
finalTag = tag sisr (profileFinalSISR n)
|
||||
p = if isEmpty initTag && isEmpty finalTag then id else parens
|
||||
|
||||
topCatRules = [rule True (catFormId tc) (map (it tc) cs) | (tc,cs) <- srgTopCats srg]
|
||||
where it i c = prCat c <+> tag sisr (topCatSISR c)
|
||||
|
||||
catFormId :: String -> String
|
||||
catFormId = (++ "_cat")
|
||||
|
||||
prCat :: SRGCat -> Doc
|
||||
prCat c = char '<' <> text c <> char '>'
|
||||
|
||||
prItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> Doc
|
||||
prItem sisr t = f 0
|
||||
where
|
||||
f _ (REUnion []) = text "<VOID>"
|
||||
f p (REUnion xs)
|
||||
| not (null es) = brackets (f 0 (REUnion nes))
|
||||
| otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs))
|
||||
where (es,nes) = partition isEpsilon xs
|
||||
f _ (REConcat []) = text "<NULL>"
|
||||
f p (REConcat xs) = (if p >= 3 then parens else id) (fsep (map (f 2) xs))
|
||||
f p (RERepeat x) = f 3 x <> char '*'
|
||||
f _ (RESymbol s) = prSymbol sisr t s
|
||||
|
||||
{-
|
||||
prItem :: Maybe SISRFormat -> CFTerm -> [Symbol SRGNT Token] -> Doc
|
||||
prItem _ _ [] = text "<NULL>"
|
||||
prItem sisr cn ss = paren $ hsep $ map (prSymbol sisr cn) ss
|
||||
where paren = if length ss == 1 then id else parens
|
||||
-}
|
||||
|
||||
prSymbol :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> Doc
|
||||
prSymbol sisr cn (Cat n@(c,_)) = prCat c <+> tag sisr (catSISR cn n)
|
||||
prSymbol _ cn (Tok t) | all isPunct (prt t) = empty -- removes punctuation
|
||||
| otherwise = text (prt t) -- FIXME: quote if there is whitespace or odd chars
|
||||
|
||||
tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc
|
||||
tag Nothing _ = empty
|
||||
tag (Just fmt) t = case t fmt of
|
||||
[] -> empty
|
||||
ts -> char '{' <+> (text (e $ prSISR ts)) <+> char '}'
|
||||
where e [] = []
|
||||
e ('}':xs) = '\\':'}':e xs
|
||||
e ('\n':xs) = ' ' : e (dropWhile isSpace xs)
|
||||
e (x:xs) = x:e xs
|
||||
|
||||
isPunct :: Char -> Bool
|
||||
isPunct c = c `elem` "-_.;.,?!"
|
||||
|
||||
comment :: String -> Doc
|
||||
comment s = text "//" <+> text s
|
||||
|
||||
alts :: [Doc] -> Doc
|
||||
alts = fsep . prepunctuate (text "| ")
|
||||
|
||||
rule :: Bool -> SRGCat -> [Doc] -> Doc
|
||||
rule pub c xs = p <+> prCat c <+> char '=' <+> nest 2 (alts xs) <+> char ';'
|
||||
where p = if pub then text "public" else empty
|
||||
|
||||
-- Pretty-printing utilities
|
||||
|
||||
emptyLine :: Doc
|
||||
emptyLine = text ""
|
||||
|
||||
prepunctuate :: Doc -> [Doc] -> [Doc]
|
||||
prepunctuate _ [] = []
|
||||
prepunctuate p (x:xs) = x : map (p <>) xs
|
||||
|
||||
($++$) :: Doc -> Doc -> Doc
|
||||
x $++$ y = x $$ emptyLine $$ y
|
||||
|
||||
33
src-3.0/GF/Speech/PrRegExp.hs
Normal file
33
src-3.0/GF/Speech/PrRegExp.hs
Normal file
@@ -0,0 +1,33 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : PrSLF
|
||||
-- Maintainer : BB
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- This module prints a grammar as a regular expression.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Speech.PrRegExp (regexpPrinter,multiRegexpPrinter) where
|
||||
|
||||
import GF.Conversion.Types
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Infra.Ident
|
||||
import GF.Infra.Option (Options)
|
||||
import GF.Speech.CFGToFiniteState
|
||||
import GF.Speech.RegExp
|
||||
import GF.Compile.ShellState (StateGrammar)
|
||||
|
||||
|
||||
regexpPrinter :: Options -> StateGrammar -> String
|
||||
regexpPrinter opts s = (++"\n") $ prRE $ dfa2re $ cfgToFA opts s
|
||||
|
||||
multiRegexpPrinter :: Options -> StateGrammar -> String
|
||||
multiRegexpPrinter opts s = prREs $ mfa2res $ cfgToMFA opts s
|
||||
|
||||
prREs :: [(String,RE (MFALabel String))] -> String
|
||||
prREs res = unlines [l ++ " = " ++ prRE (mapRE showLabel re) | (l,re) <- res]
|
||||
where showLabel = symbol (\l -> "<" ++ l ++ ">") id
|
||||
|
||||
mfa2res :: MFA String -> [(String,RE (MFALabel String))]
|
||||
mfa2res (MFA _ dfas) = [(l, minimizeRE (dfa2re dfa)) | (l,dfa) <- dfas]
|
||||
190
src-3.0/GF/Speech/PrSLF.hs
Normal file
190
src-3.0/GF/Speech/PrSLF.hs
Normal file
@@ -0,0 +1,190 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : PrSLF
|
||||
-- Maintainer : BB
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/11/10 16:43:44 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.12 $
|
||||
--
|
||||
-- This module converts a CFG to an SLF finite-state network
|
||||
-- for use with the ATK recognizer. The SLF format is described
|
||||
-- in the HTK manual, and an example for use in ATK is shown
|
||||
-- in the ATK manual.
|
||||
--
|
||||
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
|
||||
-- categories in the grammar
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Speech.PrSLF (slfPrinter,slfGraphvizPrinter,
|
||||
slfSubPrinter,slfSubGraphvizPrinter) where
|
||||
|
||||
import GF.Data.Utilities
|
||||
import GF.Conversion.Types
|
||||
import GF.Formalism.CFG
|
||||
import GF.Formalism.Utilities (Symbol(..),symbol,mapSymbol)
|
||||
import GF.Infra.Ident
|
||||
import GF.Infra.Option (Options)
|
||||
import GF.Infra.Print
|
||||
import GF.Speech.CFGToFiniteState
|
||||
import GF.Speech.FiniteState
|
||||
import GF.Speech.TransformCFG
|
||||
import qualified GF.Visualization.Graphviz as Dot
|
||||
import GF.Compile.ShellState (StateGrammar)
|
||||
|
||||
import Control.Monad
|
||||
import qualified Control.Monad.State as STM
|
||||
import Data.Char (toUpper)
|
||||
import Data.List
|
||||
import Data.Maybe
|
||||
|
||||
data SLFs = SLFs [(String,SLF)] SLF
|
||||
|
||||
data SLF = SLF { slfNodes :: [SLFNode], slfEdges :: [SLFEdge] }
|
||||
|
||||
data SLFNode = SLFNode { nId :: Int, nWord :: SLFWord, nTag :: Maybe String }
|
||||
| SLFSubLat { nId :: Int, nLat :: String }
|
||||
|
||||
-- | An SLF word is a word, or the empty string.
|
||||
type SLFWord = Maybe String
|
||||
|
||||
data SLFEdge = SLFEdge { eId :: Int, eStart :: Int, eEnd :: Int }
|
||||
|
||||
type SLF_FA = FA State (Maybe (MFALabel String)) ()
|
||||
|
||||
mkFAs :: Options -> StateGrammar -> (SLF_FA, [(String,SLF_FA)])
|
||||
mkFAs opts s = (slfStyleFA main, [(c,slfStyleFA n) | (c,n) <- subs])
|
||||
where MFA start subs = {- renameSubs $ -} cfgToMFA opts s
|
||||
main = let (fa,s,f) = newFA_ in newTransition s f (Cat start) fa
|
||||
|
||||
slfStyleFA :: Eq a => DFA a -> FA State (Maybe a) ()
|
||||
slfStyleFA = renameStates [0..] . removeTrivialEmptyNodes . oneFinalState Nothing ()
|
||||
. moveLabelsToNodes . dfa2nfa
|
||||
|
||||
-- | Give sequential names to subnetworks.
|
||||
renameSubs :: MFA String -> MFA String
|
||||
renameSubs (MFA start subs) = MFA (newName start) subs'
|
||||
where newNames = zip (map fst subs) ["sub"++show n | n <- [0..]]
|
||||
newName s = lookup' s newNames
|
||||
subs' = [(newName s,renameLabels n) | (s,n) <- subs]
|
||||
renameLabels = mapTransitions (mapSymbol newName id)
|
||||
|
||||
--
|
||||
-- * SLF graphviz printing (without sub-networks)
|
||||
--
|
||||
|
||||
slfGraphvizPrinter :: Options -> StateGrammar -> String
|
||||
slfGraphvizPrinter opts s
|
||||
= prFAGraphviz $ gvFA $ slfStyleFA $ cfgToFA' opts s
|
||||
where
|
||||
gvFA = mapStates (fromMaybe "") . mapTransitions (const "")
|
||||
|
||||
--
|
||||
-- * SLF graphviz printing (with sub-networks)
|
||||
--
|
||||
|
||||
slfSubGraphvizPrinter :: Options -> StateGrammar -> String
|
||||
slfSubGraphvizPrinter opts s = Dot.prGraphviz g
|
||||
where (main, subs) = mkFAs opts s
|
||||
g = STM.evalState (liftM2 Dot.addSubGraphs ss m) [0..]
|
||||
ss = mapM (\ (c,f) -> gvSLFFA (Just c) f) subs
|
||||
m = gvSLFFA Nothing main
|
||||
|
||||
gvSLFFA :: Maybe String -> SLF_FA -> STM.State [State] Dot.Graph
|
||||
gvSLFFA n fa =
|
||||
liftM (mkCluster n . faToGraphviz . mapStates (maybe "" mfaLabelToGv)
|
||||
. mapTransitions (const "")) (rename fa)
|
||||
where mfaLabelToGv = symbol ("#"++) id
|
||||
mkCluster Nothing = id
|
||||
mkCluster (Just x)
|
||||
= Dot.setName ("cluster_"++x) . Dot.setAttr "label" x
|
||||
rename fa = do
|
||||
names <- STM.get
|
||||
let fa' = renameStates names fa
|
||||
names' = unusedNames fa'
|
||||
STM.put names'
|
||||
return fa'
|
||||
|
||||
--
|
||||
-- * SLF printing (without sub-networks)
|
||||
--
|
||||
|
||||
slfPrinter :: Options -> StateGrammar -> String
|
||||
slfPrinter opts s
|
||||
= prSLF $ automatonToSLF mkSLFNode $ slfStyleFA $ cfgToFA' opts s
|
||||
|
||||
--
|
||||
-- * SLF printing (with sub-networks)
|
||||
--
|
||||
|
||||
-- | Make a network with subnetworks in SLF
|
||||
slfSubPrinter :: Options -> StateGrammar -> String
|
||||
slfSubPrinter opts s = prSLFs slfs
|
||||
where
|
||||
(main,subs) = mkFAs opts s
|
||||
slfs = SLFs [(c, faToSLF fa) | (c,fa) <- subs] (faToSLF main)
|
||||
faToSLF = automatonToSLF mfaNodeToSLFNode
|
||||
|
||||
automatonToSLF :: (Int -> a -> SLFNode) -> FA State a () -> SLF
|
||||
automatonToSLF mkNode fa = SLF { slfNodes = ns, slfEdges = es }
|
||||
where ns = map (uncurry mkNode) (states fa)
|
||||
es = zipWith (\i (f,t,()) -> mkSLFEdge i (f,t)) [0..] (transitions fa)
|
||||
|
||||
mfaNodeToSLFNode :: Int -> Maybe (MFALabel String) -> SLFNode
|
||||
mfaNodeToSLFNode i l = case l of
|
||||
Nothing -> mkSLFNode i Nothing
|
||||
Just (Tok x) -> mkSLFNode i (Just x)
|
||||
Just (Cat s) -> mkSLFSubLat i s
|
||||
|
||||
mkSLFNode :: Int -> Maybe String -> SLFNode
|
||||
mkSLFNode i Nothing = SLFNode { nId = i, nWord = Nothing, nTag = Nothing }
|
||||
mkSLFNode i (Just w)
|
||||
| isNonWord w = SLFNode { nId = i,
|
||||
nWord = Nothing,
|
||||
nTag = Just w }
|
||||
| otherwise = SLFNode { nId = i,
|
||||
nWord = Just (map toUpper w),
|
||||
nTag = Just w }
|
||||
|
||||
mkSLFSubLat :: Int -> String -> SLFNode
|
||||
mkSLFSubLat i sub = SLFSubLat { nId = i, nLat = sub }
|
||||
|
||||
mkSLFEdge :: Int -> (Int,Int) -> SLFEdge
|
||||
mkSLFEdge i (f,t) = SLFEdge { eId = i, eStart = f, eEnd = t }
|
||||
|
||||
prSLFs :: SLFs -> String
|
||||
prSLFs (SLFs subs main) = unlinesS (map prSub subs ++ [prOneSLF main]) ""
|
||||
where prSub (n,s) = showString "SUBLAT=" . shows n
|
||||
. nl . prOneSLF s . showString "." . nl
|
||||
|
||||
prSLF :: SLF -> String
|
||||
prSLF slf = prOneSLF slf ""
|
||||
|
||||
prOneSLF :: SLF -> ShowS
|
||||
prOneSLF (SLF { slfNodes = ns, slfEdges = es})
|
||||
= header . unlinesS (map prNode ns) . nl . unlinesS (map prEdge es) . nl
|
||||
where
|
||||
header = prFields [("N",show (length ns)),("L", show (length es))] . nl
|
||||
prNode (SLFNode { nId = i, nWord = w, nTag = t })
|
||||
= prFields $ [("I",show i),("W",showWord w)]
|
||||
++ maybe [] (\t -> [("s",t)]) t
|
||||
prNode (SLFSubLat { nId = i, nLat = l })
|
||||
= prFields [("I",show i),("L",show l)]
|
||||
prEdge e = prFields [("J",show (eId e)),("S",show (eStart e)),("E",show (eEnd e))]
|
||||
|
||||
-- | Check if a word should not correspond to a word in the SLF file.
|
||||
isNonWord :: String -> Bool
|
||||
isNonWord = any isPunct
|
||||
|
||||
isPunct :: Char -> Bool
|
||||
isPunct c = c `elem` "-_.;.,?!()[]{}"
|
||||
|
||||
showWord :: SLFWord -> String
|
||||
showWord Nothing = "!NULL"
|
||||
showWord (Just w) | null w = "!NULL"
|
||||
| otherwise = w
|
||||
|
||||
prFields :: [(String,String)] -> ShowS
|
||||
prFields fs = unwordsS [ showString l . showChar '=' . showString v | (l,v) <- fs ]
|
||||
153
src-3.0/GF/Speech/PrSRGS.hs
Normal file
153
src-3.0/GF/Speech/PrSRGS.hs
Normal file
@@ -0,0 +1,153 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : PrSRGS
|
||||
-- Maintainer : BB
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- This module prints a CFG as an SRGS XML grammar.
|
||||
--
|
||||
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
|
||||
-- categories in the grammar
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Speech.PrSRGS (srgsXmlPrinter, srgsXmlNonRecursivePrinter) where
|
||||
|
||||
import GF.Data.Utilities
|
||||
import GF.Data.XML
|
||||
import GF.Speech.RegExp
|
||||
import GF.Speech.SISR as SISR
|
||||
import GF.Speech.SRG
|
||||
import GF.Infra.Ident
|
||||
import GF.Today
|
||||
|
||||
import GF.Formalism.CFG
|
||||
import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), forestName, filterCats)
|
||||
import GF.Conversion.Types
|
||||
import GF.Infra.Print
|
||||
import GF.Infra.Option
|
||||
import GF.Probabilistic.Probabilistic (Probs)
|
||||
import GF.Compile.ShellState (StateGrammar)
|
||||
|
||||
import Data.Char (toUpper,toLower)
|
||||
import Data.List
|
||||
import Data.Maybe
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Set as Set
|
||||
|
||||
srgsXmlPrinter :: Maybe SISRFormat
|
||||
-> Bool -- ^ Include probabilities
|
||||
-> Options
|
||||
-> StateGrammar -> String
|
||||
srgsXmlPrinter sisr probs opts s = prSrgsXml sisr probs $ makeSimpleSRG opts s
|
||||
|
||||
srgsXmlNonRecursivePrinter :: Options -> StateGrammar -> String
|
||||
srgsXmlNonRecursivePrinter opts s = prSrgsXml Nothing False $ makeNonRecursiveSRG opts s
|
||||
|
||||
|
||||
prSrgsXml :: Maybe SISRFormat -> Bool -> SRG -> String
|
||||
prSrgsXml sisr probs srg@(SRG{grammarName=name,startCat=start,
|
||||
origStartCat=origStart,grammarLanguage=l,rules=rs})
|
||||
= showXMLDoc (optimizeSRGS xmlGr)
|
||||
where
|
||||
Just root = cfgCatToGFCat origStart
|
||||
xmlGr = grammar sisr (catFormId root) l $
|
||||
[meta "description"
|
||||
("SRGS XML speech recognition grammar for " ++ name
|
||||
++ ". " ++ "Original start category: " ++ origStart),
|
||||
meta "generator" ("Grammatical Framework " ++ version)]
|
||||
++ topCatRules
|
||||
++ concatMap ruleToXML rs
|
||||
ruleToXML (SRGRule cat origCat alts) =
|
||||
comments ["Category " ++ origCat] ++ [rule cat (prRhs alts)]
|
||||
prRhs rhss = [oneOf (map (mkProd sisr probs) rhss)]
|
||||
-- externally visible rules for each of the GF categories
|
||||
topCatRules = [topRule tc [oneOf (map (it tc) cs)] | (tc,cs) <- srgTopCats srg]
|
||||
where it i c = Tag "item" [] ([ETag "ruleref" [("uri","#" ++ c)]]
|
||||
++ tag sisr (topCatSISR c))
|
||||
topRule i is = Tag "rule" [("id",catFormId i),("scope","public")] is
|
||||
|
||||
rule :: String -> [XML] -> XML
|
||||
rule i = Tag "rule" [("id",i)]
|
||||
|
||||
mkProd :: Maybe SISRFormat -> Bool -> SRGAlt -> XML
|
||||
mkProd sisr probs (SRGAlt mp n rhs) = Tag "item" w (ti ++ [x] ++ tf)
|
||||
where x = mkItem sisr n rhs
|
||||
w | probs = maybe [] (\p -> [("weight", show p)]) mp
|
||||
| otherwise = []
|
||||
ti = tag sisr (profileInitSISR n)
|
||||
tf = tag sisr (profileFinalSISR n)
|
||||
|
||||
mkItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> XML
|
||||
mkItem sisr cn = f
|
||||
where
|
||||
f (REUnion []) = ETag "ruleref" [("special","VOID")]
|
||||
f (REUnion xs)
|
||||
| not (null es) = Tag "item" [("repeat","0-1")] [f (REUnion nes)]
|
||||
| otherwise = oneOf (map f xs)
|
||||
where (es,nes) = partition isEpsilon xs
|
||||
f (REConcat []) = ETag "ruleref" [("special","NULL")]
|
||||
f (REConcat xs) = Tag "item" [] (map f xs)
|
||||
f (RERepeat x) = Tag "item" [("repeat","0-")] [f x]
|
||||
f (RESymbol s) = symItem sisr cn s
|
||||
|
||||
{-
|
||||
mkProd :: Maybe SISRFormat -> Bool -> SRGAlt -> XML
|
||||
mkProd sisr probs (SRGAlt mp n rhs) = Tag "item" w (ti ++ xs ++ tf)
|
||||
where xs = mkItem sisr n rhs
|
||||
w | probs = maybe [] (\p -> [("weight", show p)]) mp
|
||||
| otherwise = []
|
||||
ti = [tag sisr (profileInitSISR n)]
|
||||
tf = [tag sisr (profileFinalSISR n)]
|
||||
|
||||
|
||||
mkItem :: Maybe SISRFormat -> CFTerm -> [Symbol SRGNT Token] -> [XML]
|
||||
mkItem sisr cn ss = map (symItem sisr cn) ss
|
||||
-}
|
||||
|
||||
symItem :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> XML
|
||||
symItem sisr cn (Cat n@(c,_)) =
|
||||
Tag "item" [] $ [ETag "ruleref" [("uri","#" ++ c)]] ++ tag sisr (catSISR cn n)
|
||||
symItem _ _ (Tok t) = Tag "item" [] [Data (showToken t)]
|
||||
|
||||
tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> [XML]
|
||||
tag Nothing _ = []
|
||||
tag (Just fmt) t = case t fmt of
|
||||
[] -> []
|
||||
ts -> [Tag "tag" [] [Data (prSISR ts)]]
|
||||
|
||||
catFormId :: String -> String
|
||||
catFormId = (++ "_cat")
|
||||
|
||||
|
||||
showToken :: Token -> String
|
||||
showToken t = t
|
||||
|
||||
oneOf :: [XML] -> XML
|
||||
oneOf = Tag "one-of" []
|
||||
|
||||
grammar :: Maybe SISRFormat
|
||||
-> String -- ^ root
|
||||
-> Maybe String -- ^language
|
||||
-> [XML] -> XML
|
||||
grammar sisr root ml =
|
||||
Tag "grammar" $ [("xmlns","http://www.w3.org/2001/06/grammar"),
|
||||
("version","1.0"),
|
||||
("mode","voice"),
|
||||
("root",root)]
|
||||
++ (if isJust sisr then [("tag-format","semantics/1.0")] else [])
|
||||
++ maybe [] (\l -> [("xml:lang", l)]) ml
|
||||
|
||||
meta :: String -> String -> XML
|
||||
meta n c = ETag "meta" [("name",n),("content",c)]
|
||||
|
||||
optimizeSRGS :: XML -> XML
|
||||
optimizeSRGS = bottomUpXML f
|
||||
where f (Tag "item" [] [x@(Tag "item" _ _)]) = x
|
||||
f (Tag "item" [] [x@(Tag "one-of" _ _)]) = x
|
||||
f (Tag "item" as [Tag "item" [] xs]) = Tag "item" as xs
|
||||
f (Tag "item" as xs) = Tag "item" as (map g xs)
|
||||
where g (Tag "item" [] [x@(ETag "ruleref" _)]) = x
|
||||
g x = x
|
||||
f (Tag "one-of" [] [x]) = x
|
||||
f x = x
|
||||
147
src-3.0/GF/Speech/PrSRGS_ABNF.hs
Normal file
147
src-3.0/GF/Speech/PrSRGS_ABNF.hs
Normal file
@@ -0,0 +1,147 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : PrJSRGS_ABNF
|
||||
-- Maintainer : BB
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/11/01 20:09:04 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.16 $
|
||||
--
|
||||
-- This module prints a CFG as a JSGF grammar.
|
||||
--
|
||||
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
|
||||
-- categories in the grammar
|
||||
--
|
||||
-- FIXME: convert to UTF-8
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Speech.PrSRGS_ABNF (srgsAbnfPrinter, srgsAbnfNonRecursivePrinter) where
|
||||
|
||||
import GF.Conversion.Types
|
||||
import GF.Data.Utilities
|
||||
import GF.Formalism.CFG
|
||||
import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), filterCats)
|
||||
import GF.Infra.Ident
|
||||
import GF.Infra.Print
|
||||
import GF.Infra.Option
|
||||
import GF.Probabilistic.Probabilistic (Probs)
|
||||
import GF.Speech.SISR
|
||||
import GF.Speech.SRG
|
||||
import GF.Speech.RegExp
|
||||
import GF.Compile.ShellState (StateGrammar)
|
||||
import GF.Today
|
||||
|
||||
import Data.Char
|
||||
import Data.List
|
||||
import Data.Maybe
|
||||
import Text.PrettyPrint.HughesPJ
|
||||
import Debug.Trace
|
||||
|
||||
width :: Int
|
||||
width = 75
|
||||
|
||||
srgsAbnfPrinter :: Maybe SISRFormat
|
||||
-> Bool -- ^ Include probabilities
|
||||
-> Options
|
||||
-> StateGrammar -> String
|
||||
srgsAbnfPrinter sisr probs opts s = showDoc $ prABNF sisr probs $ makeSimpleSRG opts s
|
||||
|
||||
srgsAbnfNonRecursivePrinter :: Options -> StateGrammar -> String
|
||||
srgsAbnfNonRecursivePrinter opts s = showDoc $ prABNF Nothing False $ makeNonRecursiveSRG opts s
|
||||
|
||||
showDoc = renderStyle (style { lineLength = width })
|
||||
|
||||
prABNF :: Maybe SISRFormat -> Bool -> SRG -> Doc
|
||||
prABNF sisr probs srg@(SRG{grammarName=name,grammarLanguage=ml,
|
||||
startCat=start,origStartCat=origStart,rules=rs})
|
||||
= header $++$ vcat topCatRules $++$ foldr ($++$) empty (map prRule rs)
|
||||
where
|
||||
header = text "#ABNF 1.0 UTF-8;" $$
|
||||
meta "description"
|
||||
("Speech recognition grammar for " ++ name
|
||||
++ ". " ++ "Original start category: " ++ origStart) $$
|
||||
meta "generator" ("Grammatical Framework " ++ version) $$
|
||||
language $$ tagFormat $$ mainCat
|
||||
language = maybe empty (\l -> text "language" <+> text l <> char ';') ml
|
||||
tagFormat | isJust sisr = text "tag-format" <+> text "<semantics/1.0>" <> char ';'
|
||||
| otherwise = empty
|
||||
mainCat = case cfgCatToGFCat origStart of
|
||||
Just c -> text "root" <+> prCat (catFormId c) <> char ';'
|
||||
Nothing -> empty
|
||||
prRule (SRGRule cat origCat rhs) =
|
||||
comment origCat $$
|
||||
rule False cat (map prAlt rhs)
|
||||
-- FIXME: use the probability
|
||||
prAlt (SRGAlt mp n rhs) = sep [initTag, p (prItem sisr n rhs), finalTag]
|
||||
where initTag = tag sisr (profileInitSISR n)
|
||||
finalTag = tag sisr (profileFinalSISR n)
|
||||
p = if isEmpty initTag && isEmpty finalTag then id else parens
|
||||
|
||||
topCatRules = [rule True (catFormId tc) (map (it tc) cs) | (tc,cs) <- srgTopCats srg]
|
||||
where it i c = prCat c <+> tag sisr (topCatSISR c)
|
||||
|
||||
catFormId :: String -> String
|
||||
catFormId = (++ "_cat")
|
||||
|
||||
prCat :: SRGCat -> Doc
|
||||
prCat c = char '$' <> text c
|
||||
|
||||
prItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> Doc
|
||||
prItem sisr t = f 0
|
||||
where
|
||||
f _ (REUnion []) = text "$VOID"
|
||||
f p (REUnion xs)
|
||||
| not (null es) = brackets (f 0 (REUnion nes))
|
||||
| otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs))
|
||||
where (es,nes) = partition isEpsilon xs
|
||||
f _ (REConcat []) = text "$NULL"
|
||||
f p (REConcat xs) = (if p >= 3 then parens else id) (fsep (map (f 2) xs))
|
||||
f p (RERepeat x) = f 3 x <> text "<0->"
|
||||
f _ (RESymbol s) = prSymbol sisr t s
|
||||
|
||||
|
||||
prSymbol :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> Doc
|
||||
prSymbol sisr cn (Cat n@(c,_)) = prCat c <+> tag sisr (catSISR cn n)
|
||||
prSymbol _ cn (Tok t) | all isPunct (prt t) = empty -- removes punctuation
|
||||
| otherwise = text (prt t) -- FIXME: quote if there is whitespace or odd chars
|
||||
|
||||
tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc
|
||||
tag Nothing _ = empty
|
||||
tag (Just fmt) t =
|
||||
case t fmt of
|
||||
[] -> empty
|
||||
-- grr, silly SRGS ABNF does not have an escaping mechanism
|
||||
ts | '{' `elem` x || '}' `elem` x -> text "{!{" <+> text x <+> text "}!}"
|
||||
| otherwise -> text "{" <+> text x <+> text "}"
|
||||
where x = prSISR ts
|
||||
|
||||
isPunct :: Char -> Bool
|
||||
isPunct c = c `elem` "-_.;.,?!"
|
||||
|
||||
comment :: String -> Doc
|
||||
comment s = text "//" <+> text s
|
||||
|
||||
alts :: [Doc] -> Doc
|
||||
alts = fsep . prepunctuate (text "| ")
|
||||
|
||||
rule :: Bool -> SRGCat -> [Doc] -> Doc
|
||||
rule pub c xs = p <+> prCat c <+> char '=' <+> nest 2 (alts xs) <+> char ';'
|
||||
where p = if pub then text "public" else empty
|
||||
|
||||
meta :: String -> String -> Doc
|
||||
meta n v = text "meta" <+> text (show n) <+> text "is" <+> text (show v) <> char ';'
|
||||
|
||||
-- Pretty-printing utilities
|
||||
|
||||
emptyLine :: Doc
|
||||
emptyLine = text ""
|
||||
|
||||
prepunctuate :: Doc -> [Doc] -> [Doc]
|
||||
prepunctuate _ [] = []
|
||||
prepunctuate p (x:xs) = x : map (p <>) xs
|
||||
|
||||
($++$) :: Doc -> Doc -> Doc
|
||||
x $++$ y = x $$ emptyLine $$ y
|
||||
|
||||
143
src-3.0/GF/Speech/RegExp.hs
Normal file
143
src-3.0/GF/Speech/RegExp.hs
Normal file
@@ -0,0 +1,143 @@
|
||||
module GF.Speech.RegExp (RE(..),
|
||||
epsilonRE, nullRE,
|
||||
isEpsilon, isNull,
|
||||
unionRE, concatRE, seqRE,
|
||||
repeatRE, minimizeRE,
|
||||
mapRE, mapRE', joinRE,
|
||||
symbolsRE,
|
||||
dfa2re, prRE) where
|
||||
|
||||
import Data.List
|
||||
|
||||
import GF.Data.Utilities
|
||||
import GF.Speech.FiniteState
|
||||
|
||||
data RE a =
|
||||
REUnion [RE a] -- ^ REUnion [] is null
|
||||
| REConcat [RE a] -- ^ REConcat [] is epsilon
|
||||
| RERepeat (RE a)
|
||||
| RESymbol a
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
|
||||
dfa2re :: (Ord a) => DFA a -> RE a
|
||||
dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops
|
||||
. oneFinalState () epsilonRE . mapTransitions RESymbol
|
||||
where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa
|
||||
merge es = [(f,t,unionRE ls)
|
||||
| ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]]
|
||||
|
||||
elimStates :: (Ord a) => DFA (RE a) -> DFA (RE a)
|
||||
elimStates fa =
|
||||
case [s | (s,_) <- states fa, isInternal fa s] of
|
||||
[] -> fa
|
||||
sE:_ -> elimStates $ insertTransitionsWith (\x y -> unionRE [x,y]) ts $ removeState sE fa
|
||||
where sAs = nonLoopTransitionsTo sE fa
|
||||
sBs = nonLoopTransitionsFrom sE fa
|
||||
r2 = unionRE $ loops sE fa
|
||||
ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs]
|
||||
r r1 r3 = concatRE [r1, repeatRE r2, r3]
|
||||
|
||||
epsilonRE :: RE a
|
||||
epsilonRE = REConcat []
|
||||
|
||||
nullRE :: RE a
|
||||
nullRE = REUnion []
|
||||
|
||||
isNull :: RE a -> Bool
|
||||
isNull (REUnion []) = True
|
||||
isNull _ = False
|
||||
|
||||
isEpsilon :: RE a -> Bool
|
||||
isEpsilon (REConcat []) = True
|
||||
isEpsilon _ = False
|
||||
|
||||
unionRE :: Ord a => [RE a] -> RE a
|
||||
unionRE = unionOrId . sortNub . concatMap toList
|
||||
where
|
||||
toList (REUnion xs) = xs
|
||||
toList x = [x]
|
||||
unionOrId [r] = r
|
||||
unionOrId rs = REUnion rs
|
||||
|
||||
concatRE :: [RE a] -> RE a
|
||||
concatRE xs | any isNull xs = nullRE
|
||||
| otherwise = case concatMap toList xs of
|
||||
[r] -> r
|
||||
rs -> REConcat rs
|
||||
where
|
||||
toList (REConcat xs) = xs
|
||||
toList x = [x]
|
||||
|
||||
seqRE :: [a] -> RE a
|
||||
seqRE = concatRE . map RESymbol
|
||||
|
||||
repeatRE :: RE a -> RE a
|
||||
repeatRE x | isNull x || isEpsilon x = epsilonRE
|
||||
| otherwise = RERepeat x
|
||||
|
||||
finalRE :: Ord a => DFA (RE a) -> RE a
|
||||
finalRE fa = concatRE [repeatRE r1, r2,
|
||||
repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])]
|
||||
where
|
||||
s0 = startState fa
|
||||
[sF] = finalStates fa
|
||||
r1 = unionRE $ loops s0 fa
|
||||
r2 = unionRE $ map snd $ nonLoopTransitionsTo sF fa
|
||||
r3 = unionRE $ loops sF fa
|
||||
r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa
|
||||
|
||||
reverseRE :: RE a -> RE a
|
||||
reverseRE (REConcat xs) = REConcat $ map reverseRE $ reverse xs
|
||||
reverseRE (REUnion xs) = REUnion (map reverseRE xs)
|
||||
reverseRE (RERepeat x) = RERepeat (reverseRE x)
|
||||
reverseRE x = x
|
||||
|
||||
minimizeRE :: Ord a => RE a -> RE a
|
||||
minimizeRE = reverseRE . mergeForward . reverseRE . mergeForward
|
||||
|
||||
mergeForward :: Ord a => RE a -> RE a
|
||||
mergeForward (REUnion xs) =
|
||||
unionRE [concatRE [mergeForward y,mergeForward (unionRE rs)] | (y,rs) <- buildMultiMap (map firstRE xs)]
|
||||
mergeForward (REConcat (x:xs)) = concatRE [mergeForward x,mergeForward (REConcat xs)]
|
||||
mergeForward (RERepeat r) = repeatRE (mergeForward r)
|
||||
mergeForward r = r
|
||||
|
||||
firstRE :: RE a -> (RE a, RE a)
|
||||
firstRE (REConcat (x:xs)) = (x, REConcat xs)
|
||||
firstRE r = (r,epsilonRE)
|
||||
|
||||
mapRE :: (a -> b) -> RE a -> RE b
|
||||
mapRE f = mapRE' (RESymbol . f)
|
||||
|
||||
mapRE' :: (a -> RE b) -> RE a -> RE b
|
||||
mapRE' f (REConcat xs) = REConcat (map (mapRE' f) xs)
|
||||
mapRE' f (REUnion xs) = REUnion (map (mapRE' f) xs)
|
||||
mapRE' f (RERepeat x) = RERepeat (mapRE' f x)
|
||||
mapRE' f (RESymbol s) = f s
|
||||
|
||||
joinRE :: RE (RE a) -> RE a
|
||||
joinRE (REConcat xs) = REConcat (map joinRE xs)
|
||||
joinRE (REUnion xs) = REUnion (map joinRE xs)
|
||||
joinRE (RERepeat xs) = RERepeat (joinRE xs)
|
||||
joinRE (RESymbol ss) = ss
|
||||
|
||||
symbolsRE :: RE a -> [a]
|
||||
symbolsRE (REConcat xs) = concatMap symbolsRE xs
|
||||
symbolsRE (REUnion xs) = concatMap symbolsRE xs
|
||||
symbolsRE (RERepeat x) = symbolsRE x
|
||||
symbolsRE (RESymbol x) = [x]
|
||||
|
||||
-- Debugging
|
||||
|
||||
prRE :: RE String -> String
|
||||
prRE = prRE' 0
|
||||
|
||||
prRE' _ (REUnion []) = "<NULL>"
|
||||
prRE' n (REUnion xs) = p n 1 (concat (intersperse " | " (map (prRE' 1) xs)))
|
||||
prRE' n (REConcat xs) = p n 2 (unwords (map (prRE' 2) xs))
|
||||
prRE' n (RERepeat x) = p n 3 (prRE' 3 x) ++ "*"
|
||||
prRE' _ (RESymbol s) = s
|
||||
|
||||
p n m s | n >= m = "(" ++ s ++ ")"
|
||||
| True = s
|
||||
130
src-3.0/GF/Speech/Relation.hs
Normal file
130
src-3.0/GF/Speech/Relation.hs
Normal file
@@ -0,0 +1,130 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Relation
|
||||
-- Maintainer : BB
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/10/26 17:13:13 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.1 $
|
||||
--
|
||||
-- A simple module for relations.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Speech.Relation (Rel, mkRel, mkRel'
|
||||
, allRelated , isRelatedTo
|
||||
, transitiveClosure
|
||||
, reflexiveClosure, reflexiveClosure_
|
||||
, symmetricClosure
|
||||
, symmetricSubrelation, reflexiveSubrelation
|
||||
, reflexiveElements
|
||||
, equivalenceClasses
|
||||
, isTransitive, isReflexive, isSymmetric
|
||||
, isEquivalence
|
||||
, isSubRelationOf) where
|
||||
|
||||
import Data.List
|
||||
import Data.Maybe
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map
|
||||
import Data.Set (Set)
|
||||
import qualified Data.Set as Set
|
||||
|
||||
import GF.Data.Utilities
|
||||
|
||||
type Rel a = Map a (Set a)
|
||||
|
||||
-- | Creates a relation from a list of related pairs.
|
||||
mkRel :: Ord a => [(a,a)] -> Rel a
|
||||
mkRel ps = relates ps Map.empty
|
||||
|
||||
-- | Creates a relation from a list pairs of elements and the elements
|
||||
-- related to them.
|
||||
mkRel' :: Ord a => [(a,[a])] -> Rel a
|
||||
mkRel' xs = Map.fromListWith Set.union [(x,Set.fromList ys) | (x,ys) <- xs]
|
||||
|
||||
relToList :: Rel a -> [(a,a)]
|
||||
relToList r = [ (x,y) | (x,ys) <- Map.toList r, y <- Set.toList ys ]
|
||||
|
||||
-- | Add a pair to the relation.
|
||||
relate :: Ord a => a -> a -> Rel a -> Rel a
|
||||
relate x y r = Map.insertWith Set.union x (Set.singleton y) r
|
||||
|
||||
-- | Add a list of pairs to the relation.
|
||||
relates :: Ord a => [(a,a)] -> Rel a -> Rel a
|
||||
relates ps r = foldl (\r' (x,y) -> relate x y r') r ps
|
||||
|
||||
-- | Checks if an element is related to another.
|
||||
isRelatedTo :: Ord a => Rel a -> a -> a -> Bool
|
||||
isRelatedTo r x y = maybe False (y `Set.member`) (Map.lookup x r)
|
||||
|
||||
-- | Get the set of elements to which a given element is related.
|
||||
allRelated :: Ord a => Rel a -> a -> Set a
|
||||
allRelated r x = fromMaybe Set.empty (Map.lookup x r)
|
||||
|
||||
-- | Get all elements in the relation.
|
||||
domain :: Ord a => Rel a -> Set a
|
||||
domain r = foldl Set.union (Map.keysSet r) (Map.elems r)
|
||||
|
||||
-- | Keep only pairs for which both elements are in the given set.
|
||||
intersectSetRel :: Ord a => Set a -> Rel a -> Rel a
|
||||
intersectSetRel s = filterRel (\x y -> x `Set.member` s && y `Set.member` s)
|
||||
|
||||
transitiveClosure :: Ord a => Rel a -> Rel a
|
||||
transitiveClosure r = fix (Map.map growSet) r
|
||||
where growSet ys = foldl Set.union ys (map (allRelated r) $ Set.toList ys)
|
||||
|
||||
reflexiveClosure_ :: Ord a => [a] -- ^ The set over which the relation is defined.
|
||||
-> Rel a -> Rel a
|
||||
reflexiveClosure_ u r = relates [(x,x) | x <- u] r
|
||||
|
||||
-- | Uses 'domain'
|
||||
reflexiveClosure :: Ord a => Rel a -> Rel a
|
||||
reflexiveClosure r = reflexiveClosure_ (Set.toList $ domain r) r
|
||||
|
||||
symmetricClosure :: Ord a => Rel a -> Rel a
|
||||
symmetricClosure r = relates [ (y,x) | (x,y) <- relToList r ] r
|
||||
|
||||
symmetricSubrelation :: Ord a => Rel a -> Rel a
|
||||
symmetricSubrelation r = filterRel (flip $ isRelatedTo r) r
|
||||
|
||||
reflexiveSubrelation :: Ord a => Rel a -> Rel a
|
||||
reflexiveSubrelation r = intersectSetRel (reflexiveElements r) r
|
||||
|
||||
-- | Get the set of elements which are related to themselves.
|
||||
reflexiveElements :: Ord a => Rel a -> Set a
|
||||
reflexiveElements r = Set.fromList [ x | (x,ys) <- Map.toList r, x `Set.member` ys ]
|
||||
|
||||
-- | Keep the related pairs for which the predicate is true.
|
||||
filterRel :: Ord a => (a -> a -> Bool) -> Rel a -> Rel a
|
||||
filterRel p = purgeEmpty . Map.mapWithKey (Set.filter . p)
|
||||
|
||||
-- | Remove keys that map to no elements.
|
||||
purgeEmpty :: Ord a => Rel a -> Rel a
|
||||
purgeEmpty r = Map.filter (not . Set.null) r
|
||||
|
||||
|
||||
-- | Get the equivalence classes from an equivalence relation.
|
||||
equivalenceClasses :: Ord a => Rel a -> [Set a]
|
||||
equivalenceClasses r = equivalenceClasses_ (Map.keys r) r
|
||||
where equivalenceClasses_ [] _ = []
|
||||
equivalenceClasses_ (x:xs) r = ys:equivalenceClasses_ zs r
|
||||
where ys = allRelated r x
|
||||
zs = [x' | x' <- xs, not (x' `Set.member` ys)]
|
||||
|
||||
isTransitive :: Ord a => Rel a -> Bool
|
||||
isTransitive r = and [z `Set.member` ys | (x,ys) <- Map.toList r,
|
||||
y <- Set.toList ys, z <- Set.toList (allRelated r y)]
|
||||
|
||||
isReflexive :: Ord a => Rel a -> Bool
|
||||
isReflexive r = all (\ (x,ys) -> x `Set.member` ys) (Map.toList r)
|
||||
|
||||
isSymmetric :: Ord a => Rel a -> Bool
|
||||
isSymmetric r = and [isRelatedTo r y x | (x,y) <- relToList r]
|
||||
|
||||
isEquivalence :: Ord a => Rel a -> Bool
|
||||
isEquivalence r = isReflexive r && isSymmetric r && isTransitive r
|
||||
|
||||
isSubRelationOf :: Ord a => Rel a -> Rel a -> Bool
|
||||
isSubRelationOf r1 r2 = all (uncurry (isRelatedTo r2)) (relToList r1)
|
||||
39
src-3.0/GF/Speech/RelationQC.hs
Normal file
39
src-3.0/GF/Speech/RelationQC.hs
Normal file
@@ -0,0 +1,39 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : RelationQC
|
||||
-- Maintainer : BB
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/10/26 17:13:13 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.1 $
|
||||
--
|
||||
-- QuickCheck properties for GF.Speech.Relation
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Speech.RelationQC where
|
||||
|
||||
import GF.Speech.Relation
|
||||
|
||||
import Test.QuickCheck
|
||||
|
||||
prop_transitiveClosure_trans :: [(Int,Int)] -> Bool
|
||||
prop_transitiveClosure_trans ps = isTransitive (transitiveClosure (mkRel ps))
|
||||
|
||||
prop_symmetricSubrelation_symm :: [(Int,Int)] -> Bool
|
||||
prop_symmetricSubrelation_symm ps = isSymmetric (symmetricSubrelation (mkRel ps))
|
||||
|
||||
prop_symmetricSubrelation_sub :: [(Int,Int)] -> Bool
|
||||
prop_symmetricSubrelation_sub ps = symmetricSubrelation r `isSubRelationOf` r
|
||||
where r = mkRel ps
|
||||
|
||||
prop_symmetricClosure_symm :: [(Int,Int)] -> Bool
|
||||
prop_symmetricClosure_symm ps = isSymmetric (symmetricClosure (mkRel ps))
|
||||
|
||||
prop_reflexiveClosure_refl :: [(Int,Int)] -> Bool
|
||||
prop_reflexiveClosure_refl ps = isReflexive (reflexiveClosure (mkRel ps))
|
||||
|
||||
prop_mkEquiv_equiv :: [(Int,Int)] -> Bool
|
||||
prop_mkEquiv_equiv ps = isEquivalence (mkEquiv ps)
|
||||
where mkEquiv = transitiveClosure . symmetricClosure . reflexiveClosure . mkRel
|
||||
87
src-3.0/GF/Speech/SISR.hs
Normal file
87
src-3.0/GF/Speech/SISR.hs
Normal file
@@ -0,0 +1,87 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : GF.Speech.SISR
|
||||
-- Maintainer : BB
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- Abstract syntax and pretty printer for SISR,
|
||||
-- (Semantic Interpretation for Speech Recognition)
|
||||
--
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Speech.SISR (SISRFormat(..), SISRTag, prSISR,
|
||||
topCatSISR, profileInitSISR, catSISR, profileFinalSISR) where
|
||||
|
||||
import Data.List
|
||||
|
||||
import GF.Conversion.Types
|
||||
import GF.Data.Utilities
|
||||
import GF.Formalism.CFG
|
||||
import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), forestName)
|
||||
import GF.Infra.Ident
|
||||
import GF.Speech.TransformCFG
|
||||
import GF.Speech.SRG (SRGNT)
|
||||
|
||||
import qualified GF.JavaScript.AbsJS as JS
|
||||
import qualified GF.JavaScript.PrintJS as JS
|
||||
|
||||
data SISRFormat =
|
||||
-- SISR Working draft 1 April 2003
|
||||
-- http://www.w3.org/TR/2003/WD-semantic-interpretation-20030401/
|
||||
SISROld
|
||||
deriving Show
|
||||
|
||||
type SISRTag = [JS.DeclOrExpr]
|
||||
|
||||
|
||||
prSISR :: SISRTag -> String
|
||||
prSISR = JS.printTree
|
||||
|
||||
topCatSISR :: String -> SISRFormat -> SISRTag
|
||||
topCatSISR c fmt = map JS.DExpr [fmtOut fmt `ass` fmtRef fmt c]
|
||||
|
||||
profileInitSISR :: CFTerm -> SISRFormat -> SISRTag
|
||||
profileInitSISR t fmt
|
||||
| null (usedArgs t) = []
|
||||
| otherwise = [JS.Decl [JS.DInit args (JS.EArray [])]]
|
||||
|
||||
usedArgs :: CFTerm -> [Int]
|
||||
usedArgs (CFObj _ ts) = foldr union [] (map usedArgs ts)
|
||||
usedArgs (CFAbs _ x) = usedArgs x
|
||||
usedArgs (CFApp x y) = usedArgs x `union` usedArgs y
|
||||
usedArgs (CFRes i) = [i]
|
||||
usedArgs _ = []
|
||||
|
||||
catSISR :: CFTerm -> SRGNT -> SISRFormat -> SISRTag
|
||||
catSISR t (c,i) fmt
|
||||
| i `elem` usedArgs t = map JS.DExpr
|
||||
[JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i)) `ass` fmtRef fmt c]
|
||||
| otherwise = []
|
||||
|
||||
profileFinalSISR :: CFTerm -> SISRFormat -> SISRTag
|
||||
profileFinalSISR term fmt = [JS.DExpr $ fmtOut fmt `ass` f term]
|
||||
where
|
||||
f (CFObj n ts) = tree (prIdent n) (map f ts)
|
||||
f (CFAbs v x) = JS.EFun [var v] [JS.SReturn (f x)]
|
||||
f (CFApp x y) = JS.ECall (f x) [f y]
|
||||
f (CFRes i) = JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i))
|
||||
f (CFVar v) = JS.EVar (var v)
|
||||
f (CFMeta typ) = obj [("name",JS.EStr "?"), ("type",JS.EStr typ)]
|
||||
|
||||
fmtOut SISROld = JS.EVar (JS.Ident "$")
|
||||
|
||||
fmtRef SISROld c = JS.EVar (JS.Ident ("$" ++ c))
|
||||
|
||||
args = JS.Ident "a"
|
||||
|
||||
var v = JS.Ident ("x" ++ show v)
|
||||
|
||||
field x y = JS.EMember x (JS.Ident y)
|
||||
|
||||
ass = JS.EAssign
|
||||
|
||||
tree n xs = obj [("name", JS.EStr n), ("args", JS.EArray xs)]
|
||||
|
||||
obj ps = JS.EObj [JS.Prop (JS.StringPropName x) y | (x,y) <- ps]
|
||||
|
||||
235
src-3.0/GF/Speech/SRG.hs
Normal file
235
src-3.0/GF/Speech/SRG.hs
Normal file
@@ -0,0 +1,235 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : SRG
|
||||
-- Maintainer : BB
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/11/01 20:09:04 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.20 $
|
||||
--
|
||||
-- Representation of, conversion to, and utilities for
|
||||
-- printing of a general Speech Recognition Grammar.
|
||||
--
|
||||
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
|
||||
-- categories in the grammar
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..), SRGItem,
|
||||
SRGCat, SRGNT, CFTerm
|
||||
, makeSRG
|
||||
, makeSimpleSRG
|
||||
, makeNonRecursiveSRG
|
||||
, lookupFM_, prtS
|
||||
, cfgCatToGFCat, srgTopCats
|
||||
) where
|
||||
|
||||
import GF.Data.Operations
|
||||
import GF.Data.Utilities
|
||||
import GF.Infra.Ident
|
||||
import GF.Formalism.CFG
|
||||
import GF.Formalism.Utilities (Symbol(..), NameProfile(..)
|
||||
, Profile(..), SyntaxForest
|
||||
, filterCats, mapSymbol, symbol)
|
||||
import GF.Conversion.Types
|
||||
import GF.Infra.Print
|
||||
import GF.Speech.TransformCFG
|
||||
import GF.Speech.Relation
|
||||
import GF.Speech.FiniteState
|
||||
import GF.Speech.RegExp
|
||||
import GF.Speech.CFGToFiniteState
|
||||
import GF.Infra.Option
|
||||
import GF.Probabilistic.Probabilistic (Probs)
|
||||
import GF.Compile.ShellState (StateGrammar, stateProbs, stateOptions, cncId)
|
||||
|
||||
import Data.List
|
||||
import Data.Maybe (fromMaybe, maybeToList)
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map
|
||||
import Data.Set (Set)
|
||||
import qualified Data.Set as Set
|
||||
|
||||
import Debug.Trace
|
||||
|
||||
data SRG = SRG { grammarName :: String -- ^ grammar name
|
||||
, startCat :: SRGCat -- ^ start category name
|
||||
, origStartCat :: String -- ^ original start category name
|
||||
, grammarLanguage :: Maybe String -- ^ The language for which the grammar
|
||||
-- is intended, e.g. en-UK
|
||||
, rules :: [SRGRule]
|
||||
}
|
||||
deriving (Eq,Show)
|
||||
|
||||
data SRGRule = SRGRule SRGCat String [SRGAlt] -- ^ SRG category name, original category name
|
||||
-- and productions
|
||||
deriving (Eq,Show)
|
||||
|
||||
-- | maybe a probability, a rule name and an EBNF right-hand side
|
||||
data SRGAlt = SRGAlt (Maybe Double) CFTerm SRGItem
|
||||
deriving (Eq,Show)
|
||||
|
||||
type SRGItem = RE (Symbol SRGNT Token)
|
||||
|
||||
type SRGCat = String
|
||||
|
||||
-- | An SRG non-terminal. Category name and its number in the profile.
|
||||
type SRGNT = (SRGCat, Int)
|
||||
|
||||
-- | SRG category name and original name
|
||||
type CatName = (SRGCat,String)
|
||||
|
||||
type CatNames = Map String String
|
||||
|
||||
-- | Create a non-left-recursive SRG.
|
||||
-- FIXME: the probabilities in the returned
|
||||
-- grammar may be meaningless.
|
||||
makeSimpleSRG :: Options -- ^ Grammar options
|
||||
-> StateGrammar
|
||||
-> SRG
|
||||
makeSimpleSRG opt s = makeSRG preprocess opt s
|
||||
where
|
||||
preprocess origStart = traceStats "After mergeIdentical"
|
||||
. mergeIdentical
|
||||
. traceStats "After removeLeftRecursion"
|
||||
. removeLeftRecursion origStart
|
||||
. traceStats "After topDownFilter"
|
||||
. topDownFilter origStart
|
||||
. traceStats "After bottomUpFilter"
|
||||
. bottomUpFilter
|
||||
. traceStats "After removeCycles"
|
||||
. removeCycles
|
||||
. traceStats "Inital CFG"
|
||||
|
||||
traceStats s g = trace ("---- " ++ s ++ ": " ++ stats g {- ++ "\n" ++ prCFRules g ++ "----" -}) g
|
||||
|
||||
stats g = "Categories: " ++ show (countCats g)
|
||||
++ " Rules: " ++ show (countRules g)
|
||||
|
||||
makeNonRecursiveSRG :: Options
|
||||
-> StateGrammar
|
||||
-> SRG
|
||||
makeNonRecursiveSRG opt s = renameSRG $
|
||||
SRG { grammarName = prIdent (cncId s),
|
||||
startCat = start,
|
||||
origStartCat = origStart,
|
||||
grammarLanguage = getSpeechLanguage opt s,
|
||||
rules = rs }
|
||||
where
|
||||
origStart = getStartCatCF opt s
|
||||
MFA start dfas = cfgToMFA opt s
|
||||
rs = [SRGRule l l [SRGAlt Nothing dummyCFTerm (dfaToSRGItem dfa)] | (l,dfa) <- dfas]
|
||||
where dfaToSRGItem = mapRE dummySRGNT . minimizeRE . dfa2re
|
||||
dummyCFTerm = CFMeta "dummy"
|
||||
dummySRGNT = mapSymbol (\c -> (c,0)) id
|
||||
|
||||
makeSRG :: (Cat_ -> CFRules -> CFRules)
|
||||
-> Options -- ^ Grammar options
|
||||
-> StateGrammar
|
||||
-> SRG
|
||||
makeSRG preprocess opt s = renameSRG $
|
||||
SRG { grammarName = name,
|
||||
startCat = origStart,
|
||||
origStartCat = origStart,
|
||||
grammarLanguage = getSpeechLanguage opt s,
|
||||
rules = rs }
|
||||
where
|
||||
name = prIdent (cncId s)
|
||||
origStart = getStartCatCF opt s
|
||||
(_,cfgRules) = unzip $ allRulesGrouped $ preprocess origStart $ cfgToCFRules s
|
||||
rs = map (cfgRulesToSRGRule (stateProbs s)) cfgRules
|
||||
|
||||
-- | Give names on the form NameX to all categories.
|
||||
renameSRG :: SRG -> SRG
|
||||
renameSRG srg = srg { startCat = renameCat (startCat srg),
|
||||
rules = map renameRule (rules srg) }
|
||||
where
|
||||
names = mkCatNames (grammarName srg) (allSRGCats srg)
|
||||
renameRule (SRGRule _ origCat alts) = SRGRule (renameCat origCat) origCat (map renameAlt alts)
|
||||
renameAlt (SRGAlt mp n rhs) = SRGAlt mp n (mapRE renameSymbol rhs)
|
||||
renameSymbol = mapSymbol (\ (c,x) -> (renameCat c, x)) id
|
||||
renameCat = lookupFM_ names
|
||||
|
||||
getSpeechLanguage :: Options -> StateGrammar -> Maybe String
|
||||
getSpeechLanguage opt s =
|
||||
fmap (replace '_' '-') $ getOptVal (addOptions opt (stateOptions s)) speechLanguage
|
||||
|
||||
-- FIXME: merge alternatives with same rhs and profile but different probabilities
|
||||
cfgRulesToSRGRule :: Probs -> [CFRule_] -> SRGRule
|
||||
cfgRulesToSRGRule probs rs@(r:_) = SRGRule origCat origCat rhs
|
||||
where
|
||||
origCat = lhsCat r
|
||||
alts = [((n,ruleProb probs r),mkSRGSymbols 0 ss) | CFRule c ss n <- rs]
|
||||
rhs = [SRGAlt p n (srgItem sss) | ((n,p),sss) <- buildMultiMap alts ]
|
||||
|
||||
mkSRGSymbols _ [] = []
|
||||
mkSRGSymbols i (Cat c:ss) = Cat (c,i) : mkSRGSymbols (i+1) ss
|
||||
mkSRGSymbols i (Tok t:ss) = Tok t : mkSRGSymbols i ss
|
||||
|
||||
ruleProb :: Probs -> CFRule_ -> Maybe Double
|
||||
ruleProb probs r = lookupProb probs (ruleFun r)
|
||||
|
||||
-- FIXME: move to GF.Probabilistic.Probabilistic?
|
||||
lookupProb :: Probs -> Ident -> Maybe Double
|
||||
lookupProb probs i = lookupTree prIdent i probs
|
||||
|
||||
mkCatNames :: String -- ^ Category name prefix
|
||||
-> [String] -- ^ Original category names
|
||||
-> Map String String -- ^ Maps original names to SRG names
|
||||
mkCatNames prefix origNames = Map.fromList (zip origNames names)
|
||||
where names = [prefix ++ "_" ++ show x | x <- [0..]]
|
||||
|
||||
|
||||
allSRGCats :: SRG -> [String]
|
||||
allSRGCats SRG { rules = rs } = [c | SRGRule c _ _ <- rs]
|
||||
|
||||
cfgCatToGFCat :: SRGCat -> Maybe String
|
||||
cfgCatToGFCat c
|
||||
-- categories introduced by removeLeftRecursion contain dashes
|
||||
| '-' `elem` c = Nothing
|
||||
-- some categories introduced by -conversion=finite have the form
|
||||
-- "{fun:cat}..."
|
||||
| "{" `isPrefixOf` c = case dropWhile (/=':') $ takeWhile (/='}') $ tail c of
|
||||
':':c' -> Just c'
|
||||
_ -> error $ "cfgCatToGFCat: Strange category " ++ show c
|
||||
| otherwise = Just $ takeWhile (/='{') c
|
||||
|
||||
srgTopCats :: SRG -> [(String,[SRGCat])]
|
||||
srgTopCats srg = buildMultiMap [(oc, cat) | SRGRule cat origCat _ <- rules srg,
|
||||
oc <- maybeToList $ cfgCatToGFCat origCat]
|
||||
|
||||
--
|
||||
-- * Size-optimized EBNF SRGs
|
||||
--
|
||||
|
||||
srgItem :: [[Symbol SRGNT Token]] -> SRGItem
|
||||
srgItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
|
||||
-- non-optimizing version:
|
||||
--srgItem = unionRE . map seqRE
|
||||
|
||||
-- | Merges a list of right-hand sides which all have the same
|
||||
-- sequence of non-terminals.
|
||||
mergeItems :: [[Symbol SRGNT Token]] -> SRGItem
|
||||
mergeItems = minimizeRE . ungroupTokens . minimizeRE . unionRE . map seqRE . map groupTokens
|
||||
|
||||
groupTokens :: [Symbol SRGNT Token] -> [Symbol SRGNT [Token]]
|
||||
groupTokens [] = []
|
||||
groupTokens (Tok t:ss) = case groupTokens ss of
|
||||
Tok ts:ss' -> Tok (t:ts):ss'
|
||||
ss' -> Tok [t]:ss'
|
||||
groupTokens (Cat c:ss) = Cat c : groupTokens ss
|
||||
|
||||
ungroupTokens :: RE (Symbol SRGNT [Token]) -> RE (Symbol SRGNT Token)
|
||||
ungroupTokens = joinRE . mapRE (symbol (RESymbol . Cat) (REConcat . map (RESymbol . Tok)))
|
||||
|
||||
--
|
||||
-- * Utilities for building and printing SRGs
|
||||
--
|
||||
|
||||
lookupFM_ :: (Ord key, Show key) => Map key elt -> key -> elt
|
||||
lookupFM_ fm k = Map.findWithDefault err k fm
|
||||
where err = error $ "Key not found: " ++ show k
|
||||
++ "\namong " ++ show (Map.keys fm)
|
||||
|
||||
prtS :: Print a => a -> ShowS
|
||||
prtS = showString . prt
|
||||
378
src-3.0/GF/Speech/TransformCFG.hs
Normal file
378
src-3.0/GF/Speech/TransformCFG.hs
Normal file
@@ -0,0 +1,378 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : TransformCFG
|
||||
-- Maintainer : BB
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/11/01 20:09:04 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.24 $
|
||||
--
|
||||
-- This module does some useful transformations on CFGs.
|
||||
--
|
||||
-- peb thinks: most of this module should be moved to GF.Conversion...
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Speech.TransformCFG where
|
||||
|
||||
import GF.Canon.CanonToGFCC (canon2gfcc)
|
||||
import qualified GF.GFCC.CId as C
|
||||
import GF.GFCC.Macros (lookType,catSkeleton)
|
||||
import GF.GFCC.DataGFCC (GFCC)
|
||||
import GF.Conversion.Types
|
||||
import GF.CF.PPrCF (prCFCat)
|
||||
import GF.Data.Utilities
|
||||
import GF.Formalism.CFG
|
||||
import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol,
|
||||
NameProfile(..), Profile(..), name2fun, forestName)
|
||||
import GF.Infra.Ident
|
||||
import GF.Infra.Option
|
||||
import GF.Infra.Print
|
||||
import GF.Speech.Relation
|
||||
import GF.Compile.ShellState (StateGrammar, stateCFG, stateGrammarST, startCatStateOpts, stateOptions)
|
||||
|
||||
import Control.Monad
|
||||
import Control.Monad.State (State, get, put, evalState)
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map
|
||||
import Data.List
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Data.Monoid (mconcat)
|
||||
import Data.Set (Set)
|
||||
import qualified Data.Set as Set
|
||||
|
||||
-- not very nice to replace the structured CFCat type with a simple string
|
||||
type CFRule_ = CFRule Cat_ CFTerm Token
|
||||
|
||||
data CFTerm
|
||||
= CFObj Fun [CFTerm] -- ^ an abstract syntax function with arguments
|
||||
| CFAbs Int CFTerm -- ^ A lambda abstraction. The Int is the variable id.
|
||||
| CFApp CFTerm CFTerm -- ^ Application
|
||||
| CFRes Int -- ^ The result of the n:th (0-based) non-terminal
|
||||
| CFVar Int -- ^ A lambda-bound variable
|
||||
| CFMeta String -- ^ A metavariable
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
type Cat_ = String
|
||||
type CFSymbol_ = Symbol Cat_ Token
|
||||
|
||||
type CFRules = Map Cat_ (Set CFRule_)
|
||||
|
||||
|
||||
cfgToCFRules :: StateGrammar -> CFRules
|
||||
cfgToCFRules s =
|
||||
groupProds [CFRule (catToString c) (map symb r) (nameToTerm n)
|
||||
| CFRule c r n <- cfg]
|
||||
where cfg = stateCFG s
|
||||
symb = mapSymbol catToString id
|
||||
catToString = prt
|
||||
gfcc = stateGFCC s
|
||||
nameToTerm (Name IW [Unify [n]]) = CFRes n
|
||||
nameToTerm (Name f@(IC c) prs) =
|
||||
CFObj f (zipWith profileToTerm args prs)
|
||||
where (args,_) = catSkeleton $ lookType gfcc (C.CId c)
|
||||
nameToTerm n = error $ "cfgToCFRules.nameToTerm" ++ show n
|
||||
profileToTerm (C.CId t) (Unify []) = CFMeta t
|
||||
profileToTerm _ (Unify xs) = CFRes (last xs) -- FIXME: unify
|
||||
profileToTerm (C.CId t) (Constant f) = maybe (CFMeta t) (\x -> CFObj x []) (forestName f)
|
||||
|
||||
getStartCat :: Options -> StateGrammar -> String
|
||||
getStartCat opts sgr = prCFCat (startCatStateOpts opts' sgr)
|
||||
where opts' = addOptions opts (stateOptions sgr)
|
||||
|
||||
getStartCatCF :: Options -> StateGrammar -> String
|
||||
getStartCatCF opts sgr = getStartCat opts sgr ++ "{}.s"
|
||||
|
||||
stateGFCC :: StateGrammar -> GFCC
|
||||
stateGFCC = canon2gfcc noOptions . stateGrammarST
|
||||
|
||||
-- * Grammar filtering
|
||||
|
||||
-- | Removes all directly and indirectly cyclic productions.
|
||||
-- FIXME: this may be too aggressive, only one production
|
||||
-- needs to be removed to break a given cycle. But which
|
||||
-- one should we pick?
|
||||
-- FIXME: Does not (yet) remove productions which are cyclic
|
||||
-- because of empty productions.
|
||||
removeCycles :: CFRules -> CFRules
|
||||
removeCycles = groupProds . f . allRules
|
||||
where f rs = filter (not . isCycle) rs
|
||||
where alias = transitiveClosure $ mkRel [(c,c') | CFRule c [Cat c'] _ <- rs]
|
||||
isCycle (CFRule c [Cat c'] _) = isRelatedTo alias c' c
|
||||
isCycle _ = False
|
||||
|
||||
-- | Better bottom-up filter that also removes categories which contain no finite
|
||||
-- strings.
|
||||
bottomUpFilter :: CFRules -> CFRules
|
||||
bottomUpFilter gr = fix grow Map.empty
|
||||
where grow g = g `unionCFRules` filterCFRules (all (okSym g) . ruleRhs) gr
|
||||
okSym g = symbol (`elem` allCats g) (const True)
|
||||
|
||||
-- | Removes categories which are not reachable from the start category.
|
||||
topDownFilter :: Cat_ -> CFRules -> CFRules
|
||||
topDownFilter start rules = filterCFRulesCats (isRelatedTo uses start) rules
|
||||
where
|
||||
rhsCats = [ (lhsCat r, c') | r <- allRules rules, c' <- filterCats (ruleRhs r) ]
|
||||
uses = reflexiveClosure_ (allCats rules) $ transitiveClosure $ mkRel rhsCats
|
||||
|
||||
-- | Merges categories with identical right-hand-sides.
|
||||
-- FIXME: handle probabilities
|
||||
mergeIdentical :: CFRules -> CFRules
|
||||
mergeIdentical g = groupProds $ map subst $ allRules g
|
||||
where
|
||||
-- maps categories to their replacement
|
||||
m = Map.fromList [(y,concat (intersperse "+" xs))
|
||||
| (_,xs) <- buildMultiMap [(rulesKey rs,c) | (c,rs) <- Map.toList g], y <- xs]
|
||||
-- build data to compare for each category: a set of name,rhs pairs
|
||||
rulesKey = Set.map (\ (CFRule _ r n) -> (n,r))
|
||||
subst (CFRule c r n) = CFRule (substCat c) (map (mapSymbol substCat id) r) n
|
||||
substCat c = Map.findWithDefault (error $ "mergeIdentical: " ++ c) c m
|
||||
|
||||
-- * Removing left recursion
|
||||
|
||||
-- The LC_LR algorithm from
|
||||
-- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf
|
||||
removeLeftRecursion :: Cat_ -> CFRules -> CFRules
|
||||
removeLeftRecursion start gr
|
||||
= groupProds $ concat [scheme1, scheme2, scheme3, scheme4]
|
||||
where
|
||||
scheme1 = [CFRule a [x,Cat a_x] n' |
|
||||
a <- retainedLeftRecursive,
|
||||
x <- properLeftCornersOf a,
|
||||
not (isLeftRecursive x),
|
||||
let a_x = mkCat (Cat a) x,
|
||||
-- this is an extension of LC_LR to avoid generating
|
||||
-- A-X categories for which there are no productions:
|
||||
a_x `Set.member` newCats,
|
||||
let n' = symbol (\_ -> CFApp (CFRes 1) (CFRes 0))
|
||||
(\_ -> CFRes 0) x]
|
||||
scheme2 = [CFRule a_x (beta++[Cat a_b]) n' |
|
||||
a <- retainedLeftRecursive,
|
||||
b@(Cat b') <- properLeftCornersOf a,
|
||||
isLeftRecursive b,
|
||||
CFRule _ (x:beta) n <- catRules gr b',
|
||||
let a_x = mkCat (Cat a) x,
|
||||
let a_b = mkCat (Cat a) b,
|
||||
let i = length $ filterCats beta,
|
||||
let n' = symbol (\_ -> CFAbs 1 (CFApp (CFRes i) (shiftTerm n)))
|
||||
(\_ -> CFApp (CFRes i) n) x]
|
||||
scheme3 = [CFRule a_x beta n' |
|
||||
a <- retainedLeftRecursive,
|
||||
x <- properLeftCornersOf a,
|
||||
CFRule _ (x':beta) n <- catRules gr a,
|
||||
x == x',
|
||||
let a_x = mkCat (Cat a) x,
|
||||
let n' = symbol (\_ -> CFAbs 1 (shiftTerm n))
|
||||
(\_ -> n) x]
|
||||
scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . Cat) cats
|
||||
|
||||
newCats = Set.fromList (map lhsCat (scheme2 ++ scheme3))
|
||||
|
||||
shiftTerm :: CFTerm -> CFTerm
|
||||
shiftTerm (CFObj f ts) = CFObj f (map shiftTerm ts)
|
||||
shiftTerm (CFRes 0) = CFVar 1
|
||||
shiftTerm (CFRes n) = CFRes (n-1)
|
||||
shiftTerm t = t
|
||||
-- note: the rest don't occur in the original grammar
|
||||
|
||||
cats = allCats gr
|
||||
rules = allRules gr
|
||||
|
||||
directLeftCorner = mkRel [(Cat c,t) | CFRule c (t:_) _ <- allRules gr]
|
||||
leftCorner = reflexiveClosure_ (map Cat cats) $ transitiveClosure directLeftCorner
|
||||
properLeftCorner = transitiveClosure directLeftCorner
|
||||
properLeftCornersOf = Set.toList . allRelated properLeftCorner . Cat
|
||||
isProperLeftCornerOf = flip (isRelatedTo properLeftCorner)
|
||||
|
||||
leftRecursive = reflexiveElements properLeftCorner
|
||||
isLeftRecursive = (`Set.member` leftRecursive)
|
||||
|
||||
retained = start `Set.insert`
|
||||
Set.fromList [a | r <- allRules (filterCFRulesCats (not . isLeftRecursive . Cat) gr),
|
||||
Cat a <- ruleRhs r]
|
||||
isRetained = (`Set.member` retained)
|
||||
|
||||
retainedLeftRecursive = filter (isLeftRecursive . Cat) $ Set.toList retained
|
||||
|
||||
mkCat :: CFSymbol_ -> CFSymbol_ -> Cat_
|
||||
mkCat x y = showSymbol x ++ "-" ++ showSymbol y
|
||||
where showSymbol = symbol id show
|
||||
|
||||
{-
|
||||
|
||||
-- Paull's algorithm, see
|
||||
-- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf
|
||||
removeLeftRecursion :: Cat_ -> CFRules -> CFRules
|
||||
removeLeftRecursion start rs = removeDirectLeftRecursions $ map handleProds rs
|
||||
where
|
||||
handleProds (c, r) = (c, concatMap handleProd r)
|
||||
handleProd (CFRule ai (Cat aj:alpha) n) | aj < ai =
|
||||
-- FIXME: for non-recursive categories, this changes
|
||||
-- the grammar unneccessarily, maybe we can use mutRecCats
|
||||
-- to make this less invasive
|
||||
-- FIXME: this will give multiple rules with the same name,
|
||||
-- which may mess up the probabilities.
|
||||
[CFRule ai (beta ++ alpha) n | CFRule _ beta _ <- lookup' aj rs]
|
||||
handleProd r = [r]
|
||||
|
||||
removeDirectLeftRecursions :: CFRules -> CFRules
|
||||
removeDirectLeftRecursions = concat . flip evalState 0 . mapM removeDirectLeftRecursion
|
||||
|
||||
removeDirectLeftRecursion :: (Cat_,[CFRule_]) -- ^ All productions for a category
|
||||
-> State Int CFRules
|
||||
removeDirectLeftRecursion (a,rs)
|
||||
| null dr = return [(a,rs)]
|
||||
| otherwise =
|
||||
do
|
||||
a' <- fresh a
|
||||
let as = maybeEndWithA' nr
|
||||
is = [CFRule a' (tail r) n | CFRule _ r n <- dr]
|
||||
a's = maybeEndWithA' is
|
||||
-- the not null constraint here avoids creating new
|
||||
-- left recursive (cyclic) rules.
|
||||
maybeEndWithA' xs = xs ++ [CFRule c (r++[Cat a']) n | CFRule c r n <- xs,
|
||||
not (null r)]
|
||||
return [(a, as), (a', a's)]
|
||||
where
|
||||
(dr,nr) = partition isDirectLeftRecursive rs
|
||||
fresh x = do { n <- get; put (n+1); return $ x ++ "-" ++ show n }
|
||||
|
||||
isDirectLeftRecursive :: CFRule_ -> Bool
|
||||
isDirectLeftRecursive (CFRule c (Cat c':_) _) = c == c'
|
||||
isDirectLeftRecursive _ = False
|
||||
|
||||
-}
|
||||
|
||||
-- | Get the sets of mutually recursive non-terminals for a grammar.
|
||||
mutRecCats :: Bool -- ^ If true, all categories will be in some set.
|
||||
-- If false, only recursive categories will be included.
|
||||
-> CFRules -> [Set Cat_]
|
||||
mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
|
||||
where r = mkRel [(c,c') | CFRule c ss _ <- allRules g, Cat c' <- ss]
|
||||
refl = if incAll then reflexiveClosure_ (allCats g) else reflexiveSubrelation
|
||||
|
||||
--
|
||||
-- * Approximate context-free grammars with regular grammars.
|
||||
--
|
||||
|
||||
-- Use the transformation algorithm from \"Regular Approximation of Context-free
|
||||
-- Grammars through Approximation\", Mohri and Nederhof, 2000
|
||||
-- to create an over-generating regular frammar for a context-free
|
||||
-- grammar
|
||||
makeRegular :: CFRules -> CFRules
|
||||
makeRegular g = groupProds $ concatMap trSet (mutRecCats True g)
|
||||
where trSet cs | allXLinear cs rs = rs
|
||||
| otherwise = concatMap handleCat csl
|
||||
where csl = Set.toList cs
|
||||
rs = catSetRules g cs
|
||||
handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e
|
||||
++ concatMap (makeRightLinearRules c) (catRules g c)
|
||||
where c' = newCat c
|
||||
makeRightLinearRules b' (CFRule c ss n) =
|
||||
case ys of
|
||||
[] -> newRule b' (xs ++ [Cat (newCat c)]) n -- no non-terminals left
|
||||
(Cat b:zs) -> newRule b' (xs ++ [Cat b]) n
|
||||
++ makeRightLinearRules (newCat b) (CFRule c zs n)
|
||||
where (xs,ys) = break (`catElem` cs) ss
|
||||
-- don't add rules on the form A -> A
|
||||
newRule c rhs n | rhs == [Cat c] = []
|
||||
| otherwise = [CFRule c rhs n]
|
||||
newCat c = c ++ "$"
|
||||
|
||||
--
|
||||
-- * CFG rule utilities
|
||||
--
|
||||
|
||||
-- | Group productions by their lhs categories
|
||||
groupProds :: [CFRule_] -> CFRules
|
||||
groupProds = Map.fromListWith Set.union . map (\r -> (lhsCat r,Set.singleton r))
|
||||
|
||||
allRules :: CFRules -> [CFRule_]
|
||||
allRules = concat . map Set.toList . Map.elems
|
||||
|
||||
allRulesGrouped :: CFRules -> [(Cat_,[CFRule_])]
|
||||
allRulesGrouped = Map.toList . Map.map Set.toList
|
||||
|
||||
allCats :: CFRules -> [Cat_]
|
||||
allCats = Map.keys
|
||||
|
||||
catRules :: CFRules -> Cat_ -> [CFRule_]
|
||||
catRules rs c = Set.toList $ Map.findWithDefault Set.empty c rs
|
||||
|
||||
catSetRules :: CFRules -> Set Cat_ -> [CFRule_]
|
||||
catSetRules g cs = allRules $ Map.filterWithKey (\c _ -> c `Set.member` cs) g
|
||||
|
||||
cleanCFRules :: CFRules -> CFRules
|
||||
cleanCFRules = Map.filter (not . Set.null)
|
||||
|
||||
unionCFRules :: CFRules -> CFRules -> CFRules
|
||||
unionCFRules = Map.unionWith Set.union
|
||||
|
||||
filterCFRules :: (CFRule_ -> Bool) -> CFRules -> CFRules
|
||||
filterCFRules p = cleanCFRules . Map.map (Set.filter p)
|
||||
|
||||
filterCFRulesCats :: (Cat_ -> Bool) -> CFRules -> CFRules
|
||||
filterCFRulesCats p = Map.filterWithKey (\c _ -> p c)
|
||||
|
||||
countCats :: CFRules -> Int
|
||||
countCats = Map.size . cleanCFRules
|
||||
|
||||
countRules :: CFRules -> Int
|
||||
countRules = length . allRules
|
||||
|
||||
lhsCat :: CFRule c n t -> c
|
||||
lhsCat (CFRule c _ _) = c
|
||||
|
||||
ruleRhs :: CFRule c n t -> [Symbol c t]
|
||||
ruleRhs (CFRule _ ss _) = ss
|
||||
|
||||
ruleFun :: CFRule_ -> Fun
|
||||
ruleFun (CFRule _ _ t) = f t
|
||||
where f (CFObj n _) = n
|
||||
f (CFApp _ x) = f x
|
||||
f (CFAbs _ x) = f x
|
||||
f _ = IC ""
|
||||
|
||||
-- | Checks if a symbol is a non-terminal of one of the given categories.
|
||||
catElem :: Ord c => Symbol c t -> Set c -> Bool
|
||||
catElem s cs = symbol (`Set.member` cs) (const False) s
|
||||
|
||||
-- | Check if any of the categories used on the right-hand side
|
||||
-- are in the given list of categories.
|
||||
anyUsedBy :: Eq c => [c] -> CFRule c n t -> Bool
|
||||
anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss)
|
||||
|
||||
mkCFTerm :: String -> CFTerm
|
||||
mkCFTerm n = CFObj (IC n) []
|
||||
|
||||
ruleIsNonRecursive :: Ord c => Set c -> CFRule c n t -> Bool
|
||||
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
|
||||
|
||||
noCatsInSet :: Ord c => Set c -> [Symbol c t] -> Bool
|
||||
noCatsInSet cs = not . any (`catElem` cs)
|
||||
|
||||
-- | Check if all the rules are right-linear, or all the rules are
|
||||
-- left-linear, with respect to given categories.
|
||||
allXLinear :: Ord c => Set c -> [CFRule c n t] -> Bool
|
||||
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
|
||||
|
||||
-- | Checks if a context-free rule is right-linear.
|
||||
isRightLinear :: Ord c =>
|
||||
Set c -- ^ The categories to consider
|
||||
-> CFRule c n t -- ^ The rule to check for right-linearity
|
||||
-> Bool
|
||||
isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
|
||||
|
||||
-- | Checks if a context-free rule is left-linear.
|
||||
isLeftLinear :: Ord c =>
|
||||
Set c -- ^ The categories to consider
|
||||
-> CFRule c n t -- ^ The rule to check for left-linearity
|
||||
-> Bool
|
||||
isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs
|
||||
|
||||
prCFRules :: CFRules -> String
|
||||
prCFRules = unlines . map prRule . allRules
|
||||
where
|
||||
prRule r = lhsCat r ++ " --> " ++ unwords (map prSym (ruleRhs r))
|
||||
prSym = symbol id (\t -> "\""++ t ++"\"")
|
||||
Reference in New Issue
Block a user