forked from GitHub/gf-core
Added speech recognition grammar generation code. There is no way yet to invoke the SRG printer, and only SRGS is included.
This commit is contained in:
338
src-3.0/GF/Speech/CFG.hs
Normal file
338
src-3.0/GF/Speech/CFG.hs
Normal file
@@ -0,0 +1,338 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : GF.Speech.CFG
|
||||
--
|
||||
-- Context-free grammar representation and manipulation.
|
||||
----------------------------------------------------------------------
|
||||
module GF.Speech.CFG where
|
||||
|
||||
import GF.Data.Utilities
|
||||
import PGF.CId
|
||||
import GF.Infra.Option
|
||||
import GF.Infra.PrintClass
|
||||
import GF.Speech.Relation
|
||||
|
||||
import Control.Monad
|
||||
import Control.Monad.State (State, get, put, evalState)
|
||||
import qualified Data.ByteString.Char8 as BS
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map
|
||||
import Data.List
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Data.Monoid (mconcat)
|
||||
import Data.Set (Set)
|
||||
import qualified Data.Set as Set
|
||||
|
||||
--
|
||||
-- * Types
|
||||
--
|
||||
|
||||
type Cat = String
|
||||
type Token = String
|
||||
|
||||
data Symbol c t = NonTerminal c | Terminal t
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
type CFSymbol = Symbol Cat Token
|
||||
|
||||
data CFRule = CFRule {
|
||||
lhsCat :: Cat,
|
||||
ruleRhs :: [CFSymbol],
|
||||
ruleName :: CFTerm
|
||||
}
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
data CFTerm
|
||||
= CFObj CId [CFTerm] -- ^ an abstract syntax function with arguments
|
||||
| CFAbs Int CFTerm -- ^ A lambda abstraction. The Int is the variable id.
|
||||
| CFApp CFTerm CFTerm -- ^ Application
|
||||
| CFRes Int -- ^ The result of the n:th (0-based) non-terminal
|
||||
| CFVar Int -- ^ A lambda-bound variable
|
||||
| CFMeta CId -- ^ A metavariable
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
data CFG = CFG { cfgStartCat :: Cat,
|
||||
cfgExternalCats :: Set Cat,
|
||||
cfgRules :: Map Cat (Set CFRule) }
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
--
|
||||
-- * Grammar filtering
|
||||
--
|
||||
|
||||
-- | Removes all directly and indirectly cyclic productions.
|
||||
-- FIXME: this may be too aggressive, only one production
|
||||
-- needs to be removed to break a given cycle. But which
|
||||
-- one should we pick?
|
||||
-- FIXME: Does not (yet) remove productions which are cyclic
|
||||
-- because of empty productions.
|
||||
removeCycles :: CFG -> CFG
|
||||
removeCycles = onRules f
|
||||
where f rs = filter (not . isCycle) rs
|
||||
where alias = transitiveClosure $ mkRel [(c,c') | CFRule c [NonTerminal c'] _ <- rs]
|
||||
isCycle (CFRule c [NonTerminal c'] _) = isRelatedTo alias c' c
|
||||
isCycle _ = False
|
||||
|
||||
-- | Better bottom-up filter that also removes categories which contain no finite
|
||||
-- strings.
|
||||
bottomUpFilter :: CFG -> CFG
|
||||
bottomUpFilter gr = fix grow (gr { cfgRules = Map.empty })
|
||||
where grow g = g `unionCFG` filterCFG (all (okSym g) . ruleRhs) gr
|
||||
okSym g = symbol (`elem` allCats g) (const True)
|
||||
|
||||
-- | Removes categories which are not reachable from the start category.
|
||||
topDownFilter :: CFG -> CFG
|
||||
topDownFilter cfg = filterCFGCats (isRelatedTo uses (cfgStartCat cfg)) cfg
|
||||
where
|
||||
rhsCats = [ (lhsCat r, c') | r <- allRules cfg, c' <- filterCats (ruleRhs r) ]
|
||||
uses = reflexiveClosure_ (allCats cfg) $ transitiveClosure $ mkRel rhsCats
|
||||
|
||||
-- | Merges categories with identical right-hand-sides.
|
||||
-- FIXME: handle probabilities
|
||||
mergeIdentical :: CFG -> CFG
|
||||
mergeIdentical g = onRules (map subst) g
|
||||
where
|
||||
-- maps categories to their replacement
|
||||
m = Map.fromList [(y,concat (intersperse "+" xs))
|
||||
| (_,xs) <- buildMultiMap [(rulesKey rs,c) | (c,rs) <- Map.toList (cfgRules g)], y <- xs]
|
||||
-- build data to compare for each category: a set of name,rhs pairs
|
||||
rulesKey = Set.map (\ (CFRule _ r n) -> (n,r))
|
||||
subst (CFRule c r n) = CFRule (substCat c) (map (mapSymbol substCat id) r) n
|
||||
substCat c = Map.findWithDefault (error $ "mergeIdentical: " ++ c) c m
|
||||
|
||||
--
|
||||
-- * Removing left recursion
|
||||
--
|
||||
|
||||
-- The LC_LR algorithm from
|
||||
-- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf
|
||||
removeLeftRecursion :: CFG -> CFG
|
||||
removeLeftRecursion gr
|
||||
= gr { cfgRules = groupProds $ concat [scheme1, scheme2, scheme3, scheme4] }
|
||||
where
|
||||
scheme1 = [CFRule a [x,NonTerminal a_x] n' |
|
||||
a <- retainedLeftRecursive,
|
||||
x <- properLeftCornersOf a,
|
||||
not (isLeftRecursive x),
|
||||
let a_x = mkCat (NonTerminal a) x,
|
||||
-- this is an extension of LC_LR to avoid generating
|
||||
-- A-X categories for which there are no productions:
|
||||
a_x `Set.member` newCats,
|
||||
let n' = symbol (\_ -> CFApp (CFRes 1) (CFRes 0))
|
||||
(\_ -> CFRes 0) x]
|
||||
scheme2 = [CFRule a_x (beta++[NonTerminal a_b]) n' |
|
||||
a <- retainedLeftRecursive,
|
||||
b@(NonTerminal b') <- properLeftCornersOf a,
|
||||
isLeftRecursive b,
|
||||
CFRule _ (x:beta) n <- catRules gr b',
|
||||
let a_x = mkCat (NonTerminal a) x,
|
||||
let a_b = mkCat (NonTerminal a) b,
|
||||
let i = length $ filterCats beta,
|
||||
let n' = symbol (\_ -> CFAbs 1 (CFApp (CFRes i) (shiftTerm n)))
|
||||
(\_ -> CFApp (CFRes i) n) x]
|
||||
scheme3 = [CFRule a_x beta n' |
|
||||
a <- retainedLeftRecursive,
|
||||
x <- properLeftCornersOf a,
|
||||
CFRule _ (x':beta) n <- catRules gr a,
|
||||
x == x',
|
||||
let a_x = mkCat (NonTerminal a) x,
|
||||
let n' = symbol (\_ -> CFAbs 1 (shiftTerm n))
|
||||
(\_ -> n) x]
|
||||
scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . NonTerminal) cats
|
||||
|
||||
newCats = Set.fromList (map lhsCat (scheme2 ++ scheme3))
|
||||
|
||||
shiftTerm :: CFTerm -> CFTerm
|
||||
shiftTerm (CFObj f ts) = CFObj f (map shiftTerm ts)
|
||||
shiftTerm (CFRes 0) = CFVar 1
|
||||
shiftTerm (CFRes n) = CFRes (n-1)
|
||||
shiftTerm t = t
|
||||
-- note: the rest don't occur in the original grammar
|
||||
|
||||
cats = allCats gr
|
||||
rules = allRules gr
|
||||
|
||||
directLeftCorner = mkRel [(NonTerminal c,t) | CFRule c (t:_) _ <- allRules gr]
|
||||
leftCorner = reflexiveClosure_ (map NonTerminal cats) $ transitiveClosure directLeftCorner
|
||||
properLeftCorner = transitiveClosure directLeftCorner
|
||||
properLeftCornersOf = Set.toList . allRelated properLeftCorner . NonTerminal
|
||||
isProperLeftCornerOf = flip (isRelatedTo properLeftCorner)
|
||||
|
||||
leftRecursive = reflexiveElements properLeftCorner
|
||||
isLeftRecursive = (`Set.member` leftRecursive)
|
||||
|
||||
retained = cfgStartCat gr `Set.insert`
|
||||
Set.fromList [a | r <- allRules (filterCFGCats (not . isLeftRecursive . NonTerminal) gr),
|
||||
NonTerminal a <- ruleRhs r]
|
||||
isRetained = (`Set.member` retained)
|
||||
|
||||
retainedLeftRecursive = filter (isLeftRecursive . NonTerminal) $ Set.toList retained
|
||||
|
||||
mkCat :: CFSymbol -> CFSymbol -> Cat
|
||||
mkCat x y = showSymbol x ++ "-" ++ showSymbol y
|
||||
where showSymbol = symbol id show
|
||||
|
||||
-- | Get the sets of mutually recursive non-terminals for a grammar.
|
||||
mutRecCats :: Bool -- ^ If true, all categories will be in some set.
|
||||
-- If false, only recursive categories will be included.
|
||||
-> CFG -> [Set Cat]
|
||||
mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
|
||||
where r = mkRel [(c,c') | CFRule c ss _ <- allRules g, NonTerminal c' <- ss]
|
||||
refl = if incAll then reflexiveClosure_ (allCats g) else reflexiveSubrelation
|
||||
|
||||
--
|
||||
-- * Approximate context-free grammars with regular grammars.
|
||||
--
|
||||
|
||||
makeSimpleRegular :: CFG -> CFG
|
||||
makeSimpleRegular = makeRegular . topDownFilter . bottomUpFilter . removeCycles
|
||||
|
||||
-- Use the transformation algorithm from \"Regular Approximation of Context-free
|
||||
-- Grammars through Approximation\", Mohri and Nederhof, 2000
|
||||
-- to create an over-generating regular frammar for a context-free
|
||||
-- grammar
|
||||
makeRegular :: CFG -> CFG
|
||||
makeRegular g = g { cfgRules = groupProds $ concatMap trSet (mutRecCats True g) }
|
||||
where trSet cs | allXLinear cs rs = rs
|
||||
| otherwise = concatMap handleCat csl
|
||||
where csl = Set.toList cs
|
||||
rs = catSetRules g cs
|
||||
handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e
|
||||
++ concatMap (makeRightLinearRules c) (catRules g c)
|
||||
where c' = newCat c
|
||||
makeRightLinearRules b' (CFRule c ss n) =
|
||||
case ys of
|
||||
[] -> newRule b' (xs ++ [NonTerminal (newCat c)]) n -- no non-terminals left
|
||||
(NonTerminal b:zs) -> newRule b' (xs ++ [NonTerminal b]) n
|
||||
++ makeRightLinearRules (newCat b) (CFRule c zs n)
|
||||
where (xs,ys) = break (`catElem` cs) ss
|
||||
-- don't add rules on the form A -> A
|
||||
newRule c rhs n | rhs == [NonTerminal c] = []
|
||||
| otherwise = [CFRule c rhs n]
|
||||
newCat c = c ++ "$"
|
||||
|
||||
--
|
||||
-- * CFG Utilities
|
||||
--
|
||||
|
||||
mkCFG :: Cat -> Set Cat -> [CFRule] -> CFG
|
||||
mkCFG start ext rs = CFG { cfgStartCat = start, cfgExternalCats = ext, cfgRules = groupProds rs }
|
||||
|
||||
groupProds :: [CFRule] -> Map Cat (Set CFRule)
|
||||
groupProds = Map.fromListWith Set.union . map (\r -> (lhsCat r,Set.singleton r))
|
||||
|
||||
-- | Gets all rules in a CFG.
|
||||
allRules :: CFG -> [CFRule]
|
||||
allRules = concat . map Set.toList . Map.elems . cfgRules
|
||||
|
||||
-- | Gets all rules in a CFG, grouped by their LHS categories.
|
||||
allRulesGrouped :: CFG -> [(Cat,[CFRule])]
|
||||
allRulesGrouped = Map.toList . Map.map Set.toList . cfgRules
|
||||
|
||||
-- | Gets all categories which have rules.
|
||||
allCats :: CFG -> [Cat]
|
||||
allCats = Map.keys . cfgRules
|
||||
|
||||
-- | Gets all rules for the given category.
|
||||
catRules :: CFG -> Cat -> [CFRule]
|
||||
catRules gr c = Set.toList $ Map.findWithDefault Set.empty c (cfgRules gr)
|
||||
|
||||
-- | Gets all rules for categories in the given set.
|
||||
catSetRules :: CFG -> Set Cat -> [CFRule]
|
||||
catSetRules gr cs = allRules $ filterCFGCats (`Set.member` cs) gr
|
||||
|
||||
onCFG :: (Map Cat (Set CFRule) -> Map Cat (Set CFRule)) -> CFG -> CFG
|
||||
onCFG f cfg = cfg { cfgRules = f (cfgRules cfg) }
|
||||
|
||||
onRules :: ([CFRule] -> [CFRule]) -> CFG -> CFG
|
||||
onRules f cfg = cfg { cfgRules = groupProds $ f $ allRules cfg }
|
||||
|
||||
-- | Clean up CFG after rules have been removed.
|
||||
cleanCFG :: CFG -> CFG
|
||||
cleanCFG = onCFG (Map.filter (not . Set.null))
|
||||
|
||||
-- | Combine two CFGs.
|
||||
unionCFG :: CFG -> CFG -> CFG
|
||||
unionCFG x y = onCFG (\rs -> Map.unionWith Set.union rs (cfgRules y)) x
|
||||
|
||||
filterCFG :: (CFRule -> Bool) -> CFG -> CFG
|
||||
filterCFG p = cleanCFG . onCFG (Map.map (Set.filter p))
|
||||
|
||||
filterCFGCats :: (Cat -> Bool) -> CFG -> CFG
|
||||
filterCFGCats p = onCFG (Map.filterWithKey (\c _ -> p c))
|
||||
|
||||
countCats :: CFG -> Int
|
||||
countCats = Map.size . cfgRules . cleanCFG
|
||||
|
||||
countRules :: CFG -> Int
|
||||
countRules = length . allRules
|
||||
|
||||
prCFG :: CFG -> String
|
||||
prCFG = unlines . map prRule . allRules
|
||||
where
|
||||
prRule r = lhsCat r ++ " --> " ++ unwords (map prSym (ruleRhs r))
|
||||
prSym = symbol id (\t -> "\""++ t ++"\"")
|
||||
|
||||
--
|
||||
-- * CFRule Utilities
|
||||
--
|
||||
|
||||
ruleFun :: CFRule -> CId
|
||||
ruleFun (CFRule _ _ t) = f t
|
||||
where f (CFObj n _) = n
|
||||
f (CFApp _ x) = f x
|
||||
f (CFAbs _ x) = f x
|
||||
f _ = mkCId ""
|
||||
|
||||
-- | Check if any of the categories used on the right-hand side
|
||||
-- are in the given list of categories.
|
||||
anyUsedBy :: [Cat] -> CFRule -> Bool
|
||||
anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss)
|
||||
|
||||
mkCFTerm :: String -> CFTerm
|
||||
mkCFTerm n = CFObj (mkCId n) []
|
||||
|
||||
ruleIsNonRecursive :: Set Cat -> CFRule -> Bool
|
||||
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
|
||||
|
||||
-- | Check if all the rules are right-linear, or all the rules are
|
||||
-- left-linear, with respect to given categories.
|
||||
allXLinear :: Set Cat -> [CFRule] -> Bool
|
||||
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
|
||||
|
||||
-- | Checks if a context-free rule is right-linear.
|
||||
isRightLinear :: Set Cat -- ^ The categories to consider
|
||||
-> CFRule -- ^ The rule to check for right-linearity
|
||||
-> Bool
|
||||
isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
|
||||
|
||||
-- | Checks if a context-free rule is left-linear.
|
||||
isLeftLinear :: Set Cat -- ^ The categories to consider
|
||||
-> CFRule -- ^ The rule to check for left-linearity
|
||||
-> Bool
|
||||
isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs
|
||||
|
||||
|
||||
--
|
||||
-- * Symbol utilities
|
||||
--
|
||||
|
||||
symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a
|
||||
symbol fc ft (NonTerminal cat) = fc cat
|
||||
symbol fc ft (Terminal tok) = ft tok
|
||||
|
||||
mapSymbol :: (c -> c') -> (t -> t') -> Symbol c t -> Symbol c' t'
|
||||
mapSymbol fc ft = symbol (NonTerminal . fc) (Terminal . ft)
|
||||
|
||||
filterCats :: [Symbol c t] -> [c]
|
||||
filterCats syms = [ cat | NonTerminal cat <- syms ]
|
||||
|
||||
filterToks :: [Symbol c t] -> [t]
|
||||
filterToks syms = [ tok | Terminal tok <- syms ]
|
||||
|
||||
-- | Checks if a symbol is a non-terminal of one of the given categories.
|
||||
catElem :: Ord c => Symbol c t -> Set c -> Bool
|
||||
catElem s cs = symbol (`Set.member` cs) (const False) s
|
||||
|
||||
noCatsInSet :: Ord c => Set c -> [Symbol c t] -> Bool
|
||||
noCatsInSet cs = not . any (`catElem` cs)
|
||||
244
src-3.0/GF/Speech/CFGToFA.hs
Normal file
244
src-3.0/GF/Speech/CFGToFA.hs
Normal file
@@ -0,0 +1,244 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : GF.Speech.CFGToFA
|
||||
--
|
||||
-- Approximates CFGs with finite state networks.
|
||||
----------------------------------------------------------------------
|
||||
module GF.Speech.CFGToFA (cfgToFA, makeSimpleRegular,
|
||||
MFA(..), cfgToMFA, cfgToFA') where
|
||||
|
||||
import Data.List
|
||||
import Data.Maybe
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map
|
||||
import Data.Set (Set)
|
||||
import qualified Data.Set as Set
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import GF.Data.Utilities
|
||||
import GF.Speech.CFG
|
||||
import GF.Speech.PGFToCFG
|
||||
import GF.Infra.Ident (Ident)
|
||||
|
||||
import GF.Speech.FiniteState
|
||||
import GF.Speech.Graph
|
||||
import GF.Speech.Relation
|
||||
import GF.Speech.CFG
|
||||
|
||||
data Recursivity = RightR | LeftR | NotR
|
||||
|
||||
data MutRecSet = MutRecSet {
|
||||
mrCats :: Set Cat,
|
||||
mrNonRecRules :: [CFRule],
|
||||
mrRecRules :: [CFRule],
|
||||
mrRec :: Recursivity
|
||||
}
|
||||
|
||||
|
||||
type MutRecSets = Map Cat MutRecSet
|
||||
|
||||
--
|
||||
-- * Multiple DFA type
|
||||
--
|
||||
|
||||
data MFA = MFA Cat [(Cat,DFA CFSymbol)]
|
||||
|
||||
|
||||
|
||||
cfgToFA :: CFG -> DFA Token
|
||||
cfgToFA = minimize . compileAutomaton . makeSimpleRegular
|
||||
|
||||
|
||||
--
|
||||
-- * Compile strongly regular grammars to NFAs
|
||||
--
|
||||
|
||||
-- Convert a strongly regular grammar to a finite automaton.
|
||||
compileAutomaton :: CFG -> NFA Token
|
||||
compileAutomaton g = make_fa (g,ns) s [NonTerminal (cfgStartCat g)] f fa
|
||||
where
|
||||
(fa,s,f) = newFA_
|
||||
ns = mutRecSets g $ mutRecCats False g
|
||||
|
||||
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
|
||||
-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000.
|
||||
make_fa :: (CFG,MutRecSets) -> State -> [CFSymbol] -> State
|
||||
-> NFA Token -> NFA Token
|
||||
make_fa c@(g,ns) q0 alpha q1 fa =
|
||||
case alpha of
|
||||
[] -> newTransition q0 q1 Nothing fa
|
||||
[Terminal t] -> newTransition q0 q1 (Just t) fa
|
||||
[NonTerminal a] ->
|
||||
case Map.lookup a ns of
|
||||
-- a is recursive
|
||||
Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) ->
|
||||
case mrRec n of
|
||||
-- the set Ni is right-recursive or cyclic
|
||||
RightR ->
|
||||
let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs]
|
||||
++ [(getState c, xs, getState d) | CFRule c ss _ <- rs,
|
||||
let (xs,NonTerminal d) = (init ss,last ss)]
|
||||
in make_fas new $ newTransition q0 (getState a) Nothing fa'
|
||||
-- the set Ni is left-recursive
|
||||
LeftR ->
|
||||
let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs]
|
||||
++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- rs]
|
||||
in make_fas new $ newTransition (getState a) q1 Nothing fa'
|
||||
where
|
||||
(fa',stateMap) = addStatesForCats ni fa
|
||||
getState x = Map.findWithDefault
|
||||
(error $ "CFGToFiniteState: No state for " ++ x)
|
||||
x stateMap
|
||||
-- a is not recursive
|
||||
Nothing -> let rs = catRules g a
|
||||
in foldl' (\f (CFRule _ b _) -> make_fa_ q0 b q1 f) fa rs
|
||||
(x:beta) -> let (fa',q) = newState () fa
|
||||
in make_fa_ q beta q1 $ make_fa_ q0 [x] q fa'
|
||||
where
|
||||
make_fa_ = make_fa c
|
||||
make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs
|
||||
|
||||
--
|
||||
-- * Compile a strongly regular grammar to a DFA with sub-automata
|
||||
--
|
||||
|
||||
cfgToMFA :: CFG -> MFA
|
||||
cfgToMFA = buildMFA . makeSimpleRegular
|
||||
|
||||
-- | Build a DFA by building and expanding an MFA
|
||||
cfgToFA' :: CFG -> DFA Token
|
||||
cfgToFA' = mfaToDFA . cfgToMFA
|
||||
|
||||
buildMFA :: CFG -> MFA
|
||||
buildMFA g = sortSubLats $ removeUnusedSubLats mfa
|
||||
where fas = compileAutomata g
|
||||
mfa = MFA (cfgStartCat g) [(c, minimize fa) | (c,fa) <- fas]
|
||||
|
||||
mfaStartDFA :: MFA -> DFA CFSymbol
|
||||
mfaStartDFA (MFA start subs) =
|
||||
fromMaybe (error $ "Bad start MFA: " ++ start) $ lookup start subs
|
||||
|
||||
mfaToDFA :: MFA -> DFA Token
|
||||
mfaToDFA mfa@(MFA _ subs) = minimize $ expand $ dfa2nfa $ mfaStartDFA mfa
|
||||
where
|
||||
subs' = Map.fromList [(c, dfa2nfa n) | (c,n) <- subs]
|
||||
getSub l = fromJust $ Map.lookup l subs'
|
||||
expand (FA (Graph c ns es) s f)
|
||||
= foldl' expandEdge (FA (Graph c ns []) s f) es
|
||||
expandEdge fa (f,t,x) =
|
||||
case x of
|
||||
Nothing -> newTransition f t Nothing fa
|
||||
Just (Terminal s) -> newTransition f t (Just s) fa
|
||||
Just (NonTerminal l) -> insertNFA fa (f,t) (expand $ getSub l)
|
||||
|
||||
removeUnusedSubLats :: MFA -> MFA
|
||||
removeUnusedSubLats mfa@(MFA start subs) = MFA start [(c,s) | (c,s) <- subs, isUsed c]
|
||||
where
|
||||
usedMap = subLatUseMap mfa
|
||||
used = growUsedSet (Set.singleton start)
|
||||
isUsed c = c `Set.member` used
|
||||
growUsedSet = fix (\s -> foldl Set.union s $ mapMaybe (flip Map.lookup usedMap) $ Set.toList s)
|
||||
|
||||
subLatUseMap :: MFA -> Map Cat (Set Cat)
|
||||
subLatUseMap (MFA _ subs) = Map.fromList [(c,usedSubLats n) | (c,n) <- subs]
|
||||
|
||||
usedSubLats :: DFA CFSymbol -> Set Cat
|
||||
usedSubLats fa = Set.fromList [s | (_,_,NonTerminal s) <- transitions fa]
|
||||
|
||||
-- | Sort sub-networks topologically.
|
||||
sortSubLats :: MFA -> MFA
|
||||
sortSubLats mfa@(MFA main subs) = MFA main (reverse $ sortLats usedByMap subs)
|
||||
where
|
||||
usedByMap = revMultiMap (subLatUseMap mfa)
|
||||
sortLats _ [] = []
|
||||
sortLats ub ls = xs ++ sortLats ub' ys
|
||||
where (xs,ys) = partition ((==0) . indeg) ls
|
||||
ub' = Map.map (Set.\\ Set.fromList (map fst xs)) ub
|
||||
indeg (c,_) = maybe 0 Set.size $ Map.lookup c ub
|
||||
|
||||
-- | Convert a strongly regular grammar to a number of finite automata,
|
||||
-- one for each non-terminal.
|
||||
-- The edges in the automata accept tokens, or name another automaton to use.
|
||||
compileAutomata :: CFG
|
||||
-> [(Cat,NFA CFSymbol)]
|
||||
-- ^ A map of non-terminals and their automata.
|
||||
compileAutomata g = [(c, makeOneFA c) | c <- allCats g]
|
||||
where
|
||||
mrs = mutRecSets g $ mutRecCats True g
|
||||
makeOneFA c = make_fa1 mr s [NonTerminal c] f fa
|
||||
where (fa,s,f) = newFA_
|
||||
mr = fromJust (Map.lookup c mrs)
|
||||
|
||||
|
||||
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
|
||||
-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000,
|
||||
-- adapted to build a finite automaton for a single (mutually recursive) set only.
|
||||
-- Categories not in the set will result in category-labelled edges.
|
||||
make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which
|
||||
-- we are building the automaton.
|
||||
-> State -- ^ State to come from
|
||||
-> [CFSymbol] -- ^ Symbols to accept
|
||||
-> State -- ^ State to end up in
|
||||
-> NFA CFSymbol -- ^ FA to add to.
|
||||
-> NFA CFSymbol
|
||||
make_fa1 mr q0 alpha q1 fa =
|
||||
case alpha of
|
||||
[] -> newTransition q0 q1 Nothing fa
|
||||
[t@(Terminal _)] -> newTransition q0 q1 (Just t) fa
|
||||
[c@(NonTerminal a)] | not (a `Set.member` mrCats mr) -> newTransition q0 q1 (Just c) fa
|
||||
[NonTerminal a] ->
|
||||
case mrRec mr of
|
||||
NotR -> -- the set is a non-recursive (always singleton) set of categories
|
||||
-- so the set of category rules is the set of rules for the whole set
|
||||
make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa
|
||||
RightR -> -- the set is right-recursive or cyclic
|
||||
let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr]
|
||||
++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr,
|
||||
let (xs,NonTerminal d) = (init ss,last ss)]
|
||||
in make_fas new $ newTransition q0 (getState a) Nothing fa'
|
||||
LeftR -> -- the set is left-recursive
|
||||
let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr]
|
||||
++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- mrRecRules mr]
|
||||
in make_fas new $ newTransition (getState a) q1 Nothing fa'
|
||||
where
|
||||
(fa',stateMap) = addStatesForCats (mrCats mr) fa
|
||||
getState x = Map.findWithDefault
|
||||
(error $ "CFGToFiniteState: No state for " ++ x)
|
||||
x stateMap
|
||||
(x:beta) -> let (fa',q) = newState () fa
|
||||
in make_fas [(q0,[x],q),(q,beta,q1)] fa'
|
||||
where
|
||||
make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa1 mr s1 xs s2 f') fa xs
|
||||
|
||||
mutRecSets :: CFG -> [Set Cat] -> MutRecSets
|
||||
mutRecSets g = Map.fromList . concatMap mkMutRecSet
|
||||
where
|
||||
mkMutRecSet cs = [ (c,ms) | c <- csl ]
|
||||
where csl = Set.toList cs
|
||||
rs = catSetRules g cs
|
||||
(nrs,rrs) = partition (ruleIsNonRecursive cs) rs
|
||||
ms = MutRecSet {
|
||||
mrCats = cs,
|
||||
mrNonRecRules = nrs,
|
||||
mrRecRules = rrs,
|
||||
mrRec = rec
|
||||
}
|
||||
rec | null rrs = NotR
|
||||
| all (isRightLinear cs) rrs = RightR
|
||||
| otherwise = LeftR
|
||||
|
||||
--
|
||||
-- * Utilities
|
||||
--
|
||||
|
||||
-- | Add a state for the given NFA for each of the categories
|
||||
-- in the given set. Returns a map of categories to their
|
||||
-- corresponding states.
|
||||
addStatesForCats :: Set Cat -> NFA t -> (NFA t, Map Cat State)
|
||||
addStatesForCats cs fa = (fa', m)
|
||||
where (fa', ns) = newStates (replicate (Set.size cs) ()) fa
|
||||
m = Map.fromList (zip (Set.toList cs) (map fst ns))
|
||||
|
||||
revMultiMap :: (Ord a, Ord b) => Map a (Set b) -> Map b (Set a)
|
||||
revMultiMap m = Map.fromListWith Set.union [ (y,Set.singleton x) | (x,s) <- Map.toList m, y <- Set.toList s]
|
||||
329
src-3.0/GF/Speech/FiniteState.hs
Normal file
329
src-3.0/GF/Speech/FiniteState.hs
Normal file
@@ -0,0 +1,329 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : FiniteState
|
||||
-- Maintainer : BB
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/11/10 16:43:44 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.16 $
|
||||
--
|
||||
-- A simple finite state network module.
|
||||
-----------------------------------------------------------------------------
|
||||
module GF.Speech.FiniteState (FA(..), State, NFA, DFA,
|
||||
startState, finalStates,
|
||||
states, transitions,
|
||||
isInternal,
|
||||
newFA, newFA_,
|
||||
addFinalState,
|
||||
newState, newStates,
|
||||
newTransition, newTransitions,
|
||||
insertTransitionWith, insertTransitionsWith,
|
||||
mapStates, mapTransitions,
|
||||
modifyTransitions,
|
||||
nonLoopTransitionsTo, nonLoopTransitionsFrom,
|
||||
loops,
|
||||
removeState,
|
||||
oneFinalState,
|
||||
insertNFA,
|
||||
onGraph,
|
||||
moveLabelsToNodes, removeTrivialEmptyNodes,
|
||||
minimize,
|
||||
dfa2nfa,
|
||||
unusedNames, renameStates,
|
||||
prFAGraphviz, faToGraphviz) where
|
||||
|
||||
import Data.List
|
||||
import Data.Maybe
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map
|
||||
import Data.Set (Set)
|
||||
import qualified Data.Set as Set
|
||||
|
||||
import GF.Data.Utilities
|
||||
import GF.Speech.Graph
|
||||
import qualified GF.Speech.Graphviz as Dot
|
||||
|
||||
type State = Int
|
||||
|
||||
-- | Type parameters: node id type, state label type, edge label type
|
||||
-- Data constructor arguments: nodes and edges, start state, final states
|
||||
data FA n a b = FA !(Graph n a b) !n ![n]
|
||||
|
||||
type NFA a = FA State () (Maybe a)
|
||||
|
||||
type DFA a = FA State () a
|
||||
|
||||
|
||||
startState :: FA n a b -> n
|
||||
startState (FA _ s _) = s
|
||||
|
||||
finalStates :: FA n a b -> [n]
|
||||
finalStates (FA _ _ ss) = ss
|
||||
|
||||
states :: FA n a b -> [(n,a)]
|
||||
states (FA g _ _) = nodes g
|
||||
|
||||
transitions :: FA n a b -> [(n,n,b)]
|
||||
transitions (FA g _ _) = edges g
|
||||
|
||||
newFA :: Enum n => a -- ^ Start node label
|
||||
-> FA n a b
|
||||
newFA l = FA g s []
|
||||
where (g,s) = newNode l (newGraph [toEnum 0..])
|
||||
|
||||
-- | Create a new finite automaton with an initial and a final state.
|
||||
newFA_ :: Enum n => (FA n () b, n, n)
|
||||
newFA_ = (fa'', s, f)
|
||||
where fa = newFA ()
|
||||
s = startState fa
|
||||
(fa',f) = newState () fa
|
||||
fa'' = addFinalState f fa'
|
||||
|
||||
addFinalState :: n -> FA n a b -> FA n a b
|
||||
addFinalState f (FA g s ss) = FA g s (f:ss)
|
||||
|
||||
newState :: a -> FA n a b -> (FA n a b, n)
|
||||
newState x (FA g s ss) = (FA g' s ss, n)
|
||||
where (g',n) = newNode x g
|
||||
|
||||
newStates :: [a] -> FA n a b -> (FA n a b, [(n,a)])
|
||||
newStates xs (FA g s ss) = (FA g' s ss, ns)
|
||||
where (g',ns) = newNodes xs g
|
||||
|
||||
newTransition :: n -> n -> b -> FA n a b -> FA n a b
|
||||
newTransition f t l = onGraph (newEdge (f,t,l))
|
||||
|
||||
newTransitions :: [(n, n, b)] -> FA n a b -> FA n a b
|
||||
newTransitions es = onGraph (newEdges es)
|
||||
|
||||
insertTransitionWith :: Eq n =>
|
||||
(b -> b -> b) -> (n, n, b) -> FA n a b -> FA n a b
|
||||
insertTransitionWith f t = onGraph (insertEdgeWith f t)
|
||||
|
||||
insertTransitionsWith :: Eq n =>
|
||||
(b -> b -> b) -> [(n, n, b)] -> FA n a b -> FA n a b
|
||||
insertTransitionsWith f ts fa =
|
||||
foldl' (flip (insertTransitionWith f)) fa ts
|
||||
|
||||
mapStates :: (a -> c) -> FA n a b -> FA n c b
|
||||
mapStates f = onGraph (nmap f)
|
||||
|
||||
mapTransitions :: (b -> c) -> FA n a b -> FA n a c
|
||||
mapTransitions f = onGraph (emap f)
|
||||
|
||||
modifyTransitions :: ([(n,n,b)] -> [(n,n,b)]) -> FA n a b -> FA n a b
|
||||
modifyTransitions f = onGraph (\ (Graph r ns es) -> Graph r ns (f es))
|
||||
|
||||
removeState :: Ord n => n -> FA n a b -> FA n a b
|
||||
removeState n = onGraph (removeNode n)
|
||||
|
||||
minimize :: Ord a => NFA a -> DFA a
|
||||
minimize = determinize . reverseNFA . dfa2nfa . determinize . reverseNFA
|
||||
|
||||
unusedNames :: FA n a b -> [n]
|
||||
unusedNames (FA (Graph names _ _) _ _) = names
|
||||
|
||||
-- | Gets all incoming transitions to a given state, excluding
|
||||
-- transtions from the state itself.
|
||||
nonLoopTransitionsTo :: Eq n => n -> FA n a b -> [(n,b)]
|
||||
nonLoopTransitionsTo s fa =
|
||||
[(f,l) | (f,t,l) <- transitions fa, t == s && f /= s]
|
||||
|
||||
nonLoopTransitionsFrom :: Eq n => n -> FA n a b -> [(n,b)]
|
||||
nonLoopTransitionsFrom s fa =
|
||||
[(t,l) | (f,t,l) <- transitions fa, f == s && t /= s]
|
||||
|
||||
loops :: Eq n => n -> FA n a b -> [b]
|
||||
loops s fa = [l | (f,t,l) <- transitions fa, f == s && t == s]
|
||||
|
||||
-- | Give new names to all nodes.
|
||||
renameStates :: Ord x => [y] -- ^ Infinite supply of new names
|
||||
-> FA x a b
|
||||
-> FA y a b
|
||||
renameStates supply (FA g s fs) = FA (renameNodes newName rest g) s' fs'
|
||||
where (ns,rest) = splitAt (length (nodes g)) supply
|
||||
newNodes = Map.fromList (zip (map fst (nodes g)) ns)
|
||||
newName n = Map.findWithDefault (error "FiniteState.newName") n newNodes
|
||||
s' = newName s
|
||||
fs' = map newName fs
|
||||
|
||||
-- | Insert an NFA into another
|
||||
insertNFA :: NFA a -- ^ NFA to insert into
|
||||
-> (State, State) -- ^ States to insert between
|
||||
-> NFA a -- ^ NFA to insert.
|
||||
-> NFA a
|
||||
insertNFA (FA g1 s1 fs1) (f,t) (FA g2 s2 fs2)
|
||||
= FA (newEdges es g') s1 fs1
|
||||
where
|
||||
es = (f,ren s2,Nothing):[(ren f2,t,Nothing) | f2 <- fs2]
|
||||
(g',ren) = mergeGraphs g1 g2
|
||||
|
||||
onGraph :: (Graph n a b -> Graph n c d) -> FA n a b -> FA n c d
|
||||
onGraph f (FA g s ss) = FA (f g) s ss
|
||||
|
||||
|
||||
-- | Make the finite automaton have a single final state
|
||||
-- by adding a new final state and adding an edge
|
||||
-- from the old final states to the new state.
|
||||
oneFinalState :: a -- ^ Label to give the new node
|
||||
-> b -- ^ Label to give the new edges
|
||||
-> FA n a b -- ^ The old network
|
||||
-> FA n a b -- ^ The new network
|
||||
oneFinalState nl el fa =
|
||||
let (FA g s fs,nf) = newState nl fa
|
||||
es = [ (f,nf,el) | f <- fs ]
|
||||
in FA (newEdges es g) s [nf]
|
||||
|
||||
-- | Transform a standard finite automaton with labelled edges
|
||||
-- to one where the labels are on the nodes instead. This can add
|
||||
-- up to one extra node per edge.
|
||||
moveLabelsToNodes :: (Ord n,Eq a) => FA n () (Maybe a) -> FA n (Maybe a) ()
|
||||
moveLabelsToNodes = onGraph f
|
||||
where f g@(Graph c _ _) = Graph c' ns (concat ess)
|
||||
where is = [ ((n,l),inc) | (n, (l,inc,_)) <- Map.toList (nodeInfo g)]
|
||||
(c',is') = mapAccumL fixIncoming c is
|
||||
(ns,ess) = unzip (concat is')
|
||||
|
||||
|
||||
-- | Remove empty nodes which are not start or final, and have
|
||||
-- exactly one outgoing edge or exactly one incoming edge.
|
||||
removeTrivialEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) ()
|
||||
removeTrivialEmptyNodes = pruneUnusable . skipSimpleEmptyNodes
|
||||
|
||||
-- | Move edges to empty nodes to point to the next node(s).
|
||||
-- This is not done if the pointed-to node is a final node.
|
||||
skipSimpleEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) ()
|
||||
skipSimpleEmptyNodes fa = onGraph og fa
|
||||
where
|
||||
og g@(Graph c ns es) = if es' == es then g else og (Graph c ns es')
|
||||
where
|
||||
es' = concatMap changeEdge es
|
||||
info = nodeInfo g
|
||||
changeEdge e@(f,t,())
|
||||
| isNothing (getNodeLabel info t)
|
||||
-- && (i * o <= i + o)
|
||||
&& not (isFinal fa t)
|
||||
= [ (f,t',()) | (_,t',()) <- getOutgoing info t]
|
||||
| otherwise = [e]
|
||||
-- where i = inDegree info t
|
||||
-- o = outDegree info t
|
||||
|
||||
isInternal :: Eq n => FA n a b -> n -> Bool
|
||||
isInternal (FA _ start final) n = n /= start && n `notElem` final
|
||||
|
||||
isFinal :: Eq n => FA n a b -> n -> Bool
|
||||
isFinal (FA _ _ final) n = n `elem` final
|
||||
|
||||
-- | Remove all internal nodes with no incoming edges
|
||||
-- or no outgoing edges.
|
||||
pruneUnusable :: Ord n => FA n (Maybe a) () -> FA n (Maybe a) ()
|
||||
pruneUnusable fa = onGraph f fa
|
||||
where
|
||||
f g = if Set.null rns then g else f (removeNodes rns g)
|
||||
where info = nodeInfo g
|
||||
rns = Set.fromList [ n | (n,_) <- nodes g,
|
||||
isInternal fa n,
|
||||
inDegree info n == 0
|
||||
|| outDegree info n == 0]
|
||||
|
||||
fixIncoming :: (Ord n, Eq a) => [n]
|
||||
-> (Node n (),[Edge n (Maybe a)]) -- ^ A node and its incoming edges
|
||||
-> ([n],[(Node n (Maybe a),[Edge n ()])]) -- ^ Replacement nodes with their
|
||||
-- incoming edges.
|
||||
fixIncoming cs c@((n,()),es) = (cs'', ((n,Nothing),es'):newContexts)
|
||||
where ls = nub $ map edgeLabel es
|
||||
(cs',cs'') = splitAt (length ls) cs
|
||||
newNodes = zip cs' ls
|
||||
es' = [ (x,n,()) | x <- map fst newNodes ]
|
||||
-- separate cyclic and non-cyclic edges
|
||||
(cyc,ncyc) = partition (\ (f,_,_) -> f == n) es
|
||||
-- keep all incoming non-cyclic edges with the right label
|
||||
to (x,l) = [ (f,x,()) | (f,_,l') <- ncyc, l == l']
|
||||
-- for each cyclic edge with the right label,
|
||||
-- add an edge from each of the new nodes (including this one)
|
||||
++ [ (y,x,()) | (f,_,l') <- cyc, l == l', (y,_) <- newNodes]
|
||||
newContexts = [ (v, to v) | v <- newNodes ]
|
||||
|
||||
alphabet :: Eq b => Graph n a (Maybe b) -> [b]
|
||||
alphabet = nub . catMaybes . map edgeLabel . edges
|
||||
|
||||
determinize :: Ord a => NFA a -> DFA a
|
||||
determinize (FA g s f) = let (ns,es) = h (Set.singleton start) Set.empty Set.empty
|
||||
(ns',es') = (Set.toList ns, Set.toList es)
|
||||
final = filter isDFAFinal ns'
|
||||
fa = FA (Graph undefined [(n,()) | n <- ns'] es') start final
|
||||
in renameStates [0..] fa
|
||||
where info = nodeInfo g
|
||||
-- reach = nodesReachable out
|
||||
start = closure info $ Set.singleton s
|
||||
isDFAFinal n = not (Set.null (Set.fromList f `Set.intersection` n))
|
||||
h currentStates oldStates es
|
||||
| Set.null currentStates = (oldStates,es)
|
||||
| otherwise = ((h $! uniqueNewStates) $! allOldStates) $! es'
|
||||
where
|
||||
allOldStates = oldStates `Set.union` currentStates
|
||||
(newStates,es') = new (Set.toList currentStates) Set.empty es
|
||||
uniqueNewStates = newStates Set.\\ allOldStates
|
||||
-- Get the sets of states reachable from the given states
|
||||
-- by consuming one symbol, and the associated edges.
|
||||
new [] rs es = (rs,es)
|
||||
new (n:ns) rs es = new ns rs' es'
|
||||
where cs = reachable info n --reachable reach n
|
||||
rs' = rs `Set.union` Set.fromList (map snd cs)
|
||||
es' = es `Set.union` Set.fromList [(n,s,c) | (c,s) <- cs]
|
||||
|
||||
|
||||
-- | Get all the nodes reachable from a list of nodes by only empty edges.
|
||||
closure :: Ord n => NodeInfo n a (Maybe b) -> Set n -> Set n
|
||||
closure info x = closure_ x x
|
||||
where closure_ acc check | Set.null check = acc
|
||||
| otherwise = closure_ acc' check'
|
||||
where
|
||||
reach = Set.fromList [y | x <- Set.toList check,
|
||||
(_,y,Nothing) <- getOutgoing info x]
|
||||
acc' = acc `Set.union` reach
|
||||
check' = reach Set.\\ acc
|
||||
|
||||
-- | Get a map of labels to sets of all nodes reachable
|
||||
-- from a the set of nodes by one edge with the given
|
||||
-- label and then any number of empty edges.
|
||||
reachable :: (Ord n,Ord b) => NodeInfo n a (Maybe b) -> Set n -> [(b,Set n)]
|
||||
reachable info ns = Map.toList $ Map.map (closure info . Set.fromList) $ reachable1 info ns
|
||||
reachable1 info ns = Map.fromListWith (++) [(c, [y]) | n <- Set.toList ns, (_,y,Just c) <- getOutgoing info n]
|
||||
|
||||
reverseNFA :: NFA a -> NFA a
|
||||
reverseNFA (FA g s fs) = FA g''' s' [s]
|
||||
where g' = reverseGraph g
|
||||
(g'',s') = newNode () g'
|
||||
g''' = newEdges [(s',f,Nothing) | f <- fs] g''
|
||||
|
||||
dfa2nfa :: DFA a -> NFA a
|
||||
dfa2nfa = mapTransitions Just
|
||||
|
||||
--
|
||||
-- * Visualization
|
||||
--
|
||||
|
||||
prFAGraphviz :: (Eq n,Show n) => FA n String String -> String
|
||||
prFAGraphviz = Dot.prGraphviz . faToGraphviz
|
||||
|
||||
prFAGraphviz_ :: (Eq n,Show n,Show a, Show b) => FA n a b -> String
|
||||
prFAGraphviz_ = Dot.prGraphviz . faToGraphviz . mapStates show . mapTransitions show
|
||||
|
||||
faToGraphviz :: (Eq n,Show n) => FA n String String -> Dot.Graph
|
||||
faToGraphviz (FA (Graph _ ns es) s f)
|
||||
= Dot.Graph Dot.Directed Nothing [] (map mkNode ns) (map mkEdge es) []
|
||||
where mkNode (n,l) = Dot.Node (show n) attrs
|
||||
where attrs = [("label",l)]
|
||||
++ if n == s then [("shape","box")] else []
|
||||
++ if n `elem` f then [("style","bold")] else []
|
||||
mkEdge (x,y,l) = Dot.Edge (show x) (show y) [("label",l)]
|
||||
|
||||
--
|
||||
-- * Utilities
|
||||
--
|
||||
|
||||
lookups :: Ord k => [k] -> Map k a -> [a]
|
||||
lookups xs m = mapMaybe (flip Map.lookup m) xs
|
||||
178
src-3.0/GF/Speech/Graph.hs
Normal file
178
src-3.0/GF/Speech/Graph.hs
Normal file
@@ -0,0 +1,178 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Graph
|
||||
-- Maintainer : BB
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/11/10 16:43:44 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.2 $
|
||||
--
|
||||
-- A simple graph module.
|
||||
-----------------------------------------------------------------------------
|
||||
module GF.Speech.Graph ( Graph(..), Node, Edge, NodeInfo
|
||||
, newGraph, nodes, edges
|
||||
, nmap, emap, newNode, newNodes, newEdge, newEdges
|
||||
, insertEdgeWith
|
||||
, removeNode, removeNodes
|
||||
, nodeInfo
|
||||
, getIncoming, getOutgoing, getNodeLabel
|
||||
, inDegree, outDegree
|
||||
, nodeLabel
|
||||
, edgeFrom, edgeTo, edgeLabel
|
||||
, reverseGraph, mergeGraphs, renameNodes
|
||||
) where
|
||||
|
||||
import GF.Data.Utilities
|
||||
|
||||
import Data.List
|
||||
import Data.Maybe
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map
|
||||
import Data.Set (Set)
|
||||
import qualified Data.Set as Set
|
||||
|
||||
data Graph n a b = Graph [n] ![Node n a] ![Edge n b]
|
||||
deriving (Eq,Show)
|
||||
|
||||
type Node n a = (n,a)
|
||||
type Edge n b = (n,n,b)
|
||||
|
||||
type NodeInfo n a b = Map n (a, [Edge n b], [Edge n b])
|
||||
|
||||
-- | Create a new empty graph.
|
||||
newGraph :: [n] -> Graph n a b
|
||||
newGraph ns = Graph ns [] []
|
||||
|
||||
-- | Get all the nodes in the graph.
|
||||
nodes :: Graph n a b -> [Node n a]
|
||||
nodes (Graph _ ns _) = ns
|
||||
|
||||
-- | Get all the edges in the graph.
|
||||
edges :: Graph n a b -> [Edge n b]
|
||||
edges (Graph _ _ es) = es
|
||||
|
||||
-- | Map a function over the node labels.
|
||||
nmap :: (a -> c) -> Graph n a b -> Graph n c b
|
||||
nmap f (Graph c ns es) = Graph c [(n,f l) | (n,l) <- ns] es
|
||||
|
||||
-- | Map a function over the edge labels.
|
||||
emap :: (b -> c) -> Graph n a b -> Graph n a c
|
||||
emap f (Graph c ns es) = Graph c ns [(x,y,f l) | (x,y,l) <- es]
|
||||
|
||||
-- | Add a node to the graph.
|
||||
newNode :: a -- ^ Node label
|
||||
-> Graph n a b
|
||||
-> (Graph n a b,n) -- ^ Node graph and name of new node
|
||||
newNode l (Graph (c:cs) ns es) = (Graph cs ((c,l):ns) es, c)
|
||||
|
||||
newNodes :: [a] -> Graph n a b -> (Graph n a b,[Node n a])
|
||||
newNodes ls g = (g', zip ns ls)
|
||||
where (g',ns) = mapAccumL (flip newNode) g ls
|
||||
-- lazy version:
|
||||
--newNodes ls (Graph cs ns es) = (Graph cs' (ns'++ns) es, ns')
|
||||
-- where (xs,cs') = splitAt (length ls) cs
|
||||
-- ns' = zip xs ls
|
||||
|
||||
newEdge :: Edge n b -> Graph n a b -> Graph n a b
|
||||
newEdge e (Graph c ns es) = Graph c ns (e:es)
|
||||
|
||||
newEdges :: [Edge n b] -> Graph n a b -> Graph n a b
|
||||
newEdges es g = foldl' (flip newEdge) g es
|
||||
-- lazy version:
|
||||
-- newEdges es' (Graph c ns es) = Graph c ns (es'++es)
|
||||
|
||||
insertEdgeWith :: Eq n =>
|
||||
(b -> b -> b) -> Edge n b -> Graph n a b -> Graph n a b
|
||||
insertEdgeWith f e@(x,y,l) (Graph c ns es) = Graph c ns (h es)
|
||||
where h [] = [e]
|
||||
h (e'@(x',y',l'):es') | x' == x && y' == y = (x',y', f l l'):es'
|
||||
| otherwise = e':h es'
|
||||
|
||||
-- | Remove a node and all edges to and from that node.
|
||||
removeNode :: Ord n => n -> Graph n a b -> Graph n a b
|
||||
removeNode n = removeNodes (Set.singleton n)
|
||||
|
||||
-- | Remove a set of nodes and all edges to and from those nodes.
|
||||
removeNodes :: Ord n => Set n -> Graph n a b -> Graph n a b
|
||||
removeNodes xs (Graph c ns es) = Graph c ns' es'
|
||||
where
|
||||
keepNode n = not (Set.member n xs)
|
||||
ns' = [ x | x@(n,_) <- ns, keepNode n ]
|
||||
es' = [ e | e@(f,t,_) <- es, keepNode f && keepNode t ]
|
||||
|
||||
-- | Get a map of node names to info about each node.
|
||||
nodeInfo :: Ord n => Graph n a b -> NodeInfo n a b
|
||||
nodeInfo g = Map.fromList [ (n, (x, fn inc n, fn out n)) | (n,x) <- nodes g ]
|
||||
where
|
||||
inc = groupEdgesBy edgeTo g
|
||||
out = groupEdgesBy edgeFrom g
|
||||
fn m n = fromMaybe [] (Map.lookup n m)
|
||||
|
||||
groupEdgesBy :: (Ord n) => (Edge n b -> n) -- ^ Gets the node to group by
|
||||
-> Graph n a b -> Map n [Edge n b]
|
||||
groupEdgesBy f g = Map.fromListWith (++) [(f e, [e]) | e <- edges g]
|
||||
|
||||
lookupNode :: Ord n => NodeInfo n a b -> n -> (a, [Edge n b], [Edge n b])
|
||||
lookupNode i n = fromJust $ Map.lookup n i
|
||||
|
||||
getIncoming :: Ord n => NodeInfo n a b -> n -> [Edge n b]
|
||||
getIncoming i n = let (_,inc,_) = lookupNode i n in inc
|
||||
|
||||
getOutgoing :: Ord n => NodeInfo n a b -> n -> [Edge n b]
|
||||
getOutgoing i n = let (_,_,out) = lookupNode i n in out
|
||||
|
||||
inDegree :: Ord n => NodeInfo n a b -> n -> Int
|
||||
inDegree i n = length $ getIncoming i n
|
||||
|
||||
outDegree :: Ord n => NodeInfo n a b -> n -> Int
|
||||
outDegree i n = length $ getOutgoing i n
|
||||
|
||||
getNodeLabel :: Ord n => NodeInfo n a b -> n -> a
|
||||
getNodeLabel i n = let (l,_,_) = lookupNode i n in l
|
||||
|
||||
nodeLabel :: Node n a -> a
|
||||
nodeLabel = snd
|
||||
|
||||
edgeFrom :: Edge n b -> n
|
||||
edgeFrom (f,_,_) = f
|
||||
|
||||
edgeTo :: Edge n b -> n
|
||||
edgeTo (_,t,_) = t
|
||||
|
||||
edgeLabel :: Edge n b -> b
|
||||
edgeLabel (_,_,l) = l
|
||||
|
||||
reverseGraph :: Graph n a b -> Graph n a b
|
||||
reverseGraph (Graph c ns es) = Graph c ns [ (t,f,l) | (f,t,l) <- es ]
|
||||
|
||||
-- | Add the nodes from the second graph to the first graph.
|
||||
-- The nodes in the second graph will be renamed using the name
|
||||
-- supply in the first graph.
|
||||
-- This function is more efficient when the second graph
|
||||
-- is smaller than the first.
|
||||
mergeGraphs :: Ord m => Graph n a b -> Graph m a b
|
||||
-> (Graph n a b, m -> n) -- ^ The new graph and a function translating
|
||||
-- the old names of nodes in the second graph
|
||||
-- to names in the new graph.
|
||||
mergeGraphs (Graph c ns1 es1) g2 = (Graph c' (ns2++ns1) (es2++es1), newName)
|
||||
where
|
||||
(xs,c') = splitAt (length (nodes g2)) c
|
||||
newNames = Map.fromList (zip (map fst (nodes g2)) xs)
|
||||
newName n = fromJust $ Map.lookup n newNames
|
||||
Graph _ ns2 es2 = renameNodes newName undefined g2
|
||||
|
||||
-- | Rename the nodes in the graph.
|
||||
renameNodes :: (n -> m) -- ^ renaming function
|
||||
-> [m] -- ^ infinite supply of fresh node names, to
|
||||
-- use when adding nodes in the future.
|
||||
-> Graph n a b -> Graph m a b
|
||||
renameNodes newName c (Graph _ ns es) = Graph c ns' es'
|
||||
where ns' = map' (\ (n,x) -> (newName n,x)) ns
|
||||
es' = map' (\ (f,t,l) -> (newName f, newName t, l)) es
|
||||
|
||||
-- | A strict 'map'
|
||||
map' :: (a -> b) -> [a] -> [b]
|
||||
map' _ [] = []
|
||||
map' f (x:xs) = ((:) $! f x) $! map' f xs
|
||||
116
src-3.0/GF/Speech/Graphviz.hs
Normal file
116
src-3.0/GF/Speech/Graphviz.hs
Normal file
@@ -0,0 +1,116 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Graphviz
|
||||
-- Maintainer : BB
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/09/15 18:10:44 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.2 $
|
||||
--
|
||||
-- Graphviz DOT format representation and printing.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Speech.Graphviz (
|
||||
Graph(..), GraphType(..),
|
||||
Node(..), Edge(..),
|
||||
Attr,
|
||||
addSubGraphs,
|
||||
setName,
|
||||
setAttr,
|
||||
prGraphviz
|
||||
) where
|
||||
|
||||
import Data.Char
|
||||
|
||||
import GF.Data.Utilities
|
||||
|
||||
-- | Graph type, graph ID, graph attirbutes, graph nodes, graph edges, subgraphs
|
||||
data Graph = Graph {
|
||||
gType :: GraphType,
|
||||
gId :: Maybe String,
|
||||
gAttrs :: [Attr],
|
||||
gNodes :: [Node],
|
||||
gEdges :: [Edge],
|
||||
gSubgraphs :: [Graph]
|
||||
}
|
||||
deriving (Show)
|
||||
|
||||
data GraphType = Directed | Undirected
|
||||
deriving (Show)
|
||||
|
||||
data Node = Node String [Attr]
|
||||
deriving Show
|
||||
|
||||
data Edge = Edge String String [Attr]
|
||||
deriving Show
|
||||
|
||||
type Attr = (String,String)
|
||||
|
||||
--
|
||||
-- * Graph construction
|
||||
--
|
||||
|
||||
addSubGraphs :: [Graph] -> Graph -> Graph
|
||||
addSubGraphs gs g = g { gSubgraphs = gs ++ gSubgraphs g }
|
||||
|
||||
setName :: String -> Graph -> Graph
|
||||
setName n g = g { gId = Just n }
|
||||
|
||||
setAttr :: String -> String -> Graph -> Graph
|
||||
setAttr n v g = g { gAttrs = tableSet n v (gAttrs g) }
|
||||
|
||||
--
|
||||
-- * Pretty-printing
|
||||
--
|
||||
|
||||
prGraphviz :: Graph -> String
|
||||
prGraphviz g@(Graph t i _ _ _ _) =
|
||||
graphtype t ++ " " ++ maybe "" esc i ++ " {\n" ++ prGraph g ++ "}\n"
|
||||
|
||||
prSubGraph :: Graph -> String
|
||||
prSubGraph g@(Graph _ i _ _ _ _) =
|
||||
"subgraph" ++ " " ++ maybe "" esc i ++ " {\n" ++ prGraph g ++ "}"
|
||||
|
||||
prGraph :: Graph -> String
|
||||
prGraph (Graph t id at ns es ss) =
|
||||
unlines $ map (++";") (map prAttr at
|
||||
++ map prNode ns
|
||||
++ map (prEdge t) es
|
||||
++ map prSubGraph ss)
|
||||
|
||||
graphtype :: GraphType -> String
|
||||
graphtype Directed = "digraph"
|
||||
graphtype Undirected = "graph"
|
||||
|
||||
prNode :: Node -> String
|
||||
prNode (Node n at) = esc n ++ " " ++ prAttrList at
|
||||
|
||||
prEdge :: GraphType -> Edge -> String
|
||||
prEdge t (Edge x y at) = esc x ++ " " ++ edgeop t ++ " " ++ esc y ++ " " ++ prAttrList at
|
||||
|
||||
edgeop :: GraphType -> String
|
||||
edgeop Directed = "->"
|
||||
edgeop Undirected = "--"
|
||||
|
||||
prAttrList :: [Attr] -> String
|
||||
prAttrList [] = ""
|
||||
prAttrList at = "[" ++ join "," (map prAttr at) ++ "]"
|
||||
|
||||
prAttr :: Attr -> String
|
||||
prAttr (n,v) = esc n ++ " = " ++ esc v
|
||||
|
||||
esc :: String -> String
|
||||
esc s | needEsc s = "\"" ++ concat [ if shouldEsc c then ['\\',c] else [c] | c <- s ] ++ "\""
|
||||
| otherwise = s
|
||||
where shouldEsc = (`elem` ['"', '\\'])
|
||||
|
||||
needEsc :: String -> Bool
|
||||
needEsc [] = True
|
||||
needEsc xs | all isDigit xs = False
|
||||
needEsc (x:xs) = not (isIDFirst x && all isIDChar xs)
|
||||
|
||||
isIDFirst, isIDChar :: Char -> Bool
|
||||
isIDFirst c = c `elem` (['_']++['a'..'z']++['A'..'Z'])
|
||||
isIDChar c = isIDFirst c || isDigit c
|
||||
75
src-3.0/GF/Speech/PGFToCFG.hs
Normal file
75
src-3.0/GF/Speech/PGFToCFG.hs
Normal file
@@ -0,0 +1,75 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : GF.Speech.PGFToCFG
|
||||
--
|
||||
-- Approximates PGF grammars with context-free grammars.
|
||||
----------------------------------------------------------------------
|
||||
module GF.Speech.PGFToCFG (pgfToCFG) where
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Data as PGF
|
||||
import PGF.Macros
|
||||
import GF.Infra.Ident
|
||||
import GF.Speech.CFG
|
||||
|
||||
import Data.Array as Array
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map
|
||||
import Data.Maybe
|
||||
import Data.Set (Set)
|
||||
import qualified Data.Set as Set
|
||||
|
||||
|
||||
pgfToCFG :: PGF
|
||||
-> CId -- ^ Concrete syntax name
|
||||
-> CFG
|
||||
pgfToCFG pgf lang = mkCFG (lookStartCat pgf) extCats (startRules ++ concatMap fruleToCFRule rules)
|
||||
where
|
||||
pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang)
|
||||
|
||||
rules :: [FRule]
|
||||
rules = Array.elems (PGF.allRules pinfo)
|
||||
|
||||
fcatGFCats :: Map FCat CId
|
||||
fcatGFCats = Map.fromList [(fc,c) | (c,fcs) <- Map.toList (startupCats pinfo), fc <- fcs]
|
||||
|
||||
fcatGFCat :: FCat -> CId
|
||||
fcatGFCat c = fromMaybe (mkCId "Unknown") (Map.lookup c fcatGFCats)
|
||||
|
||||
fcatToCat :: FCat -> FIndex -> Cat
|
||||
fcatToCat c l = prCId (fcatGFCat c) ++ "_" ++ show c ++ "_" ++ show l
|
||||
|
||||
extCats :: Set Cat
|
||||
extCats = Set.fromList $ map lhsCat startRules
|
||||
|
||||
-- NOTE: this is only correct for cats that have a lincat with exactly one row.
|
||||
startRules :: [CFRule]
|
||||
startRules = [CFRule (prCId c) [NonTerminal (fcatToCat fc 0)] (CFRes 0)
|
||||
| (c,fcs) <- Map.toList (startupCats pinfo), fc <- fcs]
|
||||
|
||||
fruleToCFRule :: FRule -> [CFRule]
|
||||
fruleToCFRule (FRule f ps args c rhs) =
|
||||
[CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm (map (fixProfile row) ps))
|
||||
| (l,row) <- Array.assocs rhs]
|
||||
where
|
||||
mkRhs :: Array FPointPos FSymbol -> [CFSymbol]
|
||||
mkRhs = map fsymbolToSymbol . Array.elems
|
||||
|
||||
fsymbolToSymbol :: FSymbol -> CFSymbol
|
||||
fsymbolToSymbol (FSymCat l n) = NonTerminal (fcatToCat (args!!n) l)
|
||||
fsymbolToSymbol (FSymTok t) = Terminal t
|
||||
|
||||
fixProfile :: Array FPointPos FSymbol -> Profile -> Profile
|
||||
fixProfile row = concatMap positions
|
||||
where
|
||||
nts = zip [0..] [nt | nt@(FSymCat _ _) <- Array.elems row ]
|
||||
positions i = [k | (k,FSymCat _ j) <- nts, j == i]
|
||||
|
||||
profilesToTerm :: [Profile] -> CFTerm
|
||||
profilesToTerm [[n]] | f == wildCId = CFRes n
|
||||
profilesToTerm ps = CFObj f (zipWith profileToTerm argTypes ps)
|
||||
where (argTypes,_) = catSkeleton $ lookType pgf f
|
||||
|
||||
profileToTerm :: CId -> Profile -> CFTerm
|
||||
profileToTerm t [] = CFMeta t
|
||||
profileToTerm _ xs = CFRes (last xs) -- FIXME: unify
|
||||
143
src-3.0/GF/Speech/RegExp.hs
Normal file
143
src-3.0/GF/Speech/RegExp.hs
Normal file
@@ -0,0 +1,143 @@
|
||||
module GF.Speech.RegExp (RE(..),
|
||||
epsilonRE, nullRE,
|
||||
isEpsilon, isNull,
|
||||
unionRE, concatRE, seqRE,
|
||||
repeatRE, minimizeRE,
|
||||
mapRE, mapRE', joinRE,
|
||||
symbolsRE,
|
||||
dfa2re, prRE) where
|
||||
|
||||
import Data.List
|
||||
|
||||
import GF.Data.Utilities
|
||||
import GF.Speech.FiniteState
|
||||
|
||||
data RE a =
|
||||
REUnion [RE a] -- ^ REUnion [] is null
|
||||
| REConcat [RE a] -- ^ REConcat [] is epsilon
|
||||
| RERepeat (RE a)
|
||||
| RESymbol a
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
|
||||
dfa2re :: (Ord a) => DFA a -> RE a
|
||||
dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops
|
||||
. oneFinalState () epsilonRE . mapTransitions RESymbol
|
||||
where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa
|
||||
merge es = [(f,t,unionRE ls)
|
||||
| ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]]
|
||||
|
||||
elimStates :: (Ord a) => DFA (RE a) -> DFA (RE a)
|
||||
elimStates fa =
|
||||
case [s | (s,_) <- states fa, isInternal fa s] of
|
||||
[] -> fa
|
||||
sE:_ -> elimStates $ insertTransitionsWith (\x y -> unionRE [x,y]) ts $ removeState sE fa
|
||||
where sAs = nonLoopTransitionsTo sE fa
|
||||
sBs = nonLoopTransitionsFrom sE fa
|
||||
r2 = unionRE $ loops sE fa
|
||||
ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs]
|
||||
r r1 r3 = concatRE [r1, repeatRE r2, r3]
|
||||
|
||||
epsilonRE :: RE a
|
||||
epsilonRE = REConcat []
|
||||
|
||||
nullRE :: RE a
|
||||
nullRE = REUnion []
|
||||
|
||||
isNull :: RE a -> Bool
|
||||
isNull (REUnion []) = True
|
||||
isNull _ = False
|
||||
|
||||
isEpsilon :: RE a -> Bool
|
||||
isEpsilon (REConcat []) = True
|
||||
isEpsilon _ = False
|
||||
|
||||
unionRE :: Ord a => [RE a] -> RE a
|
||||
unionRE = unionOrId . sortNub . concatMap toList
|
||||
where
|
||||
toList (REUnion xs) = xs
|
||||
toList x = [x]
|
||||
unionOrId [r] = r
|
||||
unionOrId rs = REUnion rs
|
||||
|
||||
concatRE :: [RE a] -> RE a
|
||||
concatRE xs | any isNull xs = nullRE
|
||||
| otherwise = case concatMap toList xs of
|
||||
[r] -> r
|
||||
rs -> REConcat rs
|
||||
where
|
||||
toList (REConcat xs) = xs
|
||||
toList x = [x]
|
||||
|
||||
seqRE :: [a] -> RE a
|
||||
seqRE = concatRE . map RESymbol
|
||||
|
||||
repeatRE :: RE a -> RE a
|
||||
repeatRE x | isNull x || isEpsilon x = epsilonRE
|
||||
| otherwise = RERepeat x
|
||||
|
||||
finalRE :: Ord a => DFA (RE a) -> RE a
|
||||
finalRE fa = concatRE [repeatRE r1, r2,
|
||||
repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])]
|
||||
where
|
||||
s0 = startState fa
|
||||
[sF] = finalStates fa
|
||||
r1 = unionRE $ loops s0 fa
|
||||
r2 = unionRE $ map snd $ nonLoopTransitionsTo sF fa
|
||||
r3 = unionRE $ loops sF fa
|
||||
r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa
|
||||
|
||||
reverseRE :: RE a -> RE a
|
||||
reverseRE (REConcat xs) = REConcat $ map reverseRE $ reverse xs
|
||||
reverseRE (REUnion xs) = REUnion (map reverseRE xs)
|
||||
reverseRE (RERepeat x) = RERepeat (reverseRE x)
|
||||
reverseRE x = x
|
||||
|
||||
minimizeRE :: Ord a => RE a -> RE a
|
||||
minimizeRE = reverseRE . mergeForward . reverseRE . mergeForward
|
||||
|
||||
mergeForward :: Ord a => RE a -> RE a
|
||||
mergeForward (REUnion xs) =
|
||||
unionRE [concatRE [mergeForward y,mergeForward (unionRE rs)] | (y,rs) <- buildMultiMap (map firstRE xs)]
|
||||
mergeForward (REConcat (x:xs)) = concatRE [mergeForward x,mergeForward (REConcat xs)]
|
||||
mergeForward (RERepeat r) = repeatRE (mergeForward r)
|
||||
mergeForward r = r
|
||||
|
||||
firstRE :: RE a -> (RE a, RE a)
|
||||
firstRE (REConcat (x:xs)) = (x, REConcat xs)
|
||||
firstRE r = (r,epsilonRE)
|
||||
|
||||
mapRE :: (a -> b) -> RE a -> RE b
|
||||
mapRE f = mapRE' (RESymbol . f)
|
||||
|
||||
mapRE' :: (a -> RE b) -> RE a -> RE b
|
||||
mapRE' f (REConcat xs) = REConcat (map (mapRE' f) xs)
|
||||
mapRE' f (REUnion xs) = REUnion (map (mapRE' f) xs)
|
||||
mapRE' f (RERepeat x) = RERepeat (mapRE' f x)
|
||||
mapRE' f (RESymbol s) = f s
|
||||
|
||||
joinRE :: RE (RE a) -> RE a
|
||||
joinRE (REConcat xs) = REConcat (map joinRE xs)
|
||||
joinRE (REUnion xs) = REUnion (map joinRE xs)
|
||||
joinRE (RERepeat xs) = RERepeat (joinRE xs)
|
||||
joinRE (RESymbol ss) = ss
|
||||
|
||||
symbolsRE :: RE a -> [a]
|
||||
symbolsRE (REConcat xs) = concatMap symbolsRE xs
|
||||
symbolsRE (REUnion xs) = concatMap symbolsRE xs
|
||||
symbolsRE (RERepeat x) = symbolsRE x
|
||||
symbolsRE (RESymbol x) = [x]
|
||||
|
||||
-- Debugging
|
||||
|
||||
prRE :: RE String -> String
|
||||
prRE = prRE' 0
|
||||
|
||||
prRE' _ (REUnion []) = "<NULL>"
|
||||
prRE' n (REUnion xs) = p n 1 (concat (intersperse " | " (map (prRE' 1) xs)))
|
||||
prRE' n (REConcat xs) = p n 2 (unwords (map (prRE' 2) xs))
|
||||
prRE' n (RERepeat x) = p n 3 (prRE' 3 x) ++ "*"
|
||||
prRE' _ (RESymbol s) = s
|
||||
|
||||
p n m s | n >= m = "(" ++ s ++ ")"
|
||||
| True = s
|
||||
130
src-3.0/GF/Speech/Relation.hs
Normal file
130
src-3.0/GF/Speech/Relation.hs
Normal file
@@ -0,0 +1,130 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Relation
|
||||
-- Maintainer : BB
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/10/26 17:13:13 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.1 $
|
||||
--
|
||||
-- A simple module for relations.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Speech.Relation (Rel, mkRel, mkRel'
|
||||
, allRelated , isRelatedTo
|
||||
, transitiveClosure
|
||||
, reflexiveClosure, reflexiveClosure_
|
||||
, symmetricClosure
|
||||
, symmetricSubrelation, reflexiveSubrelation
|
||||
, reflexiveElements
|
||||
, equivalenceClasses
|
||||
, isTransitive, isReflexive, isSymmetric
|
||||
, isEquivalence
|
||||
, isSubRelationOf) where
|
||||
|
||||
import Data.List
|
||||
import Data.Maybe
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map
|
||||
import Data.Set (Set)
|
||||
import qualified Data.Set as Set
|
||||
|
||||
import GF.Data.Utilities
|
||||
|
||||
type Rel a = Map a (Set a)
|
||||
|
||||
-- | Creates a relation from a list of related pairs.
|
||||
mkRel :: Ord a => [(a,a)] -> Rel a
|
||||
mkRel ps = relates ps Map.empty
|
||||
|
||||
-- | Creates a relation from a list pairs of elements and the elements
|
||||
-- related to them.
|
||||
mkRel' :: Ord a => [(a,[a])] -> Rel a
|
||||
mkRel' xs = Map.fromListWith Set.union [(x,Set.fromList ys) | (x,ys) <- xs]
|
||||
|
||||
relToList :: Rel a -> [(a,a)]
|
||||
relToList r = [ (x,y) | (x,ys) <- Map.toList r, y <- Set.toList ys ]
|
||||
|
||||
-- | Add a pair to the relation.
|
||||
relate :: Ord a => a -> a -> Rel a -> Rel a
|
||||
relate x y r = Map.insertWith Set.union x (Set.singleton y) r
|
||||
|
||||
-- | Add a list of pairs to the relation.
|
||||
relates :: Ord a => [(a,a)] -> Rel a -> Rel a
|
||||
relates ps r = foldl (\r' (x,y) -> relate x y r') r ps
|
||||
|
||||
-- | Checks if an element is related to another.
|
||||
isRelatedTo :: Ord a => Rel a -> a -> a -> Bool
|
||||
isRelatedTo r x y = maybe False (y `Set.member`) (Map.lookup x r)
|
||||
|
||||
-- | Get the set of elements to which a given element is related.
|
||||
allRelated :: Ord a => Rel a -> a -> Set a
|
||||
allRelated r x = fromMaybe Set.empty (Map.lookup x r)
|
||||
|
||||
-- | Get all elements in the relation.
|
||||
domain :: Ord a => Rel a -> Set a
|
||||
domain r = foldl Set.union (Map.keysSet r) (Map.elems r)
|
||||
|
||||
-- | Keep only pairs for which both elements are in the given set.
|
||||
intersectSetRel :: Ord a => Set a -> Rel a -> Rel a
|
||||
intersectSetRel s = filterRel (\x y -> x `Set.member` s && y `Set.member` s)
|
||||
|
||||
transitiveClosure :: Ord a => Rel a -> Rel a
|
||||
transitiveClosure r = fix (Map.map growSet) r
|
||||
where growSet ys = foldl Set.union ys (map (allRelated r) $ Set.toList ys)
|
||||
|
||||
reflexiveClosure_ :: Ord a => [a] -- ^ The set over which the relation is defined.
|
||||
-> Rel a -> Rel a
|
||||
reflexiveClosure_ u r = relates [(x,x) | x <- u] r
|
||||
|
||||
-- | Uses 'domain'
|
||||
reflexiveClosure :: Ord a => Rel a -> Rel a
|
||||
reflexiveClosure r = reflexiveClosure_ (Set.toList $ domain r) r
|
||||
|
||||
symmetricClosure :: Ord a => Rel a -> Rel a
|
||||
symmetricClosure r = relates [ (y,x) | (x,y) <- relToList r ] r
|
||||
|
||||
symmetricSubrelation :: Ord a => Rel a -> Rel a
|
||||
symmetricSubrelation r = filterRel (flip $ isRelatedTo r) r
|
||||
|
||||
reflexiveSubrelation :: Ord a => Rel a -> Rel a
|
||||
reflexiveSubrelation r = intersectSetRel (reflexiveElements r) r
|
||||
|
||||
-- | Get the set of elements which are related to themselves.
|
||||
reflexiveElements :: Ord a => Rel a -> Set a
|
||||
reflexiveElements r = Set.fromList [ x | (x,ys) <- Map.toList r, x `Set.member` ys ]
|
||||
|
||||
-- | Keep the related pairs for which the predicate is true.
|
||||
filterRel :: Ord a => (a -> a -> Bool) -> Rel a -> Rel a
|
||||
filterRel p = purgeEmpty . Map.mapWithKey (Set.filter . p)
|
||||
|
||||
-- | Remove keys that map to no elements.
|
||||
purgeEmpty :: Ord a => Rel a -> Rel a
|
||||
purgeEmpty r = Map.filter (not . Set.null) r
|
||||
|
||||
|
||||
-- | Get the equivalence classes from an equivalence relation.
|
||||
equivalenceClasses :: Ord a => Rel a -> [Set a]
|
||||
equivalenceClasses r = equivalenceClasses_ (Map.keys r) r
|
||||
where equivalenceClasses_ [] _ = []
|
||||
equivalenceClasses_ (x:xs) r = ys:equivalenceClasses_ zs r
|
||||
where ys = allRelated r x
|
||||
zs = [x' | x' <- xs, not (x' `Set.member` ys)]
|
||||
|
||||
isTransitive :: Ord a => Rel a -> Bool
|
||||
isTransitive r = and [z `Set.member` ys | (x,ys) <- Map.toList r,
|
||||
y <- Set.toList ys, z <- Set.toList (allRelated r y)]
|
||||
|
||||
isReflexive :: Ord a => Rel a -> Bool
|
||||
isReflexive r = all (\ (x,ys) -> x `Set.member` ys) (Map.toList r)
|
||||
|
||||
isSymmetric :: Ord a => Rel a -> Bool
|
||||
isSymmetric r = and [isRelatedTo r y x | (x,y) <- relToList r]
|
||||
|
||||
isEquivalence :: Ord a => Rel a -> Bool
|
||||
isEquivalence r = isReflexive r && isSymmetric r && isTransitive r
|
||||
|
||||
isSubRelationOf :: Ord a => Rel a -> Rel a -> Bool
|
||||
isSubRelationOf r1 r2 = all (uncurry (isRelatedTo r2)) (relToList r1)
|
||||
80
src-3.0/GF/Speech/SISR.hs
Normal file
80
src-3.0/GF/Speech/SISR.hs
Normal file
@@ -0,0 +1,80 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : GF.Speech.SISR
|
||||
--
|
||||
-- Abstract syntax and pretty printer for SISR,
|
||||
-- (Semantic Interpretation for Speech Recognition)
|
||||
----------------------------------------------------------------------
|
||||
module GF.Speech.SISR (SISRFormat(..), SISRTag, prSISR,
|
||||
topCatSISR, profileInitSISR, catSISR, profileFinalSISR) where
|
||||
|
||||
import Data.List
|
||||
|
||||
import GF.Data.Utilities
|
||||
import GF.Infra.Ident
|
||||
import GF.Speech.CFG
|
||||
import GF.Speech.SRG (SRGNT)
|
||||
import PGF.CId
|
||||
|
||||
import qualified GF.JavaScript.AbsJS as JS
|
||||
import qualified GF.JavaScript.PrintJS as JS
|
||||
|
||||
data SISRFormat =
|
||||
-- SISR Working draft 1 April 2003
|
||||
-- http://www.w3.org/TR/2003/WD-semantic-interpretation-20030401/
|
||||
SISROld
|
||||
deriving Show
|
||||
|
||||
type SISRTag = [JS.DeclOrExpr]
|
||||
|
||||
|
||||
prSISR :: SISRTag -> String
|
||||
prSISR = JS.printTree
|
||||
|
||||
topCatSISR :: String -> SISRFormat -> SISRTag
|
||||
topCatSISR c fmt = map JS.DExpr [fmtOut fmt `ass` fmtRef fmt c]
|
||||
|
||||
profileInitSISR :: CFTerm -> SISRFormat -> SISRTag
|
||||
profileInitSISR t fmt
|
||||
| null (usedArgs t) = []
|
||||
| otherwise = [JS.Decl [JS.DInit args (JS.EArray [])]]
|
||||
|
||||
usedArgs :: CFTerm -> [Int]
|
||||
usedArgs (CFObj _ ts) = foldr union [] (map usedArgs ts)
|
||||
usedArgs (CFAbs _ x) = usedArgs x
|
||||
usedArgs (CFApp x y) = usedArgs x `union` usedArgs y
|
||||
usedArgs (CFRes i) = [i]
|
||||
usedArgs _ = []
|
||||
|
||||
catSISR :: CFTerm -> SRGNT -> SISRFormat -> SISRTag
|
||||
catSISR t (c,i) fmt
|
||||
| i `elem` usedArgs t = map JS.DExpr
|
||||
[JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i)) `ass` fmtRef fmt c]
|
||||
| otherwise = []
|
||||
|
||||
profileFinalSISR :: CFTerm -> SISRFormat -> SISRTag
|
||||
profileFinalSISR term fmt = [JS.DExpr $ fmtOut fmt `ass` f term]
|
||||
where
|
||||
f (CFObj n ts) = tree (prCId n) (map f ts)
|
||||
f (CFAbs v x) = JS.EFun [var v] [JS.SReturn (f x)]
|
||||
f (CFApp x y) = JS.ECall (f x) [f y]
|
||||
f (CFRes i) = JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i))
|
||||
f (CFVar v) = JS.EVar (var v)
|
||||
f (CFMeta typ) = obj [("name",JS.EStr "?"), ("type",JS.EStr (prCId typ))]
|
||||
|
||||
fmtOut SISROld = JS.EVar (JS.Ident "$")
|
||||
|
||||
fmtRef SISROld c = JS.EVar (JS.Ident ("$" ++ c))
|
||||
|
||||
args = JS.Ident "a"
|
||||
|
||||
var v = JS.Ident ("x" ++ show v)
|
||||
|
||||
field x y = JS.EMember x (JS.Ident y)
|
||||
|
||||
ass = JS.EAssign
|
||||
|
||||
tree n xs = obj [("name", JS.EStr n), ("args", JS.EArray xs)]
|
||||
|
||||
obj ps = JS.EObj [JS.Prop (JS.StringPropName x) y | (x,y) <- ps]
|
||||
|
||||
175
src-3.0/GF/Speech/SRG.hs
Normal file
175
src-3.0/GF/Speech/SRG.hs
Normal file
@@ -0,0 +1,175 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : SRG
|
||||
--
|
||||
-- Representation of, conversion to, and utilities for
|
||||
-- printing of a general Speech Recognition Grammar.
|
||||
--
|
||||
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
|
||||
-- categories in the grammar
|
||||
----------------------------------------------------------------------
|
||||
module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..), SRGItem
|
||||
, SRGNT, CFTerm
|
||||
, makeSRG
|
||||
, makeSimpleSRG
|
||||
, makeNonRecursiveSRG
|
||||
, lookupFM_, prtS
|
||||
) where
|
||||
|
||||
import GF.Data.Operations
|
||||
import GF.Data.Utilities
|
||||
import GF.Infra.Ident
|
||||
import GF.Infra.PrintClass
|
||||
import GF.Speech.CFG
|
||||
import GF.Speech.PGFToCFG
|
||||
import GF.Speech.Relation
|
||||
import GF.Speech.FiniteState
|
||||
import GF.Speech.RegExp
|
||||
import GF.Speech.CFGToFA
|
||||
import GF.Infra.Option
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Macros
|
||||
|
||||
import Data.List
|
||||
import Data.Maybe (fromMaybe, maybeToList)
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map
|
||||
import Data.Set (Set)
|
||||
import qualified Data.Set as Set
|
||||
|
||||
import Debug.Trace
|
||||
|
||||
data SRG = SRG { srgName :: String -- ^ grammar name
|
||||
, srgStartCat :: Cat -- ^ start category name
|
||||
, srgExternalCats :: Set Cat
|
||||
, srgLanguage :: Maybe String -- ^ The language for which the grammar
|
||||
-- is intended, e.g. en-UK
|
||||
, srgRules :: [SRGRule]
|
||||
}
|
||||
deriving (Eq,Show)
|
||||
|
||||
data SRGRule = SRGRule Cat [SRGAlt] -- ^ SRG category name, original category name
|
||||
-- and productions
|
||||
deriving (Eq,Show)
|
||||
|
||||
-- | maybe a probability, a rule name and an EBNF right-hand side
|
||||
data SRGAlt = SRGAlt (Maybe Double) CFTerm SRGItem
|
||||
deriving (Eq,Show)
|
||||
|
||||
type SRGItem = RE SRGSymbol
|
||||
|
||||
type SRGSymbol = Symbol SRGNT Token
|
||||
|
||||
-- | An SRG non-terminal. Category name and its number in the profile.
|
||||
type SRGNT = (Cat, Int)
|
||||
|
||||
|
||||
-- | Create a non-left-recursive SRG.
|
||||
-- FIXME: the probabilities in the returned
|
||||
-- grammar may be meaningless.
|
||||
makeSimpleSRG :: PGF
|
||||
-> CId -- ^ Concrete syntax name.
|
||||
-> SRG
|
||||
makeSimpleSRG = makeSRG preprocess
|
||||
where
|
||||
preprocess = traceStats "After mergeIdentical"
|
||||
. mergeIdentical
|
||||
. traceStats "After removeLeftRecursion"
|
||||
. removeLeftRecursion
|
||||
. traceStats "After topDownFilter"
|
||||
. topDownFilter
|
||||
. traceStats "After bottomUpFilter"
|
||||
. bottomUpFilter
|
||||
. traceStats "After removeCycles"
|
||||
. removeCycles
|
||||
. traceStats "Inital CFG"
|
||||
|
||||
traceStats s g = trace ("---- " ++ s ++ ": " ++ stats g {- ++ "\n" ++ prCFRules g ++ "----" -}) g
|
||||
|
||||
stats g = "Categories: " ++ show (countCats g)
|
||||
++ " Rules: " ++ show (countRules g)
|
||||
|
||||
makeNonRecursiveSRG :: PGF
|
||||
-> CId -- ^ Concrete syntax name.
|
||||
-> SRG
|
||||
makeNonRecursiveSRG pgf cnc =
|
||||
SRG { srgName = prCId cnc,
|
||||
srgStartCat = start,
|
||||
srgExternalCats = cfgExternalCats cfg,
|
||||
srgLanguage = getSpeechLanguage pgf cnc,
|
||||
srgRules = rs }
|
||||
where
|
||||
cfg = pgfToCFG pgf cnc
|
||||
MFA start dfas = cfgToMFA cfg
|
||||
rs = [SRGRule l [SRGAlt Nothing dummyCFTerm (dfaToSRGItem dfa)] | (l,dfa) <- dfas]
|
||||
where dfaToSRGItem = mapRE dummySRGNT . minimizeRE . dfa2re
|
||||
dummyCFTerm = CFMeta (mkCId "dummy")
|
||||
dummySRGNT = mapSymbol (\c -> (c,0)) id
|
||||
|
||||
makeSRG :: (CFG -> CFG)
|
||||
-> PGF
|
||||
-> CId -- ^ Concrete syntax name.
|
||||
-> SRG
|
||||
makeSRG preprocess pgf cnc =
|
||||
SRG { srgName = prCId cnc,
|
||||
srgStartCat = cfgStartCat cfg,
|
||||
srgExternalCats = cfgExternalCats cfg,
|
||||
srgLanguage = getSpeechLanguage pgf cnc,
|
||||
srgRules = rs }
|
||||
where
|
||||
cfg = pgfToCFG pgf cnc
|
||||
(_,cfgRules) = unzip $ allRulesGrouped $ preprocess cfg
|
||||
rs = map cfRulesToSRGRule cfgRules
|
||||
|
||||
getSpeechLanguage :: PGF -> CId -> Maybe String
|
||||
getSpeechLanguage pgf cnc = fmap (replace '_' '-') $ lookConcrFlag pgf cnc (mkCId "language")
|
||||
|
||||
cfRulesToSRGRule :: [CFRule] -> SRGRule
|
||||
cfRulesToSRGRule rs@(r:_) = SRGRule (lhsCat r) rhs
|
||||
where
|
||||
alts = [((n,Nothing),mkSRGSymbols 0 ss) | CFRule c ss n <- rs]
|
||||
rhs = [SRGAlt p n (srgItem sss) | ((n,p),sss) <- buildMultiMap alts ]
|
||||
|
||||
mkSRGSymbols _ [] = []
|
||||
mkSRGSymbols i (NonTerminal c:ss) = NonTerminal (c,i) : mkSRGSymbols (i+1) ss
|
||||
mkSRGSymbols i (Terminal t:ss) = Terminal t : mkSRGSymbols i ss
|
||||
|
||||
allSRGCats :: SRG -> [String]
|
||||
allSRGCats SRG { srgRules = rs } = [c | SRGRule c _ <- rs]
|
||||
|
||||
--
|
||||
-- * Size-optimized EBNF SRGs
|
||||
--
|
||||
|
||||
srgItem :: [[SRGSymbol]] -> SRGItem
|
||||
srgItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
|
||||
-- non-optimizing version:
|
||||
--srgItem = unionRE . map seqRE
|
||||
|
||||
-- | Merges a list of right-hand sides which all have the same
|
||||
-- sequence of non-terminals.
|
||||
mergeItems :: [[SRGSymbol]] -> SRGItem
|
||||
mergeItems = minimizeRE . ungroupTokens . minimizeRE . unionRE . map seqRE . map groupTokens
|
||||
|
||||
groupTokens :: [SRGSymbol] -> [Symbol SRGNT [Token]]
|
||||
groupTokens [] = []
|
||||
groupTokens (Terminal t:ss) = case groupTokens ss of
|
||||
Terminal ts:ss' -> Terminal (t:ts):ss'
|
||||
ss' -> Terminal [t]:ss'
|
||||
groupTokens (NonTerminal c:ss) = NonTerminal c : groupTokens ss
|
||||
|
||||
ungroupTokens :: RE (Symbol SRGNT [Token]) -> RE SRGSymbol
|
||||
ungroupTokens = joinRE . mapRE (symbol (RESymbol . NonTerminal) (REConcat . map (RESymbol . Terminal)))
|
||||
|
||||
--
|
||||
-- * Utilities for building and printing SRGs
|
||||
--
|
||||
|
||||
lookupFM_ :: (Ord key, Show key) => Map key elt -> key -> elt
|
||||
lookupFM_ fm k = Map.findWithDefault err k fm
|
||||
where err = error $ "Key not found: " ++ show k
|
||||
++ "\namong " ++ show (Map.keys fm)
|
||||
|
||||
prtS :: Print a => a -> ShowS
|
||||
prtS = showString . prt
|
||||
110
src-3.0/GF/Speech/SRGS.hs
Normal file
110
src-3.0/GF/Speech/SRGS.hs
Normal file
@@ -0,0 +1,110 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : SRGS
|
||||
--
|
||||
-- Prints an SRGS XML speech recognition grammars.
|
||||
----------------------------------------------------------------------
|
||||
module GF.Speech.SRGS (srgsXmlPrinter, srgsXmlNonRecursivePrinter) where
|
||||
|
||||
import GF.Data.Utilities
|
||||
import GF.Data.XML
|
||||
import GF.Infra.Option
|
||||
import GF.Speech.CFG
|
||||
import GF.Speech.RegExp
|
||||
import GF.Speech.SISR as SISR
|
||||
import GF.Speech.SRG
|
||||
import PGF (PGF, CId)
|
||||
|
||||
import Control.Monad
|
||||
import Data.Char (toUpper,toLower)
|
||||
import Data.List
|
||||
import Data.Maybe
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Set as Set
|
||||
|
||||
srgsXmlPrinter :: Maybe SISRFormat
|
||||
-> PGF -> CId -> String
|
||||
srgsXmlPrinter sisr pgf cnc = prSrgsXml sisr $ makeSimpleSRG pgf cnc
|
||||
|
||||
srgsXmlNonRecursivePrinter :: PGF -> CId -> String
|
||||
srgsXmlNonRecursivePrinter pgf cnc = prSrgsXml Nothing $ makeNonRecursiveSRG pgf cnc
|
||||
|
||||
|
||||
prSrgsXml :: Maybe SISRFormat -> SRG -> String
|
||||
prSrgsXml sisr srg = showXMLDoc (optimizeSRGS xmlGr)
|
||||
where
|
||||
xmlGr = grammar sisr (catFormId (srgStartCat srg)) (srgLanguage srg) $
|
||||
[meta "description"
|
||||
("SRGS XML speech recognition grammar for " ++ srgName srg ++ "."),
|
||||
meta "generator" "Grammatical Framework"]
|
||||
++ map ruleToXML (srgRules srg)
|
||||
ruleToXML (SRGRule cat alts) = Tag "rule" ([("id",cat)]++pub) (prRhs alts)
|
||||
where pub | cat `Set.member` srgExternalCats srg = [("scope","public")]
|
||||
| otherwise = []
|
||||
prRhs rhss = [oneOf (map (mkProd sisr) rhss)]
|
||||
|
||||
mkProd :: Maybe SISRFormat -> SRGAlt -> XML
|
||||
mkProd sisr (SRGAlt mp n rhs) = Tag "item" [] (ti ++ [x] ++ tf)
|
||||
where x = mkItem sisr n rhs
|
||||
ti = tag sisr (profileInitSISR n)
|
||||
tf = tag sisr (profileFinalSISR n)
|
||||
|
||||
mkItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> XML
|
||||
mkItem sisr cn = f
|
||||
where
|
||||
f (REUnion []) = ETag "ruleref" [("special","VOID")]
|
||||
f (REUnion xs)
|
||||
| not (null es) = Tag "item" [("repeat","0-1")] [f (REUnion nes)]
|
||||
| otherwise = oneOf (map f xs)
|
||||
where (es,nes) = partition isEpsilon xs
|
||||
f (REConcat []) = ETag "ruleref" [("special","NULL")]
|
||||
f (REConcat xs) = Tag "item" [] (map f xs)
|
||||
f (RERepeat x) = Tag "item" [("repeat","0-")] [f x]
|
||||
f (RESymbol s) = symItem sisr cn s
|
||||
|
||||
symItem :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> XML
|
||||
symItem sisr cn (NonTerminal n@(c,_)) =
|
||||
Tag "item" [] $ [ETag "ruleref" [("uri","#" ++ c)]] ++ tag sisr (catSISR cn n)
|
||||
symItem _ _ (Terminal t) = Tag "item" [] [Data (showToken t)]
|
||||
|
||||
tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> [XML]
|
||||
tag Nothing _ = []
|
||||
tag (Just fmt) t = case t fmt of
|
||||
[] -> []
|
||||
ts -> [Tag "tag" [] [Data (prSISR ts)]]
|
||||
|
||||
catFormId :: String -> String
|
||||
catFormId = (++ "_cat")
|
||||
|
||||
|
||||
showToken :: Token -> String
|
||||
showToken t = t
|
||||
|
||||
oneOf :: [XML] -> XML
|
||||
oneOf = Tag "one-of" []
|
||||
|
||||
grammar :: Maybe SISRFormat
|
||||
-> String -- ^ root
|
||||
-> Maybe String -- ^language
|
||||
-> [XML] -> XML
|
||||
grammar sisr root ml =
|
||||
Tag "grammar" $ [("xmlns","http://www.w3.org/2001/06/grammar"),
|
||||
("version","1.0"),
|
||||
("mode","voice"),
|
||||
("root",root)]
|
||||
++ (if isJust sisr then [("tag-format","semantics/1.0")] else [])
|
||||
++ maybe [] (\l -> [("xml:lang", l)]) ml
|
||||
|
||||
meta :: String -> String -> XML
|
||||
meta n c = ETag "meta" [("name",n),("content",c)]
|
||||
|
||||
optimizeSRGS :: XML -> XML
|
||||
optimizeSRGS = bottomUpXML f
|
||||
where f (Tag "item" [] [x@(Tag "item" _ _)]) = x
|
||||
f (Tag "item" [] [x@(Tag "one-of" _ _)]) = x
|
||||
f (Tag "item" as [Tag "item" [] xs]) = Tag "item" as xs
|
||||
f (Tag "item" as xs) = Tag "item" as (map g xs)
|
||||
where g (Tag "item" [] [x@(ETag "ruleref" _)]) = x
|
||||
g x = x
|
||||
f (Tag "one-of" [] [x]) = x
|
||||
f x = x
|
||||
Reference in New Issue
Block a user