forked from GitHub/gf-core
Added speech recognition grammar generation code. There is no way yet to invoke the SRG printer, and only SRGS is included.
This commit is contained in:
338
src-3.0/GF/Speech/CFG.hs
Normal file
338
src-3.0/GF/Speech/CFG.hs
Normal file
@@ -0,0 +1,338 @@
|
|||||||
|
----------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : GF.Speech.CFG
|
||||||
|
--
|
||||||
|
-- Context-free grammar representation and manipulation.
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
module GF.Speech.CFG where
|
||||||
|
|
||||||
|
import GF.Data.Utilities
|
||||||
|
import PGF.CId
|
||||||
|
import GF.Infra.Option
|
||||||
|
import GF.Infra.PrintClass
|
||||||
|
import GF.Speech.Relation
|
||||||
|
|
||||||
|
import Control.Monad
|
||||||
|
import Control.Monad.State (State, get, put, evalState)
|
||||||
|
import qualified Data.ByteString.Char8 as BS
|
||||||
|
import Data.Map (Map)
|
||||||
|
import qualified Data.Map as Map
|
||||||
|
import Data.List
|
||||||
|
import Data.Maybe (fromMaybe)
|
||||||
|
import Data.Monoid (mconcat)
|
||||||
|
import Data.Set (Set)
|
||||||
|
import qualified Data.Set as Set
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Types
|
||||||
|
--
|
||||||
|
|
||||||
|
type Cat = String
|
||||||
|
type Token = String
|
||||||
|
|
||||||
|
data Symbol c t = NonTerminal c | Terminal t
|
||||||
|
deriving (Eq, Ord, Show)
|
||||||
|
|
||||||
|
type CFSymbol = Symbol Cat Token
|
||||||
|
|
||||||
|
data CFRule = CFRule {
|
||||||
|
lhsCat :: Cat,
|
||||||
|
ruleRhs :: [CFSymbol],
|
||||||
|
ruleName :: CFTerm
|
||||||
|
}
|
||||||
|
deriving (Eq, Ord, Show)
|
||||||
|
|
||||||
|
data CFTerm
|
||||||
|
= CFObj CId [CFTerm] -- ^ an abstract syntax function with arguments
|
||||||
|
| CFAbs Int CFTerm -- ^ A lambda abstraction. The Int is the variable id.
|
||||||
|
| CFApp CFTerm CFTerm -- ^ Application
|
||||||
|
| CFRes Int -- ^ The result of the n:th (0-based) non-terminal
|
||||||
|
| CFVar Int -- ^ A lambda-bound variable
|
||||||
|
| CFMeta CId -- ^ A metavariable
|
||||||
|
deriving (Eq, Ord, Show)
|
||||||
|
|
||||||
|
data CFG = CFG { cfgStartCat :: Cat,
|
||||||
|
cfgExternalCats :: Set Cat,
|
||||||
|
cfgRules :: Map Cat (Set CFRule) }
|
||||||
|
deriving (Eq, Ord, Show)
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Grammar filtering
|
||||||
|
--
|
||||||
|
|
||||||
|
-- | Removes all directly and indirectly cyclic productions.
|
||||||
|
-- FIXME: this may be too aggressive, only one production
|
||||||
|
-- needs to be removed to break a given cycle. But which
|
||||||
|
-- one should we pick?
|
||||||
|
-- FIXME: Does not (yet) remove productions which are cyclic
|
||||||
|
-- because of empty productions.
|
||||||
|
removeCycles :: CFG -> CFG
|
||||||
|
removeCycles = onRules f
|
||||||
|
where f rs = filter (not . isCycle) rs
|
||||||
|
where alias = transitiveClosure $ mkRel [(c,c') | CFRule c [NonTerminal c'] _ <- rs]
|
||||||
|
isCycle (CFRule c [NonTerminal c'] _) = isRelatedTo alias c' c
|
||||||
|
isCycle _ = False
|
||||||
|
|
||||||
|
-- | Better bottom-up filter that also removes categories which contain no finite
|
||||||
|
-- strings.
|
||||||
|
bottomUpFilter :: CFG -> CFG
|
||||||
|
bottomUpFilter gr = fix grow (gr { cfgRules = Map.empty })
|
||||||
|
where grow g = g `unionCFG` filterCFG (all (okSym g) . ruleRhs) gr
|
||||||
|
okSym g = symbol (`elem` allCats g) (const True)
|
||||||
|
|
||||||
|
-- | Removes categories which are not reachable from the start category.
|
||||||
|
topDownFilter :: CFG -> CFG
|
||||||
|
topDownFilter cfg = filterCFGCats (isRelatedTo uses (cfgStartCat cfg)) cfg
|
||||||
|
where
|
||||||
|
rhsCats = [ (lhsCat r, c') | r <- allRules cfg, c' <- filterCats (ruleRhs r) ]
|
||||||
|
uses = reflexiveClosure_ (allCats cfg) $ transitiveClosure $ mkRel rhsCats
|
||||||
|
|
||||||
|
-- | Merges categories with identical right-hand-sides.
|
||||||
|
-- FIXME: handle probabilities
|
||||||
|
mergeIdentical :: CFG -> CFG
|
||||||
|
mergeIdentical g = onRules (map subst) g
|
||||||
|
where
|
||||||
|
-- maps categories to their replacement
|
||||||
|
m = Map.fromList [(y,concat (intersperse "+" xs))
|
||||||
|
| (_,xs) <- buildMultiMap [(rulesKey rs,c) | (c,rs) <- Map.toList (cfgRules g)], y <- xs]
|
||||||
|
-- build data to compare for each category: a set of name,rhs pairs
|
||||||
|
rulesKey = Set.map (\ (CFRule _ r n) -> (n,r))
|
||||||
|
subst (CFRule c r n) = CFRule (substCat c) (map (mapSymbol substCat id) r) n
|
||||||
|
substCat c = Map.findWithDefault (error $ "mergeIdentical: " ++ c) c m
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Removing left recursion
|
||||||
|
--
|
||||||
|
|
||||||
|
-- The LC_LR algorithm from
|
||||||
|
-- http://research.microsoft.com/users/bobmoore/naacl2k-proc-rev.pdf
|
||||||
|
removeLeftRecursion :: CFG -> CFG
|
||||||
|
removeLeftRecursion gr
|
||||||
|
= gr { cfgRules = groupProds $ concat [scheme1, scheme2, scheme3, scheme4] }
|
||||||
|
where
|
||||||
|
scheme1 = [CFRule a [x,NonTerminal a_x] n' |
|
||||||
|
a <- retainedLeftRecursive,
|
||||||
|
x <- properLeftCornersOf a,
|
||||||
|
not (isLeftRecursive x),
|
||||||
|
let a_x = mkCat (NonTerminal a) x,
|
||||||
|
-- this is an extension of LC_LR to avoid generating
|
||||||
|
-- A-X categories for which there are no productions:
|
||||||
|
a_x `Set.member` newCats,
|
||||||
|
let n' = symbol (\_ -> CFApp (CFRes 1) (CFRes 0))
|
||||||
|
(\_ -> CFRes 0) x]
|
||||||
|
scheme2 = [CFRule a_x (beta++[NonTerminal a_b]) n' |
|
||||||
|
a <- retainedLeftRecursive,
|
||||||
|
b@(NonTerminal b') <- properLeftCornersOf a,
|
||||||
|
isLeftRecursive b,
|
||||||
|
CFRule _ (x:beta) n <- catRules gr b',
|
||||||
|
let a_x = mkCat (NonTerminal a) x,
|
||||||
|
let a_b = mkCat (NonTerminal a) b,
|
||||||
|
let i = length $ filterCats beta,
|
||||||
|
let n' = symbol (\_ -> CFAbs 1 (CFApp (CFRes i) (shiftTerm n)))
|
||||||
|
(\_ -> CFApp (CFRes i) n) x]
|
||||||
|
scheme3 = [CFRule a_x beta n' |
|
||||||
|
a <- retainedLeftRecursive,
|
||||||
|
x <- properLeftCornersOf a,
|
||||||
|
CFRule _ (x':beta) n <- catRules gr a,
|
||||||
|
x == x',
|
||||||
|
let a_x = mkCat (NonTerminal a) x,
|
||||||
|
let n' = symbol (\_ -> CFAbs 1 (shiftTerm n))
|
||||||
|
(\_ -> n) x]
|
||||||
|
scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . NonTerminal) cats
|
||||||
|
|
||||||
|
newCats = Set.fromList (map lhsCat (scheme2 ++ scheme3))
|
||||||
|
|
||||||
|
shiftTerm :: CFTerm -> CFTerm
|
||||||
|
shiftTerm (CFObj f ts) = CFObj f (map shiftTerm ts)
|
||||||
|
shiftTerm (CFRes 0) = CFVar 1
|
||||||
|
shiftTerm (CFRes n) = CFRes (n-1)
|
||||||
|
shiftTerm t = t
|
||||||
|
-- note: the rest don't occur in the original grammar
|
||||||
|
|
||||||
|
cats = allCats gr
|
||||||
|
rules = allRules gr
|
||||||
|
|
||||||
|
directLeftCorner = mkRel [(NonTerminal c,t) | CFRule c (t:_) _ <- allRules gr]
|
||||||
|
leftCorner = reflexiveClosure_ (map NonTerminal cats) $ transitiveClosure directLeftCorner
|
||||||
|
properLeftCorner = transitiveClosure directLeftCorner
|
||||||
|
properLeftCornersOf = Set.toList . allRelated properLeftCorner . NonTerminal
|
||||||
|
isProperLeftCornerOf = flip (isRelatedTo properLeftCorner)
|
||||||
|
|
||||||
|
leftRecursive = reflexiveElements properLeftCorner
|
||||||
|
isLeftRecursive = (`Set.member` leftRecursive)
|
||||||
|
|
||||||
|
retained = cfgStartCat gr `Set.insert`
|
||||||
|
Set.fromList [a | r <- allRules (filterCFGCats (not . isLeftRecursive . NonTerminal) gr),
|
||||||
|
NonTerminal a <- ruleRhs r]
|
||||||
|
isRetained = (`Set.member` retained)
|
||||||
|
|
||||||
|
retainedLeftRecursive = filter (isLeftRecursive . NonTerminal) $ Set.toList retained
|
||||||
|
|
||||||
|
mkCat :: CFSymbol -> CFSymbol -> Cat
|
||||||
|
mkCat x y = showSymbol x ++ "-" ++ showSymbol y
|
||||||
|
where showSymbol = symbol id show
|
||||||
|
|
||||||
|
-- | Get the sets of mutually recursive non-terminals for a grammar.
|
||||||
|
mutRecCats :: Bool -- ^ If true, all categories will be in some set.
|
||||||
|
-- If false, only recursive categories will be included.
|
||||||
|
-> CFG -> [Set Cat]
|
||||||
|
mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
|
||||||
|
where r = mkRel [(c,c') | CFRule c ss _ <- allRules g, NonTerminal c' <- ss]
|
||||||
|
refl = if incAll then reflexiveClosure_ (allCats g) else reflexiveSubrelation
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Approximate context-free grammars with regular grammars.
|
||||||
|
--
|
||||||
|
|
||||||
|
makeSimpleRegular :: CFG -> CFG
|
||||||
|
makeSimpleRegular = makeRegular . topDownFilter . bottomUpFilter . removeCycles
|
||||||
|
|
||||||
|
-- Use the transformation algorithm from \"Regular Approximation of Context-free
|
||||||
|
-- Grammars through Approximation\", Mohri and Nederhof, 2000
|
||||||
|
-- to create an over-generating regular frammar for a context-free
|
||||||
|
-- grammar
|
||||||
|
makeRegular :: CFG -> CFG
|
||||||
|
makeRegular g = g { cfgRules = groupProds $ concatMap trSet (mutRecCats True g) }
|
||||||
|
where trSet cs | allXLinear cs rs = rs
|
||||||
|
| otherwise = concatMap handleCat csl
|
||||||
|
where csl = Set.toList cs
|
||||||
|
rs = catSetRules g cs
|
||||||
|
handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e
|
||||||
|
++ concatMap (makeRightLinearRules c) (catRules g c)
|
||||||
|
where c' = newCat c
|
||||||
|
makeRightLinearRules b' (CFRule c ss n) =
|
||||||
|
case ys of
|
||||||
|
[] -> newRule b' (xs ++ [NonTerminal (newCat c)]) n -- no non-terminals left
|
||||||
|
(NonTerminal b:zs) -> newRule b' (xs ++ [NonTerminal b]) n
|
||||||
|
++ makeRightLinearRules (newCat b) (CFRule c zs n)
|
||||||
|
where (xs,ys) = break (`catElem` cs) ss
|
||||||
|
-- don't add rules on the form A -> A
|
||||||
|
newRule c rhs n | rhs == [NonTerminal c] = []
|
||||||
|
| otherwise = [CFRule c rhs n]
|
||||||
|
newCat c = c ++ "$"
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * CFG Utilities
|
||||||
|
--
|
||||||
|
|
||||||
|
mkCFG :: Cat -> Set Cat -> [CFRule] -> CFG
|
||||||
|
mkCFG start ext rs = CFG { cfgStartCat = start, cfgExternalCats = ext, cfgRules = groupProds rs }
|
||||||
|
|
||||||
|
groupProds :: [CFRule] -> Map Cat (Set CFRule)
|
||||||
|
groupProds = Map.fromListWith Set.union . map (\r -> (lhsCat r,Set.singleton r))
|
||||||
|
|
||||||
|
-- | Gets all rules in a CFG.
|
||||||
|
allRules :: CFG -> [CFRule]
|
||||||
|
allRules = concat . map Set.toList . Map.elems . cfgRules
|
||||||
|
|
||||||
|
-- | Gets all rules in a CFG, grouped by their LHS categories.
|
||||||
|
allRulesGrouped :: CFG -> [(Cat,[CFRule])]
|
||||||
|
allRulesGrouped = Map.toList . Map.map Set.toList . cfgRules
|
||||||
|
|
||||||
|
-- | Gets all categories which have rules.
|
||||||
|
allCats :: CFG -> [Cat]
|
||||||
|
allCats = Map.keys . cfgRules
|
||||||
|
|
||||||
|
-- | Gets all rules for the given category.
|
||||||
|
catRules :: CFG -> Cat -> [CFRule]
|
||||||
|
catRules gr c = Set.toList $ Map.findWithDefault Set.empty c (cfgRules gr)
|
||||||
|
|
||||||
|
-- | Gets all rules for categories in the given set.
|
||||||
|
catSetRules :: CFG -> Set Cat -> [CFRule]
|
||||||
|
catSetRules gr cs = allRules $ filterCFGCats (`Set.member` cs) gr
|
||||||
|
|
||||||
|
onCFG :: (Map Cat (Set CFRule) -> Map Cat (Set CFRule)) -> CFG -> CFG
|
||||||
|
onCFG f cfg = cfg { cfgRules = f (cfgRules cfg) }
|
||||||
|
|
||||||
|
onRules :: ([CFRule] -> [CFRule]) -> CFG -> CFG
|
||||||
|
onRules f cfg = cfg { cfgRules = groupProds $ f $ allRules cfg }
|
||||||
|
|
||||||
|
-- | Clean up CFG after rules have been removed.
|
||||||
|
cleanCFG :: CFG -> CFG
|
||||||
|
cleanCFG = onCFG (Map.filter (not . Set.null))
|
||||||
|
|
||||||
|
-- | Combine two CFGs.
|
||||||
|
unionCFG :: CFG -> CFG -> CFG
|
||||||
|
unionCFG x y = onCFG (\rs -> Map.unionWith Set.union rs (cfgRules y)) x
|
||||||
|
|
||||||
|
filterCFG :: (CFRule -> Bool) -> CFG -> CFG
|
||||||
|
filterCFG p = cleanCFG . onCFG (Map.map (Set.filter p))
|
||||||
|
|
||||||
|
filterCFGCats :: (Cat -> Bool) -> CFG -> CFG
|
||||||
|
filterCFGCats p = onCFG (Map.filterWithKey (\c _ -> p c))
|
||||||
|
|
||||||
|
countCats :: CFG -> Int
|
||||||
|
countCats = Map.size . cfgRules . cleanCFG
|
||||||
|
|
||||||
|
countRules :: CFG -> Int
|
||||||
|
countRules = length . allRules
|
||||||
|
|
||||||
|
prCFG :: CFG -> String
|
||||||
|
prCFG = unlines . map prRule . allRules
|
||||||
|
where
|
||||||
|
prRule r = lhsCat r ++ " --> " ++ unwords (map prSym (ruleRhs r))
|
||||||
|
prSym = symbol id (\t -> "\""++ t ++"\"")
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * CFRule Utilities
|
||||||
|
--
|
||||||
|
|
||||||
|
ruleFun :: CFRule -> CId
|
||||||
|
ruleFun (CFRule _ _ t) = f t
|
||||||
|
where f (CFObj n _) = n
|
||||||
|
f (CFApp _ x) = f x
|
||||||
|
f (CFAbs _ x) = f x
|
||||||
|
f _ = mkCId ""
|
||||||
|
|
||||||
|
-- | Check if any of the categories used on the right-hand side
|
||||||
|
-- are in the given list of categories.
|
||||||
|
anyUsedBy :: [Cat] -> CFRule -> Bool
|
||||||
|
anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss)
|
||||||
|
|
||||||
|
mkCFTerm :: String -> CFTerm
|
||||||
|
mkCFTerm n = CFObj (mkCId n) []
|
||||||
|
|
||||||
|
ruleIsNonRecursive :: Set Cat -> CFRule -> Bool
|
||||||
|
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
|
||||||
|
|
||||||
|
-- | Check if all the rules are right-linear, or all the rules are
|
||||||
|
-- left-linear, with respect to given categories.
|
||||||
|
allXLinear :: Set Cat -> [CFRule] -> Bool
|
||||||
|
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
|
||||||
|
|
||||||
|
-- | Checks if a context-free rule is right-linear.
|
||||||
|
isRightLinear :: Set Cat -- ^ The categories to consider
|
||||||
|
-> CFRule -- ^ The rule to check for right-linearity
|
||||||
|
-> Bool
|
||||||
|
isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
|
||||||
|
|
||||||
|
-- | Checks if a context-free rule is left-linear.
|
||||||
|
isLeftLinear :: Set Cat -- ^ The categories to consider
|
||||||
|
-> CFRule -- ^ The rule to check for left-linearity
|
||||||
|
-> Bool
|
||||||
|
isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs
|
||||||
|
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Symbol utilities
|
||||||
|
--
|
||||||
|
|
||||||
|
symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a
|
||||||
|
symbol fc ft (NonTerminal cat) = fc cat
|
||||||
|
symbol fc ft (Terminal tok) = ft tok
|
||||||
|
|
||||||
|
mapSymbol :: (c -> c') -> (t -> t') -> Symbol c t -> Symbol c' t'
|
||||||
|
mapSymbol fc ft = symbol (NonTerminal . fc) (Terminal . ft)
|
||||||
|
|
||||||
|
filterCats :: [Symbol c t] -> [c]
|
||||||
|
filterCats syms = [ cat | NonTerminal cat <- syms ]
|
||||||
|
|
||||||
|
filterToks :: [Symbol c t] -> [t]
|
||||||
|
filterToks syms = [ tok | Terminal tok <- syms ]
|
||||||
|
|
||||||
|
-- | Checks if a symbol is a non-terminal of one of the given categories.
|
||||||
|
catElem :: Ord c => Symbol c t -> Set c -> Bool
|
||||||
|
catElem s cs = symbol (`Set.member` cs) (const False) s
|
||||||
|
|
||||||
|
noCatsInSet :: Ord c => Set c -> [Symbol c t] -> Bool
|
||||||
|
noCatsInSet cs = not . any (`catElem` cs)
|
||||||
244
src-3.0/GF/Speech/CFGToFA.hs
Normal file
244
src-3.0/GF/Speech/CFGToFA.hs
Normal file
@@ -0,0 +1,244 @@
|
|||||||
|
----------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : GF.Speech.CFGToFA
|
||||||
|
--
|
||||||
|
-- Approximates CFGs with finite state networks.
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
module GF.Speech.CFGToFA (cfgToFA, makeSimpleRegular,
|
||||||
|
MFA(..), cfgToMFA, cfgToFA') where
|
||||||
|
|
||||||
|
import Data.List
|
||||||
|
import Data.Maybe
|
||||||
|
import Data.Map (Map)
|
||||||
|
import qualified Data.Map as Map
|
||||||
|
import Data.Set (Set)
|
||||||
|
import qualified Data.Set as Set
|
||||||
|
|
||||||
|
import PGF.CId
|
||||||
|
import PGF.Data
|
||||||
|
import GF.Data.Utilities
|
||||||
|
import GF.Speech.CFG
|
||||||
|
import GF.Speech.PGFToCFG
|
||||||
|
import GF.Infra.Ident (Ident)
|
||||||
|
|
||||||
|
import GF.Speech.FiniteState
|
||||||
|
import GF.Speech.Graph
|
||||||
|
import GF.Speech.Relation
|
||||||
|
import GF.Speech.CFG
|
||||||
|
|
||||||
|
data Recursivity = RightR | LeftR | NotR
|
||||||
|
|
||||||
|
data MutRecSet = MutRecSet {
|
||||||
|
mrCats :: Set Cat,
|
||||||
|
mrNonRecRules :: [CFRule],
|
||||||
|
mrRecRules :: [CFRule],
|
||||||
|
mrRec :: Recursivity
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
type MutRecSets = Map Cat MutRecSet
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Multiple DFA type
|
||||||
|
--
|
||||||
|
|
||||||
|
data MFA = MFA Cat [(Cat,DFA CFSymbol)]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
cfgToFA :: CFG -> DFA Token
|
||||||
|
cfgToFA = minimize . compileAutomaton . makeSimpleRegular
|
||||||
|
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Compile strongly regular grammars to NFAs
|
||||||
|
--
|
||||||
|
|
||||||
|
-- Convert a strongly regular grammar to a finite automaton.
|
||||||
|
compileAutomaton :: CFG -> NFA Token
|
||||||
|
compileAutomaton g = make_fa (g,ns) s [NonTerminal (cfgStartCat g)] f fa
|
||||||
|
where
|
||||||
|
(fa,s,f) = newFA_
|
||||||
|
ns = mutRecSets g $ mutRecCats False g
|
||||||
|
|
||||||
|
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
|
||||||
|
-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000.
|
||||||
|
make_fa :: (CFG,MutRecSets) -> State -> [CFSymbol] -> State
|
||||||
|
-> NFA Token -> NFA Token
|
||||||
|
make_fa c@(g,ns) q0 alpha q1 fa =
|
||||||
|
case alpha of
|
||||||
|
[] -> newTransition q0 q1 Nothing fa
|
||||||
|
[Terminal t] -> newTransition q0 q1 (Just t) fa
|
||||||
|
[NonTerminal a] ->
|
||||||
|
case Map.lookup a ns of
|
||||||
|
-- a is recursive
|
||||||
|
Just n@(MutRecSet { mrCats = ni, mrNonRecRules = nrs, mrRecRules = rs} ) ->
|
||||||
|
case mrRec n of
|
||||||
|
-- the set Ni is right-recursive or cyclic
|
||||||
|
RightR ->
|
||||||
|
let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs]
|
||||||
|
++ [(getState c, xs, getState d) | CFRule c ss _ <- rs,
|
||||||
|
let (xs,NonTerminal d) = (init ss,last ss)]
|
||||||
|
in make_fas new $ newTransition q0 (getState a) Nothing fa'
|
||||||
|
-- the set Ni is left-recursive
|
||||||
|
LeftR ->
|
||||||
|
let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs]
|
||||||
|
++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- rs]
|
||||||
|
in make_fas new $ newTransition (getState a) q1 Nothing fa'
|
||||||
|
where
|
||||||
|
(fa',stateMap) = addStatesForCats ni fa
|
||||||
|
getState x = Map.findWithDefault
|
||||||
|
(error $ "CFGToFiniteState: No state for " ++ x)
|
||||||
|
x stateMap
|
||||||
|
-- a is not recursive
|
||||||
|
Nothing -> let rs = catRules g a
|
||||||
|
in foldl' (\f (CFRule _ b _) -> make_fa_ q0 b q1 f) fa rs
|
||||||
|
(x:beta) -> let (fa',q) = newState () fa
|
||||||
|
in make_fa_ q beta q1 $ make_fa_ q0 [x] q fa'
|
||||||
|
where
|
||||||
|
make_fa_ = make_fa c
|
||||||
|
make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa_ s1 xs s2 f') fa xs
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Compile a strongly regular grammar to a DFA with sub-automata
|
||||||
|
--
|
||||||
|
|
||||||
|
cfgToMFA :: CFG -> MFA
|
||||||
|
cfgToMFA = buildMFA . makeSimpleRegular
|
||||||
|
|
||||||
|
-- | Build a DFA by building and expanding an MFA
|
||||||
|
cfgToFA' :: CFG -> DFA Token
|
||||||
|
cfgToFA' = mfaToDFA . cfgToMFA
|
||||||
|
|
||||||
|
buildMFA :: CFG -> MFA
|
||||||
|
buildMFA g = sortSubLats $ removeUnusedSubLats mfa
|
||||||
|
where fas = compileAutomata g
|
||||||
|
mfa = MFA (cfgStartCat g) [(c, minimize fa) | (c,fa) <- fas]
|
||||||
|
|
||||||
|
mfaStartDFA :: MFA -> DFA CFSymbol
|
||||||
|
mfaStartDFA (MFA start subs) =
|
||||||
|
fromMaybe (error $ "Bad start MFA: " ++ start) $ lookup start subs
|
||||||
|
|
||||||
|
mfaToDFA :: MFA -> DFA Token
|
||||||
|
mfaToDFA mfa@(MFA _ subs) = minimize $ expand $ dfa2nfa $ mfaStartDFA mfa
|
||||||
|
where
|
||||||
|
subs' = Map.fromList [(c, dfa2nfa n) | (c,n) <- subs]
|
||||||
|
getSub l = fromJust $ Map.lookup l subs'
|
||||||
|
expand (FA (Graph c ns es) s f)
|
||||||
|
= foldl' expandEdge (FA (Graph c ns []) s f) es
|
||||||
|
expandEdge fa (f,t,x) =
|
||||||
|
case x of
|
||||||
|
Nothing -> newTransition f t Nothing fa
|
||||||
|
Just (Terminal s) -> newTransition f t (Just s) fa
|
||||||
|
Just (NonTerminal l) -> insertNFA fa (f,t) (expand $ getSub l)
|
||||||
|
|
||||||
|
removeUnusedSubLats :: MFA -> MFA
|
||||||
|
removeUnusedSubLats mfa@(MFA start subs) = MFA start [(c,s) | (c,s) <- subs, isUsed c]
|
||||||
|
where
|
||||||
|
usedMap = subLatUseMap mfa
|
||||||
|
used = growUsedSet (Set.singleton start)
|
||||||
|
isUsed c = c `Set.member` used
|
||||||
|
growUsedSet = fix (\s -> foldl Set.union s $ mapMaybe (flip Map.lookup usedMap) $ Set.toList s)
|
||||||
|
|
||||||
|
subLatUseMap :: MFA -> Map Cat (Set Cat)
|
||||||
|
subLatUseMap (MFA _ subs) = Map.fromList [(c,usedSubLats n) | (c,n) <- subs]
|
||||||
|
|
||||||
|
usedSubLats :: DFA CFSymbol -> Set Cat
|
||||||
|
usedSubLats fa = Set.fromList [s | (_,_,NonTerminal s) <- transitions fa]
|
||||||
|
|
||||||
|
-- | Sort sub-networks topologically.
|
||||||
|
sortSubLats :: MFA -> MFA
|
||||||
|
sortSubLats mfa@(MFA main subs) = MFA main (reverse $ sortLats usedByMap subs)
|
||||||
|
where
|
||||||
|
usedByMap = revMultiMap (subLatUseMap mfa)
|
||||||
|
sortLats _ [] = []
|
||||||
|
sortLats ub ls = xs ++ sortLats ub' ys
|
||||||
|
where (xs,ys) = partition ((==0) . indeg) ls
|
||||||
|
ub' = Map.map (Set.\\ Set.fromList (map fst xs)) ub
|
||||||
|
indeg (c,_) = maybe 0 Set.size $ Map.lookup c ub
|
||||||
|
|
||||||
|
-- | Convert a strongly regular grammar to a number of finite automata,
|
||||||
|
-- one for each non-terminal.
|
||||||
|
-- The edges in the automata accept tokens, or name another automaton to use.
|
||||||
|
compileAutomata :: CFG
|
||||||
|
-> [(Cat,NFA CFSymbol)]
|
||||||
|
-- ^ A map of non-terminals and their automata.
|
||||||
|
compileAutomata g = [(c, makeOneFA c) | c <- allCats g]
|
||||||
|
where
|
||||||
|
mrs = mutRecSets g $ mutRecCats True g
|
||||||
|
makeOneFA c = make_fa1 mr s [NonTerminal c] f fa
|
||||||
|
where (fa,s,f) = newFA_
|
||||||
|
mr = fromJust (Map.lookup c mrs)
|
||||||
|
|
||||||
|
|
||||||
|
-- | The make_fa algorithm from \"Regular approximation of CFLs: a grammatical view\",
|
||||||
|
-- Mark-Jan Nederhof, Advances in Probabilistic and other Parsing Technologies, 2000,
|
||||||
|
-- adapted to build a finite automaton for a single (mutually recursive) set only.
|
||||||
|
-- Categories not in the set will result in category-labelled edges.
|
||||||
|
make_fa1 :: MutRecSet -- ^ The set of (mutually recursive) categories for which
|
||||||
|
-- we are building the automaton.
|
||||||
|
-> State -- ^ State to come from
|
||||||
|
-> [CFSymbol] -- ^ Symbols to accept
|
||||||
|
-> State -- ^ State to end up in
|
||||||
|
-> NFA CFSymbol -- ^ FA to add to.
|
||||||
|
-> NFA CFSymbol
|
||||||
|
make_fa1 mr q0 alpha q1 fa =
|
||||||
|
case alpha of
|
||||||
|
[] -> newTransition q0 q1 Nothing fa
|
||||||
|
[t@(Terminal _)] -> newTransition q0 q1 (Just t) fa
|
||||||
|
[c@(NonTerminal a)] | not (a `Set.member` mrCats mr) -> newTransition q0 q1 (Just c) fa
|
||||||
|
[NonTerminal a] ->
|
||||||
|
case mrRec mr of
|
||||||
|
NotR -> -- the set is a non-recursive (always singleton) set of categories
|
||||||
|
-- so the set of category rules is the set of rules for the whole set
|
||||||
|
make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa
|
||||||
|
RightR -> -- the set is right-recursive or cyclic
|
||||||
|
let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr]
|
||||||
|
++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr,
|
||||||
|
let (xs,NonTerminal d) = (init ss,last ss)]
|
||||||
|
in make_fas new $ newTransition q0 (getState a) Nothing fa'
|
||||||
|
LeftR -> -- the set is left-recursive
|
||||||
|
let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr]
|
||||||
|
++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- mrRecRules mr]
|
||||||
|
in make_fas new $ newTransition (getState a) q1 Nothing fa'
|
||||||
|
where
|
||||||
|
(fa',stateMap) = addStatesForCats (mrCats mr) fa
|
||||||
|
getState x = Map.findWithDefault
|
||||||
|
(error $ "CFGToFiniteState: No state for " ++ x)
|
||||||
|
x stateMap
|
||||||
|
(x:beta) -> let (fa',q) = newState () fa
|
||||||
|
in make_fas [(q0,[x],q),(q,beta,q1)] fa'
|
||||||
|
where
|
||||||
|
make_fas xs fa = foldl' (\f' (s1,xs,s2) -> make_fa1 mr s1 xs s2 f') fa xs
|
||||||
|
|
||||||
|
mutRecSets :: CFG -> [Set Cat] -> MutRecSets
|
||||||
|
mutRecSets g = Map.fromList . concatMap mkMutRecSet
|
||||||
|
where
|
||||||
|
mkMutRecSet cs = [ (c,ms) | c <- csl ]
|
||||||
|
where csl = Set.toList cs
|
||||||
|
rs = catSetRules g cs
|
||||||
|
(nrs,rrs) = partition (ruleIsNonRecursive cs) rs
|
||||||
|
ms = MutRecSet {
|
||||||
|
mrCats = cs,
|
||||||
|
mrNonRecRules = nrs,
|
||||||
|
mrRecRules = rrs,
|
||||||
|
mrRec = rec
|
||||||
|
}
|
||||||
|
rec | null rrs = NotR
|
||||||
|
| all (isRightLinear cs) rrs = RightR
|
||||||
|
| otherwise = LeftR
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Utilities
|
||||||
|
--
|
||||||
|
|
||||||
|
-- | Add a state for the given NFA for each of the categories
|
||||||
|
-- in the given set. Returns a map of categories to their
|
||||||
|
-- corresponding states.
|
||||||
|
addStatesForCats :: Set Cat -> NFA t -> (NFA t, Map Cat State)
|
||||||
|
addStatesForCats cs fa = (fa', m)
|
||||||
|
where (fa', ns) = newStates (replicate (Set.size cs) ()) fa
|
||||||
|
m = Map.fromList (zip (Set.toList cs) (map fst ns))
|
||||||
|
|
||||||
|
revMultiMap :: (Ord a, Ord b) => Map a (Set b) -> Map b (Set a)
|
||||||
|
revMultiMap m = Map.fromListWith Set.union [ (y,Set.singleton x) | (x,s) <- Map.toList m, y <- Set.toList s]
|
||||||
329
src-3.0/GF/Speech/FiniteState.hs
Normal file
329
src-3.0/GF/Speech/FiniteState.hs
Normal file
@@ -0,0 +1,329 @@
|
|||||||
|
----------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : FiniteState
|
||||||
|
-- Maintainer : BB
|
||||||
|
-- Stability : (stable)
|
||||||
|
-- Portability : (portable)
|
||||||
|
--
|
||||||
|
-- > CVS $Date: 2005/11/10 16:43:44 $
|
||||||
|
-- > CVS $Author: bringert $
|
||||||
|
-- > CVS $Revision: 1.16 $
|
||||||
|
--
|
||||||
|
-- A simple finite state network module.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
module GF.Speech.FiniteState (FA(..), State, NFA, DFA,
|
||||||
|
startState, finalStates,
|
||||||
|
states, transitions,
|
||||||
|
isInternal,
|
||||||
|
newFA, newFA_,
|
||||||
|
addFinalState,
|
||||||
|
newState, newStates,
|
||||||
|
newTransition, newTransitions,
|
||||||
|
insertTransitionWith, insertTransitionsWith,
|
||||||
|
mapStates, mapTransitions,
|
||||||
|
modifyTransitions,
|
||||||
|
nonLoopTransitionsTo, nonLoopTransitionsFrom,
|
||||||
|
loops,
|
||||||
|
removeState,
|
||||||
|
oneFinalState,
|
||||||
|
insertNFA,
|
||||||
|
onGraph,
|
||||||
|
moveLabelsToNodes, removeTrivialEmptyNodes,
|
||||||
|
minimize,
|
||||||
|
dfa2nfa,
|
||||||
|
unusedNames, renameStates,
|
||||||
|
prFAGraphviz, faToGraphviz) where
|
||||||
|
|
||||||
|
import Data.List
|
||||||
|
import Data.Maybe
|
||||||
|
import Data.Map (Map)
|
||||||
|
import qualified Data.Map as Map
|
||||||
|
import Data.Set (Set)
|
||||||
|
import qualified Data.Set as Set
|
||||||
|
|
||||||
|
import GF.Data.Utilities
|
||||||
|
import GF.Speech.Graph
|
||||||
|
import qualified GF.Speech.Graphviz as Dot
|
||||||
|
|
||||||
|
type State = Int
|
||||||
|
|
||||||
|
-- | Type parameters: node id type, state label type, edge label type
|
||||||
|
-- Data constructor arguments: nodes and edges, start state, final states
|
||||||
|
data FA n a b = FA !(Graph n a b) !n ![n]
|
||||||
|
|
||||||
|
type NFA a = FA State () (Maybe a)
|
||||||
|
|
||||||
|
type DFA a = FA State () a
|
||||||
|
|
||||||
|
|
||||||
|
startState :: FA n a b -> n
|
||||||
|
startState (FA _ s _) = s
|
||||||
|
|
||||||
|
finalStates :: FA n a b -> [n]
|
||||||
|
finalStates (FA _ _ ss) = ss
|
||||||
|
|
||||||
|
states :: FA n a b -> [(n,a)]
|
||||||
|
states (FA g _ _) = nodes g
|
||||||
|
|
||||||
|
transitions :: FA n a b -> [(n,n,b)]
|
||||||
|
transitions (FA g _ _) = edges g
|
||||||
|
|
||||||
|
newFA :: Enum n => a -- ^ Start node label
|
||||||
|
-> FA n a b
|
||||||
|
newFA l = FA g s []
|
||||||
|
where (g,s) = newNode l (newGraph [toEnum 0..])
|
||||||
|
|
||||||
|
-- | Create a new finite automaton with an initial and a final state.
|
||||||
|
newFA_ :: Enum n => (FA n () b, n, n)
|
||||||
|
newFA_ = (fa'', s, f)
|
||||||
|
where fa = newFA ()
|
||||||
|
s = startState fa
|
||||||
|
(fa',f) = newState () fa
|
||||||
|
fa'' = addFinalState f fa'
|
||||||
|
|
||||||
|
addFinalState :: n -> FA n a b -> FA n a b
|
||||||
|
addFinalState f (FA g s ss) = FA g s (f:ss)
|
||||||
|
|
||||||
|
newState :: a -> FA n a b -> (FA n a b, n)
|
||||||
|
newState x (FA g s ss) = (FA g' s ss, n)
|
||||||
|
where (g',n) = newNode x g
|
||||||
|
|
||||||
|
newStates :: [a] -> FA n a b -> (FA n a b, [(n,a)])
|
||||||
|
newStates xs (FA g s ss) = (FA g' s ss, ns)
|
||||||
|
where (g',ns) = newNodes xs g
|
||||||
|
|
||||||
|
newTransition :: n -> n -> b -> FA n a b -> FA n a b
|
||||||
|
newTransition f t l = onGraph (newEdge (f,t,l))
|
||||||
|
|
||||||
|
newTransitions :: [(n, n, b)] -> FA n a b -> FA n a b
|
||||||
|
newTransitions es = onGraph (newEdges es)
|
||||||
|
|
||||||
|
insertTransitionWith :: Eq n =>
|
||||||
|
(b -> b -> b) -> (n, n, b) -> FA n a b -> FA n a b
|
||||||
|
insertTransitionWith f t = onGraph (insertEdgeWith f t)
|
||||||
|
|
||||||
|
insertTransitionsWith :: Eq n =>
|
||||||
|
(b -> b -> b) -> [(n, n, b)] -> FA n a b -> FA n a b
|
||||||
|
insertTransitionsWith f ts fa =
|
||||||
|
foldl' (flip (insertTransitionWith f)) fa ts
|
||||||
|
|
||||||
|
mapStates :: (a -> c) -> FA n a b -> FA n c b
|
||||||
|
mapStates f = onGraph (nmap f)
|
||||||
|
|
||||||
|
mapTransitions :: (b -> c) -> FA n a b -> FA n a c
|
||||||
|
mapTransitions f = onGraph (emap f)
|
||||||
|
|
||||||
|
modifyTransitions :: ([(n,n,b)] -> [(n,n,b)]) -> FA n a b -> FA n a b
|
||||||
|
modifyTransitions f = onGraph (\ (Graph r ns es) -> Graph r ns (f es))
|
||||||
|
|
||||||
|
removeState :: Ord n => n -> FA n a b -> FA n a b
|
||||||
|
removeState n = onGraph (removeNode n)
|
||||||
|
|
||||||
|
minimize :: Ord a => NFA a -> DFA a
|
||||||
|
minimize = determinize . reverseNFA . dfa2nfa . determinize . reverseNFA
|
||||||
|
|
||||||
|
unusedNames :: FA n a b -> [n]
|
||||||
|
unusedNames (FA (Graph names _ _) _ _) = names
|
||||||
|
|
||||||
|
-- | Gets all incoming transitions to a given state, excluding
|
||||||
|
-- transtions from the state itself.
|
||||||
|
nonLoopTransitionsTo :: Eq n => n -> FA n a b -> [(n,b)]
|
||||||
|
nonLoopTransitionsTo s fa =
|
||||||
|
[(f,l) | (f,t,l) <- transitions fa, t == s && f /= s]
|
||||||
|
|
||||||
|
nonLoopTransitionsFrom :: Eq n => n -> FA n a b -> [(n,b)]
|
||||||
|
nonLoopTransitionsFrom s fa =
|
||||||
|
[(t,l) | (f,t,l) <- transitions fa, f == s && t /= s]
|
||||||
|
|
||||||
|
loops :: Eq n => n -> FA n a b -> [b]
|
||||||
|
loops s fa = [l | (f,t,l) <- transitions fa, f == s && t == s]
|
||||||
|
|
||||||
|
-- | Give new names to all nodes.
|
||||||
|
renameStates :: Ord x => [y] -- ^ Infinite supply of new names
|
||||||
|
-> FA x a b
|
||||||
|
-> FA y a b
|
||||||
|
renameStates supply (FA g s fs) = FA (renameNodes newName rest g) s' fs'
|
||||||
|
where (ns,rest) = splitAt (length (nodes g)) supply
|
||||||
|
newNodes = Map.fromList (zip (map fst (nodes g)) ns)
|
||||||
|
newName n = Map.findWithDefault (error "FiniteState.newName") n newNodes
|
||||||
|
s' = newName s
|
||||||
|
fs' = map newName fs
|
||||||
|
|
||||||
|
-- | Insert an NFA into another
|
||||||
|
insertNFA :: NFA a -- ^ NFA to insert into
|
||||||
|
-> (State, State) -- ^ States to insert between
|
||||||
|
-> NFA a -- ^ NFA to insert.
|
||||||
|
-> NFA a
|
||||||
|
insertNFA (FA g1 s1 fs1) (f,t) (FA g2 s2 fs2)
|
||||||
|
= FA (newEdges es g') s1 fs1
|
||||||
|
where
|
||||||
|
es = (f,ren s2,Nothing):[(ren f2,t,Nothing) | f2 <- fs2]
|
||||||
|
(g',ren) = mergeGraphs g1 g2
|
||||||
|
|
||||||
|
onGraph :: (Graph n a b -> Graph n c d) -> FA n a b -> FA n c d
|
||||||
|
onGraph f (FA g s ss) = FA (f g) s ss
|
||||||
|
|
||||||
|
|
||||||
|
-- | Make the finite automaton have a single final state
|
||||||
|
-- by adding a new final state and adding an edge
|
||||||
|
-- from the old final states to the new state.
|
||||||
|
oneFinalState :: a -- ^ Label to give the new node
|
||||||
|
-> b -- ^ Label to give the new edges
|
||||||
|
-> FA n a b -- ^ The old network
|
||||||
|
-> FA n a b -- ^ The new network
|
||||||
|
oneFinalState nl el fa =
|
||||||
|
let (FA g s fs,nf) = newState nl fa
|
||||||
|
es = [ (f,nf,el) | f <- fs ]
|
||||||
|
in FA (newEdges es g) s [nf]
|
||||||
|
|
||||||
|
-- | Transform a standard finite automaton with labelled edges
|
||||||
|
-- to one where the labels are on the nodes instead. This can add
|
||||||
|
-- up to one extra node per edge.
|
||||||
|
moveLabelsToNodes :: (Ord n,Eq a) => FA n () (Maybe a) -> FA n (Maybe a) ()
|
||||||
|
moveLabelsToNodes = onGraph f
|
||||||
|
where f g@(Graph c _ _) = Graph c' ns (concat ess)
|
||||||
|
where is = [ ((n,l),inc) | (n, (l,inc,_)) <- Map.toList (nodeInfo g)]
|
||||||
|
(c',is') = mapAccumL fixIncoming c is
|
||||||
|
(ns,ess) = unzip (concat is')
|
||||||
|
|
||||||
|
|
||||||
|
-- | Remove empty nodes which are not start or final, and have
|
||||||
|
-- exactly one outgoing edge or exactly one incoming edge.
|
||||||
|
removeTrivialEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) ()
|
||||||
|
removeTrivialEmptyNodes = pruneUnusable . skipSimpleEmptyNodes
|
||||||
|
|
||||||
|
-- | Move edges to empty nodes to point to the next node(s).
|
||||||
|
-- This is not done if the pointed-to node is a final node.
|
||||||
|
skipSimpleEmptyNodes :: (Eq a, Ord n) => FA n (Maybe a) () -> FA n (Maybe a) ()
|
||||||
|
skipSimpleEmptyNodes fa = onGraph og fa
|
||||||
|
where
|
||||||
|
og g@(Graph c ns es) = if es' == es then g else og (Graph c ns es')
|
||||||
|
where
|
||||||
|
es' = concatMap changeEdge es
|
||||||
|
info = nodeInfo g
|
||||||
|
changeEdge e@(f,t,())
|
||||||
|
| isNothing (getNodeLabel info t)
|
||||||
|
-- && (i * o <= i + o)
|
||||||
|
&& not (isFinal fa t)
|
||||||
|
= [ (f,t',()) | (_,t',()) <- getOutgoing info t]
|
||||||
|
| otherwise = [e]
|
||||||
|
-- where i = inDegree info t
|
||||||
|
-- o = outDegree info t
|
||||||
|
|
||||||
|
isInternal :: Eq n => FA n a b -> n -> Bool
|
||||||
|
isInternal (FA _ start final) n = n /= start && n `notElem` final
|
||||||
|
|
||||||
|
isFinal :: Eq n => FA n a b -> n -> Bool
|
||||||
|
isFinal (FA _ _ final) n = n `elem` final
|
||||||
|
|
||||||
|
-- | Remove all internal nodes with no incoming edges
|
||||||
|
-- or no outgoing edges.
|
||||||
|
pruneUnusable :: Ord n => FA n (Maybe a) () -> FA n (Maybe a) ()
|
||||||
|
pruneUnusable fa = onGraph f fa
|
||||||
|
where
|
||||||
|
f g = if Set.null rns then g else f (removeNodes rns g)
|
||||||
|
where info = nodeInfo g
|
||||||
|
rns = Set.fromList [ n | (n,_) <- nodes g,
|
||||||
|
isInternal fa n,
|
||||||
|
inDegree info n == 0
|
||||||
|
|| outDegree info n == 0]
|
||||||
|
|
||||||
|
fixIncoming :: (Ord n, Eq a) => [n]
|
||||||
|
-> (Node n (),[Edge n (Maybe a)]) -- ^ A node and its incoming edges
|
||||||
|
-> ([n],[(Node n (Maybe a),[Edge n ()])]) -- ^ Replacement nodes with their
|
||||||
|
-- incoming edges.
|
||||||
|
fixIncoming cs c@((n,()),es) = (cs'', ((n,Nothing),es'):newContexts)
|
||||||
|
where ls = nub $ map edgeLabel es
|
||||||
|
(cs',cs'') = splitAt (length ls) cs
|
||||||
|
newNodes = zip cs' ls
|
||||||
|
es' = [ (x,n,()) | x <- map fst newNodes ]
|
||||||
|
-- separate cyclic and non-cyclic edges
|
||||||
|
(cyc,ncyc) = partition (\ (f,_,_) -> f == n) es
|
||||||
|
-- keep all incoming non-cyclic edges with the right label
|
||||||
|
to (x,l) = [ (f,x,()) | (f,_,l') <- ncyc, l == l']
|
||||||
|
-- for each cyclic edge with the right label,
|
||||||
|
-- add an edge from each of the new nodes (including this one)
|
||||||
|
++ [ (y,x,()) | (f,_,l') <- cyc, l == l', (y,_) <- newNodes]
|
||||||
|
newContexts = [ (v, to v) | v <- newNodes ]
|
||||||
|
|
||||||
|
alphabet :: Eq b => Graph n a (Maybe b) -> [b]
|
||||||
|
alphabet = nub . catMaybes . map edgeLabel . edges
|
||||||
|
|
||||||
|
determinize :: Ord a => NFA a -> DFA a
|
||||||
|
determinize (FA g s f) = let (ns,es) = h (Set.singleton start) Set.empty Set.empty
|
||||||
|
(ns',es') = (Set.toList ns, Set.toList es)
|
||||||
|
final = filter isDFAFinal ns'
|
||||||
|
fa = FA (Graph undefined [(n,()) | n <- ns'] es') start final
|
||||||
|
in renameStates [0..] fa
|
||||||
|
where info = nodeInfo g
|
||||||
|
-- reach = nodesReachable out
|
||||||
|
start = closure info $ Set.singleton s
|
||||||
|
isDFAFinal n = not (Set.null (Set.fromList f `Set.intersection` n))
|
||||||
|
h currentStates oldStates es
|
||||||
|
| Set.null currentStates = (oldStates,es)
|
||||||
|
| otherwise = ((h $! uniqueNewStates) $! allOldStates) $! es'
|
||||||
|
where
|
||||||
|
allOldStates = oldStates `Set.union` currentStates
|
||||||
|
(newStates,es') = new (Set.toList currentStates) Set.empty es
|
||||||
|
uniqueNewStates = newStates Set.\\ allOldStates
|
||||||
|
-- Get the sets of states reachable from the given states
|
||||||
|
-- by consuming one symbol, and the associated edges.
|
||||||
|
new [] rs es = (rs,es)
|
||||||
|
new (n:ns) rs es = new ns rs' es'
|
||||||
|
where cs = reachable info n --reachable reach n
|
||||||
|
rs' = rs `Set.union` Set.fromList (map snd cs)
|
||||||
|
es' = es `Set.union` Set.fromList [(n,s,c) | (c,s) <- cs]
|
||||||
|
|
||||||
|
|
||||||
|
-- | Get all the nodes reachable from a list of nodes by only empty edges.
|
||||||
|
closure :: Ord n => NodeInfo n a (Maybe b) -> Set n -> Set n
|
||||||
|
closure info x = closure_ x x
|
||||||
|
where closure_ acc check | Set.null check = acc
|
||||||
|
| otherwise = closure_ acc' check'
|
||||||
|
where
|
||||||
|
reach = Set.fromList [y | x <- Set.toList check,
|
||||||
|
(_,y,Nothing) <- getOutgoing info x]
|
||||||
|
acc' = acc `Set.union` reach
|
||||||
|
check' = reach Set.\\ acc
|
||||||
|
|
||||||
|
-- | Get a map of labels to sets of all nodes reachable
|
||||||
|
-- from a the set of nodes by one edge with the given
|
||||||
|
-- label and then any number of empty edges.
|
||||||
|
reachable :: (Ord n,Ord b) => NodeInfo n a (Maybe b) -> Set n -> [(b,Set n)]
|
||||||
|
reachable info ns = Map.toList $ Map.map (closure info . Set.fromList) $ reachable1 info ns
|
||||||
|
reachable1 info ns = Map.fromListWith (++) [(c, [y]) | n <- Set.toList ns, (_,y,Just c) <- getOutgoing info n]
|
||||||
|
|
||||||
|
reverseNFA :: NFA a -> NFA a
|
||||||
|
reverseNFA (FA g s fs) = FA g''' s' [s]
|
||||||
|
where g' = reverseGraph g
|
||||||
|
(g'',s') = newNode () g'
|
||||||
|
g''' = newEdges [(s',f,Nothing) | f <- fs] g''
|
||||||
|
|
||||||
|
dfa2nfa :: DFA a -> NFA a
|
||||||
|
dfa2nfa = mapTransitions Just
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Visualization
|
||||||
|
--
|
||||||
|
|
||||||
|
prFAGraphviz :: (Eq n,Show n) => FA n String String -> String
|
||||||
|
prFAGraphviz = Dot.prGraphviz . faToGraphviz
|
||||||
|
|
||||||
|
prFAGraphviz_ :: (Eq n,Show n,Show a, Show b) => FA n a b -> String
|
||||||
|
prFAGraphviz_ = Dot.prGraphviz . faToGraphviz . mapStates show . mapTransitions show
|
||||||
|
|
||||||
|
faToGraphviz :: (Eq n,Show n) => FA n String String -> Dot.Graph
|
||||||
|
faToGraphviz (FA (Graph _ ns es) s f)
|
||||||
|
= Dot.Graph Dot.Directed Nothing [] (map mkNode ns) (map mkEdge es) []
|
||||||
|
where mkNode (n,l) = Dot.Node (show n) attrs
|
||||||
|
where attrs = [("label",l)]
|
||||||
|
++ if n == s then [("shape","box")] else []
|
||||||
|
++ if n `elem` f then [("style","bold")] else []
|
||||||
|
mkEdge (x,y,l) = Dot.Edge (show x) (show y) [("label",l)]
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Utilities
|
||||||
|
--
|
||||||
|
|
||||||
|
lookups :: Ord k => [k] -> Map k a -> [a]
|
||||||
|
lookups xs m = mapMaybe (flip Map.lookup m) xs
|
||||||
178
src-3.0/GF/Speech/Graph.hs
Normal file
178
src-3.0/GF/Speech/Graph.hs
Normal file
@@ -0,0 +1,178 @@
|
|||||||
|
----------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : Graph
|
||||||
|
-- Maintainer : BB
|
||||||
|
-- Stability : (stable)
|
||||||
|
-- Portability : (portable)
|
||||||
|
--
|
||||||
|
-- > CVS $Date: 2005/11/10 16:43:44 $
|
||||||
|
-- > CVS $Author: bringert $
|
||||||
|
-- > CVS $Revision: 1.2 $
|
||||||
|
--
|
||||||
|
-- A simple graph module.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
module GF.Speech.Graph ( Graph(..), Node, Edge, NodeInfo
|
||||||
|
, newGraph, nodes, edges
|
||||||
|
, nmap, emap, newNode, newNodes, newEdge, newEdges
|
||||||
|
, insertEdgeWith
|
||||||
|
, removeNode, removeNodes
|
||||||
|
, nodeInfo
|
||||||
|
, getIncoming, getOutgoing, getNodeLabel
|
||||||
|
, inDegree, outDegree
|
||||||
|
, nodeLabel
|
||||||
|
, edgeFrom, edgeTo, edgeLabel
|
||||||
|
, reverseGraph, mergeGraphs, renameNodes
|
||||||
|
) where
|
||||||
|
|
||||||
|
import GF.Data.Utilities
|
||||||
|
|
||||||
|
import Data.List
|
||||||
|
import Data.Maybe
|
||||||
|
import Data.Map (Map)
|
||||||
|
import qualified Data.Map as Map
|
||||||
|
import Data.Set (Set)
|
||||||
|
import qualified Data.Set as Set
|
||||||
|
|
||||||
|
data Graph n a b = Graph [n] ![Node n a] ![Edge n b]
|
||||||
|
deriving (Eq,Show)
|
||||||
|
|
||||||
|
type Node n a = (n,a)
|
||||||
|
type Edge n b = (n,n,b)
|
||||||
|
|
||||||
|
type NodeInfo n a b = Map n (a, [Edge n b], [Edge n b])
|
||||||
|
|
||||||
|
-- | Create a new empty graph.
|
||||||
|
newGraph :: [n] -> Graph n a b
|
||||||
|
newGraph ns = Graph ns [] []
|
||||||
|
|
||||||
|
-- | Get all the nodes in the graph.
|
||||||
|
nodes :: Graph n a b -> [Node n a]
|
||||||
|
nodes (Graph _ ns _) = ns
|
||||||
|
|
||||||
|
-- | Get all the edges in the graph.
|
||||||
|
edges :: Graph n a b -> [Edge n b]
|
||||||
|
edges (Graph _ _ es) = es
|
||||||
|
|
||||||
|
-- | Map a function over the node labels.
|
||||||
|
nmap :: (a -> c) -> Graph n a b -> Graph n c b
|
||||||
|
nmap f (Graph c ns es) = Graph c [(n,f l) | (n,l) <- ns] es
|
||||||
|
|
||||||
|
-- | Map a function over the edge labels.
|
||||||
|
emap :: (b -> c) -> Graph n a b -> Graph n a c
|
||||||
|
emap f (Graph c ns es) = Graph c ns [(x,y,f l) | (x,y,l) <- es]
|
||||||
|
|
||||||
|
-- | Add a node to the graph.
|
||||||
|
newNode :: a -- ^ Node label
|
||||||
|
-> Graph n a b
|
||||||
|
-> (Graph n a b,n) -- ^ Node graph and name of new node
|
||||||
|
newNode l (Graph (c:cs) ns es) = (Graph cs ((c,l):ns) es, c)
|
||||||
|
|
||||||
|
newNodes :: [a] -> Graph n a b -> (Graph n a b,[Node n a])
|
||||||
|
newNodes ls g = (g', zip ns ls)
|
||||||
|
where (g',ns) = mapAccumL (flip newNode) g ls
|
||||||
|
-- lazy version:
|
||||||
|
--newNodes ls (Graph cs ns es) = (Graph cs' (ns'++ns) es, ns')
|
||||||
|
-- where (xs,cs') = splitAt (length ls) cs
|
||||||
|
-- ns' = zip xs ls
|
||||||
|
|
||||||
|
newEdge :: Edge n b -> Graph n a b -> Graph n a b
|
||||||
|
newEdge e (Graph c ns es) = Graph c ns (e:es)
|
||||||
|
|
||||||
|
newEdges :: [Edge n b] -> Graph n a b -> Graph n a b
|
||||||
|
newEdges es g = foldl' (flip newEdge) g es
|
||||||
|
-- lazy version:
|
||||||
|
-- newEdges es' (Graph c ns es) = Graph c ns (es'++es)
|
||||||
|
|
||||||
|
insertEdgeWith :: Eq n =>
|
||||||
|
(b -> b -> b) -> Edge n b -> Graph n a b -> Graph n a b
|
||||||
|
insertEdgeWith f e@(x,y,l) (Graph c ns es) = Graph c ns (h es)
|
||||||
|
where h [] = [e]
|
||||||
|
h (e'@(x',y',l'):es') | x' == x && y' == y = (x',y', f l l'):es'
|
||||||
|
| otherwise = e':h es'
|
||||||
|
|
||||||
|
-- | Remove a node and all edges to and from that node.
|
||||||
|
removeNode :: Ord n => n -> Graph n a b -> Graph n a b
|
||||||
|
removeNode n = removeNodes (Set.singleton n)
|
||||||
|
|
||||||
|
-- | Remove a set of nodes and all edges to and from those nodes.
|
||||||
|
removeNodes :: Ord n => Set n -> Graph n a b -> Graph n a b
|
||||||
|
removeNodes xs (Graph c ns es) = Graph c ns' es'
|
||||||
|
where
|
||||||
|
keepNode n = not (Set.member n xs)
|
||||||
|
ns' = [ x | x@(n,_) <- ns, keepNode n ]
|
||||||
|
es' = [ e | e@(f,t,_) <- es, keepNode f && keepNode t ]
|
||||||
|
|
||||||
|
-- | Get a map of node names to info about each node.
|
||||||
|
nodeInfo :: Ord n => Graph n a b -> NodeInfo n a b
|
||||||
|
nodeInfo g = Map.fromList [ (n, (x, fn inc n, fn out n)) | (n,x) <- nodes g ]
|
||||||
|
where
|
||||||
|
inc = groupEdgesBy edgeTo g
|
||||||
|
out = groupEdgesBy edgeFrom g
|
||||||
|
fn m n = fromMaybe [] (Map.lookup n m)
|
||||||
|
|
||||||
|
groupEdgesBy :: (Ord n) => (Edge n b -> n) -- ^ Gets the node to group by
|
||||||
|
-> Graph n a b -> Map n [Edge n b]
|
||||||
|
groupEdgesBy f g = Map.fromListWith (++) [(f e, [e]) | e <- edges g]
|
||||||
|
|
||||||
|
lookupNode :: Ord n => NodeInfo n a b -> n -> (a, [Edge n b], [Edge n b])
|
||||||
|
lookupNode i n = fromJust $ Map.lookup n i
|
||||||
|
|
||||||
|
getIncoming :: Ord n => NodeInfo n a b -> n -> [Edge n b]
|
||||||
|
getIncoming i n = let (_,inc,_) = lookupNode i n in inc
|
||||||
|
|
||||||
|
getOutgoing :: Ord n => NodeInfo n a b -> n -> [Edge n b]
|
||||||
|
getOutgoing i n = let (_,_,out) = lookupNode i n in out
|
||||||
|
|
||||||
|
inDegree :: Ord n => NodeInfo n a b -> n -> Int
|
||||||
|
inDegree i n = length $ getIncoming i n
|
||||||
|
|
||||||
|
outDegree :: Ord n => NodeInfo n a b -> n -> Int
|
||||||
|
outDegree i n = length $ getOutgoing i n
|
||||||
|
|
||||||
|
getNodeLabel :: Ord n => NodeInfo n a b -> n -> a
|
||||||
|
getNodeLabel i n = let (l,_,_) = lookupNode i n in l
|
||||||
|
|
||||||
|
nodeLabel :: Node n a -> a
|
||||||
|
nodeLabel = snd
|
||||||
|
|
||||||
|
edgeFrom :: Edge n b -> n
|
||||||
|
edgeFrom (f,_,_) = f
|
||||||
|
|
||||||
|
edgeTo :: Edge n b -> n
|
||||||
|
edgeTo (_,t,_) = t
|
||||||
|
|
||||||
|
edgeLabel :: Edge n b -> b
|
||||||
|
edgeLabel (_,_,l) = l
|
||||||
|
|
||||||
|
reverseGraph :: Graph n a b -> Graph n a b
|
||||||
|
reverseGraph (Graph c ns es) = Graph c ns [ (t,f,l) | (f,t,l) <- es ]
|
||||||
|
|
||||||
|
-- | Add the nodes from the second graph to the first graph.
|
||||||
|
-- The nodes in the second graph will be renamed using the name
|
||||||
|
-- supply in the first graph.
|
||||||
|
-- This function is more efficient when the second graph
|
||||||
|
-- is smaller than the first.
|
||||||
|
mergeGraphs :: Ord m => Graph n a b -> Graph m a b
|
||||||
|
-> (Graph n a b, m -> n) -- ^ The new graph and a function translating
|
||||||
|
-- the old names of nodes in the second graph
|
||||||
|
-- to names in the new graph.
|
||||||
|
mergeGraphs (Graph c ns1 es1) g2 = (Graph c' (ns2++ns1) (es2++es1), newName)
|
||||||
|
where
|
||||||
|
(xs,c') = splitAt (length (nodes g2)) c
|
||||||
|
newNames = Map.fromList (zip (map fst (nodes g2)) xs)
|
||||||
|
newName n = fromJust $ Map.lookup n newNames
|
||||||
|
Graph _ ns2 es2 = renameNodes newName undefined g2
|
||||||
|
|
||||||
|
-- | Rename the nodes in the graph.
|
||||||
|
renameNodes :: (n -> m) -- ^ renaming function
|
||||||
|
-> [m] -- ^ infinite supply of fresh node names, to
|
||||||
|
-- use when adding nodes in the future.
|
||||||
|
-> Graph n a b -> Graph m a b
|
||||||
|
renameNodes newName c (Graph _ ns es) = Graph c ns' es'
|
||||||
|
where ns' = map' (\ (n,x) -> (newName n,x)) ns
|
||||||
|
es' = map' (\ (f,t,l) -> (newName f, newName t, l)) es
|
||||||
|
|
||||||
|
-- | A strict 'map'
|
||||||
|
map' :: (a -> b) -> [a] -> [b]
|
||||||
|
map' _ [] = []
|
||||||
|
map' f (x:xs) = ((:) $! f x) $! map' f xs
|
||||||
116
src-3.0/GF/Speech/Graphviz.hs
Normal file
116
src-3.0/GF/Speech/Graphviz.hs
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
----------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : Graphviz
|
||||||
|
-- Maintainer : BB
|
||||||
|
-- Stability : (stable)
|
||||||
|
-- Portability : (portable)
|
||||||
|
--
|
||||||
|
-- > CVS $Date: 2005/09/15 18:10:44 $
|
||||||
|
-- > CVS $Author: bringert $
|
||||||
|
-- > CVS $Revision: 1.2 $
|
||||||
|
--
|
||||||
|
-- Graphviz DOT format representation and printing.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
module GF.Speech.Graphviz (
|
||||||
|
Graph(..), GraphType(..),
|
||||||
|
Node(..), Edge(..),
|
||||||
|
Attr,
|
||||||
|
addSubGraphs,
|
||||||
|
setName,
|
||||||
|
setAttr,
|
||||||
|
prGraphviz
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Data.Char
|
||||||
|
|
||||||
|
import GF.Data.Utilities
|
||||||
|
|
||||||
|
-- | Graph type, graph ID, graph attirbutes, graph nodes, graph edges, subgraphs
|
||||||
|
data Graph = Graph {
|
||||||
|
gType :: GraphType,
|
||||||
|
gId :: Maybe String,
|
||||||
|
gAttrs :: [Attr],
|
||||||
|
gNodes :: [Node],
|
||||||
|
gEdges :: [Edge],
|
||||||
|
gSubgraphs :: [Graph]
|
||||||
|
}
|
||||||
|
deriving (Show)
|
||||||
|
|
||||||
|
data GraphType = Directed | Undirected
|
||||||
|
deriving (Show)
|
||||||
|
|
||||||
|
data Node = Node String [Attr]
|
||||||
|
deriving Show
|
||||||
|
|
||||||
|
data Edge = Edge String String [Attr]
|
||||||
|
deriving Show
|
||||||
|
|
||||||
|
type Attr = (String,String)
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Graph construction
|
||||||
|
--
|
||||||
|
|
||||||
|
addSubGraphs :: [Graph] -> Graph -> Graph
|
||||||
|
addSubGraphs gs g = g { gSubgraphs = gs ++ gSubgraphs g }
|
||||||
|
|
||||||
|
setName :: String -> Graph -> Graph
|
||||||
|
setName n g = g { gId = Just n }
|
||||||
|
|
||||||
|
setAttr :: String -> String -> Graph -> Graph
|
||||||
|
setAttr n v g = g { gAttrs = tableSet n v (gAttrs g) }
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Pretty-printing
|
||||||
|
--
|
||||||
|
|
||||||
|
prGraphviz :: Graph -> String
|
||||||
|
prGraphviz g@(Graph t i _ _ _ _) =
|
||||||
|
graphtype t ++ " " ++ maybe "" esc i ++ " {\n" ++ prGraph g ++ "}\n"
|
||||||
|
|
||||||
|
prSubGraph :: Graph -> String
|
||||||
|
prSubGraph g@(Graph _ i _ _ _ _) =
|
||||||
|
"subgraph" ++ " " ++ maybe "" esc i ++ " {\n" ++ prGraph g ++ "}"
|
||||||
|
|
||||||
|
prGraph :: Graph -> String
|
||||||
|
prGraph (Graph t id at ns es ss) =
|
||||||
|
unlines $ map (++";") (map prAttr at
|
||||||
|
++ map prNode ns
|
||||||
|
++ map (prEdge t) es
|
||||||
|
++ map prSubGraph ss)
|
||||||
|
|
||||||
|
graphtype :: GraphType -> String
|
||||||
|
graphtype Directed = "digraph"
|
||||||
|
graphtype Undirected = "graph"
|
||||||
|
|
||||||
|
prNode :: Node -> String
|
||||||
|
prNode (Node n at) = esc n ++ " " ++ prAttrList at
|
||||||
|
|
||||||
|
prEdge :: GraphType -> Edge -> String
|
||||||
|
prEdge t (Edge x y at) = esc x ++ " " ++ edgeop t ++ " " ++ esc y ++ " " ++ prAttrList at
|
||||||
|
|
||||||
|
edgeop :: GraphType -> String
|
||||||
|
edgeop Directed = "->"
|
||||||
|
edgeop Undirected = "--"
|
||||||
|
|
||||||
|
prAttrList :: [Attr] -> String
|
||||||
|
prAttrList [] = ""
|
||||||
|
prAttrList at = "[" ++ join "," (map prAttr at) ++ "]"
|
||||||
|
|
||||||
|
prAttr :: Attr -> String
|
||||||
|
prAttr (n,v) = esc n ++ " = " ++ esc v
|
||||||
|
|
||||||
|
esc :: String -> String
|
||||||
|
esc s | needEsc s = "\"" ++ concat [ if shouldEsc c then ['\\',c] else [c] | c <- s ] ++ "\""
|
||||||
|
| otherwise = s
|
||||||
|
where shouldEsc = (`elem` ['"', '\\'])
|
||||||
|
|
||||||
|
needEsc :: String -> Bool
|
||||||
|
needEsc [] = True
|
||||||
|
needEsc xs | all isDigit xs = False
|
||||||
|
needEsc (x:xs) = not (isIDFirst x && all isIDChar xs)
|
||||||
|
|
||||||
|
isIDFirst, isIDChar :: Char -> Bool
|
||||||
|
isIDFirst c = c `elem` (['_']++['a'..'z']++['A'..'Z'])
|
||||||
|
isIDChar c = isIDFirst c || isDigit c
|
||||||
75
src-3.0/GF/Speech/PGFToCFG.hs
Normal file
75
src-3.0/GF/Speech/PGFToCFG.hs
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
----------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : GF.Speech.PGFToCFG
|
||||||
|
--
|
||||||
|
-- Approximates PGF grammars with context-free grammars.
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
module GF.Speech.PGFToCFG (pgfToCFG) where
|
||||||
|
|
||||||
|
import PGF.CId
|
||||||
|
import PGF.Data as PGF
|
||||||
|
import PGF.Macros
|
||||||
|
import GF.Infra.Ident
|
||||||
|
import GF.Speech.CFG
|
||||||
|
|
||||||
|
import Data.Array as Array
|
||||||
|
import Data.Map (Map)
|
||||||
|
import qualified Data.Map as Map
|
||||||
|
import Data.Maybe
|
||||||
|
import Data.Set (Set)
|
||||||
|
import qualified Data.Set as Set
|
||||||
|
|
||||||
|
|
||||||
|
pgfToCFG :: PGF
|
||||||
|
-> CId -- ^ Concrete syntax name
|
||||||
|
-> CFG
|
||||||
|
pgfToCFG pgf lang = mkCFG (lookStartCat pgf) extCats (startRules ++ concatMap fruleToCFRule rules)
|
||||||
|
where
|
||||||
|
pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang)
|
||||||
|
|
||||||
|
rules :: [FRule]
|
||||||
|
rules = Array.elems (PGF.allRules pinfo)
|
||||||
|
|
||||||
|
fcatGFCats :: Map FCat CId
|
||||||
|
fcatGFCats = Map.fromList [(fc,c) | (c,fcs) <- Map.toList (startupCats pinfo), fc <- fcs]
|
||||||
|
|
||||||
|
fcatGFCat :: FCat -> CId
|
||||||
|
fcatGFCat c = fromMaybe (mkCId "Unknown") (Map.lookup c fcatGFCats)
|
||||||
|
|
||||||
|
fcatToCat :: FCat -> FIndex -> Cat
|
||||||
|
fcatToCat c l = prCId (fcatGFCat c) ++ "_" ++ show c ++ "_" ++ show l
|
||||||
|
|
||||||
|
extCats :: Set Cat
|
||||||
|
extCats = Set.fromList $ map lhsCat startRules
|
||||||
|
|
||||||
|
-- NOTE: this is only correct for cats that have a lincat with exactly one row.
|
||||||
|
startRules :: [CFRule]
|
||||||
|
startRules = [CFRule (prCId c) [NonTerminal (fcatToCat fc 0)] (CFRes 0)
|
||||||
|
| (c,fcs) <- Map.toList (startupCats pinfo), fc <- fcs]
|
||||||
|
|
||||||
|
fruleToCFRule :: FRule -> [CFRule]
|
||||||
|
fruleToCFRule (FRule f ps args c rhs) =
|
||||||
|
[CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm (map (fixProfile row) ps))
|
||||||
|
| (l,row) <- Array.assocs rhs]
|
||||||
|
where
|
||||||
|
mkRhs :: Array FPointPos FSymbol -> [CFSymbol]
|
||||||
|
mkRhs = map fsymbolToSymbol . Array.elems
|
||||||
|
|
||||||
|
fsymbolToSymbol :: FSymbol -> CFSymbol
|
||||||
|
fsymbolToSymbol (FSymCat l n) = NonTerminal (fcatToCat (args!!n) l)
|
||||||
|
fsymbolToSymbol (FSymTok t) = Terminal t
|
||||||
|
|
||||||
|
fixProfile :: Array FPointPos FSymbol -> Profile -> Profile
|
||||||
|
fixProfile row = concatMap positions
|
||||||
|
where
|
||||||
|
nts = zip [0..] [nt | nt@(FSymCat _ _) <- Array.elems row ]
|
||||||
|
positions i = [k | (k,FSymCat _ j) <- nts, j == i]
|
||||||
|
|
||||||
|
profilesToTerm :: [Profile] -> CFTerm
|
||||||
|
profilesToTerm [[n]] | f == wildCId = CFRes n
|
||||||
|
profilesToTerm ps = CFObj f (zipWith profileToTerm argTypes ps)
|
||||||
|
where (argTypes,_) = catSkeleton $ lookType pgf f
|
||||||
|
|
||||||
|
profileToTerm :: CId -> Profile -> CFTerm
|
||||||
|
profileToTerm t [] = CFMeta t
|
||||||
|
profileToTerm _ xs = CFRes (last xs) -- FIXME: unify
|
||||||
143
src-3.0/GF/Speech/RegExp.hs
Normal file
143
src-3.0/GF/Speech/RegExp.hs
Normal file
@@ -0,0 +1,143 @@
|
|||||||
|
module GF.Speech.RegExp (RE(..),
|
||||||
|
epsilonRE, nullRE,
|
||||||
|
isEpsilon, isNull,
|
||||||
|
unionRE, concatRE, seqRE,
|
||||||
|
repeatRE, minimizeRE,
|
||||||
|
mapRE, mapRE', joinRE,
|
||||||
|
symbolsRE,
|
||||||
|
dfa2re, prRE) where
|
||||||
|
|
||||||
|
import Data.List
|
||||||
|
|
||||||
|
import GF.Data.Utilities
|
||||||
|
import GF.Speech.FiniteState
|
||||||
|
|
||||||
|
data RE a =
|
||||||
|
REUnion [RE a] -- ^ REUnion [] is null
|
||||||
|
| REConcat [RE a] -- ^ REConcat [] is epsilon
|
||||||
|
| RERepeat (RE a)
|
||||||
|
| RESymbol a
|
||||||
|
deriving (Eq,Ord,Show)
|
||||||
|
|
||||||
|
|
||||||
|
dfa2re :: (Ord a) => DFA a -> RE a
|
||||||
|
dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops
|
||||||
|
. oneFinalState () epsilonRE . mapTransitions RESymbol
|
||||||
|
where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa
|
||||||
|
merge es = [(f,t,unionRE ls)
|
||||||
|
| ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]]
|
||||||
|
|
||||||
|
elimStates :: (Ord a) => DFA (RE a) -> DFA (RE a)
|
||||||
|
elimStates fa =
|
||||||
|
case [s | (s,_) <- states fa, isInternal fa s] of
|
||||||
|
[] -> fa
|
||||||
|
sE:_ -> elimStates $ insertTransitionsWith (\x y -> unionRE [x,y]) ts $ removeState sE fa
|
||||||
|
where sAs = nonLoopTransitionsTo sE fa
|
||||||
|
sBs = nonLoopTransitionsFrom sE fa
|
||||||
|
r2 = unionRE $ loops sE fa
|
||||||
|
ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs]
|
||||||
|
r r1 r3 = concatRE [r1, repeatRE r2, r3]
|
||||||
|
|
||||||
|
epsilonRE :: RE a
|
||||||
|
epsilonRE = REConcat []
|
||||||
|
|
||||||
|
nullRE :: RE a
|
||||||
|
nullRE = REUnion []
|
||||||
|
|
||||||
|
isNull :: RE a -> Bool
|
||||||
|
isNull (REUnion []) = True
|
||||||
|
isNull _ = False
|
||||||
|
|
||||||
|
isEpsilon :: RE a -> Bool
|
||||||
|
isEpsilon (REConcat []) = True
|
||||||
|
isEpsilon _ = False
|
||||||
|
|
||||||
|
unionRE :: Ord a => [RE a] -> RE a
|
||||||
|
unionRE = unionOrId . sortNub . concatMap toList
|
||||||
|
where
|
||||||
|
toList (REUnion xs) = xs
|
||||||
|
toList x = [x]
|
||||||
|
unionOrId [r] = r
|
||||||
|
unionOrId rs = REUnion rs
|
||||||
|
|
||||||
|
concatRE :: [RE a] -> RE a
|
||||||
|
concatRE xs | any isNull xs = nullRE
|
||||||
|
| otherwise = case concatMap toList xs of
|
||||||
|
[r] -> r
|
||||||
|
rs -> REConcat rs
|
||||||
|
where
|
||||||
|
toList (REConcat xs) = xs
|
||||||
|
toList x = [x]
|
||||||
|
|
||||||
|
seqRE :: [a] -> RE a
|
||||||
|
seqRE = concatRE . map RESymbol
|
||||||
|
|
||||||
|
repeatRE :: RE a -> RE a
|
||||||
|
repeatRE x | isNull x || isEpsilon x = epsilonRE
|
||||||
|
| otherwise = RERepeat x
|
||||||
|
|
||||||
|
finalRE :: Ord a => DFA (RE a) -> RE a
|
||||||
|
finalRE fa = concatRE [repeatRE r1, r2,
|
||||||
|
repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])]
|
||||||
|
where
|
||||||
|
s0 = startState fa
|
||||||
|
[sF] = finalStates fa
|
||||||
|
r1 = unionRE $ loops s0 fa
|
||||||
|
r2 = unionRE $ map snd $ nonLoopTransitionsTo sF fa
|
||||||
|
r3 = unionRE $ loops sF fa
|
||||||
|
r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa
|
||||||
|
|
||||||
|
reverseRE :: RE a -> RE a
|
||||||
|
reverseRE (REConcat xs) = REConcat $ map reverseRE $ reverse xs
|
||||||
|
reverseRE (REUnion xs) = REUnion (map reverseRE xs)
|
||||||
|
reverseRE (RERepeat x) = RERepeat (reverseRE x)
|
||||||
|
reverseRE x = x
|
||||||
|
|
||||||
|
minimizeRE :: Ord a => RE a -> RE a
|
||||||
|
minimizeRE = reverseRE . mergeForward . reverseRE . mergeForward
|
||||||
|
|
||||||
|
mergeForward :: Ord a => RE a -> RE a
|
||||||
|
mergeForward (REUnion xs) =
|
||||||
|
unionRE [concatRE [mergeForward y,mergeForward (unionRE rs)] | (y,rs) <- buildMultiMap (map firstRE xs)]
|
||||||
|
mergeForward (REConcat (x:xs)) = concatRE [mergeForward x,mergeForward (REConcat xs)]
|
||||||
|
mergeForward (RERepeat r) = repeatRE (mergeForward r)
|
||||||
|
mergeForward r = r
|
||||||
|
|
||||||
|
firstRE :: RE a -> (RE a, RE a)
|
||||||
|
firstRE (REConcat (x:xs)) = (x, REConcat xs)
|
||||||
|
firstRE r = (r,epsilonRE)
|
||||||
|
|
||||||
|
mapRE :: (a -> b) -> RE a -> RE b
|
||||||
|
mapRE f = mapRE' (RESymbol . f)
|
||||||
|
|
||||||
|
mapRE' :: (a -> RE b) -> RE a -> RE b
|
||||||
|
mapRE' f (REConcat xs) = REConcat (map (mapRE' f) xs)
|
||||||
|
mapRE' f (REUnion xs) = REUnion (map (mapRE' f) xs)
|
||||||
|
mapRE' f (RERepeat x) = RERepeat (mapRE' f x)
|
||||||
|
mapRE' f (RESymbol s) = f s
|
||||||
|
|
||||||
|
joinRE :: RE (RE a) -> RE a
|
||||||
|
joinRE (REConcat xs) = REConcat (map joinRE xs)
|
||||||
|
joinRE (REUnion xs) = REUnion (map joinRE xs)
|
||||||
|
joinRE (RERepeat xs) = RERepeat (joinRE xs)
|
||||||
|
joinRE (RESymbol ss) = ss
|
||||||
|
|
||||||
|
symbolsRE :: RE a -> [a]
|
||||||
|
symbolsRE (REConcat xs) = concatMap symbolsRE xs
|
||||||
|
symbolsRE (REUnion xs) = concatMap symbolsRE xs
|
||||||
|
symbolsRE (RERepeat x) = symbolsRE x
|
||||||
|
symbolsRE (RESymbol x) = [x]
|
||||||
|
|
||||||
|
-- Debugging
|
||||||
|
|
||||||
|
prRE :: RE String -> String
|
||||||
|
prRE = prRE' 0
|
||||||
|
|
||||||
|
prRE' _ (REUnion []) = "<NULL>"
|
||||||
|
prRE' n (REUnion xs) = p n 1 (concat (intersperse " | " (map (prRE' 1) xs)))
|
||||||
|
prRE' n (REConcat xs) = p n 2 (unwords (map (prRE' 2) xs))
|
||||||
|
prRE' n (RERepeat x) = p n 3 (prRE' 3 x) ++ "*"
|
||||||
|
prRE' _ (RESymbol s) = s
|
||||||
|
|
||||||
|
p n m s | n >= m = "(" ++ s ++ ")"
|
||||||
|
| True = s
|
||||||
130
src-3.0/GF/Speech/Relation.hs
Normal file
130
src-3.0/GF/Speech/Relation.hs
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
----------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : Relation
|
||||||
|
-- Maintainer : BB
|
||||||
|
-- Stability : (stable)
|
||||||
|
-- Portability : (portable)
|
||||||
|
--
|
||||||
|
-- > CVS $Date: 2005/10/26 17:13:13 $
|
||||||
|
-- > CVS $Author: bringert $
|
||||||
|
-- > CVS $Revision: 1.1 $
|
||||||
|
--
|
||||||
|
-- A simple module for relations.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
module GF.Speech.Relation (Rel, mkRel, mkRel'
|
||||||
|
, allRelated , isRelatedTo
|
||||||
|
, transitiveClosure
|
||||||
|
, reflexiveClosure, reflexiveClosure_
|
||||||
|
, symmetricClosure
|
||||||
|
, symmetricSubrelation, reflexiveSubrelation
|
||||||
|
, reflexiveElements
|
||||||
|
, equivalenceClasses
|
||||||
|
, isTransitive, isReflexive, isSymmetric
|
||||||
|
, isEquivalence
|
||||||
|
, isSubRelationOf) where
|
||||||
|
|
||||||
|
import Data.List
|
||||||
|
import Data.Maybe
|
||||||
|
import Data.Map (Map)
|
||||||
|
import qualified Data.Map as Map
|
||||||
|
import Data.Set (Set)
|
||||||
|
import qualified Data.Set as Set
|
||||||
|
|
||||||
|
import GF.Data.Utilities
|
||||||
|
|
||||||
|
type Rel a = Map a (Set a)
|
||||||
|
|
||||||
|
-- | Creates a relation from a list of related pairs.
|
||||||
|
mkRel :: Ord a => [(a,a)] -> Rel a
|
||||||
|
mkRel ps = relates ps Map.empty
|
||||||
|
|
||||||
|
-- | Creates a relation from a list pairs of elements and the elements
|
||||||
|
-- related to them.
|
||||||
|
mkRel' :: Ord a => [(a,[a])] -> Rel a
|
||||||
|
mkRel' xs = Map.fromListWith Set.union [(x,Set.fromList ys) | (x,ys) <- xs]
|
||||||
|
|
||||||
|
relToList :: Rel a -> [(a,a)]
|
||||||
|
relToList r = [ (x,y) | (x,ys) <- Map.toList r, y <- Set.toList ys ]
|
||||||
|
|
||||||
|
-- | Add a pair to the relation.
|
||||||
|
relate :: Ord a => a -> a -> Rel a -> Rel a
|
||||||
|
relate x y r = Map.insertWith Set.union x (Set.singleton y) r
|
||||||
|
|
||||||
|
-- | Add a list of pairs to the relation.
|
||||||
|
relates :: Ord a => [(a,a)] -> Rel a -> Rel a
|
||||||
|
relates ps r = foldl (\r' (x,y) -> relate x y r') r ps
|
||||||
|
|
||||||
|
-- | Checks if an element is related to another.
|
||||||
|
isRelatedTo :: Ord a => Rel a -> a -> a -> Bool
|
||||||
|
isRelatedTo r x y = maybe False (y `Set.member`) (Map.lookup x r)
|
||||||
|
|
||||||
|
-- | Get the set of elements to which a given element is related.
|
||||||
|
allRelated :: Ord a => Rel a -> a -> Set a
|
||||||
|
allRelated r x = fromMaybe Set.empty (Map.lookup x r)
|
||||||
|
|
||||||
|
-- | Get all elements in the relation.
|
||||||
|
domain :: Ord a => Rel a -> Set a
|
||||||
|
domain r = foldl Set.union (Map.keysSet r) (Map.elems r)
|
||||||
|
|
||||||
|
-- | Keep only pairs for which both elements are in the given set.
|
||||||
|
intersectSetRel :: Ord a => Set a -> Rel a -> Rel a
|
||||||
|
intersectSetRel s = filterRel (\x y -> x `Set.member` s && y `Set.member` s)
|
||||||
|
|
||||||
|
transitiveClosure :: Ord a => Rel a -> Rel a
|
||||||
|
transitiveClosure r = fix (Map.map growSet) r
|
||||||
|
where growSet ys = foldl Set.union ys (map (allRelated r) $ Set.toList ys)
|
||||||
|
|
||||||
|
reflexiveClosure_ :: Ord a => [a] -- ^ The set over which the relation is defined.
|
||||||
|
-> Rel a -> Rel a
|
||||||
|
reflexiveClosure_ u r = relates [(x,x) | x <- u] r
|
||||||
|
|
||||||
|
-- | Uses 'domain'
|
||||||
|
reflexiveClosure :: Ord a => Rel a -> Rel a
|
||||||
|
reflexiveClosure r = reflexiveClosure_ (Set.toList $ domain r) r
|
||||||
|
|
||||||
|
symmetricClosure :: Ord a => Rel a -> Rel a
|
||||||
|
symmetricClosure r = relates [ (y,x) | (x,y) <- relToList r ] r
|
||||||
|
|
||||||
|
symmetricSubrelation :: Ord a => Rel a -> Rel a
|
||||||
|
symmetricSubrelation r = filterRel (flip $ isRelatedTo r) r
|
||||||
|
|
||||||
|
reflexiveSubrelation :: Ord a => Rel a -> Rel a
|
||||||
|
reflexiveSubrelation r = intersectSetRel (reflexiveElements r) r
|
||||||
|
|
||||||
|
-- | Get the set of elements which are related to themselves.
|
||||||
|
reflexiveElements :: Ord a => Rel a -> Set a
|
||||||
|
reflexiveElements r = Set.fromList [ x | (x,ys) <- Map.toList r, x `Set.member` ys ]
|
||||||
|
|
||||||
|
-- | Keep the related pairs for which the predicate is true.
|
||||||
|
filterRel :: Ord a => (a -> a -> Bool) -> Rel a -> Rel a
|
||||||
|
filterRel p = purgeEmpty . Map.mapWithKey (Set.filter . p)
|
||||||
|
|
||||||
|
-- | Remove keys that map to no elements.
|
||||||
|
purgeEmpty :: Ord a => Rel a -> Rel a
|
||||||
|
purgeEmpty r = Map.filter (not . Set.null) r
|
||||||
|
|
||||||
|
|
||||||
|
-- | Get the equivalence classes from an equivalence relation.
|
||||||
|
equivalenceClasses :: Ord a => Rel a -> [Set a]
|
||||||
|
equivalenceClasses r = equivalenceClasses_ (Map.keys r) r
|
||||||
|
where equivalenceClasses_ [] _ = []
|
||||||
|
equivalenceClasses_ (x:xs) r = ys:equivalenceClasses_ zs r
|
||||||
|
where ys = allRelated r x
|
||||||
|
zs = [x' | x' <- xs, not (x' `Set.member` ys)]
|
||||||
|
|
||||||
|
isTransitive :: Ord a => Rel a -> Bool
|
||||||
|
isTransitive r = and [z `Set.member` ys | (x,ys) <- Map.toList r,
|
||||||
|
y <- Set.toList ys, z <- Set.toList (allRelated r y)]
|
||||||
|
|
||||||
|
isReflexive :: Ord a => Rel a -> Bool
|
||||||
|
isReflexive r = all (\ (x,ys) -> x `Set.member` ys) (Map.toList r)
|
||||||
|
|
||||||
|
isSymmetric :: Ord a => Rel a -> Bool
|
||||||
|
isSymmetric r = and [isRelatedTo r y x | (x,y) <- relToList r]
|
||||||
|
|
||||||
|
isEquivalence :: Ord a => Rel a -> Bool
|
||||||
|
isEquivalence r = isReflexive r && isSymmetric r && isTransitive r
|
||||||
|
|
||||||
|
isSubRelationOf :: Ord a => Rel a -> Rel a -> Bool
|
||||||
|
isSubRelationOf r1 r2 = all (uncurry (isRelatedTo r2)) (relToList r1)
|
||||||
80
src-3.0/GF/Speech/SISR.hs
Normal file
80
src-3.0/GF/Speech/SISR.hs
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
----------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : GF.Speech.SISR
|
||||||
|
--
|
||||||
|
-- Abstract syntax and pretty printer for SISR,
|
||||||
|
-- (Semantic Interpretation for Speech Recognition)
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
module GF.Speech.SISR (SISRFormat(..), SISRTag, prSISR,
|
||||||
|
topCatSISR, profileInitSISR, catSISR, profileFinalSISR) where
|
||||||
|
|
||||||
|
import Data.List
|
||||||
|
|
||||||
|
import GF.Data.Utilities
|
||||||
|
import GF.Infra.Ident
|
||||||
|
import GF.Speech.CFG
|
||||||
|
import GF.Speech.SRG (SRGNT)
|
||||||
|
import PGF.CId
|
||||||
|
|
||||||
|
import qualified GF.JavaScript.AbsJS as JS
|
||||||
|
import qualified GF.JavaScript.PrintJS as JS
|
||||||
|
|
||||||
|
data SISRFormat =
|
||||||
|
-- SISR Working draft 1 April 2003
|
||||||
|
-- http://www.w3.org/TR/2003/WD-semantic-interpretation-20030401/
|
||||||
|
SISROld
|
||||||
|
deriving Show
|
||||||
|
|
||||||
|
type SISRTag = [JS.DeclOrExpr]
|
||||||
|
|
||||||
|
|
||||||
|
prSISR :: SISRTag -> String
|
||||||
|
prSISR = JS.printTree
|
||||||
|
|
||||||
|
topCatSISR :: String -> SISRFormat -> SISRTag
|
||||||
|
topCatSISR c fmt = map JS.DExpr [fmtOut fmt `ass` fmtRef fmt c]
|
||||||
|
|
||||||
|
profileInitSISR :: CFTerm -> SISRFormat -> SISRTag
|
||||||
|
profileInitSISR t fmt
|
||||||
|
| null (usedArgs t) = []
|
||||||
|
| otherwise = [JS.Decl [JS.DInit args (JS.EArray [])]]
|
||||||
|
|
||||||
|
usedArgs :: CFTerm -> [Int]
|
||||||
|
usedArgs (CFObj _ ts) = foldr union [] (map usedArgs ts)
|
||||||
|
usedArgs (CFAbs _ x) = usedArgs x
|
||||||
|
usedArgs (CFApp x y) = usedArgs x `union` usedArgs y
|
||||||
|
usedArgs (CFRes i) = [i]
|
||||||
|
usedArgs _ = []
|
||||||
|
|
||||||
|
catSISR :: CFTerm -> SRGNT -> SISRFormat -> SISRTag
|
||||||
|
catSISR t (c,i) fmt
|
||||||
|
| i `elem` usedArgs t = map JS.DExpr
|
||||||
|
[JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i)) `ass` fmtRef fmt c]
|
||||||
|
| otherwise = []
|
||||||
|
|
||||||
|
profileFinalSISR :: CFTerm -> SISRFormat -> SISRTag
|
||||||
|
profileFinalSISR term fmt = [JS.DExpr $ fmtOut fmt `ass` f term]
|
||||||
|
where
|
||||||
|
f (CFObj n ts) = tree (prCId n) (map f ts)
|
||||||
|
f (CFAbs v x) = JS.EFun [var v] [JS.SReturn (f x)]
|
||||||
|
f (CFApp x y) = JS.ECall (f x) [f y]
|
||||||
|
f (CFRes i) = JS.EIndex (JS.EVar args) (JS.EInt (fromIntegral i))
|
||||||
|
f (CFVar v) = JS.EVar (var v)
|
||||||
|
f (CFMeta typ) = obj [("name",JS.EStr "?"), ("type",JS.EStr (prCId typ))]
|
||||||
|
|
||||||
|
fmtOut SISROld = JS.EVar (JS.Ident "$")
|
||||||
|
|
||||||
|
fmtRef SISROld c = JS.EVar (JS.Ident ("$" ++ c))
|
||||||
|
|
||||||
|
args = JS.Ident "a"
|
||||||
|
|
||||||
|
var v = JS.Ident ("x" ++ show v)
|
||||||
|
|
||||||
|
field x y = JS.EMember x (JS.Ident y)
|
||||||
|
|
||||||
|
ass = JS.EAssign
|
||||||
|
|
||||||
|
tree n xs = obj [("name", JS.EStr n), ("args", JS.EArray xs)]
|
||||||
|
|
||||||
|
obj ps = JS.EObj [JS.Prop (JS.StringPropName x) y | (x,y) <- ps]
|
||||||
|
|
||||||
175
src-3.0/GF/Speech/SRG.hs
Normal file
175
src-3.0/GF/Speech/SRG.hs
Normal file
@@ -0,0 +1,175 @@
|
|||||||
|
----------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : SRG
|
||||||
|
--
|
||||||
|
-- Representation of, conversion to, and utilities for
|
||||||
|
-- printing of a general Speech Recognition Grammar.
|
||||||
|
--
|
||||||
|
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
|
||||||
|
-- categories in the grammar
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..), SRGItem
|
||||||
|
, SRGNT, CFTerm
|
||||||
|
, makeSRG
|
||||||
|
, makeSimpleSRG
|
||||||
|
, makeNonRecursiveSRG
|
||||||
|
, lookupFM_, prtS
|
||||||
|
) where
|
||||||
|
|
||||||
|
import GF.Data.Operations
|
||||||
|
import GF.Data.Utilities
|
||||||
|
import GF.Infra.Ident
|
||||||
|
import GF.Infra.PrintClass
|
||||||
|
import GF.Speech.CFG
|
||||||
|
import GF.Speech.PGFToCFG
|
||||||
|
import GF.Speech.Relation
|
||||||
|
import GF.Speech.FiniteState
|
||||||
|
import GF.Speech.RegExp
|
||||||
|
import GF.Speech.CFGToFA
|
||||||
|
import GF.Infra.Option
|
||||||
|
import PGF.CId
|
||||||
|
import PGF.Data
|
||||||
|
import PGF.Macros
|
||||||
|
|
||||||
|
import Data.List
|
||||||
|
import Data.Maybe (fromMaybe, maybeToList)
|
||||||
|
import Data.Map (Map)
|
||||||
|
import qualified Data.Map as Map
|
||||||
|
import Data.Set (Set)
|
||||||
|
import qualified Data.Set as Set
|
||||||
|
|
||||||
|
import Debug.Trace
|
||||||
|
|
||||||
|
data SRG = SRG { srgName :: String -- ^ grammar name
|
||||||
|
, srgStartCat :: Cat -- ^ start category name
|
||||||
|
, srgExternalCats :: Set Cat
|
||||||
|
, srgLanguage :: Maybe String -- ^ The language for which the grammar
|
||||||
|
-- is intended, e.g. en-UK
|
||||||
|
, srgRules :: [SRGRule]
|
||||||
|
}
|
||||||
|
deriving (Eq,Show)
|
||||||
|
|
||||||
|
data SRGRule = SRGRule Cat [SRGAlt] -- ^ SRG category name, original category name
|
||||||
|
-- and productions
|
||||||
|
deriving (Eq,Show)
|
||||||
|
|
||||||
|
-- | maybe a probability, a rule name and an EBNF right-hand side
|
||||||
|
data SRGAlt = SRGAlt (Maybe Double) CFTerm SRGItem
|
||||||
|
deriving (Eq,Show)
|
||||||
|
|
||||||
|
type SRGItem = RE SRGSymbol
|
||||||
|
|
||||||
|
type SRGSymbol = Symbol SRGNT Token
|
||||||
|
|
||||||
|
-- | An SRG non-terminal. Category name and its number in the profile.
|
||||||
|
type SRGNT = (Cat, Int)
|
||||||
|
|
||||||
|
|
||||||
|
-- | Create a non-left-recursive SRG.
|
||||||
|
-- FIXME: the probabilities in the returned
|
||||||
|
-- grammar may be meaningless.
|
||||||
|
makeSimpleSRG :: PGF
|
||||||
|
-> CId -- ^ Concrete syntax name.
|
||||||
|
-> SRG
|
||||||
|
makeSimpleSRG = makeSRG preprocess
|
||||||
|
where
|
||||||
|
preprocess = traceStats "After mergeIdentical"
|
||||||
|
. mergeIdentical
|
||||||
|
. traceStats "After removeLeftRecursion"
|
||||||
|
. removeLeftRecursion
|
||||||
|
. traceStats "After topDownFilter"
|
||||||
|
. topDownFilter
|
||||||
|
. traceStats "After bottomUpFilter"
|
||||||
|
. bottomUpFilter
|
||||||
|
. traceStats "After removeCycles"
|
||||||
|
. removeCycles
|
||||||
|
. traceStats "Inital CFG"
|
||||||
|
|
||||||
|
traceStats s g = trace ("---- " ++ s ++ ": " ++ stats g {- ++ "\n" ++ prCFRules g ++ "----" -}) g
|
||||||
|
|
||||||
|
stats g = "Categories: " ++ show (countCats g)
|
||||||
|
++ " Rules: " ++ show (countRules g)
|
||||||
|
|
||||||
|
makeNonRecursiveSRG :: PGF
|
||||||
|
-> CId -- ^ Concrete syntax name.
|
||||||
|
-> SRG
|
||||||
|
makeNonRecursiveSRG pgf cnc =
|
||||||
|
SRG { srgName = prCId cnc,
|
||||||
|
srgStartCat = start,
|
||||||
|
srgExternalCats = cfgExternalCats cfg,
|
||||||
|
srgLanguage = getSpeechLanguage pgf cnc,
|
||||||
|
srgRules = rs }
|
||||||
|
where
|
||||||
|
cfg = pgfToCFG pgf cnc
|
||||||
|
MFA start dfas = cfgToMFA cfg
|
||||||
|
rs = [SRGRule l [SRGAlt Nothing dummyCFTerm (dfaToSRGItem dfa)] | (l,dfa) <- dfas]
|
||||||
|
where dfaToSRGItem = mapRE dummySRGNT . minimizeRE . dfa2re
|
||||||
|
dummyCFTerm = CFMeta (mkCId "dummy")
|
||||||
|
dummySRGNT = mapSymbol (\c -> (c,0)) id
|
||||||
|
|
||||||
|
makeSRG :: (CFG -> CFG)
|
||||||
|
-> PGF
|
||||||
|
-> CId -- ^ Concrete syntax name.
|
||||||
|
-> SRG
|
||||||
|
makeSRG preprocess pgf cnc =
|
||||||
|
SRG { srgName = prCId cnc,
|
||||||
|
srgStartCat = cfgStartCat cfg,
|
||||||
|
srgExternalCats = cfgExternalCats cfg,
|
||||||
|
srgLanguage = getSpeechLanguage pgf cnc,
|
||||||
|
srgRules = rs }
|
||||||
|
where
|
||||||
|
cfg = pgfToCFG pgf cnc
|
||||||
|
(_,cfgRules) = unzip $ allRulesGrouped $ preprocess cfg
|
||||||
|
rs = map cfRulesToSRGRule cfgRules
|
||||||
|
|
||||||
|
getSpeechLanguage :: PGF -> CId -> Maybe String
|
||||||
|
getSpeechLanguage pgf cnc = fmap (replace '_' '-') $ lookConcrFlag pgf cnc (mkCId "language")
|
||||||
|
|
||||||
|
cfRulesToSRGRule :: [CFRule] -> SRGRule
|
||||||
|
cfRulesToSRGRule rs@(r:_) = SRGRule (lhsCat r) rhs
|
||||||
|
where
|
||||||
|
alts = [((n,Nothing),mkSRGSymbols 0 ss) | CFRule c ss n <- rs]
|
||||||
|
rhs = [SRGAlt p n (srgItem sss) | ((n,p),sss) <- buildMultiMap alts ]
|
||||||
|
|
||||||
|
mkSRGSymbols _ [] = []
|
||||||
|
mkSRGSymbols i (NonTerminal c:ss) = NonTerminal (c,i) : mkSRGSymbols (i+1) ss
|
||||||
|
mkSRGSymbols i (Terminal t:ss) = Terminal t : mkSRGSymbols i ss
|
||||||
|
|
||||||
|
allSRGCats :: SRG -> [String]
|
||||||
|
allSRGCats SRG { srgRules = rs } = [c | SRGRule c _ <- rs]
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Size-optimized EBNF SRGs
|
||||||
|
--
|
||||||
|
|
||||||
|
srgItem :: [[SRGSymbol]] -> SRGItem
|
||||||
|
srgItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
|
||||||
|
-- non-optimizing version:
|
||||||
|
--srgItem = unionRE . map seqRE
|
||||||
|
|
||||||
|
-- | Merges a list of right-hand sides which all have the same
|
||||||
|
-- sequence of non-terminals.
|
||||||
|
mergeItems :: [[SRGSymbol]] -> SRGItem
|
||||||
|
mergeItems = minimizeRE . ungroupTokens . minimizeRE . unionRE . map seqRE . map groupTokens
|
||||||
|
|
||||||
|
groupTokens :: [SRGSymbol] -> [Symbol SRGNT [Token]]
|
||||||
|
groupTokens [] = []
|
||||||
|
groupTokens (Terminal t:ss) = case groupTokens ss of
|
||||||
|
Terminal ts:ss' -> Terminal (t:ts):ss'
|
||||||
|
ss' -> Terminal [t]:ss'
|
||||||
|
groupTokens (NonTerminal c:ss) = NonTerminal c : groupTokens ss
|
||||||
|
|
||||||
|
ungroupTokens :: RE (Symbol SRGNT [Token]) -> RE SRGSymbol
|
||||||
|
ungroupTokens = joinRE . mapRE (symbol (RESymbol . NonTerminal) (REConcat . map (RESymbol . Terminal)))
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Utilities for building and printing SRGs
|
||||||
|
--
|
||||||
|
|
||||||
|
lookupFM_ :: (Ord key, Show key) => Map key elt -> key -> elt
|
||||||
|
lookupFM_ fm k = Map.findWithDefault err k fm
|
||||||
|
where err = error $ "Key not found: " ++ show k
|
||||||
|
++ "\namong " ++ show (Map.keys fm)
|
||||||
|
|
||||||
|
prtS :: Print a => a -> ShowS
|
||||||
|
prtS = showString . prt
|
||||||
110
src-3.0/GF/Speech/SRGS.hs
Normal file
110
src-3.0/GF/Speech/SRGS.hs
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
----------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : SRGS
|
||||||
|
--
|
||||||
|
-- Prints an SRGS XML speech recognition grammars.
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
module GF.Speech.SRGS (srgsXmlPrinter, srgsXmlNonRecursivePrinter) where
|
||||||
|
|
||||||
|
import GF.Data.Utilities
|
||||||
|
import GF.Data.XML
|
||||||
|
import GF.Infra.Option
|
||||||
|
import GF.Speech.CFG
|
||||||
|
import GF.Speech.RegExp
|
||||||
|
import GF.Speech.SISR as SISR
|
||||||
|
import GF.Speech.SRG
|
||||||
|
import PGF (PGF, CId)
|
||||||
|
|
||||||
|
import Control.Monad
|
||||||
|
import Data.Char (toUpper,toLower)
|
||||||
|
import Data.List
|
||||||
|
import Data.Maybe
|
||||||
|
import qualified Data.Map as Map
|
||||||
|
import qualified Data.Set as Set
|
||||||
|
|
||||||
|
srgsXmlPrinter :: Maybe SISRFormat
|
||||||
|
-> PGF -> CId -> String
|
||||||
|
srgsXmlPrinter sisr pgf cnc = prSrgsXml sisr $ makeSimpleSRG pgf cnc
|
||||||
|
|
||||||
|
srgsXmlNonRecursivePrinter :: PGF -> CId -> String
|
||||||
|
srgsXmlNonRecursivePrinter pgf cnc = prSrgsXml Nothing $ makeNonRecursiveSRG pgf cnc
|
||||||
|
|
||||||
|
|
||||||
|
prSrgsXml :: Maybe SISRFormat -> SRG -> String
|
||||||
|
prSrgsXml sisr srg = showXMLDoc (optimizeSRGS xmlGr)
|
||||||
|
where
|
||||||
|
xmlGr = grammar sisr (catFormId (srgStartCat srg)) (srgLanguage srg) $
|
||||||
|
[meta "description"
|
||||||
|
("SRGS XML speech recognition grammar for " ++ srgName srg ++ "."),
|
||||||
|
meta "generator" "Grammatical Framework"]
|
||||||
|
++ map ruleToXML (srgRules srg)
|
||||||
|
ruleToXML (SRGRule cat alts) = Tag "rule" ([("id",cat)]++pub) (prRhs alts)
|
||||||
|
where pub | cat `Set.member` srgExternalCats srg = [("scope","public")]
|
||||||
|
| otherwise = []
|
||||||
|
prRhs rhss = [oneOf (map (mkProd sisr) rhss)]
|
||||||
|
|
||||||
|
mkProd :: Maybe SISRFormat -> SRGAlt -> XML
|
||||||
|
mkProd sisr (SRGAlt mp n rhs) = Tag "item" [] (ti ++ [x] ++ tf)
|
||||||
|
where x = mkItem sisr n rhs
|
||||||
|
ti = tag sisr (profileInitSISR n)
|
||||||
|
tf = tag sisr (profileFinalSISR n)
|
||||||
|
|
||||||
|
mkItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> XML
|
||||||
|
mkItem sisr cn = f
|
||||||
|
where
|
||||||
|
f (REUnion []) = ETag "ruleref" [("special","VOID")]
|
||||||
|
f (REUnion xs)
|
||||||
|
| not (null es) = Tag "item" [("repeat","0-1")] [f (REUnion nes)]
|
||||||
|
| otherwise = oneOf (map f xs)
|
||||||
|
where (es,nes) = partition isEpsilon xs
|
||||||
|
f (REConcat []) = ETag "ruleref" [("special","NULL")]
|
||||||
|
f (REConcat xs) = Tag "item" [] (map f xs)
|
||||||
|
f (RERepeat x) = Tag "item" [("repeat","0-")] [f x]
|
||||||
|
f (RESymbol s) = symItem sisr cn s
|
||||||
|
|
||||||
|
symItem :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> XML
|
||||||
|
symItem sisr cn (NonTerminal n@(c,_)) =
|
||||||
|
Tag "item" [] $ [ETag "ruleref" [("uri","#" ++ c)]] ++ tag sisr (catSISR cn n)
|
||||||
|
symItem _ _ (Terminal t) = Tag "item" [] [Data (showToken t)]
|
||||||
|
|
||||||
|
tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> [XML]
|
||||||
|
tag Nothing _ = []
|
||||||
|
tag (Just fmt) t = case t fmt of
|
||||||
|
[] -> []
|
||||||
|
ts -> [Tag "tag" [] [Data (prSISR ts)]]
|
||||||
|
|
||||||
|
catFormId :: String -> String
|
||||||
|
catFormId = (++ "_cat")
|
||||||
|
|
||||||
|
|
||||||
|
showToken :: Token -> String
|
||||||
|
showToken t = t
|
||||||
|
|
||||||
|
oneOf :: [XML] -> XML
|
||||||
|
oneOf = Tag "one-of" []
|
||||||
|
|
||||||
|
grammar :: Maybe SISRFormat
|
||||||
|
-> String -- ^ root
|
||||||
|
-> Maybe String -- ^language
|
||||||
|
-> [XML] -> XML
|
||||||
|
grammar sisr root ml =
|
||||||
|
Tag "grammar" $ [("xmlns","http://www.w3.org/2001/06/grammar"),
|
||||||
|
("version","1.0"),
|
||||||
|
("mode","voice"),
|
||||||
|
("root",root)]
|
||||||
|
++ (if isJust sisr then [("tag-format","semantics/1.0")] else [])
|
||||||
|
++ maybe [] (\l -> [("xml:lang", l)]) ml
|
||||||
|
|
||||||
|
meta :: String -> String -> XML
|
||||||
|
meta n c = ETag "meta" [("name",n),("content",c)]
|
||||||
|
|
||||||
|
optimizeSRGS :: XML -> XML
|
||||||
|
optimizeSRGS = bottomUpXML f
|
||||||
|
where f (Tag "item" [] [x@(Tag "item" _ _)]) = x
|
||||||
|
f (Tag "item" [] [x@(Tag "one-of" _ _)]) = x
|
||||||
|
f (Tag "item" as [Tag "item" [] xs]) = Tag "item" as xs
|
||||||
|
f (Tag "item" as xs) = Tag "item" as (map g xs)
|
||||||
|
where g (Tag "item" [] [x@(ETag "ruleref" _)]) = x
|
||||||
|
g x = x
|
||||||
|
f (Tag "one-of" [] [x]) = x
|
||||||
|
f x = x
|
||||||
Reference in New Issue
Block a user