1
0
forked from GitHub/gf-core

Towards smaller SRGs when lots of variants are used.

This commit is contained in:
bringert
2006-12-15 16:09:58 +00:00
parent 0ef8dced52
commit 160c6354c0
8 changed files with 201 additions and 83 deletions

View File

@@ -14,11 +14,17 @@
module GF.Speech.FiniteState (FA(..), State, NFA, DFA, module GF.Speech.FiniteState (FA(..), State, NFA, DFA,
startState, finalStates, startState, finalStates,
states, transitions, states, transitions,
isInternal,
newFA, newFA,
addFinalState, addFinalState,
newState, newStates, newState, newStates,
newTransition, newTransitions, newTransition, newTransitions,
insertTransitionWith, insertTransitionsWith,
mapStates, mapTransitions, mapStates, mapTransitions,
modifyTransitions,
nonLoopTransitionsTo, nonLoopTransitionsFrom,
loops,
removeState,
oneFinalState, oneFinalState,
insertNFA, insertNFA,
onGraph, onGraph,
@@ -41,6 +47,7 @@ import qualified GF.Visualization.Graphviz as Dot
type State = Int type State = Int
-- | Type parameters: node id type, state label type, edge label type
data FA n a b = FA !(Graph n a b) !n ![n] data FA n a b = FA !(Graph n a b) !n ![n]
type NFA a = FA State () (Maybe a) type NFA a = FA State () (Maybe a)
@@ -82,18 +89,46 @@ newTransition f t l = onGraph (newEdge (f,t,l))
newTransitions :: [(n, n, b)] -> FA n a b -> FA n a b newTransitions :: [(n, n, b)] -> FA n a b -> FA n a b
newTransitions es = onGraph (newEdges es) newTransitions es = onGraph (newEdges es)
insertTransitionWith :: Eq n =>
(b -> b -> b) -> (n, n, b) -> FA n a b -> FA n a b
insertTransitionWith f t = onGraph (insertEdgeWith f t)
insertTransitionsWith :: Eq n =>
(b -> b -> b) -> [(n, n, b)] -> FA n a b -> FA n a b
insertTransitionsWith f ts fa =
foldl' (flip (insertTransitionWith f)) fa ts
mapStates :: (a -> c) -> FA n a b -> FA n c b mapStates :: (a -> c) -> FA n a b -> FA n c b
mapStates f = onGraph (nmap f) mapStates f = onGraph (nmap f)
mapTransitions :: (b -> c) -> FA n a b -> FA n a c mapTransitions :: (b -> c) -> FA n a b -> FA n a c
mapTransitions f = onGraph (emap f) mapTransitions f = onGraph (emap f)
modifyTransitions :: ([(n,n,b)] -> [(n,n,b)]) -> FA n a b -> FA n a b
modifyTransitions f = onGraph (\ (Graph r ns es) -> Graph r ns (f es))
removeState :: Ord n => n -> FA n a b -> FA n a b
removeState n = onGraph (removeNode n)
minimize :: Ord a => NFA a -> DFA a minimize :: Ord a => NFA a -> DFA a
minimize = determinize . reverseNFA . dfa2nfa . determinize . reverseNFA minimize = determinize . reverseNFA . dfa2nfa . determinize . reverseNFA
unusedNames :: FA n a b -> [n] unusedNames :: FA n a b -> [n]
unusedNames (FA (Graph names _ _) _ _) = names unusedNames (FA (Graph names _ _) _ _) = names
-- | Gets all incoming transitions to a given state, excluding
-- transtions from the state itself.
nonLoopTransitionsTo :: Eq n => n -> FA n a b -> [(n,b)]
nonLoopTransitionsTo s fa =
[(f,l) | (f,t,l) <- transitions fa, t == s && f /= s]
nonLoopTransitionsFrom :: Eq n => n -> FA n a b -> [(n,b)]
nonLoopTransitionsFrom s fa =
[(t,l) | (f,t,l) <- transitions fa, f == s && t /= s]
loops :: Eq n => n -> FA n a b -> [b]
loops s fa = [l | (f,t,l) <- transitions fa, f == s && t == s]
-- | Give new names to all nodes. -- | Give new names to all nodes.
renameStates :: Ord x => [y] -- ^ Infinite supply of new names renameStates :: Ord x => [y] -- ^ Infinite supply of new names
-> FA x a b -> FA x a b

View File

@@ -14,7 +14,8 @@
module GF.Speech.Graph ( Graph(..), Node, Edge, NodeInfo module GF.Speech.Graph ( Graph(..), Node, Edge, NodeInfo
, newGraph, nodes, edges , newGraph, nodes, edges
, nmap, emap, newNode, newNodes, newEdge, newEdges , nmap, emap, newNode, newNodes, newEdge, newEdges
, removeNodes , insertEdgeWith
, removeNode, removeNodes
, nodeInfo , nodeInfo
, getIncoming, getOutgoing, getNodeLabel , getIncoming, getOutgoing, getNodeLabel
, inDegree, outDegree , inDegree, outDegree
@@ -82,6 +83,17 @@ newEdges es g = foldl' (flip newEdge) g es
-- lazy version: -- lazy version:
-- newEdges es' (Graph c ns es) = Graph c ns (es'++es) -- newEdges es' (Graph c ns es) = Graph c ns (es'++es)
insertEdgeWith :: Eq n =>
(b -> b -> b) -> Edge n b -> Graph n a b -> Graph n a b
insertEdgeWith f e@(x,y,l) (Graph c ns es) = Graph c ns (h es)
where h [] = [e]
h (e'@(x',y',l'):es') | x' == x && y' == y = (x',y', f l l'):es'
| otherwise = e':h es'
-- | Remove a node and all edges to and from that node.
removeNode :: Ord n => n -> Graph n a b -> Graph n a b
removeNode n = removeNodes (Set.singleton n)
-- | Remove a set of nodes and all edges to and from those nodes. -- | Remove a set of nodes and all edges to and from those nodes.
removeNodes :: Ord n => Set n -> Graph n a b -> Graph n a b removeNodes :: Ord n => Set n -> Graph n a b -> Graph n a b
removeNodes xs (Graph c ns es) = Graph c ns' es' removeNodes xs (Graph c ns es) = Graph c ns' es'

View File

@@ -28,6 +28,7 @@ import GF.Infra.Print
import GF.Infra.Option import GF.Infra.Option
import GF.Probabilistic.Probabilistic (Probs) import GF.Probabilistic.Probabilistic (Probs)
import GF.Speech.SRG import GF.Speech.SRG
import GF.Speech.RegExp
jsgfPrinter :: Ident -- ^ Grammar name jsgfPrinter :: Ident -- ^ Grammar name
-> String -- ^ Start category -> String -- ^ Start category
@@ -48,20 +49,27 @@ prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
. showString "public <MAIN> = " . prCat start . showChar ';' . nl . nl . showString "public <MAIN> = " . prCat start . showChar ';' . nl . nl
prRule (SRGRule cat origCat rhs) = prRule (SRGRule cat origCat rhs) =
comments [origCat] . nl comments [origCat] . nl
. prCat cat . showString " = " . joinS " | " (map prAlt rhs) . nl . prCat cat . showString " = " . joinS " | " (map prAlt (ebnfSRGAlts rhs)) . nl
-- FIXME: use the probability -- FIXME: use the probability
prAlt (SRGAlt mp _ rhs) prAlt (EBnfSRGAlt mp _ rhs) = prItem rhs
| null rhs' = showString "<NULL>"
| otherwise = wrap "(" (unwordsS (map prSymbol rhs')) ")"
where rhs' = rmPunct rhs
prSymbol (Cat (c,_)) = prCat c
prSymbol (Tok t) = wrap "\"" (prtS t) "\""
prCat c = showChar '<' . showString c . showChar '>'
rmPunct :: [Symbol c Token] -> [Symbol c Token] prCat :: SRGCat -> ShowS
rmPunct [] = [] prCat c = showChar '<' . showString c . showChar '>'
rmPunct (Tok t:ss) | all isPunct (prt t) = rmPunct ss
rmPunct (s:ss) = s : rmPunct ss prItem :: EBnfSRGItem -> ShowS
prItem = f
where
f (REUnion []) = showString "<VOID>"
f (REUnion xs) = wrap "(" (joinS " | " (map f xs)) ")"
f (REConcat []) = showString "<NULL>"
f (REConcat xs) = wrap "(" (unwordsS (map f xs)) ")"
f (RERepeat x) = wrap "(" (f x) ")" . showString "*"
f (RESymbol s) = prSymbol s
prSymbol :: Symbol SRGNT Token -> ShowS
prSymbol (Cat (c,_)) = prCat c
prSymbol (Tok t) | all isPunct (prt t) = id -- removes punctuation
| otherwise = wrap "\"" (prtS t) "\""
isPunct :: Char -> Bool isPunct :: Char -> Bool
isPunct c = c `elem` "-_.;.,?!" isPunct c = c `elem` "-_.;.,?!"

21
src/GF/Speech/PrRegExp.hs Normal file
View File

@@ -0,0 +1,21 @@
----------------------------------------------------------------------
-- |
-- Module : PrSLF
-- Maintainer : BB
-- Stability : (stable)
-- Portability : (portable)
--
-- This module prints a grammar as a regular expression.
-----------------------------------------------------------------------------
module GF.Speech.PrRegExp (regexpPrinter) where
import GF.Conversion.Types
import GF.Infra.Ident
import GF.Speech.CFGToFiniteState
import GF.Speech.RegExp
regexpPrinter :: Ident -- ^ Grammar name
-> String -> CGrammar -> String
regexpPrinter name start cfg = prRE $ dfa2re $ cfgToFA start cfg

View File

@@ -15,6 +15,7 @@ module GF.Speech.PrSRGS (SISRFormat(..), srgsXmlPrinter) where
import GF.Data.Utilities import GF.Data.Utilities
import GF.Data.XML import GF.Data.XML
import GF.Speech.RegExp
import GF.Speech.SISR as SISR import GF.Speech.SISR as SISR
import GF.Speech.SRG import GF.Speech.SRG
import GF.Infra.Ident import GF.Infra.Ident
@@ -85,9 +86,12 @@ mkProd sisr (EBnfSRGAlt mp n@(Name f prs) rhs) = Tag "item" w (t ++ xs)
argInit (Constant f) = maybe "?" prIdent (forestName f) argInit (Constant f) = maybe "?" prIdent (forestName f)
mkItem :: Maybe SISRFormat -> EBnfSRGItem -> XML mkItem :: Maybe SISRFormat -> EBnfSRGItem -> XML
mkItem sisr (EBnfOneOf xs) = oneOf (map (mkItem sisr) xs) mkItem sisr = f
mkItem sisr (EBnfSeq xs) = Tag "item" [] (map (mkItem sisr) xs) where
mkItem sisr (EBnfSymbol s) = symItem sisr s f (REUnion xs) = oneOf (map f xs)
f (REConcat xs) = Tag "item" [] (map f xs)
f (RERepeat x) = Tag "item" [("repeat","0-")] [f x]
f (RESymbol s) = symItem sisr s
symItem :: Maybe SISRFormat -> Symbol SRGNT Token -> XML symItem :: Maybe SISRFormat -> Symbol SRGNT Token -> XML
symItem sisr (Cat (c,slots)) = Tag "item" [] ([Tag "ruleref" [("uri","#" ++ prCat c)] []]++t) symItem sisr (Cat (c,slots)) = Tag "item" [] ([Tag "ruleref" [("uri","#" ++ prCat c)] []]++t)
@@ -107,8 +111,7 @@ showToken :: Token -> String
showToken t = t showToken t = t
oneOf :: [XML] -> XML oneOf :: [XML] -> XML
oneOf [x] = x oneOf = Tag "one-of" []
oneOf xs = Tag "one-of" [] xs
grammar :: Maybe SISRFormat grammar :: Maybe SISRFormat
-> String -- ^ root -> String -- ^ root
@@ -130,61 +133,3 @@ optimizeSRGS = bottomUpXML f
where f (Tag "item" [] [x@(Tag "item" [] _)]) = x where f (Tag "item" [] [x@(Tag "item" [] _)]) = x
f (Tag "one-of" [] [x]) = x f (Tag "one-of" [] [x]) = x
f x = x f x = x
{-
--
-- * SRGS minimization
--
minimizeRule :: XML -> XML
minimizeRule (Tag "rule" attrs cs)
= Tag "rule" attrs (map minimizeOneOf cs)
minimizeOneOf :: XML -> XML
minimizeOneOf (Tag "one-of" attrs cs)
= Tag "item" [] (p++[Tag "one-of" attrs cs'])
where
(pref,cs') = factor cs
p = if null pref then [] else [Tag "one-of" [] pref]
minimizeOneOf x = x
factor :: [XML] -> ([XML],[XML])
factor xs = case f of
Just (ps,xs') -> (map it ps, map it xs')
Nothing -> ([],xs)
where
-- FIXME: maybe getting all the longest terminal prefixes
-- is not optimal?
f = cartesianFactor $ map (terminalPrefix . unIt) xs
unIt (Tag "item" [] cs) = cs
it cs = Tag "item" [] cs
terminalPrefix :: [XML] -> ([XML],[XML])
terminalPrefix cs = (terms, tags ++ cs'')
where (tags,cs') = span isTag cs
(terms,cs'') = span isTerminalItem cs'
isTag :: XML -> Bool
isTag (Tag t _ _) = t == "tag"
isTag _ = False
isTerminalItem :: XML -> Bool
isTerminalItem (Tag "item" [] [Data _]) = True
isTerminalItem _ = False
--
-- * Utilities
--
allEqual :: Eq a => [a] -> Bool
allEqual [] = True
allEqual (x:xs) = all (x==) xs
cartesianFactor :: (Ord a, Ord b) => [(a,b)] -> Maybe ([a],[b])
cartesianFactor xs
| not (null es) && allEqual es = Just (Map.keys m, Set.elems (head es))
| otherwise = Nothing
where m = Map.fromListWith Set.union [(x,Set.singleton y) | (x,y) <- xs]
es = Map.elems m
-}

84
src/GF/Speech/RegExp.hs Normal file
View File

@@ -0,0 +1,84 @@
module GF.Speech.RegExp (RE(..), dfa2re, prRE) where
import Data.List
import GF.Data.Utilities
import GF.Speech.FiniteState
data RE a =
REUnion [RE a] -- ^ REUnion [] is null
| REConcat [RE a] -- ^ REConcat [] is epsilon
| RERepeat (RE a)
| RESymbol a
deriving (Eq,Show)
dfa2re :: Show a => DFA a -> RE a
dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops
. oneFinalState () epsilonRE . mapTransitions RESymbol
where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa
merge es = [(f,t,unionRE ls)
| ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]]
elimStates :: Show a => DFA (RE a) -> DFA (RE a)
elimStates fa =
case [s | (s,_) <- states fa, isInternal fa s] of
[] -> fa
sE:_ -> elimStates $ insertTransitionsWith (\x y -> unionRE [x,y]) ts $ removeState sE fa
where sAs = nonLoopTransitionsTo sE fa
sBs = nonLoopTransitionsFrom sE fa
r2 = unionRE $ loops sE fa
ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs]
r r1 r3 = concatRE [r1, repeatRE r2, r3]
epsilonRE = REConcat []
nullRE = REUnion []
isNull (REUnion []) = True
isNull _ = False
isEpsilon (REConcat []) = True
isEpsilon _ = False
unionRE :: [RE a] -> RE a
unionRE = unionOrId . concatMap toList
where
toList (REUnion xs) = xs
toList x = [x]
unionOrId [r] = r
unionOrId rs = REUnion rs
concatRE :: [RE a] -> RE a
concatRE xs | any isNull xs = nullRE
| otherwise = case concatMap toList xs of
[r] -> r
rs -> REConcat rs
where
toList (REConcat xs) = xs
toList x = [x]
repeatRE :: RE a -> RE a
repeatRE x | isNull x || isEpsilon x = epsilonRE
| otherwise = RERepeat x
finalRE :: DFA (RE a) -> RE a
finalRE fa = concatRE [repeatRE r1, r2,
repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])]
where
s0 = startState fa
[sF] = finalStates fa
r1 = unionRE $ loops s0 fa
r2 = unionRE $ map snd $ nonLoopTransitionsTo sF fa
r3 = unionRE $ loops sF fa
r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa
-- Debugging
prRE :: Show a => RE a -> String
prRE (REUnion []) = "<NULL>"
prRE (REUnion xs) = "(" ++ concat (intersperse " | " (map prRE xs)) ++ ")"
prRE (REConcat xs) = "(" ++ unwords (map prRE xs) ++ ")"
prRE (RERepeat x) = "(" ++ prRE x ++ ")*"
prRE (RESymbol s) = show s

View File

@@ -23,7 +23,7 @@ module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..),
makeSimpleSRG, makeSRG makeSimpleSRG, makeSRG
, lookupFM_, prtS , lookupFM_, prtS
, topDownFilter , topDownFilter
, EBnfSRGAlt(..), EBnfSRGItem(..) , EBnfSRGAlt(..), EBnfSRGItem
, ebnfSRGAlts , ebnfSRGAlts
) where ) where
@@ -38,6 +38,8 @@ import GF.Conversion.Types
import GF.Infra.Print import GF.Infra.Print
import GF.Speech.TransformCFG import GF.Speech.TransformCFG
import GF.Speech.Relation import GF.Speech.Relation
import GF.Speech.FiniteState
import GF.Speech.RegExp
import GF.Infra.Option import GF.Infra.Option
import GF.Probabilistic.Probabilistic (Probs) import GF.Probabilistic.Probabilistic (Probs)
@@ -173,18 +175,25 @@ allSRGCats SRG { rules = rs } = [c | SRGRule c _ _ <- rs]
data EBnfSRGAlt = EBnfSRGAlt (Maybe Double) Name EBnfSRGItem data EBnfSRGAlt = EBnfSRGAlt (Maybe Double) Name EBnfSRGItem
deriving (Eq,Show) deriving (Eq,Show)
data EBnfSRGItem = type EBnfSRGItem = RE (Symbol SRGNT Token)
EBnfOneOf [EBnfSRGItem]
| EBnfSeq [EBnfSRGItem]
| EBnfSymbol (Symbol SRGNT Token)
deriving (Eq,Show)
ebnfSRGAlts :: [SRGAlt] -> [EBnfSRGAlt] ebnfSRGAlts :: [SRGAlt] -> [EBnfSRGAlt]
ebnfSRGAlts alts = [EBnfSRGAlt p n (ebnfSRGItem sss) ebnfSRGAlts alts = [EBnfSRGAlt p n (ebnfSRGItem sss)
| ((p,n),sss) <- buildMultiMap [((p,n),ss) | SRGAlt p n ss <- alts]] | ((p,n),sss) <- buildMultiMap [((p,n),ss) | SRGAlt p n ss <- alts]]
ebnfSRGItem :: [[Symbol SRGNT Token]] -> EBnfSRGItem ebnfSRGItem :: [[Symbol SRGNT Token]] -> EBnfSRGItem
ebnfSRGItem sss = EBnfOneOf (map (EBnfSeq . map EBnfSymbol) sss) ebnfSRGItem = dfa2re . mkSRGFA
mkSRGFA :: [[Symbol SRGNT Token]] -> DFA (Symbol SRGNT Token)
mkSRGFA = minimize . dfa2nfa . foldr addString (newFA ())
addString :: [a] -> DFA a -> DFA a
addString xs fa = addFinalState (last sts0) $ newTransitions ts fa'
where (fa',ss) = newStates (replicate (length xs) ()) fa
sts0 = startState fa : sts1
sts1 = map fst ss
ts = zip3 sts0 sts1 xs
-- --
-- * Utilities for building and printing SRGs -- * Utilities for building and printing SRGs

View File

@@ -64,6 +64,7 @@ import GF.Speech.PrJSGF (jsgfPrinter)
import qualified GF.Speech.PrSRGS as SRGS import qualified GF.Speech.PrSRGS as SRGS
import GF.Speech.PrSLF import GF.Speech.PrSLF
import GF.Speech.PrFA (faGraphvizPrinter,regularPrinter,faCPrinter) import GF.Speech.PrFA (faGraphvizPrinter,regularPrinter,faCPrinter)
import GF.Speech.PrRegExp (regexpPrinter)
import GF.Speech.GrammarToVoiceXML (grammar2vxml) import GF.Speech.GrammarToVoiceXML (grammar2vxml)
import GF.Data.Zipper import GF.Data.Zipper
@@ -284,6 +285,9 @@ customGrammarPrinter =
,(strCI "fa_c", \opts s -> let start = getStartCatCF opts s ,(strCI "fa_c", \opts s -> let start = getStartCatCF opts s
name = cncId s name = cncId s
in faCPrinter name start $ stateCFG s) in faCPrinter name start $ stateCFG s)
,(strCI "regexp", \opts s -> let start = getStartCatCF opts s
name = cncId s
in regexpPrinter name start $ stateCFG s)
,(strCI "regular", \_ -> regularPrinter . stateCFG) ,(strCI "regular", \_ -> regularPrinter . stateCFG)
,(strCI "plbnf", \_ -> prLBNF True) ,(strCI "plbnf", \_ -> prLBNF True)
,(strCI "lbnf", \_ -> prLBNF False) ,(strCI "lbnf", \_ -> prLBNF False)