forked from GitHub/gf-core
Towards smaller SRGs when lots of variants are used.
This commit is contained in:
@@ -14,11 +14,17 @@
|
||||
module GF.Speech.FiniteState (FA(..), State, NFA, DFA,
|
||||
startState, finalStates,
|
||||
states, transitions,
|
||||
isInternal,
|
||||
newFA,
|
||||
addFinalState,
|
||||
newState, newStates,
|
||||
newTransition, newTransitions,
|
||||
insertTransitionWith, insertTransitionsWith,
|
||||
mapStates, mapTransitions,
|
||||
modifyTransitions,
|
||||
nonLoopTransitionsTo, nonLoopTransitionsFrom,
|
||||
loops,
|
||||
removeState,
|
||||
oneFinalState,
|
||||
insertNFA,
|
||||
onGraph,
|
||||
@@ -41,6 +47,7 @@ import qualified GF.Visualization.Graphviz as Dot
|
||||
|
||||
type State = Int
|
||||
|
||||
-- | Type parameters: node id type, state label type, edge label type
|
||||
data FA n a b = FA !(Graph n a b) !n ![n]
|
||||
|
||||
type NFA a = FA State () (Maybe a)
|
||||
@@ -82,18 +89,46 @@ newTransition f t l = onGraph (newEdge (f,t,l))
|
||||
newTransitions :: [(n, n, b)] -> FA n a b -> FA n a b
|
||||
newTransitions es = onGraph (newEdges es)
|
||||
|
||||
insertTransitionWith :: Eq n =>
|
||||
(b -> b -> b) -> (n, n, b) -> FA n a b -> FA n a b
|
||||
insertTransitionWith f t = onGraph (insertEdgeWith f t)
|
||||
|
||||
insertTransitionsWith :: Eq n =>
|
||||
(b -> b -> b) -> [(n, n, b)] -> FA n a b -> FA n a b
|
||||
insertTransitionsWith f ts fa =
|
||||
foldl' (flip (insertTransitionWith f)) fa ts
|
||||
|
||||
mapStates :: (a -> c) -> FA n a b -> FA n c b
|
||||
mapStates f = onGraph (nmap f)
|
||||
|
||||
mapTransitions :: (b -> c) -> FA n a b -> FA n a c
|
||||
mapTransitions f = onGraph (emap f)
|
||||
|
||||
modifyTransitions :: ([(n,n,b)] -> [(n,n,b)]) -> FA n a b -> FA n a b
|
||||
modifyTransitions f = onGraph (\ (Graph r ns es) -> Graph r ns (f es))
|
||||
|
||||
removeState :: Ord n => n -> FA n a b -> FA n a b
|
||||
removeState n = onGraph (removeNode n)
|
||||
|
||||
minimize :: Ord a => NFA a -> DFA a
|
||||
minimize = determinize . reverseNFA . dfa2nfa . determinize . reverseNFA
|
||||
|
||||
unusedNames :: FA n a b -> [n]
|
||||
unusedNames (FA (Graph names _ _) _ _) = names
|
||||
|
||||
-- | Gets all incoming transitions to a given state, excluding
|
||||
-- transtions from the state itself.
|
||||
nonLoopTransitionsTo :: Eq n => n -> FA n a b -> [(n,b)]
|
||||
nonLoopTransitionsTo s fa =
|
||||
[(f,l) | (f,t,l) <- transitions fa, t == s && f /= s]
|
||||
|
||||
nonLoopTransitionsFrom :: Eq n => n -> FA n a b -> [(n,b)]
|
||||
nonLoopTransitionsFrom s fa =
|
||||
[(t,l) | (f,t,l) <- transitions fa, f == s && t /= s]
|
||||
|
||||
loops :: Eq n => n -> FA n a b -> [b]
|
||||
loops s fa = [l | (f,t,l) <- transitions fa, f == s && t == s]
|
||||
|
||||
-- | Give new names to all nodes.
|
||||
renameStates :: Ord x => [y] -- ^ Infinite supply of new names
|
||||
-> FA x a b
|
||||
|
||||
@@ -14,7 +14,8 @@
|
||||
module GF.Speech.Graph ( Graph(..), Node, Edge, NodeInfo
|
||||
, newGraph, nodes, edges
|
||||
, nmap, emap, newNode, newNodes, newEdge, newEdges
|
||||
, removeNodes
|
||||
, insertEdgeWith
|
||||
, removeNode, removeNodes
|
||||
, nodeInfo
|
||||
, getIncoming, getOutgoing, getNodeLabel
|
||||
, inDegree, outDegree
|
||||
@@ -82,6 +83,17 @@ newEdges es g = foldl' (flip newEdge) g es
|
||||
-- lazy version:
|
||||
-- newEdges es' (Graph c ns es) = Graph c ns (es'++es)
|
||||
|
||||
insertEdgeWith :: Eq n =>
|
||||
(b -> b -> b) -> Edge n b -> Graph n a b -> Graph n a b
|
||||
insertEdgeWith f e@(x,y,l) (Graph c ns es) = Graph c ns (h es)
|
||||
where h [] = [e]
|
||||
h (e'@(x',y',l'):es') | x' == x && y' == y = (x',y', f l l'):es'
|
||||
| otherwise = e':h es'
|
||||
|
||||
-- | Remove a node and all edges to and from that node.
|
||||
removeNode :: Ord n => n -> Graph n a b -> Graph n a b
|
||||
removeNode n = removeNodes (Set.singleton n)
|
||||
|
||||
-- | Remove a set of nodes and all edges to and from those nodes.
|
||||
removeNodes :: Ord n => Set n -> Graph n a b -> Graph n a b
|
||||
removeNodes xs (Graph c ns es) = Graph c ns' es'
|
||||
|
||||
@@ -28,6 +28,7 @@ import GF.Infra.Print
|
||||
import GF.Infra.Option
|
||||
import GF.Probabilistic.Probabilistic (Probs)
|
||||
import GF.Speech.SRG
|
||||
import GF.Speech.RegExp
|
||||
|
||||
jsgfPrinter :: Ident -- ^ Grammar name
|
||||
-> String -- ^ Start category
|
||||
@@ -48,20 +49,27 @@ prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
|
||||
. showString "public <MAIN> = " . prCat start . showChar ';' . nl . nl
|
||||
prRule (SRGRule cat origCat rhs) =
|
||||
comments [origCat] . nl
|
||||
. prCat cat . showString " = " . joinS " | " (map prAlt rhs) . nl
|
||||
. prCat cat . showString " = " . joinS " | " (map prAlt (ebnfSRGAlts rhs)) . nl
|
||||
-- FIXME: use the probability
|
||||
prAlt (SRGAlt mp _ rhs)
|
||||
| null rhs' = showString "<NULL>"
|
||||
| otherwise = wrap "(" (unwordsS (map prSymbol rhs')) ")"
|
||||
where rhs' = rmPunct rhs
|
||||
prSymbol (Cat (c,_)) = prCat c
|
||||
prSymbol (Tok t) = wrap "\"" (prtS t) "\""
|
||||
prCat c = showChar '<' . showString c . showChar '>'
|
||||
prAlt (EBnfSRGAlt mp _ rhs) = prItem rhs
|
||||
|
||||
rmPunct :: [Symbol c Token] -> [Symbol c Token]
|
||||
rmPunct [] = []
|
||||
rmPunct (Tok t:ss) | all isPunct (prt t) = rmPunct ss
|
||||
rmPunct (s:ss) = s : rmPunct ss
|
||||
prCat :: SRGCat -> ShowS
|
||||
prCat c = showChar '<' . showString c . showChar '>'
|
||||
|
||||
prItem :: EBnfSRGItem -> ShowS
|
||||
prItem = f
|
||||
where
|
||||
f (REUnion []) = showString "<VOID>"
|
||||
f (REUnion xs) = wrap "(" (joinS " | " (map f xs)) ")"
|
||||
f (REConcat []) = showString "<NULL>"
|
||||
f (REConcat xs) = wrap "(" (unwordsS (map f xs)) ")"
|
||||
f (RERepeat x) = wrap "(" (f x) ")" . showString "*"
|
||||
f (RESymbol s) = prSymbol s
|
||||
|
||||
prSymbol :: Symbol SRGNT Token -> ShowS
|
||||
prSymbol (Cat (c,_)) = prCat c
|
||||
prSymbol (Tok t) | all isPunct (prt t) = id -- removes punctuation
|
||||
| otherwise = wrap "\"" (prtS t) "\""
|
||||
|
||||
isPunct :: Char -> Bool
|
||||
isPunct c = c `elem` "-_.;.,?!"
|
||||
|
||||
21
src/GF/Speech/PrRegExp.hs
Normal file
21
src/GF/Speech/PrRegExp.hs
Normal file
@@ -0,0 +1,21 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : PrSLF
|
||||
-- Maintainer : BB
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- This module prints a grammar as a regular expression.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Speech.PrRegExp (regexpPrinter) where
|
||||
|
||||
import GF.Conversion.Types
|
||||
import GF.Infra.Ident
|
||||
import GF.Speech.CFGToFiniteState
|
||||
import GF.Speech.RegExp
|
||||
|
||||
|
||||
regexpPrinter :: Ident -- ^ Grammar name
|
||||
-> String -> CGrammar -> String
|
||||
regexpPrinter name start cfg = prRE $ dfa2re $ cfgToFA start cfg
|
||||
@@ -15,6 +15,7 @@ module GF.Speech.PrSRGS (SISRFormat(..), srgsXmlPrinter) where
|
||||
|
||||
import GF.Data.Utilities
|
||||
import GF.Data.XML
|
||||
import GF.Speech.RegExp
|
||||
import GF.Speech.SISR as SISR
|
||||
import GF.Speech.SRG
|
||||
import GF.Infra.Ident
|
||||
@@ -85,9 +86,12 @@ mkProd sisr (EBnfSRGAlt mp n@(Name f prs) rhs) = Tag "item" w (t ++ xs)
|
||||
argInit (Constant f) = maybe "?" prIdent (forestName f)
|
||||
|
||||
mkItem :: Maybe SISRFormat -> EBnfSRGItem -> XML
|
||||
mkItem sisr (EBnfOneOf xs) = oneOf (map (mkItem sisr) xs)
|
||||
mkItem sisr (EBnfSeq xs) = Tag "item" [] (map (mkItem sisr) xs)
|
||||
mkItem sisr (EBnfSymbol s) = symItem sisr s
|
||||
mkItem sisr = f
|
||||
where
|
||||
f (REUnion xs) = oneOf (map f xs)
|
||||
f (REConcat xs) = Tag "item" [] (map f xs)
|
||||
f (RERepeat x) = Tag "item" [("repeat","0-")] [f x]
|
||||
f (RESymbol s) = symItem sisr s
|
||||
|
||||
symItem :: Maybe SISRFormat -> Symbol SRGNT Token -> XML
|
||||
symItem sisr (Cat (c,slots)) = Tag "item" [] ([Tag "ruleref" [("uri","#" ++ prCat c)] []]++t)
|
||||
@@ -107,8 +111,7 @@ showToken :: Token -> String
|
||||
showToken t = t
|
||||
|
||||
oneOf :: [XML] -> XML
|
||||
oneOf [x] = x
|
||||
oneOf xs = Tag "one-of" [] xs
|
||||
oneOf = Tag "one-of" []
|
||||
|
||||
grammar :: Maybe SISRFormat
|
||||
-> String -- ^ root
|
||||
@@ -130,61 +133,3 @@ optimizeSRGS = bottomUpXML f
|
||||
where f (Tag "item" [] [x@(Tag "item" [] _)]) = x
|
||||
f (Tag "one-of" [] [x]) = x
|
||||
f x = x
|
||||
|
||||
{-
|
||||
|
||||
--
|
||||
-- * SRGS minimization
|
||||
--
|
||||
|
||||
minimizeRule :: XML -> XML
|
||||
minimizeRule (Tag "rule" attrs cs)
|
||||
= Tag "rule" attrs (map minimizeOneOf cs)
|
||||
|
||||
minimizeOneOf :: XML -> XML
|
||||
minimizeOneOf (Tag "one-of" attrs cs)
|
||||
= Tag "item" [] (p++[Tag "one-of" attrs cs'])
|
||||
where
|
||||
(pref,cs') = factor cs
|
||||
p = if null pref then [] else [Tag "one-of" [] pref]
|
||||
minimizeOneOf x = x
|
||||
|
||||
factor :: [XML] -> ([XML],[XML])
|
||||
factor xs = case f of
|
||||
Just (ps,xs') -> (map it ps, map it xs')
|
||||
Nothing -> ([],xs)
|
||||
where
|
||||
-- FIXME: maybe getting all the longest terminal prefixes
|
||||
-- is not optimal?
|
||||
f = cartesianFactor $ map (terminalPrefix . unIt) xs
|
||||
unIt (Tag "item" [] cs) = cs
|
||||
it cs = Tag "item" [] cs
|
||||
|
||||
terminalPrefix :: [XML] -> ([XML],[XML])
|
||||
terminalPrefix cs = (terms, tags ++ cs'')
|
||||
where (tags,cs') = span isTag cs
|
||||
(terms,cs'') = span isTerminalItem cs'
|
||||
|
||||
isTag :: XML -> Bool
|
||||
isTag (Tag t _ _) = t == "tag"
|
||||
isTag _ = False
|
||||
|
||||
isTerminalItem :: XML -> Bool
|
||||
isTerminalItem (Tag "item" [] [Data _]) = True
|
||||
isTerminalItem _ = False
|
||||
|
||||
--
|
||||
-- * Utilities
|
||||
--
|
||||
|
||||
allEqual :: Eq a => [a] -> Bool
|
||||
allEqual [] = True
|
||||
allEqual (x:xs) = all (x==) xs
|
||||
|
||||
cartesianFactor :: (Ord a, Ord b) => [(a,b)] -> Maybe ([a],[b])
|
||||
cartesianFactor xs
|
||||
| not (null es) && allEqual es = Just (Map.keys m, Set.elems (head es))
|
||||
| otherwise = Nothing
|
||||
where m = Map.fromListWith Set.union [(x,Set.singleton y) | (x,y) <- xs]
|
||||
es = Map.elems m
|
||||
-}
|
||||
84
src/GF/Speech/RegExp.hs
Normal file
84
src/GF/Speech/RegExp.hs
Normal file
@@ -0,0 +1,84 @@
|
||||
module GF.Speech.RegExp (RE(..), dfa2re, prRE) where
|
||||
|
||||
import Data.List
|
||||
|
||||
import GF.Data.Utilities
|
||||
import GF.Speech.FiniteState
|
||||
|
||||
data RE a =
|
||||
REUnion [RE a] -- ^ REUnion [] is null
|
||||
| REConcat [RE a] -- ^ REConcat [] is epsilon
|
||||
| RERepeat (RE a)
|
||||
| RESymbol a
|
||||
deriving (Eq,Show)
|
||||
|
||||
|
||||
dfa2re :: Show a => DFA a -> RE a
|
||||
dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops
|
||||
. oneFinalState () epsilonRE . mapTransitions RESymbol
|
||||
where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa
|
||||
merge es = [(f,t,unionRE ls)
|
||||
| ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]]
|
||||
|
||||
elimStates :: Show a => DFA (RE a) -> DFA (RE a)
|
||||
elimStates fa =
|
||||
case [s | (s,_) <- states fa, isInternal fa s] of
|
||||
[] -> fa
|
||||
sE:_ -> elimStates $ insertTransitionsWith (\x y -> unionRE [x,y]) ts $ removeState sE fa
|
||||
where sAs = nonLoopTransitionsTo sE fa
|
||||
sBs = nonLoopTransitionsFrom sE fa
|
||||
r2 = unionRE $ loops sE fa
|
||||
ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs]
|
||||
r r1 r3 = concatRE [r1, repeatRE r2, r3]
|
||||
|
||||
epsilonRE = REConcat []
|
||||
|
||||
nullRE = REUnion []
|
||||
|
||||
isNull (REUnion []) = True
|
||||
isNull _ = False
|
||||
|
||||
isEpsilon (REConcat []) = True
|
||||
isEpsilon _ = False
|
||||
|
||||
unionRE :: [RE a] -> RE a
|
||||
unionRE = unionOrId . concatMap toList
|
||||
where
|
||||
toList (REUnion xs) = xs
|
||||
toList x = [x]
|
||||
unionOrId [r] = r
|
||||
unionOrId rs = REUnion rs
|
||||
|
||||
concatRE :: [RE a] -> RE a
|
||||
concatRE xs | any isNull xs = nullRE
|
||||
| otherwise = case concatMap toList xs of
|
||||
[r] -> r
|
||||
rs -> REConcat rs
|
||||
where
|
||||
toList (REConcat xs) = xs
|
||||
toList x = [x]
|
||||
|
||||
repeatRE :: RE a -> RE a
|
||||
repeatRE x | isNull x || isEpsilon x = epsilonRE
|
||||
| otherwise = RERepeat x
|
||||
|
||||
finalRE :: DFA (RE a) -> RE a
|
||||
finalRE fa = concatRE [repeatRE r1, r2,
|
||||
repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])]
|
||||
where
|
||||
s0 = startState fa
|
||||
[sF] = finalStates fa
|
||||
r1 = unionRE $ loops s0 fa
|
||||
r2 = unionRE $ map snd $ nonLoopTransitionsTo sF fa
|
||||
r3 = unionRE $ loops sF fa
|
||||
r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa
|
||||
|
||||
-- Debugging
|
||||
|
||||
prRE :: Show a => RE a -> String
|
||||
prRE (REUnion []) = "<NULL>"
|
||||
prRE (REUnion xs) = "(" ++ concat (intersperse " | " (map prRE xs)) ++ ")"
|
||||
prRE (REConcat xs) = "(" ++ unwords (map prRE xs) ++ ")"
|
||||
prRE (RERepeat x) = "(" ++ prRE x ++ ")*"
|
||||
prRE (RESymbol s) = show s
|
||||
|
||||
@@ -23,7 +23,7 @@ module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..),
|
||||
makeSimpleSRG, makeSRG
|
||||
, lookupFM_, prtS
|
||||
, topDownFilter
|
||||
, EBnfSRGAlt(..), EBnfSRGItem(..)
|
||||
, EBnfSRGAlt(..), EBnfSRGItem
|
||||
, ebnfSRGAlts
|
||||
) where
|
||||
|
||||
@@ -38,6 +38,8 @@ import GF.Conversion.Types
|
||||
import GF.Infra.Print
|
||||
import GF.Speech.TransformCFG
|
||||
import GF.Speech.Relation
|
||||
import GF.Speech.FiniteState
|
||||
import GF.Speech.RegExp
|
||||
import GF.Infra.Option
|
||||
import GF.Probabilistic.Probabilistic (Probs)
|
||||
|
||||
@@ -173,18 +175,25 @@ allSRGCats SRG { rules = rs } = [c | SRGRule c _ _ <- rs]
|
||||
data EBnfSRGAlt = EBnfSRGAlt (Maybe Double) Name EBnfSRGItem
|
||||
deriving (Eq,Show)
|
||||
|
||||
data EBnfSRGItem =
|
||||
EBnfOneOf [EBnfSRGItem]
|
||||
| EBnfSeq [EBnfSRGItem]
|
||||
| EBnfSymbol (Symbol SRGNT Token)
|
||||
deriving (Eq,Show)
|
||||
type EBnfSRGItem = RE (Symbol SRGNT Token)
|
||||
|
||||
|
||||
ebnfSRGAlts :: [SRGAlt] -> [EBnfSRGAlt]
|
||||
ebnfSRGAlts alts = [EBnfSRGAlt p n (ebnfSRGItem sss)
|
||||
| ((p,n),sss) <- buildMultiMap [((p,n),ss) | SRGAlt p n ss <- alts]]
|
||||
|
||||
ebnfSRGItem :: [[Symbol SRGNT Token]] -> EBnfSRGItem
|
||||
ebnfSRGItem sss = EBnfOneOf (map (EBnfSeq . map EBnfSymbol) sss)
|
||||
ebnfSRGItem = dfa2re . mkSRGFA
|
||||
|
||||
mkSRGFA :: [[Symbol SRGNT Token]] -> DFA (Symbol SRGNT Token)
|
||||
mkSRGFA = minimize . dfa2nfa . foldr addString (newFA ())
|
||||
|
||||
addString :: [a] -> DFA a -> DFA a
|
||||
addString xs fa = addFinalState (last sts0) $ newTransitions ts fa'
|
||||
where (fa',ss) = newStates (replicate (length xs) ()) fa
|
||||
sts0 = startState fa : sts1
|
||||
sts1 = map fst ss
|
||||
ts = zip3 sts0 sts1 xs
|
||||
|
||||
--
|
||||
-- * Utilities for building and printing SRGs
|
||||
|
||||
@@ -64,6 +64,7 @@ import GF.Speech.PrJSGF (jsgfPrinter)
|
||||
import qualified GF.Speech.PrSRGS as SRGS
|
||||
import GF.Speech.PrSLF
|
||||
import GF.Speech.PrFA (faGraphvizPrinter,regularPrinter,faCPrinter)
|
||||
import GF.Speech.PrRegExp (regexpPrinter)
|
||||
import GF.Speech.GrammarToVoiceXML (grammar2vxml)
|
||||
|
||||
import GF.Data.Zipper
|
||||
@@ -284,6 +285,9 @@ customGrammarPrinter =
|
||||
,(strCI "fa_c", \opts s -> let start = getStartCatCF opts s
|
||||
name = cncId s
|
||||
in faCPrinter name start $ stateCFG s)
|
||||
,(strCI "regexp", \opts s -> let start = getStartCatCF opts s
|
||||
name = cncId s
|
||||
in regexpPrinter name start $ stateCFG s)
|
||||
,(strCI "regular", \_ -> regularPrinter . stateCFG)
|
||||
,(strCI "plbnf", \_ -> prLBNF True)
|
||||
,(strCI "lbnf", \_ -> prLBNF False)
|
||||
|
||||
Reference in New Issue
Block a user