Documented SRGS XML generation. Started working on support for probabilities in SRG generation. Added support for probabilities in for SRGS.

This commit is contained in:
bringert
2005-11-01 19:09:04 +00:00
parent 7f5b7eb623
commit da9c8e2e1c
9 changed files with 108 additions and 59 deletions

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/05/30 18:39:44 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.20 $
-- > CVS $Date: 2005/11/01 20:09:04 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.21 $
--
-- some auxiliary GF operations. AR 19\/6\/1998 -- 6\/2\/2001
--
@@ -100,6 +100,7 @@ data Err a = Ok a | Bad String
instance Monad Err where
return = Ok
fail = Bad
Ok a >>= f = f a
Bad s >>= f = Bad s
@@ -302,19 +303,19 @@ isInBinTree :: (Ord a) => a -> BinTree a b -> Bool
isInBinTree x = err (const False) (const True) . justLookupTree x
-- isInBinTree = elemFM
justLookupTree :: (Ord a) => a -> BinTree a b -> Err b
justLookupTree :: (Monad m,Ord a) => a -> BinTree a b -> m b
justLookupTree = lookupTree (const [])
lookupTree :: (Ord a) => (a -> String) -> a -> BinTree a b -> Err b
lookupTree :: (Monad m,Ord a) => (a -> String) -> a -> BinTree a b -> m b
lookupTree pr x tree = case tree of
NT -> Bad ("no occurrence of element" +++ pr x)
NT -> fail ("no occurrence of element" +++ pr x)
BT (a,b) left right
| x < a -> lookupTree pr x left
| x > a -> lookupTree pr x right
| x == a -> return b
--lookupTree pr x tree = case lookupFM tree x of
-- Just y -> return y
-- _ -> Bad ("no occurrence of element" +++ pr x)
-- _ -> fail ("no occurrence of element" +++ pr x)
lookupTreeMany :: Ord a => (a -> String) -> [BinTree a b] -> a -> Err b
lookupTreeMany pr (t:ts) x = case lookupTree pr x t of

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/10/31 19:02:35 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.18 $
-- > CVS $Date: 2005/11/01 20:09:04 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.19 $
--
-- Help on shell commands. Generated from HelpFile by 'make help'.
-- PLEASE DON'T EDIT THIS FILE.
@@ -575,6 +575,7 @@ txtHelpFile =
"\n -printer=probs show probabilities of all functions" ++
"\n -printer=gsl Nuance GSL speech recognition grammar" ++
"\n -printer=jsgf Java Speech Grammar Format" ++
"\n -printer=srgs_xml SRGS XML format" ++
"\n -printer=slf a finite automaton in the HTK SLF format" ++
"\n -printer=slf_graphviz the same automaton as in SLF, but in Graphviz format" ++
"\n -printer=fa_graphviz a finite automaton with labelled edges" ++

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/09/14 15:17:29 $
-- > CVS $Date: 2005/11/01 20:09:04 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.21 $
-- > CVS $Revision: 1.22 $
--
-- This module prints a CFG as a Nuance GSL 2.0 grammar.
--
@@ -26,13 +26,14 @@ import GF.Formalism.Utilities (Symbol(..))
import GF.Conversion.Types
import GF.Infra.Print
import GF.Infra.Option
import GF.Probabilistic.Probabilistic (Probs)
import Data.Char (toUpper,toLower)
gslPrinter :: Ident -- ^ Grammar name
-> Options -> CGrammar -> String
gslPrinter name opts cfg = prGSL srg ""
where srg = makeSRG name opts cfg
-> Options -> Maybe Probs -> CGrammar -> String
gslPrinter name opts probs cfg = prGSL srg ""
where srg = makeSRG name opts probs cfg
prGSL :: SRG -> ShowS
prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
@@ -46,8 +47,9 @@ prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
prRule (SRGRule cat origCat rhs) =
showString "; " . prtS origCat . nl
. prCat cat . sp . wrap "[" (unwordsS (map prAlt rhs)) "]" . nl
prAlt rhs = wrap "(" (unwordsS (map prSymbol rhs')) ")"
where rhs' = rmPunct rhs
-- FIXME: use the probability
prAlt (SRGAlt mp rhs) = wrap "(" (unwordsS (map prSymbol rhs')) ")"
where rhs' = rmPunct rhs
prSymbol (Cat c) = prCat c
prSymbol (Tok t) = wrap "\"" (showString (showToken t)) "\""
-- GSL requires an upper case letter in category names

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/09/14 15:17:29 $
-- > CVS $Date: 2005/11/01 20:09:04 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.15 $
-- > CVS $Revision: 1.16 $
--
-- This module prints a CFG as a JSGF grammar.
--
@@ -26,12 +26,13 @@ import GF.Formalism.Utilities (Symbol(..))
import GF.Infra.Ident
import GF.Infra.Print
import GF.Infra.Option
import GF.Probabilistic.Probabilistic (Probs)
import GF.Speech.SRG
jsgfPrinter :: Ident -- ^ Grammar name
-> Options -> CGrammar -> String
jsgfPrinter name opts cfg = prJSGF srg ""
where srg = makeSRG name opts cfg
-> Options -> Maybe Probs -> CGrammar -> String
jsgfPrinter name opts probs cfg = prJSGF srg ""
where srg = makeSRG name opts probs cfg
prJSGF :: SRG -> ShowS
prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
@@ -47,9 +48,11 @@ prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
prRule (SRGRule cat origCat rhs) =
comments [origCat] . nl
. prCat cat . showString " = " . joinS " | " (map prAlt rhs) . nl
prAlt rhs | null rhs' = showString "<NULL>"
| otherwise = wrap "(" (unwordsS (map prSymbol rhs')) ")"
where rhs' = rmPunct rhs
-- FIXME: use the probability
prAlt (SRGAlt mp rhs)
| null rhs' = showString "<NULL>"
| otherwise = wrap "(" (unwordsS (map prSymbol rhs')) ")"
where rhs' = rmPunct rhs
prSymbol (Cat c) = prCat c
prSymbol (Tok t) = wrap "\"" (prtS t) "\""
prCat c = showChar '<' . showString c . showChar '>'

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/10/31 16:48:08 $
-- > CVS $Date: 2005/11/01 20:09:04 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.1 $
-- > CVS $Revision: 1.2 $
--
-- This module prints a CFG as an SRGS XML grammar.
--
@@ -26,6 +26,7 @@ import GF.Formalism.Utilities (Symbol(..))
import GF.Conversion.Types
import GF.Infra.Print
import GF.Infra.Option
import GF.Probabilistic.Probabilistic (Probs)
import Data.Char (toUpper,toLower)
@@ -35,9 +36,11 @@ data XML = Data String | Tag String [Attr] [XML] | Comment String
type Attr = (String,String)
srgsXmlPrinter :: Ident -- ^ Grammar name
-> Options -> CGrammar -> String
srgsXmlPrinter name opts cfg = prSrgsXml srg ""
where srg = makeSRG name opts cfg
-> Options
-> Maybe Probs
-> CGrammar -> String
srgsXmlPrinter name opts probs cfg = prSrgsXml srg ""
where srg = makeSRG name opts probs cfg
prSrgsXml :: SRG -> ShowS
prSrgsXml (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
@@ -50,13 +53,12 @@ prSrgsXml (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
"Generated by GF",
"Original start category: " ++ origStart]
++ map ruleToXML rs)
ruleToXML (SRGRule cat origCat rhss) =
rule (prCat cat) (comments ["Category " ++ origCat] ++ [prRhs rhss])
prRhs rhss = oneOf (map item (map prAlt rhss))
-- FIXME: don't use one-of if there is only one
prAlt rhs = map prSymbol rhs
ruleToXML (SRGRule cat origCat alts) =
rule (prCat cat) (comments ["Category " ++ origCat] ++ [prRhs alts])
prRhs rhss = oneOf (map prAlt rhss)
prAlt (SRGAlt p rhs) = item p (map prSymbol rhs)
prSymbol (Cat c) = Tag "ruleref" [("uri","#" ++ prCat c)] []
prSymbol (Tok t) = item [Data (showToken t)]
prSymbol (Tok t) = item Nothing [Data (showToken t)]
prCat c = c -- FIXME: escape something?
showToken t = t -- FIXME: escape something?
@@ -64,9 +66,13 @@ rule :: String -- ^ id
-> [XML] -> XML
rule i = Tag "rule" [("id",i)]
item :: [XML] -> XML
item [x@(Tag "item" _ _)] = x
item xs = Tag "item" [] xs
item :: Maybe Double -> [XML] -> XML
-- FIXME: what is the weight called?
item mp xs = Tag "item" as cs
where as = maybe [] (\p -> [("weight", show p)]) mp
cs = case xs of
[Tag "item" [] xs'] -> xs'
_ -> xs
oneOf :: [XML] -> XML
oneOf [x] = x

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/10/26 17:14:03 $
-- > CVS $Date: 2005/11/01 20:09:04 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.19 $
-- > CVS $Revision: 1.20 $
--
-- Representation of, conversion to, and utilities for
-- printing of a general Speech Recognition Grammar.
@@ -20,6 +20,7 @@
module GF.Speech.SRG where
import GF.Data.Operations
import GF.Data.Utilities
import GF.Infra.Ident
import GF.Formalism.CFG
@@ -28,6 +29,7 @@ import GF.Conversion.Types
import GF.Infra.Print
import GF.Speech.TransformCFG
import GF.Infra.Option
import GF.Probabilistic.Probabilistic (Probs)
import Data.List
import Data.Maybe (fromMaybe)
@@ -38,9 +40,15 @@ data SRG = SRG { grammarName :: String -- ^ grammar name
, origStartCat :: String -- ^ original start category name
, rules :: [SRGRule]
}
deriving (Eq,Show)
data SRGRule = SRGRule String String [SRGAlt] -- ^ SRG category name, original category name
-- and productions
type SRGAlt = [Symbol String Token]
deriving (Eq,Show)
-- | maybe a probability, and a list of symbols
data SRGAlt = SRGAlt (Maybe Double) [Symbol String Token]
deriving (Eq,Show)
-- | SRG category name and original name
type CatName = (String,String)
@@ -49,27 +57,42 @@ type CatNames = FiniteMap String String
makeSRG :: Ident -- ^ Grammar name
-> Options -- ^ Grammar options
-> Maybe Probs -- ^ Probabilities
-> CGrammar -- ^ A context-free grammar
-> SRG
makeSRG i opts gr = SRG { grammarName = name,
startCat = lookupFM_ names origStart,
origStartCat = origStart,
rules = map (cfgRulesToSRGRule names) cfgRules }
makeSRG i opts probs gr
= SRG { grammarName = name,
startCat = lookupFM_ names origStart,
origStartCat = origStart,
rules = rs }
where
name = prIdent i
origStart = getStartCat opts
gr' = removeLeftRecursion $ removeIdenticalRules $ removeEmptyCats $ cfgToCFRules gr
(cats,cfgRules) = unzip gr'
names = mkCatNames name cats
rs = map (cfgRulesToSRGRule names probs) cfgRules
cfgRulesToSRGRule :: FiniteMap String String -> [CFRule_] -> SRGRule
cfgRulesToSRGRule names rs@(r:_) = SRGRule cat origCat rhs
-- FIXME: probabilities get larger than 1.0 when new rules are
-- introduced
-- FIXME: merge alternatives with same rhs but different probabilities
cfgRulesToSRGRule :: FiniteMap String String -> Maybe Probs -> [CFRule_] -> SRGRule
cfgRulesToSRGRule names probs rs@(r:_) = SRGRule cat origCat rhs
where origCat = lhsCat r
cat = lookupFM_ names origCat
rhs = nub $ map (map renameCat . ruleRhs) rs
rhs = nub $ map ruleToAlt rs
ruleToAlt r = SRGAlt (ruleProb probs r) (map renameCat (ruleRhs r))
renameCat (Cat c) = Cat (lookupFM_ names c)
renameCat t = t
ruleProb :: Maybe Probs -> CFRule_ -> Maybe Double
ruleProb mp r = mp >>= \probs -> lookupProb probs (ruleFun r)
-- FIXME: move to GF.Probabilistic.Probabilistic?
lookupProb :: Probs -> Ident -> Maybe Double
lookupProb probs i = lookupTree prIdent i probs
mkCatNames :: String -- ^ Category name prefix
-> [String] -- ^ Original category names
-> FiniteMap String String -- ^ Maps original names to SRG names

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/10/26 18:47:16 $
-- > CVS $Date: 2005/11/01 20:09:04 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.23 $
-- > CVS $Revision: 1.24 $
--
-- This module does some useful transformations on CFGs.
--
@@ -26,7 +26,8 @@ module GF.Speech.TransformCFG {- (CFRule_, CFRules,
import GF.Conversion.Types
import GF.Data.Utilities
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, NameProfile(..))
import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol,
NameProfile(..), name2fun)
import GF.Infra.Ident
import GF.Infra.Option
import GF.Infra.Print
@@ -75,6 +76,7 @@ removeEmptyCats = fix removeEmptyCats'
k' = map (\ (c,xs) -> (c, filter (not . anyUsedBy emptyCats) xs)) keep
-- | Remove rules which are identical, not caring about the rule names.
-- FIXME: this messes up probabilities
removeIdenticalRules :: CFRules -> CFRules
removeIdenticalRules g = [(c,sortNubBy compareCatAndRhs rs) | (c,rs) <- g]
where compareCatAndRhs (CFRule c1 ss1 _) (CFRule c2 ss2 _) =
@@ -87,7 +89,11 @@ removeLeftRecursion rs = concatMap removeDirectLeftRecursion $ map handleProds r
where
handleProds (c, r) = (c, concatMap handleProd r)
handleProd (CFRule ai (Cat aj:alpha) n) | aj < ai =
-- FIXME: this will give multiple rules with the same name
-- FIXME: for non-recursive categories, this changes
-- the grammar unneccessarily, maybe we can use mutRecCats
-- to make this less invasive
-- FIXME: this will give multiple rules with the same name,
-- which may mess up the probabilities.
[CFRule ai (beta ++ alpha) n | CFRule _ beta _ <- lookup' aj rs]
handleProd r = [r]
@@ -124,6 +130,8 @@ lhsCat (CFRule c _ _) = c
ruleRhs :: CFRule c n t -> [Symbol c t]
ruleRhs (CFRule _ ss _) = ss
ruleFun :: CFRule_ -> Fun
ruleFun (CFRule _ _ n) = name2fun n
-- | Checks if a symbol is a non-terminal of one of the given categories.
catElem :: Eq c => Symbol c t -> [c] -> Bool

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/10/31 19:02:35 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.80 $
-- > CVS $Date: 2005/11/01 20:09:04 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.81 $
--
-- A database for customizable GF shell commands.
--
@@ -240,13 +240,17 @@ customGrammarPrinter =
,(strCI "srg", prSRG . stateCF)
,(strCI "gsl", \s -> let opts = stateOptions s
name = cncId s
in gslPrinter name opts $ stateCFG s)
in gslPrinter name opts Nothing $ stateCFG s)
,(strCI "jsgf", \s -> let opts = stateOptions s
name = cncId s
in jsgfPrinter name opts $ stateCFG s)
in jsgfPrinter name opts Nothing $ stateCFG s)
,(strCI "srgs_xml", \s -> let opts = stateOptions s
name = cncId s
in srgsXmlPrinter name opts $ stateCFG s)
in srgsXmlPrinter name opts Nothing $ stateCFG s)
,(strCI "srgs_xml_prob", \s -> let opts = stateOptions s
name = cncId s
probs = stateProbs s
in srgsXmlPrinter name opts (Just probs) $ stateCFG s)
,(strCI "slf", \s -> let opts = stateOptions s
name = cncId s
in slfPrinter name opts $ stateCFG s)

View File

@@ -546,6 +546,7 @@ q, quit: q
-printer=probs show probabilities of all functions
-printer=gsl Nuance GSL speech recognition grammar
-printer=jsgf Java Speech Grammar Format
-printer=srgs_xml SRGS XML format
-printer=slf a finite automaton in the HTK SLF format
-printer=slf_graphviz the same automaton as in SLF, but in Graphviz format
-printer=fa_graphviz a finite automaton with labelled edges