forked from GitHub/gf-core
Documented SRGS XML generation. Started working on support for probabilities in SRG generation. Added support for probabilities in for SRGS.
This commit is contained in:
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/05/30 18:39:44 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.20 $
|
||||
-- > CVS $Date: 2005/11/01 20:09:04 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.21 $
|
||||
--
|
||||
-- some auxiliary GF operations. AR 19\/6\/1998 -- 6\/2\/2001
|
||||
--
|
||||
@@ -100,6 +100,7 @@ data Err a = Ok a | Bad String
|
||||
|
||||
instance Monad Err where
|
||||
return = Ok
|
||||
fail = Bad
|
||||
Ok a >>= f = f a
|
||||
Bad s >>= f = Bad s
|
||||
|
||||
@@ -302,19 +303,19 @@ isInBinTree :: (Ord a) => a -> BinTree a b -> Bool
|
||||
isInBinTree x = err (const False) (const True) . justLookupTree x
|
||||
-- isInBinTree = elemFM
|
||||
|
||||
justLookupTree :: (Ord a) => a -> BinTree a b -> Err b
|
||||
justLookupTree :: (Monad m,Ord a) => a -> BinTree a b -> m b
|
||||
justLookupTree = lookupTree (const [])
|
||||
|
||||
lookupTree :: (Ord a) => (a -> String) -> a -> BinTree a b -> Err b
|
||||
lookupTree :: (Monad m,Ord a) => (a -> String) -> a -> BinTree a b -> m b
|
||||
lookupTree pr x tree = case tree of
|
||||
NT -> Bad ("no occurrence of element" +++ pr x)
|
||||
NT -> fail ("no occurrence of element" +++ pr x)
|
||||
BT (a,b) left right
|
||||
| x < a -> lookupTree pr x left
|
||||
| x > a -> lookupTree pr x right
|
||||
| x == a -> return b
|
||||
--lookupTree pr x tree = case lookupFM tree x of
|
||||
-- Just y -> return y
|
||||
-- _ -> Bad ("no occurrence of element" +++ pr x)
|
||||
-- _ -> fail ("no occurrence of element" +++ pr x)
|
||||
|
||||
lookupTreeMany :: Ord a => (a -> String) -> [BinTree a b] -> a -> Err b
|
||||
lookupTreeMany pr (t:ts) x = case lookupTree pr x t of
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/10/31 19:02:35 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.18 $
|
||||
-- > CVS $Date: 2005/11/01 20:09:04 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.19 $
|
||||
--
|
||||
-- Help on shell commands. Generated from HelpFile by 'make help'.
|
||||
-- PLEASE DON'T EDIT THIS FILE.
|
||||
@@ -575,6 +575,7 @@ txtHelpFile =
|
||||
"\n -printer=probs show probabilities of all functions" ++
|
||||
"\n -printer=gsl Nuance GSL speech recognition grammar" ++
|
||||
"\n -printer=jsgf Java Speech Grammar Format" ++
|
||||
"\n -printer=srgs_xml SRGS XML format" ++
|
||||
"\n -printer=slf a finite automaton in the HTK SLF format" ++
|
||||
"\n -printer=slf_graphviz the same automaton as in SLF, but in Graphviz format" ++
|
||||
"\n -printer=fa_graphviz a finite automaton with labelled edges" ++
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/09/14 15:17:29 $
|
||||
-- > CVS $Date: 2005/11/01 20:09:04 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.21 $
|
||||
-- > CVS $Revision: 1.22 $
|
||||
--
|
||||
-- This module prints a CFG as a Nuance GSL 2.0 grammar.
|
||||
--
|
||||
@@ -26,13 +26,14 @@ import GF.Formalism.Utilities (Symbol(..))
|
||||
import GF.Conversion.Types
|
||||
import GF.Infra.Print
|
||||
import GF.Infra.Option
|
||||
import GF.Probabilistic.Probabilistic (Probs)
|
||||
|
||||
import Data.Char (toUpper,toLower)
|
||||
|
||||
gslPrinter :: Ident -- ^ Grammar name
|
||||
-> Options -> CGrammar -> String
|
||||
gslPrinter name opts cfg = prGSL srg ""
|
||||
where srg = makeSRG name opts cfg
|
||||
-> Options -> Maybe Probs -> CGrammar -> String
|
||||
gslPrinter name opts probs cfg = prGSL srg ""
|
||||
where srg = makeSRG name opts probs cfg
|
||||
|
||||
prGSL :: SRG -> ShowS
|
||||
prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
|
||||
@@ -46,8 +47,9 @@ prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
|
||||
prRule (SRGRule cat origCat rhs) =
|
||||
showString "; " . prtS origCat . nl
|
||||
. prCat cat . sp . wrap "[" (unwordsS (map prAlt rhs)) "]" . nl
|
||||
prAlt rhs = wrap "(" (unwordsS (map prSymbol rhs')) ")"
|
||||
where rhs' = rmPunct rhs
|
||||
-- FIXME: use the probability
|
||||
prAlt (SRGAlt mp rhs) = wrap "(" (unwordsS (map prSymbol rhs')) ")"
|
||||
where rhs' = rmPunct rhs
|
||||
prSymbol (Cat c) = prCat c
|
||||
prSymbol (Tok t) = wrap "\"" (showString (showToken t)) "\""
|
||||
-- GSL requires an upper case letter in category names
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/09/14 15:17:29 $
|
||||
-- > CVS $Date: 2005/11/01 20:09:04 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.15 $
|
||||
-- > CVS $Revision: 1.16 $
|
||||
--
|
||||
-- This module prints a CFG as a JSGF grammar.
|
||||
--
|
||||
@@ -26,12 +26,13 @@ import GF.Formalism.Utilities (Symbol(..))
|
||||
import GF.Infra.Ident
|
||||
import GF.Infra.Print
|
||||
import GF.Infra.Option
|
||||
import GF.Probabilistic.Probabilistic (Probs)
|
||||
import GF.Speech.SRG
|
||||
|
||||
jsgfPrinter :: Ident -- ^ Grammar name
|
||||
-> Options -> CGrammar -> String
|
||||
jsgfPrinter name opts cfg = prJSGF srg ""
|
||||
where srg = makeSRG name opts cfg
|
||||
-> Options -> Maybe Probs -> CGrammar -> String
|
||||
jsgfPrinter name opts probs cfg = prJSGF srg ""
|
||||
where srg = makeSRG name opts probs cfg
|
||||
|
||||
prJSGF :: SRG -> ShowS
|
||||
prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
|
||||
@@ -47,9 +48,11 @@ prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
|
||||
prRule (SRGRule cat origCat rhs) =
|
||||
comments [origCat] . nl
|
||||
. prCat cat . showString " = " . joinS " | " (map prAlt rhs) . nl
|
||||
prAlt rhs | null rhs' = showString "<NULL>"
|
||||
| otherwise = wrap "(" (unwordsS (map prSymbol rhs')) ")"
|
||||
where rhs' = rmPunct rhs
|
||||
-- FIXME: use the probability
|
||||
prAlt (SRGAlt mp rhs)
|
||||
| null rhs' = showString "<NULL>"
|
||||
| otherwise = wrap "(" (unwordsS (map prSymbol rhs')) ")"
|
||||
where rhs' = rmPunct rhs
|
||||
prSymbol (Cat c) = prCat c
|
||||
prSymbol (Tok t) = wrap "\"" (prtS t) "\""
|
||||
prCat c = showChar '<' . showString c . showChar '>'
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/10/31 16:48:08 $
|
||||
-- > CVS $Date: 2005/11/01 20:09:04 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.1 $
|
||||
-- > CVS $Revision: 1.2 $
|
||||
--
|
||||
-- This module prints a CFG as an SRGS XML grammar.
|
||||
--
|
||||
@@ -26,6 +26,7 @@ import GF.Formalism.Utilities (Symbol(..))
|
||||
import GF.Conversion.Types
|
||||
import GF.Infra.Print
|
||||
import GF.Infra.Option
|
||||
import GF.Probabilistic.Probabilistic (Probs)
|
||||
|
||||
import Data.Char (toUpper,toLower)
|
||||
|
||||
@@ -35,9 +36,11 @@ data XML = Data String | Tag String [Attr] [XML] | Comment String
|
||||
type Attr = (String,String)
|
||||
|
||||
srgsXmlPrinter :: Ident -- ^ Grammar name
|
||||
-> Options -> CGrammar -> String
|
||||
srgsXmlPrinter name opts cfg = prSrgsXml srg ""
|
||||
where srg = makeSRG name opts cfg
|
||||
-> Options
|
||||
-> Maybe Probs
|
||||
-> CGrammar -> String
|
||||
srgsXmlPrinter name opts probs cfg = prSrgsXml srg ""
|
||||
where srg = makeSRG name opts probs cfg
|
||||
|
||||
prSrgsXml :: SRG -> ShowS
|
||||
prSrgsXml (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
|
||||
@@ -50,13 +53,12 @@ prSrgsXml (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
|
||||
"Generated by GF",
|
||||
"Original start category: " ++ origStart]
|
||||
++ map ruleToXML rs)
|
||||
ruleToXML (SRGRule cat origCat rhss) =
|
||||
rule (prCat cat) (comments ["Category " ++ origCat] ++ [prRhs rhss])
|
||||
prRhs rhss = oneOf (map item (map prAlt rhss))
|
||||
-- FIXME: don't use one-of if there is only one
|
||||
prAlt rhs = map prSymbol rhs
|
||||
ruleToXML (SRGRule cat origCat alts) =
|
||||
rule (prCat cat) (comments ["Category " ++ origCat] ++ [prRhs alts])
|
||||
prRhs rhss = oneOf (map prAlt rhss)
|
||||
prAlt (SRGAlt p rhs) = item p (map prSymbol rhs)
|
||||
prSymbol (Cat c) = Tag "ruleref" [("uri","#" ++ prCat c)] []
|
||||
prSymbol (Tok t) = item [Data (showToken t)]
|
||||
prSymbol (Tok t) = item Nothing [Data (showToken t)]
|
||||
prCat c = c -- FIXME: escape something?
|
||||
showToken t = t -- FIXME: escape something?
|
||||
|
||||
@@ -64,9 +66,13 @@ rule :: String -- ^ id
|
||||
-> [XML] -> XML
|
||||
rule i = Tag "rule" [("id",i)]
|
||||
|
||||
item :: [XML] -> XML
|
||||
item [x@(Tag "item" _ _)] = x
|
||||
item xs = Tag "item" [] xs
|
||||
item :: Maybe Double -> [XML] -> XML
|
||||
-- FIXME: what is the weight called?
|
||||
item mp xs = Tag "item" as cs
|
||||
where as = maybe [] (\p -> [("weight", show p)]) mp
|
||||
cs = case xs of
|
||||
[Tag "item" [] xs'] -> xs'
|
||||
_ -> xs
|
||||
|
||||
oneOf :: [XML] -> XML
|
||||
oneOf [x] = x
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/10/26 17:14:03 $
|
||||
-- > CVS $Date: 2005/11/01 20:09:04 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.19 $
|
||||
-- > CVS $Revision: 1.20 $
|
||||
--
|
||||
-- Representation of, conversion to, and utilities for
|
||||
-- printing of a general Speech Recognition Grammar.
|
||||
@@ -20,6 +20,7 @@
|
||||
|
||||
module GF.Speech.SRG where
|
||||
|
||||
import GF.Data.Operations
|
||||
import GF.Data.Utilities
|
||||
import GF.Infra.Ident
|
||||
import GF.Formalism.CFG
|
||||
@@ -28,6 +29,7 @@ import GF.Conversion.Types
|
||||
import GF.Infra.Print
|
||||
import GF.Speech.TransformCFG
|
||||
import GF.Infra.Option
|
||||
import GF.Probabilistic.Probabilistic (Probs)
|
||||
|
||||
import Data.List
|
||||
import Data.Maybe (fromMaybe)
|
||||
@@ -38,9 +40,15 @@ data SRG = SRG { grammarName :: String -- ^ grammar name
|
||||
, origStartCat :: String -- ^ original start category name
|
||||
, rules :: [SRGRule]
|
||||
}
|
||||
deriving (Eq,Show)
|
||||
|
||||
data SRGRule = SRGRule String String [SRGAlt] -- ^ SRG category name, original category name
|
||||
-- and productions
|
||||
type SRGAlt = [Symbol String Token]
|
||||
deriving (Eq,Show)
|
||||
|
||||
-- | maybe a probability, and a list of symbols
|
||||
data SRGAlt = SRGAlt (Maybe Double) [Symbol String Token]
|
||||
deriving (Eq,Show)
|
||||
|
||||
-- | SRG category name and original name
|
||||
type CatName = (String,String)
|
||||
@@ -49,27 +57,42 @@ type CatNames = FiniteMap String String
|
||||
|
||||
makeSRG :: Ident -- ^ Grammar name
|
||||
-> Options -- ^ Grammar options
|
||||
-> Maybe Probs -- ^ Probabilities
|
||||
-> CGrammar -- ^ A context-free grammar
|
||||
-> SRG
|
||||
makeSRG i opts gr = SRG { grammarName = name,
|
||||
startCat = lookupFM_ names origStart,
|
||||
origStartCat = origStart,
|
||||
rules = map (cfgRulesToSRGRule names) cfgRules }
|
||||
makeSRG i opts probs gr
|
||||
= SRG { grammarName = name,
|
||||
startCat = lookupFM_ names origStart,
|
||||
origStartCat = origStart,
|
||||
rules = rs }
|
||||
where
|
||||
name = prIdent i
|
||||
origStart = getStartCat opts
|
||||
gr' = removeLeftRecursion $ removeIdenticalRules $ removeEmptyCats $ cfgToCFRules gr
|
||||
(cats,cfgRules) = unzip gr'
|
||||
names = mkCatNames name cats
|
||||
rs = map (cfgRulesToSRGRule names probs) cfgRules
|
||||
|
||||
cfgRulesToSRGRule :: FiniteMap String String -> [CFRule_] -> SRGRule
|
||||
cfgRulesToSRGRule names rs@(r:_) = SRGRule cat origCat rhs
|
||||
|
||||
-- FIXME: probabilities get larger than 1.0 when new rules are
|
||||
-- introduced
|
||||
-- FIXME: merge alternatives with same rhs but different probabilities
|
||||
cfgRulesToSRGRule :: FiniteMap String String -> Maybe Probs -> [CFRule_] -> SRGRule
|
||||
cfgRulesToSRGRule names probs rs@(r:_) = SRGRule cat origCat rhs
|
||||
where origCat = lhsCat r
|
||||
cat = lookupFM_ names origCat
|
||||
rhs = nub $ map (map renameCat . ruleRhs) rs
|
||||
rhs = nub $ map ruleToAlt rs
|
||||
ruleToAlt r = SRGAlt (ruleProb probs r) (map renameCat (ruleRhs r))
|
||||
renameCat (Cat c) = Cat (lookupFM_ names c)
|
||||
renameCat t = t
|
||||
|
||||
ruleProb :: Maybe Probs -> CFRule_ -> Maybe Double
|
||||
ruleProb mp r = mp >>= \probs -> lookupProb probs (ruleFun r)
|
||||
|
||||
-- FIXME: move to GF.Probabilistic.Probabilistic?
|
||||
lookupProb :: Probs -> Ident -> Maybe Double
|
||||
lookupProb probs i = lookupTree prIdent i probs
|
||||
|
||||
mkCatNames :: String -- ^ Category name prefix
|
||||
-> [String] -- ^ Original category names
|
||||
-> FiniteMap String String -- ^ Maps original names to SRG names
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/10/26 18:47:16 $
|
||||
-- > CVS $Date: 2005/11/01 20:09:04 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.23 $
|
||||
-- > CVS $Revision: 1.24 $
|
||||
--
|
||||
-- This module does some useful transformations on CFGs.
|
||||
--
|
||||
@@ -26,7 +26,8 @@ module GF.Speech.TransformCFG {- (CFRule_, CFRules,
|
||||
import GF.Conversion.Types
|
||||
import GF.Data.Utilities
|
||||
import GF.Formalism.CFG
|
||||
import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol, NameProfile(..))
|
||||
import GF.Formalism.Utilities (Symbol(..), mapSymbol, filterCats, symbol,
|
||||
NameProfile(..), name2fun)
|
||||
import GF.Infra.Ident
|
||||
import GF.Infra.Option
|
||||
import GF.Infra.Print
|
||||
@@ -75,6 +76,7 @@ removeEmptyCats = fix removeEmptyCats'
|
||||
k' = map (\ (c,xs) -> (c, filter (not . anyUsedBy emptyCats) xs)) keep
|
||||
|
||||
-- | Remove rules which are identical, not caring about the rule names.
|
||||
-- FIXME: this messes up probabilities
|
||||
removeIdenticalRules :: CFRules -> CFRules
|
||||
removeIdenticalRules g = [(c,sortNubBy compareCatAndRhs rs) | (c,rs) <- g]
|
||||
where compareCatAndRhs (CFRule c1 ss1 _) (CFRule c2 ss2 _) =
|
||||
@@ -87,7 +89,11 @@ removeLeftRecursion rs = concatMap removeDirectLeftRecursion $ map handleProds r
|
||||
where
|
||||
handleProds (c, r) = (c, concatMap handleProd r)
|
||||
handleProd (CFRule ai (Cat aj:alpha) n) | aj < ai =
|
||||
-- FIXME: this will give multiple rules with the same name
|
||||
-- FIXME: for non-recursive categories, this changes
|
||||
-- the grammar unneccessarily, maybe we can use mutRecCats
|
||||
-- to make this less invasive
|
||||
-- FIXME: this will give multiple rules with the same name,
|
||||
-- which may mess up the probabilities.
|
||||
[CFRule ai (beta ++ alpha) n | CFRule _ beta _ <- lookup' aj rs]
|
||||
handleProd r = [r]
|
||||
|
||||
@@ -124,6 +130,8 @@ lhsCat (CFRule c _ _) = c
|
||||
ruleRhs :: CFRule c n t -> [Symbol c t]
|
||||
ruleRhs (CFRule _ ss _) = ss
|
||||
|
||||
ruleFun :: CFRule_ -> Fun
|
||||
ruleFun (CFRule _ _ n) = name2fun n
|
||||
|
||||
-- | Checks if a symbol is a non-terminal of one of the given categories.
|
||||
catElem :: Eq c => Symbol c t -> [c] -> Bool
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/10/31 19:02:35 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.80 $
|
||||
-- > CVS $Date: 2005/11/01 20:09:04 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.81 $
|
||||
--
|
||||
-- A database for customizable GF shell commands.
|
||||
--
|
||||
@@ -240,13 +240,17 @@ customGrammarPrinter =
|
||||
,(strCI "srg", prSRG . stateCF)
|
||||
,(strCI "gsl", \s -> let opts = stateOptions s
|
||||
name = cncId s
|
||||
in gslPrinter name opts $ stateCFG s)
|
||||
in gslPrinter name opts Nothing $ stateCFG s)
|
||||
,(strCI "jsgf", \s -> let opts = stateOptions s
|
||||
name = cncId s
|
||||
in jsgfPrinter name opts $ stateCFG s)
|
||||
in jsgfPrinter name opts Nothing $ stateCFG s)
|
||||
,(strCI "srgs_xml", \s -> let opts = stateOptions s
|
||||
name = cncId s
|
||||
in srgsXmlPrinter name opts $ stateCFG s)
|
||||
in srgsXmlPrinter name opts Nothing $ stateCFG s)
|
||||
,(strCI "srgs_xml_prob", \s -> let opts = stateOptions s
|
||||
name = cncId s
|
||||
probs = stateProbs s
|
||||
in srgsXmlPrinter name opts (Just probs) $ stateCFG s)
|
||||
,(strCI "slf", \s -> let opts = stateOptions s
|
||||
name = cncId s
|
||||
in slfPrinter name opts $ stateCFG s)
|
||||
|
||||
@@ -546,6 +546,7 @@ q, quit: q
|
||||
-printer=probs show probabilities of all functions
|
||||
-printer=gsl Nuance GSL speech recognition grammar
|
||||
-printer=jsgf Java Speech Grammar Format
|
||||
-printer=srgs_xml SRGS XML format
|
||||
-printer=slf a finite automaton in the HTK SLF format
|
||||
-printer=slf_graphviz the same automaton as in SLF, but in Graphviz format
|
||||
-printer=fa_graphviz a finite automaton with labelled edges
|
||||
|
||||
Reference in New Issue
Block a user