1
0
forked from GitHub/gf-core

First version of SRGS with semantic tags.

This commit is contained in:
bringert
2006-02-01 16:23:14 +00:00
parent 992e212bcc
commit fd0dfd7d4d
9 changed files with 124 additions and 51 deletions

View File

@@ -73,7 +73,12 @@ sortNub = map head . group . sort
-- | Like 'nubBy', but more efficient as it uses sorting internally. -- | Like 'nubBy', but more efficient as it uses sorting internally.
sortNubBy :: (a -> a -> Ordering) -> [a] -> [a] sortNubBy :: (a -> a -> Ordering) -> [a] -> [a]
sortNubBy f = map head . groupBy (compareEq f) . sortBy f sortNubBy f = map head . sortGroupBy f
-- | Sorts and then groups elements given and ordering of the
-- elements.
sortGroupBy :: (a -> a -> Ordering) -> [a] -> [[a]]
sortGroupBy f = groupBy (compareEq f) . sortBy f
-- | Take the union of a list of lists. -- | Take the union of a list of lists.
unionAll :: Eq a => [[a]] -> [a] unionAll :: Eq a => [[a]] -> [a]

View File

@@ -598,10 +598,14 @@ txtHelpFile =
"\n -printer=jsgf Java Speech Grammar Format" ++ "\n -printer=jsgf Java Speech Grammar Format" ++
"\n -printer=srgs_xml SRGS XML format" ++ "\n -printer=srgs_xml SRGS XML format" ++
"\n -printer=srgs_xml_prob SRGS XML format, with weights" ++ "\n -printer=srgs_xml_prob SRGS XML format, with weights" ++
"\n -printer=srgs_xml_ms_sem SRGS XML format, with semantic tags for the" ++
"\n Microsoft Speech API." ++
"\n -printer=slf a finite automaton in the HTK SLF format" ++ "\n -printer=slf a finite automaton in the HTK SLF format" ++
"\n -printer=slf_graphviz the same automaton as slf, but in Graphviz format" ++ "\n -printer=slf_graphviz the same automaton as slf, but in Graphviz format" ++
"\n -printer=slf_sub a finite automaton with sub-automata in the HTK SLF format" ++ "\n -printer=slf_sub a finite automaton with sub-automata in the " ++
"\n -printer=slf_sub_graphviz the same automaton as slf_sub, but in Graphviz format" ++ "\n HTK SLF format" ++
"\n -printer=slf_sub_graphviz the same automaton as slf_sub, but in " ++
"\n Graphviz format" ++
"\n -printer=fa_graphviz a finite automaton with labelled edges" ++ "\n -printer=fa_graphviz a finite automaton with labelled edges" ++
"\n -printer=regular a regular grammar in a simple BNF" ++ "\n -printer=regular a regular grammar in a simple BNF" ++
"\n -printer=unpar a gfc grammar with parameters eliminated" ++ "\n -printer=unpar a gfc grammar with parameters eliminated" ++

View File

@@ -33,7 +33,7 @@ import Data.Char (toUpper,toLower)
gslPrinter :: Ident -- ^ Grammar name gslPrinter :: Ident -- ^ Grammar name
-> Options -> Maybe Probs -> CGrammar -> String -> Options -> Maybe Probs -> CGrammar -> String
gslPrinter name opts probs cfg = prGSL srg "" gslPrinter name opts probs cfg = prGSL srg ""
where srg = makeSRG name opts probs cfg where srg = makeSimpleSRG name opts probs cfg
prGSL :: SRG -> ShowS prGSL :: SRG -> ShowS
prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
@@ -48,7 +48,7 @@ prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
showString "; " . prtS origCat . nl showString "; " . prtS origCat . nl
. prCat cat . sp . wrap "[" (unwordsS (map prAlt rhs)) "]" . nl . prCat cat . sp . wrap "[" (unwordsS (map prAlt rhs)) "]" . nl
-- FIXME: use the probability -- FIXME: use the probability
prAlt (SRGAlt mp rhs) = wrap "(" (unwordsS (map prSymbol rhs')) ")" prAlt (SRGAlt mp _ rhs) = wrap "(" (unwordsS (map prSymbol rhs')) ")"
where rhs' = rmPunct rhs where rhs' = rmPunct rhs
prSymbol (Cat c) = prCat c prSymbol (Cat c) = prCat c
prSymbol (Tok t) = wrap "\"" (showString (showToken t)) "\"" prSymbol (Tok t) = wrap "\"" (showString (showToken t)) "\""

View File

@@ -32,7 +32,7 @@ import GF.Speech.SRG
jsgfPrinter :: Ident -- ^ Grammar name jsgfPrinter :: Ident -- ^ Grammar name
-> Options -> Maybe Probs -> CGrammar -> String -> Options -> Maybe Probs -> CGrammar -> String
jsgfPrinter name opts probs cfg = prJSGF srg "" jsgfPrinter name opts probs cfg = prJSGF srg ""
where srg = makeSRG name opts probs cfg where srg = makeSimpleSRG name opts probs cfg
prJSGF :: SRG -> ShowS prJSGF :: SRG -> ShowS
prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
@@ -49,7 +49,7 @@ prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
comments [origCat] . nl comments [origCat] . nl
. prCat cat . showString " = " . joinS " | " (map prAlt rhs) . nl . prCat cat . showString " = " . joinS " | " (map prAlt rhs) . nl
-- FIXME: use the probability -- FIXME: use the probability
prAlt (SRGAlt mp rhs) prAlt (SRGAlt mp _ rhs)
| null rhs' = showString "<NULL>" | null rhs' = showString "<NULL>"
| otherwise = wrap "(" (unwordsS (map prSymbol rhs')) ")" | otherwise = wrap "(" (unwordsS (map prSymbol rhs')) ")"
where rhs' = rmPunct rhs where rhs' = rmPunct rhs

View File

@@ -22,7 +22,7 @@ import GF.Speech.SRG
import GF.Infra.Ident import GF.Infra.Ident
import GF.Formalism.CFG import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..)) import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), forestName)
import GF.Conversion.Types import GF.Conversion.Types
import GF.Infra.Print import GF.Infra.Print
import GF.Infra.Option import GF.Infra.Option
@@ -37,43 +37,74 @@ type Attr = (String,String)
srgsXmlPrinter :: Ident -- ^ Grammar name srgsXmlPrinter :: Ident -- ^ Grammar name
-> Options -> Options
-> Bool -- ^ Whether to include semantic interpretation
-> Maybe Probs -> Maybe Probs
-> CGrammar -> String -> CGrammar -> String
srgsXmlPrinter name opts probs cfg = prSrgsXml srg "" srgsXmlPrinter name opts sisr probs cfg = prSrgsXml sisr srg ""
where srg = makeSRG name opts probs cfg where srg = makeSRG name opts probs cfg
prSrgsXml :: SRG -> ShowS prSrgsXml :: Bool -> SRG -> ShowS
prSrgsXml (SRG{grammarName=name,startCat=start, prSrgsXml sisr (SRG{grammarName=name,startCat=start,
origStartCat=origStart,grammarLanguage=l,rules=rs}) origStartCat=origStart,grammarLanguage=l,rules=rs})
= header . showsXML xmlGr = header . showsXML xmlGr
where where
header = showString "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>" header = showString "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
root = prCat start root = prCat start
xmlGr = grammar root l (comments xmlGr = grammar root l ([meta "description"
["SRGS XML speech recognition grammar for " ++ name, ("SRGS XML speech recognition grammar for " ++ name
"Generated by GF", ++ ". " ++ "Original start category: " ++ origStart),
"Original start category: " ++ origStart] meta "generator" "GF"]
++ map ruleToXML rs) ++ map ruleToXML rs)
ruleToXML (SRGRule cat origCat alts) = ruleToXML (SRGRule cat origCat alts) =
rule (prCat cat) (comments ["Category " ++ origCat] ++ [prRhs alts]) rule (prCat cat) (comments ["Category " ++ origCat] ++ [prRhs alts])
prRhs rhss = oneOf (map prAlt rhss) prRhs rhss = oneOf (map prAlt rhss)
prAlt (SRGAlt p rhs) = item p (map prSymbol rhs) prAlt (SRGAlt p n@(Name _ pr) rhs)
prSymbol (Cat c) = Tag "ruleref" [("uri","#" ++ prCat c)] [] | sisr = prodItem (Just n) p (map (uncurry (symItem pr)) (numberCats 0 rhs))
prSymbol (Tok t) = item Nothing [Data (showToken t)] | otherwise = prodItem Nothing p (map (\s -> symItem [] s 0) rhs)
prCat c = c -- FIXME: escape something? numberCats _ [] = []
showToken t = t -- FIXME: escape something? numberCats n (s@(Cat _):ss) = (s,n):numberCats (n+1) ss
numberCats n (s:ss) = (s,n):numberCats n ss
rule :: String -- ^ id rule :: String -> [XML] -> XML
-> [XML] -> XML
rule i = Tag "rule" [("id",i)] rule i = Tag "rule" [("id",i)]
item :: Maybe Double -> [XML] -> XML prodItem :: Maybe Name -> Maybe Double -> [XML] -> XML
-- FIXME: what is the weight called? prodItem n mp xs = Tag "item" w (t++cs)
item mp xs = Tag "item" as cs where
where as = maybe [] (\p -> [("weight", show p)]) mp w = maybe [] (\p -> [("weight", show p)]) mp
cs = case xs of t = maybe [] prodTag n
[Tag "item" [] xs'] -> xs' cs = case xs of
_ -> xs [Tag "item" [] xs'] -> xs'
_ -> xs
prodTag :: Name -> [XML]
prodTag (Name f prs) = [Tag "tag" [] [Data (join "; " ts)]]
where
ts = ["$.name=" ++ showFun f] ++
["$.arg" ++ show n ++ "=" ++ argInit (prs!!n)
| n <- [0..length prs-1]]
argInit (Unify _) = metavar
argInit (Constant f) = maybe metavar showFun (forestName f)
showFun = show . prIdent
metavar = show "?"
symItem :: [Profile a] -> Symbol String Token -> Int -> XML
symItem prs (Cat c) x = Tag "item" [] ([Tag "ruleref" [("uri","#" ++ prCat c)] []]++t)
where
t = if null ts then [] else [Tag "tag" [] [Data (join "; " ts)]]
ts = ["$.arg" ++ show n ++ "=$$"
| n <- [0..length prs-1], inProfile x (prs!!n)]
symItem _ (Tok t) _ = Tag "item" [] [Data (showToken t)]
inProfile :: Int -> Profile a -> Bool
inProfile x (Unify xs) = x `elem` xs
inProfile _ (Constant _) = False
prCat :: String -> String
prCat c = c -- FIXME: escape something?
showToken :: Token -> String
showToken t = t -- FIXME: escape something?
oneOf :: [XML] -> XML oneOf :: [XML] -> XML
oneOf [x] = x oneOf [x] = x
@@ -88,6 +119,9 @@ grammar root l = Tag "grammar" [("xml:lang", l),
("mode","voice"), ("mode","voice"),
("root",root)] ("root",root)]
meta :: String -> String -> XML
meta n c = Tag "meta" [("name",n),("content",c)] []
comments :: [String] -> [XML] comments :: [String] -> [XML]
comments = map Comment comments = map Comment

View File

@@ -18,13 +18,16 @@
-- FIXME: figure out name prefix from grammar name -- FIXME: figure out name prefix from grammar name
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
module GF.Speech.SRG where module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..),
makeSimpleSRG, makeSRG
, lookupFM_, prtS) where
import GF.Data.Operations import GF.Data.Operations
import GF.Data.Utilities import GF.Data.Utilities
import GF.Infra.Ident import GF.Infra.Ident
import GF.Formalism.CFG import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..)) import GF.Formalism.Utilities (Symbol(..), NameProfile(..)
, Profile, SyntaxForest)
import GF.Conversion.Types import GF.Conversion.Types
import GF.Infra.Print import GF.Infra.Print
import GF.Speech.TransformCFG import GF.Speech.TransformCFG
@@ -48,8 +51,8 @@ data SRGRule = SRGRule String String [SRGAlt] -- ^ SRG category name, original c
-- and productions -- and productions
deriving (Eq,Show) deriving (Eq,Show)
-- | maybe a probability, and a list of symbols -- | maybe a probability, a rule name and a list of symbols
data SRGAlt = SRGAlt (Maybe Double) [Symbol String Token] data SRGAlt = SRGAlt (Maybe Double) Name [Symbol String Token]
deriving (Eq,Show) deriving (Eq,Show)
-- | SRG category name and original name -- | SRG category name and original name
@@ -57,12 +60,34 @@ type CatName = (String,String)
type CatNames = FiniteMap String String type CatNames = FiniteMap String String
-- | Create a non-left-recursive SRG.
-- FIXME: the probabilities, names and profiles in the returned
-- grammar may be meaningless.
makeSimpleSRG :: Ident -- ^ Grammar name
-> Options -- ^ Grammar options
-> Maybe Probs -- ^ Probabilities
-> CGrammar -- ^ A context-free grammar
-> SRG
makeSimpleSRG
= makeSRG_ (removeLeftRecursion . removeIdenticalRules . removeEmptyCats)
-- | Create a SRG preserving the names, profiles and probabilities of the
-- input grammar. The returned grammar may be left-recursive.
makeSRG :: Ident -- ^ Grammar name makeSRG :: Ident -- ^ Grammar name
-> Options -- ^ Grammar options -> Options -- ^ Grammar options
-> Maybe Probs -- ^ Probabilities -> Maybe Probs -- ^ Probabilities
-> CGrammar -- ^ A context-free grammar -> CGrammar -- ^ A context-free grammar
-> SRG -> SRG
makeSRG i opts probs gr makeSRG = makeSRG_ removeEmptyCats
makeSRG_ :: (CFRules -> CFRules) -- ^ Transformations to apply to the
-- CFG before converting to SRG
-> Ident -- ^ Grammar name
-> Options -- ^ Grammar options
-> Maybe Probs -- ^ Probabilities
-> CGrammar -- ^ A context-free grammar
-> SRG
makeSRG_ f i opts probs gr
= SRG { grammarName = name, = SRG { grammarName = name,
startCat = lookupFM_ names origStart, startCat = lookupFM_ names origStart,
origStartCat = origStart, origStartCat = origStart,
@@ -72,21 +97,19 @@ makeSRG i opts probs gr
name = prIdent i name = prIdent i
origStart = getStartCat opts origStart = getStartCat opts
l = fromMaybe "en_UK" (getOptVal opts speechLanguage) l = fromMaybe "en_UK" (getOptVal opts speechLanguage)
gr' = removeLeftRecursion $ removeIdenticalRules $ removeEmptyCats $ cfgToCFRules gr gr' = f (cfgToCFRules gr)
(cats,cfgRules) = unzip gr' (cats,cfgRules) = unzip gr'
names = mkCatNames name cats names = mkCatNames name cats
rs = map (cfgRulesToSRGRule names probs) cfgRules rs = map (cfgRulesToSRGRule names probs) cfgRules
-- FIXME: merge alternatives with same rhs and profile but different probabilities
-- FIXME: probabilities get larger than 1.0 when new rules are
-- introduced
-- FIXME: merge alternatives with same rhs but different probabilities
cfgRulesToSRGRule :: FiniteMap String String -> Maybe Probs -> [CFRule_] -> SRGRule cfgRulesToSRGRule :: FiniteMap String String -> Maybe Probs -> [CFRule_] -> SRGRule
cfgRulesToSRGRule names probs rs@(r:_) = SRGRule cat origCat rhs cfgRulesToSRGRule names probs rs@(r:_) = SRGRule cat origCat rhs
where origCat = lhsCat r where origCat = lhsCat r
cat = lookupFM_ names origCat cat = lookupFM_ names origCat
rhs = nub $ map ruleToAlt rs rhs = nub $ map ruleToAlt rs
ruleToAlt r = SRGAlt (ruleProb probs r) (map renameCat (ruleRhs r)) ruleToAlt r@(CFRule c ss n)
= SRGAlt (ruleProb probs r) n (map renameCat ss)
renameCat (Cat c) = Cat (lookupFM_ names c) renameCat (Cat c) = Cat (lookupFM_ names c)
renameCat t = t renameCat t = t

View File

@@ -37,6 +37,7 @@ import Control.Monad
import Data.FiniteMap import Data.FiniteMap
import Data.List import Data.List
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import Data.Monoid (mconcat)
import Data.Set (Set) import Data.Set (Set)
import qualified Data.Set as Set import qualified Data.Set as Set
@@ -77,14 +78,13 @@ removeEmptyCats = fix removeEmptyCats'
emptyCats = filter (nothingOrNull . flip lookup rs) allCats emptyCats = filter (nothingOrNull . flip lookup rs) allCats
k' = map (\ (c,xs) -> (c, filter (not . anyUsedBy emptyCats) xs)) keep k' = map (\ (c,xs) -> (c, filter (not . anyUsedBy emptyCats) xs)) keep
-- | Remove rules which are identical, not caring about the rule names. -- | Remove rules which have the same rhs.
-- FIXME: this messes up probabilities -- FIXME: this messes up probabilities, names and profiles
removeIdenticalRules :: CFRules -> CFRules removeIdenticalRules :: CFRules -> CFRules
removeIdenticalRules g = [(c,sortNubBy compareCatAndRhs rs) | (c,rs) <- g] removeIdenticalRules g = [(c,sortNubBy cmpRules rs) | (c,rs) <- g]
where compareCatAndRhs (CFRule c1 ss1 _) (CFRule c2 ss2 _) = where
case c1 `compare` c2 of cmpRules (CFRule c1 ss1 _) (CFRule c2 ss2 _) =
EQ -> ss1 `compare` ss2 mconcat [c1 `compare` c2, ss1 `compare` ss2]
o -> o
removeLeftRecursion :: CFRules -> CFRules removeLeftRecursion :: CFRules -> CFRules
removeLeftRecursion rs = concatMap removeDirectLeftRecursion $ map handleProds rs removeLeftRecursion rs = concatMap removeDirectLeftRecursion $ map handleProds rs

View File

@@ -249,11 +249,14 @@ customGrammarPrinter =
in jsgfPrinter name opts Nothing $ stateCFG s) in jsgfPrinter name opts Nothing $ stateCFG s)
,(strCI "srgs_xml", \s -> let opts = stateOptions s ,(strCI "srgs_xml", \s -> let opts = stateOptions s
name = cncId s name = cncId s
in srgsXmlPrinter name opts Nothing $ stateCFG s) in srgsXmlPrinter name opts False Nothing $ stateCFG s)
,(strCI "srgs_xml_prob", \s -> let opts = stateOptions s ,(strCI "srgs_xml_prob", \s -> let opts = stateOptions s
name = cncId s name = cncId s
probs = stateProbs s probs = stateProbs s
in srgsXmlPrinter name opts (Just probs) $ stateCFG s) in srgsXmlPrinter name opts False (Just probs) $ stateCFG s)
,(strCI "srgs_xml_ms_sem", \s -> let opts = stateOptions s
name = cncId s
in srgsXmlPrinter name opts True Nothing $ stateCFG s)
,(strCI "slf", \s -> let opts = stateOptions s ,(strCI "slf", \s -> let opts = stateOptions s
start = getStartCat opts start = getStartCat opts
name = cncId s name = cncId s

View File

@@ -569,10 +569,14 @@ q, quit: q
-printer=jsgf Java Speech Grammar Format -printer=jsgf Java Speech Grammar Format
-printer=srgs_xml SRGS XML format -printer=srgs_xml SRGS XML format
-printer=srgs_xml_prob SRGS XML format, with weights -printer=srgs_xml_prob SRGS XML format, with weights
-printer=srgs_xml_ms_sem SRGS XML format, with semantic tags for the
Microsoft Speech API.
-printer=slf a finite automaton in the HTK SLF format -printer=slf a finite automaton in the HTK SLF format
-printer=slf_graphviz the same automaton as slf, but in Graphviz format -printer=slf_graphviz the same automaton as slf, but in Graphviz format
-printer=slf_sub a finite automaton with sub-automata in the HTK SLF format -printer=slf_sub a finite automaton with sub-automata in the
-printer=slf_sub_graphviz the same automaton as slf_sub, but in Graphviz format HTK SLF format
-printer=slf_sub_graphviz the same automaton as slf_sub, but in
Graphviz format
-printer=fa_graphviz a finite automaton with labelled edges -printer=fa_graphviz a finite automaton with labelled edges
-printer=regular a regular grammar in a simple BNF -printer=regular a regular grammar in a simple BNF
-printer=unpar a gfc grammar with parameters eliminated -printer=unpar a gfc grammar with parameters eliminated