From fd0dfd7d4d9885b8b4cac7bd60308e2890a05caa Mon Sep 17 00:00:00 2001 From: bringert Date: Wed, 1 Feb 2006 16:23:14 +0000 Subject: [PATCH] First version of SRGS with semantic tags. --- src/GF/Data/Utilities.hs | 7 +++- src/GF/Shell/HelpFile.hs | 8 +++- src/GF/Speech/PrGSL.hs | 4 +- src/GF/Speech/PrJSGF.hs | 4 +- src/GF/Speech/PrSRGS.hs | 78 +++++++++++++++++++++++++---------- src/GF/Speech/SRG.hs | 45 +++++++++++++++----- src/GF/Speech/TransformCFG.hs | 14 +++---- src/GF/UseGrammar/Custom.hs | 7 +++- src/HelpFile | 8 +++- 9 files changed, 124 insertions(+), 51 deletions(-) diff --git a/src/GF/Data/Utilities.hs b/src/GF/Data/Utilities.hs index aaadad1fe..c7e1600c3 100644 --- a/src/GF/Data/Utilities.hs +++ b/src/GF/Data/Utilities.hs @@ -73,7 +73,12 @@ sortNub = map head . group . sort -- | Like 'nubBy', but more efficient as it uses sorting internally. sortNubBy :: (a -> a -> Ordering) -> [a] -> [a] -sortNubBy f = map head . groupBy (compareEq f) . sortBy f +sortNubBy f = map head . sortGroupBy f + +-- | Sorts and then groups elements given and ordering of the +-- elements. +sortGroupBy :: (a -> a -> Ordering) -> [a] -> [[a]] +sortGroupBy f = groupBy (compareEq f) . sortBy f -- | Take the union of a list of lists. unionAll :: Eq a => [[a]] -> [a] diff --git a/src/GF/Shell/HelpFile.hs b/src/GF/Shell/HelpFile.hs index ab5321297..482967fb2 100644 --- a/src/GF/Shell/HelpFile.hs +++ b/src/GF/Shell/HelpFile.hs @@ -598,10 +598,14 @@ txtHelpFile = "\n -printer=jsgf Java Speech Grammar Format" ++ "\n -printer=srgs_xml SRGS XML format" ++ "\n -printer=srgs_xml_prob SRGS XML format, with weights" ++ + "\n -printer=srgs_xml_ms_sem SRGS XML format, with semantic tags for the" ++ + "\n Microsoft Speech API." ++ "\n -printer=slf a finite automaton in the HTK SLF format" ++ "\n -printer=slf_graphviz the same automaton as slf, but in Graphviz format" ++ - "\n -printer=slf_sub a finite automaton with sub-automata in the HTK SLF format" ++ - "\n -printer=slf_sub_graphviz the same automaton as slf_sub, but in Graphviz format" ++ + "\n -printer=slf_sub a finite automaton with sub-automata in the " ++ + "\n HTK SLF format" ++ + "\n -printer=slf_sub_graphviz the same automaton as slf_sub, but in " ++ + "\n Graphviz format" ++ "\n -printer=fa_graphviz a finite automaton with labelled edges" ++ "\n -printer=regular a regular grammar in a simple BNF" ++ "\n -printer=unpar a gfc grammar with parameters eliminated" ++ diff --git a/src/GF/Speech/PrGSL.hs b/src/GF/Speech/PrGSL.hs index 34e2620bb..b204ae6c3 100644 --- a/src/GF/Speech/PrGSL.hs +++ b/src/GF/Speech/PrGSL.hs @@ -33,7 +33,7 @@ import Data.Char (toUpper,toLower) gslPrinter :: Ident -- ^ Grammar name -> Options -> Maybe Probs -> CGrammar -> String gslPrinter name opts probs cfg = prGSL srg "" - where srg = makeSRG name opts probs cfg + where srg = makeSimpleSRG name opts probs cfg prGSL :: SRG -> ShowS prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) @@ -48,7 +48,7 @@ prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) showString "; " . prtS origCat . nl . prCat cat . sp . wrap "[" (unwordsS (map prAlt rhs)) "]" . nl -- FIXME: use the probability - prAlt (SRGAlt mp rhs) = wrap "(" (unwordsS (map prSymbol rhs')) ")" + prAlt (SRGAlt mp _ rhs) = wrap "(" (unwordsS (map prSymbol rhs')) ")" where rhs' = rmPunct rhs prSymbol (Cat c) = prCat c prSymbol (Tok t) = wrap "\"" (showString (showToken t)) "\"" diff --git a/src/GF/Speech/PrJSGF.hs b/src/GF/Speech/PrJSGF.hs index 0eab36828..56f5eda1b 100644 --- a/src/GF/Speech/PrJSGF.hs +++ b/src/GF/Speech/PrJSGF.hs @@ -32,7 +32,7 @@ import GF.Speech.SRG jsgfPrinter :: Ident -- ^ Grammar name -> Options -> Maybe Probs -> CGrammar -> String jsgfPrinter name opts probs cfg = prJSGF srg "" - where srg = makeSRG name opts probs cfg + where srg = makeSimpleSRG name opts probs cfg prJSGF :: SRG -> ShowS prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) @@ -49,7 +49,7 @@ prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) comments [origCat] . nl . prCat cat . showString " = " . joinS " | " (map prAlt rhs) . nl -- FIXME: use the probability - prAlt (SRGAlt mp rhs) + prAlt (SRGAlt mp _ rhs) | null rhs' = showString "" | otherwise = wrap "(" (unwordsS (map prSymbol rhs')) ")" where rhs' = rmPunct rhs diff --git a/src/GF/Speech/PrSRGS.hs b/src/GF/Speech/PrSRGS.hs index 63775c852..81d5fd236 100644 --- a/src/GF/Speech/PrSRGS.hs +++ b/src/GF/Speech/PrSRGS.hs @@ -22,7 +22,7 @@ import GF.Speech.SRG import GF.Infra.Ident import GF.Formalism.CFG -import GF.Formalism.Utilities (Symbol(..)) +import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), forestName) import GF.Conversion.Types import GF.Infra.Print import GF.Infra.Option @@ -37,43 +37,74 @@ type Attr = (String,String) srgsXmlPrinter :: Ident -- ^ Grammar name -> Options + -> Bool -- ^ Whether to include semantic interpretation -> Maybe Probs -> CGrammar -> String -srgsXmlPrinter name opts probs cfg = prSrgsXml srg "" +srgsXmlPrinter name opts sisr probs cfg = prSrgsXml sisr srg "" where srg = makeSRG name opts probs cfg -prSrgsXml :: SRG -> ShowS -prSrgsXml (SRG{grammarName=name,startCat=start, +prSrgsXml :: Bool -> SRG -> ShowS +prSrgsXml sisr (SRG{grammarName=name,startCat=start, origStartCat=origStart,grammarLanguage=l,rules=rs}) = header . showsXML xmlGr where header = showString "" root = prCat start - xmlGr = grammar root l (comments - ["SRGS XML speech recognition grammar for " ++ name, - "Generated by GF", - "Original start category: " ++ origStart] + xmlGr = grammar root l ([meta "description" + ("SRGS XML speech recognition grammar for " ++ name + ++ ". " ++ "Original start category: " ++ origStart), + meta "generator" "GF"] ++ map ruleToXML rs) ruleToXML (SRGRule cat origCat alts) = rule (prCat cat) (comments ["Category " ++ origCat] ++ [prRhs alts]) prRhs rhss = oneOf (map prAlt rhss) - prAlt (SRGAlt p rhs) = item p (map prSymbol rhs) - prSymbol (Cat c) = Tag "ruleref" [("uri","#" ++ prCat c)] [] - prSymbol (Tok t) = item Nothing [Data (showToken t)] - prCat c = c -- FIXME: escape something? - showToken t = t -- FIXME: escape something? + prAlt (SRGAlt p n@(Name _ pr) rhs) + | sisr = prodItem (Just n) p (map (uncurry (symItem pr)) (numberCats 0 rhs)) + | otherwise = prodItem Nothing p (map (\s -> symItem [] s 0) rhs) + numberCats _ [] = [] + numberCats n (s@(Cat _):ss) = (s,n):numberCats (n+1) ss + numberCats n (s:ss) = (s,n):numberCats n ss -rule :: String -- ^ id - -> [XML] -> XML +rule :: String -> [XML] -> XML rule i = Tag "rule" [("id",i)] -item :: Maybe Double -> [XML] -> XML --- FIXME: what is the weight called? -item mp xs = Tag "item" as cs - where as = maybe [] (\p -> [("weight", show p)]) mp - cs = case xs of - [Tag "item" [] xs'] -> xs' - _ -> xs +prodItem :: Maybe Name -> Maybe Double -> [XML] -> XML +prodItem n mp xs = Tag "item" w (t++cs) + where + w = maybe [] (\p -> [("weight", show p)]) mp + t = maybe [] prodTag n + cs = case xs of + [Tag "item" [] xs'] -> xs' + _ -> xs + +prodTag :: Name -> [XML] +prodTag (Name f prs) = [Tag "tag" [] [Data (join "; " ts)]] + where + ts = ["$.name=" ++ showFun f] ++ + ["$.arg" ++ show n ++ "=" ++ argInit (prs!!n) + | n <- [0..length prs-1]] + argInit (Unify _) = metavar + argInit (Constant f) = maybe metavar showFun (forestName f) + showFun = show . prIdent + metavar = show "?" + +symItem :: [Profile a] -> Symbol String Token -> Int -> XML +symItem prs (Cat c) x = Tag "item" [] ([Tag "ruleref" [("uri","#" ++ prCat c)] []]++t) + where + t = if null ts then [] else [Tag "tag" [] [Data (join "; " ts)]] + ts = ["$.arg" ++ show n ++ "=$$" + | n <- [0..length prs-1], inProfile x (prs!!n)] +symItem _ (Tok t) _ = Tag "item" [] [Data (showToken t)] + +inProfile :: Int -> Profile a -> Bool +inProfile x (Unify xs) = x `elem` xs +inProfile _ (Constant _) = False + +prCat :: String -> String +prCat c = c -- FIXME: escape something? + +showToken :: Token -> String +showToken t = t -- FIXME: escape something? oneOf :: [XML] -> XML oneOf [x] = x @@ -88,6 +119,9 @@ grammar root l = Tag "grammar" [("xml:lang", l), ("mode","voice"), ("root",root)] +meta :: String -> String -> XML +meta n c = Tag "meta" [("name",n),("content",c)] [] + comments :: [String] -> [XML] comments = map Comment diff --git a/src/GF/Speech/SRG.hs b/src/GF/Speech/SRG.hs index 8bc4f68e1..ce4d89da0 100644 --- a/src/GF/Speech/SRG.hs +++ b/src/GF/Speech/SRG.hs @@ -18,13 +18,16 @@ -- FIXME: figure out name prefix from grammar name ----------------------------------------------------------------------------- -module GF.Speech.SRG where +module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..), + makeSimpleSRG, makeSRG + , lookupFM_, prtS) where import GF.Data.Operations import GF.Data.Utilities import GF.Infra.Ident import GF.Formalism.CFG -import GF.Formalism.Utilities (Symbol(..)) +import GF.Formalism.Utilities (Symbol(..), NameProfile(..) + , Profile, SyntaxForest) import GF.Conversion.Types import GF.Infra.Print import GF.Speech.TransformCFG @@ -48,8 +51,8 @@ data SRGRule = SRGRule String String [SRGAlt] -- ^ SRG category name, original c -- and productions deriving (Eq,Show) --- | maybe a probability, and a list of symbols -data SRGAlt = SRGAlt (Maybe Double) [Symbol String Token] +-- | maybe a probability, a rule name and a list of symbols +data SRGAlt = SRGAlt (Maybe Double) Name [Symbol String Token] deriving (Eq,Show) -- | SRG category name and original name @@ -57,12 +60,34 @@ type CatName = (String,String) type CatNames = FiniteMap String String +-- | Create a non-left-recursive SRG. +-- FIXME: the probabilities, names and profiles in the returned +-- grammar may be meaningless. +makeSimpleSRG :: Ident -- ^ Grammar name + -> Options -- ^ Grammar options + -> Maybe Probs -- ^ Probabilities + -> CGrammar -- ^ A context-free grammar + -> SRG +makeSimpleSRG + = makeSRG_ (removeLeftRecursion . removeIdenticalRules . removeEmptyCats) + +-- | Create a SRG preserving the names, profiles and probabilities of the +-- input grammar. The returned grammar may be left-recursive. makeSRG :: Ident -- ^ Grammar name -> Options -- ^ Grammar options -> Maybe Probs -- ^ Probabilities -> CGrammar -- ^ A context-free grammar -> SRG -makeSRG i opts probs gr +makeSRG = makeSRG_ removeEmptyCats + +makeSRG_ :: (CFRules -> CFRules) -- ^ Transformations to apply to the + -- CFG before converting to SRG + -> Ident -- ^ Grammar name + -> Options -- ^ Grammar options + -> Maybe Probs -- ^ Probabilities + -> CGrammar -- ^ A context-free grammar + -> SRG +makeSRG_ f i opts probs gr = SRG { grammarName = name, startCat = lookupFM_ names origStart, origStartCat = origStart, @@ -72,21 +97,19 @@ makeSRG i opts probs gr name = prIdent i origStart = getStartCat opts l = fromMaybe "en_UK" (getOptVal opts speechLanguage) - gr' = removeLeftRecursion $ removeIdenticalRules $ removeEmptyCats $ cfgToCFRules gr + gr' = f (cfgToCFRules gr) (cats,cfgRules) = unzip gr' names = mkCatNames name cats rs = map (cfgRulesToSRGRule names probs) cfgRules - --- FIXME: probabilities get larger than 1.0 when new rules are --- introduced --- FIXME: merge alternatives with same rhs but different probabilities +-- FIXME: merge alternatives with same rhs and profile but different probabilities cfgRulesToSRGRule :: FiniteMap String String -> Maybe Probs -> [CFRule_] -> SRGRule cfgRulesToSRGRule names probs rs@(r:_) = SRGRule cat origCat rhs where origCat = lhsCat r cat = lookupFM_ names origCat rhs = nub $ map ruleToAlt rs - ruleToAlt r = SRGAlt (ruleProb probs r) (map renameCat (ruleRhs r)) + ruleToAlt r@(CFRule c ss n) + = SRGAlt (ruleProb probs r) n (map renameCat ss) renameCat (Cat c) = Cat (lookupFM_ names c) renameCat t = t diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs index 08aae8897..38148418c 100644 --- a/src/GF/Speech/TransformCFG.hs +++ b/src/GF/Speech/TransformCFG.hs @@ -37,6 +37,7 @@ import Control.Monad import Data.FiniteMap import Data.List import Data.Maybe (fromMaybe) +import Data.Monoid (mconcat) import Data.Set (Set) import qualified Data.Set as Set @@ -77,14 +78,13 @@ removeEmptyCats = fix removeEmptyCats' emptyCats = filter (nothingOrNull . flip lookup rs) allCats k' = map (\ (c,xs) -> (c, filter (not . anyUsedBy emptyCats) xs)) keep --- | Remove rules which are identical, not caring about the rule names. --- FIXME: this messes up probabilities +-- | Remove rules which have the same rhs. +-- FIXME: this messes up probabilities, names and profiles removeIdenticalRules :: CFRules -> CFRules -removeIdenticalRules g = [(c,sortNubBy compareCatAndRhs rs) | (c,rs) <- g] - where compareCatAndRhs (CFRule c1 ss1 _) (CFRule c2 ss2 _) = - case c1 `compare` c2 of - EQ -> ss1 `compare` ss2 - o -> o +removeIdenticalRules g = [(c,sortNubBy cmpRules rs) | (c,rs) <- g] + where + cmpRules (CFRule c1 ss1 _) (CFRule c2 ss2 _) = + mconcat [c1 `compare` c2, ss1 `compare` ss2] removeLeftRecursion :: CFRules -> CFRules removeLeftRecursion rs = concatMap removeDirectLeftRecursion $ map handleProds rs diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs index 9a6cd0e21..0d6a143ef 100644 --- a/src/GF/UseGrammar/Custom.hs +++ b/src/GF/UseGrammar/Custom.hs @@ -249,11 +249,14 @@ customGrammarPrinter = in jsgfPrinter name opts Nothing $ stateCFG s) ,(strCI "srgs_xml", \s -> let opts = stateOptions s name = cncId s - in srgsXmlPrinter name opts Nothing $ stateCFG s) + in srgsXmlPrinter name opts False Nothing $ stateCFG s) ,(strCI "srgs_xml_prob", \s -> let opts = stateOptions s name = cncId s probs = stateProbs s - in srgsXmlPrinter name opts (Just probs) $ stateCFG s) + in srgsXmlPrinter name opts False (Just probs) $ stateCFG s) + ,(strCI "srgs_xml_ms_sem", \s -> let opts = stateOptions s + name = cncId s + in srgsXmlPrinter name opts True Nothing $ stateCFG s) ,(strCI "slf", \s -> let opts = stateOptions s start = getStartCat opts name = cncId s diff --git a/src/HelpFile b/src/HelpFile index 88b5eba7e..a67f79412 100644 --- a/src/HelpFile +++ b/src/HelpFile @@ -569,10 +569,14 @@ q, quit: q -printer=jsgf Java Speech Grammar Format -printer=srgs_xml SRGS XML format -printer=srgs_xml_prob SRGS XML format, with weights + -printer=srgs_xml_ms_sem SRGS XML format, with semantic tags for the + Microsoft Speech API. -printer=slf a finite automaton in the HTK SLF format -printer=slf_graphviz the same automaton as slf, but in Graphviz format - -printer=slf_sub a finite automaton with sub-automata in the HTK SLF format - -printer=slf_sub_graphviz the same automaton as slf_sub, but in Graphviz format + -printer=slf_sub a finite automaton with sub-automata in the + HTK SLF format + -printer=slf_sub_graphviz the same automaton as slf_sub, but in + Graphviz format -printer=fa_graphviz a finite automaton with labelled edges -printer=regular a regular grammar in a simple BNF -printer=unpar a gfc grammar with parameters eliminated