forked from GitHub/gf-core
First version of SRGS with semantic tags.
This commit is contained in:
@@ -73,7 +73,12 @@ sortNub = map head . group . sort
|
||||
|
||||
-- | Like 'nubBy', but more efficient as it uses sorting internally.
|
||||
sortNubBy :: (a -> a -> Ordering) -> [a] -> [a]
|
||||
sortNubBy f = map head . groupBy (compareEq f) . sortBy f
|
||||
sortNubBy f = map head . sortGroupBy f
|
||||
|
||||
-- | Sorts and then groups elements given and ordering of the
|
||||
-- elements.
|
||||
sortGroupBy :: (a -> a -> Ordering) -> [a] -> [[a]]
|
||||
sortGroupBy f = groupBy (compareEq f) . sortBy f
|
||||
|
||||
-- | Take the union of a list of lists.
|
||||
unionAll :: Eq a => [[a]] -> [a]
|
||||
|
||||
@@ -598,10 +598,14 @@ txtHelpFile =
|
||||
"\n -printer=jsgf Java Speech Grammar Format" ++
|
||||
"\n -printer=srgs_xml SRGS XML format" ++
|
||||
"\n -printer=srgs_xml_prob SRGS XML format, with weights" ++
|
||||
"\n -printer=srgs_xml_ms_sem SRGS XML format, with semantic tags for the" ++
|
||||
"\n Microsoft Speech API." ++
|
||||
"\n -printer=slf a finite automaton in the HTK SLF format" ++
|
||||
"\n -printer=slf_graphviz the same automaton as slf, but in Graphviz format" ++
|
||||
"\n -printer=slf_sub a finite automaton with sub-automata in the HTK SLF format" ++
|
||||
"\n -printer=slf_sub_graphviz the same automaton as slf_sub, but in Graphviz format" ++
|
||||
"\n -printer=slf_sub a finite automaton with sub-automata in the " ++
|
||||
"\n HTK SLF format" ++
|
||||
"\n -printer=slf_sub_graphviz the same automaton as slf_sub, but in " ++
|
||||
"\n Graphviz format" ++
|
||||
"\n -printer=fa_graphviz a finite automaton with labelled edges" ++
|
||||
"\n -printer=regular a regular grammar in a simple BNF" ++
|
||||
"\n -printer=unpar a gfc grammar with parameters eliminated" ++
|
||||
|
||||
@@ -33,7 +33,7 @@ import Data.Char (toUpper,toLower)
|
||||
gslPrinter :: Ident -- ^ Grammar name
|
||||
-> Options -> Maybe Probs -> CGrammar -> String
|
||||
gslPrinter name opts probs cfg = prGSL srg ""
|
||||
where srg = makeSRG name opts probs cfg
|
||||
where srg = makeSimpleSRG name opts probs cfg
|
||||
|
||||
prGSL :: SRG -> ShowS
|
||||
prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
|
||||
@@ -48,7 +48,7 @@ prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
|
||||
showString "; " . prtS origCat . nl
|
||||
. prCat cat . sp . wrap "[" (unwordsS (map prAlt rhs)) "]" . nl
|
||||
-- FIXME: use the probability
|
||||
prAlt (SRGAlt mp rhs) = wrap "(" (unwordsS (map prSymbol rhs')) ")"
|
||||
prAlt (SRGAlt mp _ rhs) = wrap "(" (unwordsS (map prSymbol rhs')) ")"
|
||||
where rhs' = rmPunct rhs
|
||||
prSymbol (Cat c) = prCat c
|
||||
prSymbol (Tok t) = wrap "\"" (showString (showToken t)) "\""
|
||||
|
||||
@@ -32,7 +32,7 @@ import GF.Speech.SRG
|
||||
jsgfPrinter :: Ident -- ^ Grammar name
|
||||
-> Options -> Maybe Probs -> CGrammar -> String
|
||||
jsgfPrinter name opts probs cfg = prJSGF srg ""
|
||||
where srg = makeSRG name opts probs cfg
|
||||
where srg = makeSimpleSRG name opts probs cfg
|
||||
|
||||
prJSGF :: SRG -> ShowS
|
||||
prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
|
||||
@@ -49,7 +49,7 @@ prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
|
||||
comments [origCat] . nl
|
||||
. prCat cat . showString " = " . joinS " | " (map prAlt rhs) . nl
|
||||
-- FIXME: use the probability
|
||||
prAlt (SRGAlt mp rhs)
|
||||
prAlt (SRGAlt mp _ rhs)
|
||||
| null rhs' = showString "<NULL>"
|
||||
| otherwise = wrap "(" (unwordsS (map prSymbol rhs')) ")"
|
||||
where rhs' = rmPunct rhs
|
||||
|
||||
@@ -22,7 +22,7 @@ import GF.Speech.SRG
|
||||
import GF.Infra.Ident
|
||||
|
||||
import GF.Formalism.CFG
|
||||
import GF.Formalism.Utilities (Symbol(..))
|
||||
import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), forestName)
|
||||
import GF.Conversion.Types
|
||||
import GF.Infra.Print
|
||||
import GF.Infra.Option
|
||||
@@ -37,43 +37,74 @@ type Attr = (String,String)
|
||||
|
||||
srgsXmlPrinter :: Ident -- ^ Grammar name
|
||||
-> Options
|
||||
-> Bool -- ^ Whether to include semantic interpretation
|
||||
-> Maybe Probs
|
||||
-> CGrammar -> String
|
||||
srgsXmlPrinter name opts probs cfg = prSrgsXml srg ""
|
||||
srgsXmlPrinter name opts sisr probs cfg = prSrgsXml sisr srg ""
|
||||
where srg = makeSRG name opts probs cfg
|
||||
|
||||
prSrgsXml :: SRG -> ShowS
|
||||
prSrgsXml (SRG{grammarName=name,startCat=start,
|
||||
prSrgsXml :: Bool -> SRG -> ShowS
|
||||
prSrgsXml sisr (SRG{grammarName=name,startCat=start,
|
||||
origStartCat=origStart,grammarLanguage=l,rules=rs})
|
||||
= header . showsXML xmlGr
|
||||
where
|
||||
header = showString "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
|
||||
root = prCat start
|
||||
xmlGr = grammar root l (comments
|
||||
["SRGS XML speech recognition grammar for " ++ name,
|
||||
"Generated by GF",
|
||||
"Original start category: " ++ origStart]
|
||||
xmlGr = grammar root l ([meta "description"
|
||||
("SRGS XML speech recognition grammar for " ++ name
|
||||
++ ". " ++ "Original start category: " ++ origStart),
|
||||
meta "generator" "GF"]
|
||||
++ map ruleToXML rs)
|
||||
ruleToXML (SRGRule cat origCat alts) =
|
||||
rule (prCat cat) (comments ["Category " ++ origCat] ++ [prRhs alts])
|
||||
prRhs rhss = oneOf (map prAlt rhss)
|
||||
prAlt (SRGAlt p rhs) = item p (map prSymbol rhs)
|
||||
prSymbol (Cat c) = Tag "ruleref" [("uri","#" ++ prCat c)] []
|
||||
prSymbol (Tok t) = item Nothing [Data (showToken t)]
|
||||
prCat c = c -- FIXME: escape something?
|
||||
showToken t = t -- FIXME: escape something?
|
||||
prAlt (SRGAlt p n@(Name _ pr) rhs)
|
||||
| sisr = prodItem (Just n) p (map (uncurry (symItem pr)) (numberCats 0 rhs))
|
||||
| otherwise = prodItem Nothing p (map (\s -> symItem [] s 0) rhs)
|
||||
numberCats _ [] = []
|
||||
numberCats n (s@(Cat _):ss) = (s,n):numberCats (n+1) ss
|
||||
numberCats n (s:ss) = (s,n):numberCats n ss
|
||||
|
||||
rule :: String -- ^ id
|
||||
-> [XML] -> XML
|
||||
rule :: String -> [XML] -> XML
|
||||
rule i = Tag "rule" [("id",i)]
|
||||
|
||||
item :: Maybe Double -> [XML] -> XML
|
||||
-- FIXME: what is the weight called?
|
||||
item mp xs = Tag "item" as cs
|
||||
where as = maybe [] (\p -> [("weight", show p)]) mp
|
||||
cs = case xs of
|
||||
[Tag "item" [] xs'] -> xs'
|
||||
_ -> xs
|
||||
prodItem :: Maybe Name -> Maybe Double -> [XML] -> XML
|
||||
prodItem n mp xs = Tag "item" w (t++cs)
|
||||
where
|
||||
w = maybe [] (\p -> [("weight", show p)]) mp
|
||||
t = maybe [] prodTag n
|
||||
cs = case xs of
|
||||
[Tag "item" [] xs'] -> xs'
|
||||
_ -> xs
|
||||
|
||||
prodTag :: Name -> [XML]
|
||||
prodTag (Name f prs) = [Tag "tag" [] [Data (join "; " ts)]]
|
||||
where
|
||||
ts = ["$.name=" ++ showFun f] ++
|
||||
["$.arg" ++ show n ++ "=" ++ argInit (prs!!n)
|
||||
| n <- [0..length prs-1]]
|
||||
argInit (Unify _) = metavar
|
||||
argInit (Constant f) = maybe metavar showFun (forestName f)
|
||||
showFun = show . prIdent
|
||||
metavar = show "?"
|
||||
|
||||
symItem :: [Profile a] -> Symbol String Token -> Int -> XML
|
||||
symItem prs (Cat c) x = Tag "item" [] ([Tag "ruleref" [("uri","#" ++ prCat c)] []]++t)
|
||||
where
|
||||
t = if null ts then [] else [Tag "tag" [] [Data (join "; " ts)]]
|
||||
ts = ["$.arg" ++ show n ++ "=$$"
|
||||
| n <- [0..length prs-1], inProfile x (prs!!n)]
|
||||
symItem _ (Tok t) _ = Tag "item" [] [Data (showToken t)]
|
||||
|
||||
inProfile :: Int -> Profile a -> Bool
|
||||
inProfile x (Unify xs) = x `elem` xs
|
||||
inProfile _ (Constant _) = False
|
||||
|
||||
prCat :: String -> String
|
||||
prCat c = c -- FIXME: escape something?
|
||||
|
||||
showToken :: Token -> String
|
||||
showToken t = t -- FIXME: escape something?
|
||||
|
||||
oneOf :: [XML] -> XML
|
||||
oneOf [x] = x
|
||||
@@ -88,6 +119,9 @@ grammar root l = Tag "grammar" [("xml:lang", l),
|
||||
("mode","voice"),
|
||||
("root",root)]
|
||||
|
||||
meta :: String -> String -> XML
|
||||
meta n c = Tag "meta" [("name",n),("content",c)] []
|
||||
|
||||
comments :: [String] -> [XML]
|
||||
comments = map Comment
|
||||
|
||||
|
||||
@@ -18,13 +18,16 @@
|
||||
-- FIXME: figure out name prefix from grammar name
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Speech.SRG where
|
||||
module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..),
|
||||
makeSimpleSRG, makeSRG
|
||||
, lookupFM_, prtS) where
|
||||
|
||||
import GF.Data.Operations
|
||||
import GF.Data.Utilities
|
||||
import GF.Infra.Ident
|
||||
import GF.Formalism.CFG
|
||||
import GF.Formalism.Utilities (Symbol(..))
|
||||
import GF.Formalism.Utilities (Symbol(..), NameProfile(..)
|
||||
, Profile, SyntaxForest)
|
||||
import GF.Conversion.Types
|
||||
import GF.Infra.Print
|
||||
import GF.Speech.TransformCFG
|
||||
@@ -48,8 +51,8 @@ data SRGRule = SRGRule String String [SRGAlt] -- ^ SRG category name, original c
|
||||
-- and productions
|
||||
deriving (Eq,Show)
|
||||
|
||||
-- | maybe a probability, and a list of symbols
|
||||
data SRGAlt = SRGAlt (Maybe Double) [Symbol String Token]
|
||||
-- | maybe a probability, a rule name and a list of symbols
|
||||
data SRGAlt = SRGAlt (Maybe Double) Name [Symbol String Token]
|
||||
deriving (Eq,Show)
|
||||
|
||||
-- | SRG category name and original name
|
||||
@@ -57,12 +60,34 @@ type CatName = (String,String)
|
||||
|
||||
type CatNames = FiniteMap String String
|
||||
|
||||
-- | Create a non-left-recursive SRG.
|
||||
-- FIXME: the probabilities, names and profiles in the returned
|
||||
-- grammar may be meaningless.
|
||||
makeSimpleSRG :: Ident -- ^ Grammar name
|
||||
-> Options -- ^ Grammar options
|
||||
-> Maybe Probs -- ^ Probabilities
|
||||
-> CGrammar -- ^ A context-free grammar
|
||||
-> SRG
|
||||
makeSimpleSRG
|
||||
= makeSRG_ (removeLeftRecursion . removeIdenticalRules . removeEmptyCats)
|
||||
|
||||
-- | Create a SRG preserving the names, profiles and probabilities of the
|
||||
-- input grammar. The returned grammar may be left-recursive.
|
||||
makeSRG :: Ident -- ^ Grammar name
|
||||
-> Options -- ^ Grammar options
|
||||
-> Maybe Probs -- ^ Probabilities
|
||||
-> CGrammar -- ^ A context-free grammar
|
||||
-> SRG
|
||||
makeSRG i opts probs gr
|
||||
makeSRG = makeSRG_ removeEmptyCats
|
||||
|
||||
makeSRG_ :: (CFRules -> CFRules) -- ^ Transformations to apply to the
|
||||
-- CFG before converting to SRG
|
||||
-> Ident -- ^ Grammar name
|
||||
-> Options -- ^ Grammar options
|
||||
-> Maybe Probs -- ^ Probabilities
|
||||
-> CGrammar -- ^ A context-free grammar
|
||||
-> SRG
|
||||
makeSRG_ f i opts probs gr
|
||||
= SRG { grammarName = name,
|
||||
startCat = lookupFM_ names origStart,
|
||||
origStartCat = origStart,
|
||||
@@ -72,21 +97,19 @@ makeSRG i opts probs gr
|
||||
name = prIdent i
|
||||
origStart = getStartCat opts
|
||||
l = fromMaybe "en_UK" (getOptVal opts speechLanguage)
|
||||
gr' = removeLeftRecursion $ removeIdenticalRules $ removeEmptyCats $ cfgToCFRules gr
|
||||
gr' = f (cfgToCFRules gr)
|
||||
(cats,cfgRules) = unzip gr'
|
||||
names = mkCatNames name cats
|
||||
rs = map (cfgRulesToSRGRule names probs) cfgRules
|
||||
|
||||
|
||||
-- FIXME: probabilities get larger than 1.0 when new rules are
|
||||
-- introduced
|
||||
-- FIXME: merge alternatives with same rhs but different probabilities
|
||||
-- FIXME: merge alternatives with same rhs and profile but different probabilities
|
||||
cfgRulesToSRGRule :: FiniteMap String String -> Maybe Probs -> [CFRule_] -> SRGRule
|
||||
cfgRulesToSRGRule names probs rs@(r:_) = SRGRule cat origCat rhs
|
||||
where origCat = lhsCat r
|
||||
cat = lookupFM_ names origCat
|
||||
rhs = nub $ map ruleToAlt rs
|
||||
ruleToAlt r = SRGAlt (ruleProb probs r) (map renameCat (ruleRhs r))
|
||||
ruleToAlt r@(CFRule c ss n)
|
||||
= SRGAlt (ruleProb probs r) n (map renameCat ss)
|
||||
renameCat (Cat c) = Cat (lookupFM_ names c)
|
||||
renameCat t = t
|
||||
|
||||
|
||||
@@ -37,6 +37,7 @@ import Control.Monad
|
||||
import Data.FiniteMap
|
||||
import Data.List
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Data.Monoid (mconcat)
|
||||
import Data.Set (Set)
|
||||
import qualified Data.Set as Set
|
||||
|
||||
@@ -77,14 +78,13 @@ removeEmptyCats = fix removeEmptyCats'
|
||||
emptyCats = filter (nothingOrNull . flip lookup rs) allCats
|
||||
k' = map (\ (c,xs) -> (c, filter (not . anyUsedBy emptyCats) xs)) keep
|
||||
|
||||
-- | Remove rules which are identical, not caring about the rule names.
|
||||
-- FIXME: this messes up probabilities
|
||||
-- | Remove rules which have the same rhs.
|
||||
-- FIXME: this messes up probabilities, names and profiles
|
||||
removeIdenticalRules :: CFRules -> CFRules
|
||||
removeIdenticalRules g = [(c,sortNubBy compareCatAndRhs rs) | (c,rs) <- g]
|
||||
where compareCatAndRhs (CFRule c1 ss1 _) (CFRule c2 ss2 _) =
|
||||
case c1 `compare` c2 of
|
||||
EQ -> ss1 `compare` ss2
|
||||
o -> o
|
||||
removeIdenticalRules g = [(c,sortNubBy cmpRules rs) | (c,rs) <- g]
|
||||
where
|
||||
cmpRules (CFRule c1 ss1 _) (CFRule c2 ss2 _) =
|
||||
mconcat [c1 `compare` c2, ss1 `compare` ss2]
|
||||
|
||||
removeLeftRecursion :: CFRules -> CFRules
|
||||
removeLeftRecursion rs = concatMap removeDirectLeftRecursion $ map handleProds rs
|
||||
|
||||
@@ -249,11 +249,14 @@ customGrammarPrinter =
|
||||
in jsgfPrinter name opts Nothing $ stateCFG s)
|
||||
,(strCI "srgs_xml", \s -> let opts = stateOptions s
|
||||
name = cncId s
|
||||
in srgsXmlPrinter name opts Nothing $ stateCFG s)
|
||||
in srgsXmlPrinter name opts False Nothing $ stateCFG s)
|
||||
,(strCI "srgs_xml_prob", \s -> let opts = stateOptions s
|
||||
name = cncId s
|
||||
probs = stateProbs s
|
||||
in srgsXmlPrinter name opts (Just probs) $ stateCFG s)
|
||||
in srgsXmlPrinter name opts False (Just probs) $ stateCFG s)
|
||||
,(strCI "srgs_xml_ms_sem", \s -> let opts = stateOptions s
|
||||
name = cncId s
|
||||
in srgsXmlPrinter name opts True Nothing $ stateCFG s)
|
||||
,(strCI "slf", \s -> let opts = stateOptions s
|
||||
start = getStartCat opts
|
||||
name = cncId s
|
||||
|
||||
@@ -569,10 +569,14 @@ q, quit: q
|
||||
-printer=jsgf Java Speech Grammar Format
|
||||
-printer=srgs_xml SRGS XML format
|
||||
-printer=srgs_xml_prob SRGS XML format, with weights
|
||||
-printer=srgs_xml_ms_sem SRGS XML format, with semantic tags for the
|
||||
Microsoft Speech API.
|
||||
-printer=slf a finite automaton in the HTK SLF format
|
||||
-printer=slf_graphviz the same automaton as slf, but in Graphviz format
|
||||
-printer=slf_sub a finite automaton with sub-automata in the HTK SLF format
|
||||
-printer=slf_sub_graphviz the same automaton as slf_sub, but in Graphviz format
|
||||
-printer=slf_sub a finite automaton with sub-automata in the
|
||||
HTK SLF format
|
||||
-printer=slf_sub_graphviz the same automaton as slf_sub, but in
|
||||
Graphviz format
|
||||
-printer=fa_graphviz a finite automaton with labelled edges
|
||||
-printer=regular a regular grammar in a simple BNF
|
||||
-printer=unpar a gfc grammar with parameters eliminated
|
||||
|
||||
Reference in New Issue
Block a user