Generalized Speech Recognition Grammar generation. Added JSGF grammar printer.

This commit is contained in:
bringert
2004-10-01 08:43:59 +00:00
parent f7e066c8fa
commit 12e4aecffe
4 changed files with 212 additions and 101 deletions

View File

@@ -11,96 +11,45 @@
Created : September 13, 2004
Modified :
Modified : October 1, 2004
**************************************************************
-}
-- FIXME: this modules should not be in cfgm, but where?
-- FIXME: remove left-recursion
-- FIXME: remove empty rules
-- FIXME: remove categories with no RHS
-- FIXME: remove / warn / fail if there are int / string literal
-- categories in the grammar
-- FIXME: figure out name prefix from grammar name
module PrGSL (gslPrinter) where
import SRG
import Ident
import CFGrammar
import Parser (Symbol(..))
import GrammarTypes
import PrintParser
import TransformCFG
import Option
import Data.List
import Data.Maybe (fromMaybe)
import Data.FiniteMap
data GSLGrammar = GSLGrammar String -- ^ grammar name
String -- ^ start category name
[GSLRule]
data GSLRule = GSLRule String [GSLAlt]
type GSLAlt = [Symbol String Token]
type CatNames = FiniteMap String String
gslPrinter :: Ident -- ^ Grammar name
-> Options -> CFGrammar -> String
gslPrinter name opts = prGSL (prIdent name) start
where mstart = getOptVal opts gStartCat
start = fromMaybe "S" mstart ++ "{}.s"
gslPrinter name opts cfg = prGSL srg ""
where srg = makeSRG name opts cfg
prGSL :: String -- ^ Grammar name
-> String -- ^ startcat
-> CFGrammar -> String
prGSL name start cfg = prGSLGrammar names gsl ""
where
cfg' = makeNice cfg
gsl = cfgToGSL name start cfg'
names = mkCatNames gsl
cfgToGSL :: String -- ^ grammar name
-> String -- ^ start category
-> [CFRule_] -> GSLGrammar
cfgToGSL name start =
GSLGrammar name start . map cfgRulesToGSLRule . sortAndGroupBy ruleCat
where
ruleCat (Rule c _ _) = c
ruleRhs (Rule _ r _) = r
cfgRulesToGSLRule rs@(r:_) = GSLRule (ruleCat r) (map ruleRhs rs)
mkCatNames :: GSLGrammar -> CatNames
mkCatNames (GSLGrammar name start rules) = listToFM (zip lhsCats names)
where names = [name ++ "_" ++ show x | x <- [0..]]
lhsCats = [ c | GSLRule c _ <- rules]
prGSLGrammar :: CatNames -> GSLGrammar -> ShowS
prGSLGrammar names (GSLGrammar name start g) =
header . mainCat . unlinesS (map prGSLrule g)
prGSL :: SRG -> ShowS
prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
= header . mainCat . unlinesS (map prRule rs)
where
header = showString ";GSL2.0" . nl
. comments ["Nuance speech recognition grammar for " ++ name,
"Generated by GF"] . nl . nl
mainCat = showString ("; Start category: " ++ start) . nl
. showString ".MAIN " . prGSLCat start . nl . nl
prGSLrule (GSLRule cat rhs) =
showString "; " . prtS cat . nl
. prGSLCat cat . sp . wrap "[" (unwordsS (map prGSLAlt rhs)) "]" . nl
prGSLAlt rhs = wrap "(" (unwordsS (map prGSLSymbol rhs')) ")"
mainCat = showString ("; Start category: " ++ origStart) . nl
. showString ".MAIN " . prCat start . nl . nl
prRule (SRGRule cat origCat rhs) =
showString "; " . prtS origCat . nl
. prCat cat . sp . wrap "[" (unwordsS (map prAlt rhs)) "]" . nl
prAlt rhs = wrap "(" (unwordsS (map prSymbol rhs')) ")"
where rhs' = rmPunct rhs
prGSLSymbol (Cat c) = prGSLCat c
prGSLSymbol (Tok t) = wrap "\"" (prtS t) "\""
prGSLCat c = showString n
where n = case lookupFM names c of
Nothing -> error $ "Unknown category: " ++ c
Just x -> x
prSymbol (Cat c) = prCat c
prSymbol (Tok t) = wrap "\"" (prtS t) "\""
prCat c = showString c
rmPunct :: [Symbol String Token] -> [Symbol String Token]
rmPunct [] = []
@@ -112,37 +61,3 @@ isPunct c = c `elem` "-_.;.,?!"
comments :: [String] -> ShowS
comments = unlinesS . map (showString . ("; " ++))
--
-- * Utils
--
nl :: ShowS
nl = showChar '\n'
sp :: ShowS
sp = showChar ' '
wrap :: String -> ShowS -> String -> ShowS
wrap o s c = showString o . s . showString c
concatS :: [ShowS] -> ShowS
concatS = foldr (.) id
unwordsS :: [ShowS] -> ShowS
unwordsS = concatS . intersperse sp
unlinesS :: [ShowS] -> ShowS
unlinesS = concatS . intersperse nl
sortAndGroupBy :: Ord b =>
(a -> b) -- ^ Gets the value to sort and group by
-> [a]
-> [[a]]
sortAndGroupBy f = groupBy (both (==) f) . sortBy (both compare f)
both :: (b -> b -> c) -> (a -> b) -> a -> a -> c
both f g x y = f (g x) (g y)
prtS :: Print a => a -> ShowS
prtS = showString . prt

67
src/GF/Speech/PrJSGF.hs Normal file
View File

@@ -0,0 +1,67 @@
{-
**************************************************************
GF Module
Description : This module prints a CFG as a JSGF grammar.
Author : Björn Bringert (bringert@cs.chalmers.se)
License : GPL (GNU General Public License)
Created : October 1, 2004
Modified :
**************************************************************
-}
-- FIXME: remove / warn / fail if there are int / string literal
-- categories in the grammar
-- FIXME: convert to UTF-8
module PrJSGF (jsgfPrinter) where
import SRG
import Ident
import CFGrammar
import Parser (Symbol(..))
import GrammarTypes
import PrintParser
import Option
jsgfPrinter :: Ident -- ^ Grammar name
-> Options -> CFGrammar -> String
jsgfPrinter name opts cfg = prJSGF srg ""
where srg = makeSRG name opts cfg
prJSGF :: SRG -> ShowS
prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
= header . mainCat . unlinesS (map prRule rs)
where
header = showString "#JSGF V1.0 UTF-8;" . nl
. comments ["JSGF speech recognition grammar for " ++ name,
"Generated by GF"] . nl
. showString ("grammar " ++ name ++ ";") . nl
. nl
mainCat = comments ["Start category: " ++ origStart] . nl
. showString "public <MAIN> = " . prCat start . showChar ';' . nl . nl
prRule (SRGRule cat origCat rhs) =
comments [origCat] . nl
. prCat cat . showString " = " . join " | " (map prAlt rhs) . nl
prAlt rhs | null rhs' = showString "<NULL>"
| otherwise = wrap "(" (unwordsS (map prSymbol rhs')) ")"
where rhs' = rmPunct rhs
prSymbol (Cat c) = prCat c
prSymbol (Tok t) = wrap "\"" (prtS t) "\""
prCat c = showChar '<' . showString c . showChar '>'
rmPunct :: [Symbol String Token] -> [Symbol String Token]
rmPunct [] = []
rmPunct (Tok t:ss) | all isPunct (prt t) = rmPunct ss
rmPunct (s:ss) = s : rmPunct ss
isPunct :: Char -> Bool
isPunct c = c `elem` "-_.;.,?!"
comments :: [String] -> ShowS
comments = unlinesS . map (showString . ("// " ++))

125
src/GF/Speech/SRG.hs Normal file
View File

@@ -0,0 +1,125 @@
{-
**************************************************************
GF Module
Description : Representation of, conversion to, and
utilities for printing of a
general Speech Recognition Grammar.
Author : Björn Bringert (bringert@cs.chalmers.se)
License : GPL (GNU General Public License)
Created : October 1, 2004
Modified :
**************************************************************
-}
-- FIXME: remove / warn / fail if there are int / string literal
-- categories in the grammar
-- FIXME: figure out name prefix from grammar name
module SRG where
import Ident
import CFGrammar
import Parser (Symbol(..))
import GrammarTypes
import PrintParser
import TransformCFG
import Option
import Data.List
import Data.Maybe (fromMaybe)
import Data.FiniteMap
data SRG = SRG { grammarName :: String -- ^ grammar name
, startCat :: String -- ^ start category name
, origStartCat :: String -- ^ original start category name
, rules :: [SRGRule]
}
data SRGRule = SRGRule String String [SRGAlt] -- ^ SRG category name, original category name
-- and productions
type SRGAlt = [Symbol String Token]
type CatName = (String,String) -- ^ SRG category name and original name
type CatNames = FiniteMap String String
makeSRG :: Ident -- ^ Grammar name
-> Options -- ^ Grammar options
-> CFGrammar -- ^ A context-free grammar
-> SRG
makeSRG i opts gr = SRG { grammarName = name,
startCat = start,
origStartCat = origStart,
rules = rs }
where
name = prIdent i
origStart = fromMaybe "S" (getOptVal opts gStartCat) ++ "{}.s"
start = lookupFM_ names origStart
gr' = makeNice gr
names = mkCatNames name (nub $ map ruleCat gr')
rs = map (cfgRulesToSRGRule names) (sortAndGroupBy ruleCat gr')
cfgRulesToSRGRule :: FiniteMap String String -> [CFRule_] -> SRGRule
cfgRulesToSRGRule names rs@(r:_) = SRGRule cat origCat rhs
where origCat = ruleCat r
cat = lookupFM_ names origCat
rhs = nub $ map (map renameCat . ruleRhs) rs
renameCat (Cat c) = Cat (lookupFM_ names c)
renameCat t = t
ruleCat :: Rule n c t -> c
ruleCat (Rule c _ _) = c
ruleRhs :: Rule n c t -> [Symbol c t]
ruleRhs (Rule _ r _) = r
mkCatNames :: String -- ^ Category name prefix
-> [String] -- ^ Original category names
-> FiniteMap String String -- ^ Maps original names to SRG names
mkCatNames prefix origNames = listToFM (zip origNames names)
where names = [prefix ++ "_" ++ show x | x <- [0..]]
--
-- * Utilities for building and printing SRGs
--
nl :: ShowS
nl = showChar '\n'
sp :: ShowS
sp = showChar ' '
wrap :: String -> ShowS -> String -> ShowS
wrap o s c = showString o . s . showString c
concatS :: [ShowS] -> ShowS
concatS = foldr (.) id
unwordsS :: [ShowS] -> ShowS
unwordsS = join " "
unlinesS :: [ShowS] -> ShowS
unlinesS = join "\n"
join :: String -> [ShowS] -> ShowS
join glue = concatS . intersperse (showString glue)
sortAndGroupBy :: Ord b =>
(a -> b) -- ^ Gets the value to sort and group by
-> [a]
-> [[a]]
sortAndGroupBy f = groupBy (both (==) f) . sortBy (both compare f)
both :: (b -> b -> c) -> (a -> b) -> a -> a -> c
both f g x y = f (g x) (g y)
prtS :: Print a => a -> ShowS
prtS = showString . prt
lookupFM_ :: (Ord key, Show key) => FiniteMap key elt -> key -> elt
lookupFM_ fm k = lookupWithDefaultFM fm (error $ "Key not found: " ++ show k) k

View File

@@ -28,6 +28,7 @@ import PrOld
import MkGFC
import CFtoSRG
import PrGSL (gslPrinter)
import PrJSGF (jsgfPrinter)
import Zipper
@@ -194,6 +195,9 @@ customGrammarPrinter =
,(strCI "gsl", \s -> let opts = stateOptions s
name = cncId s
in gslPrinter name opts $ Cnv.cfg $ statePInfo s)
,(strCI "jsgf", \s -> let opts = stateOptions s
name = cncId s
in jsgfPrinter name opts $ Cnv.cfg $ statePInfo s)
,(strCI "plbnf", prLBNF True)
,(strCI "lbnf", prLBNF False)
,(strCI "bnf", prBNF False)