From 12e4aecffe74739d8705df99216ad12b3a706828 Mon Sep 17 00:00:00 2001 From: bringert Date: Fri, 1 Oct 2004 08:43:59 +0000 Subject: [PATCH] Generalized Speech Recognition Grammar generation. Added JSGF grammar printer. --- src/GF/Speech/PrGSL.hs | 117 +++++---------------------------- src/GF/Speech/PrJSGF.hs | 67 +++++++++++++++++++ src/GF/Speech/SRG.hs | 125 ++++++++++++++++++++++++++++++++++++ src/GF/UseGrammar/Custom.hs | 4 ++ 4 files changed, 212 insertions(+), 101 deletions(-) create mode 100644 src/GF/Speech/PrJSGF.hs create mode 100644 src/GF/Speech/SRG.hs diff --git a/src/GF/Speech/PrGSL.hs b/src/GF/Speech/PrGSL.hs index c7cb283c6..0d7cb6c39 100644 --- a/src/GF/Speech/PrGSL.hs +++ b/src/GF/Speech/PrGSL.hs @@ -11,96 +11,45 @@ Created : September 13, 2004 - Modified : + Modified : October 1, 2004 ************************************************************** -} --- FIXME: this modules should not be in cfgm, but where? - --- FIXME: remove left-recursion - --- FIXME: remove empty rules - --- FIXME: remove categories with no RHS - -- FIXME: remove / warn / fail if there are int / string literal -- categories in the grammar --- FIXME: figure out name prefix from grammar name - module PrGSL (gslPrinter) where +import SRG import Ident import CFGrammar import Parser (Symbol(..)) import GrammarTypes import PrintParser -import TransformCFG import Option -import Data.List -import Data.Maybe (fromMaybe) -import Data.FiniteMap - - -data GSLGrammar = GSLGrammar String -- ^ grammar name - String -- ^ start category name - [GSLRule] -data GSLRule = GSLRule String [GSLAlt] -type GSLAlt = [Symbol String Token] - -type CatNames = FiniteMap String String - gslPrinter :: Ident -- ^ Grammar name -> Options -> CFGrammar -> String -gslPrinter name opts = prGSL (prIdent name) start - where mstart = getOptVal opts gStartCat - start = fromMaybe "S" mstart ++ "{}.s" +gslPrinter name opts cfg = prGSL srg "" + where srg = makeSRG name opts cfg -prGSL :: String -- ^ Grammar name - -> String -- ^ startcat - -> CFGrammar -> String -prGSL name start cfg = prGSLGrammar names gsl "" - where - cfg' = makeNice cfg - gsl = cfgToGSL name start cfg' - names = mkCatNames gsl - -cfgToGSL :: String -- ^ grammar name - -> String -- ^ start category - -> [CFRule_] -> GSLGrammar -cfgToGSL name start = - GSLGrammar name start . map cfgRulesToGSLRule . sortAndGroupBy ruleCat - where - ruleCat (Rule c _ _) = c - ruleRhs (Rule _ r _) = r - cfgRulesToGSLRule rs@(r:_) = GSLRule (ruleCat r) (map ruleRhs rs) - -mkCatNames :: GSLGrammar -> CatNames -mkCatNames (GSLGrammar name start rules) = listToFM (zip lhsCats names) - where names = [name ++ "_" ++ show x | x <- [0..]] - lhsCats = [ c | GSLRule c _ <- rules] - -prGSLGrammar :: CatNames -> GSLGrammar -> ShowS -prGSLGrammar names (GSLGrammar name start g) = - header . mainCat . unlinesS (map prGSLrule g) +prGSL :: SRG -> ShowS +prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) + = header . mainCat . unlinesS (map prRule rs) where header = showString ";GSL2.0" . nl . comments ["Nuance speech recognition grammar for " ++ name, "Generated by GF"] . nl . nl - mainCat = showString ("; Start category: " ++ start) . nl - . showString ".MAIN " . prGSLCat start . nl . nl - prGSLrule (GSLRule cat rhs) = - showString "; " . prtS cat . nl - . prGSLCat cat . sp . wrap "[" (unwordsS (map prGSLAlt rhs)) "]" . nl - prGSLAlt rhs = wrap "(" (unwordsS (map prGSLSymbol rhs')) ")" + mainCat = showString ("; Start category: " ++ origStart) . nl + . showString ".MAIN " . prCat start . nl . nl + prRule (SRGRule cat origCat rhs) = + showString "; " . prtS origCat . nl + . prCat cat . sp . wrap "[" (unwordsS (map prAlt rhs)) "]" . nl + prAlt rhs = wrap "(" (unwordsS (map prSymbol rhs')) ")" where rhs' = rmPunct rhs - prGSLSymbol (Cat c) = prGSLCat c - prGSLSymbol (Tok t) = wrap "\"" (prtS t) "\"" - prGSLCat c = showString n - where n = case lookupFM names c of - Nothing -> error $ "Unknown category: " ++ c - Just x -> x + prSymbol (Cat c) = prCat c + prSymbol (Tok t) = wrap "\"" (prtS t) "\"" + prCat c = showString c rmPunct :: [Symbol String Token] -> [Symbol String Token] rmPunct [] = [] @@ -112,37 +61,3 @@ isPunct c = c `elem` "-_.;.,?!" comments :: [String] -> ShowS comments = unlinesS . map (showString . ("; " ++)) - --- --- * Utils --- - -nl :: ShowS -nl = showChar '\n' - -sp :: ShowS -sp = showChar ' ' - -wrap :: String -> ShowS -> String -> ShowS -wrap o s c = showString o . s . showString c - -concatS :: [ShowS] -> ShowS -concatS = foldr (.) id - -unwordsS :: [ShowS] -> ShowS -unwordsS = concatS . intersperse sp - -unlinesS :: [ShowS] -> ShowS -unlinesS = concatS . intersperse nl - -sortAndGroupBy :: Ord b => - (a -> b) -- ^ Gets the value to sort and group by - -> [a] - -> [[a]] -sortAndGroupBy f = groupBy (both (==) f) . sortBy (both compare f) - -both :: (b -> b -> c) -> (a -> b) -> a -> a -> c -both f g x y = f (g x) (g y) - -prtS :: Print a => a -> ShowS -prtS = showString . prt \ No newline at end of file diff --git a/src/GF/Speech/PrJSGF.hs b/src/GF/Speech/PrJSGF.hs new file mode 100644 index 000000000..4c849e8c8 --- /dev/null +++ b/src/GF/Speech/PrJSGF.hs @@ -0,0 +1,67 @@ +{- + ************************************************************** + GF Module + + Description : This module prints a CFG as a JSGF grammar. + + Author : Björn Bringert (bringert@cs.chalmers.se) + + License : GPL (GNU General Public License) + + Created : October 1, 2004 + + Modified : + ************************************************************** +-} + +-- FIXME: remove / warn / fail if there are int / string literal +-- categories in the grammar + +-- FIXME: convert to UTF-8 + +module PrJSGF (jsgfPrinter) where + +import SRG +import Ident +import CFGrammar +import Parser (Symbol(..)) +import GrammarTypes +import PrintParser +import Option + +jsgfPrinter :: Ident -- ^ Grammar name + -> Options -> CFGrammar -> String +jsgfPrinter name opts cfg = prJSGF srg "" + where srg = makeSRG name opts cfg + +prJSGF :: SRG -> ShowS +prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) + = header . mainCat . unlinesS (map prRule rs) + where + header = showString "#JSGF V1.0 UTF-8;" . nl + . comments ["JSGF speech recognition grammar for " ++ name, + "Generated by GF"] . nl + . showString ("grammar " ++ name ++ ";") . nl + . nl + mainCat = comments ["Start category: " ++ origStart] . nl + . showString "public
= " . prCat start . showChar ';' . nl . nl + prRule (SRGRule cat origCat rhs) = + comments [origCat] . nl + . prCat cat . showString " = " . join " | " (map prAlt rhs) . nl + prAlt rhs | null rhs' = showString "" + | otherwise = wrap "(" (unwordsS (map prSymbol rhs')) ")" + where rhs' = rmPunct rhs + prSymbol (Cat c) = prCat c + prSymbol (Tok t) = wrap "\"" (prtS t) "\"" + prCat c = showChar '<' . showString c . showChar '>' + +rmPunct :: [Symbol String Token] -> [Symbol String Token] +rmPunct [] = [] +rmPunct (Tok t:ss) | all isPunct (prt t) = rmPunct ss +rmPunct (s:ss) = s : rmPunct ss + +isPunct :: Char -> Bool +isPunct c = c `elem` "-_.;.,?!" + +comments :: [String] -> ShowS +comments = unlinesS . map (showString . ("// " ++)) diff --git a/src/GF/Speech/SRG.hs b/src/GF/Speech/SRG.hs new file mode 100644 index 000000000..0fc7349f3 --- /dev/null +++ b/src/GF/Speech/SRG.hs @@ -0,0 +1,125 @@ +{- + ************************************************************** + GF Module + + Description : Representation of, conversion to, and + utilities for printing of a + general Speech Recognition Grammar. + + Author : Björn Bringert (bringert@cs.chalmers.se) + + License : GPL (GNU General Public License) + + Created : October 1, 2004 + + Modified : + ************************************************************** +-} + +-- FIXME: remove / warn / fail if there are int / string literal +-- categories in the grammar + +-- FIXME: figure out name prefix from grammar name + +module SRG where + +import Ident +import CFGrammar +import Parser (Symbol(..)) +import GrammarTypes +import PrintParser +import TransformCFG +import Option + +import Data.List +import Data.Maybe (fromMaybe) +import Data.FiniteMap + + +data SRG = SRG { grammarName :: String -- ^ grammar name + , startCat :: String -- ^ start category name + , origStartCat :: String -- ^ original start category name + , rules :: [SRGRule] + } +data SRGRule = SRGRule String String [SRGAlt] -- ^ SRG category name, original category name + -- and productions +type SRGAlt = [Symbol String Token] +type CatName = (String,String) -- ^ SRG category name and original name + +type CatNames = FiniteMap String String + +makeSRG :: Ident -- ^ Grammar name + -> Options -- ^ Grammar options + -> CFGrammar -- ^ A context-free grammar + -> SRG +makeSRG i opts gr = SRG { grammarName = name, + startCat = start, + origStartCat = origStart, + rules = rs } + where + name = prIdent i + origStart = fromMaybe "S" (getOptVal opts gStartCat) ++ "{}.s" + start = lookupFM_ names origStart + gr' = makeNice gr + names = mkCatNames name (nub $ map ruleCat gr') + rs = map (cfgRulesToSRGRule names) (sortAndGroupBy ruleCat gr') + +cfgRulesToSRGRule :: FiniteMap String String -> [CFRule_] -> SRGRule +cfgRulesToSRGRule names rs@(r:_) = SRGRule cat origCat rhs + where origCat = ruleCat r + cat = lookupFM_ names origCat + rhs = nub $ map (map renameCat . ruleRhs) rs + renameCat (Cat c) = Cat (lookupFM_ names c) + renameCat t = t + +ruleCat :: Rule n c t -> c +ruleCat (Rule c _ _) = c + +ruleRhs :: Rule n c t -> [Symbol c t] +ruleRhs (Rule _ r _) = r + +mkCatNames :: String -- ^ Category name prefix + -> [String] -- ^ Original category names + -> FiniteMap String String -- ^ Maps original names to SRG names +mkCatNames prefix origNames = listToFM (zip origNames names) + where names = [prefix ++ "_" ++ show x | x <- [0..]] + +-- +-- * Utilities for building and printing SRGs +-- + +nl :: ShowS +nl = showChar '\n' + +sp :: ShowS +sp = showChar ' ' + +wrap :: String -> ShowS -> String -> ShowS +wrap o s c = showString o . s . showString c + +concatS :: [ShowS] -> ShowS +concatS = foldr (.) id + +unwordsS :: [ShowS] -> ShowS +unwordsS = join " " + +unlinesS :: [ShowS] -> ShowS +unlinesS = join "\n" + +join :: String -> [ShowS] -> ShowS +join glue = concatS . intersperse (showString glue) + +sortAndGroupBy :: Ord b => + (a -> b) -- ^ Gets the value to sort and group by + -> [a] + -> [[a]] +sortAndGroupBy f = groupBy (both (==) f) . sortBy (both compare f) + +both :: (b -> b -> c) -> (a -> b) -> a -> a -> c +both f g x y = f (g x) (g y) + +prtS :: Print a => a -> ShowS +prtS = showString . prt + +lookupFM_ :: (Ord key, Show key) => FiniteMap key elt -> key -> elt +lookupFM_ fm k = lookupWithDefaultFM fm (error $ "Key not found: " ++ show k) k diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs index 9119b8f36..dfffd2b2a 100644 --- a/src/GF/UseGrammar/Custom.hs +++ b/src/GF/UseGrammar/Custom.hs @@ -28,6 +28,7 @@ import PrOld import MkGFC import CFtoSRG import PrGSL (gslPrinter) +import PrJSGF (jsgfPrinter) import Zipper @@ -194,6 +195,9 @@ customGrammarPrinter = ,(strCI "gsl", \s -> let opts = stateOptions s name = cncId s in gslPrinter name opts $ Cnv.cfg $ statePInfo s) + ,(strCI "jsgf", \s -> let opts = stateOptions s + name = cncId s + in jsgfPrinter name opts $ Cnv.cfg $ statePInfo s) ,(strCI "plbnf", prLBNF True) ,(strCI "lbnf", prLBNF False) ,(strCI "bnf", prBNF False)