Files
gf-core/src-3.0/GF/Speech/GSL.hs
2008-06-17 12:06:27 +00:00

95 lines
2.4 KiB
Haskell

----------------------------------------------------------------------
-- |
-- Module : GF.Speech.GSL
--
-- This module prints a CFG as a Nuance GSL 2.0 grammar.
--
-----------------------------------------------------------------------------
module GF.Speech.GSL (gslPrinter) where
import GF.Data.Utilities
import GF.Speech.CFG
import GF.Speech.SRG
import GF.Speech.RegExp
import GF.Infra.Ident
import PGF.CId
import PGF.Data
import Data.Char (toUpper,toLower)
import Data.List (partition)
import Text.PrettyPrint.HughesPJ
-- | Line length, in characters, handed to the pretty-printer style
-- when the grammar is rendered.
width :: Int
width = 75
-- | Render a grammar in GSL format as a 'String'.
--
-- The PGF and the 'CId' are passed to 'makeSimpleSRG'; the resulting
-- SRG is laid out by 'prGSL' and rendered with the default
-- pretty-printer style, except that the line length is set to 'width'.
gslPrinter :: PGF -> CId -> String
gslPrinter pgf cnc = renderStyle pageStyle grammarDoc
  where
    grammarDoc = prGSL (makeSimpleSRG pgf cnc)
    pageStyle = style { lineLength = width }
-- | Lay out a complete SRG as a GSL document.
--
-- The document consists of a banner (the @;GSL2.0@ marker followed by
-- two comment lines), the @.MAIN@ declaration naming the start
-- category, and one entry per rule. Consecutive sections are joined
-- with '$++$', i.e. separated by an empty line.
prGSL :: SRG -> Doc
prGSL srg = banner $++$ startDecl $++$ ruleDocs
  where
    banner =
      vcat
        [ text ";GSL2.0"
        , comment ("Nuance speech recognition grammar for " ++ srgName srg)
        , comment "Generated by GF"
        ]
    startDecl = hsep [text ".MAIN", prCat (srgStartCat srg)]
    -- Right fold ending in 'empty', so every rule (including the last)
    -- is followed by an empty line.
    ruleDocs = foldr (\rule rest -> prRule rule $++$ rest) empty (srgRules srg)
    -- A rule is its category name followed by the union of its alternatives.
    prRule (SRGRule cat alts) = hsep [prCat cat, union (map prAlt alts)]
    -- FIXME: use the probability
    prAlt (SRGAlt _ _ item) = prItem item
-- | Lay out a single right-hand-side item (a regular expression over
-- symbols) in GSL syntax.
--
-- * A union is printed via 'union' over its non-epsilon members; if any
--   member satisfies 'isEpsilon', the result is prefixed with @?@.
-- * A concatenation of exactly one item is printed as that item; any
--   other concatenation is wrapped in parentheses.
-- * A repetition is prefixed with @*@.
-- * A symbol is printed with 'prSymbol'.
prItem :: SRGItem -> Doc
prItem item = case item of
  REUnion alts ->
    let (epsilons, others) = partition isEpsilon alts
        optMark = if null epsilons then empty else text "?"
    in hcat [optMark, union (map prItem others)]
  REConcat [single] -> prItem single
  REConcat parts -> parens (fsep (map prItem parts))
  RERepeat body -> hcat [text "*", prItem body]
  RESymbol sym -> prSymbol sym
-- | Combine alternatives into one document.
--
-- A single alternative is returned unchanged; any other number of
-- alternatives (including none) is laid out with 'fsep' and enclosed
-- in square brackets.
union :: [Doc] -> Doc
union docs = case docs of
  [single] -> single
  _ -> brackets (fsep docs)
-- | Lay out one grammar symbol by dispatching through 'symbol':
-- a non-terminal is printed as the category name held in the first
-- component of the pair, and a token is printed via 'showToken'
-- wrapped in double quotes.
prSymbol :: Symbol SRGNT Token -> Doc
prSymbol = symbol prNonTerminal prTerminal
  where
    prNonTerminal nt = prCat (fst nt)
    prTerminal tok = doubleQuotes (showToken tok)
-- | Lay out a category name with its first character upper-cased.
-- GSL requires an upper case letter in category names.
prCat :: Cat -> Doc
prCat cat = text (firstToUpper cat)
-- | Upper-case the first character of a string, leaving the remaining
-- characters untouched. The empty string is returned as is.
firstToUpper :: String -> String
firstToUpper str = case str of
  [] -> []
  c : rest -> toUpper c : rest
{-
rmPunctCFG :: CGrammar -> CGrammar
rmPunctCFG g = [CFRule c (filter keepSymbol ss) n | CFRule c ss n <- g]
keepSymbol :: Symbol c Token -> Bool
keepSymbol (Tok t) = not (all isPunct (prt t))
keepSymbol _ = True
-}
-- | Lay out a token with every character lower-cased.
-- Nuance does not like upper case characters in tokens.
showToken :: Token -> Doc
showToken tok = text (map toLower tok)
-- | Test whether a character is one of the punctuation characters in
-- the literal below. Within the visible source this is referenced only
-- from the commented-out @keepSymbol@.
isPunct :: Char -> Bool
isPunct c = any (== c) "-_.:;.,?!()[]{}"
-- | Lay out a comment line: a semicolon, a space, then the given text.
comment :: String -> Doc
comment s = hsep [text ";", text s]
-- Pretty-printing utilities
-- | A document holding the empty string. Unlike 'empty' it is not the
-- identity of vertical composition, so it occupies a line of its own.
emptyLine :: Doc
emptyLine = text ""
-- | Vertical composition with an 'emptyLine' inserted between the two
-- documents.
($++$) :: Doc -> Doc -> Doc
x $++$ y = vcat [x, emptyLine, y]