mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
Generalized Speech Recognition Grammar generation. Added JSGF grammar printer.
This commit is contained in:
@@ -11,96 +11,45 @@
|
||||
|
||||
Created : September 13, 2004
|
||||
|
||||
Modified :
|
||||
Modified : October 1, 2004
|
||||
**************************************************************
|
||||
-}
|
||||
|
||||
-- FIXME: this modules should not be in cfgm, but where?
|
||||
|
||||
-- FIXME: remove left-recursion
|
||||
|
||||
-- FIXME: remove empty rules
|
||||
|
||||
-- FIXME: remove categories with no RHS
|
||||
|
||||
-- FIXME: remove / warn / fail if there are int / string literal
|
||||
-- categories in the grammar
|
||||
|
||||
-- FIXME: figure out name prefix from grammar name
|
||||
|
||||
module PrGSL (gslPrinter) where
|
||||
|
||||
import SRG
|
||||
import Ident
|
||||
import CFGrammar
|
||||
import Parser (Symbol(..))
|
||||
import GrammarTypes
|
||||
import PrintParser
|
||||
import TransformCFG
|
||||
import Option
|
||||
|
||||
import Data.List
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Data.FiniteMap
|
||||
|
||||
|
||||
data GSLGrammar = GSLGrammar String -- ^ grammar name
|
||||
String -- ^ start category name
|
||||
[GSLRule]
|
||||
data GSLRule = GSLRule String [GSLAlt]
|
||||
type GSLAlt = [Symbol String Token]
|
||||
|
||||
type CatNames = FiniteMap String String
|
||||
|
||||
gslPrinter :: Ident -- ^ Grammar name
|
||||
-> Options -> CFGrammar -> String
|
||||
gslPrinter name opts = prGSL (prIdent name) start
|
||||
where mstart = getOptVal opts gStartCat
|
||||
start = fromMaybe "S" mstart ++ "{}.s"
|
||||
gslPrinter name opts cfg = prGSL srg ""
|
||||
where srg = makeSRG name opts cfg
|
||||
|
||||
prGSL :: String -- ^ Grammar name
|
||||
-> String -- ^ startcat
|
||||
-> CFGrammar -> String
|
||||
prGSL name start cfg = prGSLGrammar names gsl ""
|
||||
where
|
||||
cfg' = makeNice cfg
|
||||
gsl = cfgToGSL name start cfg'
|
||||
names = mkCatNames gsl
|
||||
|
||||
cfgToGSL :: String -- ^ grammar name
|
||||
-> String -- ^ start category
|
||||
-> [CFRule_] -> GSLGrammar
|
||||
cfgToGSL name start =
|
||||
GSLGrammar name start . map cfgRulesToGSLRule . sortAndGroupBy ruleCat
|
||||
where
|
||||
ruleCat (Rule c _ _) = c
|
||||
ruleRhs (Rule _ r _) = r
|
||||
cfgRulesToGSLRule rs@(r:_) = GSLRule (ruleCat r) (map ruleRhs rs)
|
||||
|
||||
mkCatNames :: GSLGrammar -> CatNames
|
||||
mkCatNames (GSLGrammar name start rules) = listToFM (zip lhsCats names)
|
||||
where names = [name ++ "_" ++ show x | x <- [0..]]
|
||||
lhsCats = [ c | GSLRule c _ <- rules]
|
||||
|
||||
prGSLGrammar :: CatNames -> GSLGrammar -> ShowS
|
||||
prGSLGrammar names (GSLGrammar name start g) =
|
||||
header . mainCat . unlinesS (map prGSLrule g)
|
||||
prGSL :: SRG -> ShowS
|
||||
prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
|
||||
= header . mainCat . unlinesS (map prRule rs)
|
||||
where
|
||||
header = showString ";GSL2.0" . nl
|
||||
. comments ["Nuance speech recognition grammar for " ++ name,
|
||||
"Generated by GF"] . nl . nl
|
||||
mainCat = showString ("; Start category: " ++ start) . nl
|
||||
. showString ".MAIN " . prGSLCat start . nl . nl
|
||||
prGSLrule (GSLRule cat rhs) =
|
||||
showString "; " . prtS cat . nl
|
||||
. prGSLCat cat . sp . wrap "[" (unwordsS (map prGSLAlt rhs)) "]" . nl
|
||||
prGSLAlt rhs = wrap "(" (unwordsS (map prGSLSymbol rhs')) ")"
|
||||
mainCat = showString ("; Start category: " ++ origStart) . nl
|
||||
. showString ".MAIN " . prCat start . nl . nl
|
||||
prRule (SRGRule cat origCat rhs) =
|
||||
showString "; " . prtS origCat . nl
|
||||
. prCat cat . sp . wrap "[" (unwordsS (map prAlt rhs)) "]" . nl
|
||||
prAlt rhs = wrap "(" (unwordsS (map prSymbol rhs')) ")"
|
||||
where rhs' = rmPunct rhs
|
||||
prGSLSymbol (Cat c) = prGSLCat c
|
||||
prGSLSymbol (Tok t) = wrap "\"" (prtS t) "\""
|
||||
prGSLCat c = showString n
|
||||
where n = case lookupFM names c of
|
||||
Nothing -> error $ "Unknown category: " ++ c
|
||||
Just x -> x
|
||||
prSymbol (Cat c) = prCat c
|
||||
prSymbol (Tok t) = wrap "\"" (prtS t) "\""
|
||||
prCat c = showString c
|
||||
|
||||
rmPunct :: [Symbol String Token] -> [Symbol String Token]
|
||||
rmPunct [] = []
|
||||
@@ -112,37 +61,3 @@ isPunct c = c `elem` "-_.;.,?!"
|
||||
|
||||
comments :: [String] -> ShowS
|
||||
comments = unlinesS . map (showString . ("; " ++))
|
||||
|
||||
--
|
||||
-- * Utils
|
||||
--
|
||||
|
||||
nl :: ShowS
|
||||
nl = showChar '\n'
|
||||
|
||||
sp :: ShowS
|
||||
sp = showChar ' '
|
||||
|
||||
wrap :: String -> ShowS -> String -> ShowS
|
||||
wrap o s c = showString o . s . showString c
|
||||
|
||||
concatS :: [ShowS] -> ShowS
|
||||
concatS = foldr (.) id
|
||||
|
||||
unwordsS :: [ShowS] -> ShowS
|
||||
unwordsS = concatS . intersperse sp
|
||||
|
||||
unlinesS :: [ShowS] -> ShowS
|
||||
unlinesS = concatS . intersperse nl
|
||||
|
||||
sortAndGroupBy :: Ord b =>
|
||||
(a -> b) -- ^ Gets the value to sort and group by
|
||||
-> [a]
|
||||
-> [[a]]
|
||||
sortAndGroupBy f = groupBy (both (==) f) . sortBy (both compare f)
|
||||
|
||||
both :: (b -> b -> c) -> (a -> b) -> a -> a -> c
|
||||
both f g x y = f (g x) (g y)
|
||||
|
||||
prtS :: Print a => a -> ShowS
|
||||
prtS = showString . prt
|
||||
67
src/GF/Speech/PrJSGF.hs
Normal file
67
src/GF/Speech/PrJSGF.hs
Normal file
@@ -0,0 +1,67 @@
|
||||
{-
|
||||
**************************************************************
|
||||
GF Module
|
||||
|
||||
Description : This module prints a CFG as a JSGF grammar.
|
||||
|
||||
Author : Björn Bringert (bringert@cs.chalmers.se)
|
||||
|
||||
License : GPL (GNU General Public License)
|
||||
|
||||
Created : October 1, 2004
|
||||
|
||||
Modified :
|
||||
**************************************************************
|
||||
-}
|
||||
|
||||
-- FIXME: remove / warn / fail if there are int / string literal
|
||||
-- categories in the grammar
|
||||
|
||||
-- FIXME: convert to UTF-8
|
||||
|
||||
module PrJSGF (jsgfPrinter) where
|
||||
|
||||
import SRG
|
||||
import Ident
|
||||
import CFGrammar
|
||||
import Parser (Symbol(..))
|
||||
import GrammarTypes
|
||||
import PrintParser
|
||||
import Option
|
||||
|
||||
jsgfPrinter :: Ident -- ^ Grammar name
|
||||
-> Options -> CFGrammar -> String
|
||||
jsgfPrinter name opts cfg = prJSGF srg ""
|
||||
where srg = makeSRG name opts cfg
|
||||
|
||||
prJSGF :: SRG -> ShowS
|
||||
prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
|
||||
= header . mainCat . unlinesS (map prRule rs)
|
||||
where
|
||||
header = showString "#JSGF V1.0 UTF-8;" . nl
|
||||
. comments ["JSGF speech recognition grammar for " ++ name,
|
||||
"Generated by GF"] . nl
|
||||
. showString ("grammar " ++ name ++ ";") . nl
|
||||
. nl
|
||||
mainCat = comments ["Start category: " ++ origStart] . nl
|
||||
. showString "public <MAIN> = " . prCat start . showChar ';' . nl . nl
|
||||
prRule (SRGRule cat origCat rhs) =
|
||||
comments [origCat] . nl
|
||||
. prCat cat . showString " = " . join " | " (map prAlt rhs) . nl
|
||||
prAlt rhs | null rhs' = showString "<NULL>"
|
||||
| otherwise = wrap "(" (unwordsS (map prSymbol rhs')) ")"
|
||||
where rhs' = rmPunct rhs
|
||||
prSymbol (Cat c) = prCat c
|
||||
prSymbol (Tok t) = wrap "\"" (prtS t) "\""
|
||||
prCat c = showChar '<' . showString c . showChar '>'
|
||||
|
||||
rmPunct :: [Symbol String Token] -> [Symbol String Token]
|
||||
rmPunct [] = []
|
||||
rmPunct (Tok t:ss) | all isPunct (prt t) = rmPunct ss
|
||||
rmPunct (s:ss) = s : rmPunct ss
|
||||
|
||||
isPunct :: Char -> Bool
|
||||
isPunct c = c `elem` "-_.;.,?!"
|
||||
|
||||
comments :: [String] -> ShowS
|
||||
comments = unlinesS . map (showString . ("// " ++))
|
||||
125
src/GF/Speech/SRG.hs
Normal file
125
src/GF/Speech/SRG.hs
Normal file
@@ -0,0 +1,125 @@
|
||||
{-
|
||||
**************************************************************
|
||||
GF Module
|
||||
|
||||
Description : Representation of, conversion to, and
|
||||
utilities for printing of a
|
||||
general Speech Recognition Grammar.
|
||||
|
||||
Author : Björn Bringert (bringert@cs.chalmers.se)
|
||||
|
||||
License : GPL (GNU General Public License)
|
||||
|
||||
Created : October 1, 2004
|
||||
|
||||
Modified :
|
||||
**************************************************************
|
||||
-}
|
||||
|
||||
-- FIXME: remove / warn / fail if there are int / string literal
|
||||
-- categories in the grammar
|
||||
|
||||
-- FIXME: figure out name prefix from grammar name
|
||||
|
||||
module SRG where
|
||||
|
||||
import Ident
|
||||
import CFGrammar
|
||||
import Parser (Symbol(..))
|
||||
import GrammarTypes
|
||||
import PrintParser
|
||||
import TransformCFG
|
||||
import Option
|
||||
|
||||
import Data.List
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Data.FiniteMap
|
||||
|
||||
|
||||
data SRG = SRG { grammarName :: String -- ^ grammar name
|
||||
, startCat :: String -- ^ start category name
|
||||
, origStartCat :: String -- ^ original start category name
|
||||
, rules :: [SRGRule]
|
||||
}
|
||||
data SRGRule = SRGRule String String [SRGAlt] -- ^ SRG category name, original category name
|
||||
-- and productions
|
||||
type SRGAlt = [Symbol String Token]
|
||||
type CatName = (String,String) -- ^ SRG category name and original name
|
||||
|
||||
type CatNames = FiniteMap String String
|
||||
|
||||
makeSRG :: Ident -- ^ Grammar name
|
||||
-> Options -- ^ Grammar options
|
||||
-> CFGrammar -- ^ A context-free grammar
|
||||
-> SRG
|
||||
makeSRG i opts gr = SRG { grammarName = name,
|
||||
startCat = start,
|
||||
origStartCat = origStart,
|
||||
rules = rs }
|
||||
where
|
||||
name = prIdent i
|
||||
origStart = fromMaybe "S" (getOptVal opts gStartCat) ++ "{}.s"
|
||||
start = lookupFM_ names origStart
|
||||
gr' = makeNice gr
|
||||
names = mkCatNames name (nub $ map ruleCat gr')
|
||||
rs = map (cfgRulesToSRGRule names) (sortAndGroupBy ruleCat gr')
|
||||
|
||||
cfgRulesToSRGRule :: FiniteMap String String -> [CFRule_] -> SRGRule
|
||||
cfgRulesToSRGRule names rs@(r:_) = SRGRule cat origCat rhs
|
||||
where origCat = ruleCat r
|
||||
cat = lookupFM_ names origCat
|
||||
rhs = nub $ map (map renameCat . ruleRhs) rs
|
||||
renameCat (Cat c) = Cat (lookupFM_ names c)
|
||||
renameCat t = t
|
||||
|
||||
ruleCat :: Rule n c t -> c
|
||||
ruleCat (Rule c _ _) = c
|
||||
|
||||
ruleRhs :: Rule n c t -> [Symbol c t]
|
||||
ruleRhs (Rule _ r _) = r
|
||||
|
||||
mkCatNames :: String -- ^ Category name prefix
|
||||
-> [String] -- ^ Original category names
|
||||
-> FiniteMap String String -- ^ Maps original names to SRG names
|
||||
mkCatNames prefix origNames = listToFM (zip origNames names)
|
||||
where names = [prefix ++ "_" ++ show x | x <- [0..]]
|
||||
|
||||
--
|
||||
-- * Utilities for building and printing SRGs
|
||||
--
|
||||
|
||||
nl :: ShowS
|
||||
nl = showChar '\n'
|
||||
|
||||
sp :: ShowS
|
||||
sp = showChar ' '
|
||||
|
||||
wrap :: String -> ShowS -> String -> ShowS
|
||||
wrap o s c = showString o . s . showString c
|
||||
|
||||
concatS :: [ShowS] -> ShowS
|
||||
concatS = foldr (.) id
|
||||
|
||||
unwordsS :: [ShowS] -> ShowS
|
||||
unwordsS = join " "
|
||||
|
||||
unlinesS :: [ShowS] -> ShowS
|
||||
unlinesS = join "\n"
|
||||
|
||||
join :: String -> [ShowS] -> ShowS
|
||||
join glue = concatS . intersperse (showString glue)
|
||||
|
||||
sortAndGroupBy :: Ord b =>
|
||||
(a -> b) -- ^ Gets the value to sort and group by
|
||||
-> [a]
|
||||
-> [[a]]
|
||||
sortAndGroupBy f = groupBy (both (==) f) . sortBy (both compare f)
|
||||
|
||||
both :: (b -> b -> c) -> (a -> b) -> a -> a -> c
|
||||
both f g x y = f (g x) (g y)
|
||||
|
||||
prtS :: Print a => a -> ShowS
|
||||
prtS = showString . prt
|
||||
|
||||
lookupFM_ :: (Ord key, Show key) => FiniteMap key elt -> key -> elt
|
||||
lookupFM_ fm k = lookupWithDefaultFM fm (error $ "Key not found: " ++ show k) k
|
||||
@@ -28,6 +28,7 @@ import PrOld
|
||||
import MkGFC
|
||||
import CFtoSRG
|
||||
import PrGSL (gslPrinter)
|
||||
import PrJSGF (jsgfPrinter)
|
||||
|
||||
import Zipper
|
||||
|
||||
@@ -194,6 +195,9 @@ customGrammarPrinter =
|
||||
,(strCI "gsl", \s -> let opts = stateOptions s
|
||||
name = cncId s
|
||||
in gslPrinter name opts $ Cnv.cfg $ statePInfo s)
|
||||
,(strCI "jsgf", \s -> let opts = stateOptions s
|
||||
name = cncId s
|
||||
in jsgfPrinter name opts $ Cnv.cfg $ statePInfo s)
|
||||
,(strCI "plbnf", prLBNF True)
|
||||
,(strCI "lbnf", prLBNF False)
|
||||
,(strCI "bnf", prBNF False)
|
||||
|
||||
Reference in New Issue
Block a user