mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-23 19:42:50 -06:00
Generalized Speech Recognition Grammar generation. Added JSGF grammar printer.
This commit is contained in:
@@ -11,96 +11,45 @@
|
|||||||
|
|
||||||
Created : September 13, 2004
|
Created : September 13, 2004
|
||||||
|
|
||||||
Modified :
|
Modified : October 1, 2004
|
||||||
**************************************************************
|
**************************************************************
|
||||||
-}
|
-}
|
||||||
|
|
||||||
-- FIXME: this modules should not be in cfgm, but where?
|
|
||||||
|
|
||||||
-- FIXME: remove left-recursion
|
|
||||||
|
|
||||||
-- FIXME: remove empty rules
|
|
||||||
|
|
||||||
-- FIXME: remove categories with no RHS
|
|
||||||
|
|
||||||
-- FIXME: remove / warn / fail if there are int / string literal
|
-- FIXME: remove / warn / fail if there are int / string literal
|
||||||
-- categories in the grammar
|
-- categories in the grammar
|
||||||
|
|
||||||
-- FIXME: figure out name prefix from grammar name
|
|
||||||
|
|
||||||
module PrGSL (gslPrinter) where
|
module PrGSL (gslPrinter) where
|
||||||
|
|
||||||
|
import SRG
|
||||||
import Ident
|
import Ident
|
||||||
import CFGrammar
|
import CFGrammar
|
||||||
import Parser (Symbol(..))
|
import Parser (Symbol(..))
|
||||||
import GrammarTypes
|
import GrammarTypes
|
||||||
import PrintParser
|
import PrintParser
|
||||||
import TransformCFG
|
|
||||||
import Option
|
import Option
|
||||||
|
|
||||||
import Data.List
|
|
||||||
import Data.Maybe (fromMaybe)
|
|
||||||
import Data.FiniteMap
|
|
||||||
|
|
||||||
|
|
||||||
data GSLGrammar = GSLGrammar String -- ^ grammar name
|
|
||||||
String -- ^ start category name
|
|
||||||
[GSLRule]
|
|
||||||
data GSLRule = GSLRule String [GSLAlt]
|
|
||||||
type GSLAlt = [Symbol String Token]
|
|
||||||
|
|
||||||
type CatNames = FiniteMap String String
|
|
||||||
|
|
||||||
gslPrinter :: Ident -- ^ Grammar name
|
gslPrinter :: Ident -- ^ Grammar name
|
||||||
-> Options -> CFGrammar -> String
|
-> Options -> CFGrammar -> String
|
||||||
gslPrinter name opts = prGSL (prIdent name) start
|
gslPrinter name opts cfg = prGSL srg ""
|
||||||
where mstart = getOptVal opts gStartCat
|
where srg = makeSRG name opts cfg
|
||||||
start = fromMaybe "S" mstart ++ "{}.s"
|
|
||||||
|
|
||||||
prGSL :: String -- ^ Grammar name
|
prGSL :: SRG -> ShowS
|
||||||
-> String -- ^ startcat
|
prGSL (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
|
||||||
-> CFGrammar -> String
|
= header . mainCat . unlinesS (map prRule rs)
|
||||||
prGSL name start cfg = prGSLGrammar names gsl ""
|
|
||||||
where
|
|
||||||
cfg' = makeNice cfg
|
|
||||||
gsl = cfgToGSL name start cfg'
|
|
||||||
names = mkCatNames gsl
|
|
||||||
|
|
||||||
cfgToGSL :: String -- ^ grammar name
|
|
||||||
-> String -- ^ start category
|
|
||||||
-> [CFRule_] -> GSLGrammar
|
|
||||||
cfgToGSL name start =
|
|
||||||
GSLGrammar name start . map cfgRulesToGSLRule . sortAndGroupBy ruleCat
|
|
||||||
where
|
|
||||||
ruleCat (Rule c _ _) = c
|
|
||||||
ruleRhs (Rule _ r _) = r
|
|
||||||
cfgRulesToGSLRule rs@(r:_) = GSLRule (ruleCat r) (map ruleRhs rs)
|
|
||||||
|
|
||||||
mkCatNames :: GSLGrammar -> CatNames
|
|
||||||
mkCatNames (GSLGrammar name start rules) = listToFM (zip lhsCats names)
|
|
||||||
where names = [name ++ "_" ++ show x | x <- [0..]]
|
|
||||||
lhsCats = [ c | GSLRule c _ <- rules]
|
|
||||||
|
|
||||||
prGSLGrammar :: CatNames -> GSLGrammar -> ShowS
|
|
||||||
prGSLGrammar names (GSLGrammar name start g) =
|
|
||||||
header . mainCat . unlinesS (map prGSLrule g)
|
|
||||||
where
|
where
|
||||||
header = showString ";GSL2.0" . nl
|
header = showString ";GSL2.0" . nl
|
||||||
. comments ["Nuance speech recognition grammar for " ++ name,
|
. comments ["Nuance speech recognition grammar for " ++ name,
|
||||||
"Generated by GF"] . nl . nl
|
"Generated by GF"] . nl . nl
|
||||||
mainCat = showString ("; Start category: " ++ start) . nl
|
mainCat = showString ("; Start category: " ++ origStart) . nl
|
||||||
. showString ".MAIN " . prGSLCat start . nl . nl
|
. showString ".MAIN " . prCat start . nl . nl
|
||||||
prGSLrule (GSLRule cat rhs) =
|
prRule (SRGRule cat origCat rhs) =
|
||||||
showString "; " . prtS cat . nl
|
showString "; " . prtS origCat . nl
|
||||||
. prGSLCat cat . sp . wrap "[" (unwordsS (map prGSLAlt rhs)) "]" . nl
|
. prCat cat . sp . wrap "[" (unwordsS (map prAlt rhs)) "]" . nl
|
||||||
prGSLAlt rhs = wrap "(" (unwordsS (map prGSLSymbol rhs')) ")"
|
prAlt rhs = wrap "(" (unwordsS (map prSymbol rhs')) ")"
|
||||||
where rhs' = rmPunct rhs
|
where rhs' = rmPunct rhs
|
||||||
prGSLSymbol (Cat c) = prGSLCat c
|
prSymbol (Cat c) = prCat c
|
||||||
prGSLSymbol (Tok t) = wrap "\"" (prtS t) "\""
|
prSymbol (Tok t) = wrap "\"" (prtS t) "\""
|
||||||
prGSLCat c = showString n
|
prCat c = showString c
|
||||||
where n = case lookupFM names c of
|
|
||||||
Nothing -> error $ "Unknown category: " ++ c
|
|
||||||
Just x -> x
|
|
||||||
|
|
||||||
rmPunct :: [Symbol String Token] -> [Symbol String Token]
|
rmPunct :: [Symbol String Token] -> [Symbol String Token]
|
||||||
rmPunct [] = []
|
rmPunct [] = []
|
||||||
@@ -112,37 +61,3 @@ isPunct c = c `elem` "-_.;.,?!"
|
|||||||
|
|
||||||
comments :: [String] -> ShowS
|
comments :: [String] -> ShowS
|
||||||
comments = unlinesS . map (showString . ("; " ++))
|
comments = unlinesS . map (showString . ("; " ++))
|
||||||
|
|
||||||
--
|
|
||||||
-- * Utils
|
|
||||||
--
|
|
||||||
|
|
||||||
nl :: ShowS
|
|
||||||
nl = showChar '\n'
|
|
||||||
|
|
||||||
sp :: ShowS
|
|
||||||
sp = showChar ' '
|
|
||||||
|
|
||||||
wrap :: String -> ShowS -> String -> ShowS
|
|
||||||
wrap o s c = showString o . s . showString c
|
|
||||||
|
|
||||||
concatS :: [ShowS] -> ShowS
|
|
||||||
concatS = foldr (.) id
|
|
||||||
|
|
||||||
unwordsS :: [ShowS] -> ShowS
|
|
||||||
unwordsS = concatS . intersperse sp
|
|
||||||
|
|
||||||
unlinesS :: [ShowS] -> ShowS
|
|
||||||
unlinesS = concatS . intersperse nl
|
|
||||||
|
|
||||||
sortAndGroupBy :: Ord b =>
|
|
||||||
(a -> b) -- ^ Gets the value to sort and group by
|
|
||||||
-> [a]
|
|
||||||
-> [[a]]
|
|
||||||
sortAndGroupBy f = groupBy (both (==) f) . sortBy (both compare f)
|
|
||||||
|
|
||||||
both :: (b -> b -> c) -> (a -> b) -> a -> a -> c
|
|
||||||
both f g x y = f (g x) (g y)
|
|
||||||
|
|
||||||
prtS :: Print a => a -> ShowS
|
|
||||||
prtS = showString . prt
|
|
||||||
67
src/GF/Speech/PrJSGF.hs
Normal file
67
src/GF/Speech/PrJSGF.hs
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
{-
|
||||||
|
**************************************************************
|
||||||
|
GF Module
|
||||||
|
|
||||||
|
Description : This module prints a CFG as a JSGF grammar.
|
||||||
|
|
||||||
|
Author : Björn Bringert (bringert@cs.chalmers.se)
|
||||||
|
|
||||||
|
License : GPL (GNU General Public License)
|
||||||
|
|
||||||
|
Created : October 1, 2004
|
||||||
|
|
||||||
|
Modified :
|
||||||
|
**************************************************************
|
||||||
|
-}
|
||||||
|
|
||||||
|
-- FIXME: remove / warn / fail if there are int / string literal
|
||||||
|
-- categories in the grammar
|
||||||
|
|
||||||
|
-- FIXME: convert to UTF-8
|
||||||
|
|
||||||
|
module PrJSGF (jsgfPrinter) where
|
||||||
|
|
||||||
|
import SRG
|
||||||
|
import Ident
|
||||||
|
import CFGrammar
|
||||||
|
import Parser (Symbol(..))
|
||||||
|
import GrammarTypes
|
||||||
|
import PrintParser
|
||||||
|
import Option
|
||||||
|
|
||||||
|
jsgfPrinter :: Ident -- ^ Grammar name
|
||||||
|
-> Options -> CFGrammar -> String
|
||||||
|
jsgfPrinter name opts cfg = prJSGF srg ""
|
||||||
|
where srg = makeSRG name opts cfg
|
||||||
|
|
||||||
|
prJSGF :: SRG -> ShowS
|
||||||
|
prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
|
||||||
|
= header . mainCat . unlinesS (map prRule rs)
|
||||||
|
where
|
||||||
|
header = showString "#JSGF V1.0 UTF-8;" . nl
|
||||||
|
. comments ["JSGF speech recognition grammar for " ++ name,
|
||||||
|
"Generated by GF"] . nl
|
||||||
|
. showString ("grammar " ++ name ++ ";") . nl
|
||||||
|
. nl
|
||||||
|
mainCat = comments ["Start category: " ++ origStart] . nl
|
||||||
|
. showString "public <MAIN> = " . prCat start . showChar ';' . nl . nl
|
||||||
|
prRule (SRGRule cat origCat rhs) =
|
||||||
|
comments [origCat] . nl
|
||||||
|
. prCat cat . showString " = " . join " | " (map prAlt rhs) . nl
|
||||||
|
prAlt rhs | null rhs' = showString "<NULL>"
|
||||||
|
| otherwise = wrap "(" (unwordsS (map prSymbol rhs')) ")"
|
||||||
|
where rhs' = rmPunct rhs
|
||||||
|
prSymbol (Cat c) = prCat c
|
||||||
|
prSymbol (Tok t) = wrap "\"" (prtS t) "\""
|
||||||
|
prCat c = showChar '<' . showString c . showChar '>'
|
||||||
|
|
||||||
|
rmPunct :: [Symbol String Token] -> [Symbol String Token]
|
||||||
|
rmPunct [] = []
|
||||||
|
rmPunct (Tok t:ss) | all isPunct (prt t) = rmPunct ss
|
||||||
|
rmPunct (s:ss) = s : rmPunct ss
|
||||||
|
|
||||||
|
isPunct :: Char -> Bool
|
||||||
|
isPunct c = c `elem` "-_.;.,?!"
|
||||||
|
|
||||||
|
comments :: [String] -> ShowS
|
||||||
|
comments = unlinesS . map (showString . ("// " ++))
|
||||||
125
src/GF/Speech/SRG.hs
Normal file
125
src/GF/Speech/SRG.hs
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
{-
|
||||||
|
**************************************************************
|
||||||
|
GF Module
|
||||||
|
|
||||||
|
Description : Representation of, conversion to, and
|
||||||
|
utilities for printing of a
|
||||||
|
general Speech Recognition Grammar.
|
||||||
|
|
||||||
|
Author : Björn Bringert (bringert@cs.chalmers.se)
|
||||||
|
|
||||||
|
License : GPL (GNU General Public License)
|
||||||
|
|
||||||
|
Created : October 1, 2004
|
||||||
|
|
||||||
|
Modified :
|
||||||
|
**************************************************************
|
||||||
|
-}
|
||||||
|
|
||||||
|
-- FIXME: remove / warn / fail if there are int / string literal
|
||||||
|
-- categories in the grammar
|
||||||
|
|
||||||
|
-- FIXME: figure out name prefix from grammar name
|
||||||
|
|
||||||
|
module SRG where
|
||||||
|
|
||||||
|
import Ident
|
||||||
|
import CFGrammar
|
||||||
|
import Parser (Symbol(..))
|
||||||
|
import GrammarTypes
|
||||||
|
import PrintParser
|
||||||
|
import TransformCFG
|
||||||
|
import Option
|
||||||
|
|
||||||
|
import Data.List
|
||||||
|
import Data.Maybe (fromMaybe)
|
||||||
|
import Data.FiniteMap
|
||||||
|
|
||||||
|
|
||||||
|
data SRG = SRG { grammarName :: String -- ^ grammar name
|
||||||
|
, startCat :: String -- ^ start category name
|
||||||
|
, origStartCat :: String -- ^ original start category name
|
||||||
|
, rules :: [SRGRule]
|
||||||
|
}
|
||||||
|
data SRGRule = SRGRule String String [SRGAlt] -- ^ SRG category name, original category name
|
||||||
|
-- and productions
|
||||||
|
type SRGAlt = [Symbol String Token]
|
||||||
|
type CatName = (String,String) -- ^ SRG category name and original name
|
||||||
|
|
||||||
|
type CatNames = FiniteMap String String
|
||||||
|
|
||||||
|
makeSRG :: Ident -- ^ Grammar name
|
||||||
|
-> Options -- ^ Grammar options
|
||||||
|
-> CFGrammar -- ^ A context-free grammar
|
||||||
|
-> SRG
|
||||||
|
makeSRG i opts gr = SRG { grammarName = name,
|
||||||
|
startCat = start,
|
||||||
|
origStartCat = origStart,
|
||||||
|
rules = rs }
|
||||||
|
where
|
||||||
|
name = prIdent i
|
||||||
|
origStart = fromMaybe "S" (getOptVal opts gStartCat) ++ "{}.s"
|
||||||
|
start = lookupFM_ names origStart
|
||||||
|
gr' = makeNice gr
|
||||||
|
names = mkCatNames name (nub $ map ruleCat gr')
|
||||||
|
rs = map (cfgRulesToSRGRule names) (sortAndGroupBy ruleCat gr')
|
||||||
|
|
||||||
|
cfgRulesToSRGRule :: FiniteMap String String -> [CFRule_] -> SRGRule
|
||||||
|
cfgRulesToSRGRule names rs@(r:_) = SRGRule cat origCat rhs
|
||||||
|
where origCat = ruleCat r
|
||||||
|
cat = lookupFM_ names origCat
|
||||||
|
rhs = nub $ map (map renameCat . ruleRhs) rs
|
||||||
|
renameCat (Cat c) = Cat (lookupFM_ names c)
|
||||||
|
renameCat t = t
|
||||||
|
|
||||||
|
ruleCat :: Rule n c t -> c
|
||||||
|
ruleCat (Rule c _ _) = c
|
||||||
|
|
||||||
|
ruleRhs :: Rule n c t -> [Symbol c t]
|
||||||
|
ruleRhs (Rule _ r _) = r
|
||||||
|
|
||||||
|
mkCatNames :: String -- ^ Category name prefix
|
||||||
|
-> [String] -- ^ Original category names
|
||||||
|
-> FiniteMap String String -- ^ Maps original names to SRG names
|
||||||
|
mkCatNames prefix origNames = listToFM (zip origNames names)
|
||||||
|
where names = [prefix ++ "_" ++ show x | x <- [0..]]
|
||||||
|
|
||||||
|
--
|
||||||
|
-- * Utilities for building and printing SRGs
|
||||||
|
--
|
||||||
|
|
||||||
|
nl :: ShowS
|
||||||
|
nl = showChar '\n'
|
||||||
|
|
||||||
|
sp :: ShowS
|
||||||
|
sp = showChar ' '
|
||||||
|
|
||||||
|
wrap :: String -> ShowS -> String -> ShowS
|
||||||
|
wrap o s c = showString o . s . showString c
|
||||||
|
|
||||||
|
concatS :: [ShowS] -> ShowS
|
||||||
|
concatS = foldr (.) id
|
||||||
|
|
||||||
|
unwordsS :: [ShowS] -> ShowS
|
||||||
|
unwordsS = join " "
|
||||||
|
|
||||||
|
unlinesS :: [ShowS] -> ShowS
|
||||||
|
unlinesS = join "\n"
|
||||||
|
|
||||||
|
join :: String -> [ShowS] -> ShowS
|
||||||
|
join glue = concatS . intersperse (showString glue)
|
||||||
|
|
||||||
|
sortAndGroupBy :: Ord b =>
|
||||||
|
(a -> b) -- ^ Gets the value to sort and group by
|
||||||
|
-> [a]
|
||||||
|
-> [[a]]
|
||||||
|
sortAndGroupBy f = groupBy (both (==) f) . sortBy (both compare f)
|
||||||
|
|
||||||
|
both :: (b -> b -> c) -> (a -> b) -> a -> a -> c
|
||||||
|
both f g x y = f (g x) (g y)
|
||||||
|
|
||||||
|
prtS :: Print a => a -> ShowS
|
||||||
|
prtS = showString . prt
|
||||||
|
|
||||||
|
lookupFM_ :: (Ord key, Show key) => FiniteMap key elt -> key -> elt
|
||||||
|
lookupFM_ fm k = lookupWithDefaultFM fm (error $ "Key not found: " ++ show k) k
|
||||||
@@ -28,6 +28,7 @@ import PrOld
|
|||||||
import MkGFC
|
import MkGFC
|
||||||
import CFtoSRG
|
import CFtoSRG
|
||||||
import PrGSL (gslPrinter)
|
import PrGSL (gslPrinter)
|
||||||
|
import PrJSGF (jsgfPrinter)
|
||||||
|
|
||||||
import Zipper
|
import Zipper
|
||||||
|
|
||||||
@@ -194,6 +195,9 @@ customGrammarPrinter =
|
|||||||
,(strCI "gsl", \s -> let opts = stateOptions s
|
,(strCI "gsl", \s -> let opts = stateOptions s
|
||||||
name = cncId s
|
name = cncId s
|
||||||
in gslPrinter name opts $ Cnv.cfg $ statePInfo s)
|
in gslPrinter name opts $ Cnv.cfg $ statePInfo s)
|
||||||
|
,(strCI "jsgf", \s -> let opts = stateOptions s
|
||||||
|
name = cncId s
|
||||||
|
in jsgfPrinter name opts $ Cnv.cfg $ statePInfo s)
|
||||||
,(strCI "plbnf", prLBNF True)
|
,(strCI "plbnf", prLBNF True)
|
||||||
,(strCI "lbnf", prLBNF False)
|
,(strCI "lbnf", prLBNF False)
|
||||||
,(strCI "bnf", prBNF False)
|
,(strCI "bnf", prBNF False)
|
||||||
|
|||||||
Reference in New Issue
Block a user