Added GSL speech recognition grammar generation.

This commit is contained in:
bringert
2004-09-14 08:36:57 +00:00
parent 318379f73a
commit 87eec27336
4 changed files with 231 additions and 1 deletions

126
src/GF/Speech/PrGSL.hs Normal file
View File

@@ -0,0 +1,126 @@
{-
**************************************************************
GF Module
Description : This module prints a CFG as a Nuance GSL 2.0
grammar.
Author : Björn Bringert (bringert@cs.chalmers.se)
License : GPL (GNU General Public License)
Created : September 13, 2004
Modified :
**************************************************************
-}
-- FIXME: this modules should not be in cfgm, but where?
-- FIXME: remove left-recursion
-- FIXME: remove empty rules
-- FIXME: remove categories with no RHS
-- FIXME: remove / warn / fail if there are int / string literal
-- categories in the grammar
-- FIXME: figure out name prefix from grammar name
module PrGSL (prGSL) where
import Ident
import CFGrammar
import Parser (Symbol(..))
import GrammarTypes
import PrintParser
import TransformCFG
import Data.List
import Data.FiniteMap
type GSLGrammar = [GSLRule]
data GSLRule = GSLRule String [GSLAlt]
type GSLAlt = [Symbol String Token]
type CatNames = FiniteMap String String
prGSL :: CFGrammar -> String
prGSL cfg = prGSLGrammar names gsl ""
where
cfg' = makeNice cfg
gsl = cfgToGSL cfg'
names = mkCatNames "GSL_" gsl
cfgToGSL :: [CFRule_] -> GSLGrammar
cfgToGSL = map cfgRulesToGSLRule . sortAndGroupBy ruleCat
where
ruleCat (Rule c _ _) = c
ruleRhs (Rule _ r _) = r
cfgRulesToGSLRule rs@(r:_) = GSLRule (ruleCat r) (map ruleRhs rs)
mkCatNames :: String -- name prefix
-> GSLGrammar -> CatNames
mkCatNames pref gsl = listToFM (zip lhsCats names)
where names = [pref ++ show x | x <- [0..]]
lhsCats = [ c | GSLRule c _ <- gsl ]
prGSLGrammar :: CatNames -> GSLGrammar -> ShowS
prGSLGrammar names g = header . unlinesS (map prGSLrule g)
where
header = showString ";GSL2.0" . nl
prGSLrule (GSLRule cat rhs) =
showString "; " . prtS cat . nl
. prGSLCat cat . sp . wrap "[" (unwordsS (map prGSLAlt rhs)) "]" . nl
prGSLAlt rhs = wrap "(" (unwordsS (map prGSLSymbol rhs')) ")"
where rhs' = rmPunct rhs
prGSLSymbol (Cat c) = prGSLCat c
prGSLSymbol (Tok t) = wrap "\"" (prtS t) "\""
prGSLCat c = showString n
where n = case lookupFM names c of
Nothing -> error $ "Unknown category: " ++ c
Just x -> x
rmPunct :: [Symbol String Token] -> [Symbol String Token]
rmPunct [] = []
rmPunct (Tok t:ss) | all isPunct (prt t) = rmPunct ss
rmPunct (s:ss) = s : rmPunct ss
isPunct :: Char -> Bool
isPunct c = c `elem` "-_.;.,?!"
--
-- * Utils
--
nl :: ShowS
nl = showChar '\n'
sp :: ShowS
sp = showChar ' '
wrap :: String -> ShowS -> String -> ShowS
wrap o s c = showString o . s . showString c
concatS :: [ShowS] -> ShowS
concatS = foldr (.) id
unwordsS :: [ShowS] -> ShowS
unwordsS = concatS . intersperse sp
unlinesS :: [ShowS] -> ShowS
unlinesS = concatS . intersperse nl
sortAndGroupBy :: Ord b =>
(a -> b) -- ^ Gets the value to sort and group by
-> [a]
-> [[a]]
sortAndGroupBy f = groupBy (both (==) f) . sortBy (both compare f)
both :: (b -> b -> c) -> (a -> b) -> a -> a -> c
both f g x y = f (g x) (g y)
prtS :: Print a => a -> ShowS
prtS = showString . prt

View File

@@ -0,0 +1,102 @@
{-
**************************************************************
GF Module
Description : This module does some useful transformations
on CFGs.
Author : Björn Bringert (bringert@cs.chalmers.se)
License : GPL (GNU General Public License)
Created : September 13, 2004
Modified :
**************************************************************
-}
module TransformCFG where
import Ident
import CFGrammar
import Parser (Symbol(..))
import GrammarTypes
import PrintParser
import Data.FiniteMap
import Data.List
import Data.Maybe (fromJust)
import Debug.Trace
-- FIXME: remove cycles
-- not very nice to get replace the structured CFCat type with a simple string
type CFRule_ = Rule CFName String Token
type CFRules = FiniteMap String [CFRule_]
makeNice :: CFGrammar -> [CFRule_]
makeNice = concat . eltsFM . makeNice' . groupProds . cfgToCFRules
where makeNice' = removeLeftRecursion . removeEmptyCats
cfgToCFRules :: CFGrammar -> [CFRule_]
cfgToCFRules cfg = [Rule (catToString c) (map symb r) n | Rule c r n <- cfg]
where symb (Cat c) = Cat (catToString c)
symb (Tok t) = Tok t
catToString = prt
-- | Group productions by their lhs categories
groupProds :: [CFRule_] -> CFRules
groupProds = addListToFM_C (++) emptyFM . map (\rs -> (ruleCat rs,[rs]))
where ruleCat (Rule c _ _) = c
-- | Remove productions which use categories which have no productions
removeEmptyCats :: CFRules -> CFRules
removeEmptyCats rss = listToFM $ fix removeEmptyCats' $ fmToList rss
where
removeEmptyCats' :: [(String,[CFRule_])] -> [(String,[CFRule_])]
removeEmptyCats' rs = k'
where
keep = filter (not . null . snd) rs
allCats = nub [c | (_,r) <- rs, Rule _ rhs _ <- r, Cat c <- rhs]
emptyCats = filter (nothingOrNull . flip lookup rs) allCats
k' = map (\ (c,xs) -> (c, filter (not . anyUsedBy emptyCats) xs)) keep
anyUsedBy :: [String] -> CFRule_ -> Bool
anyUsedBy ss (Rule _ r _) = or [c `elem` ss | Cat c <- r]
removeLeftRecursion :: CFRules -> CFRules
removeLeftRecursion rs = listToFM $ concatMap removeDirectLeftRecursion $ map handleProds $ fmToList rs
where
handleProds (c, r) = (c, concatMap handleProd r)
handleProd (Rule ai (Cat aj:alpha) n) | aj < ai =
-- FIXME: this will give multiple rules with the same name
[Rule ai (beta ++ alpha) n | Rule _ beta _ <- fromJust (lookupFM rs aj)]
handleProd r = [r]
removeDirectLeftRecursion :: (String,[CFRule_]) -- ^ All productions for a category
-> [(String,[CFRule_])]
removeDirectLeftRecursion (a,rs) | null dr = [(a,rs)]
| otherwise = [(a, as), (a', a's)]
where
a' = a ++ "'" -- FIXME: this might not be unique
(dr,nr) = partition isDirectLeftRecursive rs
as = maybeEndWithA' nr
is = [Rule a' (tail r) n | Rule _ r n <- dr]
a's = maybeEndWithA' is
maybeEndWithA' xs = xs ++ [Rule c (r++[Cat a']) n | Rule c r n <- xs]
isDirectLeftRecursive :: CFRule_ -> Bool
isDirectLeftRecursive (Rule c (Cat c':_) _) = c == c'
isDirectLeftRecursive _ = False
fix :: Eq a => (a -> a) -> a -> a
fix f x = let x' = f x in if x' == x then x else fix f x'
nothingOrNull :: Maybe [a] -> Bool
nothingOrNull Nothing = True
nothingOrNull (Just xs) = null xs

View File

@@ -27,6 +27,7 @@ import PrGrammar
import PrOld
import MkGFC
import CFtoSRG
import PrGSL (prGSL)
import Zipper
@@ -190,6 +191,7 @@ customGrammarPrinter =
,(strCI "cf", prCF . stateCF)
,(strCI "old", printGrammarOld . stateGrammarST)
,(strCI "srg", prSRG . stateCF)
,(strCI "gsl", prGSL . Cnv.cfg . statePInfo)
,(strCI "lbnf", prLBNF . stateCF)
,(strCI "haskell", grammar2haskell . stateGrammarST)
,(strCI "morpho", prMorpho . stateMorpho)

View File

@@ -9,7 +9,7 @@ GHCFUDFLAG=
JAVAFLAGS=-target 1.4 -source 1.4
HUGSINCLUDE =.:for-hugs:api:source:canonical:cf:grammar:infra:shell:useGrammar:compile:newparsing:trace:
BASICINCLUDE =-iapi -icompile -igrammar -iinfra -ishell -isource -icanonical -iuseGrammar -icf -inewparsing -iparsers -inotrace -icfgm
BASICINCLUDE =-iapi -icompile -igrammar -iinfra -ishell -isource -icanonical -iuseGrammar -icf -inewparsing -iparsers -inotrace -icfgm -ispeech
GHCINCLUDE =-ifor-ghc $(BASICINCLUDE)
GHCINCLUDENOFUD=-ifor-ghc-nofud $(BASICINCLUDE)
GHCINCLUDEGFT =-ifor-gft $(BASICINCLUDE)