From 87eec27336bd9c2f123cffe56a67cc919c8da09b Mon Sep 17 00:00:00 2001 From: bringert Date: Tue, 14 Sep 2004 08:36:57 +0000 Subject: [PATCH] Added GSL speech recognition grammar generation. --- src/GF/Speech/PrGSL.hs | 126 ++++++++++++++++++++++++++++++++++ src/GF/Speech/TransformCFG.hs | 102 +++++++++++++++++++++++++++ src/GF/UseGrammar/Custom.hs | 2 + src/Makefile | 2 +- 4 files changed, 231 insertions(+), 1 deletion(-) create mode 100644 src/GF/Speech/PrGSL.hs create mode 100644 src/GF/Speech/TransformCFG.hs diff --git a/src/GF/Speech/PrGSL.hs b/src/GF/Speech/PrGSL.hs new file mode 100644 index 000000000..58271ec2c --- /dev/null +++ b/src/GF/Speech/PrGSL.hs @@ -0,0 +1,126 @@ +{- + ************************************************************** + GF Module + + Description : This module prints a CFG as a Nuance GSL 2.0 + grammar. + + Author : Björn Bringert (bringert@cs.chalmers.se) + + License : GPL (GNU General Public License) + + Created : September 13, 2004 + + Modified : + ************************************************************** +-} + +-- FIXME: this modules should not be in cfgm, but where? + +-- FIXME: remove left-recursion + +-- FIXME: remove empty rules + +-- FIXME: remove categories with no RHS + +-- FIXME: remove / warn / fail if there are int / string literal +-- categories in the grammar + +-- FIXME: figure out name prefix from grammar name + +module PrGSL (prGSL) where + +import Ident +import CFGrammar +import Parser (Symbol(..)) +import GrammarTypes +import PrintParser +import TransformCFG + +import Data.List +import Data.FiniteMap + + +type GSLGrammar = [GSLRule] +data GSLRule = GSLRule String [GSLAlt] +type GSLAlt = [Symbol String Token] + +type CatNames = FiniteMap String String + +prGSL :: CFGrammar -> String +prGSL cfg = prGSLGrammar names gsl "" + where + cfg' = makeNice cfg + gsl = cfgToGSL cfg' + names = mkCatNames "GSL_" gsl + +cfgToGSL :: [CFRule_] -> GSLGrammar +cfgToGSL = map cfgRulesToGSLRule . sortAndGroupBy ruleCat + where + ruleCat (Rule c _ _) = c + ruleRhs (Rule _ r _) = r + cfgRulesToGSLRule rs@(r:_) = GSLRule (ruleCat r) (map ruleRhs rs) + +mkCatNames :: String -- name prefix + -> GSLGrammar -> CatNames +mkCatNames pref gsl = listToFM (zip lhsCats names) + where names = [pref ++ show x | x <- [0..]] + lhsCats = [ c | GSLRule c _ <- gsl ] + +prGSLGrammar :: CatNames -> GSLGrammar -> ShowS +prGSLGrammar names g = header . unlinesS (map prGSLrule g) + where + header = showString ";GSL2.0" . nl + prGSLrule (GSLRule cat rhs) = + showString "; " . prtS cat . nl + . prGSLCat cat . sp . wrap "[" (unwordsS (map prGSLAlt rhs)) "]" . nl + prGSLAlt rhs = wrap "(" (unwordsS (map prGSLSymbol rhs')) ")" + where rhs' = rmPunct rhs + prGSLSymbol (Cat c) = prGSLCat c + prGSLSymbol (Tok t) = wrap "\"" (prtS t) "\"" + prGSLCat c = showString n + where n = case lookupFM names c of + Nothing -> error $ "Unknown category: " ++ c + Just x -> x + +rmPunct :: [Symbol String Token] -> [Symbol String Token] +rmPunct [] = [] +rmPunct (Tok t:ss) | all isPunct (prt t) = rmPunct ss +rmPunct (s:ss) = s : rmPunct ss + +isPunct :: Char -> Bool +isPunct c = c `elem` "-_.;.,?!" + +-- +-- * Utils +-- + +nl :: ShowS +nl = showChar '\n' + +sp :: ShowS +sp = showChar ' ' + +wrap :: String -> ShowS -> String -> ShowS +wrap o s c = showString o . s . showString c + +concatS :: [ShowS] -> ShowS +concatS = foldr (.) id + +unwordsS :: [ShowS] -> ShowS +unwordsS = concatS . intersperse sp + +unlinesS :: [ShowS] -> ShowS +unlinesS = concatS . intersperse nl + +sortAndGroupBy :: Ord b => + (a -> b) -- ^ Gets the value to sort and group by + -> [a] + -> [[a]] +sortAndGroupBy f = groupBy (both (==) f) . sortBy (both compare f) + +both :: (b -> b -> c) -> (a -> b) -> a -> a -> c +both f g x y = f (g x) (g y) + +prtS :: Print a => a -> ShowS +prtS = showString . prt \ No newline at end of file diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs new file mode 100644 index 000000000..322888da5 --- /dev/null +++ b/src/GF/Speech/TransformCFG.hs @@ -0,0 +1,102 @@ +{- + ************************************************************** + GF Module + + Description : This module does some useful transformations + on CFGs. + + Author : Björn Bringert (bringert@cs.chalmers.se) + + License : GPL (GNU General Public License) + + Created : September 13, 2004 + + Modified : + ************************************************************** +-} + +module TransformCFG where + +import Ident +import CFGrammar +import Parser (Symbol(..)) +import GrammarTypes +import PrintParser + +import Data.FiniteMap +import Data.List +import Data.Maybe (fromJust) + +import Debug.Trace + +-- FIXME: remove cycles + + +-- not very nice to get replace the structured CFCat type with a simple string +type CFRule_ = Rule CFName String Token + +type CFRules = FiniteMap String [CFRule_] + +makeNice :: CFGrammar -> [CFRule_] +makeNice = concat . eltsFM . makeNice' . groupProds . cfgToCFRules + where makeNice' = removeLeftRecursion . removeEmptyCats + +cfgToCFRules :: CFGrammar -> [CFRule_] +cfgToCFRules cfg = [Rule (catToString c) (map symb r) n | Rule c r n <- cfg] + where symb (Cat c) = Cat (catToString c) + symb (Tok t) = Tok t + catToString = prt + +-- | Group productions by their lhs categories +groupProds :: [CFRule_] -> CFRules +groupProds = addListToFM_C (++) emptyFM . map (\rs -> (ruleCat rs,[rs])) + where ruleCat (Rule c _ _) = c + +-- | Remove productions which use categories which have no productions +removeEmptyCats :: CFRules -> CFRules +removeEmptyCats rss = listToFM $ fix removeEmptyCats' $ fmToList rss + where + removeEmptyCats' :: [(String,[CFRule_])] -> [(String,[CFRule_])] + removeEmptyCats' rs = k' + where + keep = filter (not . null . snd) rs + allCats = nub [c | (_,r) <- rs, Rule _ rhs _ <- r, Cat c <- rhs] + emptyCats = filter (nothingOrNull . flip lookup rs) allCats + k' = map (\ (c,xs) -> (c, filter (not . anyUsedBy emptyCats) xs)) keep + +anyUsedBy :: [String] -> CFRule_ -> Bool +anyUsedBy ss (Rule _ r _) = or [c `elem` ss | Cat c <- r] + +removeLeftRecursion :: CFRules -> CFRules +removeLeftRecursion rs = listToFM $ concatMap removeDirectLeftRecursion $ map handleProds $ fmToList rs + where + handleProds (c, r) = (c, concatMap handleProd r) + handleProd (Rule ai (Cat aj:alpha) n) | aj < ai = + -- FIXME: this will give multiple rules with the same name + [Rule ai (beta ++ alpha) n | Rule _ beta _ <- fromJust (lookupFM rs aj)] + handleProd r = [r] + +removeDirectLeftRecursion :: (String,[CFRule_]) -- ^ All productions for a category + -> [(String,[CFRule_])] +removeDirectLeftRecursion (a,rs) | null dr = [(a,rs)] + | otherwise = [(a, as), (a', a's)] + where + a' = a ++ "'" -- FIXME: this might not be unique + (dr,nr) = partition isDirectLeftRecursive rs + as = maybeEndWithA' nr + is = [Rule a' (tail r) n | Rule _ r n <- dr] + a's = maybeEndWithA' is + maybeEndWithA' xs = xs ++ [Rule c (r++[Cat a']) n | Rule c r n <- xs] + +isDirectLeftRecursive :: CFRule_ -> Bool +isDirectLeftRecursive (Rule c (Cat c':_) _) = c == c' +isDirectLeftRecursive _ = False + + + +fix :: Eq a => (a -> a) -> a -> a +fix f x = let x' = f x in if x' == x then x else fix f x' + +nothingOrNull :: Maybe [a] -> Bool +nothingOrNull Nothing = True +nothingOrNull (Just xs) = null xs \ No newline at end of file diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs index 9df79e983..4fd12f12a 100644 --- a/src/GF/UseGrammar/Custom.hs +++ b/src/GF/UseGrammar/Custom.hs @@ -27,6 +27,7 @@ import PrGrammar import PrOld import MkGFC import CFtoSRG +import PrGSL (prGSL) import Zipper @@ -190,6 +191,7 @@ customGrammarPrinter = ,(strCI "cf", prCF . stateCF) ,(strCI "old", printGrammarOld . stateGrammarST) ,(strCI "srg", prSRG . stateCF) + ,(strCI "gsl", prGSL . Cnv.cfg . statePInfo) ,(strCI "lbnf", prLBNF . stateCF) ,(strCI "haskell", grammar2haskell . stateGrammarST) ,(strCI "morpho", prMorpho . stateMorpho) diff --git a/src/Makefile b/src/Makefile index 77254c590..a71f5f03d 100644 --- a/src/Makefile +++ b/src/Makefile @@ -9,7 +9,7 @@ GHCFUDFLAG= JAVAFLAGS=-target 1.4 -source 1.4 HUGSINCLUDE =.:for-hugs:api:source:canonical:cf:grammar:infra:shell:useGrammar:compile:newparsing:trace: -BASICINCLUDE =-iapi -icompile -igrammar -iinfra -ishell -isource -icanonical -iuseGrammar -icf -inewparsing -iparsers -inotrace -icfgm +BASICINCLUDE =-iapi -icompile -igrammar -iinfra -ishell -isource -icanonical -iuseGrammar -icf -inewparsing -iparsers -inotrace -icfgm -ispeech GHCINCLUDE =-ifor-ghc $(BASICINCLUDE) GHCINCLUDENOFUD=-ifor-ghc-nofud $(BASICINCLUDE) GHCINCLUDEGFT =-ifor-gft $(BASICINCLUDE)