mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-10 05:29:30 -06:00
Added GSL speech recognition grammar generation.
This commit is contained in:
126
src/GF/Speech/PrGSL.hs
Normal file
126
src/GF/Speech/PrGSL.hs
Normal file
@@ -0,0 +1,126 @@
|
||||
{-
|
||||
**************************************************************
|
||||
GF Module
|
||||
|
||||
Description : This module prints a CFG as a Nuance GSL 2.0
|
||||
grammar.
|
||||
|
||||
Author : Björn Bringert (bringert@cs.chalmers.se)
|
||||
|
||||
License : GPL (GNU General Public License)
|
||||
|
||||
Created : September 13, 2004
|
||||
|
||||
Modified :
|
||||
**************************************************************
|
||||
-}
|
||||
|
||||
-- FIXME: this modules should not be in cfgm, but where?
|
||||
|
||||
-- FIXME: remove left-recursion
|
||||
|
||||
-- FIXME: remove empty rules
|
||||
|
||||
-- FIXME: remove categories with no RHS
|
||||
|
||||
-- FIXME: remove / warn / fail if there are int / string literal
|
||||
-- categories in the grammar
|
||||
|
||||
-- FIXME: figure out name prefix from grammar name
|
||||
|
||||
module PrGSL (prGSL) where
|
||||
|
||||
import Ident
|
||||
import CFGrammar
|
||||
import Parser (Symbol(..))
|
||||
import GrammarTypes
|
||||
import PrintParser
|
||||
import TransformCFG
|
||||
|
||||
import Data.List
|
||||
import Data.FiniteMap
|
||||
|
||||
|
||||
type GSLGrammar = [GSLRule]
|
||||
data GSLRule = GSLRule String [GSLAlt]
|
||||
type GSLAlt = [Symbol String Token]
|
||||
|
||||
type CatNames = FiniteMap String String
|
||||
|
||||
prGSL :: CFGrammar -> String
|
||||
prGSL cfg = prGSLGrammar names gsl ""
|
||||
where
|
||||
cfg' = makeNice cfg
|
||||
gsl = cfgToGSL cfg'
|
||||
names = mkCatNames "GSL_" gsl
|
||||
|
||||
cfgToGSL :: [CFRule_] -> GSLGrammar
|
||||
cfgToGSL = map cfgRulesToGSLRule . sortAndGroupBy ruleCat
|
||||
where
|
||||
ruleCat (Rule c _ _) = c
|
||||
ruleRhs (Rule _ r _) = r
|
||||
cfgRulesToGSLRule rs@(r:_) = GSLRule (ruleCat r) (map ruleRhs rs)
|
||||
|
||||
mkCatNames :: String -- name prefix
|
||||
-> GSLGrammar -> CatNames
|
||||
mkCatNames pref gsl = listToFM (zip lhsCats names)
|
||||
where names = [pref ++ show x | x <- [0..]]
|
||||
lhsCats = [ c | GSLRule c _ <- gsl ]
|
||||
|
||||
prGSLGrammar :: CatNames -> GSLGrammar -> ShowS
|
||||
prGSLGrammar names g = header . unlinesS (map prGSLrule g)
|
||||
where
|
||||
header = showString ";GSL2.0" . nl
|
||||
prGSLrule (GSLRule cat rhs) =
|
||||
showString "; " . prtS cat . nl
|
||||
. prGSLCat cat . sp . wrap "[" (unwordsS (map prGSLAlt rhs)) "]" . nl
|
||||
prGSLAlt rhs = wrap "(" (unwordsS (map prGSLSymbol rhs')) ")"
|
||||
where rhs' = rmPunct rhs
|
||||
prGSLSymbol (Cat c) = prGSLCat c
|
||||
prGSLSymbol (Tok t) = wrap "\"" (prtS t) "\""
|
||||
prGSLCat c = showString n
|
||||
where n = case lookupFM names c of
|
||||
Nothing -> error $ "Unknown category: " ++ c
|
||||
Just x -> x
|
||||
|
||||
rmPunct :: [Symbol String Token] -> [Symbol String Token]
|
||||
rmPunct [] = []
|
||||
rmPunct (Tok t:ss) | all isPunct (prt t) = rmPunct ss
|
||||
rmPunct (s:ss) = s : rmPunct ss
|
||||
|
||||
isPunct :: Char -> Bool
|
||||
isPunct c = c `elem` "-_.;.,?!"
|
||||
|
||||
--
|
||||
-- * Utils
|
||||
--
|
||||
|
||||
nl :: ShowS
|
||||
nl = showChar '\n'
|
||||
|
||||
sp :: ShowS
|
||||
sp = showChar ' '
|
||||
|
||||
wrap :: String -> ShowS -> String -> ShowS
|
||||
wrap o s c = showString o . s . showString c
|
||||
|
||||
concatS :: [ShowS] -> ShowS
|
||||
concatS = foldr (.) id
|
||||
|
||||
unwordsS :: [ShowS] -> ShowS
|
||||
unwordsS = concatS . intersperse sp
|
||||
|
||||
unlinesS :: [ShowS] -> ShowS
|
||||
unlinesS = concatS . intersperse nl
|
||||
|
||||
sortAndGroupBy :: Ord b =>
|
||||
(a -> b) -- ^ Gets the value to sort and group by
|
||||
-> [a]
|
||||
-> [[a]]
|
||||
sortAndGroupBy f = groupBy (both (==) f) . sortBy (both compare f)
|
||||
|
||||
both :: (b -> b -> c) -> (a -> b) -> a -> a -> c
|
||||
both f g x y = f (g x) (g y)
|
||||
|
||||
prtS :: Print a => a -> ShowS
|
||||
prtS = showString . prt
|
||||
102
src/GF/Speech/TransformCFG.hs
Normal file
102
src/GF/Speech/TransformCFG.hs
Normal file
@@ -0,0 +1,102 @@
|
||||
{-
|
||||
**************************************************************
|
||||
GF Module
|
||||
|
||||
Description : This module does some useful transformations
|
||||
on CFGs.
|
||||
|
||||
Author : Björn Bringert (bringert@cs.chalmers.se)
|
||||
|
||||
License : GPL (GNU General Public License)
|
||||
|
||||
Created : September 13, 2004
|
||||
|
||||
Modified :
|
||||
**************************************************************
|
||||
-}
|
||||
|
||||
module TransformCFG where
|
||||
|
||||
import Ident
|
||||
import CFGrammar
|
||||
import Parser (Symbol(..))
|
||||
import GrammarTypes
|
||||
import PrintParser
|
||||
|
||||
import Data.FiniteMap
|
||||
import Data.List
|
||||
import Data.Maybe (fromJust)
|
||||
|
||||
import Debug.Trace
|
||||
|
||||
-- FIXME: remove cycles
|
||||
|
||||
|
||||
-- not very nice to get replace the structured CFCat type with a simple string
|
||||
type CFRule_ = Rule CFName String Token
|
||||
|
||||
type CFRules = FiniteMap String [CFRule_]
|
||||
|
||||
makeNice :: CFGrammar -> [CFRule_]
|
||||
makeNice = concat . eltsFM . makeNice' . groupProds . cfgToCFRules
|
||||
where makeNice' = removeLeftRecursion . removeEmptyCats
|
||||
|
||||
cfgToCFRules :: CFGrammar -> [CFRule_]
|
||||
cfgToCFRules cfg = [Rule (catToString c) (map symb r) n | Rule c r n <- cfg]
|
||||
where symb (Cat c) = Cat (catToString c)
|
||||
symb (Tok t) = Tok t
|
||||
catToString = prt
|
||||
|
||||
-- | Group productions by their lhs categories
|
||||
groupProds :: [CFRule_] -> CFRules
|
||||
groupProds = addListToFM_C (++) emptyFM . map (\rs -> (ruleCat rs,[rs]))
|
||||
where ruleCat (Rule c _ _) = c
|
||||
|
||||
-- | Remove productions which use categories which have no productions
|
||||
removeEmptyCats :: CFRules -> CFRules
|
||||
removeEmptyCats rss = listToFM $ fix removeEmptyCats' $ fmToList rss
|
||||
where
|
||||
removeEmptyCats' :: [(String,[CFRule_])] -> [(String,[CFRule_])]
|
||||
removeEmptyCats' rs = k'
|
||||
where
|
||||
keep = filter (not . null . snd) rs
|
||||
allCats = nub [c | (_,r) <- rs, Rule _ rhs _ <- r, Cat c <- rhs]
|
||||
emptyCats = filter (nothingOrNull . flip lookup rs) allCats
|
||||
k' = map (\ (c,xs) -> (c, filter (not . anyUsedBy emptyCats) xs)) keep
|
||||
|
||||
anyUsedBy :: [String] -> CFRule_ -> Bool
|
||||
anyUsedBy ss (Rule _ r _) = or [c `elem` ss | Cat c <- r]
|
||||
|
||||
removeLeftRecursion :: CFRules -> CFRules
|
||||
removeLeftRecursion rs = listToFM $ concatMap removeDirectLeftRecursion $ map handleProds $ fmToList rs
|
||||
where
|
||||
handleProds (c, r) = (c, concatMap handleProd r)
|
||||
handleProd (Rule ai (Cat aj:alpha) n) | aj < ai =
|
||||
-- FIXME: this will give multiple rules with the same name
|
||||
[Rule ai (beta ++ alpha) n | Rule _ beta _ <- fromJust (lookupFM rs aj)]
|
||||
handleProd r = [r]
|
||||
|
||||
removeDirectLeftRecursion :: (String,[CFRule_]) -- ^ All productions for a category
|
||||
-> [(String,[CFRule_])]
|
||||
removeDirectLeftRecursion (a,rs) | null dr = [(a,rs)]
|
||||
| otherwise = [(a, as), (a', a's)]
|
||||
where
|
||||
a' = a ++ "'" -- FIXME: this might not be unique
|
||||
(dr,nr) = partition isDirectLeftRecursive rs
|
||||
as = maybeEndWithA' nr
|
||||
is = [Rule a' (tail r) n | Rule _ r n <- dr]
|
||||
a's = maybeEndWithA' is
|
||||
maybeEndWithA' xs = xs ++ [Rule c (r++[Cat a']) n | Rule c r n <- xs]
|
||||
|
||||
isDirectLeftRecursive :: CFRule_ -> Bool
|
||||
isDirectLeftRecursive (Rule c (Cat c':_) _) = c == c'
|
||||
isDirectLeftRecursive _ = False
|
||||
|
||||
|
||||
|
||||
fix :: Eq a => (a -> a) -> a -> a
|
||||
fix f x = let x' = f x in if x' == x then x else fix f x'
|
||||
|
||||
nothingOrNull :: Maybe [a] -> Bool
|
||||
nothingOrNull Nothing = True
|
||||
nothingOrNull (Just xs) = null xs
|
||||
@@ -27,6 +27,7 @@ import PrGrammar
|
||||
import PrOld
|
||||
import MkGFC
|
||||
import CFtoSRG
|
||||
import PrGSL (prGSL)
|
||||
|
||||
import Zipper
|
||||
|
||||
@@ -190,6 +191,7 @@ customGrammarPrinter =
|
||||
,(strCI "cf", prCF . stateCF)
|
||||
,(strCI "old", printGrammarOld . stateGrammarST)
|
||||
,(strCI "srg", prSRG . stateCF)
|
||||
,(strCI "gsl", prGSL . Cnv.cfg . statePInfo)
|
||||
,(strCI "lbnf", prLBNF . stateCF)
|
||||
,(strCI "haskell", grammar2haskell . stateGrammarST)
|
||||
,(strCI "morpho", prMorpho . stateMorpho)
|
||||
|
||||
@@ -9,7 +9,7 @@ GHCFUDFLAG=
|
||||
JAVAFLAGS=-target 1.4 -source 1.4
|
||||
|
||||
HUGSINCLUDE =.:for-hugs:api:source:canonical:cf:grammar:infra:shell:useGrammar:compile:newparsing:trace:
|
||||
BASICINCLUDE =-iapi -icompile -igrammar -iinfra -ishell -isource -icanonical -iuseGrammar -icf -inewparsing -iparsers -inotrace -icfgm
|
||||
BASICINCLUDE =-iapi -icompile -igrammar -iinfra -ishell -isource -icanonical -iuseGrammar -icf -inewparsing -iparsers -inotrace -icfgm -ispeech
|
||||
GHCINCLUDE =-ifor-ghc $(BASICINCLUDE)
|
||||
GHCINCLUDENOFUD=-ifor-ghc-nofud $(BASICINCLUDE)
|
||||
GHCINCLUDEGFT =-ifor-gft $(BASICINCLUDE)
|
||||
|
||||
Reference in New Issue
Block a user