1
0
forked from GitHub/gf-core

Many fixes to JSGF format (never tested before). Implemented JSGF+SISR. Left recursion removal destroys SISR, must be fixed.

This commit is contained in:
bringert
2006-12-17 13:17:17 +00:00
parent 6d819b2c84
commit 2b326f8041
5 changed files with 98 additions and 51 deletions

View File

@@ -22,57 +22,85 @@ module GF.Speech.PrJSGF (jsgfPrinter) where
import GF.Conversion.Types import GF.Conversion.Types
import GF.Data.Utilities import GF.Data.Utilities
import GF.Formalism.CFG import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..)) import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..))
import GF.Infra.Ident import GF.Infra.Ident
import GF.Infra.Print import GF.Infra.Print
import GF.Infra.Option import GF.Infra.Option
import GF.Probabilistic.Probabilistic (Probs) import GF.Probabilistic.Probabilistic (Probs)
import GF.Speech.SISR
import GF.Speech.SRG import GF.Speech.SRG
import GF.Speech.RegExp import GF.Speech.RegExp
import Debug.Trace
jsgfPrinter :: Ident -- ^ Grammar name jsgfPrinter :: Ident -- ^ Grammar name
-> String -- ^ Start category -> String -- ^ Start category
-> Options -> Maybe Probs -> CGrammar -> String -> Options
jsgfPrinter name start opts probs cfg = prJSGF srg "" -> Maybe SISRFormat
-> Maybe Probs -> CGrammar -> String
jsgfPrinter name start opts sisr probs cfg = trace (show srg) $ prJSGF srg sisr ""
where srg = makeSimpleSRG name start opts probs cfg where srg = makeSimpleSRG name start opts probs cfg
prJSGF :: SRG -> ShowS prJSGF :: SRG -> Maybe SISRFormat -> ShowS
prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) sisr
= header . mainCat . unlinesS (map prRule rs) = header . nl
. mainCat . nl
. unlinesS topCatRules . nl
. unlinesS (map prRule rs)
where where
header = showString "#JSGF V1.0 UTF-8;" . nl header = showString "#JSGF V1.0 UTF-8;" . nl
. comments ["JSGF speech recognition grammar for " ++ name, . comment ("JSGF speech recognition grammar for " ++ name)
"Generated by GF"] . nl . comment "Generated by GF"
. showString ("grammar " ++ name ++ ";") . nl . showString ("grammar " ++ name ++ ";") . nl
. nl mainCat = comment ("Start category: " ++ origStart)
mainCat = comments ["Start category: " ++ origStart] . nl . rule True "MAIN" [prCat start]
. showString "public <MAIN> = " . prCat start . showChar ';' . nl . nl
prRule (SRGRule cat origCat rhs) = prRule (SRGRule cat origCat rhs) =
comments [origCat] . nl comment origCat
. prCat cat . showString " = " . joinS " | " (map prAlt (ebnfSRGAlts rhs)) . nl . rule False cat (map prAlt (ebnfSRGAlts rhs))
-- FIXME: use the probability -- FIXME: use the probability
prAlt (EBnfSRGAlt mp _ rhs) = prItem rhs prAlt (EBnfSRGAlt mp n rhs) = tag sisr (profileInitSISR n) . showChar ' '. prItem sisr rhs
topCatRules = [rule True (catFormId tc) (map (it tc) cs) | (tc,cs) <- topCats]
where topCats = buildMultiMap [(cfgCatToGFCat origCat, cat) | SRGRule cat origCat _ <- rs]
it i c = prCat c . tag sisr [(EThis :. catFieldId i) := (ERef c)]
catFormId :: String -> String
catFormId = (++ "_cat")
catFieldId :: String -> String
catFieldId = (++ "_field")
prCat :: SRGCat -> ShowS prCat :: SRGCat -> ShowS
prCat c = showChar '<' . showString c . showChar '>' prCat c = showChar '<' . showString c . showChar '>'
prItem :: EBnfSRGItem -> ShowS prItem :: Maybe SISRFormat -> EBnfSRGItem -> ShowS
prItem = f prItem sisr = f 1
where where
f (REUnion []) = showString "<VOID>" f _ (REUnion []) = showString "<VOID>"
f (REUnion xs) = wrap "(" (joinS " | " (map f xs)) ")" f p (REUnion xs) = (if p >= 1 then paren else id) (joinS " | " (map (f 1) xs))
f (REConcat []) = showString "<NULL>" f _ (REConcat []) = showString "<NULL>"
f (REConcat xs) = wrap "(" (unwordsS (map f xs)) ")" f p (REConcat xs) = (if p >= 3 then paren else id) (unwordsS (map (f 2) xs))
f (RERepeat x) = wrap "(" (f x) ")" . showString "*" f p (RERepeat x) = f 3 x . showString "*"
f (RESymbol s) = prSymbol s f _ (RESymbol s) = prSymbol sisr s
prSymbol :: Symbol SRGNT Token -> ShowS prSymbol :: Maybe SISRFormat -> Symbol SRGNT Token -> ShowS
prSymbol (Cat (c,_)) = prCat c prSymbol sisr (Cat n@(c,_)) = prCat c . tag sisr (catSISR n)
prSymbol (Tok t) | all isPunct (prt t) = id -- removes punctuation prSymbol _ (Tok t) | all isPunct (prt t) = id -- removes punctuation
| otherwise = wrap "\"" (prtS t) "\"" | otherwise = prtS t -- FIXME: quote if there is whitespace or odd chars
tag :: Maybe SISRFormat -> [SISRExpr] -> ShowS
tag Nothing _ = id
tag _ [] = id
tag (Just fmt) t = showString "{" . showString (prSISR fmt t) . showString "}"
isPunct :: Char -> Bool isPunct :: Char -> Bool
isPunct c = c `elem` "-_.;.,?!" isPunct c = c `elem` "-_.;.,?!"
comments :: [String] -> ShowS comment :: String -> ShowS
comments = unlinesS . map (showString . ("// " ++)) comment s = showString "// " . showString s . nl
paren f = wrap "(" f ")"
rule :: Bool -> SRGCat -> [ShowS] -> ShowS
rule pub c xs = p . prCat c . showString " = " . joinS " | " xs . showChar ';' . nl
where p = if pub then showString "public " else id

View File

@@ -11,7 +11,7 @@
-- categories in the grammar -- categories in the grammar
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
module GF.Speech.PrSRGS (SISRFormat(..), srgsXmlPrinter) where module GF.Speech.PrSRGS (srgsXmlPrinter) where
import GF.Data.Utilities import GF.Data.Utilities
import GF.Data.XML import GF.Data.XML
@@ -71,19 +71,11 @@ prSrgsXml sisr (SRG{grammarName=name,startCat=start,
rule :: String -> [XML] -> XML rule :: String -> [XML] -> XML
rule i = Tag "rule" [("id",i)] rule i = Tag "rule" [("id",i)]
cfgCatToGFCat :: String -> String
cfgCatToGFCat = takeWhile (/='{')
mkProd :: Maybe SISRFormat -> EBnfSRGAlt -> XML mkProd :: Maybe SISRFormat -> EBnfSRGAlt -> XML
mkProd sisr (EBnfSRGAlt mp n@(Name f prs) rhs) = Tag "item" w (t ++ xs) mkProd sisr (EBnfSRGAlt mp n rhs) = Tag "item" w (t ++ xs)
where xs = [mkItem sisr rhs] where xs = [mkItem sisr rhs]
w = maybe [] (\p -> [("weight", show p)]) mp w = maybe [] (\p -> [("weight", show p)]) mp
t = [tag sisr ts] t = [tag sisr (profileInitSISR n)]
ts = [(EThis :. "name") := (EStr (prIdent f))] ++
[(EThis :. ("arg" ++ show n)) := (EStr (argInit (prs!!n)))
| n <- [0..length prs-1]]
argInit (Unify _) = "?"
argInit (Constant f) = maybe "?" prIdent (forestName f)
mkItem :: Maybe SISRFormat -> EBnfSRGItem -> XML mkItem :: Maybe SISRFormat -> EBnfSRGItem -> XML
mkItem sisr = f mkItem sisr = f
@@ -94,15 +86,14 @@ mkItem sisr = f
f (RESymbol s) = symItem sisr s f (RESymbol s) = symItem sisr s
symItem :: Maybe SISRFormat -> Symbol SRGNT Token -> XML symItem :: Maybe SISRFormat -> Symbol SRGNT Token -> XML
symItem sisr (Cat (c,slots)) = Tag "item" [] ([Tag "ruleref" [("uri","#" ++ c)] []]++t) symItem sisr (Cat n@(c,_)) =
where Tag "item" [] [Tag "ruleref" [("uri","#" ++ c)] [], tag sisr (catSISR n)]
t = if null ts then [] else [tag sisr ts]
ts = [(EThis :. ("arg" ++ show s)) := (ERef c) | s <- slots]
symItem _ (Tok t) = Tag "item" [] [Data (showToken t)] symItem _ (Tok t) = Tag "item" [] [Data (showToken t)]
tag :: Maybe SISRFormat -> [SISRExpr] -> XML tag :: Maybe SISRFormat -> [SISRExpr] -> XML
tag Nothing _ = Empty tag Nothing _ = Empty
tag (Just fmt) ts = Tag "tag" [] [Data (join "; " (map (prSISR fmt) ts))] tag _ [] = Empty
tag (Just fmt) ts = Tag "tag" [] [Data (prSISR fmt ts)]
catFormId :: String -> String catFormId :: String -> String

View File

@@ -10,10 +10,19 @@
-- --
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
module GF.Speech.SISR (SISRFormat(..), SISRExpr(..), prSISR) where module GF.Speech.SISR (SISRFormat(..), SISRExpr(..), prSISR,
profileInitSISR, catSISR) where
import Data.List import Data.List
import GF.Conversion.Types
import GF.Data.Utilities
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), forestName)
import GF.Infra.Ident
import GF.Speech.SRG
infixl 8 :. infixl 8 :.
infixr 1 := infixr 1 :=
@@ -32,8 +41,8 @@ data SISRExpr = SISRExpr := SISRExpr
| ENew String [SISRExpr] | ENew String [SISRExpr]
deriving Show deriving Show
prSISR :: SISRFormat -> SISRExpr -> String prSISR :: SISRFormat -> [SISRExpr] -> String
prSISR fmt = f prSISR fmt = join "; " . map f
where where
f e = f e =
case e of case e of
@@ -43,4 +52,15 @@ prSISR fmt = f
ERef y -> "$" ++ y ERef y -> "$" ++ y
EStr s -> show s EStr s -> show s
EApp x ys -> f x ++ "(" ++ concat (intersperse "," (map f ys)) ++ ")" EApp x ys -> f x ++ "(" ++ concat (intersperse "," (map f ys)) ++ ")"
ENew n ys -> "new " ++ n ++ "(" ++ concat (intersperse "," (map f ys)) ++ ")" ENew n ys -> "new " ++ n ++ "(" ++ concat (intersperse "," (map f ys)) ++ ")"
profileInitSISR :: Name -> [SISRExpr]
profileInitSISR (Name f prs) =
[(EThis :. "name") := (EStr (prIdent f))] ++
[(EThis :. ("arg" ++ show n)) := (EStr (argInit (prs!!n)))
| n <- [0..length prs-1]]
where argInit (Unify _) = "?"
argInit (Constant f) = maybe "?" prIdent (forestName f)
catSISR :: SRGNT -> [SISRExpr]
catSISR (c,slots) = [(EThis :. ("arg" ++ show s)) := (ERef c) | s <- slots]

View File

@@ -22,7 +22,7 @@ module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..),
SRGCat, SRGNT, SRGCat, SRGNT,
makeSimpleSRG, makeSRG makeSimpleSRG, makeSRG
, lookupFM_, prtS , lookupFM_, prtS
, topDownFilter , topDownFilter, cfgCatToGFCat
, EBnfSRGAlt(..), EBnfSRGItem , EBnfSRGAlt(..), EBnfSRGItem
, ebnfSRGAlts , ebnfSRGAlts
) where ) where
@@ -168,6 +168,9 @@ topDownFilter srg@(SRG { startCat = start, rules = rs }) = srg { rules = rs' }
allSRGCats :: SRG -> [String] allSRGCats :: SRG -> [String]
allSRGCats SRG { rules = rs } = [c | SRGRule c _ _ <- rs] allSRGCats SRG { rules = rs } = [c | SRGRule c _ _ <- rs]
cfgCatToGFCat :: SRGCat -> String
cfgCatToGFCat = takeWhile (/='{')
-- --
-- * Size-optimized EBNF SRGs -- * Size-optimized EBNF SRGs
-- --

View File

@@ -62,6 +62,7 @@ import GF.CF.CFtoSRG
import GF.Speech.PrGSL (gslPrinter) import GF.Speech.PrGSL (gslPrinter)
import GF.Speech.PrJSGF (jsgfPrinter) import GF.Speech.PrJSGF (jsgfPrinter)
import qualified GF.Speech.PrSRGS as SRGS import qualified GF.Speech.PrSRGS as SRGS
import qualified GF.Speech.SISR as SISR
import GF.Speech.PrSLF import GF.Speech.PrSLF
import GF.Speech.PrFA (faGraphvizPrinter,regularPrinter,faCPrinter) import GF.Speech.PrFA (faGraphvizPrinter,regularPrinter,faCPrinter)
import GF.Speech.PrRegExp (regexpPrinter) import GF.Speech.PrRegExp (regexpPrinter)
@@ -252,7 +253,11 @@ customGrammarPrinter =
in gslPrinter name start opts Nothing $ stateCFG s) in gslPrinter name start opts Nothing $ stateCFG s)
,(strCI "jsgf", \opts s -> let name = cncId s ,(strCI "jsgf", \opts s -> let name = cncId s
start = getStartCatCF opts s start = getStartCatCF opts s
in jsgfPrinter name start opts Nothing $ stateCFG s) in jsgfPrinter name start opts Nothing Nothing $ stateCFG s)
,(strCI "jsgf_sisr_old",
\opts s -> let name = cncId s
start = getStartCatCF opts s
in jsgfPrinter name start opts (Just SISR.SISROld) Nothing $ stateCFG s)
,(strCI "srgs_xml", \opts s -> let name = cncId s ,(strCI "srgs_xml", \opts s -> let name = cncId s
start = getStartCatCF opts s start = getStartCatCF opts s
in SRGS.srgsXmlPrinter name start opts Nothing Nothing $ stateCFG s) in SRGS.srgsXmlPrinter name start opts Nothing Nothing $ stateCFG s)
@@ -264,7 +269,7 @@ customGrammarPrinter =
,(strCI "srgs_xml_sisr_old", ,(strCI "srgs_xml_sisr_old",
\opts s -> let name = cncId s \opts s -> let name = cncId s
start = getStartCatCF opts s start = getStartCatCF opts s
in SRGS.srgsXmlPrinter name start opts (Just SRGS.SISROld) Nothing $ stateCFG s) in SRGS.srgsXmlPrinter name start opts (Just SISR.SISROld) Nothing $ stateCFG s)
,(strCI "vxml", \opts s -> let start = getStartCat opts s ,(strCI "vxml", \opts s -> let start = getStartCat opts s
in grammar2vxml start s) in grammar2vxml start s)
,(strCI "slf", \opts s -> let start = getStartCatCF opts s ,(strCI "slf", \opts s -> let start = getStartCatCF opts s