1
0
forked from GitHub/gf-core

Many fixes to JSGF format (never tested before). Implemented JSGF+SISR. Left recursion removal destroys SISR, must be fixed.

This commit is contained in:
bringert
2006-12-17 13:17:17 +00:00
parent 6d819b2c84
commit 2b326f8041
5 changed files with 98 additions and 51 deletions

View File

@@ -22,57 +22,85 @@ module GF.Speech.PrJSGF (jsgfPrinter) where
import GF.Conversion.Types
import GF.Data.Utilities
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..))
import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..))
import GF.Infra.Ident
import GF.Infra.Print
import GF.Infra.Option
import GF.Probabilistic.Probabilistic (Probs)
import GF.Speech.SISR
import GF.Speech.SRG
import GF.Speech.RegExp
import Debug.Trace
jsgfPrinter :: Ident -- ^ Grammar name
-> String -- ^ Start category
-> Options -> Maybe Probs -> CGrammar -> String
jsgfPrinter name start opts probs cfg = prJSGF srg ""
-> Options
-> Maybe SISRFormat
-> Maybe Probs -> CGrammar -> String
jsgfPrinter name start opts sisr probs cfg = trace (show srg) $ prJSGF srg sisr ""
where srg = makeSimpleSRG name start opts probs cfg
prJSGF :: SRG -> ShowS
prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
= header . mainCat . unlinesS (map prRule rs)
prJSGF :: SRG -> Maybe SISRFormat -> ShowS
prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) sisr
= header . nl
. mainCat . nl
. unlinesS topCatRules . nl
. unlinesS (map prRule rs)
where
header = showString "#JSGF V1.0 UTF-8;" . nl
. comments ["JSGF speech recognition grammar for " ++ name,
"Generated by GF"] . nl
. comment ("JSGF speech recognition grammar for " ++ name)
. comment "Generated by GF"
. showString ("grammar " ++ name ++ ";") . nl
. nl
mainCat = comments ["Start category: " ++ origStart] . nl
. showString "public <MAIN> = " . prCat start . showChar ';' . nl . nl
mainCat = comment ("Start category: " ++ origStart)
. rule True "MAIN" [prCat start]
prRule (SRGRule cat origCat rhs) =
comments [origCat] . nl
. prCat cat . showString " = " . joinS " | " (map prAlt (ebnfSRGAlts rhs)) . nl
comment origCat
. rule False cat (map prAlt (ebnfSRGAlts rhs))
-- FIXME: use the probability
prAlt (EBnfSRGAlt mp _ rhs) = prItem rhs
prAlt (EBnfSRGAlt mp n rhs) = tag sisr (profileInitSISR n) . showChar ' '. prItem sisr rhs
topCatRules = [rule True (catFormId tc) (map (it tc) cs) | (tc,cs) <- topCats]
where topCats = buildMultiMap [(cfgCatToGFCat origCat, cat) | SRGRule cat origCat _ <- rs]
it i c = prCat c . tag sisr [(EThis :. catFieldId i) := (ERef c)]
catFormId :: String -> String
catFormId = (++ "_cat")
catFieldId :: String -> String
catFieldId = (++ "_field")
prCat :: SRGCat -> ShowS
prCat c = showChar '<' . showString c . showChar '>'
prItem :: EBnfSRGItem -> ShowS
prItem = f
prItem :: Maybe SISRFormat -> EBnfSRGItem -> ShowS
prItem sisr = f 1
where
f (REUnion []) = showString "<VOID>"
f (REUnion xs) = wrap "(" (joinS " | " (map f xs)) ")"
f (REConcat []) = showString "<NULL>"
f (REConcat xs) = wrap "(" (unwordsS (map f xs)) ")"
f (RERepeat x) = wrap "(" (f x) ")" . showString "*"
f (RESymbol s) = prSymbol s
f _ (REUnion []) = showString "<VOID>"
f p (REUnion xs) = (if p >= 1 then paren else id) (joinS " | " (map (f 1) xs))
f _ (REConcat []) = showString "<NULL>"
f p (REConcat xs) = (if p >= 3 then paren else id) (unwordsS (map (f 2) xs))
f p (RERepeat x) = f 3 x . showString "*"
f _ (RESymbol s) = prSymbol sisr s
prSymbol :: Symbol SRGNT Token -> ShowS
prSymbol (Cat (c,_)) = prCat c
prSymbol (Tok t) | all isPunct (prt t) = id -- removes punctuation
| otherwise = wrap "\"" (prtS t) "\""
prSymbol :: Maybe SISRFormat -> Symbol SRGNT Token -> ShowS
prSymbol sisr (Cat n@(c,_)) = prCat c . tag sisr (catSISR n)
prSymbol _ (Tok t) | all isPunct (prt t) = id -- removes punctuation
| otherwise = prtS t -- FIXME: quote if there is whitespace or odd chars
tag :: Maybe SISRFormat -> [SISRExpr] -> ShowS
tag Nothing _ = id
tag _ [] = id
tag (Just fmt) t = showString "{" . showString (prSISR fmt t) . showString "}"
isPunct :: Char -> Bool
isPunct c = c `elem` "-_.;.,?!"
comments :: [String] -> ShowS
comments = unlinesS . map (showString . ("// " ++))
comment :: String -> ShowS
comment s = showString "// " . showString s . nl
paren f = wrap "(" f ")"
rule :: Bool -> SRGCat -> [ShowS] -> ShowS
rule pub c xs = p . prCat c . showString " = " . joinS " | " xs . showChar ';' . nl
where p = if pub then showString "public " else id

View File

@@ -11,7 +11,7 @@
-- categories in the grammar
-----------------------------------------------------------------------------
module GF.Speech.PrSRGS (SISRFormat(..), srgsXmlPrinter) where
module GF.Speech.PrSRGS (srgsXmlPrinter) where
import GF.Data.Utilities
import GF.Data.XML
@@ -71,19 +71,11 @@ prSrgsXml sisr (SRG{grammarName=name,startCat=start,
rule :: String -> [XML] -> XML
rule i = Tag "rule" [("id",i)]
cfgCatToGFCat :: String -> String
cfgCatToGFCat = takeWhile (/='{')
mkProd :: Maybe SISRFormat -> EBnfSRGAlt -> XML
mkProd sisr (EBnfSRGAlt mp n@(Name f prs) rhs) = Tag "item" w (t ++ xs)
mkProd sisr (EBnfSRGAlt mp n rhs) = Tag "item" w (t ++ xs)
where xs = [mkItem sisr rhs]
w = maybe [] (\p -> [("weight", show p)]) mp
t = [tag sisr ts]
ts = [(EThis :. "name") := (EStr (prIdent f))] ++
[(EThis :. ("arg" ++ show n)) := (EStr (argInit (prs!!n)))
| n <- [0..length prs-1]]
argInit (Unify _) = "?"
argInit (Constant f) = maybe "?" prIdent (forestName f)
t = [tag sisr (profileInitSISR n)]
mkItem :: Maybe SISRFormat -> EBnfSRGItem -> XML
mkItem sisr = f
@@ -94,15 +86,14 @@ mkItem sisr = f
f (RESymbol s) = symItem sisr s
symItem :: Maybe SISRFormat -> Symbol SRGNT Token -> XML
symItem sisr (Cat (c,slots)) = Tag "item" [] ([Tag "ruleref" [("uri","#" ++ c)] []]++t)
where
t = if null ts then [] else [tag sisr ts]
ts = [(EThis :. ("arg" ++ show s)) := (ERef c) | s <- slots]
symItem sisr (Cat n@(c,_)) =
Tag "item" [] [Tag "ruleref" [("uri","#" ++ c)] [], tag sisr (catSISR n)]
symItem _ (Tok t) = Tag "item" [] [Data (showToken t)]
tag :: Maybe SISRFormat -> [SISRExpr] -> XML
tag Nothing _ = Empty
tag (Just fmt) ts = Tag "tag" [] [Data (join "; " (map (prSISR fmt) ts))]
tag _ [] = Empty
tag (Just fmt) ts = Tag "tag" [] [Data (prSISR fmt ts)]
catFormId :: String -> String

View File

@@ -10,10 +10,19 @@
--
-----------------------------------------------------------------------------
module GF.Speech.SISR (SISRFormat(..), SISRExpr(..), prSISR) where
module GF.Speech.SISR (SISRFormat(..), SISRExpr(..), prSISR,
profileInitSISR, catSISR) where
import Data.List
import GF.Conversion.Types
import GF.Data.Utilities
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), forestName)
import GF.Infra.Ident
import GF.Speech.SRG
infixl 8 :.
infixr 1 :=
@@ -32,8 +41,8 @@ data SISRExpr = SISRExpr := SISRExpr
| ENew String [SISRExpr]
deriving Show
prSISR :: SISRFormat -> SISRExpr -> String
prSISR fmt = f
prSISR :: SISRFormat -> [SISRExpr] -> String
prSISR fmt = join "; " . map f
where
f e =
case e of
@@ -43,4 +52,15 @@ prSISR fmt = f
ERef y -> "$" ++ y
EStr s -> show s
EApp x ys -> f x ++ "(" ++ concat (intersperse "," (map f ys)) ++ ")"
ENew n ys -> "new " ++ n ++ "(" ++ concat (intersperse "," (map f ys)) ++ ")"
ENew n ys -> "new " ++ n ++ "(" ++ concat (intersperse "," (map f ys)) ++ ")"
profileInitSISR :: Name -> [SISRExpr]
profileInitSISR (Name f prs) =
[(EThis :. "name") := (EStr (prIdent f))] ++
[(EThis :. ("arg" ++ show n)) := (EStr (argInit (prs!!n)))
| n <- [0..length prs-1]]
where argInit (Unify _) = "?"
argInit (Constant f) = maybe "?" prIdent (forestName f)
catSISR :: SRGNT -> [SISRExpr]
catSISR (c,slots) = [(EThis :. ("arg" ++ show s)) := (ERef c) | s <- slots]

View File

@@ -22,7 +22,7 @@ module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..),
SRGCat, SRGNT,
makeSimpleSRG, makeSRG
, lookupFM_, prtS
, topDownFilter
, topDownFilter, cfgCatToGFCat
, EBnfSRGAlt(..), EBnfSRGItem
, ebnfSRGAlts
) where
@@ -168,6 +168,9 @@ topDownFilter srg@(SRG { startCat = start, rules = rs }) = srg { rules = rs' }
allSRGCats :: SRG -> [String]
allSRGCats SRG { rules = rs } = [c | SRGRule c _ _ <- rs]
cfgCatToGFCat :: SRGCat -> String
cfgCatToGFCat = takeWhile (/='{')
--
-- * Size-optimized EBNF SRGs
--

View File

@@ -62,6 +62,7 @@ import GF.CF.CFtoSRG
import GF.Speech.PrGSL (gslPrinter)
import GF.Speech.PrJSGF (jsgfPrinter)
import qualified GF.Speech.PrSRGS as SRGS
import qualified GF.Speech.SISR as SISR
import GF.Speech.PrSLF
import GF.Speech.PrFA (faGraphvizPrinter,regularPrinter,faCPrinter)
import GF.Speech.PrRegExp (regexpPrinter)
@@ -252,7 +253,11 @@ customGrammarPrinter =
in gslPrinter name start opts Nothing $ stateCFG s)
,(strCI "jsgf", \opts s -> let name = cncId s
start = getStartCatCF opts s
in jsgfPrinter name start opts Nothing $ stateCFG s)
in jsgfPrinter name start opts Nothing Nothing $ stateCFG s)
,(strCI "jsgf_sisr_old",
\opts s -> let name = cncId s
start = getStartCatCF opts s
in jsgfPrinter name start opts (Just SISR.SISROld) Nothing $ stateCFG s)
,(strCI "srgs_xml", \opts s -> let name = cncId s
start = getStartCatCF opts s
in SRGS.srgsXmlPrinter name start opts Nothing Nothing $ stateCFG s)
@@ -264,7 +269,7 @@ customGrammarPrinter =
,(strCI "srgs_xml_sisr_old",
\opts s -> let name = cncId s
start = getStartCatCF opts s
in SRGS.srgsXmlPrinter name start opts (Just SRGS.SISROld) Nothing $ stateCFG s)
in SRGS.srgsXmlPrinter name start opts (Just SISR.SISROld) Nothing $ stateCFG s)
,(strCI "vxml", \opts s -> let start = getStartCat opts s
in grammar2vxml start s)
,(strCI "slf", \opts s -> let start = getStartCatCF opts s