From 2b326f80418ec87f3e59a4f036d5227dd9660d4c Mon Sep 17 00:00:00 2001 From: bringert Date: Sun, 17 Dec 2006 13:17:17 +0000 Subject: [PATCH] Many fixes to JSGF format (never tested before). Implemented JSGF+SISR. Left recursion removal destroys SISR, must be fixed. --- src/GF/Speech/PrJSGF.hs | 84 ++++++++++++++++++++++++------------- src/GF/Speech/PrSRGS.hs | 23 ++++------ src/GF/Speech/SISR.hs | 28 +++++++++++-- src/GF/Speech/SRG.hs | 5 ++- src/GF/UseGrammar/Custom.hs | 9 +++- 5 files changed, 98 insertions(+), 51 deletions(-) diff --git a/src/GF/Speech/PrJSGF.hs b/src/GF/Speech/PrJSGF.hs index f09d454d9..26421d36c 100644 --- a/src/GF/Speech/PrJSGF.hs +++ b/src/GF/Speech/PrJSGF.hs @@ -22,57 +22,85 @@ module GF.Speech.PrJSGF (jsgfPrinter) where import GF.Conversion.Types import GF.Data.Utilities import GF.Formalism.CFG -import GF.Formalism.Utilities (Symbol(..)) +import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..)) import GF.Infra.Ident import GF.Infra.Print import GF.Infra.Option import GF.Probabilistic.Probabilistic (Probs) +import GF.Speech.SISR import GF.Speech.SRG import GF.Speech.RegExp +import Debug.Trace + jsgfPrinter :: Ident -- ^ Grammar name -> String -- ^ Start category - -> Options -> Maybe Probs -> CGrammar -> String -jsgfPrinter name start opts probs cfg = prJSGF srg "" + -> Options + -> Maybe SISRFormat + -> Maybe Probs -> CGrammar -> String +jsgfPrinter name start opts sisr probs cfg = trace (show srg) $ prJSGF srg sisr "" where srg = makeSimpleSRG name start opts probs cfg -prJSGF :: SRG -> ShowS -prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) - = header . mainCat . unlinesS (map prRule rs) +prJSGF :: SRG -> Maybe SISRFormat -> ShowS +prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) sisr + = header . nl + . mainCat . nl + . unlinesS topCatRules . nl + . unlinesS (map prRule rs) where header = showString "#JSGF V1.0 UTF-8;" . nl - . comments ["JSGF speech recognition grammar for " ++ name, - "Generated by GF"] . nl + . comment ("JSGF speech recognition grammar for " ++ name) + . comment "Generated by GF" . showString ("grammar " ++ name ++ ";") . nl - . nl - mainCat = comments ["Start category: " ++ origStart] . nl - . showString "public
= " . prCat start . showChar ';' . nl . nl + mainCat = comment ("Start category: " ++ origStart) + . rule True "MAIN" [prCat start] prRule (SRGRule cat origCat rhs) = - comments [origCat] . nl - . prCat cat . showString " = " . joinS " | " (map prAlt (ebnfSRGAlts rhs)) . nl + comment origCat + . rule False cat (map prAlt (ebnfSRGAlts rhs)) -- FIXME: use the probability - prAlt (EBnfSRGAlt mp _ rhs) = prItem rhs + prAlt (EBnfSRGAlt mp n rhs) = tag sisr (profileInitSISR n) . showChar ' '. prItem sisr rhs + + topCatRules = [rule True (catFormId tc) (map (it tc) cs) | (tc,cs) <- topCats] + where topCats = buildMultiMap [(cfgCatToGFCat origCat, cat) | SRGRule cat origCat _ <- rs] + it i c = prCat c . tag sisr [(EThis :. catFieldId i) := (ERef c)] + +catFormId :: String -> String +catFormId = (++ "_cat") + +catFieldId :: String -> String +catFieldId = (++ "_field") prCat :: SRGCat -> ShowS prCat c = showChar '<' . showString c . showChar '>' -prItem :: EBnfSRGItem -> ShowS -prItem = f +prItem :: Maybe SISRFormat -> EBnfSRGItem -> ShowS +prItem sisr = f 1 where - f (REUnion []) = showString "" - f (REUnion xs) = wrap "(" (joinS " | " (map f xs)) ")" - f (REConcat []) = showString "" - f (REConcat xs) = wrap "(" (unwordsS (map f xs)) ")" - f (RERepeat x) = wrap "(" (f x) ")" . showString "*" - f (RESymbol s) = prSymbol s + f _ (REUnion []) = showString "" + f p (REUnion xs) = (if p >= 1 then paren else id) (joinS " | " (map (f 1) xs)) + f _ (REConcat []) = showString "" + f p (REConcat xs) = (if p >= 3 then paren else id) (unwordsS (map (f 2) xs)) + f p (RERepeat x) = f 3 x . showString "*" + f _ (RESymbol s) = prSymbol sisr s -prSymbol :: Symbol SRGNT Token -> ShowS -prSymbol (Cat (c,_)) = prCat c -prSymbol (Tok t) | all isPunct (prt t) = id -- removes punctuation - | otherwise = wrap "\"" (prtS t) "\"" +prSymbol :: Maybe SISRFormat -> Symbol SRGNT Token -> ShowS +prSymbol sisr (Cat n@(c,_)) = prCat c . tag sisr (catSISR n) +prSymbol _ (Tok t) | all isPunct (prt t) = id -- removes punctuation + | otherwise = prtS t -- FIXME: quote if there is whitespace or odd chars + +tag :: Maybe SISRFormat -> [SISRExpr] -> ShowS +tag Nothing _ = id +tag _ [] = id +tag (Just fmt) t = showString "{" . showString (prSISR fmt t) . showString "}" isPunct :: Char -> Bool isPunct c = c `elem` "-_.;.,?!" -comments :: [String] -> ShowS -comments = unlinesS . map (showString . ("// " ++)) +comment :: String -> ShowS +comment s = showString "// " . showString s . nl + +paren f = wrap "(" f ")" + +rule :: Bool -> SRGCat -> [ShowS] -> ShowS +rule pub c xs = p . prCat c . showString " = " . joinS " | " xs . showChar ';' . nl + where p = if pub then showString "public " else id \ No newline at end of file diff --git a/src/GF/Speech/PrSRGS.hs b/src/GF/Speech/PrSRGS.hs index 2a7e99d07..d4ab5c4c0 100644 --- a/src/GF/Speech/PrSRGS.hs +++ b/src/GF/Speech/PrSRGS.hs @@ -11,7 +11,7 @@ -- categories in the grammar ----------------------------------------------------------------------------- -module GF.Speech.PrSRGS (SISRFormat(..), srgsXmlPrinter) where +module GF.Speech.PrSRGS (srgsXmlPrinter) where import GF.Data.Utilities import GF.Data.XML @@ -71,19 +71,11 @@ prSrgsXml sisr (SRG{grammarName=name,startCat=start, rule :: String -> [XML] -> XML rule i = Tag "rule" [("id",i)] -cfgCatToGFCat :: String -> String -cfgCatToGFCat = takeWhile (/='{') - mkProd :: Maybe SISRFormat -> EBnfSRGAlt -> XML -mkProd sisr (EBnfSRGAlt mp n@(Name f prs) rhs) = Tag "item" w (t ++ xs) +mkProd sisr (EBnfSRGAlt mp n rhs) = Tag "item" w (t ++ xs) where xs = [mkItem sisr rhs] w = maybe [] (\p -> [("weight", show p)]) mp - t = [tag sisr ts] - ts = [(EThis :. "name") := (EStr (prIdent f))] ++ - [(EThis :. ("arg" ++ show n)) := (EStr (argInit (prs!!n))) - | n <- [0..length prs-1]] - argInit (Unify _) = "?" - argInit (Constant f) = maybe "?" prIdent (forestName f) + t = [tag sisr (profileInitSISR n)] mkItem :: Maybe SISRFormat -> EBnfSRGItem -> XML mkItem sisr = f @@ -94,15 +86,14 @@ mkItem sisr = f f (RESymbol s) = symItem sisr s symItem :: Maybe SISRFormat -> Symbol SRGNT Token -> XML -symItem sisr (Cat (c,slots)) = Tag "item" [] ([Tag "ruleref" [("uri","#" ++ c)] []]++t) - where - t = if null ts then [] else [tag sisr ts] - ts = [(EThis :. ("arg" ++ show s)) := (ERef c) | s <- slots] +symItem sisr (Cat n@(c,_)) = + Tag "item" [] [Tag "ruleref" [("uri","#" ++ c)] [], tag sisr (catSISR n)] symItem _ (Tok t) = Tag "item" [] [Data (showToken t)] tag :: Maybe SISRFormat -> [SISRExpr] -> XML tag Nothing _ = Empty -tag (Just fmt) ts = Tag "tag" [] [Data (join "; " (map (prSISR fmt) ts))] +tag _ [] = Empty +tag (Just fmt) ts = Tag "tag" [] [Data (prSISR fmt ts)] catFormId :: String -> String diff --git a/src/GF/Speech/SISR.hs b/src/GF/Speech/SISR.hs index 20c6f7fe4..4f37b6b82 100644 --- a/src/GF/Speech/SISR.hs +++ b/src/GF/Speech/SISR.hs @@ -10,10 +10,19 @@ -- ----------------------------------------------------------------------------- -module GF.Speech.SISR (SISRFormat(..), SISRExpr(..), prSISR) where +module GF.Speech.SISR (SISRFormat(..), SISRExpr(..), prSISR, + profileInitSISR, catSISR) where import Data.List +import GF.Conversion.Types +import GF.Data.Utilities +import GF.Formalism.CFG +import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..), forestName) +import GF.Infra.Ident +import GF.Speech.SRG + + infixl 8 :. infixr 1 := @@ -32,8 +41,8 @@ data SISRExpr = SISRExpr := SISRExpr | ENew String [SISRExpr] deriving Show -prSISR :: SISRFormat -> SISRExpr -> String -prSISR fmt = f +prSISR :: SISRFormat -> [SISRExpr] -> String +prSISR fmt = join "; " . map f where f e = case e of @@ -43,4 +52,15 @@ prSISR fmt = f ERef y -> "$" ++ y EStr s -> show s EApp x ys -> f x ++ "(" ++ concat (intersperse "," (map f ys)) ++ ")" - ENew n ys -> "new " ++ n ++ "(" ++ concat (intersperse "," (map f ys)) ++ ")" \ No newline at end of file + ENew n ys -> "new " ++ n ++ "(" ++ concat (intersperse "," (map f ys)) ++ ")" + +profileInitSISR :: Name -> [SISRExpr] +profileInitSISR (Name f prs) = + [(EThis :. "name") := (EStr (prIdent f))] ++ + [(EThis :. ("arg" ++ show n)) := (EStr (argInit (prs!!n))) + | n <- [0..length prs-1]] + where argInit (Unify _) = "?" + argInit (Constant f) = maybe "?" prIdent (forestName f) + +catSISR :: SRGNT -> [SISRExpr] +catSISR (c,slots) = [(EThis :. ("arg" ++ show s)) := (ERef c) | s <- slots] diff --git a/src/GF/Speech/SRG.hs b/src/GF/Speech/SRG.hs index b27c5ad56..e89e42662 100644 --- a/src/GF/Speech/SRG.hs +++ b/src/GF/Speech/SRG.hs @@ -22,7 +22,7 @@ module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..), SRGCat, SRGNT, makeSimpleSRG, makeSRG , lookupFM_, prtS - , topDownFilter + , topDownFilter, cfgCatToGFCat , EBnfSRGAlt(..), EBnfSRGItem , ebnfSRGAlts ) where @@ -168,6 +168,9 @@ topDownFilter srg@(SRG { startCat = start, rules = rs }) = srg { rules = rs' } allSRGCats :: SRG -> [String] allSRGCats SRG { rules = rs } = [c | SRGRule c _ _ <- rs] +cfgCatToGFCat :: SRGCat -> String +cfgCatToGFCat = takeWhile (/='{') + -- -- * Size-optimized EBNF SRGs -- diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs index 92b95756a..9a689cb8c 100644 --- a/src/GF/UseGrammar/Custom.hs +++ b/src/GF/UseGrammar/Custom.hs @@ -62,6 +62,7 @@ import GF.CF.CFtoSRG import GF.Speech.PrGSL (gslPrinter) import GF.Speech.PrJSGF (jsgfPrinter) import qualified GF.Speech.PrSRGS as SRGS +import qualified GF.Speech.SISR as SISR import GF.Speech.PrSLF import GF.Speech.PrFA (faGraphvizPrinter,regularPrinter,faCPrinter) import GF.Speech.PrRegExp (regexpPrinter) @@ -252,7 +253,11 @@ customGrammarPrinter = in gslPrinter name start opts Nothing $ stateCFG s) ,(strCI "jsgf", \opts s -> let name = cncId s start = getStartCatCF opts s - in jsgfPrinter name start opts Nothing $ stateCFG s) + in jsgfPrinter name start opts Nothing Nothing $ stateCFG s) + ,(strCI "jsgf_sisr_old", + \opts s -> let name = cncId s + start = getStartCatCF opts s + in jsgfPrinter name start opts (Just SISR.SISROld) Nothing $ stateCFG s) ,(strCI "srgs_xml", \opts s -> let name = cncId s start = getStartCatCF opts s in SRGS.srgsXmlPrinter name start opts Nothing Nothing $ stateCFG s) @@ -264,7 +269,7 @@ customGrammarPrinter = ,(strCI "srgs_xml_sisr_old", \opts s -> let name = cncId s start = getStartCatCF opts s - in SRGS.srgsXmlPrinter name start opts (Just SRGS.SISROld) Nothing $ stateCFG s) + in SRGS.srgsXmlPrinter name start opts (Just SISR.SISROld) Nothing $ stateCFG s) ,(strCI "vxml", \opts s -> let start = getStartCat opts s in grammar2vxml start s) ,(strCI "slf", \opts s -> let start = getStartCatCF opts s