forked from GitHub/gf-core
Many fixes to JSGF format (never tested before). Implemented JSGF+SISR. Left recursion removal destroys SISR, must be fixed.
This commit is contained in:
@@ -22,57 +22,85 @@ module GF.Speech.PrJSGF (jsgfPrinter) where
|
||||
import GF.Conversion.Types
|
||||
import GF.Data.Utilities
|
||||
import GF.Formalism.CFG
|
||||
import GF.Formalism.Utilities (Symbol(..))
|
||||
import GF.Formalism.Utilities (Symbol(..), NameProfile(..), Profile(..))
|
||||
import GF.Infra.Ident
|
||||
import GF.Infra.Print
|
||||
import GF.Infra.Option
|
||||
import GF.Probabilistic.Probabilistic (Probs)
|
||||
import GF.Speech.SISR
|
||||
import GF.Speech.SRG
|
||||
import GF.Speech.RegExp
|
||||
|
||||
import Debug.Trace
|
||||
|
||||
jsgfPrinter :: Ident -- ^ Grammar name
|
||||
-> String -- ^ Start category
|
||||
-> Options -> Maybe Probs -> CGrammar -> String
|
||||
jsgfPrinter name start opts probs cfg = prJSGF srg ""
|
||||
-> Options
|
||||
-> Maybe SISRFormat
|
||||
-> Maybe Probs -> CGrammar -> String
|
||||
jsgfPrinter name start opts sisr probs cfg = trace (show srg) $ prJSGF srg sisr ""
|
||||
where srg = makeSimpleSRG name start opts probs cfg
|
||||
|
||||
prJSGF :: SRG -> ShowS
|
||||
prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs})
|
||||
= header . mainCat . unlinesS (map prRule rs)
|
||||
prJSGF :: SRG -> Maybe SISRFormat -> ShowS
|
||||
prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) sisr
|
||||
= header . nl
|
||||
. mainCat . nl
|
||||
. unlinesS topCatRules . nl
|
||||
. unlinesS (map prRule rs)
|
||||
where
|
||||
header = showString "#JSGF V1.0 UTF-8;" . nl
|
||||
. comments ["JSGF speech recognition grammar for " ++ name,
|
||||
"Generated by GF"] . nl
|
||||
. comment ("JSGF speech recognition grammar for " ++ name)
|
||||
. comment "Generated by GF"
|
||||
. showString ("grammar " ++ name ++ ";") . nl
|
||||
. nl
|
||||
mainCat = comments ["Start category: " ++ origStart] . nl
|
||||
. showString "public <MAIN> = " . prCat start . showChar ';' . nl . nl
|
||||
mainCat = comment ("Start category: " ++ origStart)
|
||||
. rule True "MAIN" [prCat start]
|
||||
prRule (SRGRule cat origCat rhs) =
|
||||
comments [origCat] . nl
|
||||
. prCat cat . showString " = " . joinS " | " (map prAlt (ebnfSRGAlts rhs)) . nl
|
||||
comment origCat
|
||||
. rule False cat (map prAlt (ebnfSRGAlts rhs))
|
||||
-- FIXME: use the probability
|
||||
prAlt (EBnfSRGAlt mp _ rhs) = prItem rhs
|
||||
prAlt (EBnfSRGAlt mp n rhs) = tag sisr (profileInitSISR n) . showChar ' '. prItem sisr rhs
|
||||
|
||||
topCatRules = [rule True (catFormId tc) (map (it tc) cs) | (tc,cs) <- topCats]
|
||||
where topCats = buildMultiMap [(cfgCatToGFCat origCat, cat) | SRGRule cat origCat _ <- rs]
|
||||
it i c = prCat c . tag sisr [(EThis :. catFieldId i) := (ERef c)]
|
||||
|
||||
catFormId :: String -> String
|
||||
catFormId = (++ "_cat")
|
||||
|
||||
catFieldId :: String -> String
|
||||
catFieldId = (++ "_field")
|
||||
|
||||
prCat :: SRGCat -> ShowS
|
||||
prCat c = showChar '<' . showString c . showChar '>'
|
||||
|
||||
prItem :: EBnfSRGItem -> ShowS
|
||||
prItem = f
|
||||
prItem :: Maybe SISRFormat -> EBnfSRGItem -> ShowS
|
||||
prItem sisr = f 1
|
||||
where
|
||||
f (REUnion []) = showString "<VOID>"
|
||||
f (REUnion xs) = wrap "(" (joinS " | " (map f xs)) ")"
|
||||
f (REConcat []) = showString "<NULL>"
|
||||
f (REConcat xs) = wrap "(" (unwordsS (map f xs)) ")"
|
||||
f (RERepeat x) = wrap "(" (f x) ")" . showString "*"
|
||||
f (RESymbol s) = prSymbol s
|
||||
f _ (REUnion []) = showString "<VOID>"
|
||||
f p (REUnion xs) = (if p >= 1 then paren else id) (joinS " | " (map (f 1) xs))
|
||||
f _ (REConcat []) = showString "<NULL>"
|
||||
f p (REConcat xs) = (if p >= 3 then paren else id) (unwordsS (map (f 2) xs))
|
||||
f p (RERepeat x) = f 3 x . showString "*"
|
||||
f _ (RESymbol s) = prSymbol sisr s
|
||||
|
||||
prSymbol :: Symbol SRGNT Token -> ShowS
|
||||
prSymbol (Cat (c,_)) = prCat c
|
||||
prSymbol (Tok t) | all isPunct (prt t) = id -- removes punctuation
|
||||
| otherwise = wrap "\"" (prtS t) "\""
|
||||
prSymbol :: Maybe SISRFormat -> Symbol SRGNT Token -> ShowS
|
||||
prSymbol sisr (Cat n@(c,_)) = prCat c . tag sisr (catSISR n)
|
||||
prSymbol _ (Tok t) | all isPunct (prt t) = id -- removes punctuation
|
||||
| otherwise = prtS t -- FIXME: quote if there is whitespace or odd chars
|
||||
|
||||
tag :: Maybe SISRFormat -> [SISRExpr] -> ShowS
|
||||
tag Nothing _ = id
|
||||
tag _ [] = id
|
||||
tag (Just fmt) t = showString "{" . showString (prSISR fmt t) . showString "}"
|
||||
|
||||
isPunct :: Char -> Bool
|
||||
isPunct c = c `elem` "-_.;.,?!"
|
||||
|
||||
comments :: [String] -> ShowS
|
||||
comments = unlinesS . map (showString . ("// " ++))
|
||||
comment :: String -> ShowS
|
||||
comment s = showString "// " . showString s . nl
|
||||
|
||||
paren f = wrap "(" f ")"
|
||||
|
||||
rule :: Bool -> SRGCat -> [ShowS] -> ShowS
|
||||
rule pub c xs = p . prCat c . showString " = " . joinS " | " xs . showChar ';' . nl
|
||||
where p = if pub then showString "public " else id
|
||||
Reference in New Issue
Block a user