mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
112 lines
3.6 KiB
Haskell
112 lines
3.6 KiB
Haskell
----------------------------------------------------------------------
|
|
-- |
|
|
-- Module : GF.Speech.JSGF
|
|
--
|
|
-- This module prints a CFG as a JSGF grammar.
|
|
--
|
|
-- FIXME: remove \/ warn \/ fail if there are int \/ string literal
|
|
-- categories in the grammar
|
|
--
|
|
-- FIXME: convert to UTF-8
|
|
-----------------------------------------------------------------------------
|
|
|
|
module GF.Speech.JSGF (jsgfPrinter) where
|
|
|
|
import GF.Data.Utilities
|
|
import GF.Speech.CFG
|
|
import GF.Speech.RegExp
|
|
import GF.Speech.SISR
|
|
import GF.Speech.SRG
|
|
import PGF.CId
|
|
import PGF.Data
|
|
|
|
import Data.Char
|
|
import Data.List
|
|
import Data.Maybe
|
|
import Text.PrettyPrint.HughesPJ
|
|
import Debug.Trace
|
|
|
|
width :: Int
|
|
width = 75
|
|
|
|
jsgfPrinter :: Maybe SISRFormat
|
|
-> PGF
|
|
-> CId -> String
|
|
jsgfPrinter sisr pgf cnc = renderStyle st $ prJSGF sisr $ makeSimpleSRG pgf cnc
|
|
where st = style { lineLength = width }
|
|
|
|
prJSGF :: Maybe SISRFormat -> SRG -> Doc
|
|
prJSGF sisr srg
|
|
= header $++$ mainCat $++$ foldr ($++$) empty (map prRule (srgRules srg))
|
|
where
|
|
header = text "#JSGF" <+> text "V1.0" <+> text "UTF-8" <+> lang <> char ';' $$
|
|
comment ("JSGF speech recognition grammar for " ++ srgName srg) $$
|
|
comment "Generated by GF" $$
|
|
text ("grammar " ++ srgName srg ++ ";")
|
|
lang = maybe empty text (srgLanguage srg)
|
|
mainCat = rule True "MAIN" [prCat (srgStartCat srg)]
|
|
prRule (SRGRule cat rhs) = rule (isExternalCat srg cat) cat (map prAlt rhs)
|
|
prAlt (SRGAlt mp n rhs) = sep [initTag, p (prItem sisr n rhs), finalTag]
|
|
where initTag | isEmpty t = empty
|
|
| otherwise = text "<NULL>" <+> t
|
|
where t = tag sisr (profileInitSISR n)
|
|
finalTag = tag sisr (profileFinalSISR n)
|
|
p = if isEmpty initTag && isEmpty finalTag then id else parens
|
|
|
|
prCat :: Cat -> Doc
|
|
prCat c = char '<' <> text c <> char '>'
|
|
|
|
prItem :: Maybe SISRFormat -> CFTerm -> SRGItem -> Doc
|
|
prItem sisr t = f 0
|
|
where
|
|
f _ (REUnion []) = text "<VOID>"
|
|
f p (REUnion xs)
|
|
| not (null es) = brackets (f 0 (REUnion nes))
|
|
| otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs))
|
|
where (es,nes) = partition isEpsilon xs
|
|
f _ (REConcat []) = text "<NULL>"
|
|
f p (REConcat xs) = (if p >= 3 then parens else id) (fsep (map (f 2) xs))
|
|
f p (RERepeat x) = f 3 x <> char '*'
|
|
f _ (RESymbol s) = prSymbol sisr t s
|
|
|
|
prSymbol :: Maybe SISRFormat -> CFTerm -> SRGSymbol -> Doc
|
|
prSymbol sisr cn (NonTerminal n@(c,_)) = prCat c <+> tag sisr (catSISR cn n)
|
|
prSymbol _ cn (Terminal t) | all isPunct t = empty -- removes punctuation
|
|
| otherwise = text t -- FIXME: quote if there is whitespace or odd chars
|
|
|
|
tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc
|
|
tag Nothing _ = empty
|
|
tag (Just fmt) t = case t fmt of
|
|
[] -> empty
|
|
ts -> char '{' <+> (text (e $ prSISR ts)) <+> char '}'
|
|
where e [] = []
|
|
e ('}':xs) = '\\':'}':e xs
|
|
e ('\n':xs) = ' ' : e (dropWhile isSpace xs)
|
|
e (x:xs) = x:e xs
|
|
|
|
isPunct :: Char -> Bool
|
|
isPunct c = c `elem` "-_.;.,?!"
|
|
|
|
comment :: String -> Doc
|
|
comment s = text "//" <+> text s
|
|
|
|
alts :: [Doc] -> Doc
|
|
alts = fsep . prepunctuate (text "| ")
|
|
|
|
rule :: Bool -> Cat -> [Doc] -> Doc
|
|
rule pub c xs = p <+> prCat c <+> char '=' <+> nest 2 (alts xs) <+> char ';'
|
|
where p = if pub then text "public" else empty
|
|
|
|
-- Pretty-printing utilities
|
|
|
|
emptyLine :: Doc
|
|
emptyLine = text ""
|
|
|
|
prepunctuate :: Doc -> [Doc] -> [Doc]
|
|
prepunctuate _ [] = []
|
|
prepunctuate p (x:xs) = x : map (p <>) xs
|
|
|
|
($++$) :: Doc -> Doc -> Doc
|
|
x $++$ y = x $$ emptyLine $$ y
|
|
|