From 07b34ff78084cca586f049455edf5d034cf5fcb6 Mon Sep 17 00:00:00 2001 From: bringert Date: Fri, 3 Feb 2006 18:43:06 +0000 Subject: [PATCH] Moved general XML stuff to GF.Data.XML. Started working on VoiceXML generation. --- src/GF/Data/XML.hs | 46 +++++++ src/GF/Shell/HelpFile.hs | 1 + src/GF/Speech/GrammarToVoiceXML.hs | 195 +++++++++++++++++++++++++++++ src/GF/Speech/PrSRGS.hs | 41 ++---- src/GF/UseGrammar/Custom.hs | 2 + src/HelpFile | 1 + 6 files changed, 252 insertions(+), 34 deletions(-) create mode 100644 src/GF/Data/XML.hs create mode 100644 src/GF/Speech/GrammarToVoiceXML.hs diff --git a/src/GF/Data/XML.hs b/src/GF/Data/XML.hs new file mode 100644 index 000000000..816f6ec18 --- /dev/null +++ b/src/GF/Data/XML.hs @@ -0,0 +1,46 @@ +---------------------------------------------------------------------- +-- | +-- Module : XML +-- Maintainer : BB +-- Stability : (stable) +-- Portability : (portable) +-- +-- Utilities for creating XML documents. +----------------------------------------------------------------------------- + +module GF.Data.XML (XML(..), Attr, comments, showsXMLDoc, showsXML) where + +import GF.Data.Utilities + +data XML = Data String | Tag String [Attr] [XML] | Comment String + deriving (Eq,Show) + +type Attr = (String,String) + +comments :: [String] -> [XML] +comments = map Comment + +showsXMLDoc :: XML -> ShowS +showsXMLDoc xml = showString header . showsXML xml + where header = "" + +showsXML :: XML -> ShowS +showsXML (Data s) = showString s +showsXML (Tag t as []) = showChar '<' . showString t . showsAttrs as . showString "/>" +showsXML (Tag t as cs) = + showChar '<' . showString t . showsAttrs as . showChar '>' + . concatS (map showsXML cs) . showString "' +showsXML (Comment c) = showString "" + +showsAttrs :: [Attr] -> ShowS +showsAttrs = concatS . map (showChar ' ' .) . map showsAttr + +showsAttr :: Attr -> ShowS +showsAttr (n,v) = showString n . showString "=\"" . showString (escape v) . showString "\"" + +-- FIXME: escape strange charachters with &#xxx; +escape :: String -> String +escape = concatMap escChar + where + escChar c | c `elem` ['"','\\'] = '\\':[c] + | otherwise = [c] diff --git a/src/GF/Shell/HelpFile.hs b/src/GF/Shell/HelpFile.hs index 482967fb2..dd03d2515 100644 --- a/src/GF/Shell/HelpFile.hs +++ b/src/GF/Shell/HelpFile.hs @@ -600,6 +600,7 @@ txtHelpFile = "\n -printer=srgs_xml_prob SRGS XML format, with weights" ++ "\n -printer=srgs_xml_ms_sem SRGS XML format, with semantic tags for the" ++ "\n Microsoft Speech API." ++ + "\n -printer=vxml Generate a dialogue system in VoiceXML." ++ "\n -printer=slf a finite automaton in the HTK SLF format" ++ "\n -printer=slf_graphviz the same automaton as slf, but in Graphviz format" ++ "\n -printer=slf_sub a finite automaton with sub-automata in the " ++ diff --git a/src/GF/Speech/GrammarToVoiceXML.hs b/src/GF/Speech/GrammarToVoiceXML.hs new file mode 100644 index 000000000..11a7febd2 --- /dev/null +++ b/src/GF/Speech/GrammarToVoiceXML.hs @@ -0,0 +1,195 @@ +---------------------------------------------------------------------- +-- | +-- Module : GrammarToVoiceXML +-- Maintainer : Bjorn Bringert +-- Stability : (stable) +-- Portability : (portable) +-- +-- Create VoiceXML dialogue system from a GF grammar. +----------------------------------------------------------------------------- + +module GF.Speech.GrammarToVoiceXML (grammar2vxml) where + +import qualified GF.Canon.GFC as GFC +import GF.Grammar.Macros hiding (assign) + +import GF.Infra.Modules +import GF.Data.Operations + +import GF.Data.XML + +import Data.List (isPrefixOf, find, intersperse) + +-- | the main function +grammar2vxml :: GFC.CanonGrammar -> String +grammar2vxml gr = showsXMLDoc (skel2vxml name startcat gr') "" + where (name, gr') = vSkeleton gr + startcat = "Order" -- FIXME + +type VIdent = String + +type VSkeleton = [(VIdent, [(VIdent, [VIdent])])] + + +vSkeleton :: GFC.CanonGrammar -> (String,VSkeleton) +vSkeleton gr = (name,collectR rules [(c,[]) | c <- cats]) where + collectR rr hh = + case rr of + (fun,typ):rs -> case catSkeleton typ of + Ok (cats,cat) -> + collectR rs (updateSkeleton (symid (snd cat)) hh (fun, + map (symid . snd) cats)) + _ -> collectR rs hh + _ -> hh + cats = [symid cat | (cat,GFC.AbsCat _ _) <- defs] + rules = [(symid fun, typ) | (fun,GFC.AbsFun typ _) <- defs] + + defs = concat [tree2list (jments m) | im@(_,ModMod m) <- modules gr, isModAbs m] + name = ifNull "UnknownModule" (symid . last) [n | (n,ModMod m) <- modules gr, isModAbs m] + +updateSkeleton :: VIdent -> VSkeleton -> (VIdent, [VIdent]) -> VSkeleton +updateSkeleton cat skel rule = + case skel of + (cat0,rules):rr | cat0 == cat -> (cat0, rule:rules) : rr + (cat0,rules):rr -> (cat0, rules) : updateSkeleton cat rr rule + + +skel2vxml :: String -> VIdent -> VSkeleton -> XML +skel2vxml name start skel = + vxml ([startForm] ++ concatMap (uncurry (catForms gr)) skel) + where + gr = grammarURI name + startForm = Tag "form" [] [subdialog "sub" [("src","#"++start)] []] + +grammarURI :: String -> String +grammarURI name = name ++ ".grxml" + +catForms :: String -> VIdent -> [(VIdent, [VIdent])] -> [XML] +catForms gr cat fs = + comments [cat ++ " category."] + ++ [cat2form gr cat fs] + ++ map (uncurry (fun2form gr)) fs + +cat2form :: String -> VIdent -> [(VIdent, [VIdent])] -> XML +cat2form gr cat fs = + form cat [var "value" (Just "'?'"), + field cat [] [promptString ("quest_"++cat), + grammar (gr++"#"++cat), + nomatch [Data "I didn't understand you.", reprompt], + help [Data ("help_"++cat)], + filled [] [if_else (cat ++ " == '?'") [reprompt] feedback]], + subdialog "sub" [("srcexpr","'#'+"++cat++".name")] + [param "value" cat, filled [] subDone]] + where subDone = [return_ ["sub.value"]] + feedback = [Data "Constructor: ", value (cat++".name")] + +fun2form :: String -> VIdent -> [VIdent] -> XML +fun2form gr fun args = + form fun ([var "value" Nothing] + ++ ss + ++ [ret]) + where + argNames = zip ["arg"++show n | n <- [0..]] args + ss = map (uncurry mkSub) argNames + mkSub a t = subdialog a [("src","#"++t)] + [param "value" ("value."++a), + filled [] [assign ("value."++a) a]] + ret = block [return_ ["value"]] + +-- +-- * VoiceXML stuff +-- + +vxml :: [XML] -> XML +vxml = Tag "vxml" [("version","2.0"),("xmlns","http://www.w3.org/2001/vxml")] + +form :: String -> [XML] -> XML +form id = Tag "form" [("id", id)] + +field :: String -> [(String,String)] -> [XML] -> XML +field name attrs = Tag "field" ([("name",name)]++attrs) + +subdialog :: String -> [(String,String)] -> [XML] -> XML +subdialog name attrs = Tag "subdialog" ([("name",name)]++attrs) + +filled :: [(String,String)] -> [XML] -> XML +filled = Tag "filled" + +grammar :: String -> XML +grammar uri = Tag "grammar" [("src",uri)] [] + +prompt :: [XML] -> XML +prompt = Tag "prompt" [] + +promptString :: String -> XML +promptString p = prompt [Data p] + +reprompt :: XML +reprompt = Tag "reprompt" [] [] + +assign :: String -> String -> XML +assign n e = Tag "assign" [("name",n),("expr",e)] [] + +value :: String -> XML +value expr = Tag "value" [("expr",expr)] [] + +if_ :: String -> [XML] -> XML +if_ c b = if_else c b [] + +if_else :: String -> [XML] -> [XML] -> XML +if_else c t f = cond [(c,t)] f + +cond :: [(String,[XML])] -> [XML] -> XML +cond ((c,b):rest) els = Tag "if" [("cond",c)] (b ++ es) + where es = [Tag "elseif" [("cond",c')] b' | (c',b') <- rest] + ++ if null els then [] else (Tag "else" [] []:els) + +goto_item :: String -> XML +goto_item nextitem = Tag "goto" [("nextitem",nextitem)] [] + +return_ :: [String] -> XML +return_ names = Tag "return" [("namelist", unwords names)] [] + +block :: [XML] -> XML +block = Tag "block" [] + +throw :: String -> String -> XML +throw event msg = Tag "throw" [("event",event),("message",msg)] [] + +nomatch :: [XML] -> XML +nomatch = Tag "nomatch" [] + +help :: [XML] -> XML +help = Tag "help" [] + +param :: String -> String -> XML +param name expr = Tag "param" [("name",name),("expr",expr)] [] + +var :: String -> Maybe String -> XML +var name expr = Tag "var" ([("name",name)]++e) [] + where e = maybe [] ((:[]) . (,) "expr") expr + + +-- +-- * List stuff +-- + +isListCat :: (VIdent, [(VIdent, [VIdent])]) -> Bool +isListCat (cat,rules) = "List" `isPrefixOf` cat && length rules == 2 + && ("Base"++c) `elem` fs && ("Cons"++c) `elem` fs + where c = elemCat cat + fs = map fst rules + +-- | Gets the element category of a list category. +elemCat :: VIdent -> VIdent +elemCat = drop 4 + +isBaseFun :: VIdent -> Bool +isBaseFun f = "Base" `isPrefixOf` f + +isConsFun :: VIdent -> Bool +isConsFun f = "Cons" `isPrefixOf` f + +baseSize :: (VIdent, [(VIdent, [VIdent])]) -> Int +baseSize (_,rules) = length bs + where Just (_,bs) = find (("Base" `isPrefixOf`) . fst) rules diff --git a/src/GF/Speech/PrSRGS.hs b/src/GF/Speech/PrSRGS.hs index 60c2ba8e7..dda0f4d8a 100644 --- a/src/GF/Speech/PrSRGS.hs +++ b/src/GF/Speech/PrSRGS.hs @@ -18,6 +18,7 @@ module GF.Speech.PrSRGS (srgsXmlPrinter) where import GF.Data.Utilities +import GF.Data.XML import GF.Speech.SRG import GF.Infra.Ident import GF.Today @@ -32,11 +33,6 @@ import GF.Probabilistic.Probabilistic (Probs) import Data.Char (toUpper,toLower) import Data.List -data XML = Data String | Tag String [Attr] [XML] | Comment String - deriving (Eq,Show) - -type Attr = (String,String) - srgsXmlPrinter :: Ident -- ^ Grammar name -> Options -> Bool -- ^ Whether to include semantic interpretation @@ -48,9 +44,8 @@ srgsXmlPrinter name opts sisr probs cfg = prSrgsXml sisr srg "" prSrgsXml :: Bool -> SRG -> ShowS prSrgsXml sisr (SRG{grammarName=name,startCat=start, origStartCat=origStart,grammarLanguage=l,rules=rs}) - = header . showsXML xmlGr + = showsXMLDoc xmlGr where - header = showString "" root = prCat start xmlGr = grammar root l ([meta "description" ("SRGS XML speech recognition grammar for " ++ name @@ -67,15 +62,17 @@ prSrgsXml sisr (SRG{grammarName=name,startCat=start, prRhs isList rhss = [oneOf (map (mkProd sisr isList) rhss)] -- externally visible rules for each of the GF categories topCatRules = [topRule tc [oneOf (map it cs)] | (tc,cs) <- topCats] - where topCats = buildMultiMap [(gfCat origCat, cat) | SRGRule cat origCat _ <- rs] - gfCat = takeWhile (/='{') + where topCats = buildMultiMap [(cfgCatToGFCat origCat, cat) | SRGRule cat origCat _ <- rs] it c = symItem [] (Cat c) 0 topRule i is = Tag "rule" [("id",i),("scope","public")] - (is ++ [tag ["$ = $$"]]) + (is ++ [tag ["$."++i++ " = $$"]]) rule :: String -> [XML] -> XML rule i = Tag "rule" [("id",i)] +cfgCatToGFCat :: String -> String +cfgCatToGFCat = takeWhile (/='{') + isBase :: Fun -> Bool isBase f = "Base" `isPrefixOf` prIdent f @@ -153,27 +150,3 @@ grammar root l = Tag "grammar" [("xml:lang", l), meta :: String -> String -> XML meta n c = Tag "meta" [("name",n),("content",c)] [] - -comments :: [String] -> [XML] -comments = map Comment - -showsXML :: XML -> ShowS -showsXML (Data s) = showString s -showsXML (Tag t as []) = showChar '<' . showString t . showsAttrs as . showString "/>" -showsXML (Tag t as cs) = - showChar '<' . showString t . showsAttrs as . showChar '>' - . concatS (map showsXML cs) . showString "' -showsXML (Comment c) = showString "" - -showsAttrs :: [Attr] -> ShowS -showsAttrs = concatS . map (showChar ' ' .) . map showsAttr - -showsAttr :: Attr -> ShowS -showsAttr (n,v) = showString n . showString "=\"" . showString (escape v) . showString "\"" - --- FIXME: escape strange charachters with &#xxx; -escape :: String -> String -escape = concatMap escChar - where - escChar c | c `elem` ['"','\\'] = '\\':[c] - | otherwise = [c] diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs index 0d6a143ef..4400c2585 100644 --- a/src/GF/UseGrammar/Custom.hs +++ b/src/GF/UseGrammar/Custom.hs @@ -60,6 +60,7 @@ import GF.Speech.PrJSGF (jsgfPrinter) import GF.Speech.PrSRGS (srgsXmlPrinter) import GF.Speech.PrSLF import GF.Speech.PrFA (faGraphvizPrinter,regularPrinter,faCPrinter) +import GF.Speech.GrammarToVoiceXML (grammar2vxml) import GF.Data.Zipper @@ -257,6 +258,7 @@ customGrammarPrinter = ,(strCI "srgs_xml_ms_sem", \s -> let opts = stateOptions s name = cncId s in srgsXmlPrinter name opts True Nothing $ stateCFG s) + ,(strCI "vxml", grammar2vxml . stateGrammarST) ,(strCI "slf", \s -> let opts = stateOptions s start = getStartCat opts name = cncId s diff --git a/src/HelpFile b/src/HelpFile index a67f79412..0ff04b25b 100644 --- a/src/HelpFile +++ b/src/HelpFile @@ -571,6 +571,7 @@ q, quit: q -printer=srgs_xml_prob SRGS XML format, with weights -printer=srgs_xml_ms_sem SRGS XML format, with semantic tags for the Microsoft Speech API. + -printer=vxml Generate a dialogue system in VoiceXML. -printer=slf a finite automaton in the HTK SLF format -printer=slf_graphviz the same automaton as slf, but in Graphviz format -printer=slf_sub a finite automaton with sub-automata in the