diff --git a/src-3.0/GF/Speech/SLF.hs b/src-3.0/GF/Speech/SLF.hs new file mode 100644 index 000000000..9bc025558 --- /dev/null +++ b/src-3.0/GF/Speech/SLF.hs @@ -0,0 +1,190 @@ +---------------------------------------------------------------------- +-- | +-- Module : PrSLF +-- Maintainer : BB +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/11/10 16:43:44 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.12 $ +-- +-- This module converts a CFG to an SLF finite-state network +-- for use with the ATK recognizer. The SLF format is described +-- in the HTK manual, and an example for use in ATK is shown +-- in the ATK manual. +-- +-- FIXME: remove \/ warn \/ fail if there are int \/ string literal +-- categories in the grammar +----------------------------------------------------------------------------- + +module GF.Speech.PrSLF (slfPrinter,slfGraphvizPrinter, + slfSubPrinter,slfSubGraphvizPrinter) where + +import GF.Data.Utilities +import GF.Conversion.Types +import GF.Formalism.CFG +import GF.Formalism.Utilities (Symbol(..),symbol,mapSymbol) +import GF.Infra.Ident +import GF.Infra.Option (Options) +import GF.Infra.Print +import GF.Speech.CFGToFiniteState +import GF.Speech.FiniteState +import GF.Speech.TransformCFG +import qualified GF.Visualization.Graphviz as Dot +import GF.Compile.ShellState (StateGrammar) + +import Control.Monad +import qualified Control.Monad.State as STM +import Data.Char (toUpper) +import Data.List +import Data.Maybe + +data SLFs = SLFs [(String,SLF)] SLF + +data SLF = SLF { slfNodes :: [SLFNode], slfEdges :: [SLFEdge] } + +data SLFNode = SLFNode { nId :: Int, nWord :: SLFWord, nTag :: Maybe String } + | SLFSubLat { nId :: Int, nLat :: String } + +-- | An SLF word is a word, or the empty string. +type SLFWord = Maybe String + +data SLFEdge = SLFEdge { eId :: Int, eStart :: Int, eEnd :: Int } + +type SLF_FA = FA State (Maybe (MFALabel String)) () + +mkFAs :: Options -> StateGrammar -> (SLF_FA, [(String,SLF_FA)]) +mkFAs opts s = (slfStyleFA main, [(c,slfStyleFA n) | (c,n) <- subs]) + where MFA start subs = {- renameSubs $ -} cfgToMFA opts s + main = let (fa,s,f) = newFA_ in newTransition s f (Cat start) fa + +slfStyleFA :: Eq a => DFA a -> FA State (Maybe a) () +slfStyleFA = renameStates [0..] . removeTrivialEmptyNodes . oneFinalState Nothing () + . moveLabelsToNodes . dfa2nfa + +-- | Give sequential names to subnetworks. +renameSubs :: MFA String -> MFA String +renameSubs (MFA start subs) = MFA (newName start) subs' + where newNames = zip (map fst subs) ["sub"++show n | n <- [0..]] + newName s = lookup' s newNames + subs' = [(newName s,renameLabels n) | (s,n) <- subs] + renameLabels = mapTransitions (mapSymbol newName id) + +-- +-- * SLF graphviz printing (without sub-networks) +-- + +slfGraphvizPrinter :: Options -> StateGrammar -> String +slfGraphvizPrinter opts s + = prFAGraphviz $ gvFA $ slfStyleFA $ cfgToFA' opts s + where + gvFA = mapStates (fromMaybe "") . mapTransitions (const "") + +-- +-- * SLF graphviz printing (with sub-networks) +-- + +slfSubGraphvizPrinter :: Options -> StateGrammar -> String +slfSubGraphvizPrinter opts s = Dot.prGraphviz g + where (main, subs) = mkFAs opts s + g = STM.evalState (liftM2 Dot.addSubGraphs ss m) [0..] + ss = mapM (\ (c,f) -> gvSLFFA (Just c) f) subs + m = gvSLFFA Nothing main + +gvSLFFA :: Maybe String -> SLF_FA -> STM.State [State] Dot.Graph +gvSLFFA n fa = + liftM (mkCluster n . faToGraphviz . mapStates (maybe "" mfaLabelToGv) + . mapTransitions (const "")) (rename fa) + where mfaLabelToGv = symbol ("#"++) id + mkCluster Nothing = id + mkCluster (Just x) + = Dot.setName ("cluster_"++x) . Dot.setAttr "label" x + rename fa = do + names <- STM.get + let fa' = renameStates names fa + names' = unusedNames fa' + STM.put names' + return fa' + +-- +-- * SLF printing (without sub-networks) +-- + +slfPrinter :: Options -> StateGrammar -> String +slfPrinter opts s + = prSLF $ automatonToSLF mkSLFNode $ slfStyleFA $ cfgToFA' opts s + +-- +-- * SLF printing (with sub-networks) +-- + +-- | Make a network with subnetworks in SLF +slfSubPrinter :: Options -> StateGrammar -> String +slfSubPrinter opts s = prSLFs slfs + where + (main,subs) = mkFAs opts s + slfs = SLFs [(c, faToSLF fa) | (c,fa) <- subs] (faToSLF main) + faToSLF = automatonToSLF mfaNodeToSLFNode + +automatonToSLF :: (Int -> a -> SLFNode) -> FA State a () -> SLF +automatonToSLF mkNode fa = SLF { slfNodes = ns, slfEdges = es } + where ns = map (uncurry mkNode) (states fa) + es = zipWith (\i (f,t,()) -> mkSLFEdge i (f,t)) [0..] (transitions fa) + +mfaNodeToSLFNode :: Int -> Maybe (MFALabel String) -> SLFNode +mfaNodeToSLFNode i l = case l of + Nothing -> mkSLFNode i Nothing + Just (Tok x) -> mkSLFNode i (Just x) + Just (Cat s) -> mkSLFSubLat i s + +mkSLFNode :: Int -> Maybe String -> SLFNode +mkSLFNode i Nothing = SLFNode { nId = i, nWord = Nothing, nTag = Nothing } +mkSLFNode i (Just w) + | isNonWord w = SLFNode { nId = i, + nWord = Nothing, + nTag = Just w } + | otherwise = SLFNode { nId = i, + nWord = Just (map toUpper w), + nTag = Just w } + +mkSLFSubLat :: Int -> String -> SLFNode +mkSLFSubLat i sub = SLFSubLat { nId = i, nLat = sub } + +mkSLFEdge :: Int -> (Int,Int) -> SLFEdge +mkSLFEdge i (f,t) = SLFEdge { eId = i, eStart = f, eEnd = t } + +prSLFs :: SLFs -> String +prSLFs (SLFs subs main) = unlinesS (map prSub subs ++ [prOneSLF main]) "" + where prSub (n,s) = showString "SUBLAT=" . shows n + . nl . prOneSLF s . showString "." . nl + +prSLF :: SLF -> String +prSLF slf = prOneSLF slf "" + +prOneSLF :: SLF -> ShowS +prOneSLF (SLF { slfNodes = ns, slfEdges = es}) + = header . unlinesS (map prNode ns) . nl . unlinesS (map prEdge es) . nl + where + header = prFields [("N",show (length ns)),("L", show (length es))] . nl + prNode (SLFNode { nId = i, nWord = w, nTag = t }) + = prFields $ [("I",show i),("W",showWord w)] + ++ maybe [] (\t -> [("s",t)]) t + prNode (SLFSubLat { nId = i, nLat = l }) + = prFields [("I",show i),("L",show l)] + prEdge e = prFields [("J",show (eId e)),("S",show (eStart e)),("E",show (eEnd e))] + +-- | Check if a word should not correspond to a word in the SLF file. +isNonWord :: String -> Bool +isNonWord = any isPunct + +isPunct :: Char -> Bool +isPunct c = c `elem` "-_.;.,?!()[]{}" + +showWord :: SLFWord -> String +showWord Nothing = "!NULL" +showWord (Just w) | null w = "!NULL" + | otherwise = w + +prFields :: [(String,String)] -> ShowS +prFields fs = unwordsS [ showString l . showChar '=' . showString v | (l,v) <- fs ]