mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-15 07:49:31 -06:00
+ References to modules under src/compiler have been eliminated from the PGF library (under src/runtime/haskell). Only two functions had to be moved (from GF.Data.Utilities to PGF.Utilities) to make this possible, other apparent dependencies turned out to be vacuous. + In gf.cabal, the GF executable no longer directly depends on the PGF library source directory, but only on the exposed library modules. This means that there is less duplication in gf.cabal and that the 30 modules in the PGF library will no longer be compiled twice while building GF. To make this possible, additional PGF library modules have been exposed, even though they should probably be considered for internal use only. They could be collected in a PGF.Internal module, or marked as "unstable", to make this explicit. + Also, by using the -fwarn-unused-imports flag, ~220 redundant imports were found and removed, reducing the total number of imports by ~15%.
179 lines
6.1 KiB
Haskell
179 lines
6.1 KiB
Haskell
----------------------------------------------------------------------
|
|
-- |
|
|
-- Module : GF.Speech.SLF
|
|
--
|
|
-- This module converts a CFG to an SLF finite-state network
|
|
-- for use with the ATK recognizer. The SLF format is described
|
|
-- in the HTK manual, and an example for use in ATK is shown
|
|
-- in the ATK manual.
|
|
--
|
|
-----------------------------------------------------------------------------
|
|
|
|
module GF.Speech.SLF (slfPrinter,slfGraphvizPrinter,
|
|
slfSubPrinter,slfSubGraphvizPrinter) where
|
|
|
|
import GF.Data.Utilities
|
|
import GF.Speech.CFG
|
|
import GF.Speech.FiniteState
|
|
--import GF.Speech.CFG
|
|
import GF.Speech.CFGToFA
|
|
import GF.Speech.PGFToCFG
|
|
import qualified GF.Data.Graphviz as Dot
|
|
import PGF
|
|
--import PGF.CId
|
|
|
|
import Control.Monad
|
|
import qualified Control.Monad.State as STM
|
|
import Data.Char (toUpper)
|
|
--import Data.List
|
|
import Data.Maybe
|
|
|
|
data SLFs = SLFs [(String,SLF)] SLF
|
|
|
|
data SLF = SLF { slfNodes :: [SLFNode], slfEdges :: [SLFEdge] }
|
|
|
|
data SLFNode = SLFNode { nId :: Int, nWord :: SLFWord, nTag :: Maybe String }
|
|
| SLFSubLat { nId :: Int, nLat :: String }
|
|
|
|
-- | An SLF word is a word, or the empty string.
|
|
type SLFWord = Maybe String
|
|
|
|
data SLFEdge = SLFEdge { eId :: Int, eStart :: Int, eEnd :: Int }
|
|
|
|
type SLF_FA = FA State (Maybe CFSymbol) ()
|
|
|
|
mkFAs :: PGF -> CId -> (SLF_FA, [(String,SLF_FA)])
|
|
mkFAs pgf cnc = (slfStyleFA main, [(c,slfStyleFA n) | (c,n) <- subs])
|
|
where MFA start subs = {- renameSubs $ -} cfgToMFA $ pgfToCFG pgf cnc
|
|
main = let (fa,s,f) = newFA_ in newTransition s f (NonTerminal start) fa
|
|
|
|
slfStyleFA :: Eq a => DFA a -> FA State (Maybe a) ()
|
|
slfStyleFA = renameStates [0..] . removeTrivialEmptyNodes . oneFinalState Nothing ()
|
|
. moveLabelsToNodes . dfa2nfa
|
|
|
|
-- | Give sequential names to subnetworks.
|
|
renameSubs :: MFA -> MFA
|
|
renameSubs (MFA start subs) = MFA (newName start) subs'
|
|
where newNames = zip (map fst subs) ["sub"++show n | n <- [0..]]
|
|
newName s = lookup' s newNames
|
|
subs' = [(newName s,renameLabels n) | (s,n) <- subs]
|
|
renameLabels = mapTransitions (mapSymbol newName id)
|
|
|
|
--
|
|
-- * SLF graphviz printing (without sub-networks)
|
|
--
|
|
|
|
slfGraphvizPrinter :: PGF -> CId -> String
|
|
slfGraphvizPrinter pgf cnc
|
|
= prFAGraphviz $ gvFA $ slfStyleFA $ cfgToFA' $ pgfToCFG pgf cnc
|
|
where
|
|
gvFA = mapStates (fromMaybe "") . mapTransitions (const "")
|
|
|
|
--
|
|
-- * SLF graphviz printing (with sub-networks)
|
|
--
|
|
|
|
slfSubGraphvizPrinter :: PGF -> CId -> String
|
|
slfSubGraphvizPrinter pgf cnc = Dot.prGraphviz g
|
|
where (main, subs) = mkFAs pgf cnc
|
|
g = STM.evalState (liftM2 Dot.addSubGraphs ss m) [0..]
|
|
ss = mapM (\ (c,f) -> gvSLFFA (Just c) f) subs
|
|
m = gvSLFFA Nothing main
|
|
|
|
gvSLFFA :: Maybe String -> SLF_FA -> STM.State [State] Dot.Graph
|
|
gvSLFFA n fa =
|
|
liftM (mkCluster n . faToGraphviz . mapStates (maybe "" mfaLabelToGv)
|
|
. mapTransitions (const "")) (rename fa)
|
|
where mfaLabelToGv = symbol ("#"++) id
|
|
mkCluster Nothing = id
|
|
mkCluster (Just x)
|
|
= Dot.setName ("cluster_"++x) . Dot.setAttr "label" x
|
|
rename fa = do
|
|
names <- STM.get
|
|
let fa' = renameStates names fa
|
|
names' = unusedNames fa'
|
|
STM.put names'
|
|
return fa'
|
|
|
|
--
|
|
-- * SLF printing (without sub-networks)
|
|
--
|
|
|
|
slfPrinter :: PGF -> CId -> String
|
|
slfPrinter pgf cnc
|
|
= prSLF $ automatonToSLF mkSLFNode $ slfStyleFA $ cfgToFA' $ pgfToCFG pgf cnc
|
|
|
|
--
|
|
-- * SLF printing (with sub-networks)
|
|
--
|
|
|
|
-- | Make a network with subnetworks in SLF
|
|
slfSubPrinter :: PGF -> CId -> String
|
|
slfSubPrinter pgf cnc = prSLFs slfs
|
|
where
|
|
(main,subs) = mkFAs pgf cnc
|
|
slfs = SLFs [(c, faToSLF fa) | (c,fa) <- subs] (faToSLF main)
|
|
faToSLF = automatonToSLF mfaNodeToSLFNode
|
|
|
|
automatonToSLF :: (Int -> a -> SLFNode) -> FA State a () -> SLF
|
|
automatonToSLF mkNode fa = SLF { slfNodes = ns, slfEdges = es }
|
|
where ns = map (uncurry mkNode) (states fa)
|
|
es = zipWith (\i (f,t,()) -> mkSLFEdge i (f,t)) [0..] (transitions fa)
|
|
|
|
mfaNodeToSLFNode :: Int -> Maybe CFSymbol -> SLFNode
|
|
mfaNodeToSLFNode i l = case l of
|
|
Nothing -> mkSLFNode i Nothing
|
|
Just (Terminal x) -> mkSLFNode i (Just x)
|
|
Just (NonTerminal s) -> mkSLFSubLat i s
|
|
|
|
mkSLFNode :: Int -> Maybe String -> SLFNode
|
|
mkSLFNode i Nothing = SLFNode { nId = i, nWord = Nothing, nTag = Nothing }
|
|
mkSLFNode i (Just w)
|
|
| isNonWord w = SLFNode { nId = i,
|
|
nWord = Nothing,
|
|
nTag = Just w }
|
|
| otherwise = SLFNode { nId = i,
|
|
nWord = Just (map toUpper w),
|
|
nTag = Just w }
|
|
|
|
mkSLFSubLat :: Int -> String -> SLFNode
|
|
mkSLFSubLat i sub = SLFSubLat { nId = i, nLat = sub }
|
|
|
|
mkSLFEdge :: Int -> (Int,Int) -> SLFEdge
|
|
mkSLFEdge i (f,t) = SLFEdge { eId = i, eStart = f, eEnd = t }
|
|
|
|
prSLFs :: SLFs -> String
|
|
prSLFs (SLFs subs main) = unlinesS (map prSub subs ++ [prOneSLF main]) ""
|
|
where prSub (n,s) = showString "SUBLAT=" . shows n
|
|
. nl . prOneSLF s . showString "." . nl
|
|
|
|
prSLF :: SLF -> String
|
|
prSLF slf = prOneSLF slf ""
|
|
|
|
prOneSLF :: SLF -> ShowS
|
|
prOneSLF (SLF { slfNodes = ns, slfEdges = es})
|
|
= header . unlinesS (map prNode ns) . nl . unlinesS (map prEdge es) . nl
|
|
where
|
|
header = prFields [("N",show (length ns)),("L", show (length es))] . nl
|
|
prNode (SLFNode { nId = i, nWord = w, nTag = t })
|
|
= prFields $ [("I",show i),("W",showWord w)]
|
|
++ maybe [] (\t -> [("s",t)]) t
|
|
prNode (SLFSubLat { nId = i, nLat = l })
|
|
= prFields [("I",show i),("L",show l)]
|
|
prEdge e = prFields [("J",show (eId e)),("S",show (eStart e)),("E",show (eEnd e))]
|
|
|
|
-- | Check if a word should not correspond to a word in the SLF file.
|
|
isNonWord :: String -> Bool
|
|
isNonWord = any isPunct
|
|
|
|
isPunct :: Char -> Bool
|
|
isPunct c = c `elem` "-_.;.,?!()[]{}"
|
|
|
|
showWord :: SLFWord -> String
|
|
showWord Nothing = "!NULL"
|
|
showWord (Just w) | null w = "!NULL"
|
|
| otherwise = w
|
|
|
|
prFields :: [(String,String)] -> ShowS
|
|
prFields fs = unwordsS [ showString l . showChar '=' . showString v | (l,v) <- fs ]
|