Use the language flag to select the acoustic model to use with ATK.

This commit is contained in:
bringert
2006-01-05 16:34:35 +00:00
parent 5c0d9d52b3
commit a373760ebb
2 changed files with 25 additions and 12 deletions

View File

@@ -15,7 +15,7 @@
module GF.System.ATKSpeechInput (recognizeSpeech) where module GF.System.ATKSpeechInput (recognizeSpeech) where
import GF.Infra.Ident (Ident, prIdent) import GF.Infra.Ident (Ident, prIdent)
import GF.Infra.Option (Options) import GF.Infra.Option
import GF.Conversion.Types (CGrammar) import GF.Conversion.Types (CGrammar)
import GF.Speech.PrSLF import GF.Speech.PrSLF
@@ -29,11 +29,11 @@ import System.IO
import System.IO.Unsafe import System.IO.Unsafe
data ATKLang = ATKLang { data ATKLang = ATKLang {
cmndef :: Maybe FilePath,
hmmlist :: FilePath, hmmlist :: FilePath,
mmf0 :: FilePath, mmf0 :: FilePath,
mmf1 :: FilePath, mmf1 :: FilePath,
dict :: FilePath dict :: FilePath,
opts :: [(String,String)]
} }
atk_home_error = "The environment variable ATK_HOME is not set. " atk_home_error = "The environment variable ATK_HOME is not set. "
@@ -51,19 +51,22 @@ getLanguage l =
atk_home <- getEnv_ "ATK_HOME" atk_home_error atk_home <- getEnv_ "ATK_HOME" atk_home_error
let res = atk_home ++ "/Resources" let res = atk_home ++ "/Resources"
return $ ATKLang { return $ ATKLang {
cmndef = Just $ res ++ "/UK_SI_ZMFCC/cepmean",
hmmlist = res ++ "/UK_SI_ZMFCC/hmmlistbg", hmmlist = res ++ "/UK_SI_ZMFCC/hmmlistbg",
mmf0 = res ++ "/UK_SI_ZMFCC/WI4", mmf0 = res ++ "/UK_SI_ZMFCC/WI4",
mmf1 = res ++ "/UK_SI_ZMFCC/BGHMM2", mmf1 = res ++ "/UK_SI_ZMFCC/BGHMM2",
dict = res ++ "/beep.dct" } dict = res ++ "/beep.dct",
opts = [("TARGETKIND", "MFCC_0_D_A_Z"),
("HPARM:CMNDEFAULT", res ++ "/UK_SI_ZMFCC/cepmean")]
}
"sv_SE" -> do "sv_SE" -> do
let res = "/home/bjorn/projects/atkswe/stoneage-swe" let res = "/home/bjorn/projects/atkswe/stoneage-swe"
return $ ATKLang { return $ ATKLang {
cmndef = Nothing,
hmmlist = res ++ "/triphones1", hmmlist = res ++ "/triphones1",
mmf0 = res ++ "/hmm12/macros", mmf0 = res ++ "/hmm12/macros",
mmf1 = res ++ "/hmm12/hmmdefs", mmf1 = res ++ "/hmm12/hmmdefs",
dict = res ++ "/dict" } dict = res ++ "/dict",
opts = [("TARGETKIND", "MFCC_0_D_A")]
}
_ -> fail $ "ATKSpeechInput: language " ++ l ++ " not supported" _ -> fail $ "ATKSpeechInput: language " ++ l ++ " not supported"
-- | List of the languages for which we have already loaded the HMM -- | List of the languages for which we have already loaded the HMM
@@ -80,8 +83,8 @@ initATK language =
when (null ls) $ do when (null ls) $ do
config <- getEnv_ "GF_ATK_CFG" gf_atk_cfg_error config <- getEnv_ "GF_ATK_CFG" gf_atk_cfg_error
hPutStrLn stderr $ "Initializing ATK..." hPutStrLn stderr $ "Initializing ATK..."
let ps = map ((,) "HPARM:CMNDEFAULT") (maybeToList (cmndef l)) -- FIXME: different recognizers need different global options
initialize (Just config) ps initialize (Just config) (opts l)
when (language `notElem` ls) $ when (language `notElem` ls) $
do do
let hmmName = "hmm_" ++ language let hmmName = "hmm_" ++ language
@@ -95,14 +98,21 @@ recognizeSpeech :: Ident -- ^ Grammar name
-> Options -> CGrammar -> IO String -> Options -> CGrammar -> IO String
recognizeSpeech name opts cfg = recognizeSpeech name opts cfg =
do do
-- Options
let language = fromMaybe "en_UK" (getOptVal opts speechLanguage)
cat = fromMaybe "S" (getOptVal opts gStartCat) ++ "{}.s"
number = optIntOrN opts flagNumber 1
-- FIXME: use values of cat and number flags
let slf = slfPrinter name opts cfg let slf = slfPrinter name opts cfg
n = prIdent name n = prIdent name
language = "sv_SE"
hmmName = "hmm_" ++ language hmmName = "hmm_" ++ language
dictName = "dict_" ++ language dictName = "dict_" ++ language
slfName = "gram_" ++ n slfName = "gram_" ++ n
recName = "rec_" ++ language ++ "_" ++ n recName = "rec_" ++ language ++ "_" ++ n
print opts
writeFile "debug.net" slf
initATK language initATK language
hPutStrLn stderr "Loading grammar..."
loadGrammarString slfName slf loadGrammarString slfName slf
createRecognizer recName hmmName dictName slfName createRecognizer recName hmmName dictName slfName
hPutStrLn stderr "Listening..." hPutStrLn stderr "Listening..."

View File

@@ -5,7 +5,10 @@
SOURCEFORMAT = HAUDIO SOURCEFORMAT = HAUDIO
SOURCERATE = 625 SOURCERATE = 625
TARGETKIND = MFCC_0_D_A_Z
# TARGETKIND is set per language in GF/System/ATKSpeechInput.hs (passed to ATK at initialization)
# TARGETKIND = MFCC_0_D_A
TARGETRATE = 100000.0 TARGETRATE = 100000.0
WINDOWSIZE = 250000.0 WINDOWSIZE = 250000.0
ENORMALISE = F ENORMALISE = F
@@ -56,7 +59,7 @@ ARMAN: AUTOSIL = F
HREC: CONFSCALE = 0.15 HREC: CONFSCALE = 0.15
HREC: CONFOFFSET = 0.0 HREC: CONFOFFSET = 0.0
HREC: CONFBGHMM = bghmm #HREC: CONFBGHMM = bghmm
# -- Set visibility and positions of ATK controls -- # -- Set visibility and positions of ATK controls --