mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-23 11:42:49 -06:00
Use language flag to select the acoustic model to use with ATK.
This commit is contained in:
@@ -15,7 +15,7 @@
|
|||||||
module GF.System.ATKSpeechInput (recognizeSpeech) where
|
module GF.System.ATKSpeechInput (recognizeSpeech) where
|
||||||
|
|
||||||
import GF.Infra.Ident (Ident, prIdent)
|
import GF.Infra.Ident (Ident, prIdent)
|
||||||
import GF.Infra.Option (Options)
|
import GF.Infra.Option
|
||||||
import GF.Conversion.Types (CGrammar)
|
import GF.Conversion.Types (CGrammar)
|
||||||
import GF.Speech.PrSLF
|
import GF.Speech.PrSLF
|
||||||
|
|
||||||
@@ -29,11 +29,11 @@ import System.IO
|
|||||||
import System.IO.Unsafe
|
import System.IO.Unsafe
|
||||||
|
|
||||||
data ATKLang = ATKLang {
|
data ATKLang = ATKLang {
|
||||||
cmndef :: Maybe FilePath,
|
|
||||||
hmmlist :: FilePath,
|
hmmlist :: FilePath,
|
||||||
mmf0 :: FilePath,
|
mmf0 :: FilePath,
|
||||||
mmf1 :: FilePath,
|
mmf1 :: FilePath,
|
||||||
dict :: FilePath
|
dict :: FilePath,
|
||||||
|
opts :: [(String,String)]
|
||||||
}
|
}
|
||||||
|
|
||||||
atk_home_error = "The environment variable ATK_HOME is not set. "
|
atk_home_error = "The environment variable ATK_HOME is not set. "
|
||||||
@@ -51,19 +51,22 @@ getLanguage l =
|
|||||||
atk_home <- getEnv_ "ATK_HOME" atk_home_error
|
atk_home <- getEnv_ "ATK_HOME" atk_home_error
|
||||||
let res = atk_home ++ "/Resources"
|
let res = atk_home ++ "/Resources"
|
||||||
return $ ATKLang {
|
return $ ATKLang {
|
||||||
cmndef = Just $ res ++ "/UK_SI_ZMFCC/cepmean",
|
|
||||||
hmmlist = res ++ "/UK_SI_ZMFCC/hmmlistbg",
|
hmmlist = res ++ "/UK_SI_ZMFCC/hmmlistbg",
|
||||||
mmf0 = res ++ "/UK_SI_ZMFCC/WI4",
|
mmf0 = res ++ "/UK_SI_ZMFCC/WI4",
|
||||||
mmf1 = res ++ "/UK_SI_ZMFCC/BGHMM2",
|
mmf1 = res ++ "/UK_SI_ZMFCC/BGHMM2",
|
||||||
dict = res ++ "/beep.dct" }
|
dict = res ++ "/beep.dct",
|
||||||
|
opts = [("TARGETKIND", "MFCC_0_D_A_Z"),
|
||||||
|
("HPARM:CMNDEFAULT", res ++ "/UK_SI_ZMFCC/cepmean")]
|
||||||
|
}
|
||||||
"sv_SE" -> do
|
"sv_SE" -> do
|
||||||
let res = "/home/bjorn/projects/atkswe/stoneage-swe"
|
let res = "/home/bjorn/projects/atkswe/stoneage-swe"
|
||||||
return $ ATKLang {
|
return $ ATKLang {
|
||||||
cmndef = Nothing,
|
|
||||||
hmmlist = res ++ "/triphones1",
|
hmmlist = res ++ "/triphones1",
|
||||||
mmf0 = res ++ "/hmm12/macros",
|
mmf0 = res ++ "/hmm12/macros",
|
||||||
mmf1 = res ++ "/hmm12/hmmdefs",
|
mmf1 = res ++ "/hmm12/hmmdefs",
|
||||||
dict = res ++ "/dict" }
|
dict = res ++ "/dict",
|
||||||
|
opts = [("TARGETKIND", "MFCC_0_D_A")]
|
||||||
|
}
|
||||||
_ -> fail $ "ATKSpeechInput: language " ++ l ++ " not supported"
|
_ -> fail $ "ATKSpeechInput: language " ++ l ++ " not supported"
|
||||||
|
|
||||||
-- | List of the languages for which we have already loaded the HMM
|
-- | List of the languages for which we have already loaded the HMM
|
||||||
@@ -80,8 +83,8 @@ initATK language =
|
|||||||
when (null ls) $ do
|
when (null ls) $ do
|
||||||
config <- getEnv_ "GF_ATK_CFG" gf_atk_cfg_error
|
config <- getEnv_ "GF_ATK_CFG" gf_atk_cfg_error
|
||||||
hPutStrLn stderr $ "Initializing ATK..."
|
hPutStrLn stderr $ "Initializing ATK..."
|
||||||
let ps = map ((,) "HPARM:CMNDEFAULT") (maybeToList (cmndef l))
|
-- FIXME: different recognizers need different global options
|
||||||
initialize (Just config) ps
|
initialize (Just config) (opts l)
|
||||||
when (language `notElem` ls) $
|
when (language `notElem` ls) $
|
||||||
do
|
do
|
||||||
let hmmName = "hmm_" ++ language
|
let hmmName = "hmm_" ++ language
|
||||||
@@ -95,14 +98,21 @@ recognizeSpeech :: Ident -- ^ Grammar name
|
|||||||
-> Options -> CGrammar -> IO String
|
-> Options -> CGrammar -> IO String
|
||||||
recognizeSpeech name opts cfg =
|
recognizeSpeech name opts cfg =
|
||||||
do
|
do
|
||||||
|
-- Options
|
||||||
|
let language = fromMaybe "en_UK" (getOptVal opts speechLanguage)
|
||||||
|
cat = fromMaybe "S" (getOptVal opts gStartCat) ++ "{}.s"
|
||||||
|
number = optIntOrN opts flagNumber 1
|
||||||
|
-- FIXME: use values of cat and number flags
|
||||||
let slf = slfPrinter name opts cfg
|
let slf = slfPrinter name opts cfg
|
||||||
n = prIdent name
|
n = prIdent name
|
||||||
language = "sv_SE"
|
|
||||||
hmmName = "hmm_" ++ language
|
hmmName = "hmm_" ++ language
|
||||||
dictName = "dict_" ++ language
|
dictName = "dict_" ++ language
|
||||||
slfName = "gram_" ++ n
|
slfName = "gram_" ++ n
|
||||||
recName = "rec_" ++ language ++ "_" ++ n
|
recName = "rec_" ++ language ++ "_" ++ n
|
||||||
|
print opts
|
||||||
|
writeFile "debug.net" slf
|
||||||
initATK language
|
initATK language
|
||||||
|
hPutStrLn stderr "Loading grammar..."
|
||||||
loadGrammarString slfName slf
|
loadGrammarString slfName slf
|
||||||
createRecognizer recName hmmName dictName slfName
|
createRecognizer recName hmmName dictName slfName
|
||||||
hPutStrLn stderr "Listening..."
|
hPutStrLn stderr "Listening..."
|
||||||
|
|||||||
@@ -5,7 +5,10 @@
|
|||||||
|
|
||||||
SOURCEFORMAT = HAUDIO
|
SOURCEFORMAT = HAUDIO
|
||||||
SOURCERATE = 625
|
SOURCERATE = 625
|
||||||
TARGETKIND = MFCC_0_D_A_Z
|
|
||||||
|
# Set in GF/System/ATKSpeechInput.hs
|
||||||
|
# TARGETKIND = MFCC_0_D_A
|
||||||
|
|
||||||
TARGETRATE = 100000.0
|
TARGETRATE = 100000.0
|
||||||
WINDOWSIZE = 250000.0
|
WINDOWSIZE = 250000.0
|
||||||
ENORMALISE = F
|
ENORMALISE = F
|
||||||
@@ -56,7 +59,7 @@ ARMAN: AUTOSIL = F
|
|||||||
|
|
||||||
HREC: CONFSCALE = 0.15
|
HREC: CONFSCALE = 0.15
|
||||||
HREC: CONFOFFSET = 0.0
|
HREC: CONFOFFSET = 0.0
|
||||||
HREC: CONFBGHMM = bghmm
|
#HREC: CONFBGHMM = bghmm
|
||||||
|
|
||||||
# -- Set visibility and positions of ATK controls --
|
# -- Set visibility and positions of ATK controls --
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user