forked from GitHub/gf-core
probabilities in ShellState
This commit is contained in:
@@ -12,6 +12,14 @@ Changes in functionality since May 17, 2005, release of GF Version 2.2
|
||||
|
||||
</center>
|
||||
|
||||
31/10 (AR) Probabilistic grammars. Probabilities can be used to
|
||||
weight random generation (<tt>gr -prob</tt>) and to rank parse
|
||||
results (<tt>p -prob</tt>). They are read from a separate file
|
||||
(flag <tt>i -probs=File</tt>, format <tt>--# prob Fun Double</tt>)
|
||||
or from the top-level grammar file itself (option <tt>i -prob</tt>).
|
||||
|
||||
<p>
|
||||
|
||||
12/10 (AR) Flag <tt>-atoms=Int</tt> to the command <tt>gt = generate_trees</tt>
|
||||
takes away all zero-argument functions except Int per category. In
|
||||
this way, it is possible to generate a corpus illustrating each
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/21 16:45:58 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.18 $
|
||||
-- > CVS $Date: 2005/10/31 19:02:35 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.19 $
|
||||
--
|
||||
-- for reading grammars and terms from strings and files
|
||||
-----------------------------------------------------------------------------
|
||||
@@ -21,6 +21,7 @@ import GF.Compile.PGrammar
|
||||
import GF.Grammar.TypeCheck
|
||||
import GF.Compile.Compile
|
||||
import GF.Compile.ShellState
|
||||
import GF.Probabilistic.Probabilistic
|
||||
|
||||
import GF.Infra.Modules
|
||||
import GF.Infra.ReadFiles (isOldFile)
|
||||
@@ -50,7 +51,9 @@ string2annotTree gr m = annotate gr . string2absTerm (prt m) ---- prt
|
||||
---string2paramList st = map (renameTrm (lookupConcrete st) . patt2term) . pPattList
|
||||
|
||||
shellStateFromFiles :: Options -> ShellState -> FilePath -> IOE ShellState
|
||||
shellStateFromFiles opts st file = case fileSuffix file of
|
||||
shellStateFromFiles opts st file = do
|
||||
let top = identC $ justModuleName file
|
||||
sh <- case fileSuffix file of
|
||||
"gfcm" -> do
|
||||
cenv <- compileOne opts (compileEnvShSt st []) file
|
||||
ioeErr $ updateShellState opts Nothing st cenv
|
||||
@@ -66,10 +69,14 @@ shellStateFromFiles opts st file = case fileSuffix file of
|
||||
then addOptions (options []) opts' -- for old no emit
|
||||
else addOptions (options [emitCode]) opts'
|
||||
grts <- compileModule osb st file
|
||||
let top = identC $ justModuleName file
|
||||
mtop = if oElem showOld opts' then Nothing else Just top
|
||||
let mtop = if oElem showOld opts' then Nothing else Just top
|
||||
ioeErr $ updateShellState opts' mtop st grts
|
||||
--- liftM (changeModTimes rts) $ grammar2shellState opts gr
|
||||
if (isSetFlag opts probFile || oElem (iOpt "prob") opts)
|
||||
then do
|
||||
probs <- ioeIO $ getProbsFromFile opts file
|
||||
let lang = maybe top id $ concrete sh --- to work with cf, too
|
||||
ioeErr $ addProbs (lang,probs) sh
|
||||
else return sh
|
||||
|
||||
getShellStateFromFiles :: Options -> FilePath -> IO ShellState
|
||||
getShellStateFromFiles os =
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/10/30 23:44:00 $
|
||||
-- > CVS $Date: 2005/10/31 19:02:35 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.49 $
|
||||
-- > CVS $Revision: 1.50 $
|
||||
--
|
||||
-- (Description of the module)
|
||||
-----------------------------------------------------------------------------
|
||||
@@ -462,6 +462,13 @@ abstractOfState = maybe emptyAbstractST id . maybeStateAbstract
|
||||
stateIsWord :: StateGrammar -> String -> Bool
|
||||
stateIsWord sg = isKnownWord (stateMorpho sg)
|
||||
|
||||
addProbs :: (Ident,Probs) -> ShellState -> Err ShellState
|
||||
addProbs ip@(lang,probs)
|
||||
sh@(ShSt x y cs ms ss cfs old_pis mcfgs cfgs pinfos mos pbs os rs acs s) = do
|
||||
let gr = grammarOfLang sh lang
|
||||
probs' <- checkGrammarProbs gr probs
|
||||
let pbs' = (lang,probs') : filter ((/= lang) . fst) pbs
|
||||
return (ShSt x y cs ms ss cfs old_pis mcfgs cfgs pinfos mos pbs' os rs acs s)
|
||||
|
||||
|
||||
{-
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/10/31 08:12:18 $
|
||||
-- > CVS $Date: 2005/10/31 19:02:35 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.2 $
|
||||
-- > CVS $Revision: 1.3 $
|
||||
--
|
||||
-- Probabilistic abstract syntax. AR 30\/10\/2005
|
||||
--
|
||||
@@ -26,6 +26,7 @@ module GF.Probabilistic.Probabilistic (
|
||||
,Probs -- = BinTree Ident Double
|
||||
,getProbsFromFile -- :: Opts -> IO Probs
|
||||
,emptyProbs -- :: Probs
|
||||
,prProbs -- :: Probs -> String
|
||||
) where
|
||||
|
||||
import GF.Canon.GFC
|
||||
@@ -54,8 +55,10 @@ generateRandomTreesProb opts gen gr probs cat =
|
||||
cat' = prt $ snd cat
|
||||
|
||||
-- | check that probabilities attached to a grammar make sense
|
||||
checkGrammarProbs :: GFCGrammar -> Probs -> Err ()
|
||||
checkGrammarProbs gr probs = err Bad (const (return ())) $ checkSGrammar $ gr2sgr gr probs
|
||||
checkGrammarProbs :: GFCGrammar -> Probs -> Err Probs
|
||||
checkGrammarProbs gr probs =
|
||||
err Bad (return . gr2probs) $ checkSGrammar $ gr2sgr gr probs where
|
||||
gr2probs sgr = buildTree [(zIdent f,p) | (_,rs) <- tree2list sgr, ((p,f),_) <- rs]
|
||||
|
||||
-- | compute the probability of a given tree
|
||||
computeProbTree :: Probs -> Tree -> Double
|
||||
@@ -71,14 +74,14 @@ computeProbTree probs (Tr (N (_,at,_,_,_),ts)) = case at of
|
||||
rankByScore :: Ord n => [(a,n)] -> [(a,n)]
|
||||
rankByScore = sortBy (\ (_,p) (_,q) -> compare q p)
|
||||
|
||||
getProbsFromFile :: Options -> IO Probs
|
||||
getProbsFromFile opts = do
|
||||
s <- maybe (return "") readFile $ getOptVal opts probFile
|
||||
getProbsFromFile :: Options -> FilePath -> IO Probs
|
||||
getProbsFromFile opts file = do
|
||||
s <- maybe (readFile file) readFile $ getOptVal opts probFile
|
||||
return $ buildTree $ concatMap pProb $ lines s
|
||||
where
|
||||
pProb s = case words s of
|
||||
"--":f:p:_ | isDouble p -> [(zIdent f, read p)]
|
||||
f:p:_ | isDouble p -> [(zIdent f, read p)]
|
||||
"--#":"prob":f:p:_ | isDouble p -> [(zIdent f, read p)]
|
||||
f:p:_ | isDouble p -> [(zIdent f, read p)]
|
||||
_ -> []
|
||||
isDouble = all (flip elem ('.':['0'..'9']))
|
||||
|
||||
@@ -86,7 +89,11 @@ type Probs = BinTree Ident Double
|
||||
|
||||
emptyProbs :: Probs
|
||||
emptyProbs = emptyBinTree
|
||||
|
||||
|
||||
prProbs :: Probs -> String
|
||||
prProbs = unlines . map pr . tree2list where
|
||||
pr (f,p) = prt f ++ "\t" ++ show p
|
||||
|
||||
------------------------------------------
|
||||
-- translate grammar to simpler form and generated trees back
|
||||
|
||||
@@ -151,21 +158,14 @@ genTree :: [Double] -> SGrammar -> SCat -> (STree,Int)
|
||||
genTree rs gr = gett rs where
|
||||
gett ds "String" = (SString "foo",1)
|
||||
gett ds "Int" = (SInt 1978,1)
|
||||
gett ds cat = let
|
||||
gett ds cat = case look cat of
|
||||
[] -> (SMeta cat,1) -- if no productions, return ?
|
||||
fs -> let
|
||||
d:ds2 = ds
|
||||
(pf,args) = getf d cat
|
||||
(pf,args) = getf d fs
|
||||
(ts,k) = getts ds2 args
|
||||
in (SApp (pf,ts), k+1)
|
||||
getf d cat =
|
||||
let
|
||||
regs0 = [(p,(pf,args)) | (pf@(p,_),(args,_)) <- look cat]
|
||||
{- not needed
|
||||
pstd = 1.0 / genericLength regs
|
||||
regs = if any (>1.0) (map fst regs0)
|
||||
then [(pstd,pa) | (_,pa) <- regs0]
|
||||
else regs0
|
||||
-}
|
||||
in hitRegion d regs0
|
||||
getf d fs = hitRegion d [(p,(pf,args)) | (pf@(p,_),(args,_)) <- fs]
|
||||
getts ds cats = case cats of
|
||||
c:cs -> let
|
||||
(t, k) = gett ds c
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/10/31 08:12:18 $
|
||||
-- > CVS $Date: 2005/10/31 19:02:35 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.48 $
|
||||
-- > CVS $Revision: 1.49 $
|
||||
--
|
||||
-- GF shell command interpreter.
|
||||
-----------------------------------------------------------------------------
|
||||
@@ -222,8 +222,8 @@ execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case com
|
||||
let p = optParseArgErrMsg opts gro x
|
||||
case p of
|
||||
Ok (ts,msg)
|
||||
| isSetFlag opts probFile -> do
|
||||
probs <- getProbsFromFile opts
|
||||
| oElem (iOpt "prob") opts -> do
|
||||
let probs = stateProbs gro
|
||||
let tps = rankByScore [(t,computeProbTree probs t) | t <- ts]
|
||||
putStrLnFlush msg
|
||||
mapM_ putStrLnFlush [show p | (t,p) <- tps]
|
||||
@@ -235,17 +235,8 @@ execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case com
|
||||
let a' = opST2CommandArg (optParseArgErr opts (sgr il)) a
|
||||
returnArg (opTS2CommandArg (optLinearizeTreeVal opts (sgr ol)) a') sa
|
||||
|
||||
|
||||
CGenerateRandom | isSetFlag opts probFile -> do
|
||||
probs <- getProbsFromFile opts
|
||||
let cat = firstAbsCat opts gro
|
||||
let n = optIntOrN opts flagNumber 1
|
||||
gen <- newStdGen
|
||||
let ts = take n $ generateRandomTreesProb opts gen cgr probs cat
|
||||
returnArg (ATrms (map (term2tree gro) ts)) sa
|
||||
|
||||
CGenerateRandom | oElem showCF opts -> do
|
||||
let probs = emptyProbs ---
|
||||
CGenerateRandom | oElem showCF opts || oElem (iOpt "prob") opts -> do
|
||||
let probs = stateProbs gro
|
||||
let cat = firstAbsCat opts gro
|
||||
let n = optIntOrN opts flagNumber 1
|
||||
gen <- newStdGen
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/10/12 12:38:30 $
|
||||
-- > CVS $Date: 2005/10/31 19:02:35 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.17 $
|
||||
-- > CVS $Revision: 1.18 $
|
||||
--
|
||||
-- Help on shell commands. Generated from HelpFile by 'make help'.
|
||||
-- PLEASE DON'T EDIT THIS FILE.
|
||||
@@ -63,6 +63,7 @@ txtHelpFile =
|
||||
"\n -noemit do not emit code (default with old grammar format)" ++
|
||||
"\n -o do emit code (default with new grammar format)" ++
|
||||
"\n -ex preprocess .gfe files if needed" ++
|
||||
"\n -prob read probabilities from top grammar file (format --# prob Fun Double)" ++
|
||||
"\n flags:" ++
|
||||
"\n -abs set the name used for abstract syntax (with -old option)" ++
|
||||
"\n -cnc set the name used for concrete syntax (with -old option)" ++
|
||||
@@ -70,6 +71,7 @@ txtHelpFile =
|
||||
"\n -path use the (colon-separated) search path to find modules" ++
|
||||
"\n -optimize select an optimization to override file-defined flags" ++
|
||||
"\n -conversion select parsing method (values strict|nondet)" ++
|
||||
"\n -probs read probabilities from file (format (--# prob) Fun Double)" ++
|
||||
"\n examples:" ++
|
||||
"\n i English.gf -- ordinary import of Concrete" ++
|
||||
"\n i -retain german/ParadigmsGer.gf -- import of Resource to test" ++
|
||||
@@ -194,6 +196,7 @@ txtHelpFile =
|
||||
"\n options for batch input:" ++
|
||||
"\n -lines parse each line of input separately, ignoring empty lines" ++
|
||||
"\n -all as -lines, but also parse empty lines" ++
|
||||
"\n -prob rank results by probability" ++
|
||||
"\n options for selecting parsing method:" ++
|
||||
"\n (default)parse using an overgenerating CFG" ++
|
||||
"\n -cfg parse using a much less overgenerating CFG" ++
|
||||
@@ -270,6 +273,9 @@ txtHelpFile =
|
||||
"\n Generates a random Tree of a given category. If a Tree" ++
|
||||
"\n argument is given, the command completes the Tree with values to" ++
|
||||
"\n the metavariables in the tree. " ++
|
||||
"\n options:" ++
|
||||
"\n -prob use probabilities (works for nondep types only)" ++
|
||||
"\n -cf use a very fast method (works for nondep types only)" ++
|
||||
"\n flags:" ++
|
||||
"\n -cat generate in this category" ++
|
||||
"\n -lang use the abstract syntax of this grammar" ++
|
||||
@@ -566,6 +572,7 @@ txtHelpFile =
|
||||
"\n *-printer=xml XML: DTD for the pg command, object for st" ++
|
||||
"\n -printer=old old GF: file readable by GF 1.2" ++
|
||||
"\n -printer=stat show some statistics of generated GFC" ++
|
||||
"\n -printer=probs show probabilities of all functions" ++
|
||||
"\n -printer=gsl Nuance GSL speech recognition grammar" ++
|
||||
"\n -printer=jsgf Java Speech Grammar Format" ++
|
||||
"\n -printer=slf a finite automaton in the HTK SLF format" ++
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/10/31 08:12:18 $
|
||||
-- > CVS $Date: 2005/10/31 19:02:35 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.44 $
|
||||
-- > CVS $Revision: 1.45 $
|
||||
--
|
||||
-- The datatype of shell commands and the list of their options.
|
||||
-----------------------------------------------------------------------------
|
||||
@@ -165,18 +165,18 @@ optionsOfCommand co = case co of
|
||||
CSetFlag -> both "utf8 table struct record all multi"
|
||||
"cat lang lexer parser number depth rawtrees unlexer optimize path conversion printer"
|
||||
|
||||
CImport _ -> both "old v s src retain nocf nocheckcirc cflexer noemit o ex"
|
||||
"abs cnc res path optimize conversion cat"
|
||||
CImport _ -> both "old v s src retain nocf nocheckcirc cflexer noemit o ex prob"
|
||||
"abs cnc res path optimize conversion cat probs"
|
||||
CRemoveLanguage _ -> none
|
||||
CEmptyState -> none
|
||||
CStripState -> none
|
||||
CTransformGrammar _ -> flags "printer"
|
||||
CConvertLatex _ -> none
|
||||
CLinearize _ -> both "utf8 table struct record all multi" "lang number unlexer"
|
||||
CParse -> both "new newer cfg mcfg n ign raw v lines all"
|
||||
"cat lang lexer parser number rawtrees probs"
|
||||
CParse -> both "new newer cfg mcfg n ign raw v lines all prob"
|
||||
"cat lang lexer parser number rawtrees"
|
||||
CTranslate _ _ -> opts "cat lexer parser"
|
||||
CGenerateRandom -> flags "cat lang number depth probs"
|
||||
CGenerateRandom -> both "cf prob" "cat lang number depth"
|
||||
CGenerateTrees -> both "metas" "atoms depth alts cat lang number"
|
||||
CPutTerm -> flags "transform number"
|
||||
CWrapTerm _ -> opts "c"
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/10/31 16:48:10 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.79 $
|
||||
-- > CVS $Date: 2005/10/31 19:02:35 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.80 $
|
||||
--
|
||||
-- A database for customizable GF shell commands.
|
||||
--
|
||||
@@ -68,6 +68,7 @@ import GF.UseGrammar.Information
|
||||
import GF.API.GrammarToHaskell
|
||||
-----import GrammarToCanon (showCanon, showCanonOpt)
|
||||
-----import qualified GrammarToGFC as GFC
|
||||
import GF.Probabilistic.Probabilistic (prProbs)
|
||||
|
||||
-- the cf parsing algorithms
|
||||
import GF.CF.ChartParser -- OBSOLETE
|
||||
@@ -266,6 +267,7 @@ customGrammarPrinter =
|
||||
,(strCI "words", unwords . stateGrammarWords)
|
||||
,(strCI "printnames", C.prPrintnamesGrammar . stateGrammarST)
|
||||
,(strCI "stat", prStatistics . stateGrammarST)
|
||||
,(strCI "probs", prProbs . stateProbs)
|
||||
,(strCI "unpar", prCanon . unparametrizeCanon . stateGrammarST)
|
||||
,(strCI "subs", prSubtermStat . stateGrammarST)
|
||||
|
||||
|
||||
@@ -34,6 +34,7 @@ i, import: i File
|
||||
-noemit do not emit code (default with old grammar format)
|
||||
-o do emit code (default with new grammar format)
|
||||
-ex preprocess .gfe files if needed
|
||||
-prob read probabilities from top grammar file (format --# prob Fun Double)
|
||||
flags:
|
||||
-abs set the name used for abstract syntax (with -old option)
|
||||
-cnc set the name used for concrete syntax (with -old option)
|
||||
@@ -41,6 +42,7 @@ i, import: i File
|
||||
-path use the (colon-separated) search path to find modules
|
||||
-optimize select an optimization to override file-defined flags
|
||||
-conversion select parsing method (values strict|nondet)
|
||||
-probs read probabilities from file (format (--# prob) Fun Double)
|
||||
examples:
|
||||
i English.gf -- ordinary import of Concrete
|
||||
i -retain german/ParadigmsGer.gf -- import of Resource to test
|
||||
@@ -165,6 +167,7 @@ p, parse: p String
|
||||
options for batch input:
|
||||
-lines parse each line of input separately, ignoring empty lines
|
||||
-all as -lines, but also parse empty lines
|
||||
-prob rank results by probability
|
||||
options for selecting parsing method:
|
||||
(default)parse using an overgenerating CFG
|
||||
-cfg parse using a much less overgenerating CFG
|
||||
@@ -241,6 +244,9 @@ gr, generate_random: gr Tree?
|
||||
Generates a random Tree of a given category. If a Tree
|
||||
argument is given, the command completes the Tree with values to
|
||||
the metavariables in the tree.
|
||||
options:
|
||||
-prob use probabilities (works for nondep types only)
|
||||
-cf use a very fast method (works for nondep types only)
|
||||
flags:
|
||||
-cat generate in this category
|
||||
-lang use the abstract syntax of this grammar
|
||||
@@ -537,6 +543,7 @@ q, quit: q
|
||||
*-printer=xml XML: DTD for the pg command, object for st
|
||||
-printer=old old GF: file readable by GF 1.2
|
||||
-printer=stat show some statistics of generated GFC
|
||||
-printer=probs show probabilities of all functions
|
||||
-printer=gsl Nuance GSL speech recognition grammar
|
||||
-printer=jsgf Java Speech Grammar Format
|
||||
-printer=slf a finite automaton in the HTK SLF format
|
||||
|
||||
Reference in New Issue
Block a user