cleanup and export the Probabilistic API from PGF

This commit is contained in:
krasimir
2010-09-22 09:06:19 +00:00
parent fb15e95a07
commit 1c9305e7a3
3 changed files with 40 additions and 21 deletions

View File

@@ -977,8 +977,8 @@ allCommands env@(pgf, mos) = Map.fromList [
optProbs opts pgfr = case valStrOpts "probs" "" opts of optProbs opts pgfr = case valStrOpts "probs" "" opts of
"" -> return Nothing "" -> return Nothing
file -> do file -> do
ps <- getProbsFromFile file pgf ---- pgfr! ps <- readProbabilitiesFromFile file pgf ---- pgfr!
-- putStrLn $ prProbabilities ps -- putStrLn $ showProbabilities ps
return $ Just ps return $ Just ps
optFile opts = valStrOpts "file" "_gftmp" opts optFile opts = valStrOpts "file" "_gftmp" opts

View File

@@ -99,6 +99,13 @@ module PGF(
graphvizBracketedString, graphvizBracketedString,
graphvizAlignment, graphvizAlignment,
-- * Probabilities
Probabilities,
mkProbabilities,
defaultProbabilities,
showProbabilities,
readProbabilitiesFromFile,
-- * Browsing -- * Browsing
browse browse
) where ) where
@@ -109,6 +116,7 @@ import PGF.Generate
import PGF.TypeCheck import PGF.TypeCheck
import PGF.Paraphrase import PGF.Paraphrase
import PGF.VisualizeTree import PGF.VisualizeTree
import PGF.Probabilistic
import PGF.Macros import PGF.Macros
import PGF.Expr (Tree) import PGF.Expr (Tree)
import PGF.Morphology import PGF.Morphology

View File

@@ -1,12 +1,13 @@
module PGF.Probabilistic ( module PGF.Probabilistic
probTree -- :: Probabilities -> Tree -> Double ( Probabilities(..)
,rankTreesByProbs -- :: Probabilities -> [Tree] -> [Tree] , mkProbabilities -- :: PGF -> M.Map CId Double -> Probabilities
,Probabilities -- data , defaultProbabilities -- :: PGF -> Probabilities
,prProbabilities -- Probabilities -> String , showProbabilities -- :: Probabilities -> String
,catProbs , readProbabilitiesFromFile -- :: FilePath -> PGF -> IO Probabilities
,getProbsFromFile -- :: FilePath -> PGF -> IO Probabilities
,defaultProbabilities -- :: PGF -> Probabilities , probTree -- :: Probabilities -> Tree -> Double
) where , rankTreesByProbs -- :: Probabilities -> [Tree] -> [Tree]
) where
import PGF.CId import PGF.CId
import PGF.Data import PGF.Data
@@ -15,25 +16,34 @@ import PGF.Macros
import qualified Data.Map as M import qualified Data.Map as M
import Data.List (sortBy,partition) import Data.List (sortBy,partition)
-- | An abstract data structure which represents
-- the probabilities for the different functions in a grammar.
data Probabilities = Probs { data Probabilities = Probs {
funProbs :: M.Map CId Double, funProbs :: M.Map CId Double,
catProbs :: M.Map CId [(Double, (CId,[CId]))] -- prob and arglist catProbs :: M.Map CId [(Double, (CId,[CId]))] -- prob and arglist
} }
prProbabilities :: Probabilities -> String -- | Renders the probability structure as a string
prProbabilities = unlines . map pr . M.toList . funProbs where showProbabilities :: Probabilities -> String
showProbabilities = unlines . map pr . M.toList . funProbs where
pr (f,d) = showCId f ++ "\t" ++ show d pr (f,d) = showCId f ++ "\t" ++ show d
getProbsFromFile :: FilePath -> PGF -> IO Probabilities -- | Reads the probabilities from a file.
getProbsFromFile file pgf = do -- This should be a text file where on every line
-- there is a function name followed by a real number.
-- The number represents the probability mass allocated for that function.
-- The function name and the probability should be separated by whitespace.
readProbabilitiesFromFile :: FilePath -> PGF -> IO Probabilities
readProbabilitiesFromFile file pgf = do
s <- readFile file s <- readFile file
let ps0 = M.fromList [(mkCId f,read p) | f:p:_ <- map words (lines s)] let ps0 = M.fromList [(mkCId f,read p) | f:p:_ <- map words (lines s)]
return $ fillProbs pgf ps0 return $ mkProbabilities pgf ps0
-- | build probability tables by filling unspecified funs with prob sum -- | Builds probability tables by filling unspecified funs with probability sum
-- TODO: check that probabilities sum to 1 --
fillProbs :: PGF -> M.Map CId Double -> Probabilities -- TODO: check that probabilities sum to 1
fillProbs pgf funs = mkProbabilities :: PGF -> M.Map CId Double -> Probabilities
mkProbabilities pgf funs =
let let
cats0 = [(cat,[(f,fst (catSkeleton ty)) | (f,ty) <- fs]) cats0 = [(cat,[(f,fst (catSkeleton ty)) | (f,ty) <- fs])
| (cat,_) <- M.toList (cats (abstract pgf)), | (cat,_) <- M.toList (cats (abstract pgf)),
@@ -54,8 +64,9 @@ fillProbs pgf funs =
_ -> (1 - sum poss) / fromIntegral (length negs) _ -> (1 - sum poss) / fromIntegral (length negs)
(poss,negs) = partition (> (-0.5)) (map fst pfs) (poss,negs) = partition (> (-0.5)) (map fst pfs)
-- | Returns the default even distribution.
defaultProbabilities :: PGF -> Probabilities defaultProbabilities :: PGF -> Probabilities
defaultProbabilities pgf = fillProbs pgf M.empty defaultProbabilities pgf = mkProbabilities pgf M.empty
-- | compute the probability of a given tree -- | compute the probability of a given tree
probTree :: Probabilities -> Expr -> Double probTree :: Probabilities -> Expr -> Double