From e91c610e5afd0083574d2f28cda07a03fe52ea8f Mon Sep 17 00:00:00 2001 From: aarne Date: Tue, 26 Jan 2010 15:53:49 +0000 Subject: [PATCH] added probabilities to trees and random gen; not yet in shell --- src/runtime/haskell/PGF/Generate.hs | 32 ++++++++++--- src/runtime/haskell/PGF/Probabilistic.hs | 61 ++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 7 deletions(-) create mode 100644 src/runtime/haskell/PGF/Probabilistic.hs diff --git a/src/runtime/haskell/PGF/Generate.hs b/src/runtime/haskell/PGF/Generate.hs index 5add00a78..3f044c224 100644 --- a/src/runtime/haskell/PGF/Generate.hs +++ b/src/runtime/haskell/PGF/Generate.hs @@ -4,6 +4,7 @@ import PGF.CId import PGF.Data import PGF.Macros import PGF.TypeCheck +import PGF.Probabilistic import qualified Data.Map as M import System.Random @@ -29,10 +30,14 @@ generate pgf ty@(DTyp _ cat _) dp = filter (\e -> case checkExpr pgf e ty of -- generate an infinite list of trees randomly genRandom :: StdGen -> PGF -> Type -> [Expr] -genRandom gen pgf ty@(DTyp _ cat _) = filter (\e -> case checkExpr pgf e ty of - Left _ -> False - Right _ -> True ) - (genTrees (randomRs (0.0, 1.0 :: Double) gen) cat) +genRandom = genRandomProb Nothing + +genRandomProb :: Maybe Probabilities -> StdGen -> PGF -> Type -> [Expr] +genRandomProb mprobs gen pgf ty@(DTyp _ cat _) = + filter (\e -> case checkExpr pgf e ty of + Left _ -> False + Right _ -> True ) + (genTrees (randomRs (0.0, 1.0 :: Double) gen) cat) where timeout = 47 -- give up @@ -54,8 +59,9 @@ genRandom gen pgf ty@(DTyp _ cat _) = filter (\e -> case checkExpr pgf e ty of (f,args) = getf d fs (ts,k) = getts ds2 args in (foldl EApp (EFun f) ts, k+1) - getf d fs = let lg = (length fs) in - fs !! (floor (d * fromIntegral lg)) + getf d fs = case mprobs of + Just _ -> hitRegion d [(p,(f,args)) | (p,(f,args)) <- fs] + _ -> let lg = (length fs) in snd (fs !! (floor (d * fromIntegral lg))) getts ds cats = case cats of c:cs -> let (t, k) = gett ds c @@ -63,4 +69,16 @@ genRandom gen pgf ty@(DTyp _ cat _) = filter (\e -> case checkExpr pgf e ty of in (t:ts, k + ks) _ -> ([],0) - fns cat = [(f,(fst (catSkeleton ty))) | (f,ty) <- functionsToCat pgf cat] + fns :: CId -> [(Double,(CId,[CId]))] + fns cat = case mprobs of + Just probs -> maybe [] id $ M.lookup cat (catProbs probs) + _ -> [(deflt,(f,(fst (catSkeleton ty)))) | + let fs = functionsToCat pgf cat, + (f,ty) <- fs, + let deflt = 1.0 / fromIntegral (length fs)] + +hitRegion :: Double -> [(Double,a)] -> a +hitRegion d vs = case vs of + (p1,v1):vs2 -> + if d < p1 then v1 else hitRegion d [(p+p1,v) | (p,v) <- vs2] + diff --git a/src/runtime/haskell/PGF/Probabilistic.hs b/src/runtime/haskell/PGF/Probabilistic.hs new file mode 100644 index 000000000..c0422a784 --- /dev/null +++ b/src/runtime/haskell/PGF/Probabilistic.hs @@ -0,0 +1,61 @@ +module PGF.Probabilistic ( + probTree -- :: Probabilities -> Tree -> Double + ,rankTreesByProbs -- :: Probabilities -> [Tree] -> [Tree] + ,Probabilities -- data + ,catProbs + ,getProbsFromFile -- :: FilePath -> PGF -> IO Probabilities + ) where + +import PGF.CId +import PGF.Data +import PGF.Macros + +import qualified Data.Map as M +import Data.List (sortBy,partition) + +data Probabilities = Probs { + funProbs :: M.Map CId Double, + catProbs :: M.Map CId [(Double, (CId,[CId]))] -- prob and arglist + } + +getProbsFromFile :: FilePath -> PGF -> IO Probabilities +getProbsFromFile file pgf = do + s <- readFile file + let ps0 = M.fromList [(mkCId f,read p) | f:p:_ <- map words (lines s)] + return $ fillProbs pgf ps0 + +-- | build probability tables by filling unspecified funs with prob sum +-- TODO: check that probabilities sum to 1 +fillProbs :: PGF -> M.Map CId Double -> Probabilities +fillProbs pgf funs = + let + cats0 = [(cat,[(f,fst (catSkeleton ty)) | (f,ty) <- fs]) + | (cat,_) <- M.toList (cats (abstract pgf)), + let fs = functionsToCat pgf cat] + cats1 = map fill cats0 + funs1 = M.fromList [(f,p) | (_,cf) <- cats1, (p,(f,_)) <- cf] + in Probs funs1 (M.fromList cats1) + where + fill (cat,fs) = (cat, pad [(getProb0 f,(f,xs)) | (f,xs) <- fs]) + where + getProb0 :: CId -> Double + getProb0 f = maybe (-1) id $ M.lookup f funs + pad :: [(Double,a)] -> [(Double,a)] + pad pfs = [(if p== -1 then deflt else p,f) | (p,f) <- pfs] + where + deflt = 1 - sum poss / fromIntegral (length negs) + (poss,negs) = partition (> (-1)) (map fst pfs) + +-- | compute the probability of a given tree +probTree :: Probabilities -> Expr -> Double +probTree probs t = case t of + EApp f e -> probTree probs f * probTree probs e + EFun f -> maybe 1 id $ M.lookup f (funProbs probs) + _ -> 1 + +-- | rank from highest to lowest probability +rankTreesByProbs :: Probabilities -> [Expr] -> [(Expr,Double)] +rankTreesByProbs probs ts = sortBy (\ (_,p) (_,q) -> compare q p) + [(t, probTree probs t) | t <- ts] + +