From dd4c792e67a3124706bef57ab23ff542d2d0d961 Mon Sep 17 00:00:00 2001 From: aarne Date: Tue, 26 Jan 2010 21:08:04 +0000 Subject: [PATCH] probability ranking (rt) and gr -probs=FILE --- src/compiler/GF/Command/Commands.hs | 46 ++++++++++++++++++++++-- src/runtime/haskell/PGF/Probabilistic.hs | 19 +++++++--- 2 files changed, 59 insertions(+), 6 deletions(-) diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs index f537099f8..a032145a8 100644 --- a/src/compiler/GF/Command/Commands.hs +++ b/src/compiler/GF/Command/Commands.hs @@ -20,6 +20,8 @@ import PGF.Macros import PGF.Data ---- import PGF.Morphology import PGF.Printer +import PGF.Probabilistic -- (getProbsFromFile,prProbabilities,defaultProbabilities) +import PGF.Generate (genRandomProb) ---- import GF.Compile.Export import GF.Infra.Option (noOptions, readOutputFormat, Encoding(..)) import GF.Infra.UseIO @@ -42,6 +44,7 @@ import System.Cmd import Text.PrettyPrint import Data.List (sort) import Debug.Trace +import System.Random (newStdGen) ---- type CommandOutput = ([Expr],String) ---- errors, etc @@ -245,11 +248,14 @@ allCommands cod env@(pgf, mos) = Map.fromList [ flags = [ ("cat","generation category"), ("lang","uses only functions that have linearizations in all these languages"), - ("number","number of trees generated") + ("number","number of trees generated"), + ("probs", "file with biased probabilities (format 'f 0.4' one by line)") ], exec = \opts _ -> do let pgfr = optRestricted opts - ts <- generateRandom pgfr (optType opts) + gen <- newStdGen + mprobs <- optProbs opts pgfr + ts <- return $ genRandomProb mprobs gen pgfr (optType opts) returnFromExprs $ take (optNum opts) ts }), ("gt", emptyCommandInfo { @@ -540,6 +546,35 @@ allCommands cod env@(pgf, mos) = Map.fromList [ _ -> return (fromString s), flags = [("file","the input file name")] }), + ("rt", emptyCommandInfo { + longname = "rank_trees", + synopsis = "show trees in an order of decreasing probability", + explanation = unlines [ + "Order trees from the most to the least probable, using either", + "even distribution in each category (default) or biased as specified", + "by the file given by flag -probs=FILE, where each line has the form", + "'function probability', e.g. 'youPol_Pron 0.01'." + ], + exec = \opts ts -> do + mprobs <- optProbs opts pgf + let probs = maybe (defaultProbabilities pgf) id mprobs + let tds = rankTreesByProbs probs ts + if isOpt "v" opts + then putStrLn $ + unlines [showExpr [] t ++ "\t--" ++ show d | (t,d) <- tds] + else return () + returnFromExprs $ map fst tds, + flags = [ + ("probs","probabilities from this file (format 'f 0.6' per line)") + ], + options = [ + ("v","show all trees with their probability scores") + ], + examples = [ + "p \"you are here\" | rt -probs=probs | pt -number=1 -- most probable result", + "se utf8 -- set encoding to utf8 (default)" + ] + }), ("tq", emptyCommandInfo { longname = "translation_quiz", synopsis = "start a translation quiz", @@ -829,6 +864,13 @@ allCommands cod env@(pgf, mos) = Map.fromList [ "" -> [] cats -> mapMaybe readType (chunks ',' cats) + optProbs opts pgfr = case valStrOpts "probs" "" opts of + "" -> return Nothing + file -> do + ps <- getProbsFromFile file pgf ---- pgfr! +-- putStrLn $ prProbabilities ps + return $ Just ps + optType opts = let str = valStrOpts "cat" (showCId $ lookStartCat pgf) opts in case readType str of diff --git a/src/runtime/haskell/PGF/Probabilistic.hs b/src/runtime/haskell/PGF/Probabilistic.hs index c0422a784..e42698cfe 100644 --- a/src/runtime/haskell/PGF/Probabilistic.hs +++ b/src/runtime/haskell/PGF/Probabilistic.hs @@ -2,8 +2,10 @@ module PGF.Probabilistic ( probTree -- :: Probabilities -> Tree -> Double ,rankTreesByProbs -- :: Probabilities -> [Tree] -> [Tree] ,Probabilities -- data + ,prProbabilities -- Probabilities -> String ,catProbs ,getProbsFromFile -- :: FilePath -> PGF -> IO Probabilities + ,defaultProbabilities -- :: PGF -> Probabilities ) where import PGF.CId @@ -18,6 +20,10 @@ data Probabilities = Probs { catProbs :: M.Map CId [(Double, (CId,[CId]))] -- prob and arglist } +prProbabilities :: Probabilities -> String +prProbabilities = unlines . map pr . M.toList . funProbs where + pr (f,d) = showCId f ++ "\t" ++ show d + getProbsFromFile :: FilePath -> PGF -> IO Probabilities getProbsFromFile file pgf = do s <- readFile file @@ -33,8 +39,8 @@ fillProbs pgf funs = | (cat,_) <- M.toList (cats (abstract pgf)), let fs = functionsToCat pgf cat] cats1 = map fill cats0 - funs1 = M.fromList [(f,p) | (_,cf) <- cats1, (p,(f,_)) <- cf] - in Probs funs1 (M.fromList cats1) + funs1 = [(f,p) | (_,cf) <- cats1, (p,(f,_)) <- cf] + in Probs (M.fromList funs1) (M.fromList cats1) where fill (cat,fs) = (cat, pad [(getProb0 f,(f,xs)) | (f,xs) <- fs]) where @@ -43,8 +49,13 @@ fillProbs pgf funs = pad :: [(Double,a)] -> [(Double,a)] pad pfs = [(if p== -1 then deflt else p,f) | (p,f) <- pfs] where - deflt = 1 - sum poss / fromIntegral (length negs) - (poss,negs) = partition (> (-1)) (map fst pfs) + deflt = case length negs of + 0 -> 0 + _ -> (1 - sum poss) / fromIntegral (length negs) + (poss,negs) = partition (> (-0.5)) (map fst pfs) + +defaultProbabilities :: PGF -> Probabilities +defaultProbabilities pgf = fillProbs pgf M.empty -- | compute the probability of a given tree probTree :: Probabilities -> Expr -> Double