probability ranking (rt) and gr -probs=FILE

This commit is contained in:
aarne
2010-01-26 21:08:04 +00:00
parent e91c610e5a
commit dd4c792e67
2 changed files with 59 additions and 6 deletions

View File

@@ -20,6 +20,8 @@ import PGF.Macros
import PGF.Data ----
import PGF.Morphology
import PGF.Printer
import PGF.Probabilistic -- (getProbsFromFile,prProbabilities,defaultProbabilities)
import PGF.Generate (genRandomProb) ----
import GF.Compile.Export
import GF.Infra.Option (noOptions, readOutputFormat, Encoding(..))
import GF.Infra.UseIO
@@ -42,6 +44,7 @@ import System.Cmd
import Text.PrettyPrint
import Data.List (sort)
import Debug.Trace
import System.Random (newStdGen) ----
type CommandOutput = ([Expr],String) ---- errors, etc
@@ -245,11 +248,14 @@ allCommands cod env@(pgf, mos) = Map.fromList [
flags = [
("cat","generation category"),
("lang","uses only functions that have linearizations in all these languages"),
("number","number of trees generated")
("number","number of trees generated"),
("probs", "file with biased probabilities (format 'f 0.4' one by line)")
],
exec = \opts _ -> do
let pgfr = optRestricted opts
ts <- generateRandom pgfr (optType opts)
gen <- newStdGen
mprobs <- optProbs opts pgfr
ts <- return $ genRandomProb mprobs gen pgfr (optType opts)
returnFromExprs $ take (optNum opts) ts
}),
("gt", emptyCommandInfo {
@@ -540,6 +546,35 @@ allCommands cod env@(pgf, mos) = Map.fromList [
_ -> return (fromString s),
flags = [("file","the input file name")]
}),
("rt", emptyCommandInfo {
longname = "rank_trees",
synopsis = "show trees in an order of decreasing probability",
explanation = unlines [
"Order trees from the most to the least probable, using either",
"even distribution in each category (default) or biased as specified",
"by the file given by flag -probs=FILE, where each line has the form",
"'function probability', e.g. 'youPol_Pron 0.01'."
],
exec = \opts ts -> do
mprobs <- optProbs opts pgf
let probs = maybe (defaultProbabilities pgf) id mprobs
let tds = rankTreesByProbs probs ts
if isOpt "v" opts
then putStrLn $
unlines [showExpr [] t ++ "\t--" ++ show d | (t,d) <- tds]
else return ()
returnFromExprs $ map fst tds,
flags = [
("probs","probabilities from this file (format 'f 0.6' per line)")
],
options = [
("v","show all trees with their probability scores")
],
examples = [
"p \"you are here\" | rt -probs=probs | pt -number=1 -- most probable result",
"se utf8 -- set encoding to utf8 (default)"
]
}),
("tq", emptyCommandInfo {
longname = "translation_quiz",
synopsis = "start a translation quiz",
@@ -829,6 +864,13 @@ allCommands cod env@(pgf, mos) = Map.fromList [
"" -> []
cats -> mapMaybe readType (chunks ',' cats)
optProbs opts pgfr = case valStrOpts "probs" "" opts of
"" -> return Nothing
file -> do
ps <- getProbsFromFile file pgf ---- pgfr!
-- putStrLn $ prProbabilities ps
return $ Just ps
optType opts =
let str = valStrOpts "cat" (showCId $ lookStartCat pgf) opts
in case readType str of

View File

@@ -2,8 +2,10 @@ module PGF.Probabilistic (
probTree -- :: Probabilities -> Tree -> Double
,rankTreesByProbs -- :: Probabilities -> [Tree] -> [Tree]
,Probabilities -- data
,prProbabilities -- Probabilities -> String
,catProbs
,getProbsFromFile -- :: FilePath -> PGF -> IO Probabilities
,defaultProbabilities -- :: PGF -> Probabilities
) where
import PGF.CId
@@ -18,6 +20,10 @@ data Probabilities = Probs {
catProbs :: M.Map CId [(Double, (CId,[CId]))] -- prob and arglist
}
prProbabilities :: Probabilities -> String
prProbabilities = unlines . map pr . M.toList . funProbs where
pr (f,d) = showCId f ++ "\t" ++ show d
getProbsFromFile :: FilePath -> PGF -> IO Probabilities
getProbsFromFile file pgf = do
s <- readFile file
@@ -33,8 +39,8 @@ fillProbs pgf funs =
| (cat,_) <- M.toList (cats (abstract pgf)),
let fs = functionsToCat pgf cat]
cats1 = map fill cats0
funs1 = M.fromList [(f,p) | (_,cf) <- cats1, (p,(f,_)) <- cf]
in Probs funs1 (M.fromList cats1)
funs1 = [(f,p) | (_,cf) <- cats1, (p,(f,_)) <- cf]
in Probs (M.fromList funs1) (M.fromList cats1)
where
fill (cat,fs) = (cat, pad [(getProb0 f,(f,xs)) | (f,xs) <- fs])
where
@@ -43,8 +49,13 @@ fillProbs pgf funs =
pad :: [(Double,a)] -> [(Double,a)]
pad pfs = [(if p== -1 then deflt else p,f) | (p,f) <- pfs]
where
deflt = 1 - sum poss / fromIntegral (length negs)
(poss,negs) = partition (> (-1)) (map fst pfs)
deflt = case length negs of
0 -> 0
_ -> (1 - sum poss) / fromIntegral (length negs)
(poss,negs) = partition (> (-0.5)) (map fst pfs)
defaultProbabilities :: PGF -> Probabilities
defaultProbabilities pgf = fillProbs pgf M.empty
-- | compute the probability of a given tree
probTree :: Probabilities -> Expr -> Double