probability ranking (rt) and gr -probs=FILE

This commit is contained in:
aarne
2010-01-26 21:08:04 +00:00
parent 9226dc6052
commit a463443cf5
2 changed files with 59 additions and 6 deletions

View File

@@ -20,6 +20,8 @@ import PGF.Macros
import PGF.Data ---- import PGF.Data ----
import PGF.Morphology import PGF.Morphology
import PGF.Printer import PGF.Printer
import PGF.Probabilistic -- (getProbsFromFile,prProbabilities,defaultProbabilities)
import PGF.Generate (genRandomProb) ----
import GF.Compile.Export import GF.Compile.Export
import GF.Infra.Option (noOptions, readOutputFormat, Encoding(..)) import GF.Infra.Option (noOptions, readOutputFormat, Encoding(..))
import GF.Infra.UseIO import GF.Infra.UseIO
@@ -42,6 +44,7 @@ import System.Cmd
import Text.PrettyPrint import Text.PrettyPrint
import Data.List (sort) import Data.List (sort)
import Debug.Trace import Debug.Trace
import System.Random (newStdGen) ----
type CommandOutput = ([Expr],String) ---- errors, etc type CommandOutput = ([Expr],String) ---- errors, etc
@@ -245,11 +248,14 @@ allCommands cod env@(pgf, mos) = Map.fromList [
flags = [ flags = [
("cat","generation category"), ("cat","generation category"),
("lang","uses only functions that have linearizations in all these languages"), ("lang","uses only functions that have linearizations in all these languages"),
("number","number of trees generated") ("number","number of trees generated"),
("probs", "file with biased probabilities (format 'f 0.4' one by line)")
], ],
exec = \opts _ -> do exec = \opts _ -> do
let pgfr = optRestricted opts let pgfr = optRestricted opts
ts <- generateRandom pgfr (optType opts) gen <- newStdGen
mprobs <- optProbs opts pgfr
ts <- return $ genRandomProb mprobs gen pgfr (optType opts)
returnFromExprs $ take (optNum opts) ts returnFromExprs $ take (optNum opts) ts
}), }),
("gt", emptyCommandInfo { ("gt", emptyCommandInfo {
@@ -540,6 +546,35 @@ allCommands cod env@(pgf, mos) = Map.fromList [
_ -> return (fromString s), _ -> return (fromString s),
flags = [("file","the input file name")] flags = [("file","the input file name")]
}), }),
("rt", emptyCommandInfo {
longname = "rank_trees",
synopsis = "show trees in an order of decreasing probability",
explanation = unlines [
"Order trees from the most to the least probable, using either",
"even distribution in each category (default) or biased as specified",
"by the file given by flag -probs=FILE, where each line has the form",
"'function probability', e.g. 'youPol_Pron 0.01'."
],
exec = \opts ts -> do
mprobs <- optProbs opts pgf
let probs = maybe (defaultProbabilities pgf) id mprobs
let tds = rankTreesByProbs probs ts
if isOpt "v" opts
then putStrLn $
unlines [showExpr [] t ++ "\t--" ++ show d | (t,d) <- tds]
else return ()
returnFromExprs $ map fst tds,
flags = [
("probs","probabilities from this file (format 'f 0.6' per line)")
],
options = [
("v","show all trees with their probability scores")
],
examples = [
"p \"you are here\" | rt -probs=probs | pt -number=1 -- most probable result",
"se utf8 -- set encoding to utf8 (default)"
]
}),
("tq", emptyCommandInfo { ("tq", emptyCommandInfo {
longname = "translation_quiz", longname = "translation_quiz",
synopsis = "start a translation quiz", synopsis = "start a translation quiz",
@@ -829,6 +864,13 @@ allCommands cod env@(pgf, mos) = Map.fromList [
"" -> [] "" -> []
cats -> mapMaybe readType (chunks ',' cats) cats -> mapMaybe readType (chunks ',' cats)
optProbs opts pgfr = case valStrOpts "probs" "" opts of
"" -> return Nothing
file -> do
ps <- getProbsFromFile file pgf ---- pgfr!
-- putStrLn $ prProbabilities ps
return $ Just ps
optType opts = optType opts =
let str = valStrOpts "cat" (showCId $ lookStartCat pgf) opts let str = valStrOpts "cat" (showCId $ lookStartCat pgf) opts
in case readType str of in case readType str of

View File

@@ -2,8 +2,10 @@ module PGF.Probabilistic (
probTree -- :: Probabilities -> Tree -> Double probTree -- :: Probabilities -> Tree -> Double
,rankTreesByProbs -- :: Probabilities -> [Tree] -> [Tree] ,rankTreesByProbs -- :: Probabilities -> [Tree] -> [Tree]
,Probabilities -- data ,Probabilities -- data
,prProbabilities -- Probabilities -> String
,catProbs ,catProbs
,getProbsFromFile -- :: FilePath -> PGF -> IO Probabilities ,getProbsFromFile -- :: FilePath -> PGF -> IO Probabilities
,defaultProbabilities -- :: PGF -> Probabilities
) where ) where
import PGF.CId import PGF.CId
@@ -18,6 +20,10 @@ data Probabilities = Probs {
catProbs :: M.Map CId [(Double, (CId,[CId]))] -- prob and arglist catProbs :: M.Map CId [(Double, (CId,[CId]))] -- prob and arglist
} }
prProbabilities :: Probabilities -> String
prProbabilities = unlines . map pr . M.toList . funProbs where
pr (f,d) = showCId f ++ "\t" ++ show d
getProbsFromFile :: FilePath -> PGF -> IO Probabilities getProbsFromFile :: FilePath -> PGF -> IO Probabilities
getProbsFromFile file pgf = do getProbsFromFile file pgf = do
s <- readFile file s <- readFile file
@@ -33,8 +39,8 @@ fillProbs pgf funs =
| (cat,_) <- M.toList (cats (abstract pgf)), | (cat,_) <- M.toList (cats (abstract pgf)),
let fs = functionsToCat pgf cat] let fs = functionsToCat pgf cat]
cats1 = map fill cats0 cats1 = map fill cats0
funs1 = M.fromList [(f,p) | (_,cf) <- cats1, (p,(f,_)) <- cf] funs1 = [(f,p) | (_,cf) <- cats1, (p,(f,_)) <- cf]
in Probs funs1 (M.fromList cats1) in Probs (M.fromList funs1) (M.fromList cats1)
where where
fill (cat,fs) = (cat, pad [(getProb0 f,(f,xs)) | (f,xs) <- fs]) fill (cat,fs) = (cat, pad [(getProb0 f,(f,xs)) | (f,xs) <- fs])
where where
@@ -43,8 +49,13 @@ fillProbs pgf funs =
pad :: [(Double,a)] -> [(Double,a)] pad :: [(Double,a)] -> [(Double,a)]
pad pfs = [(if p== -1 then deflt else p,f) | (p,f) <- pfs] pad pfs = [(if p== -1 then deflt else p,f) | (p,f) <- pfs]
where where
deflt = 1 - sum poss / fromIntegral (length negs) deflt = case length negs of
(poss,negs) = partition (> (-1)) (map fst pfs) 0 -> 0
_ -> (1 - sum poss) / fromIntegral (length negs)
(poss,negs) = partition (> (-0.5)) (map fst pfs)
defaultProbabilities :: PGF -> Probabilities
defaultProbabilities pgf = fillProbs pgf M.empty
-- | compute the probability of a given tree -- | compute the probability of a given tree
probTree :: Probabilities -> Expr -> Double probTree :: Probabilities -> Expr -> Double