forked from GitHub/gf-core
probability ranking (rt) and gr -probs=FILE
This commit is contained in:
@@ -20,6 +20,8 @@ import PGF.Macros
|
||||
import PGF.Data ----
|
||||
import PGF.Morphology
|
||||
import PGF.Printer
|
||||
import PGF.Probabilistic -- (getProbsFromFile,prProbabilities,defaultProbabilities)
|
||||
import PGF.Generate (genRandomProb) ----
|
||||
import GF.Compile.Export
|
||||
import GF.Infra.Option (noOptions, readOutputFormat, Encoding(..))
|
||||
import GF.Infra.UseIO
|
||||
@@ -42,6 +44,7 @@ import System.Cmd
|
||||
import Text.PrettyPrint
|
||||
import Data.List (sort)
|
||||
import Debug.Trace
|
||||
import System.Random (newStdGen) ----
|
||||
|
||||
type CommandOutput = ([Expr],String) ---- errors, etc
|
||||
|
||||
@@ -245,11 +248,14 @@ allCommands cod env@(pgf, mos) = Map.fromList [
|
||||
flags = [
|
||||
("cat","generation category"),
|
||||
("lang","uses only functions that have linearizations in all these languages"),
|
||||
("number","number of trees generated")
|
||||
("number","number of trees generated"),
|
||||
("probs", "file with biased probabilities (format 'f 0.4' one by line)")
|
||||
],
|
||||
exec = \opts _ -> do
|
||||
let pgfr = optRestricted opts
|
||||
ts <- generateRandom pgfr (optType opts)
|
||||
gen <- newStdGen
|
||||
mprobs <- optProbs opts pgfr
|
||||
ts <- return $ genRandomProb mprobs gen pgfr (optType opts)
|
||||
returnFromExprs $ take (optNum opts) ts
|
||||
}),
|
||||
("gt", emptyCommandInfo {
|
||||
@@ -540,6 +546,35 @@ allCommands cod env@(pgf, mos) = Map.fromList [
|
||||
_ -> return (fromString s),
|
||||
flags = [("file","the input file name")]
|
||||
}),
|
||||
("rt", emptyCommandInfo {
|
||||
longname = "rank_trees",
|
||||
synopsis = "show trees in an order of decreasing probability",
|
||||
explanation = unlines [
|
||||
"Order trees from the most to the least probable, using either",
|
||||
"even distribution in each category (default) or biased as specified",
|
||||
"by the file given by flag -probs=FILE, where each line has the form",
|
||||
"'function probability', e.g. 'youPol_Pron 0.01'."
|
||||
],
|
||||
exec = \opts ts -> do
|
||||
mprobs <- optProbs opts pgf
|
||||
let probs = maybe (defaultProbabilities pgf) id mprobs
|
||||
let tds = rankTreesByProbs probs ts
|
||||
if isOpt "v" opts
|
||||
then putStrLn $
|
||||
unlines [showExpr [] t ++ "\t--" ++ show d | (t,d) <- tds]
|
||||
else return ()
|
||||
returnFromExprs $ map fst tds,
|
||||
flags = [
|
||||
("probs","probabilities from this file (format 'f 0.6' per line)")
|
||||
],
|
||||
options = [
|
||||
("v","show all trees with their probability scores")
|
||||
],
|
||||
examples = [
|
||||
"p \"you are here\" | rt -probs=probs | pt -number=1 -- most probable result",
|
||||
"se utf8 -- set encoding to utf8 (default)"
|
||||
]
|
||||
}),
|
||||
("tq", emptyCommandInfo {
|
||||
longname = "translation_quiz",
|
||||
synopsis = "start a translation quiz",
|
||||
@@ -829,6 +864,13 @@ allCommands cod env@(pgf, mos) = Map.fromList [
|
||||
"" -> []
|
||||
cats -> mapMaybe readType (chunks ',' cats)
|
||||
|
||||
optProbs opts pgfr = case valStrOpts "probs" "" opts of
|
||||
"" -> return Nothing
|
||||
file -> do
|
||||
ps <- getProbsFromFile file pgf ---- pgfr!
|
||||
-- putStrLn $ prProbabilities ps
|
||||
return $ Just ps
|
||||
|
||||
optType opts =
|
||||
let str = valStrOpts "cat" (showCId $ lookStartCat pgf) opts
|
||||
in case readType str of
|
||||
|
||||
@@ -2,8 +2,10 @@ module PGF.Probabilistic (
|
||||
probTree -- :: Probabilities -> Tree -> Double
|
||||
,rankTreesByProbs -- :: Probabilities -> [Tree] -> [Tree]
|
||||
,Probabilities -- data
|
||||
,prProbabilities -- Probabilities -> String
|
||||
,catProbs
|
||||
,getProbsFromFile -- :: FilePath -> PGF -> IO Probabilities
|
||||
,defaultProbabilities -- :: PGF -> Probabilities
|
||||
) where
|
||||
|
||||
import PGF.CId
|
||||
@@ -18,6 +20,10 @@ data Probabilities = Probs {
|
||||
catProbs :: M.Map CId [(Double, (CId,[CId]))] -- prob and arglist
|
||||
}
|
||||
|
||||
prProbabilities :: Probabilities -> String
|
||||
prProbabilities = unlines . map pr . M.toList . funProbs where
|
||||
pr (f,d) = showCId f ++ "\t" ++ show d
|
||||
|
||||
getProbsFromFile :: FilePath -> PGF -> IO Probabilities
|
||||
getProbsFromFile file pgf = do
|
||||
s <- readFile file
|
||||
@@ -33,8 +39,8 @@ fillProbs pgf funs =
|
||||
| (cat,_) <- M.toList (cats (abstract pgf)),
|
||||
let fs = functionsToCat pgf cat]
|
||||
cats1 = map fill cats0
|
||||
funs1 = M.fromList [(f,p) | (_,cf) <- cats1, (p,(f,_)) <- cf]
|
||||
in Probs funs1 (M.fromList cats1)
|
||||
funs1 = [(f,p) | (_,cf) <- cats1, (p,(f,_)) <- cf]
|
||||
in Probs (M.fromList funs1) (M.fromList cats1)
|
||||
where
|
||||
fill (cat,fs) = (cat, pad [(getProb0 f,(f,xs)) | (f,xs) <- fs])
|
||||
where
|
||||
@@ -43,8 +49,13 @@ fillProbs pgf funs =
|
||||
pad :: [(Double,a)] -> [(Double,a)]
|
||||
pad pfs = [(if p== -1 then deflt else p,f) | (p,f) <- pfs]
|
||||
where
|
||||
deflt = 1 - sum poss / fromIntegral (length negs)
|
||||
(poss,negs) = partition (> (-1)) (map fst pfs)
|
||||
deflt = case length negs of
|
||||
0 -> 0
|
||||
_ -> (1 - sum poss) / fromIntegral (length negs)
|
||||
(poss,negs) = partition (> (-0.5)) (map fst pfs)
|
||||
|
||||
defaultProbabilities :: PGF -> Probabilities
|
||||
defaultProbabilities pgf = fillProbs pgf M.empty
|
||||
|
||||
-- | compute the probability of a given tree
|
||||
probTree :: Probabilities -> Expr -> Double
|
||||
|
||||
Reference in New Issue
Block a user