mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-23 03:32:51 -06:00
probability ranking (rt) and gr -probs=FILE
This commit is contained in:
@@ -20,6 +20,8 @@ import PGF.Macros
|
|||||||
import PGF.Data ----
|
import PGF.Data ----
|
||||||
import PGF.Morphology
|
import PGF.Morphology
|
||||||
import PGF.Printer
|
import PGF.Printer
|
||||||
|
import PGF.Probabilistic -- (getProbsFromFile,prProbabilities,defaultProbabilities)
|
||||||
|
import PGF.Generate (genRandomProb) ----
|
||||||
import GF.Compile.Export
|
import GF.Compile.Export
|
||||||
import GF.Infra.Option (noOptions, readOutputFormat, Encoding(..))
|
import GF.Infra.Option (noOptions, readOutputFormat, Encoding(..))
|
||||||
import GF.Infra.UseIO
|
import GF.Infra.UseIO
|
||||||
@@ -42,6 +44,7 @@ import System.Cmd
|
|||||||
import Text.PrettyPrint
|
import Text.PrettyPrint
|
||||||
import Data.List (sort)
|
import Data.List (sort)
|
||||||
import Debug.Trace
|
import Debug.Trace
|
||||||
|
import System.Random (newStdGen) ----
|
||||||
|
|
||||||
type CommandOutput = ([Expr],String) ---- errors, etc
|
type CommandOutput = ([Expr],String) ---- errors, etc
|
||||||
|
|
||||||
@@ -245,11 +248,14 @@ allCommands cod env@(pgf, mos) = Map.fromList [
|
|||||||
flags = [
|
flags = [
|
||||||
("cat","generation category"),
|
("cat","generation category"),
|
||||||
("lang","uses only functions that have linearizations in all these languages"),
|
("lang","uses only functions that have linearizations in all these languages"),
|
||||||
("number","number of trees generated")
|
("number","number of trees generated"),
|
||||||
|
("probs", "file with biased probabilities (format 'f 0.4' one by line)")
|
||||||
],
|
],
|
||||||
exec = \opts _ -> do
|
exec = \opts _ -> do
|
||||||
let pgfr = optRestricted opts
|
let pgfr = optRestricted opts
|
||||||
ts <- generateRandom pgfr (optType opts)
|
gen <- newStdGen
|
||||||
|
mprobs <- optProbs opts pgfr
|
||||||
|
ts <- return $ genRandomProb mprobs gen pgfr (optType opts)
|
||||||
returnFromExprs $ take (optNum opts) ts
|
returnFromExprs $ take (optNum opts) ts
|
||||||
}),
|
}),
|
||||||
("gt", emptyCommandInfo {
|
("gt", emptyCommandInfo {
|
||||||
@@ -540,6 +546,35 @@ allCommands cod env@(pgf, mos) = Map.fromList [
|
|||||||
_ -> return (fromString s),
|
_ -> return (fromString s),
|
||||||
flags = [("file","the input file name")]
|
flags = [("file","the input file name")]
|
||||||
}),
|
}),
|
||||||
|
("rt", emptyCommandInfo {
|
||||||
|
longname = "rank_trees",
|
||||||
|
synopsis = "show trees in an order of decreasing probability",
|
||||||
|
explanation = unlines [
|
||||||
|
"Order trees from the most to the least probable, using either",
|
||||||
|
"even distribution in each category (default) or biased as specified",
|
||||||
|
"by the file given by flag -probs=FILE, where each line has the form",
|
||||||
|
"'function probability', e.g. 'youPol_Pron 0.01'."
|
||||||
|
],
|
||||||
|
exec = \opts ts -> do
|
||||||
|
mprobs <- optProbs opts pgf
|
||||||
|
let probs = maybe (defaultProbabilities pgf) id mprobs
|
||||||
|
let tds = rankTreesByProbs probs ts
|
||||||
|
if isOpt "v" opts
|
||||||
|
then putStrLn $
|
||||||
|
unlines [showExpr [] t ++ "\t--" ++ show d | (t,d) <- tds]
|
||||||
|
else return ()
|
||||||
|
returnFromExprs $ map fst tds,
|
||||||
|
flags = [
|
||||||
|
("probs","probabilities from this file (format 'f 0.6' per line)")
|
||||||
|
],
|
||||||
|
options = [
|
||||||
|
("v","show all trees with their probability scores")
|
||||||
|
],
|
||||||
|
examples = [
|
||||||
|
"p \"you are here\" | rt -probs=probs | pt -number=1 -- most probable result",
|
||||||
|
"se utf8 -- set encoding to utf8 (default)"
|
||||||
|
]
|
||||||
|
}),
|
||||||
("tq", emptyCommandInfo {
|
("tq", emptyCommandInfo {
|
||||||
longname = "translation_quiz",
|
longname = "translation_quiz",
|
||||||
synopsis = "start a translation quiz",
|
synopsis = "start a translation quiz",
|
||||||
@@ -829,6 +864,13 @@ allCommands cod env@(pgf, mos) = Map.fromList [
|
|||||||
"" -> []
|
"" -> []
|
||||||
cats -> mapMaybe readType (chunks ',' cats)
|
cats -> mapMaybe readType (chunks ',' cats)
|
||||||
|
|
||||||
|
optProbs opts pgfr = case valStrOpts "probs" "" opts of
|
||||||
|
"" -> return Nothing
|
||||||
|
file -> do
|
||||||
|
ps <- getProbsFromFile file pgf ---- pgfr!
|
||||||
|
-- putStrLn $ prProbabilities ps
|
||||||
|
return $ Just ps
|
||||||
|
|
||||||
optType opts =
|
optType opts =
|
||||||
let str = valStrOpts "cat" (showCId $ lookStartCat pgf) opts
|
let str = valStrOpts "cat" (showCId $ lookStartCat pgf) opts
|
||||||
in case readType str of
|
in case readType str of
|
||||||
|
|||||||
@@ -2,8 +2,10 @@ module PGF.Probabilistic (
|
|||||||
probTree -- :: Probabilities -> Tree -> Double
|
probTree -- :: Probabilities -> Tree -> Double
|
||||||
,rankTreesByProbs -- :: Probabilities -> [Tree] -> [Tree]
|
,rankTreesByProbs -- :: Probabilities -> [Tree] -> [Tree]
|
||||||
,Probabilities -- data
|
,Probabilities -- data
|
||||||
|
,prProbabilities -- Probabilities -> String
|
||||||
,catProbs
|
,catProbs
|
||||||
,getProbsFromFile -- :: FilePath -> PGF -> IO Probabilities
|
,getProbsFromFile -- :: FilePath -> PGF -> IO Probabilities
|
||||||
|
,defaultProbabilities -- :: PGF -> Probabilities
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import PGF.CId
|
import PGF.CId
|
||||||
@@ -18,6 +20,10 @@ data Probabilities = Probs {
|
|||||||
catProbs :: M.Map CId [(Double, (CId,[CId]))] -- prob and arglist
|
catProbs :: M.Map CId [(Double, (CId,[CId]))] -- prob and arglist
|
||||||
}
|
}
|
||||||
|
|
||||||
|
prProbabilities :: Probabilities -> String
|
||||||
|
prProbabilities = unlines . map pr . M.toList . funProbs where
|
||||||
|
pr (f,d) = showCId f ++ "\t" ++ show d
|
||||||
|
|
||||||
getProbsFromFile :: FilePath -> PGF -> IO Probabilities
|
getProbsFromFile :: FilePath -> PGF -> IO Probabilities
|
||||||
getProbsFromFile file pgf = do
|
getProbsFromFile file pgf = do
|
||||||
s <- readFile file
|
s <- readFile file
|
||||||
@@ -33,8 +39,8 @@ fillProbs pgf funs =
|
|||||||
| (cat,_) <- M.toList (cats (abstract pgf)),
|
| (cat,_) <- M.toList (cats (abstract pgf)),
|
||||||
let fs = functionsToCat pgf cat]
|
let fs = functionsToCat pgf cat]
|
||||||
cats1 = map fill cats0
|
cats1 = map fill cats0
|
||||||
funs1 = M.fromList [(f,p) | (_,cf) <- cats1, (p,(f,_)) <- cf]
|
funs1 = [(f,p) | (_,cf) <- cats1, (p,(f,_)) <- cf]
|
||||||
in Probs funs1 (M.fromList cats1)
|
in Probs (M.fromList funs1) (M.fromList cats1)
|
||||||
where
|
where
|
||||||
fill (cat,fs) = (cat, pad [(getProb0 f,(f,xs)) | (f,xs) <- fs])
|
fill (cat,fs) = (cat, pad [(getProb0 f,(f,xs)) | (f,xs) <- fs])
|
||||||
where
|
where
|
||||||
@@ -43,8 +49,13 @@ fillProbs pgf funs =
|
|||||||
pad :: [(Double,a)] -> [(Double,a)]
|
pad :: [(Double,a)] -> [(Double,a)]
|
||||||
pad pfs = [(if p== -1 then deflt else p,f) | (p,f) <- pfs]
|
pad pfs = [(if p== -1 then deflt else p,f) | (p,f) <- pfs]
|
||||||
where
|
where
|
||||||
deflt = 1 - sum poss / fromIntegral (length negs)
|
deflt = case length negs of
|
||||||
(poss,negs) = partition (> (-1)) (map fst pfs)
|
0 -> 0
|
||||||
|
_ -> (1 - sum poss) / fromIntegral (length negs)
|
||||||
|
(poss,negs) = partition (> (-0.5)) (map fst pfs)
|
||||||
|
|
||||||
|
defaultProbabilities :: PGF -> Probabilities
|
||||||
|
defaultProbabilities pgf = fillProbs pgf M.empty
|
||||||
|
|
||||||
-- | compute the probability of a given tree
|
-- | compute the probability of a given tree
|
||||||
probTree :: Probabilities -> Expr -> Double
|
probTree :: Probabilities -> Expr -> Double
|
||||||
|
|||||||
Reference in New Issue
Block a user