1
0
forked from GitHub/gf-core

probabilistic

This commit is contained in:
aarne
2005-10-30 22:44:00 +00:00
parent d08695f71f
commit e64822f921
7 changed files with 306 additions and 37 deletions

View File

@@ -0,0 +1,225 @@
----------------------------------------------------------------------
-- |
-- Module : Probabilistic
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/10/30 23:44:00 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.1 $
--
-- Probabilistic abstract syntax. AR 30\/10\/2005
--
-- (c) Aarne Ranta 2005 under GNU GPL
--
-- Contents: parsing and random generation with probabilistic grammars.
-- To begin with, we use simple types and don't
-- guarantee the correctness of bindings\/dependences.
-----------------------------------------------------------------------------
module GF.Probabilistic.Probabilistic (
generateRandomTreesProb -- :: Options -> StdGen -> GFCGrammar -> Probs -> Cat -> [Exp]
,checkGrammarProbs -- :: GFCGrammar -> Probs -> Err ()
,computeProbTree -- :: Probs -> Tree -> Double
,rankByScore -- :: Ord n => [(a,n)] -> [(a,n)]
,Probs -- = BinTree Ident Double
,getProbsFromFile -- :: Opts -> IO Probs
,emptyProbs -- :: Probs
) where
import GF.Canon.GFC
import GF.Grammar.LookAbs
import GF.Grammar.PrGrammar
import GF.Grammar.Macros
import GF.Grammar.Values
import GF.Grammar.Grammar (Cat)
import GF.Infra.Ident
import GF.Data.Zipper
import GF.Data.Operations
import GF.Infra.Option
import Data.Char
import Data.List
import Control.Monad
import System.Random
-- | generate an infinite list of trees, with their probabilities
generateRandomTreesProb :: Options -> StdGen -> GFCGrammar -> Probs -> Cat -> [Exp]
generateRandomTreesProb opts gen gr probs cat =
map str2tr $ randomTrees gen gr' cat' where
gr' = gr2sgr gr probs
cat' = prt $ snd cat
-- | check that probabilities attached to a grammar make sense
checkGrammarProbs :: GFCGrammar -> Probs -> Err ()
checkGrammarProbs gr probs = err Bad (const (return ())) $ checkSGrammar $ gr2sgr gr probs
-- | compute the probability of a given tree
computeProbTree :: Probs -> Tree -> Double
computeProbTree probs (Tr (N (_,at,_,_,_),ts)) = case at of
AtC (_,f) -> case lookupTree prt f probs of
Ok p -> p * product (map prob ts)
_ -> product (map prob ts)
_ -> 1.0 ----
where
prob = computeProbTree probs
-- | rank from highest to lowest score, e.g. probability
rankByScore :: Ord n => [(a,n)] -> [(a,n)]
rankByScore = sortBy (\ (_,p) (_,q) -> compare q p)
getProbsFromFile :: Options -> IO Probs
getProbsFromFile opts = do
s <- maybe (return "") readFile $ getOptVal opts probFile
return $ buildTree $ pProbs $ lines s
where
pProbs ss = [(zIdent f, read p) | s <- ss, [f,p] <- [words s]]
type Probs = BinTree Ident Double
emptyProbs :: Probs
emptyProbs = emptyBinTree
------------------------------------------
-- translate grammar to simpler form and generated trees back
gr2sgr :: GFCGrammar -> Probs -> SGrammar
gr2sgr gr probs = buildTree [(c,{- fillProb -} rs) | rs@((_,(_,c)):_) <- rules] where
rules =
groupBy (\x y -> scat x == scat y) $
sortBy (\x y -> compare (scat x) (scat y))
[(trId f, ty') | (f,ty) <- funRulesOf gr, ty' <- trTy ty]
trId (_,f) = let f' = prt f in case lookupTree prt f probs of
Ok p -> (p,f')
_ -> (2.0, f')
trTy ty = case catSkeleton ty of
Ok (mcs,mc) -> [(map trCat mcs, trCat mc)]
_ -> []
trCat (m,c) = prt c ---
scat (_,(_,c)) = c
str2tr :: STree -> Exp
str2tr t = case t of
SApp ((_,f),ts) -> mkApp (trId f) (map str2tr ts)
SMeta _ -> mkMeta 0
---- SString s -> K s
where
trId = cn . zIdent
type SGrammar = BinTree SCat [SRule]
type SIdent = String
type SRule = (SFun,SType)
type SType = ([SCat],SCat)
type SCat = SIdent
type SFun = (Double,SIdent)
allRules gr = concat [rs | (c,rs) <- tree2list gr]
data STree =
SApp (SFun,[STree])
-- | SAppN (SIdent,[STree]) -- no probability given
| SMeta SCat
| SString String
| SInt Int
deriving (Show,Eq)
probTree :: STree -> Double
probTree t = case t of
SApp ((p,_),ts) -> p * product (map probTree ts)
_ -> 1
rankTrees :: [STree] -> [(STree,Double)]
rankTrees ts = sortBy (\ (_,p) (_,q) -> compare q p) [(t,probTree t) | t <- ts]
randomTrees :: StdGen -> SGrammar -> SCat -> [STree]
randomTrees gen = genTrees (randomRs (0.0, 1.0) gen)
genTrees :: [Double] -> SGrammar -> SCat -> [STree]
genTrees ds gr cat =
let (t,k) = genTree ds gr cat
in t : genTrees (drop k ds) gr cat
genTree :: [Double] -> SGrammar -> SCat -> (STree,Int)
genTree rs gr = gett rs where
gett ds cat = let
d:ds2 = ds
(pf,args) = getf d cat
(ts,k) = getts ds2 args
in (SApp (pf,ts), k+1)
getf d cat = hitRegion d [(p,(pf,args)) | (pf@(p,_),(args,_)) <- look cat]
getts ds cats = case cats of
c:cs -> let
(t, k) = gett ds c
(ts,ks) = getts (drop k ds) cs
in (t:ts, k + ks)
_ -> ([],0)
look cat = errVal [] $ lookupTree id cat gr
hitRegion :: Double -> [(Double,a)] -> a
hitRegion d vs = case vs of
(p1,v1):vs2 ->
if d < p1 then v1 else hitRegion d [(p+p1,v) | (p,v) <- vs2]
--- this should recover from rounding errors
checkSGrammar :: SGrammar -> Err SGrammar
checkSGrammar = mapMTree chCat where
chCat (c,rs) = case sum [p | ((p,f),_) <- rs] of
s | s /= 1.0 -> Bad $ "illegal probability sum " ++ show s ++ " in " ++ c
_ -> return (c,rs)
-- for cases where explicit probability is not given (encoded as
-- p > 1) divide the remaining mass by the number of such cases
fillProb :: [SRule] -> [SRule]
fillProb rs = [((defa p,f),ty) | ((p,f),ty) <- rs] where
defa p = if p > 1.0 then def else p
def = (1 - sum given) / genericLength nope
(nope,given) = partition (> 1.0) [p | ((p,_),_) <- rs]
------------------------------------------
-- to test outside GF
prSTree t = case t of
SApp ((p,f),ts) -> f ++ prParenth (show p) ++ concat (map pr1 ts)
SMeta c -> '?':c
SString s -> prQuotedString s
SInt i -> show i
where
pr1 t@(SApp (_,ts)) = ' ' : (if null ts then id else prParenth) (prSTree t)
pr1 t = prSTree t
mkSGrammar :: [SRule] -> SGrammar
mkSGrammar rules =
buildTree [(c, fillProb rs) | rs@((_,(_,c)):_) <- rules'] where
rules' =
groupBy (\x y -> scat x == scat y) $
sortBy (\x y -> compare (scat x) (scat y))
rules
scat (_,(_,c)) = c
pSRule :: String -> SRule
pSRule s = case words s of
p : f : c : cs ->
if isDigit (head p)
then ((read p, f),(init cs', last cs'))
else ((2.0, p),(init (c:cs'), last (c:cs'))) --- hack for automatic probability
where cs' = [cs !! i | i <- [0,2..length cs - 1]]
_ -> error $ "not a rule" +++ s
exSgr = mkSGrammar $ map pSRule [
"0.8 a : A"
,"0.2 b : A"
,"0.2 n : A -> S -> S"
,"0.8 e : S"
]
ex1 :: IO ()
ex1 = do
g <- newStdGen
mapM_ (putStrLn . prSTree) $ randomTrees g exSgr "S"