1
0
forked from GitHub/gf-core

probabilistic

This commit is contained in:
aarne
2005-10-30 22:44:00 +00:00
parent 815dda6b4b
commit f9293c6b29
5 changed files with 302 additions and 35 deletions

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/09/20 09:32:56 $
-- > CVS $Date: 2005/10/30 23:44:00 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.48 $
-- > CVS $Revision: 1.49 $
--
-- (Description of the module)
-----------------------------------------------------------------------------
@@ -31,6 +31,7 @@ import GF.CF.CF
import GF.CF.CFIdent
import GF.CF.CanonToCF
import GF.UseGrammar.Morphology
import GF.Probabilistic.Probabilistic
import GF.Infra.Option
import GF.Infra.Ident
import GF.System.Arch (ModTime)
@@ -57,6 +58,7 @@ data ShellState = ShSt {
-- (large, with parameters, no-so overgenerating)
pInfos :: [(Ident, Prs.PInfo)], -- ^ parsing information (compiled mcfg&cfg grammars)
morphos :: [(Ident,Morpho)], -- ^ morphologies
probss :: [(Ident,Probs)], -- ^ probability distributions
gloptions :: Options, -- ^ global options
readFiles :: [(FilePath,ModTime)],-- ^ files read
absCats :: [(G.Cat,(G.Context,
@@ -86,6 +88,7 @@ emptyShellState = ShSt {
cfgs = [],
pInfos = [],
morphos = [],
probss = [],
gloptions = noOptions,
readFiles = [],
absCats = [],
@@ -114,6 +117,7 @@ data StateGrammar = StGr {
cfg :: Cnv.CGrammar,
pInfo :: Prs.PInfo,
morpho :: Morpho,
probs :: Probs,
loptions :: Options
}
@@ -128,6 +132,7 @@ emptyStateGrammar = StGr {
cfg = [],
pInfo = Prs.buildPInfo [] [],
morpho = emptyMorpho,
probs = emptyProbs,
loptions = noOptions
}
@@ -140,6 +145,7 @@ stateMCFG :: StateGrammar -> Cnv.MGrammar
stateCFG :: StateGrammar -> Cnv.CGrammar
statePInfo :: StateGrammar -> Prs.PInfo
stateMorpho :: StateGrammar -> Morpho
stateProbs :: StateGrammar -> Probs
stateOptions :: StateGrammar -> Options
stateGrammarWords :: StateGrammar -> [String]
stateGrammarLang :: StateGrammar -> (CanonGrammar, Ident)
@@ -151,6 +157,7 @@ stateMCFG = mcfg
stateCFG = cfg
statePInfo = pInfo
stateMorpho = morpho
stateProbs = probs
stateOptions = loptions
stateGrammarWords = allMorphoWords . stateMorpho
stateGrammarLang st = (grammar st, cncId st)
@@ -190,6 +197,7 @@ updateShellState opts mcnc sh ((_,sgr,gr),rts) = do
cfs <- mapM (canon2cf opts subcgr) concrs --- why need to update all...
let morphos = map (mkMorpho subcgr) concrs
let probss = [] -----
let pinfosOld = map (CnvOld.pInfo opts cgr) concrs -- peb 18/6 (OBSOLETE)
@@ -219,6 +227,7 @@ updateShellState opts mcnc sh ((_,sgr,gr),rts) = do
cfgs = zip concrs cfgs,
pInfos = zip concrs pInfos,
morphos = zip concrs morphos,
probss = zip concrs probss,
gloptions = gloptions sh, --- opts, -- this would be command-line options
readFiles = [ft | ft@(f,_) <- readFiles sh, notInrts f] ++ rts,
absCats = csi,
@@ -280,6 +289,7 @@ purgeShellState sh = ShSt {
cfgs = cfgs sh,
pInfos = pInfos sh,
morphos = morphos sh,
probss = probss sh,
gloptions = gloptions sh,
readFiles = [],
absCats = absCats sh,
@@ -291,15 +301,17 @@ purgeShellState sh = ShSt {
acncs = maybe [] singleton (abstract sh) ++ map (snd . fst) (concretes sh)
changeMain :: Maybe Ident -> ShellState -> Err ShellState
changeMain Nothing (ShSt _ _ cs ms ss cfs old_pis mcfgs cfgs pinfos mos os rs acs s) =
return (ShSt Nothing Nothing [] ms ss cfs old_pis mcfgs cfgs pinfos mos os rs acs s)
changeMain (Just c) st@(ShSt _ _ cs ms ss cfs old_pis mcfgs cfgs pinfos mos os rs acs s) =
case lookup c (M.modules ms) of
changeMain Nothing (ShSt _ _ cs ms ss cfs old_pis mcfgs cfgs pinfos mos pbs os rs acs s) =
return (ShSt Nothing Nothing [] ms ss cfs old_pis mcfgs cfgs pinfos mos pbs os rs acs s)
changeMain
(Just c) st@(ShSt _ _ cs ms ss cfs old_pis mcfgs cfgs pinfos mos pbs os rs acs s) =
case lookup c (M.modules ms) of
Just _ -> do
a <- M.abstractOfConcrete ms c
let cas = M.allConcretes ms a
let cs' = [((c,c),True) | c <- cas]
return (ShSt (Just a) (Just c) cs' ms ss cfs old_pis mcfgs cfgs pinfos mos os rs acs s)
return (ShSt (Just a) (Just c) cs' ms ss cfs old_pis mcfgs cfgs
pinfos mos pbs os rs acs s)
_ -> P.prtBad "The state has no concrete syntax named" c
-- | form just one state grammar, if unique, from a canonical grammar
@@ -326,6 +338,7 @@ stateGrammarOfLang st l = StGr {
cfg = maybe [] id $ lookup l $ cfgs st,
pInfo = maybe (Prs.buildPInfo [] []) id $ lookup l $ pInfos st,
morpho = maybe emptyMorpho id (lookup l (morphos st)),
probs = maybe emptyProbs id (lookup l (probss st)),
loptions = errVal noOptions $ lookupOptionsCan can
}
where
@@ -336,11 +349,13 @@ stateGrammarOfLang st l = StGr {
grammarOfLang :: ShellState -> Language -> CanonGrammar
cfOfLang :: ShellState -> Language -> CF
morphoOfLang :: ShellState -> Language -> Morpho
probsOfLang :: ShellState -> Language -> Probs
optionsOfLang :: ShellState -> Language -> Options
grammarOfLang st = stateGrammarST . stateGrammarOfLang st
cfOfLang st = stateCF . stateGrammarOfLang st
morphoOfLang st = stateMorpho . stateGrammarOfLang st
probsOfLang st = stateProbs . stateGrammarOfLang st
optionsOfLang st = stateOptions . stateGrammarOfLang st
-- | the last introduced grammar, stored in options, is the default for operations
@@ -363,6 +378,7 @@ stateAbstractGrammar st = StGr {
cfg = [],
pInfo = Prs.buildPInfo [] [],
morpho = emptyMorpho,
probs = emptyProbs,
loptions = gloptions st ----
}
@@ -501,8 +517,9 @@ languageOff = languageOnOff False
languageOnOff :: Bool -> Language -> ShellStateOper
--- __________ this is OBSOLETE
languageOnOff b lang (ShSt a c cs cg sg cfs old_pinfos mcfgs cfgs pinfos ms os fs cats sts) =
ShSt a c cs' cg sg cfs old_pinfos mcfgs cfgs pinfos ms os fs cats sts where
languageOnOff b lang
(ShSt a c cs cg sg cfs old_pinfos mcfgs cfgs pinfos ms pbs os fs cats sts) =
ShSt a c cs' cg sg cfs old_pinfos mcfgs cfgs pinfos ms pbs os fs cats sts where
cs' = [if lang==l then (lc,b) else i | i@(lc@(l,c),_) <- cs]
{-
@@ -521,13 +538,15 @@ removeLanguage lang (ShSt (ab,gs,os)) = ShSt (ab,removeAssoc lang gs, os)
changeOptions :: (Options -> Options) -> ShellStateOper
--- __________ this is OBSOLETE
changeOptions f (ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms os ff ts ss) =
ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms (f os) ff ts ss
changeOptions f
(ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms pbs os ff ts ss) =
ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms pbs (f os) ff ts ss
changeModTimes :: [(FilePath,ModTime)] -> ShellStateOper
--- __________ this is OBSOLETE
changeModTimes mfs (ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms os ff ts ss) =
ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms os ff' ts ss
changeModTimes mfs
(ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms pbs os ff ts ss) =
ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms pbs os ff' ts ss
where
ff' = mfs ++ [mf | mf@(f,_) <- ff, notElem f (map fst mfs)]

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/10/06 10:02:33 $
-- > CVS $Date: 2005/10/30 23:44:00 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.32 $
-- > CVS $Revision: 1.33 $
--
-- Options and flags used in GF shell commands and files.
--
@@ -62,6 +62,12 @@ getOptVal (Opts os) fopt =
a:_ -> Just a
_ -> Nothing
isSetFlag :: Options -> OptFun -> Bool
isSetFlag (Opts os) fopt =
case [a | opt@(Opt (o,[a])) <- os, opt == fopt a] of
a:_ -> True
_ -> False
getOptInt :: Options -> OptFun -> Maybe Int
getOptInt opts f = do
s <- getOptVal opts f
@@ -304,6 +310,7 @@ noDepTypes = aOpt "nodeptypes"
extractGr = aOpt "extract"
pathList = aOpt "path"
uniCoding = aOpt "coding"
probFile = aOpt "probs"
-- peb 16/3-05:
gfcConversion :: String -> Option

View File

@@ -0,0 +1,225 @@
----------------------------------------------------------------------
-- |
-- Module : Probabilistic
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/10/30 23:44:00 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.1 $
--
-- Probabilistic abstract syntax. AR 30\/10\/2005
--
-- (c) Aarne Ranta 2005 under GNU GPL
--
-- Contents: parsing and random generation with probabilistic grammars.
-- To begin with, we use simple types and don't
-- guarantee the correctness of bindings\/dependences.
-----------------------------------------------------------------------------
module GF.Probabilistic.Probabilistic (
generateRandomTreesProb -- :: Options -> StdGen -> GFCGrammar -> Probs -> Cat -> [Exp]
,checkGrammarProbs -- :: GFCGrammar -> Probs -> Err ()
,computeProbTree -- :: Probs -> Tree -> Double
,rankByScore -- :: Ord n => [(a,n)] -> [(a,n)]
,Probs -- = BinTree Ident Double
,getProbsFromFile -- :: Opts -> IO Probs
,emptyProbs -- :: Probs
) where
import GF.Canon.GFC
import GF.Grammar.LookAbs
import GF.Grammar.PrGrammar
import GF.Grammar.Macros
import GF.Grammar.Values
import GF.Grammar.Grammar (Cat)
import GF.Infra.Ident
import GF.Data.Zipper
import GF.Data.Operations
import GF.Infra.Option
import Data.Char
import Data.List
import Control.Monad
import System.Random
-- | generate an infinite list of trees, with their probabilities
generateRandomTreesProb :: Options -> StdGen -> GFCGrammar -> Probs -> Cat -> [Exp]
generateRandomTreesProb opts gen gr probs cat =
map str2tr $ randomTrees gen gr' cat' where
gr' = gr2sgr gr probs
cat' = prt $ snd cat
-- | check that probabilities attached to a grammar make sense
checkGrammarProbs :: GFCGrammar -> Probs -> Err ()
checkGrammarProbs gr probs = err Bad (const (return ())) $ checkSGrammar $ gr2sgr gr probs
-- | compute the probability of a given tree
computeProbTree :: Probs -> Tree -> Double
computeProbTree probs (Tr (N (_,at,_,_,_),ts)) = case at of
AtC (_,f) -> case lookupTree prt f probs of
Ok p -> p * product (map prob ts)
_ -> product (map prob ts)
_ -> 1.0 ----
where
prob = computeProbTree probs
-- | rank from highest to lowest score, e.g. probability
rankByScore :: Ord n => [(a,n)] -> [(a,n)]
rankByScore = sortBy (\ (_,p) (_,q) -> compare q p)
getProbsFromFile :: Options -> IO Probs
getProbsFromFile opts = do
s <- maybe (return "") readFile $ getOptVal opts probFile
return $ buildTree $ pProbs $ lines s
where
pProbs ss = [(zIdent f, read p) | s <- ss, [f,p] <- [words s]]
type Probs = BinTree Ident Double
emptyProbs :: Probs
emptyProbs = emptyBinTree
------------------------------------------
-- translate grammar to simpler form and generated trees back
gr2sgr :: GFCGrammar -> Probs -> SGrammar
gr2sgr gr probs = buildTree [(c,{- fillProb -} rs) | rs@((_,(_,c)):_) <- rules] where
rules =
groupBy (\x y -> scat x == scat y) $
sortBy (\x y -> compare (scat x) (scat y))
[(trId f, ty') | (f,ty) <- funRulesOf gr, ty' <- trTy ty]
trId (_,f) = let f' = prt f in case lookupTree prt f probs of
Ok p -> (p,f')
_ -> (2.0, f')
trTy ty = case catSkeleton ty of
Ok (mcs,mc) -> [(map trCat mcs, trCat mc)]
_ -> []
trCat (m,c) = prt c ---
scat (_,(_,c)) = c
str2tr :: STree -> Exp
str2tr t = case t of
SApp ((_,f),ts) -> mkApp (trId f) (map str2tr ts)
SMeta _ -> mkMeta 0
---- SString s -> K s
where
trId = cn . zIdent
type SGrammar = BinTree SCat [SRule]
type SIdent = String
type SRule = (SFun,SType)
type SType = ([SCat],SCat)
type SCat = SIdent
type SFun = (Double,SIdent)
allRules gr = concat [rs | (c,rs) <- tree2list gr]
data STree =
SApp (SFun,[STree])
-- | SAppN (SIdent,[STree]) -- no probability given
| SMeta SCat
| SString String
| SInt Int
deriving (Show,Eq)
probTree :: STree -> Double
probTree t = case t of
SApp ((p,_),ts) -> p * product (map probTree ts)
_ -> 1
rankTrees :: [STree] -> [(STree,Double)]
rankTrees ts = sortBy (\ (_,p) (_,q) -> compare q p) [(t,probTree t) | t <- ts]
randomTrees :: StdGen -> SGrammar -> SCat -> [STree]
randomTrees gen = genTrees (randomRs (0.0, 1.0) gen)
genTrees :: [Double] -> SGrammar -> SCat -> [STree]
genTrees ds gr cat =
let (t,k) = genTree ds gr cat
in t : genTrees (drop k ds) gr cat
genTree :: [Double] -> SGrammar -> SCat -> (STree,Int)
genTree rs gr = gett rs where
gett ds cat = let
d:ds2 = ds
(pf,args) = getf d cat
(ts,k) = getts ds2 args
in (SApp (pf,ts), k+1)
getf d cat = hitRegion d [(p,(pf,args)) | (pf@(p,_),(args,_)) <- look cat]
getts ds cats = case cats of
c:cs -> let
(t, k) = gett ds c
(ts,ks) = getts (drop k ds) cs
in (t:ts, k + ks)
_ -> ([],0)
look cat = errVal [] $ lookupTree id cat gr
hitRegion :: Double -> [(Double,a)] -> a
hitRegion d vs = case vs of
(p1,v1):vs2 ->
if d < p1 then v1 else hitRegion d [(p+p1,v) | (p,v) <- vs2]
--- this should recover from rounding errors
checkSGrammar :: SGrammar -> Err SGrammar
checkSGrammar = mapMTree chCat where
chCat (c,rs) = case sum [p | ((p,f),_) <- rs] of
s | s /= 1.0 -> Bad $ "illegal probability sum " ++ show s ++ " in " ++ c
_ -> return (c,rs)
-- for cases where explicit probability is not given (encoded as
-- p > 1) divide the remaining mass by the number of such cases
fillProb :: [SRule] -> [SRule]
fillProb rs = [((defa p,f),ty) | ((p,f),ty) <- rs] where
defa p = if p > 1.0 then def else p
def = (1 - sum given) / genericLength nope
(nope,given) = partition (> 1.0) [p | ((p,_),_) <- rs]
------------------------------------------
-- to test outside GF
prSTree t = case t of
SApp ((p,f),ts) -> f ++ prParenth (show p) ++ concat (map pr1 ts)
SMeta c -> '?':c
SString s -> prQuotedString s
SInt i -> show i
where
pr1 t@(SApp (_,ts)) = ' ' : (if null ts then id else prParenth) (prSTree t)
pr1 t = prSTree t
mkSGrammar :: [SRule] -> SGrammar
mkSGrammar rules =
buildTree [(c, fillProb rs) | rs@((_,(_,c)):_) <- rules'] where
rules' =
groupBy (\x y -> scat x == scat y) $
sortBy (\x y -> compare (scat x) (scat y))
rules
scat (_,(_,c)) = c
pSRule :: String -> SRule
pSRule s = case words s of
p : f : c : cs ->
if isDigit (head p)
then ((read p, f),(init cs', last cs'))
else ((2.0, p),(init (c:cs'), last (c:cs'))) --- hack for automatic probability
where cs' = [cs !! i | i <- [0,2..length cs - 1]]
_ -> error $ "not a rule" +++ s
exSgr = mkSGrammar $ map pSRule [
"0.8 a : A"
,"0.2 b : A"
,"0.2 n : A -> S -> S"
,"0.8 e : S"
]
ex1 :: IO ()
ex1 = do
g <- newStdGen
mapM_ (putStrLn . prSTree) $ randomTrees g exSgr "S"

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/10/27 13:21:53 $
-- > CVS $Date: 2005/10/30 23:44:00 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.46 $
-- > CVS $Revision: 1.47 $
--
-- GF shell command interpreter.
-----------------------------------------------------------------------------
@@ -41,6 +41,8 @@ import GF.Shell.TeachYourself -- also a subshell
import GF.UseGrammar.Randomized ---
import GF.UseGrammar.Editing (goFirstMeta) ---
import GF.Probabilistic.Probabilistic
import GF.Compile.ShellState
import GF.Infra.Option
import GF.UseGrammar.Information
@@ -208,7 +210,8 @@ execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case com
CParse
---- | oElem showMulti opts -> do
| oElem byLines opts -> do
let ss = (if oElem showAll opts then id else filter (not . null)) $ lines $ prCommandArg a
let ss = (if oElem showAll opts then id else filter (not . null)) $
lines $ prCommandArg a
mts <- mapM parse ss
let a' = ATrms [t | (_,ATrms ts) <- mts, t <- ts]
changeArg (const a') sa
@@ -218,12 +221,29 @@ execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case com
warnDiscont opts
let p = optParseArgErrMsg opts gro x
case p of
Ok (ts,msg) -> putStrLnFlush msg >> changeArg (const $ ATrms ts) sa
Ok (ts,msg)
| isSetFlag opts probFile -> do
probs <- getProbsFromFile opts
let tps = rankByScore [(t,computeProbTree probs t) | t <- ts]
putStrLnFlush msg
mapM_ putStrLnFlush [show p +++ prt_ t | (t,p) <- tps]
changeArg (const $ ATrms (map fst tps)) sa
| otherwise -> putStrLnFlush msg >> changeArg (const $ ATrms ts) sa
Bad msg -> changeArg (const $ AError (msg +++ "input" +++ x)) sa
CTranslate il ol -> do
let a' = opST2CommandArg (optParseArgErr opts (sgr il)) a
returnArg (opTS2CommandArg (optLinearizeTreeVal opts (sgr ol)) a') sa
CGenerateRandom | isSetFlag opts probFile -> do
probs <- getProbsFromFile opts
let cat = firstAbsCat opts gro
let n = optIntOrN opts flagNumber 1
gen <- newStdGen
let ts = take n $ generateRandomTreesProb opts gen cgr probs cat
returnArg (ATrms (map (term2tree gro) ts)) sa
CGenerateRandom -> do
let
a' = case a of

View File

@@ -35,32 +35,28 @@ To make and install GF2 (Version 2.1, 8/11/2004).
You may need to run "make clean" after ./configure when you change the
readline setting.
5. Compile with make:
make
The binary is sent to the file GF/bin/gf
6. For a quick test: load a grammar package from
6. Move files to their right places:
http://www.cs.chalmers.se/~aarne/GF/download/gf-lib.tgz
make install
Open the package under GF-2.0
7. For a quick test:
Start gf, load a grammar and parse a string:
tar xvfz gf-lib.tgz
Then start gf, load a grammar and parse a string:
cd ../lib/basic
../../bin/gf English.gf
cd GF/grammars/basic
gf English.gf
-- when gf has started and shows the prompt >:
> p "every number is even or odd"
-- this is the response from GF2:
-- this is the response from GF:
PredA1 (Every Number) (DisjA1 Even Odd)
7. If you want to run the Java GUI, go back to GF2/src and also do
8. If you want to run the Java GUI, go back to GF2/src and also do
make install-java
@@ -68,11 +64,11 @@ To make and install GF2 (Version 2.1, 8/11/2004).
the value of GFHOME to the location of your GF2/bin directory. Test the
GUI with some grammars:
cd ../lib/letter
../../bin/gf <mkLetter.gfs
../../bin/jgf Letter.gfcm
cd ../examples/letter
gf <mkLetter.gfs
jgf Letter.gfcm
When the window opens, push the Random button.
Author: Aarne Ranta, 8/11/2004
Author: Aarne Ranta, Björn Bringert 8/11/2004 -- 18/10/2005