diff --git a/src/GF/Compile/ShellState.hs b/src/GF/Compile/ShellState.hs index 0c965f1f4..e00e2e477 100644 --- a/src/GF/Compile/ShellState.hs +++ b/src/GF/Compile/ShellState.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/09/20 09:32:56 $ +-- > CVS $Date: 2005/10/30 23:44:00 $ -- > CVS $Author: aarne $ --- > CVS $Revision: 1.48 $ +-- > CVS $Revision: 1.49 $ -- -- (Description of the module) ----------------------------------------------------------------------------- @@ -31,6 +31,7 @@ import GF.CF.CF import GF.CF.CFIdent import GF.CF.CanonToCF import GF.UseGrammar.Morphology +import GF.Probabilistic.Probabilistic import GF.Infra.Option import GF.Infra.Ident import GF.System.Arch (ModTime) @@ -57,6 +58,7 @@ data ShellState = ShSt { -- (large, with parameters, no-so overgenerating) pInfos :: [(Ident, Prs.PInfo)], -- ^ parsing information (compiled mcfg&cfg grammars) morphos :: [(Ident,Morpho)], -- ^ morphologies + probss :: [(Ident,Probs)], -- ^ probability distributions gloptions :: Options, -- ^ global options readFiles :: [(FilePath,ModTime)],-- ^ files read absCats :: [(G.Cat,(G.Context, @@ -86,6 +88,7 @@ emptyShellState = ShSt { cfgs = [], pInfos = [], morphos = [], + probss = [], gloptions = noOptions, readFiles = [], absCats = [], @@ -114,6 +117,7 @@ data StateGrammar = StGr { cfg :: Cnv.CGrammar, pInfo :: Prs.PInfo, morpho :: Morpho, + probs :: Probs, loptions :: Options } @@ -128,6 +132,7 @@ emptyStateGrammar = StGr { cfg = [], pInfo = Prs.buildPInfo [] [], morpho = emptyMorpho, + probs = emptyProbs, loptions = noOptions } @@ -140,6 +145,7 @@ stateMCFG :: StateGrammar -> Cnv.MGrammar stateCFG :: StateGrammar -> Cnv.CGrammar statePInfo :: StateGrammar -> Prs.PInfo stateMorpho :: StateGrammar -> Morpho +stateProbs :: StateGrammar -> Probs stateOptions :: StateGrammar -> Options stateGrammarWords :: StateGrammar -> [String] stateGrammarLang :: StateGrammar -> (CanonGrammar, Ident) @@ -151,6 +157,7 @@ stateMCFG = mcfg stateCFG = cfg statePInfo = pInfo stateMorpho = morpho +stateProbs = probs stateOptions = loptions stateGrammarWords = allMorphoWords . stateMorpho stateGrammarLang st = (grammar st, cncId st) @@ -190,6 +197,7 @@ updateShellState opts mcnc sh ((_,sgr,gr),rts) = do cfs <- mapM (canon2cf opts subcgr) concrs --- why need to update all... let morphos = map (mkMorpho subcgr) concrs + let probss = [] ----- let pinfosOld = map (CnvOld.pInfo opts cgr) concrs -- peb 18/6 (OBSOLETE) @@ -219,6 +227,7 @@ updateShellState opts mcnc sh ((_,sgr,gr),rts) = do cfgs = zip concrs cfgs, pInfos = zip concrs pInfos, morphos = zip concrs morphos, + probss = zip concrs probss, gloptions = gloptions sh, --- opts, -- this would be command-line options readFiles = [ft | ft@(f,_) <- readFiles sh, notInrts f] ++ rts, absCats = csi, @@ -280,6 +289,7 @@ purgeShellState sh = ShSt { cfgs = cfgs sh, pInfos = pInfos sh, morphos = morphos sh, + probss = probss sh, gloptions = gloptions sh, readFiles = [], absCats = absCats sh, @@ -291,15 +301,17 @@ purgeShellState sh = ShSt { acncs = maybe [] singleton (abstract sh) ++ map (snd . fst) (concretes sh) changeMain :: Maybe Ident -> ShellState -> Err ShellState -changeMain Nothing (ShSt _ _ cs ms ss cfs old_pis mcfgs cfgs pinfos mos os rs acs s) = - return (ShSt Nothing Nothing [] ms ss cfs old_pis mcfgs cfgs pinfos mos os rs acs s) -changeMain (Just c) st@(ShSt _ _ cs ms ss cfs old_pis mcfgs cfgs pinfos mos os rs acs s) = - case lookup c (M.modules ms) of +changeMain Nothing (ShSt _ _ cs ms ss cfs old_pis mcfgs cfgs pinfos mos pbs os rs acs s) = + return (ShSt Nothing Nothing [] ms ss cfs old_pis mcfgs cfgs pinfos mos pbs os rs acs s) +changeMain + (Just c) st@(ShSt _ _ cs ms ss cfs old_pis mcfgs cfgs pinfos mos pbs os rs acs s) = + case lookup c (M.modules ms) of Just _ -> do a <- M.abstractOfConcrete ms c let cas = M.allConcretes ms a let cs' = [((c,c),True) | c <- cas] - return (ShSt (Just a) (Just c) cs' ms ss cfs old_pis mcfgs cfgs pinfos mos os rs acs s) + return (ShSt (Just a) (Just c) cs' ms ss cfs old_pis mcfgs cfgs + pinfos mos pbs os rs acs s) _ -> P.prtBad "The state has no concrete syntax named" c -- | form just one state grammar, if unique, from a canonical grammar @@ -326,6 +338,7 @@ stateGrammarOfLang st l = StGr { cfg = maybe [] id $ lookup l $ cfgs st, pInfo = maybe (Prs.buildPInfo [] []) id $ lookup l $ pInfos st, morpho = maybe emptyMorpho id (lookup l (morphos st)), + probs = maybe emptyProbs id (lookup l (probss st)), loptions = errVal noOptions $ lookupOptionsCan can } where @@ -336,11 +349,13 @@ stateGrammarOfLang st l = StGr { grammarOfLang :: ShellState -> Language -> CanonGrammar cfOfLang :: ShellState -> Language -> CF morphoOfLang :: ShellState -> Language -> Morpho +probsOfLang :: ShellState -> Language -> Probs optionsOfLang :: ShellState -> Language -> Options grammarOfLang st = stateGrammarST . stateGrammarOfLang st cfOfLang st = stateCF . stateGrammarOfLang st morphoOfLang st = stateMorpho . stateGrammarOfLang st +probsOfLang st = stateProbs . stateGrammarOfLang st optionsOfLang st = stateOptions . stateGrammarOfLang st -- | the last introduced grammar, stored in options, is the default for operations @@ -363,6 +378,7 @@ stateAbstractGrammar st = StGr { cfg = [], pInfo = Prs.buildPInfo [] [], morpho = emptyMorpho, + probs = emptyProbs, loptions = gloptions st ---- } @@ -501,8 +517,9 @@ languageOff = languageOnOff False languageOnOff :: Bool -> Language -> ShellStateOper --- __________ this is OBSOLETE -languageOnOff b lang (ShSt a c cs cg sg cfs old_pinfos mcfgs cfgs pinfos ms os fs cats sts) = - ShSt a c cs' cg sg cfs old_pinfos mcfgs cfgs pinfos ms os fs cats sts where +languageOnOff b lang + (ShSt a c cs cg sg cfs old_pinfos mcfgs cfgs pinfos ms pbs os fs cats sts) = + ShSt a c cs' cg sg cfs old_pinfos mcfgs cfgs pinfos ms pbs os fs cats sts where cs' = [if lang==l then (lc,b) else i | i@(lc@(l,c),_) <- cs] {- @@ -521,13 +538,15 @@ removeLanguage lang (ShSt (ab,gs,os)) = ShSt (ab,removeAssoc lang gs, os) changeOptions :: (Options -> Options) -> ShellStateOper --- __________ this is OBSOLETE -changeOptions f (ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms os ff ts ss) = - ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms (f os) ff ts ss +changeOptions f + (ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms pbs os ff ts ss) = + ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms pbs (f os) ff ts ss changeModTimes :: [(FilePath,ModTime)] -> ShellStateOper --- __________ this is OBSOLETE -changeModTimes mfs (ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms os ff ts ss) = - ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms os ff' ts ss +changeModTimes mfs + (ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms pbs os ff ts ss) = + ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms pbs os ff' ts ss where ff' = mfs ++ [mf | mf@(f,_) <- ff, notElem f (map fst mfs)] diff --git a/src/GF/Infra/Option.hs b/src/GF/Infra/Option.hs index 6fb9f9f2e..bf364ba50 100644 --- a/src/GF/Infra/Option.hs +++ b/src/GF/Infra/Option.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/10/06 10:02:33 $ +-- > CVS $Date: 2005/10/30 23:44:00 $ -- > CVS $Author: aarne $ --- > CVS $Revision: 1.32 $ +-- > CVS $Revision: 1.33 $ -- -- Options and flags used in GF shell commands and files. -- @@ -62,6 +62,12 @@ getOptVal (Opts os) fopt = a:_ -> Just a _ -> Nothing +isSetFlag :: Options -> OptFun -> Bool +isSetFlag (Opts os) fopt = + case [a | opt@(Opt (o,[a])) <- os, opt == fopt a] of + a:_ -> True + _ -> False + getOptInt :: Options -> OptFun -> Maybe Int getOptInt opts f = do s <- getOptVal opts f @@ -304,6 +310,7 @@ noDepTypes = aOpt "nodeptypes" extractGr = aOpt "extract" pathList = aOpt "path" uniCoding = aOpt "coding" +probFile = aOpt "probs" -- peb 16/3-05: gfcConversion :: String -> Option diff --git a/src/GF/Probabilistic/Probabilistic.hs b/src/GF/Probabilistic/Probabilistic.hs new file mode 100644 index 000000000..bc69a1cf3 --- /dev/null +++ b/src/GF/Probabilistic/Probabilistic.hs @@ -0,0 +1,225 @@ +---------------------------------------------------------------------- +-- | +-- Module : Probabilistic +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/10/30 23:44:00 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.1 $ +-- +-- Probabilistic abstract syntax. AR 30\/10\/2005 +-- +-- (c) Aarne Ranta 2005 under GNU GPL +-- +-- Contents: parsing and random generation with probabilistic grammars. +-- To begin with, we use simple types and don't +-- guarantee the correctness of bindings\/dependences. +----------------------------------------------------------------------------- + +module GF.Probabilistic.Probabilistic ( + generateRandomTreesProb -- :: Options -> StdGen -> GFCGrammar -> Probs -> Cat -> [Exp] + ,checkGrammarProbs -- :: GFCGrammar -> Probs -> Err () + ,computeProbTree -- :: Probs -> Tree -> Double + ,rankByScore -- :: Ord n => [(a,n)] -> [(a,n)] + ,Probs -- = BinTree Ident Double + ,getProbsFromFile -- :: Opts -> IO Probs + ,emptyProbs -- :: Probs + ) where + +import GF.Canon.GFC +import GF.Grammar.LookAbs +import GF.Grammar.PrGrammar +import GF.Grammar.Macros +import GF.Grammar.Values +import GF.Grammar.Grammar (Cat) + +import GF.Infra.Ident +import GF.Data.Zipper +import GF.Data.Operations +import GF.Infra.Option + +import Data.Char +import Data.List +import Control.Monad +import System.Random + + +-- | generate an infinite list of trees, with their probabilities +generateRandomTreesProb :: Options -> StdGen -> GFCGrammar -> Probs -> Cat -> [Exp] +generateRandomTreesProb opts gen gr probs cat = + map str2tr $ randomTrees gen gr' cat' where + gr' = gr2sgr gr probs + cat' = prt $ snd cat + +-- | check that probabilities attached to a grammar make sense +checkGrammarProbs :: GFCGrammar -> Probs -> Err () +checkGrammarProbs gr probs = err Bad (const (return ())) $ checkSGrammar $ gr2sgr gr probs + +-- | compute the probability of a given tree +computeProbTree :: Probs -> Tree -> Double +computeProbTree probs (Tr (N (_,at,_,_,_),ts)) = case at of + AtC (_,f) -> case lookupTree prt f probs of + Ok p -> p * product (map prob ts) + _ -> product (map prob ts) + _ -> 1.0 ---- + where + prob = computeProbTree probs + +-- | rank from highest to lowest score, e.g. probability +rankByScore :: Ord n => [(a,n)] -> [(a,n)] +rankByScore = sortBy (\ (_,p) (_,q) -> compare q p) + +getProbsFromFile :: Options -> IO Probs +getProbsFromFile opts = do + s <- maybe (return "") readFile $ getOptVal opts probFile + return $ buildTree $ pProbs $ lines s + where + pProbs ss = [(zIdent f, read p) | s <- ss, [f,p] <- [words s]] + +type Probs = BinTree Ident Double + +emptyProbs :: Probs +emptyProbs = emptyBinTree + +------------------------------------------ +-- translate grammar to simpler form and generated trees back + +gr2sgr :: GFCGrammar -> Probs -> SGrammar +gr2sgr gr probs = buildTree [(c,{- fillProb -} rs) | rs@((_,(_,c)):_) <- rules] where + rules = + groupBy (\x y -> scat x == scat y) $ + sortBy (\x y -> compare (scat x) (scat y)) + [(trId f, ty') | (f,ty) <- funRulesOf gr, ty' <- trTy ty] + trId (_,f) = let f' = prt f in case lookupTree prt f probs of + Ok p -> (p,f') + _ -> (2.0, f') + trTy ty = case catSkeleton ty of + Ok (mcs,mc) -> [(map trCat mcs, trCat mc)] + _ -> [] + trCat (m,c) = prt c --- + scat (_,(_,c)) = c + +str2tr :: STree -> Exp +str2tr t = case t of + SApp ((_,f),ts) -> mkApp (trId f) (map str2tr ts) + SMeta _ -> mkMeta 0 +---- SString s -> K s + where + trId = cn . zIdent + +type SGrammar = BinTree SCat [SRule] +type SIdent = String +type SRule = (SFun,SType) +type SType = ([SCat],SCat) +type SCat = SIdent +type SFun = (Double,SIdent) + +allRules gr = concat [rs | (c,rs) <- tree2list gr] + +data STree = + SApp (SFun,[STree]) +-- | SAppN (SIdent,[STree]) -- no probability given + | SMeta SCat + | SString String + | SInt Int + deriving (Show,Eq) + +probTree :: STree -> Double +probTree t = case t of + SApp ((p,_),ts) -> p * product (map probTree ts) + _ -> 1 + +rankTrees :: [STree] -> [(STree,Double)] +rankTrees ts = sortBy (\ (_,p) (_,q) -> compare q p) [(t,probTree t) | t <- ts] + +randomTrees :: StdGen -> SGrammar -> SCat -> [STree] +randomTrees gen = genTrees (randomRs (0.0, 1.0) gen) + +genTrees :: [Double] -> SGrammar -> SCat -> [STree] +genTrees ds gr cat = + let (t,k) = genTree ds gr cat + in t : genTrees (drop k ds) gr cat + +genTree :: [Double] -> SGrammar -> SCat -> (STree,Int) +genTree rs gr = gett rs where + gett ds cat = let + d:ds2 = ds + (pf,args) = getf d cat + (ts,k) = getts ds2 args + in (SApp (pf,ts), k+1) + getf d cat = hitRegion d [(p,(pf,args)) | (pf@(p,_),(args,_)) <- look cat] + getts ds cats = case cats of + c:cs -> let + (t, k) = gett ds c + (ts,ks) = getts (drop k ds) cs + in (t:ts, k + ks) + _ -> ([],0) + look cat = errVal [] $ lookupTree id cat gr + +hitRegion :: Double -> [(Double,a)] -> a +hitRegion d vs = case vs of + (p1,v1):vs2 -> + if d < p1 then v1 else hitRegion d [(p+p1,v) | (p,v) <- vs2] + +--- this should recover from rounding errors + +checkSGrammar :: SGrammar -> Err SGrammar +checkSGrammar = mapMTree chCat where + chCat (c,rs) = case sum [p | ((p,f),_) <- rs] of + s | s /= 1.0 -> Bad $ "illegal probability sum " ++ show s ++ " in " ++ c + _ -> return (c,rs) + +-- for cases where explicit probability is not given (encoded as +-- p > 1) divide the remaining mass by the number of such cases + +fillProb :: [SRule] -> [SRule] +fillProb rs = [((defa p,f),ty) | ((p,f),ty) <- rs] where + defa p = if p > 1.0 then def else p + def = (1 - sum given) / genericLength nope + (nope,given) = partition (> 1.0) [p | ((p,_),_) <- rs] + + +------------------------------------------ +-- to test outside GF + +prSTree t = case t of + SApp ((p,f),ts) -> f ++ prParenth (show p) ++ concat (map pr1 ts) + SMeta c -> '?':c + SString s -> prQuotedString s + SInt i -> show i + where + pr1 t@(SApp (_,ts)) = ' ' : (if null ts then id else prParenth) (prSTree t) + pr1 t = prSTree t + + +mkSGrammar :: [SRule] -> SGrammar +mkSGrammar rules = + buildTree [(c, fillProb rs) | rs@((_,(_,c)):_) <- rules'] where + rules' = + groupBy (\x y -> scat x == scat y) $ + sortBy (\x y -> compare (scat x) (scat y)) + rules + scat (_,(_,c)) = c + +pSRule :: String -> SRule +pSRule s = case words s of + p : f : c : cs -> + if isDigit (head p) + then ((read p, f),(init cs', last cs')) + else ((2.0, p),(init (c:cs'), last (c:cs'))) --- hack for automatic probability + where cs' = [cs !! i | i <- [0,2..length cs - 1]] + _ -> error $ "not a rule" +++ s + +exSgr = mkSGrammar $ map pSRule [ + "0.8 a : A" + ,"0.2 b : A" + ,"0.2 n : A -> S -> S" + ,"0.8 e : S" + ] + +ex1 :: IO () +ex1 = do + g <- newStdGen + mapM_ (putStrLn . prSTree) $ randomTrees g exSgr "S" diff --git a/src/GF/Shell.hs b/src/GF/Shell.hs index d20601844..4a214bd4c 100644 --- a/src/GF/Shell.hs +++ b/src/GF/Shell.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/10/27 13:21:53 $ +-- > CVS $Date: 2005/10/30 23:44:00 $ -- > CVS $Author: aarne $ --- > CVS $Revision: 1.46 $ +-- > CVS $Revision: 1.47 $ -- -- GF shell command interpreter. ----------------------------------------------------------------------------- @@ -41,6 +41,8 @@ import GF.Shell.TeachYourself -- also a subshell import GF.UseGrammar.Randomized --- import GF.UseGrammar.Editing (goFirstMeta) --- +import GF.Probabilistic.Probabilistic + import GF.Compile.ShellState import GF.Infra.Option import GF.UseGrammar.Information @@ -208,7 +210,8 @@ execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case com CParse ---- | oElem showMulti opts -> do | oElem byLines opts -> do - let ss = (if oElem showAll opts then id else filter (not . null)) $ lines $ prCommandArg a + let ss = (if oElem showAll opts then id else filter (not . null)) $ + lines $ prCommandArg a mts <- mapM parse ss let a' = ATrms [t | (_,ATrms ts) <- mts, t <- ts] changeArg (const a') sa @@ -218,12 +221,29 @@ execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case com warnDiscont opts let p = optParseArgErrMsg opts gro x case p of - Ok (ts,msg) -> putStrLnFlush msg >> changeArg (const $ ATrms ts) sa + Ok (ts,msg) + | isSetFlag opts probFile -> do + probs <- getProbsFromFile opts + let tps = rankByScore [(t,computeProbTree probs t) | t <- ts] + putStrLnFlush msg + mapM_ putStrLnFlush [show p +++ prt_ t | (t,p) <- tps] + changeArg (const $ ATrms (map fst tps)) sa + | otherwise -> putStrLnFlush msg >> changeArg (const $ ATrms ts) sa Bad msg -> changeArg (const $ AError (msg +++ "input" +++ x)) sa CTranslate il ol -> do let a' = opST2CommandArg (optParseArgErr opts (sgr il)) a returnArg (opTS2CommandArg (optLinearizeTreeVal opts (sgr ol)) a') sa + + + CGenerateRandom | isSetFlag opts probFile -> do + probs <- getProbsFromFile opts + let cat = firstAbsCat opts gro + let n = optIntOrN opts flagNumber 1 + gen <- newStdGen + let ts = take n $ generateRandomTreesProb opts gen cgr probs cat + returnArg (ATrms (map (term2tree gro) ts)) sa + CGenerateRandom -> do let a' = case a of diff --git a/src/INSTALL b/src/INSTALL index 436aacb65..da4757135 100644 --- a/src/INSTALL +++ b/src/INSTALL @@ -35,32 +35,28 @@ To make and install GF2 (Version 2.1, 8/11/2004). You may need to run "make clean" after ./configure when you change the readline setting. - 5. Compile with make: make The binary is sent to the file GF/bin/gf -6. For a quick test: load a grammar package from +6. Move files to their right places: - http://www.cs.chalmers.se/~aarne/GF/download/gf-lib.tgz + make install - Open the package under GF-2.0 +7. For a quick test: + Start gf, load a grammar and parse a string: - tar xvfz gf-lib.tgz - - Then start gf, load a grammar and parse a string: - - cd ../lib/basic - ../../bin/gf English.gf + cd GF/grammars/basic + gf English.gf -- when gf has started and shows the prompt >: > p "every number is even or odd" - -- this is the response from GF2: + -- this is the response from GF: PredA1 (Every Number) (DisjA1 Even Odd) -7. If you want to run the Java GUI, go back to GF2/src and also do +8. If you want to run the Java GUI, go back to GF2/src and also do make install-java @@ -68,11 +64,11 @@ To make and install GF2 (Version 2.1, 8/11/2004). the value of GFHOME to the location of your GF2/bin directory. Test the GUI with some grammars: - cd ../lib/letter - ../../bin/gf