1
0
forked from GitHub/gf-core

the first revision of exhaustive and random generation with dependent types. Still not quite stable.

This commit is contained in:
krasimir
2010-09-22 15:49:16 +00:00
parent bc92927692
commit 4e715c3952
7 changed files with 267 additions and 151 deletions

View File

@@ -312,8 +312,12 @@ allCommands env@(pgf, mos) = Map.fromList [
let pgfr = optRestricted opts
gen <- newStdGen
mprobs <- optProbs opts pgfr
let mt = mexp xs
ts <- return $ generateRandomFrom mt mprobs gen pgfr (optType opts)
let sel = case mprobs of
Just probs -> WeightSel gen probs
Nothing -> RandSel gen
let ts = case mexp xs of
Just ex -> generateRandomFrom sel pgfr ex
Nothing -> generateRandom sel pgfr (optType opts)
returnFromExprs $ take (optNum opts) ts
}),
("gt", emptyCommandInfo {
@@ -339,9 +343,10 @@ allCommands env@(pgf, mos) = Map.fromList [
],
exec = \opts xs -> do
let pgfr = optRestricted opts
let dp = return $ valIntOpts "depth" 4 opts
let mt = mexp xs
let ts = generateAllDepth mt pgfr (optType opts) dp
let dp = valIntOpts "depth" 4 opts
let ts = case mexp xs of
Just ex -> generateFromDepth pgfr ex (Just dp)
Nothing -> generateAllDepth pgfr (optType opts) (Just dp)
returnFromExprs $ take (optNumInf opts) ts
}),
("h", emptyCommandInfo {

View File

@@ -42,7 +42,12 @@ translationList ::
PGF -> Language -> Language -> Type -> Int -> IO [(String,[String])]
translationList mex mprobs pgf ig og typ number = do
gen <- newStdGen
let ts = take number $ generateRandomFrom mex mprobs gen pgf typ
let sel = case mprobs of
Just probs -> WeightSel gen probs
Nothing -> RandSel gen
let ts = take number $ case mex of
Just ex -> generateRandomFrom sel pgf ex
Nothing -> generateRandom sel pgf typ
return $ map mkOne $ ts
where
mkOne t = (norml (linearize pgf ig t), map (norml . linearize pgf og) (homonyms t))
@@ -53,7 +58,12 @@ morphologyList ::
PGF -> Language -> Type -> Int -> IO [(String,[String])]
morphologyList mex mprobs pgf ig typ number = do
gen <- newStdGen
let ts = take (max 1 number) $ generateRandomFrom mex mprobs gen pgf typ
let sel = case mprobs of
Just probs -> WeightSel gen probs
Nothing -> RandSel gen
let ts = take (max 1 number) $ case mex of
Just ex -> generateRandomFrom sel pgf ex
Nothing -> generateRandom sel pgf typ
let ss = map (tabularLinearizes pgf ig) ts
let size = length (head (head ss))
let forms = take number $ randomRs (0,size-1) gen

View File

@@ -85,8 +85,28 @@ module PGF(
Parse.ParseOutput(..), Parse.getParseOutput,
-- ** Generation
generateRandom, generateAll, generateAllDepth,
generateRandomFrom, -- from initial expression, possibly weighed
-- | The PGF interpreter allows automatic generation of
-- abstract syntax expressions of a given type. Since the
-- type system of GF allows dependent types, the generation
-- is in general undecidable. In fact, the set of all type
-- signatures in the grammar is equivalent to a Turing-complete language (Prolog).
--
-- There are several generation methods which mainly differ in:
--
-- * whether the expressions are sequentially or randomly generated?
--
-- * are they generated from a template? The template is an expression
-- containing meta variables which the generator will fill in.
--
-- * is there a limit of the depth of the expression?
-- The depth can be used to limit the search space, which
-- in some cases is the only way to make the search decidable.
generateAll, generateAllDepth,
generateFrom, generateFromDepth,
generateRandom, generateRandomDepth,
generateRandomFrom, generateRandomFromDepth,
RandomSelector(..),
-- ** Morphological Analysis
Lemma, Analysis, Morpho,
@@ -169,20 +189,6 @@ parse_ :: PGF -> Language -> Type -> String -> (Parse.ParseOutput,Brackete
-- | This is an experimental function. Use it on your own risk
parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> (Parse.ParseOutput,BracketedString)
-- | The same as 'generateAllDepth' but does not limit
-- the depth in the generation, and doesn't give an initial expression.
generateAll :: PGF -> Type -> [Expr]
-- | Generates an infinite list of random abstract syntax expressions.
-- This is usefull for tree bank generation which after that can be used
-- for grammar testing.
generateRandom :: PGF -> Type -> IO [Expr]
-- | Generates an exhaustive possibly infinite list of
-- abstract syntax expressions. A depth can be specified
-- to limit the search space.
generateAllDepth :: Maybe Expr -> PGF -> Type -> Maybe Int -> [Expr]
-- | List of all languages available in the given grammar.
languages :: PGF -> [Language]
@@ -246,13 +252,6 @@ groupResults = Map.toList . foldr more Map.empty . start . concat
more (l,s) =
Map.insertWith (\ [x] xs -> if elem x xs then xs else (x : xs)) l s
generateRandom pgf cat = do
gen <- newStdGen
return $ genRandom gen pgf cat
generateAll pgf cat = generate pgf cat Nothing
generateAllDepth mex pgf cat = generateAllFrom mex pgf cat
abstractName pgf = absname pgf
languages pgf = Map.keys (concretes pgf)

View File

@@ -1,26 +1,68 @@
module PGF.Generate where
module PGF.Generate
( generateAll, generateAllDepth
, generateFrom, generateFromDepth
, generateRandom, generateRandomDepth
, generateRandomFrom, generateRandomFromDepth
, RandomSelector(..)
) where
import PGF.CId
import PGF.Data
import PGF.Expr
import PGF.Macros
import PGF.TypeCheck
import PGF.Probabilistic
import qualified Data.Map as M
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import Control.Monad
import System.Random
-- generate all fillings of metavariables in an expr
generateAllFrom :: Maybe Expr -> PGF -> Type -> Maybe Int -> [Expr]
generateAllFrom mex pgf ty mi =
maybe (gen ty) (generateForMetas False pgf gen) mex where
gen ty = generate pgf ty mi
-- | Generates an exhaustive possibly infinite list of
-- abstract syntax expressions.
generateAll :: PGF -> Type -> [Expr]
generateAll pgf ty = generateAllDepth pgf ty Nothing
-- | A variant of 'generateAll' which also takes as argument
-- the upper limit of the depth of the generated expression.
generateAllDepth :: PGF -> Type -> Maybe Int -> [Expr]
generateAllDepth pgf ty dp = generate () pgf ty dp
-- | Generates a list of abstract syntax expressions
-- in a way similar to 'generateAll' but instead of
-- generating all instances of a given type, this
-- function uses a template.
generateFrom :: PGF -> Expr -> [Expr]
generateFrom pgf ex = generateFromDepth pgf ex Nothing
-- | A variant of 'generateFrom' which also takes as argument
-- the upper limit of the depth of the generated subexpressions.
generateFromDepth :: PGF -> Expr -> Maybe Int -> [Expr]
generateFromDepth pgf e dp = generateForMetas False pgf (\ty -> generateAllDepth pgf ty dp) e
-- | Generates an infinite list of random abstract syntax expressions.
-- This is usefull for tree bank generation which after that can be used
-- for grammar testing.
generateRandom :: RandomGen g => RandomSelector g -> PGF -> Type -> [Expr]
generateRandom sel pgf ty =
generate sel pgf ty Nothing
-- | A variant of 'generateRandom' which also takes as argument
-- the upper limit of the depth of the generated expression.
generateRandomDepth :: RandomGen g => RandomSelector g -> PGF -> Type -> Maybe Int -> [Expr]
generateRandomDepth sel pgf ty dp = generate sel pgf ty dp
-- | Random generation based on template
generateRandomFrom :: RandomGen g => RandomSelector g -> PGF -> Expr -> [Expr]
generateRandomFrom sel pgf e =
generateForMetas True pgf (\ty -> generate sel pgf ty Nothing) e
-- | Random generation based on template with a limitation in the depth.
generateRandomFromDepth :: RandomGen g => RandomSelector g -> PGF -> Expr -> Maybe Int -> [Expr]
generateRandomFromDepth sel pgf e dp =
generateForMetas True pgf (\ty -> generate sel pgf ty dp) e
-- generate random fillings of metavariables in an expr
generateRandomFrom :: Maybe Expr ->
Maybe Probabilities -> StdGen -> PGF -> Type -> [Expr]
generateRandomFrom mex ps rg pgf ty =
maybe (gen ty) (generateForMetas True pgf gen) mex where
gen ty = genRandomProb ps rg pgf ty
-- generic algorithm for filling holes in a generator
@@ -38,80 +80,144 @@ generateForMetas breadth pgf gen exp = case exp of
Right (_,DTyp ((_,_,ty):_) _ _) -> gen ty
_ -> []
-- generate an infinite list of trees exhaustively
generate :: PGF -> Type -> Maybe Int -> [Expr]
generate pgf ty@(DTyp _ cat _) dp = filter (\e -> case checkExpr pgf e ty of
Left _ -> False
Right _ -> True )
(concatMap (\i -> gener i cat) depths)
where
gener 0 c = [EFun f | (f, ([],_)) <- fns c]
gener i c = [
tr |
(f, (cs,_)) <- fns c, not (null cs),
let alts = map (gener (i-1)) cs,
ts <- combinations alts,
let tr = foldl EApp (EFun f) ts,
depth tr >= i
]
fns c = [(f,catSkeleton ty) | (f,ty) <- functionsToCat pgf c]
depths = maybe [0 ..] (\d -> [0..d]) dp
-- generate an infinite list of trees randomly
genRandom :: StdGen -> PGF -> Type -> [Expr]
genRandom = genRandomProb Nothing
------------------------------------------------------------------------------
-- The main generation algorithm
genRandomProb :: Maybe Probabilities -> StdGen -> PGF -> Type -> [Expr]
genRandomProb mprobs gen pgf ty@(DTyp _ cat _) =
filter (\e -> case checkExpr pgf e ty of
Left _ -> False
Right _ -> True )
(genTrees (randomRs (0.0, 1.0 :: Double) gen) cat)
where
timeout = 47 -- give up
generate :: Selector sel => sel -> PGF -> Type -> Maybe Int -> [Expr]
generate sel pgf ty dp =
[value2expr (funs (abstract pgf),lookupMeta ms) 0 v |
(ms,v) <- runGenM (prove (abstract pgf) emptyScope (TTyp [] ty) dp) sel IntMap.empty]
genTrees ds0 cat =
let (ds,ds2) = splitAt (timeout+1) ds0 -- for time out, else ds
(t,k) = genTree ds cat
in (if k>timeout then id else (t:))
(genTrees ds2 cat) -- else (drop k ds)
prove :: Selector sel => Abstr -> Scope -> TType -> Maybe Int -> GenM sel MetaStore Value
prove abs scope tty@(TTyp env (DTyp [] cat es)) dp = do
(fn,DTyp hypos cat es) <- clauses cat
case dp of
Just 0 | not (null hypos) -> mzero
_ -> return ()
(env,args) <- mkEnv [] hypos
runTcM abs (eqType scope (scopeSize scope) 0 (TTyp env (DTyp [] cat es)) tty)
vs <- mapM descend args
return (VApp fn vs)
where
clauses cat =
do fn <- select abs cat
case Map.lookup fn (funs abs) of
Just (ty,_,_) -> return (fn,ty)
Nothing -> mzero
genTree rs = gett rs where
gett ds cid | cid == cidString = (ELit (LStr "foo"), 1)
gett ds cid | cid == cidInt = (ELit (LInt 12345), 1)
gett ds cid | cid == cidFloat = (ELit (LFlt 12345), 1)
gett [] _ = (ELit (LStr "TIMEOUT"), 1) ----
gett ds cat = case fns cat of
[] -> (EMeta 0,1)
fs -> let
d:ds2 = ds
(f,args) = getf d fs
(ts,k) = getts ds2 args
in (foldl EApp f ts, k+1)
getf d fs = case mprobs of
Just _ -> hitRegion d [(p,(f,args)) | (p,(f,args)) <- fs]
_ -> let
lg = length fs
(f,v) = snd (fs !! (floor (d * fromIntegral lg)))
in (EFun f,v)
getts ds cats = case cats of
c:cs -> let
(t, k) = gett ds c
(ts,ks) = getts (drop k ds) cs
in (t:ts, k + ks)
_ -> ([],0)
mkEnv env [] = return (env,[])
mkEnv env ((bt,x,ty):hypos) = do
(env,arg) <- if x /= wildCId
then do i <- runTcM abs (newMeta scope (TTyp env ty))
let v = VMeta i env []
return (v : env,Right v)
else return (env,Left (TTyp env ty))
(env,args) <- mkEnv env hypos
return (env,(bt,arg):args)
fns :: CId -> [(Double,(CId,[CId]))]
fns cat = case mprobs of
Just probs -> maybe [] id $ M.lookup cat (catProbs probs)
_ -> [(deflt,(f,(fst (catSkeleton ty)))) |
let fs = functionsToCat pgf cat,
(f,ty) <- fs,
let deflt = 1.0 / fromIntegral (length fs)]
hitRegion :: Double -> [(Double,(CId,[a]))] -> (Expr,[a])
hitRegion d vs = case vs of
(p1,(f,v1)):vs2 -> if d < p1 then (EFun f, v1) else hitRegion (d-p1) vs2
_ -> (EMeta 9,[])
descend (bt,arg) = do let dp' = fmap (flip (-) 1) dp
v <- case arg of
Right v -> return v
Left tty -> prove abs scope tty dp'
v <- case bt of
Implicit -> return (VImplArg v)
Explicit -> return v
return v
------------------------------------------------------------------------------
-- Generation Monad
newtype GenM sel s a = GenM {unGen :: sel -> s -> Choice sel s a}
data Choice sel s a = COk sel s a
| CFail
| CBranch (Choice sel s a) (Choice sel s a)
instance Monad (GenM sel s) where
return x = GenM (\sel s -> COk sel s x)
f >>= g = GenM (\sel s -> iter (unGen f sel s))
where
iter (COk sel s x) = unGen (g x) sel s
iter (CBranch b1 b2) = CBranch (iter b1) (iter b2)
iter CFail = CFail
fail _ = GenM (\sel s -> CFail)
instance Selector sel => MonadPlus (GenM sel s) where
mzero = GenM (\sel s -> CFail)
mplus f g = GenM (\sel s -> let (sel1,sel2) = splitSelector sel
in CBranch (unGen f sel1 s) (unGen g sel2 s))
runGenM :: GenM sel s a -> sel -> s -> [(s,a)]
runGenM f sel s = toList (unGen f sel s) []
where
toList (COk sel s x) xs = (s,x) : xs
toList (CFail) xs = xs
toList (CBranch b1 b2) xs = toList b1 (toList b2 xs)
runTcM :: Abstr -> TcM a -> GenM sel MetaStore a
runTcM abs f = GenM (\sel ms ->
case unTcM f abs ms of
Ok ms a -> COk sel ms a
Fail _ -> CFail)
------------------------------------------------------------------------------
-- Selectors
class Selector sel where
splitSelector :: sel -> (sel,sel)
select :: Abstr -> CId -> GenM sel s CId
instance Selector () where
splitSelector sel = (sel,sel)
select abs cat = GenM (\sel s -> case Map.lookup cat (cats abs) of
Just (_,fns) -> iter s fns
Nothing -> CFail)
where
iter s [] = CFail
iter s (fn:fns) = CBranch (COk () s fn) (iter s fns)
-- | The random selector data type is used to specify the random number generator
-- and the distribution among the functions with the same result category.
-- The distribution is even for 'RandSel' and weighted for 'WeightSel'.
data RandomSelector g = RandSel g
| WeightSel g Probabilities
instance RandomGen g => Selector (RandomSelector g) where
splitSelector (RandSel g) = let (g1,g2) = split g
in (RandSel g1, RandSel g2)
splitSelector (WeightSel g probs) = let (g1,g2) = split g
in (WeightSel g1 probs, WeightSel g2 probs)
select abs cat = GenM (\sel s -> case sel of
RandSel g -> case Map.lookup cat (cats abs) of
Just (_,fns) -> do_rand g s (length fns) fns
Nothing -> CFail
WeightSel g probs -> case Map.lookup cat (catProbs probs) of
Just fns -> do_weight g s 1.0 fns
Nothing -> CFail)
where
do_rand g s n [] = CFail
do_rand g s n fns = let n' = n-1
(i,g') = randomR (0,n') g
(g1,g2) = split g'
(fn,fns') = pick i fns
in CBranch (COk (RandSel g1) s fn) (do_rand g2 s n' fns')
do_weight g s p [] = CFail
do_weight g s p fns = let (d,g') = randomR (0.0,p) g
(g1,g2) = split g'
(p',fn,fns') = hit d fns
in CBranch (COk (RandSel g1) s fn) (do_weight g2 s (p-p') fns')
pick :: Int -> [a] -> (a,[a])
pick 0 (x:xs) = (x,xs)
pick n (x:xs) = let (x',xs') = pick (n-1) xs
in (x',x:xs')
hit :: Double -> [(Double,a)] -> (Double,a,[(Double,a)])
hit d (px@(p,x):xs)
| d < p = (p,x,xs)
| otherwise = let (p',x',xs') = hit (d-p) xs
in (p,x',px:xs')

View File

@@ -13,19 +13,20 @@ import PGF.CId
import PGF.Data
import PGF.Macros
import qualified Data.Map as M
import qualified Data.Map as Map
import Data.List (sortBy,partition)
import Data.Maybe (fromMaybe)
-- | An abstract data structure which represents
-- the probabilities for the different functions in a grammar.
data Probabilities = Probs {
funProbs :: M.Map CId Double,
catProbs :: M.Map CId [(Double, (CId,[CId]))] -- prob and arglist
funProbs :: Map.Map CId Double,
catProbs :: Map.Map CId [(Double, CId)]
}
-- | Renders the probability structure as string
showProbabilities :: Probabilities -> String
showProbabilities = unlines . map pr . M.toList . funProbs where
showProbabilities = unlines . map pr . Map.toList . funProbs where
pr (f,d) = showCId f ++ "\t" ++ show d
-- | Reads the probabilities from a file.
@@ -36,43 +37,38 @@ showProbabilities = unlines . map pr . M.toList . funProbs where
readProbabilitiesFromFile :: FilePath -> PGF -> IO Probabilities
readProbabilitiesFromFile file pgf = do
s <- readFile file
let ps0 = M.fromList [(mkCId f,read p) | f:p:_ <- map words (lines s)]
let ps0 = Map.fromList [(mkCId f,read p) | f:p:_ <- map words (lines s)]
return $ mkProbabilities pgf ps0
-- | Builds probability tables by filling unspecified funs with probability sum
--
-- TODO: check that probabilities sum to 1
mkProbabilities :: PGF -> M.Map CId Double -> Probabilities
mkProbabilities pgf funs =
let
cats0 = [(cat,[(f,fst (catSkeleton ty)) | (f,ty) <- fs])
| (cat,_) <- M.toList (cats (abstract pgf)),
let fs = functionsToCat pgf cat]
cats1 = map fill cats0
funs1 = [(f,p) | (_,cf) <- cats1, (p,(f,_)) <- cf]
in Probs (M.fromList funs1) (M.fromList cats1)
where
fill (cat,fs) = (cat, pad [(getProb0 f,(f,xs)) | (f,xs) <- fs])
where
getProb0 :: CId -> Double
getProb0 f = maybe (-1) id $ M.lookup f funs
pad :: [(Double,a)] -> [(Double,a)]
pad pfs = [(if p== -1 then deflt else p,f) | (p,f) <- pfs]
where
deflt = case length negs of
0 -> 0
_ -> (1 - sum poss) / fromIntegral (length negs)
(poss,negs) = partition (> (-0.5)) (map fst pfs)
-- | Builds probability tables. The second argument is a map
-- which contains the know probabilities. If some function is
-- not in the map then it gets assigned some probability based
-- on the even distribution of the unallocated probability mass
-- for the result category.
mkProbabilities :: PGF -> Map.Map CId Double -> Probabilities
mkProbabilities pgf probs =
let funs1 = Map.fromList [(f,p) | (_,cf) <- Map.toList cats1, (p,f) <- cf]
cats1 = Map.map (\(_,fs) -> fill fs) (cats (abstract pgf))
in Probs funs1 cats1
where
fill fs = pad [(Map.lookup f probs,f) | f <- fs]
where
pad :: [(Maybe Double,a)] -> [(Double,a)]
pad pfs = [(fromMaybe deflt mb_p,f) | (mb_p,f) <- pfs]
where
deflt = case length [f | (Nothing,f) <- pfs] of
0 -> 0
n -> (1 - sum [d | (Just d,f) <- pfs]) / fromIntegral n
-- | Returns the default even distibution.
defaultProbabilities :: PGF -> Probabilities
defaultProbabilities pgf = mkProbabilities pgf M.empty
defaultProbabilities pgf = mkProbabilities pgf Map.empty
-- | compute the probability of a given tree
probTree :: Probabilities -> Expr -> Double
probTree probs t = case t of
EApp f e -> probTree probs f * probTree probs e
EFun f -> maybe 1 id $ M.lookup f (funProbs probs)
EFun f -> maybe 1 id $ Map.lookup f (funProbs probs)
_ -> 1
-- | rank from highest to lowest probability

View File

@@ -78,11 +78,11 @@ pHypoBinds = do
ty <- pType
return [(b,v,ty) | (b,v) <- xs]
pAtom = do
cat <- pCId
RP.skipSpaces
args <- RP.sepBy pArg RP.skipSpaces
return (cat, args)
pAtom = do
cat <- pCId
RP.skipSpaces
args <- RP.sepBy pArg RP.skipSpaces
return (cat, args)
ppType :: Int -> [CId] -> Type -> PP.Doc
ppType d scope (DTyp hyps cat args)

View File

@@ -21,7 +21,7 @@ module PGF.TypeCheck ( checkType, checkExpr, inferExpr
, TcM(..), TcResult(..), TType(..), tcError
, tcExpr, infExpr, eqType
, lookupFunType, eval
, refineExpr, checkResolvedMetaStore
, refineExpr, checkResolvedMetaStore, lookupMeta
) where
import PGF.Data
@@ -431,7 +431,7 @@ eqType scope k i0 tty1@(TTyp delta1 ty1@(DTyp hyps1 cat1 es1)) tty2@(TTyp delta2
MUnbound scopei _ cs -> do e1 <- mkLam i scopei env2 vs2 v1
setMeta i (MBound e1)
sequence_ [c e1 | c <- cs]
MGuarded e cs x -> setMeta i (MGuarded e ((\e -> apply env2 e vs2 >>= \v2 -> eqValue' k v1 v2) : cs) x)
MGuarded e cs x -> setMeta i (MGuarded e ((\e -> apply env2 e vs2 >>= \v2 -> eqValue' k v1 v2) : cs) x)
eqValue' k (VApp f1 vs1) (VApp f2 vs2) | f1 == f2 = zipWithM_ (eqValue k) vs1 vs2
eqValue' k (VConst f1 vs1) (VConst f2 vs2) | f1 == f2 = zipWithM_ (eqValue k) vs1 vs2
eqValue' k (VLit l1) (VLit l2 ) | l1 == l2 = return ()