added random generation

This commit is contained in:
Krasimir Angelov
2023-03-05 13:18:14 +01:00
parent 97bb8ae3f6
commit ee717fb022
8 changed files with 202 additions and 11 deletions

View File

@@ -41,8 +41,8 @@ translationList mex pgf ig og typ number = do
Nothing -> generateRandom gen pgf typ
return $ map mkOne $ ts
where
mkOne t = (norml (linearize ig t),
map norml (concatMap lins (homonyms t)))
mkOne (t,p) = (norml (linearize ig t),
map norml (concatMap lins (homonyms t)))
homonyms t =
case (parse ig typ . linearize ig) t of
ParseOk res -> map fst res
@@ -56,7 +56,7 @@ morphologyList mex pgf ig typ number = do
let ts = take (max 1 number) $ case mex of
Just ex -> generateRandomFrom gen pgf ex
Nothing -> generateRandom gen pgf typ
let ss = map (tabularLinearizeAll ig) ts
let ss = map (tabularLinearizeAll ig . fst) ts
let size = length (head (head ss))
let forms = take number $ randomRs (0,size-1) gen
return [(snd (head pws0) +++ fst (pws0 !! i), ws) |

View File

@@ -40,7 +40,9 @@ libpgf_la_SOURCES = \
pgf/phrasetable.cxx \
pgf/phrasetable.h \
pgf/probspace.cxx \
pgf/probspace.h
pgf/probspace.h \
pgf/generator.cxx \
pgf/generator.h
libpgf_la_LDFLAGS = -no-undefined -version-info 4:0:0
libpgf_la_CXXFLAGS = -fno-rtti -std=c++11 -DCOMPILING_PGF

View File

@@ -1,6 +1,7 @@
#include <fcntl.h>
#include <math.h>
#include <errno.h>
#include <limits.h>
#ifdef _WIN32
#include <sys\stat.h>
#endif
@@ -14,6 +15,7 @@
#include "parser.h"
#include "graphviz.h"
#include "aligner.h"
#include "generator.h"
static void
pgf_exn_clear(PgfExn* err)
@@ -1190,6 +1192,61 @@ void pgf_check_type(PgfDB *db, PgfRevision revision,
} PGF_API_END
}
PGF_API
PgfExpr pgf_generate_random(PgfDB *db, PgfRevision revision,
PgfType type, size_t depth,
uint64_t *seed, prob_t *prob,
PgfMarshaller *m, PgfUnmarshaller *u,
PgfExn *err)
{
PGF_API_BEGIN {
DB_scope scope(db, READER_SCOPE);
ref<PgfPGF> pgf = db->revision2pgf(revision);
// Generation may fail for certain random choices, but succeed
// for others. We try 10 time to increase the chance of succeess.
for (size_t i = 0; i < 10; i++) {
PgfRandomGenerator gen(pgf, depth, seed, m, u);
PgfExpr expr = m->match_type(&gen, type);
if (expr != 0) {
*prob = gen.getProb();
return expr;
}
}
} PGF_API_END
return 0;
}
PGF_API
PgfExpr pgf_generate_random_from
(PgfDB *db, PgfRevision revision,
PgfExpr expr, size_t depth,
uint64_t *seed, prob_t *prob,
PgfMarshaller *m, PgfUnmarshaller *u,
PgfExn *err)
{
PGF_API_BEGIN {
DB_scope scope(db, READER_SCOPE);
ref<PgfPGF> pgf = db->revision2pgf(revision);
// Generation may fail for certain random choices, but succeed
// for others. We try 10 time to increase the chance of succeess.
for (size_t i = 0; i < 10; i++) {
PgfRandomGenerator gen(pgf, depth, seed, m, u);
PgfExpr new_expr = m->match_expr(&gen, expr);
if (new_expr != 0) {
*prob = gen.getProb();
return new_expr;
}
}
} PGF_API_END
return 0;
}
PGF_API
PgfRevision pgf_start_transaction(PgfDB *db, PgfExn *err)
{

View File

@@ -509,6 +509,21 @@ void pgf_check_type(PgfDB *db, PgfRevision revision,
PgfMarshaller *m, PgfUnmarshaller *u,
PgfExn *err);
PGF_API_DECL
PgfExpr pgf_generate_random(PgfDB *db, PgfRevision revision,
PgfType type, size_t depth,
uint64_t *seed, prob_t *prob,
PgfMarshaller *m, PgfUnmarshaller *u,
PgfExn *err);
PGF_API_DECL
PgfExpr pgf_generate_random_from
(PgfDB *db, PgfRevision revision,
PgfExpr expr, size_t depth,
uint64_t *seed, prob_t *prob,
PgfMarshaller *m, PgfUnmarshaller *u,
PgfExn *err);
PGF_API_DECL
PgfRevision pgf_start_transaction(PgfDB *db, PgfExn *err);

View File

@@ -1,4 +1,5 @@
#include "data.h"
#include "math.h"
static
int entry_cmp(PgfProbspaceEntry *entry1, PgfProbspaceEntry *entry2)
@@ -199,6 +200,52 @@ void probspace_iter(PgfProbspace space, PgfText *cat,
}
}
static
ref<PgfAbsFun> probspace_random(PgfProbspace space,
PgfText *cat, prob_t *rand,
bool is_last)
{
if (space == 0)
return 0;
int cmp = textcmp(cat,&(*space->value.cat));
if (cmp < 0) {
return probspace_random(space->left, cat, rand, true);
} else if (cmp > 0) {
return probspace_random(space->right, cat, rand, true);
} else {
ref<PgfAbsFun> fun;
fun = probspace_random(space->left, cat, rand, false);
if (fun != 0)
return fun;
bool is_res = (space->value.cat == ref<PgfText>::from_ptr(&space->value.fun->type->name));
if (is_res) {
*rand -= exp(-space->value.fun->prob);
if (*rand <= 0)
return space->value.fun;
}
fun = probspace_random(space->right, cat, rand, is_last);
if (fun != 0)
return fun;
if (is_last && is_res) {
// necessary due to floating point rounding
return space->value.fun;
}
}
return 0;
}
PGF_INTERNAL
ref<PgfAbsFun> probspace_random(PgfProbspace space,
PgfText *cat, prob_t rand)
{
return probspace_random(space,cat,&rand,true);
}
PGF_INTERNAL
void probspace_release(PgfProbspace space)
{

View File

@@ -67,6 +67,12 @@ PGF_INTERNAL_DECL
void probspace_iter(PgfProbspace space, PgfText *cat,
PgfItor* itor, bool all, PgfExn *err);
/* Given a random number from 0 to 1, select a random function from
* the given category */
PGF_INTERNAL_DECL
ref<PgfAbsFun> probspace_random(PgfProbspace space,
PgfText *cat, prob_t rand);
PGF_INTERNAL_DECL
void probspace_release(PgfProbspace space);

View File

@@ -55,7 +55,10 @@ module PGF2 (-- * PGF
compute,
-- ** Generation
generateAll, generateAllFrom, generateRandom, generateRandomFrom,
generateAll, generateAllDepth,
generateAllFrom, generateAllFromDepth,
generateRandom, generateRandomDepth,
generateRandomFrom, generateRandomFromDepth,
-- ** Morphological Analysis
MorphoAnalysis, lookupMorpho, lookupCohorts, fullFormLexicon,
@@ -1009,16 +1012,73 @@ bracketedLinearizeAll c e = unsafePerformIO $ do
else writeIORef ref (False,[],[],reverse bs:all)
generateAll :: PGF -> Type -> [(Expr,Float)]
generateAll p ty = error "TODO: generateAll"
generateAll p ty = generateAllDepth p ty maxBound
generateAllDepth :: PGF -> Type -> Int -> [(Expr,Float)]
generateAllDepth p ty dp = error "TODO: generateAllDepth"
generateAllFrom :: PGF -> Expr -> [(Expr,Float)]
generateAllFrom p ty = error "TODO: generateAllFrom"
generateAllFrom p ty = generateAllFromDepth p ty maxBound
generateRandom :: StdGen -> PGF -> Type -> [a]
generateRandom = error "TODO: generateRandom"
generateAllFromDepth :: PGF -> Expr -> Int -> [(Expr,Float)]
generateAllFromDepth p ty = error "TODO: generateFromDepth"
generateRandomFrom :: StdGen -> PGF -> Expr -> [a]
generateRandomFrom = error "TODO: generateRandomFrom"
-- | Generates a potentially infinite list of random
-- abstract syntax expressions. This is usefull for tree bank generation
-- which after that can be used for grammar testing.
generateRandom :: RandomGen g => g -> PGF -> Type -> [(Expr,Float)]
generateRandom g pgf ty = generateRandomDepth g pgf ty maxBound
generateRandomDepth :: RandomGen g => g -> PGF -> Type -> Int -> [(Expr,Float)]
generateRandomDepth g p ty dp =
let (seed,_) = random g
in generate seed
where
generate seed =
unsafePerformIO $
bracket (newStablePtr ty) freeStablePtr $ \c_ty ->
withForeignPtr marshaller $ \m ->
withForeignPtr unmarshaller $ \u ->
withForeignPtr (a_revision p) $ \c_revision ->
alloca $ \p_seed ->
alloca $ \p_prob ->
mask_ $ do
poke p_seed seed
c_expr <- withPgfExn "generateRandomDepth" (pgf_generate_random (a_db p) c_revision c_ty (fromIntegral dp) p_seed p_prob m u)
if castStablePtrToPtr c_expr == nullPtr
then return []
else do expr <- deRefStablePtr c_expr
freeStablePtr c_expr
seed <- peek p_seed
prob <- peek p_prob
return ((expr,prob):generate seed)
generateRandomFrom :: RandomGen g => g -> PGF -> Expr -> [(Expr,Float)]
generateRandomFrom g p e = generateRandomFromDepth g p e maxBound
generateRandomFromDepth :: RandomGen g => g -> PGF -> Expr -> Int -> [(Expr,Float)]
generateRandomFromDepth g p e dp =
let (seed,_) = random g
in generate seed
where
generate seed =
unsafePerformIO $
bracket (newStablePtr e) freeStablePtr $ \c_e ->
withForeignPtr marshaller $ \m ->
withForeignPtr unmarshaller $ \u ->
withForeignPtr (a_revision p) $ \c_revision ->
alloca $ \p_seed ->
alloca $ \p_prob ->
mask_ $ do
poke p_seed seed
c_expr <- withPgfExn "generateRandomFromDepth" (pgf_generate_random_from (a_db p) c_revision c_e (fromIntegral dp) p_seed p_prob m u)
if castStablePtrToPtr c_expr == nullPtr
then return []
else do expr <- deRefStablePtr c_expr
freeStablePtr c_expr
seed <- peek p_seed
prob <- peek p_prob
return ((expr,prob):generate seed)
-- | List of all functions defined in the abstract syntax
categories :: PGF -> [Cat]

View File

@@ -195,6 +195,10 @@ foreign import ccall pgf_infer_expr :: Ptr PgfDB -> Ptr PGF -> Ptr (StablePtr Ex
foreign import ccall pgf_check_type :: Ptr PgfDB -> Ptr PGF -> Ptr (StablePtr Type) -> Ptr PgfMarshaller -> Ptr PgfUnmarshaller -> Ptr PgfExn -> IO ()
foreign import ccall pgf_generate_random :: Ptr PgfDB -> Ptr PGF -> StablePtr Type -> CSize -> Ptr Word64 -> Ptr (#type prob_t) -> Ptr PgfMarshaller -> Ptr PgfUnmarshaller -> Ptr PgfExn -> IO (StablePtr Expr)
foreign import ccall pgf_generate_random_from :: Ptr PgfDB -> Ptr PGF -> StablePtr Expr -> CSize -> Ptr Word64 -> Ptr (#type prob_t) -> Ptr PgfMarshaller -> Ptr PgfUnmarshaller -> Ptr PgfExn -> IO (StablePtr Expr)
foreign import ccall pgf_start_transaction :: Ptr PgfDB -> Ptr PgfExn -> IO (Ptr PGF)
foreign import ccall pgf_commit_transaction :: Ptr PgfDB -> Ptr PGF -> Ptr PgfExn -> IO ()