From ee717fb0223ecd60a8fa89336758eb8497a5bba7 Mon Sep 17 00:00:00 2001 From: Krasimir Angelov Date: Sun, 5 Mar 2023 13:18:14 +0100 Subject: [PATCH] added random generation --- src/compiler/GF/Quiz.hs | 6 +-- src/runtime/c/Makefile.am | 4 +- src/runtime/c/pgf/pgf.cxx | 57 ++++++++++++++++++++++++ src/runtime/c/pgf/pgf.h | 15 +++++++ src/runtime/c/pgf/probspace.cxx | 47 ++++++++++++++++++++ src/runtime/c/pgf/probspace.h | 6 +++ src/runtime/haskell/PGF2.hsc | 74 +++++++++++++++++++++++++++++--- src/runtime/haskell/PGF2/FFI.hsc | 4 ++ 8 files changed, 202 insertions(+), 11 deletions(-) diff --git a/src/compiler/GF/Quiz.hs b/src/compiler/GF/Quiz.hs index 7da485db0..63d19e13d 100644 --- a/src/compiler/GF/Quiz.hs +++ b/src/compiler/GF/Quiz.hs @@ -41,8 +41,8 @@ translationList mex pgf ig og typ number = do Nothing -> generateRandom gen pgf typ return $ map mkOne $ ts where - mkOne t = (norml (linearize ig t), - map norml (concatMap lins (homonyms t))) + mkOne (t,p) = (norml (linearize ig t), + map norml (concatMap lins (homonyms t))) homonyms t = case (parse ig typ . linearize ig) t of ParseOk res -> map fst res @@ -56,7 +56,7 @@ morphologyList mex pgf ig typ number = do let ts = take (max 1 number) $ case mex of Just ex -> generateRandomFrom gen pgf ex Nothing -> generateRandom gen pgf typ - let ss = map (tabularLinearizeAll ig) ts + let ss = map (tabularLinearizeAll ig . fst) ts let size = length (head (head ss)) let forms = take number $ randomRs (0,size-1) gen return [(snd (head pws0) +++ fst (pws0 !! i), ws) | diff --git a/src/runtime/c/Makefile.am b/src/runtime/c/Makefile.am index 030d4b627..af20f62a1 100644 --- a/src/runtime/c/Makefile.am +++ b/src/runtime/c/Makefile.am @@ -40,7 +40,9 @@ libpgf_la_SOURCES = \ pgf/phrasetable.cxx \ pgf/phrasetable.h \ pgf/probspace.cxx \ - pgf/probspace.h + pgf/probspace.h \ + pgf/generator.cxx \ + pgf/generator.h libpgf_la_LDFLAGS = -no-undefined -version-info 4:0:0 libpgf_la_CXXFLAGS = -fno-rtti -std=c++11 -DCOMPILING_PGF diff --git a/src/runtime/c/pgf/pgf.cxx b/src/runtime/c/pgf/pgf.cxx index 1c877602e..ea970d9fc 100644 --- a/src/runtime/c/pgf/pgf.cxx +++ b/src/runtime/c/pgf/pgf.cxx @@ -1,6 +1,7 @@ #include #include #include +#include #ifdef _WIN32 #include #endif @@ -14,6 +15,7 @@ #include "parser.h" #include "graphviz.h" #include "aligner.h" +#include "generator.h" static void pgf_exn_clear(PgfExn* err) @@ -1190,6 +1192,61 @@ void pgf_check_type(PgfDB *db, PgfRevision revision, } PGF_API_END } +PGF_API +PgfExpr pgf_generate_random(PgfDB *db, PgfRevision revision, + PgfType type, size_t depth, + uint64_t *seed, prob_t *prob, + PgfMarshaller *m, PgfUnmarshaller *u, + PgfExn *err) +{ + PGF_API_BEGIN { + DB_scope scope(db, READER_SCOPE); + + ref pgf = db->revision2pgf(revision); + + // Generation may fail for certain random choices, but succeed + // for others. We try 10 time to increase the chance of succeess. + for (size_t i = 0; i < 10; i++) { + PgfRandomGenerator gen(pgf, depth, seed, m, u); + PgfExpr expr = m->match_type(&gen, type); + if (expr != 0) { + *prob = gen.getProb(); + return expr; + } + } + } PGF_API_END + + return 0; +} + +PGF_API +PgfExpr pgf_generate_random_from + (PgfDB *db, PgfRevision revision, + PgfExpr expr, size_t depth, + uint64_t *seed, prob_t *prob, + PgfMarshaller *m, PgfUnmarshaller *u, + PgfExn *err) +{ + PGF_API_BEGIN { + DB_scope scope(db, READER_SCOPE); + + ref pgf = db->revision2pgf(revision); + + // Generation may fail for certain random choices, but succeed + // for others. We try 10 time to increase the chance of succeess. + for (size_t i = 0; i < 10; i++) { + PgfRandomGenerator gen(pgf, depth, seed, m, u); + PgfExpr new_expr = m->match_expr(&gen, expr); + if (new_expr != 0) { + *prob = gen.getProb(); + return new_expr; + } + } + } PGF_API_END + + return 0; +} + PGF_API PgfRevision pgf_start_transaction(PgfDB *db, PgfExn *err) { diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h index a1ccccd0d..ac1cb7676 100644 --- a/src/runtime/c/pgf/pgf.h +++ b/src/runtime/c/pgf/pgf.h @@ -509,6 +509,21 @@ void pgf_check_type(PgfDB *db, PgfRevision revision, PgfMarshaller *m, PgfUnmarshaller *u, PgfExn *err); +PGF_API_DECL +PgfExpr pgf_generate_random(PgfDB *db, PgfRevision revision, + PgfType type, size_t depth, + uint64_t *seed, prob_t *prob, + PgfMarshaller *m, PgfUnmarshaller *u, + PgfExn *err); + +PGF_API_DECL +PgfExpr pgf_generate_random_from + (PgfDB *db, PgfRevision revision, + PgfExpr expr, size_t depth, + uint64_t *seed, prob_t *prob, + PgfMarshaller *m, PgfUnmarshaller *u, + PgfExn *err); + PGF_API_DECL PgfRevision pgf_start_transaction(PgfDB *db, PgfExn *err); diff --git a/src/runtime/c/pgf/probspace.cxx b/src/runtime/c/pgf/probspace.cxx index bcfc43b5b..73b2f4980 100644 --- a/src/runtime/c/pgf/probspace.cxx +++ b/src/runtime/c/pgf/probspace.cxx @@ -1,4 +1,5 @@ #include "data.h" +#include "math.h" static int entry_cmp(PgfProbspaceEntry *entry1, PgfProbspaceEntry *entry2) @@ -199,6 +200,52 @@ void probspace_iter(PgfProbspace space, PgfText *cat, } } +static +ref probspace_random(PgfProbspace space, + PgfText *cat, prob_t *rand, + bool is_last) +{ + if (space == 0) + return 0; + + int cmp = textcmp(cat,&(*space->value.cat)); + if (cmp < 0) { + return probspace_random(space->left, cat, rand, true); + } else if (cmp > 0) { + return probspace_random(space->right, cat, rand, true); + } else { + ref fun; + + fun = probspace_random(space->left, cat, rand, false); + if (fun != 0) + return fun; + + bool is_res = (space->value.cat == ref::from_ptr(&space->value.fun->type->name)); + if (is_res) { + *rand -= exp(-space->value.fun->prob); + if (*rand <= 0) + return space->value.fun; + } + + fun = probspace_random(space->right, cat, rand, is_last); + if (fun != 0) + return fun; + if (is_last && is_res) { + // necessary due to floating point rounding + return space->value.fun; + } + } + + return 0; +} + +PGF_INTERNAL +ref probspace_random(PgfProbspace space, + PgfText *cat, prob_t rand) +{ + return probspace_random(space,cat,&rand,true); +} + PGF_INTERNAL void probspace_release(PgfProbspace space) { diff --git a/src/runtime/c/pgf/probspace.h b/src/runtime/c/pgf/probspace.h index b0c93cf12..c6011efb8 100644 --- a/src/runtime/c/pgf/probspace.h +++ b/src/runtime/c/pgf/probspace.h @@ -67,6 +67,12 @@ PGF_INTERNAL_DECL void probspace_iter(PgfProbspace space, PgfText *cat, PgfItor* itor, bool all, PgfExn *err); +/* Given a random number from 0 to 1, select a random function from + * the given category */ +PGF_INTERNAL_DECL +ref probspace_random(PgfProbspace space, + PgfText *cat, prob_t rand); + PGF_INTERNAL_DECL void probspace_release(PgfProbspace space); diff --git a/src/runtime/haskell/PGF2.hsc b/src/runtime/haskell/PGF2.hsc index 42fb96105..21406aa3c 100644 --- a/src/runtime/haskell/PGF2.hsc +++ b/src/runtime/haskell/PGF2.hsc @@ -55,7 +55,10 @@ module PGF2 (-- * PGF compute, -- ** Generation - generateAll, generateAllFrom, generateRandom, generateRandomFrom, + generateAll, generateAllDepth, + generateAllFrom, generateAllFromDepth, + generateRandom, generateRandomDepth, + generateRandomFrom, generateRandomFromDepth, -- ** Morphological Analysis MorphoAnalysis, lookupMorpho, lookupCohorts, fullFormLexicon, @@ -1009,16 +1012,73 @@ bracketedLinearizeAll c e = unsafePerformIO $ do else writeIORef ref (False,[],[],reverse bs:all) generateAll :: PGF -> Type -> [(Expr,Float)] -generateAll p ty = error "TODO: generateAll" +generateAll p ty = generateAllDepth p ty maxBound + +generateAllDepth :: PGF -> Type -> Int -> [(Expr,Float)] +generateAllDepth p ty dp = error "TODO: generateAllDepth" generateAllFrom :: PGF -> Expr -> [(Expr,Float)] -generateAllFrom p ty = error "TODO: generateAllFrom" +generateAllFrom p ty = generateAllFromDepth p ty maxBound -generateRandom :: StdGen -> PGF -> Type -> [a] -generateRandom = error "TODO: generateRandom" +generateAllFromDepth :: PGF -> Expr -> Int -> [(Expr,Float)] +generateAllFromDepth p ty = error "TODO: generateFromDepth" -generateRandomFrom :: StdGen -> PGF -> Expr -> [a] -generateRandomFrom = error "TODO: generateRandomFrom" +-- | Generates a potentially infinite list of random +-- abstract syntax expressions. This is usefull for tree bank generation +-- which after that can be used for grammar testing. +generateRandom :: RandomGen g => g -> PGF -> Type -> [(Expr,Float)] +generateRandom g pgf ty = generateRandomDepth g pgf ty maxBound + +generateRandomDepth :: RandomGen g => g -> PGF -> Type -> Int -> [(Expr,Float)] +generateRandomDepth g p ty dp = + let (seed,_) = random g + in generate seed + where + generate seed = + unsafePerformIO $ + bracket (newStablePtr ty) freeStablePtr $ \c_ty -> + withForeignPtr marshaller $ \m -> + withForeignPtr unmarshaller $ \u -> + withForeignPtr (a_revision p) $ \c_revision -> + alloca $ \p_seed -> + alloca $ \p_prob -> + mask_ $ do + poke p_seed seed + c_expr <- withPgfExn "generateRandomDepth" (pgf_generate_random (a_db p) c_revision c_ty (fromIntegral dp) p_seed p_prob m u) + if castStablePtrToPtr c_expr == nullPtr + then return [] + else do expr <- deRefStablePtr c_expr + freeStablePtr c_expr + seed <- peek p_seed + prob <- peek p_prob + return ((expr,prob):generate seed) + +generateRandomFrom :: RandomGen g => g -> PGF -> Expr -> [(Expr,Float)] +generateRandomFrom g p e = generateRandomFromDepth g p e maxBound + +generateRandomFromDepth :: RandomGen g => g -> PGF -> Expr -> Int -> [(Expr,Float)] +generateRandomFromDepth g p e dp = + let (seed,_) = random g + in generate seed + where + generate seed = + unsafePerformIO $ + bracket (newStablePtr e) freeStablePtr $ \c_e -> + withForeignPtr marshaller $ \m -> + withForeignPtr unmarshaller $ \u -> + withForeignPtr (a_revision p) $ \c_revision -> + alloca $ \p_seed -> + alloca $ \p_prob -> + mask_ $ do + poke p_seed seed + c_expr <- withPgfExn "generateRandomFromDepth" (pgf_generate_random_from (a_db p) c_revision c_e (fromIntegral dp) p_seed p_prob m u) + if castStablePtrToPtr c_expr == nullPtr + then return [] + else do expr <- deRefStablePtr c_expr + freeStablePtr c_expr + seed <- peek p_seed + prob <- peek p_prob + return ((expr,prob):generate seed) -- | List of all functions defined in the abstract syntax categories :: PGF -> [Cat] diff --git a/src/runtime/haskell/PGF2/FFI.hsc b/src/runtime/haskell/PGF2/FFI.hsc index d1f338a0e..f3c55efb0 100644 --- a/src/runtime/haskell/PGF2/FFI.hsc +++ b/src/runtime/haskell/PGF2/FFI.hsc @@ -195,6 +195,10 @@ foreign import ccall pgf_infer_expr :: Ptr PgfDB -> Ptr PGF -> Ptr (StablePtr Ex foreign import ccall pgf_check_type :: Ptr PgfDB -> Ptr PGF -> Ptr (StablePtr Type) -> Ptr PgfMarshaller -> Ptr PgfUnmarshaller -> Ptr PgfExn -> IO () +foreign import ccall pgf_generate_random :: Ptr PgfDB -> Ptr PGF -> StablePtr Type -> CSize -> Ptr Word64 -> Ptr (#type prob_t) -> Ptr PgfMarshaller -> Ptr PgfUnmarshaller -> Ptr PgfExn -> IO (StablePtr Expr) + +foreign import ccall pgf_generate_random_from :: Ptr PgfDB -> Ptr PGF -> StablePtr Expr -> CSize -> Ptr Word64 -> Ptr (#type prob_t) -> Ptr PgfMarshaller -> Ptr PgfUnmarshaller -> Ptr PgfExn -> IO (StablePtr Expr) + foreign import ccall pgf_start_transaction :: Ptr PgfDB -> Ptr PgfExn -> IO (Ptr PGF) foreign import ccall pgf_commit_transaction :: Ptr PgfDB -> Ptr PGF -> Ptr PgfExn -> IO ()