mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
make it possible to replace the probabilities while reading a new .pgf
This commit is contained in:
@@ -64,8 +64,14 @@ importPGF opts Nothing f
|
||||
then removeFile f'
|
||||
else return ()
|
||||
putStr ("(Boot image "++f'++") ")
|
||||
fmap Just (bootNGF f f')
|
||||
| otherwise = fmap Just (readPGF f)
|
||||
mb_probs <- case flag optProbsFile opts of
|
||||
Nothing -> return Nothing
|
||||
Just file -> fmap Just (readProbabilitiesFromFile file)
|
||||
fmap Just (bootNGFWithProbs f mb_probs f')
|
||||
| otherwise = do mb_probs <- case flag optProbsFile opts of
|
||||
Nothing -> return Nothing
|
||||
Just file -> fmap Just (readProbabilitiesFromFile file)
|
||||
fmap Just (readPGFWithProbs f mb_probs)
|
||||
importPGF opts (Just pgf) f = fmap Just (modifyPGF pgf (mergePGF f) `catch`
|
||||
(\e@(PGFError loc msg) ->
|
||||
if msg == "The abstract syntax names doesn't match"
|
||||
@@ -73,7 +79,6 @@ importPGF opts (Just pgf) f = fmap Just (modifyPGF pgf (mergePGF f) `catc
|
||||
readPGF f
|
||||
else throwIO e))
|
||||
|
||||
|
||||
importSource :: Options -> [FilePath] -> IO (ModuleName,SourceGrammar)
|
||||
importSource opts files = fmap snd (batchCompile opts files)
|
||||
|
||||
|
||||
@@ -140,7 +140,10 @@ unionPGFFiles opts fs =
|
||||
doIt =
|
||||
case fs of
|
||||
[] -> return ()
|
||||
(f:fs) -> do pgf <- if snd (flag optLinkTargets opts)
|
||||
(f:fs) -> do mb_probs <- case flag optProbsFile opts of
|
||||
Nothing -> return Nothing
|
||||
Just file -> fmap Just (readProbabilitiesFromFile file)
|
||||
pgf <- if snd (flag optLinkTargets opts)
|
||||
then case flag optName opts of
|
||||
Just name -> do let fname = maybe id (</>) (flag optOutputDir opts) (name<.>"ngf")
|
||||
putStrLnE ("(Boot image "++fname++")")
|
||||
@@ -148,10 +151,10 @@ unionPGFFiles opts fs =
|
||||
if exists
|
||||
then removeFile fname
|
||||
else return ()
|
||||
echo (\f -> bootNGF f fname) f
|
||||
echo (\f -> bootNGFWithProbs f mb_probs fname) f
|
||||
Nothing -> do putStrLnE $ "To boot from a list of .pgf files add option -name"
|
||||
echo readPGF f
|
||||
else echo readPGF f
|
||||
echo (\f -> readPGFWithProbs f mb_probs) f
|
||||
else echo (\f -> readPGFWithProbs f mb_probs) f
|
||||
pgf <- foldM (\pgf -> echo (modifyPGF pgf . mergePGF)) pgf fs
|
||||
let pgfFile = outputPath opts (grammarName opts pgf <.> "pgf")
|
||||
if pgfFile `elem` fs
|
||||
|
||||
@@ -407,6 +407,33 @@ void namespace_iter(Namespace<V> map, PgfItor* itor, PgfExn *err)
|
||||
return;
|
||||
}
|
||||
|
||||
template <class V,class A>
|
||||
void namespace_vec_fill_names(Namespace<V> node, size_t offs, Vector<A> *vec)
|
||||
{
|
||||
if (node == 0)
|
||||
return;
|
||||
|
||||
namespace_vec_fill_names(node->left, offs, vec);
|
||||
|
||||
offs += namespace_size(node->left);
|
||||
vector_elem(vec, offs++)->name = &node->value->name;
|
||||
|
||||
namespace_vec_fill_names(node->right, offs, vec);
|
||||
}
|
||||
|
||||
template <class V,class A>
|
||||
Vector<A> *namespace_to_sorted_names(Namespace<V> node)
|
||||
{
|
||||
Vector<A> *vec = (Vector<A> *)
|
||||
malloc(sizeof(Vector<A>)+node->sz*sizeof(A));
|
||||
if (errno != 0)
|
||||
throw pgf_systemerror(errno);
|
||||
vec->len = node->sz;
|
||||
memset(vec->data, 0, node->sz*sizeof(A));
|
||||
namespace_vec_fill_names(node, 0, vec);
|
||||
return vec;
|
||||
}
|
||||
|
||||
template <class V>
|
||||
void namespace_release(Namespace<V> node)
|
||||
{
|
||||
|
||||
@@ -37,8 +37,8 @@ pgf_exn_clear(PgfExn* err)
|
||||
}
|
||||
|
||||
PGF_API
|
||||
PgfDB *pgf_read_pgf(const char* fpath,
|
||||
PgfRevision *revision,
|
||||
PgfDB *pgf_read_pgf(const char* fpath, PgfRevision *revision,
|
||||
PgfProbsCallback *probs_callback,
|
||||
PgfExn* err)
|
||||
{
|
||||
PgfDB *db = NULL;
|
||||
@@ -56,7 +56,7 @@ PgfDB *pgf_read_pgf(const char* fpath,
|
||||
|
||||
db->start_transaction();
|
||||
|
||||
PgfReader rdr(in);
|
||||
PgfReader rdr(in,probs_callback);
|
||||
ref<PgfPGF> pgf = rdr.read_pgf();
|
||||
|
||||
*revision = db->register_revision(pgf.tagged(), PgfDB::get_txn_id());
|
||||
@@ -79,6 +79,7 @@ PgfDB *pgf_read_pgf(const char* fpath,
|
||||
PGF_API
|
||||
PgfDB *pgf_boot_ngf(const char* pgf_path, const char* ngf_path,
|
||||
PgfRevision *revision,
|
||||
PgfProbsCallback *probs_callback,
|
||||
PgfExn* err)
|
||||
{
|
||||
PgfDB *db = NULL;
|
||||
@@ -103,7 +104,7 @@ PgfDB *pgf_boot_ngf(const char* pgf_path, const char* ngf_path,
|
||||
|
||||
db->start_transaction();
|
||||
|
||||
PgfReader rdr(in);
|
||||
PgfReader rdr(in,probs_callback);
|
||||
ref<PgfPGF> pgf = rdr.read_pgf();
|
||||
|
||||
*revision = db->register_revision(pgf.tagged(), PgfDB::get_txn_id());
|
||||
@@ -220,7 +221,7 @@ void pgf_merge_pgf(PgfDB *db, PgfRevision revision,
|
||||
DB_scope scope(db, WRITER_SCOPE);
|
||||
ref<PgfPGF> pgf = db->revision2pgf(revision);
|
||||
|
||||
PgfReader rdr(in);
|
||||
PgfReader rdr(in,NULL);
|
||||
rdr.merge_pgf(pgf);
|
||||
}
|
||||
} PGF_API_END
|
||||
|
||||
@@ -226,11 +226,17 @@ typedef struct PgfDB PgfDB;
|
||||
typedef object PgfRevision;
|
||||
typedef object PgfConcrRevision;
|
||||
|
||||
typedef struct PgfProbsCallback PgfProbsCallback;
|
||||
struct PgfProbsCallback {
|
||||
double (*fn)(PgfProbsCallback* self, PgfText *name);
|
||||
};
|
||||
|
||||
/* Reads a PGF file and builds the database in memory.
|
||||
* If successful, *revision will contain the initial revision of
|
||||
* the grammar. */
|
||||
PGF_API_DECL
|
||||
PgfDB *pgf_read_pgf(const char* fpath, PgfRevision *revision,
|
||||
PgfProbsCallback *probs_callback,
|
||||
PgfExn* err);
|
||||
|
||||
/* Reads a PGF file and stores the unpacked data in an NGF file
|
||||
@@ -240,6 +246,7 @@ PgfDB *pgf_read_pgf(const char* fpath, PgfRevision *revision,
|
||||
PGF_API_DECL
|
||||
PgfDB *pgf_boot_ngf(const char* pgf_path, const char* ngf_path,
|
||||
PgfRevision *revision,
|
||||
PgfProbsCallback *probs_callback,
|
||||
PgfExn* err);
|
||||
|
||||
/* Tries to read the grammar from an already booted NGF file.
|
||||
|
||||
@@ -3,9 +3,10 @@
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
PgfReader::PgfReader(FILE *in)
|
||||
PgfReader::PgfReader(FILE *in,PgfProbsCallback *probs_callback)
|
||||
{
|
||||
this->in = in;
|
||||
this->probs_callback = probs_callback;
|
||||
this->abstract = 0;
|
||||
this->concrete = 0;
|
||||
}
|
||||
@@ -71,6 +72,15 @@ double PgfReader::read_double()
|
||||
return sign ? copysign(ret, -1.0) : ret;
|
||||
}
|
||||
|
||||
prob_t PgfReader::read_prob(PgfText *name)
|
||||
{
|
||||
double d = read_double();
|
||||
if (probs_callback != NULL) {
|
||||
d = probs_callback->fn(probs_callback, name);
|
||||
}
|
||||
return - log(d);
|
||||
}
|
||||
|
||||
uint64_t PgfReader::read_uint()
|
||||
{
|
||||
uint64_t u = 0;
|
||||
@@ -318,7 +328,7 @@ ref<PgfAbsFun> PgfReader::read_absfun()
|
||||
default:
|
||||
throw pgf_error("Unknown tag, 0 or 1 expected");
|
||||
}
|
||||
absfun->prob = - log(read_double());
|
||||
absfun->prob = read_prob(&absfun->name);
|
||||
return absfun;
|
||||
}
|
||||
|
||||
@@ -326,10 +336,76 @@ ref<PgfAbsCat> PgfReader::read_abscat()
|
||||
{
|
||||
ref<PgfAbsCat> abscat = read_name<PgfAbsCat>(&PgfAbsCat::name);
|
||||
abscat->context = read_vector<PgfHypo>(&PgfReader::read_hypo);
|
||||
abscat->prob = - log(read_double());
|
||||
abscat->prob = read_prob(&abscat->name);
|
||||
return abscat;
|
||||
}
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfAbsCatCounts
|
||||
{
|
||||
PgfText *name;
|
||||
size_t n_nan_probs;
|
||||
double probs_sum;
|
||||
prob_t prob;
|
||||
};
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfProbItor : PgfItor
|
||||
{
|
||||
Vector<PgfAbsCatCounts> *cats;
|
||||
};
|
||||
|
||||
static
|
||||
PgfAbsCatCounts *find_counts(Vector<PgfAbsCatCounts> *cats, PgfText *name)
|
||||
{
|
||||
size_t i = 0;
|
||||
size_t j = cats->len-1;
|
||||
while (i <= j) {
|
||||
size_t k = (i+j)/2;
|
||||
PgfAbsCatCounts *counts = &cats->data[k];
|
||||
int cmp = textcmp(name, counts->name);
|
||||
if (cmp < 0) {
|
||||
j = k-1;
|
||||
} else if (cmp > 0) {
|
||||
i = k+1;
|
||||
} else {
|
||||
return counts;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static
|
||||
void collect_counts(PgfItor *itor, PgfText *key, object value, PgfExn *err)
|
||||
{
|
||||
PgfProbItor* prob_itor = (PgfProbItor*) itor;
|
||||
ref<PgfAbsFun> absfun = value;
|
||||
|
||||
PgfAbsCatCounts *counts =
|
||||
find_counts(prob_itor->cats, &absfun->type->name);
|
||||
if (counts != NULL) {
|
||||
if (isnan(absfun->prob)) {
|
||||
counts->n_nan_probs++;
|
||||
} else {
|
||||
counts->probs_sum += exp(-absfun->prob);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void pad_probs(PgfItor *itor, PgfText *key, object value, PgfExn *err)
|
||||
{
|
||||
PgfProbItor* prob_itor = (PgfProbItor*) itor;
|
||||
ref<PgfAbsFun> absfun = value;
|
||||
|
||||
if (isnan(absfun->prob)) {
|
||||
PgfAbsCatCounts *counts =
|
||||
find_counts(prob_itor->cats, &absfun->type->name);
|
||||
if (counts != NULL) {
|
||||
absfun->prob = counts->prob;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PgfReader::read_abstract(ref<PgfAbstr> abstract)
|
||||
{
|
||||
this->abstract = abstract;
|
||||
@@ -338,6 +414,27 @@ void PgfReader::read_abstract(ref<PgfAbstr> abstract)
|
||||
abstract->aflags = read_namespace<PgfFlag>(&PgfReader::read_flag);
|
||||
abstract->funs = read_namespace<PgfAbsFun>(&PgfReader::read_absfun);
|
||||
abstract->cats = read_namespace<PgfAbsCat>(&PgfReader::read_abscat);
|
||||
|
||||
if (probs_callback != NULL) {
|
||||
PgfExn err;
|
||||
err.type = PGF_EXN_NONE;
|
||||
|
||||
PgfProbItor itor;
|
||||
itor.cats = namespace_to_sorted_names<PgfAbsCat,PgfAbsCatCounts>(abstract->cats);
|
||||
|
||||
itor.fn = collect_counts;
|
||||
namespace_iter(abstract->funs, &itor, &err);
|
||||
|
||||
for (size_t i = 0; i < itor.cats->len; i++) {
|
||||
PgfAbsCatCounts *counts = &itor.cats->data[i];
|
||||
counts->prob = - log((1-counts->probs_sum) / counts->n_nan_probs);
|
||||
}
|
||||
|
||||
itor.fn = pad_probs;
|
||||
namespace_iter(abstract->funs, &itor, &err);
|
||||
|
||||
free(itor.cats);
|
||||
}
|
||||
}
|
||||
|
||||
void PgfReader::merge_abstract(ref<PgfAbstr> abstract)
|
||||
|
||||
@@ -8,12 +8,13 @@
|
||||
class PGF_INTERNAL_DECL PgfReader
|
||||
{
|
||||
public:
|
||||
PgfReader(FILE *in);
|
||||
PgfReader(FILE *in,PgfProbsCallback *probs_callback);
|
||||
|
||||
uint8_t read_uint8();
|
||||
uint16_t read_u16be();
|
||||
uint64_t read_u64be();
|
||||
double read_double();
|
||||
prob_t read_prob(PgfText *name);
|
||||
uint64_t read_uint();
|
||||
int64_t read_int() { return (int64_t) read_uint(); };
|
||||
size_t read_len() { return (size_t) read_uint(); };
|
||||
@@ -87,6 +88,7 @@ public:
|
||||
|
||||
private:
|
||||
FILE *in;
|
||||
PgfProbsCallback *probs_callback;
|
||||
ref<PgfAbstr> abstract;
|
||||
ref<PgfConcr> concrete;
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
|
||||
module PGF2 (-- * PGF
|
||||
PGF,readPGF,bootNGF,readNGF,newNGF,writePGF,showPGF,
|
||||
readPGFWithProbs, bootNGFWithProbs,
|
||||
|
||||
-- * Abstract syntax
|
||||
AbsName,abstractName,globalFlag,abstractFlag,
|
||||
@@ -109,11 +110,15 @@ import Text.PrettyPrint
|
||||
|
||||
-- | Reads a PGF file and keeps it in memory.
|
||||
readPGF :: FilePath -> IO PGF
|
||||
readPGF fpath =
|
||||
readPGF fpath = readPGFWithProbs fpath Nothing
|
||||
|
||||
readPGFWithProbs :: FilePath -> Maybe (Map.Map String Double) -> IO PGF
|
||||
readPGFWithProbs fpath mb_probs =
|
||||
withCString fpath $ \c_fpath ->
|
||||
alloca $ \p_revision ->
|
||||
withProbsCallback mb_probs $ \c_pcallback ->
|
||||
mask_ $ do
|
||||
c_db <- withPgfExn "readPGF" (pgf_read_pgf c_fpath p_revision)
|
||||
c_db <- withPgfExn "readPGF" (pgf_read_pgf c_fpath p_revision c_pcallback)
|
||||
c_revision <- peek p_revision
|
||||
fptr <- newForeignPtrEnv pgf_free_revision c_db c_revision
|
||||
langs <- getConcretes c_db fptr
|
||||
@@ -124,17 +129,37 @@ readPGF fpath =
|
||||
-- The NGF file is platform dependent and should not be copied
|
||||
-- between machines.
|
||||
bootNGF :: FilePath -> FilePath -> IO PGF
|
||||
bootNGF pgf_path ngf_path =
|
||||
bootNGF pgf_path ngf_path = bootNGFWithProbs pgf_path Nothing ngf_path
|
||||
|
||||
bootNGFWithProbs :: FilePath -> Maybe (Map.Map String Double) -> FilePath -> IO PGF
|
||||
bootNGFWithProbs pgf_path mb_probs ngf_path =
|
||||
withCString pgf_path $ \c_pgf_path ->
|
||||
withCString ngf_path $ \c_ngf_path ->
|
||||
alloca $ \p_revision ->
|
||||
withProbsCallback mb_probs $ \c_pcallback ->
|
||||
mask_ $ do
|
||||
c_db <- withPgfExn "bootNGF" (pgf_boot_ngf c_pgf_path c_ngf_path p_revision)
|
||||
c_db <- withPgfExn "bootNGF" (pgf_boot_ngf c_pgf_path c_ngf_path p_revision c_pcallback)
|
||||
c_revision <- peek p_revision
|
||||
fptr <- newForeignPtrEnv pgf_free_revision c_db c_revision
|
||||
langs <- getConcretes c_db fptr
|
||||
return (PGF c_db fptr langs)
|
||||
|
||||
withProbsCallback :: Maybe (Map.Map String Double) -> (Ptr PgfProbsCallback -> IO a) -> IO a
|
||||
withProbsCallback Nothing f = f nullPtr
|
||||
withProbsCallback (Just probs) f =
|
||||
allocaBytes (#size PgfProbsCallback) $ \callback ->
|
||||
bracket (wrapProbsCallback getProb) freeHaskellFunPtr $ \fptr -> do
|
||||
(#poke PgfProbsCallback, fn) callback fptr
|
||||
f callback
|
||||
where
|
||||
getProb _ c_name = do
|
||||
name <- peekText c_name
|
||||
case Map.lookup name probs of
|
||||
Nothing -> return nan
|
||||
Just p -> return p
|
||||
|
||||
nan = log (-1)
|
||||
|
||||
-- | Reads the grammar from an already booted NGF file.
|
||||
-- The function fails if the file does not exist.
|
||||
readNGF :: FilePath -> IO PGF
|
||||
|
||||
@@ -46,6 +46,7 @@ data PgfLinBuilderIface
|
||||
data PgfLinearizationOutputIface
|
||||
data PgfGraphvizOptions
|
||||
data PgfSequenceItor
|
||||
data PgfProbsCallback
|
||||
data PgfMorphoCallback
|
||||
data PgfCohortsCallback
|
||||
data PgfPhrasetableIds
|
||||
@@ -60,10 +61,14 @@ foreign import ccall unsafe "pgf_utf8_encode"
|
||||
pgf_utf8_encode :: Word32 -> Ptr CString -> IO ()
|
||||
|
||||
foreign import ccall "pgf_read_pgf"
|
||||
pgf_read_pgf :: CString -> Ptr (Ptr PGF) -> Ptr PgfExn -> IO (Ptr PgfDB)
|
||||
pgf_read_pgf :: CString -> Ptr (Ptr PGF) -> Ptr PgfProbsCallback -> Ptr PgfExn -> IO (Ptr PgfDB)
|
||||
|
||||
foreign import ccall "pgf_boot_ngf"
|
||||
pgf_boot_ngf :: CString -> CString -> Ptr (Ptr PGF) -> Ptr PgfExn -> IO (Ptr PgfDB)
|
||||
pgf_boot_ngf :: CString -> CString -> Ptr (Ptr PGF) -> Ptr PgfProbsCallback -> Ptr PgfExn -> IO (Ptr PgfDB)
|
||||
|
||||
type ProbsCallback = Ptr PgfProbsCallback -> Ptr PgfText -> IO Double
|
||||
|
||||
foreign import ccall "wrapper" wrapProbsCallback :: Wrapper ProbsCallback
|
||||
|
||||
foreign import ccall "pgf_read_ngf"
|
||||
pgf_read_ngf :: CString -> Ptr (Ptr PGF) -> Ptr PgfExn -> IO (Ptr PgfDB)
|
||||
|
||||
@@ -515,7 +515,7 @@ pgf_readPGF(PyObject *self, PyObject *args)
|
||||
PGFObject *py_pgf = (PGFObject *)pgf_PGFType.tp_alloc(&pgf_PGFType, 0);
|
||||
|
||||
PgfExn err;
|
||||
py_pgf->db = pgf_read_pgf(fpath, &py_pgf->revision, &err);
|
||||
py_pgf->db = pgf_read_pgf(fpath, &py_pgf->revision, NULL, &err);
|
||||
if (handleError(err) != PGF_EXN_NONE) {
|
||||
Py_DECREF(py_pgf);
|
||||
return NULL;
|
||||
@@ -535,7 +535,7 @@ pgf_bootNGF(PyObject *self, PyObject *args)
|
||||
PGFObject *py_pgf = (PGFObject *)pgf_PGFType.tp_alloc(&pgf_PGFType, 0);
|
||||
|
||||
PgfExn err;
|
||||
py_pgf->db = pgf_boot_ngf(fpath, npath, &py_pgf->revision, &err);
|
||||
py_pgf->db = pgf_boot_ngf(fpath, npath, &py_pgf->revision, NULL, &err);
|
||||
if (handleError(err) != PGF_EXN_NONE) {
|
||||
Py_DECREF(py_pgf);
|
||||
return NULL;
|
||||
|
||||
Reference in New Issue
Block a user