1
0
forked from GitHub/gf-core

make it possible to replace the probabilities while reading a new .pgf

This commit is contained in:
Krasimir Angelov
2022-07-14 11:04:45 +02:00
parent f1cad40394
commit 4d0f33e3c3
10 changed files with 196 additions and 24 deletions

View File

@@ -3,9 +3,10 @@
#include <math.h>
#include <string.h>
PgfReader::PgfReader(FILE *in)
// Builds a reader over the stream `in`.
//
// `probs_callback` is stored as given; read_prob() consults it only when it
// is not NULL, so passing NULL keeps the probabilities found in the stream.
PgfReader::PgfReader(FILE *in,PgfProbsCallback *probs_callback)
{
    // Both syntax references start out as 0.
    abstract = 0;
    concrete = 0;

    // The parameters shadow the members, hence the explicit this->.
    this->probs_callback = probs_callback;
    this->in = in;
}
@@ -71,6 +72,15 @@ double PgfReader::read_double()
return sign ? copysign(ret, -1.0) : ret;
}
// Reads one probability from the stream and returns it as a negative
// logarithm.
//
// The stored double is always consumed, even when it ends up being ignored,
// so that the reader stays positioned on the next field. When a probabilities
// callback is installed, the value it returns for `name` replaces the one
// that was read.
prob_t PgfReader::read_prob(PgfText *name)
{
    const double stored = read_double();

    const double p = (probs_callback == NULL)
                         ? stored
                         : probs_callback->fn(probs_callback, name);

    return - log(p);
}
uint64_t PgfReader::read_uint()
{
uint64_t u = 0;
@@ -318,7 +328,7 @@ ref<PgfAbsFun> PgfReader::read_absfun()
default:
throw pgf_error("Unknown tag, 0 or 1 expected");
}
absfun->prob = - log(read_double());
absfun->prob = read_prob(&absfun->name);
return absfun;
}
@@ -326,10 +336,76 @@ ref<PgfAbsCat> PgfReader::read_abscat()
{
ref<PgfAbsCat> abscat = read_name<PgfAbsCat>(&PgfAbsCat::name);
abscat->context = read_vector<PgfHypo>(&PgfReader::read_hypo);
abscat->prob = - log(read_double());
abscat->prob = read_prob(&abscat->name);
return abscat;
}
// Per-category bookkeeping used while redistributing probabilities in
// PgfReader::read_abstract.
struct PGF_INTERNAL_DECL PgfAbsCatCounts
{
// Category name; the key that find_counts compares against with textcmp.
PgfText *name;
// Number of functions of this category whose prob is NaN
// (incremented by collect_counts).
size_t n_nan_probs;
// Sum of exp(-prob) over the functions of this category whose prob is
// not NaN (accumulated by collect_counts).
double probs_sum;
// Value that pad_probs copies into every function of this category whose
// prob is NaN; computed in read_abstract as
// -log((1 - probs_sum) / n_nan_probs).
prob_t prob;
};
// Iterator state handed to collect_counts and pad_probs: both callbacks
// receive a PgfItor* and cast it back to this type to reach `cats`.
struct PGF_INTERNAL_DECL PgfProbItor : PgfItor
{
// Per-category counters, looked up by name with find_counts (a binary
// search, so the entries must be sorted by name).
Vector<PgfAbsCatCounts> *cats;
};
// Looks up the counters for the category called `name`.
//
// `cats` must be sorted in ascending order with respect to textcmp, since the
// lookup is a binary search. Returns a pointer to the matching entry inside
// `cats`, or NULL when no category has that name (including when `cats` is
// empty).
//
// The search keeps a half-open interval [lo, hi). With unsigned indices an
// inclusive upper bound would wrap around to SIZE_MAX both for an empty
// vector (len-1) and when the name sorts before the first entry (k-1 with
// k == 0), which would lead to out-of-bounds reads.
static
PgfAbsCatCounts *find_counts(Vector<PgfAbsCatCounts> *cats, PgfText *name)
{
    size_t lo = 0;
    size_t hi = cats->len;
    while (lo < hi) {
        // Written this way so the midpoint cannot overflow.
        size_t mid = lo + (hi - lo) / 2;
        PgfAbsCatCounts *counts = &cats->data[mid];
        int cmp = textcmp(name, counts->name);
        if (cmp < 0) {
            hi = mid;        // continue in [lo, mid)
        } else if (cmp > 0) {
            lo = mid + 1;    // continue in [mid+1, hi)
        } else {
            return counts;
        }
    }
    return NULL;
}
// Namespace-iteration callback, first pass over the abstract functions.
//
// `itor` is really a PgfProbItor and `value` is a ref<PgfAbsFun>. The
// function is attributed to the category named by its type: a NaN prob bumps
// that category's n_nan_probs, any other prob adds exp(-prob) to its
// probs_sum. Functions whose category is not found are ignored.
// `key` and `err` are not used.
static
void collect_counts(PgfItor *itor, PgfText *key, object value, PgfExn *err)
{
    PgfProbItor *state = static_cast<PgfProbItor*>(itor);
    ref<PgfAbsFun> fun = value;

    PgfAbsCatCounts *entry = find_counts(state->cats, &fun->type->name);
    if (entry == NULL)
        return;

    if (!isnan(fun->prob)) {
        // prob is a negative logarithm; convert back before summing.
        entry->probs_sum += exp(-fun->prob);
    } else {
        entry->n_nan_probs++;
    }
}
// Namespace-iteration callback, second pass over the abstract functions.
//
// `itor` is really a PgfProbItor and `value` is a ref<PgfAbsFun>. A function
// whose prob is NaN receives the `prob` stored in the counters of the
// category named by its type; all other functions, and functions whose
// category is not found, are left untouched. `key` and `err` are not used.
static
void pad_probs(PgfItor *itor, PgfText *key, object value, PgfExn *err)
{
    PgfProbItor *state = static_cast<PgfProbItor*>(itor);
    ref<PgfAbsFun> fun = value;

    if (!isnan(fun->prob))
        return;

    PgfAbsCatCounts *entry = find_counts(state->cats, &fun->type->name);
    if (entry != NULL)
        fun->prob = entry->prob;
}
// Reads the abstract syntax into `abstract`: flags, functions and categories
// are loaded with read_namespace. When a probabilities callback is installed,
// every function whose prob is NaN afterwards receives an equal share of the
// probability mass not claimed by the other functions of the category named
// by its type.
void PgfReader::read_abstract(ref<PgfAbstr> abstract)
{
this->abstract = abstract;
@@ -338,6 +414,27 @@ void PgfReader::read_abstract(ref<PgfAbstr> abstract)
abstract->aflags = read_namespace<PgfFlag>(&PgfReader::read_flag);
abstract->funs = read_namespace<PgfAbsFun>(&PgfReader::read_absfun);
abstract->cats = read_namespace<PgfAbsCat>(&PgfReader::read_abscat);
if (probs_callback != NULL) {
// NOTE(review): err is passed to both iterations but never inspected here.
PgfExn err;
err.type = PGF_EXN_NONE;
PgfProbItor itor;
// One PgfAbsCatCounts per category; find_counts binary-searches this
// vector, so it has to be sorted by name.
// NOTE(review): presumably n_nan_probs and probs_sum start at zero -- confirm
// in namespace_to_sorted_names.
itor.cats = namespace_to_sorted_names<PgfAbsCat,PgfAbsCatCounts>(abstract->cats);
// Pass 1: per category, count the NaN probs and sum the others.
itor.fn = collect_counts;
namespace_iter(abstract->funs, &itor, &err);
for (size_t i = 0; i < itor.cats->len; i++) {
PgfAbsCatCounts *counts = &itor.cats->data[i];
// Split the remaining mass (1 - probs_sum) evenly among the NaN functions.
// NOTE(review): divides by zero when n_nan_probs == 0; pad_probs only reads
// this value for functions with a NaN prob, so it looks unused in that
// case -- confirm. A probs_sum >= 1 yields an infinite or NaN result.
counts->prob = - log((1-counts->probs_sum) / counts->n_nan_probs);
}
// Pass 2: write the computed value into every function with a NaN prob.
itor.fn = pad_probs;
namespace_iter(abstract->funs, &itor, &err);
// NOTE(review): assumes namespace_to_sorted_names allocates with malloc -- confirm.
free(itor.cats);
}
}
void PgfReader::merge_abstract(ref<PgfAbstr> abstract)