mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-23 11:42:49 -06:00
fix the parseval metric after the redesign in the C runtime
This commit is contained in:
@@ -3,7 +3,7 @@
|
|||||||
#include <pgf/linearizer.h>
|
#include <pgf/linearizer.h>
|
||||||
#include <pgf/parser.h>
|
#include <pgf/parser.h>
|
||||||
|
|
||||||
/*typedef struct {
|
typedef struct {
|
||||||
int start, end;
|
int start, end;
|
||||||
PgfCId cat;
|
PgfCId cat;
|
||||||
int lin_idx;
|
int lin_idx;
|
||||||
@@ -11,7 +11,9 @@
|
|||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
PgfLinFuncs* funcs;
|
PgfLinFuncs* funcs;
|
||||||
PgfParseState* ps;
|
bool bind;
|
||||||
|
GuOut* out;
|
||||||
|
GuExn* err;
|
||||||
int pos;
|
int pos;
|
||||||
GuBuf* marks;
|
GuBuf* marks;
|
||||||
GuBuf* phrases;
|
GuBuf* phrases;
|
||||||
@@ -19,15 +21,28 @@ typedef struct {
|
|||||||
GuPool* pool;
|
GuPool* pool;
|
||||||
} PgfMetricsLznState;
|
} PgfMetricsLznState;
|
||||||
|
|
||||||
|
static void
|
||||||
|
pgf_metrics_put_space(PgfMetricsLznState* state)
|
||||||
|
{
|
||||||
|
if (state->bind)
|
||||||
|
state->bind = false;
|
||||||
|
else {
|
||||||
|
if (state->out != NULL)
|
||||||
|
gu_putc(' ', state->out, state->err);
|
||||||
|
state->pos++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pgf_metrics_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
|
pgf_metrics_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
|
||||||
{
|
{
|
||||||
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
|
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
|
||||||
|
|
||||||
if (state->ps != NULL)
|
|
||||||
state->ps = pgf_parser_next_state(state->ps, tok);
|
|
||||||
|
|
||||||
state->pos++;
|
pgf_metrics_put_space(state);
|
||||||
|
if (state->out != NULL)
|
||||||
|
gu_string_write(tok, state->out, state->err);
|
||||||
|
|
||||||
|
state->pos += strlen(tok);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@@ -39,32 +54,27 @@ pgf_metrics_lzn_expr_literal(PgfLinFuncs** funcs, PgfLiteral lit)
|
|||||||
switch (i.tag) {
|
switch (i.tag) {
|
||||||
case PGF_LITERAL_STR: {
|
case PGF_LITERAL_STR: {
|
||||||
PgfLiteralStr* lstr = i.data;
|
PgfLiteralStr* lstr = i.data;
|
||||||
if (state->ps != NULL) {
|
if (state->out != NULL)
|
||||||
state->ps = pgf_parser_next_state(state->ps, lstr->val);
|
gu_string_write(lstr->val, state->out, state->err);
|
||||||
}
|
state->pos += strlen(lstr->val);
|
||||||
state->pos++;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PGF_LITERAL_INT: {
|
case PGF_LITERAL_INT: {
|
||||||
PgfLiteralInt* lint = i.data;
|
PgfLiteralInt* lint = i.data;
|
||||||
if (state->ps != NULL) {
|
GuString tok =
|
||||||
GuString tok =
|
gu_format_string(state->pool, "%d", lint->val);
|
||||||
gu_format_string(state->pool, "%d", lint->val);
|
if (state->out != NULL)
|
||||||
|
gu_string_write(tok, state->out, state->err);
|
||||||
state->ps = pgf_parser_next_state(state->ps, tok);
|
state->pos += strlen(tok);
|
||||||
}
|
|
||||||
state->pos++;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PGF_LITERAL_FLT: {
|
case PGF_LITERAL_FLT: {
|
||||||
PgfLiteralFlt* lflt = i.data;
|
PgfLiteralFlt* lflt = i.data;
|
||||||
if (state->ps != NULL) {
|
GuString tok =
|
||||||
GuString tok =
|
gu_format_string(state->pool, "%f", lflt->val);
|
||||||
gu_format_string(state->pool, "%f", lflt->val);
|
if (state->out != NULL)
|
||||||
|
gu_string_write(tok, state->out, state->err);
|
||||||
state->ps = pgf_parser_next_state(state->ps, tok);
|
state->pos += strlen(tok);
|
||||||
}
|
|
||||||
state->pos++;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@@ -83,7 +93,7 @@ static void
|
|||||||
pgf_metrics_lzn_end_phrase1(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_idx, PgfCId fun)
|
pgf_metrics_lzn_end_phrase1(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_idx, PgfCId fun)
|
||||||
{
|
{
|
||||||
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
|
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
|
||||||
|
|
||||||
int start = gu_buf_pop(state->marks, int);
|
int start = gu_buf_pop(state->marks, int);
|
||||||
int end = state->pos;
|
int end = state->pos;
|
||||||
|
|
||||||
@@ -97,15 +107,29 @@ pgf_metrics_lzn_end_phrase1(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_id
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
pgf_metrics_symbol_ne(PgfLinFuncs** funcs)
|
||||||
|
{
|
||||||
|
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
|
||||||
|
gu_raise(state->err, PgfLinNonExist);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
pgf_metrics_symbol_bind(PgfLinFuncs** funcs)
|
||||||
|
{
|
||||||
|
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
|
||||||
|
state->bind = true;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_idx, PgfCId fun)
|
pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_idx, PgfCId fun)
|
||||||
{
|
{
|
||||||
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
|
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
|
||||||
|
|
||||||
int start = gu_buf_pop(state->marks, int);
|
int start = gu_buf_pop(state->marks, int);
|
||||||
int end = state->pos;
|
int end = state->pos;
|
||||||
|
|
||||||
if (start != end) {
|
if (start != end) {
|
||||||
size_t n_phrases = gu_buf_length(state->phrases);
|
size_t n_phrases = gu_buf_length(state->phrases);
|
||||||
for (size_t i = 0; i < n_phrases; i++) {
|
for (size_t i = 0; i < n_phrases; i++) {
|
||||||
PgfPhrase* phrase = gu_buf_get(state->phrases, PgfPhrase*, i);
|
PgfPhrase* phrase = gu_buf_get(state->phrases, PgfPhrase*, i);
|
||||||
@@ -124,51 +148,29 @@ pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_id
|
|||||||
}
|
}
|
||||||
|
|
||||||
static PgfLinFuncs pgf_metrics_lin_funcs1 = {
|
static PgfLinFuncs pgf_metrics_lin_funcs1 = {
|
||||||
v v v v v v v
|
|
||||||
.symbol_tokens = pgf_metrics_lzn_symbol_tokens,
|
|
||||||
.expr_literal = pgf_metrics_lzn_expr_literal,
|
|
||||||
.begin_phrase = pgf_metrics_lzn_begin_phrase,
|
|
||||||
.end_phrase = pgf_metrics_lzn_end_phrase1
|
|
||||||
=============
|
|
||||||
.symbol_token = pgf_metrics_lzn_symbol_token,
|
.symbol_token = pgf_metrics_lzn_symbol_token,
|
||||||
.expr_literal = pgf_metrics_lzn_expr_literal,
|
.expr_literal = pgf_metrics_lzn_expr_literal,
|
||||||
.begin_phrase = pgf_metrics_lzn_begin_phrase,
|
.begin_phrase = pgf_metrics_lzn_begin_phrase,
|
||||||
.end_phrase = pgf_metrics_lzn_end_phrase1
|
.end_phrase = pgf_metrics_lzn_end_phrase1,
|
||||||
*************
|
.symbol_ne = pgf_metrics_symbol_ne,
|
||||||
.symbol_tokens = pgf_metrics_lzn_symbol_tokens,
|
.symbol_bind = pgf_metrics_symbol_bind
|
||||||
.expr_literal = pgf_metrics_lzn_expr_literal,
|
|
||||||
.begin_phrase = pgf_metrics_lzn_begin_phrase,
|
|
||||||
.end_phrase = pgf_metrics_lzn_end_phrase1,
|
|
||||||
.symbol_glue = NULL
|
|
||||||
^ ^ ^ ^ ^ ^ ^
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static PgfLinFuncs pgf_metrics_lin_funcs2 = {
|
static PgfLinFuncs pgf_metrics_lin_funcs2 = {
|
||||||
v v v v v v v
|
|
||||||
.symbol_tokens = pgf_metrics_lzn_symbol_tokens,
|
|
||||||
.expr_literal = pgf_metrics_lzn_expr_literal,
|
|
||||||
.begin_phrase = pgf_metrics_lzn_begin_phrase,
|
|
||||||
.end_phrase = pgf_metrics_lzn_end_phrase2
|
|
||||||
=============
|
|
||||||
.symbol_token = pgf_metrics_lzn_symbol_token,
|
.symbol_token = pgf_metrics_lzn_symbol_token,
|
||||||
.expr_literal = pgf_metrics_lzn_expr_literal,
|
.expr_literal = pgf_metrics_lzn_expr_literal,
|
||||||
.begin_phrase = pgf_metrics_lzn_begin_phrase,
|
.begin_phrase = pgf_metrics_lzn_begin_phrase,
|
||||||
.end_phrase = pgf_metrics_lzn_end_phrase2
|
.end_phrase = pgf_metrics_lzn_end_phrase2,
|
||||||
*************
|
.symbol_ne = pgf_metrics_symbol_ne,
|
||||||
.symbol_tokens = pgf_metrics_lzn_symbol_tokens,
|
.symbol_bind = pgf_metrics_symbol_bind
|
||||||
.expr_literal = pgf_metrics_lzn_expr_literal,
|
|
||||||
.begin_phrase = pgf_metrics_lzn_begin_phrase,
|
|
||||||
.end_phrase = pgf_metrics_lzn_end_phrase2,
|
|
||||||
.symbol_glue = NULL
|
|
||||||
^ ^ ^ ^ ^ ^ ^
|
|
||||||
};
|
};
|
||||||
*/
|
|
||||||
bool
|
bool
|
||||||
pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
|
pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
|
||||||
double *precision, double *recall, double *exact)
|
double *precision, double *recall, double *exact)
|
||||||
{
|
{
|
||||||
/* GuPool* pool = gu_new_pool();
|
GuPool* pool = gu_new_pool();
|
||||||
|
|
||||||
GuEnum* en_lins1 =
|
GuEnum* en_lins1 =
|
||||||
pgf_lzr_concretize(concr, expr, pool);
|
pgf_lzr_concretize(concr, expr, pool);
|
||||||
PgfCncTree ctree1 = gu_next(en_lins1, PgfCncTree, pool);
|
PgfCncTree ctree1 = gu_next(en_lins1, PgfCncTree, pool);
|
||||||
@@ -177,24 +179,33 @@ pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GuStringBuf* sbuf =
|
||||||
|
gu_string_buf(pool);
|
||||||
|
|
||||||
PgfMetricsLznState state;
|
PgfMetricsLznState state;
|
||||||
|
state.bind = true;
|
||||||
|
state.out = gu_string_buf_out(sbuf);
|
||||||
|
state.err = gu_new_exn(NULL, gu_kind(type), pool);
|
||||||
state.funcs = &pgf_metrics_lin_funcs1;
|
state.funcs = &pgf_metrics_lin_funcs1;
|
||||||
state.ps = pgf_parser_init_state(concr, cat, 0, -1, pool, pool);
|
|
||||||
state.marks = gu_new_buf(int, pool);
|
|
||||||
state.pos = 0;
|
state.pos = 0;
|
||||||
|
state.marks = gu_new_buf(int, pool);
|
||||||
state.phrases = gu_new_buf(PgfPhrase*, pool);
|
state.phrases = gu_new_buf(PgfPhrase*, pool);
|
||||||
state.matches = 0;
|
state.matches = 0;
|
||||||
state.found = 0;
|
state.found = 0;
|
||||||
state.pool = pool;
|
state.pool = pool;
|
||||||
|
|
||||||
pgf_lzr_linearize(concr, ctree1, 0, &state.funcs);
|
pgf_lzr_linearize(concr, ctree1, 0, &state.funcs);
|
||||||
|
if (!gu_ok(state.err)) {
|
||||||
if (state.ps == NULL) {
|
|
||||||
gu_pool_free(pool);
|
gu_pool_free(pool);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
GuEnum* en_trees = pgf_parse_result(state.ps);
|
GuString sentence =
|
||||||
|
gu_string_buf_freeze(sbuf, pool);
|
||||||
|
|
||||||
|
GuEnum* en_trees =
|
||||||
|
pgf_parse(concr, cat, sentence,
|
||||||
|
state.err, pool, pool);
|
||||||
PgfExprProb* ep = gu_next(en_trees, PgfExprProb*, pool);
|
PgfExprProb* ep = gu_next(en_trees, PgfExprProb*, pool);
|
||||||
if (ep == NULL) {
|
if (ep == NULL) {
|
||||||
gu_pool_free(pool);
|
gu_pool_free(pool);
|
||||||
@@ -210,13 +221,14 @@ pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
|
|||||||
}
|
}
|
||||||
|
|
||||||
state.funcs = &pgf_metrics_lin_funcs2;
|
state.funcs = &pgf_metrics_lin_funcs2;
|
||||||
state.ps = NULL;
|
state.bind = true;
|
||||||
state.pos = 0;
|
state.out = NULL;
|
||||||
|
state.pos = 0;
|
||||||
pgf_lzr_linearize(concr, ctree2, 0, &state.funcs);
|
pgf_lzr_linearize(concr, ctree2, 0, &state.funcs);
|
||||||
|
|
||||||
*precision = ((double) state.matches)/((double) state.found);
|
*precision = ((double) state.matches)/((double) state.found);
|
||||||
*recall = ((double) state.matches)/((double) gu_buf_length(state.phrases));
|
*recall = ((double) state.matches)/((double) gu_buf_length(state.phrases));
|
||||||
*exact = pgf_expr_eq(expr, ep->expr) ? 1 : 0;
|
*exact = pgf_expr_eq(expr, ep->expr) ? 1 : 0;
|
||||||
*/
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user