fix the parseval metric after the redesign in the C runtime

This commit is contained in:
kr.angelov
2013-11-01 17:36:22 +00:00
parent fda1416c4d
commit 523259b06d

View File

@@ -3,7 +3,7 @@
#include <pgf/linearizer.h> #include <pgf/linearizer.h>
#include <pgf/parser.h> #include <pgf/parser.h>
/*typedef struct { typedef struct {
int start, end; int start, end;
PgfCId cat; PgfCId cat;
int lin_idx; int lin_idx;
@@ -11,7 +11,9 @@
typedef struct { typedef struct {
PgfLinFuncs* funcs; PgfLinFuncs* funcs;
PgfParseState* ps; bool bind;
GuOut* out;
GuExn* err;
int pos; int pos;
GuBuf* marks; GuBuf* marks;
GuBuf* phrases; GuBuf* phrases;
@@ -19,15 +21,28 @@ typedef struct {
GuPool* pool; GuPool* pool;
} PgfMetricsLznState; } PgfMetricsLznState;
static void
pgf_metrics_put_space(PgfMetricsLznState* state)
{
if (state->bind)
state->bind = false;
else {
if (state->out != NULL)
gu_putc(' ', state->out, state->err);
state->pos++;
}
}
static void static void
pgf_metrics_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok) pgf_metrics_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
{ {
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs); PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
if (state->ps != NULL)
state->ps = pgf_parser_next_state(state->ps, tok);
state->pos++; pgf_metrics_put_space(state);
if (state->out != NULL)
gu_string_write(tok, state->out, state->err);
state->pos += strlen(tok);
} }
static void static void
@@ -39,32 +54,27 @@ pgf_metrics_lzn_expr_literal(PgfLinFuncs** funcs, PgfLiteral lit)
switch (i.tag) { switch (i.tag) {
case PGF_LITERAL_STR: { case PGF_LITERAL_STR: {
PgfLiteralStr* lstr = i.data; PgfLiteralStr* lstr = i.data;
if (state->ps != NULL) { if (state->out != NULL)
state->ps = pgf_parser_next_state(state->ps, lstr->val); gu_string_write(lstr->val, state->out, state->err);
} state->pos += strlen(lstr->val);
state->pos++;
break; break;
} }
case PGF_LITERAL_INT: { case PGF_LITERAL_INT: {
PgfLiteralInt* lint = i.data; PgfLiteralInt* lint = i.data;
if (state->ps != NULL) { GuString tok =
GuString tok = gu_format_string(state->pool, "%d", lint->val);
gu_format_string(state->pool, "%d", lint->val); if (state->out != NULL)
gu_string_write(tok, state->out, state->err);
state->ps = pgf_parser_next_state(state->ps, tok); state->pos += strlen(tok);
}
state->pos++;
break; break;
} }
case PGF_LITERAL_FLT: { case PGF_LITERAL_FLT: {
PgfLiteralFlt* lflt = i.data; PgfLiteralFlt* lflt = i.data;
if (state->ps != NULL) { GuString tok =
GuString tok = gu_format_string(state->pool, "%f", lflt->val);
gu_format_string(state->pool, "%f", lflt->val); if (state->out != NULL)
gu_string_write(tok, state->out, state->err);
state->ps = pgf_parser_next_state(state->ps, tok); state->pos += strlen(tok);
}
state->pos++;
break; break;
} }
default: default:
@@ -83,7 +93,7 @@ static void
pgf_metrics_lzn_end_phrase1(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_idx, PgfCId fun) pgf_metrics_lzn_end_phrase1(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_idx, PgfCId fun)
{ {
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs); PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
int start = gu_buf_pop(state->marks, int); int start = gu_buf_pop(state->marks, int);
int end = state->pos; int end = state->pos;
@@ -97,15 +107,29 @@ pgf_metrics_lzn_end_phrase1(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_id
} }
} }
static void
pgf_metrics_symbol_ne(PgfLinFuncs** funcs)
{
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
gu_raise(state->err, PgfLinNonExist);
}
static void
pgf_metrics_symbol_bind(PgfLinFuncs** funcs)
{
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
state->bind = true;
}
static void static void
pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_idx, PgfCId fun) pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_idx, PgfCId fun)
{ {
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs); PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
int start = gu_buf_pop(state->marks, int); int start = gu_buf_pop(state->marks, int);
int end = state->pos; int end = state->pos;
if (start != end) { if (start != end) {
size_t n_phrases = gu_buf_length(state->phrases); size_t n_phrases = gu_buf_length(state->phrases);
for (size_t i = 0; i < n_phrases; i++) { for (size_t i = 0; i < n_phrases; i++) {
PgfPhrase* phrase = gu_buf_get(state->phrases, PgfPhrase*, i); PgfPhrase* phrase = gu_buf_get(state->phrases, PgfPhrase*, i);
@@ -124,51 +148,29 @@ pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_id
} }
static PgfLinFuncs pgf_metrics_lin_funcs1 = { static PgfLinFuncs pgf_metrics_lin_funcs1 = {
v v v v v v v
.symbol_tokens = pgf_metrics_lzn_symbol_tokens,
.expr_literal = pgf_metrics_lzn_expr_literal,
.begin_phrase = pgf_metrics_lzn_begin_phrase,
.end_phrase = pgf_metrics_lzn_end_phrase1
=============
.symbol_token = pgf_metrics_lzn_symbol_token, .symbol_token = pgf_metrics_lzn_symbol_token,
.expr_literal = pgf_metrics_lzn_expr_literal, .expr_literal = pgf_metrics_lzn_expr_literal,
.begin_phrase = pgf_metrics_lzn_begin_phrase, .begin_phrase = pgf_metrics_lzn_begin_phrase,
.end_phrase = pgf_metrics_lzn_end_phrase1 .end_phrase = pgf_metrics_lzn_end_phrase1,
************* .symbol_ne = pgf_metrics_symbol_ne,
.symbol_tokens = pgf_metrics_lzn_symbol_tokens, .symbol_bind = pgf_metrics_symbol_bind
.expr_literal = pgf_metrics_lzn_expr_literal,
.begin_phrase = pgf_metrics_lzn_begin_phrase,
.end_phrase = pgf_metrics_lzn_end_phrase1,
.symbol_glue = NULL
^ ^ ^ ^ ^ ^ ^
}; };
static PgfLinFuncs pgf_metrics_lin_funcs2 = { static PgfLinFuncs pgf_metrics_lin_funcs2 = {
v v v v v v v
.symbol_tokens = pgf_metrics_lzn_symbol_tokens,
.expr_literal = pgf_metrics_lzn_expr_literal,
.begin_phrase = pgf_metrics_lzn_begin_phrase,
.end_phrase = pgf_metrics_lzn_end_phrase2
=============
.symbol_token = pgf_metrics_lzn_symbol_token, .symbol_token = pgf_metrics_lzn_symbol_token,
.expr_literal = pgf_metrics_lzn_expr_literal, .expr_literal = pgf_metrics_lzn_expr_literal,
.begin_phrase = pgf_metrics_lzn_begin_phrase, .begin_phrase = pgf_metrics_lzn_begin_phrase,
.end_phrase = pgf_metrics_lzn_end_phrase2 .end_phrase = pgf_metrics_lzn_end_phrase2,
************* .symbol_ne = pgf_metrics_symbol_ne,
.symbol_tokens = pgf_metrics_lzn_symbol_tokens, .symbol_bind = pgf_metrics_symbol_bind
.expr_literal = pgf_metrics_lzn_expr_literal,
.begin_phrase = pgf_metrics_lzn_begin_phrase,
.end_phrase = pgf_metrics_lzn_end_phrase2,
.symbol_glue = NULL
^ ^ ^ ^ ^ ^ ^
}; };
*/
bool bool
pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat, pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
double *precision, double *recall, double *exact) double *precision, double *recall, double *exact)
{ {
/* GuPool* pool = gu_new_pool(); GuPool* pool = gu_new_pool();
GuEnum* en_lins1 = GuEnum* en_lins1 =
pgf_lzr_concretize(concr, expr, pool); pgf_lzr_concretize(concr, expr, pool);
PgfCncTree ctree1 = gu_next(en_lins1, PgfCncTree, pool); PgfCncTree ctree1 = gu_next(en_lins1, PgfCncTree, pool);
@@ -177,24 +179,33 @@ pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
return false; return false;
} }
GuStringBuf* sbuf =
gu_string_buf(pool);
PgfMetricsLznState state; PgfMetricsLznState state;
state.bind = true;
state.out = gu_string_buf_out(sbuf);
state.err = gu_new_exn(NULL, gu_kind(type), pool);
state.funcs = &pgf_metrics_lin_funcs1; state.funcs = &pgf_metrics_lin_funcs1;
state.ps = pgf_parser_init_state(concr, cat, 0, -1, pool, pool);
state.marks = gu_new_buf(int, pool);
state.pos = 0; state.pos = 0;
state.marks = gu_new_buf(int, pool);
state.phrases = gu_new_buf(PgfPhrase*, pool); state.phrases = gu_new_buf(PgfPhrase*, pool);
state.matches = 0; state.matches = 0;
state.found = 0; state.found = 0;
state.pool = pool; state.pool = pool;
pgf_lzr_linearize(concr, ctree1, 0, &state.funcs); pgf_lzr_linearize(concr, ctree1, 0, &state.funcs);
if (!gu_ok(state.err)) {
if (state.ps == NULL) {
gu_pool_free(pool); gu_pool_free(pool);
return false; return false;
} }
GuEnum* en_trees = pgf_parse_result(state.ps); GuString sentence =
gu_string_buf_freeze(sbuf, pool);
GuEnum* en_trees =
pgf_parse(concr, cat, sentence,
state.err, pool, pool);
PgfExprProb* ep = gu_next(en_trees, PgfExprProb*, pool); PgfExprProb* ep = gu_next(en_trees, PgfExprProb*, pool);
if (ep == NULL) { if (ep == NULL) {
gu_pool_free(pool); gu_pool_free(pool);
@@ -210,13 +221,14 @@ pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
} }
state.funcs = &pgf_metrics_lin_funcs2; state.funcs = &pgf_metrics_lin_funcs2;
state.ps = NULL; state.bind = true;
state.pos = 0; state.out = NULL;
state.pos = 0;
pgf_lzr_linearize(concr, ctree2, 0, &state.funcs); pgf_lzr_linearize(concr, ctree2, 0, &state.funcs);
*precision = ((double) state.matches)/((double) state.found); *precision = ((double) state.matches)/((double) state.found);
*recall = ((double) state.matches)/((double) gu_buf_length(state.phrases)); *recall = ((double) state.matches)/((double) gu_buf_length(state.phrases));
*exact = pgf_expr_eq(expr, ep->expr) ? 1 : 0; *exact = pgf_expr_eq(expr, ep->expr) ? 1 : 0;
*/
return true; return true;
} }