forked from GitHub/gf-core
The C runtime and the Python binding now have an API for parser evaluation. The API computes PARSEVAL and Exact Match for a given tree. As a side effect, the abstract trees in Python are now compared for equality by value and not by reference.
This commit is contained in:
@@ -104,6 +104,7 @@ libpgf_la_SOURCES = \
|
|||||||
pgf/expr.h \
|
pgf/expr.h \
|
||||||
pgf/parser.c \
|
pgf/parser.c \
|
||||||
pgf/parser.h \
|
pgf/parser.h \
|
||||||
|
pgf/parseval.c \
|
||||||
pgf/lexer.c \
|
pgf/lexer.c \
|
||||||
pgf/lexer.h \
|
pgf/lexer.h \
|
||||||
pgf/literals.c \
|
pgf/literals.c \
|
||||||
|
|||||||
@@ -368,6 +368,79 @@ pgf_read_expr(GuReader* rdr, GuPool* pool, GuExn* err)
|
|||||||
return expr;
|
return expr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
pgf_literal_eq(PgfLiteral lit1, PgfLiteral lit2)
|
||||||
|
{
|
||||||
|
GuVariantInfo ei1 = gu_variant_open(lit1);
|
||||||
|
GuVariantInfo ei2 = gu_variant_open(lit2);
|
||||||
|
|
||||||
|
if (ei1.tag != ei2.tag)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
switch (ei1.tag) {
|
||||||
|
case PGF_LITERAL_STR: {
|
||||||
|
PgfLiteralStr* lit1 = ei1.data;
|
||||||
|
PgfLiteralStr* lit2 = ei2.data;
|
||||||
|
return gu_string_eq(lit1->val, lit2->val);
|
||||||
|
}
|
||||||
|
case PGF_LITERAL_INT: {
|
||||||
|
PgfLiteralInt* lit1 = ei1.data;
|
||||||
|
PgfLiteralInt* lit2 = ei2.data;
|
||||||
|
return (lit1->val == lit2->val);
|
||||||
|
}
|
||||||
|
case PGF_LITERAL_FLT: {
|
||||||
|
PgfLiteralFlt* lit1 = ei1.data;
|
||||||
|
PgfLiteralFlt* lit2 = ei2.data;
|
||||||
|
return (lit1->val == lit2->val);
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
gu_impossible();
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
pgf_expr_eq(PgfExpr e1, PgfExpr e2)
|
||||||
|
{
|
||||||
|
GuVariantInfo ei1 = gu_variant_open(e1);
|
||||||
|
GuVariantInfo ei2 = gu_variant_open(e2);
|
||||||
|
|
||||||
|
if (ei1.tag != ei2.tag)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
switch (ei1.tag) {
|
||||||
|
case PGF_EXPR_FUN: {
|
||||||
|
PgfExprFun* fun1 = ei1.data;
|
||||||
|
PgfExprFun* fun2 = ei2.data;
|
||||||
|
return gu_string_eq(fun1->fun, fun2->fun);
|
||||||
|
}
|
||||||
|
case PGF_EXPR_APP: {
|
||||||
|
PgfExprApp* app1 = ei1.data;
|
||||||
|
PgfExprApp* app2 = ei2.data;
|
||||||
|
return (pgf_expr_eq(app1->fun,app2->fun) &&
|
||||||
|
pgf_expr_eq(app1->arg,app2->arg));
|
||||||
|
}
|
||||||
|
case PGF_EXPR_LIT: {
|
||||||
|
PgfExprLit* lit1 = ei1.data;
|
||||||
|
PgfExprLit* lit2 = ei2.data;
|
||||||
|
return (pgf_literal_eq(lit1->lit,lit2->lit));
|
||||||
|
}
|
||||||
|
case PGF_EXPR_META:
|
||||||
|
return true;
|
||||||
|
case PGF_EXPR_ABS:
|
||||||
|
case PGF_EXPR_VAR:
|
||||||
|
case PGF_EXPR_TYPED:
|
||||||
|
case PGF_EXPR_IMPL_ARG:
|
||||||
|
gu_impossible();
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
gu_impossible();
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
pgf_print_literal(PgfLiteral lit,
|
pgf_print_literal(PgfLiteral lit,
|
||||||
GuWriter* wtr, GuExn* err)
|
GuWriter* wtr, GuExn* err)
|
||||||
@@ -420,7 +493,6 @@ pgf_print_expr(PgfExpr expr, int prec,
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PGF_EXPR_ABS:
|
|
||||||
case PGF_EXPR_LIT: {
|
case PGF_EXPR_LIT: {
|
||||||
PgfExprLit* lit = ei.data;
|
PgfExprLit* lit = ei.data;
|
||||||
pgf_print_literal(lit->lit, wtr, err);
|
pgf_print_literal(lit->lit, wtr, err);
|
||||||
@@ -429,6 +501,7 @@ pgf_print_expr(PgfExpr expr, int prec,
|
|||||||
case PGF_EXPR_META:
|
case PGF_EXPR_META:
|
||||||
gu_putc('?', wtr, err);
|
gu_putc('?', wtr, err);
|
||||||
break;
|
break;
|
||||||
|
case PGF_EXPR_ABS:
|
||||||
case PGF_EXPR_VAR:
|
case PGF_EXPR_VAR:
|
||||||
case PGF_EXPR_TYPED:
|
case PGF_EXPR_TYPED:
|
||||||
case PGF_EXPR_IMPL_ARG:
|
case PGF_EXPR_IMPL_ARG:
|
||||||
|
|||||||
@@ -154,6 +154,12 @@ pgf_expr_unapply(PgfExpr expr, GuPool* pool);
|
|||||||
PgfExpr
|
PgfExpr
|
||||||
pgf_read_expr(GuReader* rdr, GuPool* pool, GuExn* err);
|
pgf_read_expr(GuReader* rdr, GuPool* pool, GuExn* err);
|
||||||
|
|
||||||
|
bool
|
||||||
|
pgf_literal_eq(PgfLiteral lit1, PgfLiteral lit2);
|
||||||
|
|
||||||
|
bool
|
||||||
|
pgf_expr_eq(PgfExpr e1, PgfExpr e2);
|
||||||
|
|
||||||
void
|
void
|
||||||
pgf_print_literal(PgfLiteral lit, GuWriter* wtr, GuExn* err);
|
pgf_print_literal(PgfLiteral lit, GuWriter* wtr, GuExn* err);
|
||||||
|
|
||||||
|
|||||||
200
src/runtime/c/pgf/parseval.c
Normal file
200
src/runtime/c/pgf/parseval.c
Normal file
@@ -0,0 +1,200 @@
|
|||||||
|
#include <pgf/pgf.h>
|
||||||
|
#include <pgf/linearizer.h>
|
||||||
|
#include <pgf/parser.h>
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int start, end;
|
||||||
|
PgfCId cat;
|
||||||
|
int lin_idx;
|
||||||
|
} PgfPhrase;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
PgfLinFuncs* funcs;
|
||||||
|
PgfParseState* ps;
|
||||||
|
int pos;
|
||||||
|
GuBuf* marks;
|
||||||
|
GuBuf* phrases;
|
||||||
|
int found, matches;
|
||||||
|
GuPool* pool;
|
||||||
|
} PgfMetricsLznState;
|
||||||
|
|
||||||
|
static void
|
||||||
|
pgf_metrics_lzn_symbol_tokens(PgfLinFuncs** funcs, PgfTokens toks)
|
||||||
|
{
|
||||||
|
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
|
||||||
|
|
||||||
|
size_t len = gu_seq_length(toks);
|
||||||
|
for (size_t i = 0; i < len; i++) {
|
||||||
|
PgfToken tok = gu_seq_get(toks, PgfToken, i);
|
||||||
|
|
||||||
|
if (state->ps != NULL)
|
||||||
|
state->ps = pgf_parser_next_state(state->ps, tok, state->pool);
|
||||||
|
|
||||||
|
state->pos++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
pgf_metrics_lzn_expr_literal(PgfLinFuncs** funcs, PgfLiteral lit)
|
||||||
|
{
|
||||||
|
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
|
||||||
|
|
||||||
|
GuVariantInfo i = gu_variant_open(lit);
|
||||||
|
switch (i.tag) {
|
||||||
|
case PGF_LITERAL_STR: {
|
||||||
|
PgfLiteralStr* lstr = i.data;
|
||||||
|
if (state->ps != NULL) {
|
||||||
|
state->ps = pgf_parser_next_state(state->ps, lstr->val, state->pool);
|
||||||
|
}
|
||||||
|
state->pos++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PGF_LITERAL_INT: {
|
||||||
|
PgfLiteralInt* lint = i.data;
|
||||||
|
if (state->ps != NULL) {
|
||||||
|
GuString tok =
|
||||||
|
gu_format_string(state->pool, "%d", lint->val);
|
||||||
|
|
||||||
|
state->ps = pgf_parser_next_state(state->ps, tok, state->pool);
|
||||||
|
}
|
||||||
|
state->pos++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PGF_LITERAL_FLT: {
|
||||||
|
PgfLiteralFlt* lflt = i.data;
|
||||||
|
if (state->ps != NULL) {
|
||||||
|
GuString tok =
|
||||||
|
gu_format_string(state->pool, "%f", lflt->val);
|
||||||
|
|
||||||
|
state->ps = pgf_parser_next_state(state->ps, tok, state->pool);
|
||||||
|
}
|
||||||
|
state->pos++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
gu_impossible();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
pgf_metrics_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_index, PgfCId fun)
|
||||||
|
{
|
||||||
|
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
|
||||||
|
gu_buf_push(state->marks, int, state->pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
pgf_metrics_lzn_end_phrase1(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_idx, PgfCId fun)
|
||||||
|
{
|
||||||
|
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
|
||||||
|
|
||||||
|
int start = gu_buf_pop(state->marks, int);
|
||||||
|
int end = state->pos;
|
||||||
|
|
||||||
|
if (start != end) {
|
||||||
|
PgfPhrase* phrase = gu_new(PgfPhrase, state->pool);
|
||||||
|
phrase->start = start;
|
||||||
|
phrase->end = end;
|
||||||
|
phrase->cat = cat;
|
||||||
|
phrase->lin_idx = lin_idx;
|
||||||
|
gu_buf_push(state->phrases, PgfPhrase*, phrase);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_idx, PgfCId fun)
|
||||||
|
{
|
||||||
|
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
|
||||||
|
|
||||||
|
int start = gu_buf_pop(state->marks, int);
|
||||||
|
int end = state->pos;
|
||||||
|
|
||||||
|
if (start != end) {
|
||||||
|
size_t n_phrases = gu_buf_length(state->phrases);
|
||||||
|
for (size_t i = 0; i < n_phrases; i++) {
|
||||||
|
PgfPhrase* phrase = gu_buf_get(state->phrases, PgfPhrase*, i);
|
||||||
|
|
||||||
|
if (phrase->start == start &&
|
||||||
|
phrase->end == end &&
|
||||||
|
gu_string_eq(phrase->cat, cat) &&
|
||||||
|
phrase->lin_idx == lin_idx) {
|
||||||
|
state->matches++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
state->found++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static PgfLinFuncs pgf_metrics_lin_funcs1 = {
|
||||||
|
.symbol_tokens = pgf_metrics_lzn_symbol_tokens,
|
||||||
|
.expr_literal = pgf_metrics_lzn_expr_literal,
|
||||||
|
.begin_phrase = pgf_metrics_lzn_begin_phrase,
|
||||||
|
.end_phrase = pgf_metrics_lzn_end_phrase1
|
||||||
|
};
|
||||||
|
|
||||||
|
static PgfLinFuncs pgf_metrics_lin_funcs2 = {
|
||||||
|
.symbol_tokens = pgf_metrics_lzn_symbol_tokens,
|
||||||
|
.expr_literal = pgf_metrics_lzn_expr_literal,
|
||||||
|
.begin_phrase = pgf_metrics_lzn_begin_phrase,
|
||||||
|
.end_phrase = pgf_metrics_lzn_end_phrase2
|
||||||
|
};
|
||||||
|
|
||||||
|
bool
|
||||||
|
pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
|
||||||
|
double *precision, double *recall, double *exact)
|
||||||
|
{
|
||||||
|
GuPool* pool = gu_new_pool();
|
||||||
|
|
||||||
|
GuEnum* en_lins1 =
|
||||||
|
pgf_lzr_concretize(concr, expr, pool);
|
||||||
|
PgfCncTree ctree1 = gu_next(en_lins1, PgfCncTree, pool);
|
||||||
|
if (gu_variant_is_null(ctree1)) {
|
||||||
|
gu_pool_free(pool);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
PgfMetricsLznState state;
|
||||||
|
state.funcs = &pgf_metrics_lin_funcs1;
|
||||||
|
state.ps = pgf_parser_init_state(concr, cat, 0, pool);
|
||||||
|
state.marks = gu_new_buf(int, pool);
|
||||||
|
state.pos = 0;
|
||||||
|
state.phrases = gu_new_buf(PgfPhrase*, pool);
|
||||||
|
state.matches = 0;
|
||||||
|
state.found = 0;
|
||||||
|
state.pool = pool;
|
||||||
|
|
||||||
|
pgf_lzr_linearize(concr, ctree1, 0, &state.funcs);
|
||||||
|
|
||||||
|
if (state.ps == NULL) {
|
||||||
|
gu_pool_free(pool);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
GuEnum* en_trees = pgf_parse_result(state.ps, pool);
|
||||||
|
PgfExprProb* ep = gu_next(en_trees, PgfExprProb*, pool);
|
||||||
|
if (ep == NULL) {
|
||||||
|
gu_pool_free(pool);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
GuEnum* en_lins2 =
|
||||||
|
pgf_lzr_concretize(concr, ep->expr, pool);
|
||||||
|
PgfCncTree ctree2 = gu_next(en_lins2, PgfCncTree, pool);
|
||||||
|
if (gu_variant_is_null(ctree2)) {
|
||||||
|
gu_pool_free(pool);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
state.funcs = &pgf_metrics_lin_funcs2;
|
||||||
|
state.ps = NULL;
|
||||||
|
state.pos = 0;
|
||||||
|
pgf_lzr_linearize(concr, ctree2, 0, &state.funcs);
|
||||||
|
|
||||||
|
*precision = ((double) state.matches)/((double) state.found);
|
||||||
|
*recall = ((double) state.matches)/((double) gu_buf_length(state.phrases));
|
||||||
|
*exact = pgf_expr_eq(expr, ep->expr) ? 1 : 0;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
@@ -116,6 +116,10 @@ pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err);
|
|||||||
PgfExprEnum*
|
PgfExprEnum*
|
||||||
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
|
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
|
||||||
|
|
||||||
|
bool
|
||||||
|
pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
|
||||||
|
double *precision, double *recall, double *exact);
|
||||||
|
|
||||||
PgfExprEnum*
|
PgfExprEnum*
|
||||||
pgf_generate(PgfPGF* pgf, PgfCId cat, GuPool* pool);
|
pgf_generate(PgfPGF* pgf, PgfCId cat, GuPool* pool);
|
||||||
|
|
||||||
|
|||||||
@@ -135,6 +135,21 @@ Expr_repr(ExprObject *self)
|
|||||||
return pystr;
|
return pystr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PyObject *
|
||||||
|
Expr_richcompare(ExprObject *e1, ExprObject *e2, int op)
|
||||||
|
{
|
||||||
|
bool cmp = pgf_expr_eq(e1->expr,e2->expr);
|
||||||
|
|
||||||
|
if (op == Py_EQ)
|
||||||
|
return cmp ? Py_True : Py_False;
|
||||||
|
else if (op == Py_NE)
|
||||||
|
return cmp ? Py_False : Py_True;
|
||||||
|
else {
|
||||||
|
PyErr_SetString(PyExc_TypeError, "the operation is not supported");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static PyMethodDef Expr_methods[] = {
|
static PyMethodDef Expr_methods[] = {
|
||||||
{"unpack", (PyCFunction)Expr_unpack, METH_VARARGS,
|
{"unpack", (PyCFunction)Expr_unpack, METH_VARARGS,
|
||||||
"Decomposes an expression into its components"
|
"Decomposes an expression into its components"
|
||||||
@@ -167,7 +182,7 @@ static PyTypeObject pgf_ExprType = {
|
|||||||
"abstract syntax tree", /*tp_doc*/
|
"abstract syntax tree", /*tp_doc*/
|
||||||
0, /*tp_traverse */
|
0, /*tp_traverse */
|
||||||
0, /*tp_clear */
|
0, /*tp_clear */
|
||||||
0, /*tp_richcompare */
|
(richcmpfunc) Expr_richcompare, /*tp_richcompare */
|
||||||
0, /*tp_weaklistoffset */
|
0, /*tp_weaklistoffset */
|
||||||
0, /*tp_iter */
|
0, /*tp_iter */
|
||||||
0, /*tp_iternext */
|
0, /*tp_iternext */
|
||||||
@@ -708,6 +723,30 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
|
|||||||
return pyres;
|
return pyres;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static PyObject*
|
||||||
|
Concr_parseval(ConcrObject* self, PyObject *args) {
|
||||||
|
ExprObject* pyexpr = NULL;
|
||||||
|
const char* s_cat = NULL;
|
||||||
|
if (!PyArg_ParseTuple(args, "O!s", &pgf_ExprType, &pyexpr, &s_cat))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
GuPool* tmp_pool = gu_local_pool();
|
||||||
|
|
||||||
|
PgfCId cat = gu_str_string(s_cat, tmp_pool);
|
||||||
|
|
||||||
|
double precision = 0;
|
||||||
|
double recall = 0;
|
||||||
|
double exact = 0;
|
||||||
|
|
||||||
|
if (!pgf_parseval(self->concr, pyexpr->expr, cat,
|
||||||
|
&precision, &recall, &exact))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
gu_pool_free(tmp_pool);
|
||||||
|
|
||||||
|
return Py_BuildValue("ddd", precision, recall, exact);
|
||||||
|
}
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
Concr_linearize(ConcrObject* self, PyObject *args)
|
Concr_linearize(ConcrObject* self, PyObject *args)
|
||||||
{
|
{
|
||||||
@@ -1006,6 +1045,9 @@ static PyMethodDef Concr_methods[] = {
|
|||||||
{"parse", (PyCFunction)Concr_parse, METH_VARARGS | METH_KEYWORDS,
|
{"parse", (PyCFunction)Concr_parse, METH_VARARGS | METH_KEYWORDS,
|
||||||
"Parses a string and returns an iterator over the abstract trees for this sentence"
|
"Parses a string and returns an iterator over the abstract trees for this sentence"
|
||||||
},
|
},
|
||||||
|
{"parseval", (PyCFunction)Concr_parseval, METH_VARARGS,
|
||||||
|
"Computes precision, recall and exact match for the parser on a given abstract tree"
|
||||||
|
},
|
||||||
{"linearize", (PyCFunction)Concr_linearize, METH_VARARGS,
|
{"linearize", (PyCFunction)Concr_linearize, METH_VARARGS,
|
||||||
"Takes an abstract tree and linearizes it to a string"
|
"Takes an abstract tree and linearizes it to a string"
|
||||||
},
|
},
|
||||||
|
|||||||
Reference in New Issue
Block a user