diff --git a/src/runtime/c/Makefile.am b/src/runtime/c/Makefile.am
index 4b22ba745..cfef382e6 100644
--- a/src/runtime/c/Makefile.am
+++ b/src/runtime/c/Makefile.am
@@ -104,6 +104,7 @@ libpgf_la_SOURCES = \
 	pgf/expr.h \
 	pgf/parser.c \
 	pgf/parser.h \
+	pgf/parseval.c \
 	pgf/lexer.c \
 	pgf/lexer.h \
 	pgf/literals.c \
diff --git a/src/runtime/c/pgf/expr.c b/src/runtime/c/pgf/expr.c
index 494708e8c..13093d14f 100644
--- a/src/runtime/c/pgf/expr.c
+++ b/src/runtime/c/pgf/expr.c
@@ -368,6 +368,86 @@ pgf_read_expr(GuReader* rdr, GuPool* pool, GuExn* err)
 	return expr;
 }
 
+/* Structural equality of two literals: true iff both carry the same
+ * tag (string, integer or float) and equal values.  Floats are compared
+ * with ==, so the match has to be exact. */
+bool
+pgf_literal_eq(PgfLiteral lit1, PgfLiteral lit2)
+{
+	GuVariantInfo ei1 = gu_variant_open(lit1);
+	GuVariantInfo ei2 = gu_variant_open(lit2);
+
+	if (ei1.tag != ei2.tag)
+		return false;
+
+	switch (ei1.tag) {
+	case PGF_LITERAL_STR: {
+		PgfLiteralStr* str1 = ei1.data;
+		PgfLiteralStr* str2 = ei2.data;
+		return gu_string_eq(str1->val, str2->val);
+	}
+	case PGF_LITERAL_INT: {
+		PgfLiteralInt* int1 = ei1.data;
+		PgfLiteralInt* int2 = ei2.data;
+		return (int1->val == int2->val);
+	}
+	case PGF_LITERAL_FLT: {
+		PgfLiteralFlt* flt1 = ei1.data;
+		PgfLiteralFlt* flt2 = ei2.data;
+		return (flt1->val == flt2->val);
+	}
+	default:
+		gu_impossible();
+	}
+
+	return false;
+}
+
+/* Structural equality of two expressions.  Function names, applications
+ * and literals are compared recursively; any two metavariables compare
+ * equal.  Abstractions, variables, typed expressions and implicit
+ * arguments are not supported yet and hit gu_impossible(). */
+bool
+pgf_expr_eq(PgfExpr e1, PgfExpr e2)
+{
+	GuVariantInfo ei1 = gu_variant_open(e1);
+	GuVariantInfo ei2 = gu_variant_open(e2);
+
+	if (ei1.tag != ei2.tag)
+		return false;
+
+	switch (ei1.tag) {
+	case PGF_EXPR_FUN: {
+		PgfExprFun* fun1 = ei1.data;
+		PgfExprFun* fun2 = ei2.data;
+		return gu_string_eq(fun1->fun, fun2->fun);
+	}
+	case PGF_EXPR_APP: {
+		PgfExprApp* app1 = ei1.data;
+		PgfExprApp* app2 = ei2.data;
+		return (pgf_expr_eq(app1->fun,app2->fun) &&
+		        pgf_expr_eq(app1->arg,app2->arg));
+	}
+	case PGF_EXPR_LIT: {
+		PgfExprLit* lit1 = ei1.data;
+		PgfExprLit* lit2 = ei2.data;
+		return (pgf_literal_eq(lit1->lit,lit2->lit));
+	}
+	case PGF_EXPR_META:
+		return true;
+	case PGF_EXPR_ABS:
+	case PGF_EXPR_VAR:
+	case PGF_EXPR_TYPED:
+	case PGF_EXPR_IMPL_ARG:
+		gu_impossible();
+		break;
+	default:
+		gu_impossible();
+	}
+
+	return false;
+}
+
 void
 pgf_print_literal(PgfLiteral lit,
 		  GuWriter* wtr, GuExn* err)
@@ -420,7 +500,6 @@ pgf_print_expr(PgfExpr expr, int prec,
 		}
 		break;
 	}
-	case PGF_EXPR_ABS:
 	case PGF_EXPR_LIT: {
 		PgfExprLit* lit = ei.data;
 		pgf_print_literal(lit->lit, wtr, err);
@@ -429,6 +508,7 @@ pgf_print_expr(PgfExpr expr, int prec,
 	case PGF_EXPR_META:
 		gu_putc('?', wtr, err);
 		break;
+	case PGF_EXPR_ABS:
 	case PGF_EXPR_VAR:
 	case PGF_EXPR_TYPED:
 	case PGF_EXPR_IMPL_ARG:
diff --git a/src/runtime/c/pgf/expr.h b/src/runtime/c/pgf/expr.h
index 5828289f1..7bd2d218f 100644
--- a/src/runtime/c/pgf/expr.h
+++ b/src/runtime/c/pgf/expr.h
@@ -154,6 +154,12 @@ pgf_expr_unapply(PgfExpr expr, GuPool* pool);
 PgfExpr
 pgf_read_expr(GuReader* rdr, GuPool* pool, GuExn* err);
 
+bool
+pgf_literal_eq(PgfLiteral lit1, PgfLiteral lit2);
+
+bool
+pgf_expr_eq(PgfExpr e1, PgfExpr e2);
+
 void
 pgf_print_literal(PgfLiteral lit, GuWriter* wtr, GuExn* err);
 
diff --git a/src/runtime/c/pgf/parseval.c b/src/runtime/c/pgf/parseval.c
new file mode 100644
index 000000000..4f18573cf
--- /dev/null
+++ b/src/runtime/c/pgf/parseval.c
@@ -0,0 +1,235 @@
+/* NOTE(review): the header names on the three #include lines were lost
+ * from this patch; they are restored from the symbols used -- confirm. */
+#include <pgf/pgf.h>
+#include <pgf/linearizer.h>
+#include <pgf/parser.h>
+
+/* A non-empty token span [start, end) produced by one linearization
+ * phrase, tagged with the phrase's category and linearization index. */
+typedef struct {
+	int start, end;
+	PgfCId cat;
+	int lin_idx;
+} PgfPhrase;
+
+/* Linearization callback state.  The address of `funcs` is what gets
+ * passed to pgf_lzr_linearize(); the callbacks recover the enclosing
+ * struct from it with gu_container(). */
+typedef struct {
+	PgfLinFuncs* funcs;
+	PgfParseState* ps;    /* fed with every token; NULL disables parsing */
+	int pos;              /* number of tokens emitted so far */
+	GuBuf* marks;         /* stack of phrase start positions (int) */
+	GuBuf* phrases;       /* PgfPhrase* collected by the first pass */
+	int found, matches;   /* second pass: phrases seen / also in phrases */
+	GuPool* pool;
+} PgfMetricsLznState;
+
+/* Feeds each token to the parse state (if any) and advances pos. */
+static void
+pgf_metrics_lzn_symbol_tokens(PgfLinFuncs** funcs, PgfTokens toks)
+{
+	PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
+
+	size_t len = gu_seq_length(toks);
+	for (size_t i = 0; i < len; i++) {
+		PgfToken tok = gu_seq_get(toks, PgfToken, i);
+
+		if (state->ps != NULL)
+			state->ps = pgf_parser_next_state(state->ps, tok, state->pool);
+
+		state->pos++;
+	}
+}
+
+/* Counts a literal as one token.  Strings are fed to the parser as they
+ * are; integers and floats are first formatted with "%d" and "%f". */
+static void
+pgf_metrics_lzn_expr_literal(PgfLinFuncs** funcs, PgfLiteral lit)
+{
+	PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
+
+	GuVariantInfo i = gu_variant_open(lit);
+	switch (i.tag) {
+	case PGF_LITERAL_STR: {
+		PgfLiteralStr* lstr = i.data;
+		if (state->ps != NULL) {
+			state->ps = pgf_parser_next_state(state->ps, lstr->val, state->pool);
+		}
+		state->pos++;
+		break;
+	}
+	case PGF_LITERAL_INT: {
+		PgfLiteralInt* lint = i.data;
+		if (state->ps != NULL) {
+			GuString tok =
+				gu_format_string(state->pool, "%d", lint->val);
+
+			state->ps = pgf_parser_next_state(state->ps, tok, state->pool);
+		}
+		state->pos++;
+		break;
+	}
+	case PGF_LITERAL_FLT: {
+		PgfLiteralFlt* lflt = i.data;
+		if (state->ps != NULL) {
+			GuString tok =
+				gu_format_string(state->pool, "%f", lflt->val);
+
+			state->ps = pgf_parser_next_state(state->ps, tok, state->pool);
+		}
+		state->pos++;
+		break;
+	}
+	default:
+		gu_impossible();
+	}
+}
+
+/* Remembers the token position at which a phrase starts. */
+static void
+pgf_metrics_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_index, PgfCId fun)
+{
+	PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
+	gu_buf_push(state->marks, int, state->pos);
+}
+
+/* First pass: records every non-empty phrase of the reference tree. */
+static void
+pgf_metrics_lzn_end_phrase1(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_idx, PgfCId fun)
+{
+	PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
+
+	int start = gu_buf_pop(state->marks, int);
+	int end = state->pos;
+
+	if (start != end) {
+		PgfPhrase* phrase = gu_new(PgfPhrase, state->pool);
+		phrase->start = start;
+		phrase->end = end;
+		phrase->cat = cat;
+		phrase->lin_idx = lin_idx;
+		gu_buf_push(state->phrases, PgfPhrase*, phrase);
+	}
+}
+
+/* Second pass: counts every non-empty phrase in found, and in matches
+ * too when a recorded phrase has the same span, category and index. */
+static void
+pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_idx, PgfCId fun)
+{
+	PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
+
+	int start = gu_buf_pop(state->marks, int);
+	int end = state->pos;
+
+	if (start != end) {
+		size_t n_phrases = gu_buf_length(state->phrases);
+		for (size_t i = 0; i < n_phrases; i++) {
+			PgfPhrase* phrase = gu_buf_get(state->phrases, PgfPhrase*, i);
+
+			if (phrase->start == start &&
+			    phrase->end == end &&
+			    gu_string_eq(phrase->cat, cat) &&
+			    phrase->lin_idx == lin_idx) {
+				state->matches++;
+				break;
+			}
+		}
+
+		state->found++;
+	}
+}
+
+/* Callbacks for the first pass (reference tree). */
+static PgfLinFuncs pgf_metrics_lin_funcs1 = {
+	.symbol_tokens = pgf_metrics_lzn_symbol_tokens,
+	.expr_literal = pgf_metrics_lzn_expr_literal,
+	.begin_phrase = pgf_metrics_lzn_begin_phrase,
+	.end_phrase = pgf_metrics_lzn_end_phrase1
+};
+
+/* Callbacks for the second pass (re-parsed tree). */
+static PgfLinFuncs pgf_metrics_lin_funcs2 = {
+	.symbol_tokens = pgf_metrics_lzn_symbol_tokens,
+	.expr_literal = pgf_metrics_lzn_expr_literal,
+	.begin_phrase = pgf_metrics_lzn_begin_phrase,
+	.end_phrase = pgf_metrics_lzn_end_phrase2
+};
+
+/* Linearizes expr, parses the tokens back in category cat, and compares
+ * the phrases of the first parse result with those of expr.
+ *
+ * On success returns true and stores
+ *   *precision: matching phrases / phrases of the parse result,
+ *   *recall:    matching phrases / phrases of expr,
+ *   *exact:     1 if the parse result is pgf_expr_eq() to expr, else 0.
+ * A ratio with a zero denominator is reported as 0 instead of dividing
+ * by zero.  Returns false and leaves the outputs untouched if expr cannot
+ * be linearized, the tokens cannot be parsed, or the parse result cannot
+ * be linearized.  Temporary data lives in a private pool that is freed
+ * on every path. */
+bool
+pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
+             double *precision, double *recall, double *exact)
+{
+	GuPool* pool = gu_new_pool();
+
+	GuEnum* en_lins1 =
+		pgf_lzr_concretize(concr, expr, pool);
+	PgfCncTree ctree1 = gu_next(en_lins1, PgfCncTree, pool);
+	if (gu_variant_is_null(ctree1)) {
+		gu_pool_free(pool);
+		return false;
+	}
+
+	PgfMetricsLznState state;
+	state.funcs = &pgf_metrics_lin_funcs1;
+	state.ps = pgf_parser_init_state(concr, cat, 0, pool);
+	state.marks = gu_new_buf(int, pool);
+	state.pos = 0;
+	state.phrases = gu_new_buf(PgfPhrase*, pool);
+	state.matches = 0;
+	state.found = 0;
+	state.pool = pool;
+
+	pgf_lzr_linearize(concr, ctree1, 0, &state.funcs);
+
+	if (state.ps == NULL) {
+		gu_pool_free(pool);
+		return false;
+	}
+
+	GuEnum* en_trees = pgf_parse_result(state.ps, pool);
+	PgfExprProb* ep = gu_next(en_trees, PgfExprProb*, pool);
+	if (ep == NULL) {
+		gu_pool_free(pool);
+		return false;
+	}
+
+	GuEnum* en_lins2 =
+		pgf_lzr_concretize(concr, ep->expr, pool);
+	PgfCncTree ctree2 = gu_next(en_lins2, PgfCncTree, pool);
+	if (gu_variant_is_null(ctree2)) {
+		gu_pool_free(pool);
+		return false;
+	}
+
+	state.funcs = &pgf_metrics_lin_funcs2;
+	state.ps = NULL;
+	state.pos = 0;
+	pgf_lzr_linearize(concr, ctree2, 0, &state.funcs);
+
+	size_t n_gold = gu_buf_length(state.phrases);
+
+	*precision = (state.found > 0)
+		? ((double) state.matches)/((double) state.found) : 0.0;
+	*recall = (n_gold > 0)
+		? ((double) state.matches)/((double) n_gold) : 0.0;
+	*exact = pgf_expr_eq(expr, ep->expr) ? 1 : 0;
+
+	/* ep and the phrase buffer live in pool: release it only now. */
+	gu_pool_free(pool);
+
+	return true;
+}
diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h
index ef68e651f..03f1d4d48 100644
--- a/src/runtime/c/pgf/pgf.h
+++ b/src/runtime/c/pgf/pgf.h
@@ -116,6 +116,10 @@ pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err);
 PgfExprEnum*
 pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
 
+bool
+pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
+             double *precision, double *recall, double *exact);
+
 PgfExprEnum*
 pgf_generate(PgfPGF* pgf, PgfCId cat, GuPool* pool);
 
diff --git a/src/runtime/python/pypgf.c b/src/runtime/python/pypgf.c
index dd1c9d2ce..46c6a67be 100644
--- a/src/runtime/python/pypgf.c
+++ b/src/runtime/python/pypgf.c
@@ -135,6 +135,33 @@ Expr_repr(ExprObject *self)
 	return pystr;
 }
 
+/* Rich comparison for pgf.Expr.  Only == and != are supported and both
+ * are defined through pgf_expr_eq(); any other operator raises TypeError.
+ * Comparing with an object that is not an Expr yields NotImplemented. */
+PyObject *
+Expr_richcompare(ExprObject *e1, ExprObject *e2, int op)
+{
+	if (op != Py_EQ && op != Py_NE) {
+		PyErr_SetString(PyExc_TypeError, "the operation is not supported");
+		return NULL;
+	}
+
+	/* e2 may be any Python object: check it before reading e2->expr. */
+	if (!PyObject_TypeCheck((PyObject*) e2, Py_TYPE((PyObject*) e1))) {
+		Py_INCREF(Py_NotImplemented);
+		return Py_NotImplemented;
+	}
+
+	bool cmp = pgf_expr_eq(e1->expr,e2->expr);
+
+	/* The macros take the new reference that the caller will release. */
+	if (cmp == (op == Py_EQ)) {
+		Py_RETURN_TRUE;
+	} else {
+		Py_RETURN_FALSE;
+	}
+}
+
 static PyMethodDef Expr_methods[] = {
     {"unpack", (PyCFunction)Expr_unpack, METH_VARARGS,
      "Decomposes an expression into its components"
@@ -167,7 +194,7 @@ static PyTypeObject pgf_ExprType = {
     "abstract syntax tree",    /*tp_doc*/
     0,                         /*tp_traverse */
     0,                         /*tp_clear */
-    0,                         /*tp_richcompare */
+    (richcmpfunc) Expr_richcompare, /*tp_richcompare */
     0,                         /*tp_weaklistoffset */
     0,                         /*tp_iter */
     0,                         /*tp_iternext */
@@ -708,6 +735,39 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
 	return pyres;
 }
 
+/* Python: concr.parseval(expr, cat) -> (precision, recall, exact).
+ * Raises ValueError when pgf_parseval() reports failure. */
+static PyObject*
+Concr_parseval(ConcrObject* self, PyObject *args) {
+	ExprObject* pyexpr = NULL;
+	const char* s_cat = NULL;
+	if (!PyArg_ParseTuple(args, "O!s", &pgf_ExprType, &pyexpr, &s_cat))
+		return NULL;
+
+	GuPool* tmp_pool = gu_local_pool();
+
+	PgfCId cat = gu_str_string(s_cat, tmp_pool);
+
+	double precision = 0;
+	double recall = 0;
+	double exact = 0;
+
+	bool ok = pgf_parseval(self->concr, pyexpr->expr, cat,
+	                       &precision, &recall, &exact);
+
+	/* cat is no longer needed, so release the pool on both outcomes. */
+	gu_pool_free(tmp_pool);
+
+	if (!ok) {
+		/* Returning NULL without an exception set is a SystemError. */
+		PyErr_SetString(PyExc_ValueError,
+		                "the expression cannot be linearized and parsed back");
+		return NULL;
+	}
+
+	return Py_BuildValue("ddd", precision, recall, exact);
+}
+
 static PyObject*
 Concr_linearize(ConcrObject* self, PyObject *args)
 {
@@ -1006,6 +1066,9 @@ static PyMethodDef Concr_methods[] = {
     {"parse", (PyCFunction)Concr_parse, METH_VARARGS | METH_KEYWORDS,
      "Parses a string and returns an iterator over the abstract trees for this sentence"
     },
+    {"parseval", (PyCFunction)Concr_parseval, METH_VARARGS,
+     "Computes precision, recall and exact match for the parser on a given abstract tree"
+    },
     {"linearize", (PyCFunction)Concr_linearize, METH_VARARGS,
      "Takes an abstract tree and linearizes it to a string"
     },