From 561e478ed4a7a240408237c1e0aecef66d185fc9 Mon Sep 17 00:00:00 2001 From: "kr.angelov" Date: Mon, 6 May 2013 15:28:04 +0000 Subject: [PATCH] the statistical parser is now using two memory pools: one for parsing and one for the output trees. This means that the memory for parsing can be released as soon as the needed abstract trees are retrieved, while the trees themselves are retained in the separate output pool --- src/runtime/c/pgf/literals.c | 5 ++-- src/runtime/c/pgf/parser.c | 32 +++++++++++---------- src/runtime/c/pgf/pgf.c | 5 ++-- src/runtime/c/pgf/pgf.h | 3 +- src/runtime/c/utils/pgf-parse.c | 2 +- src/runtime/c/utils/pgf-service.c | 2 +- src/runtime/c/utils/pgf-translate.c | 2 +- src/runtime/python/pypgf.c | 44 +++++++++++++++++++++-------- 8 files changed, 60 insertions(+), 35 deletions(-) diff --git a/src/runtime/c/pgf/literals.c b/src/runtime/c/pgf/literals.c index e6c9a9ed3..e1c9ad880 100644 --- a/src/runtime/c/pgf/literals.c +++ b/src/runtime/c/pgf/literals.c @@ -176,13 +176,14 @@ static bool pgf_match_name_lit(PgfConcr* concr, PgfItem* item, PgfToken tok, PgfExprProb** out_ep, GuPool *pool) { + GuPool* tmp_pool = gu_new_pool(); + size_t lin_idx; PgfSequence seq; - pgf_item_sequence(item, &lin_idx, &seq, pool); + pgf_item_sequence(item, &lin_idx, &seq, tmp_pool); gu_assert(lin_idx == 0); - GuPool* tmp_pool = gu_new_pool(); GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool); GuString hyp = gu_str_string("-", tmp_pool); diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index ba75b965f..2d1432f20 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -44,7 +44,8 @@ typedef GuBuf PgfCCatBuf; typedef struct { PgfConcr* concr; - GuPool* pool; + GuPool* pool; // this pool is used for structures internal to the parser + GuPool* out_pool; // this pool is used for the allocating the final abstract trees GuBuf* expr_queue; PgfExpr meta_var; PgfProduction meta_prod; @@ -119,7 +120,6 @@ typedef struct { typedef struct PgfParseResult PgfParseResult; struct PgfParseResult { - GuPool* tmp_pool; PgfParseState* state; PgfExprEnum en; }; @@ -1496,7 +1496,7 @@ pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item) bool accepted = pext->callback->match(before->ps->concr, item, tok, - &ep, before->ps->pool); + &ep, before->ps->out_pool); if (ep != NULL) pgf_parsing_complete(before, after, item, ep); @@ -1643,6 +1643,7 @@ pgf_new_parsing(PgfConcr* concr, GuPool* pool) PgfParsing* ps = gu_new(PgfParsing, pool); ps->concr = concr; ps->pool = pool; + ps->out_pool = NULL; ps->expr_queue = gu_new_buf(PgfExprState*, pool); ps->max_fid = concr->total_cats; #ifdef PGF_COUNTS_DEBUG @@ -2011,9 +2012,10 @@ pgf_result_predict(PgfParsing* ps, PgfExprState* st = gu_new(PgfExprState, ps->pool); st->answers = cont->answers; st->ep.expr = - gu_new_variant_i(ps->pool, PGF_EXPR_APP, - PgfExprApp, - .fun = cont->ep.expr, .arg = ep->expr); + gu_new_variant_i(ps->out_pool, + PGF_EXPR_APP, PgfExprApp, + .fun = cont->ep.expr, + .arg = ep->expr); st->ep.prob = cont->ep.prob+ep->prob; st->args = cont->args; st->arg_idx = cont->arg_idx+1; @@ -2024,7 +2026,7 @@ pgf_result_predict(PgfParsing* ps, } static PgfExprProb* -pgf_parse_result_next(PgfParseResult* pr, GuPool* pool) +pgf_parse_result_next(PgfParseResult* pr) { for (;;) { while (pgf_parsing_proceed(pr->state)); @@ -2052,8 +2054,8 @@ pgf_parse_result_next(PgfParseResult* pr, GuPool* pool) if (ccat->fid < pr->state->ps->concr->total_cats) { st->ep.expr = - gu_new_variant_i(pool, PGF_EXPR_APP, - PgfExprApp, + gu_new_variant_i(pr->state->ps->out_pool, + PGF_EXPR_APP, PgfExprApp, .fun = st->ep.expr, .arg = pr->state->ps->meta_var); st->arg_idx++; @@ -2075,9 +2077,10 @@ pgf_parse_result_next(PgfParseResult* pr, GuPool* pool) PgfExprState* st3 = gu_new(PgfExprState, pr->state->ps->pool); st3->answers = st2->answers; st3->ep.expr = - gu_new_variant_i(pr->state->ps->pool, PGF_EXPR_APP, - PgfExprApp, - .fun = st2->ep.expr, .arg = st->ep.expr); + gu_new_variant_i(pr->state->ps->out_pool, + PGF_EXPR_APP, PgfExprApp, + .fun = st2->ep.expr, + .arg = st->ep.expr); st3->ep.prob = st2->ep.prob + st->ep.prob; st3->args = st2->args; st3->arg_idx = st2->arg_idx+1; @@ -2094,7 +2097,7 @@ static void pgf_parse_result_enum_next(GuEnum* self, void* to, GuPool* pool) { PgfParseResult* pr = gu_container(self, PgfParseResult, en); - *(PgfExprProb**)to = pgf_parse_result_next(pr, pool); + *(PgfExprProb**)to = pgf_parse_result_next(pr); } PgfExprEnum* @@ -2104,11 +2107,10 @@ pgf_parse_result(PgfParseState* state, GuPool* pool) pgf_parsing_print_counts(state->ps); #endif - GuPool* tmp_pool = gu_new_pool(); + state->ps->out_pool = pool; PgfExprEnum* en = &gu_new_i(pool, PgfParseResult, - .tmp_pool = tmp_pool, .state = state, .en.next = pgf_parse_result_enum_next)->en; diff --git a/src/runtime/c/pgf/pgf.c b/src/runtime/c/pgf/pgf.c index 24d330981..b355593d8 100644 --- a/src/runtime/c/pgf/pgf.c +++ b/src/runtime/c/pgf/pgf.c @@ -207,7 +207,8 @@ pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err) } GuEnum* -pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool) +pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, + GuPool* pool, GuPool* out_pool) { // Begin parsing a sentence of the specified category PgfParseState* state = @@ -233,7 +234,7 @@ pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool) return NULL; // Now begin enumerating the resulting syntax trees - return pgf_parse_result(state, pool); + return pgf_parse_result(state, out_pool); } GuEnum* diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h index 39dc0dd04..d83598cc0 100644 --- a/src/runtime/c/pgf/pgf.h +++ b/src/runtime/c/pgf/pgf.h @@ -114,7 +114,8 @@ void pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err); PgfExprEnum* -pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool); +pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, + GuPool* pool, GuPool* out_pool); GuEnum* pgf_get_completions(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, diff --git a/src/runtime/c/utils/pgf-parse.c b/src/runtime/c/utils/pgf-parse.c index 648295312..32f979ce0 100644 --- a/src/runtime/c/utils/pgf-parse.c +++ b/src/runtime/c/utils/pgf-parse.c @@ -124,7 +124,7 @@ int main(int argc, char* argv[]) { GuReader *rdr = gu_string_reader(gu_str_string(line, ppool), ppool); PgfLexer *lexer = pgf_new_simple_lexer(rdr, ppool); - GuEnum* result = pgf_parse(concr, cat, lexer, ppool); + GuEnum* result = pgf_parse(concr, cat, lexer, ppool, ppool); PgfExprProb* ep = NULL; if (result != NULL) diff --git a/src/runtime/c/utils/pgf-service.c b/src/runtime/c/utils/pgf-service.c index 94070efee..33369b9f5 100644 --- a/src/runtime/c/utils/pgf-service.c +++ b/src/runtime/c/utils/pgf-service.c @@ -335,7 +335,7 @@ int main () pgf_new_simple_lexer(rdr, ppool); GuEnum* result = - pgf_parse(from_concr, cat, lexer, ppool); + pgf_parse(from_concr, cat, lexer, ppool, ppool); if (result == NULL) { FCGI_printf("Status: 500 Internal Server Error\r\n"); FCGI_printf("Content-type: text/plain\r\n" diff --git a/src/runtime/c/utils/pgf-translate.c b/src/runtime/c/utils/pgf-translate.c index e58b9d282..f53a847e3 100644 --- a/src/runtime/c/utils/pgf-translate.c +++ b/src/runtime/c/utils/pgf-translate.c @@ -169,7 +169,7 @@ int main(int argc, char* argv[]) { clock_t start = clock(); result = - pgf_parse(from_concr, cat, lexer, ppool); + pgf_parse(from_concr, cat, lexer, ppool, ppool); if (result == NULL) { PgfToken tok = pgf_lexer_current_token(lexer); diff --git a/src/runtime/python/pypgf.c b/src/runtime/python/pypgf.c index f9428f96d..de5ead10f 100644 --- a/src/runtime/python/pypgf.c +++ b/src/runtime/python/pypgf.c @@ -435,6 +435,7 @@ Expr_getattro(ExprObject *self, PyObject *attr_name) { typedef struct IterObject { PyObject_HEAD PGFObject* grammar; + PyObject* container; GuPool* pool; int max_count; int counter; @@ -454,8 +455,8 @@ Iter_fetch_expr(IterObject* self) return NULL; pyexpr->pool = NULL; pyexpr->expr = ep->expr; - pyexpr->master = (PyObject*) self; - Py_INCREF(self); + pyexpr->master = self->container; + Py_INCREF(self->container); PyObject* res = Py_BuildValue("(f,O)", ep->prob, pyexpr); Py_DECREF(pyexpr); @@ -483,6 +484,7 @@ Iter_new(PyTypeObject *type, PyObject *args, PyObject *kwds) IterObject* self = (IterObject *)type->tp_alloc(type, 0); if (self != NULL) { self->grammar = NULL; + self->container = NULL; self->pool = NULL; self->max_count = -1; self->counter = 0; @@ -499,6 +501,8 @@ Iter_dealloc(IterObject* self) gu_pool_free(self->pool); Py_XDECREF(self->grammar); + + Py_XDECREF(self->container); self->ob_type->tp_free((PyObject*)self); } @@ -661,6 +665,14 @@ pypgf_new_python_lexer(PyObject* pylexer, GuPool* pool) return ((PgfLexer*) lexer); } +#define PGF_CONTAINER_NAME "pgf.Container" + +void pypgf_container_descructor(PyObject *capsule) +{ + GuPool* pool = PyCapsule_GetPointer(capsule, PGF_CONTAINER_NAME); + gu_pool_free(pool); +} + static IterObject* Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds) { @@ -698,28 +710,35 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds) pyres->grammar = self->grammar; Py_XINCREF(pyres->grammar); - pyres->pool = gu_new_pool(); + GuPool* out_pool = gu_new_pool(); + + PyObject* py_pool = + PyCapsule_New(out_pool, PGF_CONTAINER_NAME, + pypgf_container_descructor); + pyres->container = PyTuple_Pack(2, pyres->grammar, py_pool); + Py_DECREF(py_pool); + + pyres->pool = gu_new_pool(); pyres->max_count = max_count; pyres->counter = 0; pyres->fetch = Iter_fetch_expr; - GuPool *tmp_pool = gu_local_pool(); GuString catname = - (catname_s == NULL) ? pgf_start_cat(self->grammar->pgf, tmp_pool) - : gu_str_string(catname_s, tmp_pool); + (catname_s == NULL) ? pgf_start_cat(self->grammar->pgf, pyres->pool) + : gu_str_string(catname_s, pyres->pool); PgfLexer *lexer = NULL; if (buf != NULL) { - GuIn* in = gu_data_in(buf, len, tmp_pool); - GuReader* rdr = gu_new_utf8_reader(in, tmp_pool); - lexer = pgf_new_simple_lexer(rdr, tmp_pool); + GuIn* in = gu_data_in(buf, len, pyres->pool); + GuReader* rdr = gu_new_utf8_reader(in, pyres->pool); + lexer = pgf_new_simple_lexer(rdr, pyres->pool); } if (py_lexer != NULL) { - lexer = pypgf_new_python_lexer(py_lexer, tmp_pool); + lexer = pypgf_new_python_lexer(py_lexer, pyres->pool); } pyres->res = - pgf_parse(self->concr, catname, lexer, pyres->pool); + pgf_parse(self->concr, catname, lexer, pyres->pool, out_pool); if (pyres->res == NULL) { Py_DECREF(pyres); @@ -740,7 +759,6 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds) } Py_XDECREF(py_lexer); - gu_pool_free(tmp_pool); return pyres; } @@ -784,6 +802,8 @@ Concr_getCompletions(ConcrObject* self, PyObject *args, PyObject *keywds) pyres->grammar = self->grammar; Py_XINCREF(pyres->grammar); + + pyres->container = NULL; pyres->pool = gu_new_pool(); pyres->max_count = max_count;