The statistical parser now uses two memory pools: one for parsing and one for the output trees. This means that the memory used for parsing can be released as soon as the needed abstract trees have been retrieved, while the trees themselves are retained in the separate output pool.

This commit is contained in:
kr.angelov
2013-05-06 15:28:04 +00:00
parent 3e22349d2b
commit 561e478ed4
8 changed files with 60 additions and 35 deletions

View File

@@ -176,13 +176,14 @@ static bool
pgf_match_name_lit(PgfConcr* concr, PgfItem* item, PgfToken tok, pgf_match_name_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
PgfExprProb** out_ep, GuPool *pool) PgfExprProb** out_ep, GuPool *pool)
{ {
GuPool* tmp_pool = gu_new_pool();
size_t lin_idx; size_t lin_idx;
PgfSequence seq; PgfSequence seq;
pgf_item_sequence(item, &lin_idx, &seq, pool); pgf_item_sequence(item, &lin_idx, &seq, tmp_pool);
gu_assert(lin_idx == 0); gu_assert(lin_idx == 0);
GuPool* tmp_pool = gu_new_pool();
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool); GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
GuString hyp = gu_str_string("-", tmp_pool); GuString hyp = gu_str_string("-", tmp_pool);

View File

@@ -44,7 +44,8 @@ typedef GuBuf PgfCCatBuf;
typedef struct { typedef struct {
PgfConcr* concr; PgfConcr* concr;
GuPool* pool; GuPool* pool; // this pool is used for structures internal to the parser
GuPool* out_pool; // this pool is used for allocating the final abstract trees
GuBuf* expr_queue; GuBuf* expr_queue;
PgfExpr meta_var; PgfExpr meta_var;
PgfProduction meta_prod; PgfProduction meta_prod;
@@ -119,7 +120,6 @@ typedef struct {
typedef struct PgfParseResult PgfParseResult; typedef struct PgfParseResult PgfParseResult;
struct PgfParseResult { struct PgfParseResult {
GuPool* tmp_pool;
PgfParseState* state; PgfParseState* state;
PgfExprEnum en; PgfExprEnum en;
}; };
@@ -1496,7 +1496,7 @@ pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item)
bool accepted = bool accepted =
pext->callback->match(before->ps->concr, item, pext->callback->match(before->ps->concr, item,
tok, tok,
&ep, before->ps->pool); &ep, before->ps->out_pool);
if (ep != NULL) if (ep != NULL)
pgf_parsing_complete(before, after, item, ep); pgf_parsing_complete(before, after, item, ep);
@@ -1643,6 +1643,7 @@ pgf_new_parsing(PgfConcr* concr, GuPool* pool)
PgfParsing* ps = gu_new(PgfParsing, pool); PgfParsing* ps = gu_new(PgfParsing, pool);
ps->concr = concr; ps->concr = concr;
ps->pool = pool; ps->pool = pool;
ps->out_pool = NULL;
ps->expr_queue = gu_new_buf(PgfExprState*, pool); ps->expr_queue = gu_new_buf(PgfExprState*, pool);
ps->max_fid = concr->total_cats; ps->max_fid = concr->total_cats;
#ifdef PGF_COUNTS_DEBUG #ifdef PGF_COUNTS_DEBUG
@@ -2011,9 +2012,10 @@ pgf_result_predict(PgfParsing* ps,
PgfExprState* st = gu_new(PgfExprState, ps->pool); PgfExprState* st = gu_new(PgfExprState, ps->pool);
st->answers = cont->answers; st->answers = cont->answers;
st->ep.expr = st->ep.expr =
gu_new_variant_i(ps->pool, PGF_EXPR_APP, gu_new_variant_i(ps->out_pool,
PgfExprApp, PGF_EXPR_APP, PgfExprApp,
.fun = cont->ep.expr, .arg = ep->expr); .fun = cont->ep.expr,
.arg = ep->expr);
st->ep.prob = cont->ep.prob+ep->prob; st->ep.prob = cont->ep.prob+ep->prob;
st->args = cont->args; st->args = cont->args;
st->arg_idx = cont->arg_idx+1; st->arg_idx = cont->arg_idx+1;
@@ -2024,7 +2026,7 @@ pgf_result_predict(PgfParsing* ps,
} }
static PgfExprProb* static PgfExprProb*
pgf_parse_result_next(PgfParseResult* pr, GuPool* pool) pgf_parse_result_next(PgfParseResult* pr)
{ {
for (;;) { for (;;) {
while (pgf_parsing_proceed(pr->state)); while (pgf_parsing_proceed(pr->state));
@@ -2052,8 +2054,8 @@ pgf_parse_result_next(PgfParseResult* pr, GuPool* pool)
if (ccat->fid < pr->state->ps->concr->total_cats) { if (ccat->fid < pr->state->ps->concr->total_cats) {
st->ep.expr = st->ep.expr =
gu_new_variant_i(pool, PGF_EXPR_APP, gu_new_variant_i(pr->state->ps->out_pool,
PgfExprApp, PGF_EXPR_APP, PgfExprApp,
.fun = st->ep.expr, .fun = st->ep.expr,
.arg = pr->state->ps->meta_var); .arg = pr->state->ps->meta_var);
st->arg_idx++; st->arg_idx++;
@@ -2075,9 +2077,10 @@ pgf_parse_result_next(PgfParseResult* pr, GuPool* pool)
PgfExprState* st3 = gu_new(PgfExprState, pr->state->ps->pool); PgfExprState* st3 = gu_new(PgfExprState, pr->state->ps->pool);
st3->answers = st2->answers; st3->answers = st2->answers;
st3->ep.expr = st3->ep.expr =
gu_new_variant_i(pr->state->ps->pool, PGF_EXPR_APP, gu_new_variant_i(pr->state->ps->out_pool,
PgfExprApp, PGF_EXPR_APP, PgfExprApp,
.fun = st2->ep.expr, .arg = st->ep.expr); .fun = st2->ep.expr,
.arg = st->ep.expr);
st3->ep.prob = st2->ep.prob + st->ep.prob; st3->ep.prob = st2->ep.prob + st->ep.prob;
st3->args = st2->args; st3->args = st2->args;
st3->arg_idx = st2->arg_idx+1; st3->arg_idx = st2->arg_idx+1;
@@ -2094,7 +2097,7 @@ static void
pgf_parse_result_enum_next(GuEnum* self, void* to, GuPool* pool) pgf_parse_result_enum_next(GuEnum* self, void* to, GuPool* pool)
{ {
PgfParseResult* pr = gu_container(self, PgfParseResult, en); PgfParseResult* pr = gu_container(self, PgfParseResult, en);
*(PgfExprProb**)to = pgf_parse_result_next(pr, pool); *(PgfExprProb**)to = pgf_parse_result_next(pr);
} }
PgfExprEnum* PgfExprEnum*
@@ -2104,11 +2107,10 @@ pgf_parse_result(PgfParseState* state, GuPool* pool)
pgf_parsing_print_counts(state->ps); pgf_parsing_print_counts(state->ps);
#endif #endif
GuPool* tmp_pool = gu_new_pool(); state->ps->out_pool = pool;
PgfExprEnum* en = PgfExprEnum* en =
&gu_new_i(pool, PgfParseResult, &gu_new_i(pool, PgfParseResult,
.tmp_pool = tmp_pool,
.state = state, .state = state,
.en.next = pgf_parse_result_enum_next)->en; .en.next = pgf_parse_result_enum_next)->en;

View File

@@ -207,7 +207,8 @@ pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err)
} }
GuEnum* GuEnum*
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool) pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
GuPool* pool, GuPool* out_pool)
{ {
// Begin parsing a sentence of the specified category // Begin parsing a sentence of the specified category
PgfParseState* state = PgfParseState* state =
@@ -233,7 +234,7 @@ pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
return NULL; return NULL;
// Now begin enumerating the resulting syntax trees // Now begin enumerating the resulting syntax trees
return pgf_parse_result(state, pool); return pgf_parse_result(state, out_pool);
} }
GuEnum* GuEnum*

View File

@@ -114,7 +114,8 @@ void
pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err); pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err);
PgfExprEnum* PgfExprEnum*
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool); pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
GuPool* pool, GuPool* out_pool);
GuEnum* GuEnum*
pgf_get_completions(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, pgf_get_completions(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,

View File

@@ -124,7 +124,7 @@ int main(int argc, char* argv[]) {
GuReader *rdr = gu_string_reader(gu_str_string(line, ppool), ppool); GuReader *rdr = gu_string_reader(gu_str_string(line, ppool), ppool);
PgfLexer *lexer = pgf_new_simple_lexer(rdr, ppool); PgfLexer *lexer = pgf_new_simple_lexer(rdr, ppool);
GuEnum* result = pgf_parse(concr, cat, lexer, ppool); GuEnum* result = pgf_parse(concr, cat, lexer, ppool, ppool);
PgfExprProb* ep = NULL; PgfExprProb* ep = NULL;
if (result != NULL) if (result != NULL)

View File

@@ -335,7 +335,7 @@ int main ()
pgf_new_simple_lexer(rdr, ppool); pgf_new_simple_lexer(rdr, ppool);
GuEnum* result = GuEnum* result =
pgf_parse(from_concr, cat, lexer, ppool); pgf_parse(from_concr, cat, lexer, ppool, ppool);
if (result == NULL) { if (result == NULL) {
FCGI_printf("Status: 500 Internal Server Error\r\n"); FCGI_printf("Status: 500 Internal Server Error\r\n");
FCGI_printf("Content-type: text/plain\r\n" FCGI_printf("Content-type: text/plain\r\n"

View File

@@ -169,7 +169,7 @@ int main(int argc, char* argv[]) {
clock_t start = clock(); clock_t start = clock();
result = result =
pgf_parse(from_concr, cat, lexer, ppool); pgf_parse(from_concr, cat, lexer, ppool, ppool);
if (result == NULL) { if (result == NULL) {
PgfToken tok = PgfToken tok =
pgf_lexer_current_token(lexer); pgf_lexer_current_token(lexer);

View File

@@ -435,6 +435,7 @@ Expr_getattro(ExprObject *self, PyObject *attr_name) {
typedef struct IterObject { typedef struct IterObject {
PyObject_HEAD PyObject_HEAD
PGFObject* grammar; PGFObject* grammar;
PyObject* container;
GuPool* pool; GuPool* pool;
int max_count; int max_count;
int counter; int counter;
@@ -454,8 +455,8 @@ Iter_fetch_expr(IterObject* self)
return NULL; return NULL;
pyexpr->pool = NULL; pyexpr->pool = NULL;
pyexpr->expr = ep->expr; pyexpr->expr = ep->expr;
pyexpr->master = (PyObject*) self; pyexpr->master = self->container;
Py_INCREF(self); Py_INCREF(self->container);
PyObject* res = Py_BuildValue("(f,O)", ep->prob, pyexpr); PyObject* res = Py_BuildValue("(f,O)", ep->prob, pyexpr);
Py_DECREF(pyexpr); Py_DECREF(pyexpr);
@@ -483,6 +484,7 @@ Iter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
IterObject* self = (IterObject *)type->tp_alloc(type, 0); IterObject* self = (IterObject *)type->tp_alloc(type, 0);
if (self != NULL) { if (self != NULL) {
self->grammar = NULL; self->grammar = NULL;
self->container = NULL;
self->pool = NULL; self->pool = NULL;
self->max_count = -1; self->max_count = -1;
self->counter = 0; self->counter = 0;
@@ -499,6 +501,8 @@ Iter_dealloc(IterObject* self)
gu_pool_free(self->pool); gu_pool_free(self->pool);
Py_XDECREF(self->grammar); Py_XDECREF(self->grammar);
Py_XDECREF(self->container);
self->ob_type->tp_free((PyObject*)self); self->ob_type->tp_free((PyObject*)self);
} }
@@ -661,6 +665,14 @@ pypgf_new_python_lexer(PyObject* pylexer, GuPool* pool)
return ((PgfLexer*) lexer); return ((PgfLexer*) lexer);
} }
#define PGF_CONTAINER_NAME "pgf.Container"
/*
 * Destructor for the capsule named PGF_CONTAINER_NAME.
 *
 * Concr_parse wraps the output GuPool in such a capsule and registers this
 * function as its destructor, so the pool is released when the capsule is
 * destroyed.
 *
 * capsule: the capsule object being destroyed.
 */
void pypgf_container_descructor(PyObject *capsule)
{
    GuPool* pool = PyCapsule_GetPointer(capsule, PGF_CONTAINER_NAME);

    /* PyCapsule_GetPointer returns NULL on failure (e.g. a name mismatch);
     * never hand a NULL pool to gu_pool_free. */
    if (pool == NULL) {
        return;
    }

    gu_pool_free(pool);
}
static IterObject* static IterObject*
Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds) Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
{ {
@@ -698,28 +710,35 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
pyres->grammar = self->grammar; pyres->grammar = self->grammar;
Py_XINCREF(pyres->grammar); Py_XINCREF(pyres->grammar);
pyres->pool = gu_new_pool(); GuPool* out_pool = gu_new_pool();
PyObject* py_pool =
PyCapsule_New(out_pool, PGF_CONTAINER_NAME,
pypgf_container_descructor);
pyres->container = PyTuple_Pack(2, pyres->grammar, py_pool);
Py_DECREF(py_pool);
pyres->pool = gu_new_pool();
pyres->max_count = max_count; pyres->max_count = max_count;
pyres->counter = 0; pyres->counter = 0;
pyres->fetch = Iter_fetch_expr; pyres->fetch = Iter_fetch_expr;
GuPool *tmp_pool = gu_local_pool();
GuString catname = GuString catname =
(catname_s == NULL) ? pgf_start_cat(self->grammar->pgf, tmp_pool) (catname_s == NULL) ? pgf_start_cat(self->grammar->pgf, pyres->pool)
: gu_str_string(catname_s, tmp_pool); : gu_str_string(catname_s, pyres->pool);
PgfLexer *lexer = NULL; PgfLexer *lexer = NULL;
if (buf != NULL) { if (buf != NULL) {
GuIn* in = gu_data_in(buf, len, tmp_pool); GuIn* in = gu_data_in(buf, len, pyres->pool);
GuReader* rdr = gu_new_utf8_reader(in, tmp_pool); GuReader* rdr = gu_new_utf8_reader(in, pyres->pool);
lexer = pgf_new_simple_lexer(rdr, tmp_pool); lexer = pgf_new_simple_lexer(rdr, pyres->pool);
} }
if (py_lexer != NULL) { if (py_lexer != NULL) {
lexer = pypgf_new_python_lexer(py_lexer, tmp_pool); lexer = pypgf_new_python_lexer(py_lexer, pyres->pool);
} }
pyres->res = pyres->res =
pgf_parse(self->concr, catname, lexer, pyres->pool); pgf_parse(self->concr, catname, lexer, pyres->pool, out_pool);
if (pyres->res == NULL) { if (pyres->res == NULL) {
Py_DECREF(pyres); Py_DECREF(pyres);
@@ -740,7 +759,6 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
} }
Py_XDECREF(py_lexer); Py_XDECREF(py_lexer);
gu_pool_free(tmp_pool);
return pyres; return pyres;
} }
@@ -784,6 +802,8 @@ Concr_getCompletions(ConcrObject* self, PyObject *args, PyObject *keywds)
pyres->grammar = self->grammar; pyres->grammar = self->grammar;
Py_XINCREF(pyres->grammar); Py_XINCREF(pyres->grammar);
pyres->container = NULL;
pyres->pool = gu_new_pool(); pyres->pool = gu_new_pool();
pyres->max_count = max_count; pyres->max_count = max_count;