1
0
forked from GitHub/gf-core

The statistical parser now uses two memory pools: one for parsing and one for the output trees. This means that the memory used for parsing can be released as soon as the needed abstract trees have been retrieved, while the trees themselves are retained in the separate output pool.

This commit is contained in:
kr.angelov
2013-05-06 15:28:04 +00:00
parent 520c2fb59d
commit 7ba27229b3
8 changed files with 60 additions and 35 deletions

View File

@@ -176,13 +176,14 @@ static bool
pgf_match_name_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
PgfExprProb** out_ep, GuPool *pool)
{
GuPool* tmp_pool = gu_new_pool();
size_t lin_idx;
PgfSequence seq;
pgf_item_sequence(item, &lin_idx, &seq, pool);
pgf_item_sequence(item, &lin_idx, &seq, tmp_pool);
gu_assert(lin_idx == 0);
GuPool* tmp_pool = gu_new_pool();
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
GuString hyp = gu_str_string("-", tmp_pool);

View File

@@ -44,7 +44,8 @@ typedef GuBuf PgfCCatBuf;
typedef struct {
PgfConcr* concr;
GuPool* pool;
GuPool* pool; // this pool is used for structures internal to the parser
GuPool* out_pool; // this pool is used for the allocating the final abstract trees
GuBuf* expr_queue;
PgfExpr meta_var;
PgfProduction meta_prod;
@@ -119,7 +120,6 @@ typedef struct {
typedef struct PgfParseResult PgfParseResult;
struct PgfParseResult {
GuPool* tmp_pool;
PgfParseState* state;
PgfExprEnum en;
};
@@ -1496,7 +1496,7 @@ pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item)
bool accepted =
pext->callback->match(before->ps->concr, item,
tok,
&ep, before->ps->pool);
&ep, before->ps->out_pool);
if (ep != NULL)
pgf_parsing_complete(before, after, item, ep);
@@ -1643,6 +1643,7 @@ pgf_new_parsing(PgfConcr* concr, GuPool* pool)
PgfParsing* ps = gu_new(PgfParsing, pool);
ps->concr = concr;
ps->pool = pool;
ps->out_pool = NULL;
ps->expr_queue = gu_new_buf(PgfExprState*, pool);
ps->max_fid = concr->total_cats;
#ifdef PGF_COUNTS_DEBUG
@@ -2011,9 +2012,10 @@ pgf_result_predict(PgfParsing* ps,
PgfExprState* st = gu_new(PgfExprState, ps->pool);
st->answers = cont->answers;
st->ep.expr =
gu_new_variant_i(ps->pool, PGF_EXPR_APP,
PgfExprApp,
.fun = cont->ep.expr, .arg = ep->expr);
gu_new_variant_i(ps->out_pool,
PGF_EXPR_APP, PgfExprApp,
.fun = cont->ep.expr,
.arg = ep->expr);
st->ep.prob = cont->ep.prob+ep->prob;
st->args = cont->args;
st->arg_idx = cont->arg_idx+1;
@@ -2024,7 +2026,7 @@ pgf_result_predict(PgfParsing* ps,
}
static PgfExprProb*
pgf_parse_result_next(PgfParseResult* pr, GuPool* pool)
pgf_parse_result_next(PgfParseResult* pr)
{
for (;;) {
while (pgf_parsing_proceed(pr->state));
@@ -2052,8 +2054,8 @@ pgf_parse_result_next(PgfParseResult* pr, GuPool* pool)
if (ccat->fid < pr->state->ps->concr->total_cats) {
st->ep.expr =
gu_new_variant_i(pool, PGF_EXPR_APP,
PgfExprApp,
gu_new_variant_i(pr->state->ps->out_pool,
PGF_EXPR_APP, PgfExprApp,
.fun = st->ep.expr,
.arg = pr->state->ps->meta_var);
st->arg_idx++;
@@ -2075,9 +2077,10 @@ pgf_parse_result_next(PgfParseResult* pr, GuPool* pool)
PgfExprState* st3 = gu_new(PgfExprState, pr->state->ps->pool);
st3->answers = st2->answers;
st3->ep.expr =
gu_new_variant_i(pr->state->ps->pool, PGF_EXPR_APP,
PgfExprApp,
.fun = st2->ep.expr, .arg = st->ep.expr);
gu_new_variant_i(pr->state->ps->out_pool,
PGF_EXPR_APP, PgfExprApp,
.fun = st2->ep.expr,
.arg = st->ep.expr);
st3->ep.prob = st2->ep.prob + st->ep.prob;
st3->args = st2->args;
st3->arg_idx = st2->arg_idx+1;
@@ -2094,7 +2097,7 @@ static void
pgf_parse_result_enum_next(GuEnum* self, void* to, GuPool* pool)
{
PgfParseResult* pr = gu_container(self, PgfParseResult, en);
*(PgfExprProb**)to = pgf_parse_result_next(pr, pool);
*(PgfExprProb**)to = pgf_parse_result_next(pr);
}
PgfExprEnum*
@@ -2104,11 +2107,10 @@ pgf_parse_result(PgfParseState* state, GuPool* pool)
pgf_parsing_print_counts(state->ps);
#endif
GuPool* tmp_pool = gu_new_pool();
state->ps->out_pool = pool;
PgfExprEnum* en =
&gu_new_i(pool, PgfParseResult,
.tmp_pool = tmp_pool,
.state = state,
.en.next = pgf_parse_result_enum_next)->en;

View File

@@ -207,7 +207,8 @@ pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err)
}
GuEnum*
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
GuPool* pool, GuPool* out_pool)
{
// Begin parsing a sentence of the specified category
PgfParseState* state =
@@ -233,7 +234,7 @@ pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
return NULL;
// Now begin enumerating the resulting syntax trees
return pgf_parse_result(state, pool);
return pgf_parse_result(state, out_pool);
}
GuEnum*

View File

@@ -114,7 +114,8 @@ void
pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err);
PgfExprEnum*
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
GuPool* pool, GuPool* out_pool);
GuEnum*
pgf_get_completions(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,

View File

@@ -124,7 +124,7 @@ int main(int argc, char* argv[]) {
GuReader *rdr = gu_string_reader(gu_str_string(line, ppool), ppool);
PgfLexer *lexer = pgf_new_simple_lexer(rdr, ppool);
GuEnum* result = pgf_parse(concr, cat, lexer, ppool);
GuEnum* result = pgf_parse(concr, cat, lexer, ppool, ppool);
PgfExprProb* ep = NULL;
if (result != NULL)

View File

@@ -335,7 +335,7 @@ int main ()
pgf_new_simple_lexer(rdr, ppool);
GuEnum* result =
pgf_parse(from_concr, cat, lexer, ppool);
pgf_parse(from_concr, cat, lexer, ppool, ppool);
if (result == NULL) {
FCGI_printf("Status: 500 Internal Server Error\r\n");
FCGI_printf("Content-type: text/plain\r\n"

View File

@@ -169,7 +169,7 @@ int main(int argc, char* argv[]) {
clock_t start = clock();
result =
pgf_parse(from_concr, cat, lexer, ppool);
pgf_parse(from_concr, cat, lexer, ppool, ppool);
if (result == NULL) {
PgfToken tok =
pgf_lexer_current_token(lexer);

View File

@@ -435,6 +435,7 @@ Expr_getattro(ExprObject *self, PyObject *attr_name) {
typedef struct IterObject {
PyObject_HEAD
PGFObject* grammar;
PyObject* container;
GuPool* pool;
int max_count;
int counter;
@@ -454,8 +455,8 @@ Iter_fetch_expr(IterObject* self)
return NULL;
pyexpr->pool = NULL;
pyexpr->expr = ep->expr;
pyexpr->master = (PyObject*) self;
Py_INCREF(self);
pyexpr->master = self->container;
Py_INCREF(self->container);
PyObject* res = Py_BuildValue("(f,O)", ep->prob, pyexpr);
Py_DECREF(pyexpr);
@@ -483,6 +484,7 @@ Iter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
IterObject* self = (IterObject *)type->tp_alloc(type, 0);
if (self != NULL) {
self->grammar = NULL;
self->container = NULL;
self->pool = NULL;
self->max_count = -1;
self->counter = 0;
@@ -499,6 +501,8 @@ Iter_dealloc(IterObject* self)
gu_pool_free(self->pool);
Py_XDECREF(self->grammar);
Py_XDECREF(self->container);
self->ob_type->tp_free((PyObject*)self);
}
@@ -661,6 +665,14 @@ pypgf_new_python_lexer(PyObject* pylexer, GuPool* pool)
return ((PgfLexer*) lexer);
}
#define PGF_CONTAINER_NAME "pgf.Container"
// Capsule destructor: frees the GuPool whose pointer is stored in the
// given capsule under the name PGF_CONTAINER_NAME.
//
// capsule - the capsule object being destroyed.
void pypgf_container_descructor(PyObject *capsule)
{
    GuPool* pool = PyCapsule_GetPointer(capsule, PGF_CONTAINER_NAME);
    if (pool == NULL) {
        // PyCapsule_GetPointer returns NULL on failure (for example when
        // the capsule name does not match); there is no pool to release
        // in that case, so do not hand NULL to gu_pool_free.
        return;
    }
    gu_pool_free(pool);
}
static IterObject*
Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
{
@@ -698,28 +710,35 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
pyres->grammar = self->grammar;
Py_XINCREF(pyres->grammar);
pyres->pool = gu_new_pool();
GuPool* out_pool = gu_new_pool();
PyObject* py_pool =
PyCapsule_New(out_pool, PGF_CONTAINER_NAME,
pypgf_container_descructor);
pyres->container = PyTuple_Pack(2, pyres->grammar, py_pool);
Py_DECREF(py_pool);
pyres->pool = gu_new_pool();
pyres->max_count = max_count;
pyres->counter = 0;
pyres->fetch = Iter_fetch_expr;
GuPool *tmp_pool = gu_local_pool();
GuString catname =
(catname_s == NULL) ? pgf_start_cat(self->grammar->pgf, tmp_pool)
: gu_str_string(catname_s, tmp_pool);
(catname_s == NULL) ? pgf_start_cat(self->grammar->pgf, pyres->pool)
: gu_str_string(catname_s, pyres->pool);
PgfLexer *lexer = NULL;
if (buf != NULL) {
GuIn* in = gu_data_in(buf, len, tmp_pool);
GuReader* rdr = gu_new_utf8_reader(in, tmp_pool);
lexer = pgf_new_simple_lexer(rdr, tmp_pool);
GuIn* in = gu_data_in(buf, len, pyres->pool);
GuReader* rdr = gu_new_utf8_reader(in, pyres->pool);
lexer = pgf_new_simple_lexer(rdr, pyres->pool);
}
if (py_lexer != NULL) {
lexer = pypgf_new_python_lexer(py_lexer, tmp_pool);
lexer = pypgf_new_python_lexer(py_lexer, pyres->pool);
}
pyres->res =
pgf_parse(self->concr, catname, lexer, pyres->pool);
pgf_parse(self->concr, catname, lexer, pyres->pool, out_pool);
if (pyres->res == NULL) {
Py_DECREF(pyres);
@@ -740,7 +759,6 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
}
Py_XDECREF(py_lexer);
gu_pool_free(tmp_pool);
return pyres;
}
@@ -784,6 +802,8 @@ Concr_getCompletions(ConcrObject* self, PyObject *args, PyObject *keywds)
pyres->grammar = self->grammar;
Py_XINCREF(pyres->grammar);
pyres->container = NULL;
pyres->pool = gu_new_pool();
pyres->max_count = max_count;