forked from GitHub/gf-core
The statistical parser now uses two memory pools: one for parsing and one for the output trees. This means that the memory used for parsing can be released as soon as the needed abstract trees have been retrieved, while the trees themselves are retained in the separate output pool.
This commit is contained in:
@@ -176,13 +176,14 @@ static bool
|
||||
pgf_match_name_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
|
||||
PgfExprProb** out_ep, GuPool *pool)
|
||||
{
|
||||
GuPool* tmp_pool = gu_new_pool();
|
||||
|
||||
size_t lin_idx;
|
||||
PgfSequence seq;
|
||||
pgf_item_sequence(item, &lin_idx, &seq, pool);
|
||||
pgf_item_sequence(item, &lin_idx, &seq, tmp_pool);
|
||||
|
||||
gu_assert(lin_idx == 0);
|
||||
|
||||
GuPool* tmp_pool = gu_new_pool();
|
||||
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
|
||||
|
||||
GuString hyp = gu_str_string("-", tmp_pool);
|
||||
|
||||
@@ -44,7 +44,8 @@ typedef GuBuf PgfCCatBuf;
|
||||
|
||||
typedef struct {
|
||||
PgfConcr* concr;
|
||||
GuPool* pool;
|
||||
GuPool* pool; // this pool is used for structures internal to the parser
|
||||
GuPool* out_pool; // this pool is used for the allocating the final abstract trees
|
||||
GuBuf* expr_queue;
|
||||
PgfExpr meta_var;
|
||||
PgfProduction meta_prod;
|
||||
@@ -119,7 +120,6 @@ typedef struct {
|
||||
typedef struct PgfParseResult PgfParseResult;
|
||||
|
||||
struct PgfParseResult {
|
||||
GuPool* tmp_pool;
|
||||
PgfParseState* state;
|
||||
PgfExprEnum en;
|
||||
};
|
||||
@@ -1496,7 +1496,7 @@ pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item)
|
||||
bool accepted =
|
||||
pext->callback->match(before->ps->concr, item,
|
||||
tok,
|
||||
&ep, before->ps->pool);
|
||||
&ep, before->ps->out_pool);
|
||||
|
||||
if (ep != NULL)
|
||||
pgf_parsing_complete(before, after, item, ep);
|
||||
@@ -1643,6 +1643,7 @@ pgf_new_parsing(PgfConcr* concr, GuPool* pool)
|
||||
PgfParsing* ps = gu_new(PgfParsing, pool);
|
||||
ps->concr = concr;
|
||||
ps->pool = pool;
|
||||
ps->out_pool = NULL;
|
||||
ps->expr_queue = gu_new_buf(PgfExprState*, pool);
|
||||
ps->max_fid = concr->total_cats;
|
||||
#ifdef PGF_COUNTS_DEBUG
|
||||
@@ -2011,9 +2012,10 @@ pgf_result_predict(PgfParsing* ps,
|
||||
PgfExprState* st = gu_new(PgfExprState, ps->pool);
|
||||
st->answers = cont->answers;
|
||||
st->ep.expr =
|
||||
gu_new_variant_i(ps->pool, PGF_EXPR_APP,
|
||||
PgfExprApp,
|
||||
.fun = cont->ep.expr, .arg = ep->expr);
|
||||
gu_new_variant_i(ps->out_pool,
|
||||
PGF_EXPR_APP, PgfExprApp,
|
||||
.fun = cont->ep.expr,
|
||||
.arg = ep->expr);
|
||||
st->ep.prob = cont->ep.prob+ep->prob;
|
||||
st->args = cont->args;
|
||||
st->arg_idx = cont->arg_idx+1;
|
||||
@@ -2024,7 +2026,7 @@ pgf_result_predict(PgfParsing* ps,
|
||||
}
|
||||
|
||||
static PgfExprProb*
|
||||
pgf_parse_result_next(PgfParseResult* pr, GuPool* pool)
|
||||
pgf_parse_result_next(PgfParseResult* pr)
|
||||
{
|
||||
for (;;) {
|
||||
while (pgf_parsing_proceed(pr->state));
|
||||
@@ -2052,8 +2054,8 @@ pgf_parse_result_next(PgfParseResult* pr, GuPool* pool)
|
||||
|
||||
if (ccat->fid < pr->state->ps->concr->total_cats) {
|
||||
st->ep.expr =
|
||||
gu_new_variant_i(pool, PGF_EXPR_APP,
|
||||
PgfExprApp,
|
||||
gu_new_variant_i(pr->state->ps->out_pool,
|
||||
PGF_EXPR_APP, PgfExprApp,
|
||||
.fun = st->ep.expr,
|
||||
.arg = pr->state->ps->meta_var);
|
||||
st->arg_idx++;
|
||||
@@ -2075,9 +2077,10 @@ pgf_parse_result_next(PgfParseResult* pr, GuPool* pool)
|
||||
PgfExprState* st3 = gu_new(PgfExprState, pr->state->ps->pool);
|
||||
st3->answers = st2->answers;
|
||||
st3->ep.expr =
|
||||
gu_new_variant_i(pr->state->ps->pool, PGF_EXPR_APP,
|
||||
PgfExprApp,
|
||||
.fun = st2->ep.expr, .arg = st->ep.expr);
|
||||
gu_new_variant_i(pr->state->ps->out_pool,
|
||||
PGF_EXPR_APP, PgfExprApp,
|
||||
.fun = st2->ep.expr,
|
||||
.arg = st->ep.expr);
|
||||
st3->ep.prob = st2->ep.prob + st->ep.prob;
|
||||
st3->args = st2->args;
|
||||
st3->arg_idx = st2->arg_idx+1;
|
||||
@@ -2094,7 +2097,7 @@ static void
|
||||
pgf_parse_result_enum_next(GuEnum* self, void* to, GuPool* pool)
|
||||
{
|
||||
PgfParseResult* pr = gu_container(self, PgfParseResult, en);
|
||||
*(PgfExprProb**)to = pgf_parse_result_next(pr, pool);
|
||||
*(PgfExprProb**)to = pgf_parse_result_next(pr);
|
||||
}
|
||||
|
||||
PgfExprEnum*
|
||||
@@ -2104,11 +2107,10 @@ pgf_parse_result(PgfParseState* state, GuPool* pool)
|
||||
pgf_parsing_print_counts(state->ps);
|
||||
#endif
|
||||
|
||||
GuPool* tmp_pool = gu_new_pool();
|
||||
state->ps->out_pool = pool;
|
||||
|
||||
PgfExprEnum* en =
|
||||
&gu_new_i(pool, PgfParseResult,
|
||||
.tmp_pool = tmp_pool,
|
||||
.state = state,
|
||||
.en.next = pgf_parse_result_enum_next)->en;
|
||||
|
||||
|
||||
@@ -207,7 +207,8 @@ pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err)
|
||||
}
|
||||
|
||||
GuEnum*
|
||||
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
|
||||
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
|
||||
GuPool* pool, GuPool* out_pool)
|
||||
{
|
||||
// Begin parsing a sentence of the specified category
|
||||
PgfParseState* state =
|
||||
@@ -233,7 +234,7 @@ pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
|
||||
return NULL;
|
||||
|
||||
// Now begin enumerating the resulting syntax trees
|
||||
return pgf_parse_result(state, pool);
|
||||
return pgf_parse_result(state, out_pool);
|
||||
}
|
||||
|
||||
GuEnum*
|
||||
|
||||
@@ -114,7 +114,8 @@ void
|
||||
pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err);
|
||||
|
||||
PgfExprEnum*
|
||||
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
|
||||
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
|
||||
GuPool* pool, GuPool* out_pool);
|
||||
|
||||
GuEnum*
|
||||
pgf_get_completions(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
|
||||
|
||||
@@ -124,7 +124,7 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
GuReader *rdr = gu_string_reader(gu_str_string(line, ppool), ppool);
|
||||
PgfLexer *lexer = pgf_new_simple_lexer(rdr, ppool);
|
||||
GuEnum* result = pgf_parse(concr, cat, lexer, ppool);
|
||||
GuEnum* result = pgf_parse(concr, cat, lexer, ppool, ppool);
|
||||
|
||||
PgfExprProb* ep = NULL;
|
||||
if (result != NULL)
|
||||
|
||||
@@ -335,7 +335,7 @@ int main ()
|
||||
pgf_new_simple_lexer(rdr, ppool);
|
||||
|
||||
GuEnum* result =
|
||||
pgf_parse(from_concr, cat, lexer, ppool);
|
||||
pgf_parse(from_concr, cat, lexer, ppool, ppool);
|
||||
if (result == NULL) {
|
||||
FCGI_printf("Status: 500 Internal Server Error\r\n");
|
||||
FCGI_printf("Content-type: text/plain\r\n"
|
||||
|
||||
@@ -169,7 +169,7 @@ int main(int argc, char* argv[]) {
|
||||
clock_t start = clock();
|
||||
|
||||
result =
|
||||
pgf_parse(from_concr, cat, lexer, ppool);
|
||||
pgf_parse(from_concr, cat, lexer, ppool, ppool);
|
||||
if (result == NULL) {
|
||||
PgfToken tok =
|
||||
pgf_lexer_current_token(lexer);
|
||||
|
||||
@@ -435,6 +435,7 @@ Expr_getattro(ExprObject *self, PyObject *attr_name) {
|
||||
typedef struct IterObject {
|
||||
PyObject_HEAD
|
||||
PGFObject* grammar;
|
||||
PyObject* container;
|
||||
GuPool* pool;
|
||||
int max_count;
|
||||
int counter;
|
||||
@@ -454,8 +455,8 @@ Iter_fetch_expr(IterObject* self)
|
||||
return NULL;
|
||||
pyexpr->pool = NULL;
|
||||
pyexpr->expr = ep->expr;
|
||||
pyexpr->master = (PyObject*) self;
|
||||
Py_INCREF(self);
|
||||
pyexpr->master = self->container;
|
||||
Py_INCREF(self->container);
|
||||
|
||||
PyObject* res = Py_BuildValue("(f,O)", ep->prob, pyexpr);
|
||||
Py_DECREF(pyexpr);
|
||||
@@ -483,6 +484,7 @@ Iter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||
IterObject* self = (IterObject *)type->tp_alloc(type, 0);
|
||||
if (self != NULL) {
|
||||
self->grammar = NULL;
|
||||
self->container = NULL;
|
||||
self->pool = NULL;
|
||||
self->max_count = -1;
|
||||
self->counter = 0;
|
||||
@@ -499,6 +501,8 @@ Iter_dealloc(IterObject* self)
|
||||
gu_pool_free(self->pool);
|
||||
|
||||
Py_XDECREF(self->grammar);
|
||||
|
||||
Py_XDECREF(self->container);
|
||||
|
||||
self->ob_type->tp_free((PyObject*)self);
|
||||
}
|
||||
@@ -661,6 +665,14 @@ pypgf_new_python_lexer(PyObject* pylexer, GuPool* pool)
|
||||
return ((PgfLexer*) lexer);
|
||||
}
|
||||
|
||||
#define PGF_CONTAINER_NAME "pgf.Container"
|
||||
|
||||
void pypgf_container_descructor(PyObject *capsule)
|
||||
{
|
||||
GuPool* pool = PyCapsule_GetPointer(capsule, PGF_CONTAINER_NAME);
|
||||
gu_pool_free(pool);
|
||||
}
|
||||
|
||||
static IterObject*
|
||||
Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||
{
|
||||
@@ -698,28 +710,35 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||
pyres->grammar = self->grammar;
|
||||
Py_XINCREF(pyres->grammar);
|
||||
|
||||
pyres->pool = gu_new_pool();
|
||||
GuPool* out_pool = gu_new_pool();
|
||||
|
||||
PyObject* py_pool =
|
||||
PyCapsule_New(out_pool, PGF_CONTAINER_NAME,
|
||||
pypgf_container_descructor);
|
||||
pyres->container = PyTuple_Pack(2, pyres->grammar, py_pool);
|
||||
Py_DECREF(py_pool);
|
||||
|
||||
pyres->pool = gu_new_pool();
|
||||
pyres->max_count = max_count;
|
||||
pyres->counter = 0;
|
||||
pyres->fetch = Iter_fetch_expr;
|
||||
|
||||
GuPool *tmp_pool = gu_local_pool();
|
||||
GuString catname =
|
||||
(catname_s == NULL) ? pgf_start_cat(self->grammar->pgf, tmp_pool)
|
||||
: gu_str_string(catname_s, tmp_pool);
|
||||
(catname_s == NULL) ? pgf_start_cat(self->grammar->pgf, pyres->pool)
|
||||
: gu_str_string(catname_s, pyres->pool);
|
||||
|
||||
PgfLexer *lexer = NULL;
|
||||
if (buf != NULL) {
|
||||
GuIn* in = gu_data_in(buf, len, tmp_pool);
|
||||
GuReader* rdr = gu_new_utf8_reader(in, tmp_pool);
|
||||
lexer = pgf_new_simple_lexer(rdr, tmp_pool);
|
||||
GuIn* in = gu_data_in(buf, len, pyres->pool);
|
||||
GuReader* rdr = gu_new_utf8_reader(in, pyres->pool);
|
||||
lexer = pgf_new_simple_lexer(rdr, pyres->pool);
|
||||
}
|
||||
if (py_lexer != NULL) {
|
||||
lexer = pypgf_new_python_lexer(py_lexer, tmp_pool);
|
||||
lexer = pypgf_new_python_lexer(py_lexer, pyres->pool);
|
||||
}
|
||||
|
||||
pyres->res =
|
||||
pgf_parse(self->concr, catname, lexer, pyres->pool);
|
||||
pgf_parse(self->concr, catname, lexer, pyres->pool, out_pool);
|
||||
|
||||
if (pyres->res == NULL) {
|
||||
Py_DECREF(pyres);
|
||||
@@ -740,7 +759,6 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||
}
|
||||
|
||||
Py_XDECREF(py_lexer);
|
||||
gu_pool_free(tmp_pool);
|
||||
|
||||
return pyres;
|
||||
}
|
||||
@@ -784,6 +802,8 @@ Concr_getCompletions(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||
|
||||
pyres->grammar = self->grammar;
|
||||
Py_XINCREF(pyres->grammar);
|
||||
|
||||
pyres->container = NULL;
|
||||
|
||||
pyres->pool = gu_new_pool();
|
||||
pyres->max_count = max_count;
|
||||
|
||||
Reference in New Issue
Block a user