mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-23 19:42:50 -06:00
The statistical parser now uses two memory pools: one for parsing and one for the output trees. This means that the memory used for parsing can be released as soon as the needed abstract trees have been retrieved, while the trees themselves are retained in the separate output pool.
This commit is contained in:
@@ -176,13 +176,14 @@ static bool
|
|||||||
pgf_match_name_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
|
pgf_match_name_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
|
||||||
PgfExprProb** out_ep, GuPool *pool)
|
PgfExprProb** out_ep, GuPool *pool)
|
||||||
{
|
{
|
||||||
|
GuPool* tmp_pool = gu_new_pool();
|
||||||
|
|
||||||
size_t lin_idx;
|
size_t lin_idx;
|
||||||
PgfSequence seq;
|
PgfSequence seq;
|
||||||
pgf_item_sequence(item, &lin_idx, &seq, pool);
|
pgf_item_sequence(item, &lin_idx, &seq, tmp_pool);
|
||||||
|
|
||||||
gu_assert(lin_idx == 0);
|
gu_assert(lin_idx == 0);
|
||||||
|
|
||||||
GuPool* tmp_pool = gu_new_pool();
|
|
||||||
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
|
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
|
||||||
|
|
||||||
GuString hyp = gu_str_string("-", tmp_pool);
|
GuString hyp = gu_str_string("-", tmp_pool);
|
||||||
|
|||||||
@@ -44,7 +44,8 @@ typedef GuBuf PgfCCatBuf;
|
|||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
PgfConcr* concr;
|
PgfConcr* concr;
|
||||||
GuPool* pool;
|
GuPool* pool; // this pool is used for structures internal to the parser
|
||||||
|
GuPool* out_pool; // this pool is used for allocating the final abstract trees
|
||||||
GuBuf* expr_queue;
|
GuBuf* expr_queue;
|
||||||
PgfExpr meta_var;
|
PgfExpr meta_var;
|
||||||
PgfProduction meta_prod;
|
PgfProduction meta_prod;
|
||||||
@@ -119,7 +120,6 @@ typedef struct {
|
|||||||
typedef struct PgfParseResult PgfParseResult;
|
typedef struct PgfParseResult PgfParseResult;
|
||||||
|
|
||||||
struct PgfParseResult {
|
struct PgfParseResult {
|
||||||
GuPool* tmp_pool;
|
|
||||||
PgfParseState* state;
|
PgfParseState* state;
|
||||||
PgfExprEnum en;
|
PgfExprEnum en;
|
||||||
};
|
};
|
||||||
@@ -1496,7 +1496,7 @@ pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item)
|
|||||||
bool accepted =
|
bool accepted =
|
||||||
pext->callback->match(before->ps->concr, item,
|
pext->callback->match(before->ps->concr, item,
|
||||||
tok,
|
tok,
|
||||||
&ep, before->ps->pool);
|
&ep, before->ps->out_pool);
|
||||||
|
|
||||||
if (ep != NULL)
|
if (ep != NULL)
|
||||||
pgf_parsing_complete(before, after, item, ep);
|
pgf_parsing_complete(before, after, item, ep);
|
||||||
@@ -1643,6 +1643,7 @@ pgf_new_parsing(PgfConcr* concr, GuPool* pool)
|
|||||||
PgfParsing* ps = gu_new(PgfParsing, pool);
|
PgfParsing* ps = gu_new(PgfParsing, pool);
|
||||||
ps->concr = concr;
|
ps->concr = concr;
|
||||||
ps->pool = pool;
|
ps->pool = pool;
|
||||||
|
ps->out_pool = NULL;
|
||||||
ps->expr_queue = gu_new_buf(PgfExprState*, pool);
|
ps->expr_queue = gu_new_buf(PgfExprState*, pool);
|
||||||
ps->max_fid = concr->total_cats;
|
ps->max_fid = concr->total_cats;
|
||||||
#ifdef PGF_COUNTS_DEBUG
|
#ifdef PGF_COUNTS_DEBUG
|
||||||
@@ -2011,9 +2012,10 @@ pgf_result_predict(PgfParsing* ps,
|
|||||||
PgfExprState* st = gu_new(PgfExprState, ps->pool);
|
PgfExprState* st = gu_new(PgfExprState, ps->pool);
|
||||||
st->answers = cont->answers;
|
st->answers = cont->answers;
|
||||||
st->ep.expr =
|
st->ep.expr =
|
||||||
gu_new_variant_i(ps->pool, PGF_EXPR_APP,
|
gu_new_variant_i(ps->out_pool,
|
||||||
PgfExprApp,
|
PGF_EXPR_APP, PgfExprApp,
|
||||||
.fun = cont->ep.expr, .arg = ep->expr);
|
.fun = cont->ep.expr,
|
||||||
|
.arg = ep->expr);
|
||||||
st->ep.prob = cont->ep.prob+ep->prob;
|
st->ep.prob = cont->ep.prob+ep->prob;
|
||||||
st->args = cont->args;
|
st->args = cont->args;
|
||||||
st->arg_idx = cont->arg_idx+1;
|
st->arg_idx = cont->arg_idx+1;
|
||||||
@@ -2024,7 +2026,7 @@ pgf_result_predict(PgfParsing* ps,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static PgfExprProb*
|
static PgfExprProb*
|
||||||
pgf_parse_result_next(PgfParseResult* pr, GuPool* pool)
|
pgf_parse_result_next(PgfParseResult* pr)
|
||||||
{
|
{
|
||||||
for (;;) {
|
for (;;) {
|
||||||
while (pgf_parsing_proceed(pr->state));
|
while (pgf_parsing_proceed(pr->state));
|
||||||
@@ -2052,8 +2054,8 @@ pgf_parse_result_next(PgfParseResult* pr, GuPool* pool)
|
|||||||
|
|
||||||
if (ccat->fid < pr->state->ps->concr->total_cats) {
|
if (ccat->fid < pr->state->ps->concr->total_cats) {
|
||||||
st->ep.expr =
|
st->ep.expr =
|
||||||
gu_new_variant_i(pool, PGF_EXPR_APP,
|
gu_new_variant_i(pr->state->ps->out_pool,
|
||||||
PgfExprApp,
|
PGF_EXPR_APP, PgfExprApp,
|
||||||
.fun = st->ep.expr,
|
.fun = st->ep.expr,
|
||||||
.arg = pr->state->ps->meta_var);
|
.arg = pr->state->ps->meta_var);
|
||||||
st->arg_idx++;
|
st->arg_idx++;
|
||||||
@@ -2075,9 +2077,10 @@ pgf_parse_result_next(PgfParseResult* pr, GuPool* pool)
|
|||||||
PgfExprState* st3 = gu_new(PgfExprState, pr->state->ps->pool);
|
PgfExprState* st3 = gu_new(PgfExprState, pr->state->ps->pool);
|
||||||
st3->answers = st2->answers;
|
st3->answers = st2->answers;
|
||||||
st3->ep.expr =
|
st3->ep.expr =
|
||||||
gu_new_variant_i(pr->state->ps->pool, PGF_EXPR_APP,
|
gu_new_variant_i(pr->state->ps->out_pool,
|
||||||
PgfExprApp,
|
PGF_EXPR_APP, PgfExprApp,
|
||||||
.fun = st2->ep.expr, .arg = st->ep.expr);
|
.fun = st2->ep.expr,
|
||||||
|
.arg = st->ep.expr);
|
||||||
st3->ep.prob = st2->ep.prob + st->ep.prob;
|
st3->ep.prob = st2->ep.prob + st->ep.prob;
|
||||||
st3->args = st2->args;
|
st3->args = st2->args;
|
||||||
st3->arg_idx = st2->arg_idx+1;
|
st3->arg_idx = st2->arg_idx+1;
|
||||||
@@ -2094,7 +2097,7 @@ static void
|
|||||||
pgf_parse_result_enum_next(GuEnum* self, void* to, GuPool* pool)
|
pgf_parse_result_enum_next(GuEnum* self, void* to, GuPool* pool)
|
||||||
{
|
{
|
||||||
PgfParseResult* pr = gu_container(self, PgfParseResult, en);
|
PgfParseResult* pr = gu_container(self, PgfParseResult, en);
|
||||||
*(PgfExprProb**)to = pgf_parse_result_next(pr, pool);
|
*(PgfExprProb**)to = pgf_parse_result_next(pr);
|
||||||
}
|
}
|
||||||
|
|
||||||
PgfExprEnum*
|
PgfExprEnum*
|
||||||
@@ -2104,11 +2107,10 @@ pgf_parse_result(PgfParseState* state, GuPool* pool)
|
|||||||
pgf_parsing_print_counts(state->ps);
|
pgf_parsing_print_counts(state->ps);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
GuPool* tmp_pool = gu_new_pool();
|
state->ps->out_pool = pool;
|
||||||
|
|
||||||
PgfExprEnum* en =
|
PgfExprEnum* en =
|
||||||
&gu_new_i(pool, PgfParseResult,
|
&gu_new_i(pool, PgfParseResult,
|
||||||
.tmp_pool = tmp_pool,
|
|
||||||
.state = state,
|
.state = state,
|
||||||
.en.next = pgf_parse_result_enum_next)->en;
|
.en.next = pgf_parse_result_enum_next)->en;
|
||||||
|
|
||||||
|
|||||||
@@ -207,7 +207,8 @@ pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err)
|
|||||||
}
|
}
|
||||||
|
|
||||||
GuEnum*
|
GuEnum*
|
||||||
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
|
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
|
||||||
|
GuPool* pool, GuPool* out_pool)
|
||||||
{
|
{
|
||||||
// Begin parsing a sentence of the specified category
|
// Begin parsing a sentence of the specified category
|
||||||
PgfParseState* state =
|
PgfParseState* state =
|
||||||
@@ -233,7 +234,7 @@ pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
|
|||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
// Now begin enumerating the resulting syntax trees
|
// Now begin enumerating the resulting syntax trees
|
||||||
return pgf_parse_result(state, pool);
|
return pgf_parse_result(state, out_pool);
|
||||||
}
|
}
|
||||||
|
|
||||||
GuEnum*
|
GuEnum*
|
||||||
|
|||||||
@@ -114,7 +114,8 @@ void
|
|||||||
pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err);
|
pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err);
|
||||||
|
|
||||||
PgfExprEnum*
|
PgfExprEnum*
|
||||||
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
|
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
|
||||||
|
GuPool* pool, GuPool* out_pool);
|
||||||
|
|
||||||
GuEnum*
|
GuEnum*
|
||||||
pgf_get_completions(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
|
pgf_get_completions(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
|
||||||
|
|||||||
@@ -124,7 +124,7 @@ int main(int argc, char* argv[]) {
|
|||||||
|
|
||||||
GuReader *rdr = gu_string_reader(gu_str_string(line, ppool), ppool);
|
GuReader *rdr = gu_string_reader(gu_str_string(line, ppool), ppool);
|
||||||
PgfLexer *lexer = pgf_new_simple_lexer(rdr, ppool);
|
PgfLexer *lexer = pgf_new_simple_lexer(rdr, ppool);
|
||||||
GuEnum* result = pgf_parse(concr, cat, lexer, ppool);
|
GuEnum* result = pgf_parse(concr, cat, lexer, ppool, ppool);
|
||||||
|
|
||||||
PgfExprProb* ep = NULL;
|
PgfExprProb* ep = NULL;
|
||||||
if (result != NULL)
|
if (result != NULL)
|
||||||
|
|||||||
@@ -335,7 +335,7 @@ int main ()
|
|||||||
pgf_new_simple_lexer(rdr, ppool);
|
pgf_new_simple_lexer(rdr, ppool);
|
||||||
|
|
||||||
GuEnum* result =
|
GuEnum* result =
|
||||||
pgf_parse(from_concr, cat, lexer, ppool);
|
pgf_parse(from_concr, cat, lexer, ppool, ppool);
|
||||||
if (result == NULL) {
|
if (result == NULL) {
|
||||||
FCGI_printf("Status: 500 Internal Server Error\r\n");
|
FCGI_printf("Status: 500 Internal Server Error\r\n");
|
||||||
FCGI_printf("Content-type: text/plain\r\n"
|
FCGI_printf("Content-type: text/plain\r\n"
|
||||||
|
|||||||
@@ -169,7 +169,7 @@ int main(int argc, char* argv[]) {
|
|||||||
clock_t start = clock();
|
clock_t start = clock();
|
||||||
|
|
||||||
result =
|
result =
|
||||||
pgf_parse(from_concr, cat, lexer, ppool);
|
pgf_parse(from_concr, cat, lexer, ppool, ppool);
|
||||||
if (result == NULL) {
|
if (result == NULL) {
|
||||||
PgfToken tok =
|
PgfToken tok =
|
||||||
pgf_lexer_current_token(lexer);
|
pgf_lexer_current_token(lexer);
|
||||||
|
|||||||
@@ -435,6 +435,7 @@ Expr_getattro(ExprObject *self, PyObject *attr_name) {
|
|||||||
typedef struct IterObject {
|
typedef struct IterObject {
|
||||||
PyObject_HEAD
|
PyObject_HEAD
|
||||||
PGFObject* grammar;
|
PGFObject* grammar;
|
||||||
|
PyObject* container;
|
||||||
GuPool* pool;
|
GuPool* pool;
|
||||||
int max_count;
|
int max_count;
|
||||||
int counter;
|
int counter;
|
||||||
@@ -454,8 +455,8 @@ Iter_fetch_expr(IterObject* self)
|
|||||||
return NULL;
|
return NULL;
|
||||||
pyexpr->pool = NULL;
|
pyexpr->pool = NULL;
|
||||||
pyexpr->expr = ep->expr;
|
pyexpr->expr = ep->expr;
|
||||||
pyexpr->master = (PyObject*) self;
|
pyexpr->master = self->container;
|
||||||
Py_INCREF(self);
|
Py_INCREF(self->container);
|
||||||
|
|
||||||
PyObject* res = Py_BuildValue("(f,O)", ep->prob, pyexpr);
|
PyObject* res = Py_BuildValue("(f,O)", ep->prob, pyexpr);
|
||||||
Py_DECREF(pyexpr);
|
Py_DECREF(pyexpr);
|
||||||
@@ -483,6 +484,7 @@ Iter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
|||||||
IterObject* self = (IterObject *)type->tp_alloc(type, 0);
|
IterObject* self = (IterObject *)type->tp_alloc(type, 0);
|
||||||
if (self != NULL) {
|
if (self != NULL) {
|
||||||
self->grammar = NULL;
|
self->grammar = NULL;
|
||||||
|
self->container = NULL;
|
||||||
self->pool = NULL;
|
self->pool = NULL;
|
||||||
self->max_count = -1;
|
self->max_count = -1;
|
||||||
self->counter = 0;
|
self->counter = 0;
|
||||||
@@ -499,6 +501,8 @@ Iter_dealloc(IterObject* self)
|
|||||||
gu_pool_free(self->pool);
|
gu_pool_free(self->pool);
|
||||||
|
|
||||||
Py_XDECREF(self->grammar);
|
Py_XDECREF(self->grammar);
|
||||||
|
|
||||||
|
Py_XDECREF(self->container);
|
||||||
|
|
||||||
self->ob_type->tp_free((PyObject*)self);
|
self->ob_type->tp_free((PyObject*)self);
|
||||||
}
|
}
|
||||||
@@ -661,6 +665,14 @@ pypgf_new_python_lexer(PyObject* pylexer, GuPool* pool)
|
|||||||
return ((PgfLexer*) lexer);
|
return ((PgfLexer*) lexer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define PGF_CONTAINER_NAME "pgf.Container"
|
||||||
|
|
||||||
|
void pypgf_container_descructor(PyObject *capsule)
|
||||||
|
{
|
||||||
|
GuPool* pool = PyCapsule_GetPointer(capsule, PGF_CONTAINER_NAME);
|
||||||
|
gu_pool_free(pool);
|
||||||
|
}
|
||||||
|
|
||||||
static IterObject*
|
static IterObject*
|
||||||
Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
|
Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||||
{
|
{
|
||||||
@@ -698,28 +710,35 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
|
|||||||
pyres->grammar = self->grammar;
|
pyres->grammar = self->grammar;
|
||||||
Py_XINCREF(pyres->grammar);
|
Py_XINCREF(pyres->grammar);
|
||||||
|
|
||||||
pyres->pool = gu_new_pool();
|
GuPool* out_pool = gu_new_pool();
|
||||||
|
|
||||||
|
PyObject* py_pool =
|
||||||
|
PyCapsule_New(out_pool, PGF_CONTAINER_NAME,
|
||||||
|
pypgf_container_descructor);
|
||||||
|
pyres->container = PyTuple_Pack(2, pyres->grammar, py_pool);
|
||||||
|
Py_DECREF(py_pool);
|
||||||
|
|
||||||
|
pyres->pool = gu_new_pool();
|
||||||
pyres->max_count = max_count;
|
pyres->max_count = max_count;
|
||||||
pyres->counter = 0;
|
pyres->counter = 0;
|
||||||
pyres->fetch = Iter_fetch_expr;
|
pyres->fetch = Iter_fetch_expr;
|
||||||
|
|
||||||
GuPool *tmp_pool = gu_local_pool();
|
|
||||||
GuString catname =
|
GuString catname =
|
||||||
(catname_s == NULL) ? pgf_start_cat(self->grammar->pgf, tmp_pool)
|
(catname_s == NULL) ? pgf_start_cat(self->grammar->pgf, pyres->pool)
|
||||||
: gu_str_string(catname_s, tmp_pool);
|
: gu_str_string(catname_s, pyres->pool);
|
||||||
|
|
||||||
PgfLexer *lexer = NULL;
|
PgfLexer *lexer = NULL;
|
||||||
if (buf != NULL) {
|
if (buf != NULL) {
|
||||||
GuIn* in = gu_data_in(buf, len, tmp_pool);
|
GuIn* in = gu_data_in(buf, len, pyres->pool);
|
||||||
GuReader* rdr = gu_new_utf8_reader(in, tmp_pool);
|
GuReader* rdr = gu_new_utf8_reader(in, pyres->pool);
|
||||||
lexer = pgf_new_simple_lexer(rdr, tmp_pool);
|
lexer = pgf_new_simple_lexer(rdr, pyres->pool);
|
||||||
}
|
}
|
||||||
if (py_lexer != NULL) {
|
if (py_lexer != NULL) {
|
||||||
lexer = pypgf_new_python_lexer(py_lexer, tmp_pool);
|
lexer = pypgf_new_python_lexer(py_lexer, pyres->pool);
|
||||||
}
|
}
|
||||||
|
|
||||||
pyres->res =
|
pyres->res =
|
||||||
pgf_parse(self->concr, catname, lexer, pyres->pool);
|
pgf_parse(self->concr, catname, lexer, pyres->pool, out_pool);
|
||||||
|
|
||||||
if (pyres->res == NULL) {
|
if (pyres->res == NULL) {
|
||||||
Py_DECREF(pyres);
|
Py_DECREF(pyres);
|
||||||
@@ -740,7 +759,6 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
|
|||||||
}
|
}
|
||||||
|
|
||||||
Py_XDECREF(py_lexer);
|
Py_XDECREF(py_lexer);
|
||||||
gu_pool_free(tmp_pool);
|
|
||||||
|
|
||||||
return pyres;
|
return pyres;
|
||||||
}
|
}
|
||||||
@@ -784,6 +802,8 @@ Concr_getCompletions(ConcrObject* self, PyObject *args, PyObject *keywds)
|
|||||||
|
|
||||||
pyres->grammar = self->grammar;
|
pyres->grammar = self->grammar;
|
||||||
Py_XINCREF(pyres->grammar);
|
Py_XINCREF(pyres->grammar);
|
||||||
|
|
||||||
|
pyres->container = NULL;
|
||||||
|
|
||||||
pyres->pool = gu_new_pool();
|
pyres->pool = gu_new_pool();
|
||||||
pyres->max_count = max_count;
|
pyres->max_count = max_count;
|
||||||
|
|||||||
Reference in New Issue
Block a user