The first prototype for exhaustive generation in the C runtime. The trees are always listed in decreasing probability order. There is also an API for generation from Python

This commit is contained in:
kr.angelov
2012-12-14 15:32:49 +00:00
parent 79711380a2
commit 8aefd1e072
7 changed files with 255 additions and 38 deletions

View File

@@ -25,7 +25,6 @@ import GF.Infra.Option
import GF.Data.Operations import GF.Data.Operations
import Data.List import Data.List
import Data.Function
import Data.Char (isDigit,isSpace) import Data.Char (isDigit,isSpace)
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Map as Map import qualified Data.Map as Map
@@ -63,8 +62,7 @@ mkCanon2pgf opts gr am = do
((m,c),AbsCat (Just (L _ cont)),addr) <- adefs] ((m,c),AbsCat (Just (L _ cont)),addr) <- adefs]
catfuns cat = catfuns cat =
(map (\x -> (0,snd x)) . sortBy (compare `on` fst)) [(0,i2i f) | ((m,f),AbsFun (Just (L _ ty)) _ _ (Just True),_) <- adefs, snd (GM.valCat ty) == cat]
[(loc,i2i f) | ((m,f),AbsFun (Just (L loc ty)) _ _ (Just True),_) <- adefs, snd (GM.valCat ty) == cat]
mkConcr cm = do mkConcr cm = do
let cflags = concatOptions [mflags mo | (i,mo) <- modules gr, isModCnc mo, let cflags = concatOptions [mflags mo | (i,mo) <- modules gr, isModCnc mo,

View File

@@ -111,6 +111,7 @@ libpgf_la_SOURCES = \
pgf/reader.h \ pgf/reader.h \
pgf/reader.c \ pgf/reader.c \
pgf/linearize.c \ pgf/linearize.c \
pgf/reasoner.c \
pgf/printer.c \ pgf/printer.c \
pgf/pgf.c \ pgf/pgf.c \
pgf/pgf.h pgf/pgf.h

View File

@@ -82,11 +82,6 @@ pgf_parser_add_literal(PgfConcr *concr, PgfCId cat,
* @{ * @{
*/ */
/// An enumeration of #PgfExpr elements.
typedef GuEnum PgfExprEnum;
/// Retrieve the current parses from the parse state. /// Retrieve the current parses from the parse state.
PgfExprEnum* PgfExprEnum*
pgf_parse_result(PgfParseState* state, GuPool* pool); pgf_parse_result(PgfParseState* state, GuPool* pool);

View File

@@ -54,6 +54,9 @@ extern GU_DECLARE_TYPE(PgfConcr, struct);
#include <pgf/expr.h> #include <pgf/expr.h>
#include <pgf/lexer.h> #include <pgf/lexer.h>
/// An enumeration of #PgfExpr elements.
typedef GuEnum PgfExprEnum;
PgfPGF* PgfPGF*
pgf_read(const char* fpath, pgf_read(const char* fpath,
GuPool* pool, GuExn* err); GuPool* pool, GuExn* err);
@@ -109,9 +112,12 @@ pgf_print_name(PgfConcr*, PgfCId id);
void void
pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err); pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err);
GuEnum* PgfExprEnum*
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool); pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
PgfExprEnum*
pgf_generate(PgfPGF* pgf, PgfCId cat, GuPool* pool);
// an experimental function. Please don't use it // an experimental function. Please don't use it
void void
pgf_print_chunks(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool); pgf_print_chunks(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);

View File

@@ -0,0 +1,163 @@
#include <pgf/pgf.h>
#include <pgf/data.h>
#include <math.h>
#include <stdio.h>
/// A partially built expression plus the context it must be plugged
/// into once all of its own arguments have been supplied.
typedef struct PgfExprState {
	/// The enclosing state that is waiting for this expression as its
	/// next argument; NULL for the top-level expression.
	struct PgfExprState* cont;

	/// The expression built so far: a function, possibly already
	/// applied to some of its arguments.
	PgfExpr expr;

	/// The hypotheses (argument types) taken from the type of the
	/// function at the head of `expr`.
	PgfHypos hypos;

	/// Index into `hypos` of the next argument to be supplied.
	size_t arg_idx;
} PgfExprState;
/// An entry of the reasoner's priority queue: "try the next function
/// of category `abscat` as the pending argument of `st`".
///
/// NOTE: the field order matters, pgf_reasoner_cat_init() fills this
/// struct with a positional initializer.
typedef struct {
	/// The state that will receive the generated expression as an
	/// argument (NULL when generating the top-level expression).
	PgfExprState* st;

	/// The cost accumulated for the context before a function of
	/// `abscat` is chosen.
	prob_t cont_prob;

	/// Index into `abscat->functions` of the next function to try.
	size_t fun_idx;

	/// The abstract category whose functions are being enumerated.
	PgfCat* abscat;
} PgfExprQState;
/// The state of one exhaustive-generation run.
typedef struct {
	/// Pool holding the queue and the intermediate PgfExprState
	/// objects. In this file it is released only by
	/// pgf_reasoner_next() once the queue has been drained.
	GuPool* tmp_pool;

	/// The abstract syntax from which categories and functions
	/// are looked up.
	PgfAbstr* abstract;

	/// Binary heap of PgfExprQState entries ordered by
	/// pgf_expr_qstate_order; set to NULL when generation is over.
	GuBuf* pqueue;

	/// The enumeration interface handed out to the caller; the
	/// reasoner is recovered from it with gu_container().
	PgfExprEnum en;
} PgfReasoner;
/// Three-way comparison of two PgfExprQState entries.
///
/// The key of an entry is its accumulated `cont_prob` minus the
/// logarithm of the probability stored for the function that the entry
/// would try next. Returns -1, 0 or 1 when the key of `a` is
/// respectively smaller than, equal to (or unordered with), or greater
/// than the key of `b`.
static int
cmp_expr_qstate(GuOrder* self, const void* a, const void* b)
{
	const PgfExprQState* lhs = a;
	const PgfExprQState* rhs = b;

	prob_t lhs_key =
		lhs->cont_prob - log(lhs->abscat->functions[lhs->fun_idx].prob);
	prob_t rhs_key =
		rhs->cont_prob - log(rhs->abscat->functions[rhs->fun_idx].prob);

	// Yields -1 / 0 / 1; both comparisons are false for NaN, giving 0.
	return (lhs_key > rhs_key) - (lhs_key < rhs_key);
}

/// The ordering used for every heap operation on the reasoner's queue.
static GuOrder
pgf_expr_qstate_order = { cmp_expr_qstate };
/// Schedule the generation of an expression of category `cat`.
///
/// @param rs        the reasoner whose queue receives the new entry
/// @param cont      the state waiting for the generated expression as
///                  its next argument, or NULL for the top level
/// @param cont_prob the cost accumulated so far for `cont`
/// @param cat       the identifier of the category to expand
/// @param pool      unused here; kept so existing call sites still compile
///
/// @return true if an entry was pushed on the queue; false if nothing
///         was scheduled because the category would recurse into
///         itself, is not defined in the abstract syntax, or has no
///         functions at all.
static bool
pgf_reasoner_cat_init(PgfReasoner* rs,
                      PgfExprState* cont, prob_t cont_prob, PgfCId cat,
                      GuPool* pool)
{
	(void) pool;

	// Checking for loops in the chart: if some ancestor of `cont` is
	// itself waiting for an argument of the same category, expanding
	// `cat` again would never terminate, so this branch is abandoned.
	if (cont != NULL) {
		for (PgfExprState* st = cont->cont; st != NULL; st = st->cont) {
			PgfHypo* hypo =
				gu_seq_index(st->hypos, PgfHypo, st->arg_idx);
			if (gu_string_eq(hypo->type->cid, cat))
				return false;
		}
	}

	PgfCat* abscat = gu_map_get(rs->abstract->cats, &cat, PgfCat*);
	if (abscat == NULL)
		return false;

	// The new entry starts at function index 0, and both the queue
	// comparator and pgf_reasoner_next() read functions[fun_idx]
	// unconditionally. A category without functions must therefore
	// not be queued, otherwise functions[0] is read out of bounds.
	if (abscat->n_functions == 0)
		return false;

	PgfExprQState q = {cont, cont_prob, 0, abscat};
	gu_buf_heap_push(rs->pqueue, &pgf_expr_qstate_order, &q);
	return true;
}
/// Produce the next complete expression, or NULL when there is none.
///
/// Entries are popped from the priority queue one at a time. Each pop
/// selects the next function of the entry's category; the entry is put
/// back on the queue while it still has further functions to offer.
/// The chosen function is then either complete (no arguments left, no
/// enclosing state), gets applied inside its enclosing state, or
/// causes the category of its next missing argument to be scheduled.
///
/// @param rs   the reasoner
/// @param pool the pool in which the returned expression and its
///             PgfExprProb wrapper are allocated
///
/// @return the next expression with its accumulated cost, or NULL once
///         the queue is exhausted. On exhaustion the temporary pool is
///         freed and later calls keep returning NULL.
static PgfExprProb*
pgf_reasoner_next(PgfReasoner* rs, GuPool* pool)
{
	// Generation has already finished in an earlier call.
	if (rs->pqueue == NULL)
		return NULL;

	while (gu_buf_length(rs->pqueue) > 0) {
		PgfExprQState item;
		gu_buf_heap_pop(rs->pqueue, &pgf_expr_qstate_order, &item);

		// Take the current function and advance the entry to the
		// following one.
		PgfCId fun = item.abscat->functions[item.fun_idx].fun;
		item.fun_idx++;

		PgfFunDecl* decl =
			gu_map_get(rs->abstract->funs, &fun, PgfFunDecl*);

		// Re-queue the entry for the remaining functions, if any.
		if (item.fun_idx < item.abscat->n_functions)
			gu_buf_heap_push(rs->pqueue, &pgf_expr_qstate_order, &item);

		// No declaration for this function: nothing to build from it.
		if (decl == NULL)
			continue;

		// The same cost is used for every step below.
		prob_t prob = item.cont_prob + decl->ep.prob;

		PgfExprState* st = gu_new(PgfExprState, rs->tmp_pool);
		st->cont = item.st;
		st->expr =
			gu_new_variant_i(pool, PGF_EXPR_FUN,
			                 PgfExprFun,
			                 .fun = fun);
		st->hypos = decl->type->hypos;
		st->arg_idx = 0;

		// While the expression in `st` has all of its arguments,
		// plug it into the enclosing state, or return it when
		// there is no enclosing state.
		while (st->arg_idx >= gu_seq_length(st->hypos)) {
			PgfExprState* parent = st->cont;
			if (parent == NULL) {
				PgfExprProb* result = gu_new(PgfExprProb, pool);
				result->expr = st->expr;
				result->prob = prob;
				return result;
			}

			// `st` takes over the role of `parent`, with one
			// more argument applied.
			st->cont = parent->cont;
			st->expr =
				gu_new_variant_i(pool, PGF_EXPR_APP,
				                 PgfExprApp,
				                 .fun = parent->expr, .arg = st->expr);
			st->hypos = parent->hypos;
			st->arg_idx = parent->arg_idx + 1;
		}

		// An argument is still missing: schedule its category.
		PgfHypo* hypo = gu_seq_index(st->hypos, PgfHypo, st->arg_idx);
		pgf_reasoner_cat_init(rs, st, prob,
		                      hypo->type->cid, pool);
	}

	// The queue is drained: release the scratch memory and remember
	// that generation is over.
	gu_pool_free(rs->tmp_pool);
	rs->tmp_pool = NULL;
	rs->pqueue = NULL;
	return NULL;
}
/// GuEnum callback: stores the next generated PgfExprProb* (or NULL
/// when generation is finished) into `*to`.
static void
pgf_reasoner_enum_next(GuEnum* self, void* to, GuPool* pool)
{
	// `self` is the `en` member embedded in a PgfReasoner.
	PgfReasoner* reasoner = gu_container(self, PgfReasoner, en);
	PgfExprProb** out = to;
	*out = pgf_reasoner_next(reasoner, pool);
}
/// Start the exhaustive generation of expressions of category `cat`.
///
/// @param pgf  the grammar whose abstract syntax is used
/// @param cat  the category to generate from
/// @param pool the pool in which the reasoner itself is allocated
///
/// @return an enumeration that yields PgfExprProb* values.
///
/// NOTE(review): the scratch pool created here is freed only by
/// pgf_reasoner_next() when the enumeration has been exhausted, so a
/// caller that stops early appears to leak it - confirm.
PgfExprEnum*
pgf_generate(PgfPGF* pgf, PgfCId cat, GuPool* pool)
{
	// Scratch memory for the queue and the intermediate states.
	GuPool* scratch = gu_new_pool();
	GuBuf* queue = gu_new_buf(PgfExprQState, scratch);

	PgfReasoner* reasoner =
		gu_new_i(pool, PgfReasoner,
		         .tmp_pool = scratch,
		         .abstract = &pgf->abstract,
		         .pqueue = queue,
		         .en.next = pgf_reasoner_enum_next);

	// Seed the queue with the start category: no enclosing state and
	// zero accumulated cost.
	pgf_reasoner_cat_init(reasoner, NULL, 0, cat, pool);

	return &reasoner->en;
}

View File

@@ -138,15 +138,19 @@ static PyTypeObject pgf_ExprType = {
typedef struct { typedef struct {
PyObject_HEAD PyObject_HEAD
GuPool* pool; GuPool* pool;
int max_count;
int counter;
GuEnum* res; GuEnum* res;
} ParseResultObject; } ExprIterObject;
static ParseResultObject* static ExprIterObject*
ParseResult_new(PyTypeObject *type, PyObject *args, PyObject *kwds) ExprIter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{ {
ParseResultObject* self = (ParseResultObject *)type->tp_alloc(type, 0); ExprIterObject* self = (ExprIterObject *)type->tp_alloc(type, 0);
if (self != NULL) { if (self != NULL) {
self->pool = NULL; self->pool = NULL;
self->max_count = -1;
self->counter = 0;
self->res = NULL; self->res = NULL;
} }
@@ -154,7 +158,7 @@ ParseResult_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
} }
static void static void
ParseResult_dealloc(ParseResultObject* self) ExprIter_dealloc(ExprIterObject* self)
{ {
if (self->pool != NULL) if (self->pool != NULL)
gu_pool_free(self->pool); gu_pool_free(self->pool);
@@ -163,21 +167,26 @@ ParseResult_dealloc(ParseResultObject* self)
} }
static int static int
ParseResult_init(ParseResultObject *self, PyObject *args, PyObject *kwds) ExprIter_init(ExprIterObject *self, PyObject *args, PyObject *kwds)
{ {
return -1; return -1;
} }
static PyObject* static PyObject*
ParseResult_iter(ParseResultObject *self) ExprIter_iter(ExprIterObject *self)
{ {
Py_INCREF(self); Py_INCREF(self);
return (PyObject*) self; return (PyObject*) self;
} }
static ExprObject* static PyObject*
ParseResult_iternext(ParseResultObject *self) ExprIter_iternext(ExprIterObject *self)
{ {
if (self->max_count > 0 && self->counter >= self->max_count) {
return NULL;
}
self->counter++;
PgfExprProb* ep = gu_next(self->res, PgfExprProb*, self->pool); PgfExprProb* ep = gu_next(self->res, PgfExprProb*, self->pool);
if (ep == NULL) if (ep == NULL)
return NULL; return NULL;
@@ -190,20 +199,23 @@ ParseResult_iternext(ParseResultObject *self)
pyexpr->master = (PyObject*) self; pyexpr->master = (PyObject*) self;
Py_INCREF(self); Py_INCREF(self);
return pyexpr; PyObject* res = Py_BuildValue("(f,O)", ep->prob, pyexpr);
Py_DECREF(pyexpr);
return res;
} }
static PyMethodDef ParseResult_methods[] = { static PyMethodDef ExprIter_methods[] = {
{NULL} /* Sentinel */ {NULL} /* Sentinel */
}; };
static PyTypeObject pgf_ParseResultType = { static PyTypeObject pgf_ExprIterType = {
PyObject_HEAD_INIT(NULL) PyObject_HEAD_INIT(NULL)
0, /*ob_size*/ 0, /*ob_size*/
"pgf.ParseResult", /*tp_name*/ "pgf.ExprIter", /*tp_name*/
sizeof(ParseResultObject), /*tp_basicsize*/ sizeof(ExprIterObject), /*tp_basicsize*/
0, /*tp_itemsize*/ 0, /*tp_itemsize*/
(destructor)ParseResult_dealloc, /*tp_dealloc*/ (destructor)ExprIter_dealloc, /*tp_dealloc*/
0, /*tp_print*/ 0, /*tp_print*/
0, /*tp_getattr*/ 0, /*tp_getattr*/
0, /*tp_setattr*/ 0, /*tp_setattr*/
@@ -219,14 +231,14 @@ static PyTypeObject pgf_ParseResultType = {
0, /*tp_setattro*/ 0, /*tp_setattro*/
0, /*tp_as_buffer*/ 0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
"parsing result", /*tp_doc*/ "an iterator over a sequence of expressions",/*tp_doc*/
0, /*tp_traverse */ 0, /*tp_traverse */
0, /*tp_clear */ 0, /*tp_clear */
0, /*tp_richcompare */ 0, /*tp_richcompare */
0, /*tp_weaklistoffset */ 0, /*tp_weaklistoffset */
(getiterfunc) ParseResult_iter, /*tp_iter */ (getiterfunc) ExprIter_iter, /*tp_iter */
(iternextfunc) ParseResult_iternext, /*tp_iternext */ (iternextfunc) ExprIter_iternext, /*tp_iternext */
ParseResult_methods, /*tp_methods */ ExprIter_methods, /*tp_methods */
0, /*tp_members */ 0, /*tp_members */
0, /*tp_getset */ 0, /*tp_getset */
0, /*tp_base */ 0, /*tp_base */
@@ -234,9 +246,9 @@ static PyTypeObject pgf_ParseResultType = {
0, /*tp_descr_get */ 0, /*tp_descr_get */
0, /*tp_descr_set */ 0, /*tp_descr_set */
0, /*tp_dictoffset */ 0, /*tp_dictoffset */
(initproc)ParseResult_init,/*tp_init */ (initproc)ExprIter_init, /*tp_init */
0, /*tp_alloc */ 0, /*tp_alloc */
(newfunc) ParseResult_new, /*tp_new */ (newfunc) ExprIter_new, /*tp_new */
}; };
typedef struct { typedef struct {
@@ -285,7 +297,7 @@ Concr_printName(ConcrObject* self, PyObject *args)
return pyname; return pyname;
} }
static ParseResultObject* static ExprIterObject*
Concr_parse(ConcrObject* self, PyObject *args) Concr_parse(ConcrObject* self, PyObject *args)
{ {
size_t len; size_t len;
@@ -294,13 +306,15 @@ Concr_parse(ConcrObject* self, PyObject *args)
if (!PyArg_ParseTuple(args, "ss#", &catname_s, &buf, &len)) if (!PyArg_ParseTuple(args, "ss#", &catname_s, &buf, &len))
return NULL; return NULL;
ParseResultObject* pyres = (ParseResultObject*) ExprIterObject* pyres = (ExprIterObject*)
pgf_ExprType.tp_alloc(&pgf_ParseResultType, 0); pgf_ExprType.tp_alloc(&pgf_ExprIterType, 0);
if (pyres == NULL) { if (pyres == NULL) {
return NULL; return NULL;
} }
pyres->pool = gu_new_pool(); pyres->pool = gu_new_pool();
pyres->max_count = -1;
pyres->counter = 0;
GuPool *tmp_pool = gu_local_pool(); GuPool *tmp_pool = gu_local_pool();
GuString catname = gu_str_string(catname_s, tmp_pool); GuString catname = gu_str_string(catname_s, tmp_pool);
@@ -658,6 +672,43 @@ PGF_functionsByCat(PGFObject* self, PyObject *args)
return functions; return functions;
} }
/// Python method PGF.generate(cat, n=-1).
///
/// Creates an ExprIter object over the trees that pgf_generate()
/// produces for category `cat`. The optional `n` is stored as the
/// iterator's `max_count`.
///
/// Returns a new reference, or NULL when argument parsing or the
/// allocation of the iterator fails, or when pgf_generate() returns
/// NULL.
static ExprIterObject*
PGF_generate(PGFObject* self, PyObject *args, PyObject *keywds)
{
	static char *kwlist[] = {"cat", "n", NULL};

	const char *catname_s;
	int max_count = -1;
	if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|i", kwlist,
	                                 &catname_s, &max_count))
		return NULL;

	// NOTE(review): the object is allocated through the tp_alloc slot
	// of pgf_ExprType although its type is pgf_ExprIterType; the two
	// slots are presumably the same generic allocator - confirm.
	ExprIterObject* iter = (ExprIterObject*)
		pgf_ExprType.tp_alloc(&pgf_ExprIterType, 0);
	if (iter == NULL)
		return NULL;

	iter->pool = gu_new_pool();
	iter->max_count = max_count;
	iter->counter = 0;

	// The category name is only needed for the duration of the call.
	GuPool *tmp_pool = gu_local_pool();
	GuString catname = gu_str_string(catname_s, tmp_pool);
	iter->res = pgf_generate(self->pgf, catname, iter->pool);
	gu_pool_free(tmp_pool);

	if (iter->res == NULL) {
		Py_DECREF(iter);
		return NULL;
	}

	return iter;
}
static PyGetSetDef PGF_getseters[] = { static PyGetSetDef PGF_getseters[] = {
{"abstractName", {"abstractName",
(getter)PGF_getAbstractName, NULL, (getter)PGF_getAbstractName, NULL,
@@ -688,7 +739,10 @@ static PyMemberDef PGF_members[] = {
static PyMethodDef PGF_methods[] = { static PyMethodDef PGF_methods[] = {
{"functionsByCat", (PyCFunction)PGF_functionsByCat, METH_VARARGS, {"functionsByCat", (PyCFunction)PGF_functionsByCat, METH_VARARGS,
"Return the list of functions for a given category" "Returns the list of functions for a given category"
},
{"generate", (PyCFunction)PGF_generate, METH_VARARGS | METH_KEYWORDS,
"Generates abstract syntax trees of given category in decreasing probability order"
}, },
{NULL} /* Sentinel */ {NULL} /* Sentinel */
}; };
@@ -816,7 +870,7 @@ initpgf(void)
if (PyType_Ready(&pgf_ExprType) < 0) if (PyType_Ready(&pgf_ExprType) < 0)
return; return;
if (PyType_Ready(&pgf_ParseResultType) < 0) if (PyType_Ready(&pgf_ExprIterType) < 0)
return; return;
m = Py_InitModule("pgf", module_methods); m = Py_InitModule("pgf", module_methods);
@@ -836,5 +890,5 @@ initpgf(void)
Py_INCREF(&pgf_PGFType); Py_INCREF(&pgf_PGFType);
Py_INCREF(&pgf_ConcrType); Py_INCREF(&pgf_ConcrType);
Py_INCREF(&pgf_ExprType); Py_INCREF(&pgf_ExprType);
Py_INCREF(&pgf_ParseResultType); Py_INCREF(&pgf_ExprIterType);
} }

View File

@@ -19,8 +19,8 @@ while True:
break; break;
try: try:
for e in gr.languages["ParseEng"].parse(gr.startCat,line): for (p,e) in gr.languages["ParseEng"].parse(gr.startCat,line):
print e sys.stdout.write("["+str(p)+"] "+str(e)+"\n")
print gr.languages["ParseEngBul"].linearize(e) print gr.languages["ParseEngBul"].linearize(e)
except pgf.ParseError as e: except pgf.ParseError as e:
print e.message print e.message