The first prototype for exhaustive generation in the C runtime. The trees are always listed in decreasing probability order. There is also an API for generation from Python

This commit is contained in:
kr.angelov
2012-12-14 15:32:49 +00:00
parent 79711380a2
commit 8aefd1e072
7 changed files with 255 additions and 38 deletions

View File

@@ -25,7 +25,6 @@ import GF.Infra.Option
import GF.Data.Operations
import Data.List
import Data.Function
import Data.Char (isDigit,isSpace)
import qualified Data.Set as Set
import qualified Data.Map as Map
@@ -63,8 +62,7 @@ mkCanon2pgf opts gr am = do
((m,c),AbsCat (Just (L _ cont)),addr) <- adefs]
catfuns cat =
(map (\x -> (0,snd x)) . sortBy (compare `on` fst))
[(loc,i2i f) | ((m,f),AbsFun (Just (L loc ty)) _ _ (Just True),_) <- adefs, snd (GM.valCat ty) == cat]
[(0,i2i f) | ((m,f),AbsFun (Just (L _ ty)) _ _ (Just True),_) <- adefs, snd (GM.valCat ty) == cat]
mkConcr cm = do
let cflags = concatOptions [mflags mo | (i,mo) <- modules gr, isModCnc mo,

View File

@@ -111,6 +111,7 @@ libpgf_la_SOURCES = \
pgf/reader.h \
pgf/reader.c \
pgf/linearize.c \
pgf/reasoner.c \
pgf/printer.c \
pgf/pgf.c \
pgf/pgf.h

View File

@@ -82,11 +82,6 @@ pgf_parser_add_literal(PgfConcr *concr, PgfCId cat,
* @{
*/
/// An enumeration of #PgfExpr elements.
typedef GuEnum PgfExprEnum;
/// Retrieve the current parses from the parse state.
PgfExprEnum*
pgf_parse_result(PgfParseState* state, GuPool* pool);

View File

@@ -54,6 +54,9 @@ extern GU_DECLARE_TYPE(PgfConcr, struct);
#include <pgf/expr.h>
#include <pgf/lexer.h>
/// An enumeration of #PgfExpr elements.
typedef GuEnum PgfExprEnum;
PgfPGF*
pgf_read(const char* fpath,
GuPool* pool, GuExn* err);
@@ -109,9 +112,12 @@ pgf_print_name(PgfConcr*, PgfCId id);
void
pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err);
GuEnum*
PgfExprEnum*
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
PgfExprEnum*
pgf_generate(PgfPGF* pgf, PgfCId cat, GuPool* pool);
// an experimental function. Please don't use it
void
pgf_print_chunks(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);

View File

@@ -0,0 +1,163 @@
#include <pgf/pgf.h>
#include <pgf/data.h>
#include <math.h>
#include <stdio.h>
typedef struct PgfExprState PgfExprState;
struct PgfExprState {
PgfExprState* cont;
PgfExpr expr;
PgfHypos hypos;
size_t arg_idx;
};
typedef struct {
PgfExprState *st;
prob_t cont_prob;
size_t fun_idx;
PgfCat* abscat;
} PgfExprQState;
typedef struct {
GuPool* tmp_pool;
PgfAbstr* abstract;
GuBuf* pqueue;
PgfExprEnum en;
} PgfReasoner;
static int
cmp_expr_qstate(GuOrder* self, const void* a, const void* b)
{
PgfExprQState *q1 = (PgfExprQState *) a;
PgfExprQState *q2 = (PgfExprQState *) b;
prob_t prob1 = q1->cont_prob-log(q1->abscat->functions[q1->fun_idx].prob);
prob_t prob2 = q2->cont_prob-log(q2->abscat->functions[q2->fun_idx].prob);
if (prob1 < prob2)
return -1;
else if (prob1 > prob2)
return 1;
else
return 0;
}
static GuOrder
pgf_expr_qstate_order = { cmp_expr_qstate };
static bool
pgf_reasoner_cat_init(PgfReasoner* rs,
PgfExprState* cont, prob_t cont_prob, PgfCId cat,
GuPool* pool)
{
// Checking for loops in the chart
if (cont != NULL) {
PgfExprState* st = cont->cont;
while (st != NULL) {
PgfHypo* hypo = gu_seq_index(st->hypos, PgfHypo, st->arg_idx);
if (gu_string_eq(hypo->type->cid, cat))
return false;
st = st->cont;
}
}
PgfCat* abscat = gu_map_get(rs->abstract->cats, &cat, PgfCat*);
if (abscat == NULL) {
return false;
}
PgfExprQState q = {cont, cont_prob, 0, abscat};
gu_buf_heap_push(rs->pqueue, &pgf_expr_qstate_order, &q);
return true;
}
static PgfExprProb*
pgf_reasoner_next(PgfReasoner* rs, GuPool* pool)
{
if (rs->pqueue == NULL)
return NULL;
while (gu_buf_length(rs->pqueue) > 0) {
PgfExprQState q;
gu_buf_heap_pop(rs->pqueue, &pgf_expr_qstate_order, &q);
PgfCId fun = q.abscat->functions[q.fun_idx++].fun;
PgfFunDecl* absfun =
gu_map_get(rs->abstract->funs, &fun, PgfFunDecl*);
if (q.fun_idx < q.abscat->n_functions) {
gu_buf_heap_push(rs->pqueue, &pgf_expr_qstate_order, &q);
}
if (absfun == NULL)
continue;
PgfExprState *st = gu_new(PgfExprState, rs->tmp_pool);
st->cont = q.st;
st->expr =
gu_new_variant_i(pool, PGF_EXPR_FUN,
PgfExprFun,
.fun = fun);
st->hypos = absfun->type->hypos;
st->arg_idx = 0;
for (;;) {
prob_t prob = q.cont_prob+absfun->ep.prob;
if (st->arg_idx < gu_seq_length(st->hypos)) {
PgfHypo *hypo = gu_seq_index(st->hypos, PgfHypo, st->arg_idx);
pgf_reasoner_cat_init(rs, st, prob,
hypo->type->cid, pool);
break;
} else {
PgfExprState* cont = st->cont;
if (cont == NULL) {
PgfExprProb* ep = gu_new(PgfExprProb, pool);
ep->expr = st->expr;
ep->prob = prob;
return ep;
}
st->cont = cont->cont;
st->expr =
gu_new_variant_i(pool, PGF_EXPR_APP,
PgfExprApp,
.fun = cont->expr, .arg = st->expr);
st->hypos = cont->hypos;
st->arg_idx = cont->arg_idx+1;
}
}
}
gu_pool_free(rs->tmp_pool);
rs->tmp_pool = NULL;
rs->pqueue = NULL;
return NULL;
}
static void
pgf_reasoner_enum_next(GuEnum* self, void* to, GuPool* pool)
{
PgfReasoner* pr = gu_container(self, PgfReasoner, en);
*(PgfExprProb**)to = pgf_reasoner_next(pr, pool);
}
PgfExprEnum*
pgf_generate(PgfPGF* pgf, PgfCId cat, GuPool* pool)
{
GuPool* tmp_pool = gu_new_pool();
GuBuf* pqueue = gu_new_buf(PgfExprQState, tmp_pool);
PgfReasoner* rs =
gu_new_i(pool, PgfReasoner,
.tmp_pool = tmp_pool,
.abstract = &pgf->abstract,
.pqueue = pqueue,
.en.next = pgf_reasoner_enum_next);
pgf_reasoner_cat_init(rs, NULL, 0, cat, pool);
return &rs->en;
}

View File

@@ -138,15 +138,19 @@ static PyTypeObject pgf_ExprType = {
typedef struct {
PyObject_HEAD
GuPool* pool;
int max_count;
int counter;
GuEnum* res;
} ParseResultObject;
} ExprIterObject;
static ParseResultObject*
ParseResult_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
static ExprIterObject*
ExprIter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
ParseResultObject* self = (ParseResultObject *)type->tp_alloc(type, 0);
ExprIterObject* self = (ExprIterObject *)type->tp_alloc(type, 0);
if (self != NULL) {
self->pool = NULL;
self->max_count = -1;
self->counter = 0;
self->res = NULL;
}
@@ -154,7 +158,7 @@ ParseResult_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
}
static void
ParseResult_dealloc(ParseResultObject* self)
ExprIter_dealloc(ExprIterObject* self)
{
if (self->pool != NULL)
gu_pool_free(self->pool);
@@ -163,21 +167,26 @@ ParseResult_dealloc(ParseResultObject* self)
}
static int
ParseResult_init(ParseResultObject *self, PyObject *args, PyObject *kwds)
ExprIter_init(ExprIterObject *self, PyObject *args, PyObject *kwds)
{
return -1;
}
static PyObject*
ParseResult_iter(ParseResultObject *self)
ExprIter_iter(ExprIterObject *self)
{
Py_INCREF(self);
return (PyObject*) self;
}
static ExprObject*
ParseResult_iternext(ParseResultObject *self)
static PyObject*
ExprIter_iternext(ExprIterObject *self)
{
if (self->max_count > 0 && self->counter >= self->max_count) {
return NULL;
}
self->counter++;
PgfExprProb* ep = gu_next(self->res, PgfExprProb*, self->pool);
if (ep == NULL)
return NULL;
@@ -190,20 +199,23 @@ ParseResult_iternext(ParseResultObject *self)
pyexpr->master = (PyObject*) self;
Py_INCREF(self);
return pyexpr;
PyObject* res = Py_BuildValue("(f,O)", ep->prob, pyexpr);
Py_DECREF(pyexpr);
return res;
}
static PyMethodDef ParseResult_methods[] = {
static PyMethodDef ExprIter_methods[] = {
{NULL} /* Sentinel */
};
static PyTypeObject pgf_ParseResultType = {
static PyTypeObject pgf_ExprIterType = {
PyObject_HEAD_INIT(NULL)
0, /*ob_size*/
"pgf.ParseResult", /*tp_name*/
sizeof(ParseResultObject), /*tp_basicsize*/
"pgf.ExprIter", /*tp_name*/
sizeof(ExprIterObject), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)ParseResult_dealloc, /*tp_dealloc*/
(destructor)ExprIter_dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
@@ -219,14 +231,14 @@ static PyTypeObject pgf_ParseResultType = {
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
"parsing result", /*tp_doc*/
"an iterator over a sequence of expressions",/*tp_doc*/
0, /*tp_traverse */
0, /*tp_clear */
0, /*tp_richcompare */
0, /*tp_weaklistoffset */
(getiterfunc) ParseResult_iter, /*tp_iter */
(iternextfunc) ParseResult_iternext, /*tp_iternext */
ParseResult_methods, /*tp_methods */
(getiterfunc) ExprIter_iter, /*tp_iter */
(iternextfunc) ExprIter_iternext, /*tp_iternext */
ExprIter_methods, /*tp_methods */
0, /*tp_members */
0, /*tp_getset */
0, /*tp_base */
@@ -234,9 +246,9 @@ static PyTypeObject pgf_ParseResultType = {
0, /*tp_descr_get */
0, /*tp_descr_set */
0, /*tp_dictoffset */
(initproc)ParseResult_init,/*tp_init */
(initproc)ExprIter_init, /*tp_init */
0, /*tp_alloc */
(newfunc) ParseResult_new, /*tp_new */
(newfunc) ExprIter_new, /*tp_new */
};
typedef struct {
@@ -285,7 +297,7 @@ Concr_printName(ConcrObject* self, PyObject *args)
return pyname;
}
static ParseResultObject*
static ExprIterObject*
Concr_parse(ConcrObject* self, PyObject *args)
{
size_t len;
@@ -294,13 +306,15 @@ Concr_parse(ConcrObject* self, PyObject *args)
if (!PyArg_ParseTuple(args, "ss#", &catname_s, &buf, &len))
return NULL;
ParseResultObject* pyres = (ParseResultObject*)
pgf_ExprType.tp_alloc(&pgf_ParseResultType, 0);
ExprIterObject* pyres = (ExprIterObject*)
pgf_ExprType.tp_alloc(&pgf_ExprIterType, 0);
if (pyres == NULL) {
return NULL;
}
pyres->pool = gu_new_pool();
pyres->max_count = -1;
pyres->counter = 0;
GuPool *tmp_pool = gu_local_pool();
GuString catname = gu_str_string(catname_s, tmp_pool);
@@ -658,6 +672,43 @@ PGF_functionsByCat(PGFObject* self, PyObject *args)
return functions;
}
static ExprIterObject*
PGF_generate(PGFObject* self, PyObject *args, PyObject *keywds)
{
static char *kwlist[] = {"cat", "n", NULL};
const char *catname_s;
int max_count = -1;
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|i", kwlist,
&catname_s, &max_count))
return NULL;
ExprIterObject* pyres = (ExprIterObject*)
pgf_ExprType.tp_alloc(&pgf_ExprIterType, 0);
if (pyres == NULL) {
return NULL;
}
pyres->pool = gu_new_pool();
pyres->max_count = max_count;
pyres->counter = 0;
GuPool *tmp_pool = gu_local_pool();
GuString catname = gu_str_string(catname_s, tmp_pool);
pyres->res =
pgf_generate(self->pgf, catname, pyres->pool);
if (pyres->res == NULL) {
Py_DECREF(pyres);
gu_pool_free(tmp_pool);
return NULL;
}
gu_pool_free(tmp_pool);
return pyres;
}
static PyGetSetDef PGF_getseters[] = {
{"abstractName",
(getter)PGF_getAbstractName, NULL,
@@ -688,7 +739,10 @@ static PyMemberDef PGF_members[] = {
static PyMethodDef PGF_methods[] = {
{"functionsByCat", (PyCFunction)PGF_functionsByCat, METH_VARARGS,
"Return the list of functions for a given category"
"Returns the list of functions for a given category"
},
{"generate", (PyCFunction)PGF_generate, METH_VARARGS | METH_KEYWORDS,
"Generates abstract syntax trees of given category in decreasing probability order"
},
{NULL} /* Sentinel */
};
@@ -816,7 +870,7 @@ initpgf(void)
if (PyType_Ready(&pgf_ExprType) < 0)
return;
if (PyType_Ready(&pgf_ParseResultType) < 0)
if (PyType_Ready(&pgf_ExprIterType) < 0)
return;
m = Py_InitModule("pgf", module_methods);
@@ -836,5 +890,5 @@ initpgf(void)
Py_INCREF(&pgf_PGFType);
Py_INCREF(&pgf_ConcrType);
Py_INCREF(&pgf_ExprType);
Py_INCREF(&pgf_ParseResultType);
Py_INCREF(&pgf_ExprIterType);
}

View File

@@ -19,8 +19,8 @@ while True:
break;
try:
for e in gr.languages["ParseEng"].parse(gr.startCat,line):
print e
for (p,e) in gr.languages["ParseEng"].parse(gr.startCat,line):
sys.stdout.write("["+str(p)+"] "+str(e)+"\n")
print gr.languages["ParseEngBul"].linearize(e)
except pgf.ParseError as e:
print e.message