mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
The first prototype for exhaustive generation in the C runtime. The trees are always listed in decreasing probability order. There is also an API for generation from Python
This commit is contained in:
@@ -25,7 +25,6 @@ import GF.Infra.Option
|
||||
import GF.Data.Operations
|
||||
|
||||
import Data.List
|
||||
import Data.Function
|
||||
import Data.Char (isDigit,isSpace)
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.Map as Map
|
||||
@@ -63,8 +62,7 @@ mkCanon2pgf opts gr am = do
|
||||
((m,c),AbsCat (Just (L _ cont)),addr) <- adefs]
|
||||
|
||||
catfuns cat =
|
||||
(map (\x -> (0,snd x)) . sortBy (compare `on` fst))
|
||||
[(loc,i2i f) | ((m,f),AbsFun (Just (L loc ty)) _ _ (Just True),_) <- adefs, snd (GM.valCat ty) == cat]
|
||||
[(0,i2i f) | ((m,f),AbsFun (Just (L _ ty)) _ _ (Just True),_) <- adefs, snd (GM.valCat ty) == cat]
|
||||
|
||||
mkConcr cm = do
|
||||
let cflags = concatOptions [mflags mo | (i,mo) <- modules gr, isModCnc mo,
|
||||
|
||||
@@ -111,6 +111,7 @@ libpgf_la_SOURCES = \
|
||||
pgf/reader.h \
|
||||
pgf/reader.c \
|
||||
pgf/linearize.c \
|
||||
pgf/reasoner.c \
|
||||
pgf/printer.c \
|
||||
pgf/pgf.c \
|
||||
pgf/pgf.h
|
||||
|
||||
@@ -82,11 +82,6 @@ pgf_parser_add_literal(PgfConcr *concr, PgfCId cat,
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/// An enumeration of #PgfExpr elements.
|
||||
typedef GuEnum PgfExprEnum;
|
||||
|
||||
|
||||
/// Retrieve the current parses from the parse state.
|
||||
PgfExprEnum*
|
||||
pgf_parse_result(PgfParseState* state, GuPool* pool);
|
||||
|
||||
@@ -54,6 +54,9 @@ extern GU_DECLARE_TYPE(PgfConcr, struct);
|
||||
#include <pgf/expr.h>
|
||||
#include <pgf/lexer.h>
|
||||
|
||||
/// An enumeration of #PgfExpr elements.
|
||||
typedef GuEnum PgfExprEnum;
|
||||
|
||||
PgfPGF*
|
||||
pgf_read(const char* fpath,
|
||||
GuPool* pool, GuExn* err);
|
||||
@@ -109,9 +112,12 @@ pgf_print_name(PgfConcr*, PgfCId id);
|
||||
void
|
||||
pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err);
|
||||
|
||||
GuEnum*
|
||||
PgfExprEnum*
|
||||
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
|
||||
|
||||
PgfExprEnum*
|
||||
pgf_generate(PgfPGF* pgf, PgfCId cat, GuPool* pool);
|
||||
|
||||
// an experimental function. Please don't use it
|
||||
void
|
||||
pgf_print_chunks(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
|
||||
|
||||
163
src/runtime/c/pgf/reasoner.c
Normal file
163
src/runtime/c/pgf/reasoner.c
Normal file
@@ -0,0 +1,163 @@
|
||||
#include <pgf/pgf.h>
|
||||
#include <pgf/data.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
|
||||
typedef struct PgfExprState PgfExprState;
|
||||
|
||||
struct PgfExprState {
|
||||
PgfExprState* cont;
|
||||
PgfExpr expr;
|
||||
PgfHypos hypos;
|
||||
size_t arg_idx;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
PgfExprState *st;
|
||||
prob_t cont_prob;
|
||||
size_t fun_idx;
|
||||
PgfCat* abscat;
|
||||
} PgfExprQState;
|
||||
|
||||
typedef struct {
|
||||
GuPool* tmp_pool;
|
||||
PgfAbstr* abstract;
|
||||
GuBuf* pqueue;
|
||||
PgfExprEnum en;
|
||||
} PgfReasoner;
|
||||
|
||||
static int
|
||||
cmp_expr_qstate(GuOrder* self, const void* a, const void* b)
|
||||
{
|
||||
PgfExprQState *q1 = (PgfExprQState *) a;
|
||||
PgfExprQState *q2 = (PgfExprQState *) b;
|
||||
|
||||
prob_t prob1 = q1->cont_prob-log(q1->abscat->functions[q1->fun_idx].prob);
|
||||
prob_t prob2 = q2->cont_prob-log(q2->abscat->functions[q2->fun_idx].prob);
|
||||
|
||||
if (prob1 < prob2)
|
||||
return -1;
|
||||
else if (prob1 > prob2)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
static GuOrder
|
||||
pgf_expr_qstate_order = { cmp_expr_qstate };
|
||||
|
||||
static bool
|
||||
pgf_reasoner_cat_init(PgfReasoner* rs,
|
||||
PgfExprState* cont, prob_t cont_prob, PgfCId cat,
|
||||
GuPool* pool)
|
||||
{
|
||||
// Checking for loops in the chart
|
||||
if (cont != NULL) {
|
||||
PgfExprState* st = cont->cont;
|
||||
while (st != NULL) {
|
||||
PgfHypo* hypo = gu_seq_index(st->hypos, PgfHypo, st->arg_idx);
|
||||
if (gu_string_eq(hypo->type->cid, cat))
|
||||
return false;
|
||||
|
||||
st = st->cont;
|
||||
}
|
||||
}
|
||||
|
||||
PgfCat* abscat = gu_map_get(rs->abstract->cats, &cat, PgfCat*);
|
||||
if (abscat == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
PgfExprQState q = {cont, cont_prob, 0, abscat};
|
||||
gu_buf_heap_push(rs->pqueue, &pgf_expr_qstate_order, &q);
|
||||
return true;
|
||||
}
|
||||
|
||||
static PgfExprProb*
|
||||
pgf_reasoner_next(PgfReasoner* rs, GuPool* pool)
|
||||
{
|
||||
if (rs->pqueue == NULL)
|
||||
return NULL;
|
||||
|
||||
while (gu_buf_length(rs->pqueue) > 0) {
|
||||
PgfExprQState q;
|
||||
gu_buf_heap_pop(rs->pqueue, &pgf_expr_qstate_order, &q);
|
||||
|
||||
PgfCId fun = q.abscat->functions[q.fun_idx++].fun;
|
||||
PgfFunDecl* absfun =
|
||||
gu_map_get(rs->abstract->funs, &fun, PgfFunDecl*);
|
||||
|
||||
if (q.fun_idx < q.abscat->n_functions) {
|
||||
gu_buf_heap_push(rs->pqueue, &pgf_expr_qstate_order, &q);
|
||||
}
|
||||
|
||||
if (absfun == NULL)
|
||||
continue;
|
||||
|
||||
PgfExprState *st = gu_new(PgfExprState, rs->tmp_pool);
|
||||
st->cont = q.st;
|
||||
st->expr =
|
||||
gu_new_variant_i(pool, PGF_EXPR_FUN,
|
||||
PgfExprFun,
|
||||
.fun = fun);
|
||||
st->hypos = absfun->type->hypos;
|
||||
st->arg_idx = 0;
|
||||
|
||||
for (;;) {
|
||||
prob_t prob = q.cont_prob+absfun->ep.prob;
|
||||
|
||||
if (st->arg_idx < gu_seq_length(st->hypos)) {
|
||||
PgfHypo *hypo = gu_seq_index(st->hypos, PgfHypo, st->arg_idx);
|
||||
pgf_reasoner_cat_init(rs, st, prob,
|
||||
hypo->type->cid, pool);
|
||||
break;
|
||||
} else {
|
||||
PgfExprState* cont = st->cont;
|
||||
if (cont == NULL) {
|
||||
PgfExprProb* ep = gu_new(PgfExprProb, pool);
|
||||
ep->expr = st->expr;
|
||||
ep->prob = prob;
|
||||
return ep;
|
||||
}
|
||||
|
||||
st->cont = cont->cont;
|
||||
st->expr =
|
||||
gu_new_variant_i(pool, PGF_EXPR_APP,
|
||||
PgfExprApp,
|
||||
.fun = cont->expr, .arg = st->expr);
|
||||
st->hypos = cont->hypos;
|
||||
st->arg_idx = cont->arg_idx+1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
gu_pool_free(rs->tmp_pool);
|
||||
rs->tmp_pool = NULL;
|
||||
rs->pqueue = NULL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_reasoner_enum_next(GuEnum* self, void* to, GuPool* pool)
|
||||
{
|
||||
PgfReasoner* pr = gu_container(self, PgfReasoner, en);
|
||||
*(PgfExprProb**)to = pgf_reasoner_next(pr, pool);
|
||||
}
|
||||
|
||||
PgfExprEnum*
|
||||
pgf_generate(PgfPGF* pgf, PgfCId cat, GuPool* pool)
|
||||
{
|
||||
GuPool* tmp_pool = gu_new_pool();
|
||||
GuBuf* pqueue = gu_new_buf(PgfExprQState, tmp_pool);
|
||||
|
||||
PgfReasoner* rs =
|
||||
gu_new_i(pool, PgfReasoner,
|
||||
.tmp_pool = tmp_pool,
|
||||
.abstract = &pgf->abstract,
|
||||
.pqueue = pqueue,
|
||||
.en.next = pgf_reasoner_enum_next);
|
||||
|
||||
pgf_reasoner_cat_init(rs, NULL, 0, cat, pool);
|
||||
|
||||
return &rs->en;
|
||||
}
|
||||
@@ -138,15 +138,19 @@ static PyTypeObject pgf_ExprType = {
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
GuPool* pool;
|
||||
int max_count;
|
||||
int counter;
|
||||
GuEnum* res;
|
||||
} ParseResultObject;
|
||||
} ExprIterObject;
|
||||
|
||||
static ParseResultObject*
|
||||
ParseResult_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||
static ExprIterObject*
|
||||
ExprIter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
ParseResultObject* self = (ParseResultObject *)type->tp_alloc(type, 0);
|
||||
ExprIterObject* self = (ExprIterObject *)type->tp_alloc(type, 0);
|
||||
if (self != NULL) {
|
||||
self->pool = NULL;
|
||||
self->max_count = -1;
|
||||
self->counter = 0;
|
||||
self->res = NULL;
|
||||
}
|
||||
|
||||
@@ -154,7 +158,7 @@ ParseResult_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||
}
|
||||
|
||||
static void
|
||||
ParseResult_dealloc(ParseResultObject* self)
|
||||
ExprIter_dealloc(ExprIterObject* self)
|
||||
{
|
||||
if (self->pool != NULL)
|
||||
gu_pool_free(self->pool);
|
||||
@@ -163,21 +167,26 @@ ParseResult_dealloc(ParseResultObject* self)
|
||||
}
|
||||
|
||||
static int
|
||||
ParseResult_init(ParseResultObject *self, PyObject *args, PyObject *kwds)
|
||||
ExprIter_init(ExprIterObject *self, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
ParseResult_iter(ParseResultObject *self)
|
||||
ExprIter_iter(ExprIterObject *self)
|
||||
{
|
||||
Py_INCREF(self);
|
||||
return (PyObject*) self;
|
||||
}
|
||||
|
||||
static ExprObject*
|
||||
ParseResult_iternext(ParseResultObject *self)
|
||||
static PyObject*
|
||||
ExprIter_iternext(ExprIterObject *self)
|
||||
{
|
||||
if (self->max_count > 0 && self->counter >= self->max_count) {
|
||||
return NULL;
|
||||
}
|
||||
self->counter++;
|
||||
|
||||
PgfExprProb* ep = gu_next(self->res, PgfExprProb*, self->pool);
|
||||
if (ep == NULL)
|
||||
return NULL;
|
||||
@@ -190,20 +199,23 @@ ParseResult_iternext(ParseResultObject *self)
|
||||
pyexpr->master = (PyObject*) self;
|
||||
Py_INCREF(self);
|
||||
|
||||
return pyexpr;
|
||||
PyObject* res = Py_BuildValue("(f,O)", ep->prob, pyexpr);
|
||||
Py_DECREF(pyexpr);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static PyMethodDef ParseResult_methods[] = {
|
||||
static PyMethodDef ExprIter_methods[] = {
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
static PyTypeObject pgf_ParseResultType = {
|
||||
static PyTypeObject pgf_ExprIterType = {
|
||||
PyObject_HEAD_INIT(NULL)
|
||||
0, /*ob_size*/
|
||||
"pgf.ParseResult", /*tp_name*/
|
||||
sizeof(ParseResultObject), /*tp_basicsize*/
|
||||
"pgf.ExprIter", /*tp_name*/
|
||||
sizeof(ExprIterObject), /*tp_basicsize*/
|
||||
0, /*tp_itemsize*/
|
||||
(destructor)ParseResult_dealloc, /*tp_dealloc*/
|
||||
(destructor)ExprIter_dealloc, /*tp_dealloc*/
|
||||
0, /*tp_print*/
|
||||
0, /*tp_getattr*/
|
||||
0, /*tp_setattr*/
|
||||
@@ -219,14 +231,14 @@ static PyTypeObject pgf_ParseResultType = {
|
||||
0, /*tp_setattro*/
|
||||
0, /*tp_as_buffer*/
|
||||
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
|
||||
"parsing result", /*tp_doc*/
|
||||
"an iterator over a sequence of expressions",/*tp_doc*/
|
||||
0, /*tp_traverse */
|
||||
0, /*tp_clear */
|
||||
0, /*tp_richcompare */
|
||||
0, /*tp_weaklistoffset */
|
||||
(getiterfunc) ParseResult_iter, /*tp_iter */
|
||||
(iternextfunc) ParseResult_iternext, /*tp_iternext */
|
||||
ParseResult_methods, /*tp_methods */
|
||||
(getiterfunc) ExprIter_iter, /*tp_iter */
|
||||
(iternextfunc) ExprIter_iternext, /*tp_iternext */
|
||||
ExprIter_methods, /*tp_methods */
|
||||
0, /*tp_members */
|
||||
0, /*tp_getset */
|
||||
0, /*tp_base */
|
||||
@@ -234,9 +246,9 @@ static PyTypeObject pgf_ParseResultType = {
|
||||
0, /*tp_descr_get */
|
||||
0, /*tp_descr_set */
|
||||
0, /*tp_dictoffset */
|
||||
(initproc)ParseResult_init,/*tp_init */
|
||||
(initproc)ExprIter_init, /*tp_init */
|
||||
0, /*tp_alloc */
|
||||
(newfunc) ParseResult_new, /*tp_new */
|
||||
(newfunc) ExprIter_new, /*tp_new */
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
@@ -285,7 +297,7 @@ Concr_printName(ConcrObject* self, PyObject *args)
|
||||
return pyname;
|
||||
}
|
||||
|
||||
static ParseResultObject*
|
||||
static ExprIterObject*
|
||||
Concr_parse(ConcrObject* self, PyObject *args)
|
||||
{
|
||||
size_t len;
|
||||
@@ -294,13 +306,15 @@ Concr_parse(ConcrObject* self, PyObject *args)
|
||||
if (!PyArg_ParseTuple(args, "ss#", &catname_s, &buf, &len))
|
||||
return NULL;
|
||||
|
||||
ParseResultObject* pyres = (ParseResultObject*)
|
||||
pgf_ExprType.tp_alloc(&pgf_ParseResultType, 0);
|
||||
ExprIterObject* pyres = (ExprIterObject*)
|
||||
pgf_ExprType.tp_alloc(&pgf_ExprIterType, 0);
|
||||
if (pyres == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pyres->pool = gu_new_pool();
|
||||
pyres->max_count = -1;
|
||||
pyres->counter = 0;
|
||||
|
||||
GuPool *tmp_pool = gu_local_pool();
|
||||
GuString catname = gu_str_string(catname_s, tmp_pool);
|
||||
@@ -658,6 +672,43 @@ PGF_functionsByCat(PGFObject* self, PyObject *args)
|
||||
return functions;
|
||||
}
|
||||
|
||||
static ExprIterObject*
|
||||
PGF_generate(PGFObject* self, PyObject *args, PyObject *keywds)
|
||||
{
|
||||
static char *kwlist[] = {"cat", "n", NULL};
|
||||
|
||||
const char *catname_s;
|
||||
int max_count = -1;
|
||||
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|i", kwlist,
|
||||
&catname_s, &max_count))
|
||||
return NULL;
|
||||
|
||||
ExprIterObject* pyres = (ExprIterObject*)
|
||||
pgf_ExprType.tp_alloc(&pgf_ExprIterType, 0);
|
||||
if (pyres == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pyres->pool = gu_new_pool();
|
||||
pyres->max_count = max_count;
|
||||
pyres->counter = 0;
|
||||
|
||||
GuPool *tmp_pool = gu_local_pool();
|
||||
GuString catname = gu_str_string(catname_s, tmp_pool);
|
||||
|
||||
pyres->res =
|
||||
pgf_generate(self->pgf, catname, pyres->pool);
|
||||
if (pyres->res == NULL) {
|
||||
Py_DECREF(pyres);
|
||||
gu_pool_free(tmp_pool);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
|
||||
return pyres;
|
||||
}
|
||||
|
||||
static PyGetSetDef PGF_getseters[] = {
|
||||
{"abstractName",
|
||||
(getter)PGF_getAbstractName, NULL,
|
||||
@@ -688,7 +739,10 @@ static PyMemberDef PGF_members[] = {
|
||||
|
||||
static PyMethodDef PGF_methods[] = {
|
||||
{"functionsByCat", (PyCFunction)PGF_functionsByCat, METH_VARARGS,
|
||||
"Return the list of functions for a given category"
|
||||
"Returns the list of functions for a given category"
|
||||
},
|
||||
{"generate", (PyCFunction)PGF_generate, METH_VARARGS | METH_KEYWORDS,
|
||||
"Generates abstract syntax trees of given category in decreasing probability order"
|
||||
},
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
@@ -816,7 +870,7 @@ initpgf(void)
|
||||
if (PyType_Ready(&pgf_ExprType) < 0)
|
||||
return;
|
||||
|
||||
if (PyType_Ready(&pgf_ParseResultType) < 0)
|
||||
if (PyType_Ready(&pgf_ExprIterType) < 0)
|
||||
return;
|
||||
|
||||
m = Py_InitModule("pgf", module_methods);
|
||||
@@ -836,5 +890,5 @@ initpgf(void)
|
||||
Py_INCREF(&pgf_PGFType);
|
||||
Py_INCREF(&pgf_ConcrType);
|
||||
Py_INCREF(&pgf_ExprType);
|
||||
Py_INCREF(&pgf_ParseResultType);
|
||||
Py_INCREF(&pgf_ExprIterType);
|
||||
}
|
||||
|
||||
@@ -19,8 +19,8 @@ while True:
|
||||
break;
|
||||
|
||||
try:
|
||||
for e in gr.languages["ParseEng"].parse(gr.startCat,line):
|
||||
print e
|
||||
for (p,e) in gr.languages["ParseEng"].parse(gr.startCat,line):
|
||||
sys.stdout.write("["+str(p)+"] "+str(e)+"\n")
|
||||
print gr.languages["ParseEngBul"].linearize(e)
|
||||
except pgf.ParseError as e:
|
||||
print e.message
|
||||
|
||||
Reference in New Issue
Block a user