forked from GitHub/gf-core
support for BIND tokens in the Python bindings
This commit is contained in:
@@ -44,6 +44,7 @@ typedef struct {
|
|||||||
PgfParseState *before;
|
PgfParseState *before;
|
||||||
PgfParseState *after;
|
PgfParseState *after;
|
||||||
PgfToken prefix;
|
PgfToken prefix;
|
||||||
|
bool prefix_bind;
|
||||||
PgfTokenProb* tp;
|
PgfTokenProb* tp;
|
||||||
PgfExprEnum en; // enumeration for the generated trees/tokens
|
PgfExprEnum en; // enumeration for the generated trees/tokens
|
||||||
#ifdef PGF_COUNTS_DEBUG
|
#ifdef PGF_COUNTS_DEBUG
|
||||||
@@ -1009,6 +1010,7 @@ pgf_new_parse_state(PgfParsing* ps, size_t start_offset,
|
|||||||
(start_offset == end_offset);
|
(start_offset == end_offset);
|
||||||
state->start_offset = start_offset;
|
state->start_offset = start_offset;
|
||||||
state->end_offset = end_offset;
|
state->end_offset = end_offset;
|
||||||
|
|
||||||
state->viterbi_prob = viterbi_prob;
|
state->viterbi_prob = viterbi_prob;
|
||||||
state->lexicon_idx =
|
state->lexicon_idx =
|
||||||
gu_new_buf(PgfLexiconIdxEntry, ps->pool);
|
gu_new_buf(PgfLexiconIdxEntry, ps->pool);
|
||||||
@@ -1381,20 +1383,30 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PGF_SYMBOL_BIND: {
|
case PGF_SYMBOL_BIND: {
|
||||||
if (ps->before->start_offset == ps->before->end_offset &&
|
if (!ps->prefix_bind && ps->prefix != NULL && *(ps->sentence + ps->before->end_offset) == 0) {
|
||||||
ps->before->needs_bind) {
|
PgfProductionApply* papp = gu_variant_data(item->prod);
|
||||||
PgfParseState* state =
|
|
||||||
pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD,
|
ps->tp = gu_new(PgfTokenProb, ps->out_pool);
|
||||||
item->inside_prob+item->conts->outside_prob);
|
ps->tp->tok = NULL;
|
||||||
if (state != NULL) {
|
ps->tp->cat = item->conts->ccat->cnccat->abscat->name;
|
||||||
pgf_item_advance(item, ps->pool);
|
ps->tp->fun = papp->fun->absfun->name;
|
||||||
gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
|
ps->tp->prob = item->inside_prob + item->conts->outside_prob;
|
||||||
} else {
|
} else {
|
||||||
pgf_item_free(ps, item);
|
if (ps->before->start_offset == ps->before->end_offset &&
|
||||||
}
|
ps->before->needs_bind) {
|
||||||
} else {
|
PgfParseState* state =
|
||||||
pgf_item_free(ps, item);
|
pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD,
|
||||||
}
|
item->inside_prob+item->conts->outside_prob);
|
||||||
|
if (state != NULL) {
|
||||||
|
pgf_item_advance(item, ps->pool);
|
||||||
|
gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
|
||||||
|
} else {
|
||||||
|
pgf_item_free(ps, item);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
pgf_item_free(ps, item);
|
||||||
|
}
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PGF_SYMBOL_SOFT_BIND:
|
case PGF_SYMBOL_SOFT_BIND:
|
||||||
@@ -2337,7 +2349,8 @@ pgf_parser_completions_next(GuEnum* self, void* to, GuPool* pool)
|
|||||||
|
|
||||||
PGF_API GuEnum*
|
PGF_API GuEnum*
|
||||||
pgf_complete(PgfConcr* concr, PgfType* type, GuString sentence,
|
pgf_complete(PgfConcr* concr, PgfType* type, GuString sentence,
|
||||||
GuString prefix, GuExn *err, GuPool* pool)
|
GuString prefix, bool prefix_bind,
|
||||||
|
GuExn *err, GuPool* pool)
|
||||||
{
|
{
|
||||||
if (concr->sequences == NULL ||
|
if (concr->sequences == NULL ||
|
||||||
concr->cnccats == NULL) {
|
concr->cnccats == NULL) {
|
||||||
@@ -2377,6 +2390,7 @@ pgf_complete(PgfConcr* concr, PgfType* type, GuString sentence,
|
|||||||
// Now begin enumerating the completions
|
// Now begin enumerating the completions
|
||||||
ps->en.next = pgf_parser_completions_next;
|
ps->en.next = pgf_parser_completions_next;
|
||||||
ps->prefix = prefix;
|
ps->prefix = prefix;
|
||||||
|
ps->prefix_bind = prefix_bind;
|
||||||
ps->tp = NULL;
|
ps->tp = NULL;
|
||||||
return &ps->en;
|
return &ps->en;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -251,7 +251,8 @@ typedef struct {
|
|||||||
|
|
||||||
PGF_API_DECL GuEnum*
|
PGF_API_DECL GuEnum*
|
||||||
pgf_complete(PgfConcr* concr, PgfType* type, GuString string,
|
pgf_complete(PgfConcr* concr, PgfType* type, GuString string,
|
||||||
GuString prefix, GuExn* err, GuPool* pool);
|
GuString prefix, bool prefix_bind,
|
||||||
|
GuExn* err, GuPool* pool);
|
||||||
|
|
||||||
typedef struct PgfLiteralCallback PgfLiteralCallback;
|
typedef struct PgfLiteralCallback PgfLiteralCallback;
|
||||||
|
|
||||||
|
|||||||
@@ -1026,7 +1026,10 @@ complete lang (Type ctype _) sent pfx =
|
|||||||
touchConcr lang
|
touchConcr lang
|
||||||
return []
|
return []
|
||||||
else do
|
else do
|
||||||
tok <- peekUtf8CString =<< (#peek PgfTokenProb, tok) cmpEntry
|
p_tok <- (#peek PgfTokenProb, tok) cmpEntry
|
||||||
|
tok <- if p_tok == nullPtr
|
||||||
|
then return "&+"
|
||||||
|
else peekUtf8CString p_tok
|
||||||
cat <- peekUtf8CString =<< (#peek PgfTokenProb, cat) cmpEntry
|
cat <- peekUtf8CString =<< (#peek PgfTokenProb, cat) cmpEntry
|
||||||
fun <- peekUtf8CString =<< (#peek PgfTokenProb, fun) cmpEntry
|
fun <- peekUtf8CString =<< (#peek PgfTokenProb, fun) cmpEntry
|
||||||
prob <- (#peek PgfTokenProb, prob) cmpEntry
|
prob <- (#peek PgfTokenProb, prob) cmpEntry
|
||||||
|
|||||||
@@ -1155,6 +1155,80 @@ Iter_fetch_expr(IterObject* self)
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
PyObject_HEAD
|
||||||
|
} BINDObject;
|
||||||
|
|
||||||
|
static PyObject *BIND_instance = NULL;
|
||||||
|
|
||||||
|
static void
|
||||||
|
BIND_dealloc(PyTypeObject *self)
|
||||||
|
{
|
||||||
|
BIND_instance = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
BIND_repr(BINDObject *self)
|
||||||
|
{
|
||||||
|
return PyString_FromString("pgf.BIND");
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
BIND_str(BINDObject *self)
|
||||||
|
{
|
||||||
|
return PyString_FromString("&+");
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
BIND_alloc(PyTypeObject *self, Py_ssize_t nitems)
|
||||||
|
{
|
||||||
|
if (BIND_instance == NULL)
|
||||||
|
BIND_instance = PyType_GenericAlloc(self, nitems);
|
||||||
|
return BIND_instance;
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyTypeObject pgf_BINDType = {
|
||||||
|
PyVarObject_HEAD_INIT(NULL, 0)
|
||||||
|
//0, /*ob_size*/
|
||||||
|
"pgf.BINDType", /*tp_name*/
|
||||||
|
sizeof(BINDObject), /*tp_basicsize*/
|
||||||
|
0, /*tp_itemsize*/
|
||||||
|
(destructor) BIND_dealloc, /*tp_dealloc*/
|
||||||
|
0, /*tp_print*/
|
||||||
|
0, /*tp_getattr*/
|
||||||
|
0, /*tp_setattr*/
|
||||||
|
0, /*tp_compare*/
|
||||||
|
(reprfunc) BIND_repr, /*tp_repr*/
|
||||||
|
0, /*tp_as_number*/
|
||||||
|
0, /*tp_as_sequence*/
|
||||||
|
0, /*tp_as_mapping*/
|
||||||
|
0, /*tp_hash */
|
||||||
|
0, /*tp_call*/
|
||||||
|
(reprfunc) BIND_str, /*tp_str*/
|
||||||
|
0, /*tp_getattro*/
|
||||||
|
0, /*tp_setattro*/
|
||||||
|
0, /*tp_as_buffer*/
|
||||||
|
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
|
||||||
|
"a marker for BIND in a bracketed string", /*tp_doc*/
|
||||||
|
0, /*tp_traverse */
|
||||||
|
0, /*tp_clear */
|
||||||
|
0, /*tp_richcompare */
|
||||||
|
0, /*tp_weaklistoffset */
|
||||||
|
0, /*tp_iter */
|
||||||
|
0, /*tp_iternext */
|
||||||
|
0, /*tp_methods */
|
||||||
|
0, /*tp_members */
|
||||||
|
0, /*tp_getset */
|
||||||
|
0, /*tp_base */
|
||||||
|
0, /*tp_dict */
|
||||||
|
0, /*tp_descr_get */
|
||||||
|
0, /*tp_descr_set */
|
||||||
|
0, /*tp_dictoffset */
|
||||||
|
0, /*tp_init */
|
||||||
|
BIND_alloc, /*tp_alloc */
|
||||||
|
0, /*tp_new */
|
||||||
|
};
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
Iter_fetch_token(IterObject* self)
|
Iter_fetch_token(IterObject* self)
|
||||||
{
|
{
|
||||||
@@ -1162,7 +1236,9 @@ Iter_fetch_token(IterObject* self)
|
|||||||
if (tp == NULL)
|
if (tp == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
PyObject* py_tok = PyString_FromString(tp->tok);
|
PyObject* py_tok =
|
||||||
|
(tp->tok != NULL) ? PyString_FromString(tp->tok)
|
||||||
|
: pgf_BINDType.tp_alloc(&pgf_BINDType, 0);
|
||||||
PyObject* py_cat = PyString_FromString(tp->cat);
|
PyObject* py_cat = PyString_FromString(tp->cat);
|
||||||
PyObject* py_fun = PyString_FromString(tp->fun);
|
PyObject* py_fun = PyString_FromString(tp->fun);
|
||||||
PyObject* res = Py_BuildValue("(f,O,O,O)", tp->prob, py_tok, py_cat, py_fun);
|
PyObject* res = Py_BuildValue("(f,O,O,O)", tp->prob, py_tok, py_cat, py_fun);
|
||||||
@@ -1599,16 +1675,18 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
|
|||||||
static IterObject*
|
static IterObject*
|
||||||
Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds)
|
Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||||
{
|
{
|
||||||
static char *kwlist[] = {"sentence", "cat", "prefix", "n", NULL};
|
static char *kwlist[] = {"sentence", "cat", "prefix", "n", NULL};
|
||||||
|
|
||||||
const char *sentence = NULL;
|
PyObject* sentence0 = NULL;
|
||||||
|
char* sentence = NULL;
|
||||||
PyObject* start = NULL;
|
PyObject* start = NULL;
|
||||||
GuString prefix = "";
|
GuString prefix = "";
|
||||||
int max_count = -1;
|
bool prefix_bind = false;
|
||||||
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|Osi", kwlist,
|
int max_count = -1;
|
||||||
&sentence, &start,
|
if (!PyArg_ParseTupleAndKeywords(args, keywds, "O|Osi", kwlist,
|
||||||
&prefix, &max_count))
|
&sentence0, &start,
|
||||||
return NULL;
|
&prefix, &max_count))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
IterObject* pyres = (IterObject*)
|
IterObject* pyres = (IterObject*)
|
||||||
pgf_IterType.tp_alloc(&pgf_IterType, 0);
|
pgf_IterType.tp_alloc(&pgf_IterType, 0);
|
||||||
@@ -1630,6 +1708,20 @@ Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds)
|
|||||||
|
|
||||||
GuExn* parse_err = gu_new_exn(tmp_pool);
|
GuExn* parse_err = gu_new_exn(tmp_pool);
|
||||||
|
|
||||||
|
if (PyTuple_Check(sentence0) &&
|
||||||
|
PyTuple_GET_SIZE(sentence0) == 2 &&
|
||||||
|
PyTuple_GET_ITEM(sentence0,1) == pgf_BINDType.tp_alloc(&pgf_BINDType, 0))
|
||||||
|
{
|
||||||
|
sentence0 = PyTuple_GET_ITEM(sentence0,0);
|
||||||
|
prefix_bind = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (PyUnicode_Check(sentence0)) {
|
||||||
|
sentence = PyUnicode_AsUTF8(sentence0);
|
||||||
|
} else {
|
||||||
|
PyErr_SetString(PyExc_TypeError, "The sentence must be either a string or a tuple of string and pgf.BIND");
|
||||||
|
}
|
||||||
|
|
||||||
PgfType* type;
|
PgfType* type;
|
||||||
if (start == NULL) {
|
if (start == NULL) {
|
||||||
type = pgf_start_cat(self->grammar->pgf, pyres->pool);
|
type = pgf_start_cat(self->grammar->pgf, pyres->pool);
|
||||||
@@ -1642,7 +1734,7 @@ Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds)
|
|||||||
}
|
}
|
||||||
|
|
||||||
pyres->res =
|
pyres->res =
|
||||||
pgf_complete(self->concr, type, sentence, prefix, parse_err, pyres->pool);
|
pgf_complete(self->concr, type, sentence, prefix, prefix_bind, parse_err, pyres->pool);
|
||||||
|
|
||||||
if (!gu_ok(parse_err)) {
|
if (!gu_ok(parse_err)) {
|
||||||
Py_DECREF(pyres);
|
Py_DECREF(pyres);
|
||||||
@@ -2077,58 +2169,6 @@ static PyTypeObject pgf_BracketType = {
|
|||||||
0, /*tp_new */
|
0, /*tp_new */
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
PyObject_HEAD
|
|
||||||
} BINDObject;
|
|
||||||
|
|
||||||
static PyObject *
|
|
||||||
BIND_repr(BINDObject *self)
|
|
||||||
{
|
|
||||||
return PyString_FromString("&+");
|
|
||||||
}
|
|
||||||
|
|
||||||
static PyTypeObject pgf_BINDType = {
|
|
||||||
PyVarObject_HEAD_INIT(NULL, 0)
|
|
||||||
//0, /*ob_size*/
|
|
||||||
"pgf.BIND", /*tp_name*/
|
|
||||||
sizeof(BINDObject), /*tp_basicsize*/
|
|
||||||
0, /*tp_itemsize*/
|
|
||||||
0, /*tp_dealloc*/
|
|
||||||
0, /*tp_print*/
|
|
||||||
0, /*tp_getattr*/
|
|
||||||
0, /*tp_setattr*/
|
|
||||||
0, /*tp_compare*/
|
|
||||||
0, /*tp_repr*/
|
|
||||||
0, /*tp_as_number*/
|
|
||||||
0, /*tp_as_sequence*/
|
|
||||||
0, /*tp_as_mapping*/
|
|
||||||
0, /*tp_hash */
|
|
||||||
0, /*tp_call*/
|
|
||||||
(reprfunc) BIND_repr, /*tp_str*/
|
|
||||||
0, /*tp_getattro*/
|
|
||||||
0, /*tp_setattro*/
|
|
||||||
0, /*tp_as_buffer*/
|
|
||||||
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
|
|
||||||
"a marker for BIND in a bracketed string", /*tp_doc*/
|
|
||||||
0, /*tp_traverse */
|
|
||||||
0, /*tp_clear */
|
|
||||||
0, /*tp_richcompare */
|
|
||||||
0, /*tp_weaklistoffset */
|
|
||||||
0, /*tp_iter */
|
|
||||||
0, /*tp_iternext */
|
|
||||||
0, /*tp_methods */
|
|
||||||
0, /*tp_members */
|
|
||||||
0, /*tp_getset */
|
|
||||||
0, /*tp_base */
|
|
||||||
0, /*tp_dict */
|
|
||||||
0, /*tp_descr_get */
|
|
||||||
0, /*tp_descr_set */
|
|
||||||
0, /*tp_dictoffset */
|
|
||||||
0, /*tp_init */
|
|
||||||
0, /*tp_alloc */
|
|
||||||
0, /*tp_new */
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
PgfLinFuncs* funcs;
|
PgfLinFuncs* funcs;
|
||||||
GuBuf* stack;
|
GuBuf* stack;
|
||||||
@@ -2726,6 +2766,11 @@ static PyMethodDef Concr_methods[] = {
|
|||||||
},
|
},
|
||||||
{"complete", (PyCFunction)Concr_complete, METH_VARARGS | METH_KEYWORDS,
|
{"complete", (PyCFunction)Concr_complete, METH_VARARGS | METH_KEYWORDS,
|
||||||
"Parses a partial string and returns a list with the top n possible next tokens"
|
"Parses a partial string and returns a list with the top n possible next tokens"
|
||||||
|
"Named arguments:\n"
|
||||||
|
"- sentence (string or a (string,pgf.BIND) tuple. The later indicates that the sentence ends with a BIND token)\n"
|
||||||
|
"- cat (string); OPTIONAL, default: the startcat of the grammar\n"
|
||||||
|
"- prefix (string); OPTIONAL, the prefix of predicted tokens"
|
||||||
|
"- n (int), max. number of predicted tokens"
|
||||||
},
|
},
|
||||||
{"parseval", (PyCFunction)Concr_parseval, METH_VARARGS,
|
{"parseval", (PyCFunction)Concr_parseval, METH_VARARGS,
|
||||||
"Computes precision, recall and exact match for the parser on a given abstract tree"
|
"Computes precision, recall and exact match for the parser on a given abstract tree"
|
||||||
@@ -3670,7 +3715,7 @@ MOD_INIT(pgf)
|
|||||||
PyModule_AddObject(m, "Bracket", (PyObject *) &pgf_BracketType);
|
PyModule_AddObject(m, "Bracket", (PyObject *) &pgf_BracketType);
|
||||||
Py_INCREF(&pgf_BracketType);
|
Py_INCREF(&pgf_BracketType);
|
||||||
|
|
||||||
PyModule_AddObject(m, "BIND", (PyObject *) &pgf_BINDType);
|
PyModule_AddObject(m, "BIND", pgf_BINDType.tp_alloc(&pgf_BINDType, 0));
|
||||||
Py_INCREF(&pgf_BINDType);
|
Py_INCREF(&pgf_BINDType);
|
||||||
|
|
||||||
return MOD_SUCCESS_VAL(m);
|
return MOD_SUCCESS_VAL(m);
|
||||||
|
|||||||
Reference in New Issue
Block a user