mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-10 05:29:30 -06:00
a major redesign in the C runtime. The parser and the linearizer now fully support BIND. The following things are still broken: parseval, word completion, handling 'pre', the robust mode
This commit is contained in:
@@ -1046,48 +1046,6 @@ Concr_printName(ConcrObject* self, PyObject *args)
|
||||
return PyString_FromString(pgf_print_name(self->concr, name));
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
PgfLexer base;
|
||||
PyObject* pylexer;
|
||||
GuPool* pool;
|
||||
} PgfPythonLexer;
|
||||
|
||||
GU_DEFINE_TYPE(PyPgfLexerExn, abstract, _);
|
||||
|
||||
static PgfToken
|
||||
pypgf_python_lexer_read_token(PgfLexer *base, GuExn* err)
|
||||
{
|
||||
PgfPythonLexer* lexer = (PgfPythonLexer*) base;
|
||||
lexer->base.tok = "";
|
||||
|
||||
PyObject* item = PyIter_Next(lexer->pylexer);
|
||||
if (item == NULL)
|
||||
if (PyErr_Occurred() != NULL)
|
||||
gu_raise(err, PyPgfLexerExn);
|
||||
else
|
||||
gu_raise(err, GuEOF);
|
||||
else {
|
||||
const char* str = PyString_AsString(item);
|
||||
if (str == NULL)
|
||||
gu_raise(err, PyPgfLexerExn);
|
||||
else
|
||||
lexer->base.tok = gu_string_copy(str, lexer->pool);
|
||||
}
|
||||
|
||||
return lexer->base.tok;
|
||||
}
|
||||
|
||||
static PgfLexer*
|
||||
pypgf_new_python_lexer(PyObject* pylexer, GuPool* pool)
|
||||
{
|
||||
PgfPythonLexer* lexer = gu_new(PgfPythonLexer, pool);
|
||||
lexer->base.read_token = pypgf_python_lexer_read_token;
|
||||
lexer->base.tok = "";
|
||||
lexer->pylexer = pylexer;
|
||||
lexer->pool = pool;
|
||||
return ((PgfLexer*) lexer);
|
||||
}
|
||||
|
||||
#if ( (PY_VERSION_HEX < 0x02070000) \
|
||||
|| ((PY_VERSION_HEX >= 0x03000000) \
|
||||
&& (PY_VERSION_HEX < 0x03010000)) )
|
||||
@@ -1114,35 +1072,19 @@ void pypgf_container_descructor(PyObject *capsule)
|
||||
static IterObject*
|
||||
Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||
{
|
||||
static char *kwlist[] = {"sentence", "tokens", "cat", "n", "heuristics", NULL};
|
||||
static char *kwlist[] = {"sentence", "cat", "n", "heuristics", NULL};
|
||||
|
||||
int len;
|
||||
const uint8_t *buf = NULL;
|
||||
PyObject* py_lexer = NULL;
|
||||
const char *sentence = NULL;
|
||||
PgfCId catname = pgf_start_cat(self->grammar->pgf);
|
||||
int max_count = -1;
|
||||
double heuristics = -1;
|
||||
if (!PyArg_ParseTupleAndKeywords(args, keywds, "|s#Osid", kwlist,
|
||||
&buf, &len, &py_lexer, &catname, &max_count, &heuristics))
|
||||
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|sid", kwlist,
|
||||
&sentence, &catname, &max_count, &heuristics))
|
||||
return NULL;
|
||||
|
||||
if ((buf == NULL && py_lexer == NULL) ||
|
||||
(buf != NULL && py_lexer != NULL)) {
|
||||
PyErr_SetString(PyExc_TypeError, "either the sentence or the tokens argument must be provided");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (py_lexer != NULL) {
|
||||
// get an iterator out of the iterable object
|
||||
py_lexer = PyObject_GetIter(py_lexer);
|
||||
if (py_lexer == NULL)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
IterObject* pyres = (IterObject*)
|
||||
pgf_IterType.tp_alloc(&pgf_IterType, 0);
|
||||
if (pyres == NULL) {
|
||||
Py_XDECREF(py_lexer);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -1160,30 +1102,22 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||
pyres->counter = 0;
|
||||
pyres->fetch = Iter_fetch_expr;
|
||||
|
||||
PgfLexer *lexer = NULL;
|
||||
if (buf != NULL) {
|
||||
GuIn* in = gu_data_in(buf, len, pyres->pool);
|
||||
lexer = pgf_new_simple_lexer(in, pyres->pool);
|
||||
}
|
||||
if (py_lexer != NULL) {
|
||||
lexer = pypgf_new_python_lexer(py_lexer, pyres->pool);
|
||||
}
|
||||
GuExn* parse_err = gu_new_exn(NULL, gu_kind(type), pyres->pool);
|
||||
|
||||
pyres->res =
|
||||
pgf_parse_with_heuristics(self->concr, catname, lexer,
|
||||
heuristics, pyres->pool, out_pool);
|
||||
pgf_parse_with_heuristics(self->concr, catname, sentence,
|
||||
heuristics, parse_err,
|
||||
pyres->pool, out_pool);
|
||||
|
||||
if (pyres->res == NULL) {
|
||||
PgfToken tok =
|
||||
pgf_lexer_current_token(lexer);
|
||||
|
||||
if (*tok == 0)
|
||||
PyErr_SetString(PGFError, "The sentence cannot be parsed");
|
||||
else {
|
||||
if (!gu_ok(parse_err)) {
|
||||
if (gu_exn_caught(parse_err) == gu_type(PgfExn)) {
|
||||
GuString msg = (GuString) gu_exn_caught_data(parse_err);
|
||||
PyErr_SetString(PGFError, msg);
|
||||
} else if (gu_exn_caught(parse_err) == gu_type(PgfParseError)) {
|
||||
GuString tok = (GuString) gu_exn_caught_data(parse_err);
|
||||
PyObject* py_tok = PyString_FromString(tok);
|
||||
PyObject_SetAttrString(ParseError, "token", py_tok);
|
||||
PyErr_Format(ParseError, "Unexpected token: \"%s\"",
|
||||
PyString_AsString(py_tok));
|
||||
PyErr_Format(ParseError, "Unexpected token: \"%s\"", tok);
|
||||
Py_DECREF(py_tok);
|
||||
}
|
||||
|
||||
@@ -1191,45 +1125,26 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||
pyres = NULL;
|
||||
}
|
||||
|
||||
Py_XDECREF(py_lexer);
|
||||
|
||||
return pyres;
|
||||
}
|
||||
|
||||
static IterObject*
|
||||
Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||
{
|
||||
static char *kwlist[] = {"sentence", "tokens", "cat",
|
||||
"prefix", "n", NULL};
|
||||
static char *kwlist[] = {"sentence", "cat", "prefix", "n", NULL};
|
||||
|
||||
int len;
|
||||
const uint8_t *buf = NULL;
|
||||
PyObject* py_lexer = NULL;
|
||||
const char *sentence = NULL;
|
||||
GuString catname = pgf_start_cat(self->grammar->pgf);
|
||||
GuString prefix = "";
|
||||
int max_count = -1;
|
||||
if (!PyArg_ParseTupleAndKeywords(args, keywds, "|s#Ossi", kwlist,
|
||||
&buf, &len, &py_lexer, &catname,
|
||||
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|ssi", kwlist,
|
||||
&sentence, &catname,
|
||||
&prefix, &max_count))
|
||||
return NULL;
|
||||
|
||||
if ((buf == NULL && py_lexer == NULL) ||
|
||||
(buf != NULL && py_lexer != NULL)) {
|
||||
PyErr_SetString(PyExc_TypeError, "either the sentence or the tokens argument must be provided");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (py_lexer != NULL) {
|
||||
// get an iterator out of the iterable object
|
||||
py_lexer = PyObject_GetIter(py_lexer);
|
||||
if (py_lexer == NULL)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
IterObject* pyres = (IterObject*)
|
||||
pgf_IterType.tp_alloc(&pgf_IterType, 0);
|
||||
if (pyres == NULL) {
|
||||
Py_XDECREF(py_lexer);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -1245,37 +1160,27 @@ Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||
|
||||
GuPool *tmp_pool = gu_local_pool();
|
||||
|
||||
PgfLexer *lexer = NULL;
|
||||
if (buf != NULL) {
|
||||
GuIn* in = gu_data_in(buf, len, tmp_pool);
|
||||
lexer = pgf_new_simple_lexer(in, tmp_pool);
|
||||
}
|
||||
if (py_lexer != NULL) {
|
||||
lexer = pypgf_new_python_lexer(py_lexer, tmp_pool);
|
||||
}
|
||||
|
||||
GuExn* parse_err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
|
||||
|
||||
pyres->res =
|
||||
pgf_complete(self->concr, catname, lexer, prefix, pyres->pool);
|
||||
pgf_complete(self->concr, catname, sentence, prefix, parse_err, pyres->pool);
|
||||
|
||||
if (pyres->res == NULL) {
|
||||
if (!gu_ok(parse_err)) {
|
||||
Py_DECREF(pyres);
|
||||
pyres = NULL;
|
||||
|
||||
PgfToken tok =
|
||||
pgf_lexer_current_token(lexer);
|
||||
|
||||
if (*tok == 0)
|
||||
PyErr_SetString(PGFError, "The sentence cannot be parsed");
|
||||
else {
|
||||
if (gu_exn_caught(parse_err) == gu_type(PgfExn)) {
|
||||
GuString msg = (GuString) gu_exn_caught_data(parse_err);
|
||||
PyErr_SetString(PGFError, msg);
|
||||
} else if (gu_exn_caught(parse_err) == gu_type(PgfParseError)) {
|
||||
GuString tok = (GuString) gu_exn_caught_data(parse_err);
|
||||
PyObject* py_tok = PyString_FromString(tok);
|
||||
PyObject_SetAttrString(ParseError, "token", py_tok);
|
||||
PyErr_Format(ParseError, "Unexpected token: \"%s\"",
|
||||
PyString_AsString(py_tok));
|
||||
PyErr_Format(ParseError, "Unexpected token: \"%s\"", tok);
|
||||
Py_DECREF(py_tok);
|
||||
}
|
||||
}
|
||||
|
||||
Py_XDECREF(py_lexer);
|
||||
gu_pool_free(tmp_pool);
|
||||
|
||||
return pyres;
|
||||
@@ -1671,56 +1576,21 @@ pypgf_collect_morpho(PgfMorphoCallback* self,
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
Concr_lookupMorpho(ConcrObject* self, PyObject *args, PyObject *keywds) {
|
||||
static char *kwlist[] = {"sentence", "tokens", NULL};
|
||||
|
||||
int len;
|
||||
const uint8_t *buf = NULL;
|
||||
PyObject* py_lexer = NULL;
|
||||
if (!PyArg_ParseTupleAndKeywords(args, keywds, "|s#O", kwlist,
|
||||
&buf, &len, &py_lexer))
|
||||
Concr_lookupMorpho(ConcrObject* self, PyObject *args) {
|
||||
GuString sent;
|
||||
if (!PyArg_ParseTuple(args, "s", &sent))
|
||||
return NULL;
|
||||
|
||||
if ((buf == NULL && py_lexer == NULL) ||
|
||||
(buf != NULL && py_lexer != NULL)) {
|
||||
PyErr_SetString(PyExc_TypeError, "either the sentence or the tokens argument must be provided");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
GuPool* tmp_pool = gu_local_pool();
|
||||
|
||||
PgfLexer *lexer = NULL;
|
||||
if (buf != NULL) {
|
||||
GuIn* in = gu_data_in(buf, len, tmp_pool);
|
||||
lexer = pgf_new_simple_lexer(in, tmp_pool);
|
||||
}
|
||||
if (py_lexer != NULL) {
|
||||
// get an iterator out of the iterable object
|
||||
py_lexer = PyObject_GetIter(py_lexer);
|
||||
if (py_lexer == NULL) {
|
||||
gu_pool_free(tmp_pool);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
lexer = pypgf_new_python_lexer(py_lexer, tmp_pool);
|
||||
}
|
||||
|
||||
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
|
||||
GuPool *tmp_pool = gu_local_pool();
|
||||
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
|
||||
|
||||
PyObject* analyses = PyList_New(0);
|
||||
|
||||
PyMorphoCallback callback = { { pypgf_collect_morpho }, analyses };
|
||||
pgf_lookup_morpho(self->concr, lexer, &callback.fn, err);
|
||||
|
||||
Py_XDECREF(py_lexer);
|
||||
pgf_lookup_morpho(self->concr, sent, &callback.fn, err);
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
|
||||
if (!gu_ok(err)) {
|
||||
Py_DECREF(analyses);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return analyses;
|
||||
}
|
||||
|
||||
@@ -1833,7 +1703,7 @@ static PyMethodDef Concr_methods[] = {
|
||||
{"graphvizParseTree", (PyCFunction)Concr_graphvizParseTree, METH_VARARGS,
|
||||
"Renders an abstract syntax tree as a parse tree in Graphviz format"
|
||||
},
|
||||
{"lookupMorpho", (PyCFunction)Concr_lookupMorpho, METH_VARARGS | METH_KEYWORDS,
|
||||
{"lookupMorpho", (PyCFunction)Concr_lookupMorpho, METH_VARARGS,
|
||||
"Looks up a word in the lexicon of the grammar"
|
||||
},
|
||||
{"fullFormLexicon", (PyCFunction)Concr_fullFormLexicon, METH_VARARGS,
|
||||
|
||||
Reference in New Issue
Block a user