1
0
forked from GitHub/gf-core

A major redesign of the C runtime. The parser and the linearizer now fully support BIND. The following things are still broken: parseval, word completion, the handling of 'pre', and the robust mode.

This commit is contained in:
kr.angelov
2013-10-09 12:08:51 +00:00
parent 20e4970ec1
commit 8cf03bc5b6
20 changed files with 1220 additions and 1763 deletions

View File

@@ -1046,48 +1046,6 @@ Concr_printName(ConcrObject* self, PyObject *args)
return PyString_FromString(pgf_print_name(self->concr, name));
}
/* State for a PgfLexer whose tokens are pulled from a Python object. */
typedef struct {
/* Generic lexer part. The code converts between PgfLexer* and
 * PgfPythonLexer* with plain pointer casts, so this must stay the
 * first member. */
PgfLexer base;
/* Python object that PyIter_Next is called on to obtain the next token. */
PyObject* pylexer;
/* Pool handed to gu_string_copy when a token string is stored in base.tok. */
GuPool* pool;
} PgfPythonLexer;
/* Exception type raised on the GuExn when reading a token from the
 * Python side fails. */
GU_DEFINE_TYPE(PyPgfLexerExn, abstract, _);
/*
 * read_token callback for PgfPythonLexer.
 *
 * Pulls the next item from the Python object with PyIter_Next and stores
 * a copy of its string value, made with gu_string_copy into lexer->pool,
 * as the current token.
 *
 * base: the lexer; it is cast to PgfPythonLexer*.
 * err:  receives GuEOF when the iterator is exhausted, or PyPgfLexerExn
 *       when a Python error is pending or the item cannot be read as a
 *       string.
 *
 * Returns the current token; this is "" whenever an exception is raised.
 */
static PgfToken
pypgf_python_lexer_read_token(PgfLexer *base, GuExn* err)
{
	PgfPythonLexer* lexer = (PgfPythonLexer*) base;
	lexer->base.tok = "";

	PyObject* item = PyIter_Next(lexer->pylexer);
	if (item == NULL) {
		/* NULL means either a Python error or a normally exhausted
		 * iterator; PyErr_Occurred tells the two apart. */
		if (PyErr_Occurred() != NULL) {
			gu_raise(err, PyPgfLexerExn);
		} else {
			gu_raise(err, GuEOF);
		}
		return lexer->base.tok;
	}

	const char* str = PyString_AsString(item);
	if (str == NULL) {
		gu_raise(err, PyPgfLexerExn);
	} else {
		lexer->base.tok = gu_string_copy(str, lexer->pool);
	}

	/* PyIter_Next returns a new reference. Release it here, after the
	 * text has been copied, because str points into the item's own
	 * buffer. Without this every token read leaks a reference. */
	Py_DECREF(item);

	return lexer->base.tok;
}
/*
 * Allocates a PgfPythonLexer from `pool` and returns it as a PgfLexer*.
 *
 * pylexer: Python object that tokens will later be read from; the pointer
 *          is stored as given.
 * pool:    pool used for the lexer itself and remembered in the lexer for
 *          later token copies.
 *
 * The lexer starts with an empty current token.
 */
static PgfLexer*
pypgf_new_python_lexer(PyObject* pylexer, GuPool* pool)
{
	PgfPythonLexer* state = gu_new(PgfPythonLexer, pool);

	state->pool = pool;
	state->pylexer = pylexer;

	state->base.tok = "";
	state->base.read_token = pypgf_python_lexer_read_token;

	/* base is the first member, so its address is the same pointer the
	 * read_token callback casts back to PgfPythonLexer*. */
	return &state->base;
}
#if ( (PY_VERSION_HEX < 0x02070000) \
|| ((PY_VERSION_HEX >= 0x03000000) \
&& (PY_VERSION_HEX < 0x03010000)) )
@@ -1114,35 +1072,19 @@ void pypgf_container_descructor(PyObject *capsule)
static IterObject*
Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
{
static char *kwlist[] = {"sentence", "tokens", "cat", "n", "heuristics", NULL};
static char *kwlist[] = {"sentence", "cat", "n", "heuristics", NULL};
int len;
const uint8_t *buf = NULL;
PyObject* py_lexer = NULL;
const char *sentence = NULL;
PgfCId catname = pgf_start_cat(self->grammar->pgf);
int max_count = -1;
double heuristics = -1;
if (!PyArg_ParseTupleAndKeywords(args, keywds, "|s#Osid", kwlist,
&buf, &len, &py_lexer, &catname, &max_count, &heuristics))
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|sid", kwlist,
&sentence, &catname, &max_count, &heuristics))
return NULL;
if ((buf == NULL && py_lexer == NULL) ||
(buf != NULL && py_lexer != NULL)) {
PyErr_SetString(PyExc_TypeError, "either the sentence or the tokens argument must be provided");
return NULL;
}
if (py_lexer != NULL) {
// get an iterator out of the iterable object
py_lexer = PyObject_GetIter(py_lexer);
if (py_lexer == NULL)
return NULL;
}
IterObject* pyres = (IterObject*)
pgf_IterType.tp_alloc(&pgf_IterType, 0);
if (pyres == NULL) {
Py_XDECREF(py_lexer);
return NULL;
}
@@ -1160,30 +1102,22 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
pyres->counter = 0;
pyres->fetch = Iter_fetch_expr;
PgfLexer *lexer = NULL;
if (buf != NULL) {
GuIn* in = gu_data_in(buf, len, pyres->pool);
lexer = pgf_new_simple_lexer(in, pyres->pool);
}
if (py_lexer != NULL) {
lexer = pypgf_new_python_lexer(py_lexer, pyres->pool);
}
GuExn* parse_err = gu_new_exn(NULL, gu_kind(type), pyres->pool);
pyres->res =
pgf_parse_with_heuristics(self->concr, catname, lexer,
heuristics, pyres->pool, out_pool);
pgf_parse_with_heuristics(self->concr, catname, sentence,
heuristics, parse_err,
pyres->pool, out_pool);
if (pyres->res == NULL) {
PgfToken tok =
pgf_lexer_current_token(lexer);
if (*tok == 0)
PyErr_SetString(PGFError, "The sentence cannot be parsed");
else {
if (!gu_ok(parse_err)) {
if (gu_exn_caught(parse_err) == gu_type(PgfExn)) {
GuString msg = (GuString) gu_exn_caught_data(parse_err);
PyErr_SetString(PGFError, msg);
} else if (gu_exn_caught(parse_err) == gu_type(PgfParseError)) {
GuString tok = (GuString) gu_exn_caught_data(parse_err);
PyObject* py_tok = PyString_FromString(tok);
PyObject_SetAttrString(ParseError, "token", py_tok);
PyErr_Format(ParseError, "Unexpected token: \"%s\"",
PyString_AsString(py_tok));
PyErr_Format(ParseError, "Unexpected token: \"%s\"", tok);
Py_DECREF(py_tok);
}
@@ -1191,45 +1125,26 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
pyres = NULL;
}
Py_XDECREF(py_lexer);
return pyres;
}
static IterObject*
Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds)
{
static char *kwlist[] = {"sentence", "tokens", "cat",
"prefix", "n", NULL};
static char *kwlist[] = {"sentence", "cat", "prefix", "n", NULL};
int len;
const uint8_t *buf = NULL;
PyObject* py_lexer = NULL;
const char *sentence = NULL;
GuString catname = pgf_start_cat(self->grammar->pgf);
GuString prefix = "";
int max_count = -1;
if (!PyArg_ParseTupleAndKeywords(args, keywds, "|s#Ossi", kwlist,
&buf, &len, &py_lexer, &catname,
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|ssi", kwlist,
&sentence, &catname,
&prefix, &max_count))
return NULL;
if ((buf == NULL && py_lexer == NULL) ||
(buf != NULL && py_lexer != NULL)) {
PyErr_SetString(PyExc_TypeError, "either the sentence or the tokens argument must be provided");
return NULL;
}
if (py_lexer != NULL) {
// get an iterator out of the iterable object
py_lexer = PyObject_GetIter(py_lexer);
if (py_lexer == NULL)
return NULL;
}
IterObject* pyres = (IterObject*)
pgf_IterType.tp_alloc(&pgf_IterType, 0);
if (pyres == NULL) {
Py_XDECREF(py_lexer);
return NULL;
}
@@ -1245,37 +1160,27 @@ Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds)
GuPool *tmp_pool = gu_local_pool();
PgfLexer *lexer = NULL;
if (buf != NULL) {
GuIn* in = gu_data_in(buf, len, tmp_pool);
lexer = pgf_new_simple_lexer(in, tmp_pool);
}
if (py_lexer != NULL) {
lexer = pypgf_new_python_lexer(py_lexer, tmp_pool);
}
GuExn* parse_err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
pyres->res =
pgf_complete(self->concr, catname, lexer, prefix, pyres->pool);
pgf_complete(self->concr, catname, sentence, prefix, parse_err, pyres->pool);
if (pyres->res == NULL) {
if (!gu_ok(parse_err)) {
Py_DECREF(pyres);
pyres = NULL;
PgfToken tok =
pgf_lexer_current_token(lexer);
if (*tok == 0)
PyErr_SetString(PGFError, "The sentence cannot be parsed");
else {
if (gu_exn_caught(parse_err) == gu_type(PgfExn)) {
GuString msg = (GuString) gu_exn_caught_data(parse_err);
PyErr_SetString(PGFError, msg);
} else if (gu_exn_caught(parse_err) == gu_type(PgfParseError)) {
GuString tok = (GuString) gu_exn_caught_data(parse_err);
PyObject* py_tok = PyString_FromString(tok);
PyObject_SetAttrString(ParseError, "token", py_tok);
PyErr_Format(ParseError, "Unexpected token: \"%s\"",
PyString_AsString(py_tok));
PyErr_Format(ParseError, "Unexpected token: \"%s\"", tok);
Py_DECREF(py_tok);
}
}
Py_XDECREF(py_lexer);
gu_pool_free(tmp_pool);
return pyres;
@@ -1671,56 +1576,21 @@ pypgf_collect_morpho(PgfMorphoCallback* self,
}
static PyObject*
Concr_lookupMorpho(ConcrObject* self, PyObject *args, PyObject *keywds) {
static char *kwlist[] = {"sentence", "tokens", NULL};
int len;
const uint8_t *buf = NULL;
PyObject* py_lexer = NULL;
if (!PyArg_ParseTupleAndKeywords(args, keywds, "|s#O", kwlist,
&buf, &len, &py_lexer))
Concr_lookupMorpho(ConcrObject* self, PyObject *args) {
GuString sent;
if (!PyArg_ParseTuple(args, "s", &sent))
return NULL;
if ((buf == NULL && py_lexer == NULL) ||
(buf != NULL && py_lexer != NULL)) {
PyErr_SetString(PyExc_TypeError, "either the sentence or the tokens argument must be provided");
return NULL;
}
GuPool* tmp_pool = gu_local_pool();
PgfLexer *lexer = NULL;
if (buf != NULL) {
GuIn* in = gu_data_in(buf, len, tmp_pool);
lexer = pgf_new_simple_lexer(in, tmp_pool);
}
if (py_lexer != NULL) {
// get an iterator out of the iterable object
py_lexer = PyObject_GetIter(py_lexer);
if (py_lexer == NULL) {
gu_pool_free(tmp_pool);
return NULL;
}
lexer = pypgf_new_python_lexer(py_lexer, tmp_pool);
}
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
GuPool *tmp_pool = gu_local_pool();
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
PyObject* analyses = PyList_New(0);
PyMorphoCallback callback = { { pypgf_collect_morpho }, analyses };
pgf_lookup_morpho(self->concr, lexer, &callback.fn, err);
Py_XDECREF(py_lexer);
pgf_lookup_morpho(self->concr, sent, &callback.fn, err);
gu_pool_free(tmp_pool);
if (!gu_ok(err)) {
Py_DECREF(analyses);
return NULL;
}
return analyses;
}
@@ -1833,7 +1703,7 @@ static PyMethodDef Concr_methods[] = {
{"graphvizParseTree", (PyCFunction)Concr_graphvizParseTree, METH_VARARGS,
"Renders an abstract syntax tree as a parse tree in Graphviz format"
},
{"lookupMorpho", (PyCFunction)Concr_lookupMorpho, METH_VARARGS | METH_KEYWORDS,
{"lookupMorpho", (PyCFunction)Concr_lookupMorpho, METH_VARARGS,
"Looks up a word in the lexicon of the grammar"
},
{"fullFormLexicon", (PyCFunction)Concr_fullFormLexicon, METH_VARARGS,