From 2f51c8471c736c8d613783896ab41c540e4d5d21 Mon Sep 17 00:00:00 2001 From: "John J. Camilleri" Date: Thu, 9 Sep 2021 23:41:55 +0200 Subject: [PATCH] Fix conversion from PyUnicode to PgfText. Remove Python 2-style PyString macros. --- src/runtime/python/compat.h | 16 +++++----- src/runtime/python/expr.c | 14 ++++----- src/runtime/python/marshaller.c | 53 +++++++++++++++++++------------- src/runtime/python/pypgf.c | 6 ++-- src/runtime/python/setup.py | 32 ++++++++++--------- src/runtime/python/test_suite.py | 4 +-- 6 files changed, 69 insertions(+), 56 deletions(-) diff --git a/src/runtime/python/compat.h b/src/runtime/python/compat.h index 1c5483844..cc0ad2312 100644 --- a/src/runtime/python/compat.h +++ b/src/runtime/python/compat.h @@ -1,7 +1,7 @@ #ifndef PYPGF_COMPAT_H_ #define PYPGF_COMPAT_H_ -#if PY_MAJOR_VERSION >= 3 +// #if PY_MAJOR_VERSION >= 3 // #define PyIntObject PyLongObject // #define PyInt_Type PyLong_Type // #define PyInt_Check(op) PyLong_Check(op) @@ -17,13 +17,13 @@ // #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask // #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask - #define PyStringObject PyUnicodeObject - #define PyString_Check PyUnicode_Check - #define PyString_FromStringAndSize PyUnicode_FromStringAndSize - #define PyString_FromFormat PyUnicode_FromFormat + // #define PyStringObject PyUnicodeObject + // #define PyString_Check PyUnicode_Check + // #define PyString_FromStringAndSize PyUnicode_FromStringAndSize + // #define PyString_FromFormat PyUnicode_FromFormat // #define PyString_Concat(ps,s) {PyObject* tmp = *(ps); *(ps) = PyUnicode_Concat(tmp,s); Py_DECREF(tmp);} - #define PyString_Concat PyUnicode_Concat - #define PyString_Compare PyUnicode_Compare -#endif + // #define PyString_Concat PyUnicode_Concat + // #define PyString_Compare PyUnicode_Compare +// #endif #endif // PYPGF_COMPAT_H_ diff --git a/src/runtime/python/expr.c b/src/runtime/python/expr.c index 02a3ee325..c96a4af52 100644 --- a/src/runtime/python/expr.c +++ b/src/runtime/python/expr.c @@ -10,7 +10,7 @@ static PyObject * Expr_str(ExprObject *self) { PgfText *s = pgf_print_expr((PgfExpr) self, NULL, 1, &marshaller); - PyObject *str = PyString_FromStringAndSize(s->text, s->size); + PyObject *str = PyUnicode_FromStringAndSize(s->text, s->size); free(s); return str; } @@ -138,9 +138,9 @@ ExprLit_richcompare(ExprLitObject *t1, ExprLitObject *t2, int op) } else if (PyFloat_Check(t1->value)) { if (!PyFloat_Check(t2->value)) goto done; if (PyFloat_AsDouble(t1->value) != PyFloat_AsDouble(t2->value)) goto done; - } else if (PyString_Check(t1->value)) { - if (!PyString_Check(t2->value)) goto done; - if (PyString_Compare(t1->value, t2->value) != 0) goto done; + } else if (PyUnicode_Check(t1->value)) { + if (!PyUnicode_Check(t2->value)) goto done; + if (PyUnicode_Compare(t1->value, t2->value) != 0) goto done; } else { PyErr_SetString(PyExc_TypeError, "unknown literal type"); return NULL; @@ -208,7 +208,7 @@ static PyObject * Type_str(TypeObject *self) { PgfText *s = pgf_print_type((PgfType) self, NULL, 1, &marshaller); - PyObject *str = PyString_FromStringAndSize(s->text, s->size); + PyObject *str = PyUnicode_FromStringAndSize(s->text, s->size); free(s); return str; } @@ -217,14 +217,14 @@ static PyObject * Type_richcompare(TypeObject *t1, TypeObject *t2, int op) { bool same = false; - if (PyString_Compare(t1->cat, t2->cat) != 0) goto done; + if (PyUnicode_Compare(t1->cat, t2->cat) != 0) goto done; if (PyList_Size(t1->hypos) != PyList_Size(t2->hypos)) goto done; for (Py_ssize_t n = 0; n < PyList_Size(t1->hypos); n++) { PyObject *h1 = PyList_GetItem(t1->hypos, n); PyObject *h2 = PyList_GetItem(t2->hypos, n); if (PyTuple_GetItem(h1, 0) != PyTuple_GetItem(h2, 0)) goto done; - if (PyString_Compare(PyTuple_GetItem(h1, 1), PyTuple_GetItem(h2, 1)) != 0) goto done; + if (PyUnicode_Compare(PyTuple_GetItem(h1, 1), PyTuple_GetItem(h2, 1)) != 0) goto done; TypeObject *ht1 = (TypeObject *)PyTuple_GetItem(h1, 2); TypeObject *ht2 = (TypeObject *)PyTuple_GetItem(h2, 2); if (Type_richcompare(ht1, ht2, Py_EQ) != Py_True) goto done; diff --git a/src/runtime/python/marshaller.c b/src/runtime/python/marshaller.c index 13425574f..52806ec4b 100644 --- a/src/runtime/python/marshaller.c +++ b/src/runtime/python/marshaller.c @@ -71,11 +71,11 @@ PgfLiteral lint(PgfUnmarshaller *this, size_t size, uintmax_t *v) return (PgfLiteral) 0; } else if (size > 1) { // TODO: string concatenation works but probably not optimal - PyObject *sb = PyString_FromFormat("%ld", *v0); + PyObject *sb = PyUnicode_FromFormat("%ld", *v0); for (size_t n = 1; n < size; n++) { uintmax_t *vn = v + n; - PyObject *t = PyString_FromFormat("%lu", *vn); - sb = PyString_Concat(sb, t); + PyObject *t = PyUnicode_FromFormat("%lu", *vn); + sb = PyUnicode_Concat(sb, t); } PyObject *i = PyLong_FromUnicodeObject(sb, 10); return (PgfLiteral) i; @@ -93,7 +93,7 @@ PgfLiteral lflt(PgfUnmarshaller *this, double v) PgfLiteral lstr(PgfUnmarshaller *this, PgfText *v) { - PyObject *s = PyString_FromStringAndSize(v->text, v->size); + PyObject *s = PyUnicode_FromStringAndSize(v->text, v->size); return (PgfLiteral) s; } @@ -106,13 +106,13 @@ PgfType dtyp(PgfUnmarshaller *this, int n_hypos, PgfTypeHypo *hypos, PgfText *ca PgfTypeHypo *hypo = hypos + i; PyObject *tup = PyTuple_New(3); PyTuple_SetItem(tup, 0, PyLong_FromLong(hypo->bind_type == PGF_BIND_TYPE_EXPLICIT ? 0 : 1)); // TODO - PyTuple_SetItem(tup, 1, PyString_FromStringAndSize(hypo->cid->text, hypo->cid->size)); + PyTuple_SetItem(tup, 1, PyUnicode_FromStringAndSize(hypo->cid->text, hypo->cid->size)); PyTuple_SetItem(tup, 2, (PyObject *)hypo->type); Py_INCREF(hypo->type); PyList_Append(pytype->hypos, tup); } - pytype->cat = PyString_FromStringAndSize(cat->text, cat->size); + pytype->cat = PyUnicode_FromStringAndSize(cat->text, cat->size); pytype->exprs = PyList_New(0); for (int i = 0; i < n_exprs; i++) { @@ -151,15 +151,20 @@ PgfUnmarshaller unmarshaller = { &unmarshallerVtbl }; // ---------------------------------------------------------------------------- static PgfText * -PyString_AsPgfText(PyObject *pystr) +PyUnicode_AsPgfText(PyObject *pystr) { - if (!PyString_Check(pystr)) { - PyErr_SetString(PyExc_TypeError, "input to PyString_AsPgfText is not a string"); + if (!PyUnicode_Check(pystr)) { + PyErr_SetString(PyExc_TypeError, "input to PyUnicode_AsPgfText is not a string"); return NULL; } - size_t size = PyUnicode_GetLength(pystr); + if (PyUnicode_READY(pystr) != 0) { + return NULL; + } + + Py_ssize_t size; + const char * enc = PyUnicode_AsUTF8AndSize(pystr, &size); PgfText *ptext = (PgfText *)PyMem_Malloc(sizeof(PgfText)+size+1); - memcpy(ptext->text, pystr, size+1); + memcpy(ptext->text, enc, size+1); ptext->size = size; // Py_INCREF(ptext); return ptext; @@ -172,14 +177,15 @@ object match_lit(PgfMarshaller *this, PgfUnmarshaller *u, PgfLiteral lit) PyObject *pyobj = (PyObject *)lit; if (PyLong_Check(pyobj)) { + // TODO uintmax_t i = PyLong_AsUnsignedLong(pyobj); - size_t size = 1; // TODO + size_t size = 1; return u->vtbl->lint(u, size, &i); } else if (PyFloat_Check(pyobj)) { double d = PyFloat_AsDouble(pyobj); return u->vtbl->lflt(u, d); - } else if (PyString_Check(pyobj)) { - PgfText *t = PyString_AsPgfText(pyobj); + } else if (PyUnicode_Check(pyobj)) { + PgfText *t = PyUnicode_AsPgfText(pyobj); return u->vtbl->lstr(u, t); } else { PyErr_SetString(PyExc_TypeError, "unable to match on literal"); @@ -195,19 +201,24 @@ object match_expr(PgfMarshaller *this, PgfUnmarshaller *u, PgfExpr expr) object match_type(PgfMarshaller *this, PgfUnmarshaller *u, PgfType ty) { - // PySys_WriteStdout(">match_type<\n"); - TypeObject *type = (TypeObject *)ty; - // PySys_WriteStdout(">%s<\n", PyUnicode_AS_DATA(type->cat)); - int n_hypos = 0; //PyList_Size(type->hypos); PgfTypeHypo *hypos = NULL; // TODO - PgfText *cat = PyString_AsPgfText(type->cat); + PgfText *cat = PyUnicode_AsPgfText(type->cat); + if (cat == NULL) { + return 0; + } - int n_exprs = 0; //PyList_Size(type->exprs); - PgfExpr *exprs = NULL; // TODO + int n_exprs = PyList_Size(type->exprs); + PgfExpr *exprs; + if (n_exprs > 0) { + exprs = (PgfExpr *)PyList_GetItem(type->exprs, 0); + // TODO lay out others in memory in some way? + } else { + exprs = NULL; + } return u->vtbl->dtyp(u, n_hypos, hypos, cat, n_exprs, exprs); } diff --git a/src/runtime/python/pypgf.c b/src/runtime/python/pypgf.c index 9df33d998..dec97e1a3 100644 --- a/src/runtime/python/pypgf.c +++ b/src/runtime/python/pypgf.c @@ -1790,7 +1790,7 @@ PGF_getAbstractName(PGFObject *self, void *closure) return NULL; } - PyObject *name = PyString_FromStringAndSize(txt->text, txt->size); + PyObject *name = PyUnicode_FromStringAndSize(txt->text, txt->size); free(txt); return name; } @@ -1865,7 +1865,7 @@ pgf_collect_cats(PgfItor* fn, PgfText* key, void* value, PgfExn *err) PgfText* name = key; PyPGFClosure* clo = (PyPGFClosure*) fn; - PyObject* py_name = PyString_FromStringAndSize(name->text, name->size); + PyObject* py_name = PyUnicode_FromStringAndSize(name->text, name->size); if (py_name == NULL) { err->type = PGF_EXN_OTHER_ERROR; err->msg = "unable to create string from category"; @@ -1920,7 +1920,7 @@ pgf_collect_funs(PgfItor* fn, PgfText* key, void* value, PgfExn *err) PgfText* name = key; PyPGFClosure* clo = (PyPGFClosure*) fn; - PyObject* py_name = PyString_FromStringAndSize(name->text, name->size); + PyObject* py_name = PyUnicode_FromStringAndSize(name->text, name->size); if (py_name == NULL) { err->type = PGF_EXN_OTHER_ERROR; err->msg = "unable to create string from function"; diff --git a/src/runtime/python/setup.py b/src/runtime/python/setup.py index 4a5b8f882..88ed419b8 100644 --- a/src/runtime/python/setup.py +++ b/src/runtime/python/setup.py @@ -8,23 +8,25 @@ libraries = os.getenv('EXTRA_LIB_DIRS','').split(':') if libraries==['']: libraries=[] -pgf_module = Extension('pgf', - sources = ['pypgf.c', 'marshaller.c', 'expr.c'], - extra_compile_args = ['-std=c99', '-Werror', '-Wno-error=int-conversion', '-Wno-error=unused-variable', '-Wno-comment'], - include_dirs = includes, - library_dirs = libraries, - libraries = ['pgf']) +pgf_module = Extension( + 'pgf', + sources = ['pypgf.c', 'marshaller.c', 'expr.c'], + extra_compile_args = ['-std=c99', '-Werror', '-Wno-error=int-conversion', '-Wno-error=unused-variable', '-Wno-comment'], + include_dirs = includes, + library_dirs = libraries, + libraries = ['pgf']) -setup (name = 'pgf', - version = '2.0', - description = 'Python bindings to the Grammatical Framework\'s PGF runtime', - long_description="""\ +setup( + name = 'pgf', + version = '2.0', + description = 'Python bindings to the Grammatical Framework\'s PGF runtime', + long_description="""\ Grammatical Framework (GF) is a programming language for multilingual grammar applications. This package provides Python bindings to GF runtime, which allows you to \ parse and generate text using GF grammars compiled into the PGF format. """, - url='https://www.grammaticalframework.org/', - author='Krasimir Angelov', - author_email='kr.angelov@gmail.com', - license='BSD', - ext_modules = [pgf_module]) + url='https://www.grammaticalframework.org/', + author='Krasimir Angelov', + author_email='kr.angelov@gmail.com', + license='BSD', + ext_modules = [pgf_module]) diff --git a/src/runtime/python/test_suite.py b/src/runtime/python/test_suite.py index 2a7a6fc17..11eb73728 100644 --- a/src/runtime/python/test_suite.py +++ b/src/runtime/python/test_suite.py @@ -105,8 +105,8 @@ def test_readType_inequality_1(): def test_readType_inequality_2(): assert pgf.readType("A -> B") != pgf.readType("B->B") -# def test_readType_str(): -# assert str(pgf.readType("A-> B")) == "A -> B" +def test_readType_str(): + assert str(pgf.readType("A-> BÄ")) == "A -> BÄ" def test_functionType_1(PGF): assert PGF.functionType("z") == pgf.readType("N")