From f050609101e25fdee6f884a0da43dafa8889772e Mon Sep 17 00:00:00 2001 From: "kr.angelov" Date: Thu, 18 Apr 2013 13:37:09 +0000 Subject: [PATCH] added API for computing bracketed strings from Python and C --- src/runtime/c/pgf/data.h | 4 +- src/runtime/c/pgf/lexer.h | 1 + src/runtime/c/pgf/linearizer.c | 61 +++++++- src/runtime/c/pgf/linearizer.h | 35 +---- src/runtime/c/pgf/printer.c | 2 +- src/runtime/c/pgf/reader.c | 9 +- src/runtime/python/pypgf.c | 263 ++++++++++++++++++++++++++++++++- 7 files changed, 328 insertions(+), 47 deletions(-) diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h index fe339a50b..d2e78c7e8 100644 --- a/src/runtime/c/pgf/data.h +++ b/src/runtime/c/pgf/data.h @@ -128,8 +128,6 @@ typedef struct { extern GU_DECLARE_TYPE(PgfCncCat, abstract); -typedef GuSeq PgfTokens; // -> PgfToken - bool pgf_tokens_equal(PgfTokens t1, PgfTokens t2); @@ -214,7 +212,7 @@ typedef GuSeq PgfSequence; // -> PgfSymbol typedef GuList(PgfSequence) PgfSequences; typedef struct { - PgfCId name; + PgfAbsFun* absfun; PgfExprProb *ep; int funid; GuLength n_lins; diff --git a/src/runtime/c/pgf/lexer.h b/src/runtime/c/pgf/lexer.h index f89629cea..270a7949b 100644 --- a/src/runtime/c/pgf/lexer.h +++ b/src/runtime/c/pgf/lexer.h @@ -5,6 +5,7 @@ /// A single lexical token typedef GuString PgfToken; +typedef GuSeq PgfTokens; // -> PgfToken typedef struct { PgfToken (*read_token)(); diff --git a/src/runtime/c/pgf/linearizer.c b/src/runtime/c/pgf/linearizer.c index 55249741c..144ef5154 100644 --- a/src/runtime/c/pgf/linearizer.c +++ b/src/runtime/c/pgf/linearizer.c @@ -64,12 +64,12 @@ pgf_lzr_index_itor(GuMapItor* fn, const void* key, void* value, GuExn* err) case PGF_PRODUCTION_APPLY: { PgfProductionApply* papply = data; PgfCncOverloadMap* overl_table = - gu_map_get(concr->fun_indices, &papply->fun->name, + gu_map_get(concr->fun_indices, &papply->fun->absfun->name, PgfCncOverloadMap*); if (!overl_table) { overl_table = gu_map_type_new(PgfCncOverloadMap, pool); 
gu_map_put(concr->fun_indices, - &papply->fun->name, PgfCncOverloadMap*, overl_table); + &papply->fun->absfun->name, PgfCncOverloadMap*, overl_table); } pgf_lzr_add_overl_entry(overl_table, ccat, papply, pool); break; @@ -98,6 +98,7 @@ struct PgfLzn { PgfConcr* concr; GuChoice* ch; PgfExpr expr; + int fid; GuEnum en; }; @@ -114,6 +115,7 @@ typedef enum { typedef struct { PgfCncFun* fun; + int fid; GuLength n_args; PgfCncTree args[]; } PgfCncTreeApp; @@ -124,6 +126,7 @@ typedef struct { } PgfCncTreeChunks; typedef struct { + int fid; PgfLiteral lit; } PgfCncTreeLit; @@ -176,6 +179,7 @@ static PgfCncTree pgf_lzn_resolve_app(PgfLzn* lzn, GuBuf* buf, GuBuf* args, GuPool* pool) { GuChoiceMark mark = gu_choice_mark(lzn->ch); + int save_fid = lzn->fid; redo:; int index = gu_choice_next(lzn->ch, gu_buf_length(buf)); @@ -195,6 +199,7 @@ redo:; PgfCncTreeApp, args, n_args, &ret, pool); capp->fun = papply->fun; + capp->fid = 0; capp->n_args = n_args; for (size_t i = 0; i < n_args; i++) { @@ -209,6 +214,7 @@ redo:; } else { int index = gu_choice_next(lzn->ch, gu_buf_length(coercions)); if (index < 0) { + lzn->fid = save_fid; gu_choice_reset(lzn->ch, mark); if (!gu_choice_advance(lzn->ch)) return gu_null_variant; @@ -223,13 +229,16 @@ redo:; capp->args[i] = pgf_lzn_resolve(lzn, earg, ccat, pool); if (gu_variant_is_null(capp->args[i])) { + lzn->fid = save_fid; gu_choice_reset(lzn->ch, mark); if (!gu_choice_advance(lzn->ch)) return gu_null_variant; goto redo; } } - + + capp->fid = lzn->fid++; + return ret; } @@ -243,6 +252,7 @@ pgf_lzn_resolve_def(PgfLzn* lzn, PgfCncFuns* lindefs, GuString s, GuPool* pool) gu_new_variant(PGF_CNC_TREE_LIT, PgfCncTreeLit, &lit, pool); + clit->fid = lzn->fid++; clit->lit = gu_new_variant_i(pool, PGF_LITERAL_STR, @@ -262,9 +272,10 @@ pgf_lzn_resolve_def(PgfLzn* lzn, PgfCncFuns* lindefs, GuString s, GuPool* pool) PgfCncTreeApp, args, 1, &ret, pool); capp->fun = gu_list_index(lindefs, index); + capp->fid = lzn->fid++; capp->n_args = 1; capp->args[0] = 
lit; - + return ret; } @@ -308,6 +319,7 @@ pgf_lzn_resolve(PgfLzn* lzn, PgfExpr expr, PgfCCat* ccat, GuPool* pool) gu_new_variant(PGF_CNC_TREE_LIT, PgfCncTreeLit, &ret, pool); + clit->fid = lzn->fid++; clit->lit = elit->lit; goto done; } @@ -413,6 +425,8 @@ pgf_cnc_tree_enum_next(GuEnum* self, void* to, GuPool* pool) return; } + lzn->fid = 0; + GuChoiceMark mark = gu_choice_mark(lzn->ch); *toc = pgf_lzn_resolve(lzn, lzn->expr, NULL, pool); gu_choice_reset(lzn->ch, mark); @@ -442,6 +456,7 @@ pgf_lzr_concretize(PgfConcr* concr, PgfExpr expr, GuPool* pool) PgfLzn* lzn = gu_new(PgfLzn, pool); lzn->concr = concr; lzn->expr = expr; + lzn->fid = 0; lzn->ch = gu_new_choice(pool); lzn->en.next = pgf_cnc_tree_enum_next; return &lzn->en; @@ -457,9 +472,14 @@ pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs case PGF_CNC_TREE_APP: { PgfCncTreeApp* fapp = cti.data; PgfCncFun* fun = fapp->fun; - if (fns->expr_apply) { - fns->expr_apply(fnsp, fun->name, fapp->n_args); + + if (fns->begin_phrase) { + fns->begin_phrase(fnsp, + fun->absfun->type->cid, + fapp->fid, lin_idx, + fun->absfun->name); } + gu_require(lin_idx < fun->n_lins); PgfSequence seq = fun->lins[lin_idx]; size_t nsyms = gu_seq_length(seq); @@ -473,6 +493,7 @@ pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs case PGF_SYMBOL_LIT: { PgfSymbolIdx* sidx = sym_i.data; gu_assert((unsigned) sidx->d < fapp->n_args); + PgfCncTree argf = fapp->args[sidx->d]; pgf_lzr_linearize(concr, argf, sidx->r, fnsp); break; @@ -497,6 +518,13 @@ pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs gu_impossible(); } } + + if (fns->end_phrase) { + fns->end_phrase(fnsp, + fun->absfun->type->cid, + fapp->fid, lin_idx, + fun->absfun->name); + } break; } case PGF_CNC_TREE_CHUNKS: { @@ -510,9 +538,26 @@ pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs case PGF_CNC_TREE_LIT: { gu_require(lin_idx == 0); PgfCncTreeLit* flit = 
cti.data; + + PgfCId cat = + pgf_literal_cat(concr, flit->lit)->cnccat->abscat->name; + + if (fns->begin_phrase) { + fns->begin_phrase(fnsp, + cat, flit->fid, 0, + gu_empty_string); + } + if (fns->expr_literal) { fns->expr_literal(fnsp, flit->lit); } + + if (fns->end_phrase) { + fns->end_phrase(fnsp, + cat, flit->fid, 0, + gu_empty_string); + } + break; } default: @@ -587,7 +632,9 @@ pgf_file_lzn_expr_literal(PgfLinFuncs** funcs, PgfLiteral lit) static PgfLinFuncs pgf_file_lin_funcs = { .symbol_tokens = pgf_file_lzn_symbol_tokens, - .expr_literal = pgf_file_lzn_expr_literal + .expr_literal = pgf_file_lzn_expr_literal, + .begin_phrase = NULL, + .end_phrase = NULL, }; void diff --git a/src/runtime/c/pgf/linearizer.h b/src/runtime/c/pgf/linearizer.h index ce7e483b6..72c972045 100644 --- a/src/runtime/c/pgf/linearizer.h +++ b/src/runtime/c/pgf/linearizer.h @@ -20,7 +20,6 @@ #include #include #include -#include /// Linearization of abstract syntax trees. /// @file @@ -48,7 +47,6 @@ typedef GuEnum PgfCncTreeEnum; PgfCncTreeEnum* pgf_lzr_concretize(PgfConcr* concr, PgfExpr expr, GuPool* pool); -/// Callback functions for linearization. 
typedef struct PgfLinFuncs PgfLinFuncs; struct PgfLinFuncs @@ -56,19 +54,15 @@ struct PgfLinFuncs /// Output tokens void (*symbol_tokens)(PgfLinFuncs** self, PgfTokens toks); - void (*symbol_expr)(PgfLinFuncs** self, - int argno, PgfExpr expr, int lin_idx); - - /// Begin application - void (*expr_apply)(PgfLinFuncs** self, PgfCId cid, int n_args); - /// Output literal void (*expr_literal)(PgfLinFuncs** self, PgfLiteral lit); - void (*abort)(PgfLinFuncs** self); - void (*finish)(PgfLinFuncs** self); -}; + /// Begin phrase + void (*begin_phrase)(PgfLinFuncs** self, PgfCId cat, int fid, int lindex, PgfCId fun); + /// End phrase + void (*end_phrase)(PgfLinFuncs** self, PgfCId cat, int fid, int lindex, PgfCId fun); +}; @@ -83,22 +77,3 @@ pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, void pgf_lzr_linearize_simple(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, GuWriter* wtr, GuExn* err); - - -/// Return the dimension of a concrete syntax tree. -int -pgf_cnc_tree_dimension(PgfCncTree ctree); -/**< - * @param ctree A concrete syntax tree. - * - * @return The dimension of the tree, i.e. the number of different - * linearizations the tree has. 
- */ - -//@} - - - -extern GuTypeTable -pgf_linearize_dump_table; - diff --git a/src/runtime/c/pgf/printer.c b/src/runtime/c/pgf/printer.c index c94202ba9..1194ea85f 100644 --- a/src/runtime/c/pgf/printer.c +++ b/src/runtime/c/pgf/printer.c @@ -177,7 +177,7 @@ pgf_print_cncfun(PgfCncFun *cncfun, PgfSequences *sequences, } gu_puts(") [", wtr, err); - gu_string_write(cncfun->name, wtr, err); + gu_string_write(cncfun->absfun->name, wtr, err); gu_puts("]\n", wtr, err); } diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c index 109e30895..80eb479d7 100644 --- a/src/runtime/c/pgf/reader.c +++ b/src/runtime/c/pgf/reader.c @@ -758,16 +758,15 @@ pgf_read_cncfun(PgfReader* rdr, PgfAbstr* abstr, PgfConcr* concr, int funid) size_t len = pgf_read_len(rdr); gu_return_on_exn(rdr->err, NULL); + PgfAbsFun* absfun = + gu_map_get(abstr->funs, &name, PgfAbsFun*); PgfCncFun* cncfun = gu_new_flex(rdr->opool, PgfCncFun, lins, len); - cncfun->name = name; + cncfun->absfun = absfun; + cncfun->ep = (absfun == NULL) ? NULL : &absfun->ep; cncfun->funid = funid; cncfun->n_lins = len; - PgfAbsFun* absfun = - gu_map_get(abstr->funs, &cncfun->name, PgfAbsFun*); - cncfun->ep = (absfun == NULL) ? 
NULL : &absfun->ep;
-
-
 	for (size_t i = 0; i < len; i++) {
 		int seqid = pgf_read_int(rdr);
 		gu_return_on_exn(rdr->err, NULL);
diff --git a/src/runtime/python/pypgf.c b/src/runtime/python/pypgf.c
index 95d92fbce..dd1c9d2ce 100644
--- a/src/runtime/python/pypgf.c
+++ b/src/runtime/python/pypgf.c
@@ -5,6 +5,8 @@
 #include
 #include
 #include
+#include
+#include
 
 static PyObject* PGFError;
 
@@ -731,6 +733,300 @@ Concr_linearize(ConcrObject* self, PyObject *args)
 	return pystr;
 }
 
+typedef struct {
+	PyObject_HEAD
+	PyObject* cat;
+	int fid;
+	int lindex;
+	PyObject* fun;
+	PyObject* children;
+} BracketObject;
+
+static void
+Bracket_dealloc(BracketObject* self)
+{
+	Py_XDECREF(self->cat);
+	Py_XDECREF(self->fun);
+	Py_XDECREF(self->children);
+	self->ob_type->tp_free((PyObject*)self);
+}
+
+/* Builds the text "(cat:fid child child ...)", rendering every child with
+ * str(). Returns a new string reference, or NULL with a Python error set. */
+static PyObject *
+Bracket_repr(BracketObject *self)
+{
+	PyObject *repr =
+		PyString_FromFormat("(%s:%d", PyString_AsString(self->cat), self->fid);
+	if (repr == NULL) {
+		return NULL;
+	}
+
+	PyObject *space = PyString_FromString(" ");
+	if (space == NULL) {
+		Py_DECREF(repr);
+		return NULL;
+	}
+
+	Py_ssize_t len = PyList_Size(self->children);
+	for (Py_ssize_t i = 0; i < len; i++) {
+		PyObject *child = PyList_GetItem(self->children, i);
+
+		PyString_Concat(&repr, space);
+		if (repr == NULL) {
+			Py_DECREF(space);
+			return NULL;
+		}
+
+		PyObject *child_str = PyObject_Str(child);
+		if (child_str == NULL) {
+			Py_DECREF(repr);
+			Py_DECREF(space);
+			return NULL;
+		}
+
+		/* ConcatAndDel also drops our reference to child_str, so the
+		 * temporary string is not leaked on each iteration. */
+		PyString_ConcatAndDel(&repr, child_str);
+		if (repr == NULL) {
+			Py_DECREF(space);
+			return NULL;
+		}
+	}
+
+	Py_DECREF(space);
+
+	/* A NULL right-hand side (allocation failure) clears repr as well. */
+	PyString_ConcatAndDel(&repr, PyString_FromString(")"));
+	return repr;
+}
+
+static PyMemberDef Bracket_members[] = {
+    {"cat", T_OBJECT_EX, offsetof(BracketObject, cat), READONLY,
+     "the syntactic category for this bracket"},
+    {"fun", T_OBJECT_EX, offsetof(BracketObject, fun), READONLY,
+     "the abstract function for this bracket"},
+    {"fid", T_INT, offsetof(BracketObject, fid), READONLY,
+     "an unique id which identifies this bracket in the whole bracketed string"},
+    {"lindex", T_INT, offsetof(BracketObject, lindex), READONLY,
+     "the constituent index"},
+    {"children", T_OBJECT_EX, offsetof(BracketObject, children), READONLY,
+     "a list with the children of this bracket"},
+    {NULL}  /* Sentinel */
+};
+
+static PyTypeObject pgf_BracketType = {
+    PyObject_HEAD_INIT(NULL)
+    0,                         /*ob_size*/
+    "pgf.Bracket",             /*tp_name*/
+    sizeof(BracketObject),     /*tp_basicsize*/
+    0,                         /*tp_itemsize*/
+    (destructor)Bracket_dealloc,/*tp_dealloc*/
+    0,                         /*tp_print*/
+    0,                         /*tp_getattr*/
+    0,                         /*tp_setattr*/
+    0,                         /*tp_compare*/
+    0,                         /*tp_repr*/
+    0,                         /*tp_as_number*/
+    0,                         /*tp_as_sequence*/
+    0,                         /*tp_as_mapping*/
+    0,                         /*tp_hash */
+    0,                         /*tp_call*/
+    (reprfunc) Bracket_repr,   /*tp_str*/
+    0,                         /*tp_getattro*/
+    0,                         /*tp_setattro*/
+    0,                         /*tp_as_buffer*/
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
+    "a linearization bracket", /*tp_doc*/
+    0,                         /*tp_traverse */
+    0,                         /*tp_clear */
+    0,                         /*tp_richcompare */
+    0,                         /*tp_weaklistoffset */
+    0,                         /*tp_iter */
+    0,                         /*tp_iternext */
+    0,                         /*tp_methods */
+    Bracket_members,           /*tp_members */
+    0,                         /*tp_getset */
+    0,                         /*tp_base */
+    0,                         /*tp_dict */
+    0,                         /*tp_descr_get */
+    0,                         /*tp_descr_set */
+    0,                         /*tp_dictoffset */
+    0,                         /*tp_init */
+    0,                         /*tp_alloc */
+    0,                         /*tp_new */
+};
+
+typedef struct {
+	PgfLinFuncs* funcs;
+	GuBuf* stack;
+	PyObject* list;
+} PgfBracketLznState;
+
+/* Appends item to the list of the bracket under construction and drops the
+ * caller's reference. A NULL item or list means an earlier Python call
+ * failed; it is skipped and the Python error indicator is left set. */
+static void
+pgf_bracket_lzn_append(PgfBracketLznState* state, PyObject* item)
+{
+	if (item == NULL)
+		return;
+	if (state->list != NULL)
+		PyList_Append(state->list, item);
+	Py_DECREF(item);
+}
+
+/* Linearizer callback: adds each token as a Python string. */
+static void
+pgf_bracket_lzn_symbol_tokens(PgfLinFuncs** funcs, PgfTokens toks)
+{
+	PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
+
+	size_t len = gu_seq_length(toks);
+	for (size_t i = 0; i < len; i++) {
+		PgfToken tok = gu_seq_get(toks, PgfToken, i);
+		pgf_bracket_lzn_append(state, gu2py_string(tok));
+	}
+}
+
+/* Linearizer callback: adds the textual form of a literal. */
+static void
+pgf_bracket_lzn_expr_literal(PgfLinFuncs** funcs, PgfLiteral lit)
+{
+	PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
+
+	GuVariantInfo i = gu_variant_open(lit);
+	switch (i.tag) {
+	case PGF_LITERAL_STR: {
+		PgfLiteralStr* lstr = i.data;
+		pgf_bracket_lzn_append(state, gu2py_string(lstr->val));
+		break;
+	}
+	case PGF_LITERAL_INT: {
+		PgfLiteralInt* lint = i.data;
+		pgf_bracket_lzn_append(state, PyString_FromFormat("%d", lint->val));
+		break;
+	}
+	case PGF_LITERAL_FLT: {
+		PgfLiteralFlt* lflt = i.data;
+		/* PyString_FromFormat has no %f conversion and would copy the
+		 * format text verbatim, so format with PyOS_snprintf instead. */
+		char buf[512];
+		PyOS_snprintf(buf, sizeof(buf), "%f", lflt->val);
+		pgf_bracket_lzn_append(state, PyString_FromString(buf));
+		break;
+	}
+	default:
+		gu_impossible();
+	}
+}
+
+/* Linearizer callback: saves the current list and starts a fresh one. */
+static void
+pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, int lindex, PgfCId fun)
+{
+	PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
+
+	gu_buf_push(state->stack, PyObject*, state->list);
+	/* NULL on allocation failure; the other callbacks tolerate that. */
+	state->list = PyList_New(0);
+}
+
+/* Linearizer callback: wraps the collected children in a Bracket and adds
+ * it to the enclosing list. Phrases without output are dropped. */
+static void
+pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, int lindex, PgfCId fun)
+{
+	PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
+
+	PyObject* children = state->list;
+	state->list = gu_buf_pop(state->stack, PyObject*);
+
+	if (children == NULL || PyList_Size(children) <= 0) {
+		Py_XDECREF(children);
+		return;
+	}
+
+	BracketObject* bracket = (BracketObject *)
+		pgf_BracketType.tp_alloc(&pgf_BracketType, 0);
+	if (bracket == NULL) {
+		/* Nothing else owns the children list, so release it here. */
+		Py_DECREF(children);
+		return;
+	}
+
+	bracket->cat = gu2py_string(cat);
+	bracket->fid = fid;
+	bracket->lindex = lindex;
+	bracket->fun = gu2py_string(fun);
+	bracket->children = children;	/* reference moves into the bracket */
+	pgf_bracket_lzn_append(state, (PyObject*) bracket);
+}
+
+static PgfLinFuncs pgf_bracket_lin_funcs = {
+	.symbol_tokens = pgf_bracket_lzn_symbol_tokens,
+	.expr_literal  = pgf_bracket_lzn_expr_literal,
+	.begin_phrase  = pgf_bracket_lzn_begin_phrase,
+	.end_phrase    = pgf_bracket_lzn_end_phrase
+};
+
+/* Python method Concr.bracketedLinearize(expr): linearizes the first
+ * concrete tree of expr and returns the first top-level item of the
+ * result (normally a Bracket), or None when the linearization produced
+ * no output. Raises PGFError when the tree cannot be linearized. */
+static PyObject*
+Concr_bracketedLinearize(ConcrObject* self, PyObject *args)
+{
+	ExprObject* pyexpr;
+	if (!PyArg_ParseTuple(args, "O!", &pgf_ExprType, &pyexpr))
+		return NULL;
+
+	GuPool* tmp_pool = gu_local_pool();
+
+	GuEnum* cts =
+		pgf_lzr_concretize(self->concr, pyexpr->expr, tmp_pool);
+	PgfCncTree ctree = gu_next(cts, PgfCncTree, tmp_pool);
+	if (gu_variant_is_null(ctree)) {
+		PyErr_SetString(PGFError, "The abstract tree cannot be linearized");
+		gu_pool_free(tmp_pool);
+		return NULL;
+	}
+
+	PyObject* list = PyList_New(0);
+	if (list == NULL) {
+		gu_pool_free(tmp_pool);
+		return NULL;
+	}
+
+	PgfBracketLznState state;
+	state.funcs = &pgf_bracket_lin_funcs;
+	state.stack = gu_new_buf(PyObject*, tmp_pool);
+	state.list  = list;
+	pgf_lzr_linearize(self->concr, ctree, 0, &state.funcs);
+
+	gu_pool_free(tmp_pool);
+
+	/* The callbacks return void, so a Python error raised inside them is
+	 * only visible through the error indicator. */
+	if (PyErr_Occurred()) {
+		Py_DECREF(list);
+		return NULL;
+	}
+
+	/* With no output the list is empty and PyList_GetItem(list, 0) would
+	 * return NULL, which Py_INCREF must never see. */
+	if (PyList_Size(list) == 0) {
+		Py_DECREF(list);
+		Py_RETURN_NONE;
+	}
+
+	PyObject* bracket = PyList_GetItem(list, 0);
+	Py_INCREF(bracket);
+	Py_DECREF(list);
+
+	return bracket;
+}
+
 static PyObject*
 Concr_getName(ConcrObject *self, void *closure)
 {
@@ -753,7 +1049,10 @@ static PyMethodDef Concr_methods[] = {
      "Parses a string and returns an iterator over the abstract trees for this sentence"
     },
     {"linearize", (PyCFunction)Concr_linearize, METH_VARARGS,
-     "Takes an abstract tree and linearizes it to a sentence"
+     "Takes an abstract tree and linearizes it to a string"
+    },
+    {"bracketedLinearize", (PyCFunction)Concr_bracketedLinearize, METH_VARARGS,
+     "Takes an abstract tree and linearizes it to a bracketed string"
     },
     {NULL}  /* Sentinel */
 };
@@ -1234,6 +1533,9 @@ initpgf(void)
 	if (PyType_Ready(&pgf_ConcrType) < 0)
 		return;
 
+	if (PyType_Ready(&pgf_BracketType) < 0)
+		return;
+
 	if (PyType_Ready(&pgf_ExprType) < 0)
 		return;
 
@@ -1260,4 +1562,5 @@ initpgf(void)
 	Py_INCREF(&pgf_PGFType);
 	Py_INCREF(&pgf_ConcrType);
 	Py_INCREF(&pgf_ExprIterType);
+	Py_INCREF(&pgf_BracketType);
 }