added API for computing bracketed strings from Python and C

This commit is contained in:
kr.angelov
2013-04-18 13:37:09 +00:00
parent ff691dee7b
commit f050609101
7 changed files with 328 additions and 47 deletions

View File

@@ -128,8 +128,6 @@ typedef struct {
extern GU_DECLARE_TYPE(PgfCncCat, abstract);
typedef GuSeq PgfTokens; // -> PgfToken
bool
pgf_tokens_equal(PgfTokens t1, PgfTokens t2);
@@ -214,7 +212,7 @@ typedef GuSeq PgfSequence; // -> PgfSymbol
typedef GuList(PgfSequence) PgfSequences;
typedef struct {
PgfCId name;
PgfAbsFun* absfun;
PgfExprProb *ep;
int funid;
GuLength n_lins;

View File

@@ -5,6 +5,7 @@
/// A single lexical token
typedef GuString PgfToken;
typedef GuSeq PgfTokens; // -> PgfToken
typedef struct {
PgfToken (*read_token)();

View File

@@ -64,12 +64,12 @@ pgf_lzr_index_itor(GuMapItor* fn, const void* key, void* value, GuExn* err)
case PGF_PRODUCTION_APPLY: {
PgfProductionApply* papply = data;
PgfCncOverloadMap* overl_table =
gu_map_get(concr->fun_indices, &papply->fun->name,
gu_map_get(concr->fun_indices, &papply->fun->absfun->name,
PgfCncOverloadMap*);
if (!overl_table) {
overl_table = gu_map_type_new(PgfCncOverloadMap, pool);
gu_map_put(concr->fun_indices,
&papply->fun->name, PgfCncOverloadMap*, overl_table);
&papply->fun->absfun->name, PgfCncOverloadMap*, overl_table);
}
pgf_lzr_add_overl_entry(overl_table, ccat, papply, pool);
break;
@@ -98,6 +98,7 @@ struct PgfLzn {
PgfConcr* concr;
GuChoice* ch;
PgfExpr expr;
int fid;
GuEnum en;
};
@@ -114,6 +115,7 @@ typedef enum {
typedef struct {
PgfCncFun* fun;
int fid;
GuLength n_args;
PgfCncTree args[];
} PgfCncTreeApp;
@@ -124,6 +126,7 @@ typedef struct {
} PgfCncTreeChunks;
typedef struct {
int fid;
PgfLiteral lit;
} PgfCncTreeLit;
@@ -176,6 +179,7 @@ static PgfCncTree
pgf_lzn_resolve_app(PgfLzn* lzn, GuBuf* buf, GuBuf* args, GuPool* pool)
{
GuChoiceMark mark = gu_choice_mark(lzn->ch);
int save_fid = lzn->fid;
redo:;
int index = gu_choice_next(lzn->ch, gu_buf_length(buf));
@@ -195,6 +199,7 @@ redo:;
PgfCncTreeApp,
args, n_args, &ret, pool);
capp->fun = papply->fun;
capp->fid = 0;
capp->n_args = n_args;
for (size_t i = 0; i < n_args; i++) {
@@ -209,6 +214,7 @@ redo:;
} else {
int index = gu_choice_next(lzn->ch, gu_buf_length(coercions));
if (index < 0) {
lzn->fid = save_fid;
gu_choice_reset(lzn->ch, mark);
if (!gu_choice_advance(lzn->ch))
return gu_null_variant;
@@ -223,13 +229,16 @@ redo:;
capp->args[i] =
pgf_lzn_resolve(lzn, earg, ccat, pool);
if (gu_variant_is_null(capp->args[i])) {
lzn->fid = save_fid;
gu_choice_reset(lzn->ch, mark);
if (!gu_choice_advance(lzn->ch))
return gu_null_variant;
goto redo;
}
}
capp->fid = lzn->fid++;
return ret;
}
@@ -243,6 +252,7 @@ pgf_lzn_resolve_def(PgfLzn* lzn, PgfCncFuns* lindefs, GuString s, GuPool* pool)
gu_new_variant(PGF_CNC_TREE_LIT,
PgfCncTreeLit,
&lit, pool);
clit->fid = lzn->fid++;
clit->lit =
gu_new_variant_i(pool,
PGF_LITERAL_STR,
@@ -262,9 +272,10 @@ pgf_lzn_resolve_def(PgfLzn* lzn, PgfCncFuns* lindefs, GuString s, GuPool* pool)
PgfCncTreeApp,
args, 1, &ret, pool);
capp->fun = gu_list_index(lindefs, index);
capp->fid = lzn->fid++;
capp->n_args = 1;
capp->args[0] = lit;
return ret;
}
@@ -308,6 +319,7 @@ pgf_lzn_resolve(PgfLzn* lzn, PgfExpr expr, PgfCCat* ccat, GuPool* pool)
gu_new_variant(PGF_CNC_TREE_LIT,
PgfCncTreeLit,
&ret, pool);
clit->fid = lzn->fid++;
clit->lit = elit->lit;
goto done;
}
@@ -413,6 +425,8 @@ pgf_cnc_tree_enum_next(GuEnum* self, void* to, GuPool* pool)
return;
}
lzn->fid = 0;
GuChoiceMark mark = gu_choice_mark(lzn->ch);
*toc = pgf_lzn_resolve(lzn, lzn->expr, NULL, pool);
gu_choice_reset(lzn->ch, mark);
@@ -442,6 +456,7 @@ pgf_lzr_concretize(PgfConcr* concr, PgfExpr expr, GuPool* pool)
PgfLzn* lzn = gu_new(PgfLzn, pool);
lzn->concr = concr;
lzn->expr = expr;
lzn->fid = 0;
lzn->ch = gu_new_choice(pool);
lzn->en.next = pgf_cnc_tree_enum_next;
return &lzn->en;
@@ -457,9 +472,14 @@ pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs
case PGF_CNC_TREE_APP: {
PgfCncTreeApp* fapp = cti.data;
PgfCncFun* fun = fapp->fun;
if (fns->expr_apply) {
fns->expr_apply(fnsp, fun->name, fapp->n_args);
if (fns->begin_phrase) {
fns->begin_phrase(fnsp,
fun->absfun->type->cid,
fapp->fid, lin_idx,
fun->absfun->name);
}
gu_require(lin_idx < fun->n_lins);
PgfSequence seq = fun->lins[lin_idx];
size_t nsyms = gu_seq_length(seq);
@@ -473,6 +493,7 @@ pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs
case PGF_SYMBOL_LIT: {
PgfSymbolIdx* sidx = sym_i.data;
gu_assert((unsigned) sidx->d < fapp->n_args);
PgfCncTree argf = fapp->args[sidx->d];
pgf_lzr_linearize(concr, argf, sidx->r, fnsp);
break;
@@ -497,6 +518,13 @@ pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs
gu_impossible();
}
}
if (fns->end_phrase) {
fns->end_phrase(fnsp,
fun->absfun->type->cid,
fapp->fid, lin_idx,
fun->absfun->name);
}
break;
}
case PGF_CNC_TREE_CHUNKS: {
@@ -510,9 +538,26 @@ pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs
case PGF_CNC_TREE_LIT: {
gu_require(lin_idx == 0);
PgfCncTreeLit* flit = cti.data;
PgfCId cat =
pgf_literal_cat(concr, flit->lit)->cnccat->abscat->name;
if (fns->begin_phrase) {
fns->begin_phrase(fnsp,
cat, flit->fid, 0,
gu_empty_string);
}
if (fns->expr_literal) {
fns->expr_literal(fnsp, flit->lit);
}
if (fns->end_phrase) {
fns->end_phrase(fnsp,
cat, flit->fid, 0,
gu_empty_string);
}
break;
}
default:
@@ -587,7 +632,9 @@ pgf_file_lzn_expr_literal(PgfLinFuncs** funcs, PgfLiteral lit)
static PgfLinFuncs pgf_file_lin_funcs = {
.symbol_tokens = pgf_file_lzn_symbol_tokens,
.expr_literal = pgf_file_lzn_expr_literal
.expr_literal = pgf_file_lzn_expr_literal,
.begin_phrase = NULL,
.end_phrase = NULL,
};
void

View File

@@ -20,7 +20,6 @@
#include <gu/type.h>
#include <gu/dump.h>
#include <gu/enum.h>
#include <pgf/data.h>
/// Linearization of abstract syntax trees.
/// @file
@@ -48,7 +47,6 @@ typedef GuEnum PgfCncTreeEnum;
PgfCncTreeEnum*
pgf_lzr_concretize(PgfConcr* concr, PgfExpr expr, GuPool* pool);
/// Callback functions for linearization.
typedef struct PgfLinFuncs PgfLinFuncs;
struct PgfLinFuncs
@@ -56,19 +54,15 @@ struct PgfLinFuncs
/// Output tokens
void (*symbol_tokens)(PgfLinFuncs** self, PgfTokens toks);
void (*symbol_expr)(PgfLinFuncs** self,
int argno, PgfExpr expr, int lin_idx);
/// Begin application
void (*expr_apply)(PgfLinFuncs** self, PgfCId cid, int n_args);
/// Output literal
void (*expr_literal)(PgfLinFuncs** self, PgfLiteral lit);
void (*abort)(PgfLinFuncs** self);
void (*finish)(PgfLinFuncs** self);
};
/// Begin phrase
void (*begin_phrase)(PgfLinFuncs** self, PgfCId cat, int fid, int lindex, PgfCId fun);
/// End phrase
void (*end_phrase)(PgfLinFuncs** self, PgfCId cat, int fid, int lindex, PgfCId fun);
};
@@ -83,22 +77,3 @@ pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx,
void
pgf_lzr_linearize_simple(PgfConcr* concr, PgfCncTree ctree,
size_t lin_idx, GuWriter* wtr, GuExn* err);
/// Return the dimension of a concrete syntax tree.
int
pgf_cnc_tree_dimension(PgfCncTree ctree);
/**<
* @param ctree A concrete syntax tree.
*
* @return The dimension of the tree, i.e. the number of different
* linearizations the tree has.
*/
//@}
extern GuTypeTable
pgf_linearize_dump_table;

View File

@@ -177,7 +177,7 @@ pgf_print_cncfun(PgfCncFun *cncfun, PgfSequences *sequences,
}
gu_puts(") [", wtr, err);
gu_string_write(cncfun->name, wtr, err);
gu_string_write(cncfun->absfun->name, wtr, err);
gu_puts("]\n", wtr, err);
}

View File

@@ -758,16 +758,15 @@ pgf_read_cncfun(PgfReader* rdr, PgfAbstr* abstr, PgfConcr* concr, int funid)
size_t len = pgf_read_len(rdr);
gu_return_on_exn(rdr->err, NULL);
PgfAbsFun* absfun =
gu_map_get(abstr->funs, &name, PgfAbsFun*);
PgfCncFun* cncfun = gu_new_flex(rdr->opool, PgfCncFun, lins, len);
cncfun->name = name;
cncfun->absfun = absfun;
cncfun->ep = (absfun == NULL) ? NULL : &absfun->ep;
cncfun->funid = funid;
cncfun->n_lins = len;
PgfAbsFun* absfun =
gu_map_get(abstr->funs, &cncfun->name, PgfAbsFun*);
cncfun->ep = (absfun == NULL) ? NULL : &absfun->ep;
for (size_t i = 0; i < len; i++) {
int seqid = pgf_read_int(rdr);
gu_return_on_exn(rdr->err, NULL);

View File

@@ -5,6 +5,8 @@
#include <gu/map.h>
#include <gu/file.h>
#include <pgf/pgf.h>
#include <pgf/lexer.h>
#include <pgf/linearizer.h>
static PyObject* PGFError;
@@ -731,6 +733,258 @@ Concr_linearize(ConcrObject* self, PyObject *args)
return pystr;
}
/* Python object backing the pgf.Bracket type: one node of a bracketed
 * linearization.  The fields are exposed read-only to Python through
 * Bracket_members. */
typedef struct {
PyObject_HEAD
/* syntactic category of the bracket (built with gu2py_string) */
PyObject* cat;
/* id identifying this bracket within the whole bracketed string */
int fid;
/* constituent index */
int lindex;
/* name of the abstract function (built with gu2py_string) */
PyObject* fun;
/* Python list with the children of this bracket */
PyObject* children;
} BracketObject;
/* tp_dealloc slot of pgf.Bracket: drop the references held in the
 * object's fields (any of which may still be NULL) and then hand the
 * memory back through the type's tp_free slot. */
static void
Bracket_dealloc(BracketObject* self)
{
    PyObject* as_object = (PyObject*) self;

    Py_XDECREF(self->cat);
    Py_XDECREF(self->fun);
    Py_XDECREF(self->children);

    as_object->ob_type->tp_free(as_object);
}
/* Build the textual form of a bracket: "(cat:fid child child ...)".
 *
 * Every child is converted with PyObject_Str(), so nested brackets are
 * rendered recursively and plain tokens are rendered as themselves.
 *
 * Returns a new reference, or NULL with a Python exception set when the
 * bracket is not fully initialized, when `cat` is not a string, or when
 * any intermediate string cannot be created. */
static PyObject *
Bracket_repr(BracketObject *self)
{
    if (self->cat == NULL || self->children == NULL) {
        PyErr_SetString(PyExc_ValueError, "uninitialized bracket");
        return NULL;
    }

    /* PyString_AsString returns NULL (and raises) for a non-string;
     * that NULL must never reach the %s of PyString_FromFormat. */
    const char *cat = PyString_AsString(self->cat);
    if (cat == NULL) {
        return NULL;
    }

    /* PyList_Size reports failure as -1, so keep the signed type. */
    Py_ssize_t len = PyList_Size(self->children);
    if (len < 0) {
        return NULL;
    }

    PyObject *repr = PyString_FromFormat("(%s:%d", cat, self->fid);
    if (repr == NULL) {
        return NULL;
    }

    for (Py_ssize_t i = 0; i < len; i++) {
        /* borrowed reference */
        PyObject *child = PyList_GetItem(self->children, i);
        if (child == NULL) {
            Py_DECREF(repr);
            return NULL;
        }

        /* PyString_ConcatAndDel releases its second argument and, on
         * any failure (including a NULL second argument), releases
         * the accumulated string and sets `repr` to NULL. */
        PyString_ConcatAndDel(&repr, PyString_FromString(" "));
        if (repr == NULL) {
            return NULL;
        }

        PyString_ConcatAndDel(&repr, PyObject_Str(child));
        if (repr == NULL) {
            return NULL;
        }
    }

    PyString_ConcatAndDel(&repr, PyString_FromString(")"));
    return repr;
}
/* Read-only attribute table of pgf.Bracket: maps each Python attribute
 * name to the corresponding field of BracketObject.  The array is
 * terminated by a sentinel entry. */
static PyMemberDef Bracket_members[] = {
{"cat", T_OBJECT_EX, offsetof(BracketObject, cat), READONLY,
"the syntactic category for this bracket"},
{"fun", T_OBJECT_EX, offsetof(BracketObject, fun), READONLY,
"the abstract function for this bracket"},
{"fid", T_INT, offsetof(BracketObject, fid), READONLY,
"an unique id which identifies this bracket in the whole bracketed string"},
{"lindex", T_INT, offsetof(BracketObject, lindex), READONLY,
"the constituent index"},
{"children", T_OBJECT_EX, offsetof(BracketObject, children), READONLY,
"a list with the children of this bracket"},
{NULL} /* Sentinel */
};
/* Type object for pgf.Bracket.
 *
 * The initializer is positional, so the order of the entries must follow
 * the slot order of PyTypeObject; each entry is labelled with its slot.
 * Only the destructor, the tp_str slot and the member table are filled
 * in.  Note that Bracket_repr is installed in tp_str while tp_repr is
 * left empty, and that tp_new is left empty as well. */
static PyTypeObject pgf_BracketType = {
PyObject_HEAD_INIT(NULL)
0, /*ob_size*/
"pgf.Bracket", /*tp_name*/
sizeof(BracketObject), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)Bracket_dealloc,/*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash */
0, /*tp_call*/
(reprfunc) Bracket_repr, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
"a linearization bracket", /*tp_doc*/
0, /*tp_traverse */
0, /*tp_clear */
0, /*tp_richcompare */
0, /*tp_weaklistoffset */
0, /*tp_iter */
0, /*tp_iternext */
0, /*tp_methods */
Bracket_members, /*tp_members */
0, /*tp_getset */
0, /*tp_base */
0, /*tp_dict */
0, /*tp_descr_get */
0, /*tp_descr_set */
0, /*tp_dictoffset */
0, /*tp_init */
0, /*tp_alloc */
0, /*tp_new */
};
/* State shared by the bracketed-linearization callbacks.  The callbacks
 * receive a pointer to the `funcs` field and recover the enclosing
 * state from it with gu_container(). */
typedef struct {
/* callback table handed to pgf_lzr_linearize */
PgfLinFuncs* funcs;
/* buffer of PyObject*: the lists saved by begin_phrase and restored by
 * end_phrase */
GuBuf* stack;
/* the Python list that currently receives tokens and brackets */
PyObject* list;
} PgfBracketLznState;
/* Linearization callback: append every token of `toks`, converted to a
 * Python string, to the list that is currently being filled.
 *
 * The callback has no way to report failure.  When a token cannot be
 * converted it is skipped instead of dereferencing a NULL object; any
 * Python exception raised by the failing call is left pending. */
static void
pgf_bracket_lzn_symbol_tokens(PgfLinFuncs** funcs, PgfTokens toks)
{
    PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);

    if (state->list == NULL) {
        /* no list to append to (its allocation failed earlier) */
        return;
    }

    size_t len = gu_seq_length(toks);
    for (size_t i = 0; i < len; i++) {
        PgfToken tok = gu_seq_get(toks, PgfToken, i);

        PyObject* str = gu2py_string(tok);
        if (str == NULL) {
            continue;
        }

        /* PyList_Append takes its own reference */
        PyList_Append(state->list, str);
        Py_DECREF(str);
    }
}
/* Linearization callback: append the textual form of a literal to the
 * list that is currently being filled.
 *
 * String literals are appended as they are, integers are formatted with
 * "%d" and floats with "%f".  The float is formatted with PyOS_snprintf
 * because PyString_FromFormat does not implement "%f": it would copy the
 * unknown directive verbatim and produce the text "%f".
 *
 * The callback has no way to report failure.  When the string cannot be
 * created nothing is appended; any Python exception raised by the
 * failing call is left pending. */
static void
pgf_bracket_lzn_expr_literal(PgfLinFuncs** funcs, PgfLiteral lit)
{
    PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);

    PyObject* str = NULL;

    GuVariantInfo i = gu_variant_open(lit);
    switch (i.tag) {
    case PGF_LITERAL_STR: {
        PgfLiteralStr* lstr = i.data;
        str = gu2py_string(lstr->val);
        break;
    }
    case PGF_LITERAL_INT: {
        PgfLiteralInt* lint = i.data;
        str = PyString_FromFormat("%d", lint->val);
        break;
    }
    case PGF_LITERAL_FLT: {
        PgfLiteralFlt* lflt = i.data;
        /* "%f" of the largest double needs about 320 characters */
        char buf[512];
        PyOS_snprintf(buf, sizeof(buf), "%f", lflt->val);
        str = PyString_FromString(buf);
        break;
    }
    default:
        gu_impossible();
    }

    if (str != NULL && state->list != NULL) {
        /* PyList_Append takes its own reference */
        PyList_Append(state->list, str);
    }
    Py_XDECREF(str);
}
/* Linearization callback invoked when a phrase starts: remember the
 * list that was being filled on the stack and continue with a fresh,
 * empty list that collects the children of the new phrase.  The phrase
 * description (cat, fid, lindex, fun) is not needed until the matching
 * end_phrase call. */
static void
pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, int lindex, PgfCId fun)
{
    PgfBracketLznState* lzn_state =
        gu_container(funcs, PgfBracketLznState, funcs);

    PyObject* enclosing = lzn_state->list;
    gu_buf_push(lzn_state->stack, PyObject*, enclosing);

    lzn_state->list = PyList_New(0);
}
/* Linearization callback invoked when a phrase ends: restore the list
 * saved by the matching begin_phrase and, when the phrase produced at
 * least one child, wrap the children in a new pgf.Bracket appended to
 * that restored list.  Phrases without children produce no bracket.
 *
 * The callback has no way to report failure.  On an allocation failure
 * the children are released (previously they leaked) and no bracket is
 * added; any Python exception raised by the failing call is left
 * pending. */
static void
pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, int lindex, PgfCId fun)
{
    PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);

    PyObject* parent = gu_buf_pop(state->stack, PyObject*);
    PyObject* children = state->list;
    state->list = parent;

    if (children == NULL) {
        /* begin_phrase could not allocate the list for this phrase */
        return;
    }

    if (PyList_Size(children) <= 0) {
        Py_DECREF(children);
        return;
    }

    BracketObject* bracket = (BracketObject *)
        pgf_BracketType.tp_alloc(&pgf_BracketType, 0);
    if (bracket == NULL) {
        Py_DECREF(children);
        return;
    }

    bracket->cat = gu2py_string(cat);
    bracket->fid = fid;
    bracket->lindex = lindex;
    bracket->fun = gu2py_string(fun);
    /* the bracket takes over our reference to the children */
    bracket->children = children;

    if (parent != NULL) {
        /* PyList_Append takes its own reference */
        PyList_Append(parent, (PyObject*) bracket);
    }
    Py_DECREF(bracket);
}
/* Callback table used by Concr_bracketedLinearize: tokens and literals
 * are appended to the current list, while begin_phrase/end_phrase open
 * and close the brackets. */
static PgfLinFuncs pgf_bracket_lin_funcs = {
.symbol_tokens = pgf_bracket_lzn_symbol_tokens,
.expr_literal = pgf_bracket_lzn_expr_literal,
.begin_phrase = pgf_bracket_lzn_begin_phrase,
.end_phrase = pgf_bracket_lzn_end_phrase
};
/* Concr.bracketedLinearize(expr): linearize an abstract tree into a
 * bracketed string.
 *
 * Takes a single pgf.Expr argument, linearizes the first concrete tree
 * found for it and returns the first element collected at the top level
 * (a new reference).
 *
 * Returns NULL with an exception set when:
 *  - the argument is not a pgf.Expr (raised by PyArg_ParseTuple);
 *  - the tree cannot be linearized (PGFError);
 *  - a callback left a Python exception pending during linearization;
 *  - the linearization produced nothing (PGFError).  This case used to
 *    crash in Py_INCREF(NULL), because end_phrase adds no bracket for a
 *    phrase without children. */
static PyObject*
Concr_bracketedLinearize(ConcrObject* self, PyObject *args)
{
    ExprObject* pyexpr;
    if (!PyArg_ParseTuple(args, "O!", &pgf_ExprType, &pyexpr))
        return NULL;

    GuPool* tmp_pool = gu_local_pool();

    GuEnum* cts =
        pgf_lzr_concretize(self->concr, pyexpr->expr, tmp_pool);
    PgfCncTree ctree = gu_next(cts, PgfCncTree, tmp_pool);
    if (gu_variant_is_null(ctree)) {
        PyErr_SetString(PGFError, "The abstract tree cannot be linearized");
        gu_pool_free(tmp_pool);
        return NULL;
    }

    PyObject* list = PyList_New(0);
    if (list == NULL) {
        gu_pool_free(tmp_pool);
        return NULL;
    }

    PgfBracketLznState state;
    state.funcs = &pgf_bracket_lin_funcs;
    state.stack = gu_new_buf(PyObject*, tmp_pool);
    state.list = list;
    pgf_lzr_linearize(self->concr, ctree, 0, &state.funcs);

    gu_pool_free(tmp_pool);

    /* The callbacks cannot report errors themselves; surface whatever
     * exception they left pending instead of returning a result. */
    if (PyErr_Occurred()) {
        Py_DECREF(list);
        return NULL;
    }

    if (PyList_Size(list) <= 0) {
        Py_DECREF(list);
        /* PyList_Size raises on its own only when it fails (-1) */
        if (!PyErr_Occurred()) {
            PyErr_SetString(PGFError,
                            "The linearization of the abstract tree is empty");
        }
        return NULL;
    }

    /* borrowed reference: keep it alive past the release of the list */
    PyObject* bracket = PyList_GetItem(list, 0);
    if (bracket == NULL) {
        Py_DECREF(list);
        return NULL;
    }
    Py_INCREF(bracket);
    Py_DECREF(list);

    return bracket;
}
static PyObject*
Concr_getName(ConcrObject *self, void *closure)
{
@@ -753,7 +1007,10 @@ static PyMethodDef Concr_methods[] = {
"Parses a string and returns an iterator over the abstract trees for this sentence"
},
{"linearize", (PyCFunction)Concr_linearize, METH_VARARGS,
"Takes an abstract tree and linearizes it to a sentence"
"Takes an abstract tree and linearizes it to a string"
},
{"bracketedLinearize", (PyCFunction)Concr_bracketedLinearize, METH_VARARGS,
"Takes an abstract tree and linearizes it to a bracketed string"
},
{NULL} /* Sentinel */
};
@@ -1234,6 +1491,9 @@ initpgf(void)
if (PyType_Ready(&pgf_ConcrType) < 0)
return;
if (PyType_Ready(&pgf_BracketType) < 0)
return;
if (PyType_Ready(&pgf_ExprType) < 0)
return;
@@ -1260,4 +1520,5 @@ initpgf(void)
Py_INCREF(&pgf_PGFType);
Py_INCREF(&pgf_ConcrType);
Py_INCREF(&pgf_ExprIterType);
Py_INCREF(&pgf_BracketType);
}