loading and unloading of languages in the C runtime and in the Python bindings

This commit is contained in:
kr.angelov
2013-12-10 15:00:52 +00:00
parent a4be3c169e
commit 6028a63b42
11 changed files with 303 additions and 68 deletions

View File

@@ -99,6 +99,7 @@ typedef struct {
PgfFlags* aflags;
PgfCIdMap* funs; // |-> PgfAbsFun*
PgfCIdMap* cats; // |-> PgfAbsCat*
PgfAbsFun* abs_lin_fun;
} PgfAbstr;
struct PgfPGF {
@@ -241,6 +242,9 @@ struct PgfConcr {
PgfCIdMap* cnccats;
PgfCallbacksMap* callbacks;
int total_cats;
GuPool* pool; // if the language is loaded separately then this is the pool
GuFinalizer fin; // and this is the finalizer in the pool of the whole grammar
};
extern GU_DECLARE_TYPE(PgfConcr, abstract);

View File

@@ -260,7 +260,10 @@ pgf_graphviz_parse_tree(PgfConcr* concr, PgfExpr expr, GuOut* out, GuExn* err)
GuPool* tmp_pool = gu_local_pool();
GuEnum* cts =
pgf_lzr_concretize(concr, expr, tmp_pool);
pgf_lzr_concretize(concr, expr, err, tmp_pool);
if (!gu_ok(err))
return;
PgfCncTree ctree = gu_next(cts, PgfCncTree, tmp_pool);
if (gu_variant_is_null(ctree)) {
gu_pool_free(tmp_pool);

View File

@@ -618,8 +618,17 @@ pgf_cnc_tree_enum_next(GuEnum* self, void* to, GuPool* pool)
}
PgfCncTreeEnum*
pgf_lzr_concretize(PgfConcr* concr, PgfExpr expr, GuPool* pool)
pgf_lzr_concretize(PgfConcr* concr, PgfExpr expr, GuExn* err, GuPool* pool)
{
if (concr->fun_indices == NULL ||
concr->coerce_idx == NULL) {
GuExnData* err_data = gu_raise(err, PgfExn);
if (err_data) {
err_data->data = "The concrete syntax is not loaded";
return NULL;
}
}
PgfLzn* lzn = gu_new(PgfLzn, pool);
lzn->concr = concr;
lzn->expr = expr;
@@ -947,7 +956,10 @@ pgf_linearize(PgfConcr* concr, PgfExpr expr, GuOut* out, GuExn* err)
GuPool* tmp_pool = gu_local_pool();
GuEnum* cts =
pgf_lzr_concretize(concr, expr, tmp_pool);
pgf_lzr_concretize(concr, expr, err, tmp_pool);
if (!gu_ok(err))
return;
PgfCncTree ctree = gu_next(cts, PgfCncTree, tmp_pool);
if (!gu_variant_is_null(ctree)) {
ctree = pgf_lzr_wrap_linref(ctree, tmp_pool);

View File

@@ -28,7 +28,7 @@ typedef GuEnum PgfCncTreeEnum;
/// Begin enumerating concrete syntax variants.
PgfCncTreeEnum*
pgf_lzr_concretize(PgfConcr* concr, PgfExpr expr, GuPool* pool);
pgf_lzr_concretize(PgfConcr* concr, PgfExpr expr, GuExn* err, GuPool* pool);
typedef struct {
} PgfLinNonExist;

View File

@@ -2438,6 +2438,16 @@ pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat, GuString sentence,
GuExn* err,
GuPool* pool, GuPool* out_pool)
{
if (concr->sequences == NULL ||
concr->pre_sequences == NULL ||
concr->cnccats == NULL) {
GuExnData* err_data = gu_raise(err, PgfExn);
if (err_data) {
err_data->data = "The concrete syntax is not loaded";
return NULL;
}
}
// Begin parsing a sentence with the specified category
PgfParsing* ps =
pgf_parsing_init(concr, cat, 0, sentence, heuristics, err, pool, out_pool);
@@ -2552,6 +2562,14 @@ void
pgf_lookup_morpho(PgfConcr *concr, GuString sentence,
PgfMorphoCallback* callback, GuExn* err)
{
if (concr->sequences == NULL) {
GuExnData* err_data = gu_raise(err, PgfExn);
if (err_data) {
err_data->data = "The concrete syntax is not loaded";
return;
}
}
PgfSequence* seq = (PgfSequence*)
gu_seq_binsearch(concr->sequences, pgf_sequence_order,
PgfSequence, (void*) sentence);
@@ -2577,21 +2595,23 @@ gu_fullform_enum_next(GuEnum* self, void* to, GuPool* pool)
PgfFullFormState* st = gu_container(self, PgfFullFormState, en);
PgfFullFormEntry* entry = NULL;
size_t n_seqs = gu_seq_length(st->sequences);
while (st->seq_idx < n_seqs) {
PgfSymbols* syms = gu_seq_index(st->sequences, PgfSequence, st->seq_idx)->syms;
GuString tokens = pgf_get_tokens(syms, 0, pool);
if (strlen(tokens) > 0 &&
gu_seq_index(st->sequences, PgfSequence, st->seq_idx)->idx != NULL) {
entry = gu_new(PgfFullFormEntry, pool);
entry->tokens = tokens;
entry->idx = gu_seq_index(st->sequences, PgfSequence, st->seq_idx)->idx;
if (st->sequences != NULL) {
size_t n_seqs = gu_seq_length(st->sequences);
while (st->seq_idx < n_seqs) {
PgfSymbols* syms = gu_seq_index(st->sequences, PgfSequence, st->seq_idx)->syms;
GuString tokens = pgf_get_tokens(syms, 0, pool);
if (strlen(tokens) > 0 &&
gu_seq_index(st->sequences, PgfSequence, st->seq_idx)->idx != NULL) {
entry = gu_new(PgfFullFormEntry, pool);
entry->tokens = tokens;
entry->idx = gu_seq_index(st->sequences, PgfSequence, st->seq_idx)->idx;
st->seq_idx++;
break;
}
st->seq_idx++;
break;
}
st->seq_idx++;
}
*((PgfFullFormEntry**) to) = entry;

View File

@@ -171,8 +171,15 @@ pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
{
GuPool* pool = gu_new_pool();
GuExn* err = gu_new_exn(NULL, gu_kind(type), pool);
GuEnum* en_lins1 =
pgf_lzr_concretize(concr, expr, pool);
pgf_lzr_concretize(concr, expr, err, pool);
if (!gu_ok(err)) {
gu_pool_free(pool);
return false;
}
PgfCncTree ctree1 = gu_next(en_lins1, PgfCncTree, pool);
if (gu_variant_is_null(ctree1)) {
gu_pool_free(pool);
@@ -213,7 +220,7 @@ pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
}
GuEnum* en_lins2 =
pgf_lzr_concretize(concr, ep->expr, pool);
pgf_lzr_concretize(concr, ep->expr, err, pool);
PgfCncTree ctree2 = gu_next(en_lins2, PgfCncTree, pool);
if (gu_variant_is_null(ctree2)) {
gu_pool_free(pool);

View File

@@ -577,6 +577,17 @@ pgf_read_abstract(PgfReader* rdr, PgfAbstr* abstract)
abstract->cats = pgf_read_abscats(rdr, abstract);
gu_return_on_exn(rdr->err, );
abstract->abs_lin_fun = gu_new(PgfAbsFun, rdr->opool);
abstract->abs_lin_fun->name = "_";
abstract->abs_lin_fun->type = gu_new(PgfType, rdr->opool);
abstract->abs_lin_fun->type->hypos = NULL;
abstract->abs_lin_fun->type->cid = "_";
abstract->abs_lin_fun->type->n_exprs = 0;
abstract->abs_lin_fun->arity = 0;
abstract->abs_lin_fun->defns = NULL;
abstract->abs_lin_fun->ep.prob = INFINITY;
abstract->abs_lin_fun->ep.expr = gu_null_variant;
}
static PgfCIdMap*
@@ -864,7 +875,7 @@ pgf_read_funid(PgfReader* rdr, PgfConcr* concr)
}
static void
pgf_read_lindefs(PgfReader* rdr, PgfAbsFun* abs_lin_fun, PgfConcr* concr)
pgf_read_lindefs(PgfReader* rdr, PgfConcr* concr)
{
size_t len = pgf_read_len(rdr);
gu_return_on_exn(rdr->err, );
@@ -878,14 +889,14 @@ pgf_read_lindefs(PgfReader* rdr, PgfAbsFun* abs_lin_fun, PgfConcr* concr)
ccat->lindefs = gu_new_seq(PgfCncFun*, n_funs, rdr->opool);
for (size_t j = 0; j < n_funs; j++) {
PgfCncFun* fun = pgf_read_funid(rdr, concr);
fun->absfun = abs_lin_fun;
fun->absfun = concr->abstr->abs_lin_fun;
gu_seq_set(ccat->lindefs, PgfCncFun*, j, fun);
}
}
}
static void
pgf_read_linrefs(PgfReader* rdr, PgfAbsFun* abs_lin_fun, PgfConcr* concr)
pgf_read_linrefs(PgfReader* rdr, PgfConcr* concr)
{
size_t len = pgf_read_len(rdr);
gu_return_on_exn(rdr->err, );
@@ -899,7 +910,7 @@ pgf_read_linrefs(PgfReader* rdr, PgfAbsFun* abs_lin_fun, PgfConcr* concr)
ccat->linrefs = gu_new_seq(PgfCncFun*, n_funs, rdr->opool);
for (size_t j = 0; j < n_funs; j++) {
PgfCncFun* fun = pgf_read_funid(rdr, concr);
fun->absfun = abs_lin_fun;
fun->absfun = concr->abstr->abs_lin_fun;
gu_seq_set(ccat->linrefs, PgfCncFun*, j, fun);
}
}
@@ -1147,8 +1158,67 @@ pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err)
// pgf_ccat_set_viterbi_prob(ccat);
}
/**
 * Read the content of a concrete syntax from the reader into `concr`.
 *
 * Fills in, in stream order: printnames, sequences, cncfuns, lindefs,
 * linrefs, ccats, cnccats and total_cats, and creates the auxiliary
 * containers (pre_sequences, ccats, fun_indices, coerce_idx, callbacks)
 * in rdr->opool. `concr->abstr` must already be set.
 *
 * On a read failure the function returns as soon as the failure is seen,
 * leaving the remaining fields untouched; the caller detects the failure
 * through rdr->err.
 */
void
pgf_read_concrete_content(PgfReader* rdr, PgfConcr* concr)
{
	concr->printnames =
		pgf_read_printnames(rdr);
	gu_return_on_exn(rdr->err,);

	concr->sequences =
		pgf_read_sequences(rdr);
	gu_return_on_exn(rdr->err,);

	concr->pre_sequences = gu_new_buf(PgfSequence, rdr->opool);

	concr->cncfuns =
		pgf_read_cncfuns(rdr, concr->abstr, concr);
	gu_return_on_exn(rdr->err,);

	concr->ccats =
		gu_new_int_map(PgfCCat*, &gu_null_struct, rdr->opool);
	concr->fun_indices = gu_map_type_new(PgfCncFunOverloadMap, rdr->opool);
	concr->coerce_idx  = gu_map_type_new(PgfCncOverloadMap, rdr->opool);

	// Each of the following steps consumes input; stop at the first
	// failure instead of continuing to read from a failed reader.
	pgf_read_lindefs(rdr, concr);
	gu_return_on_exn(rdr->err,);

	pgf_read_linrefs(rdr, concr);
	gu_return_on_exn(rdr->err,);

	pgf_read_ccats(rdr, concr);
	gu_return_on_exn(rdr->err,);

	concr->cnccats = pgf_read_cnccats(rdr, concr->abstr, concr);
	gu_return_on_exn(rdr->err,);

	concr->callbacks = pgf_new_callbacks_map(concr, rdr->opool);

	concr->total_cats = pgf_read_int(rdr);
	gu_return_on_exn(rdr->err,);

	// Post-process the categories only once everything was read
	// successfully, so the callback never sees a partially read map.
	GuMapItor clo1 = { pgf_read_ccat_cb };
	gu_map_iter(concr->ccats, &clo1, NULL);
}
/**
 * Put `concr` into the "content not loaded" state: every field that
 * pgf_read_concrete_content() would fill in is reset to NULL (or 0 for
 * the category counter). Fields outside the content are not touched.
 */
static void
pgf_read_concrete_init_header(PgfConcr* concr)
{
	// scalar state
	concr->total_cats    = 0;

	// lexical data
	concr->printnames    = NULL;
	concr->sequences     = NULL;
	concr->pre_sequences = NULL;

	// functions and their indices
	concr->cncfuns       = NULL;
	concr->fun_indices   = NULL;
	concr->coerce_idx    = NULL;

	// categories and callbacks
	concr->ccats         = NULL;
	concr->cnccats       = NULL;
	concr->callbacks     = NULL;
}
/**
 * Finalizer for a concrete syntax whose content is loaded separately.
 *
 * `fin` is the finalizer embedded in a PgfConcr. If that PgfConcr owns a
 * content pool, its content fields are reset, the pool is freed and the
 * pool pointer is cleared; otherwise nothing happens.
 */
static void
gu_concr_fini(GuFinalizer* fin)
{
	PgfConcr* concr = gu_container(fin, PgfConcr, fin);

	// Nothing was loaded separately, so there is nothing to release.
	if (concr->pool == NULL)
		return;

	// Drop the pointers into the pool before the pool itself goes away.
	pgf_read_concrete_init_header(concr);

	gu_pool_free(concr->pool);
	concr->pool = NULL;
}
static PgfConcr*
pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr, PgfAbsFun* abs_lin_fun)
pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr, bool with_content)
{
PgfConcr* concr = gu_new(PgfConcr, rdr->opool);
@@ -1161,40 +1231,66 @@ pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr, PgfAbsFun* abs_lin_fun)
concr->cflags =
pgf_read_flags(rdr);
gu_return_on_exn(rdr->err, NULL);
concr->printnames =
pgf_read_printnames(rdr);
concr->pool = NULL;
if (with_content)
pgf_read_concrete_content(rdr, concr);
else {
pgf_read_concrete_init_header(concr);
concr->fin.fn = gu_concr_fini;
gu_pool_finally(rdr->opool, &concr->fin);
}
gu_return_on_exn(rdr->err, NULL);
concr->sequences =
pgf_read_sequences(rdr);
gu_return_on_exn(rdr->err, NULL);
concr->pre_sequences = gu_new_buf(PgfSequence, rdr->opool);
concr->cncfuns =
pgf_read_cncfuns(rdr, abstr, concr);
gu_return_on_exn(rdr->err, NULL);
concr->ccats =
gu_new_int_map(PgfCCat*, &gu_null_struct, rdr->opool);
concr->fun_indices = gu_map_type_new(PgfCncFunOverloadMap, rdr->opool);
concr->coerce_idx = gu_map_type_new(PgfCncOverloadMap, rdr->opool);
pgf_read_lindefs(rdr, abs_lin_fun, concr);
pgf_read_linrefs(rdr, abs_lin_fun, concr);
pgf_read_ccats(rdr, concr);
concr->cnccats = pgf_read_cnccats(rdr, abstr, concr);
concr->callbacks = pgf_new_callbacks_map(concr, rdr->opool);
concr->total_cats = pgf_read_int(rdr);
GuMapItor clo1 = { pgf_read_ccat_cb };
gu_map_iter(concr->ccats, &clo1, NULL);
return concr;
}
/**
 * Load the content of the concrete syntax `concr` from the stream `in`.
 *
 * Does nothing if `concr` already owns a content pool (already loaded).
 * The stream must start with the name of the concrete syntax; if that
 * name differs from concr->name a PgfExn is raised on `err`.
 *
 * On success the newly allocated pool is stored in concr->pool. On any
 * failure both pools created here are released, concr->pool stays NULL
 * and, if content reading had already started, the content fields of
 * `concr` are reset so that they never point into freed memory.
 */
void
pgf_concrete_load(PgfConcr* concr, GuIn* in, GuExn* err)
{
	if (concr->pool != NULL)
		return;     // already loaded

	GuPool* pool = gu_new_pool();
	GuPool* tmp_pool = gu_local_pool();

	PgfReader* rdr = pgf_new_reader(in, pool, tmp_pool, err);

	PgfCId name =
		pgf_read_cid(rdr, rdr->tmp_pool);
	if (!gu_ok(rdr->err))
		goto fail;

	if (strcmp(name, concr->name) != 0) {
		GuExnData* err_data = gu_raise(rdr->err, PgfExn);
		if (err_data) {
			err_data->data = "This file contains different concrete syntax";
		}
		// Abort even when no exception data could be attached:
		// the file does not belong to this concrete syntax.
		goto fail;
	}

	// The flags are consumed from the stream; their value is not used here.
	pgf_read_flags(rdr);
	if (!gu_ok(rdr->err))
		goto fail;

	pgf_read_concrete_content(rdr, concr);
	if (!gu_ok(rdr->err)) {
		// Whatever was read so far lives in `pool`, which is freed below.
		pgf_read_concrete_init_header(concr);
		goto fail;
	}

	// Only a completely read language is marked as loaded.
	concr->pool = pool;

	gu_pool_free(tmp_pool);
	return;

fail:
	gu_pool_free(tmp_pool);
	gu_pool_free(pool);
}
/**
 * Unload the content of the concrete syntax `concr`.
 *
 * Runs the concrete syntax finalizer directly: if `concr` owns a content
 * pool, the content fields are reset and the pool is freed; if it does
 * not, the call has no effect.
 */
void
pgf_concrete_unload(PgfConcr* concr)
{
// Reuse the finalizer so that explicit unloading and pool finalization
// share one code path.
gu_concr_fini(&concr->fin);
}
static PgfCIdMap*
pgf_read_concretes(PgfReader* rdr, PgfAbstr* abstr)
pgf_read_concretes(PgfReader* rdr, PgfAbstr* abstr, bool with_content)
{
GuMapType* map_type = (GuMapType*)
GU_TYPE_LIT(GuStringMap, _,
@@ -1205,19 +1301,8 @@ pgf_read_concretes(PgfReader* rdr, PgfAbstr* abstr)
size_t len = pgf_read_len(rdr);
gu_return_on_exn(rdr->err, NULL);
PgfAbsFun* abs_lin_fun = gu_new(PgfAbsFun, rdr->opool);
abs_lin_fun->name = "_";
abs_lin_fun->type = gu_new(PgfType, rdr->opool);
abs_lin_fun->type->hypos = NULL;
abs_lin_fun->type->cid = "_";
abs_lin_fun->type->n_exprs = 0;
abs_lin_fun->arity = 0;
abs_lin_fun->defns = NULL;
abs_lin_fun->ep.prob = INFINITY;
abs_lin_fun->ep.expr = gu_null_variant;
for (size_t i = 0; i < len; i++) {
PgfConcr* concr = pgf_read_concrete(rdr, abstr, abs_lin_fun);
PgfConcr* concr = pgf_read_concrete(rdr, abstr, with_content);
gu_return_on_exn(rdr->err, NULL);
gu_map_put(concretes, concr->name, PgfConcr*, concr);
@@ -1242,7 +1327,9 @@ pgf_read_pgf(PgfReader* rdr) {
pgf_read_abstract(rdr, &pgf->abstract);
gu_return_on_exn(rdr->err, NULL);
pgf->concretes = pgf_read_concretes(rdr, &pgf->abstract);
bool with_content =
gu_variant_is_null(gu_map_get(pgf->gflags, "split", PgfLiteral));
pgf->concretes = pgf_read_concretes(rdr, &pgf->abstract, with_content);
gu_return_on_exn(rdr->err, NULL);
return pgf;

View File

@@ -13,6 +13,12 @@ pgf_new_reader(GuIn* in, GuPool* opool, GuPool* tmp_pool, GuExn* err);
PgfPGF*
pgf_read_pgf(PgfReader* rdr);
/// Load the content of the concrete syntax `concr` from the stream `in`.
/// Returns immediately if `concr` already owns a content pool. Raises a
/// PgfExn on `err` when the name stored in the stream differs from the
/// name of `concr`; read failures are reported through `err` as well.
void
pgf_concrete_load(PgfConcr* concr, GuIn* in, GuExn* err);
/// Unload the content of `concr`: if it owns a content pool, the content
/// fields are reset and the pool is freed; otherwise nothing happens.
void
pgf_concrete_unload(PgfConcr* concr);
void
pgf_reader_done(PgfReader* rdr, PgfPGF* pgf);

View File

@@ -24,7 +24,7 @@ print_result(PgfExprProb* ep, PgfConcr* to_concr,
// Enumerate the concrete syntax trees corresponding
// to the abstract tree.
GuEnum* cts = pgf_lzr_concretize(to_concr, ep->expr, ppool);
GuEnum* cts = pgf_lzr_concretize(to_concr, ep->expr, err, ppool);
while (true) {
PgfCncTree ctree =
gu_next(cts, PgfCncTree, ppool);

View File

@@ -27,6 +27,12 @@ public class Concr {
public native boolean hasLinearization(String id);
// Native entry points for loading and unloading this concrete syntax.
// NOTE(review): the JNI implementations are not visible here; presumably
// they wrap pgf_concrete_load / pgf_concrete_unload from the C runtime and
// `path` / `stream` supply the separately stored language — confirm.
public native void load(String path) throws FileNotFoundException;
public native void load(InputStream stream);
public native void unload();
//////////////////////////////////////////////////////////////////
// private stuff

View File

@@ -6,6 +6,7 @@
#include <gu/file.h>
#include <pgf/pgf.h>
#include <pgf/linearizer.h>
#include <pgf/reader.h>
static PyObject* PGFError;
@@ -1245,7 +1246,11 @@ Concr_linearize(ConcrObject* self, PyObject *args)
if (!gu_ok(err)) {
if (gu_exn_caught(err) == gu_type(PgfLinNonExist))
Py_RETURN_NONE;
else {
else if (gu_exn_caught(err) == gu_type(PgfExn)) {
GuString msg = (GuString) gu_exn_caught_data(err);
PyErr_SetString(PGFError, msg);
return NULL;
} else {
PyErr_SetString(PGFError, "The abstract tree cannot be linearized");
return NULL;
}
@@ -1481,9 +1486,20 @@ Concr_bracketedLinearize(ConcrObject* self, PyObject *args)
return NULL;
GuPool* tmp_pool = gu_local_pool();
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
GuEnum* cts =
pgf_lzr_concretize(self->concr, pyexpr->expr, tmp_pool);
pgf_lzr_concretize(self->concr, pyexpr->expr, err, tmp_pool);
if (!gu_ok(err)) {
if (gu_exn_caught(err) == gu_type(PgfExn)) {
GuString msg = (GuString) gu_exn_caught_data(err);
PyErr_SetString(PGFError, msg);
return NULL;
} else {
PyErr_SetString(PGFError, "The abstract tree cannot be concretized");
}
}
PgfCncTree ctree = gu_next(cts, PgfCncTree, tmp_pool);
if (gu_variant_is_null(ctree)) {
PyErr_SetString(PGFError, "The abstract tree cannot be concretized");
@@ -1531,7 +1547,12 @@ Concr_graphvizParseTree(ConcrObject* self, PyObject *args) {
pgf_graphviz_parse_tree(self->concr, pyexpr->expr, out, err);
if (!gu_ok(err)) {
PyErr_SetString(PGFError, "The parse tree cannot be visualized");
if (gu_exn_caught(err) == gu_type(PgfExn)) {
GuString msg = (GuString) gu_exn_caught_data(err);
PyErr_SetString(PGFError, msg);
} else {
PyErr_SetString(PGFError, "The parse tree cannot be visualized");
}
return NULL;
}
@@ -1581,6 +1602,16 @@ Concr_lookupMorpho(ConcrObject* self, PyObject *args) {
PyMorphoCallback callback = { { pypgf_collect_morpho }, analyses };
pgf_lookup_morpho(self->concr, sent, &callback.fn, err);
if (!gu_ok(err)) {
if (gu_exn_caught(err) == gu_type(PgfExn)) {
GuString msg = (GuString) gu_exn_caught_data(err);
PyErr_SetString(PGFError, msg);
} else {
PyErr_SetString(PGFError, "The lookup failed");
}
Py_DECREF(analyses);
analyses = NULL;
}
gu_pool_free(tmp_pool);
@@ -1654,6 +1685,59 @@ Concr_fullFormLexicon(ConcrObject* self, PyObject *args)
return (PyObject*) pyres;
}
/**
 * Python method Concr.load(path): load the content of this concrete
 * syntax from the file at `path`.
 *
 * Returns None on success. On failure returns NULL with a Python
 * exception set: IOError if the file cannot be opened or an I/O error
 * (GuErrno) was raised while reading, PGFError with the runtime's message
 * for a PgfExn, and a generic PGFError otherwise.
 *
 * The input file and the temporary pool are released on every path.
 */
static PyObject*
Concr_load(ConcrObject* self, PyObject *args)
{
	const char *fpath;
	if (!PyArg_ParseTuple(args, "s", &fpath))
		return NULL;

	// Open the file first: if this fails there is nothing to clean up.
	FILE* infile = fopen(fpath, "rb");
	if (infile == NULL) {
		PyErr_SetFromErrnoWithFilename(PyExc_IOError, fpath);
		return NULL;
	}

	GuPool* tmp_pool = gu_local_pool();

	// Create an exception frame that catches all errors.
	GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);

	// Create an input stream from the input file
	GuIn* in = gu_file_in(infile, tmp_pool);

	// Read the concrete syntax.
	pgf_concrete_load(self->concr, in, err);

	PyObject* result = NULL;
	if (gu_ok(err)) {
		Py_INCREF(Py_None);
		result = Py_None;
	} else if (gu_exn_caught(err) == gu_type(GuErrno)) {
		errno = *((GuErrno*) gu_exn_caught_data(err));
		PyErr_SetFromErrnoWithFilename(PyExc_IOError, fpath);
	} else if (gu_exn_caught(err) == gu_type(PgfExn)) {
		GuString msg = (GuString) gu_exn_caught_data(err);
		PyErr_SetString(PGFError, msg);
	} else {
		PyErr_SetString(PGFError, "The language cannot be loaded");
	}

	// The Python error (if any) is already set, so the exception data in
	// tmp_pool is no longer needed. Free the pool before closing the file
	// that the input stream allocated in it refers to.
	gu_pool_free(tmp_pool);
	fclose(infile);

	return result;
}
/**
 * Python method Concr.unload(): unload the content of this concrete
 * syntax. Takes no arguments and returns None; returns NULL (with the
 * exception set by PyArg_ParseTuple) if arguments were passed.
 */
static PyObject*
Concr_unload(ConcrObject* self, PyObject *args)
{
	// An empty format string accepts only an empty argument tuple.
	if (PyArg_ParseTuple(args, "")) {
		pgf_concrete_unload(self->concr);
		Py_RETURN_NONE;
	}

	return NULL;
}
static PyGetSetDef Concr_getseters[] = {
{"name",
(getter)Concr_getName, NULL,
@@ -1702,6 +1786,12 @@ static PyMethodDef Concr_methods[] = {
{"fullFormLexicon", (PyCFunction)Concr_fullFormLexicon, METH_VARARGS,
"Enumerates all words in the lexicon (useful for extracting full form lexicons)"
},
{"load", (PyCFunction)Concr_load, METH_VARARGS,
"Loads the concrete syntax from a .pgf_c file"
},
{"unload", (PyCFunction)Concr_unload, METH_VARARGS,
"Unloads the concrete syntax"
},
{NULL} /* Sentinel */
};