manually copy the "c-runtime" branch from the old repository.

This commit is contained in:
Krasimir Angelov
2018-11-02 14:38:44 +01:00
parent bf5abe2948
commit 5a2b200948
80 changed files with 2618 additions and 1527 deletions

View File

@@ -87,14 +87,13 @@ libpgf_la_SOURCES = \
pgf/graphviz.c \
pgf/aligner.c \
pgf/pgf.c \
pgf/pgf.h
libpgf_la_LDFLAGS = -no-undefined
pgf/pgf.h \
libpgf_la_LDFLAGS = "-no-undefined"
libpgf_la_LIBADD = libgu.la
libsg_la_SOURCES = \
sg/sqlite3Btree.c \
sg/sg.c
libsg_la_LDFLAGS = -no-undefined
libsg_la_LIBADD = libgu.la libpgf.la
bin_PROGRAMS =

View File

@@ -23,14 +23,6 @@
#define restrict __restrict
#elif defined(__MINGW32__)
#define GU_API_DECL
#define GU_API
#define GU_INTERNAL_DECL
#define GU_INTERNAL
#else
#define GU_API_DECL
@@ -38,9 +30,7 @@
#define GU_INTERNAL_DECL __attribute__ ((visibility ("hidden")))
#define GU_INTERNAL __attribute__ ((visibility ("hidden")))
#endif
// end MSVC workaround
#include <stddef.h>

View File

@@ -30,8 +30,8 @@ pgf_expr_unwrap(PgfExpr expr)
}
}
PGF_API int
pgf_expr_arity(PgfExpr expr)
static PgfExprTag
pgf_expr_arity(PgfExpr expr, int *arity)
{
int n = 0;
while (true) {
@@ -44,10 +44,9 @@ pgf_expr_arity(PgfExpr expr)
n = n + 1;
break;
}
case PGF_EXPR_FUN:
return n;
default:
return -1;
*arity = n;
return i.tag;
}
}
}
@@ -55,8 +54,8 @@ pgf_expr_arity(PgfExpr expr)
PGF_API PgfApplication*
pgf_expr_unapply(PgfExpr expr, GuPool* pool)
{
int arity = pgf_expr_arity(expr);
if (arity < 0) {
int arity;
if (pgf_expr_arity(expr, &arity) != PGF_EXPR_FUN) {
return NULL;
}
PgfApplication* appl = gu_new_flex(pool, PgfApplication, args, arity);
@@ -68,13 +67,38 @@ pgf_expr_unapply(PgfExpr expr, GuPool* pool)
appl->args[n] = app->arg;
expr = app->fun;
}
PgfExpr e = pgf_expr_unwrap(expr);
gu_assert(gu_variant_tag(e) == PGF_EXPR_FUN);
PgfExprFun* fun = gu_variant_data(e);
appl->efun = pgf_expr_unwrap(expr);
gu_assert(gu_variant_tag(appl->efun) == PGF_EXPR_FUN);
PgfExprFun* fun = gu_variant_data(appl->efun);
appl->fun = fun->fun;
return appl;
}
PGF_API PgfApplication*
pgf_expr_unapply_ex(PgfExpr expr, GuPool* pool)
{
int arity;
pgf_expr_arity(expr, &arity);
PgfApplication* appl = gu_new_flex(pool, PgfApplication, args, arity);
appl->n_args = arity;
for (int n = arity - 1; n >= 0; n--) {
PgfExpr e = pgf_expr_unwrap(expr);
gu_assert(gu_variant_tag(e) == PGF_EXPR_APP);
PgfExprApp* app = gu_variant_data(e);
appl->args[n] = app->arg;
expr = app->fun;
}
appl->efun = pgf_expr_unwrap(expr);
if (gu_variant_tag(appl->efun) == PGF_EXPR_FUN) {
PgfExprFun* fun = gu_variant_data(appl->efun);
appl->fun = fun->fun;
} else {
appl->fun = NULL;
}
return appl;
}
PGF_API PgfExpr
pgf_expr_apply(PgfApplication* app, GuPool* pool)
{
@@ -675,6 +699,17 @@ pgf_expr_parser_binds(PgfExprParser* parser)
return binds;
}
PGF_API GuString
pgf_expr_parser_ident(PgfExprParser* parser)
{
GuString ident = NULL;
if (parser->token_tag == PGF_TOKEN_IDENT) {
ident = gu_string_copy(gu_string_buf_data(parser->token_value), parser->expr_pool);
pgf_expr_parser_token(parser, true);
}
return ident;
}
PGF_API PgfExpr
pgf_expr_parser_expr(PgfExprParser* parser, bool mark)
{

View File

@@ -126,12 +126,10 @@ typedef struct {
PgfExpr expr;
} PgfExprProb;
PGF_API_DECL int
pgf_expr_arity(PgfExpr expr);
typedef struct PgfApplication PgfApplication;
struct PgfApplication {
PgfExpr efun;
PgfCId fun;
int n_args;
PgfExpr args[];
@@ -140,6 +138,9 @@ struct PgfApplication {
PGF_API_DECL PgfApplication*
pgf_expr_unapply(PgfExpr expr, GuPool* pool);
PGF_API_DECL PgfApplication*
pgf_expr_unapply_ex(PgfExpr expr, GuPool* pool);
PGF_API_DECL PgfExpr
pgf_expr_apply(PgfApplication*, GuPool* pool);

View File

@@ -175,9 +175,8 @@ redo:;
gu_buf_get(buf, PgfProductionApply*, index);
gu_assert(n_args == gu_seq_length(papply->args));
capp->abs_id = papply->fun->absfun->name;
capp->fun = papply->fun;
capp->fid = 0;
capp->fun = papply->fun;
capp->fid = 0;
capp->n_args = n_args;
for (size_t i = 0; i < n_args; i++) {
@@ -223,10 +222,10 @@ redo:;
static PgfCncTree
pgf_cnc_resolve_def(PgfCnc* cnc,
size_t n_vars, PgfPrintContext* context,
PgfCId abs_id, PgfCCat* ccat, GuString s, GuPool* pool)
PgfCCat* ccat, GuString s, GuPool* pool)
{
PgfCncTree ret = gu_null_variant;
PgfCncTree lit = gu_null_variant;
PgfCncTree ret = gu_null_variant;
PgfCncTreeLit* clit =
gu_new_variant(PGF_CNC_TREE_LIT,
@@ -234,7 +233,7 @@ pgf_cnc_resolve_def(PgfCnc* cnc,
&lit, pool);
clit->n_vars = 0;
clit->context = context;
clit->fid = -1; // don't report the literal in the bracket
clit->fid = cnc->fid++;
PgfLiteralStr* lit_str =
gu_new_flex_variant(PGF_LITERAL_STR,
PgfLiteralStr,
@@ -242,7 +241,7 @@ pgf_cnc_resolve_def(PgfCnc* cnc,
&clit->lit, pool);
strcpy((char*) lit_str->val, (char*) s);
if (ccat == NULL || ccat->lindefs == NULL)
if (ccat->lindefs == NULL)
return lit;
int index =
@@ -254,10 +253,9 @@ pgf_cnc_resolve_def(PgfCnc* cnc,
gu_new_flex_variant(PGF_CNC_TREE_APP,
PgfCncTreeApp,
args, 1, &ret, pool);
capp->ccat = ccat;
capp->abs_id= abs_id;
capp->fun = gu_seq_get(ccat->lindefs, PgfCncFun*, index);
capp->fid = cnc->fid++;
capp->ccat = ccat;
capp->fun = gu_seq_get(ccat->lindefs, PgfCncFun*, index);
capp->fid = cnc->fid++;
capp->n_vars = n_vars;
capp->context = context;
capp->n_args = 1;
@@ -297,7 +295,7 @@ pgf_lzr_wrap_linref(PgfCncTree ctree, GuPool* pool)
PgfCncTreeApp* capp = cti.data;
assert(gu_seq_length(capp->ccat->linrefs) > 0);
// here we must apply the linref function
PgfCncTree new_ctree;
PgfCncTreeApp* new_capp =
@@ -305,7 +303,6 @@ pgf_lzr_wrap_linref(PgfCncTree ctree, GuPool* pool)
PgfCncTreeApp,
args, 1, &new_ctree, pool);
new_capp->ccat = NULL;
new_capp->abs_id = NULL;
new_capp->fun = gu_seq_get(capp->ccat->linrefs, PgfCncFun*, 0);
new_capp->fid = -1;
new_capp->n_vars = 0;
@@ -317,7 +314,7 @@ pgf_lzr_wrap_linref(PgfCncTree ctree, GuPool* pool)
break;
}
}
return ctree;
}
@@ -399,17 +396,6 @@ pgf_cnc_resolve(PgfCnc* cnc,
goto done;
}
PgfCId abs_id = "?";
if (emeta->id > 0) {
GuPool* tmp_pool = gu_local_pool();
GuExn* err = gu_new_exn(tmp_pool);
GuStringBuf* sbuf = gu_new_string_buf(tmp_pool);
GuOut* out = gu_string_buf_out(sbuf);
gu_printf(out, err, "?%d", emeta->id);
abs_id = gu_string_buf_freeze(sbuf, pool);
}
int index =
gu_choice_next(cnc->ch, gu_seq_length(ccat->lindefs));
if (index < 0) {
@@ -420,7 +406,6 @@ pgf_cnc_resolve(PgfCnc* cnc,
PgfCncTreeApp,
args, 1, &ret, pool);
capp->ccat = ccat;
capp->abs_id = abs_id;
capp->fun = gu_seq_get(ccat->lindefs, PgfCncFun*, index);
capp->fid = cnc->fid++;
capp->n_vars = 0;
@@ -450,7 +435,23 @@ pgf_cnc_resolve(PgfCnc* cnc,
gu_putc(']', out, err);
GuString s = gu_string_buf_freeze(sbuf, tmp_pool);
ret = pgf_cnc_resolve_def(cnc, n_vars, context, efun->fun, ccat, s, pool);
if (ccat != NULL) {
ret = pgf_cnc_resolve_def(cnc, n_vars, context, ccat, s, pool);
} else {
PgfCncTreeLit* clit =
gu_new_variant(PGF_CNC_TREE_LIT,
PgfCncTreeLit,
&ret, pool);
clit->n_vars = 0;
clit->context = context;
clit->fid = cnc->fid++;
PgfLiteralStr* lit =
gu_new_flex_variant(PGF_LITERAL_STR,
PgfLiteralStr,
val, strlen(s)+1,
&clit->lit, pool);
strcpy(lit->val, s);
}
gu_pool_free(tmp_pool);
goto done;
@@ -498,7 +499,28 @@ redo:;
index--;
}
ret = pgf_cnc_resolve_def(cnc, n_vars, context, ctxt->name, ccat, ctxt->name, pool);
if (ccat != NULL && ccat->lindefs == NULL) {
goto done;
}
if (ccat != NULL) {
ret = pgf_cnc_resolve_def(cnc, n_vars, context, ccat, ctxt->name, pool);
} else {
PgfCncTreeLit* clit =
gu_new_variant(PGF_CNC_TREE_LIT,
PgfCncTreeLit,
&ret, pool);
clit->n_vars = 0;
clit->context = context;
clit->fid = cnc->fid++;
PgfLiteralStr* lit =
gu_new_flex_variant(PGF_LITERAL_STR,
PgfLiteralStr,
val, strlen(ctxt->name)+1,
&clit->lit, pool);
strcpy(lit->val, ctxt->name);
}
goto done;
}
case PGF_EXPR_TYPED: {
@@ -917,9 +939,9 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx)
if ((*lzr->funcs)->begin_phrase && fapp->ccat != NULL) {
(*lzr->funcs)->begin_phrase(lzr->funcs,
fapp->ccat->cnccat->abscat->name,
fun->absfun->type->cid,
fapp->fid, lin_idx,
fapp->abs_id);
fun->absfun->name);
}
gu_require(lin_idx < fun->n_lins);
@@ -927,9 +949,9 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx)
if ((*lzr->funcs)->end_phrase && fapp->ccat != NULL) {
(*lzr->funcs)->end_phrase(lzr->funcs,
fapp->ccat->cnccat->abscat->name,
fun->absfun->type->cid,
fapp->fid, lin_idx,
fapp->abs_id);
fun->absfun->name);
}
break;
}
@@ -955,7 +977,7 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx)
PgfCId cat =
pgf_literal_cat(lzr->concr, flit->lit)->cnccat->abscat->name;
if ((*lzr->funcs)->begin_phrase && flit->fid >= 0) {
if ((*lzr->funcs)->begin_phrase) {
(*lzr->funcs)->begin_phrase(lzr->funcs,
cat, flit->fid, 0,
"");
@@ -987,7 +1009,7 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx)
(*lzr->funcs)->symbol_token(lzr->funcs, tok);
}
if ((*lzr->funcs)->end_phrase && flit->fid >= 0) {
if ((*lzr->funcs)->end_phrase) {
(*lzr->funcs)->end_phrase(lzr->funcs,
cat, flit->fid, 0,
"");

View File

@@ -22,7 +22,6 @@ typedef enum {
typedef struct {
PgfCCat* ccat;
PgfCId abs_id;
PgfCncFun* fun;
int fid;

View File

@@ -9,9 +9,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#if defined(__MINGW32__) || defined(_MSC_VER)
#include <malloc.h>
#endif
//#define PGF_LOOKUP_DEBUG
//#define PGF_LINEARIZER_DEBUG
@@ -119,7 +116,7 @@ typedef struct {
static PgfAbsProduction*
pgf_lookup_new_production(PgfAbsFun* fun, GuPool *pool)
{
size_t n_hypos = fun->type->hypos ? gu_seq_length(fun->type->hypos) : 0;
size_t n_hypos = gu_seq_length(fun->type->hypos);
PgfAbsProduction* prod = gu_new_flex(pool, PgfAbsProduction, args, n_hypos);
prod->fun = fun;
prod->count = 0;
@@ -699,12 +696,8 @@ pgf_lookup_tokenize(GuMap* lexicon_idx, GuString sentence, GuPool* pool)
break;
const uint8_t* start = p-1;
if (strchr(".!?,:",c) != NULL)
while (c != 0 && !gu_ucs_is_space(c)) {
c = gu_utf8_decode(&p);
else {
while (c != 0 && strchr(".!?,:",c) == NULL && !gu_ucs_is_space(c)) {
c = gu_utf8_decode(&p);
}
}
const uint8_t* end = p-1;

View File

@@ -65,7 +65,6 @@ typedef enum { BIND_NONE, BIND_HARD, BIND_SOFT } BIND_TYPE;
typedef struct {
PgfProductionIdx* idx;
size_t offset;
size_t sym_idx;
} PgfLexiconIdxEntry;
typedef GuBuf PgfLexiconIdx;
@@ -1061,13 +1060,13 @@ pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep)
}
static int
pgf_symbols_cmp(GuString* psent, PgfSymbols* syms, size_t* sym_idx, bool case_sensitive)
pgf_symbols_cmp(GuString* psent, PgfSymbols* syms, bool case_sensitive)
{
size_t n_syms = gu_seq_length(syms);
while (*sym_idx < n_syms) {
PgfSymbol sym = gu_seq_get(syms, PgfSymbol, *sym_idx);
for (size_t i = 0; i < n_syms; i++) {
PgfSymbol sym = gu_seq_get(syms, PgfSymbol, i);
if (*sym_idx > 0) {
if (i > 0) {
if (!skip_space(psent)) {
if (**psent == 0)
return -1;
@@ -1111,8 +1110,6 @@ pgf_symbols_cmp(GuString* psent, PgfSymbols* syms, size_t* sym_idx, bool case_se
default:
gu_impossible();
}
(*sym_idx)++;
}
return 0;
@@ -1133,8 +1130,7 @@ pgf_parsing_lookahead(PgfParsing *ps, PgfParseState* state,
GuString start = ps->sentence + state->end_offset;
GuString current = start;
size_t sym_idx = 0;
int cmp = pgf_symbols_cmp(&current, seq->syms, &sym_idx, ps->case_sensitive);
int cmp = pgf_symbols_cmp(&current, seq->syms, ps->case_sensitive);
if (cmp < 0) {
j = k-1;
} else if (cmp > 0) {
@@ -1155,9 +1151,8 @@ pgf_parsing_lookahead(PgfParsing *ps, PgfParseState* state,
if (seq->idx != NULL) {
PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
entry->idx = seq->idx;
entry->offset = (size_t) (current - ps->sentence);
entry->sym_idx = sym_idx;
entry->idx = seq->idx;
entry->offset = (size_t) (current - ps->sentence);
}
if (len+1 <= max)
@@ -1236,7 +1231,6 @@ pgf_new_parse_state(PgfParsing* ps, size_t start_offset,
PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
entry->idx = seq->idx;
entry->offset = state->start_offset;
entry->sym_idx= 0;
}
// Add non-epsilon lexical rules to the bottom up index
@@ -1284,15 +1278,14 @@ pgf_parsing_add_transition(PgfParsing* ps, PgfToken tok, PgfItem* item)
static void
pgf_parsing_predict_lexeme(PgfParsing* ps, PgfItemConts* conts,
PgfProductionIdxEntry* entry,
size_t offset, size_t sym_idx)
size_t offset)
{
GuVariantInfo i = { PGF_PRODUCTION_APPLY, entry->papp };
PgfProduction prod = gu_variant_close(i);
PgfItem* item =
pgf_new_item(ps, conts, prod);
PgfSymbols* syms = entry->papp->fun->lins[conts->lin_idx]->syms;
item->sym_idx = sym_idx;
pgf_item_set_curr_symbol(item, ps->pool);
item->sym_idx = gu_seq_length(syms);
prob_t prob = item->inside_prob+item->conts->outside_prob;
PgfParseState* state =
pgf_new_parse_state(ps, offset, BIND_NONE, prob);
@@ -1365,7 +1358,7 @@ pgf_parsing_td_predict(PgfParsing* ps,
PgfProductionIdxEntry, &key);
if (value != NULL) {
pgf_parsing_predict_lexeme(ps, conts, value, lentry->offset, lentry->sym_idx);
pgf_parsing_predict_lexeme(ps, conts, value, lentry->offset);
PgfProductionIdxEntry* start =
gu_buf_data(lentry->idx);
@@ -1376,7 +1369,7 @@ pgf_parsing_td_predict(PgfParsing* ps,
while (left >= start &&
value->ccat->fid == left->ccat->fid &&
value->lin_idx == left->lin_idx) {
pgf_parsing_predict_lexeme(ps, conts, left, lentry->offset, lentry->sym_idx);
pgf_parsing_predict_lexeme(ps, conts, left, lentry->offset);
left--;
}
@@ -1384,7 +1377,7 @@ pgf_parsing_td_predict(PgfParsing* ps,
while (right <= end &&
value->ccat->fid == right->ccat->fid &&
value->lin_idx == right->lin_idx) {
pgf_parsing_predict_lexeme(ps, conts, right, lentry->offset, lentry->sym_idx);
pgf_parsing_predict_lexeme(ps, conts, right, lentry->offset);
right++;
}
}
@@ -1957,6 +1950,8 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat,
start_ccat->prods = NULL;
start_ccat->n_synprods = 0;
gu_assert(start_ccat->cnccat != NULL);
#ifdef PGF_COUNTS_DEBUG
state->ps->ccat_full_count++;
#endif
@@ -2300,7 +2295,7 @@ pgf_parser_completions_next(GuEnum* self, void* to, GuPool* pool)
}
PGF_API GuEnum*
pgf_complete(PgfConcr* concr, PgfType* type, GuString sentence,
pgf_complete(PgfConcr* concr, PgfType* type, GuString sentence,
GuString prefix, GuExn *err, GuPool* pool)
{
if (concr->sequences == NULL ||
@@ -2379,9 +2374,8 @@ pgf_sequence_cmp_fn(GuOrder* order, const void* p1, const void* p2)
GuString sent = (GuString) p1;
const PgfSequence* sp2 = p2;
size_t sym_idx = 0;
int res = pgf_symbols_cmp(&sent, sp2->syms, &sym_idx, self->case_sensitive);
if (res == 0 && (*sent != 0 || sym_idx != gu_seq_length(sp2->syms))) {
int res = pgf_symbols_cmp(&sent, sp2->syms, self->case_sensitive);
if (res == 0 && *sent != 0) {
res = 1;
}

View File

@@ -46,7 +46,7 @@ pgf_read_in(GuIn* in,
}
PGF_API_DECL void
pgf_write(PgfPGF* pgf, const char* fpath, GuExn* err)
pgf_write(PgfPGF* pgf, size_t n_concrs, PgfConcr** concrs, const char* fpath, GuExn* err)
{
FILE* outfile = fopen(fpath, "wb");
if (outfile == NULL) {
@@ -60,13 +60,70 @@ pgf_write(PgfPGF* pgf, const char* fpath, GuExn* err)
GuOut* out = gu_file_out(outfile, tmp_pool);
PgfWriter* wtr = pgf_new_writer(out, tmp_pool, err);
pgf_write_pgf(pgf, wtr);
pgf_write_pgf(pgf, n_concrs, concrs, wtr);
gu_pool_free(tmp_pool);
fclose(outfile);
}
PGF_API void
pgf_concrete_save(PgfConcr* concr, const char* fpath, GuExn* err)
{
FILE* outfile = fopen(fpath, "wb");
if (outfile == NULL) {
gu_raise_errno(err);
return;
}
GuPool* tmp_pool = gu_local_pool();
// Create an input stream from the input file
GuOut* out = gu_file_out(outfile, tmp_pool);
PgfWriter* wtr = pgf_new_writer(out, tmp_pool, err);
pgf_write_concrete(concr, wtr, true);
gu_pool_free(tmp_pool);
fclose(outfile);
}
PGF_API bool
pgf_have_same_abstract(PgfPGF *one, PgfPGF *two)
{
if (strcmp(one->abstract.name, two->abstract.name) != 0)
return false;
size_t n_cats = gu_seq_length(one->abstract.cats);
if (n_cats != gu_seq_length(two->abstract.cats))
return false;
size_t n_funs = gu_seq_length(one->abstract.funs);
if (n_funs != gu_seq_length(two->abstract.funs))
return false;
for (size_t i = 0; i < n_cats; i++) {
PgfAbsCat* cat1 = gu_seq_index(one->abstract.cats, PgfAbsCat, i);
PgfAbsCat* cat2 = gu_seq_index(two->abstract.cats, PgfAbsCat, i);
if (strcmp(cat1->name, cat2->name) != 0)
return false;
}
for (size_t i = 0; i < n_funs; i++) {
PgfAbsFun* fun1 = gu_seq_index(one->abstract.funs, PgfAbsFun, i);
PgfAbsFun* fun2 = gu_seq_index(two->abstract.funs, PgfAbsFun, i);
if (strcmp(fun1->name, fun2->name) != 0)
return false;
if (!pgf_type_eq(fun1->type, fun2->type))
return false;
}
return true;
}
PGF_API GuString
pgf_abstract_name(PgfPGF* pgf)
{

View File

@@ -19,14 +19,6 @@
#define PGF_INTERNAL_DECL
#define PGF_INTERNAL
#elif defined(__MINGW32__)
#define PGF_API_DECL
#define PGF_API
#define PGF_INTERNAL_DECL
#define PGF_INTERNAL
#else
#define PGF_API_DECL
@@ -66,7 +58,10 @@ PGF_API_DECL void
pgf_concrete_unload(PgfConcr* concr);
PGF_API_DECL void
pgf_write(PgfPGF* pgf, const char* fpath, GuExn* err);
pgf_write(PgfPGF* pgf, size_t n_concrs, PgfConcr** concrs, const char* fpath, GuExn* err);
PGF_API_DECL bool
pgf_have_same_abstract(PgfPGF *one, PgfPGF *two);
PGF_API_DECL GuString
pgf_abstract_name(PgfPGF*);
@@ -249,7 +244,8 @@ pgf_callbacks_map_add_literal(PgfConcr* concr, PgfCallbacksMap* callbacks,
PgfCId cat, PgfLiteralCallback* callback);
PGF_API_DECL void
pgf_print(PgfPGF* pgf, GuOut* out, GuExn* err);
pgf_print(PgfPGF* pgf, size_t n_concrs, PgfConcr** concrs,
GuOut* out, GuExn* err);
PGF_API_DECL void
pgf_check_expr(PgfPGF* gr, PgfExpr* pe, PgfType* ty,

View File

@@ -7,13 +7,17 @@ typedef struct {
} PgfPrintFn;
static void
pgf_print_flags(PgfFlags* flags, GuOut *out, GuExn* err)
pgf_print_flags(PgfFlags* flags, int indent, GuOut *out, GuExn* err)
{
size_t n_flags = gu_seq_length(flags);
for (size_t i = 0; i < n_flags; i++) {
PgfFlag* flag = gu_seq_index(flags, PgfFlag, i);
gu_puts(" flag ", out, err);
for (int k = 0; k < indent; k++) {
gu_putc(' ', out, err);
}
gu_puts("flag ", out, err);
pgf_print_cid(flag->name, out, err);
gu_puts(" = ", out, err);
pgf_print_literal(flag->value, out, err);
@@ -70,7 +74,7 @@ pgf_print_abstract(PgfAbstr* abstr, GuOut* out, GuExn* err)
pgf_print_cid(abstr->name, out, err);
gu_puts(" {\n", out, err);
pgf_print_flags(abstr->aflags, out, err);
pgf_print_flags(abstr->aflags, 2, out, err);
pgf_print_abscats(abstr->cats, out, err);
pgf_print_absfuns(abstr->funs, out, err);
@@ -358,7 +362,7 @@ pgf_print_concrete(PgfConcr* concr, GuOut* out, GuExn* err)
pgf_print_cid(concr->name, out, err);
gu_puts(" {\n", out, err);
pgf_print_flags(concr->cflags, out, err);
pgf_print_flags(concr->cflags, 2, out, err);
gu_puts(" productions\n", out, err);
PgfPrintFn clo2 = { { pgf_print_productions }, out };
@@ -396,13 +400,12 @@ pgf_print_concrete(PgfConcr* concr, GuOut* out, GuExn* err)
}
PGF_API void
pgf_print(PgfPGF* pgf, GuOut* out, GuExn* err)
pgf_print(PgfPGF* pgf, size_t n_concrs, PgfConcr** concrs, GuOut* out, GuExn* err)
{
pgf_print_flags(pgf->gflags, 0, out, err);
pgf_print_abstract(&pgf->abstract, out, err);
size_t n_concrs = gu_seq_length(pgf->concretes);
for (size_t i = 0; i < n_concrs; i++) {
PgfConcr* concr = gu_seq_index(pgf->concretes, PgfConcr, i);
pgf_print_concrete(concr, out, err);
pgf_print_concrete(concrs[i], out, err);
}
}

View File

@@ -937,7 +937,7 @@ pgf_read_pargs(PgfReader* rdr, PgfConcr* concr)
}
PGF_API bool
pgf_production_is_lexical(PgfProductionApply *papp,
pgf_production_is_lexical(PgfProductionApply *papp,
GuBuf* non_lexical_buf, GuPool* pool)
{
if (gu_seq_length(papp->args) > 0)
@@ -1168,6 +1168,14 @@ pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err)
// pgf_ccat_set_viterbi_prob(ccat);
}
// The GF compiler needs to call this function when building in memory grammars.
PGF_API void
pgf_concrete_fix_internals(PgfConcr* concr)
{
GuMapItor clo1 = { pgf_read_ccat_cb };
gu_map_iter(concr->ccats, &clo1, NULL);
}
static void
pgf_read_concrete_content(PgfReader* rdr, PgfConcr* concr)
{
@@ -1193,8 +1201,7 @@ pgf_read_concrete_content(PgfReader* rdr, PgfConcr* concr)
concr->cnccats = pgf_read_cnccats(rdr, concr->abstr, concr);
concr->total_cats = pgf_read_int(rdr);
GuMapItor clo1 = { pgf_read_ccat_cb };
gu_map_iter(concr->ccats, &clo1, NULL);
pgf_concrete_fix_internals(concr);
}
static void

View File

@@ -72,10 +72,15 @@ pgf_write_cid(PgfCId id, PgfWriter* wtr)
PGF_INTERNAL void
pgf_write_string(GuString val, PgfWriter* wtr)
{
size_t len = strlen(val);
size_t len = 0;
const uint8_t* buf = (const uint8_t*) val;
const uint8_t* p = buf;
while (gu_utf8_decode(&p) != 0)
len++;
pgf_write_len(len, wtr);
gu_return_on_exn(wtr->err, );
gu_out_bytes(wtr->out, (uint8_t*) val, len, wtr->err);
gu_out_bytes(wtr->out, (uint8_t*) val, (p-buf)-1, wtr->err);
}
PGF_INTERNAL void
@@ -843,7 +848,7 @@ pgf_write_concrete_content(PgfConcr* concr, PgfWriter* wtr)
pgf_write_int(concr->total_cats, wtr);
}
static void
PGF_INTERNAL void
pgf_write_concrete(PgfConcr* concr, PgfWriter* wtr, bool with_content)
{
if (with_content &&
@@ -865,34 +870,20 @@ pgf_write_concrete(PgfConcr* concr, PgfWriter* wtr, bool with_content)
gu_return_on_exn(wtr->err, );
}
PGF_API void
pgf_concrete_save(PgfConcr* concr, GuOut* out, GuExn* err)
{
GuPool* pool = gu_new_pool();
PgfWriter* wtr = pgf_new_writer(out, pool, err);
pgf_write_concrete(concr, wtr, true);
gu_pool_free(pool);
}
static void
pgf_write_concretes(PgfConcrs* concretes, PgfWriter* wtr, bool with_content)
pgf_write_concretes(size_t n_concrs, PgfConcr** concrs, PgfWriter* wtr, bool with_content)
{
size_t n_concrs = gu_seq_length(concretes);
pgf_write_len(n_concrs, wtr);
gu_return_on_exn(wtr->err, );
for (size_t i = 0; i < n_concrs; i++) {
PgfConcr* concr = gu_seq_index(concretes, PgfConcr, i);
pgf_write_concrete(concr, wtr, with_content);
pgf_write_concrete(concrs[i], wtr, with_content);
gu_return_on_exn(wtr->err, );
}
}
PGF_INTERNAL void
pgf_write_pgf(PgfPGF* pgf, PgfWriter* wtr) {
pgf_write_pgf(PgfPGF* pgf, size_t n_concrs, PgfConcr** concrs, PgfWriter* wtr) {
gu_out_u16be(wtr->out, pgf->major_version, wtr->err);
gu_return_on_exn(wtr->err, );
@@ -907,7 +898,7 @@ pgf_write_pgf(PgfPGF* pgf, PgfWriter* wtr) {
bool with_content =
(gu_seq_binsearch(pgf->gflags, pgf_flag_order, PgfFlag, "split") == NULL);
pgf_write_concretes(pgf->concretes, wtr, with_content);
pgf_write_concretes(n_concrs, concrs, wtr, with_content);
gu_return_on_exn(wtr->err, );
}

View File

@@ -33,7 +33,10 @@ pgf_write_len(size_t len, PgfWriter* wtr);
PGF_INTERNAL_DECL void
pgf_write_cid(PgfCId id, PgfWriter* wtr);
PGF_INTERNAL void
pgf_write_concrete(PgfConcr* concr, PgfWriter* wtr, bool with_content);
PGF_INTERNAL_DECL void
pgf_write_pgf(PgfPGF* pgf, PgfWriter* wtr);
pgf_write_pgf(PgfPGF* pgf, size_t n_concrs, PgfConcr** concrs, PgfWriter* wtr);
#endif // WRITER_H_

View File

@@ -4918,7 +4918,6 @@ SQLITE_PRIVATE int sqlite3PendingByte;
# define SQLITE_UTF16NATIVE SQLITE_UTF16BE
#endif
#if !defined(SQLITE_BYTEORDER)
const int sqlite3one = 1;
# define SQLITE_BYTEORDER 0 /* 0 means "unknown at compile-time" */
# define SQLITE_BIGENDIAN (*(char *)(&sqlite3one)==0)
# define SQLITE_LITTLEENDIAN (*(char *)(&sqlite3one)==1)
@@ -5041,30 +5040,6 @@ SQLITE_PRIVATE int sqlite3VdbeRecordCompareWithSkip(int, const void *, UnpackedR
*/
/* #include "sqliteInt.h" */
/* An array to map all upper-case characters into their corresponding
** lower-case character.
**
** SQLite only considers US-ASCII (or EBCDIC) characters. We do not
** handle case conversions for the UTF character set since the tables
** involved are nearly as big or bigger than SQLite itself.
*/
const unsigned char sqlite3UpperToLower[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,
122, 91, 92, 93, 94, 95, 96, 97, 98, 99,100,101,102,103,104,105,106,107,
108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,
126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,
162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,
180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,
198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,
234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,
252,253,254,255
};
/* EVIDENCE-OF: R-02982-34736 In order to maintain full backwards
** compatibility for legacy applications, the URI filename capability is
** disabled by default.
@@ -9088,22 +9063,6 @@ SQLITE_PRIVATE int sqlite3Strlen30(const char *z){
return 0x3fffffff & (int)strlen(z);
}
/* Convenient short-hand */
#define UpperToLower sqlite3UpperToLower
int sqlite3StrICmp(const char *zLeft, const char *zRight){
unsigned char *a, *b;
int c;
a = (unsigned char *)zLeft;
b = (unsigned char *)zRight;
for(;;){
c = (int)UpperToLower[*a] - (int)UpperToLower[*b];
if( c || *a==0 ) break;
a++;
b++;
}
return c;
}
/*
** The string z[] is an text representation of a real number.
** Convert this string to a double and write it into *pResult.
@@ -17871,6 +17830,13 @@ struct winFile {
#define WINFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */
#define WINFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */
/*
* The size of the buffer used by sqlite3_win32_write_debug().
*/
#ifndef SQLITE_WIN32_DBG_BUF_SIZE
# define SQLITE_WIN32_DBG_BUF_SIZE ((int)(4096-sizeof(DWORD)))
#endif
/*
* The value used with sqlite3_win32_set_directory() to specify that
* the temporary directory should be changed.
@@ -18819,6 +18785,43 @@ SQLITE_PRIVATE int sqlite3_win32_reset_heap(){
}
#endif /* SQLITE_WIN32_MALLOC */
/*
** This function outputs the specified (ANSI) string to the Win32 debugger
** (if available).
*/
SQLITE_PRIVATE void sqlite3_win32_write_debug(const char *zBuf, int nBuf){
char zDbgBuf[SQLITE_WIN32_DBG_BUF_SIZE];
int nMin = MIN(nBuf, (SQLITE_WIN32_DBG_BUF_SIZE - 1)); /* may be negative. */
if( nMin<-1 ) nMin = -1; /* all negative values become -1. */
assert( nMin==-1 || nMin==0 || nMin<SQLITE_WIN32_DBG_BUF_SIZE );
#if defined(SQLITE_WIN32_HAS_ANSI)
if( nMin>0 ){
memset(zDbgBuf, 0, SQLITE_WIN32_DBG_BUF_SIZE);
memcpy(zDbgBuf, zBuf, nMin);
osOutputDebugStringA(zDbgBuf);
}else{
osOutputDebugStringA(zBuf);
}
#elif defined(SQLITE_WIN32_HAS_WIDE)
memset(zDbgBuf, 0, SQLITE_WIN32_DBG_BUF_SIZE);
if ( osMultiByteToWideChar(
osAreFileApisANSI() ? CP_ACP : CP_OEMCP, 0, zBuf,
nMin, (LPWSTR)zDbgBuf, SQLITE_WIN32_DBG_BUF_SIZE/sizeof(WCHAR))<=0 ){
return;
}
osOutputDebugStringW((LPCWSTR)zDbgBuf);
#else
if( nMin>0 ){
memset(zDbgBuf, 0, SQLITE_WIN32_DBG_BUF_SIZE);
memcpy(zDbgBuf, zBuf, nMin);
fprintf(stderr, "%s", zDbgBuf);
}else{
fprintf(stderr, "%s", zBuf);
}
#endif
}
/*
** The following routine suspends the current thread for at least ms
** milliseconds. This is equivalent to the Win32 Sleep() interface.
@@ -19260,6 +19263,40 @@ SQLITE_PRIVATE char *sqlite3_win32_utf8_to_mbcs(const char *zFilename){
return zFilenameMbcs;
}
/*
** This function sets the data directory or the temporary directory based on
** the provided arguments. The type argument must be 1 in order to set the
** data directory or 2 in order to set the temporary directory. The zValue
** argument is the name of the directory to use. The return value will be
** SQLITE_OK if successful.
*/
SQLITE_PRIVATE int sqlite3_win32_set_directory(DWORD type, LPCWSTR zValue){
char **ppDirectory = 0;
#ifndef SQLITE_OMIT_AUTOINIT
int rc = sqlite3BtreeInitialize();
if( rc ) return rc;
#endif
if( type==SQLITE_WIN32_TEMP_DIRECTORY_TYPE ){
ppDirectory = &sqlite3_temp_directory;
}
assert( !ppDirectory || type==SQLITE_WIN32_TEMP_DIRECTORY_TYPE
);
assert( !ppDirectory || sqlite3MemdebugHasType(*ppDirectory, MEMTYPE_HEAP) );
if( ppDirectory ){
char *zValueUtf8 = 0;
if( zValue && zValue[0] ){
zValueUtf8 = winUnicodeToUtf8(zValue);
if ( zValueUtf8==0 ){
return SQLITE_NOMEM;
}
}
sqlite3_free(*ppDirectory);
*ppDirectory = zValueUtf8;
return SQLITE_OK;
}
return SQLITE_ERROR;
}
/*
** The return value of winGetLastErrorMsg
** is zero if the error message fits in the buffer, or non-zero
@@ -22331,6 +22368,9 @@ static int winOpen(
if( isReadonly ){
pFile->ctrlFlags |= WINFILE_RDONLY;
}
if( sqlite3_uri_boolean(zName, "psow", SQLITE_POWERSAFE_OVERWRITE) ){
pFile->ctrlFlags |= WINFILE_PSOW;
}
pFile->lastErrno = NO_ERROR;
pFile->zPath = zName;
#if SQLITE_MAX_MMAP_SIZE>0
@@ -22549,6 +22589,43 @@ static BOOL winIsDriveLetterAndColon(
return ( sqlite3Isalpha(zPathname[0]) && zPathname[1]==':' );
}
/*
** Returns non-zero if the specified path name should be used verbatim. If
** non-zero is returned from this function, the calling function must simply
** use the provided path name verbatim -OR- resolve it into a full path name
** using the GetFullPathName Win32 API function (if available).
*/
static BOOL winIsVerbatimPathname(
const char *zPathname
){
/*
** If the path name starts with a forward slash or a backslash, it is either
** a legal UNC name, a volume relative path, or an absolute path name in the
** "Unix" format on Windows. There is no easy way to differentiate between
** the final two cases; therefore, we return the safer return value of TRUE
** so that callers of this function will simply use it verbatim.
*/
if ( winIsDirSep(zPathname[0]) ){
return TRUE;
}
/*
** If the path name starts with a letter and a colon it is either a volume
** relative path or an absolute path. Callers of this function must not
** attempt to treat it as a relative path name (i.e. they should simply use
** it verbatim).
*/
if ( winIsDriveLetterAndColon(zPathname) ){
return TRUE;
}
/*
** If we get to this point, the path name should almost certainly be a purely
** relative one (i.e. not a UNC name, not absolute, and not volume relative).
*/
return FALSE;
}
/*
** Turn a relative pathname into a full pathname. Write the full
** pathname into zOut[]. zOut[] will be at least pVfs->mxPathname

View File

@@ -1,3 +1,274 @@
module PGF(module PGF2) where
module PGF (PGF, readPGF, showPGF,
abstractName,
import PGF2
CId, mkCId, wildCId, showCId, readCId,
categories, categoryContext, categoryProbability,
functions, functionsByCat, functionType, functionIsDataCon, browse,
PGF2.Expr,Tree,showExpr,PGF2.readExpr,pExpr,pIdent,
mkAbs,unAbs,
mkApp,unApp,unapply,
PGF2.mkStr,PGF2.unStr,
PGF2.mkInt,PGF2.unInt,
PGF2.mkFloat,PGF2.unFloat,
PGF2.mkMeta,PGF2.unMeta,
PGF2.exprSize, exprFunctions,PGF2.exprSubstitute,
compute,
rankTreesByProbs,treeProbability,
TcError, ppTcError, inferExpr, checkType,
PGF2.Type, PGF2.Hypo, showType, showContext, PGF2.readType,
mkType, unType,
Token,
Language, readLanguage, showLanguage,
languages, startCat, languageCode,
linearize, bracketedLinearize, tabularLinearizes, showBracketedString,
ParseOutput(..), parse, parse_, complete,
PGF2.BracketedString(..), PGF2.flattenBracketedString,
hasLinearization,
showPrintName,
Morpho, buildMorpho,
lookupMorpho, isInMorpho, morphoMissing, morphoKnown, fullFormLexicon,
Labels, getDepLabels, CncLabels, getCncDepLabels,
generateAllDepth, generateRandom, generateRandomFrom, generateRandomDepth, generateRandomFromDepth,
generateFromDepth,
PGF2.GraphvizOptions(..),
graphvizAbstractTree, graphvizParseTree, graphvizAlignment, graphvizDependencyTree, graphvizParseTreeDep,
-- * Tries
ATree(..),Trie(..),toATree,toTrie,
readProbabilitiesFromFile,
groupResults, conlls2latexDoc, gizaAlignment
) where
import PGF.Internal
import qualified PGF2
import qualified Data.Map as Map
import qualified Text.ParserCombinators.ReadP as RP
import Data.List(sortBy)
import Text.PrettyPrint(text)
import Data.Char(isDigit)
readPGF = PGF2.readPGF
showPGF gr = PGF2.showPGF gr
readLanguage = readCId
showLanguage (CId s) = s
startCat = PGF2.startCat
languageCode pgf lang = Just (PGF2.languageCode (lookConcr pgf lang))
abstractName gr = CId (PGF2.abstractName gr)
categories gr = map CId (PGF2.categories gr)
categoryContext gr (CId c) = PGF2.categoryContext gr c
categoryProbability gr (CId c) = PGF2.categoryProbability gr c
functions gr = map CId (PGF2.functions gr)
functionsByCat gr (CId c) = map CId (PGF2.functionsByCat gr c)
functionType gr (CId f) = PGF2.functionType gr f
functionIsDataCon gr (CId f) = PGF2.functionIsDataCon gr f
type Tree = PGF2.Expr
type Labels = Map.Map CId [String]
type CncLabels = [(String, String -> Maybe (String -> String,String,String))]
mkCId x = CId x
wildCId = CId "_"
showCId (CId x) = x
readCId s = Just (CId s)
showExpr xs e = PGF2.showExpr [x | CId x <- xs] e
pExpr = RP.readS_to_P PGF2.pExpr
pIdent = RP.readS_to_P PGF2.pIdent
mkAbs bind_type (CId var) e = PGF2.mkAbs bind_type var e
unAbs e = case PGF2.unAbs e of
Just (bind_type, var, e) -> Just (bind_type, CId var, e)
Nothing -> Nothing
mkApp (CId f) es = PGF2.mkApp f es
unApp e = case PGF2.unApp e of
Just (f,es) -> Just (CId f,es)
Nothing -> Nothing
unapply = PGF2.unapply
instance Read PGF2.Expr where
readsPrec _ s = case PGF2.readExpr s of
Just e -> [(e,"")]
Nothing -> []
showType xs ty = PGF2.showType [x | CId x <- xs] ty
showContext xs hypos = PGF2.showContext [x | CId x <- xs] hypos
mkType hypos (CId var) es = PGF2.mkType [(bt,var,ty) | (bt,CId var,ty) <- hypos] var es
unType ty = case PGF2.unType ty of
(hypos,var,es) -> ([(bt,CId var,ty) | (bt,var,ty) <- hypos],CId var,es)
linearize pgf lang e = PGF2.linearize (lookConcr pgf lang) e
bracketedLinearize pgf lang e = PGF2.bracketedLinearize (lookConcr pgf lang) e
tabularLinearizes pgf lang e = [PGF2.tabularLinearize (lookConcr pgf lang) e]
showBracketedString = PGF2.showBracketedString
type TcError = String
-- | This data type encodes the different outcomes which you could get from the parser.
data ParseOutput
= ParseFailed Int -- ^ The integer is the position in number of tokens where the parser failed.
| TypeError [(FId,TcError)] -- ^ The parsing was successful but none of the trees is type correct.
-- The forest id ('FId') points to the bracketed string from the parser
-- where the type checking failed. More than one error is returned
-- if there are many analizes for some phrase but they all are not type correct.
| ParseOk [Tree] -- ^ If the parsing and the type checking are successful we get a list of abstract syntax trees.
-- The list should be non-empty.
| ParseIncomplete -- ^ The sentence is not complete. Only partial output is produced
parse pgf lang cat s =
case PGF2.parse (lookConcr pgf lang) cat s of
PGF2.ParseOk ts -> map fst ts
_ -> []
parse_ pgf lang cat dp s =
case PGF2.parse (lookConcr pgf lang) cat s of
PGF2.ParseFailed pos _ -> (ParseFailed pos, PGF2.Leaf s)
PGF2.ParseOk ts -> (ParseOk (map fst ts), PGF2.Leaf s)
PGF2.ParseIncomplete -> (ParseIncomplete, PGF2.Leaf s)
complete pgf lang cat s prefix =
let compls = Map.fromListWith (++) [(tok,[CId fun]) | (tok,_,fun,_) <- PGF2.complete (lookConcr pgf lang) cat s prefix]
in (PGF2.Leaf [],s,compls)
hasLinearization pgf lang (CId f) = PGF2.hasLinearization (lookConcr pgf lang) f
ppTcError s = s
inferExpr gr e =
case PGF2.inferExpr gr e of
Right res -> Right res
Left msg -> Left (text msg)
checkType gr ty =
case PGF2.checkType gr ty of
Right res -> Right res
Left msg -> Left (text msg)
showPrintName pgf lang (CId f) =
case PGF2.printName (lookConcr pgf lang) f of
Just n -> n
Nothing -> f
getDepLabels :: String -> Labels
getDepLabels s = Map.fromList [(mkCId f,ls) | f:ls <- map words (lines s)]
getCncDepLabels :: String -> CncLabels
getCncDepLabels = PGF2.getCncDepLabels
generateAllDepth gr ty _ = map fst (PGF2.generateAll gr ty)
generateFromDepth = error "generateFromDepth is not implemented"
generateRandom = error "generateRandom is not implemented"
generateRandomFrom = error "generateRandomFrom is not implemented"
generateRandomDepth = error "generateRandomDepth is not implemented"
generateRandomFromDepth = error "generateRandomFromDepth is not implemented"
exprFunctions e = [CId f | f <- PGF2.exprFunctions e]
compute = error "compute is not implemented"
-- | rank from highest to lowest probability
rankTreesByProbs :: PGF -> [PGF2.Expr] -> [(PGF2.Expr,Double)]
rankTreesByProbs pgf ts = sortBy (\ (_,p) (_,q) -> compare q p)
[(t, realToFrac (PGF2.treeProbability pgf t)) | t <- ts]
treeProbability = PGF2.treeProbability
languages pgf = fmap CId (Map.keys (PGF2.languages pgf))
type Morpho = PGF2.Concr
buildMorpho pgf lang = lookConcr pgf lang
lookupMorpho cnc w = [(CId lemma,an) | (lemma,an,_) <- PGF2.lookupMorpho cnc w]
isInMorpho cnc w = not (null (PGF2.lookupMorpho cnc w))
fullFormLexicon cnc = [(w, [(CId fun,an) | (fun,an,_) <- analyses]) | (w, analyses) <- PGF2.fullFormLexicon cnc]
graphvizAbstractTree pgf (funs,cats) = PGF2.graphvizAbstractTree pgf PGF2.graphvizDefaults{PGF2.noFun=not funs,PGF2.noCat=not cats}
graphvizParseTree pgf lang = PGF2.graphvizParseTree (lookConcr pgf lang)
graphvizAlignment pgf langs = PGF2.graphvizWordAlignment (map (lookConcr pgf) langs) PGF2.graphvizDefaults
graphvizDependencyTree format debug lbls cnclbls pgf lang e =
let to_lbls' lbls = Map.fromList [(id,xs) | (CId id,xs) <- Map.toList lbls]
in PGF2.graphvizDependencyTree format debug (fmap to_lbls' lbls) cnclbls (lookConcr pgf lang) e
graphvizParseTreeDep = error "graphvizParseTreeDep is not implemented"
browse :: PGF -> CId -> Maybe (String,[CId],[CId])
browse = error "browse is not implemented"
-- | A type for plain applicative trees
data ATree t = Other t | App CId [ATree t] deriving Show
data Trie = Oth Tree | Ap CId [[Trie ]] deriving Show
-- ^ A type for tries of plain applicative trees
-- | Convert a 'Tree' to an 'ATree'
toATree :: Tree -> ATree Tree
toATree e = maybe (Other e) app (PGF2.unApp e)
where
app (f,es) = App (mkCId f) (map toATree es)
-- | Combine a list of trees into a trie
toTrie = combines . map ((:[]) . singleton)
where
singleton t = case t of
Other e -> Oth e
App f ts -> Ap f [map singleton ts]
combines [] = []
combines (ts:tss) = ts1:combines tss2
where
(ts1,tss2) = combines2 [] tss ts
combines2 ots [] ts1 = (ts1,reverse ots)
combines2 ots (ts2:tss) ts1 =
maybe (combines2 (ts2:ots) tss ts1) (combines2 ots tss) (combine ts1 ts2)
combine ts us = mapM combine2 (zip ts us)
where
combine2 (Ap f ts,Ap g us) | f==g = Just (Ap f (combines (ts++us)))
combine2 _ = Nothing
readProbabilitiesFromFile :: FilePath -> IO (Map.Map CId Double)
readProbabilitiesFromFile fpath = do
s <- readFile fpath
return $ Map.fromList [(mkCId f,read p) | f:p:_ <- map words (lines s)]
groupResults :: [[(Language,String)]] -> [(Language,[String])]
groupResults = Map.toList . foldr more Map.empty . start . concat
where
start ls = [(l,[s]) | (l,s) <- ls]
more (l,s) =
Map.insertWith (\ [x] xs -> if elem x xs then xs else (x : xs)) l s
conlls2latexDoc = error "conlls2latexDoc is not implemented"
morphoMissing :: Morpho -> [String] -> [String]
morphoMissing = morphoClassify False
morphoKnown :: Morpho -> [String] -> [String]
morphoKnown = morphoClassify True
morphoClassify :: Bool -> Morpho -> [String] -> [String]
morphoClassify k mo ws = [w | w <- ws, k /= null (lookupMorpho mo w), notLiteral w] where
notLiteral w = not (all isDigit w) ---- should be defined somewhere
gizaAlignment = error "gizaAlignment is not implemented"

View File

@@ -1 +1,163 @@
module PGF.Internal where
{-# LANGUAGE ImplicitParams #-}
module PGF.Internal(CId(..),Language,PGF2.PGF,
PGF2.Concr,lookConcr,
PGF2.FId,isPredefFId,
PGF2.FunId,PGF2.SeqId,PGF2.LIndex,PGF2.Token,
PGF2.Production(..),PGF2.PArg(..),PGF2.Symbol(..),PGF2.Literal(..),PGF2.BindType(..),Sequence,
globalFlags, abstrFlags, concrFlags,
concrTotalCats, concrCategories, concrProductions,
concrTotalFuns, concrFunction,
concrTotalSeqs, concrSequence,
PGF2.CodeLabel, PGF2.Instr(..), PGF2.IVal(..), PGF2.TailInfo(..),
PGF2.Builder, PGF2.B, PGF2.build,
eAbs, eApp, eMeta, eFun, eVar, eLit, eTyped, eImplArg, dTyp, hypo,
PGF2.AbstrInfo, newAbstr, PGF2.ConcrInfo, newConcr, newPGF,
-- * Write an in-memory PGF to a file
writePGF, writeConcr,
PGF2.fidString, PGF2.fidInt, PGF2.fidFloat, PGF2.fidVar, PGF2.fidStart,
ppFunId, ppSeqId, ppFId, ppMeta, ppLit, ppSeq,
unionPGF
) where
import qualified PGF2
import qualified PGF2.Internal as PGF2
import qualified Data.Map as Map
import PGF2.FFI(PGF(..))
import Data.Array.IArray
import Data.Array.Unboxed
import Text.PrettyPrint
newtype CId = CId String deriving (Show,Read,Eq,Ord)
type Language = CId
lookConcr (PGF _ langs _) (CId lang) =
case Map.lookup lang langs of
Just cnc -> cnc
Nothing -> error "Unknown language"
globalFlags pgf = Map.fromAscList [(CId name,value) | (name,value) <- PGF2.globalFlags pgf]
abstrFlags pgf = Map.fromAscList [(CId name,value) | (name,value) <- PGF2.abstrFlags pgf]
concrFlags concr = Map.fromAscList [(CId name,value) | (name,value) <- PGF2.concrFlags concr]
concrTotalCats = PGF2.concrTotalCats
concrCategories :: PGF2.Concr -> [(CId,PGF2.FId,PGF2.FId,[String])]
concrCategories c = [(CId cat,start,end,lbls) | (cat,start,end,lbls) <- PGF2.concrCategories c]
concrProductions :: PGF2.Concr -> PGF2.FId -> [PGF2.Production]
concrProductions = PGF2.concrProductions
concrTotalFuns = PGF2.concrTotalFuns
concrFunction :: PGF2.Concr -> PGF2.FunId -> (CId,[PGF2.SeqId])
concrFunction c funid =
let (fun,seqids) = PGF2.concrFunction c funid
in (CId fun,seqids)
concrTotalSeqs :: PGF2.Concr -> PGF2.SeqId
concrTotalSeqs = PGF2.concrTotalSeqs
concrSequence = PGF2.concrSequence
isPredefFId = PGF2.isPredefFId
type Sequence = [PGF2.Symbol]
eAbs :: (?builder :: PGF2.Builder s) => PGF2.BindType -> CId -> PGF2.B s PGF2.Expr -> PGF2.B s PGF2.Expr
eAbs bind_type (CId var) body = PGF2.eAbs bind_type var body
eApp :: (?builder :: PGF2.Builder s) => PGF2.B s PGF2.Expr -> PGF2.B s PGF2.Expr -> PGF2.B s PGF2.Expr
eApp = PGF2.eApp
eMeta :: (?builder :: PGF2.Builder s) => Int -> PGF2.B s PGF2.Expr
eMeta = PGF2.eMeta
eFun (CId fun) = PGF2.eFun fun
eVar :: (?builder :: PGF2.Builder s) => Int -> PGF2.B s PGF2.Expr
eVar = PGF2.eVar
eLit :: (?builder :: PGF2.Builder s) => PGF2.Literal -> PGF2.B s PGF2.Expr
eLit = PGF2.eLit
eTyped :: (?builder :: PGF2.Builder s) => PGF2.B s PGF2.Expr -> PGF2.B s PGF2.Type -> PGF2.B s PGF2.Expr
eTyped = PGF2.eTyped
eImplArg :: (?builder :: PGF2.Builder s) => PGF2.B s PGF2.Expr -> PGF2.B s PGF2.Expr
eImplArg = PGF2.eImplArg
dTyp :: (?builder :: PGF2.Builder s) => [PGF2.B s (PGF2.BindType,String,PGF2.Type)] -> CId -> [PGF2.B s PGF2.Expr] -> PGF2.B s PGF2.Type
dTyp hypos (CId cat) es = PGF2.dTyp hypos cat es
hypo bind_type (CId var) ty = PGF2.hypo bind_type var ty
newAbstr flags cats funs = PGF2.newAbstr [(flag,lit) | (CId flag,lit) <- flags]
[(cat,hypos,prob) | (CId cat,hypos,prob) <- cats]
[(fun,ty,arity,prob) | (CId fun,ty,arity,prob) <- funs]
newConcr abs flags printnames lindefs linrefs prods cncfuns seqs cnccats total_ccats =
PGF2.newConcr abs [(flag,lit) | (CId flag,lit) <- flags]
[(id,name) | (CId id,name) <- printnames]
lindefs linrefs
prods
[(fun,seq_ids) | (CId fun,seq_ids) <- cncfuns]
seqs
[(cat,start,end,labels) | (CId cat,start,end,labels) <- cnccats]
total_ccats
newPGF flags (CId name) abstr concrs =
PGF2.newPGF [(flag,lit) | (CId flag,lit) <- flags]
name
abstr
[(name,concr) | (CId name,concr) <- concrs]
writePGF = PGF2.writePGF
writeConcr fpath pgf lang = PGF2.writeConcr fpath (lookConcr pgf lang)
ppFunId funid = char 'F' <> int funid
ppSeqId seqid = char 'S' <> int seqid
ppFId fid
| fid == PGF2.fidString = text "CString"
| fid == PGF2.fidInt = text "CInt"
| fid == PGF2.fidFloat = text "CFloat"
| fid == PGF2.fidVar = text "CVar"
| fid == PGF2.fidStart = text "CStart"
| otherwise = char 'C' <> int fid
ppMeta :: Int -> Doc
ppMeta n
| n == 0 = char '?'
| otherwise = char '?' <> int n
ppLit (PGF2.LStr s) = text (show s)
ppLit (PGF2.LInt n) = int n
ppLit (PGF2.LFlt d) = double d
ppSeq (seqid,seq) =
ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol seq)
ppSymbol (PGF2.SymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
ppSymbol (PGF2.SymLit d r) = char '{' <> int d <> comma <> int r <> char '}'
ppSymbol (PGF2.SymVar d r) = char '<' <> int d <> comma <> char '$' <> int r <> char '>'
ppSymbol (PGF2.SymKS t) = doubleQuotes (text t)
ppSymbol PGF2.SymNE = text "nonExist"
ppSymbol PGF2.SymBIND = text "BIND"
ppSymbol PGF2.SymSOFT_BIND = text "SOFT_BIND"
ppSymbol PGF2.SymSOFT_SPACE= text "SOFT_SPACE"
ppSymbol PGF2.SymCAPIT = text "CAPIT"
ppSymbol PGF2.SymALL_CAPIT = text "ALL_CAPIT"
ppSymbol (PGF2.SymKP syms alts) = text "pre" <+> braces (hsep (punctuate semi (hsep (map ppSymbol syms) : map ppAlt alts)))
ppAlt (syms,ps) = hsep (map ppSymbol syms) <+> char '/' <+> hsep (map (doubleQuotes . text) ps)
unionPGF = PGF2.unionPGF

View File

@@ -21,25 +21,21 @@
module PGF2 (-- * PGF
PGF,readPGF,showPGF,
-- * Identifiers
CId,
-- * Abstract syntax
AbsName,abstractName,
-- ** Categories
Cat,categories,categoryContext,
Cat,categories,categoryContext,categoryProbability,
-- ** Functions
Fun, functions, functionsByCat,
functionType, functionIsConstructor, hasLinearization,
functionType, functionIsDataCon, hasLinearization,
-- ** Expressions
Expr,showExpr,readExpr,pExpr,
Expr,showExpr,readExpr,pExpr,pIdent,
mkAbs,unAbs,
mkApp,unApp,
mkApp,unApp,unapply,
mkStr,unStr,
mkInt,unInt,
mkFloat,unFloat,
mkMeta,unMeta,
mkCId,
exprHash, exprSize, exprFunctions, exprSubstitute,
treeProbability,
@@ -58,13 +54,13 @@ module PGF2 (-- * PGF
ConcName,Concr,languages,concreteName,languageCode,
-- ** Linearization
linearize,linearizeAll,tabularLinearize,tabularLinearizeAll,bracketedLinearize,bracketedLinearizeAll,
linearize,linearizeAll,tabularLinearize,tabularLinearizeAll,bracketedLinearize,
FId, LIndex, BracketedString(..), showBracketedString, flattenBracketedString,
printName,
alignWords,
-- ** Parsing
ParseOutput(..), parse, parseWithHeuristics,
ParseOutput(..), parse, parseWithHeuristics, complete,
-- ** Sentence Lookup
lookupSentence,
-- ** Generation
@@ -73,7 +69,9 @@ module PGF2 (-- * PGF
MorphoAnalysis, lookupMorpho, fullFormLexicon,
-- ** Visualizations
GraphvizOptions(..), graphvizDefaults,
graphvizAbstractTree, graphvizParseTree, graphvizWordAlignment,
graphvizAbstractTree, graphvizParseTree,
graphvizDependencyTree, conlls2latexDoc, getCncDepLabels,
graphvizWordAlignment,
-- * Exceptions
PGFError(..),
@@ -82,7 +80,7 @@ module PGF2 (-- * PGF
LiteralCallback,literalCallbacks
) where
import Prelude hiding (fromEnum,(<>)) -- GHC 8.4.1 clash with Text.PrettyPrint
import Prelude hiding (fromEnum)
import Control.Exception(Exception,throwIO)
import Control.Monad(forM_)
import System.IO.Unsafe(unsafePerformIO,unsafeInterleaveIO)
@@ -97,7 +95,8 @@ import Data.Typeable
import qualified Data.Map as Map
import Data.IORef
import Data.Char(isUpper,isSpace)
import Data.List(isSuffixOf,maximumBy,nub)
import Data.List(isSuffixOf,maximumBy,nub,mapAccumL,intersperse,groupBy,find)
import Data.Maybe(fromMaybe)
import Data.Function(on)
@@ -110,8 +109,8 @@ import Data.Function(on)
-- to Concr but has lost its reference to PGF.
type AbsName = CId -- ^ Name of abstract syntax
type ConcName = CId -- ^ Name of concrete syntax
type AbsName = String -- ^ Name of abstract syntax
type ConcName = String -- ^ Name of concrete syntax
-- | Reads file in Portable Grammar Format and produces
-- 'PGF' structure. The file is usually produced with:
@@ -136,7 +135,22 @@ readPGF fpath =
throwIO (PGFError "The grammar cannot be loaded")
else return pgf
pgfFPtr <- newForeignPtr gu_pool_finalizer pool
return (PGF pgf (touchForeignPtr pgfFPtr))
let touch = touchForeignPtr pgfFPtr
ref <- newIORef Map.empty
allocaBytes (#size GuMapItor) $ \itor ->
do fptr <- wrapMapItorCallback (getLanguages ref touch)
(#poke GuMapItor, fn) itor fptr
pgf_iter_languages pgf itor nullPtr
freeHaskellFunPtr fptr
langs <- readIORef ref
return (PGF pgf langs touch)
where
getLanguages :: IORef (Map.Map String Concr) -> Touch -> MapItorCallback
getLanguages ref touch itor key value exn = do
langs <- readIORef ref
name <- peekUtf8CString (castPtr key)
concr <- fmap (\ptr -> Concr ptr touch) $ peek (castPtr value)
writeIORef ref $! Map.insert name concr langs
showPGF :: PGF -> String
showPGF p =
@@ -144,29 +158,15 @@ showPGF p =
withGuPool $ \tmpPl ->
do (sb,out) <- newOut tmpPl
exn <- gu_new_exn tmpPl
pgf_print (pgf p) out exn
withArrayLen ((map concr . Map.elems . languages) p) $ \n_concrs concrs ->
pgf_print (pgf p) (fromIntegral n_concrs) concrs out exn
touchPGF p
s <- gu_string_buf_freeze sb tmpPl
peekUtf8CString s
-- | List of all languages available in the grammar.
languages :: PGF -> Map.Map ConcName Concr
languages p =
unsafePerformIO $
do ref <- newIORef Map.empty
allocaBytes (#size GuMapItor) $ \itor ->
do fptr <- wrapMapItorCallback (getLanguages ref)
(#poke GuMapItor, fn) itor fptr
pgf_iter_languages (pgf p) itor nullPtr
freeHaskellFunPtr fptr
readIORef ref
where
getLanguages :: IORef (Map.Map String Concr) -> MapItorCallback
getLanguages ref itor key value exn = do
langs <- readIORef ref
name <- peekUtf8CString (castPtr key)
concr <- fmap (\ptr -> Concr ptr (touchPGF p)) $ peek (castPtr value)
writeIORef ref $! Map.insert name concr langs
languages p = langs p
-- | The abstract language name is the name of the top-level
-- abstract module
@@ -242,8 +242,8 @@ functionType p fn =
else Just (Type c_type (touchPGF p)))
-- | The type of a function
functionIsConstructor :: PGF -> Fun -> Bool
functionIsConstructor p fn =
functionIsDataCon :: PGF -> Fun -> Bool
functionIsDataCon p fn =
unsafePerformIO $
withGuPool $ \tmpPl -> do
c_fn <- newUtf8CString fn tmpPl
@@ -253,15 +253,15 @@ functionIsConstructor p fn =
-- | Checks an expression against a specified type.
checkExpr :: PGF -> Expr -> Type -> Either String Expr
checkExpr (PGF p _) (Expr c_expr touch1) (Type c_ty touch2) =
checkExpr p (Expr c_expr touch1) (Type c_ty touch2) =
unsafePerformIO $
alloca $ \pexpr ->
withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
exprPl <- gu_new_pool
poke pexpr c_expr
pgf_check_expr p pexpr c_ty exn exprPl
touch1 >> touch2
pgf_check_expr (pgf p) pexpr c_ty exn exprPl
touchPGF p >> touch1 >> touch2
status <- gu_exn_is_raised exn
if not status
then do exprFPl <- newForeignPtr gu_pool_finalizer exprPl
@@ -280,15 +280,15 @@ checkExpr (PGF p _) (Expr c_expr touch1) (Type c_ty touch2) =
-- possible to infer its type in the GF type system.
-- In this case the function returns an error.
inferExpr :: PGF -> Expr -> Either String (Expr, Type)
inferExpr (PGF p _) (Expr c_expr touch1) =
inferExpr p (Expr c_expr touch1) =
unsafePerformIO $
alloca $ \pexpr ->
withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
exprPl <- gu_new_pool
poke pexpr c_expr
c_ty <- pgf_infer_expr p pexpr exn exprPl
touch1
c_ty <- pgf_infer_expr (pgf p) pexpr exn exprPl
touchPGF p >> touch1
status <- gu_exn_is_raised exn
if not status
then do exprFPl <- newForeignPtr gu_pool_finalizer exprPl
@@ -306,15 +306,15 @@ inferExpr (PGF p _) (Expr c_expr touch1) =
-- | Check whether a type is consistent with the abstract
-- syntax of the grammar.
checkType :: PGF -> Type -> Either String Type
checkType (PGF p _) (Type c_ty touch1) =
checkType p (Type c_ty touch1) =
unsafePerformIO $
alloca $ \pty ->
withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
typePl <- gu_new_pool
poke pty c_ty
pgf_check_type p pty exn typePl
touch1
pgf_check_type (pgf p) pty exn typePl
touchPGF p >> touch1
status <- gu_exn_is_raised exn
if not status
then do typeFPl <- newForeignPtr gu_pool_finalizer typePl
@@ -329,13 +329,13 @@ checkType (PGF p _) (Type c_ty touch1) =
else throwIO (PGFError msg)
compute :: PGF -> Expr -> Expr
compute (PGF p _) (Expr c_expr touch1) =
compute p (Expr c_expr touch1) =
unsafePerformIO $
withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
exprPl <- gu_new_pool
c_expr <- pgf_compute p c_expr exn tmpPl exprPl
touch1
c_expr <- pgf_compute (pgf p) c_expr exn tmpPl exprPl
touchPGF p >> touch1
status <- gu_exn_is_raised exn
if not status
then do exprFPl <- newForeignPtr gu_pool_finalizer exprPl
@@ -346,10 +346,10 @@ compute (PGF p _) (Expr c_expr touch1) =
throwIO (PGFError msg)
treeProbability :: PGF -> Expr -> Float
treeProbability (PGF p _) (Expr c_expr touch1) =
treeProbability p (Expr c_expr touch1) =
unsafePerformIO $ do
res <- pgf_compute_tree_probability p c_expr
touch1
res <- pgf_compute_tree_probability (pgf p) c_expr
touchPGF p >> touch1
return (realToFrac res)
exprHash :: Int32 -> Expr -> Int32
@@ -447,6 +447,433 @@ graphvizWordAlignment cs opts e =
s <- gu_string_buf_freeze sb tmpPl
peekUtf8CString s
type Labels = Map.Map Fun [String]
-- | Visualize word dependency tree.
graphvizDependencyTree
:: String -- ^ Output format: @"latex"@, @"conll"@, @"malt_tab"@, @"malt_input"@ or @"dot"@
-> Bool -- ^ Include extra information (debug)
-> Maybe Labels -- ^ abstract label information obtained with 'getDepLabels'
-> Maybe CncLabels -- ^ concrete label information obtained with ' ' (was: unused (was: @Maybe String@))
-> Concr
-> Expr
-> String -- ^ Rendered output in the specified format
graphvizDependencyTree format debug mlab mclab concr t =
case format of
"latex" -> render . ppLaTeX $ conll2latex' conll
"svg" -> render . ppSVG . toSVG $ conll2latex' conll
"conll" -> printCoNLL conll
"malt_tab" -> render $ vcat (map (hcat . intersperse (char '\t') . (\ws -> [ws !! 0,ws !! 1,ws !! 3,ws !! 6,ws !! 7])) wnodes)
"malt_input" -> render $ vcat (map (hcat . intersperse (char '\t') . take 6) wnodes)
_ -> render $ text "digraph {" $$
space $$
nest 2 (text "rankdir=LR ;" $$
text "node [shape = plaintext] ;" $$
vcat nodes $$
vcat links) $$
text "}"
where
conll = maybe conll0 (\ls -> fixCoNLL ls conll0) mclab
conll0 = (map.map) render wnodes
nodes = map mkNode leaves
links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun),_,w) <- tail leaves]
-- CoNLL format: ID FORM LEMMA PLEMMA POS PPOS FEAT PFEAT HEAD PHEAD DEPREL PDEPREL
-- P variants are automatically predicted rather than gold standard
wnodes = [[int i, maltws ws, text fun, text (posCat cat), text cat, unspec, int parent, text lab, unspec, unspec] |
((cat,fid,fun),i,ws) <- tail leaves,
let (lab,parent) = fromMaybe (dep_lbl,0)
(do (lbl,fid) <- lookup fid deps
(_,i,_) <- find (\((_,fid1,_),i,_) -> fid == fid1) leaves
return (lbl,i))
]
maltws = text . concat . intersperse "+" . words -- no spaces in column 2
nil = -1
bss = bracketedLinearize concr t
root = ("_",nil,"_")
leaves = (root,0,root_lbl) : (groupAndIndexIt 1 . concatMap (getLeaves root)) bss
deps = let (_,(h,deps)) = getDeps 0 [] t
in (h,(dep_lbl,nil)):deps
groupAndIndexIt id [] = []
groupAndIndexIt id ((p,w):pws) = (p,id,w) : groupAndIndexIt (id+1) pws
--- groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
--- in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1
where
collect pws@((p1,w):pws1)
| p == p1 = let (ws,pws2) = collect pws1
in (w:ws,pws2)
collect pws = ([],pws)
getLeaves parent bs =
case bs of
Leaf w -> [(parent,w)]
Bracket cat fid _ fun bss -> concatMap (getLeaves (cat,fid,fun)) bss
mkNode ((_,p,_),i,w) =
tag p <+> brackets (text "label = " <> doubleQuotes (int i <> char '.' <+> text w)) <+> semi
mkLink (x,(lbl,y)) = tag y <+> text "->" <+> tag x <+> text "[label = " <> doubleQuotes (text lbl) <> text "] ;"
labels = maybe Map.empty id mlab
clabels = maybe [] id mclab
posCat cat = case Map.lookup cat labels of
Just [p] -> p
_ -> cat
getDeps n_fid xs e =
case unAbs e of
Just (_, x, e) -> getDeps n_fid (x:xs) e
Nothing -> case unApp e of
Just (f,es) -> let (n_fid_1,ds) = descend n_fid xs es
(mb_h, deps) = selectHead f ds
in case mb_h of
Just (fid,deps0) -> (n_fid_1+1,(fid,deps0++
[(n_fid_1,(dep_lbl,fid))]++
concat [(m,(lbl,fid)):ds | (lbl,(m,ds)) <- deps]))
Nothing -> (n_fid_1+1,(n_fid_1,concat [(m,(lbl,n_fid_1)):ds | (lbl,(m,ds)) <- deps]))
Nothing -> (n_fid+1,(n_fid,[]))
descend n_fid xs es = mapAccumL (\n_fid e -> getDeps n_fid xs e) n_fid es
selectHead f ds =
case Map.lookup f labels of
Just lbls -> extractHead (zip lbls ds)
Nothing -> extractLast ds
where
extractHead [] = (Nothing, [])
extractHead (ld@(l,d):lds)
| l == head_lbl = (Just d,lds)
| otherwise = let (mb_h,deps) = extractHead lds
in (mb_h,ld:deps)
extractLast [] = (Nothing, [])
extractLast (d:ds)
| null ds = (Just d,[])
| otherwise = let (mb_h,deps) = extractLast ds
in (mb_h,(dep_lbl,d):deps)
dep_lbl = "dep"
head_lbl = "head"
root_lbl = "ROOT"
unspec = text "_"
---------------------- should be a separate module?
-- visualization with latex output. AR Nov 2015
conlls2latexDoc :: [String] -> String
conlls2latexDoc =
render .
latexDoc .
vcat .
intersperse (text "" $+$ app "vspace" (text "4mm")) .
map conll2latex .
filter (not . null)
conll2latex :: String -> Doc
conll2latex = ppLaTeX . conll2latex' . parseCoNLL
conll2latex' :: CoNLL -> [LaTeX]
conll2latex' = dep2latex . conll2dep'
data Dep = Dep {
wordLength :: Int -> Double -- length of word at position int -- was: fixed width, millimetres (>= 20.0)
, tokens :: [(String,String)] -- word, pos (0..)
, deps :: [((Int,Int),String)] -- from, to, label
, root :: Int -- root word position
}
-- some general measures
defaultWordLength = 20.0 -- the default fixed width word length, making word 100 units
defaultUnit = 0.2 -- unit in latex pictures, 0.2 millimetres
spaceLength = 10.0
charWidth = 1.8
wsize rwld w = 100 * rwld w + spaceLength -- word length, units
wpos rwld i = sum [wsize rwld j | j <- [0..i-1]] -- start position of the i'th word
wdist rwld x y = sum [wsize rwld i | i <- [min x y .. max x y - 1]] -- distance between words x and y
labelheight h = h + arcbase + 3 -- label just above arc; 25 would put it just below
labelstart c = c - 15.0 -- label starts 15u left of arc centre
arcbase = 30.0 -- arcs start and end 40u above the bottom
arcfactor r = r * 600 -- reduction of arc size from word distance
xyratio = 3 -- width/height ratio of arcs
putArc :: (Int -> Double) -> Int -> Int -> Int -> String -> [DrawingCommand]
putArc frwld height x y label = [oval,arrowhead,labelling] where
oval = Put (ctr,arcbase) (OvalTop (wdth,hght))
arrowhead = Put (endp,arcbase + 5) (ArrowDown 5) -- downgoing arrow 5u above the arc base
labelling = Put (labelstart ctr,labelheight (hght/2)) (TinyText label)
dxy = wdist frwld x y -- distance between words, >>= 20.0
ndxy = 100 * rwld * fromIntegral height -- distance that is indep of word length
hdxy = dxy / 2 -- half the distance
wdth = dxy - (arcfactor rwld)/dxy -- longer arcs are wider in proportion
hght = ndxy / (xyratio * rwld) -- arc height is independent of word length
begp = min x y -- begin position of oval
ctr = wpos frwld begp + hdxy + (if x < y then 20 else 10) -- LR arcs are farther right from center of oval
endp = (if x < y then (+) else (-)) ctr (wdth/2) -- the point of the arrow
rwld = 0.5 ----
dep2latex :: Dep -> [LaTeX]
dep2latex d =
[Comment (unwords (map fst (tokens d))),
Picture defaultUnit (width,height) (
[Put (wpos rwld i,0) (Text w) | (i,w) <- zip [0..] (map fst (tokens d))] -- words
++ [Put (wpos rwld i,15) (TinyText w) | (i,w) <- zip [0..] (map snd (tokens d))] -- pos tags 15u above bottom
++ concat [putArc rwld (aheight x y) x y label | ((x,y),label) <- deps d] -- arcs and labels
++ [Put (wpos rwld (root d) + 15,height) (ArrowDown (height-arcbase))]
++ [Put (wpos rwld (root d) + 20,height - 10) (TinyText "ROOT")]
)]
where
wld i = wordLength d i -- >= 20.0
rwld i = (wld i) / defaultWordLength -- >= 1.0
aheight x y = depth (min x y) (max x y) + 1 ---- abs (x-y)
arcs = [(min u v, max u v) | ((u,v),_) <- deps d]
depth x y = case [(u,v) | (u,v) <- arcs, (x < u && v <= y) || (x == u && v < y)] of ---- only projective arcs counted
[] -> 0
uvs -> 1 + maximum (0:[depth u v | (u,v) <- uvs])
width = {-round-} (sum [wsize rwld w | (w,_) <- zip [0..] (tokens d)]) + {-round-} spaceLength * fromIntegral ((length (tokens d)) - 1)
height = 50 + 20 * {-round-} (maximum (0:[aheight x y | ((x,y),_) <- deps d]))
type CoNLL = [[String]]
parseCoNLL :: String -> CoNLL
parseCoNLL = map words . lines
--conll2dep :: String -> Dep
--conll2dep = conll2dep' . parseCoNLL
conll2dep' :: CoNLL -> Dep
conll2dep' ls = Dep {
wordLength = wld
, tokens = toks
, deps = dps
, root = head $ [read x-1 | x:_:_:_:_:_:"0":_ <- ls] ++ [1]
}
where
wld i = maximum (0:[charWidth * fromIntegral (length w) | w <- let (tok,pos) = toks !! i in [tok,pos]])
toks = [(w,c) | _:w:_:c:_ <- ls]
dps = [((read y-1, read x-1),lab) | x:_:_:_:_:_:y:lab:_ <- ls, y /="0"]
--maxdist = maximum [abs (x-y) | ((x,y),_) <- dps]
-- * LaTeX Pictures (see https://en.wikibooks.org/wiki/LaTeX/Picture)
-- We render both LaTeX and SVG from this intermediate representation of
-- LaTeX pictures.
data LaTeX = Comment String | Picture UnitLengthMM Size [DrawingCommand]
data DrawingCommand = Put Position Object
data Object = Text String | TinyText String | OvalTop Size | ArrowDown Length
type UnitLengthMM = Double
type Size = (Double,Double)
type Position = (Double,Double)
type Length = Double
-- * latex formatting
ppLaTeX = vcat . map ppLaTeX1
where
ppLaTeX1 el =
case el of
Comment s -> comment s
Picture unit size cmds ->
app "setlength{\\unitlength}" (text (show unit ++ "mm"))
$$ hang (app "begin" (text "picture")<>text (show size)) 2
(vcat (map ppDrawingCommand cmds))
$$ app "end" (text "picture")
$$ text ""
ppDrawingCommand (Put pos obj) = put pos (ppObject obj)
ppObject obj =
case obj of
Text s -> text s
TinyText s -> small (text s)
OvalTop size -> text "\\oval" <> text (show size) <> text "[t]"
ArrowDown len -> app "vector(0,-1)" (text (show len))
put p@(_,_) = app ("put" ++ show p)
small w = text "{\\tiny" <+> w <> text "}"
comment s = text "%%" <+> text s -- line break show follow
app macro arg = text "\\" <> text macro <> text "{" <> arg <> text "}"
latexDoc :: Doc -> Doc
latexDoc body =
vcat [text "\\documentclass{article}",
text "\\usepackage[utf8]{inputenc}",
text "\\begin{document}",
body,
text "\\end{document}"]
-- * SVG (see https://www.w3.org/Graphics/SVG/IG/resources/svgprimer.html)
-- | Render LaTeX pictures as SVG
toSVG = concatMap toSVG1
where
toSVG1 el =
case el of
Comment s -> []
Picture unit size@(w,h) cmds ->
[Elem "svg" ["width".=x1,"height".=y0+5,
("viewBox",unwords (map show [0,0,x1,y0+5])),
("version","1.1"),
("xmlns","http://www.w3.org/2000/svg")]
(white_bg:concatMap draw cmds)]
where
white_bg =
Elem "rect" ["x".=0,"y".=0,"width".=x1,"height".=y0+5,
("fill","white")] []
draw (Put pos obj) = objectSVG pos obj
objectSVG pos obj =
case obj of
Text s -> [text 16 pos s]
TinyText s -> [text 10 pos s]
OvalTop size -> [ovalTop pos size]
ArrowDown len -> arrowDown pos len
text h (x,y) s =
Elem "text" ["x".=xc x,"y".=yc y-2,"font-size".=h]
[CharData s]
ovalTop (x,y) (w,h) =
Elem "path" [("d",path),("stroke","black"),("fill","none")] []
where
x1 = x-w/2
x2 = min x (x1+r)
x3 = max x (x4-r)
x4 = x+w/2
y1 = y
y2 = y+r
r = h/2
sx = show . xc
sy = show . yc
path = unwords (["M",sx x1,sy y1,"Q",sx x1,sy y2,sx x2,sy y2,
"L",sx x3,sy y2,"Q",sx x4,sy y2,sx x4,sy y1])
arrowDown (x,y) len =
[Elem "line" ["x1".=xc x,"y1".=yc y,"x2".=xc x,"y2".=y2,
("stroke","black")] [],
Elem "path" [("d",unwords arrowhead)] []]
where
x2 = xc x
y2 = yc (y-len)
arrowhead = "M":map show [x2,y2,x2-3,y2-6,x2+3,y2-6]
xc x = num x+5
yc y = y0-num y
x1 = num w+10
y0 = num h+20
num x = round (scale*x)
scale = unit*5
infix 0 .=
n.=v = (n,show v)
-- * SVG is XML
data SVG = CharData String | Elem TagName Attrs [SVG]
type TagName = String
type Attrs = [(String,String)]
ppSVG svg =
vcat [text "<?xml version=\"1.0\" standalone=\"no\"?>",
text "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"",
text "\"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">",
text "",
vcat (map ppSVG1 svg)] -- It should be a single <svg> element...
where
ppSVG1 svg1 =
case svg1 of
CharData s -> text (encode s)
Elem tag attrs [] ->
text "<"<>text tag<>cat (map attr attrs) <> text "/>"
Elem tag attrs svg ->
cat [text "<"<>text tag<>cat (map attr attrs) <> text ">",
nest 2 (cat (map ppSVG1 svg)),
text "</"<>text tag<>text ">"]
attr (n,v) = text " "<>text n<>text "=\""<>text (encode v)<>text "\""
encode s = foldr encodeEntity "" s
encodeEntity = encodeEntity' (const False)
encodeEntity' esc c r =
case c of
'&' -> "&amp;"++r
'<' -> "&lt;"++r
'>' -> "&gt;"++r
_ -> c:r
----------------------------------
-- concrete syntax annotations (local) on top of conll
-- examples of annotations:
-- UseComp {"not"} PART neg head
-- UseComp {*} AUX cop head
type CncLabels = [(String, String -> Maybe (String -> String,String,String))]
-- (fun, word -> (pos,label,target))
-- the pos can remain unchanged, as in the current notation in the article
fixCoNLL :: CncLabels -> CoNLL -> CoNLL
fixCoNLL labels conll = map fixc conll where
fixc row = case row of
(i:word:fun:pos:cat:x_:"0":"dep":xs) -> (i:word:fun:pos:cat:x_:"0":"root":xs) --- change the root label from dep to root
(i:word:fun:pos:cat:x_:j:label:xs) -> case look (fun,word) of
Just (pos',label',"head") -> (i:word:fun:pos' pos:cat:x_:j :label':xs)
Just (pos',label',target) -> (i:word:fun:pos' pos:cat:x_: getDep j target:label':xs)
_ -> row
_ -> row
look (fun,word) = case lookup fun labels of
Just relabel -> case relabel word of
Just row -> Just row
_ -> case lookup "*" labels of
Just starlabel -> starlabel word
_ -> Nothing
_ -> case lookup "*" labels of
Just starlabel -> starlabel word
_ -> Nothing
getDep j label = maybe j id $ lookup (label,j) [((label,j),i) | i:word:fun:pos:cat:x_:j:label:xs <- conll]
getCncDepLabels :: String -> CncLabels
getCncDepLabels = map merge . groupBy (\ (x,_) (a,_) -> x == a) . concatMap analyse . filter choose . lines where
--- choose is for compatibility with the general notation
choose line = notElem '(' line && elem '{' line --- ignoring non-local (with "(") and abstract (without "{") rules
analyse line = case break (=='{') line of
(beg,_:ws) -> case break (=='}') ws of
(toks,_:target) -> case (words beg, words target) of
(fun:_,[ label,j]) -> [(fun, (tok, (id, label,j))) | tok <- getToks toks]
(fun:_,[pos,label,j]) -> [(fun, (tok, (const pos,label,j))) | tok <- getToks toks]
_ -> []
_ -> []
_ -> []
merge rules@((fun,_):_) = (fun, \tok ->
case lookup tok (map snd rules) of
Just new -> return new
_ -> lookup "*" (map snd rules)
)
getToks = words . map (\c -> if elem c "\"," then ' ' else c)
printCoNLL :: CoNLL -> String
printCoNLL = unlines . map (concat . intersperse "\t")
newGraphvizOptions :: Ptr GuPool -> GraphvizOptions -> IO (Ptr PgfGraphvizOptions)
newGraphvizOptions pool opts = do
c_opts <- gu_malloc pool (#size PgfGraphvizOptions)
@@ -542,7 +969,7 @@ parseWithHeuristics :: Concr -- ^ the language with which we parse
-- If a literal has been recognized then the output should
-- be Just (expr,probability,end_offset)
-> ParseOutput
parseWithHeuristics lang (Type ctype touchType) sent heuristic callbacks =
parseWithHeuristics lang (Type ctype _) sent heuristic callbacks =
unsafePerformIO $
do exprPl <- gu_new_pool
parsePl <- gu_new_pool
@@ -550,7 +977,6 @@ parseWithHeuristics lang (Type ctype touchType) sent heuristic callbacks =
sent <- newUtf8CString sent parsePl
callbacks_map <- mkCallbacksMap (concr lang) callbacks parsePl
enum <- pgf_parse_with_heuristics (concr lang) ctype sent heuristic callbacks_map exn parsePl exprPl
touchType
failed <- gu_exn_is_raised exn
if failed
then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
@@ -618,6 +1044,26 @@ mkCallbacksMap concr callbacks pool = do
predict_callback _ _ _ = return nullPtr
complete :: Concr -- ^ the language with which we do word completion
-> Type -- ^ the start category
-> String -- ^ the input sentence
-> String -- ^ prefix for the word to be completed
-> [(String, Cat, Fun, Float)]
complete lang (Type ctype _) sent prefix =
unsafePerformIO $
do pl <- gu_new_pool
exn <- gu_new_exn pl
sent <- newUtf8CString sent pl
prefix <- newUtf8CString prefix pl
enum <- pgf_complete (concr lang) ctype sent prefix exn pl
failed <- gu_exn_is_raised exn
if failed
then do gu_pool_free pl
return []
else do fpl <- newForeignPtr gu_pool_finalizer pl
tokens <- fromPgfTokenEnum enum fpl
return tokens
lookupSentence :: Concr -- ^ the language with which we parse
-> Type -- ^ the start category
-> String -- ^ the input sentence
@@ -862,9 +1308,8 @@ type LIndex = Int
-- mark the beginning and the end of each constituent.
data BracketedString
= Leaf String -- ^ this is the leaf i.e. a single token
| BIND -- ^ the surrounding tokens must be bound together
| Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [BracketedString]
-- ^ this is a bracket. The 'CId' is the category of
| Bracket Cat {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex Fun [BracketedString]
-- ^ this is a bracket. The 'Cat' is the category of
-- the phrase. The 'FId' is an unique identifier for
-- every phrase in the sentence. For context-free grammars
-- i.e. without discontinuous constituents this identifier
@@ -875,7 +1320,7 @@ data BracketedString
-- the constituent index i.e. 'LIndex'. If the grammar is reduplicating
-- then the constituent indices will be the same for all brackets
-- that represents the same constituent.
-- The second 'CId' is the name of the abstract function that generated
-- The 'Fun' is the name of the abstract function that generated
-- this phrase.
-- | Renders the bracketed string as a string where
@@ -885,13 +1330,11 @@ showBracketedString :: BracketedString -> String
showBracketedString = render . ppBracketedString
ppBracketedString (Leaf t) = text t
ppBracketedString BIND = text "&+"
ppBracketedString (Bracket cat fid index _ bss) = parens (text cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
-- | Extracts the sequence of tokens from the bracketed string
flattenBracketedString :: BracketedString -> [String]
flattenBracketedString (Leaf w) = [w]
flattenBracketedString BIND = []
flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString bss
bracketedLinearize :: Concr -> Expr -> [BracketedString]
@@ -909,8 +1352,27 @@ bracketedLinearize lang e = unsafePerformIO $
return []
else do ctree <- pgf_lzr_wrap_linref ctree pl
ref <- newIORef ([],[])
withBracketLinFuncs ref exn $ \ppLinFuncs ->
pgf_lzr_linearize (concr lang) ctree 0 ppLinFuncs pl
allocaBytes (#size PgfLinFuncs) $ \pLinFuncs ->
alloca $ \ppLinFuncs -> do
fptr_symbol_token <- wrapSymbolTokenCallback (symbol_token ref)
fptr_begin_phrase <- wrapPhraseCallback (begin_phrase ref)
fptr_end_phrase <- wrapPhraseCallback (end_phrase ref)
fptr_symbol_ne <- wrapSymbolNonExistCallback (symbol_ne exn)
fptr_symbol_meta <- wrapSymbolMetaCallback (symbol_meta ref)
(#poke PgfLinFuncs, symbol_token) pLinFuncs fptr_symbol_token
(#poke PgfLinFuncs, begin_phrase) pLinFuncs fptr_begin_phrase
(#poke PgfLinFuncs, end_phrase) pLinFuncs fptr_end_phrase
(#poke PgfLinFuncs, symbol_ne) pLinFuncs fptr_symbol_ne
(#poke PgfLinFuncs, symbol_bind) pLinFuncs nullPtr
(#poke PgfLinFuncs, symbol_capit) pLinFuncs nullPtr
(#poke PgfLinFuncs, symbol_meta) pLinFuncs fptr_symbol_meta
poke ppLinFuncs pLinFuncs
pgf_lzr_linearize (concr lang) ctree 0 ppLinFuncs pl
freeHaskellFunPtr fptr_symbol_token
freeHaskellFunPtr fptr_begin_phrase
freeHaskellFunPtr fptr_end_phrase
freeHaskellFunPtr fptr_symbol_ne
freeHaskellFunPtr fptr_symbol_meta
failed <- gu_exn_is_raised exn
if failed
then do is_nonexist <- gu_exn_caught exn gu_exn_type_PgfLinNonExist
@@ -919,65 +1381,6 @@ bracketedLinearize lang e = unsafePerformIO $
else throwExn exn
else do (_,bs) <- readIORef ref
return (reverse bs)
bracketedLinearizeAll :: Concr -> Expr -> [[BracketedString]]
bracketedLinearizeAll lang e = unsafePerformIO $
withGuPool $ \pl ->
do exn <- gu_new_exn pl
cts <- pgf_lzr_concretize (concr lang) (expr e) exn pl
failed <- gu_exn_is_raised exn
if failed
then do touchExpr e
throwExn exn
else do ref <- newIORef ([],[])
bss <- withBracketLinFuncs ref exn $ \ppLinFuncs ->
collect ref cts ppLinFuncs exn pl
touchExpr e
return bss
where
collect ref cts ppLinFuncs exn pl = withGuPool $ \tmpPl -> do
ctree <- alloca $ \ptr -> do gu_enum_next cts ptr tmpPl
peek ptr
if ctree == nullPtr
then return []
else do ctree <- pgf_lzr_wrap_linref ctree pl
pgf_lzr_linearize (concr lang) ctree 0 ppLinFuncs pl
failed <- gu_exn_is_raised exn
if failed
then do is_nonexist <- gu_exn_caught exn gu_exn_type_PgfLinNonExist
if is_nonexist
then collect ref cts ppLinFuncs exn pl
else throwExn exn
else do (_,bs) <- readIORef ref
writeIORef ref ([],[])
bss <- collect ref cts ppLinFuncs exn pl
return (reverse bs : bss)
withBracketLinFuncs ref exn f =
allocaBytes (#size PgfLinFuncs) $ \pLinFuncs ->
alloca $ \ppLinFuncs -> do
fptr_symbol_token <- wrapSymbolTokenCallback (symbol_token ref)
fptr_begin_phrase <- wrapPhraseCallback (begin_phrase ref)
fptr_end_phrase <- wrapPhraseCallback (end_phrase ref)
fptr_symbol_ne <- wrapSymbolNonExistCallback (symbol_ne exn)
fptr_symbol_bind <- wrapSymbolBindCallback (symbol_bind ref)
fptr_symbol_meta <- wrapSymbolMetaCallback (symbol_meta ref)
(#poke PgfLinFuncs, symbol_token) pLinFuncs fptr_symbol_token
(#poke PgfLinFuncs, begin_phrase) pLinFuncs fptr_begin_phrase
(#poke PgfLinFuncs, end_phrase) pLinFuncs fptr_end_phrase
(#poke PgfLinFuncs, symbol_ne) pLinFuncs fptr_symbol_ne
(#poke PgfLinFuncs, symbol_bind) pLinFuncs fptr_symbol_bind
(#poke PgfLinFuncs, symbol_capit) pLinFuncs nullPtr
(#poke PgfLinFuncs, symbol_meta) pLinFuncs fptr_symbol_meta
poke ppLinFuncs pLinFuncs
res <- f ppLinFuncs
freeHaskellFunPtr fptr_symbol_token
freeHaskellFunPtr fptr_begin_phrase
freeHaskellFunPtr fptr_end_phrase
freeHaskellFunPtr fptr_symbol_ne
freeHaskellFunPtr fptr_symbol_bind
freeHaskellFunPtr fptr_symbol_meta
return res
where
symbol_token ref _ c_token = do
(stack,bs) <- readIORef ref
@@ -1000,22 +1403,17 @@ withBracketLinFuncs ref exn f =
gu_exn_raise exn gu_exn_type_PgfLinNonExist
return ()
symbol_bind ref _ = do
(stack,bs) <- readIORef ref
writeIORef ref (stack,BIND : bs)
return ()
symbol_meta ref _ meta_id = do
(stack,bs) <- readIORef ref
writeIORef ref (stack,Leaf "?" : bs)
throwExn exn = do
is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
if is_exn
then do c_msg <- (#peek GuExn, data.data) exn
msg <- peekUtf8CString c_msg
throwIO (PGFError msg)
else do throwIO (PGFError "The abstract tree cannot be linearized")
throwExn exn = do
is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
if is_exn
then do c_msg <- (#peek GuExn, data.data) exn
msg <- peekUtf8CString c_msg
throwIO (PGFError msg)
else do throwIO (PGFError "The abstract tree cannot be linearized")
alignWords :: Concr -> Expr -> [(String, [Int])]
alignWords lang e = unsafePerformIO $
@@ -1128,16 +1526,17 @@ categories p =
name <- peekUtf8CString (castPtr key)
writeIORef ref $! (name : names)
categoryContext :: PGF -> Cat -> [Hypo]
categoryContext :: PGF -> Cat -> Maybe [Hypo]
categoryContext p cat =
unsafePerformIO $
withGuPool $ \tmpPl ->
do c_cat <- newUtf8CString cat tmpPl
c_hypos <- pgf_category_context (pgf p) c_cat
if c_hypos == nullPtr
then return []
then return Nothing
else do n_hypos <- (#peek GuSeq, len) c_hypos
peekHypos (c_hypos `plusPtr` (#offset GuSeq, data)) 0 n_hypos
hypos <- peekHypos (c_hypos `plusPtr` (#offset GuSeq, data)) 0 n_hypos
return (Just hypos)
where
peekHypos :: Ptr a -> Int -> Int -> IO [Hypo]
peekHypos c_hypo i n
@@ -1152,8 +1551,8 @@ categoryContext p cat =
toBindType (#const PGF_BIND_TYPE_EXPLICIT) = Explicit
toBindType (#const PGF_BIND_TYPE_IMPLICIT) = Implicit
categoryProb :: PGF -> Cat -> Float
categoryProb p cat =
categoryProbability :: PGF -> Cat -> Float
categoryProbability p cat =
unsafePerformIO $
withGuPool $ \tmpPl ->
do c_cat <- newUtf8CString cat tmpPl
@@ -1164,7 +1563,7 @@ categoryProb p cat =
-----------------------------------------------------------------------------
-- Helper functions
fromPgfExprEnum :: Ptr GuEnum -> ForeignPtr GuPool -> IO () -> IO [(Expr, Float)]
fromPgfExprEnum :: Ptr GuEnum -> ForeignPtr GuPool -> Touch -> IO [(Expr, Float)]
fromPgfExprEnum enum fpl touch =
do pgfExprProb <- alloca $ \ptr ->
withForeignPtr fpl $ \pl ->
@@ -1178,6 +1577,22 @@ fromPgfExprEnum enum fpl touch =
prob <- (#peek PgfExprProb, prob) pgfExprProb
return ((Expr expr touch,prob) : ts)
fromPgfTokenEnum :: Ptr GuEnum -> ForeignPtr GuPool -> IO [(String, Cat, Fun, Float)]
fromPgfTokenEnum enum fpl =
do pgfTokenProb <- alloca $ \ptr ->
withForeignPtr fpl $ \pl ->
do gu_enum_next enum ptr pl
peek ptr
if pgfTokenProb == nullPtr
then do finalizeForeignPtr fpl
return []
else do tok <- (#peek PgfTokenProb, tok) pgfTokenProb >>= peekUtf8CString
cat <- (#peek PgfTokenProb, cat) pgfTokenProb >>= peekUtf8CString
fun <- (#peek PgfTokenProb, fun) pgfTokenProb >>= peekUtf8CString
prob <- (#peek PgfTokenProb, prob) pgfTokenProb
ts <- unsafeInterleaveIO (fromPgfTokenEnum enum fpl)
return ((tok,cat,fun,prob) : ts)
-----------------------------------------------------------------------
-- Exceptions
@@ -1256,3 +1671,7 @@ capitalized' test s@(c:_) | test c =
case span isSpace rest1 of
(space,rest2) -> Just (name++space,rest2)
capitalized' not s = Nothing
tag i
| i < 0 = char 'r' <> int (negate i)
| otherwise = char 'n' <> int i

View File

@@ -8,19 +8,13 @@ import Foreign.C
import Data.IORef
import PGF2.FFI
-- | An data type that represents
-- identifiers for functions and categories in PGF.
type CId = String
wildCId = "_" :: CId
type Cat = CId -- ^ Name of syntactic category
type Fun = CId -- ^ Name of function
type Cat = String -- ^ Name of syntactic category
type Fun = String -- ^ Name of function
data BindType =
Explicit
| Implicit
deriving Show
deriving (Show, Eq, Ord)
-----------------------------------------------------------------------------
-- Expressions
@@ -43,7 +37,7 @@ instance Eq Expr where
return (res /= 0)
-- | Constructs an expression by lambda abstraction
mkAbs :: BindType -> CId -> Expr -> Expr
mkAbs :: BindType -> String -> Expr -> Expr
mkAbs bind_type var (Expr body bodyTouch) =
unsafePerformIO $ do
exprPl <- gu_new_pool
@@ -58,7 +52,7 @@ mkAbs bind_type var (Expr body bodyTouch) =
Implicit -> (#const PGF_BIND_TYPE_IMPLICIT)
-- | Decomposes an expression into an abstraction and a body
unAbs :: Expr -> Maybe (BindType, CId, Expr)
unAbs :: Expr -> Maybe (BindType, String, Expr)
unAbs (Expr expr touch) =
unsafePerformIO $ do
c_abs <- pgf_expr_unabs expr
@@ -103,6 +97,17 @@ unApp (Expr expr touch) =
c_args <- peekArray (fromIntegral arity) (appl `plusPtr` (#offset PgfApplication, args))
return $ Just (fun, [Expr c_arg touch | c_arg <- c_args])
-- | Decomposes an expression into an application of a function
unapply :: Expr -> (Expr,[Expr])
unapply (Expr expr touch) =
unsafePerformIO $
withGuPool $ \pl -> do
appl <- pgf_expr_unapply_ex expr pl
efun <- (#peek PgfApplication, efun) appl
arity <- (#peek PgfApplication, n_args) appl :: IO CInt
c_args <- peekArray (fromIntegral arity) (appl `plusPtr` (#offset PgfApplication, args))
return (Expr efun touch, [Expr c_arg touch | c_arg <- c_args])
-- | Constructs an expression from a string literal
mkStr :: String -> Expr
mkStr str =
@@ -184,9 +189,6 @@ unMeta (Expr expr touch) =
touch
return (Just (fromIntegral (id :: CInt)))
-- | this functions is only for backward compatibility with the old Haskell runtime
mkCId x = x
-- | parses a 'String' as an expression
readExpr :: String -> Maybe Expr
readExpr str =
@@ -204,6 +206,22 @@ readExpr str =
else do gu_pool_free exprPl
return Nothing
pIdent :: ReadS String
pIdent str =
unsafePerformIO $
withGuPool $ \tmpPl ->
do ref <- newIORef (str,str,str)
exn <- gu_new_exn tmpPl
c_fetch_char <- wrapParserGetc (fetch_char ref)
c_parser <- pgf_new_parser nullPtr c_fetch_char tmpPl tmpPl exn
c_ident <- pgf_expr_parser_ident c_parser
status <- gu_exn_is_raised exn
if (not status && c_ident /= nullPtr)
then do ident <- peekUtf8CString c_ident
(str,_,_) <- readIORef ref
return [(ident,str)]
else do return []
pExpr :: ReadS Expr
pExpr str =
unsafePerformIO $
@@ -221,19 +239,19 @@ pExpr str =
return [(Expr c_expr (touchForeignPtr exprFPl),str)]
else do gu_pool_free exprPl
return []
where
fetch_char :: IORef (String,String,String) -> Ptr () -> (#type bool) -> Ptr GuExn -> IO (#type GuUCS)
fetch_char ref _ mark exn = do
(str1,str2,str3) <- readIORef ref
let str1' = if mark /= 0
then str2
else str1
case str3 of
[] -> do writeIORef ref (str1',str3,[])
gu_exn_raise exn gu_exn_type_GuEOF
return (-1)
(c:cs) -> do writeIORef ref (str1',str3,cs)
return ((fromIntegral . fromEnum) c)
fetch_char :: IORef (String,String,String) -> Ptr () -> (#type bool) -> Ptr GuExn -> IO (#type GuUCS)
fetch_char ref _ mark exn = do
(str1,str2,str3) <- readIORef ref
let str1' = if mark /= 0
then str2
else str1
case str3 of
[] -> do writeIORef ref (str1',str3,[])
gu_exn_raise exn gu_exn_type_GuEOF
return (-1)
(c:cs) -> do writeIORef ref (str1',str3,cs)
return ((fromIntegral . fromEnum) c)
foreign import ccall "pgf/expr.h pgf_new_parser"
pgf_new_parser :: Ptr () -> (FunPtr ParserGetc) -> Ptr GuPool -> Ptr GuPool -> Ptr GuExn -> IO (Ptr PgfExprParser)
@@ -241,16 +259,20 @@ foreign import ccall "pgf/expr.h pgf_new_parser"
foreign import ccall "pgf/expr.h pgf_expr_parser_expr"
pgf_expr_parser_expr :: Ptr PgfExprParser -> (#type bool) -> IO PgfExpr
foreign import ccall "pgf/expr.h pgf_expr_parser_ident"
pgf_expr_parser_ident :: Ptr PgfExprParser -> IO CString
type ParserGetc = Ptr () -> (#type bool) -> Ptr GuExn -> IO (#type GuUCS)
foreign import ccall "wrapper"
wrapParserGetc :: ParserGetc -> IO (FunPtr ParserGetc)
-- | renders an expression as a 'String'. The list
-- of identifiers is the list of all free variables
-- in the expression in order reverse to the order
-- of binding.
showExpr :: [CId] -> Expr -> String
showExpr :: [String] -> Expr -> String
showExpr scope e =
unsafePerformIO $
withGuPool $ \tmpPl ->

View File

@@ -15,12 +15,13 @@ import Control.Exception
import GHC.Ptr
import Data.Int
import Data.Word
import qualified Data.Map as Map
type Touch = IO ()
-- | An abstract data type representing multilingual grammar
-- in Portable Grammar Format.
data PGF = PGF {pgf :: Ptr PgfPGF, touchPGF :: Touch}
data PGF = PGF {pgf :: Ptr PgfPGF, langs :: Map.Map String Concr, touchPGF :: Touch}
data Concr = Concr {concr :: Ptr PgfConcr, touchConcr :: Touch}
------------------------------------------------------------------
@@ -32,7 +33,6 @@ data GuIn
data GuOut
data GuKind
data GuType
data GuString
data GuStringBuf
data GuMap
data GuMapItor
@@ -266,7 +266,13 @@ foreign import ccall "pgf/pgf.h pgf_read"
pgf_read :: CString -> Ptr GuPool -> Ptr GuExn -> IO (Ptr PgfPGF)
foreign import ccall "pgf/pgf.h pgf_write"
pgf_write :: Ptr PgfPGF -> CString -> Ptr GuExn -> IO ()
pgf_write :: Ptr PgfPGF -> CSizeT -> Ptr (Ptr PgfConcr) -> CString -> Ptr GuExn -> IO ()
foreign import ccall "pgf/writer.h pgf_concrete_save"
pgf_concrete_save :: Ptr PgfConcr -> CString -> Ptr GuExn -> IO ()
foreign import ccall "pgf/pgf.h pgf_have_same_abstract"
pgf_have_same_abstract :: Ptr PgfPGF -> Ptr PgfPGF -> (#type bool)
foreign import ccall "pgf/pgf.h pgf_abstract_name"
pgf_abstract_name :: Ptr PgfPGF -> IO CString
@@ -292,6 +298,9 @@ foreign import ccall "pgf/pgf.h pgf_language_code"
foreign import ccall "pgf/pgf.h pgf_iter_categories"
pgf_iter_categories :: Ptr PgfPGF -> Ptr GuMapItor -> Ptr GuExn -> IO ()
foreign import ccall "pgf/pgf.h pgf_concrete_fix_internals"
pgf_concrete_fix_internals :: Ptr PgfConcr -> IO ()
foreign import ccall "pgf/pgf.h pgf_start_cat"
pgf_start_cat :: Ptr PgfPGF -> Ptr GuPool -> IO PgfType
@@ -340,7 +349,6 @@ foreign import ccall "pgf/pgf.h pgf_lzr_get_table"
type SymbolTokenCallback = Ptr (Ptr PgfLinFuncs) -> CString -> IO ()
type PhraseCallback = Ptr (Ptr PgfLinFuncs) -> CString -> CInt -> CSizeT -> CString -> IO ()
type NonExistCallback = Ptr (Ptr PgfLinFuncs) -> IO ()
type BindCallback = Ptr (Ptr PgfLinFuncs) -> IO ()
type MetaCallback = Ptr (Ptr PgfLinFuncs) -> CInt -> IO ()
foreign import ccall "wrapper"
@@ -352,9 +360,6 @@ foreign import ccall "wrapper"
foreign import ccall "wrapper"
wrapSymbolNonExistCallback :: NonExistCallback -> IO (FunPtr NonExistCallback)
foreign import ccall "wrapper"
wrapSymbolBindCallback :: BindCallback -> IO (FunPtr BindCallback)
foreign import ccall "wrapper"
wrapSymbolMetaCallback :: MetaCallback -> IO (FunPtr MetaCallback)
@@ -364,6 +369,9 @@ foreign import ccall "pgf/pgf.h pgf_align_words"
foreign import ccall "pgf/pgf.h pgf_parse_with_heuristics"
pgf_parse_with_heuristics :: Ptr PgfConcr -> PgfType -> CString -> Double -> Ptr PgfCallbacksMap -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum)
foreign import ccall "pgf/pgf.h pgf_complete"
pgf_complete :: Ptr PgfConcr -> PgfType -> CString -> CString -> Ptr GuExn -> Ptr GuPool -> IO (Ptr GuEnum)
foreign import ccall "pgf/pgf.h pgf_lookup_sentence"
pgf_lookup_sentence :: Ptr PgfConcr -> PgfType -> CString -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum)
@@ -425,6 +433,9 @@ foreign import ccall "pgf/pgf.h pgf_expr_apply"
foreign import ccall "pgf/pgf.h pgf_expr_unapply"
pgf_expr_unapply :: PgfExpr -> Ptr GuPool -> IO (Ptr PgfApplication)
foreign import ccall "pgf/pgf.h pgf_expr_unapply_ex"
pgf_expr_unapply_ex :: PgfExpr -> Ptr GuPool -> IO (Ptr PgfApplication)
foreign import ccall "pgf/pgf.h pgf_expr_abs"
pgf_expr_abs :: PgfBindType -> CString -> PgfExpr -> Ptr GuPool -> IO PgfExpr
@@ -450,12 +461,12 @@ foreign import ccall "pgf/pgf.h pgf_expr_float"
foreign import ccall "pgf/pgf.h pgf_expr_unlit"
pgf_expr_unlit :: PgfExpr -> CInt -> IO (Ptr a)
foreign import ccall "pgf/expr.h pgf_expr_arity"
pgf_expr_arity :: PgfExpr -> IO CInt
foreign import ccall "pgf/expr.h pgf_expr_eq"
pgf_expr_eq :: PgfExpr -> PgfExpr -> IO CInt
foreign import ccall "pgf/expr.h pgf_type_eq"
pgf_type_eq :: PgfType -> PgfType -> IO (#type bool)
foreign import ccall "pgf/expr.h pgf_expr_hash"
pgf_expr_hash :: GuHash -> PgfExpr -> IO GuHash
@@ -499,7 +510,7 @@ foreign import ccall "pgf/pgf.h pgf_generate_all"
pgf_generate_all :: Ptr PgfPGF -> PgfType -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum)
foreign import ccall "pgf/pgf.h pgf_print"
pgf_print :: Ptr PgfPGF -> Ptr GuOut -> Ptr GuExn -> IO ()
pgf_print :: Ptr PgfPGF -> CSizeT -> Ptr (Ptr PgfConcr) -> Ptr GuOut -> Ptr GuExn -> IO ()
foreign import ccall "pgf/expr.h pgf_read_expr"
pgf_read_expr :: Ptr GuIn -> Ptr GuPool -> Ptr GuPool -> Ptr GuExn -> IO PgfExpr

View File

@@ -2,18 +2,25 @@
module PGF2.Internal(-- * Access the internal structures
FId,isPredefFId,
FunId,Token,Production(..),PArg(..),Symbol(..),Literal(..),
FunId,SeqId,Token,Production(..),PArg(..),Symbol(..),Literal(..),
globalFlags, abstrFlags, concrFlags,
concrTotalCats, concrCategories, concrProductions,
concrTotalFuns, concrFunction,
concrTotalSeqs, concrSequence,
-- * Byte code
CodeLabel, Instr(..), IVal(..), TailInfo(..),
-- * Building new PGFs in memory
build, eAbs, eApp, eMeta, eFun, eVar, eTyped, eImplArg, dTyp, hypo,
build, Builder, B,
eAbs, eApp, eMeta, eFun, eVar, eLit, eTyped, eImplArg, dTyp, hypo,
AbstrInfo, newAbstr, ConcrInfo, newConcr, newPGF,
-- * Write an in-memory PGF to a file
writePGF
unionPGF, writePGF, writeConcr,
-- * Predefined concrete categories
fidString, fidInt, fidFloat, fidVar, fidStart
) where
#include <pgf/data.h>
@@ -29,7 +36,7 @@ import Data.IORef
import Data.Maybe(fromMaybe)
import Data.List(sortBy)
import Control.Exception(Exception,throwIO)
import Control.Monad(foldM)
import Control.Monad(foldM,when)
import qualified Data.Map as Map
type Token = String
@@ -50,7 +57,7 @@ data Production
= PApply {-# UNPACK #-} !FunId [PArg]
| PCoerce {-# UNPACK #-} !FId
deriving (Eq,Ord,Show)
data PArg = PArg [FId] {-# UNPACK #-} !FId deriving (Eq,Ord,Show)
data PArg = PArg [(FId,FId)] {-# UNPACK #-} !FId deriving (Eq,Ord,Show)
type FunId = Int
type SeqId = Int
data Literal =
@@ -59,6 +66,42 @@ data Literal =
| LFlt Double -- ^ a floating point constant
deriving (Eq,Ord,Show)
type CodeLabel = Int
data Instr
= CHECK_ARGS {-# UNPACK #-} !Int
| CASE Fun {-# UNPACK #-} !CodeLabel
| CASE_LIT Literal {-# UNPACK #-} !CodeLabel
| SAVE {-# UNPACK #-} !Int
| ALLOC {-# UNPACK #-} !Int
| PUT_CONSTR Fun
| PUT_CLOSURE {-# UNPACK #-} !CodeLabel
| PUT_LIT Literal
| SET IVal
| SET_PAD
| PUSH_FRAME
| PUSH IVal
| TUCK IVal {-# UNPACK #-} !Int
| EVAL IVal TailInfo
| DROP {-# UNPACK #-} !Int
| JUMP {-# UNPACK #-} !CodeLabel
| FAIL
| PUSH_ACCUM Literal
| POP_ACCUM
| ADD
data IVal
= HEAP {-# UNPACK #-} !Int
| ARG_VAR {-# UNPACK #-} !Int
| FREE_VAR {-# UNPACK #-} !Int
| GLOBAL Fun
deriving Eq
data TailInfo
= RecCall
| TailCall {-# UNPACK #-} !Int
| UpdateCall
-----------------------------------------------------------------------
-- Access the internal structures
@@ -181,7 +224,7 @@ concrProductions c fid = unsafePerformIO $ do
hypos <- peekSequence (deRef peekFId) (#size int) c_hypos
c_ccat <- (#peek PgfPArg, ccat) ptr
fid <- peekFId c_ccat
return (PArg hypos fid)
return (PArg [(fid,fid) | fid <- hypos] fid)
peekFId c_ccat = do
c_fid <- (#peek PgfCCat, fid) c_ccat
@@ -197,6 +240,9 @@ concrTotalFuns c = unsafePerformIO $ do
concrFunction :: Concr -> FunId -> (Fun,[SeqId])
concrFunction c funid = unsafePerformIO $ do
c_cncfuns <- (#peek PgfConcr, cncfuns) (concr c)
c_len <- (#peek GuSeq, len) c_cncfuns
when (funid >= fromIntegral (c_len :: CSizeT)) $
throwIO (PGFError ("Invalid concrete function: F"++show funid))
c_cncfun <- peek (c_cncfuns `plusPtr` ((#offset GuSeq, data)+funid*(#size PgfCncFun*)))
c_absfun <- (#peek PgfCncFun, absfun) c_cncfun
c_name <- (#peek PgfAbsFun, name) c_absfun
@@ -220,6 +266,9 @@ concrTotalSeqs c = unsafePerformIO $ do
concrSequence :: Concr -> SeqId -> [Symbol]
concrSequence c seqid = unsafePerformIO $ do
c_sequences <- (#peek PgfConcr, sequences) (concr c)
c_len <- (#peek GuSeq, len) c_sequences
when (seqid >= fromIntegral (c_len :: CSizeT)) $
throwIO (PGFError ("Invalid concrete sequence: S"++show seqid))
let c_sequence = c_sequences `plusPtr` ((#offset GuSeq, data)+seqid*(#size PgfSequence))
c_syms <- (#peek PgfSequence, syms) c_sequence
res <- peekSequence (deRef peekSymbol) (#size GuVariant) c_syms
@@ -288,6 +337,9 @@ isPredefFId = (`elem` [fidString, fidInt, fidFloat, fidVar])
data Builder s = Builder (Ptr GuPool) Touch
newtype B s a = B a
instance Functor (B s) where
fmap f (B x) = B (f x)
build :: (forall s . (?builder :: Builder s) => B s a) -> a
build f =
unsafePerformIO $ do
@@ -376,6 +428,21 @@ eVar var =
where
(Builder pool touch) = ?builder
eLit :: (?builder :: Builder s) => Literal -> B s Expr
eLit value =
unsafePerformIO $
alloca $ \pptr -> do
ptr <- gu_alloc_variant (#const PGF_EXPR_LIT)
(fromIntegral (#size PgfExprLit))
(#const gu_alignof(PgfExprLit))
pptr pool
c_value <- newLiteral value pool
(#poke PgfExprLit, lit) ptr c_value
e <- peek pptr
return (B (Expr e touch))
where
(Builder pool touch) = ?builder
eTyped :: (?builder :: Builder s) => B s Expr -> B s Type -> B s Expr
eTyped (B (Expr e _)) (B (Type ty _)) =
unsafePerformIO $
@@ -405,7 +472,7 @@ eImplArg (B (Expr e _)) =
where
(Builder pool touch) = ?builder
hypo :: BindType -> CId -> B s Type -> (B s Hypo)
hypo :: BindType -> String -> B s Type -> (B s Hypo)
hypo bind_type var (B ty) = B (bind_type,var,ty)
dTyp :: (?builder :: Builder s) => [B s Hypo] -> Cat -> [B s Expr] -> B s Type
@@ -450,14 +517,14 @@ data AbstrInfo = AbstrInfo (Ptr GuSeq) (Ptr GuSeq) (Map.Map String (Ptr PgfAbsCa
newAbstr :: (?builder :: Builder s) => [(String,Literal)] ->
[(Cat,[B s Hypo],Float)] ->
[(Fun,B s Type,Int,Float)] ->
AbstrInfo
B s AbstrInfo
newAbstr aflags cats funs = unsafePerformIO $ do
c_aflags <- newFlags aflags pool
(c_cats,abscats) <- newAbsCats (sortByFst3 cats) pool
(c_funs,absfuns) <- newAbsFuns (sortByFst4 funs) pool
c_abs_lin_fun <- newAbsLinFun
c_non_lexical_buf <- gu_make_buf (#size PgfProductionIdxEntry) pool
return (AbstrInfo c_aflags c_cats abscats c_funs absfuns c_abs_lin_fun c_non_lexical_buf touch)
return (B (AbstrInfo c_aflags c_cats abscats c_funs absfuns c_abs_lin_fun c_non_lexical_buf touch))
where
(Builder pool touch) = ?builder
@@ -525,7 +592,7 @@ newAbstr aflags cats funs = unsafePerformIO $ do
data ConcrInfo = ConcrInfo (Ptr GuSeq) (Ptr GuMap) (Ptr GuMap) (Ptr GuSeq) (Ptr GuSeq) (Ptr GuMap) (Ptr PgfConcr -> Ptr GuPool -> IO ()) CInt
newConcr :: (?builder :: Builder s) => AbstrInfo ->
newConcr :: (?builder :: Builder s) => B s AbstrInfo ->
[(String,Literal)] -> -- ^ Concrete syntax flags
[(String,String)] -> -- ^ Printnames
[(FId,[FunId])] -> -- ^ Lindefs
@@ -535,8 +602,8 @@ newConcr :: (?builder :: Builder s) => AbstrInfo ->
[[Symbol]] -> -- ^ Sequences (must be sorted)
[(Cat,FId,FId,[String])] -> -- ^ Concrete categories
FId -> -- ^ The total count of the categories
ConcrInfo
newConcr (AbstrInfo _ _ abscats _ absfuns c_abs_lin_fun c_non_lexical_buf _) cflags printnames lindefs linrefs prods cncfuns sequences cnccats total_cats = unsafePerformIO $ do
B s ConcrInfo
newConcr (B (AbstrInfo _ _ abscats _ absfuns c_abs_lin_fun c_non_lexical_buf _)) cflags printnames lindefs linrefs prods cncfuns sequences cnccats total_cats = unsafePerformIO $ do
c_cflags <- newFlags cflags pool
c_printname <- newMap (#size GuString) gu_string_hasher newUtf8CString
(#size GuString) (pokeString pool)
@@ -553,12 +620,12 @@ newConcr (AbstrInfo _ _ abscats _ absfuns c_abs_lin_fun c_non_lexical_buf _) cf
mapM_ (addLinrefs c_ccats funs_ptr) linrefs
mk_index <- foldM (addProductions c_ccats funs_ptr c_non_lexical_buf) (\concr pool -> return ()) prods
c_cnccats <- newMap (#size GuString) gu_string_hasher newUtf8CString (#size PgfCncCat*) (pokeCncCat c_ccats) (map (\v@(k,_,_,_) -> (k,v)) cnccats) pool
return (ConcrInfo c_cflags c_printname c_ccats c_cncfuns c_seqs c_cnccats mk_index (fromIntegral total_cats))
return (B (ConcrInfo c_cflags c_printname c_ccats c_cncfuns c_seqs c_cnccats mk_index (fromIntegral total_cats)))
where
(Builder pool touch) = ?builder
pokeCncFun seqs_ptr ptr cncfun = do
c_cncfun <- newCncFun absfuns nullPtr cncfun pool
pokeCncFun seqs_ptr ptr cncfun@(funid,_) = do
c_cncfun <- newCncFun absfuns seqs_ptr cncfun pool
poke ptr c_cncfun
pokeSequence c_seq syms = do
@@ -583,7 +650,9 @@ newConcr (AbstrInfo _ _ abscats _ absfuns c_abs_lin_fun c_non_lexical_buf _) cf
(#poke PgfCCat, prods) c_ccat c_prods
pokeProductions c_ccat (c_prods `plusPtr` (#offset GuSeq, data)) 0 (n_prods-1) mk_index prods
where
pokeProductions c_ccat ptr top bot mk_index [] = return mk_index
pokeProductions c_ccat ptr top bot mk_index [] = do
(#poke PgfCCat, n_synprods) c_ccat (fromIntegral top :: CSizeT)
return mk_index
pokeProductions c_ccat ptr top bot mk_index (prod:prods) = do
(is_lexical,c_prod) <- newProduction c_ccats funs_ptr c_non_lexical_buf prod pool
let mk_index' = \concr pool -> do pgf_parser_index concr c_ccat c_prod is_lexical pool
@@ -596,27 +665,29 @@ newConcr (AbstrInfo _ _ abscats _ absfuns c_abs_lin_fun c_non_lexical_buf _) cf
pokeProductions c_ccat ptr top (bot-1) mk_index' prods
pokeRefDefFunId funs_ptr ptr funid = do
let c_fun = funs_ptr `plusPtr` (funid * (#size PgfCncFun))
c_fun <- peek (funs_ptr `plusPtr` (funid * (#size PgfCncFun*)))
(#poke PgfCncFun, absfun) c_fun c_abs_lin_fun
poke ptr c_fun
pokeCncCat c_ccats ptr (name,start,end,labels) = do
let n_lins = fromIntegral (length labels) :: CSizeT
c_cnccat <- gu_malloc_aligned pool
c_cnccat <- gu_malloc_aligned pool
((#size PgfCncCat)+n_lins*(#size GuString))
(#const gu_flex_alignof(PgfCncCat))
case Map.lookup name abscats of
Just c_abscat -> (#poke PgfCncCat, abscat) c_cnccat c_abscat
Nothing -> throwIO (PGFError ("The category "++name++" is not in the abstract syntax"))
c_ccats <- newSequence (#size PgfCCat*) pokeFId [start..end] pool
c_ccats <- newSequence (#size PgfCCat*) (pokeFId c_cnccat) [start..end] pool
(#poke PgfCncCat, cats) c_cnccat c_ccats
(#poke PgfCncCat, n_lins) c_cnccat n_lins
pokeLabels (c_cnccat `plusPtr` (#offset PgfCncCat, labels)) labels
poke ptr c_cnccat
where
pokeFId ptr fid = do
pokeFId c_cnccat ptr fid = do
c_ccat <- getCCat c_ccats fid pool
(#poke PgfCCat, cnccat) c_ccat c_cnccat
poke ptr c_ccat
pokeLabels ptr [] = return []
pokeLabels ptr (l:ls) = do
c_l <- newUtf8CString l pool
@@ -626,10 +697,10 @@ newConcr (AbstrInfo _ _ abscats _ absfuns c_abs_lin_fun c_non_lexical_buf _) cf
newPGF :: (?builder :: Builder s) => [(String,Literal)] ->
AbsName ->
AbstrInfo ->
[(ConcName,ConcrInfo)] ->
B s AbstrInfo ->
[(ConcName,B s ConcrInfo)] ->
B s PGF
newPGF gflags absname (AbstrInfo c_aflags c_cats _ c_funs _ c_abs_lin_fun _ _) concrs =
newPGF gflags absname (B (AbstrInfo c_aflags c_cats _ c_funs _ c_abs_lin_fun _ _)) concrs =
unsafePerformIO $ do
ptr <- gu_malloc_aligned pool
(#size PgfPGF)
@@ -637,7 +708,8 @@ newPGF gflags absname (AbstrInfo c_aflags c_cats _ c_funs _ c_abs_lin_fun _ _) c
c_gflags <- newFlags gflags pool
c_absname <- newUtf8CString absname pool
let c_abstr = ptr `plusPtr` (#offset PgfPGF, abstract)
c_concrs <- newSequence (#size PgfConcr) (pokeConcr c_abstr) concrs pool
c_concrs <- gu_make_seq (#size PgfConcr) (fromIntegral (length concrs)) pool
langs <- pokeConcrs c_abstr (c_concrs `plusPtr` (#offset GuSeq, data)) Map.empty concrs
(#poke PgfPGF, major_version) ptr (2 :: (#type uint16_t))
(#poke PgfPGF, minor_version) ptr (0 :: (#type uint16_t))
(#poke PgfPGF, gflags) ptr c_gflags
@@ -648,11 +720,18 @@ newPGF gflags absname (AbstrInfo c_aflags c_cats _ c_funs _ c_abs_lin_fun _ _) c
(#poke PgfPGF, abstract.abs_lin_fun) ptr c_abs_lin_fun
(#poke PgfPGF, concretes) ptr c_concrs
(#poke PgfPGF, pool) ptr pool
return (B (PGF ptr touch))
return (B (PGF ptr langs touch))
where
(Builder pool touch) = ?builder
pokeConcr c_abstr ptr (name, ConcrInfo c_cflags c_printnames c_ccats c_cncfuns c_seqs c_cnccats mk_index c_total_cats) = do
pokeConcrs c_abstr ptr langs [] = return langs
pokeConcrs c_abstr ptr langs ((name, B info):xs) = do
pokeConcr c_abstr ptr name info
pokeConcrs c_abstr (ptr `plusPtr` (fromIntegral (#size PgfConcr)))
(Map.insert name (Concr ptr touch) langs)
xs
pokeConcr c_abstr ptr name (ConcrInfo c_cflags c_printnames c_ccats c_cncfuns c_seqs c_cnccats mk_index c_total_cats) = do
c_name <- newUtf8CString name pool
c_fun_indices <- gu_make_map (#size GuString) gu_string_hasher
(#size PgfCncOverloadMap*) gu_null_struct
@@ -674,7 +753,9 @@ newPGF gflags absname (AbstrInfo c_aflags c_cats _ c_funs _ c_abs_lin_fun _ _) c
(#poke PgfConcr, cnccats) ptr c_cnccats
(#poke PgfConcr, total_cats) ptr c_total_cats
(#poke PgfConcr, pool) ptr nullPtr
mk_index ptr pool
pgf_concrete_fix_internals ptr
newFlags :: [(String,Literal)] -> Ptr GuPool -> IO (Ptr GuSeq)
@@ -715,15 +796,15 @@ newLiteral (LFlt val) pool =
newProduction :: Ptr GuMap -> Ptr PgfCncFun -> Ptr GuBuf -> Production -> Ptr GuPool -> IO ((#type bool), GuVariant)
newProduction c_ccats funs_ptr c_non_lexical_buf (PApply fun_id args) pool =
newProduction c_ccats funs_ptr c_non_lexical_buf (PApply funid args) pool =
alloca $ \pptr -> do
let c_fun = funs_ptr `plusPtr` (fun_id * (#size PgfCncFun))
c_fun <- peek (funs_ptr `plusPtr` (funid * (#size PgfCncFun*)))
c_args <- newSequence (#size PgfPArg) pokePArg args pool
ptr <- gu_alloc_variant (#const PGF_PRODUCTION_APPLY)
(fromIntegral (#size PgfProductionApply))
(#const gu_alignof(PgfProductionApply))
pptr pool
(#poke PgfProductionApply, fun) ptr c_fun
(#poke PgfProductionApply, fun) ptr (c_fun :: Ptr PgfCncFun)
(#poke PgfProductionApply, args) ptr c_args
is_lexical <- pgf_production_is_lexical ptr c_non_lexical_buf pool
c_prod <- peek pptr
@@ -732,7 +813,7 @@ newProduction c_ccats funs_ptr c_non_lexical_buf (PApply fun_id args) pool =
pokePArg ptr (PArg hypos ccat) = do
c_ccat <- getCCat c_ccats ccat pool
(#poke PgfPArg, ccat) ptr c_ccat
c_hypos <- newSequence (#size PgfCCat*) pokeCCat hypos pool
c_hypos <- newSequence (#size PgfCCat*) pokeCCat (map snd hypos) pool
(#poke PgfPArg, hypos) ptr c_hypos
pokeCCat ptr ccat = do
@@ -907,12 +988,18 @@ newMap key_size hasher newKey elem_size pokeElem values pool = do
insert map values pool
unionPGF :: PGF -> PGF -> Maybe PGF
unionPGF one@(PGF ptr1 langs1 touch1) two@(PGF ptr2 langs2 touch2)
| pgf_have_same_abstract ptr1 ptr2 /= 0 = Just (PGF ptr1 (Map.union langs1 langs2) (touch1 >> touch2))
| otherwise = Nothing
writePGF :: FilePath -> PGF -> IO ()
writePGF fpath p = do
pool <- gu_new_pool
exn <- gu_new_exn pool
withCString fpath $ \c_fpath ->
pgf_write (pgf p) c_fpath exn
withArrayLen ((map concr . Map.elems . languages) p) $ \n_concrs concrs ->
withCString fpath $ \c_fpath ->
pgf_write (pgf p) (fromIntegral n_concrs) concrs c_fpath exn
touchPGF p
failed <- gu_exn_is_raised exn
if failed
@@ -927,6 +1014,26 @@ writePGF fpath p = do
else do gu_pool_free pool
return ()
writeConcr :: FilePath -> Concr -> IO ()
writeConcr fpath c = do
pool <- gu_new_pool
exn <- gu_new_exn pool
withCString fpath $ \c_fpath ->
pgf_concrete_save (concr c) c_fpath exn
touchConcr c
failed <- gu_exn_is_raised exn
if failed
then do is_errno <- gu_exn_caught exn gu_exn_type_GuErrno
if is_errno
then do perrno <- (#peek GuExn, data.data) exn
errno <- peek perrno
gu_pool_free pool
ioError (errnoToIOError "writeConcr" (Errno errno) Nothing (Just fpath))
else do gu_pool_free pool
throwIO (PGFError "The grammar cannot be stored")
else do gu_pool_free pool
return ()
sortByFst = sortBy (\(x,_) (y,_) -> compare x y)
sortByFst3 = sortBy (\(x,_,_) (y,_,_) -> compare x y)
sortByFst4 = sortBy (\(x,_,_,_) (y,_,_,_) -> compare x y)

View File

@@ -17,11 +17,18 @@ import PGF2.FFI
data Type = Type {typ :: PgfExpr, touchType :: Touch}
-- | 'Hypo' represents a hypothesis in a type i.e. in the type A -> B, A is the hypothesis
type Hypo = (BindType,CId,Type)
type Hypo = (BindType,String,Type)
instance Show Type where
show = showType []
instance Eq Type where
(Type ty1 ty1_touch) == (Type ty2 ty2_touch) =
unsafePerformIO $ do
res <- pgf_type_eq ty1 ty2
ty1_touch >> ty2_touch
return (res /= 0)
-- | parses a 'String' as a type
readType :: String -> Maybe Type
readType str =
@@ -43,7 +50,7 @@ readType str =
-- of identifiers is the list of all free variables
-- in the type in order reverse to the order
-- of binding.
showType :: [CId] -> Type -> String
showType :: [String] -> Type -> String
showType scope (Type ty touch) =
unsafePerformIO $
withGuPool $ \tmpPl ->
@@ -59,7 +66,7 @@ showType scope (Type ty touch) =
-- a list of arguments for the category. The operation
-- @mkType [h_1,...,h_n] C [e_1,...,e_m]@ will create
-- @h_1 -> ... -> h_n -> C e_1 ... e_m@
mkType :: [Hypo] -> CId -> [Expr] -> Type
mkType :: [Hypo] -> String -> [Expr] -> Type
mkType hypos cat exprs = unsafePerformIO $ do
typPl <- gu_new_pool
let n_exprs = fromIntegral (length exprs) :: CSizeT
@@ -94,7 +101,7 @@ touchHypo (_,_,ty) = touchType ty
-- | Decomposes a type into a list of hypothesises, a category and
-- a list of arguments for the category.
unType :: Type -> ([Hypo],CId,[Expr])
unType :: Type -> ([Hypo],String,[Expr])
unType (Type c_type touch) = unsafePerformIO $ do
cid <- (#peek PgfType, cid) c_type >>= peekUtf8CString
c_hypos <- (#peek PgfType, hypos) c_type
@@ -127,7 +134,7 @@ unType (Type c_type touch) = unsafePerformIO $ do
-- of identifiers is the list of all free variables
-- in the type in order reverse to the order
-- of binding.
showContext :: [CId] -> [Hypo] -> String
showContext :: [String] -> [Hypo] -> String
showContext scope hypos =
unsafePerformIO $
withGuPool $ \tmpPl ->

View File

@@ -1,31 +1,31 @@
name: pgf2
version: 0.1.0.0
-- synopsis:
-- description:
-- synopsis:
-- description:
homepage: http://www.grammaticalframework.org
license: LGPL-3
--license-file: LICENSE
author: Krasimir Angelov, Inari
maintainer:
-- copyright:
maintainer:
-- copyright:
category: Language
build-type: Simple
extra-source-files: README
cabal-version: >=1.10
library
exposed-modules: PGF2, PGF2.Internal, SG
exposed-modules: PGF2, PGF2.Internal, SG,
-- backwards compatibility API:
--, PGF, PGF.Internal
PGF, PGF.Internal
other-modules: PGF2.FFI, PGF2.Expr, PGF2.Type, SG.FFI
build-depends: base >=4.3,
containers, pretty
-- hs-source-dirs:
build-depends: base >=4.3, containers, pretty, array
-- hs-source-dirs:
default-language: Haskell2010
build-tools: hsc2hs
extra-libraries: sg pgf gu
cc-options: -std=c99
default-language: Haskell2010
c-sources: utils.c
executable pgf-shell

View File

@@ -16,8 +16,7 @@
module PGF(
-- * PGF
PGF,
readPGF,
parsePGF,
readPGF, showPGF,
-- * Identifiers
CId, mkCId, wildCId,
@@ -54,12 +53,14 @@ module PGF(
mkDouble, unDouble,
mkFloat, unFloat,
mkMeta, unMeta,
exprSubstitute,
-- extra
pExpr, exprSize, exprFunctions,
-- * Operations
-- ** Linearization
linearize, linearizeAllLang, linearizeAll, bracketedLinearize, bracketedLinearizeAll, tabularLinearizes,
linearize, linearizeAllLang, linearizeAll, bracketedLinearize, tabularLinearizes,
groupResults, -- lins of trees by language, removing duplicates
showPrintName,
@@ -166,17 +167,18 @@ import PGF.Macros
import PGF.Expr (Tree)
import PGF.Morphology
import PGF.Data
import PGF.Binary ()
import PGF.Binary()
import qualified PGF.Forest as Forest
import qualified PGF.Parse as Parse
import PGF.Utilities(replace)
import PGF.Printer
import Text.PrettyPrint
--import Data.Char
import qualified Data.Map as Map
--import qualified Data.IntMap as IntMap
--import Data.Maybe
import Data.Binary
import Data.ByteString.Lazy (ByteString)
import Data.List(mapAccumL)
--import System.Random (newStdGen)
--import Control.Monad
@@ -192,11 +194,6 @@ import Text.PrettyPrint
-- > $ gf -make <grammar file name>
readPGF :: FilePath -> IO PGF
-- | Like @readPGF@ but you have the manage file-handling.
--
-- @since 3.9.1
parsePGF :: ByteString -> PGF
-- | Tries to parse the given string in the specified language
-- and to produce abstract syntax expression.
parse :: PGF -> Language -> Type -> String -> [Tree]
@@ -261,9 +258,9 @@ functionType :: PGF -> CId -> Maybe Type
-- Implementation
---------------------------------------------------
readPGF = decodeFile
readPGF f = decodeFile f
parsePGF = decode
showPGF pgf = render (ppPGF pgf)
parse pgf lang typ s =
case parse_ pgf lang typ (Just 4) s of

View File

@@ -2,7 +2,7 @@ module PGF.ByteCode(Literal(..),
CodeLabel, Instr(..), IVal(..), TailInfo(..),
ppLit, ppCode, ppInstr
) where
import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint
import PGF.CId
import Text.PrettyPrint

View File

@@ -74,7 +74,7 @@ data Production
deriving (Eq,Ord,Show)
data PArg = PArg [(FId,FId)] {-# UNPACK #-} !FId deriving (Eq,Ord,Show)
data CncCat = CncCat {-# UNPACK #-} !FId {-# UNPACK #-} !FId {-# UNPACK #-} !(Array LIndex String)
data CncFun = CncFun [CId] {-# UNPACK #-} !(UArray LIndex SeqId) deriving (Eq,Ord,Show)
data CncFun = CncFun CId {-# UNPACK #-} !(UArray LIndex SeqId) deriving (Eq,Ord,Show)
type Sequence = Array DotPos Symbol
type FunId = Int
type SeqId = Int
@@ -93,14 +93,6 @@ msgUnionPGF one two = case absname one of
_ -> (two, -- abstracts don't match, discard the old one -- error msg in Importing.ioUnionPGF
Just "Abstract changed, previous concretes discarded.")
emptyPGF :: PGF
emptyPGF = PGF {
gflags = Map.empty,
absname = wildCId,
abstract = error "empty grammar, no abstract",
concretes = Map.empty
}
-- sameness of function type signatures, checked when importing a new concrete in env
haveSameFunsPGF :: PGF -> PGF -> Bool
haveSameFunsPGF one two =

View File

@@ -8,6 +8,7 @@ module PGF.Expr(Tree, BindType(..), Expr(..), Literal(..), Patt(..), Equation(..
mkDouble, unDouble,
mkFloat, unFloat,
mkMeta, unMeta,
exprSubstitute,
normalForm,
@@ -169,6 +170,16 @@ unMeta (ETyped e ty) = unMeta e
unMeta (EImplArg e) = unMeta e
unMeta _ = Nothing
exprSubstitute :: Expr -> [Expr] -> Expr
exprSubstitute e es =
case e of
EAbs b x e -> EAbs b x (exprSubstitute e es)
EApp e1 e2 -> EApp (exprSubstitute e1 es) (exprSubstitute e2 es)
ELit l -> ELit l
EMeta i -> es !! i
EFun x -> EFun x
-----------------------------------------------------
-- Parsing
-----------------------------------------------------

View File

@@ -71,10 +71,10 @@ bracketedTokn dp f@(Forest abs cnc forest root) =
in (ct,fid',fun,es,(map getVar hypos,lin))
Nothing -> error ("wrong forest id " ++ show fid)
where
descend forest (PApply funid args) = let (CncFun pfuns _lins) = cncfuns cnc ! funid
cat = case pfuns of
[] -> wildCId
(pfun:_) -> case Map.lookup pfun (funs abs) of
descend forest (PApply funid args) = let (CncFun fun _lins) = cncfuns cnc ! funid
cat = case isLindefCId fun of
Just cat -> cat
Nothing -> case Map.lookup fun (funs abs) of
Just (DTyp _ cat _,_,_,_) -> cat
largs = map (render forest) args
ltable = mkLinTable cnc isTrusted [] funid largs
@@ -103,6 +103,14 @@ bracketedTokn dp f@(Forest abs cnc forest root) =
descend (PCoerce fid) = trustedSpots parents' (PArg [] fid)
descend (PConst c e _) = IntSet.empty
isLindefCId id
| take l s == lindef = Just (mkCId (drop l s))
| otherwise = Nothing
where
s = showCId id
lindef = "lindef "
l = length lindef
-- | This function extracts the list of all completed parse trees
-- that spans the whole input consumed so far. The trees are also
-- limited by the category specified, which is usually
@@ -124,13 +132,13 @@ getAbsTrees (Forest abs cnc forest root) arg@(PArg _ fid) ty dp =
| otherwise = do fid0 <- get
put fid
x <- foldForest (\funid args trees ->
do let CncFun fns _lins = cncfuns cnc ! funid
case fns of
[] -> do arg <- go (Set.insert fid rec_) scope mb_tty (head args)
do let CncFun fn _lins = cncfuns cnc ! funid
case isLindefCId fn of
Just _ -> do arg <- go (Set.insert fid rec_) scope mb_tty (head args)
return (mkAbs arg)
fns -> do ty_fn <- lookupFunType (head fns)
Nothing -> do ty_fn <- lookupFunType fn
(e,tty0) <- foldM (\(e1,tty) arg -> goArg (Set.insert fid rec_) scope fid e1 arg tty)
(EFun (head fns),TTyp [] ty_fn) args
(EFun fn,TTyp [] ty_fn) args
case mb_tty of
Just tty -> do i <- newGuardedMeta e
eqType scope (scopeSize scope) i tty tty0

View File

@@ -1,19 +1,169 @@
{-# OPTIONS_HADDOCK hide #-}
{-# LANGUAGE ImplicitParams, RankNTypes #-}
-------------------------------------------------
-- |
-- Stability : unstable
--
-------------------------------------------------
module PGF.Internal(module Internal) where
import PGF.Binary as Internal
import PGF.Data as Internal
import PGF.Macros as Internal
import PGF.Optimize as Internal
import PGF.Printer as Internal
import PGF.Utilities as Internal
import PGF.ByteCode as Internal
module PGF.Internal(CId,Language,PGF,
Concr,lookConcr,
FId,isPredefFId,
FunId,SeqId,LIndex,Token,
Production(..),PArg(..),Symbol(..),Literal(..),BindType(..),PGF.Internal.Sequence,
globalFlags, abstrFlags, concrFlags,
concrTotalCats, concrCategories, concrProductions,
concrTotalFuns, concrFunction,
concrTotalSeqs, concrSequence,
import Data.Binary as Internal
import Data.Binary.Get as Internal
import Data.Binary.IEEE754 as Internal
import Data.Binary.Put as Internal
CodeLabel, Instr(..), IVal(..), TailInfo(..),
Builder, B, build,
eAbs, eApp, eMeta, eFun, eVar, eLit, eTyped, eImplArg, dTyp, hypo,
AbstrInfo, newAbstr, ConcrInfo, newConcr, newPGF,
dTyp, hypo,
fidString, fidInt, fidFloat, fidVar, fidStart,
ppFunId, ppSeqId, ppFId, ppMeta, ppLit, PGF.Internal.ppSeq
) where
import PGF.Data
import PGF.Macros
import PGF.Printer
import PGF.ByteCode
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import Data.Array.IArray
import Text.PrettyPrint
globalFlags pgf = gflags pgf
abstrFlags pgf = aflags (abstract pgf)
concrFlags concr = cflags concr
concrTotalCats = totalCats
concrCategories :: Concr -> [(CId,FId,FId,[String])]
concrCategories c = [(cat,start,end,elems lbls) | (cat,CncCat start end lbls) <- Map.toList (cnccats c)]
concrTotalFuns c =
let (s,e) = bounds (cncfuns c)
in e-s+1
concrFunction :: Concr -> FunId -> (CId,[SeqId])
concrFunction c funid =
let CncFun fun lins = cncfuns c ! funid
in (fun,elems lins)
concrTotalSeqs :: Concr -> SeqId
concrTotalSeqs c =
let (s,e) = bounds (sequences c)
in e-s+1
type Sequence = [Symbol]
concrSequence :: Concr -> SeqId -> [Symbol]
concrSequence c seqid = elems (sequences c ! seqid)
concrProductions :: Concr -> FId -> [Production]
concrProductions c fid =
case IntMap.lookup fid (productions c) of
Just set -> Set.toList set
Nothing -> []
data Builder s
newtype B s a = B a
build :: (forall s . (?builder :: Builder s) => B s a) -> a
build x = let ?builder = undefined
in case x of
B x -> x
eAbs :: (?builder :: Builder s) => BindType -> CId -> B s Expr -> B s Expr
eAbs bind_type var (B body) = B (EAbs bind_type var body)
eApp :: (?builder :: Builder s) => B s Expr -> B s Expr -> B s Expr
eApp (B f) (B x) = B (EApp f x)
eMeta :: (?builder :: Builder s) => Int -> B s Expr
eMeta i = B (EMeta i)
eFun :: (?builder :: Builder s) => CId -> B s Expr
eFun f = B (EFun f)
eVar :: (?builder :: Builder s) => Int -> B s Expr
eVar i = B (EVar i)
eLit :: (?builder :: Builder s) => Literal -> B s Expr
eLit l = B (ELit l)
eTyped :: (?builder :: Builder s) => B s Expr -> B s Type -> B s Expr
eTyped (B e) (B ty) = B (ETyped e ty)
eImplArg :: (?builder :: Builder s) => B s Expr -> B s Expr
eImplArg (B e) = B (EImplArg e)
hypo :: BindType -> CId -> B s Type -> (B s Hypo)
hypo bind_type var (B ty) = B (bind_type,var,ty)
dTyp :: (?builder :: Builder s) => [B s Hypo] -> CId -> [B s Expr] -> B s Type
dTyp hypos cat es = B (DTyp [hypo | B hypo <- hypos] cat [e | B e <- es])
type AbstrInfo = Abstr
newAbstr :: (?builder :: Builder s) => [(CId,Literal)] ->
[(CId,[B s Hypo],Float)] ->
[(CId,B s Type,Int,Float)] ->
B s AbstrInfo
newAbstr aflags cats funs = B (Abstr (Map.fromList aflags)
(Map.fromList [(fun,(ty,arity,Nothing,realToFrac prob)) | (fun,B ty,arity,prob) <- funs])
(Map.fromList [(cat,([hypo | B hypo <- hypos],[],realToFrac prob)) | (cat,hypos,prob) <- cats]))
type ConcrInfo = Concr
newConcr :: (?builder :: Builder s) => B s AbstrInfo ->
[(CId,Literal)] -> -- ^ Concrete syntax flags
[(CId,String)] -> -- ^ Printnames
[(FId,[FunId])] -> -- ^ Lindefs
[(FId,[FunId])] -> -- ^ Linrefs
[(FId,[Production])] -> -- ^ Productions
[(CId,[SeqId])] -> -- ^ Concrete functions (must be sorted by Fun)
[[Symbol]] -> -- ^ Sequences (must be sorted)
[(CId,FId,FId,[String])] -> -- ^ Concrete categories
FId -> -- ^ The total count of the categories
B s ConcrInfo
newConcr _ cflags printnames lindefs linrefs productions cncfuns sequences cnccats totalCats =
B (Concr {cflags = Map.fromList cflags
,printnames = Map.fromList printnames
,lindefs = IntMap.fromList lindefs
,linrefs = IntMap.fromList linrefs
,productions = IntMap.fromList [(fid,Set.fromList prods) | (fid,prods) <- productions]
,cncfuns = mkArray [CncFun fun (mkArray lins) | (fun,lins) <- cncfuns]
,sequences = mkArray (map mkArray sequences)
,cnccats = Map.fromList [(cat,CncCat s e (mkArray lbls)) | (cat,s,e,lbls) <- cnccats]
,totalCats = totalCats
})
{-
pproductions :: IntMap.IntMap (Set.Set Production), -- productions needed for parsing
lproductions :: Map.Map CId (IntMap.IntMap (Set.Set Production)), -- productions needed for linearization
lexicon :: IntMap.IntMap (IntMap.IntMap (TMap.TrieMap Token IntSet.IntSet)),
-}
newPGF :: (?builder :: Builder s) => [(CId,Literal)] ->
CId ->
B s AbstrInfo ->
[(CId,B s ConcrInfo)] ->
B s PGF
newPGF gflags absname (B abstract) concretes =
B (PGF {gflags = Map.fromList gflags
,absname = absname
,abstract = abstract
,concretes = Map.fromList [(cname,concr) | (cname,B concr) <- concretes]
})
ppSeq (seqid,seq) = PGF.Printer.ppSeq (seqid,mkArray seq)
mkArray l = listArray (0,length l-1) l

View File

@@ -4,7 +4,6 @@ module PGF.Linearize
, linearizeAll
, linearizeAllLang
, bracketedLinearize
, bracketedLinearizeAll
, tabularLinearizes
) where
@@ -48,12 +47,6 @@ bracketedLinearize pgf lang = head . map (snd . untokn Nothing . firstLin cnc) .
head [] = []
head (bs:bss) = bs
-- | Linearizes given expression as a bracketed string in the language
bracketedLinearizeAll :: PGF -> Language -> Tree -> [[BracketedString]]
bracketedLinearizeAll pgf lang = map (snd . untokn Nothing . firstLin cnc) . linTree pgf cnc
where
cnc = lookMap (error "no lang") lang (concretes pgf)
firstLin cnc arg@(ct@(cat,n_fid),fid,fun,es,(xs,lin)) =
case IntMap.lookup fid (linrefs cnc) of
Just (funid:_) -> snd (mkLinTable cnc (const True) [] funid [arg]) ! 0

View File

@@ -1,5 +1,4 @@
module PGF.Macros where
import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint
import PGF.CId
import PGF.Data

View File

@@ -31,8 +31,7 @@ collectWords pinfo = Map.fromListWith (++)
[(t, [(fun,lbls ! l)]) | (CncCat s e lbls) <- Map.elems (cnccats pinfo)
, fid <- [s..e]
, PApply funid _ <- maybe [] Set.toList (IntMap.lookup fid (productions pinfo))
, let CncFun funs lins = cncfuns pinfo ! funid
, fun <- funs
, let CncFun fun lins = cncfuns pinfo ! funid
, (l,seqid) <- assocs lins
, sym <- elems (sequences pinfo ! seqid)
, t <- sym2tokns sym]

View File

@@ -60,7 +60,7 @@ getConcr =
cnccats <- getMap getCId getCncCat
totalCats <- get
let rseq = listToArray [SymCat 0 0]
rfun = CncFun [mkCId "linref"] (listToArray [scnt])
rfun = CncFun (mkCId "linref") (listToArray [scnt])
linrefs = IntMap.fromList [(i,[fcnt])|i<-[0..totalCats-1]]
return (Concr{ cflags=cflags, printnames=printnames
, sequences=toArray (scnt+1,seqs++[rseq])
@@ -110,7 +110,7 @@ getBindType =
1 -> return Implicit
_ -> decodingError "getBindType"
getCncFun = liftM2 CncFun (fmap (:[]) getCId) (getArray get)
getCncFun = liftM2 CncFun getCId (getArray get)
getCncCat = liftM3 CncCat get get (getArray get)

View File

@@ -21,7 +21,6 @@ import qualified Data.IntMap as IntMap
import qualified PGF.TrieMap as TrieMap
import qualified Data.List as List
import Control.Monad.ST
import Debug.Trace
optimizePGF :: PGF -> PGF
optimizePGF pgf = pgf{concretes=fmap (updateConcrete (abstract pgf) .
@@ -179,26 +178,26 @@ topDownFilter startCat cnc =
bottomUpFilter :: Concr -> Concr
bottomUpFilter cnc = cnc{productions=filterProductions IntMap.empty (productions cnc)}
bottomUpFilter cnc = cnc{productions=filterProductions IntMap.empty IntSet.empty (productions cnc)}
filterProductions prods0 prods
filterProductions prods0 hoc0 prods
| prods0 == prods1 = prods0
| otherwise = filterProductions prods1 prods
| otherwise = filterProductions prods1 hoc1 prods
where
prods1 = IntMap.foldWithKey foldProdSet IntMap.empty prods
hoc = IntMap.fold (\set !hoc -> Set.fold accumHOC hoc set) IntSet.empty prods
(prods1,hoc1) = IntMap.foldWithKey foldProdSet (IntMap.empty,IntSet.empty) prods
foldProdSet fid set !prods
| Set.null set1 = prods
| otherwise = IntMap.insert fid set1 prods
foldProdSet fid set (!prods,!hoc)
| Set.null set1 = (prods,hoc)
| otherwise = (IntMap.insert fid set1 prods,hoc1)
where
set1 = Set.filter filterRule set
hoc1 = Set.fold accumHOC hoc set1
filterRule (PApply funid args) = all (\(PArg _ fid) -> isLive fid) args
filterRule (PCoerce fid) = isLive fid
filterRule _ = True
isLive fid = isPredefFId fid || IntMap.member fid prods0 || IntSet.member fid hoc
isLive fid = isPredefFId fid || IntMap.member fid prods0 || IntSet.member fid hoc0
accumHOC (PApply funid args) hoc = List.foldl' (\hoc (PArg hypos _) -> List.foldl' (\hoc (_,fid) -> IntSet.insert fid hoc) hoc hypos) hoc args
accumHOC _ hoc = hoc
@@ -242,7 +241,7 @@ splitLexicalRules cnc p_prods =
seq2prefix (SymALL_CAPIT :syms) = TrieMap.fromList [wf ["&|"]]
updateConcrete abs cnc =
let p_prods0 = filterProductions IntMap.empty (productions cnc)
let p_prods0 = filterProductions IntMap.empty IntSet.empty (productions cnc)
(lex,p_prods) = splitLexicalRules cnc p_prods0
l_prods = linIndex cnc p_prods0
in cnc{pproductions = p_prods, lproductions = l_prods, lexicon = lex}
@@ -253,7 +252,7 @@ updateConcrete abs cnc =
, prod <- Set.toList prods
, fun <- getFunctions prod]
where
getFunctions (PApply funid args) = let CncFun funs _ = cncfuns cnc ! funid in funs
getFunctions (PApply funid args) = let CncFun fun _ = cncfuns cnc ! funid in [fun]
getFunctions (PCoerce fid) = case IntMap.lookup fid productions of
Nothing -> []
Just prods -> [fun | prod <- Set.toList prods, fun <- getFunctions prod]

View File

@@ -503,14 +503,14 @@ type Continuation = TrieMap.TrieMap Token ActiveSet
-- | Return the Continuation of a Parsestate with exportable types
-- Used by PGFService
getContinuationInfo :: ParseState -> Map.Map [Token] [(FunId, CId, String)]
getContinuationInfo pstate = Map.map (concatMap f . Set.toList) contMap
getContinuationInfo pstate = Map.map (map f . Set.toList) contMap
where
PState _abstr concr _chart cont = pstate
contMap = Map.fromList (TrieMap.toList cont) -- always get [([], _::ActiveSet)]
f :: Active -> [(FunId,CId,String)]
f (Active int dotpos funid seqid pargs ak) = [(funid, fn, seq) | fn <- fns]
f :: Active -> (FunId,CId,String)
f (Active int dotpos funid seqid pargs ak) = (funid, cid, seq)
where
CncFun fns _ = cncfuns concr ! funid
CncFun cid _ = cncfuns concr ! funid
seq = showSeq dotpos (sequences concr ! seqid)
showSeq :: DotPos -> Sequence -> String

View File

@@ -1,6 +1,5 @@
{-# LANGUAGE FlexibleContexts #-}
module PGF.Printer (ppPGF,ppCat,ppFId,ppFunId,ppSeqId,ppSeq,ppFun) where
import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint
import PGF.CId
import PGF.Data
@@ -73,8 +72,8 @@ ppProduction (fid,PCoerce arg) =
ppProduction (fid,PConst _ _ ss) =
ppFId fid <+> text "->" <+> ppStrs ss
ppCncFun (funid,CncFun funs arr) =
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (hsep (map ppCId funs))
ppCncFun (funid,CncFun fun arr) =
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)
ppLinDefs (fid,funids) =
[ppFId fid <+> text "->" <+> ppFunId funid <> brackets (ppFId fidVar) | funid <- funids]
@@ -82,6 +81,7 @@ ppLinDefs (fid,funids) =
ppLinRefs (fid,funids) =
[ppFId fidVar <+> text "->" <+> ppFunId funid <> brackets (ppFId fid) | funid <- funids]
ppSeq :: (SeqId,Sequence) -> Doc
ppSeq (seqid,seq) =
ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))

View File

@@ -23,7 +23,6 @@ module PGF.VisualizeTree
, gizaAlignment
, conlls2latexDoc
) where
import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint
import PGF.CId (wildCId,showCId,ppCId,mkCId) --CId,pCId,
import PGF.Data

View File

@@ -1,5 +1,5 @@
name: pgf
version: 3.9.1-git
version: 3.9-git
cabal-version: >= 1.20
build-type: Simple
@@ -8,7 +8,7 @@ category: Natural Language Processing
synopsis: Grammatical Framework
description: A library for interpreting the Portable Grammar Format (PGF)
homepage: http://www.grammaticalframework.org/
bug-reports: https://github.com/GrammaticalFramework/gf-core/issues
bug-reports: https://github.com/GrammaticalFramework/GF/issues
maintainer: Thomas Hallgren
tested-with: GHC==7.6.3, GHC==7.8.3, GHC==7.10.3, GHC==8.0.2
@@ -30,6 +30,7 @@ Library
exceptions
if flag(custom-binary)
hs-source-dirs: ., binary
other-modules:
-- not really part of GF but I have changed the original binary library
-- and we have to keep the copy for now.
@@ -45,9 +46,9 @@ Library
--if impl(ghc>=7.8)
-- ghc-options: +RTS -A20M -RTS
ghc-prof-options: -fprof-auto
extensions:
extensions:
exposed-modules:
exposed-modules:
PGF
PGF.Internal
PGF.Haskell

View File

@@ -2619,6 +2619,21 @@ PGF_dealloc(PGFObject* self)
Py_TYPE(self)->tp_free((PyObject*)self);
}
typedef struct {
GuMapItor fn;
PGFObject* grammar;
void* collection;
} PyPGFClosure;
static void
pgf_collect_langs_seq(GuMapItor* fn, const void* key, void* value, GuExn* err)
{
PgfConcr* concr = *((PgfConcr**) value);
PyPGFClosure* clo = (PyPGFClosure*) fn;
gu_buf_push((GuBuf*) clo->collection, PgfConcr*, concr);
}
static PyObject *
PGF_repr(PGFObject *self)
{
@@ -2628,7 +2643,14 @@ PGF_repr(PGFObject *self)
GuStringBuf* sbuf = gu_new_string_buf(tmp_pool);
GuOut* out = gu_string_buf_out(sbuf);
pgf_print(self->pgf, out, err);
GuBuf* languages = gu_new_buf(PgfConcr*, tmp_pool);
PyPGFClosure clo = { { pgf_collect_langs_seq }, self, languages };
pgf_iter_languages(self->pgf, &clo.fn, err);
pgf_print(self->pgf, gu_buf_length(languages),
gu_buf_data(languages),
out, err);
PyObject* pystr = PyString_FromStringAndSize(gu_string_buf_data(sbuf),
gu_string_buf_length(sbuf));
@@ -2643,14 +2665,8 @@ PGF_getAbstractName(PGFObject *self, void *closure)
return PyString_FromString(pgf_abstract_name(self->pgf));
}
typedef struct {
GuMapItor fn;
PGFObject* grammar;
PyObject* object;
} PyPGFClosure;
static void
pgf_collect_langs(GuMapItor* fn, const void* key, void* value, GuExn* err)
pgf_collect_langs_dict(GuMapItor* fn, const void* key, void* value, GuExn* err)
{
PgfCId name = (PgfCId) key;
PgfConcr* concr = *((PgfConcr**) value);
@@ -2675,7 +2691,7 @@ pgf_collect_langs(GuMapItor* fn, const void* key, void* value, GuExn* err)
((ConcrObject *) py_lang)->grammar = clo->grammar;
Py_INCREF(clo->grammar);
if (PyDict_SetItem(clo->object, py_name, py_lang) != 0) {
if (PyDict_SetItem((PyObject*) clo->collection, py_name, py_lang) != 0) {
gu_raise(err, PgfExn);
goto end;
}
@@ -2697,7 +2713,7 @@ PGF_getLanguages(PGFObject *self, void *closure)
// Create an exception frame that catches all errors.
GuExn* err = gu_new_exn(tmp_pool);
PyPGFClosure clo = { { pgf_collect_langs }, self, languages };
PyPGFClosure clo = { { pgf_collect_langs_dict }, self, languages };
pgf_iter_languages(self->pgf, &clo.fn, err);
if (!gu_ok(err)) {
Py_DECREF(languages);
@@ -2727,7 +2743,7 @@ pgf_collect_cats(GuMapItor* fn, const void* key, void* value, GuExn* err)
goto end;
}
if (PyList_Append(clo->object, py_name) != 0) {
if (PyList_Append((PyObject*) clo->collection, py_name) != 0) {
gu_raise(err, PgfExn);
goto end;
}
@@ -2794,7 +2810,7 @@ pgf_collect_funs(GuMapItor* fn, const void* key, void* value, GuExn* err)
goto end;
}
if (PyList_Append(clo->object, py_name) != 0) {
if (PyList_Append((PyObject*) clo->collection, py_name) != 0) {
gu_raise(err, PgfExn);
goto end;
}
@@ -3142,7 +3158,7 @@ pgf_embed_funs(GuMapItor* fn, const void* key, void* value, GuExn* err)
Py_INCREF(pyexpr->master);
if (PyModule_AddObject(clo->object, name, (PyObject*) pyexpr) != 0) {
if (PyModule_AddObject((PyObject*) clo->collection, name, (PyObject*) pyexpr) != 0) {
Py_DECREF(pyexpr);
gu_raise(err, PgfExn);
}