mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-13 05:02:50 -06:00
manually copy the "c-runtime" branch from the old repository.
This commit is contained in:
@@ -87,14 +87,13 @@ libpgf_la_SOURCES = \
|
||||
pgf/graphviz.c \
|
||||
pgf/aligner.c \
|
||||
pgf/pgf.c \
|
||||
pgf/pgf.h
|
||||
libpgf_la_LDFLAGS = -no-undefined
|
||||
pgf/pgf.h \
|
||||
libpgf_la_LDFLAGS = "-no-undefined"
|
||||
libpgf_la_LIBADD = libgu.la
|
||||
|
||||
libsg_la_SOURCES = \
|
||||
sg/sqlite3Btree.c \
|
||||
sg/sg.c
|
||||
libsg_la_LDFLAGS = -no-undefined
|
||||
libsg_la_LIBADD = libgu.la libpgf.la
|
||||
|
||||
bin_PROGRAMS =
|
||||
|
||||
@@ -23,14 +23,6 @@
|
||||
|
||||
#define restrict __restrict
|
||||
|
||||
#elif defined(__MINGW32__)
|
||||
|
||||
#define GU_API_DECL
|
||||
#define GU_API
|
||||
|
||||
#define GU_INTERNAL_DECL
|
||||
#define GU_INTERNAL
|
||||
|
||||
#else
|
||||
|
||||
#define GU_API_DECL
|
||||
@@ -38,9 +30,7 @@
|
||||
|
||||
#define GU_INTERNAL_DECL __attribute__ ((visibility ("hidden")))
|
||||
#define GU_INTERNAL __attribute__ ((visibility ("hidden")))
|
||||
|
||||
#endif
|
||||
|
||||
// end MSVC workaround
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
@@ -30,8 +30,8 @@ pgf_expr_unwrap(PgfExpr expr)
|
||||
}
|
||||
}
|
||||
|
||||
PGF_API int
|
||||
pgf_expr_arity(PgfExpr expr)
|
||||
static PgfExprTag
|
||||
pgf_expr_arity(PgfExpr expr, int *arity)
|
||||
{
|
||||
int n = 0;
|
||||
while (true) {
|
||||
@@ -44,10 +44,9 @@ pgf_expr_arity(PgfExpr expr)
|
||||
n = n + 1;
|
||||
break;
|
||||
}
|
||||
case PGF_EXPR_FUN:
|
||||
return n;
|
||||
default:
|
||||
return -1;
|
||||
*arity = n;
|
||||
return i.tag;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -55,8 +54,8 @@ pgf_expr_arity(PgfExpr expr)
|
||||
PGF_API PgfApplication*
|
||||
pgf_expr_unapply(PgfExpr expr, GuPool* pool)
|
||||
{
|
||||
int arity = pgf_expr_arity(expr);
|
||||
if (arity < 0) {
|
||||
int arity;
|
||||
if (pgf_expr_arity(expr, &arity) != PGF_EXPR_FUN) {
|
||||
return NULL;
|
||||
}
|
||||
PgfApplication* appl = gu_new_flex(pool, PgfApplication, args, arity);
|
||||
@@ -68,13 +67,38 @@ pgf_expr_unapply(PgfExpr expr, GuPool* pool)
|
||||
appl->args[n] = app->arg;
|
||||
expr = app->fun;
|
||||
}
|
||||
PgfExpr e = pgf_expr_unwrap(expr);
|
||||
gu_assert(gu_variant_tag(e) == PGF_EXPR_FUN);
|
||||
PgfExprFun* fun = gu_variant_data(e);
|
||||
appl->efun = pgf_expr_unwrap(expr);
|
||||
gu_assert(gu_variant_tag(appl->efun) == PGF_EXPR_FUN);
|
||||
PgfExprFun* fun = gu_variant_data(appl->efun);
|
||||
appl->fun = fun->fun;
|
||||
return appl;
|
||||
}
|
||||
|
||||
PGF_API PgfApplication*
|
||||
pgf_expr_unapply_ex(PgfExpr expr, GuPool* pool)
|
||||
{
|
||||
int arity;
|
||||
pgf_expr_arity(expr, &arity);
|
||||
|
||||
PgfApplication* appl = gu_new_flex(pool, PgfApplication, args, arity);
|
||||
appl->n_args = arity;
|
||||
for (int n = arity - 1; n >= 0; n--) {
|
||||
PgfExpr e = pgf_expr_unwrap(expr);
|
||||
gu_assert(gu_variant_tag(e) == PGF_EXPR_APP);
|
||||
PgfExprApp* app = gu_variant_data(e);
|
||||
appl->args[n] = app->arg;
|
||||
expr = app->fun;
|
||||
}
|
||||
appl->efun = pgf_expr_unwrap(expr);
|
||||
if (gu_variant_tag(appl->efun) == PGF_EXPR_FUN) {
|
||||
PgfExprFun* fun = gu_variant_data(appl->efun);
|
||||
appl->fun = fun->fun;
|
||||
} else {
|
||||
appl->fun = NULL;
|
||||
}
|
||||
return appl;
|
||||
}
|
||||
|
||||
PGF_API PgfExpr
|
||||
pgf_expr_apply(PgfApplication* app, GuPool* pool)
|
||||
{
|
||||
@@ -675,6 +699,17 @@ pgf_expr_parser_binds(PgfExprParser* parser)
|
||||
return binds;
|
||||
}
|
||||
|
||||
PGF_API GuString
|
||||
pgf_expr_parser_ident(PgfExprParser* parser)
|
||||
{
|
||||
GuString ident = NULL;
|
||||
if (parser->token_tag == PGF_TOKEN_IDENT) {
|
||||
ident = gu_string_copy(gu_string_buf_data(parser->token_value), parser->expr_pool);
|
||||
pgf_expr_parser_token(parser, true);
|
||||
}
|
||||
return ident;
|
||||
}
|
||||
|
||||
PGF_API PgfExpr
|
||||
pgf_expr_parser_expr(PgfExprParser* parser, bool mark)
|
||||
{
|
||||
|
||||
@@ -126,12 +126,10 @@ typedef struct {
|
||||
PgfExpr expr;
|
||||
} PgfExprProb;
|
||||
|
||||
PGF_API_DECL int
|
||||
pgf_expr_arity(PgfExpr expr);
|
||||
|
||||
typedef struct PgfApplication PgfApplication;
|
||||
|
||||
struct PgfApplication {
|
||||
PgfExpr efun;
|
||||
PgfCId fun;
|
||||
int n_args;
|
||||
PgfExpr args[];
|
||||
@@ -140,6 +138,9 @@ struct PgfApplication {
|
||||
PGF_API_DECL PgfApplication*
|
||||
pgf_expr_unapply(PgfExpr expr, GuPool* pool);
|
||||
|
||||
PGF_API_DECL PgfApplication*
|
||||
pgf_expr_unapply_ex(PgfExpr expr, GuPool* pool);
|
||||
|
||||
PGF_API_DECL PgfExpr
|
||||
pgf_expr_apply(PgfApplication*, GuPool* pool);
|
||||
|
||||
|
||||
@@ -175,9 +175,8 @@ redo:;
|
||||
gu_buf_get(buf, PgfProductionApply*, index);
|
||||
gu_assert(n_args == gu_seq_length(papply->args));
|
||||
|
||||
capp->abs_id = papply->fun->absfun->name;
|
||||
capp->fun = papply->fun;
|
||||
capp->fid = 0;
|
||||
capp->fun = papply->fun;
|
||||
capp->fid = 0;
|
||||
capp->n_args = n_args;
|
||||
|
||||
for (size_t i = 0; i < n_args; i++) {
|
||||
@@ -223,10 +222,10 @@ redo:;
|
||||
static PgfCncTree
|
||||
pgf_cnc_resolve_def(PgfCnc* cnc,
|
||||
size_t n_vars, PgfPrintContext* context,
|
||||
PgfCId abs_id, PgfCCat* ccat, GuString s, GuPool* pool)
|
||||
PgfCCat* ccat, GuString s, GuPool* pool)
|
||||
{
|
||||
PgfCncTree ret = gu_null_variant;
|
||||
PgfCncTree lit = gu_null_variant;
|
||||
PgfCncTree ret = gu_null_variant;
|
||||
|
||||
PgfCncTreeLit* clit =
|
||||
gu_new_variant(PGF_CNC_TREE_LIT,
|
||||
@@ -234,7 +233,7 @@ pgf_cnc_resolve_def(PgfCnc* cnc,
|
||||
&lit, pool);
|
||||
clit->n_vars = 0;
|
||||
clit->context = context;
|
||||
clit->fid = -1; // don't report the literal in the bracket
|
||||
clit->fid = cnc->fid++;
|
||||
PgfLiteralStr* lit_str =
|
||||
gu_new_flex_variant(PGF_LITERAL_STR,
|
||||
PgfLiteralStr,
|
||||
@@ -242,7 +241,7 @@ pgf_cnc_resolve_def(PgfCnc* cnc,
|
||||
&clit->lit, pool);
|
||||
strcpy((char*) lit_str->val, (char*) s);
|
||||
|
||||
if (ccat == NULL || ccat->lindefs == NULL)
|
||||
if (ccat->lindefs == NULL)
|
||||
return lit;
|
||||
|
||||
int index =
|
||||
@@ -254,10 +253,9 @@ pgf_cnc_resolve_def(PgfCnc* cnc,
|
||||
gu_new_flex_variant(PGF_CNC_TREE_APP,
|
||||
PgfCncTreeApp,
|
||||
args, 1, &ret, pool);
|
||||
capp->ccat = ccat;
|
||||
capp->abs_id= abs_id;
|
||||
capp->fun = gu_seq_get(ccat->lindefs, PgfCncFun*, index);
|
||||
capp->fid = cnc->fid++;
|
||||
capp->ccat = ccat;
|
||||
capp->fun = gu_seq_get(ccat->lindefs, PgfCncFun*, index);
|
||||
capp->fid = cnc->fid++;
|
||||
capp->n_vars = n_vars;
|
||||
capp->context = context;
|
||||
capp->n_args = 1;
|
||||
@@ -297,7 +295,7 @@ pgf_lzr_wrap_linref(PgfCncTree ctree, GuPool* pool)
|
||||
PgfCncTreeApp* capp = cti.data;
|
||||
|
||||
assert(gu_seq_length(capp->ccat->linrefs) > 0);
|
||||
|
||||
|
||||
// here we must apply the linref function
|
||||
PgfCncTree new_ctree;
|
||||
PgfCncTreeApp* new_capp =
|
||||
@@ -305,7 +303,6 @@ pgf_lzr_wrap_linref(PgfCncTree ctree, GuPool* pool)
|
||||
PgfCncTreeApp,
|
||||
args, 1, &new_ctree, pool);
|
||||
new_capp->ccat = NULL;
|
||||
new_capp->abs_id = NULL;
|
||||
new_capp->fun = gu_seq_get(capp->ccat->linrefs, PgfCncFun*, 0);
|
||||
new_capp->fid = -1;
|
||||
new_capp->n_vars = 0;
|
||||
@@ -317,7 +314,7 @@ pgf_lzr_wrap_linref(PgfCncTree ctree, GuPool* pool)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return ctree;
|
||||
}
|
||||
|
||||
@@ -399,17 +396,6 @@ pgf_cnc_resolve(PgfCnc* cnc,
|
||||
goto done;
|
||||
}
|
||||
|
||||
PgfCId abs_id = "?";
|
||||
if (emeta->id > 0) {
|
||||
GuPool* tmp_pool = gu_local_pool();
|
||||
GuExn* err = gu_new_exn(tmp_pool);
|
||||
GuStringBuf* sbuf = gu_new_string_buf(tmp_pool);
|
||||
GuOut* out = gu_string_buf_out(sbuf);
|
||||
|
||||
gu_printf(out, err, "?%d", emeta->id);
|
||||
abs_id = gu_string_buf_freeze(sbuf, pool);
|
||||
}
|
||||
|
||||
int index =
|
||||
gu_choice_next(cnc->ch, gu_seq_length(ccat->lindefs));
|
||||
if (index < 0) {
|
||||
@@ -420,7 +406,6 @@ pgf_cnc_resolve(PgfCnc* cnc,
|
||||
PgfCncTreeApp,
|
||||
args, 1, &ret, pool);
|
||||
capp->ccat = ccat;
|
||||
capp->abs_id = abs_id;
|
||||
capp->fun = gu_seq_get(ccat->lindefs, PgfCncFun*, index);
|
||||
capp->fid = cnc->fid++;
|
||||
capp->n_vars = 0;
|
||||
@@ -450,7 +435,23 @@ pgf_cnc_resolve(PgfCnc* cnc,
|
||||
gu_putc(']', out, err);
|
||||
GuString s = gu_string_buf_freeze(sbuf, tmp_pool);
|
||||
|
||||
ret = pgf_cnc_resolve_def(cnc, n_vars, context, efun->fun, ccat, s, pool);
|
||||
if (ccat != NULL) {
|
||||
ret = pgf_cnc_resolve_def(cnc, n_vars, context, ccat, s, pool);
|
||||
} else {
|
||||
PgfCncTreeLit* clit =
|
||||
gu_new_variant(PGF_CNC_TREE_LIT,
|
||||
PgfCncTreeLit,
|
||||
&ret, pool);
|
||||
clit->n_vars = 0;
|
||||
clit->context = context;
|
||||
clit->fid = cnc->fid++;
|
||||
PgfLiteralStr* lit =
|
||||
gu_new_flex_variant(PGF_LITERAL_STR,
|
||||
PgfLiteralStr,
|
||||
val, strlen(s)+1,
|
||||
&clit->lit, pool);
|
||||
strcpy(lit->val, s);
|
||||
}
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
goto done;
|
||||
@@ -498,7 +499,28 @@ redo:;
|
||||
index--;
|
||||
}
|
||||
|
||||
ret = pgf_cnc_resolve_def(cnc, n_vars, context, ctxt->name, ccat, ctxt->name, pool);
|
||||
if (ccat != NULL && ccat->lindefs == NULL) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (ccat != NULL) {
|
||||
ret = pgf_cnc_resolve_def(cnc, n_vars, context, ccat, ctxt->name, pool);
|
||||
} else {
|
||||
PgfCncTreeLit* clit =
|
||||
gu_new_variant(PGF_CNC_TREE_LIT,
|
||||
PgfCncTreeLit,
|
||||
&ret, pool);
|
||||
clit->n_vars = 0;
|
||||
clit->context = context;
|
||||
clit->fid = cnc->fid++;
|
||||
PgfLiteralStr* lit =
|
||||
gu_new_flex_variant(PGF_LITERAL_STR,
|
||||
PgfLiteralStr,
|
||||
val, strlen(ctxt->name)+1,
|
||||
&clit->lit, pool);
|
||||
strcpy(lit->val, ctxt->name);
|
||||
}
|
||||
|
||||
goto done;
|
||||
}
|
||||
case PGF_EXPR_TYPED: {
|
||||
@@ -917,9 +939,9 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx)
|
||||
|
||||
if ((*lzr->funcs)->begin_phrase && fapp->ccat != NULL) {
|
||||
(*lzr->funcs)->begin_phrase(lzr->funcs,
|
||||
fapp->ccat->cnccat->abscat->name,
|
||||
fun->absfun->type->cid,
|
||||
fapp->fid, lin_idx,
|
||||
fapp->abs_id);
|
||||
fun->absfun->name);
|
||||
}
|
||||
|
||||
gu_require(lin_idx < fun->n_lins);
|
||||
@@ -927,9 +949,9 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx)
|
||||
|
||||
if ((*lzr->funcs)->end_phrase && fapp->ccat != NULL) {
|
||||
(*lzr->funcs)->end_phrase(lzr->funcs,
|
||||
fapp->ccat->cnccat->abscat->name,
|
||||
fun->absfun->type->cid,
|
||||
fapp->fid, lin_idx,
|
||||
fapp->abs_id);
|
||||
fun->absfun->name);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -955,7 +977,7 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx)
|
||||
PgfCId cat =
|
||||
pgf_literal_cat(lzr->concr, flit->lit)->cnccat->abscat->name;
|
||||
|
||||
if ((*lzr->funcs)->begin_phrase && flit->fid >= 0) {
|
||||
if ((*lzr->funcs)->begin_phrase) {
|
||||
(*lzr->funcs)->begin_phrase(lzr->funcs,
|
||||
cat, flit->fid, 0,
|
||||
"");
|
||||
@@ -987,7 +1009,7 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx)
|
||||
(*lzr->funcs)->symbol_token(lzr->funcs, tok);
|
||||
}
|
||||
|
||||
if ((*lzr->funcs)->end_phrase && flit->fid >= 0) {
|
||||
if ((*lzr->funcs)->end_phrase) {
|
||||
(*lzr->funcs)->end_phrase(lzr->funcs,
|
||||
cat, flit->fid, 0,
|
||||
"");
|
||||
|
||||
@@ -22,7 +22,6 @@ typedef enum {
|
||||
|
||||
typedef struct {
|
||||
PgfCCat* ccat;
|
||||
PgfCId abs_id;
|
||||
PgfCncFun* fun;
|
||||
int fid;
|
||||
|
||||
|
||||
@@ -9,9 +9,6 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#if defined(__MINGW32__) || defined(_MSC_VER)
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
//#define PGF_LOOKUP_DEBUG
|
||||
//#define PGF_LINEARIZER_DEBUG
|
||||
@@ -119,7 +116,7 @@ typedef struct {
|
||||
static PgfAbsProduction*
|
||||
pgf_lookup_new_production(PgfAbsFun* fun, GuPool *pool)
|
||||
{
|
||||
size_t n_hypos = fun->type->hypos ? gu_seq_length(fun->type->hypos) : 0;
|
||||
size_t n_hypos = gu_seq_length(fun->type->hypos);
|
||||
PgfAbsProduction* prod = gu_new_flex(pool, PgfAbsProduction, args, n_hypos);
|
||||
prod->fun = fun;
|
||||
prod->count = 0;
|
||||
@@ -699,12 +696,8 @@ pgf_lookup_tokenize(GuMap* lexicon_idx, GuString sentence, GuPool* pool)
|
||||
break;
|
||||
|
||||
const uint8_t* start = p-1;
|
||||
if (strchr(".!?,:",c) != NULL)
|
||||
while (c != 0 && !gu_ucs_is_space(c)) {
|
||||
c = gu_utf8_decode(&p);
|
||||
else {
|
||||
while (c != 0 && strchr(".!?,:",c) == NULL && !gu_ucs_is_space(c)) {
|
||||
c = gu_utf8_decode(&p);
|
||||
}
|
||||
}
|
||||
const uint8_t* end = p-1;
|
||||
|
||||
|
||||
@@ -65,7 +65,6 @@ typedef enum { BIND_NONE, BIND_HARD, BIND_SOFT } BIND_TYPE;
|
||||
typedef struct {
|
||||
PgfProductionIdx* idx;
|
||||
size_t offset;
|
||||
size_t sym_idx;
|
||||
} PgfLexiconIdxEntry;
|
||||
|
||||
typedef GuBuf PgfLexiconIdx;
|
||||
@@ -1061,13 +1060,13 @@ pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep)
|
||||
}
|
||||
|
||||
static int
|
||||
pgf_symbols_cmp(GuString* psent, PgfSymbols* syms, size_t* sym_idx, bool case_sensitive)
|
||||
pgf_symbols_cmp(GuString* psent, PgfSymbols* syms, bool case_sensitive)
|
||||
{
|
||||
size_t n_syms = gu_seq_length(syms);
|
||||
while (*sym_idx < n_syms) {
|
||||
PgfSymbol sym = gu_seq_get(syms, PgfSymbol, *sym_idx);
|
||||
for (size_t i = 0; i < n_syms; i++) {
|
||||
PgfSymbol sym = gu_seq_get(syms, PgfSymbol, i);
|
||||
|
||||
if (*sym_idx > 0) {
|
||||
if (i > 0) {
|
||||
if (!skip_space(psent)) {
|
||||
if (**psent == 0)
|
||||
return -1;
|
||||
@@ -1111,8 +1110,6 @@ pgf_symbols_cmp(GuString* psent, PgfSymbols* syms, size_t* sym_idx, bool case_se
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
|
||||
(*sym_idx)++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -1133,8 +1130,7 @@ pgf_parsing_lookahead(PgfParsing *ps, PgfParseState* state,
|
||||
|
||||
GuString start = ps->sentence + state->end_offset;
|
||||
GuString current = start;
|
||||
size_t sym_idx = 0;
|
||||
int cmp = pgf_symbols_cmp(¤t, seq->syms, &sym_idx, ps->case_sensitive);
|
||||
int cmp = pgf_symbols_cmp(¤t, seq->syms, ps->case_sensitive);
|
||||
if (cmp < 0) {
|
||||
j = k-1;
|
||||
} else if (cmp > 0) {
|
||||
@@ -1155,9 +1151,8 @@ pgf_parsing_lookahead(PgfParsing *ps, PgfParseState* state,
|
||||
|
||||
if (seq->idx != NULL) {
|
||||
PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
|
||||
entry->idx = seq->idx;
|
||||
entry->offset = (size_t) (current - ps->sentence);
|
||||
entry->sym_idx = sym_idx;
|
||||
entry->idx = seq->idx;
|
||||
entry->offset = (size_t) (current - ps->sentence);
|
||||
}
|
||||
|
||||
if (len+1 <= max)
|
||||
@@ -1236,7 +1231,6 @@ pgf_new_parse_state(PgfParsing* ps, size_t start_offset,
|
||||
PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
|
||||
entry->idx = seq->idx;
|
||||
entry->offset = state->start_offset;
|
||||
entry->sym_idx= 0;
|
||||
}
|
||||
|
||||
// Add non-epsilon lexical rules to the bottom up index
|
||||
@@ -1284,15 +1278,14 @@ pgf_parsing_add_transition(PgfParsing* ps, PgfToken tok, PgfItem* item)
|
||||
static void
|
||||
pgf_parsing_predict_lexeme(PgfParsing* ps, PgfItemConts* conts,
|
||||
PgfProductionIdxEntry* entry,
|
||||
size_t offset, size_t sym_idx)
|
||||
size_t offset)
|
||||
{
|
||||
GuVariantInfo i = { PGF_PRODUCTION_APPLY, entry->papp };
|
||||
PgfProduction prod = gu_variant_close(i);
|
||||
PgfItem* item =
|
||||
pgf_new_item(ps, conts, prod);
|
||||
PgfSymbols* syms = entry->papp->fun->lins[conts->lin_idx]->syms;
|
||||
item->sym_idx = sym_idx;
|
||||
pgf_item_set_curr_symbol(item, ps->pool);
|
||||
item->sym_idx = gu_seq_length(syms);
|
||||
prob_t prob = item->inside_prob+item->conts->outside_prob;
|
||||
PgfParseState* state =
|
||||
pgf_new_parse_state(ps, offset, BIND_NONE, prob);
|
||||
@@ -1365,7 +1358,7 @@ pgf_parsing_td_predict(PgfParsing* ps,
|
||||
PgfProductionIdxEntry, &key);
|
||||
|
||||
if (value != NULL) {
|
||||
pgf_parsing_predict_lexeme(ps, conts, value, lentry->offset, lentry->sym_idx);
|
||||
pgf_parsing_predict_lexeme(ps, conts, value, lentry->offset);
|
||||
|
||||
PgfProductionIdxEntry* start =
|
||||
gu_buf_data(lentry->idx);
|
||||
@@ -1376,7 +1369,7 @@ pgf_parsing_td_predict(PgfParsing* ps,
|
||||
while (left >= start &&
|
||||
value->ccat->fid == left->ccat->fid &&
|
||||
value->lin_idx == left->lin_idx) {
|
||||
pgf_parsing_predict_lexeme(ps, conts, left, lentry->offset, lentry->sym_idx);
|
||||
pgf_parsing_predict_lexeme(ps, conts, left, lentry->offset);
|
||||
left--;
|
||||
}
|
||||
|
||||
@@ -1384,7 +1377,7 @@ pgf_parsing_td_predict(PgfParsing* ps,
|
||||
while (right <= end &&
|
||||
value->ccat->fid == right->ccat->fid &&
|
||||
value->lin_idx == right->lin_idx) {
|
||||
pgf_parsing_predict_lexeme(ps, conts, right, lentry->offset, lentry->sym_idx);
|
||||
pgf_parsing_predict_lexeme(ps, conts, right, lentry->offset);
|
||||
right++;
|
||||
}
|
||||
}
|
||||
@@ -1957,6 +1950,8 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat,
|
||||
start_ccat->prods = NULL;
|
||||
start_ccat->n_synprods = 0;
|
||||
|
||||
gu_assert(start_ccat->cnccat != NULL);
|
||||
|
||||
#ifdef PGF_COUNTS_DEBUG
|
||||
state->ps->ccat_full_count++;
|
||||
#endif
|
||||
@@ -2300,7 +2295,7 @@ pgf_parser_completions_next(GuEnum* self, void* to, GuPool* pool)
|
||||
}
|
||||
|
||||
PGF_API GuEnum*
|
||||
pgf_complete(PgfConcr* concr, PgfType* type, GuString sentence,
|
||||
pgf_complete(PgfConcr* concr, PgfType* type, GuString sentence,
|
||||
GuString prefix, GuExn *err, GuPool* pool)
|
||||
{
|
||||
if (concr->sequences == NULL ||
|
||||
@@ -2379,9 +2374,8 @@ pgf_sequence_cmp_fn(GuOrder* order, const void* p1, const void* p2)
|
||||
GuString sent = (GuString) p1;
|
||||
const PgfSequence* sp2 = p2;
|
||||
|
||||
size_t sym_idx = 0;
|
||||
int res = pgf_symbols_cmp(&sent, sp2->syms, &sym_idx, self->case_sensitive);
|
||||
if (res == 0 && (*sent != 0 || sym_idx != gu_seq_length(sp2->syms))) {
|
||||
int res = pgf_symbols_cmp(&sent, sp2->syms, self->case_sensitive);
|
||||
if (res == 0 && *sent != 0) {
|
||||
res = 1;
|
||||
}
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ pgf_read_in(GuIn* in,
|
||||
}
|
||||
|
||||
PGF_API_DECL void
|
||||
pgf_write(PgfPGF* pgf, const char* fpath, GuExn* err)
|
||||
pgf_write(PgfPGF* pgf, size_t n_concrs, PgfConcr** concrs, const char* fpath, GuExn* err)
|
||||
{
|
||||
FILE* outfile = fopen(fpath, "wb");
|
||||
if (outfile == NULL) {
|
||||
@@ -60,13 +60,70 @@ pgf_write(PgfPGF* pgf, const char* fpath, GuExn* err)
|
||||
GuOut* out = gu_file_out(outfile, tmp_pool);
|
||||
|
||||
PgfWriter* wtr = pgf_new_writer(out, tmp_pool, err);
|
||||
pgf_write_pgf(pgf, wtr);
|
||||
pgf_write_pgf(pgf, n_concrs, concrs, wtr);
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
|
||||
fclose(outfile);
|
||||
}
|
||||
|
||||
PGF_API void
|
||||
pgf_concrete_save(PgfConcr* concr, const char* fpath, GuExn* err)
|
||||
{
|
||||
FILE* outfile = fopen(fpath, "wb");
|
||||
if (outfile == NULL) {
|
||||
gu_raise_errno(err);
|
||||
return;
|
||||
}
|
||||
|
||||
GuPool* tmp_pool = gu_local_pool();
|
||||
|
||||
// Create an input stream from the input file
|
||||
GuOut* out = gu_file_out(outfile, tmp_pool);
|
||||
|
||||
PgfWriter* wtr = pgf_new_writer(out, tmp_pool, err);
|
||||
pgf_write_concrete(concr, wtr, true);
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
|
||||
fclose(outfile);
|
||||
}
|
||||
|
||||
PGF_API bool
|
||||
pgf_have_same_abstract(PgfPGF *one, PgfPGF *two)
|
||||
{
|
||||
if (strcmp(one->abstract.name, two->abstract.name) != 0)
|
||||
return false;
|
||||
|
||||
size_t n_cats = gu_seq_length(one->abstract.cats);
|
||||
if (n_cats != gu_seq_length(two->abstract.cats))
|
||||
return false;
|
||||
size_t n_funs = gu_seq_length(one->abstract.funs);
|
||||
if (n_funs != gu_seq_length(two->abstract.funs))
|
||||
return false;
|
||||
|
||||
for (size_t i = 0; i < n_cats; i++) {
|
||||
PgfAbsCat* cat1 = gu_seq_index(one->abstract.cats, PgfAbsCat, i);
|
||||
PgfAbsCat* cat2 = gu_seq_index(two->abstract.cats, PgfAbsCat, i);
|
||||
|
||||
if (strcmp(cat1->name, cat2->name) != 0)
|
||||
return false;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < n_funs; i++) {
|
||||
PgfAbsFun* fun1 = gu_seq_index(one->abstract.funs, PgfAbsFun, i);
|
||||
PgfAbsFun* fun2 = gu_seq_index(two->abstract.funs, PgfAbsFun, i);
|
||||
|
||||
if (strcmp(fun1->name, fun2->name) != 0)
|
||||
return false;
|
||||
|
||||
if (!pgf_type_eq(fun1->type, fun2->type))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
PGF_API GuString
|
||||
pgf_abstract_name(PgfPGF* pgf)
|
||||
{
|
||||
|
||||
@@ -19,14 +19,6 @@
|
||||
#define PGF_INTERNAL_DECL
|
||||
#define PGF_INTERNAL
|
||||
|
||||
#elif defined(__MINGW32__)
|
||||
|
||||
#define PGF_API_DECL
|
||||
#define PGF_API
|
||||
|
||||
#define PGF_INTERNAL_DECL
|
||||
#define PGF_INTERNAL
|
||||
|
||||
#else
|
||||
|
||||
#define PGF_API_DECL
|
||||
@@ -66,7 +58,10 @@ PGF_API_DECL void
|
||||
pgf_concrete_unload(PgfConcr* concr);
|
||||
|
||||
PGF_API_DECL void
|
||||
pgf_write(PgfPGF* pgf, const char* fpath, GuExn* err);
|
||||
pgf_write(PgfPGF* pgf, size_t n_concrs, PgfConcr** concrs, const char* fpath, GuExn* err);
|
||||
|
||||
PGF_API_DECL bool
|
||||
pgf_have_same_abstract(PgfPGF *one, PgfPGF *two);
|
||||
|
||||
PGF_API_DECL GuString
|
||||
pgf_abstract_name(PgfPGF*);
|
||||
@@ -249,7 +244,8 @@ pgf_callbacks_map_add_literal(PgfConcr* concr, PgfCallbacksMap* callbacks,
|
||||
PgfCId cat, PgfLiteralCallback* callback);
|
||||
|
||||
PGF_API_DECL void
|
||||
pgf_print(PgfPGF* pgf, GuOut* out, GuExn* err);
|
||||
pgf_print(PgfPGF* pgf, size_t n_concrs, PgfConcr** concrs,
|
||||
GuOut* out, GuExn* err);
|
||||
|
||||
PGF_API_DECL void
|
||||
pgf_check_expr(PgfPGF* gr, PgfExpr* pe, PgfType* ty,
|
||||
|
||||
@@ -7,13 +7,17 @@ typedef struct {
|
||||
} PgfPrintFn;
|
||||
|
||||
static void
|
||||
pgf_print_flags(PgfFlags* flags, GuOut *out, GuExn* err)
|
||||
pgf_print_flags(PgfFlags* flags, int indent, GuOut *out, GuExn* err)
|
||||
{
|
||||
size_t n_flags = gu_seq_length(flags);
|
||||
for (size_t i = 0; i < n_flags; i++) {
|
||||
PgfFlag* flag = gu_seq_index(flags, PgfFlag, i);
|
||||
|
||||
gu_puts(" flag ", out, err);
|
||||
|
||||
for (int k = 0; k < indent; k++) {
|
||||
gu_putc(' ', out, err);
|
||||
}
|
||||
|
||||
gu_puts("flag ", out, err);
|
||||
pgf_print_cid(flag->name, out, err);
|
||||
gu_puts(" = ", out, err);
|
||||
pgf_print_literal(flag->value, out, err);
|
||||
@@ -70,7 +74,7 @@ pgf_print_abstract(PgfAbstr* abstr, GuOut* out, GuExn* err)
|
||||
pgf_print_cid(abstr->name, out, err);
|
||||
gu_puts(" {\n", out, err);
|
||||
|
||||
pgf_print_flags(abstr->aflags, out, err);
|
||||
pgf_print_flags(abstr->aflags, 2, out, err);
|
||||
pgf_print_abscats(abstr->cats, out, err);
|
||||
pgf_print_absfuns(abstr->funs, out, err);
|
||||
|
||||
@@ -358,7 +362,7 @@ pgf_print_concrete(PgfConcr* concr, GuOut* out, GuExn* err)
|
||||
pgf_print_cid(concr->name, out, err);
|
||||
gu_puts(" {\n", out, err);
|
||||
|
||||
pgf_print_flags(concr->cflags, out, err);
|
||||
pgf_print_flags(concr->cflags, 2, out, err);
|
||||
|
||||
gu_puts(" productions\n", out, err);
|
||||
PgfPrintFn clo2 = { { pgf_print_productions }, out };
|
||||
@@ -396,13 +400,12 @@ pgf_print_concrete(PgfConcr* concr, GuOut* out, GuExn* err)
|
||||
}
|
||||
|
||||
PGF_API void
|
||||
pgf_print(PgfPGF* pgf, GuOut* out, GuExn* err)
|
||||
pgf_print(PgfPGF* pgf, size_t n_concrs, PgfConcr** concrs, GuOut* out, GuExn* err)
|
||||
{
|
||||
pgf_print_flags(pgf->gflags, 0, out, err);
|
||||
pgf_print_abstract(&pgf->abstract, out, err);
|
||||
|
||||
size_t n_concrs = gu_seq_length(pgf->concretes);
|
||||
|
||||
for (size_t i = 0; i < n_concrs; i++) {
|
||||
PgfConcr* concr = gu_seq_index(pgf->concretes, PgfConcr, i);
|
||||
pgf_print_concrete(concr, out, err);
|
||||
pgf_print_concrete(concrs[i], out, err);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -937,7 +937,7 @@ pgf_read_pargs(PgfReader* rdr, PgfConcr* concr)
|
||||
}
|
||||
|
||||
PGF_API bool
|
||||
pgf_production_is_lexical(PgfProductionApply *papp,
|
||||
pgf_production_is_lexical(PgfProductionApply *papp,
|
||||
GuBuf* non_lexical_buf, GuPool* pool)
|
||||
{
|
||||
if (gu_seq_length(papp->args) > 0)
|
||||
@@ -1168,6 +1168,14 @@ pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||
// pgf_ccat_set_viterbi_prob(ccat);
|
||||
}
|
||||
|
||||
// The GF compiler needs to call this function when building in memory grammars.
|
||||
PGF_API void
|
||||
pgf_concrete_fix_internals(PgfConcr* concr)
|
||||
{
|
||||
GuMapItor clo1 = { pgf_read_ccat_cb };
|
||||
gu_map_iter(concr->ccats, &clo1, NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_read_concrete_content(PgfReader* rdr, PgfConcr* concr)
|
||||
{
|
||||
@@ -1193,8 +1201,7 @@ pgf_read_concrete_content(PgfReader* rdr, PgfConcr* concr)
|
||||
concr->cnccats = pgf_read_cnccats(rdr, concr->abstr, concr);
|
||||
concr->total_cats = pgf_read_int(rdr);
|
||||
|
||||
GuMapItor clo1 = { pgf_read_ccat_cb };
|
||||
gu_map_iter(concr->ccats, &clo1, NULL);
|
||||
pgf_concrete_fix_internals(concr);
|
||||
}
|
||||
|
||||
static void
|
||||
|
||||
@@ -72,10 +72,15 @@ pgf_write_cid(PgfCId id, PgfWriter* wtr)
|
||||
PGF_INTERNAL void
|
||||
pgf_write_string(GuString val, PgfWriter* wtr)
|
||||
{
|
||||
size_t len = strlen(val);
|
||||
size_t len = 0;
|
||||
const uint8_t* buf = (const uint8_t*) val;
|
||||
const uint8_t* p = buf;
|
||||
while (gu_utf8_decode(&p) != 0)
|
||||
len++;
|
||||
|
||||
pgf_write_len(len, wtr);
|
||||
gu_return_on_exn(wtr->err, );
|
||||
gu_out_bytes(wtr->out, (uint8_t*) val, len, wtr->err);
|
||||
gu_out_bytes(wtr->out, (uint8_t*) val, (p-buf)-1, wtr->err);
|
||||
}
|
||||
|
||||
PGF_INTERNAL void
|
||||
@@ -843,7 +848,7 @@ pgf_write_concrete_content(PgfConcr* concr, PgfWriter* wtr)
|
||||
pgf_write_int(concr->total_cats, wtr);
|
||||
}
|
||||
|
||||
static void
|
||||
PGF_INTERNAL void
|
||||
pgf_write_concrete(PgfConcr* concr, PgfWriter* wtr, bool with_content)
|
||||
{
|
||||
if (with_content &&
|
||||
@@ -865,34 +870,20 @@ pgf_write_concrete(PgfConcr* concr, PgfWriter* wtr, bool with_content)
|
||||
gu_return_on_exn(wtr->err, );
|
||||
}
|
||||
|
||||
PGF_API void
|
||||
pgf_concrete_save(PgfConcr* concr, GuOut* out, GuExn* err)
|
||||
{
|
||||
GuPool* pool = gu_new_pool();
|
||||
|
||||
PgfWriter* wtr = pgf_new_writer(out, pool, err);
|
||||
|
||||
pgf_write_concrete(concr, wtr, true);
|
||||
|
||||
gu_pool_free(pool);
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_write_concretes(PgfConcrs* concretes, PgfWriter* wtr, bool with_content)
|
||||
pgf_write_concretes(size_t n_concrs, PgfConcr** concrs, PgfWriter* wtr, bool with_content)
|
||||
{
|
||||
size_t n_concrs = gu_seq_length(concretes);
|
||||
pgf_write_len(n_concrs, wtr);
|
||||
gu_return_on_exn(wtr->err, );
|
||||
|
||||
for (size_t i = 0; i < n_concrs; i++) {
|
||||
PgfConcr* concr = gu_seq_index(concretes, PgfConcr, i);
|
||||
pgf_write_concrete(concr, wtr, with_content);
|
||||
pgf_write_concrete(concrs[i], wtr, with_content);
|
||||
gu_return_on_exn(wtr->err, );
|
||||
}
|
||||
}
|
||||
|
||||
PGF_INTERNAL void
|
||||
pgf_write_pgf(PgfPGF* pgf, PgfWriter* wtr) {
|
||||
pgf_write_pgf(PgfPGF* pgf, size_t n_concrs, PgfConcr** concrs, PgfWriter* wtr) {
|
||||
gu_out_u16be(wtr->out, pgf->major_version, wtr->err);
|
||||
gu_return_on_exn(wtr->err, );
|
||||
|
||||
@@ -907,7 +898,7 @@ pgf_write_pgf(PgfPGF* pgf, PgfWriter* wtr) {
|
||||
|
||||
bool with_content =
|
||||
(gu_seq_binsearch(pgf->gflags, pgf_flag_order, PgfFlag, "split") == NULL);
|
||||
pgf_write_concretes(pgf->concretes, wtr, with_content);
|
||||
pgf_write_concretes(n_concrs, concrs, wtr, with_content);
|
||||
gu_return_on_exn(wtr->err, );
|
||||
}
|
||||
|
||||
|
||||
@@ -33,7 +33,10 @@ pgf_write_len(size_t len, PgfWriter* wtr);
|
||||
PGF_INTERNAL_DECL void
|
||||
pgf_write_cid(PgfCId id, PgfWriter* wtr);
|
||||
|
||||
PGF_INTERNAL void
|
||||
pgf_write_concrete(PgfConcr* concr, PgfWriter* wtr, bool with_content);
|
||||
|
||||
PGF_INTERNAL_DECL void
|
||||
pgf_write_pgf(PgfPGF* pgf, PgfWriter* wtr);
|
||||
pgf_write_pgf(PgfPGF* pgf, size_t n_concrs, PgfConcr** concrs, PgfWriter* wtr);
|
||||
|
||||
#endif // WRITER_H_
|
||||
|
||||
@@ -4918,7 +4918,6 @@ SQLITE_PRIVATE int sqlite3PendingByte;
|
||||
# define SQLITE_UTF16NATIVE SQLITE_UTF16BE
|
||||
#endif
|
||||
#if !defined(SQLITE_BYTEORDER)
|
||||
const int sqlite3one = 1;
|
||||
# define SQLITE_BYTEORDER 0 /* 0 means "unknown at compile-time" */
|
||||
# define SQLITE_BIGENDIAN (*(char *)(&sqlite3one)==0)
|
||||
# define SQLITE_LITTLEENDIAN (*(char *)(&sqlite3one)==1)
|
||||
@@ -5041,30 +5040,6 @@ SQLITE_PRIVATE int sqlite3VdbeRecordCompareWithSkip(int, const void *, UnpackedR
|
||||
*/
|
||||
/* #include "sqliteInt.h" */
|
||||
|
||||
/* An array to map all upper-case characters into their corresponding
|
||||
** lower-case character.
|
||||
**
|
||||
** SQLite only considers US-ASCII (or EBCDIC) characters. We do not
|
||||
** handle case conversions for the UTF character set since the tables
|
||||
** involved are nearly as big or bigger than SQLite itself.
|
||||
*/
|
||||
const unsigned char sqlite3UpperToLower[] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
|
||||
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
|
||||
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
|
||||
54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,
|
||||
122, 91, 92, 93, 94, 95, 96, 97, 98, 99,100,101,102,103,104,105,106,107,
|
||||
108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,
|
||||
126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
|
||||
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,
|
||||
162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,
|
||||
180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,
|
||||
198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,
|
||||
216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,
|
||||
234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,
|
||||
252,253,254,255
|
||||
};
|
||||
/* EVIDENCE-OF: R-02982-34736 In order to maintain full backwards
|
||||
** compatibility for legacy applications, the URI filename capability is
|
||||
** disabled by default.
|
||||
@@ -9088,22 +9063,6 @@ SQLITE_PRIVATE int sqlite3Strlen30(const char *z){
|
||||
return 0x3fffffff & (int)strlen(z);
|
||||
}
|
||||
|
||||
/* Convenient short-hand */
|
||||
#define UpperToLower sqlite3UpperToLower
|
||||
|
||||
int sqlite3StrICmp(const char *zLeft, const char *zRight){
|
||||
unsigned char *a, *b;
|
||||
int c;
|
||||
a = (unsigned char *)zLeft;
|
||||
b = (unsigned char *)zRight;
|
||||
for(;;){
|
||||
c = (int)UpperToLower[*a] - (int)UpperToLower[*b];
|
||||
if( c || *a==0 ) break;
|
||||
a++;
|
||||
b++;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
/*
|
||||
** The string z[] is an text representation of a real number.
|
||||
** Convert this string to a double and write it into *pResult.
|
||||
@@ -17871,6 +17830,13 @@ struct winFile {
|
||||
#define WINFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */
|
||||
#define WINFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */
|
||||
|
||||
/*
|
||||
* The size of the buffer used by sqlite3_win32_write_debug().
|
||||
*/
|
||||
#ifndef SQLITE_WIN32_DBG_BUF_SIZE
|
||||
# define SQLITE_WIN32_DBG_BUF_SIZE ((int)(4096-sizeof(DWORD)))
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The value used with sqlite3_win32_set_directory() to specify that
|
||||
* the temporary directory should be changed.
|
||||
@@ -18819,6 +18785,43 @@ SQLITE_PRIVATE int sqlite3_win32_reset_heap(){
|
||||
}
|
||||
#endif /* SQLITE_WIN32_MALLOC */
|
||||
|
||||
/*
|
||||
** This function outputs the specified (ANSI) string to the Win32 debugger
|
||||
** (if available).
|
||||
*/
|
||||
|
||||
SQLITE_PRIVATE void sqlite3_win32_write_debug(const char *zBuf, int nBuf){
|
||||
char zDbgBuf[SQLITE_WIN32_DBG_BUF_SIZE];
|
||||
int nMin = MIN(nBuf, (SQLITE_WIN32_DBG_BUF_SIZE - 1)); /* may be negative. */
|
||||
if( nMin<-1 ) nMin = -1; /* all negative values become -1. */
|
||||
assert( nMin==-1 || nMin==0 || nMin<SQLITE_WIN32_DBG_BUF_SIZE );
|
||||
#if defined(SQLITE_WIN32_HAS_ANSI)
|
||||
if( nMin>0 ){
|
||||
memset(zDbgBuf, 0, SQLITE_WIN32_DBG_BUF_SIZE);
|
||||
memcpy(zDbgBuf, zBuf, nMin);
|
||||
osOutputDebugStringA(zDbgBuf);
|
||||
}else{
|
||||
osOutputDebugStringA(zBuf);
|
||||
}
|
||||
#elif defined(SQLITE_WIN32_HAS_WIDE)
|
||||
memset(zDbgBuf, 0, SQLITE_WIN32_DBG_BUF_SIZE);
|
||||
if ( osMultiByteToWideChar(
|
||||
osAreFileApisANSI() ? CP_ACP : CP_OEMCP, 0, zBuf,
|
||||
nMin, (LPWSTR)zDbgBuf, SQLITE_WIN32_DBG_BUF_SIZE/sizeof(WCHAR))<=0 ){
|
||||
return;
|
||||
}
|
||||
osOutputDebugStringW((LPCWSTR)zDbgBuf);
|
||||
#else
|
||||
if( nMin>0 ){
|
||||
memset(zDbgBuf, 0, SQLITE_WIN32_DBG_BUF_SIZE);
|
||||
memcpy(zDbgBuf, zBuf, nMin);
|
||||
fprintf(stderr, "%s", zDbgBuf);
|
||||
}else{
|
||||
fprintf(stderr, "%s", zBuf);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
** The following routine suspends the current thread for at least ms
|
||||
** milliseconds. This is equivalent to the Win32 Sleep() interface.
|
||||
@@ -19260,6 +19263,40 @@ SQLITE_PRIVATE char *sqlite3_win32_utf8_to_mbcs(const char *zFilename){
|
||||
return zFilenameMbcs;
|
||||
}
|
||||
|
||||
/*
|
||||
** This function sets the data directory or the temporary directory based on
|
||||
** the provided arguments. The type argument must be 1 in order to set the
|
||||
** data directory or 2 in order to set the temporary directory. The zValue
|
||||
** argument is the name of the directory to use. The return value will be
|
||||
** SQLITE_OK if successful.
|
||||
*/
|
||||
SQLITE_PRIVATE int sqlite3_win32_set_directory(DWORD type, LPCWSTR zValue){
|
||||
char **ppDirectory = 0;
|
||||
#ifndef SQLITE_OMIT_AUTOINIT
|
||||
int rc = sqlite3BtreeInitialize();
|
||||
if( rc ) return rc;
|
||||
#endif
|
||||
if( type==SQLITE_WIN32_TEMP_DIRECTORY_TYPE ){
|
||||
ppDirectory = &sqlite3_temp_directory;
|
||||
}
|
||||
assert( !ppDirectory || type==SQLITE_WIN32_TEMP_DIRECTORY_TYPE
|
||||
);
|
||||
assert( !ppDirectory || sqlite3MemdebugHasType(*ppDirectory, MEMTYPE_HEAP) );
|
||||
if( ppDirectory ){
|
||||
char *zValueUtf8 = 0;
|
||||
if( zValue && zValue[0] ){
|
||||
zValueUtf8 = winUnicodeToUtf8(zValue);
|
||||
if ( zValueUtf8==0 ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
}
|
||||
sqlite3_free(*ppDirectory);
|
||||
*ppDirectory = zValueUtf8;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
|
||||
/*
|
||||
** The return value of winGetLastErrorMsg
|
||||
** is zero if the error message fits in the buffer, or non-zero
|
||||
@@ -22331,6 +22368,9 @@ static int winOpen(
|
||||
if( isReadonly ){
|
||||
pFile->ctrlFlags |= WINFILE_RDONLY;
|
||||
}
|
||||
if( sqlite3_uri_boolean(zName, "psow", SQLITE_POWERSAFE_OVERWRITE) ){
|
||||
pFile->ctrlFlags |= WINFILE_PSOW;
|
||||
}
|
||||
pFile->lastErrno = NO_ERROR;
|
||||
pFile->zPath = zName;
|
||||
#if SQLITE_MAX_MMAP_SIZE>0
|
||||
@@ -22549,6 +22589,43 @@ static BOOL winIsDriveLetterAndColon(
|
||||
return ( sqlite3Isalpha(zPathname[0]) && zPathname[1]==':' );
|
||||
}
|
||||
|
||||
/*
|
||||
** Returns non-zero if the specified path name should be used verbatim. If
|
||||
** non-zero is returned from this function, the calling function must simply
|
||||
** use the provided path name verbatim -OR- resolve it into a full path name
|
||||
** using the GetFullPathName Win32 API function (if available).
|
||||
*/
|
||||
static BOOL winIsVerbatimPathname(
|
||||
const char *zPathname
|
||||
){
|
||||
/*
|
||||
** If the path name starts with a forward slash or a backslash, it is either
|
||||
** a legal UNC name, a volume relative path, or an absolute path name in the
|
||||
** "Unix" format on Windows. There is no easy way to differentiate between
|
||||
** the final two cases; therefore, we return the safer return value of TRUE
|
||||
** so that callers of this function will simply use it verbatim.
|
||||
*/
|
||||
if ( winIsDirSep(zPathname[0]) ){
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/*
|
||||
** If the path name starts with a letter and a colon it is either a volume
|
||||
** relative path or an absolute path. Callers of this function must not
|
||||
** attempt to treat it as a relative path name (i.e. they should simply use
|
||||
** it verbatim).
|
||||
*/
|
||||
if ( winIsDriveLetterAndColon(zPathname) ){
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/*
|
||||
** If we get to this point, the path name should almost certainly be a purely
|
||||
** relative one (i.e. not a UNC name, not absolute, and not volume relative).
|
||||
*/
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
|
||||
** Turn a relative pathname into a full pathname. Write the full
|
||||
** pathname into zOut[]. zOut[] will be at least pVfs->mxPathname
|
||||
|
||||
@@ -1,3 +1,274 @@
|
||||
module PGF(module PGF2) where
|
||||
module PGF (PGF, readPGF, showPGF,
|
||||
abstractName,
|
||||
|
||||
import PGF2
|
||||
CId, mkCId, wildCId, showCId, readCId,
|
||||
|
||||
categories, categoryContext, categoryProbability,
|
||||
functions, functionsByCat, functionType, functionIsDataCon, browse,
|
||||
|
||||
PGF2.Expr,Tree,showExpr,PGF2.readExpr,pExpr,pIdent,
|
||||
mkAbs,unAbs,
|
||||
mkApp,unApp,unapply,
|
||||
PGF2.mkStr,PGF2.unStr,
|
||||
PGF2.mkInt,PGF2.unInt,
|
||||
PGF2.mkFloat,PGF2.unFloat,
|
||||
PGF2.mkMeta,PGF2.unMeta,
|
||||
PGF2.exprSize, exprFunctions,PGF2.exprSubstitute,
|
||||
compute,
|
||||
rankTreesByProbs,treeProbability,
|
||||
|
||||
TcError, ppTcError, inferExpr, checkType,
|
||||
|
||||
PGF2.Type, PGF2.Hypo, showType, showContext, PGF2.readType,
|
||||
mkType, unType,
|
||||
|
||||
Token,
|
||||
|
||||
Language, readLanguage, showLanguage,
|
||||
languages, startCat, languageCode,
|
||||
linearize, bracketedLinearize, tabularLinearizes, showBracketedString,
|
||||
ParseOutput(..), parse, parse_, complete,
|
||||
PGF2.BracketedString(..), PGF2.flattenBracketedString,
|
||||
hasLinearization,
|
||||
showPrintName,
|
||||
|
||||
Morpho, buildMorpho,
|
||||
lookupMorpho, isInMorpho, morphoMissing, morphoKnown, fullFormLexicon,
|
||||
|
||||
Labels, getDepLabels, CncLabels, getCncDepLabels,
|
||||
|
||||
generateAllDepth, generateRandom, generateRandomFrom, generateRandomDepth, generateRandomFromDepth,
|
||||
generateFromDepth,
|
||||
|
||||
PGF2.GraphvizOptions(..),
|
||||
graphvizAbstractTree, graphvizParseTree, graphvizAlignment, graphvizDependencyTree, graphvizParseTreeDep,
|
||||
|
||||
-- * Tries
|
||||
ATree(..),Trie(..),toATree,toTrie,
|
||||
|
||||
readProbabilitiesFromFile,
|
||||
|
||||
groupResults, conlls2latexDoc, gizaAlignment
|
||||
) where
|
||||
|
||||
import PGF.Internal
|
||||
import qualified PGF2
|
||||
import qualified Data.Map as Map
|
||||
import qualified Text.ParserCombinators.ReadP as RP
|
||||
import Data.List(sortBy)
|
||||
import Text.PrettyPrint(text)
|
||||
import Data.Char(isDigit)
|
||||
|
||||
readPGF = PGF2.readPGF
|
||||
|
||||
showPGF gr = PGF2.showPGF gr
|
||||
|
||||
readLanguage = readCId
|
||||
showLanguage (CId s) = s
|
||||
|
||||
startCat = PGF2.startCat
|
||||
languageCode pgf lang = Just (PGF2.languageCode (lookConcr pgf lang))
|
||||
|
||||
abstractName gr = CId (PGF2.abstractName gr)
|
||||
|
||||
categories gr = map CId (PGF2.categories gr)
|
||||
categoryContext gr (CId c) = PGF2.categoryContext gr c
|
||||
categoryProbability gr (CId c) = PGF2.categoryProbability gr c
|
||||
|
||||
functions gr = map CId (PGF2.functions gr)
|
||||
functionsByCat gr (CId c) = map CId (PGF2.functionsByCat gr c)
|
||||
functionType gr (CId f) = PGF2.functionType gr f
|
||||
functionIsDataCon gr (CId f) = PGF2.functionIsDataCon gr f
|
||||
|
||||
type Tree = PGF2.Expr
|
||||
type Labels = Map.Map CId [String]
|
||||
type CncLabels = [(String, String -> Maybe (String -> String,String,String))]
|
||||
|
||||
|
||||
mkCId x = CId x
|
||||
wildCId = CId "_"
|
||||
showCId (CId x) = x
|
||||
readCId s = Just (CId s)
|
||||
|
||||
showExpr xs e = PGF2.showExpr [x | CId x <- xs] e
|
||||
|
||||
pExpr = RP.readS_to_P PGF2.pExpr
|
||||
pIdent = RP.readS_to_P PGF2.pIdent
|
||||
|
||||
mkAbs bind_type (CId var) e = PGF2.mkAbs bind_type var e
|
||||
unAbs e = case PGF2.unAbs e of
|
||||
Just (bind_type, var, e) -> Just (bind_type, CId var, e)
|
||||
Nothing -> Nothing
|
||||
|
||||
mkApp (CId f) es = PGF2.mkApp f es
|
||||
unApp e = case PGF2.unApp e of
|
||||
Just (f,es) -> Just (CId f,es)
|
||||
Nothing -> Nothing
|
||||
|
||||
unapply = PGF2.unapply
|
||||
|
||||
instance Read PGF2.Expr where
|
||||
readsPrec _ s = case PGF2.readExpr s of
|
||||
Just e -> [(e,"")]
|
||||
Nothing -> []
|
||||
|
||||
showType xs ty = PGF2.showType [x | CId x <- xs] ty
|
||||
showContext xs hypos = PGF2.showContext [x | CId x <- xs] hypos
|
||||
|
||||
mkType hypos (CId var) es = PGF2.mkType [(bt,var,ty) | (bt,CId var,ty) <- hypos] var es
|
||||
unType ty = case PGF2.unType ty of
|
||||
(hypos,var,es) -> ([(bt,CId var,ty) | (bt,var,ty) <- hypos],CId var,es)
|
||||
|
||||
linearize pgf lang e = PGF2.linearize (lookConcr pgf lang) e
|
||||
bracketedLinearize pgf lang e = PGF2.bracketedLinearize (lookConcr pgf lang) e
|
||||
tabularLinearizes pgf lang e = [PGF2.tabularLinearize (lookConcr pgf lang) e]
|
||||
showBracketedString = PGF2.showBracketedString
|
||||
|
||||
type TcError = String
|
||||
|
||||
-- | This data type encodes the different outcomes which you could get from the parser.
|
||||
data ParseOutput
|
||||
= ParseFailed Int -- ^ The integer is the position in number of tokens where the parser failed.
|
||||
| TypeError [(FId,TcError)] -- ^ The parsing was successful but none of the trees is type correct.
|
||||
-- The forest id ('FId') points to the bracketed string from the parser
|
||||
-- where the type checking failed. More than one error is returned
|
||||
-- if there are many analizes for some phrase but they all are not type correct.
|
||||
| ParseOk [Tree] -- ^ If the parsing and the type checking are successful we get a list of abstract syntax trees.
|
||||
-- The list should be non-empty.
|
||||
| ParseIncomplete -- ^ The sentence is not complete. Only partial output is produced
|
||||
|
||||
parse pgf lang cat s =
|
||||
case PGF2.parse (lookConcr pgf lang) cat s of
|
||||
PGF2.ParseOk ts -> map fst ts
|
||||
_ -> []
|
||||
|
||||
parse_ pgf lang cat dp s =
|
||||
case PGF2.parse (lookConcr pgf lang) cat s of
|
||||
PGF2.ParseFailed pos _ -> (ParseFailed pos, PGF2.Leaf s)
|
||||
PGF2.ParseOk ts -> (ParseOk (map fst ts), PGF2.Leaf s)
|
||||
PGF2.ParseIncomplete -> (ParseIncomplete, PGF2.Leaf s)
|
||||
|
||||
complete pgf lang cat s prefix =
|
||||
let compls = Map.fromListWith (++) [(tok,[CId fun]) | (tok,_,fun,_) <- PGF2.complete (lookConcr pgf lang) cat s prefix]
|
||||
in (PGF2.Leaf [],s,compls)
|
||||
|
||||
hasLinearization pgf lang (CId f) = PGF2.hasLinearization (lookConcr pgf lang) f
|
||||
|
||||
ppTcError s = s
|
||||
|
||||
inferExpr gr e =
|
||||
case PGF2.inferExpr gr e of
|
||||
Right res -> Right res
|
||||
Left msg -> Left (text msg)
|
||||
|
||||
checkType gr ty =
|
||||
case PGF2.checkType gr ty of
|
||||
Right res -> Right res
|
||||
Left msg -> Left (text msg)
|
||||
|
||||
showPrintName pgf lang (CId f) =
|
||||
case PGF2.printName (lookConcr pgf lang) f of
|
||||
Just n -> n
|
||||
Nothing -> f
|
||||
|
||||
getDepLabels :: String -> Labels
|
||||
getDepLabels s = Map.fromList [(mkCId f,ls) | f:ls <- map words (lines s)]
|
||||
|
||||
getCncDepLabels :: String -> CncLabels
|
||||
getCncDepLabels = PGF2.getCncDepLabels
|
||||
|
||||
generateAllDepth gr ty _ = map fst (PGF2.generateAll gr ty)
|
||||
generateFromDepth = error "generateFromDepth is not implemented"
|
||||
generateRandom = error "generateRandom is not implemented"
|
||||
generateRandomFrom = error "generateRandomFrom is not implemented"
|
||||
generateRandomDepth = error "generateRandomDepth is not implemented"
|
||||
generateRandomFromDepth = error "generateRandomFromDepth is not implemented"
|
||||
|
||||
exprFunctions e = [CId f | f <- PGF2.exprFunctions e]
|
||||
|
||||
compute = error "compute is not implemented"
|
||||
|
||||
-- | rank from highest to lowest probability
|
||||
rankTreesByProbs :: PGF -> [PGF2.Expr] -> [(PGF2.Expr,Double)]
|
||||
rankTreesByProbs pgf ts = sortBy (\ (_,p) (_,q) -> compare q p)
|
||||
[(t, realToFrac (PGF2.treeProbability pgf t)) | t <- ts]
|
||||
|
||||
treeProbability = PGF2.treeProbability
|
||||
|
||||
languages pgf = fmap CId (Map.keys (PGF2.languages pgf))
|
||||
|
||||
type Morpho = PGF2.Concr
|
||||
|
||||
buildMorpho pgf lang = lookConcr pgf lang
|
||||
lookupMorpho cnc w = [(CId lemma,an) | (lemma,an,_) <- PGF2.lookupMorpho cnc w]
|
||||
isInMorpho cnc w = not (null (PGF2.lookupMorpho cnc w))
|
||||
fullFormLexicon cnc = [(w, [(CId fun,an) | (fun,an,_) <- analyses]) | (w, analyses) <- PGF2.fullFormLexicon cnc]
|
||||
|
||||
graphvizAbstractTree pgf (funs,cats) = PGF2.graphvizAbstractTree pgf PGF2.graphvizDefaults{PGF2.noFun=not funs,PGF2.noCat=not cats}
|
||||
graphvizParseTree pgf lang = PGF2.graphvizParseTree (lookConcr pgf lang)
|
||||
graphvizAlignment pgf langs = PGF2.graphvizWordAlignment (map (lookConcr pgf) langs) PGF2.graphvizDefaults
|
||||
graphvizDependencyTree format debug lbls cnclbls pgf lang e =
|
||||
let to_lbls' lbls = Map.fromList [(id,xs) | (CId id,xs) <- Map.toList lbls]
|
||||
in PGF2.graphvizDependencyTree format debug (fmap to_lbls' lbls) cnclbls (lookConcr pgf lang) e
|
||||
graphvizParseTreeDep = error "graphvizParseTreeDep is not implemented"
|
||||
|
||||
browse :: PGF -> CId -> Maybe (String,[CId],[CId])
|
||||
browse = error "browse is not implemented"
|
||||
|
||||
-- | A type for plain applicative trees
|
||||
data ATree t = Other t | App CId [ATree t] deriving Show
|
||||
data Trie = Oth Tree | Ap CId [[Trie ]] deriving Show
|
||||
-- ^ A type for tries of plain applicative trees
|
||||
|
||||
-- | Convert a 'Tree' to an 'ATree'
|
||||
toATree :: Tree -> ATree Tree
|
||||
toATree e = maybe (Other e) app (PGF2.unApp e)
|
||||
where
|
||||
app (f,es) = App (mkCId f) (map toATree es)
|
||||
|
||||
-- | Combine a list of trees into a trie
|
||||
toTrie = combines . map ((:[]) . singleton)
|
||||
where
|
||||
singleton t = case t of
|
||||
Other e -> Oth e
|
||||
App f ts -> Ap f [map singleton ts]
|
||||
|
||||
combines [] = []
|
||||
combines (ts:tss) = ts1:combines tss2
|
||||
where
|
||||
(ts1,tss2) = combines2 [] tss ts
|
||||
combines2 ots [] ts1 = (ts1,reverse ots)
|
||||
combines2 ots (ts2:tss) ts1 =
|
||||
maybe (combines2 (ts2:ots) tss ts1) (combines2 ots tss) (combine ts1 ts2)
|
||||
|
||||
combine ts us = mapM combine2 (zip ts us)
|
||||
where
|
||||
combine2 (Ap f ts,Ap g us) | f==g = Just (Ap f (combines (ts++us)))
|
||||
combine2 _ = Nothing
|
||||
|
||||
readProbabilitiesFromFile :: FilePath -> IO (Map.Map CId Double)
|
||||
readProbabilitiesFromFile fpath = do
|
||||
s <- readFile fpath
|
||||
return $ Map.fromList [(mkCId f,read p) | f:p:_ <- map words (lines s)]
|
||||
|
||||
groupResults :: [[(Language,String)]] -> [(Language,[String])]
|
||||
groupResults = Map.toList . foldr more Map.empty . start . concat
|
||||
where
|
||||
start ls = [(l,[s]) | (l,s) <- ls]
|
||||
more (l,s) =
|
||||
Map.insertWith (\ [x] xs -> if elem x xs then xs else (x : xs)) l s
|
||||
|
||||
conlls2latexDoc = error "conlls2latexDoc is not implemented"
|
||||
|
||||
|
||||
morphoMissing :: Morpho -> [String] -> [String]
|
||||
morphoMissing = morphoClassify False
|
||||
|
||||
morphoKnown :: Morpho -> [String] -> [String]
|
||||
morphoKnown = morphoClassify True
|
||||
|
||||
morphoClassify :: Bool -> Morpho -> [String] -> [String]
|
||||
morphoClassify k mo ws = [w | w <- ws, k /= null (lookupMorpho mo w), notLiteral w] where
|
||||
notLiteral w = not (all isDigit w) ---- should be defined somewhere
|
||||
|
||||
gizaAlignment = error "gizaAlignment is not implemented"
|
||||
|
||||
@@ -1 +1,163 @@
|
||||
module PGF.Internal where
|
||||
{-# LANGUAGE ImplicitParams #-}
|
||||
module PGF.Internal(CId(..),Language,PGF2.PGF,
|
||||
PGF2.Concr,lookConcr,
|
||||
PGF2.FId,isPredefFId,
|
||||
PGF2.FunId,PGF2.SeqId,PGF2.LIndex,PGF2.Token,
|
||||
PGF2.Production(..),PGF2.PArg(..),PGF2.Symbol(..),PGF2.Literal(..),PGF2.BindType(..),Sequence,
|
||||
globalFlags, abstrFlags, concrFlags,
|
||||
concrTotalCats, concrCategories, concrProductions,
|
||||
concrTotalFuns, concrFunction,
|
||||
concrTotalSeqs, concrSequence,
|
||||
|
||||
PGF2.CodeLabel, PGF2.Instr(..), PGF2.IVal(..), PGF2.TailInfo(..),
|
||||
|
||||
PGF2.Builder, PGF2.B, PGF2.build,
|
||||
eAbs, eApp, eMeta, eFun, eVar, eLit, eTyped, eImplArg, dTyp, hypo,
|
||||
PGF2.AbstrInfo, newAbstr, PGF2.ConcrInfo, newConcr, newPGF,
|
||||
|
||||
-- * Write an in-memory PGF to a file
|
||||
writePGF, writeConcr,
|
||||
|
||||
PGF2.fidString, PGF2.fidInt, PGF2.fidFloat, PGF2.fidVar, PGF2.fidStart,
|
||||
|
||||
ppFunId, ppSeqId, ppFId, ppMeta, ppLit, ppSeq,
|
||||
|
||||
unionPGF
|
||||
) where
|
||||
|
||||
import qualified PGF2
|
||||
import qualified PGF2.Internal as PGF2
|
||||
import qualified Data.Map as Map
|
||||
import PGF2.FFI(PGF(..))
|
||||
import Data.Array.IArray
|
||||
import Data.Array.Unboxed
|
||||
import Text.PrettyPrint
|
||||
|
||||
newtype CId = CId String deriving (Show,Read,Eq,Ord)
|
||||
|
||||
type Language = CId
|
||||
|
||||
lookConcr (PGF _ langs _) (CId lang) =
|
||||
case Map.lookup lang langs of
|
||||
Just cnc -> cnc
|
||||
Nothing -> error "Unknown language"
|
||||
|
||||
globalFlags pgf = Map.fromAscList [(CId name,value) | (name,value) <- PGF2.globalFlags pgf]
|
||||
abstrFlags pgf = Map.fromAscList [(CId name,value) | (name,value) <- PGF2.abstrFlags pgf]
|
||||
concrFlags concr = Map.fromAscList [(CId name,value) | (name,value) <- PGF2.concrFlags concr]
|
||||
|
||||
concrTotalCats = PGF2.concrTotalCats
|
||||
|
||||
concrCategories :: PGF2.Concr -> [(CId,PGF2.FId,PGF2.FId,[String])]
|
||||
concrCategories c = [(CId cat,start,end,lbls) | (cat,start,end,lbls) <- PGF2.concrCategories c]
|
||||
|
||||
concrProductions :: PGF2.Concr -> PGF2.FId -> [PGF2.Production]
|
||||
concrProductions = PGF2.concrProductions
|
||||
|
||||
concrTotalFuns = PGF2.concrTotalFuns
|
||||
|
||||
concrFunction :: PGF2.Concr -> PGF2.FunId -> (CId,[PGF2.SeqId])
|
||||
concrFunction c funid =
|
||||
let (fun,seqids) = PGF2.concrFunction c funid
|
||||
in (CId fun,seqids)
|
||||
|
||||
concrTotalSeqs :: PGF2.Concr -> PGF2.SeqId
|
||||
concrTotalSeqs = PGF2.concrTotalSeqs
|
||||
|
||||
concrSequence = PGF2.concrSequence
|
||||
|
||||
isPredefFId = PGF2.isPredefFId
|
||||
|
||||
type Sequence = [PGF2.Symbol]
|
||||
|
||||
eAbs :: (?builder :: PGF2.Builder s) => PGF2.BindType -> CId -> PGF2.B s PGF2.Expr -> PGF2.B s PGF2.Expr
|
||||
eAbs bind_type (CId var) body = PGF2.eAbs bind_type var body
|
||||
|
||||
eApp :: (?builder :: PGF2.Builder s) => PGF2.B s PGF2.Expr -> PGF2.B s PGF2.Expr -> PGF2.B s PGF2.Expr
|
||||
eApp = PGF2.eApp
|
||||
|
||||
eMeta :: (?builder :: PGF2.Builder s) => Int -> PGF2.B s PGF2.Expr
|
||||
eMeta = PGF2.eMeta
|
||||
|
||||
eFun (CId fun) = PGF2.eFun fun
|
||||
|
||||
eVar :: (?builder :: PGF2.Builder s) => Int -> PGF2.B s PGF2.Expr
|
||||
eVar = PGF2.eVar
|
||||
|
||||
eLit :: (?builder :: PGF2.Builder s) => PGF2.Literal -> PGF2.B s PGF2.Expr
|
||||
eLit = PGF2.eLit
|
||||
|
||||
eTyped :: (?builder :: PGF2.Builder s) => PGF2.B s PGF2.Expr -> PGF2.B s PGF2.Type -> PGF2.B s PGF2.Expr
|
||||
eTyped = PGF2.eTyped
|
||||
|
||||
eImplArg :: (?builder :: PGF2.Builder s) => PGF2.B s PGF2.Expr -> PGF2.B s PGF2.Expr
|
||||
eImplArg = PGF2.eImplArg
|
||||
|
||||
dTyp :: (?builder :: PGF2.Builder s) => [PGF2.B s (PGF2.BindType,String,PGF2.Type)] -> CId -> [PGF2.B s PGF2.Expr] -> PGF2.B s PGF2.Type
|
||||
dTyp hypos (CId cat) es = PGF2.dTyp hypos cat es
|
||||
|
||||
hypo bind_type (CId var) ty = PGF2.hypo bind_type var ty
|
||||
|
||||
newAbstr flags cats funs = PGF2.newAbstr [(flag,lit) | (CId flag,lit) <- flags]
|
||||
[(cat,hypos,prob) | (CId cat,hypos,prob) <- cats]
|
||||
[(fun,ty,arity,prob) | (CId fun,ty,arity,prob) <- funs]
|
||||
|
||||
newConcr abs flags printnames lindefs linrefs prods cncfuns seqs cnccats total_ccats =
|
||||
PGF2.newConcr abs [(flag,lit) | (CId flag,lit) <- flags]
|
||||
[(id,name) | (CId id,name) <- printnames]
|
||||
lindefs linrefs
|
||||
prods
|
||||
[(fun,seq_ids) | (CId fun,seq_ids) <- cncfuns]
|
||||
seqs
|
||||
[(cat,start,end,labels) | (CId cat,start,end,labels) <- cnccats]
|
||||
total_ccats
|
||||
|
||||
newPGF flags (CId name) abstr concrs =
|
||||
PGF2.newPGF [(flag,lit) | (CId flag,lit) <- flags]
|
||||
name
|
||||
abstr
|
||||
[(name,concr) | (CId name,concr) <- concrs]
|
||||
|
||||
writePGF = PGF2.writePGF
|
||||
writeConcr fpath pgf lang = PGF2.writeConcr fpath (lookConcr pgf lang)
|
||||
|
||||
|
||||
ppFunId funid = char 'F' <> int funid
|
||||
ppSeqId seqid = char 'S' <> int seqid
|
||||
|
||||
ppFId fid
|
||||
| fid == PGF2.fidString = text "CString"
|
||||
| fid == PGF2.fidInt = text "CInt"
|
||||
| fid == PGF2.fidFloat = text "CFloat"
|
||||
| fid == PGF2.fidVar = text "CVar"
|
||||
| fid == PGF2.fidStart = text "CStart"
|
||||
| otherwise = char 'C' <> int fid
|
||||
|
||||
ppMeta :: Int -> Doc
|
||||
ppMeta n
|
||||
| n == 0 = char '?'
|
||||
| otherwise = char '?' <> int n
|
||||
|
||||
ppLit (PGF2.LStr s) = text (show s)
|
||||
ppLit (PGF2.LInt n) = int n
|
||||
ppLit (PGF2.LFlt d) = double d
|
||||
|
||||
ppSeq (seqid,seq) =
|
||||
ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol seq)
|
||||
|
||||
ppSymbol (PGF2.SymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
|
||||
ppSymbol (PGF2.SymLit d r) = char '{' <> int d <> comma <> int r <> char '}'
|
||||
ppSymbol (PGF2.SymVar d r) = char '<' <> int d <> comma <> char '$' <> int r <> char '>'
|
||||
ppSymbol (PGF2.SymKS t) = doubleQuotes (text t)
|
||||
ppSymbol PGF2.SymNE = text "nonExist"
|
||||
ppSymbol PGF2.SymBIND = text "BIND"
|
||||
ppSymbol PGF2.SymSOFT_BIND = text "SOFT_BIND"
|
||||
ppSymbol PGF2.SymSOFT_SPACE= text "SOFT_SPACE"
|
||||
ppSymbol PGF2.SymCAPIT = text "CAPIT"
|
||||
ppSymbol PGF2.SymALL_CAPIT = text "ALL_CAPIT"
|
||||
ppSymbol (PGF2.SymKP syms alts) = text "pre" <+> braces (hsep (punctuate semi (hsep (map ppSymbol syms) : map ppAlt alts)))
|
||||
|
||||
ppAlt (syms,ps) = hsep (map ppSymbol syms) <+> char '/' <+> hsep (map (doubleQuotes . text) ps)
|
||||
|
||||
unionPGF = PGF2.unionPGF
|
||||
|
||||
|
||||
@@ -21,25 +21,21 @@
|
||||
module PGF2 (-- * PGF
|
||||
PGF,readPGF,showPGF,
|
||||
|
||||
-- * Identifiers
|
||||
CId,
|
||||
|
||||
-- * Abstract syntax
|
||||
AbsName,abstractName,
|
||||
-- ** Categories
|
||||
Cat,categories,categoryContext,
|
||||
Cat,categories,categoryContext,categoryProbability,
|
||||
-- ** Functions
|
||||
Fun, functions, functionsByCat,
|
||||
functionType, functionIsConstructor, hasLinearization,
|
||||
functionType, functionIsDataCon, hasLinearization,
|
||||
-- ** Expressions
|
||||
Expr,showExpr,readExpr,pExpr,
|
||||
Expr,showExpr,readExpr,pExpr,pIdent,
|
||||
mkAbs,unAbs,
|
||||
mkApp,unApp,
|
||||
mkApp,unApp,unapply,
|
||||
mkStr,unStr,
|
||||
mkInt,unInt,
|
||||
mkFloat,unFloat,
|
||||
mkMeta,unMeta,
|
||||
mkCId,
|
||||
exprHash, exprSize, exprFunctions, exprSubstitute,
|
||||
treeProbability,
|
||||
|
||||
@@ -58,13 +54,13 @@ module PGF2 (-- * PGF
|
||||
ConcName,Concr,languages,concreteName,languageCode,
|
||||
|
||||
-- ** Linearization
|
||||
linearize,linearizeAll,tabularLinearize,tabularLinearizeAll,bracketedLinearize,bracketedLinearizeAll,
|
||||
linearize,linearizeAll,tabularLinearize,tabularLinearizeAll,bracketedLinearize,
|
||||
FId, LIndex, BracketedString(..), showBracketedString, flattenBracketedString,
|
||||
printName,
|
||||
|
||||
alignWords,
|
||||
-- ** Parsing
|
||||
ParseOutput(..), parse, parseWithHeuristics,
|
||||
ParseOutput(..), parse, parseWithHeuristics, complete,
|
||||
-- ** Sentence Lookup
|
||||
lookupSentence,
|
||||
-- ** Generation
|
||||
@@ -73,7 +69,9 @@ module PGF2 (-- * PGF
|
||||
MorphoAnalysis, lookupMorpho, fullFormLexicon,
|
||||
-- ** Visualizations
|
||||
GraphvizOptions(..), graphvizDefaults,
|
||||
graphvizAbstractTree, graphvizParseTree, graphvizWordAlignment,
|
||||
graphvizAbstractTree, graphvizParseTree,
|
||||
graphvizDependencyTree, conlls2latexDoc, getCncDepLabels,
|
||||
graphvizWordAlignment,
|
||||
|
||||
-- * Exceptions
|
||||
PGFError(..),
|
||||
@@ -82,7 +80,7 @@ module PGF2 (-- * PGF
|
||||
LiteralCallback,literalCallbacks
|
||||
) where
|
||||
|
||||
import Prelude hiding (fromEnum,(<>)) -- GHC 8.4.1 clash with Text.PrettyPrint
|
||||
import Prelude hiding (fromEnum)
|
||||
import Control.Exception(Exception,throwIO)
|
||||
import Control.Monad(forM_)
|
||||
import System.IO.Unsafe(unsafePerformIO,unsafeInterleaveIO)
|
||||
@@ -97,7 +95,8 @@ import Data.Typeable
|
||||
import qualified Data.Map as Map
|
||||
import Data.IORef
|
||||
import Data.Char(isUpper,isSpace)
|
||||
import Data.List(isSuffixOf,maximumBy,nub)
|
||||
import Data.List(isSuffixOf,maximumBy,nub,mapAccumL,intersperse,groupBy,find)
|
||||
import Data.Maybe(fromMaybe)
|
||||
import Data.Function(on)
|
||||
|
||||
|
||||
@@ -110,8 +109,8 @@ import Data.Function(on)
|
||||
-- to Concr but has lost its reference to PGF.
|
||||
|
||||
|
||||
type AbsName = CId -- ^ Name of abstract syntax
|
||||
type ConcName = CId -- ^ Name of concrete syntax
|
||||
type AbsName = String -- ^ Name of abstract syntax
|
||||
type ConcName = String -- ^ Name of concrete syntax
|
||||
|
||||
-- | Reads file in Portable Grammar Format and produces
|
||||
-- 'PGF' structure. The file is usually produced with:
|
||||
@@ -136,7 +135,22 @@ readPGF fpath =
|
||||
throwIO (PGFError "The grammar cannot be loaded")
|
||||
else return pgf
|
||||
pgfFPtr <- newForeignPtr gu_pool_finalizer pool
|
||||
return (PGF pgf (touchForeignPtr pgfFPtr))
|
||||
let touch = touchForeignPtr pgfFPtr
|
||||
ref <- newIORef Map.empty
|
||||
allocaBytes (#size GuMapItor) $ \itor ->
|
||||
do fptr <- wrapMapItorCallback (getLanguages ref touch)
|
||||
(#poke GuMapItor, fn) itor fptr
|
||||
pgf_iter_languages pgf itor nullPtr
|
||||
freeHaskellFunPtr fptr
|
||||
langs <- readIORef ref
|
||||
return (PGF pgf langs touch)
|
||||
where
|
||||
getLanguages :: IORef (Map.Map String Concr) -> Touch -> MapItorCallback
|
||||
getLanguages ref touch itor key value exn = do
|
||||
langs <- readIORef ref
|
||||
name <- peekUtf8CString (castPtr key)
|
||||
concr <- fmap (\ptr -> Concr ptr touch) $ peek (castPtr value)
|
||||
writeIORef ref $! Map.insert name concr langs
|
||||
|
||||
showPGF :: PGF -> String
|
||||
showPGF p =
|
||||
@@ -144,29 +158,15 @@ showPGF p =
|
||||
withGuPool $ \tmpPl ->
|
||||
do (sb,out) <- newOut tmpPl
|
||||
exn <- gu_new_exn tmpPl
|
||||
pgf_print (pgf p) out exn
|
||||
withArrayLen ((map concr . Map.elems . languages) p) $ \n_concrs concrs ->
|
||||
pgf_print (pgf p) (fromIntegral n_concrs) concrs out exn
|
||||
touchPGF p
|
||||
s <- gu_string_buf_freeze sb tmpPl
|
||||
peekUtf8CString s
|
||||
|
||||
-- | List of all languages available in the grammar.
|
||||
languages :: PGF -> Map.Map ConcName Concr
|
||||
languages p =
|
||||
unsafePerformIO $
|
||||
do ref <- newIORef Map.empty
|
||||
allocaBytes (#size GuMapItor) $ \itor ->
|
||||
do fptr <- wrapMapItorCallback (getLanguages ref)
|
||||
(#poke GuMapItor, fn) itor fptr
|
||||
pgf_iter_languages (pgf p) itor nullPtr
|
||||
freeHaskellFunPtr fptr
|
||||
readIORef ref
|
||||
where
|
||||
getLanguages :: IORef (Map.Map String Concr) -> MapItorCallback
|
||||
getLanguages ref itor key value exn = do
|
||||
langs <- readIORef ref
|
||||
name <- peekUtf8CString (castPtr key)
|
||||
concr <- fmap (\ptr -> Concr ptr (touchPGF p)) $ peek (castPtr value)
|
||||
writeIORef ref $! Map.insert name concr langs
|
||||
languages p = langs p
|
||||
|
||||
-- | The abstract language name is the name of the top-level
|
||||
-- abstract module
|
||||
@@ -242,8 +242,8 @@ functionType p fn =
|
||||
else Just (Type c_type (touchPGF p)))
|
||||
|
||||
-- | The type of a function
|
||||
functionIsConstructor :: PGF -> Fun -> Bool
|
||||
functionIsConstructor p fn =
|
||||
functionIsDataCon :: PGF -> Fun -> Bool
|
||||
functionIsDataCon p fn =
|
||||
unsafePerformIO $
|
||||
withGuPool $ \tmpPl -> do
|
||||
c_fn <- newUtf8CString fn tmpPl
|
||||
@@ -253,15 +253,15 @@ functionIsConstructor p fn =
|
||||
|
||||
-- | Checks an expression against a specified type.
|
||||
checkExpr :: PGF -> Expr -> Type -> Either String Expr
|
||||
checkExpr (PGF p _) (Expr c_expr touch1) (Type c_ty touch2) =
|
||||
checkExpr p (Expr c_expr touch1) (Type c_ty touch2) =
|
||||
unsafePerformIO $
|
||||
alloca $ \pexpr ->
|
||||
withGuPool $ \tmpPl -> do
|
||||
exn <- gu_new_exn tmpPl
|
||||
exprPl <- gu_new_pool
|
||||
poke pexpr c_expr
|
||||
pgf_check_expr p pexpr c_ty exn exprPl
|
||||
touch1 >> touch2
|
||||
pgf_check_expr (pgf p) pexpr c_ty exn exprPl
|
||||
touchPGF p >> touch1 >> touch2
|
||||
status <- gu_exn_is_raised exn
|
||||
if not status
|
||||
then do exprFPl <- newForeignPtr gu_pool_finalizer exprPl
|
||||
@@ -280,15 +280,15 @@ checkExpr (PGF p _) (Expr c_expr touch1) (Type c_ty touch2) =
|
||||
-- possible to infer its type in the GF type system.
|
||||
-- In this case the function returns an error.
|
||||
inferExpr :: PGF -> Expr -> Either String (Expr, Type)
|
||||
inferExpr (PGF p _) (Expr c_expr touch1) =
|
||||
inferExpr p (Expr c_expr touch1) =
|
||||
unsafePerformIO $
|
||||
alloca $ \pexpr ->
|
||||
withGuPool $ \tmpPl -> do
|
||||
exn <- gu_new_exn tmpPl
|
||||
exprPl <- gu_new_pool
|
||||
poke pexpr c_expr
|
||||
c_ty <- pgf_infer_expr p pexpr exn exprPl
|
||||
touch1
|
||||
c_ty <- pgf_infer_expr (pgf p) pexpr exn exprPl
|
||||
touchPGF p >> touch1
|
||||
status <- gu_exn_is_raised exn
|
||||
if not status
|
||||
then do exprFPl <- newForeignPtr gu_pool_finalizer exprPl
|
||||
@@ -306,15 +306,15 @@ inferExpr (PGF p _) (Expr c_expr touch1) =
|
||||
-- | Check whether a type is consistent with the abstract
|
||||
-- syntax of the grammar.
|
||||
checkType :: PGF -> Type -> Either String Type
|
||||
checkType (PGF p _) (Type c_ty touch1) =
|
||||
checkType p (Type c_ty touch1) =
|
||||
unsafePerformIO $
|
||||
alloca $ \pty ->
|
||||
withGuPool $ \tmpPl -> do
|
||||
exn <- gu_new_exn tmpPl
|
||||
typePl <- gu_new_pool
|
||||
poke pty c_ty
|
||||
pgf_check_type p pty exn typePl
|
||||
touch1
|
||||
pgf_check_type (pgf p) pty exn typePl
|
||||
touchPGF p >> touch1
|
||||
status <- gu_exn_is_raised exn
|
||||
if not status
|
||||
then do typeFPl <- newForeignPtr gu_pool_finalizer typePl
|
||||
@@ -329,13 +329,13 @@ checkType (PGF p _) (Type c_ty touch1) =
|
||||
else throwIO (PGFError msg)
|
||||
|
||||
compute :: PGF -> Expr -> Expr
|
||||
compute (PGF p _) (Expr c_expr touch1) =
|
||||
compute p (Expr c_expr touch1) =
|
||||
unsafePerformIO $
|
||||
withGuPool $ \tmpPl -> do
|
||||
exn <- gu_new_exn tmpPl
|
||||
exprPl <- gu_new_pool
|
||||
c_expr <- pgf_compute p c_expr exn tmpPl exprPl
|
||||
touch1
|
||||
c_expr <- pgf_compute (pgf p) c_expr exn tmpPl exprPl
|
||||
touchPGF p >> touch1
|
||||
status <- gu_exn_is_raised exn
|
||||
if not status
|
||||
then do exprFPl <- newForeignPtr gu_pool_finalizer exprPl
|
||||
@@ -346,10 +346,10 @@ compute (PGF p _) (Expr c_expr touch1) =
|
||||
throwIO (PGFError msg)
|
||||
|
||||
treeProbability :: PGF -> Expr -> Float
|
||||
treeProbability (PGF p _) (Expr c_expr touch1) =
|
||||
treeProbability p (Expr c_expr touch1) =
|
||||
unsafePerformIO $ do
|
||||
res <- pgf_compute_tree_probability p c_expr
|
||||
touch1
|
||||
res <- pgf_compute_tree_probability (pgf p) c_expr
|
||||
touchPGF p >> touch1
|
||||
return (realToFrac res)
|
||||
|
||||
exprHash :: Int32 -> Expr -> Int32
|
||||
@@ -447,6 +447,433 @@ graphvizWordAlignment cs opts e =
|
||||
s <- gu_string_buf_freeze sb tmpPl
|
||||
peekUtf8CString s
|
||||
|
||||
|
||||
type Labels = Map.Map Fun [String]
|
||||
|
||||
-- | Visualize word dependency tree.
|
||||
graphvizDependencyTree
|
||||
:: String -- ^ Output format: @"latex"@, @"conll"@, @"malt_tab"@, @"malt_input"@ or @"dot"@
|
||||
-> Bool -- ^ Include extra information (debug)
|
||||
-> Maybe Labels -- ^ abstract label information obtained with 'getDepLabels'
|
||||
-> Maybe CncLabels -- ^ concrete label information obtained with ' ' (was: unused (was: @Maybe String@))
|
||||
-> Concr
|
||||
-> Expr
|
||||
-> String -- ^ Rendered output in the specified format
|
||||
graphvizDependencyTree format debug mlab mclab concr t =
|
||||
case format of
|
||||
"latex" -> render . ppLaTeX $ conll2latex' conll
|
||||
"svg" -> render . ppSVG . toSVG $ conll2latex' conll
|
||||
"conll" -> printCoNLL conll
|
||||
"malt_tab" -> render $ vcat (map (hcat . intersperse (char '\t') . (\ws -> [ws !! 0,ws !! 1,ws !! 3,ws !! 6,ws !! 7])) wnodes)
|
||||
"malt_input" -> render $ vcat (map (hcat . intersperse (char '\t') . take 6) wnodes)
|
||||
_ -> render $ text "digraph {" $$
|
||||
space $$
|
||||
nest 2 (text "rankdir=LR ;" $$
|
||||
text "node [shape = plaintext] ;" $$
|
||||
vcat nodes $$
|
||||
vcat links) $$
|
||||
text "}"
|
||||
where
|
||||
conll = maybe conll0 (\ls -> fixCoNLL ls conll0) mclab
|
||||
conll0 = (map.map) render wnodes
|
||||
nodes = map mkNode leaves
|
||||
links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun),_,w) <- tail leaves]
|
||||
|
||||
-- CoNLL format: ID FORM LEMMA PLEMMA POS PPOS FEAT PFEAT HEAD PHEAD DEPREL PDEPREL
|
||||
-- P variants are automatically predicted rather than gold standard
|
||||
|
||||
wnodes = [[int i, maltws ws, text fun, text (posCat cat), text cat, unspec, int parent, text lab, unspec, unspec] |
|
||||
((cat,fid,fun),i,ws) <- tail leaves,
|
||||
let (lab,parent) = fromMaybe (dep_lbl,0)
|
||||
(do (lbl,fid) <- lookup fid deps
|
||||
(_,i,_) <- find (\((_,fid1,_),i,_) -> fid == fid1) leaves
|
||||
return (lbl,i))
|
||||
]
|
||||
maltws = text . concat . intersperse "+" . words -- no spaces in column 2
|
||||
|
||||
nil = -1
|
||||
|
||||
bss = bracketedLinearize concr t
|
||||
|
||||
root = ("_",nil,"_")
|
||||
|
||||
leaves = (root,0,root_lbl) : (groupAndIndexIt 1 . concatMap (getLeaves root)) bss
|
||||
deps = let (_,(h,deps)) = getDeps 0 [] t
|
||||
in (h,(dep_lbl,nil)):deps
|
||||
|
||||
groupAndIndexIt id [] = []
|
||||
groupAndIndexIt id ((p,w):pws) = (p,id,w) : groupAndIndexIt (id+1) pws
|
||||
--- groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
|
||||
--- in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1
|
||||
where
|
||||
collect pws@((p1,w):pws1)
|
||||
| p == p1 = let (ws,pws2) = collect pws1
|
||||
in (w:ws,pws2)
|
||||
collect pws = ([],pws)
|
||||
|
||||
getLeaves parent bs =
|
||||
case bs of
|
||||
Leaf w -> [(parent,w)]
|
||||
Bracket cat fid _ fun bss -> concatMap (getLeaves (cat,fid,fun)) bss
|
||||
|
||||
mkNode ((_,p,_),i,w) =
|
||||
tag p <+> brackets (text "label = " <> doubleQuotes (int i <> char '.' <+> text w)) <+> semi
|
||||
|
||||
mkLink (x,(lbl,y)) = tag y <+> text "->" <+> tag x <+> text "[label = " <> doubleQuotes (text lbl) <> text "] ;"
|
||||
|
||||
labels = maybe Map.empty id mlab
|
||||
clabels = maybe [] id mclab
|
||||
|
||||
posCat cat = case Map.lookup cat labels of
|
||||
Just [p] -> p
|
||||
_ -> cat
|
||||
|
||||
getDeps n_fid xs e =
|
||||
case unAbs e of
|
||||
Just (_, x, e) -> getDeps n_fid (x:xs) e
|
||||
Nothing -> case unApp e of
|
||||
Just (f,es) -> let (n_fid_1,ds) = descend n_fid xs es
|
||||
(mb_h, deps) = selectHead f ds
|
||||
in case mb_h of
|
||||
Just (fid,deps0) -> (n_fid_1+1,(fid,deps0++
|
||||
[(n_fid_1,(dep_lbl,fid))]++
|
||||
concat [(m,(lbl,fid)):ds | (lbl,(m,ds)) <- deps]))
|
||||
Nothing -> (n_fid_1+1,(n_fid_1,concat [(m,(lbl,n_fid_1)):ds | (lbl,(m,ds)) <- deps]))
|
||||
Nothing -> (n_fid+1,(n_fid,[]))
|
||||
|
||||
descend n_fid xs es = mapAccumL (\n_fid e -> getDeps n_fid xs e) n_fid es
|
||||
|
||||
selectHead f ds =
|
||||
case Map.lookup f labels of
|
||||
Just lbls -> extractHead (zip lbls ds)
|
||||
Nothing -> extractLast ds
|
||||
where
|
||||
extractHead [] = (Nothing, [])
|
||||
extractHead (ld@(l,d):lds)
|
||||
| l == head_lbl = (Just d,lds)
|
||||
| otherwise = let (mb_h,deps) = extractHead lds
|
||||
in (mb_h,ld:deps)
|
||||
|
||||
extractLast [] = (Nothing, [])
|
||||
extractLast (d:ds)
|
||||
| null ds = (Just d,[])
|
||||
| otherwise = let (mb_h,deps) = extractLast ds
|
||||
in (mb_h,(dep_lbl,d):deps)
|
||||
|
||||
dep_lbl = "dep"
|
||||
head_lbl = "head"
|
||||
root_lbl = "ROOT"
|
||||
unspec = text "_"
|
||||
|
||||
|
||||
---------------------- should be a separate module?
|
||||
|
||||
-- visualization with latex output. AR Nov 2015
|
||||
|
||||
conlls2latexDoc :: [String] -> String
|
||||
conlls2latexDoc =
|
||||
render .
|
||||
latexDoc .
|
||||
vcat .
|
||||
intersperse (text "" $+$ app "vspace" (text "4mm")) .
|
||||
map conll2latex .
|
||||
filter (not . null)
|
||||
|
||||
conll2latex :: String -> Doc
|
||||
conll2latex = ppLaTeX . conll2latex' . parseCoNLL
|
||||
|
||||
conll2latex' :: CoNLL -> [LaTeX]
|
||||
conll2latex' = dep2latex . conll2dep'
|
||||
|
||||
data Dep = Dep {
|
||||
wordLength :: Int -> Double -- length of word at position int -- was: fixed width, millimetres (>= 20.0)
|
||||
, tokens :: [(String,String)] -- word, pos (0..)
|
||||
, deps :: [((Int,Int),String)] -- from, to, label
|
||||
, root :: Int -- root word position
|
||||
}
|
||||
|
||||
-- some general measures
|
||||
defaultWordLength = 20.0 -- the default fixed width word length, making word 100 units
|
||||
defaultUnit = 0.2 -- unit in latex pictures, 0.2 millimetres
|
||||
spaceLength = 10.0
|
||||
charWidth = 1.8
|
||||
|
||||
wsize rwld w = 100 * rwld w + spaceLength -- word length, units
|
||||
wpos rwld i = sum [wsize rwld j | j <- [0..i-1]] -- start position of the i'th word
|
||||
wdist rwld x y = sum [wsize rwld i | i <- [min x y .. max x y - 1]] -- distance between words x and y
|
||||
labelheight h = h + arcbase + 3 -- label just above arc; 25 would put it just below
|
||||
labelstart c = c - 15.0 -- label starts 15u left of arc centre
|
||||
arcbase = 30.0 -- arcs start and end 40u above the bottom
|
||||
arcfactor r = r * 600 -- reduction of arc size from word distance
|
||||
xyratio = 3 -- width/height ratio of arcs
|
||||
|
||||
putArc :: (Int -> Double) -> Int -> Int -> Int -> String -> [DrawingCommand]
|
||||
putArc frwld height x y label = [oval,arrowhead,labelling] where
|
||||
oval = Put (ctr,arcbase) (OvalTop (wdth,hght))
|
||||
arrowhead = Put (endp,arcbase + 5) (ArrowDown 5) -- downgoing arrow 5u above the arc base
|
||||
labelling = Put (labelstart ctr,labelheight (hght/2)) (TinyText label)
|
||||
dxy = wdist frwld x y -- distance between words, >>= 20.0
|
||||
ndxy = 100 * rwld * fromIntegral height -- distance that is indep of word length
|
||||
hdxy = dxy / 2 -- half the distance
|
||||
wdth = dxy - (arcfactor rwld)/dxy -- longer arcs are wider in proportion
|
||||
hght = ndxy / (xyratio * rwld) -- arc height is independent of word length
|
||||
begp = min x y -- begin position of oval
|
||||
ctr = wpos frwld begp + hdxy + (if x < y then 20 else 10) -- LR arcs are farther right from center of oval
|
||||
endp = (if x < y then (+) else (-)) ctr (wdth/2) -- the point of the arrow
|
||||
rwld = 0.5 ----
|
||||
|
||||
dep2latex :: Dep -> [LaTeX]
|
||||
dep2latex d =
|
||||
[Comment (unwords (map fst (tokens d))),
|
||||
Picture defaultUnit (width,height) (
|
||||
[Put (wpos rwld i,0) (Text w) | (i,w) <- zip [0..] (map fst (tokens d))] -- words
|
||||
++ [Put (wpos rwld i,15) (TinyText w) | (i,w) <- zip [0..] (map snd (tokens d))] -- pos tags 15u above bottom
|
||||
++ concat [putArc rwld (aheight x y) x y label | ((x,y),label) <- deps d] -- arcs and labels
|
||||
++ [Put (wpos rwld (root d) + 15,height) (ArrowDown (height-arcbase))]
|
||||
++ [Put (wpos rwld (root d) + 20,height - 10) (TinyText "ROOT")]
|
||||
)]
|
||||
where
|
||||
wld i = wordLength d i -- >= 20.0
|
||||
rwld i = (wld i) / defaultWordLength -- >= 1.0
|
||||
aheight x y = depth (min x y) (max x y) + 1 ---- abs (x-y)
|
||||
arcs = [(min u v, max u v) | ((u,v),_) <- deps d]
|
||||
depth x y = case [(u,v) | (u,v) <- arcs, (x < u && v <= y) || (x == u && v < y)] of ---- only projective arcs counted
|
||||
[] -> 0
|
||||
uvs -> 1 + maximum (0:[depth u v | (u,v) <- uvs])
|
||||
width = {-round-} (sum [wsize rwld w | (w,_) <- zip [0..] (tokens d)]) + {-round-} spaceLength * fromIntegral ((length (tokens d)) - 1)
|
||||
height = 50 + 20 * {-round-} (maximum (0:[aheight x y | ((x,y),_) <- deps d]))
|
||||
|
||||
type CoNLL = [[String]]
|
||||
parseCoNLL :: String -> CoNLL
|
||||
parseCoNLL = map words . lines
|
||||
|
||||
--conll2dep :: String -> Dep
|
||||
--conll2dep = conll2dep' . parseCoNLL
|
||||
|
||||
conll2dep' :: CoNLL -> Dep
|
||||
conll2dep' ls = Dep {
|
||||
wordLength = wld
|
||||
, tokens = toks
|
||||
, deps = dps
|
||||
, root = head $ [read x-1 | x:_:_:_:_:_:"0":_ <- ls] ++ [1]
|
||||
}
|
||||
where
|
||||
wld i = maximum (0:[charWidth * fromIntegral (length w) | w <- let (tok,pos) = toks !! i in [tok,pos]])
|
||||
toks = [(w,c) | _:w:_:c:_ <- ls]
|
||||
dps = [((read y-1, read x-1),lab) | x:_:_:_:_:_:y:lab:_ <- ls, y /="0"]
|
||||
--maxdist = maximum [abs (x-y) | ((x,y),_) <- dps]
|
||||
|
||||
|
||||
-- * LaTeX Pictures (see https://en.wikibooks.org/wiki/LaTeX/Picture)
|
||||
|
||||
-- We render both LaTeX and SVG from this intermediate representation of
|
||||
-- LaTeX pictures.
|
||||
|
||||
data LaTeX = Comment String | Picture UnitLengthMM Size [DrawingCommand]
|
||||
data DrawingCommand = Put Position Object
|
||||
data Object = Text String | TinyText String | OvalTop Size | ArrowDown Length
|
||||
|
||||
type UnitLengthMM = Double
|
||||
type Size = (Double,Double)
|
||||
type Position = (Double,Double)
|
||||
type Length = Double
|
||||
|
||||
|
||||
-- * latex formatting
|
||||
ppLaTeX = vcat . map ppLaTeX1
|
||||
where
|
||||
ppLaTeX1 el =
|
||||
case el of
|
||||
Comment s -> comment s
|
||||
Picture unit size cmds ->
|
||||
app "setlength{\\unitlength}" (text (show unit ++ "mm"))
|
||||
$$ hang (app "begin" (text "picture")<>text (show size)) 2
|
||||
(vcat (map ppDrawingCommand cmds))
|
||||
$$ app "end" (text "picture")
|
||||
$$ text ""
|
||||
|
||||
ppDrawingCommand (Put pos obj) = put pos (ppObject obj)
|
||||
|
||||
ppObject obj =
|
||||
case obj of
|
||||
Text s -> text s
|
||||
TinyText s -> small (text s)
|
||||
OvalTop size -> text "\\oval" <> text (show size) <> text "[t]"
|
||||
ArrowDown len -> app "vector(0,-1)" (text (show len))
|
||||
|
||||
put p@(_,_) = app ("put" ++ show p)
|
||||
small w = text "{\\tiny" <+> w <> text "}"
|
||||
comment s = text "%%" <+> text s -- line break show follow
|
||||
|
||||
app macro arg = text "\\" <> text macro <> text "{" <> arg <> text "}"
|
||||
|
||||
|
||||
latexDoc :: Doc -> Doc
|
||||
latexDoc body =
|
||||
vcat [text "\\documentclass{article}",
|
||||
text "\\usepackage[utf8]{inputenc}",
|
||||
text "\\begin{document}",
|
||||
body,
|
||||
text "\\end{document}"]
|
||||
|
||||
-- * SVG (see https://www.w3.org/Graphics/SVG/IG/resources/svgprimer.html)
|
||||
|
||||
-- | Render LaTeX pictures as SVG
|
||||
toSVG = concatMap toSVG1
|
||||
where
|
||||
toSVG1 el =
|
||||
case el of
|
||||
Comment s -> []
|
||||
Picture unit size@(w,h) cmds ->
|
||||
[Elem "svg" ["width".=x1,"height".=y0+5,
|
||||
("viewBox",unwords (map show [0,0,x1,y0+5])),
|
||||
("version","1.1"),
|
||||
("xmlns","http://www.w3.org/2000/svg")]
|
||||
(white_bg:concatMap draw cmds)]
|
||||
where
|
||||
white_bg =
|
||||
Elem "rect" ["x".=0,"y".=0,"width".=x1,"height".=y0+5,
|
||||
("fill","white")] []
|
||||
|
||||
draw (Put pos obj) = objectSVG pos obj
|
||||
|
||||
objectSVG pos obj =
|
||||
case obj of
|
||||
Text s -> [text 16 pos s]
|
||||
TinyText s -> [text 10 pos s]
|
||||
OvalTop size -> [ovalTop pos size]
|
||||
ArrowDown len -> arrowDown pos len
|
||||
|
||||
text h (x,y) s =
|
||||
Elem "text" ["x".=xc x,"y".=yc y-2,"font-size".=h]
|
||||
[CharData s]
|
||||
|
||||
ovalTop (x,y) (w,h) =
|
||||
Elem "path" [("d",path),("stroke","black"),("fill","none")] []
|
||||
where
|
||||
x1 = x-w/2
|
||||
x2 = min x (x1+r)
|
||||
x3 = max x (x4-r)
|
||||
x4 = x+w/2
|
||||
y1 = y
|
||||
y2 = y+r
|
||||
r = h/2
|
||||
sx = show . xc
|
||||
sy = show . yc
|
||||
path = unwords (["M",sx x1,sy y1,"Q",sx x1,sy y2,sx x2,sy y2,
|
||||
"L",sx x3,sy y2,"Q",sx x4,sy y2,sx x4,sy y1])
|
||||
|
||||
arrowDown (x,y) len =
|
||||
[Elem "line" ["x1".=xc x,"y1".=yc y,"x2".=xc x,"y2".=y2,
|
||||
("stroke","black")] [],
|
||||
Elem "path" [("d",unwords arrowhead)] []]
|
||||
where
|
||||
x2 = xc x
|
||||
y2 = yc (y-len)
|
||||
arrowhead = "M":map show [x2,y2,x2-3,y2-6,x2+3,y2-6]
|
||||
|
||||
xc x = num x+5
|
||||
yc y = y0-num y
|
||||
x1 = num w+10
|
||||
y0 = num h+20
|
||||
num x = round (scale*x)
|
||||
scale = unit*5
|
||||
|
||||
infix 0 .=
|
||||
n.=v = (n,show v)
|
||||
|
||||
-- * SVG is XML
|
||||
|
||||
data SVG = CharData String | Elem TagName Attrs [SVG]
|
||||
type TagName = String
|
||||
type Attrs = [(String,String)]
|
||||
|
||||
ppSVG svg =
|
||||
vcat [text "<?xml version=\"1.0\" standalone=\"no\"?>",
|
||||
text "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"",
|
||||
text "\"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">",
|
||||
text "",
|
||||
vcat (map ppSVG1 svg)] -- It should be a single <svg> element...
|
||||
where
|
||||
ppSVG1 svg1 =
|
||||
case svg1 of
|
||||
CharData s -> text (encode s)
|
||||
Elem tag attrs [] ->
|
||||
text "<"<>text tag<>cat (map attr attrs) <> text "/>"
|
||||
Elem tag attrs svg ->
|
||||
cat [text "<"<>text tag<>cat (map attr attrs) <> text ">",
|
||||
nest 2 (cat (map ppSVG1 svg)),
|
||||
text "</"<>text tag<>text ">"]
|
||||
|
||||
attr (n,v) = text " "<>text n<>text "=\""<>text (encode v)<>text "\""
|
||||
|
||||
encode s = foldr encodeEntity "" s
|
||||
|
||||
encodeEntity = encodeEntity' (const False)
|
||||
encodeEntity' esc c r =
|
||||
case c of
|
||||
'&' -> "&"++r
|
||||
'<' -> "<"++r
|
||||
'>' -> ">"++r
|
||||
_ -> c:r
|
||||
|
||||
|
||||
----------------------------------
|
||||
-- concrete syntax annotations (local) on top of conll
|
||||
-- examples of annotations:
|
||||
-- UseComp {"not"} PART neg head
|
||||
-- UseComp {*} AUX cop head
|
||||
|
||||
type CncLabels = [(String, String -> Maybe (String -> String,String,String))]
|
||||
-- (fun, word -> (pos,label,target))
|
||||
-- the pos can remain unchanged, as in the current notation in the article
|
||||
|
||||
fixCoNLL :: CncLabels -> CoNLL -> CoNLL
|
||||
fixCoNLL labels conll = map fixc conll where
|
||||
fixc row = case row of
|
||||
(i:word:fun:pos:cat:x_:"0":"dep":xs) -> (i:word:fun:pos:cat:x_:"0":"root":xs) --- change the root label from dep to root
|
||||
(i:word:fun:pos:cat:x_:j:label:xs) -> case look (fun,word) of
|
||||
Just (pos',label',"head") -> (i:word:fun:pos' pos:cat:x_:j :label':xs)
|
||||
Just (pos',label',target) -> (i:word:fun:pos' pos:cat:x_: getDep j target:label':xs)
|
||||
_ -> row
|
||||
_ -> row
|
||||
|
||||
look (fun,word) = case lookup fun labels of
|
||||
Just relabel -> case relabel word of
|
||||
Just row -> Just row
|
||||
_ -> case lookup "*" labels of
|
||||
Just starlabel -> starlabel word
|
||||
_ -> Nothing
|
||||
_ -> case lookup "*" labels of
|
||||
Just starlabel -> starlabel word
|
||||
_ -> Nothing
|
||||
|
||||
getDep j label = maybe j id $ lookup (label,j) [((label,j),i) | i:word:fun:pos:cat:x_:j:label:xs <- conll]
|
||||
|
||||
getCncDepLabels :: String -> CncLabels
|
||||
getCncDepLabels = map merge . groupBy (\ (x,_) (a,_) -> x == a) . concatMap analyse . filter choose . lines where
|
||||
--- choose is for compatibility with the general notation
|
||||
choose line = notElem '(' line && elem '{' line --- ignoring non-local (with "(") and abstract (without "{") rules
|
||||
|
||||
analyse line = case break (=='{') line of
|
||||
(beg,_:ws) -> case break (=='}') ws of
|
||||
(toks,_:target) -> case (words beg, words target) of
|
||||
(fun:_,[ label,j]) -> [(fun, (tok, (id, label,j))) | tok <- getToks toks]
|
||||
(fun:_,[pos,label,j]) -> [(fun, (tok, (const pos,label,j))) | tok <- getToks toks]
|
||||
_ -> []
|
||||
_ -> []
|
||||
_ -> []
|
||||
merge rules@((fun,_):_) = (fun, \tok ->
|
||||
case lookup tok (map snd rules) of
|
||||
Just new -> return new
|
||||
_ -> lookup "*" (map snd rules)
|
||||
)
|
||||
getToks = words . map (\c -> if elem c "\"," then ' ' else c)
|
||||
|
||||
printCoNLL :: CoNLL -> String
|
||||
printCoNLL = unlines . map (concat . intersperse "\t")
|
||||
|
||||
|
||||
newGraphvizOptions :: Ptr GuPool -> GraphvizOptions -> IO (Ptr PgfGraphvizOptions)
|
||||
newGraphvizOptions pool opts = do
|
||||
c_opts <- gu_malloc pool (#size PgfGraphvizOptions)
|
||||
@@ -542,7 +969,7 @@ parseWithHeuristics :: Concr -- ^ the language with which we parse
|
||||
-- If a literal has been recognized then the output should
|
||||
-- be Just (expr,probability,end_offset)
|
||||
-> ParseOutput
|
||||
parseWithHeuristics lang (Type ctype touchType) sent heuristic callbacks =
|
||||
parseWithHeuristics lang (Type ctype _) sent heuristic callbacks =
|
||||
unsafePerformIO $
|
||||
do exprPl <- gu_new_pool
|
||||
parsePl <- gu_new_pool
|
||||
@@ -550,7 +977,6 @@ parseWithHeuristics lang (Type ctype touchType) sent heuristic callbacks =
|
||||
sent <- newUtf8CString sent parsePl
|
||||
callbacks_map <- mkCallbacksMap (concr lang) callbacks parsePl
|
||||
enum <- pgf_parse_with_heuristics (concr lang) ctype sent heuristic callbacks_map exn parsePl exprPl
|
||||
touchType
|
||||
failed <- gu_exn_is_raised exn
|
||||
if failed
|
||||
then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
|
||||
@@ -618,6 +1044,26 @@ mkCallbacksMap concr callbacks pool = do
|
||||
|
||||
predict_callback _ _ _ = return nullPtr
|
||||
|
||||
complete :: Concr -- ^ the language with which we do word completion
|
||||
-> Type -- ^ the start category
|
||||
-> String -- ^ the input sentence
|
||||
-> String -- ^ prefix for the word to be completed
|
||||
-> [(String, Cat, Fun, Float)]
|
||||
complete lang (Type ctype _) sent prefix =
|
||||
unsafePerformIO $
|
||||
do pl <- gu_new_pool
|
||||
exn <- gu_new_exn pl
|
||||
sent <- newUtf8CString sent pl
|
||||
prefix <- newUtf8CString prefix pl
|
||||
enum <- pgf_complete (concr lang) ctype sent prefix exn pl
|
||||
failed <- gu_exn_is_raised exn
|
||||
if failed
|
||||
then do gu_pool_free pl
|
||||
return []
|
||||
else do fpl <- newForeignPtr gu_pool_finalizer pl
|
||||
tokens <- fromPgfTokenEnum enum fpl
|
||||
return tokens
|
||||
|
||||
lookupSentence :: Concr -- ^ the language with which we parse
|
||||
-> Type -- ^ the start category
|
||||
-> String -- ^ the input sentence
|
||||
@@ -862,9 +1308,8 @@ type LIndex = Int
|
||||
-- mark the beginning and the end of each constituent.
|
||||
data BracketedString
|
||||
= Leaf String -- ^ this is the leaf i.e. a single token
|
||||
| BIND -- ^ the surrounding tokens must be bound together
|
||||
| Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [BracketedString]
|
||||
-- ^ this is a bracket. The 'CId' is the category of
|
||||
| Bracket Cat {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex Fun [BracketedString]
|
||||
-- ^ this is a bracket. The 'Cat' is the category of
|
||||
-- the phrase. The 'FId' is an unique identifier for
|
||||
-- every phrase in the sentence. For context-free grammars
|
||||
-- i.e. without discontinuous constituents this identifier
|
||||
@@ -875,7 +1320,7 @@ data BracketedString
|
||||
-- the constituent index i.e. 'LIndex'. If the grammar is reduplicating
|
||||
-- then the constituent indices will be the same for all brackets
|
||||
-- that represents the same constituent.
|
||||
-- The second 'CId' is the name of the abstract function that generated
|
||||
-- The 'Fun' is the name of the abstract function that generated
|
||||
-- this phrase.
|
||||
|
||||
-- | Renders the bracketed string as a string where
|
||||
@@ -885,13 +1330,11 @@ showBracketedString :: BracketedString -> String
|
||||
showBracketedString = render . ppBracketedString
|
||||
|
||||
ppBracketedString (Leaf t) = text t
|
||||
ppBracketedString BIND = text "&+"
|
||||
ppBracketedString (Bracket cat fid index _ bss) = parens (text cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
|
||||
|
||||
-- | Extracts the sequence of tokens from the bracketed string
|
||||
flattenBracketedString :: BracketedString -> [String]
|
||||
flattenBracketedString (Leaf w) = [w]
|
||||
flattenBracketedString BIND = []
|
||||
flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString bss
|
||||
|
||||
bracketedLinearize :: Concr -> Expr -> [BracketedString]
|
||||
@@ -909,8 +1352,27 @@ bracketedLinearize lang e = unsafePerformIO $
|
||||
return []
|
||||
else do ctree <- pgf_lzr_wrap_linref ctree pl
|
||||
ref <- newIORef ([],[])
|
||||
withBracketLinFuncs ref exn $ \ppLinFuncs ->
|
||||
pgf_lzr_linearize (concr lang) ctree 0 ppLinFuncs pl
|
||||
allocaBytes (#size PgfLinFuncs) $ \pLinFuncs ->
|
||||
alloca $ \ppLinFuncs -> do
|
||||
fptr_symbol_token <- wrapSymbolTokenCallback (symbol_token ref)
|
||||
fptr_begin_phrase <- wrapPhraseCallback (begin_phrase ref)
|
||||
fptr_end_phrase <- wrapPhraseCallback (end_phrase ref)
|
||||
fptr_symbol_ne <- wrapSymbolNonExistCallback (symbol_ne exn)
|
||||
fptr_symbol_meta <- wrapSymbolMetaCallback (symbol_meta ref)
|
||||
(#poke PgfLinFuncs, symbol_token) pLinFuncs fptr_symbol_token
|
||||
(#poke PgfLinFuncs, begin_phrase) pLinFuncs fptr_begin_phrase
|
||||
(#poke PgfLinFuncs, end_phrase) pLinFuncs fptr_end_phrase
|
||||
(#poke PgfLinFuncs, symbol_ne) pLinFuncs fptr_symbol_ne
|
||||
(#poke PgfLinFuncs, symbol_bind) pLinFuncs nullPtr
|
||||
(#poke PgfLinFuncs, symbol_capit) pLinFuncs nullPtr
|
||||
(#poke PgfLinFuncs, symbol_meta) pLinFuncs fptr_symbol_meta
|
||||
poke ppLinFuncs pLinFuncs
|
||||
pgf_lzr_linearize (concr lang) ctree 0 ppLinFuncs pl
|
||||
freeHaskellFunPtr fptr_symbol_token
|
||||
freeHaskellFunPtr fptr_begin_phrase
|
||||
freeHaskellFunPtr fptr_end_phrase
|
||||
freeHaskellFunPtr fptr_symbol_ne
|
||||
freeHaskellFunPtr fptr_symbol_meta
|
||||
failed <- gu_exn_is_raised exn
|
||||
if failed
|
||||
then do is_nonexist <- gu_exn_caught exn gu_exn_type_PgfLinNonExist
|
||||
@@ -919,65 +1381,6 @@ bracketedLinearize lang e = unsafePerformIO $
|
||||
else throwExn exn
|
||||
else do (_,bs) <- readIORef ref
|
||||
return (reverse bs)
|
||||
|
||||
bracketedLinearizeAll :: Concr -> Expr -> [[BracketedString]]
|
||||
bracketedLinearizeAll lang e = unsafePerformIO $
|
||||
withGuPool $ \pl ->
|
||||
do exn <- gu_new_exn pl
|
||||
cts <- pgf_lzr_concretize (concr lang) (expr e) exn pl
|
||||
failed <- gu_exn_is_raised exn
|
||||
if failed
|
||||
then do touchExpr e
|
||||
throwExn exn
|
||||
else do ref <- newIORef ([],[])
|
||||
bss <- withBracketLinFuncs ref exn $ \ppLinFuncs ->
|
||||
collect ref cts ppLinFuncs exn pl
|
||||
touchExpr e
|
||||
return bss
|
||||
where
|
||||
collect ref cts ppLinFuncs exn pl = withGuPool $ \tmpPl -> do
|
||||
ctree <- alloca $ \ptr -> do gu_enum_next cts ptr tmpPl
|
||||
peek ptr
|
||||
if ctree == nullPtr
|
||||
then return []
|
||||
else do ctree <- pgf_lzr_wrap_linref ctree pl
|
||||
pgf_lzr_linearize (concr lang) ctree 0 ppLinFuncs pl
|
||||
failed <- gu_exn_is_raised exn
|
||||
if failed
|
||||
then do is_nonexist <- gu_exn_caught exn gu_exn_type_PgfLinNonExist
|
||||
if is_nonexist
|
||||
then collect ref cts ppLinFuncs exn pl
|
||||
else throwExn exn
|
||||
else do (_,bs) <- readIORef ref
|
||||
writeIORef ref ([],[])
|
||||
bss <- collect ref cts ppLinFuncs exn pl
|
||||
return (reverse bs : bss)
|
||||
|
||||
withBracketLinFuncs ref exn f =
|
||||
allocaBytes (#size PgfLinFuncs) $ \pLinFuncs ->
|
||||
alloca $ \ppLinFuncs -> do
|
||||
fptr_symbol_token <- wrapSymbolTokenCallback (symbol_token ref)
|
||||
fptr_begin_phrase <- wrapPhraseCallback (begin_phrase ref)
|
||||
fptr_end_phrase <- wrapPhraseCallback (end_phrase ref)
|
||||
fptr_symbol_ne <- wrapSymbolNonExistCallback (symbol_ne exn)
|
||||
fptr_symbol_bind <- wrapSymbolBindCallback (symbol_bind ref)
|
||||
fptr_symbol_meta <- wrapSymbolMetaCallback (symbol_meta ref)
|
||||
(#poke PgfLinFuncs, symbol_token) pLinFuncs fptr_symbol_token
|
||||
(#poke PgfLinFuncs, begin_phrase) pLinFuncs fptr_begin_phrase
|
||||
(#poke PgfLinFuncs, end_phrase) pLinFuncs fptr_end_phrase
|
||||
(#poke PgfLinFuncs, symbol_ne) pLinFuncs fptr_symbol_ne
|
||||
(#poke PgfLinFuncs, symbol_bind) pLinFuncs fptr_symbol_bind
|
||||
(#poke PgfLinFuncs, symbol_capit) pLinFuncs nullPtr
|
||||
(#poke PgfLinFuncs, symbol_meta) pLinFuncs fptr_symbol_meta
|
||||
poke ppLinFuncs pLinFuncs
|
||||
res <- f ppLinFuncs
|
||||
freeHaskellFunPtr fptr_symbol_token
|
||||
freeHaskellFunPtr fptr_begin_phrase
|
||||
freeHaskellFunPtr fptr_end_phrase
|
||||
freeHaskellFunPtr fptr_symbol_ne
|
||||
freeHaskellFunPtr fptr_symbol_bind
|
||||
freeHaskellFunPtr fptr_symbol_meta
|
||||
return res
|
||||
where
|
||||
symbol_token ref _ c_token = do
|
||||
(stack,bs) <- readIORef ref
|
||||
@@ -1000,22 +1403,17 @@ withBracketLinFuncs ref exn f =
|
||||
gu_exn_raise exn gu_exn_type_PgfLinNonExist
|
||||
return ()
|
||||
|
||||
symbol_bind ref _ = do
|
||||
(stack,bs) <- readIORef ref
|
||||
writeIORef ref (stack,BIND : bs)
|
||||
return ()
|
||||
|
||||
symbol_meta ref _ meta_id = do
|
||||
(stack,bs) <- readIORef ref
|
||||
writeIORef ref (stack,Leaf "?" : bs)
|
||||
|
||||
throwExn exn = do
|
||||
is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
|
||||
if is_exn
|
||||
then do c_msg <- (#peek GuExn, data.data) exn
|
||||
msg <- peekUtf8CString c_msg
|
||||
throwIO (PGFError msg)
|
||||
else do throwIO (PGFError "The abstract tree cannot be linearized")
|
||||
throwExn exn = do
|
||||
is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
|
||||
if is_exn
|
||||
then do c_msg <- (#peek GuExn, data.data) exn
|
||||
msg <- peekUtf8CString c_msg
|
||||
throwIO (PGFError msg)
|
||||
else do throwIO (PGFError "The abstract tree cannot be linearized")
|
||||
|
||||
alignWords :: Concr -> Expr -> [(String, [Int])]
|
||||
alignWords lang e = unsafePerformIO $
|
||||
@@ -1128,16 +1526,17 @@ categories p =
|
||||
name <- peekUtf8CString (castPtr key)
|
||||
writeIORef ref $! (name : names)
|
||||
|
||||
categoryContext :: PGF -> Cat -> [Hypo]
|
||||
categoryContext :: PGF -> Cat -> Maybe [Hypo]
|
||||
categoryContext p cat =
|
||||
unsafePerformIO $
|
||||
withGuPool $ \tmpPl ->
|
||||
do c_cat <- newUtf8CString cat tmpPl
|
||||
c_hypos <- pgf_category_context (pgf p) c_cat
|
||||
if c_hypos == nullPtr
|
||||
then return []
|
||||
then return Nothing
|
||||
else do n_hypos <- (#peek GuSeq, len) c_hypos
|
||||
peekHypos (c_hypos `plusPtr` (#offset GuSeq, data)) 0 n_hypos
|
||||
hypos <- peekHypos (c_hypos `plusPtr` (#offset GuSeq, data)) 0 n_hypos
|
||||
return (Just hypos)
|
||||
where
|
||||
peekHypos :: Ptr a -> Int -> Int -> IO [Hypo]
|
||||
peekHypos c_hypo i n
|
||||
@@ -1152,8 +1551,8 @@ categoryContext p cat =
|
||||
toBindType (#const PGF_BIND_TYPE_EXPLICIT) = Explicit
|
||||
toBindType (#const PGF_BIND_TYPE_IMPLICIT) = Implicit
|
||||
|
||||
categoryProb :: PGF -> Cat -> Float
|
||||
categoryProb p cat =
|
||||
categoryProbability :: PGF -> Cat -> Float
|
||||
categoryProbability p cat =
|
||||
unsafePerformIO $
|
||||
withGuPool $ \tmpPl ->
|
||||
do c_cat <- newUtf8CString cat tmpPl
|
||||
@@ -1164,7 +1563,7 @@ categoryProb p cat =
|
||||
-----------------------------------------------------------------------------
|
||||
-- Helper functions
|
||||
|
||||
fromPgfExprEnum :: Ptr GuEnum -> ForeignPtr GuPool -> IO () -> IO [(Expr, Float)]
|
||||
fromPgfExprEnum :: Ptr GuEnum -> ForeignPtr GuPool -> Touch -> IO [(Expr, Float)]
|
||||
fromPgfExprEnum enum fpl touch =
|
||||
do pgfExprProb <- alloca $ \ptr ->
|
||||
withForeignPtr fpl $ \pl ->
|
||||
@@ -1178,6 +1577,22 @@ fromPgfExprEnum enum fpl touch =
|
||||
prob <- (#peek PgfExprProb, prob) pgfExprProb
|
||||
return ((Expr expr touch,prob) : ts)
|
||||
|
||||
fromPgfTokenEnum :: Ptr GuEnum -> ForeignPtr GuPool -> IO [(String, Cat, Fun, Float)]
|
||||
fromPgfTokenEnum enum fpl =
|
||||
do pgfTokenProb <- alloca $ \ptr ->
|
||||
withForeignPtr fpl $ \pl ->
|
||||
do gu_enum_next enum ptr pl
|
||||
peek ptr
|
||||
if pgfTokenProb == nullPtr
|
||||
then do finalizeForeignPtr fpl
|
||||
return []
|
||||
else do tok <- (#peek PgfTokenProb, tok) pgfTokenProb >>= peekUtf8CString
|
||||
cat <- (#peek PgfTokenProb, cat) pgfTokenProb >>= peekUtf8CString
|
||||
fun <- (#peek PgfTokenProb, fun) pgfTokenProb >>= peekUtf8CString
|
||||
prob <- (#peek PgfTokenProb, prob) pgfTokenProb
|
||||
ts <- unsafeInterleaveIO (fromPgfTokenEnum enum fpl)
|
||||
return ((tok,cat,fun,prob) : ts)
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
-- Exceptions
|
||||
|
||||
@@ -1256,3 +1671,7 @@ capitalized' test s@(c:_) | test c =
|
||||
case span isSpace rest1 of
|
||||
(space,rest2) -> Just (name++space,rest2)
|
||||
capitalized' not s = Nothing
|
||||
|
||||
tag i
|
||||
| i < 0 = char 'r' <> int (negate i)
|
||||
| otherwise = char 'n' <> int i
|
||||
|
||||
@@ -8,19 +8,13 @@ import Foreign.C
|
||||
import Data.IORef
|
||||
import PGF2.FFI
|
||||
|
||||
-- | An data type that represents
|
||||
-- identifiers for functions and categories in PGF.
|
||||
type CId = String
|
||||
|
||||
wildCId = "_" :: CId
|
||||
|
||||
type Cat = CId -- ^ Name of syntactic category
|
||||
type Fun = CId -- ^ Name of function
|
||||
type Cat = String -- ^ Name of syntactic category
|
||||
type Fun = String -- ^ Name of function
|
||||
|
||||
data BindType =
|
||||
Explicit
|
||||
| Implicit
|
||||
deriving Show
|
||||
deriving (Show, Eq, Ord)
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
-- Expressions
|
||||
@@ -43,7 +37,7 @@ instance Eq Expr where
|
||||
return (res /= 0)
|
||||
|
||||
-- | Constructs an expression by lambda abstraction
|
||||
mkAbs :: BindType -> CId -> Expr -> Expr
|
||||
mkAbs :: BindType -> String -> Expr -> Expr
|
||||
mkAbs bind_type var (Expr body bodyTouch) =
|
||||
unsafePerformIO $ do
|
||||
exprPl <- gu_new_pool
|
||||
@@ -58,7 +52,7 @@ mkAbs bind_type var (Expr body bodyTouch) =
|
||||
Implicit -> (#const PGF_BIND_TYPE_IMPLICIT)
|
||||
|
||||
-- | Decomposes an expression into an abstraction and a body
|
||||
unAbs :: Expr -> Maybe (BindType, CId, Expr)
|
||||
unAbs :: Expr -> Maybe (BindType, String, Expr)
|
||||
unAbs (Expr expr touch) =
|
||||
unsafePerformIO $ do
|
||||
c_abs <- pgf_expr_unabs expr
|
||||
@@ -103,6 +97,17 @@ unApp (Expr expr touch) =
|
||||
c_args <- peekArray (fromIntegral arity) (appl `plusPtr` (#offset PgfApplication, args))
|
||||
return $ Just (fun, [Expr c_arg touch | c_arg <- c_args])
|
||||
|
||||
-- | Decomposes an expression into an application of a function
|
||||
unapply :: Expr -> (Expr,[Expr])
|
||||
unapply (Expr expr touch) =
|
||||
unsafePerformIO $
|
||||
withGuPool $ \pl -> do
|
||||
appl <- pgf_expr_unapply_ex expr pl
|
||||
efun <- (#peek PgfApplication, efun) appl
|
||||
arity <- (#peek PgfApplication, n_args) appl :: IO CInt
|
||||
c_args <- peekArray (fromIntegral arity) (appl `plusPtr` (#offset PgfApplication, args))
|
||||
return (Expr efun touch, [Expr c_arg touch | c_arg <- c_args])
|
||||
|
||||
-- | Constructs an expression from a string literal
|
||||
mkStr :: String -> Expr
|
||||
mkStr str =
|
||||
@@ -184,9 +189,6 @@ unMeta (Expr expr touch) =
|
||||
touch
|
||||
return (Just (fromIntegral (id :: CInt)))
|
||||
|
||||
-- | this functions is only for backward compatibility with the old Haskell runtime
|
||||
mkCId x = x
|
||||
|
||||
-- | parses a 'String' as an expression
|
||||
readExpr :: String -> Maybe Expr
|
||||
readExpr str =
|
||||
@@ -204,6 +206,22 @@ readExpr str =
|
||||
else do gu_pool_free exprPl
|
||||
return Nothing
|
||||
|
||||
pIdent :: ReadS String
|
||||
pIdent str =
|
||||
unsafePerformIO $
|
||||
withGuPool $ \tmpPl ->
|
||||
do ref <- newIORef (str,str,str)
|
||||
exn <- gu_new_exn tmpPl
|
||||
c_fetch_char <- wrapParserGetc (fetch_char ref)
|
||||
c_parser <- pgf_new_parser nullPtr c_fetch_char tmpPl tmpPl exn
|
||||
c_ident <- pgf_expr_parser_ident c_parser
|
||||
status <- gu_exn_is_raised exn
|
||||
if (not status && c_ident /= nullPtr)
|
||||
then do ident <- peekUtf8CString c_ident
|
||||
(str,_,_) <- readIORef ref
|
||||
return [(ident,str)]
|
||||
else do return []
|
||||
|
||||
pExpr :: ReadS Expr
|
||||
pExpr str =
|
||||
unsafePerformIO $
|
||||
@@ -221,19 +239,19 @@ pExpr str =
|
||||
return [(Expr c_expr (touchForeignPtr exprFPl),str)]
|
||||
else do gu_pool_free exprPl
|
||||
return []
|
||||
where
|
||||
fetch_char :: IORef (String,String,String) -> Ptr () -> (#type bool) -> Ptr GuExn -> IO (#type GuUCS)
|
||||
fetch_char ref _ mark exn = do
|
||||
(str1,str2,str3) <- readIORef ref
|
||||
let str1' = if mark /= 0
|
||||
then str2
|
||||
else str1
|
||||
case str3 of
|
||||
[] -> do writeIORef ref (str1',str3,[])
|
||||
gu_exn_raise exn gu_exn_type_GuEOF
|
||||
return (-1)
|
||||
(c:cs) -> do writeIORef ref (str1',str3,cs)
|
||||
return ((fromIntegral . fromEnum) c)
|
||||
|
||||
fetch_char :: IORef (String,String,String) -> Ptr () -> (#type bool) -> Ptr GuExn -> IO (#type GuUCS)
|
||||
fetch_char ref _ mark exn = do
|
||||
(str1,str2,str3) <- readIORef ref
|
||||
let str1' = if mark /= 0
|
||||
then str2
|
||||
else str1
|
||||
case str3 of
|
||||
[] -> do writeIORef ref (str1',str3,[])
|
||||
gu_exn_raise exn gu_exn_type_GuEOF
|
||||
return (-1)
|
||||
(c:cs) -> do writeIORef ref (str1',str3,cs)
|
||||
return ((fromIntegral . fromEnum) c)
|
||||
|
||||
foreign import ccall "pgf/expr.h pgf_new_parser"
|
||||
pgf_new_parser :: Ptr () -> (FunPtr ParserGetc) -> Ptr GuPool -> Ptr GuPool -> Ptr GuExn -> IO (Ptr PgfExprParser)
|
||||
@@ -241,16 +259,20 @@ foreign import ccall "pgf/expr.h pgf_new_parser"
|
||||
foreign import ccall "pgf/expr.h pgf_expr_parser_expr"
|
||||
pgf_expr_parser_expr :: Ptr PgfExprParser -> (#type bool) -> IO PgfExpr
|
||||
|
||||
foreign import ccall "pgf/expr.h pgf_expr_parser_ident"
|
||||
pgf_expr_parser_ident :: Ptr PgfExprParser -> IO CString
|
||||
|
||||
type ParserGetc = Ptr () -> (#type bool) -> Ptr GuExn -> IO (#type GuUCS)
|
||||
|
||||
foreign import ccall "wrapper"
|
||||
wrapParserGetc :: ParserGetc -> IO (FunPtr ParserGetc)
|
||||
|
||||
|
||||
-- | renders an expression as a 'String'. The list
|
||||
-- of identifiers is the list of all free variables
|
||||
-- in the expression in order reverse to the order
|
||||
-- of binding.
|
||||
showExpr :: [CId] -> Expr -> String
|
||||
showExpr :: [String] -> Expr -> String
|
||||
showExpr scope e =
|
||||
unsafePerformIO $
|
||||
withGuPool $ \tmpPl ->
|
||||
|
||||
@@ -15,12 +15,13 @@ import Control.Exception
|
||||
import GHC.Ptr
|
||||
import Data.Int
|
||||
import Data.Word
|
||||
import qualified Data.Map as Map
|
||||
|
||||
type Touch = IO ()
|
||||
|
||||
-- | An abstract data type representing multilingual grammar
|
||||
-- in Portable Grammar Format.
|
||||
data PGF = PGF {pgf :: Ptr PgfPGF, touchPGF :: Touch}
|
||||
data PGF = PGF {pgf :: Ptr PgfPGF, langs :: Map.Map String Concr, touchPGF :: Touch}
|
||||
data Concr = Concr {concr :: Ptr PgfConcr, touchConcr :: Touch}
|
||||
|
||||
------------------------------------------------------------------
|
||||
@@ -32,7 +33,6 @@ data GuIn
|
||||
data GuOut
|
||||
data GuKind
|
||||
data GuType
|
||||
data GuString
|
||||
data GuStringBuf
|
||||
data GuMap
|
||||
data GuMapItor
|
||||
@@ -266,7 +266,13 @@ foreign import ccall "pgf/pgf.h pgf_read"
|
||||
pgf_read :: CString -> Ptr GuPool -> Ptr GuExn -> IO (Ptr PgfPGF)
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_write"
|
||||
pgf_write :: Ptr PgfPGF -> CString -> Ptr GuExn -> IO ()
|
||||
pgf_write :: Ptr PgfPGF -> CSizeT -> Ptr (Ptr PgfConcr) -> CString -> Ptr GuExn -> IO ()
|
||||
|
||||
foreign import ccall "pgf/writer.h pgf_concrete_save"
|
||||
pgf_concrete_save :: Ptr PgfConcr -> CString -> Ptr GuExn -> IO ()
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_have_same_abstract"
|
||||
pgf_have_same_abstract :: Ptr PgfPGF -> Ptr PgfPGF -> (#type bool)
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_abstract_name"
|
||||
pgf_abstract_name :: Ptr PgfPGF -> IO CString
|
||||
@@ -292,6 +298,9 @@ foreign import ccall "pgf/pgf.h pgf_language_code"
|
||||
foreign import ccall "pgf/pgf.h pgf_iter_categories"
|
||||
pgf_iter_categories :: Ptr PgfPGF -> Ptr GuMapItor -> Ptr GuExn -> IO ()
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_concrete_fix_internals"
|
||||
pgf_concrete_fix_internals :: Ptr PgfConcr -> IO ()
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_start_cat"
|
||||
pgf_start_cat :: Ptr PgfPGF -> Ptr GuPool -> IO PgfType
|
||||
|
||||
@@ -340,7 +349,6 @@ foreign import ccall "pgf/pgf.h pgf_lzr_get_table"
|
||||
type SymbolTokenCallback = Ptr (Ptr PgfLinFuncs) -> CString -> IO ()
|
||||
type PhraseCallback = Ptr (Ptr PgfLinFuncs) -> CString -> CInt -> CSizeT -> CString -> IO ()
|
||||
type NonExistCallback = Ptr (Ptr PgfLinFuncs) -> IO ()
|
||||
type BindCallback = Ptr (Ptr PgfLinFuncs) -> IO ()
|
||||
type MetaCallback = Ptr (Ptr PgfLinFuncs) -> CInt -> IO ()
|
||||
|
||||
foreign import ccall "wrapper"
|
||||
@@ -352,9 +360,6 @@ foreign import ccall "wrapper"
|
||||
foreign import ccall "wrapper"
|
||||
wrapSymbolNonExistCallback :: NonExistCallback -> IO (FunPtr NonExistCallback)
|
||||
|
||||
foreign import ccall "wrapper"
|
||||
wrapSymbolBindCallback :: BindCallback -> IO (FunPtr BindCallback)
|
||||
|
||||
foreign import ccall "wrapper"
|
||||
wrapSymbolMetaCallback :: MetaCallback -> IO (FunPtr MetaCallback)
|
||||
|
||||
@@ -364,6 +369,9 @@ foreign import ccall "pgf/pgf.h pgf_align_words"
|
||||
foreign import ccall "pgf/pgf.h pgf_parse_with_heuristics"
|
||||
pgf_parse_with_heuristics :: Ptr PgfConcr -> PgfType -> CString -> Double -> Ptr PgfCallbacksMap -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum)
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_complete"
|
||||
pgf_complete :: Ptr PgfConcr -> PgfType -> CString -> CString -> Ptr GuExn -> Ptr GuPool -> IO (Ptr GuEnum)
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_lookup_sentence"
|
||||
pgf_lookup_sentence :: Ptr PgfConcr -> PgfType -> CString -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum)
|
||||
|
||||
@@ -425,6 +433,9 @@ foreign import ccall "pgf/pgf.h pgf_expr_apply"
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_expr_unapply"
|
||||
pgf_expr_unapply :: PgfExpr -> Ptr GuPool -> IO (Ptr PgfApplication)
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_expr_unapply_ex"
|
||||
pgf_expr_unapply_ex :: PgfExpr -> Ptr GuPool -> IO (Ptr PgfApplication)
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_expr_abs"
|
||||
pgf_expr_abs :: PgfBindType -> CString -> PgfExpr -> Ptr GuPool -> IO PgfExpr
|
||||
@@ -450,12 +461,12 @@ foreign import ccall "pgf/pgf.h pgf_expr_float"
|
||||
foreign import ccall "pgf/pgf.h pgf_expr_unlit"
|
||||
pgf_expr_unlit :: PgfExpr -> CInt -> IO (Ptr a)
|
||||
|
||||
foreign import ccall "pgf/expr.h pgf_expr_arity"
|
||||
pgf_expr_arity :: PgfExpr -> IO CInt
|
||||
|
||||
foreign import ccall "pgf/expr.h pgf_expr_eq"
|
||||
pgf_expr_eq :: PgfExpr -> PgfExpr -> IO CInt
|
||||
|
||||
foreign import ccall "pgf/expr.h pgf_type_eq"
|
||||
pgf_type_eq :: PgfType -> PgfType -> IO (#type bool)
|
||||
|
||||
foreign import ccall "pgf/expr.h pgf_expr_hash"
|
||||
pgf_expr_hash :: GuHash -> PgfExpr -> IO GuHash
|
||||
|
||||
@@ -499,7 +510,7 @@ foreign import ccall "pgf/pgf.h pgf_generate_all"
|
||||
pgf_generate_all :: Ptr PgfPGF -> PgfType -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum)
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_print"
|
||||
pgf_print :: Ptr PgfPGF -> Ptr GuOut -> Ptr GuExn -> IO ()
|
||||
pgf_print :: Ptr PgfPGF -> CSizeT -> Ptr (Ptr PgfConcr) -> Ptr GuOut -> Ptr GuExn -> IO ()
|
||||
|
||||
foreign import ccall "pgf/expr.h pgf_read_expr"
|
||||
pgf_read_expr :: Ptr GuIn -> Ptr GuPool -> Ptr GuPool -> Ptr GuExn -> IO PgfExpr
|
||||
|
||||
@@ -2,18 +2,25 @@
|
||||
|
||||
module PGF2.Internal(-- * Access the internal structures
|
||||
FId,isPredefFId,
|
||||
FunId,Token,Production(..),PArg(..),Symbol(..),Literal(..),
|
||||
FunId,SeqId,Token,Production(..),PArg(..),Symbol(..),Literal(..),
|
||||
globalFlags, abstrFlags, concrFlags,
|
||||
concrTotalCats, concrCategories, concrProductions,
|
||||
concrTotalFuns, concrFunction,
|
||||
concrTotalSeqs, concrSequence,
|
||||
|
||||
|
||||
-- * Byte code
|
||||
CodeLabel, Instr(..), IVal(..), TailInfo(..),
|
||||
|
||||
-- * Building new PGFs in memory
|
||||
build, eAbs, eApp, eMeta, eFun, eVar, eTyped, eImplArg, dTyp, hypo,
|
||||
build, Builder, B,
|
||||
eAbs, eApp, eMeta, eFun, eVar, eLit, eTyped, eImplArg, dTyp, hypo,
|
||||
AbstrInfo, newAbstr, ConcrInfo, newConcr, newPGF,
|
||||
|
||||
-- * Write an in-memory PGF to a file
|
||||
writePGF
|
||||
unionPGF, writePGF, writeConcr,
|
||||
|
||||
-- * Predefined concrete categories
|
||||
fidString, fidInt, fidFloat, fidVar, fidStart
|
||||
) where
|
||||
|
||||
#include <pgf/data.h>
|
||||
@@ -29,7 +36,7 @@ import Data.IORef
|
||||
import Data.Maybe(fromMaybe)
|
||||
import Data.List(sortBy)
|
||||
import Control.Exception(Exception,throwIO)
|
||||
import Control.Monad(foldM)
|
||||
import Control.Monad(foldM,when)
|
||||
import qualified Data.Map as Map
|
||||
|
||||
type Token = String
|
||||
@@ -50,7 +57,7 @@ data Production
|
||||
= PApply {-# UNPACK #-} !FunId [PArg]
|
||||
| PCoerce {-# UNPACK #-} !FId
|
||||
deriving (Eq,Ord,Show)
|
||||
data PArg = PArg [FId] {-# UNPACK #-} !FId deriving (Eq,Ord,Show)
|
||||
data PArg = PArg [(FId,FId)] {-# UNPACK #-} !FId deriving (Eq,Ord,Show)
|
||||
type FunId = Int
|
||||
type SeqId = Int
|
||||
data Literal =
|
||||
@@ -59,6 +66,42 @@ data Literal =
|
||||
| LFlt Double -- ^ a floating point constant
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
type CodeLabel = Int
|
||||
|
||||
data Instr
|
||||
= CHECK_ARGS {-# UNPACK #-} !Int
|
||||
| CASE Fun {-# UNPACK #-} !CodeLabel
|
||||
| CASE_LIT Literal {-# UNPACK #-} !CodeLabel
|
||||
| SAVE {-# UNPACK #-} !Int
|
||||
| ALLOC {-# UNPACK #-} !Int
|
||||
| PUT_CONSTR Fun
|
||||
| PUT_CLOSURE {-# UNPACK #-} !CodeLabel
|
||||
| PUT_LIT Literal
|
||||
| SET IVal
|
||||
| SET_PAD
|
||||
| PUSH_FRAME
|
||||
| PUSH IVal
|
||||
| TUCK IVal {-# UNPACK #-} !Int
|
||||
| EVAL IVal TailInfo
|
||||
| DROP {-# UNPACK #-} !Int
|
||||
| JUMP {-# UNPACK #-} !CodeLabel
|
||||
| FAIL
|
||||
| PUSH_ACCUM Literal
|
||||
| POP_ACCUM
|
||||
| ADD
|
||||
|
||||
data IVal
|
||||
= HEAP {-# UNPACK #-} !Int
|
||||
| ARG_VAR {-# UNPACK #-} !Int
|
||||
| FREE_VAR {-# UNPACK #-} !Int
|
||||
| GLOBAL Fun
|
||||
deriving Eq
|
||||
|
||||
data TailInfo
|
||||
= RecCall
|
||||
| TailCall {-# UNPACK #-} !Int
|
||||
| UpdateCall
|
||||
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
-- Access the internal structures
|
||||
@@ -181,7 +224,7 @@ concrProductions c fid = unsafePerformIO $ do
|
||||
hypos <- peekSequence (deRef peekFId) (#size int) c_hypos
|
||||
c_ccat <- (#peek PgfPArg, ccat) ptr
|
||||
fid <- peekFId c_ccat
|
||||
return (PArg hypos fid)
|
||||
return (PArg [(fid,fid) | fid <- hypos] fid)
|
||||
|
||||
peekFId c_ccat = do
|
||||
c_fid <- (#peek PgfCCat, fid) c_ccat
|
||||
@@ -197,6 +240,9 @@ concrTotalFuns c = unsafePerformIO $ do
|
||||
concrFunction :: Concr -> FunId -> (Fun,[SeqId])
|
||||
concrFunction c funid = unsafePerformIO $ do
|
||||
c_cncfuns <- (#peek PgfConcr, cncfuns) (concr c)
|
||||
c_len <- (#peek GuSeq, len) c_cncfuns
|
||||
when (funid >= fromIntegral (c_len :: CSizeT)) $
|
||||
throwIO (PGFError ("Invalid concrete function: F"++show funid))
|
||||
c_cncfun <- peek (c_cncfuns `plusPtr` ((#offset GuSeq, data)+funid*(#size PgfCncFun*)))
|
||||
c_absfun <- (#peek PgfCncFun, absfun) c_cncfun
|
||||
c_name <- (#peek PgfAbsFun, name) c_absfun
|
||||
@@ -220,6 +266,9 @@ concrTotalSeqs c = unsafePerformIO $ do
|
||||
concrSequence :: Concr -> SeqId -> [Symbol]
|
||||
concrSequence c seqid = unsafePerformIO $ do
|
||||
c_sequences <- (#peek PgfConcr, sequences) (concr c)
|
||||
c_len <- (#peek GuSeq, len) c_sequences
|
||||
when (seqid >= fromIntegral (c_len :: CSizeT)) $
|
||||
throwIO (PGFError ("Invalid concrete sequence: S"++show seqid))
|
||||
let c_sequence = c_sequences `plusPtr` ((#offset GuSeq, data)+seqid*(#size PgfSequence))
|
||||
c_syms <- (#peek PgfSequence, syms) c_sequence
|
||||
res <- peekSequence (deRef peekSymbol) (#size GuVariant) c_syms
|
||||
@@ -288,6 +337,9 @@ isPredefFId = (`elem` [fidString, fidInt, fidFloat, fidVar])
|
||||
data Builder s = Builder (Ptr GuPool) Touch
|
||||
newtype B s a = B a
|
||||
|
||||
instance Functor (B s) where
|
||||
fmap f (B x) = B (f x)
|
||||
|
||||
build :: (forall s . (?builder :: Builder s) => B s a) -> a
|
||||
build f =
|
||||
unsafePerformIO $ do
|
||||
@@ -376,6 +428,21 @@ eVar var =
|
||||
where
|
||||
(Builder pool touch) = ?builder
|
||||
|
||||
eLit :: (?builder :: Builder s) => Literal -> B s Expr
|
||||
eLit value =
|
||||
unsafePerformIO $
|
||||
alloca $ \pptr -> do
|
||||
ptr <- gu_alloc_variant (#const PGF_EXPR_LIT)
|
||||
(fromIntegral (#size PgfExprLit))
|
||||
(#const gu_alignof(PgfExprLit))
|
||||
pptr pool
|
||||
c_value <- newLiteral value pool
|
||||
(#poke PgfExprLit, lit) ptr c_value
|
||||
e <- peek pptr
|
||||
return (B (Expr e touch))
|
||||
where
|
||||
(Builder pool touch) = ?builder
|
||||
|
||||
eTyped :: (?builder :: Builder s) => B s Expr -> B s Type -> B s Expr
|
||||
eTyped (B (Expr e _)) (B (Type ty _)) =
|
||||
unsafePerformIO $
|
||||
@@ -405,7 +472,7 @@ eImplArg (B (Expr e _)) =
|
||||
where
|
||||
(Builder pool touch) = ?builder
|
||||
|
||||
hypo :: BindType -> CId -> B s Type -> (B s Hypo)
|
||||
hypo :: BindType -> String -> B s Type -> (B s Hypo)
|
||||
hypo bind_type var (B ty) = B (bind_type,var,ty)
|
||||
|
||||
dTyp :: (?builder :: Builder s) => [B s Hypo] -> Cat -> [B s Expr] -> B s Type
|
||||
@@ -450,14 +517,14 @@ data AbstrInfo = AbstrInfo (Ptr GuSeq) (Ptr GuSeq) (Map.Map String (Ptr PgfAbsCa
|
||||
newAbstr :: (?builder :: Builder s) => [(String,Literal)] ->
|
||||
[(Cat,[B s Hypo],Float)] ->
|
||||
[(Fun,B s Type,Int,Float)] ->
|
||||
AbstrInfo
|
||||
B s AbstrInfo
|
||||
newAbstr aflags cats funs = unsafePerformIO $ do
|
||||
c_aflags <- newFlags aflags pool
|
||||
(c_cats,abscats) <- newAbsCats (sortByFst3 cats) pool
|
||||
(c_funs,absfuns) <- newAbsFuns (sortByFst4 funs) pool
|
||||
c_abs_lin_fun <- newAbsLinFun
|
||||
c_non_lexical_buf <- gu_make_buf (#size PgfProductionIdxEntry) pool
|
||||
return (AbstrInfo c_aflags c_cats abscats c_funs absfuns c_abs_lin_fun c_non_lexical_buf touch)
|
||||
return (B (AbstrInfo c_aflags c_cats abscats c_funs absfuns c_abs_lin_fun c_non_lexical_buf touch))
|
||||
where
|
||||
(Builder pool touch) = ?builder
|
||||
|
||||
@@ -525,7 +592,7 @@ newAbstr aflags cats funs = unsafePerformIO $ do
|
||||
|
||||
data ConcrInfo = ConcrInfo (Ptr GuSeq) (Ptr GuMap) (Ptr GuMap) (Ptr GuSeq) (Ptr GuSeq) (Ptr GuMap) (Ptr PgfConcr -> Ptr GuPool -> IO ()) CInt
|
||||
|
||||
newConcr :: (?builder :: Builder s) => AbstrInfo ->
|
||||
newConcr :: (?builder :: Builder s) => B s AbstrInfo ->
|
||||
[(String,Literal)] -> -- ^ Concrete syntax flags
|
||||
[(String,String)] -> -- ^ Printnames
|
||||
[(FId,[FunId])] -> -- ^ Lindefs
|
||||
@@ -535,8 +602,8 @@ newConcr :: (?builder :: Builder s) => AbstrInfo ->
|
||||
[[Symbol]] -> -- ^ Sequences (must be sorted)
|
||||
[(Cat,FId,FId,[String])] -> -- ^ Concrete categories
|
||||
FId -> -- ^ The total count of the categories
|
||||
ConcrInfo
|
||||
newConcr (AbstrInfo _ _ abscats _ absfuns c_abs_lin_fun c_non_lexical_buf _) cflags printnames lindefs linrefs prods cncfuns sequences cnccats total_cats = unsafePerformIO $ do
|
||||
B s ConcrInfo
|
||||
newConcr (B (AbstrInfo _ _ abscats _ absfuns c_abs_lin_fun c_non_lexical_buf _)) cflags printnames lindefs linrefs prods cncfuns sequences cnccats total_cats = unsafePerformIO $ do
|
||||
c_cflags <- newFlags cflags pool
|
||||
c_printname <- newMap (#size GuString) gu_string_hasher newUtf8CString
|
||||
(#size GuString) (pokeString pool)
|
||||
@@ -553,12 +620,12 @@ newConcr (AbstrInfo _ _ abscats _ absfuns c_abs_lin_fun c_non_lexical_buf _) cf
|
||||
mapM_ (addLinrefs c_ccats funs_ptr) linrefs
|
||||
mk_index <- foldM (addProductions c_ccats funs_ptr c_non_lexical_buf) (\concr pool -> return ()) prods
|
||||
c_cnccats <- newMap (#size GuString) gu_string_hasher newUtf8CString (#size PgfCncCat*) (pokeCncCat c_ccats) (map (\v@(k,_,_,_) -> (k,v)) cnccats) pool
|
||||
return (ConcrInfo c_cflags c_printname c_ccats c_cncfuns c_seqs c_cnccats mk_index (fromIntegral total_cats))
|
||||
return (B (ConcrInfo c_cflags c_printname c_ccats c_cncfuns c_seqs c_cnccats mk_index (fromIntegral total_cats)))
|
||||
where
|
||||
(Builder pool touch) = ?builder
|
||||
|
||||
pokeCncFun seqs_ptr ptr cncfun = do
|
||||
c_cncfun <- newCncFun absfuns nullPtr cncfun pool
|
||||
pokeCncFun seqs_ptr ptr cncfun@(funid,_) = do
|
||||
c_cncfun <- newCncFun absfuns seqs_ptr cncfun pool
|
||||
poke ptr c_cncfun
|
||||
|
||||
pokeSequence c_seq syms = do
|
||||
@@ -583,7 +650,9 @@ newConcr (AbstrInfo _ _ abscats _ absfuns c_abs_lin_fun c_non_lexical_buf _) cf
|
||||
(#poke PgfCCat, prods) c_ccat c_prods
|
||||
pokeProductions c_ccat (c_prods `plusPtr` (#offset GuSeq, data)) 0 (n_prods-1) mk_index prods
|
||||
where
|
||||
pokeProductions c_ccat ptr top bot mk_index [] = return mk_index
|
||||
pokeProductions c_ccat ptr top bot mk_index [] = do
|
||||
(#poke PgfCCat, n_synprods) c_ccat (fromIntegral top :: CSizeT)
|
||||
return mk_index
|
||||
pokeProductions c_ccat ptr top bot mk_index (prod:prods) = do
|
||||
(is_lexical,c_prod) <- newProduction c_ccats funs_ptr c_non_lexical_buf prod pool
|
||||
let mk_index' = \concr pool -> do pgf_parser_index concr c_ccat c_prod is_lexical pool
|
||||
@@ -596,27 +665,29 @@ newConcr (AbstrInfo _ _ abscats _ absfuns c_abs_lin_fun c_non_lexical_buf _) cf
|
||||
pokeProductions c_ccat ptr top (bot-1) mk_index' prods
|
||||
|
||||
pokeRefDefFunId funs_ptr ptr funid = do
|
||||
let c_fun = funs_ptr `plusPtr` (funid * (#size PgfCncFun))
|
||||
c_fun <- peek (funs_ptr `plusPtr` (funid * (#size PgfCncFun*)))
|
||||
(#poke PgfCncFun, absfun) c_fun c_abs_lin_fun
|
||||
poke ptr c_fun
|
||||
|
||||
pokeCncCat c_ccats ptr (name,start,end,labels) = do
|
||||
let n_lins = fromIntegral (length labels) :: CSizeT
|
||||
c_cnccat <- gu_malloc_aligned pool
|
||||
c_cnccat <- gu_malloc_aligned pool
|
||||
((#size PgfCncCat)+n_lins*(#size GuString))
|
||||
(#const gu_flex_alignof(PgfCncCat))
|
||||
case Map.lookup name abscats of
|
||||
Just c_abscat -> (#poke PgfCncCat, abscat) c_cnccat c_abscat
|
||||
Nothing -> throwIO (PGFError ("The category "++name++" is not in the abstract syntax"))
|
||||
c_ccats <- newSequence (#size PgfCCat*) pokeFId [start..end] pool
|
||||
c_ccats <- newSequence (#size PgfCCat*) (pokeFId c_cnccat) [start..end] pool
|
||||
(#poke PgfCncCat, cats) c_cnccat c_ccats
|
||||
(#poke PgfCncCat, n_lins) c_cnccat n_lins
|
||||
pokeLabels (c_cnccat `plusPtr` (#offset PgfCncCat, labels)) labels
|
||||
poke ptr c_cnccat
|
||||
where
|
||||
pokeFId ptr fid = do
|
||||
pokeFId c_cnccat ptr fid = do
|
||||
c_ccat <- getCCat c_ccats fid pool
|
||||
(#poke PgfCCat, cnccat) c_ccat c_cnccat
|
||||
poke ptr c_ccat
|
||||
|
||||
|
||||
pokeLabels ptr [] = return []
|
||||
pokeLabels ptr (l:ls) = do
|
||||
c_l <- newUtf8CString l pool
|
||||
@@ -626,10 +697,10 @@ newConcr (AbstrInfo _ _ abscats _ absfuns c_abs_lin_fun c_non_lexical_buf _) cf
|
||||
|
||||
newPGF :: (?builder :: Builder s) => [(String,Literal)] ->
|
||||
AbsName ->
|
||||
AbstrInfo ->
|
||||
[(ConcName,ConcrInfo)] ->
|
||||
B s AbstrInfo ->
|
||||
[(ConcName,B s ConcrInfo)] ->
|
||||
B s PGF
|
||||
newPGF gflags absname (AbstrInfo c_aflags c_cats _ c_funs _ c_abs_lin_fun _ _) concrs =
|
||||
newPGF gflags absname (B (AbstrInfo c_aflags c_cats _ c_funs _ c_abs_lin_fun _ _)) concrs =
|
||||
unsafePerformIO $ do
|
||||
ptr <- gu_malloc_aligned pool
|
||||
(#size PgfPGF)
|
||||
@@ -637,7 +708,8 @@ newPGF gflags absname (AbstrInfo c_aflags c_cats _ c_funs _ c_abs_lin_fun _ _) c
|
||||
c_gflags <- newFlags gflags pool
|
||||
c_absname <- newUtf8CString absname pool
|
||||
let c_abstr = ptr `plusPtr` (#offset PgfPGF, abstract)
|
||||
c_concrs <- newSequence (#size PgfConcr) (pokeConcr c_abstr) concrs pool
|
||||
c_concrs <- gu_make_seq (#size PgfConcr) (fromIntegral (length concrs)) pool
|
||||
langs <- pokeConcrs c_abstr (c_concrs `plusPtr` (#offset GuSeq, data)) Map.empty concrs
|
||||
(#poke PgfPGF, major_version) ptr (2 :: (#type uint16_t))
|
||||
(#poke PgfPGF, minor_version) ptr (0 :: (#type uint16_t))
|
||||
(#poke PgfPGF, gflags) ptr c_gflags
|
||||
@@ -648,11 +720,18 @@ newPGF gflags absname (AbstrInfo c_aflags c_cats _ c_funs _ c_abs_lin_fun _ _) c
|
||||
(#poke PgfPGF, abstract.abs_lin_fun) ptr c_abs_lin_fun
|
||||
(#poke PgfPGF, concretes) ptr c_concrs
|
||||
(#poke PgfPGF, pool) ptr pool
|
||||
return (B (PGF ptr touch))
|
||||
return (B (PGF ptr langs touch))
|
||||
where
|
||||
(Builder pool touch) = ?builder
|
||||
|
||||
pokeConcr c_abstr ptr (name, ConcrInfo c_cflags c_printnames c_ccats c_cncfuns c_seqs c_cnccats mk_index c_total_cats) = do
|
||||
pokeConcrs c_abstr ptr langs [] = return langs
|
||||
pokeConcrs c_abstr ptr langs ((name, B info):xs) = do
|
||||
pokeConcr c_abstr ptr name info
|
||||
pokeConcrs c_abstr (ptr `plusPtr` (fromIntegral (#size PgfConcr)))
|
||||
(Map.insert name (Concr ptr touch) langs)
|
||||
xs
|
||||
|
||||
pokeConcr c_abstr ptr name (ConcrInfo c_cflags c_printnames c_ccats c_cncfuns c_seqs c_cnccats mk_index c_total_cats) = do
|
||||
c_name <- newUtf8CString name pool
|
||||
c_fun_indices <- gu_make_map (#size GuString) gu_string_hasher
|
||||
(#size PgfCncOverloadMap*) gu_null_struct
|
||||
@@ -674,7 +753,9 @@ newPGF gflags absname (AbstrInfo c_aflags c_cats _ c_funs _ c_abs_lin_fun _ _) c
|
||||
(#poke PgfConcr, cnccats) ptr c_cnccats
|
||||
(#poke PgfConcr, total_cats) ptr c_total_cats
|
||||
(#poke PgfConcr, pool) ptr nullPtr
|
||||
|
||||
mk_index ptr pool
|
||||
pgf_concrete_fix_internals ptr
|
||||
|
||||
|
||||
newFlags :: [(String,Literal)] -> Ptr GuPool -> IO (Ptr GuSeq)
|
||||
@@ -715,15 +796,15 @@ newLiteral (LFlt val) pool =
|
||||
|
||||
|
||||
newProduction :: Ptr GuMap -> Ptr PgfCncFun -> Ptr GuBuf -> Production -> Ptr GuPool -> IO ((#type bool), GuVariant)
|
||||
newProduction c_ccats funs_ptr c_non_lexical_buf (PApply fun_id args) pool =
|
||||
newProduction c_ccats funs_ptr c_non_lexical_buf (PApply funid args) pool =
|
||||
alloca $ \pptr -> do
|
||||
let c_fun = funs_ptr `plusPtr` (fun_id * (#size PgfCncFun))
|
||||
c_fun <- peek (funs_ptr `plusPtr` (funid * (#size PgfCncFun*)))
|
||||
c_args <- newSequence (#size PgfPArg) pokePArg args pool
|
||||
ptr <- gu_alloc_variant (#const PGF_PRODUCTION_APPLY)
|
||||
(fromIntegral (#size PgfProductionApply))
|
||||
(#const gu_alignof(PgfProductionApply))
|
||||
pptr pool
|
||||
(#poke PgfProductionApply, fun) ptr c_fun
|
||||
(#poke PgfProductionApply, fun) ptr (c_fun :: Ptr PgfCncFun)
|
||||
(#poke PgfProductionApply, args) ptr c_args
|
||||
is_lexical <- pgf_production_is_lexical ptr c_non_lexical_buf pool
|
||||
c_prod <- peek pptr
|
||||
@@ -732,7 +813,7 @@ newProduction c_ccats funs_ptr c_non_lexical_buf (PApply fun_id args) pool =
|
||||
pokePArg ptr (PArg hypos ccat) = do
|
||||
c_ccat <- getCCat c_ccats ccat pool
|
||||
(#poke PgfPArg, ccat) ptr c_ccat
|
||||
c_hypos <- newSequence (#size PgfCCat*) pokeCCat hypos pool
|
||||
c_hypos <- newSequence (#size PgfCCat*) pokeCCat (map snd hypos) pool
|
||||
(#poke PgfPArg, hypos) ptr c_hypos
|
||||
|
||||
pokeCCat ptr ccat = do
|
||||
@@ -907,12 +988,18 @@ newMap key_size hasher newKey elem_size pokeElem values pool = do
|
||||
insert map values pool
|
||||
|
||||
|
||||
unionPGF :: PGF -> PGF -> Maybe PGF
|
||||
unionPGF one@(PGF ptr1 langs1 touch1) two@(PGF ptr2 langs2 touch2)
|
||||
| pgf_have_same_abstract ptr1 ptr2 /= 0 = Just (PGF ptr1 (Map.union langs1 langs2) (touch1 >> touch2))
|
||||
| otherwise = Nothing
|
||||
|
||||
writePGF :: FilePath -> PGF -> IO ()
|
||||
writePGF fpath p = do
|
||||
pool <- gu_new_pool
|
||||
exn <- gu_new_exn pool
|
||||
withCString fpath $ \c_fpath ->
|
||||
pgf_write (pgf p) c_fpath exn
|
||||
withArrayLen ((map concr . Map.elems . languages) p) $ \n_concrs concrs ->
|
||||
withCString fpath $ \c_fpath ->
|
||||
pgf_write (pgf p) (fromIntegral n_concrs) concrs c_fpath exn
|
||||
touchPGF p
|
||||
failed <- gu_exn_is_raised exn
|
||||
if failed
|
||||
@@ -927,6 +1014,26 @@ writePGF fpath p = do
|
||||
else do gu_pool_free pool
|
||||
return ()
|
||||
|
||||
writeConcr :: FilePath -> Concr -> IO ()
|
||||
writeConcr fpath c = do
|
||||
pool <- gu_new_pool
|
||||
exn <- gu_new_exn pool
|
||||
withCString fpath $ \c_fpath ->
|
||||
pgf_concrete_save (concr c) c_fpath exn
|
||||
touchConcr c
|
||||
failed <- gu_exn_is_raised exn
|
||||
if failed
|
||||
then do is_errno <- gu_exn_caught exn gu_exn_type_GuErrno
|
||||
if is_errno
|
||||
then do perrno <- (#peek GuExn, data.data) exn
|
||||
errno <- peek perrno
|
||||
gu_pool_free pool
|
||||
ioError (errnoToIOError "writeConcr" (Errno errno) Nothing (Just fpath))
|
||||
else do gu_pool_free pool
|
||||
throwIO (PGFError "The grammar cannot be stored")
|
||||
else do gu_pool_free pool
|
||||
return ()
|
||||
|
||||
sortByFst = sortBy (\(x,_) (y,_) -> compare x y)
|
||||
sortByFst3 = sortBy (\(x,_,_) (y,_,_) -> compare x y)
|
||||
sortByFst4 = sortBy (\(x,_,_,_) (y,_,_,_) -> compare x y)
|
||||
|
||||
@@ -17,11 +17,18 @@ import PGF2.FFI
|
||||
data Type = Type {typ :: PgfExpr, touchType :: Touch}
|
||||
|
||||
-- | 'Hypo' represents a hypothesis in a type i.e. in the type A -> B, A is the hypothesis
|
||||
type Hypo = (BindType,CId,Type)
|
||||
type Hypo = (BindType,String,Type)
|
||||
|
||||
instance Show Type where
|
||||
show = showType []
|
||||
|
||||
instance Eq Type where
|
||||
(Type ty1 ty1_touch) == (Type ty2 ty2_touch) =
|
||||
unsafePerformIO $ do
|
||||
res <- pgf_type_eq ty1 ty2
|
||||
ty1_touch >> ty2_touch
|
||||
return (res /= 0)
|
||||
|
||||
-- | parses a 'String' as a type
|
||||
readType :: String -> Maybe Type
|
||||
readType str =
|
||||
@@ -43,7 +50,7 @@ readType str =
|
||||
-- of identifiers is the list of all free variables
|
||||
-- in the type in order reverse to the order
|
||||
-- of binding.
|
||||
showType :: [CId] -> Type -> String
|
||||
showType :: [String] -> Type -> String
|
||||
showType scope (Type ty touch) =
|
||||
unsafePerformIO $
|
||||
withGuPool $ \tmpPl ->
|
||||
@@ -59,7 +66,7 @@ showType scope (Type ty touch) =
|
||||
-- a list of arguments for the category. The operation
|
||||
-- @mkType [h_1,...,h_n] C [e_1,...,e_m]@ will create
|
||||
-- @h_1 -> ... -> h_n -> C e_1 ... e_m@
|
||||
mkType :: [Hypo] -> CId -> [Expr] -> Type
|
||||
mkType :: [Hypo] -> String -> [Expr] -> Type
|
||||
mkType hypos cat exprs = unsafePerformIO $ do
|
||||
typPl <- gu_new_pool
|
||||
let n_exprs = fromIntegral (length exprs) :: CSizeT
|
||||
@@ -94,7 +101,7 @@ touchHypo (_,_,ty) = touchType ty
|
||||
|
||||
-- | Decomposes a type into a list of hypothesises, a category and
|
||||
-- a list of arguments for the category.
|
||||
unType :: Type -> ([Hypo],CId,[Expr])
|
||||
unType :: Type -> ([Hypo],String,[Expr])
|
||||
unType (Type c_type touch) = unsafePerformIO $ do
|
||||
cid <- (#peek PgfType, cid) c_type >>= peekUtf8CString
|
||||
c_hypos <- (#peek PgfType, hypos) c_type
|
||||
@@ -127,7 +134,7 @@ unType (Type c_type touch) = unsafePerformIO $ do
|
||||
-- of identifiers is the list of all free variables
|
||||
-- in the type in order reverse to the order
|
||||
-- of binding.
|
||||
showContext :: [CId] -> [Hypo] -> String
|
||||
showContext :: [String] -> [Hypo] -> String
|
||||
showContext scope hypos =
|
||||
unsafePerformIO $
|
||||
withGuPool $ \tmpPl ->
|
||||
|
||||
@@ -1,31 +1,31 @@
|
||||
name: pgf2
|
||||
version: 0.1.0.0
|
||||
-- synopsis:
|
||||
-- description:
|
||||
-- synopsis:
|
||||
-- description:
|
||||
homepage: http://www.grammaticalframework.org
|
||||
license: LGPL-3
|
||||
--license-file: LICENSE
|
||||
author: Krasimir Angelov, Inari
|
||||
maintainer:
|
||||
-- copyright:
|
||||
maintainer:
|
||||
-- copyright:
|
||||
category: Language
|
||||
build-type: Simple
|
||||
extra-source-files: README
|
||||
cabal-version: >=1.10
|
||||
|
||||
library
|
||||
exposed-modules: PGF2, PGF2.Internal, SG
|
||||
exposed-modules: PGF2, PGF2.Internal, SG,
|
||||
-- backwards compatibility API:
|
||||
--, PGF, PGF.Internal
|
||||
PGF, PGF.Internal
|
||||
other-modules: PGF2.FFI, PGF2.Expr, PGF2.Type, SG.FFI
|
||||
build-depends: base >=4.3,
|
||||
containers, pretty
|
||||
-- hs-source-dirs:
|
||||
build-depends: base >=4.3, containers, pretty, array
|
||||
-- hs-source-dirs:
|
||||
default-language: Haskell2010
|
||||
build-tools: hsc2hs
|
||||
|
||||
extra-libraries: sg pgf gu
|
||||
cc-options: -std=c99
|
||||
default-language: Haskell2010
|
||||
c-sources: utils.c
|
||||
|
||||
executable pgf-shell
|
||||
|
||||
@@ -16,8 +16,7 @@
|
||||
module PGF(
|
||||
-- * PGF
|
||||
PGF,
|
||||
readPGF,
|
||||
parsePGF,
|
||||
readPGF, showPGF,
|
||||
|
||||
-- * Identifiers
|
||||
CId, mkCId, wildCId,
|
||||
@@ -54,12 +53,14 @@ module PGF(
|
||||
mkDouble, unDouble,
|
||||
mkFloat, unFloat,
|
||||
mkMeta, unMeta,
|
||||
exprSubstitute,
|
||||
|
||||
-- extra
|
||||
pExpr, exprSize, exprFunctions,
|
||||
|
||||
-- * Operations
|
||||
-- ** Linearization
|
||||
linearize, linearizeAllLang, linearizeAll, bracketedLinearize, bracketedLinearizeAll, tabularLinearizes,
|
||||
linearize, linearizeAllLang, linearizeAll, bracketedLinearize, tabularLinearizes,
|
||||
groupResults, -- lins of trees by language, removing duplicates
|
||||
showPrintName,
|
||||
|
||||
@@ -166,17 +167,18 @@ import PGF.Macros
|
||||
import PGF.Expr (Tree)
|
||||
import PGF.Morphology
|
||||
import PGF.Data
|
||||
import PGF.Binary ()
|
||||
import PGF.Binary()
|
||||
import qualified PGF.Forest as Forest
|
||||
import qualified PGF.Parse as Parse
|
||||
import PGF.Utilities(replace)
|
||||
import PGF.Printer
|
||||
import Text.PrettyPrint
|
||||
|
||||
--import Data.Char
|
||||
import qualified Data.Map as Map
|
||||
--import qualified Data.IntMap as IntMap
|
||||
--import Data.Maybe
|
||||
import Data.Binary
|
||||
import Data.ByteString.Lazy (ByteString)
|
||||
import Data.List(mapAccumL)
|
||||
--import System.Random (newStdGen)
|
||||
--import Control.Monad
|
||||
@@ -192,11 +194,6 @@ import Text.PrettyPrint
|
||||
-- > $ gf -make <grammar file name>
|
||||
readPGF :: FilePath -> IO PGF
|
||||
|
||||
-- | Like @readPGF@ but you have the manage file-handling.
|
||||
--
|
||||
-- @since 3.9.1
|
||||
parsePGF :: ByteString -> PGF
|
||||
|
||||
-- | Tries to parse the given string in the specified language
|
||||
-- and to produce abstract syntax expression.
|
||||
parse :: PGF -> Language -> Type -> String -> [Tree]
|
||||
@@ -261,9 +258,9 @@ functionType :: PGF -> CId -> Maybe Type
|
||||
-- Implementation
|
||||
---------------------------------------------------
|
||||
|
||||
readPGF = decodeFile
|
||||
readPGF f = decodeFile f
|
||||
|
||||
parsePGF = decode
|
||||
showPGF pgf = render (ppPGF pgf)
|
||||
|
||||
parse pgf lang typ s =
|
||||
case parse_ pgf lang typ (Just 4) s of
|
||||
|
||||
@@ -2,7 +2,7 @@ module PGF.ByteCode(Literal(..),
|
||||
CodeLabel, Instr(..), IVal(..), TailInfo(..),
|
||||
ppLit, ppCode, ppInstr
|
||||
) where
|
||||
import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint
|
||||
|
||||
import PGF.CId
|
||||
import Text.PrettyPrint
|
||||
|
||||
|
||||
@@ -74,7 +74,7 @@ data Production
|
||||
deriving (Eq,Ord,Show)
|
||||
data PArg = PArg [(FId,FId)] {-# UNPACK #-} !FId deriving (Eq,Ord,Show)
|
||||
data CncCat = CncCat {-# UNPACK #-} !FId {-# UNPACK #-} !FId {-# UNPACK #-} !(Array LIndex String)
|
||||
data CncFun = CncFun [CId] {-# UNPACK #-} !(UArray LIndex SeqId) deriving (Eq,Ord,Show)
|
||||
data CncFun = CncFun CId {-# UNPACK #-} !(UArray LIndex SeqId) deriving (Eq,Ord,Show)
|
||||
type Sequence = Array DotPos Symbol
|
||||
type FunId = Int
|
||||
type SeqId = Int
|
||||
@@ -93,14 +93,6 @@ msgUnionPGF one two = case absname one of
|
||||
_ -> (two, -- abstracts don't match, discard the old one -- error msg in Importing.ioUnionPGF
|
||||
Just "Abstract changed, previous concretes discarded.")
|
||||
|
||||
emptyPGF :: PGF
|
||||
emptyPGF = PGF {
|
||||
gflags = Map.empty,
|
||||
absname = wildCId,
|
||||
abstract = error "empty grammar, no abstract",
|
||||
concretes = Map.empty
|
||||
}
|
||||
|
||||
-- sameness of function type signatures, checked when importing a new concrete in env
|
||||
haveSameFunsPGF :: PGF -> PGF -> Bool
|
||||
haveSameFunsPGF one two =
|
||||
|
||||
@@ -8,6 +8,7 @@ module PGF.Expr(Tree, BindType(..), Expr(..), Literal(..), Patt(..), Equation(..
|
||||
mkDouble, unDouble,
|
||||
mkFloat, unFloat,
|
||||
mkMeta, unMeta,
|
||||
exprSubstitute,
|
||||
|
||||
normalForm,
|
||||
|
||||
@@ -169,6 +170,16 @@ unMeta (ETyped e ty) = unMeta e
|
||||
unMeta (EImplArg e) = unMeta e
|
||||
unMeta _ = Nothing
|
||||
|
||||
exprSubstitute :: Expr -> [Expr] -> Expr
|
||||
exprSubstitute e es =
|
||||
case e of
|
||||
EAbs b x e -> EAbs b x (exprSubstitute e es)
|
||||
EApp e1 e2 -> EApp (exprSubstitute e1 es) (exprSubstitute e2 es)
|
||||
ELit l -> ELit l
|
||||
EMeta i -> es !! i
|
||||
EFun x -> EFun x
|
||||
|
||||
|
||||
-----------------------------------------------------
|
||||
-- Parsing
|
||||
-----------------------------------------------------
|
||||
|
||||
@@ -71,10 +71,10 @@ bracketedTokn dp f@(Forest abs cnc forest root) =
|
||||
in (ct,fid',fun,es,(map getVar hypos,lin))
|
||||
Nothing -> error ("wrong forest id " ++ show fid)
|
||||
where
|
||||
descend forest (PApply funid args) = let (CncFun pfuns _lins) = cncfuns cnc ! funid
|
||||
cat = case pfuns of
|
||||
[] -> wildCId
|
||||
(pfun:_) -> case Map.lookup pfun (funs abs) of
|
||||
descend forest (PApply funid args) = let (CncFun fun _lins) = cncfuns cnc ! funid
|
||||
cat = case isLindefCId fun of
|
||||
Just cat -> cat
|
||||
Nothing -> case Map.lookup fun (funs abs) of
|
||||
Just (DTyp _ cat _,_,_,_) -> cat
|
||||
largs = map (render forest) args
|
||||
ltable = mkLinTable cnc isTrusted [] funid largs
|
||||
@@ -103,6 +103,14 @@ bracketedTokn dp f@(Forest abs cnc forest root) =
|
||||
descend (PCoerce fid) = trustedSpots parents' (PArg [] fid)
|
||||
descend (PConst c e _) = IntSet.empty
|
||||
|
||||
isLindefCId id
|
||||
| take l s == lindef = Just (mkCId (drop l s))
|
||||
| otherwise = Nothing
|
||||
where
|
||||
s = showCId id
|
||||
lindef = "lindef "
|
||||
l = length lindef
|
||||
|
||||
-- | This function extracts the list of all completed parse trees
|
||||
-- that spans the whole input consumed so far. The trees are also
|
||||
-- limited by the category specified, which is usually
|
||||
@@ -124,13 +132,13 @@ getAbsTrees (Forest abs cnc forest root) arg@(PArg _ fid) ty dp =
|
||||
| otherwise = do fid0 <- get
|
||||
put fid
|
||||
x <- foldForest (\funid args trees ->
|
||||
do let CncFun fns _lins = cncfuns cnc ! funid
|
||||
case fns of
|
||||
[] -> do arg <- go (Set.insert fid rec_) scope mb_tty (head args)
|
||||
do let CncFun fn _lins = cncfuns cnc ! funid
|
||||
case isLindefCId fn of
|
||||
Just _ -> do arg <- go (Set.insert fid rec_) scope mb_tty (head args)
|
||||
return (mkAbs arg)
|
||||
fns -> do ty_fn <- lookupFunType (head fns)
|
||||
Nothing -> do ty_fn <- lookupFunType fn
|
||||
(e,tty0) <- foldM (\(e1,tty) arg -> goArg (Set.insert fid rec_) scope fid e1 arg tty)
|
||||
(EFun (head fns),TTyp [] ty_fn) args
|
||||
(EFun fn,TTyp [] ty_fn) args
|
||||
case mb_tty of
|
||||
Just tty -> do i <- newGuardedMeta e
|
||||
eqType scope (scopeSize scope) i tty tty0
|
||||
|
||||
@@ -1,19 +1,169 @@
|
||||
{-# OPTIONS_HADDOCK hide #-}
|
||||
{-# LANGUAGE ImplicitParams, RankNTypes #-}
|
||||
-------------------------------------------------
|
||||
-- |
|
||||
-- Stability : unstable
|
||||
--
|
||||
-------------------------------------------------
|
||||
module PGF.Internal(module Internal) where
|
||||
import PGF.Binary as Internal
|
||||
import PGF.Data as Internal
|
||||
import PGF.Macros as Internal
|
||||
import PGF.Optimize as Internal
|
||||
import PGF.Printer as Internal
|
||||
import PGF.Utilities as Internal
|
||||
import PGF.ByteCode as Internal
|
||||
module PGF.Internal(CId,Language,PGF,
|
||||
Concr,lookConcr,
|
||||
FId,isPredefFId,
|
||||
FunId,SeqId,LIndex,Token,
|
||||
Production(..),PArg(..),Symbol(..),Literal(..),BindType(..),PGF.Internal.Sequence,
|
||||
globalFlags, abstrFlags, concrFlags,
|
||||
concrTotalCats, concrCategories, concrProductions,
|
||||
concrTotalFuns, concrFunction,
|
||||
concrTotalSeqs, concrSequence,
|
||||
|
||||
import Data.Binary as Internal
|
||||
import Data.Binary.Get as Internal
|
||||
import Data.Binary.IEEE754 as Internal
|
||||
import Data.Binary.Put as Internal
|
||||
CodeLabel, Instr(..), IVal(..), TailInfo(..),
|
||||
|
||||
Builder, B, build,
|
||||
eAbs, eApp, eMeta, eFun, eVar, eLit, eTyped, eImplArg, dTyp, hypo,
|
||||
AbstrInfo, newAbstr, ConcrInfo, newConcr, newPGF,
|
||||
dTyp, hypo,
|
||||
|
||||
fidString, fidInt, fidFloat, fidVar, fidStart,
|
||||
|
||||
ppFunId, ppSeqId, ppFId, ppMeta, ppLit, PGF.Internal.ppSeq
|
||||
) where
|
||||
|
||||
import PGF.Data
|
||||
import PGF.Macros
|
||||
import PGF.Printer
|
||||
import PGF.ByteCode
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.IntMap as IntMap
|
||||
import qualified Data.Set as Set
|
||||
import Data.Array.IArray
|
||||
import Text.PrettyPrint
|
||||
|
||||
globalFlags pgf = gflags pgf
|
||||
abstrFlags pgf = aflags (abstract pgf)
|
||||
concrFlags concr = cflags concr
|
||||
|
||||
concrTotalCats = totalCats
|
||||
|
||||
concrCategories :: Concr -> [(CId,FId,FId,[String])]
|
||||
concrCategories c = [(cat,start,end,elems lbls) | (cat,CncCat start end lbls) <- Map.toList (cnccats c)]
|
||||
|
||||
concrTotalFuns c =
|
||||
let (s,e) = bounds (cncfuns c)
|
||||
in e-s+1
|
||||
|
||||
concrFunction :: Concr -> FunId -> (CId,[SeqId])
|
||||
concrFunction c funid =
|
||||
let CncFun fun lins = cncfuns c ! funid
|
||||
in (fun,elems lins)
|
||||
|
||||
concrTotalSeqs :: Concr -> SeqId
|
||||
concrTotalSeqs c =
|
||||
let (s,e) = bounds (sequences c)
|
||||
in e-s+1
|
||||
|
||||
type Sequence = [Symbol]
|
||||
|
||||
concrSequence :: Concr -> SeqId -> [Symbol]
|
||||
concrSequence c seqid = elems (sequences c ! seqid)
|
||||
|
||||
concrProductions :: Concr -> FId -> [Production]
|
||||
concrProductions c fid =
|
||||
case IntMap.lookup fid (productions c) of
|
||||
Just set -> Set.toList set
|
||||
Nothing -> []
|
||||
|
||||
|
||||
data Builder s
|
||||
newtype B s a = B a
|
||||
|
||||
build :: (forall s . (?builder :: Builder s) => B s a) -> a
|
||||
build x = let ?builder = undefined
|
||||
in case x of
|
||||
B x -> x
|
||||
|
||||
eAbs :: (?builder :: Builder s) => BindType -> CId -> B s Expr -> B s Expr
|
||||
eAbs bind_type var (B body) = B (EAbs bind_type var body)
|
||||
|
||||
eApp :: (?builder :: Builder s) => B s Expr -> B s Expr -> B s Expr
|
||||
eApp (B f) (B x) = B (EApp f x)
|
||||
|
||||
eMeta :: (?builder :: Builder s) => Int -> B s Expr
|
||||
eMeta i = B (EMeta i)
|
||||
|
||||
eFun :: (?builder :: Builder s) => CId -> B s Expr
|
||||
eFun f = B (EFun f)
|
||||
|
||||
eVar :: (?builder :: Builder s) => Int -> B s Expr
|
||||
eVar i = B (EVar i)
|
||||
|
||||
eLit :: (?builder :: Builder s) => Literal -> B s Expr
|
||||
eLit l = B (ELit l)
|
||||
|
||||
eTyped :: (?builder :: Builder s) => B s Expr -> B s Type -> B s Expr
|
||||
eTyped (B e) (B ty) = B (ETyped e ty)
|
||||
|
||||
eImplArg :: (?builder :: Builder s) => B s Expr -> B s Expr
|
||||
eImplArg (B e) = B (EImplArg e)
|
||||
|
||||
hypo :: BindType -> CId -> B s Type -> (B s Hypo)
|
||||
hypo bind_type var (B ty) = B (bind_type,var,ty)
|
||||
|
||||
dTyp :: (?builder :: Builder s) => [B s Hypo] -> CId -> [B s Expr] -> B s Type
|
||||
dTyp hypos cat es = B (DTyp [hypo | B hypo <- hypos] cat [e | B e <- es])
|
||||
|
||||
|
||||
type AbstrInfo = Abstr
|
||||
|
||||
newAbstr :: (?builder :: Builder s) => [(CId,Literal)] ->
|
||||
[(CId,[B s Hypo],Float)] ->
|
||||
[(CId,B s Type,Int,Float)] ->
|
||||
B s AbstrInfo
|
||||
newAbstr aflags cats funs = B (Abstr (Map.fromList aflags)
|
||||
(Map.fromList [(fun,(ty,arity,Nothing,realToFrac prob)) | (fun,B ty,arity,prob) <- funs])
|
||||
(Map.fromList [(cat,([hypo | B hypo <- hypos],[],realToFrac prob)) | (cat,hypos,prob) <- cats]))
|
||||
|
||||
type ConcrInfo = Concr
|
||||
|
||||
newConcr :: (?builder :: Builder s) => B s AbstrInfo ->
|
||||
[(CId,Literal)] -> -- ^ Concrete syntax flags
|
||||
[(CId,String)] -> -- ^ Printnames
|
||||
[(FId,[FunId])] -> -- ^ Lindefs
|
||||
[(FId,[FunId])] -> -- ^ Linrefs
|
||||
[(FId,[Production])] -> -- ^ Productions
|
||||
[(CId,[SeqId])] -> -- ^ Concrete functions (must be sorted by Fun)
|
||||
[[Symbol]] -> -- ^ Sequences (must be sorted)
|
||||
[(CId,FId,FId,[String])] -> -- ^ Concrete categories
|
||||
FId -> -- ^ The total count of the categories
|
||||
B s ConcrInfo
|
||||
newConcr _ cflags printnames lindefs linrefs productions cncfuns sequences cnccats totalCats =
|
||||
B (Concr {cflags = Map.fromList cflags
|
||||
,printnames = Map.fromList printnames
|
||||
,lindefs = IntMap.fromList lindefs
|
||||
,linrefs = IntMap.fromList linrefs
|
||||
,productions = IntMap.fromList [(fid,Set.fromList prods) | (fid,prods) <- productions]
|
||||
,cncfuns = mkArray [CncFun fun (mkArray lins) | (fun,lins) <- cncfuns]
|
||||
,sequences = mkArray (map mkArray sequences)
|
||||
,cnccats = Map.fromList [(cat,CncCat s e (mkArray lbls)) | (cat,s,e,lbls) <- cnccats]
|
||||
,totalCats = totalCats
|
||||
})
|
||||
{-
|
||||
pproductions :: IntMap.IntMap (Set.Set Production), -- productions needed for parsing
|
||||
lproductions :: Map.Map CId (IntMap.IntMap (Set.Set Production)), -- productions needed for linearization
|
||||
lexicon :: IntMap.IntMap (IntMap.IntMap (TMap.TrieMap Token IntSet.IntSet)),
|
||||
-}
|
||||
|
||||
newPGF :: (?builder :: Builder s) => [(CId,Literal)] ->
|
||||
CId ->
|
||||
B s AbstrInfo ->
|
||||
[(CId,B s ConcrInfo)] ->
|
||||
B s PGF
|
||||
newPGF gflags absname (B abstract) concretes =
|
||||
B (PGF {gflags = Map.fromList gflags
|
||||
,absname = absname
|
||||
,abstract = abstract
|
||||
,concretes = Map.fromList [(cname,concr) | (cname,B concr) <- concretes]
|
||||
})
|
||||
|
||||
|
||||
ppSeq (seqid,seq) = PGF.Printer.ppSeq (seqid,mkArray seq)
|
||||
|
||||
mkArray l = listArray (0,length l-1) l
|
||||
|
||||
@@ -4,7 +4,6 @@ module PGF.Linearize
|
||||
, linearizeAll
|
||||
, linearizeAllLang
|
||||
, bracketedLinearize
|
||||
, bracketedLinearizeAll
|
||||
, tabularLinearizes
|
||||
) where
|
||||
|
||||
@@ -48,12 +47,6 @@ bracketedLinearize pgf lang = head . map (snd . untokn Nothing . firstLin cnc) .
|
||||
head [] = []
|
||||
head (bs:bss) = bs
|
||||
|
||||
-- | Linearizes given expression as a bracketed string in the language
|
||||
bracketedLinearizeAll :: PGF -> Language -> Tree -> [[BracketedString]]
|
||||
bracketedLinearizeAll pgf lang = map (snd . untokn Nothing . firstLin cnc) . linTree pgf cnc
|
||||
where
|
||||
cnc = lookMap (error "no lang") lang (concretes pgf)
|
||||
|
||||
firstLin cnc arg@(ct@(cat,n_fid),fid,fun,es,(xs,lin)) =
|
||||
case IntMap.lookup fid (linrefs cnc) of
|
||||
Just (funid:_) -> snd (mkLinTable cnc (const True) [] funid [arg]) ! 0
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
module PGF.Macros where
|
||||
import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
|
||||
@@ -31,8 +31,7 @@ collectWords pinfo = Map.fromListWith (++)
|
||||
[(t, [(fun,lbls ! l)]) | (CncCat s e lbls) <- Map.elems (cnccats pinfo)
|
||||
, fid <- [s..e]
|
||||
, PApply funid _ <- maybe [] Set.toList (IntMap.lookup fid (productions pinfo))
|
||||
, let CncFun funs lins = cncfuns pinfo ! funid
|
||||
, fun <- funs
|
||||
, let CncFun fun lins = cncfuns pinfo ! funid
|
||||
, (l,seqid) <- assocs lins
|
||||
, sym <- elems (sequences pinfo ! seqid)
|
||||
, t <- sym2tokns sym]
|
||||
|
||||
@@ -60,7 +60,7 @@ getConcr =
|
||||
cnccats <- getMap getCId getCncCat
|
||||
totalCats <- get
|
||||
let rseq = listToArray [SymCat 0 0]
|
||||
rfun = CncFun [mkCId "linref"] (listToArray [scnt])
|
||||
rfun = CncFun (mkCId "linref") (listToArray [scnt])
|
||||
linrefs = IntMap.fromList [(i,[fcnt])|i<-[0..totalCats-1]]
|
||||
return (Concr{ cflags=cflags, printnames=printnames
|
||||
, sequences=toArray (scnt+1,seqs++[rseq])
|
||||
@@ -110,7 +110,7 @@ getBindType =
|
||||
1 -> return Implicit
|
||||
_ -> decodingError "getBindType"
|
||||
|
||||
getCncFun = liftM2 CncFun (fmap (:[]) getCId) (getArray get)
|
||||
getCncFun = liftM2 CncFun getCId (getArray get)
|
||||
|
||||
getCncCat = liftM3 CncCat get get (getArray get)
|
||||
|
||||
|
||||
@@ -21,7 +21,6 @@ import qualified Data.IntMap as IntMap
|
||||
import qualified PGF.TrieMap as TrieMap
|
||||
import qualified Data.List as List
|
||||
import Control.Monad.ST
|
||||
import Debug.Trace
|
||||
|
||||
optimizePGF :: PGF -> PGF
|
||||
optimizePGF pgf = pgf{concretes=fmap (updateConcrete (abstract pgf) .
|
||||
@@ -179,26 +178,26 @@ topDownFilter startCat cnc =
|
||||
|
||||
|
||||
bottomUpFilter :: Concr -> Concr
|
||||
bottomUpFilter cnc = cnc{productions=filterProductions IntMap.empty (productions cnc)}
|
||||
bottomUpFilter cnc = cnc{productions=filterProductions IntMap.empty IntSet.empty (productions cnc)}
|
||||
|
||||
filterProductions prods0 prods
|
||||
filterProductions prods0 hoc0 prods
|
||||
| prods0 == prods1 = prods0
|
||||
| otherwise = filterProductions prods1 prods
|
||||
| otherwise = filterProductions prods1 hoc1 prods
|
||||
where
|
||||
prods1 = IntMap.foldWithKey foldProdSet IntMap.empty prods
|
||||
hoc = IntMap.fold (\set !hoc -> Set.fold accumHOC hoc set) IntSet.empty prods
|
||||
(prods1,hoc1) = IntMap.foldWithKey foldProdSet (IntMap.empty,IntSet.empty) prods
|
||||
|
||||
foldProdSet fid set !prods
|
||||
| Set.null set1 = prods
|
||||
| otherwise = IntMap.insert fid set1 prods
|
||||
foldProdSet fid set (!prods,!hoc)
|
||||
| Set.null set1 = (prods,hoc)
|
||||
| otherwise = (IntMap.insert fid set1 prods,hoc1)
|
||||
where
|
||||
set1 = Set.filter filterRule set
|
||||
hoc1 = Set.fold accumHOC hoc set1
|
||||
|
||||
filterRule (PApply funid args) = all (\(PArg _ fid) -> isLive fid) args
|
||||
filterRule (PCoerce fid) = isLive fid
|
||||
filterRule _ = True
|
||||
|
||||
isLive fid = isPredefFId fid || IntMap.member fid prods0 || IntSet.member fid hoc
|
||||
isLive fid = isPredefFId fid || IntMap.member fid prods0 || IntSet.member fid hoc0
|
||||
|
||||
accumHOC (PApply funid args) hoc = List.foldl' (\hoc (PArg hypos _) -> List.foldl' (\hoc (_,fid) -> IntSet.insert fid hoc) hoc hypos) hoc args
|
||||
accumHOC _ hoc = hoc
|
||||
@@ -242,7 +241,7 @@ splitLexicalRules cnc p_prods =
|
||||
seq2prefix (SymALL_CAPIT :syms) = TrieMap.fromList [wf ["&|"]]
|
||||
|
||||
updateConcrete abs cnc =
|
||||
let p_prods0 = filterProductions IntMap.empty (productions cnc)
|
||||
let p_prods0 = filterProductions IntMap.empty IntSet.empty (productions cnc)
|
||||
(lex,p_prods) = splitLexicalRules cnc p_prods0
|
||||
l_prods = linIndex cnc p_prods0
|
||||
in cnc{pproductions = p_prods, lproductions = l_prods, lexicon = lex}
|
||||
@@ -253,7 +252,7 @@ updateConcrete abs cnc =
|
||||
, prod <- Set.toList prods
|
||||
, fun <- getFunctions prod]
|
||||
where
|
||||
getFunctions (PApply funid args) = let CncFun funs _ = cncfuns cnc ! funid in funs
|
||||
getFunctions (PApply funid args) = let CncFun fun _ = cncfuns cnc ! funid in [fun]
|
||||
getFunctions (PCoerce fid) = case IntMap.lookup fid productions of
|
||||
Nothing -> []
|
||||
Just prods -> [fun | prod <- Set.toList prods, fun <- getFunctions prod]
|
||||
|
||||
@@ -503,14 +503,14 @@ type Continuation = TrieMap.TrieMap Token ActiveSet
|
||||
-- | Return the Continuation of a Parsestate with exportable types
|
||||
-- Used by PGFService
|
||||
getContinuationInfo :: ParseState -> Map.Map [Token] [(FunId, CId, String)]
|
||||
getContinuationInfo pstate = Map.map (concatMap f . Set.toList) contMap
|
||||
getContinuationInfo pstate = Map.map (map f . Set.toList) contMap
|
||||
where
|
||||
PState _abstr concr _chart cont = pstate
|
||||
contMap = Map.fromList (TrieMap.toList cont) -- always get [([], _::ActiveSet)]
|
||||
f :: Active -> [(FunId,CId,String)]
|
||||
f (Active int dotpos funid seqid pargs ak) = [(funid, fn, seq) | fn <- fns]
|
||||
f :: Active -> (FunId,CId,String)
|
||||
f (Active int dotpos funid seqid pargs ak) = (funid, cid, seq)
|
||||
where
|
||||
CncFun fns _ = cncfuns concr ! funid
|
||||
CncFun cid _ = cncfuns concr ! funid
|
||||
seq = showSeq dotpos (sequences concr ! seqid)
|
||||
|
||||
showSeq :: DotPos -> Sequence -> String
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
{-# LANGUAGE FlexibleContexts #-}
|
||||
module PGF.Printer (ppPGF,ppCat,ppFId,ppFunId,ppSeqId,ppSeq,ppFun) where
|
||||
import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
@@ -73,8 +72,8 @@ ppProduction (fid,PCoerce arg) =
|
||||
ppProduction (fid,PConst _ _ ss) =
|
||||
ppFId fid <+> text "->" <+> ppStrs ss
|
||||
|
||||
ppCncFun (funid,CncFun funs arr) =
|
||||
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (hsep (map ppCId funs))
|
||||
ppCncFun (funid,CncFun fun arr) =
|
||||
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)
|
||||
|
||||
ppLinDefs (fid,funids) =
|
||||
[ppFId fid <+> text "->" <+> ppFunId funid <> brackets (ppFId fidVar) | funid <- funids]
|
||||
@@ -82,6 +81,7 @@ ppLinDefs (fid,funids) =
|
||||
ppLinRefs (fid,funids) =
|
||||
[ppFId fidVar <+> text "->" <+> ppFunId funid <> brackets (ppFId fid) | funid <- funids]
|
||||
|
||||
ppSeq :: (SeqId,Sequence) -> Doc
|
||||
ppSeq (seqid,seq) =
|
||||
ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))
|
||||
|
||||
|
||||
@@ -23,7 +23,6 @@ module PGF.VisualizeTree
|
||||
, gizaAlignment
|
||||
, conlls2latexDoc
|
||||
) where
|
||||
import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint
|
||||
|
||||
import PGF.CId (wildCId,showCId,ppCId,mkCId) --CId,pCId,
|
||||
import PGF.Data
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
name: pgf
|
||||
version: 3.9.1-git
|
||||
version: 3.9-git
|
||||
|
||||
cabal-version: >= 1.20
|
||||
build-type: Simple
|
||||
@@ -8,7 +8,7 @@ category: Natural Language Processing
|
||||
synopsis: Grammatical Framework
|
||||
description: A library for interpreting the Portable Grammar Format (PGF)
|
||||
homepage: http://www.grammaticalframework.org/
|
||||
bug-reports: https://github.com/GrammaticalFramework/gf-core/issues
|
||||
bug-reports: https://github.com/GrammaticalFramework/GF/issues
|
||||
maintainer: Thomas Hallgren
|
||||
tested-with: GHC==7.6.3, GHC==7.8.3, GHC==7.10.3, GHC==8.0.2
|
||||
|
||||
@@ -30,6 +30,7 @@ Library
|
||||
exceptions
|
||||
|
||||
if flag(custom-binary)
|
||||
hs-source-dirs: ., binary
|
||||
other-modules:
|
||||
-- not really part of GF but I have changed the original binary library
|
||||
-- and we have to keep the copy for now.
|
||||
@@ -45,9 +46,9 @@ Library
|
||||
--if impl(ghc>=7.8)
|
||||
-- ghc-options: +RTS -A20M -RTS
|
||||
ghc-prof-options: -fprof-auto
|
||||
extensions:
|
||||
extensions:
|
||||
|
||||
exposed-modules:
|
||||
exposed-modules:
|
||||
PGF
|
||||
PGF.Internal
|
||||
PGF.Haskell
|
||||
|
||||
@@ -2619,6 +2619,21 @@ PGF_dealloc(PGFObject* self)
|
||||
Py_TYPE(self)->tp_free((PyObject*)self);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
GuMapItor fn;
|
||||
PGFObject* grammar;
|
||||
void* collection;
|
||||
} PyPGFClosure;
|
||||
|
||||
static void
|
||||
pgf_collect_langs_seq(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||
{
|
||||
PgfConcr* concr = *((PgfConcr**) value);
|
||||
PyPGFClosure* clo = (PyPGFClosure*) fn;
|
||||
|
||||
gu_buf_push((GuBuf*) clo->collection, PgfConcr*, concr);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
PGF_repr(PGFObject *self)
|
||||
{
|
||||
@@ -2628,7 +2643,14 @@ PGF_repr(PGFObject *self)
|
||||
GuStringBuf* sbuf = gu_new_string_buf(tmp_pool);
|
||||
GuOut* out = gu_string_buf_out(sbuf);
|
||||
|
||||
pgf_print(self->pgf, out, err);
|
||||
GuBuf* languages = gu_new_buf(PgfConcr*, tmp_pool);
|
||||
|
||||
PyPGFClosure clo = { { pgf_collect_langs_seq }, self, languages };
|
||||
pgf_iter_languages(self->pgf, &clo.fn, err);
|
||||
|
||||
pgf_print(self->pgf, gu_buf_length(languages),
|
||||
gu_buf_data(languages),
|
||||
out, err);
|
||||
|
||||
PyObject* pystr = PyString_FromStringAndSize(gu_string_buf_data(sbuf),
|
||||
gu_string_buf_length(sbuf));
|
||||
@@ -2643,14 +2665,8 @@ PGF_getAbstractName(PGFObject *self, void *closure)
|
||||
return PyString_FromString(pgf_abstract_name(self->pgf));
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
GuMapItor fn;
|
||||
PGFObject* grammar;
|
||||
PyObject* object;
|
||||
} PyPGFClosure;
|
||||
|
||||
static void
|
||||
pgf_collect_langs(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||
pgf_collect_langs_dict(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||
{
|
||||
PgfCId name = (PgfCId) key;
|
||||
PgfConcr* concr = *((PgfConcr**) value);
|
||||
@@ -2675,7 +2691,7 @@ pgf_collect_langs(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||
((ConcrObject *) py_lang)->grammar = clo->grammar;
|
||||
Py_INCREF(clo->grammar);
|
||||
|
||||
if (PyDict_SetItem(clo->object, py_name, py_lang) != 0) {
|
||||
if (PyDict_SetItem((PyObject*) clo->collection, py_name, py_lang) != 0) {
|
||||
gu_raise(err, PgfExn);
|
||||
goto end;
|
||||
}
|
||||
@@ -2697,7 +2713,7 @@ PGF_getLanguages(PGFObject *self, void *closure)
|
||||
// Create an exception frame that catches all errors.
|
||||
GuExn* err = gu_new_exn(tmp_pool);
|
||||
|
||||
PyPGFClosure clo = { { pgf_collect_langs }, self, languages };
|
||||
PyPGFClosure clo = { { pgf_collect_langs_dict }, self, languages };
|
||||
pgf_iter_languages(self->pgf, &clo.fn, err);
|
||||
if (!gu_ok(err)) {
|
||||
Py_DECREF(languages);
|
||||
@@ -2727,7 +2743,7 @@ pgf_collect_cats(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||
goto end;
|
||||
}
|
||||
|
||||
if (PyList_Append(clo->object, py_name) != 0) {
|
||||
if (PyList_Append((PyObject*) clo->collection, py_name) != 0) {
|
||||
gu_raise(err, PgfExn);
|
||||
goto end;
|
||||
}
|
||||
@@ -2794,7 +2810,7 @@ pgf_collect_funs(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||
goto end;
|
||||
}
|
||||
|
||||
if (PyList_Append(clo->object, py_name) != 0) {
|
||||
if (PyList_Append((PyObject*) clo->collection, py_name) != 0) {
|
||||
gu_raise(err, PgfExn);
|
||||
goto end;
|
||||
}
|
||||
@@ -3142,7 +3158,7 @@ pgf_embed_funs(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||
|
||||
Py_INCREF(pyexpr->master);
|
||||
|
||||
if (PyModule_AddObject(clo->object, name, (PyObject*) pyexpr) != 0) {
|
||||
if (PyModule_AddObject((PyObject*) clo->collection, name, (PyObject*) pyexpr) != 0) {
|
||||
Py_DECREF(pyexpr);
|
||||
gu_raise(err, PgfExn);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user