forked from GitHub/gf-core
Merge branch 'master' into c-runtime
This commit is contained in:
@@ -14,6 +14,9 @@ For Linux users
|
||||
|
||||
You will need the packages: autoconf, automake, libtool, make
|
||||
|
||||
- On Ubuntu: $ apt-get install autotools-dev
|
||||
- On Fedora: $ dnf install autoconf automake libtool
|
||||
|
||||
The compilation steps are:
|
||||
|
||||
$ autoreconf -i
|
||||
@@ -28,7 +31,7 @@ For Mac OSX users
|
||||
The following is what I did to make it work on MacOSX 10.8:
|
||||
|
||||
- Install XCode and XCode command line tools
|
||||
- Install Homebrew: http://mxcl.github.com/homebrew/
|
||||
- Install Homebrew: https://brew.sh
|
||||
|
||||
$ brew install automake autoconf libtool
|
||||
$ glibtoolize
|
||||
@@ -49,7 +52,7 @@ For Windows users
|
||||
After the installation, don't forget to fix the fstab file. See here:
|
||||
http://www.mingw.org/wiki/Getting_Started
|
||||
|
||||
- From the MSYS shell (c:/MinGW/msys/1.0/msys.bat) go to the directory
|
||||
- From the MSYS shell (c:/MinGW/msys/1.0/msys.bat) go to the directory
|
||||
which contains the INSTALL file and do:
|
||||
|
||||
$ autoreconf -i
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
lib_LTLIBRARIES = libgu.la libpgf.la libsg.la
|
||||
lib_LTLIBRARIES = libgu.la libpgf.la
|
||||
|
||||
pkgconfigdir = $(libdir)/pkgconfig
|
||||
pkgconfig_DATA = libgu.pc libpgf.pc libsg.pc
|
||||
pkgconfig_DATA = libgu.pc libpgf.pc
|
||||
|
||||
configincludedir = $(libdir)/libgu/include
|
||||
|
||||
@@ -37,10 +37,6 @@ pgfinclude_HEADERS = \
|
||||
pgf/pgf.h \
|
||||
pgf/data.h
|
||||
|
||||
sgincludedir=$(includedir)/sg
|
||||
sginclude_HEADERS = \
|
||||
sg/sg.h
|
||||
|
||||
libgu_la_SOURCES = \
|
||||
gu/assert.c \
|
||||
gu/bits.c \
|
||||
@@ -92,11 +88,6 @@ libpgf_la_SOURCES = \
|
||||
libpgf_la_LDFLAGS = "-no-undefined"
|
||||
libpgf_la_LIBADD = libgu.la
|
||||
|
||||
libsg_la_SOURCES = \
|
||||
sg/sqlite3Btree.c \
|
||||
sg/sg.c
|
||||
libsg_la_LIBADD = libgu.la libpgf.la
|
||||
|
||||
bin_PROGRAMS =
|
||||
|
||||
AUTOMAKE_OPTIONS = foreign subdir-objects dist-bzip2
|
||||
@@ -104,5 +95,4 @@ ACLOCAL_AMFLAGS = -I m4
|
||||
|
||||
EXTRA_DIST = \
|
||||
libgu.pc.in \
|
||||
libpgf.pc.in \
|
||||
libsg.pc.in
|
||||
libpgf.pc.in
|
||||
|
||||
@@ -58,7 +58,6 @@ AC_CONFIG_LINKS(pgf/lightning/asm.h:$cpu_dir/asm.h dnl
|
||||
AC_CONFIG_FILES([Makefile
|
||||
libgu.pc
|
||||
libpgf.pc
|
||||
libsg.pc
|
||||
])
|
||||
|
||||
AC_OUTPUT
|
||||
|
||||
@@ -322,7 +322,7 @@ gu_map_iter(GuMap* map, GuMapItor* itor, GuExn* err)
|
||||
}
|
||||
|
||||
GU_API bool
|
||||
gu_map_next(GuMap* map, size_t* pi, void** pkey, void* pvalue)
|
||||
gu_map_next(GuMap* map, size_t* pi, void* pkey, void* pvalue)
|
||||
{
|
||||
while (*pi < map->data.n_entries) {
|
||||
if (gu_map_entry_is_free(map, &map->data, *pi)) {
|
||||
@@ -330,14 +330,17 @@ gu_map_next(GuMap* map, size_t* pi, void** pkey, void* pvalue)
|
||||
continue;
|
||||
}
|
||||
|
||||
*pkey = &map->data.keys[*pi * map->key_size];
|
||||
if (map->hasher == gu_addr_hasher) {
|
||||
*pkey = *(void**) *pkey;
|
||||
*((void**) pkey) = *((void**) &map->data.keys[*pi * sizeof(void*)]);
|
||||
} else if (map->hasher == gu_word_hasher) {
|
||||
*((GuWord*) pkey) = *((GuWord*) &map->data.keys[*pi * sizeof(GuWord)]);
|
||||
} else if (map->hasher == gu_string_hasher) {
|
||||
*pkey = *(void**) *pkey;
|
||||
}
|
||||
*((GuString*) pkey) = *((GuString*) &map->data.keys[*pi * sizeof(GuString)]);
|
||||
} else {
|
||||
memcpy(pkey, &map->data.keys[*pi * map->key_size], map->key_size);
|
||||
}
|
||||
|
||||
memcpy(pvalue, &map->data.values[*pi * map->cell_size],
|
||||
memcpy(pvalue, &map->data.values[*pi * map->cell_size],
|
||||
map->value_size);
|
||||
|
||||
(*pi)++;
|
||||
|
||||
@@ -75,7 +75,7 @@ GU_API_DECL void
|
||||
gu_map_iter(GuMap* ht, GuMapItor* itor, GuExn* err);
|
||||
|
||||
GU_API bool
|
||||
gu_map_next(GuMap* map, size_t* pi, void** pkey, void* pvalue);
|
||||
gu_map_next(GuMap* map, size_t* pi, void* pkey, void* pvalue);
|
||||
|
||||
typedef GuMap GuIntMap;
|
||||
|
||||
|
||||
3
src/runtime/c/install.sh
Executable file
3
src/runtime/c/install.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/bin/sh
# Configure, build and install the C runtime by running the three
# setup.sh stages in order.
#
# Abort on the first failing stage so that a broken configure or build
# is never followed by an install step.
set -e

bash setup.sh configure
bash setup.sh build
bash setup.sh install
|
||||
@@ -1,10 +0,0 @@
|
||||
prefix=@prefix@
|
||||
exec_prefix=@exec_prefix@
|
||||
libdir=@libdir@
|
||||
includedir=@includedir@
|
||||
|
||||
Name: libsg
|
||||
Description: Semantic Graph library
|
||||
Version: @VERSION@
|
||||
Libs: -L${libdir} -lsg -lpgf
|
||||
Cflags: -I${includedir}
|
||||
@@ -142,14 +142,14 @@ pgf_aligner_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_aligner_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun)
|
||||
pgf_aligner_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
|
||||
{
|
||||
PgfAlignerLin* alin = gu_container(funcs, PgfAlignerLin, funcs);
|
||||
gu_buf_push(alin->parent_stack, int, fid);
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_aligner_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun)
|
||||
pgf_aligner_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
|
||||
{
|
||||
PgfAlignerLin* alin = gu_container(funcs, PgfAlignerLin, funcs);
|
||||
gu_buf_pop(alin->parent_stack, int);
|
||||
|
||||
@@ -322,7 +322,8 @@ typedef struct PgfProductionCoerce
|
||||
|
||||
typedef struct {
|
||||
PgfExprProb *ep;
|
||||
GuSeq* lins;
|
||||
size_t n_lins;
|
||||
PgfSymbols* lins[];
|
||||
} PgfProductionExtern;
|
||||
|
||||
typedef struct {
|
||||
|
||||
@@ -953,94 +953,6 @@ pgf_read_expr(GuIn* in, GuPool* pool, GuPool* tmp_pool, GuExn* err)
|
||||
return expr;
|
||||
}
|
||||
|
||||
PGF_API int
|
||||
pgf_read_expr_tuple(GuIn* in,
|
||||
size_t n_exprs, PgfExpr exprs[],
|
||||
GuPool* pool, GuExn* err)
|
||||
{
|
||||
GuPool* tmp_pool = gu_new_pool();
|
||||
PgfExprParser* parser =
|
||||
pgf_new_parser(in, pgf_expr_parser_in_getc, pool, tmp_pool, err);
|
||||
if (parser->token_tag != PGF_TOKEN_LTRIANGLE)
|
||||
goto fail;
|
||||
pgf_expr_parser_token(parser, false);
|
||||
for (size_t i = 0; i < n_exprs; i++) {
|
||||
if (i > 0) {
|
||||
if (parser->token_tag != PGF_TOKEN_COMMA)
|
||||
goto fail;
|
||||
pgf_expr_parser_token(parser, false);
|
||||
}
|
||||
|
||||
exprs[i] = pgf_expr_parser_expr(parser, false);
|
||||
if (gu_variant_is_null(exprs[i]))
|
||||
goto fail;
|
||||
}
|
||||
if (parser->token_tag != PGF_TOKEN_RTRIANGLE)
|
||||
goto fail;
|
||||
pgf_expr_parser_token(parser, false);
|
||||
if (parser->token_tag != PGF_TOKEN_EOF)
|
||||
goto fail;
|
||||
gu_pool_free(tmp_pool);
|
||||
|
||||
return 1;
|
||||
|
||||
fail:
|
||||
gu_pool_free(tmp_pool);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Parse a matrix of expressions from `in`.
 *
 * The input is an opening-triangle token, zero or more rows separated by
 * semicolon tokens, a closing-triangle token and end of input. Each row
 * holds exactly `n_exprs` expressions separated by comma tokens.
 *
 * @param in       input stream handed to the expression parser
 * @param n_exprs  number of expressions per row
 * @param pool     pool that owns the result buffer and the expressions
 * @param err      exception frame passed to the parser
 * @return the buffer's data sequence with all rows stored one after
 *         another, or NULL on a syntax mismatch
 *
 * A temporary pool is created for the parser and released on every path.
 */
PGF_API GuSeq*
pgf_read_expr_matrix(GuIn* in,
                     size_t n_exprs,
                     GuPool* pool, GuExn* err)
{
	bool parsed = false;
	GuBuf* cells = NULL;
	GuPool* tmp_pool = gu_new_pool();
	PgfExprParser* parser =
		pgf_new_parser(in, pgf_expr_parser_in_getc, pool, tmp_pool, err);

	if (parser->token_tag != PGF_TOKEN_LTRIANGLE)
		goto done;
	pgf_expr_parser_token(parser, false);

	cells = gu_new_buf(PgfExpr, pool);

	/* An immediately closed bracket denotes an empty matrix. */
	if (parser->token_tag != PGF_TOKEN_RTRIANGLE) {
		bool another_row;
		do {
			PgfExpr* row = gu_buf_extend_n(cells, n_exprs);

			for (size_t col = 0; col < n_exprs; col++) {
				if (col != 0) {
					if (parser->token_tag != PGF_TOKEN_COMMA)
						goto done;
					pgf_expr_parser_token(parser, false);
				}

				row[col] = pgf_expr_parser_expr(parser, false);
				if (gu_variant_is_null(row[col]))
					goto done;
			}

			/* A semicolon announces one more row. */
			another_row = (parser->token_tag == PGF_TOKEN_SEMI);
			if (another_row)
				pgf_expr_parser_token(parser, false);
		} while (another_row);

		if (parser->token_tag != PGF_TOKEN_RTRIANGLE)
			goto done;
	}

	pgf_expr_parser_token(parser, false);
	parsed = (parser->token_tag == PGF_TOKEN_EOF);

done:
	gu_pool_free(tmp_pool);
	return parsed ? gu_buf_data_seq(cells) : NULL;
}
|
||||
|
||||
PGF_API PgfType*
|
||||
pgf_read_type(GuIn* in, GuPool* pool, GuPool* tmp_pool, GuExn* err)
|
||||
{
|
||||
@@ -1758,19 +1670,6 @@ pgf_print_context(PgfHypos *hypos, PgfPrintContext* ctxt,
|
||||
}
|
||||
}
|
||||
|
||||
PGF_API void
|
||||
pgf_print_expr_tuple(size_t n_exprs, PgfExpr exprs[], PgfPrintContext* ctxt,
|
||||
GuOut* out, GuExn* err)
|
||||
{
|
||||
gu_putc('<', out, err);
|
||||
for (size_t i = 0; i < n_exprs; i++) {
|
||||
if (i > 0)
|
||||
gu_putc(',', out, err);
|
||||
pgf_print_expr(exprs[i], ctxt, 0, out, err);
|
||||
}
|
||||
gu_putc('>', out, err);
|
||||
}
|
||||
|
||||
PGF_API bool
|
||||
pgf_type_eq(PgfType* t1, PgfType* t2)
|
||||
{
|
||||
@@ -1806,6 +1705,168 @@ pgf_type_eq(PgfType* t1, PgfType* t2)
|
||||
return true;
|
||||
}
|
||||
|
||||
PGF_API PgfLiteral
|
||||
pgf_clone_literal(PgfLiteral lit, GuPool* pool)
|
||||
{
|
||||
PgfLiteral new_lit = gu_null_variant;
|
||||
|
||||
GuVariantInfo inf = gu_variant_open(lit);
|
||||
switch (inf.tag) {
|
||||
case PGF_LITERAL_STR: {
|
||||
PgfLiteralStr* lit_str = inf.data;
|
||||
PgfLiteralStr* new_lit_str =
|
||||
gu_new_flex_variant(PGF_LITERAL_STR,
|
||||
PgfLiteralStr,
|
||||
val, strlen(lit_str->val)+1,
|
||||
&new_lit, pool);
|
||||
strcpy(new_lit_str->val, lit_str->val);
|
||||
break;
|
||||
}
|
||||
case PGF_LITERAL_INT: {
|
||||
PgfLiteralInt *lit_int = inf.data;
|
||||
PgfLiteralInt *new_lit_int =
|
||||
gu_new_variant(PGF_LITERAL_INT,
|
||||
PgfLiteralInt,
|
||||
&new_lit, pool);
|
||||
new_lit_int->val = lit_int->val;
|
||||
break;
|
||||
}
|
||||
case PGF_LITERAL_FLT: {
|
||||
PgfLiteralFlt *lit_flt = inf.data;
|
||||
PgfLiteralFlt *new_lit_flt =
|
||||
gu_new_variant(PGF_LITERAL_FLT,
|
||||
PgfLiteralFlt,
|
||||
&new_lit, pool);
|
||||
new_lit_flt->val = lit_flt->val;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
|
||||
return new_lit;
|
||||
}
|
||||
|
||||
PGF_API PgfExpr
|
||||
pgf_clone_expr(PgfExpr expr, GuPool* pool)
|
||||
{
|
||||
PgfExpr new_expr = gu_null_variant;
|
||||
|
||||
GuVariantInfo inf = gu_variant_open(expr);
|
||||
switch (inf.tag) {
|
||||
case PGF_EXPR_ABS: {
|
||||
PgfExprAbs* abs = inf.data;
|
||||
PgfExprAbs* new_abs =
|
||||
gu_new_variant(PGF_EXPR_ABS,
|
||||
PgfExprAbs,
|
||||
&new_expr, pool);
|
||||
|
||||
new_abs->bind_type = abs->bind_type;
|
||||
new_abs->id = gu_string_copy(abs->id, pool);
|
||||
new_abs->body = pgf_clone_expr(abs->body,pool);
|
||||
break;
|
||||
}
|
||||
case PGF_EXPR_APP: {
|
||||
PgfExprApp* app = inf.data;
|
||||
PgfExprApp* new_app =
|
||||
gu_new_variant(PGF_EXPR_APP,
|
||||
PgfExprApp,
|
||||
&new_expr, pool);
|
||||
new_app->fun = pgf_clone_expr(app->fun, pool);
|
||||
new_app->arg = pgf_clone_expr(app->arg, pool);
|
||||
break;
|
||||
}
|
||||
case PGF_EXPR_LIT: {
|
||||
PgfExprLit* lit = inf.data;
|
||||
PgfExprLit* new_lit =
|
||||
gu_new_variant(PGF_EXPR_LIT,
|
||||
PgfExprLit,
|
||||
&new_expr, pool);
|
||||
new_lit->lit = pgf_clone_literal(lit->lit, pool);
|
||||
break;
|
||||
}
|
||||
case PGF_EXPR_META: {
|
||||
PgfExprMeta* meta = inf.data;
|
||||
PgfExprMeta* new_meta =
|
||||
gu_new_variant(PGF_EXPR_META,
|
||||
PgfExprMeta,
|
||||
&new_expr, pool);
|
||||
new_meta->id = meta->id;
|
||||
break;
|
||||
}
|
||||
case PGF_EXPR_FUN: {
|
||||
PgfExprFun* fun = inf.data;
|
||||
PgfExprFun* new_fun =
|
||||
gu_new_flex_variant(PGF_EXPR_FUN,
|
||||
PgfExprFun,
|
||||
fun, strlen(fun->fun)+1,
|
||||
&new_expr, pool);
|
||||
strcpy(new_fun->fun, fun->fun);
|
||||
break;
|
||||
}
|
||||
case PGF_EXPR_VAR: {
|
||||
PgfExprVar* var = inf.data;
|
||||
PgfExprVar* new_var =
|
||||
gu_new_variant(PGF_EXPR_VAR,
|
||||
PgfExprVar,
|
||||
&new_expr, pool);
|
||||
new_var->var = var->var;
|
||||
break;
|
||||
}
|
||||
case PGF_EXPR_TYPED: {
|
||||
PgfExprTyped* typed = inf.data;
|
||||
|
||||
PgfExprTyped *new_typed =
|
||||
gu_new_variant(PGF_EXPR_TYPED,
|
||||
PgfExprTyped,
|
||||
&new_expr, pool);
|
||||
new_typed->expr = pgf_clone_expr(typed->expr, pool);
|
||||
new_typed->type = pgf_clone_type(typed->type, pool);
|
||||
break;
|
||||
}
|
||||
case PGF_EXPR_IMPL_ARG: {
|
||||
PgfExprImplArg* impl = inf.data;
|
||||
PgfExprImplArg *new_impl =
|
||||
gu_new_variant(PGF_EXPR_IMPL_ARG,
|
||||
PgfExprImplArg,
|
||||
&new_expr, pool);
|
||||
new_impl->expr = pgf_clone_expr(impl->expr, pool);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
|
||||
return new_expr;
|
||||
}
|
||||
|
||||
/**
 * Recursively copy a type into `pool`.
 *
 * The copy gets its own hypothesis sequence (each hypothesis with a
 * duplicated identifier and a cloned type), a duplicated category
 * identifier and cloned argument expressions.
 *
 * @param type  type to copy
 * @param pool  pool that owns the copy
 * @return the newly allocated type
 */
PGF_API PgfType*
pgf_clone_type(PgfType* type, GuPool* pool)
{
	PgfType* cloned =
		gu_new_flex(pool, PgfType, exprs, type->n_exprs);

	/* Hypotheses: same count, each one copied field by field. */
	size_t hypo_count = gu_seq_length(type->hypos);
	cloned->hypos = gu_new_seq(PgfHypo, hypo_count, pool);
	for (size_t h = 0; h < hypo_count; h++) {
		PgfHypo* src = gu_seq_index(type->hypos, PgfHypo, h);
		PgfHypo* dst = gu_seq_index(cloned->hypos, PgfHypo, h);

		dst->bind_type = src->bind_type;
		dst->cid = gu_string_copy(src->cid, pool);
		dst->type = pgf_clone_type(src->type, pool);
	}

	cloned->cid = gu_string_copy(type->cid, pool);

	/* Argument expressions stored in the flexible array member. */
	cloned->n_exprs = type->n_exprs;
	for (size_t e = 0; e < cloned->n_exprs; e++) {
		cloned->exprs[e] = pgf_clone_expr(type->exprs[e], pool);
	}

	return cloned;
}
|
||||
|
||||
PGF_API prob_t
|
||||
pgf_compute_tree_probability(PgfPGF *gr, PgfExpr expr)
|
||||
{
|
||||
|
||||
@@ -171,15 +171,6 @@ pgf_expr_unmeta(PgfExpr expr);
|
||||
PGF_API_DECL PgfExpr
|
||||
pgf_read_expr(GuIn* in, GuPool* pool, GuPool* tmp_pool, GuExn* err);
|
||||
|
||||
PGF_API_DECL int
|
||||
pgf_read_expr_tuple(GuIn* in,
|
||||
size_t n_exprs, PgfExpr exprs[],
|
||||
GuPool* pool, GuExn* err);
|
||||
|
||||
PGF_API_DECL GuSeq*
|
||||
pgf_read_expr_matrix(GuIn* in, size_t n_exprs,
|
||||
GuPool* pool, GuExn* err);
|
||||
|
||||
PGF_API_DECL PgfType*
|
||||
pgf_read_type(GuIn* in, GuPool* pool, GuPool* tmp_pool, GuExn* err);
|
||||
|
||||
@@ -239,9 +230,14 @@ PGF_API_DECL void
|
||||
pgf_print_context(PgfHypos *hypos, PgfPrintContext* ctxt,
|
||||
GuOut *out, GuExn *err);
|
||||
|
||||
PGF_API_DECL void
|
||||
pgf_print_expr_tuple(size_t n_exprs, PgfExpr exprs[], PgfPrintContext* ctxt,
|
||||
GuOut* out, GuExn* err);
|
||||
PGF_API PgfLiteral
|
||||
pgf_clone_literal(PgfLiteral lit, GuPool* pool);
|
||||
|
||||
PGF_API PgfExpr
|
||||
pgf_clone_expr(PgfExpr expr, GuPool* pool);
|
||||
|
||||
PGF_API PgfType*
|
||||
pgf_clone_type(PgfType* type, GuPool* pool);
|
||||
|
||||
PGF_API_DECL prob_t
|
||||
pgf_compute_tree_probability(PgfPGF *gr, PgfExpr expr);
|
||||
|
||||
@@ -155,7 +155,7 @@ pgf_bracket_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun)
|
||||
pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
|
||||
{
|
||||
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
|
||||
|
||||
@@ -192,7 +192,7 @@ pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t li
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun)
|
||||
pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
|
||||
{
|
||||
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
|
||||
|
||||
|
||||
@@ -628,7 +628,7 @@ typedef struct {
|
||||
PgfLzrCachedTag tag;
|
||||
PgfCId cat;
|
||||
int fid;
|
||||
int lin_idx;
|
||||
GuString ann;
|
||||
PgfCId fun;
|
||||
} PgfLzrCached;
|
||||
|
||||
@@ -666,7 +666,7 @@ pgf_lzr_cache_flush(PgfLzrCache* cache, PgfSymbols* form)
|
||||
cache->lzr->funcs,
|
||||
event->cat,
|
||||
event->fid,
|
||||
event->lin_idx,
|
||||
event->ann,
|
||||
event->fun);
|
||||
}
|
||||
break;
|
||||
@@ -676,7 +676,7 @@ pgf_lzr_cache_flush(PgfLzrCache* cache, PgfSymbols* form)
|
||||
cache->lzr->funcs,
|
||||
event->cat,
|
||||
event->fid,
|
||||
event->lin_idx,
|
||||
event->ann,
|
||||
event->fun);
|
||||
}
|
||||
break;
|
||||
@@ -731,27 +731,27 @@ found:
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_lzr_cache_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin_idx, PgfCId fun)
|
||||
pgf_lzr_cache_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
|
||||
{
|
||||
PgfLzrCache* cache = gu_container(funcs, PgfLzrCache, funcs);
|
||||
PgfLzrCached* event = gu_buf_extend(cache->events);
|
||||
event->tag = PGF_CACHED_BEGIN;
|
||||
event->cat = cat;
|
||||
event->fid = fid;
|
||||
event->lin_idx = lin_idx;
|
||||
event->fun = fun;
|
||||
event->tag = PGF_CACHED_BEGIN;
|
||||
event->cat = cat;
|
||||
event->fid = fid;
|
||||
event->ann = ann;
|
||||
event->fun = fun;
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_lzr_cache_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin_idx, PgfCId fun)
|
||||
pgf_lzr_cache_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
|
||||
{
|
||||
PgfLzrCache* cache = gu_container(funcs, PgfLzrCache, funcs);
|
||||
PgfLzrCached* event = gu_buf_extend(cache->events);
|
||||
event->tag = PGF_CACHED_END;
|
||||
event->cat = cat;
|
||||
event->fid = fid;
|
||||
event->lin_idx = lin_idx;
|
||||
event->fun = fun;
|
||||
event->tag = PGF_CACHED_END;
|
||||
event->cat = cat;
|
||||
event->fid = fid;
|
||||
event->ann = ann;
|
||||
event->fun = fun;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -939,8 +939,8 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx)
|
||||
|
||||
if ((*lzr->funcs)->begin_phrase && fapp->ccat != NULL) {
|
||||
(*lzr->funcs)->begin_phrase(lzr->funcs,
|
||||
fun->absfun->type->cid,
|
||||
fapp->fid, lin_idx,
|
||||
fapp->ccat->cnccat->abscat->name,
|
||||
fapp->fid, fapp->ccat->cnccat->labels[lin_idx],
|
||||
fun->absfun->name);
|
||||
}
|
||||
|
||||
@@ -949,8 +949,8 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx)
|
||||
|
||||
if ((*lzr->funcs)->end_phrase && fapp->ccat != NULL) {
|
||||
(*lzr->funcs)->end_phrase(lzr->funcs,
|
||||
fun->absfun->type->cid,
|
||||
fapp->fid, lin_idx,
|
||||
fapp->ccat->cnccat->abscat->name,
|
||||
fapp->fid, fapp->ccat->cnccat->labels[lin_idx],
|
||||
fun->absfun->name);
|
||||
}
|
||||
break;
|
||||
@@ -979,7 +979,7 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx)
|
||||
|
||||
if ((*lzr->funcs)->begin_phrase) {
|
||||
(*lzr->funcs)->begin_phrase(lzr->funcs,
|
||||
cat, flit->fid, 0,
|
||||
cat, flit->fid, "s",
|
||||
"");
|
||||
}
|
||||
|
||||
@@ -1011,7 +1011,7 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx)
|
||||
|
||||
if ((*lzr->funcs)->end_phrase) {
|
||||
(*lzr->funcs)->end_phrase(lzr->funcs,
|
||||
cat, flit->fid, 0,
|
||||
cat, flit->fid, "s",
|
||||
"");
|
||||
}
|
||||
|
||||
|
||||
@@ -83,10 +83,10 @@ struct PgfLinFuncs
|
||||
void (*symbol_token)(PgfLinFuncs** self, PgfToken tok);
|
||||
|
||||
/// Begin phrase
|
||||
void (*begin_phrase)(PgfLinFuncs** self, PgfCId cat, int fid, size_t lindex, PgfCId fun);
|
||||
void (*begin_phrase)(PgfLinFuncs** self, PgfCId cat, int fid, GuString ann, PgfCId fun);
|
||||
|
||||
/// End phrase
|
||||
void (*end_phrase)(PgfLinFuncs** self, PgfCId cat, int fid, size_t lindex, PgfCId fun);
|
||||
void (*end_phrase)(PgfLinFuncs** self, PgfCId cat, int fid, GuString ann, PgfCId fun);
|
||||
|
||||
/// handling nonExist
|
||||
void (*symbol_ne)(PgfLinFuncs** self);
|
||||
|
||||
@@ -6,11 +6,12 @@
|
||||
|
||||
static PgfExprProb*
|
||||
pgf_match_string_lit(PgfLiteralCallback* self, PgfConcr* concr,
|
||||
size_t lin_idx,
|
||||
GuString ann,
|
||||
GuString sentence, size_t* poffset,
|
||||
GuPool *out_pool)
|
||||
{
|
||||
gu_assert(lin_idx == 0);
|
||||
if (strcmp(ann,"s") != 0)
|
||||
return NULL;
|
||||
|
||||
const uint8_t* buf = (uint8_t*) (sentence + *poffset);
|
||||
const uint8_t* p = buf;
|
||||
@@ -51,7 +52,7 @@ pgf_predict_empty_next(GuEnum* self, void* to, GuPool* pool)
|
||||
|
||||
static GuEnum*
|
||||
pgf_predict_empty(PgfLiteralCallback* self, PgfConcr* concr,
|
||||
size_t lin_idx,
|
||||
GuString ann,
|
||||
GuString prefix,
|
||||
GuPool *out_pool)
|
||||
{
|
||||
@@ -67,11 +68,12 @@ static PgfLiteralCallback pgf_string_literal_callback =
|
||||
|
||||
static PgfExprProb*
|
||||
pgf_match_int_lit(PgfLiteralCallback* self, PgfConcr* concr,
|
||||
size_t lin_idx,
|
||||
GuString ann,
|
||||
GuString sentence, size_t* poffset,
|
||||
GuPool *out_pool)
|
||||
{
|
||||
gu_assert(lin_idx == 0);
|
||||
if (strcmp(ann,"s") != 0)
|
||||
return NULL;
|
||||
|
||||
const uint8_t* buf = (uint8_t*) (sentence + *poffset);
|
||||
const uint8_t* p = buf;
|
||||
@@ -121,11 +123,12 @@ static PgfLiteralCallback pgf_int_literal_callback =
|
||||
|
||||
static PgfExprProb*
|
||||
pgf_match_float_lit(PgfLiteralCallback* self, PgfConcr* concr,
|
||||
size_t lin_idx,
|
||||
GuString ann,
|
||||
GuString sentence, size_t* poffset,
|
||||
GuPool *out_pool)
|
||||
{
|
||||
gu_assert(lin_idx == 0);
|
||||
if (strcmp(ann,"s") != 0)
|
||||
return NULL;
|
||||
|
||||
const uint8_t* buf = (uint8_t*) (sentence + *poffset);
|
||||
const uint8_t* p = buf;
|
||||
@@ -226,11 +229,11 @@ pgf_match_name_morpho_callback(PgfMorphoCallback* self_,
|
||||
|
||||
static PgfExprProb*
|
||||
pgf_match_name_lit(PgfLiteralCallback* self, PgfConcr* concr,
|
||||
size_t lin_idx,
|
||||
GuString ann,
|
||||
GuString sentence, size_t* poffset,
|
||||
GuPool *out_pool)
|
||||
{
|
||||
if (lin_idx != 0)
|
||||
if (strcmp(ann,"s") != 0)
|
||||
return NULL;
|
||||
|
||||
GuPool* tmp_pool = gu_local_pool();
|
||||
@@ -349,7 +352,7 @@ pgf_match_unknown_morpho_callback(PgfMorphoCallback* self_,
|
||||
|
||||
static PgfExprProb*
|
||||
pgf_match_unknown_lit(PgfLiteralCallback* self, PgfConcr* concr,
|
||||
size_t lin_idx,
|
||||
GuString ann,
|
||||
GuString sentence, size_t* poffset,
|
||||
GuPool *out_pool)
|
||||
{
|
||||
|
||||
@@ -869,7 +869,7 @@ pgf_lookup_symbol_token(PgfLinFuncs** self, PgfToken token)
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_lookup_begin_phrase(PgfLinFuncs** self, PgfCId cat, int fid, size_t lindex, PgfCId funname)
|
||||
pgf_lookup_begin_phrase(PgfLinFuncs** self, PgfCId cat, int fid, GuString ann, PgfCId funname)
|
||||
{
|
||||
PgfLookupState* st = gu_container(self, PgfLookupState, funcs);
|
||||
|
||||
@@ -883,7 +883,7 @@ pgf_lookup_begin_phrase(PgfLinFuncs** self, PgfCId cat, int fid, size_t lindex,
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_lookup_end_phrase(PgfLinFuncs** self, PgfCId cat, int fid, size_t lindex, PgfCId fun)
|
||||
pgf_lookup_end_phrase(PgfLinFuncs** self, PgfCId cat, int fid, GuString ann, PgfCId fun)
|
||||
{
|
||||
PgfLookupState* st = gu_container(self, PgfLookupState, funcs);
|
||||
st->curr_absfun = NULL;
|
||||
|
||||
@@ -61,6 +61,14 @@ typedef struct {
|
||||
|
||||
typedef enum { BIND_NONE, BIND_HARD, BIND_SOFT } BIND_TYPE;
|
||||
|
||||
/* One lexicon match recorded for a parse state.
 *   idx     - production index taken from the matched sequence
 *   offset  - byte offset into the sentence at which the match ended
 *   sym_idx - symbol index reported by the sequence comparison
 * NOTE(review): field meanings are read off the lookahead code that
 * fills these entries; confirm against the consumers of lexicon_idx. */
typedef struct {
|
||||
PgfProductionIdx* idx;
|
||||
size_t offset;
|
||||
size_t sym_idx;
|
||||
} PgfLexiconIdxEntry;
|
||||
|
||||
typedef GuBuf PgfLexiconIdx;
|
||||
|
||||
struct PgfParseState {
|
||||
PgfParseState* next;
|
||||
|
||||
@@ -74,6 +82,8 @@ struct PgfParseState {
|
||||
size_t end_offset;
|
||||
|
||||
prob_t viterbi_prob;
|
||||
|
||||
PgfLexiconIdx* lexicon_idx;
|
||||
};
|
||||
|
||||
typedef struct PgfAnswers {
|
||||
@@ -113,43 +123,10 @@ struct PgfItem {
|
||||
prob_t inside_prob;
|
||||
};
|
||||
|
||||
static PgfSymbol
|
||||
pgf_prev_extern_sym(PgfSymbol sym)
|
||||
{
|
||||
GuVariantInfo i = gu_variant_open(sym);
|
||||
switch (i.tag) {
|
||||
case PGF_SYMBOL_CAT:
|
||||
return *((PgfSymbol*) (((PgfSymbolCat*) i.data)+1));
|
||||
case PGF_SYMBOL_KP:
|
||||
return *((PgfSymbol*) (((PgfSymbolKP*) i.data)+1));
|
||||
case PGF_SYMBOL_KS: {
|
||||
PgfSymbolKS* sks = (PgfSymbolKS*) i.data;
|
||||
size_t tok_len = strlen(sks->token);
|
||||
return *((PgfSymbol*) (((uint8_t*) sks)+sizeof(PgfSymbolKS)+tok_len+1));
|
||||
}
|
||||
case PGF_SYMBOL_LIT:
|
||||
return *((PgfSymbol*) (((PgfSymbolLit*) i.data)+1));
|
||||
case PGF_SYMBOL_VAR:
|
||||
return *((PgfSymbol*) (((PgfSymbolVar*) i.data)+1));
|
||||
case PGF_SYMBOL_BIND:
|
||||
case PGF_SYMBOL_SOFT_BIND:
|
||||
case PGF_SYMBOL_SOFT_SPACE:
|
||||
return *((PgfSymbol*) (((PgfSymbolBIND*) i.data)+1));
|
||||
case PGF_SYMBOL_CAPIT:
|
||||
case PGF_SYMBOL_ALL_CAPIT:
|
||||
return *((PgfSymbol*) (((PgfSymbolCAPIT*) i.data)+1));
|
||||
case PGF_SYMBOL_NE:
|
||||
return *((PgfSymbol*) (((PgfSymbolNE*) i.data)+1));
|
||||
default:
|
||||
gu_impossible();
|
||||
return gu_null_variant;
|
||||
}
|
||||
}
|
||||
|
||||
static PgfSymbol
|
||||
static PgfSymbols*
|
||||
pgf_collect_extern_tok(PgfParsing* ps, size_t start_offset, size_t end_offset)
|
||||
{
|
||||
PgfSymbol sym = gu_null_variant;
|
||||
GuBuf* syms = gu_new_buf(PgfSymbol, ps->pool);
|
||||
|
||||
const uint8_t* start = (uint8_t*) ps->sentence+start_offset;
|
||||
const uint8_t* end = (uint8_t*) ps->sentence+end_offset;
|
||||
@@ -163,16 +140,15 @@ pgf_collect_extern_tok(PgfParsing* ps, size_t start_offset, size_t end_offset)
|
||||
ucs = gu_utf8_decode(&p);
|
||||
}
|
||||
|
||||
PgfSymbol new_sym;
|
||||
PgfSymbol sym;
|
||||
PgfSymbolKS* sks = (PgfSymbolKS*)
|
||||
gu_alloc_variant(PGF_SYMBOL_KS,
|
||||
sizeof(PgfSymbol)+sizeof(PgfSymbolKS)+len+1,
|
||||
gu_alignof(PgfSymbolKS),
|
||||
&new_sym, ps->pool);
|
||||
sizeof(PgfSymbolKS)+len+1,
|
||||
gu_alignof(PgfSymbolKS),
|
||||
&sym, ps->pool);
|
||||
memcpy((char*) sks->token, start, len);
|
||||
((char*) sks->token)[len] = 0;
|
||||
*((PgfSymbol*) (((uint8_t*) sks)+sizeof(PgfSymbolKS)+len+1)) = sym;
|
||||
sym = new_sym;
|
||||
gu_buf_push(syms, PgfSymbol, sym);
|
||||
|
||||
start = p;
|
||||
while (gu_ucs_is_space(ucs)) {
|
||||
@@ -181,68 +157,16 @@ pgf_collect_extern_tok(PgfParsing* ps, size_t start_offset, size_t end_offset)
|
||||
}
|
||||
}
|
||||
|
||||
return sym;
|
||||
}
|
||||
|
||||
static size_t
|
||||
pgf_item_symbols_length(PgfItem* item)
|
||||
{
|
||||
GuVariantInfo i = gu_variant_open(item->prod);
|
||||
switch (i.tag) {
|
||||
case PGF_PRODUCTION_APPLY: {
|
||||
PgfProductionApply* papp = i.data;
|
||||
return gu_seq_length(papp->fun->lins[item->conts->lin_idx]->syms);
|
||||
}
|
||||
case PGF_PRODUCTION_COERCE: {
|
||||
return 1;
|
||||
}
|
||||
case PGF_PRODUCTION_EXTERN: {
|
||||
PgfProductionExtern* pext = i.data;
|
||||
PgfSymbols* syms;
|
||||
|
||||
if (pext->lins != NULL &&
|
||||
(syms = gu_seq_get(pext->lins,PgfSymbols*,item->conts->lin_idx)) != NULL) {
|
||||
return gu_seq_length(syms);
|
||||
} else {
|
||||
int seq_len = 0;
|
||||
PgfSymbol sym = item->curr_sym;
|
||||
while (!gu_variant_is_null(sym)) {
|
||||
seq_len++;
|
||||
sym = pgf_prev_extern_sym(sym);
|
||||
}
|
||||
|
||||
return seq_len;
|
||||
}
|
||||
}
|
||||
default:
|
||||
gu_impossible();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static PgfSymbols*
|
||||
pgf_extern_syms_get(PgfItem* item, GuPool* pool)
|
||||
{
|
||||
int syms_len = pgf_item_symbols_length(item);
|
||||
|
||||
PgfSymbols* syms =
|
||||
gu_new_seq(PgfSymbol, syms_len, pool);
|
||||
PgfSymbol sym = item->curr_sym;
|
||||
while (!gu_variant_is_null(sym)) {
|
||||
gu_seq_set(syms, PgfSymbol, --syms_len, sym);
|
||||
sym = pgf_prev_extern_sym(sym);
|
||||
}
|
||||
|
||||
return syms;
|
||||
return gu_buf_data_seq(syms);
|
||||
}
|
||||
|
||||
#ifdef PGF_PARSER_DEBUG
|
||||
PGF_INTERNAL void
|
||||
pgf_print_fid(int fid, GuOut* out, GuExn* err);
|
||||
|
||||
PGF_INTERNAL_DECL void
|
||||
pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err);
|
||||
|
||||
#ifdef PGF_PARSER_DEBUG
|
||||
static void
|
||||
pgf_item_symbols(PgfItem* item,
|
||||
size_t* lin_idx, PgfSymbols** syms,
|
||||
@@ -267,11 +191,7 @@ pgf_item_symbols(PgfItem* item,
|
||||
}
|
||||
case PGF_PRODUCTION_EXTERN: {
|
||||
PgfProductionExtern* pext = i.data;
|
||||
|
||||
if (pext->lins == NULL ||
|
||||
(*syms = gu_seq_get(pext->lins, PgfSymbols*, item->conts->lin_idx)) == NULL) {
|
||||
*syms = pgf_extern_syms_get(item, pool);
|
||||
}
|
||||
*syms = pext->lins[item->conts->lin_idx];
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@@ -603,16 +523,11 @@ pgf_item_set_curr_symbol(PgfItem* item, GuPool* pool)
|
||||
case PGF_PRODUCTION_EXTERN: {
|
||||
PgfProductionExtern* pext = i.data;
|
||||
|
||||
PgfSymbols* syms;
|
||||
if (pext->lins != NULL &&
|
||||
(syms = gu_seq_get(pext->lins,PgfSymbols*,item->conts->lin_idx)) != NULL) {
|
||||
if (item->sym_idx == gu_seq_length(syms)) {
|
||||
item->curr_sym = gu_null_variant;
|
||||
} else {
|
||||
item->curr_sym = gu_seq_get(syms, PgfSymbol, item->sym_idx);
|
||||
}
|
||||
} else {
|
||||
PgfSymbols* syms = pext->lins[item->conts->lin_idx];
|
||||
if (item->sym_idx == gu_seq_length(syms)) {
|
||||
item->curr_sym = gu_null_variant;
|
||||
} else {
|
||||
item->curr_sym = gu_seq_get(syms, PgfSymbol, item->sym_idx);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -781,16 +696,6 @@ pgf_result_production(PgfParsing* ps,
|
||||
static void
|
||||
pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep);
|
||||
|
||||
static void
|
||||
pgf_parsing_push_item(PgfParseState* state, PgfItem* item)
|
||||
{
|
||||
if (gu_buf_length(state->agenda) == 0) {
|
||||
state->viterbi_prob =
|
||||
item->inside_prob+item->conts->outside_prob;
|
||||
}
|
||||
gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_parsing_push_production(PgfParsing* ps, PgfParseState* state,
|
||||
PgfItemConts* conts, PgfProduction prod)
|
||||
@@ -822,7 +727,7 @@ pgf_parsing_combine(PgfParsing* ps,
|
||||
}
|
||||
|
||||
pgf_item_advance(item, ps->pool);
|
||||
pgf_parsing_push_item(before, item);
|
||||
gu_buf_heap_push(before->agenda, pgf_item_prob_order, &item);
|
||||
}
|
||||
|
||||
static PgfProduction
|
||||
@@ -851,36 +756,7 @@ pgf_parsing_new_production(PgfItem* item, PgfExprProb *ep, GuPool *pool)
|
||||
break;
|
||||
}
|
||||
case PGF_PRODUCTION_EXTERN: {
|
||||
PgfProductionExtern* pext = i.data;
|
||||
|
||||
if (pext->lins == NULL ||
|
||||
gu_seq_get(pext->lins,PgfSymbols*,item->conts->lin_idx) == NULL) {
|
||||
PgfSymbols* syms =
|
||||
pgf_extern_syms_get(item, pool);
|
||||
|
||||
size_t n_lins = item->conts->ccat->cnccat->n_lins;
|
||||
|
||||
PgfProductionExtern* new_pext = (PgfProductionExtern*)
|
||||
gu_new_variant(PGF_PRODUCTION_EXTERN,
|
||||
PgfProductionExtern,
|
||||
&prod, pool);
|
||||
new_pext->ep = ep;
|
||||
new_pext->lins = gu_new_seq(PgfSymbols*, n_lins, pool);
|
||||
|
||||
if (pext->lins == NULL) {
|
||||
for (size_t i = 0; i < n_lins; i++) {
|
||||
gu_seq_set(new_pext->lins,PgfSymbols*,i,NULL);
|
||||
}
|
||||
} else {
|
||||
for (size_t i = 0; i < n_lins; i++) {
|
||||
gu_seq_set(new_pext->lins,PgfSymbols*,i,
|
||||
gu_seq_get(pext->lins,PgfSymbols*,i));
|
||||
}
|
||||
}
|
||||
gu_seq_set(new_pext->lins,PgfSymbols*,item->conts->lin_idx,syms);
|
||||
} else {
|
||||
prod = item->prod;
|
||||
}
|
||||
prod = item->prod;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@@ -1022,9 +898,65 @@ pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep)
|
||||
}
|
||||
}
|
||||
|
||||
PGF_INTERNAL_DECL int
|
||||
pgf_symbols_cmp(PgfCohortSpot* spot,
|
||||
PgfSymbols* syms, size_t* sym_idx,
|
||||
bool case_sensitive);
|
||||
|
||||
static void
|
||||
pgf_parsing_lookahead(PgfParsing *ps, PgfParseState* state,
|
||||
int i, int j, ptrdiff_t min, ptrdiff_t max)
|
||||
{
|
||||
// This is a variation of a binary search algorithm which
|
||||
// can retrieve all prefixes of a string with minimal
|
||||
// comparisons, i.e. there is no need to lookup every
|
||||
// prefix separately.
|
||||
|
||||
while (i <= j) {
|
||||
int k = (i+j) / 2;
|
||||
PgfSequence* seq = gu_seq_index(ps->concr->sequences, PgfSequence, k);
|
||||
|
||||
PgfCohortSpot start = {0, ps->sentence + state->end_offset};
|
||||
PgfCohortSpot current = start;
|
||||
size_t sym_idx = 0;
|
||||
int cmp = pgf_symbols_cmp(¤t, seq->syms, &sym_idx, ps->case_sensitive);
|
||||
if (cmp < 0) {
|
||||
j = k-1;
|
||||
} else if (cmp > 0) {
|
||||
ptrdiff_t len = current.ptr - start.ptr;
|
||||
|
||||
if (min <= len)
|
||||
pgf_parsing_lookahead(ps, state, i, k-1, min, len);
|
||||
|
||||
if (len+1 <= max)
|
||||
pgf_parsing_lookahead(ps, state, k+1, j, len+1, max);
|
||||
|
||||
break;
|
||||
} else {
|
||||
ptrdiff_t len = current.ptr - start.ptr;
|
||||
|
||||
if (min <= len-1)
|
||||
pgf_parsing_lookahead(ps, state, i, k-1, min, len-1);
|
||||
|
||||
if (seq->idx != NULL) {
|
||||
PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
|
||||
entry->idx = seq->idx;
|
||||
entry->offset = (size_t) (current.ptr - ps->sentence);
|
||||
entry->sym_idx = sym_idx;
|
||||
}
|
||||
|
||||
if (len+1 <= max)
|
||||
pgf_parsing_lookahead(ps, state, k+1, j, len+1, max);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static PgfParseState*
|
||||
pgf_new_parse_state(PgfParsing* ps, size_t start_offset,
|
||||
BIND_TYPE bind_type)
|
||||
BIND_TYPE bind_type,
|
||||
prob_t viterbi_prob)
|
||||
{
|
||||
PgfParseState** pstate;
|
||||
if (ps->before == NULL && start_offset == 0)
|
||||
@@ -1077,172 +1009,36 @@ pgf_new_parse_state(PgfParsing* ps, size_t start_offset,
|
||||
(start_offset == end_offset);
|
||||
state->start_offset = start_offset;
|
||||
state->end_offset = end_offset;
|
||||
state->viterbi_prob = 0;
|
||||
state->viterbi_prob = viterbi_prob;
|
||||
state->lexicon_idx =
|
||||
gu_new_buf(PgfLexiconIdxEntry, ps->pool);
|
||||
|
||||
if (ps->before == NULL && start_offset == 0)
|
||||
state->needs_bind = false;
|
||||
|
||||
if (gu_seq_length(ps->concr->sequences) > 0) {
|
||||
// Add epsilon lexical rules to the bottom up index
|
||||
PgfSequence* seq = gu_seq_index(ps->concr->sequences, PgfSequence, 0);
|
||||
if (gu_seq_length(seq->syms) == 0 && seq->idx != NULL) {
|
||||
PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
|
||||
entry->idx = seq->idx;
|
||||
entry->offset = state->start_offset;
|
||||
entry->sym_idx= 0;
|
||||
}
|
||||
|
||||
// Add non-epsilon lexical rules to the bottom up index
|
||||
if (!state->needs_bind) {
|
||||
pgf_parsing_lookahead(ps, state,
|
||||
0, gu_seq_length(ps->concr->sequences)-1,
|
||||
1, strlen(ps->sentence)-state->end_offset);
|
||||
}
|
||||
}
|
||||
|
||||
*pstate = state;
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
PGF_INTERNAL_DECL int
|
||||
pgf_symbols_cmp(PgfCohortSpot* spot,
|
||||
PgfSymbols* syms, size_t* sym_idx,
|
||||
bool case_sensitive);
|
||||
|
||||
static bool
|
||||
pgf_parsing_scan_helper(PgfParsing *ps, PgfParseState* state,
|
||||
int i, int j, ptrdiff_t min, ptrdiff_t max)
|
||||
{
|
||||
// This is a variation of a binary search algorithm which
|
||||
// can retrieve all prefixes of a string with minimal
|
||||
// comparisons, i.e. there is no need to lookup every
|
||||
// prefix separately.
|
||||
|
||||
bool found = false;
|
||||
while (i <= j) {
|
||||
int k = (i+j) / 2;
|
||||
PgfSequence* seq = gu_seq_index(ps->concr->sequences, PgfSequence, k);
|
||||
|
||||
PgfCohortSpot start = {0, ps->sentence+state->end_offset};
|
||||
PgfCohortSpot current = start;
|
||||
|
||||
size_t sym_idx = 0;
|
||||
int cmp = pgf_symbols_cmp(¤t, seq->syms, &sym_idx, ps->case_sensitive);
|
||||
if (cmp < 0) {
|
||||
j = k-1;
|
||||
} else if (cmp > 0) {
|
||||
ptrdiff_t len = current.ptr - start.ptr;
|
||||
|
||||
if (min <= len)
|
||||
if (pgf_parsing_scan_helper(ps, state, i, k-1, min, len))
|
||||
found = true;
|
||||
|
||||
if (len+1 <= max)
|
||||
if (pgf_parsing_scan_helper(ps, state, k+1, j, len+1, max))
|
||||
found = true;
|
||||
|
||||
break;
|
||||
} else {
|
||||
ptrdiff_t len = current.ptr - start.ptr;
|
||||
|
||||
if (min <= len)
|
||||
if (pgf_parsing_scan_helper(ps, state, i, k-1, min, len))
|
||||
found = true;
|
||||
|
||||
// Here we do bottom-up prediction for all lexical categories.
|
||||
// The epsilon productions will be predicted in top-down
|
||||
// fashion while parsing.
|
||||
if (seq->idx != NULL && len > 0) {
|
||||
found = true;
|
||||
|
||||
// A new state will mark the end of the current match
|
||||
PgfParseState* new_state =
|
||||
pgf_new_parse_state(ps, (size_t) (current.ptr - ps->sentence), BIND_NONE);
|
||||
|
||||
// Bottom-up prediction for lexical rules
|
||||
size_t n_entries = gu_buf_length(seq->idx);
|
||||
for (size_t i = 0; i < n_entries; i++) {
|
||||
PgfProductionIdxEntry* entry =
|
||||
gu_buf_index(seq->idx, PgfProductionIdxEntry, i);
|
||||
|
||||
PgfItemConts* conts =
|
||||
pgf_parsing_get_conts(state,
|
||||
entry->ccat, entry->lin_idx,
|
||||
ps->pool);
|
||||
|
||||
// Create the new category if it doesn't exist yet
|
||||
PgfCCat* tmp_ccat = pgf_parsing_get_completed(new_state, conts);
|
||||
PgfCCat* ccat = tmp_ccat;
|
||||
if (ccat == NULL) {
|
||||
ccat = pgf_parsing_create_completed(ps, new_state, conts, INFINITY);
|
||||
}
|
||||
|
||||
// Add the production
|
||||
if (ccat->prods == NULL || ccat->n_synprods >= gu_seq_length(ccat->prods)) {
|
||||
ccat->prods = gu_realloc_seq(ccat->prods, PgfProduction, ccat->n_synprods+1);
|
||||
}
|
||||
GuVariantInfo i;
|
||||
i.tag = PGF_PRODUCTION_APPLY;
|
||||
i.data = entry->papp;
|
||||
PgfProduction prod = gu_variant_close(i);
|
||||
gu_seq_set(ccat->prods, PgfProduction, ccat->n_synprods++, prod);
|
||||
|
||||
// Update the category's probability to be minimum
|
||||
if (ccat->viterbi_prob > entry->papp->fun->ep->prob)
|
||||
ccat->viterbi_prob = entry->papp->fun->ep->prob;
|
||||
|
||||
#ifdef PGF_PARSER_DEBUG
|
||||
GuPool* tmp_pool = gu_new_pool();
|
||||
GuOut* out = gu_file_out(stderr, tmp_pool);
|
||||
GuExn* err = gu_exn(tmp_pool);
|
||||
if (tmp_ccat == NULL) {
|
||||
gu_printf(out, err, "[");
|
||||
pgf_print_range(state, new_state, out, err);
|
||||
gu_puts("; ", out, err);
|
||||
pgf_print_fid(conts->ccat->fid, out, err);
|
||||
gu_printf(out, err, "; %d; ",
|
||||
conts->lin_idx);
|
||||
pgf_print_fid(ccat->fid, out, err);
|
||||
gu_puts("] ", out, err);
|
||||
pgf_print_fid(ccat->fid, out, err);
|
||||
gu_printf(out, err, ".chunk_count=%d\n", ccat->chunk_count);
|
||||
}
|
||||
pgf_print_production(ccat->fid, prod, out, err);
|
||||
gu_pool_free(tmp_pool);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
if (len <= max)
|
||||
if (pgf_parsing_scan_helper(ps, state, k+1, j, len, max))
|
||||
found = true;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_parsing_scan(PgfParsing *ps)
|
||||
{
|
||||
size_t len = strlen(ps->sentence);
|
||||
|
||||
PgfParseState* state =
|
||||
pgf_new_parse_state(ps, 0, BIND_SOFT);
|
||||
|
||||
while (state != NULL && state->end_offset < len) {
|
||||
if (state->needs_bind) {
|
||||
// We have encountered two tokens without space in between.
|
||||
// Those can be accepted only if there is a BIND token
|
||||
// in between. We encode this by having one more state
|
||||
// at the same offset. A transition between these two
|
||||
// states is possible only with the BIND token.
|
||||
state =
|
||||
pgf_new_parse_state(ps, state->end_offset, BIND_HARD);
|
||||
}
|
||||
|
||||
if (!pgf_parsing_scan_helper
|
||||
(ps, state,
|
||||
0, gu_seq_length(ps->concr->sequences)-1,
|
||||
1, len-state->end_offset)) {
|
||||
// skip one character and try again
|
||||
GuString s = ps->sentence+state->end_offset;
|
||||
gu_utf8_decode((const uint8_t**) &s);
|
||||
pgf_new_parse_state(ps, s-ps->sentence, BIND_NONE);
|
||||
}
|
||||
|
||||
if (state == ps->before)
|
||||
state = ps->after;
|
||||
else
|
||||
state = state->next;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_parsing_add_transition(PgfParsing* ps, PgfToken tok, PgfItem* item)
|
||||
{
|
||||
@@ -1262,8 +1058,9 @@ pgf_parsing_add_transition(PgfParsing* ps, PgfToken tok, PgfItem* item)
|
||||
if (!ps->before->needs_bind && cmp_string(&current, tok, ps->case_sensitive) == 0) {
|
||||
PgfParseState* state =
|
||||
pgf_new_parse_state(ps, (current.ptr - ps->sentence),
|
||||
BIND_NONE);
|
||||
pgf_parsing_push_item(state, item);
|
||||
BIND_NONE,
|
||||
item->inside_prob+item->conts->outside_prob);
|
||||
gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
|
||||
} else {
|
||||
pgf_item_free(ps, item);
|
||||
}
|
||||
@@ -1273,17 +1070,18 @@ pgf_parsing_add_transition(PgfParsing* ps, PgfToken tok, PgfItem* item)
|
||||
static void
|
||||
pgf_parsing_predict_lexeme(PgfParsing* ps, PgfItemConts* conts,
|
||||
PgfProductionIdxEntry* entry,
|
||||
size_t offset)
|
||||
size_t offset, size_t sym_idx)
|
||||
{
|
||||
GuVariantInfo i = { PGF_PRODUCTION_APPLY, entry->papp };
|
||||
PgfProduction prod = gu_variant_close(i);
|
||||
PgfItem* item =
|
||||
pgf_new_item(ps, conts, prod);
|
||||
PgfSymbols* syms = entry->papp->fun->lins[conts->lin_idx]->syms;
|
||||
item->sym_idx = gu_seq_length(syms);
|
||||
item->sym_idx = sym_idx;
|
||||
pgf_item_set_curr_symbol(item, ps->pool);
|
||||
prob_t prob = item->inside_prob+item->conts->outside_prob;
|
||||
PgfParseState* state =
|
||||
pgf_new_parse_state(ps, offset, BIND_NONE);
|
||||
pgf_new_parse_state(ps, offset, BIND_NONE, prob);
|
||||
if (state->viterbi_prob > prob) {
|
||||
state->viterbi_prob = prob;
|
||||
}
|
||||
@@ -1337,36 +1135,34 @@ pgf_parsing_td_predict(PgfParsing* ps,
|
||||
pgf_parsing_push_production(ps, ps->before, conts, prod);
|
||||
}
|
||||
|
||||
// Top-down prediction for epsilon lexical rules if any
|
||||
PgfSequence* seq = gu_seq_index(ps->concr->sequences, PgfSequence, 0);
|
||||
if (gu_seq_length(seq->syms) == 0 && seq->idx != NULL) {
|
||||
// Bottom-up prediction for lexical and epsilon rules
|
||||
size_t n_idcs = gu_buf_length(ps->before->lexicon_idx);
|
||||
for (size_t i = 0; i < n_idcs; i++) {
|
||||
PgfLexiconIdxEntry* lentry =
|
||||
gu_buf_index(ps->before->lexicon_idx, PgfLexiconIdxEntry, i);
|
||||
|
||||
PgfProductionIdxEntry key;
|
||||
key.ccat = ccat;
|
||||
key.lin_idx = lin_idx;
|
||||
key.papp = NULL;
|
||||
PgfProductionIdxEntry* value =
|
||||
gu_seq_binsearch(gu_buf_data_seq(seq->idx),
|
||||
gu_seq_binsearch(gu_buf_data_seq(lentry->idx),
|
||||
pgf_production_idx_entry_order,
|
||||
PgfProductionIdxEntry, &key);
|
||||
|
||||
if (value != NULL) {
|
||||
GuVariantInfo i = { PGF_PRODUCTION_APPLY, value->papp };
|
||||
PgfProduction prod = gu_variant_close(i);
|
||||
pgf_parsing_push_production(ps, ps->before, conts, prod);
|
||||
pgf_parsing_predict_lexeme(ps, conts, value, lentry->offset, lentry->sym_idx);
|
||||
|
||||
PgfProductionIdxEntry* start =
|
||||
gu_buf_data(seq->idx);
|
||||
gu_buf_data(lentry->idx);
|
||||
PgfProductionIdxEntry* end =
|
||||
start + gu_buf_length(seq->idx)-1;
|
||||
start + gu_buf_length(lentry->idx)-1;
|
||||
|
||||
PgfProductionIdxEntry* left = value-1;
|
||||
while (left >= start &&
|
||||
value->ccat->fid == left->ccat->fid &&
|
||||
value->lin_idx == left->lin_idx) {
|
||||
GuVariantInfo i = { PGF_PRODUCTION_APPLY, left->papp };
|
||||
PgfProduction prod = gu_variant_close(i);
|
||||
pgf_parsing_push_production(ps, ps->before, conts, prod);
|
||||
pgf_parsing_predict_lexeme(ps, conts, left, lentry->offset, lentry->sym_idx);
|
||||
left--;
|
||||
}
|
||||
|
||||
@@ -1374,9 +1170,7 @@ pgf_parsing_td_predict(PgfParsing* ps,
|
||||
while (right <= end &&
|
||||
value->ccat->fid == right->ccat->fid &&
|
||||
value->lin_idx == right->lin_idx) {
|
||||
GuVariantInfo i = { PGF_PRODUCTION_APPLY, right->papp };
|
||||
PgfProduction prod = gu_variant_close(i);
|
||||
pgf_parsing_push_production(ps, ps->before, conts, prod);
|
||||
pgf_parsing_predict_lexeme(ps, conts, right, lentry->offset, lentry->sym_idx);
|
||||
right++;
|
||||
}
|
||||
}
|
||||
@@ -1415,7 +1209,7 @@ pgf_parsing_pre(PgfParsing* ps, PgfItem* item, PgfSymbols* syms)
|
||||
} else {
|
||||
item->alt = 0;
|
||||
pgf_item_advance(item, ps->pool);
|
||||
pgf_parsing_push_item(ps->before, item);
|
||||
gu_buf_heap_push(ps->before->agenda, pgf_item_prob_order, &item);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1514,28 +1308,40 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
|
||||
|
||||
if (callback != NULL) {
|
||||
ep = callback->match(callback, ps->concr,
|
||||
slit->r,
|
||||
parg->ccat->cnccat->labels[slit->r],
|
||||
ps->sentence, &offset,
|
||||
ps->out_pool);
|
||||
}
|
||||
}
|
||||
|
||||
if (ep != NULL) {
|
||||
PgfSymbols* syms =
|
||||
pgf_collect_extern_tok(ps, start, offset);
|
||||
|
||||
size_t n_lins = conts->ccat->cnccat->n_lins;
|
||||
|
||||
PgfProduction prod;
|
||||
PgfProductionExtern* pext =
|
||||
gu_new_variant(PGF_PRODUCTION_EXTERN,
|
||||
PgfProductionExtern,
|
||||
&prod, ps->pool);
|
||||
pext->ep = ep;
|
||||
pext->lins = NULL;
|
||||
gu_new_flex_variant(PGF_PRODUCTION_EXTERN,
|
||||
PgfProductionExtern,
|
||||
lins, n_lins,
|
||||
&prod, ps->pool);
|
||||
pext->ep = ep;
|
||||
pext->n_lins = n_lins;
|
||||
|
||||
for (size_t i = 0; i < n_lins; i++) {
|
||||
pext->lins[i] = NULL;
|
||||
}
|
||||
pext->lins[conts->lin_idx] = syms;
|
||||
|
||||
PgfItem* item =
|
||||
pgf_new_item(ps, conts, prod);
|
||||
item->curr_sym = pgf_collect_extern_tok(ps,start,offset);
|
||||
item->sym_idx = pgf_item_symbols_length(item);
|
||||
item->curr_sym = gu_null_variant;
|
||||
item->sym_idx = gu_seq_length(syms);
|
||||
PgfParseState* state =
|
||||
pgf_new_parse_state(ps, offset, BIND_NONE);
|
||||
pgf_parsing_push_item(state, item);
|
||||
pgf_new_parse_state(ps, offset, BIND_NONE,
|
||||
item->inside_prob+item->conts->outside_prob);
|
||||
gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
|
||||
match = true;
|
||||
}
|
||||
}
|
||||
@@ -1578,10 +1384,11 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
|
||||
if (ps->before->start_offset == ps->before->end_offset &&
|
||||
ps->before->needs_bind) {
|
||||
PgfParseState* state =
|
||||
pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD);
|
||||
pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD,
|
||||
item->inside_prob+item->conts->outside_prob);
|
||||
if (state != NULL) {
|
||||
pgf_item_advance(item, ps->pool);
|
||||
pgf_parsing_push_item(state, item);
|
||||
gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
|
||||
} else {
|
||||
pgf_item_free(ps, item);
|
||||
}
|
||||
@@ -1595,10 +1402,11 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
|
||||
if (ps->before->start_offset == ps->before->end_offset) {
|
||||
if (ps->before->needs_bind) {
|
||||
PgfParseState* state =
|
||||
pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD);
|
||||
pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD,
|
||||
item->inside_prob+item->conts->outside_prob);
|
||||
if (state != NULL) {
|
||||
pgf_item_advance(item, ps->pool);
|
||||
pgf_parsing_push_item(state, item);
|
||||
gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
|
||||
} else {
|
||||
pgf_item_free(ps, item);
|
||||
}
|
||||
@@ -1607,12 +1415,13 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
|
||||
}
|
||||
} else {
|
||||
pgf_item_advance(item, ps->pool);
|
||||
pgf_parsing_push_item(ps->before, item);
|
||||
gu_buf_heap_push(ps->before->agenda, pgf_item_prob_order, &item);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_CAPIT:
|
||||
case PGF_SYMBOL_ALL_CAPIT: {
|
||||
printf("PGF_SYMBOL_CAPIT\n");
|
||||
pgf_item_advance(item, ps->pool);
|
||||
pgf_parsing_symbol(ps, item, item->curr_sym);
|
||||
break;
|
||||
@@ -1857,7 +1666,8 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat,
|
||||
ps->heuristic_factor = heuristic_factor;
|
||||
}
|
||||
|
||||
pgf_parsing_scan(ps);
|
||||
PgfParseState* state =
|
||||
pgf_new_parse_state(ps, 0, BIND_SOFT, 0);
|
||||
|
||||
int fidString = -1;
|
||||
PgfCCat* start_ccat = gu_new(PgfCCat, ps->pool);
|
||||
@@ -1879,7 +1689,7 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat,
|
||||
#endif
|
||||
|
||||
PgfItemConts* conts =
|
||||
pgf_parsing_get_conts(ps->before, start_ccat, 0, ps->pool);
|
||||
pgf_parsing_get_conts(state, start_ccat, 0, ps->pool);
|
||||
gu_buf_push(conts->items, PgfItem*, NULL);
|
||||
|
||||
size_t n_ccats = gu_seq_length(cnccat->cats);
|
||||
@@ -2218,6 +2028,8 @@ pgf_process_generated_cat(PgfParsing* ps,
|
||||
children[i] = pcoerce->coerce;
|
||||
break;
|
||||
}
|
||||
case PGF_PRODUCTION_EXTERN:
|
||||
just_coercions = false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2363,6 +2175,104 @@ pgf_parse_with_heuristics(PgfConcr* concr, PgfType* typ, GuString sentence,
|
||||
return &ps->en;
|
||||
}
|
||||
|
||||
/* Parse `sentence` starting from the category typ->cid and return the
 * parsing state itself rather than an enumeration of expressions.
 *
 * The parser is advanced with pgf_parsing_proceed until
 * ps->expr_queue holds at least n_roots entries or
 * pgf_parsing_proceed reports that it cannot continue.
 *
 * Returns NULL if the concrete syntax is not loaded (PgfExn is raised
 * on `err`) or if pgf_parsing_init fails.
 */
PGF_API PgfParsing*
pgf_parse_to_chart(PgfConcr* concr, PgfType* typ, GuString sentence,
                   double heuristics,
                   PgfCallbacksMap* callbacks,
                   size_t n_roots,
                   GuExn* err,
                   GuPool* pool, GuPool* out_pool)
{
	if (concr->sequences == NULL ||
	    concr->cnccats == NULL) {
		GuExnData* err_data = gu_raise(err, PgfExn);
		if (err_data) {
			err_data->data = "The concrete syntax is not loaded";
		}
		// Bail out even when gu_raise handed back no data slot:
		// continuing would parse with an unloaded concrete syntax.
		return NULL;
	}

	// Begin parsing a sentence with the specified category
	PgfParsing* ps =
		pgf_parsing_init(concr, typ->cid, sentence, heuristics, callbacks, NULL, err, pool, out_pool);
	if (ps == NULL) {
		return NULL;
	}

#ifdef PGF_COUNTS_DEBUG
	pgf_parsing_print_counts(ps);
#endif

	while (gu_buf_length(ps->expr_queue) < n_roots) {
		if (!pgf_parsing_proceed(ps)) {
			break;
		}

#ifdef PGF_COUNTS_DEBUG
		pgf_parsing_print_counts(ps);
#endif
	}

	return ps;
}
|
||||
|
||||
/* Return the distinct categories referenced by the entries of
 * ps->expr_queue (via ->answers->ccat), in order of first occurrence.
 * The sequence is allocated in `pool` with room for one slot per queue
 * entry; its length is then set to the number of distinct categories.
 */
PGF_API PgfCCats*
pgf_get_parse_roots(PgfParsing* ps, GuPool* pool)
{
	size_t n_results = gu_buf_length(ps->expr_queue);
	GuSeq* unique = gu_new_seq(PgfCCat*, n_results, pool);
	size_t n_unique = 0;

	for (size_t r = 0; r < n_results; r++) {
		PgfCCat* root =
			gu_buf_get(ps->expr_queue, PgfExprState*, r)->answers->ccat;

		// Linear scan over the categories collected so far
		size_t pos = 0;
		while (pos < n_unique &&
		       gu_seq_get(unique, PgfCCat*, pos) != root) {
			pos++;
		}

		if (pos == n_unique) {
			// Not seen before: append it
			gu_seq_set(unique, PgfCCat*, n_unique, root);
			n_unique++;
		}
	}

	unique->len = n_unique;
	return unique;
}
|
||||
|
||||
/* Follow the continuation chain of `ccat` (ccat->conts->ccat, ...) and
 * collect one PgfParseRange per link whose start and end offsets
 * differ. For each link the start is the end_offset of the state
 * stored in the continuation; the end is the start_offset of the first
 * state, walking from ps->before along ->next, in which the category
 * is registered as completed.
 *
 * The state cursor is shared by all links: it is never rewound, so the
 * search for each link resumes where the previous one stopped.
 */
PGF_API GuSeq*
pgf_ccat_to_range(PgfParsing* ps, PgfCCat* ccat, GuPool* pool)
{
	GuBuf* ranges = gu_new_buf(PgfParseRange, pool);
	PgfParseState* cursor = ps->before;

	for (; ccat->conts != NULL; ccat = ccat->conts->ccat) {
		size_t from = ccat->conts->state->end_offset;
		size_t to   = from;

		for (; cursor != NULL; cursor = cursor->next) {
			if (pgf_parsing_get_completed(cursor, ccat->conts) != ccat)
				continue;

			if (cursor->start_offset >= from)
				to = cursor->start_offset;
			break;
		}

		if (to == from)
			continue;   // empty span: nothing to report for this link

		PgfParseRange* r = gu_buf_extend(ranges);
		r->start = from;
		r->end   = to;
		r->field = ccat->cnccat->labels[ccat->conts->lin_idx];
	}

	return gu_buf_data_seq(ranges);
}
|
||||
|
||||
PGF_API PgfExprEnum*
|
||||
pgf_parse_with_oracle(PgfConcr* concr, PgfType* typ,
|
||||
GuString sentence,
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
typedef struct {
|
||||
int start, end;
|
||||
PgfCId cat;
|
||||
size_t lin_idx;
|
||||
GuString ann;
|
||||
} PgfPhrase;
|
||||
|
||||
typedef struct {
|
||||
@@ -46,14 +46,14 @@ pgf_metrics_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_metrics_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin_index, PgfCId fun)
|
||||
pgf_metrics_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
|
||||
{
|
||||
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
|
||||
gu_buf_push(state->marks, int, state->pos);
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_metrics_lzn_end_phrase1(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin_idx, PgfCId fun)
|
||||
pgf_metrics_lzn_end_phrase1(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
|
||||
{
|
||||
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
|
||||
|
||||
@@ -65,7 +65,7 @@ pgf_metrics_lzn_end_phrase1(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin
|
||||
phrase->start = start;
|
||||
phrase->end = end;
|
||||
phrase->cat = cat;
|
||||
phrase->lin_idx = lin_idx;
|
||||
phrase->ann = ann;
|
||||
gu_buf_push(state->phrases, PgfPhrase*, phrase);
|
||||
}
|
||||
}
|
||||
@@ -85,7 +85,7 @@ pgf_metrics_symbol_bind(PgfLinFuncs** funcs)
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin_idx, PgfCId fun)
|
||||
pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
|
||||
{
|
||||
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
|
||||
|
||||
@@ -100,7 +100,7 @@ pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin
|
||||
if (phrase->start == start &&
|
||||
phrase->end == end &&
|
||||
strcmp(phrase->cat, cat) == 0 &&
|
||||
phrase->lin_idx == lin_idx) {
|
||||
strcmp(phrase->ann, ann) == 0) {
|
||||
state->matches++;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -220,6 +220,20 @@ pgf_category_prob(PgfPGF* pgf, PgfCId catname)
|
||||
return abscat->prob;
|
||||
}
|
||||
|
||||
/* Look up the concrete category `catname` in concr->cnccats.
 *
 * On success *n_lins receives the category's n_lins and the array of
 * field labels is returned (elsewhere in this file it is indexed as
 * labels[lin_idx]). If the category is unknown, *n_lins is set to 0
 * and NULL is returned.
 */
PGF_API GuString*
pgf_category_fields(PgfConcr* concr, PgfCId catname, size_t *n_lins)
{
	PgfCncCat* cnccat =
		gu_map_get(concr->cnccats, catname, PgfCncCat*);
	if (!cnccat) {
		*n_lins = 0;
		return NULL;
	}

	*n_lins = cnccat->n_lins;
	// Return the label array itself; `&cnccat->labels` would have the
	// wrong pointer type for a GuString* result.
	return cnccat->labels;
}
|
||||
|
||||
PGF_API GuString
|
||||
pgf_language_code(PgfConcr* concr)
|
||||
{
|
||||
|
||||
@@ -90,6 +90,9 @@ pgf_category_context(PgfPGF *gr, PgfCId catname);
|
||||
PGF_API_DECL prob_t
|
||||
pgf_category_prob(PgfPGF* pgf, PgfCId catname);
|
||||
|
||||
PGF_API GuString*
|
||||
pgf_category_fields(PgfConcr* concr, PgfCId catname, size_t *n_lins);
|
||||
|
||||
PGF_API_DECL void
|
||||
pgf_iter_functions(PgfPGF* pgf, GuMapItor* itor, GuExn* err);
|
||||
|
||||
@@ -163,8 +166,8 @@ pgf_lookup_morpho(PgfConcr *concr, GuString sentence,
|
||||
PgfMorphoCallback* callback, GuExn* err);
|
||||
|
||||
typedef struct {
|
||||
size_t pos;
|
||||
GuString ptr;
|
||||
size_t pos; // position in Unicode characters
|
||||
GuString ptr; // pointer into the string
|
||||
} PgfCohortSpot;
|
||||
|
||||
typedef struct {
|
||||
@@ -203,6 +206,12 @@ pgf_parse_with_heuristics(PgfConcr* concr, PgfType* typ,
|
||||
GuExn* err,
|
||||
GuPool* pool, GuPool* out_pool);
|
||||
|
||||
// One span of the input sentence together with the label of the
// category field that covers it (filled in by pgf_ccat_to_range).
typedef struct {
|
||||
size_t start;    // offset where the span starts (a parse state's end_offset)
|
||||
size_t end;      // offset where the span ends (a parse state's start_offset)
|
||||
GuString field;  // label of the field, taken from cnccat->labels[lin_idx]
|
||||
} PgfParseRange;
|
||||
|
||||
typedef struct PgfOracleCallback PgfOracleCallback;
|
||||
|
||||
struct PgfOracleCallback {
|
||||
@@ -243,11 +252,11 @@ typedef struct PgfLiteralCallback PgfLiteralCallback;
|
||||
|
||||
struct PgfLiteralCallback {
|
||||
PgfExprProb* (*match)(PgfLiteralCallback* self, PgfConcr* concr,
|
||||
size_t lin_idx,
|
||||
GuString ann,
|
||||
GuString sentence, size_t* poffset,
|
||||
GuPool *out_pool);
|
||||
GuEnum* (*predict)(PgfLiteralCallback* self, PgfConcr* concr,
|
||||
size_t lin_idx,
|
||||
GuString ann,
|
||||
GuString prefix,
|
||||
GuPool *out_pool);
|
||||
};
|
||||
|
||||
@@ -114,7 +114,7 @@ pgf_morpho_iter(PgfProductionIdx* idx,
|
||||
|
||||
PgfCId lemma = entry->papp->fun->absfun->name;
|
||||
GuString analysis = entry->ccat->cnccat->labels[entry->lin_idx];
|
||||
|
||||
|
||||
prob_t prob = entry->ccat->cnccat->abscat->prob +
|
||||
entry->papp->fun->absfun->ep.prob;
|
||||
callback->callback(callback,
|
||||
@@ -234,12 +234,13 @@ typedef struct {
|
||||
GuEnum en;
|
||||
PgfConcr* concr;
|
||||
GuString sentence;
|
||||
GuString current;
|
||||
size_t len;
|
||||
PgfMorphoCallback* callback;
|
||||
GuExn* err;
|
||||
bool case_sensitive;
|
||||
GuBuf* spots;
|
||||
GuBuf* skip_spots;
|
||||
GuBuf* empty_buf;
|
||||
GuBuf* found;
|
||||
} PgfCohortsState;
|
||||
|
||||
@@ -255,6 +256,23 @@ cmp_cohort_spot(GuOrder* self, const void* a, const void* b)
|
||||
static GuOrder
|
||||
pgf_cohort_spot_order[1] = {{ cmp_cohort_spot }};
|
||||
|
||||
static void
|
||||
pgf_lookup_cohorts_report_skip(PgfCohortsState *state,
|
||||
PgfCohortSpot* spot)
|
||||
{
|
||||
size_t n_spots = gu_buf_length(state->skip_spots);
|
||||
for (size_t i = 0; i < n_spots; i++) {
|
||||
PgfCohortSpot* skip_spot =
|
||||
gu_buf_index(state->skip_spots, PgfCohortSpot, i);
|
||||
|
||||
PgfCohortRange* range = gu_buf_insert(state->found, 0);
|
||||
range->start = *skip_spot;
|
||||
range->end = *spot;
|
||||
range->buf = state->empty_buf;
|
||||
}
|
||||
gu_buf_flush(state->skip_spots);
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_lookup_cohorts_helper(PgfCohortsState *state, PgfCohortSpot* spot,
|
||||
int i, int j, ptrdiff_t min, ptrdiff_t max)
|
||||
@@ -291,18 +309,23 @@ pgf_lookup_cohorts_helper(PgfCohortsState *state, PgfCohortSpot* spot,
|
||||
pgf_lookup_cohorts_helper(state, spot, i, k-1, min, len);
|
||||
|
||||
if (seq->idx != NULL && gu_buf_length(seq->idx) > 0) {
|
||||
// Report unknown words
|
||||
pgf_lookup_cohorts_report_skip(state, spot);
|
||||
|
||||
// Report the actual hit
|
||||
PgfCohortRange* range = gu_buf_insert(state->found, 0);
|
||||
range->start = *spot;
|
||||
range->end = current;
|
||||
range->buf = seq->idx;
|
||||
}
|
||||
|
||||
while (*current.ptr != 0) {
|
||||
if (!skip_space(&current.ptr, &current.pos))
|
||||
break;
|
||||
}
|
||||
// Schedule the next search spot
|
||||
while (*current.ptr != 0) {
|
||||
if (!skip_space(&current.ptr, &current.pos))
|
||||
break;
|
||||
}
|
||||
|
||||
gu_buf_heap_push(state->spots, pgf_cohort_spot_order, &current);
|
||||
gu_buf_heap_push(state->spots, pgf_cohort_spot_order, &current);
|
||||
}
|
||||
|
||||
if (len <= max)
|
||||
pgf_lookup_cohorts_helper(state, spot, k+1, j, len, max);
|
||||
@@ -318,29 +341,67 @@ pgf_lookup_cohorts_enum_next(GuEnum* self, void* to, GuPool* pool)
|
||||
PgfCohortsState* state = gu_container(self, PgfCohortsState, en);
|
||||
|
||||
while (gu_buf_length(state->found) == 0 &&
|
||||
gu_buf_length(state->spots) > 0) {
|
||||
gu_buf_length(state->spots) > 0) {
|
||||
PgfCohortSpot spot;
|
||||
gu_buf_heap_pop(state->spots, pgf_cohort_spot_order, &spot);
|
||||
|
||||
if (spot.ptr == state->current)
|
||||
continue;
|
||||
GuString next_ptr = state->sentence+state->len;
|
||||
while (gu_buf_length(state->spots) > 0) {
|
||||
GuString ptr =
|
||||
gu_buf_index(state->spots, PgfCohortSpot, 0)->ptr;
|
||||
if (ptr > spot.ptr) {
|
||||
next_ptr = ptr;
|
||||
break;
|
||||
}
|
||||
gu_buf_heap_pop(state->spots, pgf_cohort_spot_order, &spot);
|
||||
}
|
||||
|
||||
if (*spot.ptr == 0)
|
||||
break;
|
||||
bool needs_report = true;
|
||||
while (next_ptr > spot.ptr) {
|
||||
pgf_lookup_cohorts_helper
|
||||
(state, &spot,
|
||||
0, gu_seq_length(state->concr->sequences)-1,
|
||||
1, (state->sentence+state->len)-spot.ptr);
|
||||
|
||||
pgf_lookup_cohorts_helper
|
||||
(state, &spot,
|
||||
0, gu_seq_length(state->concr->sequences)-1,
|
||||
1, (state->sentence+state->len)-spot.ptr);
|
||||
|
||||
if (gu_buf_length(state->found) == 0) {
|
||||
// skip one character and try again
|
||||
gu_utf8_decode((const uint8_t**) &spot.ptr);
|
||||
spot.pos++;
|
||||
gu_buf_heap_push(state->spots, pgf_cohort_spot_order, &spot);
|
||||
// got a hit -> exit
|
||||
if (gu_buf_length(state->found) > 0)
|
||||
break;
|
||||
|
||||
if (needs_report) {
|
||||
// no hit, but the word must be reported as unknown.
|
||||
gu_buf_push(state->skip_spots, PgfCohortSpot, spot);
|
||||
needs_report = false;
|
||||
}
|
||||
|
||||
// skip one character
|
||||
const uint8_t* ptr = (const uint8_t*) spot.ptr;
|
||||
GuUCS c = gu_utf8_decode(&ptr);
|
||||
if (gu_ucs_is_space(c)) {
|
||||
// We have encounter a space and we must report
|
||||
// a new unknown word.
|
||||
pgf_lookup_cohorts_report_skip(state, &spot);
|
||||
|
||||
spot.ptr = (GuString) ptr;
|
||||
spot.pos++;
|
||||
|
||||
// Schedule the next search spot
|
||||
while (*spot.ptr != 0) {
|
||||
if (!skip_space(&spot.ptr, &spot.pos))
|
||||
break;
|
||||
}
|
||||
|
||||
gu_buf_heap_push(state->spots, pgf_cohort_spot_order, &spot);
|
||||
break;
|
||||
} else {
|
||||
spot.ptr = (GuString) ptr;
|
||||
spot.pos++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PgfCohortSpot end_spot = {state->len, state->sentence+state->len};
|
||||
pgf_lookup_cohorts_report_skip(state, &end_spot);
|
||||
|
||||
PgfCohortRange* pRes = (PgfCohortRange*)to;
|
||||
|
||||
if (gu_buf_length(state->found) == 0) {
|
||||
@@ -349,15 +410,19 @@ pgf_lookup_cohorts_enum_next(GuEnum* self, void* to, GuPool* pool)
|
||||
pRes->end.pos = 0;
|
||||
pRes->end.ptr = NULL;
|
||||
pRes->buf = NULL;
|
||||
state->current = NULL;
|
||||
return;
|
||||
} else do {
|
||||
} else for (;;) {
|
||||
*pRes = gu_buf_pop(state->found, PgfCohortRange);
|
||||
state->current = pRes->start.ptr;
|
||||
pgf_morpho_iter(pRes->buf, state->callback, state->err);
|
||||
} while (gu_buf_length(state->found) > 0 &&
|
||||
gu_buf_index_last(state->found, PgfCohortRange)->end.ptr == pRes->end.ptr);
|
||||
|
||||
|
||||
if (gu_buf_length(state->found) <= 0)
|
||||
break;
|
||||
|
||||
PgfCohortRange* last =
|
||||
gu_buf_index_last(state->found, PgfCohortRange);
|
||||
if (last->start.ptr != pRes->start.ptr ||
|
||||
last->end.ptr != pRes->end.ptr)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
PGF_API GuEnum*
|
||||
@@ -374,15 +439,17 @@ pgf_lookup_cohorts(PgfConcr *concr, GuString sentence,
|
||||
}
|
||||
|
||||
PgfCohortsState* state = gu_new(PgfCohortsState, pool);
|
||||
state->en.next = pgf_lookup_cohorts_enum_next;
|
||||
state->concr = concr;
|
||||
state->sentence= sentence;
|
||||
state->len = strlen(sentence);
|
||||
state->callback= callback;
|
||||
state->err = err;
|
||||
state->case_sensitive = pgf_is_case_sensitive(concr);
|
||||
state->spots = gu_new_buf(PgfCohortSpot, pool);
|
||||
state->found = gu_new_buf(PgfCohortRange, pool);
|
||||
state->en.next = pgf_lookup_cohorts_enum_next;
|
||||
state->concr = concr;
|
||||
state->sentence = sentence;
|
||||
state->len = strlen(sentence);
|
||||
state->callback = callback;
|
||||
state->err = err;
|
||||
state->case_sensitive= pgf_is_case_sensitive(concr);
|
||||
state->spots = gu_new_buf(PgfCohortSpot, pool);
|
||||
state->skip_spots = gu_new_buf(PgfCohortSpot, pool);
|
||||
state->empty_buf = gu_new_buf(PgfProductionIdxEntry, pool);
|
||||
state->found = gu_new_buf(PgfCohortRange, pool);
|
||||
|
||||
PgfCohortSpot spot = {0,sentence};
|
||||
while (*spot.ptr != 0) {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,94 +0,0 @@
|
||||
#ifndef SG_SG_H_
|
||||
#define SG_SG_H_
|
||||
|
||||
typedef long long int SgId;
|
||||
|
||||
#include <gu/exn.h>
|
||||
#include <pgf/pgf.h>
|
||||
|
||||
typedef struct SgSG SgSG;
|
||||
|
||||
SgSG*
|
||||
sg_open(const char *filename, GuExn* err);
|
||||
|
||||
void
|
||||
sg_close(SgSG *sg, GuExn* err);
|
||||
|
||||
void
|
||||
sg_begin_trans(SgSG* sg, GuExn* err);
|
||||
|
||||
void
|
||||
sg_commit(SgSG* sg, GuExn* err);
|
||||
|
||||
void
|
||||
sg_rollback(SgSG* sg, GuExn* err);
|
||||
|
||||
|
||||
SgId
|
||||
sg_insert_expr(SgSG *sg, PgfExpr expr, int wrFlag, GuExn* err);
|
||||
|
||||
PgfExpr
|
||||
sg_get_expr(SgSG *sg, SgId key, GuPool* out_pool, GuExn* err);
|
||||
|
||||
typedef struct SgQueryExprResult SgQueryExprResult;
|
||||
|
||||
SgQueryExprResult*
|
||||
sg_query_expr(SgSG *sg, PgfExpr expr, GuPool* pool, GuExn* err);
|
||||
|
||||
PgfExpr
|
||||
sg_query_next(SgSG *sg, SgQueryExprResult* ctxt, SgId* pKey, GuPool* pool, GuExn* err);
|
||||
|
||||
void
|
||||
sg_query_close(SgSG* sg, SgQueryExprResult* ctxt, GuExn* err);
|
||||
|
||||
void
|
||||
sg_update_fts_index(SgSG* sg, PgfPGF* pgf, GuExn* err);
|
||||
|
||||
GuSeq*
|
||||
sg_query_linearization(SgSG *sg, GuString tok, GuPool* pool, GuExn* err);
|
||||
|
||||
|
||||
typedef PgfExpr SgTriple[3];
|
||||
|
||||
SgId
|
||||
sg_insert_triple(SgSG *sg, SgTriple triple, GuExn* err);
|
||||
|
||||
int
|
||||
sg_get_triple(SgSG *sg, SgId key, SgTriple triple,
|
||||
GuPool* out_pool, GuExn* err);
|
||||
|
||||
typedef struct SgTripleResult SgTripleResult;
|
||||
|
||||
SgTripleResult*
|
||||
sg_query_triple(SgSG *sg, SgTriple triple, GuExn* err);
|
||||
|
||||
int
|
||||
sg_triple_result_fetch(SgTripleResult* tres, SgId* pKey, SgTriple triple,
|
||||
GuPool* out_pool, GuExn* err);
|
||||
|
||||
void
|
||||
sg_triple_result_get_query(SgTripleResult* tres, SgTriple triple);
|
||||
|
||||
void
|
||||
sg_triple_result_close(SgTripleResult* tres, GuExn* err);
|
||||
|
||||
typedef struct SgQueryResult SgQueryResult;
|
||||
|
||||
SgQueryResult*
|
||||
sg_query(SgSG *sg, size_t n_triples, SgTriple* triples, GuExn* err);
|
||||
|
||||
size_t
|
||||
sg_query_result_columns(SgQueryResult* qres);
|
||||
|
||||
int
|
||||
sg_query_result_fetch_columns(SgQueryResult* qres, PgfExpr* res,
|
||||
GuPool* out_pool, GuExn* err);
|
||||
|
||||
PgfExpr
|
||||
sg_query_result_fetch_expr(SgQueryResult* qres, PgfExpr expr,
|
||||
GuPool* out_pool, GuExn* err);
|
||||
|
||||
void
|
||||
sg_query_result_close(SgQueryResult* qres, GuExn* err);
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,705 +0,0 @@
|
||||
/*
|
||||
** 2001 September 15
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This header file defines the interface to the sqlite B-Tree file
|
||||
** subsystem. See comments in the source code for a detailed description
|
||||
** of what each interface routine does.
|
||||
*/
|
||||
#ifndef _BTREE_H_
|
||||
#define _BTREE_H_
|
||||
|
||||
/*
|
||||
** The SQLITE_THREADSAFE macro must be defined as 0, 1, or 2.
|
||||
** 0 means mutexes are permanently disabled and the library is never
|
||||
** threadsafe. 1 means the library is serialized which is the highest
|
||||
** level of threadsafety. 2 means the library is multithreaded - multiple
|
||||
** threads can use SQLite as long as no two threads try to use the same
|
||||
** database connection at the same time.
|
||||
**
|
||||
** Older versions of SQLite used an optional THREADSAFE macro.
|
||||
** We support that for legacy.
|
||||
*/
|
||||
#if !defined(SQLITE_THREADSAFE)
|
||||
# if defined(THREADSAFE)
|
||||
# define SQLITE_THREADSAFE THREADSAFE
|
||||
# else
|
||||
# define SQLITE_THREADSAFE 1 /* IMP: R-07272-22309 */
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
** CAPI3REF: 64-Bit Integer Types
|
||||
** KEYWORDS: sqlite_int64 sqlite_uint64
|
||||
**
|
||||
** Because there is no cross-platform way to specify 64-bit integer types
|
||||
** SQLite includes typedefs for 64-bit signed and unsigned integers.
|
||||
**
|
||||
** The sqlite3_int64 and sqlite3_uint64 are the preferred type definitions.
|
||||
** The sqlite_int64 and sqlite_uint64 types are supported for backwards
|
||||
** compatibility only.
|
||||
**
|
||||
** ^The sqlite3_int64 and sqlite_int64 types can store integer values
|
||||
** between -9223372036854775808 and +9223372036854775807 inclusive. ^The
|
||||
** sqlite3_uint64 and sqlite_uint64 types can store integer values
|
||||
** between 0 and +18446744073709551615 inclusive.
|
||||
*/
|
||||
#ifdef SQLITE_INT64_TYPE
|
||||
typedef SQLITE_INT64_TYPE sqlite_int64;
|
||||
typedef unsigned SQLITE_INT64_TYPE sqlite_uint64;
|
||||
#elif defined(_MSC_VER) || defined(__BORLANDC__)
|
||||
typedef __int64 sqlite_int64;
|
||||
typedef unsigned __int64 sqlite_uint64;
|
||||
#else
|
||||
typedef long long int sqlite_int64;
|
||||
typedef unsigned long long int sqlite_uint64;
|
||||
#endif
|
||||
typedef sqlite_int64 sqlite3_int64;
|
||||
typedef sqlite_uint64 sqlite3_uint64;
|
||||
|
||||
/*
|
||||
** Integers of known sizes. These typedefs might change for architectures
|
||||
** where the sizes vary. Preprocessor macros are available so that the
|
||||
** types can be conveniently redefined at compile-time. Like this:
|
||||
**
|
||||
** cc '-DUINTPTR_TYPE=long long int' ...
|
||||
*/
|
||||
#ifndef UINT32_TYPE
|
||||
# ifdef HAVE_UINT32_T
|
||||
# define UINT32_TYPE uint32_t
|
||||
# else
|
||||
# define UINT32_TYPE unsigned int
|
||||
# endif
|
||||
#endif
|
||||
#ifndef UINT16_TYPE
|
||||
# ifdef HAVE_UINT16_T
|
||||
# define UINT16_TYPE uint16_t
|
||||
# else
|
||||
# define UINT16_TYPE unsigned short int
|
||||
# endif
|
||||
#endif
|
||||
#ifndef INT16_TYPE
|
||||
# ifdef HAVE_INT16_T
|
||||
# define INT16_TYPE int16_t
|
||||
# else
|
||||
# define INT16_TYPE short int
|
||||
# endif
|
||||
#endif
|
||||
#ifndef UINT8_TYPE
|
||||
# ifdef HAVE_UINT8_T
|
||||
# define UINT8_TYPE uint8_t
|
||||
# else
|
||||
# define UINT8_TYPE unsigned char
|
||||
# endif
|
||||
#endif
|
||||
#ifndef INT8_TYPE
|
||||
# ifdef HAVE_INT8_T
|
||||
# define INT8_TYPE int8_t
|
||||
# else
|
||||
# define INT8_TYPE signed char
|
||||
# endif
|
||||
#endif
|
||||
#ifndef LONGDOUBLE_TYPE
|
||||
# define LONGDOUBLE_TYPE long double
|
||||
#endif
|
||||
typedef sqlite_int64 i64; /* 8-byte signed integer */
|
||||
typedef sqlite_uint64 u64; /* 8-byte unsigned integer */
|
||||
typedef UINT32_TYPE u32; /* 4-byte unsigned integer */
|
||||
typedef UINT16_TYPE u16; /* 2-byte unsigned integer */
|
||||
typedef INT16_TYPE i16; /* 2-byte signed integer */
|
||||
typedef UINT8_TYPE u8; /* 1-byte unsigned integer */
|
||||
typedef INT8_TYPE i8; /* 1-byte signed integer */
|
||||
|
||||
/* TODO: This definition is just included so other modules compile. It
|
||||
** needs to be revisited.
|
||||
*/
|
||||
#define SQLITE_N_BTREE_META 16
|
||||
|
||||
/*
|
||||
** If defined as non-zero, auto-vacuum is enabled by default. Otherwise
|
||||
** it must be turned on for each database using "PRAGMA auto_vacuum = 1".
|
||||
*/
|
||||
#ifndef SQLITE_DEFAULT_AUTOVACUUM
|
||||
#define SQLITE_DEFAULT_AUTOVACUUM 0
|
||||
#endif
|
||||
|
||||
#define BTREE_AUTOVACUUM_NONE 0 /* Do not do auto-vacuum */
|
||||
#define BTREE_AUTOVACUUM_FULL 1 /* Do full auto-vacuum */
|
||||
#define BTREE_AUTOVACUUM_INCR 2 /* Incremental vacuum */
|
||||
|
||||
/*
|
||||
** CAPI3REF: Initialize The SQLite Library
|
||||
**
|
||||
** ^The sqlite3BtreeInitialize() routine initializes the
|
||||
** SQLite library. ^The sqlite3BtreeShutdown() routine
|
||||
** deallocates any resources that were allocated by sqlite3BtreeInitialize().
|
||||
** These routines are designed to aid in process initialization and
|
||||
** shutdown on embedded systems. Workstation applications using
|
||||
** SQLite normally do not need to invoke either of these routines.
|
||||
**
|
||||
** A call to sqlite3BtreeInitialize() is an "effective" call if it is
|
||||
** the first time sqlite3BtreeInitialize() is invoked during the lifetime of
|
||||
** the process, or if it is the first time sqlite3BtreeInitialize() is invoked
|
||||
** following a call to sqlite3BtreeShutdown(). ^(Only an effective call
|
||||
** of sqlite3BtreeInitialize() does any initialization. All other calls
|
||||
** are harmless no-ops.)^
|
||||
**
|
||||
** A call to sqlite3BtreeShutdown() is an "effective" call if it is the first
|
||||
** call to sqlite3BtreeShutdown() since the last sqlite3BtreeInitialize(). ^(Only
|
||||
** an effective call to sqlite3BtreeShutdown() does any deinitialization.
|
||||
** All other valid calls to sqlite3BtreeShutdown() are harmless no-ops.)^
|
||||
**
|
||||
** The sqlite3BtreeInitialize() interface is threadsafe, but sqlite3BtreeShutdown()
|
||||
** is not. The sqlite3BtreeShutdown() interface must only be called from a
|
||||
** single thread. All open [database connections] must be closed and all
|
||||
** other SQLite resources must be deallocated prior to invoking
|
||||
** sqlite3BtreeShutdown().
|
||||
**
|
||||
** Among other things, ^sqlite3BtreeInitialize() will invoke
|
||||
** sqlite3_os_init(). Similarly, ^sqlite3BtreeShutdown()
|
||||
** will invoke sqlite3_os_end().
|
||||
**
|
||||
** ^The sqlite3BtreeInitialize() routine returns [SQLITE_OK] on success.
|
||||
** ^If for some reason, sqlite3BtreeInitialize() is unable to initialize
|
||||
** the library (perhaps it is unable to allocate a needed resource such
|
||||
** as a mutex) it returns an [error code] other than [SQLITE_OK].
|
||||
**
|
||||
** ^The sqlite3BtreeInitialize() routine is called internally by many other
|
||||
** SQLite interfaces so that an application usually does not need to
|
||||
** invoke sqlite3BtreeInitialize() directly. For example, [sqlite3_open()]
|
||||
** calls sqlite3BtreeInitialize() so the SQLite library will be automatically
|
||||
** initialized when [sqlite3_open()] is called if it has not been initialized
|
||||
** already. ^However, if SQLite is compiled with the [SQLITE_OMIT_AUTOINIT]
|
||||
** compile-time option, then the automatic calls to sqlite3BtreeInitialize()
|
||||
** are omitted and the application must call sqlite3BtreeInitialize() directly
|
||||
** prior to using any other SQLite interface. For maximum portability,
|
||||
** it is recommended that applications always invoke sqlite3BtreeInitialize()
|
||||
** directly prior to using any other SQLite interface. Future releases
|
||||
** of SQLite may require this. In other words, the behavior exhibited
|
||||
** when SQLite is compiled with [SQLITE_OMIT_AUTOINIT] might become the
|
||||
** default behavior in some future release of SQLite.
|
||||
**
|
||||
** The sqlite3_os_init() routine does operating-system specific
|
||||
** initialization of the SQLite library. The sqlite3_os_end()
|
||||
** routine undoes the effect of sqlite3_os_init(). Typical tasks
|
||||
** performed by these routines include allocation or deallocation
|
||||
** of static resources, initialization of global variables,
|
||||
** setting up a default [sqlite3_vfs] module, or setting up
|
||||
** a default configuration using [sqlite3_config()].
|
||||
**
|
||||
** The application should never invoke either sqlite3_os_init()
|
||||
** or sqlite3_os_end() directly. The application should only invoke
|
||||
** sqlite3BtreeInitialize() and sqlite3BtreeShutdown(). The sqlite3_os_init()
|
||||
** interface is called automatically by sqlite3BtreeInitialize() and
|
||||
** sqlite3_os_end() is called by sqlite3BtreeShutdown(). Appropriate
|
||||
** implementations for sqlite3_os_init() and sqlite3_os_end()
|
||||
** are built into SQLite when it is compiled for Unix, Windows, or OS/2.
|
||||
** When [custom builds | built for other platforms]
|
||||
** (using the [SQLITE_OS_OTHER=1] compile-time
|
||||
** option) the application must supply a suitable implementation for
|
||||
** sqlite3_os_init() and sqlite3_os_end(). An application-supplied
|
||||
** implementation of sqlite3_os_init() or sqlite3_os_end()
|
||||
** must return [SQLITE_OK] on success and some other [error code] upon
|
||||
** failure.
|
||||
*/
|
||||
int sqlite3BtreeInitialize(void);
|
||||
int sqlite3BtreeShutdown(void);
|
||||
|
||||
/*
|
||||
** CAPI3REF: Result Codes
|
||||
** KEYWORDS: {result code definitions}
|
||||
**
|
||||
** Many SQLite functions return an integer result code from the set shown
|
||||
** here in order to indicate success or failure.
|
||||
**
|
||||
** New error codes may be added in future versions of SQLite.
|
||||
**
|
||||
** See also: [extended result code definitions]
|
||||
*/
|
||||
#define SQLITE_OK 0 /* Successful result */
|
||||
/* beginning-of-error-codes */
|
||||
#define SQLITE_ERROR 1 /* SQL error or missing database */
|
||||
#define SQLITE_INTERNAL 2 /* Internal logic error in SQLite */
|
||||
#define SQLITE_PERM 3 /* Access permission denied */
|
||||
#define SQLITE_ABORT 4 /* Callback routine requested an abort */
|
||||
#define SQLITE_BUSY 5 /* The database file is locked */
|
||||
#define SQLITE_LOCKED 6 /* A table in the database is locked */
|
||||
#define SQLITE_NOMEM 7 /* A malloc() failed */
|
||||
#define SQLITE_READONLY 8 /* Attempt to write a readonly database */
|
||||
#define SQLITE_INTERRUPT 9 /* Operation terminated by sqlite3_interrupt()*/
|
||||
#define SQLITE_IOERR 10 /* Some kind of disk I/O error occurred */
|
||||
#define SQLITE_CORRUPT 11 /* The database disk image is malformed */
|
||||
#define SQLITE_NOTFOUND 12 /* Unknown opcode in sqlite3_file_control() */
|
||||
#define SQLITE_FULL 13 /* Insertion failed because database is full */
|
||||
#define SQLITE_CANTOPEN 14 /* Unable to open the database file */
|
||||
#define SQLITE_PROTOCOL 15 /* Database lock protocol error */
|
||||
#define SQLITE_EMPTY 16 /* Database is empty */
|
||||
#define SQLITE_SCHEMA 17 /* The database schema changed */
|
||||
#define SQLITE_TOOBIG 18 /* String or BLOB exceeds size limit */
|
||||
#define SQLITE_CONSTRAINT 19 /* Abort due to constraint violation */
|
||||
#define SQLITE_MISMATCH 20 /* Data type mismatch */
|
||||
#define SQLITE_MISUSE 21 /* Library used incorrectly */
|
||||
#define SQLITE_NOLFS 22 /* Uses OS features not supported on host */
|
||||
#define SQLITE_AUTH 23 /* Authorization denied */
|
||||
#define SQLITE_FORMAT 24 /* Auxiliary database format error */
|
||||
#define SQLITE_RANGE 25 /* 2nd parameter to sqlite3_bind out of range */
|
||||
#define SQLITE_NOTADB 26 /* File opened that is not a database file */
|
||||
#define SQLITE_NOTICE 27 /* Notifications from sqlite3_log() */
|
||||
#define SQLITE_WARNING 28 /* Warnings from sqlite3_log() */
|
||||
#define SQLITE_ROW 100 /* sqlite3_step() has another row ready */
|
||||
#define SQLITE_DONE 101 /* sqlite3_step() has finished executing */
|
||||
/* end-of-error-codes */
|
||||
|
||||
/*
|
||||
** CAPI3REF: Extended Result Codes
|
||||
** KEYWORDS: {extended result code definitions}
|
||||
**
|
||||
** In its default configuration, SQLite API routines return one of 30 integer
|
||||
** [result codes]. However, experience has shown that many of
|
||||
** these result codes are too coarse-grained. They do not provide as
|
||||
** much information about problems as programmers might like. In an effort to
|
||||
** address this, newer versions of SQLite (version 3.3.8 and later) include
|
||||
** support for additional result codes that provide more detailed information
|
||||
** about errors. These [extended result codes] are enabled or disabled
|
||||
** on a per database connection basis using the
|
||||
** [sqlite3_extended_result_codes()] API. Or, the extended code for
|
||||
** the most recent error can be obtained using
|
||||
** [sqlite3_extended_errcode()].
|
||||
*/
|
||||
#define SQLITE_IOERR_READ (SQLITE_IOERR | (1<<8))
|
||||
#define SQLITE_IOERR_SHORT_READ (SQLITE_IOERR | (2<<8))
|
||||
#define SQLITE_IOERR_WRITE (SQLITE_IOERR | (3<<8))
|
||||
#define SQLITE_IOERR_FSYNC (SQLITE_IOERR | (4<<8))
|
||||
#define SQLITE_IOERR_DIR_FSYNC (SQLITE_IOERR | (5<<8))
|
||||
#define SQLITE_IOERR_TRUNCATE (SQLITE_IOERR | (6<<8))
|
||||
#define SQLITE_IOERR_FSTAT (SQLITE_IOERR | (7<<8))
|
||||
#define SQLITE_IOERR_UNLOCK (SQLITE_IOERR | (8<<8))
|
||||
#define SQLITE_IOERR_RDLOCK (SQLITE_IOERR | (9<<8))
|
||||
#define SQLITE_IOERR_DELETE (SQLITE_IOERR | (10<<8))
|
||||
#define SQLITE_IOERR_BLOCKED (SQLITE_IOERR | (11<<8))
|
||||
#define SQLITE_IOERR_NOMEM (SQLITE_IOERR | (12<<8))
|
||||
#define SQLITE_IOERR_ACCESS (SQLITE_IOERR | (13<<8))
|
||||
#define SQLITE_IOERR_CHECKRESERVEDLOCK (SQLITE_IOERR | (14<<8))
|
||||
#define SQLITE_IOERR_LOCK (SQLITE_IOERR | (15<<8))
|
||||
#define SQLITE_IOERR_CLOSE (SQLITE_IOERR | (16<<8))
|
||||
#define SQLITE_IOERR_DIR_CLOSE (SQLITE_IOERR | (17<<8))
|
||||
#define SQLITE_IOERR_SHMOPEN (SQLITE_IOERR | (18<<8))
|
||||
#define SQLITE_IOERR_SHMSIZE (SQLITE_IOERR | (19<<8))
|
||||
#define SQLITE_IOERR_SHMLOCK (SQLITE_IOERR | (20<<8))
|
||||
#define SQLITE_IOERR_SHMMAP (SQLITE_IOERR | (21<<8))
|
||||
#define SQLITE_IOERR_SEEK (SQLITE_IOERR | (22<<8))
|
||||
#define SQLITE_IOERR_DELETE_NOENT (SQLITE_IOERR | (23<<8))
|
||||
#define SQLITE_IOERR_MMAP (SQLITE_IOERR | (24<<8))
|
||||
#define SQLITE_IOERR_GETTEMPPATH (SQLITE_IOERR | (25<<8))
|
||||
#define SQLITE_IOERR_CONVPATH (SQLITE_IOERR | (26<<8))
|
||||
#define SQLITE_IOERR_VNODE (SQLITE_IOERR | (27<<8))
|
||||
#define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8))
|
||||
#define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8))
|
||||
#define SQLITE_BUSY_SNAPSHOT (SQLITE_BUSY | (2<<8))
|
||||
#define SQLITE_CANTOPEN_NOTEMPDIR (SQLITE_CANTOPEN | (1<<8))
|
||||
#define SQLITE_CANTOPEN_ISDIR (SQLITE_CANTOPEN | (2<<8))
|
||||
#define SQLITE_CANTOPEN_FULLPATH (SQLITE_CANTOPEN | (3<<8))
|
||||
#define SQLITE_CANTOPEN_CONVPATH (SQLITE_CANTOPEN | (4<<8))
|
||||
#define SQLITE_CORRUPT_VTAB (SQLITE_CORRUPT | (1<<8))
|
||||
#define SQLITE_READONLY_RECOVERY (SQLITE_READONLY | (1<<8))
|
||||
#define SQLITE_READONLY_CANTLOCK (SQLITE_READONLY | (2<<8))
|
||||
#define SQLITE_READONLY_ROLLBACK (SQLITE_READONLY | (3<<8))
|
||||
#define SQLITE_READONLY_DBMOVED (SQLITE_READONLY | (4<<8))
|
||||
#define SQLITE_ABORT_ROLLBACK (SQLITE_ABORT | (2<<8))
|
||||
#define SQLITE_CONSTRAINT_CHECK (SQLITE_CONSTRAINT | (1<<8))
|
||||
#define SQLITE_CONSTRAINT_COMMITHOOK (SQLITE_CONSTRAINT | (2<<8))
|
||||
#define SQLITE_CONSTRAINT_FOREIGNKEY (SQLITE_CONSTRAINT | (3<<8))
|
||||
#define SQLITE_CONSTRAINT_FUNCTION (SQLITE_CONSTRAINT | (4<<8))
|
||||
#define SQLITE_CONSTRAINT_NOTNULL (SQLITE_CONSTRAINT | (5<<8))
|
||||
#define SQLITE_CONSTRAINT_PRIMARYKEY (SQLITE_CONSTRAINT | (6<<8))
|
||||
#define SQLITE_CONSTRAINT_TRIGGER (SQLITE_CONSTRAINT | (7<<8))
|
||||
#define SQLITE_CONSTRAINT_UNIQUE (SQLITE_CONSTRAINT | (8<<8))
|
||||
#define SQLITE_CONSTRAINT_VTAB (SQLITE_CONSTRAINT | (9<<8))
|
||||
#define SQLITE_CONSTRAINT_ROWID (SQLITE_CONSTRAINT |(10<<8))
|
||||
#define SQLITE_NOTICE_RECOVER_WAL (SQLITE_NOTICE | (1<<8))
|
||||
#define SQLITE_NOTICE_RECOVER_ROLLBACK (SQLITE_NOTICE | (2<<8))
|
||||
#define SQLITE_WARNING_AUTOINDEX (SQLITE_WARNING | (1<<8))
|
||||
#define SQLITE_AUTH_USER (SQLITE_AUTH | (1<<8))
|
||||
|
||||
/* Reserved: 0x00F00000 */
|
||||
|
||||
/*
|
||||
** Forward declarations of structure
|
||||
*/
|
||||
typedef struct Btree Btree;
|
||||
typedef struct BtCursor BtCursor;
|
||||
typedef struct BtShared BtShared;
|
||||
typedef struct Mem Mem;
|
||||
typedef struct KeyInfo KeyInfo;
|
||||
typedef struct UnpackedRecord UnpackedRecord;
|
||||
|
||||
|
||||
int sqlite3BtreeOpen(
|
||||
const char *zVfs, /* VFS to use with this b-tree */
|
||||
const char *zFilename, /* Name of database file to open */
|
||||
Btree **ppBtree, /* Return open Btree* here */
|
||||
int flags, /* Flags */
|
||||
int vfsFlags /* Flags passed through to VFS open */
|
||||
);
|
||||
|
||||
/* The flags parameter to sqlite3BtreeOpen can be the bitwise or of the
|
||||
** following values.
|
||||
**
|
||||
** NOTE: These values must match the corresponding PAGER_ values in
|
||||
** pager.h.
|
||||
*/
|
||||
#define BTREE_OMIT_JOURNAL 1 /* Do not create or use a rollback journal */
|
||||
#define BTREE_MEMORY 2 /* This is an in-memory DB */
|
||||
#define BTREE_SINGLE 4 /* The file contains at most 1 b-tree */
|
||||
#define BTREE_UNORDERED 8 /* Use of a hash implementation is OK */
|
||||
|
||||
/*
|
||||
** CAPI3REF: Flags For File Open Operations
|
||||
**
|
||||
** These bit values are intended for use in the
|
||||
** 3rd parameter to the [sqlite3_open_v2()] interface and
|
||||
** in the 4th parameter to the [sqlite3_vfs.xOpen] method.
|
||||
*/
|
||||
#define SQLITE_OPEN_READONLY 0x00000001 /* Ok for sqlite3_open_v2() */
|
||||
#define SQLITE_OPEN_READWRITE 0x00000002 /* Ok for sqlite3_open_v2() */
|
||||
#define SQLITE_OPEN_CREATE 0x00000004 /* Ok for sqlite3_open_v2() */
|
||||
#define SQLITE_OPEN_DELETEONCLOSE 0x00000008 /* VFS only */
|
||||
#define SQLITE_OPEN_EXCLUSIVE 0x00000010 /* VFS only */
|
||||
#define SQLITE_OPEN_AUTOPROXY 0x00000020 /* VFS only */
|
||||
#define SQLITE_OPEN_URI 0x00000040 /* Ok for sqlite3_open_v2() */
|
||||
#define SQLITE_OPEN_MEMORY 0x00000080 /* Ok for sqlite3_open_v2() */
|
||||
#define SQLITE_OPEN_MAIN_DB 0x00000100 /* VFS only */
|
||||
#define SQLITE_OPEN_TEMP_DB 0x00000200 /* VFS only */
|
||||
#define SQLITE_OPEN_TRANSIENT_DB 0x00000400 /* VFS only */
|
||||
#define SQLITE_OPEN_MAIN_JOURNAL 0x00000800 /* VFS only */
|
||||
#define SQLITE_OPEN_TEMP_JOURNAL 0x00001000 /* VFS only */
|
||||
#define SQLITE_OPEN_SUBJOURNAL 0x00002000 /* VFS only */
|
||||
#define SQLITE_OPEN_MASTER_JOURNAL 0x00004000 /* VFS only */
|
||||
#define SQLITE_OPEN_NOMUTEX 0x00008000 /* Ok for sqlite3_open_v2() */
|
||||
#define SQLITE_OPEN_FULLMUTEX 0x00010000 /* Ok for sqlite3_open_v2() */
|
||||
#define SQLITE_OPEN_SHAREDCACHE 0x00020000 /* Ok for sqlite3_open_v2() */
|
||||
#define SQLITE_OPEN_PRIVATECACHE 0x00040000 /* Ok for sqlite3_open_v2() */
|
||||
#define SQLITE_OPEN_WAL 0x00080000 /* VFS only */
|
||||
|
||||
int sqlite3BtreeClose(Btree*);
|
||||
int sqlite3BtreeSetCacheSize(Btree*,int);
|
||||
#if SQLITE_MAX_MMAP_SIZE>0
|
||||
int sqlite3BtreeSetMmapLimit(Btree*,sqlite3_int64);
|
||||
#endif
|
||||
int sqlite3BtreeSetPagerFlags(Btree*,unsigned);
|
||||
int sqlite3BtreeSyncDisabled(Btree*);
|
||||
int sqlite3BtreeSetPageSize(Btree *p, int nPagesize, int nReserve, int eFix);
|
||||
int sqlite3BtreeGetPageSize(Btree*);
|
||||
int sqlite3BtreeMaxPageCount(Btree*,int);
|
||||
u32 sqlite3BtreeLastPage(Btree*);
|
||||
int sqlite3BtreeSecureDelete(Btree*,int);
|
||||
int sqlite3BtreeGetOptimalReserve(Btree*);
|
||||
int sqlite3BtreeGetReserveNoMutex(Btree *p);
|
||||
int sqlite3BtreeSetAutoVacuum(Btree *, int);
|
||||
int sqlite3BtreeGetAutoVacuum(Btree *);
|
||||
int sqlite3BtreeBeginTrans(Btree*,int);
|
||||
int sqlite3BtreeCommitPhaseOne(Btree*, const char *zMaster);
|
||||
int sqlite3BtreeCommitPhaseTwo(Btree*, int);
|
||||
int sqlite3BtreeCommit(Btree*);
|
||||
int sqlite3BtreeRollback(Btree*,int,int);
|
||||
int sqlite3BtreeBeginStmt(Btree*,int);
|
||||
int sqlite3BtreeCreateTable(Btree*, int*, int flags);
|
||||
int sqlite3BtreeIsInTrans(Btree*);
|
||||
int sqlite3BtreeIsInReadTrans(Btree*);
|
||||
int sqlite3BtreeIsInBackup(Btree*);
|
||||
void *sqlite3BtreeSchema(Btree *, int, void(*)(void *));
|
||||
int sqlite3BtreeSchemaLocked(Btree *pBtree);
|
||||
int sqlite3BtreeLockTable(Btree *pBtree, int iTab, u8 isWriteLock);
|
||||
int sqlite3BtreeSavepoint(Btree *, int, int);
|
||||
|
||||
int sqlite3BtreeFileFormat(Btree *);
|
||||
const char *sqlite3BtreeGetFilename(Btree *);
|
||||
const char *sqlite3BtreeGetJournalname(Btree *);
|
||||
int sqlite3BtreeCopyFile(Btree *, Btree *);
|
||||
|
||||
int sqlite3BtreeIncrVacuum(Btree *);
|
||||
|
||||
/* The flags parameter to sqlite3BtreeCreateTable can be the bitwise OR
|
||||
** of the flags shown below.
|
||||
**
|
||||
** Every SQLite table must have either BTREE_INTKEY or BTREE_BLOBKEY set.
|
||||
** With BTREE_INTKEY, the table key is a 64-bit integer and arbitrary data
|
||||
** is stored in the leaves. (BTREE_INTKEY is used for SQL tables.) With
|
||||
** BTREE_BLOBKEY, the key is an arbitrary BLOB and no content is stored
|
||||
** anywhere - the key is the content. (BTREE_BLOBKEY is used for SQL
|
||||
** indices.)
|
||||
*/
|
||||
#define BTREE_INTKEY 1 /* Table has only 64-bit signed integer keys */
|
||||
#define BTREE_BLOBKEY 2 /* Table has keys only - no data */
|
||||
|
||||
int sqlite3BtreeDropTable(Btree*, int, int*);
|
||||
int sqlite3BtreeClearTable(Btree*, int, int*);
|
||||
int sqlite3BtreeClearTableOfCursor(BtCursor*);
|
||||
int sqlite3BtreeTripAllCursors(Btree*, int, int);
|
||||
|
||||
void sqlite3BtreeGetMeta(Btree *pBtree, int idx, u32 *pValue);
|
||||
int sqlite3BtreeUpdateMeta(Btree*, int idx, u32 value);
|
||||
|
||||
int sqlite3BtreeNewDb(Btree *p);
|
||||
|
||||
/*
|
||||
** The second parameter to sqlite3BtreeGetMeta or sqlite3BtreeUpdateMeta
|
||||
** should be one of the following values. The integer values are assigned
|
||||
** to constants so that the offset of the corresponding field in an
|
||||
** SQLite database header may be found using the following formula:
|
||||
**
|
||||
** offset = 36 + (idx * 4)
|
||||
**
|
||||
** For example, the free-page-count field is located at byte offset 36 of
|
||||
** the database file header. The incr-vacuum-flag field is located at
|
||||
** byte offset 64 (== 36+4*7).
|
||||
**
|
||||
** The BTREE_DATA_VERSION value is not really a value stored in the header.
|
||||
** It is a read-only number computed by the pager. But we merge it with
|
||||
** the header value access routines since its access pattern is the same.
|
||||
** Call it a "virtual meta value".
|
||||
*/
|
||||
#define BTREE_FREE_PAGE_COUNT 0
|
||||
#define BTREE_SCHEMA_VERSION 1
|
||||
#define BTREE_FILE_FORMAT 2
|
||||
#define BTREE_DEFAULT_CACHE_SIZE 3
|
||||
#define BTREE_LARGEST_ROOT_PAGE 4
|
||||
#define BTREE_TEXT_ENCODING 5
|
||||
#define BTREE_USER_VERSION 6
|
||||
#define BTREE_INCR_VACUUM 7
|
||||
#define BTREE_APPLICATION_ID 8
|
||||
#define BTREE_DATA_VERSION 15 /* A virtual meta-value */
|
||||
|
||||
/*
|
||||
** An instance of the following structure holds information about a
|
||||
** single index record that has already been parsed out into individual
|
||||
** values.
|
||||
**
|
||||
** A record is an object that contains one or more fields of data.
|
||||
** Records are used to store the content of a table row and to store
|
||||
** the key of an index. A blob encoding of a record is created by
|
||||
** the OP_MakeRecord opcode of the VDBE and is disassembled by the
|
||||
** OP_Column opcode.
|
||||
**
|
||||
** This structure holds a record that has already been disassembled
|
||||
** into its constituent fields.
|
||||
**
|
||||
** The r1 and r2 member variables are only used by the optimized comparison
|
||||
** functions vdbeRecordCompareInt() and vdbeRecordCompareString().
|
||||
*/
|
||||
struct UnpackedRecord {
|
||||
KeyInfo *pKeyInfo; /* Collation and sort-order information */
|
||||
u16 nField; /* Number of entries in apMem[] */
|
||||
i8 default_rc; /* Comparison result if keys are equal */
|
||||
u8 errCode; /* Error detected by xRecordCompare (CORRUPT or NOMEM) */
|
||||
Mem *aMem; /* Values */
|
||||
int r1; /* Value to return if (lhs > rhs) */
|
||||
int r2; /* Value to return if (rhs < lhs) */
|
||||
};
|
||||
|
||||
/* One or more of the following flags are set to indicate the valid
|
||||
** representations of the value stored in the Mem struct.
|
||||
**
|
||||
** If the MEM_Null flag is set, then the value is an SQL NULL value.
|
||||
** No other flags may be set in this case.
|
||||
**
|
||||
** If the MEM_Str flag is set then Mem.z points at a string representation.
|
||||
** Usually this is encoded in the same unicode encoding as the main
|
||||
** database (see below for exceptions). If the MEM_Term flag is also
|
||||
** set, then the string is nul terminated. The MEM_Int and MEM_Real
|
||||
** flags may coexist with the MEM_Str flag.
|
||||
*/
|
||||
#define MEM_Null 0x0001 /* Value is NULL */
|
||||
#define MEM_Str 0x0002 /* Value is a string */
|
||||
#define MEM_Int 0x0004 /* Value is an integer */
|
||||
#define MEM_Real 0x0008 /* Value is a real number */
|
||||
#define MEM_Blob 0x0010 /* Value is a BLOB */
|
||||
|
||||
#define MEM_Term 0x0200 /* String rep is nul terminated */
|
||||
#define MEM_Dyn 0x0400 /* Need to call Mem.xDel() on Mem.z */
|
||||
#define MEM_Static 0x0800 /* Mem.z points to a static string */
|
||||
#define MEM_Ephem 0x1000 /* Mem.z points to an ephemeral string */
|
||||
#define MEM_Zero 0x4000 /* Mem.i contains count of 0s appended to blob */
|
||||
|
||||
/*
|
||||
** Internally, the vdbe manipulates nearly all SQL values as Mem
|
||||
** structures. Each Mem struct may cache multiple representations (string,
|
||||
** integer etc.) of the same value.
|
||||
*/
|
||||
struct Mem {
|
||||
union MemValue {
|
||||
double r; /* Real value used when MEM_Real is set in flags */
|
||||
i64 i; /* Integer value used when MEM_Int is set in flags */
|
||||
int nZero; /* Used when bit MEM_Zero is set in flags */
|
||||
} u;
|
||||
u16 flags; /* Some combination of MEM_Null, MEM_Str, MEM_Dyn, etc. */
|
||||
u8 enc; /* SQLITE_UTF8, SQLITE_UTF16BE, SQLITE_UTF16LE */
|
||||
u8 eSubtype; /* Subtype for this value */
|
||||
int n; /* Number of characters in string value, excluding '\0' */
|
||||
char *z; /* String or BLOB value */
|
||||
/* ShallowCopy only needs to copy the information above */
|
||||
char *zMalloc; /* Space to hold MEM_Str or MEM_Blob if szMalloc>0 */
|
||||
int szMalloc; /* Size of the zMalloc allocation */
|
||||
u32 uTemp; /* Transient storage for serial_type in OP_MakeRecord */
|
||||
Btree *pBtree; /* The associated database connection */
|
||||
void (*xDel)(void*);/* Destructor for Mem.z - only valid if MEM_Dyn */
|
||||
#ifdef SQLITE_DEBUG
|
||||
Mem *pScopyFrom; /* This Mem is a shallow copy of pScopyFrom */
|
||||
void *pFiller; /* So that sizeof(Mem) is a multiple of 8 */
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
** Values that may be OR'd together to form the second argument of an
|
||||
** sqlite3BtreeCursorHints() call.
|
||||
**
|
||||
** The BTREE_BULKLOAD flag is set on index cursors when the index is going
|
||||
** to be filled with content that is already in sorted order.
|
||||
**
|
||||
** The BTREE_SEEK_EQ flag is set on cursors that will get OP_SeekGE or
|
||||
** OP_SeekLE opcodes for a range search, but where the range of entries
|
||||
** selected will all have the same key. In other words, the cursor will
|
||||
** be used only for equality key searches.
|
||||
**
|
||||
*/
|
||||
#define BTREE_BULKLOAD 0x00000001 /* Used to full index in sorted order */
|
||||
#define BTREE_SEEK_EQ 0x00000002 /* EQ seeks only - no range seeks */
|
||||
|
||||
int sqlite3BtreeCursor(
|
||||
Btree*, /* BTree containing table to open */
|
||||
int iTable, /* Index of root page */
|
||||
int wrFlag, /* 1 for writing. 0 for read-only */
|
||||
int N, int X, /* index of N key columns and X extra columns */
|
||||
BtCursor **ppCursor /* Space to write cursor pointer */
|
||||
);
|
||||
int sqlite3BtreeCursorSize(void);
|
||||
|
||||
int sqlite3BtreeCloseCursor(BtCursor*);
|
||||
void sqlite3BtreeInitUnpackedRecord(
|
||||
UnpackedRecord *pUnKey,
|
||||
BtCursor* pCur,
|
||||
int nField,
|
||||
int default_rc,
|
||||
Mem* pMem);
|
||||
int sqlite3BtreeMovetoUnpacked(
|
||||
BtCursor*,
|
||||
UnpackedRecord *pUnKey,
|
||||
i64 intKey,
|
||||
int bias,
|
||||
int *pRes
|
||||
);
|
||||
int sqlite3BtreeCursorHasMoved(BtCursor*);
|
||||
int sqlite3BtreeCursorRestore(BtCursor*, int*);
|
||||
int sqlite3BtreeDelete(BtCursor*, int);
|
||||
int sqlite3BtreeInsert(BtCursor*, const void *pKey, i64 nKey,
|
||||
const void *pData, int nData,
|
||||
int nZero, int bias, int seekResult);
|
||||
int sqlite3BtreeFirst(BtCursor*, int *pRes);
|
||||
int sqlite3BtreeLast(BtCursor*, int *pRes);
|
||||
int sqlite3BtreeNext(BtCursor*, int *pRes);
|
||||
int sqlite3BtreeEof(BtCursor*);
|
||||
int sqlite3BtreePrevious(BtCursor*, int *pRes);
|
||||
int sqlite3BtreeKeySize(BtCursor*, i64 *pSize);
|
||||
int sqlite3BtreeKey(BtCursor*, u32 offset, u32 amt, void*);
|
||||
const void *sqlite3BtreeKeyFetch(BtCursor*, u32 *pAmt);
|
||||
const void *sqlite3BtreeDataFetch(BtCursor*, u32 *pAmt);
|
||||
int sqlite3BtreeDataSize(BtCursor*, u32 *pSize);
|
||||
int sqlite3BtreeData(BtCursor*, u32 offset, u32 amt, void*);
|
||||
|
||||
char *sqlite3BtreeIntegrityCheck(Btree*, int *aRoot, int nRoot, int, int*);
|
||||
struct Pager *sqlite3BtreePager(Btree*);
|
||||
|
||||
int sqlite3BtreePutData(BtCursor*, u32 offset, u32 amt, void*);
|
||||
void sqlite3BtreeIncrblobCursor(BtCursor *);
|
||||
void sqlite3BtreeClearCursor(BtCursor *);
|
||||
int sqlite3BtreeSetVersion(Btree *pBt, int iVersion);
|
||||
void sqlite3BtreeCursorHints(BtCursor *, unsigned int mask);
|
||||
#ifdef SQLITE_DEBUG
|
||||
int sqlite3BtreeCursorHasHint(BtCursor*, unsigned int mask);
|
||||
#endif
|
||||
int sqlite3BtreeIsReadonly(Btree *pBt);
|
||||
|
||||
#ifndef NDEBUG
|
||||
int sqlite3BtreeCursorIsValid(BtCursor*);
|
||||
#endif
|
||||
|
||||
#ifndef SQLITE_OMIT_BTREECOUNT
|
||||
int sqlite3BtreeCount(BtCursor *, i64 *);
|
||||
#endif
|
||||
|
||||
#ifdef SQLITE_TEST
|
||||
int sqlite3BtreeCursorInfo(BtCursor*, int*, int);
|
||||
void sqlite3BtreeCursorList(Btree*);
|
||||
#endif
|
||||
|
||||
#ifndef SQLITE_OMIT_WAL
|
||||
int sqlite3BtreeCheckpoint(Btree*, int, int *, int *);
|
||||
#endif
|
||||
|
||||
/*
|
||||
** If we are not using shared cache, then there is no need to
|
||||
** use mutexes to access the BtShared structures. So make the
|
||||
** Enter and Leave procedures no-ops.
|
||||
*/
|
||||
#ifndef SQLITE_OMIT_SHARED_CACHE
|
||||
void sqlite3BtreeEnter(Btree*);
|
||||
#else
|
||||
# define sqlite3BtreeEnter(X)
|
||||
#endif
|
||||
|
||||
#if !defined(SQLITE_OMIT_SHARED_CACHE) && SQLITE_THREADSAFE
|
||||
int sqlite3BtreeSharable(Btree*);
|
||||
void sqlite3BtreeLeave(Btree*);
|
||||
void sqlite3BtreeEnterCursor(BtCursor*);
|
||||
void sqlite3BtreeLeaveCursor(BtCursor*);
|
||||
#else
|
||||
|
||||
# define sqlite3BtreeSharable(X) 0
|
||||
# define sqlite3BtreeLeave(X)
|
||||
# define sqlite3BtreeEnterCursor(X)
|
||||
# define sqlite3BtreeLeaveCursor(X)
|
||||
#endif
|
||||
|
||||
u32 sqlite3BtreeSerialType(Mem *pMem, int file_format);
|
||||
u32 sqlite3BtreeSerialTypeLen(u32);
|
||||
u32 sqlite3BtreeSerialGet(const unsigned char*, u32, Mem *);
|
||||
u32 sqlite3BtreeSerialPut(u8*, Mem*, u32);
|
||||
|
||||
/*
|
||||
** Routines to read and write variable-length integers. These used to
|
||||
** be defined locally, but now we use the varint routines in the util.c
|
||||
** file.
|
||||
*/
|
||||
int sqlite3BtreePutVarint(unsigned char*, u64);
|
||||
u8 sqlite3BtreeGetVarint(const unsigned char *, u64 *);
|
||||
u8 sqlite3BtreeGetVarint32(const unsigned char *, u32 *);
|
||||
int sqlite3BtreeVarintLen(u64 v);
|
||||
|
||||
/*
|
||||
** The common case is for a varint to be a single byte. The following
|
||||
** macros handle the common case without a procedure call, but then call
|
||||
** the procedure for larger varints.
|
||||
*/
|
||||
#define getVarint32(A,B) \
|
||||
(u8)((*(A)<(u8)0x80)?((B)=(u32)*(A)),1:sqlite3BtreeGetVarint32((A),(u32 *)&(B)))
|
||||
#define putVarint32(A,B) \
|
||||
(u8)(((u32)(B)<(u32)0x80)?(*(A)=(unsigned char)(B)),1:\
|
||||
sqlite3BtreePutVarint((A),(B)))
|
||||
#define getVarint sqlite3BtreeGetVarint
|
||||
#define putVarint sqlite3BtreePutVarint
|
||||
|
||||
|
||||
int sqlite3BtreeIdxRowid(Btree*, BtCursor*, i64*);
|
||||
|
||||
int sqlite3BtreeRecordCompare(int,const void*,UnpackedRecord*);
|
||||
|
||||
const char *sqlite3BtreeErrName(int rc);
|
||||
|
||||
#endif /* _BTREE_H_ */
|
||||
22
src/runtime/haskell/CHANGELOG.md
Normal file
22
src/runtime/haskell/CHANGELOG.md
Normal file
@@ -0,0 +1,22 @@
|
||||
## 1.3.0
|
||||
|
||||
- Add completion support.
|
||||
|
||||
## 1.2.1
|
||||
|
||||
- Remove deprecated `pgf_print_expr_tuple`.
|
||||
- Added an API for cloning expressions/types/literals.
|
||||
|
||||
## 1.2.0
|
||||
|
||||
- Stop `pgf-shell` from being built by default.
|
||||
- parseToChart also returns the category.
|
||||
- bugfix in bracketedLinearize.
|
||||
|
||||
## 1.1.0
|
||||
|
||||
- Remove SG library.
|
||||
|
||||
## 1.0.0
|
||||
|
||||
- Everything up until 2020-07-11.
|
||||
10
src/runtime/haskell/HACKAGE.md
Normal file
10
src/runtime/haskell/HACKAGE.md
Normal file
@@ -0,0 +1,10 @@
|
||||
# Instructions for uploading to Hackage
|
||||
|
||||
You will need a Hackage account for steps 4 & 5.
|
||||
|
||||
1. Bump the version number in `pgf2.cabal`
|
||||
2. Add details in `CHANGELOG.md`
|
||||
3. Run `stack sdist` (or `cabal sdist`)
|
||||
4. Visit `https://hackage.haskell.org/upload` and upload the file `./.stack-work/dist/x86_64-osx/Cabal-2.2.0.1/pgf2-x.y.z.tar.gz` (or Cabal equivalent)
|
||||
5. If successful, upload documentation with `./stack-haddock-upload.sh pgf2 x.y.z` (compilation on Hackage's servers will fail because of missing C libraries)
|
||||
6. Commit and push to this repository (`gf-core`)
|
||||
165
src/runtime/haskell/LICENSE
Normal file
165
src/runtime/haskell/LICENSE
Normal file
@@ -0,0 +1,165 @@
|
||||
GNU LESSER GENERAL PUBLIC LICENSE
|
||||
Version 3, 29 June 2007
|
||||
|
||||
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
|
||||
This version of the GNU Lesser General Public License incorporates
|
||||
the terms and conditions of version 3 of the GNU General Public
|
||||
License, supplemented by the additional permissions listed below.
|
||||
|
||||
0. Additional Definitions.
|
||||
|
||||
As used herein, "this License" refers to version 3 of the GNU Lesser
|
||||
General Public License, and the "GNU GPL" refers to version 3 of the GNU
|
||||
General Public License.
|
||||
|
||||
"The Library" refers to a covered work governed by this License,
|
||||
other than an Application or a Combined Work as defined below.
|
||||
|
||||
An "Application" is any work that makes use of an interface provided
|
||||
by the Library, but which is not otherwise based on the Library.
|
||||
Defining a subclass of a class defined by the Library is deemed a mode
|
||||
of using an interface provided by the Library.
|
||||
|
||||
A "Combined Work" is a work produced by combining or linking an
|
||||
Application with the Library. The particular version of the Library
|
||||
with which the Combined Work was made is also called the "Linked
|
||||
Version".
|
||||
|
||||
The "Minimal Corresponding Source" for a Combined Work means the
|
||||
Corresponding Source for the Combined Work, excluding any source code
|
||||
for portions of the Combined Work that, considered in isolation, are
|
||||
based on the Application, and not on the Linked Version.
|
||||
|
||||
The "Corresponding Application Code" for a Combined Work means the
|
||||
object code and/or source code for the Application, including any data
|
||||
and utility programs needed for reproducing the Combined Work from the
|
||||
Application, but excluding the System Libraries of the Combined Work.
|
||||
|
||||
1. Exception to Section 3 of the GNU GPL.
|
||||
|
||||
You may convey a covered work under sections 3 and 4 of this License
|
||||
without being bound by section 3 of the GNU GPL.
|
||||
|
||||
2. Conveying Modified Versions.
|
||||
|
||||
If you modify a copy of the Library, and, in your modifications, a
|
||||
facility refers to a function or data to be supplied by an Application
|
||||
that uses the facility (other than as an argument passed when the
|
||||
facility is invoked), then you may convey a copy of the modified
|
||||
version:
|
||||
|
||||
a) under this License, provided that you make a good faith effort to
|
||||
ensure that, in the event an Application does not supply the
|
||||
function or data, the facility still operates, and performs
|
||||
whatever part of its purpose remains meaningful, or
|
||||
|
||||
b) under the GNU GPL, with none of the additional permissions of
|
||||
this License applicable to that copy.
|
||||
|
||||
3. Object Code Incorporating Material from Library Header Files.
|
||||
|
||||
The object code form of an Application may incorporate material from
|
||||
a header file that is part of the Library. You may convey such object
|
||||
code under terms of your choice, provided that, if the incorporated
|
||||
material is not limited to numerical parameters, data structure
|
||||
layouts and accessors, or small macros, inline functions and templates
|
||||
(ten or fewer lines in length), you do both of the following:
|
||||
|
||||
a) Give prominent notice with each copy of the object code that the
|
||||
Library is used in it and that the Library and its use are
|
||||
covered by this License.
|
||||
|
||||
b) Accompany the object code with a copy of the GNU GPL and this license
|
||||
document.
|
||||
|
||||
4. Combined Works.
|
||||
|
||||
You may convey a Combined Work under terms of your choice that,
|
||||
taken together, effectively do not restrict modification of the
|
||||
portions of the Library contained in the Combined Work and reverse
|
||||
engineering for debugging such modifications, if you also do each of
|
||||
the following:
|
||||
|
||||
a) Give prominent notice with each copy of the Combined Work that
|
||||
the Library is used in it and that the Library and its use are
|
||||
covered by this License.
|
||||
|
||||
b) Accompany the Combined Work with a copy of the GNU GPL and this license
|
||||
document.
|
||||
|
||||
c) For a Combined Work that displays copyright notices during
|
||||
execution, include the copyright notice for the Library among
|
||||
these notices, as well as a reference directing the user to the
|
||||
copies of the GNU GPL and this license document.
|
||||
|
||||
d) Do one of the following:
|
||||
|
||||
0) Convey the Minimal Corresponding Source under the terms of this
|
||||
License, and the Corresponding Application Code in a form
|
||||
suitable for, and under terms that permit, the user to
|
||||
recombine or relink the Application with a modified version of
|
||||
the Linked Version to produce a modified Combined Work, in the
|
||||
manner specified by section 6 of the GNU GPL for conveying
|
||||
Corresponding Source.
|
||||
|
||||
1) Use a suitable shared library mechanism for linking with the
|
||||
Library. A suitable mechanism is one that (a) uses at run time
|
||||
a copy of the Library already present on the user's computer
|
||||
system, and (b) will operate properly with a modified version
|
||||
of the Library that is interface-compatible with the Linked
|
||||
Version.
|
||||
|
||||
e) Provide Installation Information, but only if you would otherwise
|
||||
be required to provide such information under section 6 of the
|
||||
GNU GPL, and only to the extent that such information is
|
||||
necessary to install and execute a modified version of the
|
||||
Combined Work produced by recombining or relinking the
|
||||
Application with a modified version of the Linked Version. (If
|
||||
you use option 4d0, the Installation Information must accompany
|
||||
the Minimal Corresponding Source and Corresponding Application
|
||||
Code. If you use option 4d1, you must provide the Installation
|
||||
Information in the manner specified by section 6 of the GNU GPL
|
||||
for conveying Corresponding Source.)
|
||||
|
||||
5. Combined Libraries.
|
||||
|
||||
You may place library facilities that are a work based on the
|
||||
Library side by side in a single library together with other library
|
||||
facilities that are not Applications and are not covered by this
|
||||
License, and convey such a combined library under terms of your
|
||||
choice, if you do both of the following:
|
||||
|
||||
a) Accompany the combined library with a copy of the same work based
|
||||
on the Library, uncombined with any other library facilities,
|
||||
conveyed under the terms of this License.
|
||||
|
||||
b) Give prominent notice with the combined library that part of it
|
||||
is a work based on the Library, and explaining where to find the
|
||||
accompanying uncombined form of the same work.
|
||||
|
||||
6. Revised Versions of the GNU Lesser General Public License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions
|
||||
of the GNU Lesser General Public License from time to time. Such new
|
||||
versions will be similar in spirit to the present version, but may
|
||||
differ in detail to address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Library as you received it specifies that a certain numbered version
|
||||
of the GNU Lesser General Public License "or any later version"
|
||||
applies to it, you have the option of following the terms and
|
||||
conditions either of that published version or of any later version
|
||||
published by the Free Software Foundation. If the Library as you
|
||||
received it does not specify a version number of the GNU Lesser
|
||||
General Public License, you may choose any version of the GNU Lesser
|
||||
General Public License ever published by the Free Software Foundation.
|
||||
|
||||
If the Library as you received it specifies that a proxy can decide
|
||||
whether future versions of the GNU Lesser General Public License shall
|
||||
apply, that proxy's public statement of acceptance of any version is
|
||||
permanent authorization for you to choose that version for the
|
||||
Library.
|
||||
@@ -158,7 +158,7 @@ parse_ pgf lang cat dp s =
|
||||
PGF2.ParseIncomplete -> (ParseIncomplete, PGF2.Leaf s)
|
||||
|
||||
complete pgf lang cat s prefix =
|
||||
let compls = Map.fromListWith (++) [(tok,[CId fun]) | (tok,_,fun,_) <- PGF2.complete (lookConcr pgf lang) cat s prefix]
|
||||
let compls = Map.fromListWith (++) [(tok,[CId fun]) | PGF2.ParseOk res <- [PGF2.complete (lookConcr pgf lang) cat s prefix], (tok,_,fun,_) <- res]
|
||||
in (PGF2.Leaf [],s,compls)
|
||||
|
||||
hasLinearization pgf lang (CId f) = PGF2.hasLinearization (lookConcr pgf lang) f
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
|
||||
#include <pgf/pgf.h>
|
||||
#include <pgf/linearizer.h>
|
||||
#include <pgf/data.h>
|
||||
#include <gu/enum.h>
|
||||
#include <gu/exn.h>
|
||||
|
||||
@@ -38,30 +39,28 @@ module PGF2 (-- * PGF
|
||||
mkMeta,unMeta,
|
||||
exprHash, exprSize, exprFunctions, exprSubstitute,
|
||||
treeProbability,
|
||||
|
||||
-- ** Types
|
||||
Type, Hypo, BindType(..), startCat,
|
||||
readType, showType, showContext,
|
||||
mkType, unType,
|
||||
|
||||
-- ** Type checking
|
||||
-- | Dynamically-built expressions should always be type-checked before using in other functions,
|
||||
-- as the exceptions thrown by using invalid expressions may not be catchable.
|
||||
checkExpr, inferExpr, checkType,
|
||||
|
||||
-- ** Computing
|
||||
compute,
|
||||
|
||||
-- * Concrete syntax
|
||||
ConcName,Concr,languages,concreteName,languageCode,
|
||||
|
||||
-- ** Linearization
|
||||
linearize,linearizeAll,tabularLinearize,tabularLinearizeAll,bracketedLinearize,
|
||||
FId, LIndex, BracketedString(..), showBracketedString, flattenBracketedString,
|
||||
printName,
|
||||
linearize,linearizeAll,tabularLinearize,tabularLinearizeAll,bracketedLinearize,bracketedLinearizeAll,
|
||||
FId, BracketedString(..), showBracketedString, flattenBracketedString,
|
||||
printName, categoryFields,
|
||||
alignWords, gizaAlignment,
|
||||
|
||||
-- ** Parsing
|
||||
ParseOutput(..), parse, parseWithHeuristics, complete,
|
||||
|
||||
ParseOutput(..), parse, parseWithHeuristics,
|
||||
parseToChart, PArg(..),
|
||||
complete,
|
||||
-- ** Sentence Lookup
|
||||
lookupSentence,
|
||||
|
||||
@@ -71,6 +70,7 @@ module PGF2 (-- * PGF
|
||||
|
||||
-- ** Morphological Analysis
|
||||
MorphoAnalysis, lookupMorpho, lookupCohorts, fullFormLexicon,
|
||||
filterBest, filterLongest,
|
||||
-- ** Visualizations
|
||||
GraphvizOptions(..), graphvizDefaults,
|
||||
graphvizAbstractTree, graphvizParseTree,
|
||||
@@ -88,11 +88,12 @@ module PGF2 (-- * PGF
|
||||
readProbabilitiesFromFile
|
||||
) where
|
||||
|
||||
import Prelude hiding (fromEnum)
|
||||
import Prelude hiding (fromEnum,(<>))
|
||||
import Control.Exception(Exception,throwIO)
|
||||
import Control.Monad(forM_)
|
||||
import System.IO.Unsafe(unsafePerformIO,unsafeInterleaveIO)
|
||||
import System.Random
|
||||
import System.IO(fixIO)
|
||||
import Text.PrettyPrint
|
||||
import PGF2.Expr
|
||||
import PGF2.Type
|
||||
@@ -103,12 +104,12 @@ import Foreign.C
|
||||
import Data.Typeable
|
||||
import qualified Data.Map as Map
|
||||
import Data.IORef
|
||||
import Data.Char(isUpper,isSpace)
|
||||
import Data.Char(isUpper,isSpace,isPunctuation)
|
||||
import Data.List(isSuffixOf,maximumBy,nub,mapAccumL,intersperse,groupBy,find)
|
||||
import Data.Maybe(fromMaybe)
|
||||
import Data.Function(on)
|
||||
import Data.Maybe(maybe)
|
||||
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
-- Functions that take a PGF.
|
||||
-- PGF has many Concrs.
|
||||
@@ -188,7 +189,7 @@ languageCode c = unsafePerformIO $ do
|
||||
else fmap Just (peekUtf8CString c_code)
|
||||
|
||||
-- | Generates an exhaustive possibly infinite list of
|
||||
-- all abstract syntax expressions of the given type.
|
||||
-- all abstract syntax expressions of the given type.
|
||||
-- The expressions are ordered by their probability.
|
||||
generateAll :: PGF -> Type -> [(Expr,Float)]
|
||||
generateAll p (Type ctype _) =
|
||||
@@ -450,6 +451,7 @@ graphvizParseTree c opts e =
|
||||
c_opts <- newGraphvizOptions tmpPl opts
|
||||
pgf_graphviz_parse_tree (concr c) (expr e) c_opts out exn
|
||||
touchExpr e
|
||||
touchConcr c
|
||||
s <- gu_string_buf_freeze sb tmpPl
|
||||
peekUtf8CString s
|
||||
|
||||
@@ -915,21 +917,21 @@ newGraphvizOptions pool opts = do
|
||||
-- Functions using Concr
|
||||
-- Morpho analyses, parsing & linearization
|
||||
|
||||
-- | This triple is returned by all functions that deal with
|
||||
-- | This triple is returned by all functions that deal with
|
||||
-- the grammar's lexicon. Its first element is the name of an abstract
|
||||
-- lexical function which can produce a given word or
|
||||
-- lexical function which can produce a given word or
|
||||
-- a multiword expression (i.e. this is the lemma).
|
||||
-- After that follows a string which describes
|
||||
-- After that follows a string which describes
|
||||
-- the particular inflection form.
|
||||
--
|
||||
-- The last element is the logarithm of
|
||||
-- the probability of the function. The probability is not
|
||||
-- the probability of the function. The probability is not
|
||||
-- conditionalized on the category of the function. This makes it
|
||||
-- possible to compare the likelihood of two functions even if they
|
||||
-- have different types.
|
||||
-- have different types.
|
||||
type MorphoAnalysis = (Fun,String,Float)
|
||||
|
||||
-- | 'lookupMorpho' takes a string which must be a single word or
|
||||
-- | 'lookupMorpho' takes a string which must be a single word or
|
||||
-- a multiword expression. It then computes the list of all possible
|
||||
-- morphological analyses.
|
||||
lookupMorpho :: Concr -> String -> [MorphoAnalysis]
|
||||
@@ -954,7 +956,7 @@ lookupMorpho (Concr concr master) sent =
|
||||
-- The list is sorted first by the @start@ position and after that
|
||||
-- by the @end@ position. This can be used for instance if you want to
|
||||
-- filter only the longest matches.
|
||||
lookupCohorts :: Concr -> String -> [(Int,[MorphoAnalysis],Int)]
|
||||
lookupCohorts :: Concr -> String -> [(Int,String,[MorphoAnalysis],Int)]
|
||||
lookupCohorts lang@(Concr concr master) sent =
|
||||
unsafePerformIO $
|
||||
do pl <- gu_new_pool
|
||||
@@ -965,9 +967,9 @@ lookupCohorts lang@(Concr concr master) sent =
|
||||
c_sent <- newUtf8CString sent pl
|
||||
enum <- pgf_lookup_cohorts concr c_sent cback pl nullPtr
|
||||
fpl <- newForeignPtr gu_pool_finalizer pl
|
||||
fromCohortRange enum fpl fptr ref
|
||||
fromCohortRange enum fpl fptr 0 sent ref
|
||||
where
|
||||
fromCohortRange enum fpl fptr ref =
|
||||
fromCohortRange enum fpl fptr i sent ref =
|
||||
allocaBytes (#size PgfCohortRange) $ \ptr ->
|
||||
withForeignPtr fpl $ \pl ->
|
||||
do gu_enum_next enum ptr pl
|
||||
@@ -981,8 +983,80 @@ lookupCohorts lang@(Concr concr master) sent =
|
||||
end <- (#peek PgfCohortRange, end.pos) ptr
|
||||
ans <- readIORef ref
|
||||
writeIORef ref []
|
||||
cohs <- unsafeInterleaveIO (fromCohortRange enum fpl fptr ref)
|
||||
return ((start,ans,end):cohs)
|
||||
let sent' = drop (start-i) sent
|
||||
tok = take (end-start) sent'
|
||||
cohs <- unsafeInterleaveIO (fromCohortRange enum fpl fptr start sent' ref)
|
||||
return ((start,tok,ans,end):cohs)
|
||||
|
||||
filterBest :: [(Int,String,[MorphoAnalysis],Int)] -> [(Int,String,[MorphoAnalysis],Int)]
|
||||
filterBest ans =
|
||||
reverse (iterate (maxBound :: Int) [(0,0,[],ans)] [] [])
|
||||
where
|
||||
iterate v0 [] [] res = res
|
||||
iterate v0 [] new res = iterate v0 new [] res
|
||||
iterate v0 ((_,v,conf, []):old) new res =
|
||||
case compare v0 v of
|
||||
LT -> res
|
||||
EQ -> iterate v0 old new (merge conf res)
|
||||
GT -> iterate v old new conf
|
||||
iterate v0 ((_,v,conf,an:ans):old) new res = iterate v0 old (insert (v+valueOf an) conf an ans [] new) res
|
||||
|
||||
valueOf (_,_,[],_) = 2
|
||||
valueOf _ = 1
|
||||
|
||||
insert v conf an@(start,_,_,end) ans l_new [] =
|
||||
match start v conf ans ((end,v,comb conf an,filter end ans):l_new) []
|
||||
insert v conf an@(start,_,_,end) ans l_new (new@(end0,v0,conf0,ans0):r_new) =
|
||||
case compare end0 end of
|
||||
LT -> insert v conf an ans (new:l_new) r_new
|
||||
EQ -> case compare v0 v of
|
||||
LT -> match start v conf ans ((end,v, conf0,ans0): l_new) r_new
|
||||
EQ -> match start v conf ans ((end,v,merge (comb conf an) conf0,ans0): l_new) r_new
|
||||
GT -> match start v conf ans ((end,v,comb conf an, ans0): l_new) r_new
|
||||
GT -> match start v conf ans ((end,v,comb conf an, filter end ans):new:l_new) r_new
|
||||
|
||||
match start0 v conf (an@(start,_,_,end):ans) l_new r_new
|
||||
| start0 == start = insert v conf an ans l_new r_new
|
||||
match start0 v conf ans l_new r_new = revOn l_new r_new
|
||||
|
||||
comb ((start0,w0,an0,end0):conf) (start,w,an,end)
|
||||
| end0 == start && (unk w0 an0 || unk w an) = (start0,w0++w,[],end):conf
|
||||
comb conf an = an:conf
|
||||
|
||||
filter end [] = []
|
||||
filter end (next@(start,_,_,_):ans)
|
||||
| end <= start = next:ans
|
||||
| otherwise = filter end ans
|
||||
|
||||
revOn [] ys = ys
|
||||
revOn (x:xs) ys = revOn xs (x:ys)
|
||||
|
||||
merge [] ans = ans
|
||||
merge ans [] = ans
|
||||
merge (an1@(start1,_,_,end1):ans1) (an2@(start2,_,_,end2):ans2) =
|
||||
case compare (start1,end1) (start2,end2) of
|
||||
GT -> an1 : merge ans1 (an2:ans2)
|
||||
EQ -> an1 : merge ans1 ans2
|
||||
LT -> an2 : merge (an1:ans1) ans2
|
||||
|
||||
filterLongest :: [(Int,String,[MorphoAnalysis],Int)] -> [(Int,String,[MorphoAnalysis],Int)]
|
||||
filterLongest [] = []
|
||||
filterLongest (an:ans) = longest an ans
|
||||
where
|
||||
longest prev [] = [prev]
|
||||
longest prev@(start0,_,_,end0) (next@(start,_,_,end):ans)
|
||||
| start0 == start = longest next ans
|
||||
| otherwise = filter prev (next:ans)
|
||||
|
||||
filter prev [] = [prev]
|
||||
filter prev@(start0,w0,an0,end0) (next@(start,w,an,end):ans)
|
||||
| end0 == start && (unk w0 an0 || unk w an)
|
||||
= filter (start0,w0++w,[],end) ans
|
||||
| end0 <= start = prev : longest next ans
|
||||
| otherwise = filter prev ans
|
||||
|
||||
unk w [] | any (not . isPunctuation) w = True
|
||||
unk _ _ = False
|
||||
|
||||
fullFormLexicon :: Concr -> [(String, [MorphoAnalysis])]
|
||||
fullFormLexicon lang =
|
||||
@@ -1020,32 +1094,32 @@ getAnalysis ref self c_lemma c_anal prob exn = do
|
||||
writeIORef ref ((lemma, anal, prob):ans)
|
||||
|
||||
-- | This data type encodes the different outcomes which you could get from the parser.
|
||||
data ParseOutput
|
||||
data ParseOutput a
|
||||
= ParseFailed Int String -- ^ The integer is the position in number of unicode characters where the parser failed.
|
||||
-- The string is the token where the parser has failed.
|
||||
| ParseOk [(Expr,Float)] -- ^ If the parsing and the type checking are successful we get a list of abstract syntax trees.
|
||||
-- The list should be non-empty.
|
||||
| ParseOk a -- ^ If the parsing and the type checking are successful
|
||||
-- we get the abstract syntax trees as either a list or a chart.
|
||||
| ParseIncomplete -- ^ The sentence is not complete.
|
||||
|
||||
parse :: Concr -> Type -> String -> ParseOutput
|
||||
parse :: Concr -> Type -> String -> ParseOutput [(Expr,Float)]
|
||||
parse lang ty sent = parseWithHeuristics lang ty sent (-1.0) []
|
||||
|
||||
parseWithHeuristics :: Concr -- ^ the language with which we parse
|
||||
-> Type -- ^ the start category
|
||||
-> String -- ^ the input sentence
|
||||
-> Double -- ^ the heuristic factor.
|
||||
-- A negative value tells the parser
|
||||
-- to look up the default from
|
||||
-> Double -- ^ the heuristic factor.
|
||||
-- A negative value tells the parser
|
||||
-- to look up the default from
|
||||
-- the grammar flags
|
||||
-> [(Cat, Int -> Int -> Maybe (Expr,Float,Int))]
|
||||
-> [(Cat, String -> Int -> Maybe (Expr,Float,Int))]
|
||||
-- ^ a list of callbacks for literal categories.
|
||||
-- The arguments of the callback are:
|
||||
-- the index of the constituent for the literal category;
|
||||
-- the input sentence; the current offset in the sentence.
|
||||
-- If a literal has been recognized then the output should
|
||||
-- be Just (expr,probability,end_offset)
|
||||
-> ParseOutput
|
||||
parseWithHeuristics lang (Type ctype _) sent heuristic callbacks =
|
||||
-> ParseOutput [(Expr,Float)]
|
||||
parseWithHeuristics lang (Type ctype touchType) sent heuristic callbacks =
|
||||
unsafePerformIO $
|
||||
do exprPl <- gu_new_pool
|
||||
parsePl <- gu_new_pool
|
||||
@@ -1085,7 +1159,137 @@ parseWithHeuristics lang (Type ctype _) sent heuristic callbacks =
|
||||
exprs <- fromPgfExprEnum enum parseFPl (touchConcr lang >> touchForeignPtr exprFPl)
|
||||
return (ParseOk exprs)
|
||||
|
||||
mkCallbacksMap :: Ptr PgfConcr -> [(String, Int -> Int -> Maybe (Expr,Float,Int))] -> Ptr GuPool -> IO (Ptr PgfCallbacksMap)
|
||||
parseToChart :: Concr -- ^ the language with which we parse
|
||||
-> Type -- ^ the start category
|
||||
-> String -- ^ the input sentence
|
||||
-> Double -- ^ the heuristic factor.
|
||||
-- A negative value tells the parser
|
||||
-- to look up the default from
|
||||
-- the grammar flags
|
||||
-> [(Cat, String -> Int -> Maybe (Expr,Float,Int))]
|
||||
-- ^ a list of callbacks for literal categories.
|
||||
-- The arguments of the callback are:
|
||||
-- the index of the constituent for the literal category;
|
||||
-- the input sentence; the current offset in the sentence.
|
||||
-- If a literal has been recognized then the output should
|
||||
-- be Just (expr,probability,end_offset)
|
||||
-> Int -- ^ the maximal number of roots
|
||||
-> ParseOutput ([FId],Map.Map FId ([(Int,Int,String)],[(Expr,[PArg],Float)],Cat))
|
||||
parseToChart lang (Type ctype touchType) sent heuristic callbacks roots =
|
||||
unsafePerformIO $
|
||||
withGuPool $ \parsePl -> do
|
||||
do exn <- gu_new_exn parsePl
|
||||
sent <- newUtf8CString sent parsePl
|
||||
callbacks_map <- mkCallbacksMap (concr lang) callbacks parsePl
|
||||
ps <- pgf_parse_to_chart (concr lang) ctype sent heuristic callbacks_map (fromIntegral roots) exn parsePl parsePl
|
||||
touchType
|
||||
failed <- gu_exn_is_raised exn
|
||||
if failed
|
||||
then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
|
||||
if is_parse_error
|
||||
then do c_err <- (#peek GuExn, data.data) exn
|
||||
c_incomplete <- (#peek PgfParseError, incomplete) c_err
|
||||
if (c_incomplete :: CInt) == 0
|
||||
then do c_offset <- (#peek PgfParseError, offset) c_err
|
||||
token_ptr <- (#peek PgfParseError, token_ptr) c_err
|
||||
token_len <- (#peek PgfParseError, token_len) c_err
|
||||
tok <- peekUtf8CStringLen token_ptr token_len
|
||||
touchConcr lang
|
||||
return (ParseFailed (fromIntegral (c_offset :: CInt)) tok)
|
||||
else do touchConcr lang
|
||||
return ParseIncomplete
|
||||
else do is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
|
||||
if is_exn
|
||||
then do c_msg <- (#peek GuExn, data.data) exn
|
||||
msg <- peekUtf8CString c_msg
|
||||
touchConcr lang
|
||||
throwIO (PGFError msg)
|
||||
else do touchConcr lang
|
||||
throwIO (PGFError "Parsing failed")
|
||||
else do c_roots <- pgf_get_parse_roots ps parsePl
|
||||
let get_range c_ccat = pgf_ccat_to_range ps c_ccat parsePl
|
||||
c_len <- (#peek GuSeq, len) c_roots
|
||||
chart <- peekCCats get_range Map.empty (c_len :: CSizeT) (c_roots `plusPtr` (#offset GuSeq, data))
|
||||
touchConcr lang
|
||||
return (ParseOk chart)
|
||||
where
|
||||
peekCCats get_range chart 0 ptr = return ([],chart)
|
||||
peekCCats get_range chart len ptr = do
|
||||
(root, chart) <- deRef (peekCCat get_range chart) ptr
|
||||
(roots,chart) <- peekCCats get_range chart (len-1) (ptr `plusPtr` (#size PgfCCat*))
|
||||
return (root:roots,chart)
|
||||
|
||||
peekCCat get_range chart c_ccat = do
|
||||
fid <- peekFId c_ccat
|
||||
c_total_cats <- (#peek PgfConcr, total_cats) (concr lang)
|
||||
if Map.member fid chart || fid < c_total_cats
|
||||
then return (fid,chart)
|
||||
else do c_cnccat <- (#peek PgfCCat, cnccat) c_ccat
|
||||
c_abscat <- (#peek PgfCCat, cnccat) c_cnccat
|
||||
c_name <- (#peek PgfCCat, cnccat) c_abscat
|
||||
cat <- peekUtf8CString c_name
|
||||
range <- get_range c_ccat >>= peekSequence peekRange (#size PgfParseRange)
|
||||
c_prods <- (#peek PgfCCat, prods) c_ccat
|
||||
if c_prods == nullPtr
|
||||
then do return (fid,Map.insert fid (range,[],cat) chart)
|
||||
else do c_len <- (#peek PgfCCat, n_synprods) c_ccat
|
||||
(prods,chart) <- fixIO (\res -> peekProductions (Map.insert fid (range,fst res,cat) chart)
|
||||
(fromIntegral (c_len :: CSizeT))
|
||||
(c_prods `plusPtr` (#offset GuSeq, data)))
|
||||
return (fid,chart)
|
||||
where
|
||||
peekProductions chart 0 ptr = return ([],chart)
|
||||
peekProductions chart len ptr = do
|
||||
(ps1,chart) <- deRef (peekProduction chart) ptr
|
||||
(ps2,chart) <- peekProductions chart (len-1) (ptr `plusPtr` (#size GuVariant))
|
||||
return (ps1++ps2,chart)
|
||||
|
||||
peekProduction chart p = do
|
||||
tag <- gu_variant_tag p
|
||||
dt <- gu_variant_data p
|
||||
case tag of
|
||||
(#const PGF_PRODUCTION_APPLY) -> do { c_cncfun <- (#peek PgfProductionApply, fun) dt ;
|
||||
c_absfun <- (#peek PgfCncFun, absfun) c_cncfun ;
|
||||
expr <- (#peek PgfAbsFun, ep.expr) c_absfun ;
|
||||
p <- (#peek PgfAbsFun, ep.prob) c_absfun ;
|
||||
c_args <- (#peek PgfProductionApply, args) dt ;
|
||||
c_len <- (#peek GuSeq, len) c_args ;
|
||||
(pargs,chart) <- peekPArgs chart (c_len :: CSizeT) (c_args `plusPtr` (#offset GuSeq, data)) ;
|
||||
return ([(Expr expr (touchConcr lang), pargs, p)],chart) }
|
||||
(#const PGF_PRODUCTION_COERCE) -> do { c_coerce <- (#peek PgfProductionCoerce, coerce) dt ;
|
||||
(fid,chart) <- peekCCat get_range chart c_coerce ;
|
||||
return (maybe [] snd3 (Map.lookup fid chart),chart) }
|
||||
(#const PGF_PRODUCTION_EXTERN) -> do { c_ep <- (#peek PgfProductionExtern, ep) dt ;
|
||||
expr <- (#peek PgfExprProb, expr) c_ep ;
|
||||
p <- (#peek PgfExprProb, prob) c_ep ;
|
||||
return ([(Expr expr (touchConcr lang), [], p)],chart) }
|
||||
_ -> error ("Unknown production type "++show tag++" in the grammar")
|
||||
|
||||
snd3 (_,x,_) = x
|
||||
|
||||
peekPArgs chart 0 ptr = return ([],chart)
|
||||
peekPArgs chart len ptr = do
|
||||
(a, chart) <- peekPArg chart ptr
|
||||
(as,chart) <- peekPArgs chart (len-1) (ptr `plusPtr` (#size PgfPArg))
|
||||
return (a:as,chart)
|
||||
|
||||
peekPArg chart ptr = do
|
||||
c_hypos <- (#peek PgfPArg, hypos) ptr
|
||||
hypos <- if c_hypos /= nullPtr
|
||||
then do res <- peekSequence (deRef peekFId) (#size int) c_hypos
|
||||
return [(fid,fid) | fid <- res]
|
||||
else return []
|
||||
c_ccat <- (#peek PgfPArg, ccat) ptr
|
||||
(fid,chart) <- peekCCat get_range chart c_ccat
|
||||
return (PArg hypos fid,chart)
|
||||
|
||||
peekRange ptr = do
|
||||
s <- (#peek PgfParseRange, start) ptr
|
||||
e <- (#peek PgfParseRange, end) ptr
|
||||
f <- (#peek PgfParseRange, field) ptr >>= peekCString
|
||||
return ((fromIntegral :: CSizeT -> Int) s, (fromIntegral :: CSizeT -> Int) e, f)
|
||||
|
||||
mkCallbacksMap :: Ptr PgfConcr -> [(String, String -> Int -> Maybe (Expr,Float,Int))] -> Ptr GuPool -> IO (Ptr PgfCallbacksMap)
|
||||
mkCallbacksMap concr callbacks pool = do
|
||||
callbacks_map <- pgf_new_callbacks_map concr pool
|
||||
forM_ callbacks $ \(cat,match) -> do
|
||||
@@ -1095,23 +1299,15 @@ mkCallbacksMap concr callbacks pool = do
|
||||
hspgf_callbacks_map_add_literal concr callbacks_map ccat match predict pool
|
||||
return callbacks_map
|
||||
where
|
||||
match_callback match clin_idx poffset out_pool = do
|
||||
match_callback match c_ann poffset out_pool = do
|
||||
coffset <- peek poffset
|
||||
case match (fromIntegral clin_idx) (fromIntegral coffset) of
|
||||
ann <- peekUtf8CString c_ann
|
||||
case match ann (fromIntegral coffset) of
|
||||
Nothing -> return nullPtr
|
||||
Just (e,prob,offset') -> do poke poffset (fromIntegral offset')
|
||||
|
||||
-- here we copy the expression to out_pool
|
||||
c_e <- withGuPool $ \tmpPl -> do
|
||||
exn <- gu_new_exn tmpPl
|
||||
|
||||
(sb,out) <- newOut tmpPl
|
||||
let printCtxt = nullPtr
|
||||
pgf_print_expr (expr e) printCtxt 1 out exn
|
||||
c_str <- gu_string_buf_freeze sb tmpPl
|
||||
|
||||
guin <- gu_string_in c_str tmpPl
|
||||
pgf_read_expr guin out_pool tmpPl exn
|
||||
c_e <- pgf_clone_expr (expr e) out_pool
|
||||
|
||||
ep <- gu_malloc out_pool (#size PgfExprProb)
|
||||
(#poke PgfExprProb, expr) ep c_e
|
||||
@@ -1120,26 +1316,6 @@ mkCallbacksMap concr callbacks pool = do
|
||||
|
||||
predict_callback _ _ _ = return nullPtr
|
||||
|
||||
complete :: Concr -- ^ the language with which we do word completion
|
||||
-> Type -- ^ the start category
|
||||
-> String -- ^ the input sentence
|
||||
-> String -- ^ prefix for the word to be completed
|
||||
-> [(String, Cat, Fun, Float)]
|
||||
complete lang (Type ctype _) sent prefix =
|
||||
unsafePerformIO $
|
||||
do pl <- gu_new_pool
|
||||
exn <- gu_new_exn pl
|
||||
sent <- newUtf8CString sent pl
|
||||
prefix <- newUtf8CString prefix pl
|
||||
enum <- pgf_complete (concr lang) ctype sent prefix exn pl
|
||||
failed <- gu_exn_is_raised exn
|
||||
if failed
|
||||
then do gu_pool_free pl
|
||||
return []
|
||||
else do fpl <- newForeignPtr gu_pool_finalizer pl
|
||||
tokens <- fromPgfTokenEnum enum fpl
|
||||
return tokens
|
||||
|
||||
lookupSentence :: Concr -- ^ the language with which we parse
|
||||
-> Type -- ^ the start category
|
||||
-> String -- ^ the input sentence
|
||||
@@ -1158,7 +1334,7 @@ lookupSentence lang (Type ctype _) sent =
|
||||
|
||||
-- | The oracle is a triple of functions.
|
||||
-- The first two take a category name and a linearization field name
|
||||
-- and they should return True/False when the corresponding
|
||||
-- and they should return True/False when the corresponding
|
||||
-- prediction or completion is appropriate. The third function
|
||||
-- is the oracle for literals.
|
||||
type Oracle = (Maybe (Cat -> String -> Int -> Bool)
|
||||
@@ -1170,7 +1346,7 @@ parseWithOracle :: Concr -- ^ the language with which we parse
|
||||
-> Cat -- ^ the start category
|
||||
-> String -- ^ the input sentence
|
||||
-> Oracle
|
||||
-> ParseOutput
|
||||
-> ParseOutput [(Expr,Float)]
|
||||
parseWithOracle lang cat sent (predict,complete,literal) =
|
||||
unsafePerformIO $
|
||||
do parsePl <- gu_new_pool
|
||||
@@ -1246,6 +1422,67 @@ parseWithOracle lang cat sent (predict,complete,literal) =
|
||||
return ep
|
||||
Nothing -> do return nullPtr
|
||||
|
||||
-- | Returns possible completions of the current partial input.
|
||||
complete :: Concr -- ^ the language with which we parse
|
||||
-> Type -- ^ the start category
|
||||
-> String -- ^ the input sentence (excluding token being completed)
|
||||
-> String -- ^ prefix (partial token being completed)
|
||||
-> ParseOutput [(String, Fun, Cat, Float)] -- ^ (token, category, function, probability)
|
||||
complete lang (Type ctype _) sent pfx =
|
||||
unsafePerformIO $ do
|
||||
parsePl <- gu_new_pool
|
||||
exn <- gu_new_exn parsePl
|
||||
sent <- newUtf8CString sent parsePl
|
||||
pfx <- newUtf8CString pfx parsePl
|
||||
enum <- pgf_complete (concr lang) ctype sent pfx exn parsePl
|
||||
failed <- gu_exn_is_raised exn
|
||||
if failed
|
||||
then do
|
||||
is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
|
||||
if is_parse_error
|
||||
then do
|
||||
c_err <- (#peek GuExn, data.data) exn
|
||||
c_offset <- (#peek PgfParseError, offset) c_err
|
||||
token_ptr <- (#peek PgfParseError, token_ptr) c_err
|
||||
token_len <- (#peek PgfParseError, token_len) c_err
|
||||
tok <- peekUtf8CStringLen token_ptr token_len
|
||||
gu_pool_free parsePl
|
||||
return (ParseFailed (fromIntegral (c_offset :: CInt)) tok)
|
||||
else do
|
||||
is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
|
||||
if is_exn
|
||||
then do
|
||||
c_msg <- (#peek GuExn, data.data) exn
|
||||
msg <- peekUtf8CString c_msg
|
||||
gu_pool_free parsePl
|
||||
throwIO (PGFError msg)
|
||||
else do
|
||||
gu_pool_free parsePl
|
||||
throwIO (PGFError "Parsing failed")
|
||||
else do
|
||||
fpl <- newForeignPtr gu_pool_finalizer parsePl
|
||||
ParseOk <$> fromCompletions enum fpl
|
||||
where
|
||||
fromCompletions :: Ptr GuEnum -> ForeignPtr GuPool -> IO [(String, Cat, Fun, Float)]
|
||||
fromCompletions enum fpl =
|
||||
withGuPool $ \tmpPl -> do
|
||||
cmpEntry <- alloca $ \ptr ->
|
||||
withForeignPtr fpl $ \pl ->
|
||||
do gu_enum_next enum ptr pl
|
||||
peek ptr
|
||||
if cmpEntry == nullPtr
|
||||
then do
|
||||
finalizeForeignPtr fpl
|
||||
touchConcr lang
|
||||
return []
|
||||
else do
|
||||
tok <- peekUtf8CString =<< (#peek PgfTokenProb, tok) cmpEntry
|
||||
cat <- peekUtf8CString =<< (#peek PgfTokenProb, cat) cmpEntry
|
||||
fun <- peekUtf8CString =<< (#peek PgfTokenProb, fun) cmpEntry
|
||||
prob <- (#peek PgfTokenProb, prob) cmpEntry
|
||||
toks <- unsafeInterleaveIO (fromCompletions enum fpl)
|
||||
return ((tok, cat, fun, prob) : toks)
|
||||
|
||||
-- | Returns True if there is a linearization defined for that function in that language
|
||||
hasLinearization :: Concr -> Fun -> Bool
|
||||
hasLinearization lang id = unsafePerformIO $
|
||||
@@ -1319,7 +1556,7 @@ linearizeAll lang e = unsafePerformIO $
|
||||
|
||||
-- | Generates a table of linearizations for an expression
|
||||
tabularLinearize :: Concr -> Expr -> [(String, String)]
|
||||
tabularLinearize lang e =
|
||||
tabularLinearize lang e =
|
||||
case tabularLinearizeAll lang e of
|
||||
(lins:_) -> lins
|
||||
_ -> []
|
||||
@@ -1331,6 +1568,7 @@ tabularLinearizeAll lang e = unsafePerformIO $
|
||||
exn <- gu_new_exn tmpPl
|
||||
cts <- pgf_lzr_concretize (concr lang) (expr e) exn tmpPl
|
||||
failed <- gu_exn_is_raised exn
|
||||
touchConcr lang
|
||||
if failed
|
||||
then throwExn exn
|
||||
else collect cts exn tmpPl
|
||||
@@ -1368,45 +1606,58 @@ tabularLinearizeAll lang e = unsafePerformIO $
|
||||
ss <- collectTable lang ctree (lin_idx+1) labels exn tmpPl
|
||||
return ((label,s):ss)
|
||||
|
||||
throwExn exn = do
|
||||
is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
|
||||
if is_exn
|
||||
then do c_msg <- (#peek GuExn, data.data) exn
|
||||
msg <- peekUtf8CString c_msg
|
||||
throwIO (PGFError msg)
|
||||
else do throwIO (PGFError "The abstract tree cannot be linearized")
|
||||
categoryFields :: Concr -> Cat -> Maybe [String]
|
||||
categoryFields lang cat =
|
||||
unsafePerformIO $ do
|
||||
withGuPool $ \tmpPl -> do
|
||||
p_n_lins <- gu_malloc tmpPl (#size size_t)
|
||||
c_cat <- newUtf8CString cat tmpPl
|
||||
c_fields <- pgf_category_fields (concr lang) c_cat p_n_lins
|
||||
if c_fields == nullPtr
|
||||
then do touchConcr lang
|
||||
return Nothing
|
||||
else do len <- peek p_n_lins
|
||||
fs <- peekFields len c_fields
|
||||
touchConcr lang
|
||||
return (Just fs)
|
||||
where
|
||||
peekFields 0 ptr = return []
|
||||
peekFields len ptr = do
|
||||
f <- peek ptr >>= peekUtf8CString
|
||||
fs <- peekFields (len-1) (ptr `plusPtr` (#size GuString))
|
||||
return (f:fs)
|
||||
|
||||
type FId = Int
|
||||
type LIndex = Int
|
||||
|
||||
-- | BracketedString represents a sentence that is linearized
|
||||
-- as usual but we also want to retain the ''brackets'' that
|
||||
-- mark the beginning and the end of each constituent.
|
||||
data BracketedString
|
||||
= Leaf String -- ^ this is the leaf i.e. a single token
|
||||
| Bracket Cat {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex Fun [BracketedString]
|
||||
| BIND -- ^ the surrounding tokens must be bound together
|
||||
| Bracket Cat {-# UNPACK #-} !FId String Fun [BracketedString]
|
||||
-- ^ this is a bracket. The 'Cat' is the category of
|
||||
-- the phrase. The 'FId' is a unique identifier for
|
||||
-- every phrase in the sentence. For context-free grammars
|
||||
-- i.e. without discontinuous constituents this identifier
|
||||
-- is also unique for every bracket. When there are discontinuous
|
||||
-- is also unique for every bracket. When there are discontinuous
|
||||
-- phrases then the identifiers are unique for every phrase but
|
||||
-- not for every bracket since the bracket represents a constituent.
|
||||
-- The different constituents could still be distinguished by using
|
||||
-- the constituent index i.e. 'LIndex'. If the grammar is reduplicating
|
||||
-- the analysis string. If the grammar is reduplicating
|
||||
-- then the constituent indices will be the same for all brackets
|
||||
-- that represent the same constituent.
|
||||
-- The 'Fun' is the name of the abstract function that generated
|
||||
-- this phrase.
|
||||
|
||||
-- | Renders the bracketed string as a string where
|
||||
-- | Renders the bracketed string as a string where
|
||||
-- the brackets are shown as @(S ...)@ where
|
||||
-- @S@ is the category.
|
||||
showBracketedString :: BracketedString -> String
|
||||
showBracketedString = render . ppBracketedString
|
||||
|
||||
ppBracketedString (Leaf t) = text t
|
||||
ppBracketedString (Bracket cat fid index _ bss) = parens (text cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
|
||||
ppBracketedString BIND = text "&+"
|
||||
ppBracketedString (Bracket cat fid _ _ bss) = parens (text cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
|
||||
|
||||
-- | Extracts the sequence of tokens from the bracketed string
|
||||
flattenBracketedString :: BracketedString -> [String]
|
||||
@@ -1415,7 +1666,7 @@ flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString
|
||||
|
||||
bracketedLinearize :: Concr -> Expr -> [BracketedString]
|
||||
bracketedLinearize lang e = unsafePerformIO $
|
||||
withGuPool $ \pl ->
|
||||
withGuPool $ \pl ->
|
||||
do exn <- gu_new_exn pl
|
||||
cts <- pgf_lzr_concretize (concr lang) (expr e) exn pl
|
||||
failed <- gu_exn_is_raised exn
|
||||
@@ -1428,27 +1679,8 @@ bracketedLinearize lang e = unsafePerformIO $
|
||||
return []
|
||||
else do ctree <- pgf_lzr_wrap_linref ctree pl
|
||||
ref <- newIORef ([],[])
|
||||
allocaBytes (#size PgfLinFuncs) $ \pLinFuncs ->
|
||||
alloca $ \ppLinFuncs -> do
|
||||
fptr_symbol_token <- wrapSymbolTokenCallback (symbol_token ref)
|
||||
fptr_begin_phrase <- wrapPhraseCallback (begin_phrase ref)
|
||||
fptr_end_phrase <- wrapPhraseCallback (end_phrase ref)
|
||||
fptr_symbol_ne <- wrapSymbolNonExistCallback (symbol_ne exn)
|
||||
fptr_symbol_meta <- wrapSymbolMetaCallback (symbol_meta ref)
|
||||
(#poke PgfLinFuncs, symbol_token) pLinFuncs fptr_symbol_token
|
||||
(#poke PgfLinFuncs, begin_phrase) pLinFuncs fptr_begin_phrase
|
||||
(#poke PgfLinFuncs, end_phrase) pLinFuncs fptr_end_phrase
|
||||
(#poke PgfLinFuncs, symbol_ne) pLinFuncs fptr_symbol_ne
|
||||
(#poke PgfLinFuncs, symbol_bind) pLinFuncs nullPtr
|
||||
(#poke PgfLinFuncs, symbol_capit) pLinFuncs nullPtr
|
||||
(#poke PgfLinFuncs, symbol_meta) pLinFuncs fptr_symbol_meta
|
||||
poke ppLinFuncs pLinFuncs
|
||||
pgf_lzr_linearize (concr lang) ctree 0 ppLinFuncs pl
|
||||
freeHaskellFunPtr fptr_symbol_token
|
||||
freeHaskellFunPtr fptr_begin_phrase
|
||||
freeHaskellFunPtr fptr_end_phrase
|
||||
freeHaskellFunPtr fptr_symbol_ne
|
||||
freeHaskellFunPtr fptr_symbol_meta
|
||||
withBracketLinFuncs ref exn $ \ppLinFuncs ->
|
||||
pgf_lzr_linearize (concr lang) ctree 0 ppLinFuncs pl
|
||||
failed <- gu_exn_is_raised exn
|
||||
if failed
|
||||
then do is_nonexist <- gu_exn_caught exn gu_exn_type_PgfLinNonExist
|
||||
@@ -1457,41 +1689,105 @@ bracketedLinearize lang e = unsafePerformIO $
|
||||
else throwExn exn
|
||||
else do (_,bs) <- readIORef ref
|
||||
return (reverse bs)
|
||||
|
||||
bracketedLinearizeAll :: Concr -> Expr -> [[BracketedString]]
|
||||
bracketedLinearizeAll lang e = unsafePerformIO $
|
||||
withGuPool $ \pl ->
|
||||
do exn <- gu_new_exn pl
|
||||
cts <- pgf_lzr_concretize (concr lang) (expr e) exn pl
|
||||
failed <- gu_exn_is_raised exn
|
||||
if failed
|
||||
then do touchExpr e
|
||||
throwExn exn
|
||||
else do ref <- newIORef ([],[])
|
||||
bss <- withBracketLinFuncs ref exn $ \ppLinFuncs ->
|
||||
collect ref cts ppLinFuncs exn pl
|
||||
touchExpr e
|
||||
return bss
|
||||
where
|
||||
collect ref cts ppLinFuncs exn pl = withGuPool $ \tmpPl -> do
|
||||
ctree <- alloca $ \ptr -> do gu_enum_next cts ptr tmpPl
|
||||
peek ptr
|
||||
if ctree == nullPtr
|
||||
then return []
|
||||
else do ctree <- pgf_lzr_wrap_linref ctree pl
|
||||
pgf_lzr_linearize (concr lang) ctree 0 ppLinFuncs pl
|
||||
failed <- gu_exn_is_raised exn
|
||||
if failed
|
||||
then do is_nonexist <- gu_exn_caught exn gu_exn_type_PgfLinNonExist
|
||||
if is_nonexist
|
||||
then collect ref cts ppLinFuncs exn pl
|
||||
else throwExn exn
|
||||
else do (_,bs) <- readIORef ref
|
||||
writeIORef ref ([],[])
|
||||
bss <- collect ref cts ppLinFuncs exn pl
|
||||
return (reverse bs : bss)
|
||||
|
||||
withBracketLinFuncs ref exn f =
|
||||
allocaBytes (#size PgfLinFuncs) $ \pLinFuncs ->
|
||||
alloca $ \ppLinFuncs -> do
|
||||
fptr_symbol_token <- wrapSymbolTokenCallback (symbol_token ref)
|
||||
fptr_begin_phrase <- wrapPhraseCallback (begin_phrase ref)
|
||||
fptr_end_phrase <- wrapPhraseCallback (end_phrase ref)
|
||||
fptr_symbol_ne <- wrapSymbolNonExistCallback (symbol_ne exn)
|
||||
fptr_symbol_bind <- wrapSymbolBindCallback (symbol_bind ref)
|
||||
fptr_symbol_meta <- wrapSymbolMetaCallback (symbol_meta ref)
|
||||
(#poke PgfLinFuncs, symbol_token) pLinFuncs fptr_symbol_token
|
||||
(#poke PgfLinFuncs, begin_phrase) pLinFuncs fptr_begin_phrase
|
||||
(#poke PgfLinFuncs, end_phrase) pLinFuncs fptr_end_phrase
|
||||
(#poke PgfLinFuncs, symbol_ne) pLinFuncs fptr_symbol_ne
|
||||
(#poke PgfLinFuncs, symbol_bind) pLinFuncs fptr_symbol_bind
|
||||
(#poke PgfLinFuncs, symbol_capit) pLinFuncs nullPtr
|
||||
(#poke PgfLinFuncs, symbol_meta) pLinFuncs fptr_symbol_meta
|
||||
poke ppLinFuncs pLinFuncs
|
||||
res <- f ppLinFuncs
|
||||
freeHaskellFunPtr fptr_symbol_token
|
||||
freeHaskellFunPtr fptr_begin_phrase
|
||||
freeHaskellFunPtr fptr_end_phrase
|
||||
freeHaskellFunPtr fptr_symbol_ne
|
||||
freeHaskellFunPtr fptr_symbol_bind
|
||||
freeHaskellFunPtr fptr_symbol_meta
|
||||
return res
|
||||
where
|
||||
symbol_token ref _ c_token = do
|
||||
(stack,bs) <- readIORef ref
|
||||
token <- peekUtf8CString c_token
|
||||
writeIORef ref (stack,Leaf token : bs)
|
||||
|
||||
begin_phrase ref _ c_cat c_fid c_lindex c_fun = do
|
||||
begin_phrase ref _ c_cat c_fid c_ann c_fun = do
|
||||
(stack,bs) <- readIORef ref
|
||||
writeIORef ref (bs:stack,[])
|
||||
|
||||
end_phrase ref _ c_cat c_fid c_lindex c_fun = do
|
||||
end_phrase ref _ c_cat c_fid c_ann c_fun = do
|
||||
(bs':stack,bs) <- readIORef ref
|
||||
if null bs
|
||||
then writeIORef ref (stack, bs')
|
||||
else do cat <- peekUtf8CString c_cat
|
||||
let fid = fromIntegral c_fid
|
||||
let lindex = fromIntegral c_lindex
|
||||
ann <- peekUtf8CString c_ann
|
||||
fun <- peekUtf8CString c_fun
|
||||
writeIORef ref (stack, Bracket cat fid lindex fun (reverse bs) : bs')
|
||||
writeIORef ref (stack, Bracket cat fid ann fun (reverse bs) : bs')
|
||||
|
||||
symbol_ne exn _ = do
|
||||
gu_exn_raise exn gu_exn_type_PgfLinNonExist
|
||||
return ()
|
||||
|
||||
symbol_bind ref _ = do
|
||||
(stack,bs) <- readIORef ref
|
||||
writeIORef ref (stack,BIND : bs)
|
||||
return ()
|
||||
|
||||
symbol_meta ref _ meta_id = do
|
||||
(stack,bs) <- readIORef ref
|
||||
writeIORef ref (stack,Leaf "?" : bs)
|
||||
|
||||
throwExn exn = do
|
||||
is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
|
||||
if is_exn
|
||||
then do c_msg <- (#peek GuExn, data.data) exn
|
||||
msg <- peekUtf8CString c_msg
|
||||
throwIO (PGFError msg)
|
||||
else do throwIO (PGFError "The abstract tree cannot be linearized")
|
||||
throwExn exn = do
|
||||
is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
|
||||
if is_exn
|
||||
then do c_msg <- (#peek GuExn, data.data) exn
|
||||
msg <- peekUtf8CString c_msg
|
||||
throwIO (PGFError msg)
|
||||
else do throwIO (PGFError "The abstract tree cannot be linearized")
|
||||
|
||||
alignWords :: Concr -> Expr -> [(String, [Int])]
|
||||
alignWords lang e = unsafePerformIO $
|
||||
@@ -1684,13 +1980,13 @@ instance Exception PGFError
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
type LiteralCallback =
|
||||
PGF -> (ConcName,Concr) -> String -> Int -> Int -> Maybe (Expr,Float,Int)
|
||||
PGF -> (ConcName,Concr) -> String -> String -> Int -> Maybe (Expr,Float,Int)
|
||||
|
||||
-- | Callbacks for the App grammar
|
||||
literalCallbacks :: [(AbsName,[(Cat,LiteralCallback)])]
|
||||
literalCallbacks = [("App",[("PN",nerc),("Symb",chunk)])]
|
||||
|
||||
-- | Named entity recognition for the App grammar
|
||||
-- | Named entity recognition for the App grammar
|
||||
-- (based on ../java/org/grammaticalframework/pgf/NercLiteralCallback.java)
|
||||
nerc :: LiteralCallback
|
||||
nerc pgf (lang,concr) sentence lin_idx offset =
|
||||
|
||||
@@ -13,7 +13,7 @@ import Data.Maybe(fromJust)
|
||||
type Cat = String -- ^ Name of syntactic category
|
||||
type Fun = String -- ^ Name of function
|
||||
|
||||
data BindType =
|
||||
data BindType =
|
||||
Explicit
|
||||
| Implicit
|
||||
deriving (Show, Eq, Ord)
|
||||
@@ -32,7 +32,7 @@ instance Show Expr where
|
||||
show = showExpr []
|
||||
|
||||
instance Eq Expr where
|
||||
(Expr e1 e1_touch) == (Expr e2 e2_touch) =
|
||||
(Expr e1 e1_touch) == (Expr e2 e2_touch) =
|
||||
unsafePerformIO $ do
|
||||
res <- pgf_expr_eq e1 e2
|
||||
e1_touch >> e2_touch
|
||||
@@ -107,9 +107,9 @@ unApp (Expr expr touch) =
|
||||
appl <- pgf_expr_unapply expr pl
|
||||
if appl == nullPtr
|
||||
then return Nothing
|
||||
else do
|
||||
else do
|
||||
fun <- peekCString =<< (#peek PgfApplication, fun) appl
|
||||
arity <- (#peek PgfApplication, n_args) appl :: IO CInt
|
||||
arity <- (#peek PgfApplication, n_args) appl :: IO CInt
|
||||
c_args <- peekArray (fromIntegral arity) (appl `plusPtr` (#offset PgfApplication, args))
|
||||
return $ Just (fun, [Expr c_arg touch | c_arg <- c_args])
|
||||
|
||||
@@ -145,7 +145,9 @@ unStr (Expr expr touch) =
|
||||
touch
|
||||
return (Just s)
|
||||
|
||||
-- | Constructs an expression from an integer literal
|
||||
-- | Constructs an expression from an integer literal.
|
||||
-- Note that the C runtime does not support long integers, and you may run into overflow issues with large values.
|
||||
-- See [here](https://github.com/GrammaticalFramework/gf-core/issues/109) for more details.
|
||||
mkInt :: Int -> Expr
|
||||
mkInt val =
|
||||
unsafePerformIO $ do
|
||||
|
||||
@@ -6,6 +6,7 @@ module PGF2.FFI where
|
||||
#include <gu/hash.h>
|
||||
#include <gu/utf8.h>
|
||||
#include <pgf/pgf.h>
|
||||
#include <pgf/data.h>
|
||||
|
||||
import Foreign ( alloca, peek, poke, peekByteOff )
|
||||
import Foreign.C
|
||||
@@ -102,7 +103,7 @@ foreign import ccall unsafe "gu/file.h gu_file_in"
|
||||
|
||||
foreign import ccall safe "gu/enum.h gu_enum_next"
|
||||
gu_enum_next :: Ptr a -> Ptr (Ptr b) -> Ptr GuPool -> IO ()
|
||||
|
||||
|
||||
foreign import ccall unsafe "gu/string.h gu_string_buf_freeze"
|
||||
gu_string_buf_freeze :: Ptr GuStringBuf -> Ptr GuPool -> IO CString
|
||||
|
||||
@@ -237,6 +238,16 @@ newSequence elem_size pokeElem values pool = do
|
||||
pokeElem ptr x
|
||||
pokeElems (ptr `plusPtr` (fromIntegral elem_size)) xs
|
||||
|
||||
type FId = Int
|
||||
data PArg = PArg [(FId,FId)] {-# UNPACK #-} !FId deriving (Eq,Ord,Show)
|
||||
|
||||
peekFId :: Ptr a -> IO FId
|
||||
peekFId c_ccat = do
|
||||
c_fid <- (#peek PgfCCat, fid) c_ccat
|
||||
return (fromIntegral (c_fid :: CInt))
|
||||
|
||||
deRef peekValue ptr = peek ptr >>= peekValue
|
||||
|
||||
------------------------------------------------------------------
|
||||
-- libpgf API
|
||||
|
||||
@@ -245,6 +256,7 @@ data PgfApplication
|
||||
data PgfConcr
|
||||
type PgfExpr = Ptr ()
|
||||
data PgfExprProb
|
||||
data PgfTokenProb
|
||||
data PgfExprParser
|
||||
data PgfFullFormEntry
|
||||
data PgfMorphoCallback
|
||||
@@ -261,6 +273,7 @@ data PgfAbsCat
|
||||
data PgfCCat
|
||||
data PgfCncFun
|
||||
data PgfProductionApply
|
||||
data PgfParsing
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_read"
|
||||
pgf_read :: CString -> Ptr GuPool -> Ptr GuExn -> IO (Ptr PgfPGF)
|
||||
@@ -310,6 +323,9 @@ foreign import ccall "pgf/pgf.h pgf_category_context"
|
||||
foreign import ccall "pgf/pgf.h pgf_category_prob"
|
||||
pgf_category_prob :: Ptr PgfPGF -> CString -> IO (#type prob_t)
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_category_fields"
|
||||
pgf_category_fields :: Ptr PgfConcr -> CString -> Ptr CSize -> IO (Ptr CString)
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_iter_functions"
|
||||
pgf_iter_functions :: Ptr PgfPGF -> Ptr GuMapItor -> Ptr GuExn -> IO ()
|
||||
|
||||
@@ -347,8 +363,9 @@ foreign import ccall "pgf/pgf.h pgf_lzr_get_table"
|
||||
pgf_lzr_get_table :: Ptr PgfConcr -> Ptr PgfCncTree -> Ptr CSizeT -> Ptr (Ptr CString) -> IO ()
|
||||
|
||||
type SymbolTokenCallback = Ptr (Ptr PgfLinFuncs) -> CString -> IO ()
|
||||
type PhraseCallback = Ptr (Ptr PgfLinFuncs) -> CString -> CInt -> CSizeT -> CString -> IO ()
|
||||
type PhraseCallback = Ptr (Ptr PgfLinFuncs) -> CString -> CInt -> CString -> CString -> IO ()
|
||||
type NonExistCallback = Ptr (Ptr PgfLinFuncs) -> IO ()
|
||||
type BindCallback = Ptr (Ptr PgfLinFuncs) -> IO ()
|
||||
type MetaCallback = Ptr (Ptr PgfLinFuncs) -> CInt -> IO ()
|
||||
|
||||
foreign import ccall "wrapper"
|
||||
@@ -360,27 +377,36 @@ foreign import ccall "wrapper"
|
||||
foreign import ccall "wrapper"
|
||||
wrapSymbolNonExistCallback :: NonExistCallback -> IO (FunPtr NonExistCallback)
|
||||
|
||||
foreign import ccall "wrapper"
|
||||
wrapSymbolBindCallback :: BindCallback -> IO (FunPtr BindCallback)
|
||||
|
||||
foreign import ccall "wrapper"
|
||||
wrapSymbolMetaCallback :: MetaCallback -> IO (FunPtr MetaCallback)
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_align_words"
|
||||
pgf_align_words :: Ptr PgfConcr -> PgfExpr -> Ptr GuExn -> Ptr GuPool -> IO (Ptr GuSeq)
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_parse_to_chart"
|
||||
pgf_parse_to_chart :: Ptr PgfConcr -> PgfType -> CString -> Double -> Ptr PgfCallbacksMap -> CSizeT -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr PgfParsing)
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_get_parse_roots"
|
||||
pgf_get_parse_roots :: Ptr PgfParsing -> Ptr GuPool -> IO (Ptr GuSeq)
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_ccat_to_range"
|
||||
pgf_ccat_to_range :: Ptr PgfParsing -> Ptr PgfCCat -> Ptr GuPool -> IO (Ptr GuSeq)
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_parse_with_heuristics"
|
||||
pgf_parse_with_heuristics :: Ptr PgfConcr -> PgfType -> CString -> Double -> Ptr PgfCallbacksMap -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum)
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_complete"
|
||||
pgf_complete :: Ptr PgfConcr -> PgfType -> CString -> CString -> Ptr GuExn -> Ptr GuPool -> IO (Ptr GuEnum)
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_lookup_sentence"
|
||||
pgf_lookup_sentence :: Ptr PgfConcr -> PgfType -> CString -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum)
|
||||
|
||||
type LiteralMatchCallback = CSizeT -> Ptr CSizeT -> Ptr GuPool -> IO (Ptr PgfExprProb)
|
||||
type LiteralMatchCallback = CString -> Ptr CSizeT -> Ptr GuPool -> IO (Ptr PgfExprProb)
|
||||
|
||||
foreign import ccall "wrapper"
|
||||
wrapLiteralMatchCallback :: LiteralMatchCallback -> IO (FunPtr LiteralMatchCallback)
|
||||
|
||||
type LiteralPredictCallback = CSizeT -> CString -> Ptr GuPool -> IO (Ptr PgfExprProb)
|
||||
type LiteralPredictCallback = CString -> CString -> Ptr GuPool -> IO (Ptr PgfExprProb)
|
||||
|
||||
foreign import ccall "wrapper"
|
||||
wrapLiteralPredictCallback :: LiteralPredictCallback -> IO (FunPtr LiteralPredictCallback)
|
||||
@@ -406,6 +432,9 @@ foreign import ccall
|
||||
foreign import ccall "pgf/pgf.h pgf_parse_with_oracle"
|
||||
pgf_parse_with_oracle :: Ptr PgfConcr -> CString -> CString -> Ptr PgfOracleCallback -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum)
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_complete"
|
||||
pgf_complete :: Ptr PgfConcr -> PgfType -> CString -> CString -> Ptr GuExn -> Ptr GuPool -> IO (Ptr GuEnum)
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_lookup_morpho"
|
||||
pgf_lookup_morpho :: Ptr PgfConcr -> CString -> Ptr PgfMorphoCallback -> Ptr GuExn -> IO ()
|
||||
|
||||
@@ -500,9 +529,6 @@ foreign import ccall "pgf/expr.h pgf_compute"
|
||||
foreign import ccall "pgf/expr.h pgf_print_expr"
|
||||
pgf_print_expr :: PgfExpr -> Ptr PgfPrintContext -> CInt -> Ptr GuOut -> Ptr GuExn -> IO ()
|
||||
|
||||
foreign import ccall "pgf/expr.h pgf_print_expr_tuple"
|
||||
pgf_print_expr_tuple :: CSizeT -> Ptr PgfExpr -> Ptr PgfPrintContext -> Ptr GuOut -> Ptr GuExn -> IO ()
|
||||
|
||||
foreign import ccall "pgf/expr.h pgf_print_type"
|
||||
pgf_print_type :: PgfType -> Ptr PgfPrintContext -> CInt -> Ptr GuOut -> Ptr GuExn -> IO ()
|
||||
|
||||
@@ -518,12 +544,6 @@ foreign import ccall "pgf/pgf.h pgf_print"
|
||||
foreign import ccall "pgf/expr.h pgf_read_expr"
|
||||
pgf_read_expr :: Ptr GuIn -> Ptr GuPool -> Ptr GuPool -> Ptr GuExn -> IO PgfExpr
|
||||
|
||||
foreign import ccall "pgf/expr.h pgf_read_expr_tuple"
|
||||
pgf_read_expr_tuple :: Ptr GuIn -> CSizeT -> Ptr PgfExpr -> Ptr GuPool -> Ptr GuExn -> IO CInt
|
||||
|
||||
foreign import ccall "pgf/expr.h pgf_read_expr_matrix"
|
||||
pgf_read_expr_matrix :: Ptr GuIn -> CSizeT -> Ptr GuPool -> Ptr GuExn -> IO (Ptr GuSeq)
|
||||
|
||||
foreign import ccall "pgf/expr.h pgf_read_type"
|
||||
pgf_read_type :: Ptr GuIn -> Ptr GuPool -> Ptr GuPool -> Ptr GuExn -> IO PgfType
|
||||
|
||||
@@ -544,3 +564,6 @@ foreign import ccall "pgf/data.h pgf_lzr_index"
|
||||
|
||||
foreign import ccall "pgf/data.h pgf_production_is_lexical"
|
||||
pgf_production_is_lexical :: Ptr PgfProductionApply -> Ptr GuBuf -> Ptr GuPool -> IO (#type bool)
|
||||
|
||||
foreign import ccall "pgf/expr.h pgf_clone_expr"
|
||||
pgf_clone_expr :: PgfExpr -> Ptr GuPool -> IO PgfExpr
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
module PGF2.Internal(-- * Access the internal structures
|
||||
FId,isPredefFId,
|
||||
FunId,SeqId,Token,Production(..),PArg(..),Symbol(..),Literal(..),
|
||||
FunId,SeqId,LIndex,Token,Production(..),PArg(..),Symbol(..),Literal(..),
|
||||
globalFlags, abstrFlags, concrFlags,
|
||||
concrTotalCats, concrCategories, concrProductions,
|
||||
concrTotalFuns, concrFunction,
|
||||
@@ -42,7 +42,8 @@ import Control.Exception(Exception,throwIO)
|
||||
import Control.Monad(foldM,when)
|
||||
import qualified Data.Map as Map
|
||||
|
||||
type Token = String
|
||||
type Token = String
|
||||
type LIndex = Int
|
||||
data Symbol
|
||||
= SymCat {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex
|
||||
| SymLit {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex
|
||||
@@ -60,7 +61,7 @@ data Production
|
||||
= PApply {-# UNPACK #-} !FunId [PArg]
|
||||
| PCoerce {-# UNPACK #-} !FId
|
||||
deriving (Eq,Ord,Show)
|
||||
data PArg = PArg [(FId,FId)] {-# UNPACK #-} !FId deriving (Eq,Ord,Show)
|
||||
|
||||
type FunId = Int
|
||||
type SeqId = Int
|
||||
data Literal =
|
||||
@@ -229,10 +230,6 @@ concrProductions c fid = unsafePerformIO $ do
|
||||
fid <- peekFId c_ccat
|
||||
return (PArg [(fid,fid) | fid <- hypos] fid)
|
||||
|
||||
peekFId c_ccat = do
|
||||
c_fid <- (#peek PgfCCat, fid) c_ccat
|
||||
return (fromIntegral (c_fid :: CInt))
|
||||
|
||||
concrTotalFuns :: Concr -> FunId
|
||||
concrTotalFuns c = unsafePerformIO $ do
|
||||
c_cncfuns <- (#peek PgfConcr, cncfuns) (concr c)
|
||||
@@ -320,8 +317,6 @@ concrSequence c seqid = unsafePerformIO $ do
|
||||
forms <- peekForms (len-1) (ptr `plusPtr` (#size PgfAlternative))
|
||||
return ((form,prefixes):forms)
|
||||
|
||||
deRef peekValue ptr = peek ptr >>= peekValue
|
||||
|
||||
fidString, fidInt, fidFloat, fidVar, fidStart :: FId
|
||||
fidString = (-1)
|
||||
fidInt = (-2)
|
||||
|
||||
@@ -1,26 +0,0 @@
|
||||
This is a binding to the new GF runtime in C.
|
||||
|
||||
The files are:
|
||||
|
||||
PGF2.hsc -- a user API similar to Python and Java APIs
|
||||
PGF2/FFI.hs -- an internal module with FFI definitions for
|
||||
-- the relevant C functions
|
||||
|
||||
HOW TO COMPILE:
|
||||
|
||||
cabal install
|
||||
|
||||
HOW TO USE:
|
||||
|
||||
- Import PGF to the Haskell program that you're writing.
|
||||
The Cabal infrastructure will make sure to tell the compiler
|
||||
where to find the relevant modules. Example:
|
||||
|
||||
module Main where
|
||||
|
||||
import PGF2
|
||||
import qualified Data.Map as Map
|
||||
|
||||
main = do
|
||||
pgf <- readPGF "Foo.pgf"
|
||||
let Just english = Map.lookup "FooEng" (languages pgf)
|
||||
56
src/runtime/haskell/README.md
Normal file
56
src/runtime/haskell/README.md
Normal file
@@ -0,0 +1,56 @@
|
||||
# PGF2
|
||||
|
||||
This is a Haskell binding to the PGF runtime written in C.
|
||||
|
||||
The exposed modules are:
|
||||
|
||||
- `PGF2`: a user API similar to Python and Java APIs
|
||||
- `PGF2.Internal`: an internal module with FFI definitions for the relevant C functions
|
||||
|
||||
## How to compile
|
||||
|
||||
**Important:** You must have the C runtime already installed and available on your system.
|
||||
See <https://github.com/GrammaticalFramework/gf-core/blob/master/src/runtime/c/INSTALL>
|
||||
|
||||
Once the runtime is installed, you can install the library to your global Cabal installation:
|
||||
|
||||
```
|
||||
cabal install pgf2 --extra-lib-dirs=/usr/local/lib
|
||||
```
|
||||
|
||||
or add it to your `stack.yaml` file:
|
||||
|
||||
```yaml
|
||||
extra-deps:
|
||||
- pgf2
|
||||
extra-lib-dirs:
|
||||
- /usr/local/lib
|
||||
```
|
||||
|
||||
## How to use
|
||||
|
||||
Simply import `PGF2` in your Haskell program.
|
||||
The Cabal infrastructure will make sure to tell the compiler where to find the relevant modules.
|
||||
|
||||
## Example
|
||||
|
||||
```haskell
|
||||
module Main where
|
||||
|
||||
import PGF2
|
||||
import qualified Data.Map as Map
|
||||
|
||||
main = do
|
||||
pgf <- readPGF "App12.pgf"
|
||||
let Just eng = Map.lookup "AppEng" (languages pgf)
|
||||
|
||||
-- Parsing
|
||||
let res = parse eng (startCat pgf) "this is a small theatre"
|
||||
let ParseOk ((tree,prob):rest) = res
|
||||
print tree
|
||||
|
||||
-- Linearisation
|
||||
let Just expr = readExpr "AdjCN (PositA red_A) (UseN theatre_N)"
|
||||
let s = linearize eng expr
|
||||
print s
|
||||
```
|
||||
@@ -1,349 +0,0 @@
|
||||
{-# LANGUAGE DeriveDataTypeable, ExistentialQuantification #-}
|
||||
|
||||
#include <pgf/pgf.h>
|
||||
#include <gu/exn.h>
|
||||
#include <sg/sg.h>
|
||||
|
||||
module SG( SG, openSG, closeSG
|
||||
, beginTrans, commit, rollback, inTransaction
|
||||
, SgId
|
||||
, insertExpr, getExpr, queryExpr
|
||||
, updateFtsIndex
|
||||
, queryLinearization
|
||||
, readTriple, showTriple
|
||||
, insertTriple, getTriple
|
||||
, queryTriple
|
||||
, query
|
||||
) where
|
||||
|
||||
import Foreign hiding (unsafePerformIO)
|
||||
import Foreign.C
|
||||
import SG.FFI
|
||||
import PGF2.FFI
|
||||
import PGF2.Expr
|
||||
|
||||
import Data.Typeable
|
||||
import Control.Exception(Exception,SomeException,catch,throwIO)
|
||||
import System.IO.Unsafe(unsafePerformIO,unsafeInterleaveIO)
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
-- Global database operations and types
|
||||
|
||||
newtype SG = SG {sg :: Ptr SgSG}
|
||||
|
||||
-- | Open the database stored at the given path and return a handle to it.
--
-- Failures reported by the C runtime are turned into Haskell exceptions:
-- a @GuErrno@ becomes an 'IOError' built from the stored errno, an
-- @SgError@ becomes an 'SGError' carrying the runtime's message, and
-- anything else becomes a generic 'SGError'.
openSG :: FilePath -> IO SG
openSG fpath =
  withCString fpath $ \c_fpath ->
  withGuPool $ \pool -> do
    exn    <- gu_new_exn pool
    handle <- sg_open c_fpath exn
    raised <- gu_exn_is_raised exn
    if not raised
      then return (SG handle)
      else reportFailure exn
  where
    -- Translate whatever is recorded in the exception frame.
    reportFailure exn = do
      is_errno <- gu_exn_caught exn gu_exn_type_GuErrno
      if is_errno
        then do
          perrno <- (#peek GuExn, data.data) exn
          errno  <- peek perrno
          ioError (errnoToIOError "openSG" (Errno errno) Nothing (Just fpath))
        else do
          is_sgerr <- gu_exn_caught exn gu_exn_type_SgError
          if not is_sgerr
            then throwIO (SGError "The database cannot be opened")
            else do
              c_msg <- (#peek GuExn, data.data) exn
              msg   <- peekUtf8CString c_msg
              throwIO (SGError msg)
|
||||
|
||||
-- | Close the database handle.  Any error left in the exception frame by
-- the C call is rethrown through 'handle_sg_exn'.
closeSG :: SG -> IO ()
closeSG (SG db) =
  withGuPool $ \pool -> do
    exn <- gu_new_exn pool
    sg_close db exn
    handle_sg_exn exn
|
||||
|
||||
-- | Begin a transaction on the database.  Any error left in the exception
-- frame by the C call is rethrown through 'handle_sg_exn'.
beginTrans :: SG -> IO ()
beginTrans (SG db) =
  withGuPool $ \pool -> do
    exn <- gu_new_exn pool
    sg_begin_trans db exn
    handle_sg_exn exn
|
||||
|
||||
-- | Commit the current transaction.  Any error left in the exception
-- frame by the C call is rethrown through 'handle_sg_exn'.
commit :: SG -> IO ()
commit (SG db) =
  withGuPool $ \pool -> do
    exn <- gu_new_exn pool
    sg_commit db exn
    handle_sg_exn exn
|
||||
|
||||
-- | Roll back the current transaction.  Any error left in the exception
-- frame by the C call is rethrown through 'handle_sg_exn'.
rollback :: SG -> IO ()
rollback (SG db) =
  withGuPool $ \pool -> do
    exn <- gu_new_exn pool
    sg_rollback db exn
    handle_sg_exn exn
|
||||
|
||||
-- | Run an action between 'beginTrans' and 'commit' and return its result.
-- If any of the three steps throws, the transaction is rolled back and
-- the exception is rethrown.
inTransaction :: SG -> IO a -> IO a
inTransaction sg f =
  body `catch` \e -> do
    rollback sg
    throwIO (e :: SomeException)
  where
    body = do
      beginTrans sg
      x <- f
      commit sg
      return x
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
-- Expressions
|
||||
|
||||
-- | Store an expression in the database and return the id reported by
-- @sg_insert_expr@.
insertExpr :: SG -> Expr -> IO SgId
insertExpr (SG db) (Expr c_expr keepAlive) =
  withGuPool $ \pool -> do
    exn   <- gu_new_exn pool
    newId <- sg_insert_expr db c_expr 1 exn
    -- Run the expression's touch action only after the C call has returned.
    keepAlive
    handle_sg_exn exn
    return newId
|
||||
|
||||
-- | Look up the expression stored under the given id.  Returns 'Nothing'
-- when the C runtime yields a null expression.
--
-- The expression is allocated in a fresh pool whose finalizer is attached
-- to a foreign pointer; the returned 'Expr' touches that pointer.
getExpr :: SG -> SgId -> IO (Maybe Expr)
getExpr (SG db) key = do
  resultPool  <- gu_new_pool
  resultFPool <- newForeignPtr gu_pool_finalizer resultPool
  withGuPool $ \pool -> do
    exn    <- gu_new_exn pool
    c_expr <- sg_get_expr db key resultPool exn
    handle_sg_exn exn
    if c_expr /= nullPtr
      then return (Just (Expr c_expr (touchForeignPtr resultFPool)))
      else do
        touchForeignPtr resultFPool
        return Nothing
|
||||
|
||||
-- | Run an expression query and return all matches with their ids.
--
-- The whole result list is built before returning: every row is fetched
-- while the temporary pool holding the cursor and the exception frame is
-- still alive.  Each result expression lives in its own pool.
queryExpr :: SG -> Expr -> IO [(SgId,Expr)]
queryExpr (SG db) (Expr c_query keepAlive) =
  withGuPool $ \pool -> do
    exn    <- gu_new_exn pool
    cursor <- sg_query_expr db c_query pool exn
    keepAlive
    handle_sg_exn exn
    collect cursor exn
  where
    collect cursor exn = do
      rowPool <- gu_new_pool
      (key, c_expr) <- alloca $ \pKey -> do
        e <- sg_query_next db cursor pKey rowPool exn
        k <- peek pKey
        return (k, e)
      raised <- gu_exn_is_raised exn
      -- Shared clean-up for both ways the iteration can stop.
      let finish = gu_pool_free rowPool >> sg_query_close db cursor exn
      if raised
        then finish >> handle_sg_exn exn >> return []
        else if c_expr == nullPtr
          then finish >> return []
          else do
            rowFPool <- newForeignPtr gu_pool_finalizer rowPool
            rest     <- collect cursor exn
            return ((key, Expr c_expr (touchForeignPtr rowFPool)) : rest)
|
||||
|
||||
-- | Call @sg_update_fts_index@ for the given grammar (judging by the name,
-- this refreshes a full-text-search index; the C side is not shown here).
-- Any error left in the exception frame is rethrown through 'handle_sg_exn'.
updateFtsIndex :: SG -> PGF -> IO ()
updateFtsIndex (SG db) grammar =
  withGuPool $ \pool -> do
    exn <- gu_new_exn pool
    sg_update_fts_index db (pgf grammar) exn
    handle_sg_exn exn
|
||||
|
||||
-- | Run a linearization query and return the expressions it refers to.
--
-- @sg_query_linearization@ yields a sequence of ids; each id is resolved
-- with @sg_get_expr@, and ids that resolve to a null expression are
-- dropped.  All results share one pool, kept alive via a foreign pointer.
queryLinearization :: SG -> String -> IO [Expr]
queryLinearization (SG db) query = do
  resultPool  <- gu_new_pool
  resultFPool <- newForeignPtr gu_pool_finalizer resultPool
  withGuPool $ \pool -> do
    c_query <- newUtf8CString query pool
    exn     <- gu_new_exn pool
    c_ids   <- sg_query_linearization db c_query pool exn
    handle_sg_exn exn
    len     <- (#peek GuSeq, len) c_ids
    ids     <- peekArray (fromIntegral (len :: CInt)) (c_ids `plusPtr` (#offset GuSeq, data))
    found   <- mapM (resolve resultFPool resultPool exn) ids
    return [e | Just e <- found]
  where
    -- Resolve one id; a null result means there is nothing to return for it.
    resolve resultFPool resultPool exn ident = do
      c_expr <- sg_get_expr db ident resultPool exn
      handle_sg_exn exn
      return (if c_expr == nullPtr
                then Nothing
                else Just (Expr c_expr (touchForeignPtr resultFPool)))
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
-- Triples
|
||||
|
||||
-- | Parse a string into three expressions.  Returns 'Nothing' unless
-- @pgf_read_expr_tuple@ returns 1 and leaves the exception frame clear.
--
-- The three expressions share one pool; on failure that pool is freed
-- immediately.
readTriple :: String -> Maybe (Expr,Expr,Expr)
readTriple str =
  unsafePerformIO $ do
    resultPool <- gu_new_pool
    withGuPool $ \pool ->
      withTriple $ \c_triple -> do
        c_str  <- newUtf8CString str pool
        guin   <- gu_string_in c_str pool
        exn    <- gu_new_exn pool
        ok     <- pgf_read_expr_tuple guin 3 c_triple resultPool exn
        raised <- gu_exn_is_raised exn
        if ok /= 1 || raised
          then do
            gu_pool_free resultPool
            return Nothing
          else do
            c_subj <- peekElemOff c_triple 0
            c_pred <- peekElemOff c_triple 1
            c_obj  <- peekElemOff c_triple 2
            resultFPool <- newForeignPtr gu_pool_finalizer resultPool
            let keepAlive = touchForeignPtr resultFPool
            return (Just ( Expr c_subj keepAlive
                         , Expr c_pred keepAlive
                         , Expr c_obj  keepAlive ))
|
||||
|
||||
-- | Render three expressions as one string using @pgf_print_expr_tuple@
-- with a null print context.  The exception frame passed to the printer
-- is not inspected afterwards.
showTriple :: Expr -> Expr -> Expr -> String
showTriple (Expr c_subj keepSubj) (Expr c_pred keepPred) (Expr c_obj keepObj) =
  unsafePerformIO $
    withGuPool $ \pool ->
      withTriple $ \c_triple -> do
        (sb,out) <- newOut pool
        exn      <- gu_new_exn pool
        pokeArray c_triple [c_subj, c_pred, c_obj]
        pgf_print_expr_tuple 3 c_triple nullPtr out exn
        -- Touch the inputs only after the printer has finished with them.
        sequence_ [keepSubj, keepPred, keepObj]
        c_text <- gu_string_buf_freeze sb pool
        peekUtf8CString c_text
|
||||
|
||||
-- | Store a triple of expressions and return the id reported by
-- @sg_insert_triple@.
insertTriple :: SG -> Expr -> Expr -> Expr -> IO SgId
insertTriple (SG db) (Expr c_subj keepSubj) (Expr c_pred keepPred) (Expr c_obj keepObj) =
  withGuPool $ \pool ->
    withTriple $ \c_triple -> do
      exn <- gu_new_exn pool
      pokeArray c_triple [c_subj, c_pred, c_obj]
      newId <- sg_insert_triple db c_triple exn
      -- Touch the inputs only after the C call has returned.
      sequence_ [keepSubj, keepPred, keepObj]
      handle_sg_exn exn
      return newId
|
||||
|
||||
-- | Look up the triple stored under the given id.  Returns 'Nothing' when
-- @sg_get_triple@ returns 0.
--
-- The three expressions share one freshly created pool, kept alive via a
-- foreign pointer that each returned 'Expr' touches.
getTriple :: SG -> SgId -> IO (Maybe (Expr,Expr,Expr))
getTriple (SG db) key = do
  resultPool  <- gu_new_pool
  resultFPool <- newForeignPtr gu_pool_finalizer resultPool
  let keepAlive = touchForeignPtr resultFPool
  withGuPool $ \pool ->
    withTriple $ \c_triple -> do
      exn   <- gu_new_exn pool
      found <- sg_get_triple db key c_triple resultPool exn
      handle_sg_exn exn
      if found == 0
        then keepAlive >> return Nothing
        else do
          c_subj <- peekElemOff c_triple 0
          c_pred <- peekElemOff c_triple 1
          c_obj  <- peekElemOff c_triple 2
          return (Just ( Expr c_subj keepAlive
                       , Expr c_pred keepAlive
                       , Expr c_obj  keepAlive ))
|
||||
|
||||
-- | Query for triples matching a pattern.  A position given as 'Nothing'
-- is passed to the C side as a null expression; a position given as
-- 'Just' is passed as that expression.
--
-- The result list is lazy: the first row and every further row are
-- fetched through 'unsafeInterleaveIO' when the list is forced.  The
-- cursor is closed when a fetch reports no more rows or raises an error.
-- In each row, positions that were fixed in the query are returned as the
-- caller's own expressions rather than as the fetched copies.
queryTriple :: SG -> Maybe Expr -> Maybe Expr -> Maybe Expr -> IO [(SgId,Expr,Expr,Expr)]
queryTriple (SG db) mb_expr1 mb_expr2 mb_expr3 =
  withGuPool $ \pool ->
    withTriple $ \c_pattern -> do
      exn <- gu_new_exn pool
      pokeArray c_pattern (map rawOrNull [mb_expr1, mb_expr2, mb_expr3])
      cursor <- sg_query_triple db c_pattern exn
      handle_sg_exn exn
      unsafeInterleaveIO (nextRow cursor)
  where
    rawOrNull mb = maybe nullPtr (\(Expr c_expr _) -> c_expr) mb

    -- Prefer the caller's expression when that position was given.
    pick c_expr keepAlive mb = maybe (Expr c_expr keepAlive) id mb

    nextRow cursor = do
      rowPool <- gu_new_pool
      alloca $ \pKey ->
        withGuPool $ \pool ->
          withTriple $ \c_row -> do
            exn    <- gu_new_exn pool
            got    <- sg_triple_result_fetch cursor pKey c_row rowPool exn
            raised <- gu_exn_is_raised exn
            -- Shared clean-up for both ways the iteration can stop.
            let finish = gu_pool_free rowPool >> sg_triple_result_close cursor exn
            if raised
              then finish >> handle_sg_exn exn >> return []
              else if got == 0
                then finish >> return []
                else do
                  rowFPool <- newForeignPtr gu_pool_finalizer rowPool
                  let keepAlive = touchForeignPtr rowFPool
                  c_expr1 <- peekElemOff c_row 0
                  c_expr2 <- peekElemOff c_row 1
                  c_expr3 <- peekElemOff c_row 2
                  key     <- peek pKey
                  rest    <- unsafeInterleaveIO (nextRow cursor)
                  return (( key
                          , pick c_expr1 keepAlive mb_expr1
                          , pick c_expr2 keepAlive mb_expr2
                          , pick c_expr3 keepAlive mb_expr3 ) : rest)
|
||||
|
||||
|
||||
-- | Run a query given as text.  The text is read with
-- @pgf_read_expr_matrix@ (three expressions per row); if that returns a
-- null sequence the result is the empty list.
--
-- Result rows are produced lazily through 'unsafeInterleaveIO'.  Each row
-- holds as many expressions as @sg_query_result_columns@ reports and lives
-- in its own pool.  The cursor is closed when a fetch reports no more
-- rows or raises an error.
query :: SG -> String -> IO [[Expr]]
query (SG db) str =
  withGuPool $ \pool -> do
    c_str    <- newUtf8CString str pool
    guin     <- gu_string_in c_str pool
    exn      <- gu_new_exn pool
    c_matrix <- pgf_read_expr_matrix guin 3 pool exn
    if c_matrix == nullPtr
      then return []
      else do
        count  <- (#peek GuSeq, len) c_matrix
        cursor <- sg_query db (count `div` 3) (c_matrix `plusPtr` (#offset GuSeq, data)) exn
        handle_sg_exn exn
        n_cols <- sg_query_result_columns cursor
        unsafeInterleaveIO (nextRow cursor n_cols)
  where
    nextRow cursor n_cols =
      withGuPool $ \pool -> do
        exn     <- gu_new_exn pool
        c_row   <- gu_malloc pool ((#size PgfExpr) * n_cols)
        rowPool <- gu_new_pool
        got     <- sg_query_result_fetch cursor c_row rowPool exn
        raised  <- gu_exn_is_raised exn
        -- Shared clean-up for both ways the iteration can stop.
        let finish = gu_pool_free rowPool >> sg_query_result_close cursor exn
        if raised
          then finish >> handle_sg_exn exn >> return []
          else if got == 0
            then finish >> return []
            else do
              rowFPool <- newForeignPtr gu_pool_finalizer rowPool
              let keepAlive = touchForeignPtr rowFPool
              row  <- fmap (map (flip Expr keepAlive)) $ peekArray (fromIntegral n_cols) c_row
              rows <- unsafeInterleaveIO (nextRow cursor n_cols)
              return (row:rows)
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
-- Exceptions
|
||||
|
||||
-- | Exception thrown for failures reported by the database layer; the
-- payload is a human-readable message.
newtype SGError = SGError String deriving (Show, Typeable)

instance Exception SGError

-- Inspect an exception frame after a C call.  Does nothing when no
-- exception was raised; otherwise throws 'SGError', using the runtime's
-- message for an @SgError@ and a generic message for anything else.
handle_sg_exn exn = do
  raised <- gu_exn_is_raised exn
  if not raised
    then return ()
    else do
      is_sgerr <- gu_exn_caught exn gu_exn_type_SgError
      if not is_sgerr
        then throwIO (SGError "Unknown database error")
        else do
          c_msg <- (#peek GuExn, data.data) exn
          msg   <- peekUtf8CString c_msg
          throwIO (SGError msg)
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
@@ -1,84 +0,0 @@
|
||||
{-# LANGUAGE ForeignFunctionInterface, MagicHash #-}
|
||||
module SG.FFI where
|
||||
|
||||
import Foreign
|
||||
import Foreign.C
|
||||
import PGF2.FFI
|
||||
import GHC.Ptr
|
||||
import Data.Int
|
||||
|
||||
data SgSG
|
||||
data SgQueryExprResult
|
||||
data SgTripleResult
|
||||
data SgQueryResult
|
||||
type SgId = Int64
|
||||
|
||||
foreign import ccall "sg/sg.h sg_open"
|
||||
sg_open :: CString -> Ptr GuExn -> IO (Ptr SgSG)
|
||||
|
||||
foreign import ccall "sg/sg.h sg_close"
|
||||
sg_close :: Ptr SgSG -> Ptr GuExn -> IO ()
|
||||
|
||||
foreign import ccall "sg/sg.h sg_begin_trans"
|
||||
sg_begin_trans :: Ptr SgSG -> Ptr GuExn -> IO ()
|
||||
|
||||
foreign import ccall "sg/sg.h sg_commit"
|
||||
sg_commit :: Ptr SgSG -> Ptr GuExn -> IO ()
|
||||
|
||||
foreign import ccall "sg/sg.h sg_rollback"
|
||||
sg_rollback :: Ptr SgSG -> Ptr GuExn -> IO ()
|
||||
|
||||
foreign import ccall "sg/sg.h sg_insert_expr"
|
||||
sg_insert_expr :: Ptr SgSG -> PgfExpr -> CInt -> Ptr GuExn -> IO SgId
|
||||
|
||||
foreign import ccall "sg/sg.h sg_get_expr"
|
||||
sg_get_expr :: Ptr SgSG -> SgId -> Ptr GuPool -> Ptr GuExn -> IO PgfExpr
|
||||
|
||||
foreign import ccall "sg/sg.h sg_query_expr"
|
||||
sg_query_expr :: Ptr SgSG -> PgfExpr -> Ptr GuPool -> Ptr GuExn -> IO (Ptr SgQueryExprResult)
|
||||
|
||||
foreign import ccall "sg/sg.h sg_query_next"
|
||||
sg_query_next :: Ptr SgSG -> Ptr SgQueryExprResult -> Ptr SgId -> Ptr GuPool -> Ptr GuExn -> IO PgfExpr
|
||||
|
||||
foreign import ccall "sg/sg.h sg_query_close"
|
||||
sg_query_close :: Ptr SgSG -> Ptr SgQueryExprResult -> Ptr GuExn -> IO ()
|
||||
|
||||
foreign import ccall "sg/sg.h sg_update_fts_index"
|
||||
sg_update_fts_index :: Ptr SgSG -> Ptr PgfPGF -> Ptr GuExn -> IO ()
|
||||
|
||||
foreign import ccall "sg/sg.h sg_query_linearization"
|
||||
sg_query_linearization :: Ptr SgSG -> CString -> Ptr GuPool -> Ptr GuExn -> IO (Ptr GuSeq)
|
||||
|
||||
foreign import ccall "sg/sg.h sg_insert_triple"
|
||||
sg_insert_triple :: Ptr SgSG -> SgTriple -> Ptr GuExn -> IO SgId
|
||||
|
||||
foreign import ccall "sg/sg.h sg_get_triple"
|
||||
sg_get_triple :: Ptr SgSG -> SgId -> SgTriple -> Ptr GuPool -> Ptr GuExn -> IO CInt
|
||||
|
||||
foreign import ccall "sg/sg.h sg_query_triple"
|
||||
sg_query_triple :: Ptr SgSG -> SgTriple -> Ptr GuExn -> IO (Ptr SgTripleResult)
|
||||
|
||||
foreign import ccall "sg/sg.h sg_triple_result_fetch"
|
||||
sg_triple_result_fetch :: Ptr SgTripleResult -> Ptr SgId -> SgTriple -> Ptr GuPool -> Ptr GuExn -> IO CInt
|
||||
|
||||
foreign import ccall "sg/sg.h sg_triple_result_close"
|
||||
sg_triple_result_close :: Ptr SgTripleResult -> Ptr GuExn -> IO ()
|
||||
|
||||
foreign import ccall "sg/sg.h sg_query"
|
||||
sg_query :: Ptr SgSG -> CSizeT -> Ptr PgfExpr -> Ptr GuExn -> IO (Ptr SgQueryResult)
|
||||
|
||||
foreign import ccall "sg/sg.h sg_query_result_columns"
|
||||
sg_query_result_columns :: Ptr SgQueryResult -> IO CSizeT
|
||||
|
||||
foreign import ccall "sg/sg.h sg_query_result_fetch"
|
||||
sg_query_result_fetch :: Ptr SgQueryResult -> Ptr PgfExpr -> Ptr GuPool -> Ptr GuExn -> IO CInt
|
||||
|
||||
foreign import ccall "sg/sg.h sg_query_result_close"
|
||||
sg_query_result_close :: Ptr SgQueryResult -> Ptr GuExn -> IO ()
|
||||
|
||||
type SgTriple = Ptr PgfExpr
|
||||
|
||||
-- | Run an action with a temporary, uninitialised buffer of three
-- expressions; the buffer is only valid for the duration of the action.
withTriple :: (SgTriple -> IO a) -> IO a
withTriple action = allocaArray 3 action
|
||||
|
||||
gu_exn_type_SgError = Ptr "SgError"# :: CString
|
||||
@@ -14,10 +14,10 @@ extra-source-files: README
|
||||
cabal-version: >=1.10
|
||||
|
||||
library
|
||||
exposed-modules: PGF2, PGF2.Internal, SG,
|
||||
exposed-modules: PGF2, PGF2.Internal,
|
||||
-- backwards compatibility API:
|
||||
PGF
|
||||
other-modules: PGF2.FFI, PGF2.Expr, PGF2.Type, SG.FFI
|
||||
other-modules: PGF2.FFI, PGF2.Expr, PGF2.Type
|
||||
build-depends: base >=4.3, containers, pretty, array, random
|
||||
-- hs-source-dirs:
|
||||
default-language: Haskell2010
|
||||
@@ -27,11 +27,3 @@ library
|
||||
cc-options: -std=c99
|
||||
default-language: Haskell2010
|
||||
c-sources: utils.c
|
||||
|
||||
executable pgf-shell
|
||||
main-is: pgf-shell.hs
|
||||
hs-source-dirs: examples
|
||||
build-depends: base, pgf2, containers, mtl, lifted-base
|
||||
default-language: Haskell2010
|
||||
if impl(ghc>=7.0)
|
||||
ghc-options: -rtsopts
|
||||
|
||||
3
src/runtime/haskell/stack-ghc7.10.3.yaml
Normal file
3
src/runtime/haskell/stack-ghc7.10.3.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
resolver: lts-6.35 # ghc 7.10.3
|
||||
|
||||
allow-newer: true
|
||||
1
src/runtime/haskell/stack-ghc8.0.2.yaml
Normal file
1
src/runtime/haskell/stack-ghc8.0.2.yaml
Normal file
@@ -0,0 +1 @@
|
||||
resolver: lts-9.21 # ghc 8.0.2
|
||||
1
src/runtime/haskell/stack-ghc8.10.4.yaml
Normal file
1
src/runtime/haskell/stack-ghc8.10.4.yaml
Normal file
@@ -0,0 +1 @@
|
||||
resolver: lts-18.0 # ghc 8.10.4
|
||||
31
src/runtime/haskell/stack-haddock-upload.sh
Executable file
31
src/runtime/haskell/stack-haddock-upload.sh
Executable file
@@ -0,0 +1,31 @@
|
||||
#!/bin/bash

# Author: Dimitri Sabadie <dimitri.sabadie@gmail.com>
# 2015
#
# Build the Haddock documentation with stack, pack it as NAME-VERSION-docs
# and upload the archive to Hackage.
#
# Usage: ./stack-haddock-upload.sh NAME VERSION

if [ $# -lt 2 ]; then
  echo "Usage: ./stack-haddock-upload.sh NAME VERSION"
  exit 1
fi

dist=$(stack path --dist-dir --stack-yaml ./stack.yaml 2> /dev/null)

echo -e "\033[1;36mGenerating documentation...\033[0m"

if stack haddock 2> /dev/null; then
  docdir="$dist/doc/html"
  cd "$docdir" || exit
  doc="$1-$2-docs"
  echo -e "Compressing documentation from \033[1;34m$docdir\033[0m for \033[1;35m$1\033[0m-\033[1;33m$2\033[1;30m"
  cp -r "$1" "$doc"
  tar -c -v -z --format=ustar -f "$doc.tar.gz" "$doc"
  echo -e "\033[1;32mUploading to Hackage...\033[0m"
  # -r keeps backslashes in the typed credentials literal.
  read -r -p "Hackage username: " username
  read -r -p "Hackage password: " -s password
  echo ""
  # Pass the credentials with --user instead of splicing them into the URL,
  # where characters such as '@' or '/' in the password would corrupt it.
  curl -X PUT -H 'Content-Type: application/x-tar' -H 'Content-Encoding: gzip' --user "$username:$password" --data-binary "@$doc.tar.gz" "https://hackage.haskell.org/package/$1-$2/docs"
  exit $?
else
  echo -e "\033[1;31mNot in a stack-powered project\033[0m"
  # Report the failure to the caller instead of exiting with status 0.
  exit 1
fi
|
||||
3
src/runtime/haskell/stack.yaml
Normal file
3
src/runtime/haskell/stack.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
# This is mainly here so that I can run `stack sdist` for uploading to Hackage
|
||||
|
||||
resolver: lts-12.26 # ghc 8.4.4
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
typedef struct {
|
||||
PgfLiteralCallback callback;
|
||||
PgfExprProb* (*match)(size_t lin_idx, size_t* poffset,
|
||||
PgfExprProb* (*match)(GuString ann, size_t* poffset,
|
||||
GuPool *out_pool);
|
||||
GuFinalizer fin;
|
||||
} HSPgfLiteralCallback;
|
||||
@@ -37,7 +37,7 @@ hspgf_hs2offset(GuString sentence, size_t hs_offset)
|
||||
|
||||
static PgfExprProb*
|
||||
hspgf_match_callback(PgfLiteralCallback* self, PgfConcr* concr,
|
||||
size_t lin_idx,
|
||||
GuString ann,
|
||||
GuString sentence, size_t* poffset,
|
||||
GuPool *out_pool)
|
||||
{
|
||||
@@ -46,7 +46,7 @@ hspgf_match_callback(PgfLiteralCallback* self, PgfConcr* concr,
|
||||
size_t hs_offset =
|
||||
hspgf_offset2hs(sentence, *poffset);
|
||||
PgfExprProb* ep =
|
||||
callback->match(lin_idx, &hs_offset, out_pool);
|
||||
callback->match(ann, &hs_offset, out_pool);
|
||||
*poffset = hspgf_hs2offset(sentence, hs_offset);
|
||||
|
||||
return ep;
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
INSTALL_PATH = /usr/local
|
||||
|
||||
C_SOURCES = jpgf.c jsg.c jni_utils.c
|
||||
JAVA_SOURCES = $(wildcard org/grammaticalframework/pgf/*.java) \
|
||||
$(wildcard org/grammaticalframework/sg/*.java)
|
||||
C_SOURCES = jpgf.c jni_utils.c
|
||||
JAVA_SOURCES = $(wildcard org/grammaticalframework/pgf/*.java)
|
||||
|
||||
JNI_INCLUDES = $(if $(wildcard /usr/lib/jvm/default-java/include/.*), -I/usr/lib/jvm/default-java/include -I/usr/lib/jvm/default-java/include/linux, \
|
||||
$(if $(wildcard /usr/lib/jvm/java-1.11.0-openjdk-amd64/include/.*), -I/usr/lib/jvm/java-1.11.0-openjdk-amd64/include/ -I/usr/lib/jvm/java-1.11.0-openjdk-amd64/include/linux, \
|
||||
@@ -28,13 +27,13 @@ LIBTOOL = $(if $(shell command -v glibtool 2>/dev/null), glibtool, libtool) --t
|
||||
all: libjpgf.la jpgf.jar
|
||||
|
||||
libjpgf.la: $(patsubst %.c, %.lo, $(C_SOURCES))
|
||||
$(LIBTOOL) --mode=link $(GCC) $(CFLAGS) -g -O -o libjpgf.la -shared $^ -rpath $(INSTALL_PATH)/lib -lgu -lpgf -lsg $(WINDOWS_LDFLAGS)
|
||||
$(LIBTOOL) --mode=link $(GCC) $(CFLAGS) -g -O -o libjpgf.la -shared $^ -rpath $(INSTALL_PATH)/lib -lgu -lpgf $(WINDOWS_LDFLAGS)
|
||||
|
||||
%.lo : %.c
|
||||
$(LIBTOOL) --mode=compile $(GCC) $(CFLAGS) -g -O -c $(JNI_INCLUDES) $(WINDOWS_CCFLAGS) -std=c99 -shared $< -o $@
|
||||
|
||||
jpgf.jar: $(patsubst %.java, %.class, $(JAVA_SOURCES))
|
||||
jar -cf $@ org/grammaticalframework/pgf/*.class org/grammaticalframework/sg/*.class
|
||||
jar -cf $@ org/grammaticalframework/pgf/*.class
|
||||
|
||||
%.class : %.java
|
||||
javac $<
|
||||
@@ -45,7 +44,7 @@ install: libjpgf.la jpgf.jar
|
||||
|
||||
|
||||
doc:
|
||||
javadoc org.grammaticalframework.pgf org.grammaticalframework.sg -d java-api
|
||||
javadoc org.grammaticalframework.pgf -d java-api
|
||||
|
||||
clean:
|
||||
rm -f *.lo
|
||||
|
||||
@@ -456,7 +456,7 @@ typedef struct {
|
||||
|
||||
static PgfExprProb*
|
||||
jpgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr,
|
||||
size_t lin_idx,
|
||||
GuString ann,
|
||||
GuString sentence, size_t* poffset,
|
||||
GuPool *out_pool)
|
||||
{
|
||||
@@ -465,8 +465,9 @@ jpgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr,
|
||||
JNIEnv *env;
|
||||
(*cachedJVM)->AttachCurrentThread(cachedJVM, (void**)&env, NULL);
|
||||
|
||||
size_t joffset = gu2j_string_offset(sentence, *poffset);
|
||||
jobject result = (*env)->CallObjectMethod(env, callback->jcallback, callback->match_methodId, lin_idx, joffset);
|
||||
jstring jann = gu2j_string(env, ann);
|
||||
size_t joffset = gu2j_string_offset(sentence, *poffset);
|
||||
jobject result = (*env)->CallObjectMethod(env, callback->jcallback, callback->match_methodId, jann, joffset);
|
||||
if (result == NULL)
|
||||
return NULL;
|
||||
|
||||
@@ -485,39 +486,8 @@ jpgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr,
|
||||
|
||||
PgfExprProb* ep = gu_new(PgfExprProb, out_pool);
|
||||
ep->expr = gu_variant_from_ptr(get_ref(env, jexpr));
|
||||
ep->expr = pgf_clone_expr(ep->expr, out_pool);
|
||||
ep->prob = prob;
|
||||
|
||||
|
||||
{
|
||||
// This is an uggly hack. We first show the expression ep->expr
|
||||
// and then we read it back but in out_pool. The whole purpose
|
||||
// of this is to copy the expression from the temporary pool
|
||||
// that was created in the Java binding to the parser pool.
|
||||
// There should be a real copying function or even better
|
||||
// there must be a way to avoid copying at all.
|
||||
|
||||
GuPool* tmp_pool = gu_local_pool();
|
||||
|
||||
GuExn* err = gu_exn(tmp_pool);
|
||||
GuStringBuf* sbuf = gu_new_string_buf(tmp_pool);
|
||||
GuOut* out = gu_string_buf_out(sbuf);
|
||||
|
||||
pgf_print_expr(ep->expr, NULL, 0, out, err);
|
||||
|
||||
GuString str = gu_string_buf_data(sbuf);
|
||||
size_t len = gu_string_buf_length(sbuf);
|
||||
GuIn* in = gu_data_in((uint8_t*) str, len, tmp_pool);
|
||||
|
||||
ep->expr = pgf_read_expr(in, out_pool, tmp_pool, err);
|
||||
if (!gu_ok(err) || gu_variant_is_null(ep->expr)) {
|
||||
throw_string_exception(env, "org/grammaticalframework/pgf/PGFError", "The expression cannot be parsed");
|
||||
gu_pool_free(tmp_pool);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
}
|
||||
|
||||
return ep;
|
||||
}
|
||||
|
||||
@@ -534,7 +504,7 @@ jpgf_token_prob_enum_fin(GuFinalizer* self)
|
||||
|
||||
static GuEnum*
|
||||
jpgf_literal_callback_predict(PgfLiteralCallback* self, PgfConcr* concr,
|
||||
size_t lin_idx,
|
||||
GuString ann,
|
||||
GuString prefix,
|
||||
GuPool *out_pool)
|
||||
{
|
||||
@@ -543,8 +513,9 @@ jpgf_literal_callback_predict(PgfLiteralCallback* self, PgfConcr* concr,
|
||||
JNIEnv *env;
|
||||
(*cachedJVM)->AttachCurrentThread(cachedJVM, (void**)&env, NULL);
|
||||
|
||||
jstring jann = gu2j_string(env, ann);
|
||||
jstring jprefix = gu2j_string(env, prefix);
|
||||
jobject jiterator = (*env)->CallObjectMethod(env, callback->jcallback, callback->predict_methodId, lin_idx, jprefix);
|
||||
jobject jiterator = (*env)->CallObjectMethod(env, callback->jcallback, callback->predict_methodId, jann, jprefix);
|
||||
if (jiterator == NULL)
|
||||
return NULL;
|
||||
|
||||
@@ -582,8 +553,8 @@ JNIEXPORT void JNICALL Java_org_grammaticalframework_pgf_Parser_addLiteralCallba
|
||||
callback->fin.fn = jpgf_literal_callback_fin;
|
||||
|
||||
jclass callback_class = (*env)->GetObjectClass(env, jcallback);
|
||||
callback->match_methodId = (*env)->GetMethodID(env, callback_class, "match", "(II)Lorg/grammaticalframework/pgf/LiteralCallback$CallbackResult;");
|
||||
callback->predict_methodId = (*env)->GetMethodID(env, callback_class, "predict", "(ILjava/lang/String;)Ljava/util/Iterator;");
|
||||
callback->match_methodId = (*env)->GetMethodID(env, callback_class, "match", "(Ljava/lang/String;I)Lorg/grammaticalframework/pgf/LiteralCallback$CallbackResult;");
|
||||
callback->predict_methodId = (*env)->GetMethodID(env, callback_class, "predict", "(Ljava/lang/String;Ljava/lang/String;)Ljava/util/Iterator;");
|
||||
|
||||
gu_pool_finally(pool, &callback->fin);
|
||||
|
||||
@@ -964,7 +935,7 @@ pgf_bracket_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun)
|
||||
pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
|
||||
{
|
||||
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
|
||||
|
||||
@@ -973,7 +944,7 @@ pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t li
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun)
|
||||
pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
|
||||
{
|
||||
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
|
||||
JNIEnv* env = state->env;
|
||||
@@ -983,6 +954,7 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lind
|
||||
if (gu_buf_length(state->list) > 0) {
|
||||
jstring jcat = gu2j_string(env, cat);
|
||||
jstring jfun = gu2j_string(env, fun);
|
||||
jstring jann = gu2j_string(env, ann);
|
||||
|
||||
size_t len = gu_buf_length(state->list);
|
||||
jobjectArray jchildren = (*env)->NewObjectArray(env, len, state->object_class, NULL);
|
||||
@@ -998,10 +970,11 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lind
|
||||
jcat,
|
||||
jfun,
|
||||
fid,
|
||||
lindex,
|
||||
jann,
|
||||
jchildren);
|
||||
|
||||
(*env)->DeleteLocalRef(env, jchildren);
|
||||
(*env)->DeleteLocalRef(env, jann);
|
||||
(*env)->DeleteLocalRef(env, jfun);
|
||||
(*env)->DeleteLocalRef(env, jcat);
|
||||
|
||||
@@ -1051,7 +1024,7 @@ Java_org_grammaticalframework_pgf_Concr_bracketedLinearize(JNIEnv* env, jobject
|
||||
jclass bracket_class = (*env)->FindClass(env, "org/grammaticalframework/pgf/Bracket");
|
||||
if (!bracket_class)
|
||||
return NULL;
|
||||
jmethodID bracket_constrId = (*env)->GetMethodID(env, bracket_class, "<init>", "(Ljava/lang/String;Ljava/lang/String;II[Ljava/lang/Object;)V");
|
||||
jmethodID bracket_constrId = (*env)->GetMethodID(env, bracket_class, "<init>", "(Ljava/lang/String;Ljava/lang/String;ILjava/lang/String;[Ljava/lang/Object;)V");
|
||||
if (!bracket_constrId)
|
||||
return NULL;
|
||||
|
||||
|
||||
@@ -1,339 +0,0 @@
|
||||
#include <jni.h>
|
||||
#include <sg/sg.h>
|
||||
#include <pgf/expr.h>
|
||||
#include <pgf/linearizer.h>
|
||||
#include "jni_utils.h"
|
||||
|
||||
JNIEXPORT jobject JNICALL
|
||||
Java_org_grammaticalframework_sg_SG_openSG(JNIEnv *env, jclass cls, jstring path)
|
||||
{
|
||||
GuPool* tmp_pool = gu_local_pool();
|
||||
|
||||
// Create an exception frame that catches all errors.
|
||||
GuExn* err = gu_exn(tmp_pool);
|
||||
|
||||
const char *fpath = (*env)->GetStringUTFChars(env, path, 0);
|
||||
|
||||
// Read the PGF grammar.
|
||||
SgSG* sg = sg_open(fpath, err);
|
||||
|
||||
(*env)->ReleaseStringUTFChars(env, path, fpath);
|
||||
|
||||
if (!gu_ok(err)) {
|
||||
GuString msg;
|
||||
if (gu_exn_caught(err, SgError)) {
|
||||
msg = (GuString) gu_exn_caught_data(err);
|
||||
} else {
|
||||
msg = "The database cannot be opened";
|
||||
}
|
||||
throw_string_exception(env, "org/grammaticalframework/sg/SGError", msg);
|
||||
gu_pool_free(tmp_pool);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
|
||||
jmethodID constrId = (*env)->GetMethodID(env, cls, "<init>", "(J)V");
|
||||
return (*env)->NewObject(env, cls, constrId, p2l(sg));
|
||||
}
|
||||
|
||||
JNIEXPORT void JNICALL
|
||||
Java_org_grammaticalframework_sg_SG_close(JNIEnv *env, jobject self)
|
||||
{
|
||||
GuPool* tmp_pool = gu_local_pool();
|
||||
|
||||
// Create an exception frame that catches all errors.
|
||||
GuExn* err = gu_exn(tmp_pool);
|
||||
|
||||
sg_close(get_ref(env, self), err);
|
||||
if (!gu_ok(err)) {
|
||||
GuString msg;
|
||||
if (gu_exn_caught(err, SgError)) {
|
||||
msg = (GuString) gu_exn_caught_data(err);
|
||||
} else {
|
||||
msg = "The database cannot be closed";
|
||||
}
|
||||
throw_string_exception(env, "org/grammaticalframework/sg/SGError", msg);
|
||||
gu_pool_free(tmp_pool);
|
||||
return;
|
||||
}
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
}
|
||||
|
||||
JNIEXPORT jobjectArray JNICALL
|
||||
Java_org_grammaticalframework_sg_SG_readTriple(JNIEnv *env, jclass cls, jstring s)
|
||||
{
|
||||
GuPool* pool = gu_new_pool();
|
||||
|
||||
GuPool* tmp_pool = gu_local_pool();
|
||||
GuString buf = j2gu_string(env, s, tmp_pool);
|
||||
GuIn* in = gu_data_in((uint8_t*) buf, strlen(buf), tmp_pool);
|
||||
GuExn* err = gu_exn(tmp_pool);
|
||||
|
||||
const int len = 3;
|
||||
|
||||
PgfExpr exprs[len];
|
||||
int res = pgf_read_expr_tuple(in, 3, exprs, pool, err);
|
||||
if (!gu_ok(err) || res == 0) {
|
||||
throw_string_exception(env, "org/grammaticalframework/pgf/PGFError", "The expression cannot be parsed");
|
||||
gu_pool_free(tmp_pool);
|
||||
gu_pool_free(pool);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
|
||||
jclass pool_class = (*env)->FindClass(env, "org/grammaticalframework/pgf/Pool");
|
||||
jmethodID pool_constrId = (*env)->GetMethodID(env, pool_class, "<init>", "(J)V");
|
||||
jobject jpool = (*env)->NewObject(env, pool_class, pool_constrId, p2l(pool));
|
||||
|
||||
jclass expr_class = (*env)->FindClass(env, "org/grammaticalframework/pgf/Expr");
|
||||
jmethodID expr_constrId = (*env)->GetMethodID(env, expr_class, "<init>", "(Lorg/grammaticalframework/pgf/Pool;Ljava/lang/Object;J)V");
|
||||
|
||||
jobjectArray array = (*env)->NewObjectArray(env, len, expr_class, NULL);
|
||||
for (int i = 0; i < len; i++) {
|
||||
jobject obj = (*env)->NewObject(env, expr_class, expr_constrId, jpool, NULL, p2l(gu_variant_to_ptr(exprs[i])));
|
||||
(*env)->SetObjectArrayElement(env, array, i, obj);
|
||||
(*env)->DeleteLocalRef(env, obj);
|
||||
}
|
||||
|
||||
return array;
|
||||
}
|
||||
|
||||
JNIEXPORT jobject JNICALL
|
||||
Java_org_grammaticalframework_sg_SG_queryTriple(JNIEnv *env, jobject self,
|
||||
jobject jsubj,
|
||||
jobject jpred,
|
||||
jobject jobj)
|
||||
{
|
||||
SgSG *sg = get_ref(env, self);
|
||||
|
||||
GuPool* tmp_pool = gu_local_pool();
|
||||
GuExn* err = gu_exn(tmp_pool);
|
||||
|
||||
SgTriple triple;
|
||||
triple[0] = (jsubj == NULL) ? gu_null_variant
|
||||
: gu_variant_from_ptr((void*) get_ref(env, jsubj));
|
||||
triple[1] = (jpred == NULL) ? gu_null_variant
|
||||
: gu_variant_from_ptr((void*) get_ref(env, jpred));
|
||||
triple[2] = (jobj == NULL) ? gu_null_variant
|
||||
: gu_variant_from_ptr((void*) get_ref(env, jobj));
|
||||
|
||||
SgTripleResult* res = sg_query_triple(sg, triple, err);
|
||||
if (!gu_ok(err)) {
|
||||
GuString msg;
|
||||
if (gu_exn_caught(err, SgError)) {
|
||||
msg = (GuString) gu_exn_caught_data(err);
|
||||
} else {
|
||||
msg = "The query failed";
|
||||
}
|
||||
throw_string_exception(env, "org/grammaticalframework/sg/SGError", msg);
|
||||
gu_pool_free(tmp_pool);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
|
||||
jclass res_class = (*env)->FindClass(env, "org/grammaticalframework/sg/TripleResult");
|
||||
jmethodID constrId = (*env)->GetMethodID(env, res_class, "<init>", "(JLorg/grammaticalframework/pgf/Expr;Lorg/grammaticalframework/pgf/Expr;Lorg/grammaticalframework/pgf/Expr;)V");
|
||||
jobject jres = (*env)->NewObject(env, res_class, constrId, p2l(res), jsubj, jpred, jobj);
|
||||
|
||||
return jres;
|
||||
}
|
||||
|
||||
JNIEXPORT jboolean JNICALL
|
||||
Java_org_grammaticalframework_sg_TripleResult_hasNext(JNIEnv *env, jobject self)
|
||||
{
|
||||
SgTripleResult *res = get_ref(env, self);
|
||||
|
||||
GuPool* tmp_pool = gu_local_pool();
|
||||
GuPool* out_pool = gu_new_pool();
|
||||
GuExn* err = gu_exn(tmp_pool);
|
||||
|
||||
SgId key;
|
||||
SgTriple triple;
|
||||
int r = sg_triple_result_fetch(res, &key, triple, out_pool, err);
|
||||
if (!gu_ok(err)) {
|
||||
GuString msg;
|
||||
if (gu_exn_caught(err, SgError)) {
|
||||
msg = (GuString) gu_exn_caught_data(err);
|
||||
} else {
|
||||
msg = "The fetch failed";
|
||||
}
|
||||
throw_string_exception(env, "org/grammaticalframework/sg/SGError", msg);
|
||||
gu_pool_free(out_pool);
|
||||
gu_pool_free(tmp_pool);
|
||||
return JNI_FALSE;
|
||||
}
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
|
||||
if (r) {
|
||||
SgTriple orig_triple;
|
||||
sg_triple_result_get_query(res, orig_triple);
|
||||
|
||||
jclass pool_class = (*env)->FindClass(env, "org/grammaticalframework/pgf/Pool");
|
||||
jmethodID pool_constrId = (*env)->GetMethodID(env, pool_class, "<init>", "(J)V");
|
||||
jobject jpool = (*env)->NewObject(env, pool_class, pool_constrId, p2l(out_pool));
|
||||
|
||||
jclass expr_class = (*env)->FindClass(env, "org/grammaticalframework/pgf/Expr");
|
||||
jmethodID constrId = (*env)->GetMethodID(env, expr_class, "<init>", "(Lorg/grammaticalframework/pgf/Pool;Ljava/lang/Object;J)V");
|
||||
|
||||
jclass result_class = (*env)->GetObjectClass(env, self);
|
||||
|
||||
jfieldID keyId = (*env)->GetFieldID(env, result_class, "key", "J");
|
||||
(*env)->SetLongField(env, self, keyId, key);
|
||||
|
||||
if (triple[0] != orig_triple[0]) {
|
||||
jfieldID subjId = (*env)->GetFieldID(env, result_class, "subj", "Lorg/grammaticalframework/pgf/Expr;");
|
||||
jobject jsubj = (*env)->NewObject(env, expr_class, constrId, jpool, jpool, p2l(gu_variant_to_ptr(triple[0])));
|
||||
(*env)->SetObjectField(env, self, subjId, jsubj);
|
||||
}
|
||||
|
||||
if (triple[1] != orig_triple[1]) {
|
||||
jfieldID predId = (*env)->GetFieldID(env, result_class, "pred", "Lorg/grammaticalframework/pgf/Expr;");
|
||||
jobject jpred = (*env)->NewObject(env, expr_class, constrId, jpool, jpool, p2l(gu_variant_to_ptr(triple[1])));
|
||||
(*env)->SetObjectField(env, self, predId, jpred);
|
||||
}
|
||||
|
||||
if (triple[2] != orig_triple[2]) {
|
||||
jfieldID objId = (*env)->GetFieldID(env, result_class, "obj", "Lorg/grammaticalframework/pgf/Expr;");
|
||||
jobject jobj = (*env)->NewObject(env, expr_class, constrId, jpool, jpool, p2l(gu_variant_to_ptr(triple[2])));
|
||||
(*env)->SetObjectField(env, self, objId, jobj);
|
||||
}
|
||||
|
||||
return JNI_TRUE;
|
||||
} else {
|
||||
gu_pool_free(out_pool);
|
||||
return JNI_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
JNIEXPORT void JNICALL
|
||||
Java_org_grammaticalframework_sg_TripleResult_close(JNIEnv *env, jobject self)
|
||||
{
|
||||
SgTripleResult *res = get_ref(env, self);
|
||||
|
||||
GuPool* tmp_pool = gu_local_pool();
|
||||
GuExn* err = gu_exn(tmp_pool);
|
||||
|
||||
sg_triple_result_close(res, err);
|
||||
if (!gu_ok(err)) {
|
||||
GuString msg;
|
||||
if (gu_exn_caught(err, SgError)) {
|
||||
msg = (GuString) gu_exn_caught_data(err);
|
||||
} else {
|
||||
msg = "Closing the result failed";
|
||||
}
|
||||
throw_string_exception(env, "org/grammaticalframework/sg/SGError", msg);
|
||||
}
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
}
|
||||
|
||||
JNIEXPORT void JNICALL
|
||||
Java_org_grammaticalframework_sg_SG_beginTrans(JNIEnv *env, jobject self)
|
||||
{
|
||||
GuPool* tmp_pool = gu_local_pool();
|
||||
|
||||
// Create an exception frame that catches all errors.
|
||||
GuExn* err = gu_exn(tmp_pool);
|
||||
|
||||
sg_begin_trans(get_ref(env, self), err);
|
||||
if (!gu_ok(err)) {
|
||||
GuString msg;
|
||||
if (gu_exn_caught(err, SgError)) {
|
||||
msg = (GuString) gu_exn_caught_data(err);
|
||||
} else {
|
||||
msg = "The transaction cannot be started";
|
||||
}
|
||||
throw_string_exception(env, "org/grammaticalframework/sg/SGError", msg);
|
||||
gu_pool_free(tmp_pool);
|
||||
return;
|
||||
}
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
}
|
||||
|
||||
JNIEXPORT void JNICALL
|
||||
Java_org_grammaticalframework_sg_SG_commit(JNIEnv *env, jobject self)
|
||||
{
|
||||
GuPool* tmp_pool = gu_local_pool();
|
||||
|
||||
// Create an exception frame that catches all errors.
|
||||
GuExn* err = gu_exn(tmp_pool);
|
||||
|
||||
sg_commit(get_ref(env, self), err);
|
||||
if (!gu_ok(err)) {
|
||||
GuString msg;
|
||||
if (gu_exn_caught(err, SgError)) {
|
||||
msg = (GuString) gu_exn_caught_data(err);
|
||||
} else {
|
||||
msg = "The transaction cannot be commited";
|
||||
}
|
||||
throw_string_exception(env, "org/grammaticalframework/sg/SGError", msg);
|
||||
gu_pool_free(tmp_pool);
|
||||
return;
|
||||
}
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
}
|
||||
|
||||
JNIEXPORT void JNICALL
|
||||
Java_org_grammaticalframework_sg_SG_rollback(JNIEnv *env, jobject self)
|
||||
{
|
||||
GuPool* tmp_pool = gu_local_pool();
|
||||
|
||||
// Create an exception frame that catches all errors.
|
||||
GuExn* err = gu_exn(tmp_pool);
|
||||
|
||||
sg_rollback(get_ref(env, self), err);
|
||||
if (!gu_ok(err)) {
|
||||
GuString msg;
|
||||
if (gu_exn_caught(err, SgError)) {
|
||||
msg = (GuString) gu_exn_caught_data(err);
|
||||
} else {
|
||||
msg = "The transaction cannot be rolled back";
|
||||
}
|
||||
throw_string_exception(env, "org/grammaticalframework/sg/SGError", msg);
|
||||
gu_pool_free(tmp_pool);
|
||||
return;
|
||||
}
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
}
|
||||
|
||||
JNIEXPORT jlong JNICALL
|
||||
Java_org_grammaticalframework_sg_SG_insertTriple(JNIEnv *env, jobject self,
|
||||
jobject jsubj,
|
||||
jobject jpred,
|
||||
jobject jobj)
|
||||
{
|
||||
SgSG *sg = get_ref(env, self);
|
||||
|
||||
GuPool* tmp_pool = gu_local_pool();
|
||||
GuExn* err = gu_exn(tmp_pool);
|
||||
|
||||
SgTriple triple;
|
||||
triple[0] = gu_variant_from_ptr((void*) get_ref(env, jsubj));
|
||||
triple[1] = gu_variant_from_ptr((void*) get_ref(env, jpred));
|
||||
triple[2] = gu_variant_from_ptr((void*) get_ref(env, jobj));
|
||||
|
||||
SgId id = sg_insert_triple(sg, triple, err);
|
||||
if (!gu_ok(err)) {
|
||||
GuString msg;
|
||||
if (gu_exn_caught(err, SgError)) {
|
||||
msg = (GuString) gu_exn_caught_data(err);
|
||||
} else {
|
||||
msg = "The insertion failed";
|
||||
}
|
||||
throw_string_exception(env, "org/grammaticalframework/sg/SGError", msg);
|
||||
gu_pool_free(tmp_pool);
|
||||
return 0;
|
||||
}
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
|
||||
return id;
|
||||
}
|
||||
@@ -14,18 +14,18 @@ public class Bracket {
|
||||
* where they all will have the same id */
|
||||
public final int fid;
|
||||
|
||||
public final int lindex;
|
||||
public final String ann;
|
||||
|
||||
/** The children of the bracket. Every element is either a string
|
||||
* if this is a leaf in the parse tree, or a {@link Bracket} object.
|
||||
*/
|
||||
public final Object[] children;
|
||||
|
||||
public Bracket(String cat, String fun, int fid, int lindex, Object[] children) {
|
||||
public Bracket(String cat, String fun, int fid, String ann, Object[] children) {
|
||||
this.cat = cat;
|
||||
this.fun = fun;
|
||||
this.fid = fid;
|
||||
this.lindex = lindex;
|
||||
this.ann = ann;
|
||||
this.children = children;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,9 +3,9 @@ package org.grammaticalframework.pgf;
|
||||
import java.util.Iterator;
|
||||
|
||||
public interface LiteralCallback {
|
||||
public CallbackResult match(int lin_idx, int start_offset);
|
||||
public CallbackResult match(String ann, int start_offset);
|
||||
|
||||
public Iterator<TokenProb> predict(int lin_idx, String prefix);
|
||||
public Iterator<TokenProb> predict(String ann, String prefix);
|
||||
|
||||
public static class CallbackResult {
|
||||
private ExprProb ep;
|
||||
|
||||
@@ -19,7 +19,7 @@ public class NercLiteralCallback implements LiteralCallback {
|
||||
this.sentence = sentence;
|
||||
}
|
||||
|
||||
public CallbackResult match(int lin_idx, int offset) {
|
||||
public CallbackResult match(String ann, int offset) {
|
||||
StringBuilder sbuilder = new StringBuilder();
|
||||
|
||||
int i = 0;
|
||||
@@ -83,7 +83,7 @@ public class NercLiteralCallback implements LiteralCallback {
|
||||
return null;
|
||||
}
|
||||
|
||||
public Iterator<TokenProb> predict(int lin_idx, String prefix) {
|
||||
public Iterator<TokenProb> predict(String ann, String prefix) {
|
||||
return Collections.<TokenProb>emptyList().iterator();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@ public class UnknownLiteralCallback implements LiteralCallback {
|
||||
this.sentence = sentence;
|
||||
}
|
||||
|
||||
public CallbackResult match(int lin_idx, int offset) {
|
||||
public CallbackResult match(String ann, int offset) {
|
||||
if (offset < sentence.length() &&
|
||||
!Character.isUpperCase(sentence.charAt(offset))) {
|
||||
int start_offset = offset;
|
||||
@@ -35,7 +35,7 @@ public class UnknownLiteralCallback implements LiteralCallback {
|
||||
return null;
|
||||
}
|
||||
|
||||
public Iterator<TokenProb> predict(int lin_idx, String prefix) {
|
||||
public Iterator<TokenProb> predict(String ann, String prefix) {
|
||||
return Collections.<TokenProb>emptyList().iterator();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
package org.grammaticalframework.sg;
|
||||
|
||||
import java.io.Closeable;
|
||||
import org.grammaticalframework.pgf.*;
|
||||
|
||||
/** This class represents a connection to a semantic graph database.
|
||||
* The semantic graph is a graph represented as a set of tripples
|
||||
* of abstract expressions. The graph can be used for instance to store
|
||||
* semantic information for entities in a GF grammar.
|
||||
*/
|
||||
public class SG implements Closeable {
|
||||
/** Opens a new database file. */
|
||||
public static native SG openSG(String path) throws SGError;
|
||||
|
||||
/** Closes an already opened database. */
|
||||
public native void close() throws SGError;
|
||||
|
||||
/** Reads a triple in the format <expr,expr,expr> and returns it as an array. */
|
||||
public static native Expr[] readTriple(String s) throws PGFError;
|
||||
|
||||
/** Simple triple queries.
|
||||
* Each of the arguments subj, pred and obj could be null.
|
||||
* A null argument is interpreted as a wild card.
|
||||
* If one of the arguments is not null then only triples with matching values
|
||||
* will be retrieved.
|
||||
*
|
||||
* @return An iterator over the matching triples.
|
||||
*/
|
||||
public native TripleResult queryTriple(Expr subj, Expr pred, Expr obj) throws SGError;
|
||||
|
||||
/** Starts a new transaction. */
|
||||
public native void beginTrans() throws SGError;
|
||||
|
||||
/** Commits the transaction. */
|
||||
public native void commit() throws SGError;
|
||||
|
||||
/** Rollbacks all changes made in the current transaction. */
|
||||
public native void rollback() throws SGError;
|
||||
|
||||
/** Inserts a new triple.
|
||||
* @return an unique id that identifies this triple in the database
|
||||
*/
|
||||
public native long insertTriple(Expr subj, Expr pred, Expr obj) throws SGError;
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// private stuff
|
||||
private long ref;
|
||||
|
||||
private SG(long ref) {
|
||||
this.ref = ref;
|
||||
}
|
||||
|
||||
static {
|
||||
System.loadLibrary("jpgf");
|
||||
}
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
package org.grammaticalframework.sg;
|
||||
|
||||
/**
 * Unchecked exception raised when an operation on the semantic graph
 * fails.
 */
public class SGError extends RuntimeException {
	private static final long serialVersionUID = -6098784400143861939L;

	/**
	 * @param message a description of the failure
	 */
	public SGError(String message) {
		super(message);
	}
}
|
||||
@@ -1,53 +0,0 @@
|
||||
package org.grammaticalframework.sg;
|
||||
|
||||
import java.io.Closeable;
|
||||
import org.grammaticalframework.pgf.Expr;
|
||||
|
||||
/** This class is used to iterate over a list of triples.
|
||||
* To move to the next triple, call {@link TripleResult#hasNext}.
|
||||
* When you do not need the iterator anymore then call {@link TripleResult#close}
|
||||
* to release the allocated resources.
|
||||
*/
|
||||
public class TripleResult implements Closeable {
|
||||
public native boolean hasNext();
|
||||
|
||||
/** Closes the iterator and releases the allocated resources. */
|
||||
public native void close();
|
||||
|
||||
/** Each triple has an unique integer key. You can get the key for
|
||||
* the current triple by calling {@link TripleResult#getKey}.
|
||||
*/
|
||||
public long getKey() {
|
||||
return key;
|
||||
}
|
||||
|
||||
/** This is the first element of the current triple. */
|
||||
public Expr getSubject() {
|
||||
return subj;
|
||||
}
|
||||
|
||||
/** This is the second element of the current triple. */
|
||||
public Expr getPredicate() {
|
||||
return pred;
|
||||
}
|
||||
|
||||
/** This is the third element of the current triple. */
|
||||
public Expr getObject() {
|
||||
return obj;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// private stuff
|
||||
private long ref;
|
||||
private long key;
|
||||
private Expr subj;
|
||||
private Expr pred;
|
||||
private Expr obj;
|
||||
|
||||
private TripleResult(long ref, Expr subj, Expr pred, Expr obj) {
|
||||
this.ref = ref;
|
||||
this.subj = subj;
|
||||
this.pred = pred;
|
||||
this.obj = obj;
|
||||
}
|
||||
}
|
||||
@@ -5,6 +5,7 @@
|
||||
#include <gu/mem.h>
|
||||
#include <gu/map.h>
|
||||
#include <gu/file.h>
|
||||
#include <gu/utf8.h>
|
||||
#include <pgf/pgf.h>
|
||||
#include <pgf/linearizer.h>
|
||||
|
||||
@@ -1307,8 +1308,8 @@ static PyObject*
|
||||
Concr_printName(ConcrObject* self, PyObject *args)
|
||||
{
|
||||
GuString id;
|
||||
if (!PyArg_ParseTuple(args, "s", &id))
|
||||
return NULL;
|
||||
if (!PyArg_ParseTuple(args, "s", &id))
|
||||
return NULL;
|
||||
|
||||
GuString name = pgf_print_name(self->concr, id);
|
||||
if (name == NULL)
|
||||
@@ -1346,9 +1347,42 @@ typedef struct {
|
||||
GuFinalizer fin;
|
||||
} PyPgfLiteralCallback;
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
static size_t
|
||||
utf8_to_unicode_offset(GuString sentence, size_t offset)
|
||||
{
|
||||
const uint8_t* start = (uint8_t*) sentence;
|
||||
const uint8_t* end = start+offset;
|
||||
|
||||
size_t chars = 0;
|
||||
while (start < end) {
|
||||
gu_utf8_decode(&start);
|
||||
chars++;
|
||||
}
|
||||
|
||||
return chars;
|
||||
}
|
||||
|
||||
static size_t
|
||||
unicode_to_utf8_offset(GuString sentence, size_t chars)
|
||||
{
|
||||
const uint8_t* start = (uint8_t*) sentence;
|
||||
const uint8_t* end = start;
|
||||
|
||||
while (chars > 0) {
|
||||
GuUCS ucs = gu_utf8_decode(&end);
|
||||
if (ucs == 0)
|
||||
break;
|
||||
chars--;
|
||||
}
|
||||
|
||||
return (end-start);
|
||||
}
|
||||
#endif
|
||||
|
||||
static PgfExprProb*
|
||||
pypgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr,
|
||||
size_t lin_idx,
|
||||
GuString ann,
|
||||
GuString sentence, size_t* poffset,
|
||||
GuPool *out_pool)
|
||||
{
|
||||
@@ -1356,10 +1390,18 @@ pypgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr,
|
||||
gu_container(self, PyPgfLiteralCallback, callback);
|
||||
|
||||
PyObject* result =
|
||||
PyObject_CallFunction(callback->pycallback, "ii",
|
||||
lin_idx, *poffset);
|
||||
if (result == NULL)
|
||||
PyObject_CallFunction(callback->pycallback, "si",
|
||||
ann,
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
utf8_to_unicode_offset(sentence, *poffset)
|
||||
#else
|
||||
*poffset
|
||||
#endif
|
||||
);
|
||||
if (result == NULL) {
|
||||
PyErr_Print();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (result == Py_None) {
|
||||
Py_DECREF(result);
|
||||
@@ -1369,40 +1411,17 @@ pypgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr,
|
||||
PgfExprProb* ep = gu_new(PgfExprProb, out_pool);
|
||||
|
||||
ExprObject* pyexpr;
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
int chars;
|
||||
if (!PyArg_ParseTuple(result, "Ofi", &pyexpr, &ep->prob, &chars))
|
||||
return NULL;
|
||||
*poffset = unicode_to_utf8_offset(sentence, chars);
|
||||
#else
|
||||
if (!PyArg_ParseTuple(result, "Ofi", &pyexpr, &ep->prob, poffset))
|
||||
return NULL;
|
||||
#endif
|
||||
|
||||
ep->expr = pyexpr->expr;
|
||||
|
||||
{
|
||||
// This is an uggly hack. We first show the expression ep->expr
|
||||
// and then we read it back but in out_pool. The whole purpose
|
||||
// of this is to copy the expression from the temporary pool
|
||||
// that was created in the Java binding to the parser pool.
|
||||
// There should be a real copying function or even better
|
||||
// there must be a way to avoid copying at all.
|
||||
|
||||
GuPool* tmp_pool = gu_local_pool();
|
||||
|
||||
GuExn* err = gu_exn(tmp_pool);
|
||||
GuStringBuf* sbuf = gu_new_string_buf(tmp_pool);
|
||||
GuOut* out = gu_string_buf_out(sbuf);
|
||||
|
||||
pgf_print_expr(ep->expr, NULL, 0, out, err);
|
||||
|
||||
GuIn* in = gu_data_in((uint8_t*) gu_string_buf_data(sbuf),
|
||||
gu_string_buf_length(sbuf),
|
||||
tmp_pool);
|
||||
|
||||
ep->expr = pgf_read_expr(in, out_pool, tmp_pool, err);
|
||||
if (!gu_ok(err) || gu_variant_is_null(ep->expr)) {
|
||||
PyErr_SetString(PGFError, "The expression cannot be parsed");
|
||||
gu_pool_free(tmp_pool);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
}
|
||||
ep->expr = pgf_clone_expr(pyexpr->expr, out_pool);
|
||||
|
||||
Py_DECREF(result);
|
||||
|
||||
@@ -1411,7 +1430,7 @@ pypgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr,
|
||||
|
||||
static GuEnum*
|
||||
pypgf_literal_callback_predict(PgfLiteralCallback* self, PgfConcr* concr,
|
||||
size_t lin_idx,
|
||||
GuString ann,
|
||||
GuString prefix,
|
||||
GuPool *out_pool)
|
||||
{
|
||||
@@ -1490,7 +1509,7 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||
int max_count = -1;
|
||||
double heuristics = -1;
|
||||
PyObject* py_callbacks = NULL;
|
||||
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|OidO!", kwlist,
|
||||
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|OidO!", kwlist,
|
||||
&sentence, &start, &max_count,
|
||||
&heuristics,
|
||||
&PyList_Type, &py_callbacks))
|
||||
@@ -1586,10 +1605,10 @@ Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||
PyObject* start = NULL;
|
||||
GuString prefix = "";
|
||||
int max_count = -1;
|
||||
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|Osi", kwlist,
|
||||
&sentence, &start,
|
||||
&prefix, &max_count))
|
||||
return NULL;
|
||||
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|Osi", kwlist,
|
||||
&sentence, &start,
|
||||
&prefix, &max_count))
|
||||
return NULL;
|
||||
|
||||
IterObject* pyres = (IterObject*)
|
||||
pgf_IterType.tp_alloc(&pgf_IterType, 0);
|
||||
@@ -1681,9 +1700,9 @@ Concr_lookupSentence(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||
const char *sentence = NULL;
|
||||
PyObject* start = NULL;
|
||||
int max_count = -1;
|
||||
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|O", kwlist,
|
||||
&sentence, &start, &max_count))
|
||||
return NULL;
|
||||
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|O", kwlist,
|
||||
&sentence, &start, &max_count))
|
||||
return NULL;
|
||||
|
||||
IterObject* pyres = (IterObject*)
|
||||
pgf_IterType.tp_alloc(&pgf_IterType, 0);
|
||||
@@ -1934,7 +1953,7 @@ typedef struct {
|
||||
PyObject_HEAD
|
||||
PyObject* cat;
|
||||
int fid;
|
||||
int lindex;
|
||||
PyObject* ann;
|
||||
PyObject* fun;
|
||||
PyObject* children;
|
||||
} BracketObject;
|
||||
@@ -2009,8 +2028,8 @@ static PyMemberDef Bracket_members[] = {
|
||||
"the abstract function for this bracket"},
|
||||
{"fid", T_INT, offsetof(BracketObject, fid), 0,
|
||||
"an id which identifies this bracket in the bracketed string. If there are discontinuous phrases this id will be shared for all brackets belonging to the same phrase."},
|
||||
{"lindex", T_INT, offsetof(BracketObject, lindex), 0,
|
||||
"the constituent index"},
|
||||
{"ann", T_OBJECT_EX, offsetof(BracketObject, ann), 0,
|
||||
"the analysis of the constituent"},
|
||||
{"children", T_OBJECT_EX, offsetof(BracketObject, children), 0,
|
||||
"a list with the children of this bracket"},
|
||||
{NULL} /* Sentinel */
|
||||
@@ -2058,6 +2077,58 @@ static PyTypeObject pgf_BracketType = {
|
||||
0, /*tp_new */
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
} BINDObject;
|
||||
|
||||
static PyObject *
|
||||
BIND_repr(BINDObject *self)
|
||||
{
|
||||
return PyString_FromString("&+");
|
||||
}
|
||||
|
||||
static PyTypeObject pgf_BINDType = {
|
||||
PyVarObject_HEAD_INIT(NULL, 0)
|
||||
//0, /*ob_size*/
|
||||
"pgf.BIND", /*tp_name*/
|
||||
sizeof(BINDObject), /*tp_basicsize*/
|
||||
0, /*tp_itemsize*/
|
||||
0, /*tp_dealloc*/
|
||||
0, /*tp_print*/
|
||||
0, /*tp_getattr*/
|
||||
0, /*tp_setattr*/
|
||||
0, /*tp_compare*/
|
||||
0, /*tp_repr*/
|
||||
0, /*tp_as_number*/
|
||||
0, /*tp_as_sequence*/
|
||||
0, /*tp_as_mapping*/
|
||||
0, /*tp_hash */
|
||||
0, /*tp_call*/
|
||||
(reprfunc) BIND_repr, /*tp_str*/
|
||||
0, /*tp_getattro*/
|
||||
0, /*tp_setattro*/
|
||||
0, /*tp_as_buffer*/
|
||||
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
|
||||
"a marker for BIND in a bracketed string", /*tp_doc*/
|
||||
0, /*tp_traverse */
|
||||
0, /*tp_clear */
|
||||
0, /*tp_richcompare */
|
||||
0, /*tp_weaklistoffset */
|
||||
0, /*tp_iter */
|
||||
0, /*tp_iternext */
|
||||
0, /*tp_methods */
|
||||
0, /*tp_members */
|
||||
0, /*tp_getset */
|
||||
0, /*tp_base */
|
||||
0, /*tp_dict */
|
||||
0, /*tp_descr_get */
|
||||
0, /*tp_descr_set */
|
||||
0, /*tp_dictoffset */
|
||||
0, /*tp_init */
|
||||
0, /*tp_alloc */
|
||||
0, /*tp_new */
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
PgfLinFuncs* funcs;
|
||||
GuBuf* stack;
|
||||
@@ -2075,7 +2146,7 @@ pgf_bracket_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun)
|
||||
pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
|
||||
{
|
||||
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
|
||||
|
||||
@@ -2084,7 +2155,7 @@ pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t li
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun)
|
||||
pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
|
||||
{
|
||||
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
|
||||
|
||||
@@ -2096,7 +2167,7 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lind
|
||||
if (bracket != NULL) {
|
||||
bracket->cat = PyString_FromString(cat);
|
||||
bracket->fid = fid;
|
||||
bracket->lindex = lindex;
|
||||
bracket->ann = PyString_FromString(ann);
|
||||
bracket->fun = PyString_FromString(fun);
|
||||
bracket->children = state->list;
|
||||
PyList_Append(parent, (PyObject*) bracket);
|
||||
@@ -2109,6 +2180,16 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lind
|
||||
state->list = parent;
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_bracket_lzn_symbol_bind(PgfLinFuncs** funcs)
|
||||
{
|
||||
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
|
||||
|
||||
PyObject* bind = pgf_BINDType.tp_alloc(&pgf_BINDType, 0);
|
||||
PyList_Append(state->list, bind);
|
||||
Py_DECREF(bind);
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_bracket_lzn_symbol_meta(PgfLinFuncs** funcs, PgfMetaId meta_id)
|
||||
{
|
||||
@@ -2120,7 +2201,7 @@ static PgfLinFuncs pgf_bracket_lin_funcs = {
|
||||
.begin_phrase = pgf_bracket_lzn_begin_phrase,
|
||||
.end_phrase = pgf_bracket_lzn_end_phrase,
|
||||
.symbol_ne = NULL,
|
||||
.symbol_bind = NULL,
|
||||
.symbol_bind = pgf_bracket_lzn_symbol_bind,
|
||||
.symbol_capit = NULL,
|
||||
.symbol_meta = pgf_bracket_lzn_symbol_meta
|
||||
};
|
||||
@@ -2349,8 +2430,8 @@ pypgf_collect_morpho(PgfMorphoCallback* self,
|
||||
static PyObject*
|
||||
Concr_lookupMorpho(ConcrObject* self, PyObject *args) {
|
||||
GuString sent;
|
||||
if (!PyArg_ParseTuple(args, "s", &sent))
|
||||
return NULL;
|
||||
if (!PyArg_ParseTuple(args, "s", &sent))
|
||||
return NULL;
|
||||
|
||||
GuPool *tmp_pool = gu_local_pool();
|
||||
GuExn* err = gu_exn(tmp_pool);
|
||||
@@ -2375,6 +2456,129 @@ Concr_lookupMorpho(ConcrObject* self, PyObject *args) {
|
||||
return analyses;
|
||||
}
|
||||
|
||||
#define PGF_MORPHOCALLBACK_NAME "pgf.MorphoCallback"
|
||||
|
||||
static void
|
||||
pypgf_morphocallback_destructor(PyObject *capsule)
|
||||
{
|
||||
PyMorphoCallback* callback =
|
||||
PyCapsule_GetPointer(capsule, PGF_MORPHOCALLBACK_NAME);
|
||||
Py_XDECREF(callback->analyses);
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
Iter_fetch_cohort(IterObject* self)
|
||||
{
|
||||
PgfCohortRange range =
|
||||
gu_next(self->res, PgfCohortRange, self->pool);
|
||||
if (range.buf == NULL)
|
||||
return NULL;
|
||||
|
||||
PyObject* py_start = PyLong_FromSize_t(range.start.pos);
|
||||
if (py_start == NULL)
|
||||
return NULL;
|
||||
PyObject* py_end = PyLong_FromSize_t(range.end.pos);
|
||||
if (py_end == NULL) {
|
||||
Py_DECREF(py_start);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyMorphoCallback* callback =
|
||||
PyCapsule_GetPointer(PyTuple_GetItem(self->container, 0),
|
||||
PGF_MORPHOCALLBACK_NAME);
|
||||
|
||||
PyObject* py_slice =
|
||||
PySlice_New(py_start, py_end, NULL);
|
||||
if (py_slice == NULL) {
|
||||
Py_DECREF(py_start);
|
||||
Py_DECREF(py_end);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyObject* py_w =
|
||||
PyObject_GetItem(PyTuple_GetItem(self->container, 1), py_slice);
|
||||
|
||||
PyObject* res =
|
||||
PyTuple_Pack(4, py_start, py_w, callback->analyses, py_end);
|
||||
|
||||
Py_DECREF(callback->analyses);
|
||||
callback->analyses = PyList_New(0);
|
||||
|
||||
Py_DECREF(py_w);
|
||||
Py_DECREF(py_slice);
|
||||
Py_DECREF(py_end);
|
||||
Py_DECREF(py_start);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
Concr_lookupCohorts(ConcrObject* self, PyObject *args)
|
||||
{
|
||||
PyObject* py_sent = NULL;
|
||||
if (!PyArg_ParseTuple(args, "U", &py_sent))
|
||||
return NULL;
|
||||
|
||||
IterObject* pyres = (IterObject*)
|
||||
pgf_IterType.tp_alloc(&pgf_IterType, 0);
|
||||
if (pyres == NULL)
|
||||
return NULL;
|
||||
|
||||
pyres->pool = gu_new_pool();
|
||||
pyres->source = (PyObject*) self->grammar;
|
||||
Py_XINCREF(pyres->source);
|
||||
|
||||
PyMorphoCallback* callback = gu_new(PyMorphoCallback,pyres->pool);
|
||||
callback->fn.callback = pypgf_collect_morpho;
|
||||
callback->analyses = PyList_New(0);
|
||||
PyObject* capsule =
|
||||
PyCapsule_New(callback, PGF_MORPHOCALLBACK_NAME,
|
||||
pypgf_morphocallback_destructor);
|
||||
if (capsule == NULL) {
|
||||
Py_DECREF(pyres);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
PyObject* bytes = PyUnicode_AsUTF8String(py_sent);
|
||||
if (!bytes)
|
||||
return NULL;
|
||||
GuString sent = PyBytes_AsString(bytes);
|
||||
if (!sent) {
|
||||
Py_DECREF(bytes);
|
||||
return NULL;
|
||||
}
|
||||
#else
|
||||
GuString sent = PyString_AsString(py_sent);
|
||||
if (!sent)
|
||||
return NULL;
|
||||
#endif
|
||||
|
||||
|
||||
pyres->container =
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
PyTuple_Pack(3, capsule, py_sent, bytes);
|
||||
Py_DECREF(bytes);
|
||||
#else
|
||||
PyTuple_Pack(2, capsule, py_sent);
|
||||
#endif
|
||||
pyres->max_count = -1;
|
||||
pyres->counter = 0;
|
||||
pyres->fetch = Iter_fetch_cohort;
|
||||
|
||||
Py_DECREF(capsule);
|
||||
|
||||
GuExn* err = gu_new_exn(pyres->pool);
|
||||
pyres->res = pgf_lookup_cohorts(self->concr, sent,
|
||||
&callback->fn, pyres->pool, err);
|
||||
if (pyres->res == NULL) {
|
||||
Py_DECREF(pyres);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return (PyObject*) pyres;
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
Iter_fetch_fullform(IterObject* self)
|
||||
{
|
||||
@@ -2445,9 +2649,9 @@ Concr_fullFormLexicon(ConcrObject* self, PyObject *args)
|
||||
static PyObject*
|
||||
Concr_load(ConcrObject* self, PyObject *args)
|
||||
{
|
||||
const char *fpath;
|
||||
if (!PyArg_ParseTuple(args, "s", &fpath))
|
||||
return NULL;
|
||||
const char *fpath;
|
||||
if (!PyArg_ParseTuple(args, "s", &fpath))
|
||||
return NULL;
|
||||
|
||||
GuPool* tmp_pool = gu_local_pool();
|
||||
|
||||
@@ -2517,7 +2721,7 @@ static PyMethodDef Concr_methods[] = {
|
||||
{"parse", (PyCFunction)Concr_parse, METH_VARARGS | METH_KEYWORDS,
|
||||
"Parses a string and returns an iterator over the abstract trees for this sentence\n\n"
|
||||
"Named arguments:\n"
|
||||
"- sentence (string) or tokens (list of strings)\n"
|
||||
"- sentence (string)\n"
|
||||
"- cat (string); OPTIONAL, default: the startcat of the grammar\n"
|
||||
"- n (int), max. trees; OPTIONAL, default: extract all trees\n"
|
||||
"- heuristics (double >= 0.0); OPTIONAL, default: taken from the flags in the grammar\n"
|
||||
@@ -2560,6 +2764,9 @@ static PyMethodDef Concr_methods[] = {
|
||||
{"lookupMorpho", (PyCFunction)Concr_lookupMorpho, METH_VARARGS,
|
||||
"Looks up a word in the lexicon of the grammar"
|
||||
},
|
||||
{"lookupCohorts", (PyCFunction)Concr_lookupCohorts, METH_VARARGS,
|
||||
"Takes a sentence and returns all matches for lexical items from the grammar in that sentence"
|
||||
},
|
||||
{"fullFormLexicon", (PyCFunction)Concr_fullFormLexicon, METH_VARARGS,
|
||||
"Enumerates all words in the lexicon (useful for extracting full form lexicons)"
|
||||
},
|
||||
@@ -2850,8 +3057,8 @@ static PyObject*
|
||||
PGF_functionsByCat(PGFObject* self, PyObject *args)
|
||||
{
|
||||
PgfCId catname;
|
||||
if (!PyArg_ParseTuple(args, "s", &catname))
|
||||
return NULL;
|
||||
if (!PyArg_ParseTuple(args, "s", &catname))
|
||||
return NULL;
|
||||
|
||||
PyObject* functions = PyList_New(0);
|
||||
if (functions == NULL) {
|
||||
@@ -2907,9 +3114,9 @@ PGF_generateAll(PGFObject* self, PyObject *args, PyObject *keywds)
|
||||
|
||||
PyObject* start = NULL;
|
||||
int max_count = -1;
|
||||
if (!PyArg_ParseTupleAndKeywords(args, keywds, "O|i", kwlist,
|
||||
&start, &max_count))
|
||||
return NULL;
|
||||
if (!PyArg_ParseTupleAndKeywords(args, keywds, "O|i", kwlist,
|
||||
&start, &max_count))
|
||||
return NULL;
|
||||
|
||||
IterObject* pyres = (IterObject*)
|
||||
pgf_IterType.tp_alloc(&pgf_IterType, 0);
|
||||
@@ -3171,12 +3378,12 @@ static PyObject*
|
||||
PGF_embed(PGFObject* self, PyObject *args)
|
||||
{
|
||||
PgfCId modname;
|
||||
if (!PyArg_ParseTuple(args, "s", &modname))
|
||||
return NULL;
|
||||
if (!PyArg_ParseTuple(args, "s", &modname))
|
||||
return NULL;
|
||||
|
||||
PyObject *m = PyImport_AddModule(modname);
|
||||
if (m == NULL)
|
||||
return NULL;
|
||||
PyObject *m = PyImport_AddModule(modname);
|
||||
if (m == NULL)
|
||||
return NULL;
|
||||
|
||||
GuPool* tmp_pool = gu_local_pool();
|
||||
|
||||
@@ -3304,9 +3511,9 @@ static PyTypeObject pgf_PGFType = {
|
||||
static PGFObject*
|
||||
pgf_readPGF(PyObject *self, PyObject *args)
|
||||
{
|
||||
const char *fpath;
|
||||
if (!PyArg_ParseTuple(args, "s", &fpath))
|
||||
return NULL;
|
||||
const char *fpath;
|
||||
if (!PyArg_ParseTuple(args, "s", &fpath))
|
||||
return NULL;
|
||||
|
||||
PGFObject* py_pgf = (PGFObject*) pgf_PGFType.tp_alloc(&pgf_PGFType, 0);
|
||||
py_pgf->pool = gu_new_pool();
|
||||
@@ -3337,9 +3544,9 @@ pgf_readPGF(PyObject *self, PyObject *args)
|
||||
static ExprObject*
|
||||
pgf_readExpr(PyObject *self, PyObject *args) {
|
||||
Py_ssize_t len;
|
||||
const uint8_t *buf;
|
||||
if (!PyArg_ParseTuple(args, "s#", &buf, &len))
|
||||
return NULL;
|
||||
const uint8_t *buf;
|
||||
if (!PyArg_ParseTuple(args, "s#", &buf, &len))
|
||||
return NULL;
|
||||
|
||||
ExprObject* pyexpr = (ExprObject*) pgf_ExprType.tp_alloc(&pgf_ExprType, 0);
|
||||
if (pyexpr == NULL)
|
||||
@@ -3367,9 +3574,9 @@ pgf_readExpr(PyObject *self, PyObject *args) {
|
||||
static TypeObject*
|
||||
pgf_readType(PyObject *self, PyObject *args) {
|
||||
Py_ssize_t len;
|
||||
const uint8_t *buf;
|
||||
if (!PyArg_ParseTuple(args, "s#", &buf, &len))
|
||||
return NULL;
|
||||
const uint8_t *buf;
|
||||
if (!PyArg_ParseTuple(args, "s#", &buf, &len))
|
||||
return NULL;
|
||||
|
||||
TypeObject* pytype = (TypeObject*) pgf_TypeType.tp_alloc(&pgf_TypeType, 0);
|
||||
if (pytype == NULL)
|
||||
@@ -3424,20 +3631,23 @@ MOD_INIT(pgf)
|
||||
{
|
||||
PyObject *m;
|
||||
|
||||
if (PyType_Ready(&pgf_PGFType) < 0)
|
||||
return MOD_ERROR_VAL;
|
||||
if (PyType_Ready(&pgf_PGFType) < 0)
|
||||
return MOD_ERROR_VAL;
|
||||
|
||||
if (PyType_Ready(&pgf_ConcrType) < 0)
|
||||
return MOD_ERROR_VAL;
|
||||
if (PyType_Ready(&pgf_ConcrType) < 0)
|
||||
return MOD_ERROR_VAL;
|
||||
|
||||
if (PyType_Ready(&pgf_BracketType) < 0)
|
||||
return MOD_ERROR_VAL;
|
||||
if (PyType_Ready(&pgf_BracketType) < 0)
|
||||
return MOD_ERROR_VAL;
|
||||
|
||||
if (PyType_Ready(&pgf_ExprType) < 0)
|
||||
return MOD_ERROR_VAL;
|
||||
if (PyType_Ready(&pgf_BINDType) < 0)
|
||||
return MOD_ERROR_VAL;
|
||||
|
||||
if (PyType_Ready(&pgf_TypeType) < 0)
|
||||
return MOD_ERROR_VAL;
|
||||
if (PyType_Ready(&pgf_ExprType) < 0)
|
||||
return MOD_ERROR_VAL;
|
||||
|
||||
if (PyType_Ready(&pgf_TypeType) < 0)
|
||||
return MOD_ERROR_VAL;
|
||||
|
||||
if (PyType_Ready(&pgf_IterType) < 0)
|
||||
return MOD_ERROR_VAL;
|
||||
@@ -3467,10 +3677,20 @@ MOD_INIT(pgf)
|
||||
PyModule_AddObject(m, "Type", (PyObject *) &pgf_TypeType);
|
||||
Py_INCREF(&pgf_TypeType);
|
||||
|
||||
PyModule_AddObject(m, "PGF", (PyObject *) &pgf_PGFType);
|
||||
Py_INCREF(&pgf_PGFType);
|
||||
|
||||
PyModule_AddObject(m, "Concr", (PyObject *) &pgf_ConcrType);
|
||||
Py_INCREF(&pgf_ConcrType);
|
||||
|
||||
PyModule_AddObject(m, "Iter", (PyObject *) &pgf_IterType);
|
||||
Py_INCREF(&pgf_IterType);
|
||||
|
||||
PyModule_AddObject(m, "Bracket", (PyObject *) &pgf_BracketType);
|
||||
Py_INCREF(&pgf_BracketType);
|
||||
|
||||
PyModule_AddObject(m, "BIND", (PyObject *) &pgf_BINDType);
|
||||
Py_INCREF(&pgf_BINDType);
|
||||
|
||||
return MOD_SUCCESS_VAL(m);
|
||||
}
|
||||
|
||||
@@ -17,7 +17,13 @@ pgf_module = Extension('pgf',
|
||||
|
||||
setup (name = 'pgf',
|
||||
version = '1.0',
|
||||
description = 'A binding to the PGF engine',
|
||||
description = 'Python bindings to the Grammatical Framework\'s PGF runtime',
|
||||
long_description="""\
|
||||
Grammatical Framework (GF) is a programming language for multilingual grammar applications.
|
||||
This package provides Python bindings to GF runtime, which allows you to \
|
||||
parse and generate text using GF grammars compiled into the PGF format.
|
||||
""",
|
||||
url='https://www.grammaticalframework.org/',
|
||||
author='Krasimir Angelov',
|
||||
author_email='kr.angelov@gmail.com',
|
||||
license='BSD',
|
||||
|
||||
Reference in New Issue
Block a user