1
0
forked from GitHub/gf-core

Merge branch 'master' into c-runtime

This commit is contained in:
krangelov
2021-07-30 11:20:04 +02:00
211 changed files with 7161 additions and 58549 deletions

View File

@@ -14,6 +14,9 @@ For Linux users
You will need the packages: autoconf, automake, libtool, make
- On Ubuntu: $ apt-get install autotools-dev
- On Fedora: $ dnf install autoconf automake libtool
The compilation steps are:
$ autoreconf -i
@@ -28,7 +31,7 @@ For Mac OSX users
The following is what I did to make it work on MacOSX 10.8:
- Install XCode and XCode command line tools
- Install Homebrew: http://mxcl.github.com/homebrew/
- Install Homebrew: https://brew.sh
$ brew install automake autoconf libtool
$ glibtoolize
@@ -49,7 +52,7 @@ For Windows users
After the installation, don't forget to fix the fstab file. See here:
http://www.mingw.org/wiki/Getting_Started
- From the MSYS shell (c:/MinGW/msys/1.0/msys.bat) go to the directory
- From the MSYS shell (c:/MinGW/msys/1.0/msys.bat) go to the directory
which contains the INSTALL file and do:
$ autoreconf -i

View File

@@ -1,7 +1,7 @@
lib_LTLIBRARIES = libgu.la libpgf.la libsg.la
lib_LTLIBRARIES = libgu.la libpgf.la
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = libgu.pc libpgf.pc libsg.pc
pkgconfig_DATA = libgu.pc libpgf.pc
configincludedir = $(libdir)/libgu/include
@@ -37,10 +37,6 @@ pgfinclude_HEADERS = \
pgf/pgf.h \
pgf/data.h
sgincludedir=$(includedir)/sg
sginclude_HEADERS = \
sg/sg.h
libgu_la_SOURCES = \
gu/assert.c \
gu/bits.c \
@@ -92,11 +88,6 @@ libpgf_la_SOURCES = \
libpgf_la_LDFLAGS = "-no-undefined"
libpgf_la_LIBADD = libgu.la
libsg_la_SOURCES = \
sg/sqlite3Btree.c \
sg/sg.c
libsg_la_LIBADD = libgu.la libpgf.la
bin_PROGRAMS =
AUTOMAKE_OPTIONS = foreign subdir-objects dist-bzip2
@@ -104,5 +95,4 @@ ACLOCAL_AMFLAGS = -I m4
EXTRA_DIST = \
libgu.pc.in \
libpgf.pc.in \
libsg.pc.in
libpgf.pc.in

View File

@@ -58,7 +58,6 @@ AC_CONFIG_LINKS(pgf/lightning/asm.h:$cpu_dir/asm.h dnl
AC_CONFIG_FILES([Makefile
libgu.pc
libpgf.pc
libsg.pc
])
AC_OUTPUT

View File

@@ -322,7 +322,7 @@ gu_map_iter(GuMap* map, GuMapItor* itor, GuExn* err)
}
GU_API bool
gu_map_next(GuMap* map, size_t* pi, void** pkey, void* pvalue)
gu_map_next(GuMap* map, size_t* pi, void* pkey, void* pvalue)
{
while (*pi < map->data.n_entries) {
if (gu_map_entry_is_free(map, &map->data, *pi)) {
@@ -330,14 +330,17 @@ gu_map_next(GuMap* map, size_t* pi, void** pkey, void* pvalue)
continue;
}
*pkey = &map->data.keys[*pi * map->key_size];
if (map->hasher == gu_addr_hasher) {
*pkey = *(void**) *pkey;
*((void**) pkey) = *((void**) &map->data.keys[*pi * sizeof(void*)]);
} else if (map->hasher == gu_word_hasher) {
*((GuWord*) pkey) = *((GuWord*) &map->data.keys[*pi * sizeof(GuWord)]);
} else if (map->hasher == gu_string_hasher) {
*pkey = *(void**) *pkey;
}
*((GuString*) pkey) = *((GuString*) &map->data.keys[*pi * sizeof(GuString)]);
} else {
memcpy(pkey, &map->data.keys[*pi * map->key_size], map->key_size);
}
memcpy(pvalue, &map->data.values[*pi * map->cell_size],
memcpy(pvalue, &map->data.values[*pi * map->cell_size],
map->value_size);
(*pi)++;

View File

@@ -75,7 +75,7 @@ GU_API_DECL void
gu_map_iter(GuMap* ht, GuMapItor* itor, GuExn* err);
GU_API bool
gu_map_next(GuMap* map, size_t* pi, void** pkey, void* pvalue);
gu_map_next(GuMap* map, size_t* pi, void* pkey, void* pvalue);
typedef GuMap GuIntMap;

3
src/runtime/c/install.sh Executable file
View File

@@ -0,0 +1,3 @@
bash setup.sh configure
bash setup.sh build
bash setup.sh install

View File

@@ -1,10 +0,0 @@
prefix=@prefix@
exec_prefix=@exec_prefix@
libdir=@libdir@
includedir=@includedir@
Name: libsg
Description: Semantic Graph library
Version: @VERSION@
Libs: -L${libdir} -lsg -lpgf
Cflags: -I${includedir}

View File

@@ -142,14 +142,14 @@ pgf_aligner_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
}
static void
pgf_aligner_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun)
pgf_aligner_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{
PgfAlignerLin* alin = gu_container(funcs, PgfAlignerLin, funcs);
gu_buf_push(alin->parent_stack, int, fid);
}
static void
pgf_aligner_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun)
pgf_aligner_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{
PgfAlignerLin* alin = gu_container(funcs, PgfAlignerLin, funcs);
gu_buf_pop(alin->parent_stack, int);

View File

@@ -322,7 +322,8 @@ typedef struct PgfProductionCoerce
typedef struct {
PgfExprProb *ep;
GuSeq* lins;
size_t n_lins;
PgfSymbols* lins[];
} PgfProductionExtern;
typedef struct {

View File

@@ -953,94 +953,6 @@ pgf_read_expr(GuIn* in, GuPool* pool, GuPool* tmp_pool, GuExn* err)
return expr;
}
PGF_API int
pgf_read_expr_tuple(GuIn* in,
size_t n_exprs, PgfExpr exprs[],
GuPool* pool, GuExn* err)
{
GuPool* tmp_pool = gu_new_pool();
PgfExprParser* parser =
pgf_new_parser(in, pgf_expr_parser_in_getc, pool, tmp_pool, err);
if (parser->token_tag != PGF_TOKEN_LTRIANGLE)
goto fail;
pgf_expr_parser_token(parser, false);
for (size_t i = 0; i < n_exprs; i++) {
if (i > 0) {
if (parser->token_tag != PGF_TOKEN_COMMA)
goto fail;
pgf_expr_parser_token(parser, false);
}
exprs[i] = pgf_expr_parser_expr(parser, false);
if (gu_variant_is_null(exprs[i]))
goto fail;
}
if (parser->token_tag != PGF_TOKEN_RTRIANGLE)
goto fail;
pgf_expr_parser_token(parser, false);
if (parser->token_tag != PGF_TOKEN_EOF)
goto fail;
gu_pool_free(tmp_pool);
return 1;
fail:
gu_pool_free(tmp_pool);
return 0;
}
PGF_API GuSeq*
pgf_read_expr_matrix(GuIn* in,
size_t n_exprs,
GuPool* pool, GuExn* err)
{
GuPool* tmp_pool = gu_new_pool();
PgfExprParser* parser =
pgf_new_parser(in, pgf_expr_parser_in_getc, pool, tmp_pool, err);
if (parser->token_tag != PGF_TOKEN_LTRIANGLE)
goto fail;
pgf_expr_parser_token(parser, false);
GuBuf* buf = gu_new_buf(PgfExpr, pool);
if (parser->token_tag != PGF_TOKEN_RTRIANGLE) {
for (;;) {
PgfExpr* exprs = gu_buf_extend_n(buf, n_exprs);
for (size_t i = 0; i < n_exprs; i++) {
if (i > 0) {
if (parser->token_tag != PGF_TOKEN_COMMA)
goto fail;
pgf_expr_parser_token(parser, false);
}
exprs[i] = pgf_expr_parser_expr(parser, false);
if (gu_variant_is_null(exprs[i]))
goto fail;
}
if (parser->token_tag != PGF_TOKEN_SEMI)
break;
pgf_expr_parser_token(parser, false);
}
if (parser->token_tag != PGF_TOKEN_RTRIANGLE)
goto fail;
}
pgf_expr_parser_token(parser, false);
if (parser->token_tag != PGF_TOKEN_EOF)
goto fail;
gu_pool_free(tmp_pool);
return gu_buf_data_seq(buf);
fail:
gu_pool_free(tmp_pool);
return NULL;
}
PGF_API PgfType*
pgf_read_type(GuIn* in, GuPool* pool, GuPool* tmp_pool, GuExn* err)
{
@@ -1758,19 +1670,6 @@ pgf_print_context(PgfHypos *hypos, PgfPrintContext* ctxt,
}
}
PGF_API void
pgf_print_expr_tuple(size_t n_exprs, PgfExpr exprs[], PgfPrintContext* ctxt,
GuOut* out, GuExn* err)
{
gu_putc('<', out, err);
for (size_t i = 0; i < n_exprs; i++) {
if (i > 0)
gu_putc(',', out, err);
pgf_print_expr(exprs[i], ctxt, 0, out, err);
}
gu_putc('>', out, err);
}
PGF_API bool
pgf_type_eq(PgfType* t1, PgfType* t2)
{
@@ -1806,6 +1705,168 @@ pgf_type_eq(PgfType* t1, PgfType* t2)
return true;
}
PGF_API PgfLiteral
pgf_clone_literal(PgfLiteral lit, GuPool* pool)
{
PgfLiteral new_lit = gu_null_variant;
GuVariantInfo inf = gu_variant_open(lit);
switch (inf.tag) {
case PGF_LITERAL_STR: {
PgfLiteralStr* lit_str = inf.data;
PgfLiteralStr* new_lit_str =
gu_new_flex_variant(PGF_LITERAL_STR,
PgfLiteralStr,
val, strlen(lit_str->val)+1,
&new_lit, pool);
strcpy(new_lit_str->val, lit_str->val);
break;
}
case PGF_LITERAL_INT: {
PgfLiteralInt *lit_int = inf.data;
PgfLiteralInt *new_lit_int =
gu_new_variant(PGF_LITERAL_INT,
PgfLiteralInt,
&new_lit, pool);
new_lit_int->val = lit_int->val;
break;
}
case PGF_LITERAL_FLT: {
PgfLiteralFlt *lit_flt = inf.data;
PgfLiteralFlt *new_lit_flt =
gu_new_variant(PGF_LITERAL_FLT,
PgfLiteralFlt,
&new_lit, pool);
new_lit_flt->val = lit_flt->val;
break;
}
default:
gu_impossible();
}
return new_lit;
}
PGF_API PgfExpr
pgf_clone_expr(PgfExpr expr, GuPool* pool)
{
PgfExpr new_expr = gu_null_variant;
GuVariantInfo inf = gu_variant_open(expr);
switch (inf.tag) {
case PGF_EXPR_ABS: {
PgfExprAbs* abs = inf.data;
PgfExprAbs* new_abs =
gu_new_variant(PGF_EXPR_ABS,
PgfExprAbs,
&new_expr, pool);
new_abs->bind_type = abs->bind_type;
new_abs->id = gu_string_copy(abs->id, pool);
new_abs->body = pgf_clone_expr(abs->body,pool);
break;
}
case PGF_EXPR_APP: {
PgfExprApp* app = inf.data;
PgfExprApp* new_app =
gu_new_variant(PGF_EXPR_APP,
PgfExprApp,
&new_expr, pool);
new_app->fun = pgf_clone_expr(app->fun, pool);
new_app->arg = pgf_clone_expr(app->arg, pool);
break;
}
case PGF_EXPR_LIT: {
PgfExprLit* lit = inf.data;
PgfExprLit* new_lit =
gu_new_variant(PGF_EXPR_LIT,
PgfExprLit,
&new_expr, pool);
new_lit->lit = pgf_clone_literal(lit->lit, pool);
break;
}
case PGF_EXPR_META: {
PgfExprMeta* meta = inf.data;
PgfExprMeta* new_meta =
gu_new_variant(PGF_EXPR_META,
PgfExprMeta,
&new_expr, pool);
new_meta->id = meta->id;
break;
}
case PGF_EXPR_FUN: {
PgfExprFun* fun = inf.data;
PgfExprFun* new_fun =
gu_new_flex_variant(PGF_EXPR_FUN,
PgfExprFun,
fun, strlen(fun->fun)+1,
&new_expr, pool);
strcpy(new_fun->fun, fun->fun);
break;
}
case PGF_EXPR_VAR: {
PgfExprVar* var = inf.data;
PgfExprVar* new_var =
gu_new_variant(PGF_EXPR_VAR,
PgfExprVar,
&new_expr, pool);
new_var->var = var->var;
break;
}
case PGF_EXPR_TYPED: {
PgfExprTyped* typed = inf.data;
PgfExprTyped *new_typed =
gu_new_variant(PGF_EXPR_TYPED,
PgfExprTyped,
&new_expr, pool);
new_typed->expr = pgf_clone_expr(typed->expr, pool);
new_typed->type = pgf_clone_type(typed->type, pool);
break;
}
case PGF_EXPR_IMPL_ARG: {
PgfExprImplArg* impl = inf.data;
PgfExprImplArg *new_impl =
gu_new_variant(PGF_EXPR_IMPL_ARG,
PgfExprImplArg,
&new_expr, pool);
new_impl->expr = pgf_clone_expr(impl->expr, pool);
break;
}
default:
gu_impossible();
}
return new_expr;
}
PGF_API PgfType*
pgf_clone_type(PgfType* type, GuPool* pool)
{
PgfType* new_type =
gu_new_flex(pool, PgfType, exprs, type->n_exprs);
size_t n_hypos = gu_seq_length(type->hypos);
new_type->hypos = gu_new_seq(PgfHypo, n_hypos, pool);
for (size_t i = 0; i < n_hypos; i++) {
PgfHypo* hypo = gu_seq_index(type->hypos, PgfHypo, i);
PgfHypo* new_hypo = gu_seq_index(new_type->hypos, PgfHypo, i);
new_hypo->bind_type = hypo->bind_type;
new_hypo->cid = gu_string_copy(hypo->cid, pool);
new_hypo->type = pgf_clone_type(hypo->type, pool);
}
new_type->cid = gu_string_copy(type->cid, pool);
new_type->n_exprs = type->n_exprs;
for (size_t i = 0; i < new_type->n_exprs; i++) {
new_type->exprs[i] = pgf_clone_expr(type->exprs[i], pool);
}
return new_type;
}
PGF_API prob_t
pgf_compute_tree_probability(PgfPGF *gr, PgfExpr expr)
{

View File

@@ -171,15 +171,6 @@ pgf_expr_unmeta(PgfExpr expr);
PGF_API_DECL PgfExpr
pgf_read_expr(GuIn* in, GuPool* pool, GuPool* tmp_pool, GuExn* err);
PGF_API_DECL int
pgf_read_expr_tuple(GuIn* in,
size_t n_exprs, PgfExpr exprs[],
GuPool* pool, GuExn* err);
PGF_API_DECL GuSeq*
pgf_read_expr_matrix(GuIn* in, size_t n_exprs,
GuPool* pool, GuExn* err);
PGF_API_DECL PgfType*
pgf_read_type(GuIn* in, GuPool* pool, GuPool* tmp_pool, GuExn* err);
@@ -239,9 +230,14 @@ PGF_API_DECL void
pgf_print_context(PgfHypos *hypos, PgfPrintContext* ctxt,
GuOut *out, GuExn *err);
PGF_API_DECL void
pgf_print_expr_tuple(size_t n_exprs, PgfExpr exprs[], PgfPrintContext* ctxt,
GuOut* out, GuExn* err);
PGF_API PgfLiteral
pgf_clone_literal(PgfLiteral lit, GuPool* pool);
PGF_API PgfExpr
pgf_clone_expr(PgfExpr expr, GuPool* pool);
PGF_API PgfType*
pgf_clone_type(PgfType* type, GuPool* pool);
PGF_API_DECL prob_t
pgf_compute_tree_probability(PgfPGF *gr, PgfExpr expr);

View File

@@ -155,7 +155,7 @@ pgf_bracket_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
}
static void
pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun)
pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
@@ -192,7 +192,7 @@ pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t li
}
static void
pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun)
pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);

View File

@@ -628,7 +628,7 @@ typedef struct {
PgfLzrCachedTag tag;
PgfCId cat;
int fid;
int lin_idx;
GuString ann;
PgfCId fun;
} PgfLzrCached;
@@ -666,7 +666,7 @@ pgf_lzr_cache_flush(PgfLzrCache* cache, PgfSymbols* form)
cache->lzr->funcs,
event->cat,
event->fid,
event->lin_idx,
event->ann,
event->fun);
}
break;
@@ -676,7 +676,7 @@ pgf_lzr_cache_flush(PgfLzrCache* cache, PgfSymbols* form)
cache->lzr->funcs,
event->cat,
event->fid,
event->lin_idx,
event->ann,
event->fun);
}
break;
@@ -731,27 +731,27 @@ found:
}
static void
pgf_lzr_cache_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin_idx, PgfCId fun)
pgf_lzr_cache_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{
PgfLzrCache* cache = gu_container(funcs, PgfLzrCache, funcs);
PgfLzrCached* event = gu_buf_extend(cache->events);
event->tag = PGF_CACHED_BEGIN;
event->cat = cat;
event->fid = fid;
event->lin_idx = lin_idx;
event->fun = fun;
event->tag = PGF_CACHED_BEGIN;
event->cat = cat;
event->fid = fid;
event->ann = ann;
event->fun = fun;
}
static void
pgf_lzr_cache_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin_idx, PgfCId fun)
pgf_lzr_cache_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{
PgfLzrCache* cache = gu_container(funcs, PgfLzrCache, funcs);
PgfLzrCached* event = gu_buf_extend(cache->events);
event->tag = PGF_CACHED_END;
event->cat = cat;
event->fid = fid;
event->lin_idx = lin_idx;
event->fun = fun;
event->tag = PGF_CACHED_END;
event->cat = cat;
event->fid = fid;
event->ann = ann;
event->fun = fun;
}
static void
@@ -939,8 +939,8 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx)
if ((*lzr->funcs)->begin_phrase && fapp->ccat != NULL) {
(*lzr->funcs)->begin_phrase(lzr->funcs,
fun->absfun->type->cid,
fapp->fid, lin_idx,
fapp->ccat->cnccat->abscat->name,
fapp->fid, fapp->ccat->cnccat->labels[lin_idx],
fun->absfun->name);
}
@@ -949,8 +949,8 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx)
if ((*lzr->funcs)->end_phrase && fapp->ccat != NULL) {
(*lzr->funcs)->end_phrase(lzr->funcs,
fun->absfun->type->cid,
fapp->fid, lin_idx,
fapp->ccat->cnccat->abscat->name,
fapp->fid, fapp->ccat->cnccat->labels[lin_idx],
fun->absfun->name);
}
break;
@@ -979,7 +979,7 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx)
if ((*lzr->funcs)->begin_phrase) {
(*lzr->funcs)->begin_phrase(lzr->funcs,
cat, flit->fid, 0,
cat, flit->fid, "s",
"");
}
@@ -1011,7 +1011,7 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx)
if ((*lzr->funcs)->end_phrase) {
(*lzr->funcs)->end_phrase(lzr->funcs,
cat, flit->fid, 0,
cat, flit->fid, "s",
"");
}

View File

@@ -83,10 +83,10 @@ struct PgfLinFuncs
void (*symbol_token)(PgfLinFuncs** self, PgfToken tok);
/// Begin phrase
void (*begin_phrase)(PgfLinFuncs** self, PgfCId cat, int fid, size_t lindex, PgfCId fun);
void (*begin_phrase)(PgfLinFuncs** self, PgfCId cat, int fid, GuString ann, PgfCId fun);
/// End phrase
void (*end_phrase)(PgfLinFuncs** self, PgfCId cat, int fid, size_t lindex, PgfCId fun);
void (*end_phrase)(PgfLinFuncs** self, PgfCId cat, int fid, GuString ann, PgfCId fun);
/// handling nonExist
void (*symbol_ne)(PgfLinFuncs** self);

View File

@@ -6,11 +6,12 @@
static PgfExprProb*
pgf_match_string_lit(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx,
GuString ann,
GuString sentence, size_t* poffset,
GuPool *out_pool)
{
gu_assert(lin_idx == 0);
if (strcmp(ann,"s") != 0)
return NULL;
const uint8_t* buf = (uint8_t*) (sentence + *poffset);
const uint8_t* p = buf;
@@ -51,7 +52,7 @@ pgf_predict_empty_next(GuEnum* self, void* to, GuPool* pool)
static GuEnum*
pgf_predict_empty(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx,
GuString ann,
GuString prefix,
GuPool *out_pool)
{
@@ -67,11 +68,12 @@ static PgfLiteralCallback pgf_string_literal_callback =
static PgfExprProb*
pgf_match_int_lit(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx,
GuString ann,
GuString sentence, size_t* poffset,
GuPool *out_pool)
{
gu_assert(lin_idx == 0);
if (strcmp(ann,"s") != 0)
return NULL;
const uint8_t* buf = (uint8_t*) (sentence + *poffset);
const uint8_t* p = buf;
@@ -121,11 +123,12 @@ static PgfLiteralCallback pgf_int_literal_callback =
static PgfExprProb*
pgf_match_float_lit(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx,
GuString ann,
GuString sentence, size_t* poffset,
GuPool *out_pool)
{
gu_assert(lin_idx == 0);
if (strcmp(ann,"s") != 0)
return NULL;
const uint8_t* buf = (uint8_t*) (sentence + *poffset);
const uint8_t* p = buf;
@@ -226,11 +229,11 @@ pgf_match_name_morpho_callback(PgfMorphoCallback* self_,
static PgfExprProb*
pgf_match_name_lit(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx,
GuString ann,
GuString sentence, size_t* poffset,
GuPool *out_pool)
{
if (lin_idx != 0)
if (strcmp(ann,"s") != 0)
return NULL;
GuPool* tmp_pool = gu_local_pool();
@@ -349,7 +352,7 @@ pgf_match_unknown_morpho_callback(PgfMorphoCallback* self_,
static PgfExprProb*
pgf_match_unknown_lit(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx,
GuString ann,
GuString sentence, size_t* poffset,
GuPool *out_pool)
{

View File

@@ -869,7 +869,7 @@ pgf_lookup_symbol_token(PgfLinFuncs** self, PgfToken token)
}
static void
pgf_lookup_begin_phrase(PgfLinFuncs** self, PgfCId cat, int fid, size_t lindex, PgfCId funname)
pgf_lookup_begin_phrase(PgfLinFuncs** self, PgfCId cat, int fid, GuString ann, PgfCId funname)
{
PgfLookupState* st = gu_container(self, PgfLookupState, funcs);
@@ -883,7 +883,7 @@ pgf_lookup_begin_phrase(PgfLinFuncs** self, PgfCId cat, int fid, size_t lindex,
}
static void
pgf_lookup_end_phrase(PgfLinFuncs** self, PgfCId cat, int fid, size_t lindex, PgfCId fun)
pgf_lookup_end_phrase(PgfLinFuncs** self, PgfCId cat, int fid, GuString ann, PgfCId fun)
{
PgfLookupState* st = gu_container(self, PgfLookupState, funcs);
st->curr_absfun = NULL;

View File

@@ -61,6 +61,14 @@ typedef struct {
typedef enum { BIND_NONE, BIND_HARD, BIND_SOFT } BIND_TYPE;
typedef struct {
PgfProductionIdx* idx;
size_t offset;
size_t sym_idx;
} PgfLexiconIdxEntry;
typedef GuBuf PgfLexiconIdx;
struct PgfParseState {
PgfParseState* next;
@@ -74,6 +82,8 @@ struct PgfParseState {
size_t end_offset;
prob_t viterbi_prob;
PgfLexiconIdx* lexicon_idx;
};
typedef struct PgfAnswers {
@@ -113,43 +123,10 @@ struct PgfItem {
prob_t inside_prob;
};
static PgfSymbol
pgf_prev_extern_sym(PgfSymbol sym)
{
GuVariantInfo i = gu_variant_open(sym);
switch (i.tag) {
case PGF_SYMBOL_CAT:
return *((PgfSymbol*) (((PgfSymbolCat*) i.data)+1));
case PGF_SYMBOL_KP:
return *((PgfSymbol*) (((PgfSymbolKP*) i.data)+1));
case PGF_SYMBOL_KS: {
PgfSymbolKS* sks = (PgfSymbolKS*) i.data;
size_t tok_len = strlen(sks->token);
return *((PgfSymbol*) (((uint8_t*) sks)+sizeof(PgfSymbolKS)+tok_len+1));
}
case PGF_SYMBOL_LIT:
return *((PgfSymbol*) (((PgfSymbolLit*) i.data)+1));
case PGF_SYMBOL_VAR:
return *((PgfSymbol*) (((PgfSymbolVar*) i.data)+1));
case PGF_SYMBOL_BIND:
case PGF_SYMBOL_SOFT_BIND:
case PGF_SYMBOL_SOFT_SPACE:
return *((PgfSymbol*) (((PgfSymbolBIND*) i.data)+1));
case PGF_SYMBOL_CAPIT:
case PGF_SYMBOL_ALL_CAPIT:
return *((PgfSymbol*) (((PgfSymbolCAPIT*) i.data)+1));
case PGF_SYMBOL_NE:
return *((PgfSymbol*) (((PgfSymbolNE*) i.data)+1));
default:
gu_impossible();
return gu_null_variant;
}
}
static PgfSymbol
static PgfSymbols*
pgf_collect_extern_tok(PgfParsing* ps, size_t start_offset, size_t end_offset)
{
PgfSymbol sym = gu_null_variant;
GuBuf* syms = gu_new_buf(PgfSymbol, ps->pool);
const uint8_t* start = (uint8_t*) ps->sentence+start_offset;
const uint8_t* end = (uint8_t*) ps->sentence+end_offset;
@@ -163,16 +140,15 @@ pgf_collect_extern_tok(PgfParsing* ps, size_t start_offset, size_t end_offset)
ucs = gu_utf8_decode(&p);
}
PgfSymbol new_sym;
PgfSymbol sym;
PgfSymbolKS* sks = (PgfSymbolKS*)
gu_alloc_variant(PGF_SYMBOL_KS,
sizeof(PgfSymbol)+sizeof(PgfSymbolKS)+len+1,
gu_alignof(PgfSymbolKS),
&new_sym, ps->pool);
sizeof(PgfSymbolKS)+len+1,
gu_alignof(PgfSymbolKS),
&sym, ps->pool);
memcpy((char*) sks->token, start, len);
((char*) sks->token)[len] = 0;
*((PgfSymbol*) (((uint8_t*) sks)+sizeof(PgfSymbolKS)+len+1)) = sym;
sym = new_sym;
gu_buf_push(syms, PgfSymbol, sym);
start = p;
while (gu_ucs_is_space(ucs)) {
@@ -181,68 +157,16 @@ pgf_collect_extern_tok(PgfParsing* ps, size_t start_offset, size_t end_offset)
}
}
return sym;
}
static size_t
pgf_item_symbols_length(PgfItem* item)
{
GuVariantInfo i = gu_variant_open(item->prod);
switch (i.tag) {
case PGF_PRODUCTION_APPLY: {
PgfProductionApply* papp = i.data;
return gu_seq_length(papp->fun->lins[item->conts->lin_idx]->syms);
}
case PGF_PRODUCTION_COERCE: {
return 1;
}
case PGF_PRODUCTION_EXTERN: {
PgfProductionExtern* pext = i.data;
PgfSymbols* syms;
if (pext->lins != NULL &&
(syms = gu_seq_get(pext->lins,PgfSymbols*,item->conts->lin_idx)) != NULL) {
return gu_seq_length(syms);
} else {
int seq_len = 0;
PgfSymbol sym = item->curr_sym;
while (!gu_variant_is_null(sym)) {
seq_len++;
sym = pgf_prev_extern_sym(sym);
}
return seq_len;
}
}
default:
gu_impossible();
return 0;
}
}
static PgfSymbols*
pgf_extern_syms_get(PgfItem* item, GuPool* pool)
{
int syms_len = pgf_item_symbols_length(item);
PgfSymbols* syms =
gu_new_seq(PgfSymbol, syms_len, pool);
PgfSymbol sym = item->curr_sym;
while (!gu_variant_is_null(sym)) {
gu_seq_set(syms, PgfSymbol, --syms_len, sym);
sym = pgf_prev_extern_sym(sym);
}
return syms;
return gu_buf_data_seq(syms);
}
#ifdef PGF_PARSER_DEBUG
PGF_INTERNAL void
pgf_print_fid(int fid, GuOut* out, GuExn* err);
PGF_INTERNAL_DECL void
pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err);
#ifdef PGF_PARSER_DEBUG
static void
pgf_item_symbols(PgfItem* item,
size_t* lin_idx, PgfSymbols** syms,
@@ -267,11 +191,7 @@ pgf_item_symbols(PgfItem* item,
}
case PGF_PRODUCTION_EXTERN: {
PgfProductionExtern* pext = i.data;
if (pext->lins == NULL ||
(*syms = gu_seq_get(pext->lins, PgfSymbols*, item->conts->lin_idx)) == NULL) {
*syms = pgf_extern_syms_get(item, pool);
}
*syms = pext->lins[item->conts->lin_idx];
break;
}
default:
@@ -603,16 +523,11 @@ pgf_item_set_curr_symbol(PgfItem* item, GuPool* pool)
case PGF_PRODUCTION_EXTERN: {
PgfProductionExtern* pext = i.data;
PgfSymbols* syms;
if (pext->lins != NULL &&
(syms = gu_seq_get(pext->lins,PgfSymbols*,item->conts->lin_idx)) != NULL) {
if (item->sym_idx == gu_seq_length(syms)) {
item->curr_sym = gu_null_variant;
} else {
item->curr_sym = gu_seq_get(syms, PgfSymbol, item->sym_idx);
}
} else {
PgfSymbols* syms = pext->lins[item->conts->lin_idx];
if (item->sym_idx == gu_seq_length(syms)) {
item->curr_sym = gu_null_variant;
} else {
item->curr_sym = gu_seq_get(syms, PgfSymbol, item->sym_idx);
}
break;
}
@@ -781,16 +696,6 @@ pgf_result_production(PgfParsing* ps,
static void
pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep);
static void
pgf_parsing_push_item(PgfParseState* state, PgfItem* item)
{
if (gu_buf_length(state->agenda) == 0) {
state->viterbi_prob =
item->inside_prob+item->conts->outside_prob;
}
gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
}
static void
pgf_parsing_push_production(PgfParsing* ps, PgfParseState* state,
PgfItemConts* conts, PgfProduction prod)
@@ -822,7 +727,7 @@ pgf_parsing_combine(PgfParsing* ps,
}
pgf_item_advance(item, ps->pool);
pgf_parsing_push_item(before, item);
gu_buf_heap_push(before->agenda, pgf_item_prob_order, &item);
}
static PgfProduction
@@ -851,36 +756,7 @@ pgf_parsing_new_production(PgfItem* item, PgfExprProb *ep, GuPool *pool)
break;
}
case PGF_PRODUCTION_EXTERN: {
PgfProductionExtern* pext = i.data;
if (pext->lins == NULL ||
gu_seq_get(pext->lins,PgfSymbols*,item->conts->lin_idx) == NULL) {
PgfSymbols* syms =
pgf_extern_syms_get(item, pool);
size_t n_lins = item->conts->ccat->cnccat->n_lins;
PgfProductionExtern* new_pext = (PgfProductionExtern*)
gu_new_variant(PGF_PRODUCTION_EXTERN,
PgfProductionExtern,
&prod, pool);
new_pext->ep = ep;
new_pext->lins = gu_new_seq(PgfSymbols*, n_lins, pool);
if (pext->lins == NULL) {
for (size_t i = 0; i < n_lins; i++) {
gu_seq_set(new_pext->lins,PgfSymbols*,i,NULL);
}
} else {
for (size_t i = 0; i < n_lins; i++) {
gu_seq_set(new_pext->lins,PgfSymbols*,i,
gu_seq_get(pext->lins,PgfSymbols*,i));
}
}
gu_seq_set(new_pext->lins,PgfSymbols*,item->conts->lin_idx,syms);
} else {
prod = item->prod;
}
prod = item->prod;
break;
}
default:
@@ -1022,9 +898,65 @@ pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep)
}
}
PGF_INTERNAL_DECL int
pgf_symbols_cmp(PgfCohortSpot* spot,
PgfSymbols* syms, size_t* sym_idx,
bool case_sensitive);
static void
pgf_parsing_lookahead(PgfParsing *ps, PgfParseState* state,
int i, int j, ptrdiff_t min, ptrdiff_t max)
{
// This is a variation of a binary search algorithm which
// can retrieve all prefixes of a string with minimal
// comparisons, i.e. there is no need to lookup every
// prefix separately.
while (i <= j) {
int k = (i+j) / 2;
PgfSequence* seq = gu_seq_index(ps->concr->sequences, PgfSequence, k);
PgfCohortSpot start = {0, ps->sentence + state->end_offset};
PgfCohortSpot current = start;
size_t sym_idx = 0;
int cmp = pgf_symbols_cmp(&current, seq->syms, &sym_idx, ps->case_sensitive);
if (cmp < 0) {
j = k-1;
} else if (cmp > 0) {
ptrdiff_t len = current.ptr - start.ptr;
if (min <= len)
pgf_parsing_lookahead(ps, state, i, k-1, min, len);
if (len+1 <= max)
pgf_parsing_lookahead(ps, state, k+1, j, len+1, max);
break;
} else {
ptrdiff_t len = current.ptr - start.ptr;
if (min <= len-1)
pgf_parsing_lookahead(ps, state, i, k-1, min, len-1);
if (seq->idx != NULL) {
PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
entry->idx = seq->idx;
entry->offset = (size_t) (current.ptr - ps->sentence);
entry->sym_idx = sym_idx;
}
if (len+1 <= max)
pgf_parsing_lookahead(ps, state, k+1, j, len+1, max);
break;
}
}
}
static PgfParseState*
pgf_new_parse_state(PgfParsing* ps, size_t start_offset,
BIND_TYPE bind_type)
BIND_TYPE bind_type,
prob_t viterbi_prob)
{
PgfParseState** pstate;
if (ps->before == NULL && start_offset == 0)
@@ -1077,172 +1009,36 @@ pgf_new_parse_state(PgfParsing* ps, size_t start_offset,
(start_offset == end_offset);
state->start_offset = start_offset;
state->end_offset = end_offset;
state->viterbi_prob = 0;
state->viterbi_prob = viterbi_prob;
state->lexicon_idx =
gu_new_buf(PgfLexiconIdxEntry, ps->pool);
if (ps->before == NULL && start_offset == 0)
state->needs_bind = false;
if (gu_seq_length(ps->concr->sequences) > 0) {
// Add epsilon lexical rules to the bottom up index
PgfSequence* seq = gu_seq_index(ps->concr->sequences, PgfSequence, 0);
if (gu_seq_length(seq->syms) == 0 && seq->idx != NULL) {
PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
entry->idx = seq->idx;
entry->offset = state->start_offset;
entry->sym_idx= 0;
}
// Add non-epsilon lexical rules to the bottom up index
if (!state->needs_bind) {
pgf_parsing_lookahead(ps, state,
0, gu_seq_length(ps->concr->sequences)-1,
1, strlen(ps->sentence)-state->end_offset);
}
}
*pstate = state;
return state;
}
PGF_INTERNAL_DECL int
pgf_symbols_cmp(PgfCohortSpot* spot,
PgfSymbols* syms, size_t* sym_idx,
bool case_sensitive);
static bool
pgf_parsing_scan_helper(PgfParsing *ps, PgfParseState* state,
int i, int j, ptrdiff_t min, ptrdiff_t max)
{
// This is a variation of a binary search algorithm which
// can retrieve all prefixes of a string with minimal
// comparisons, i.e. there is no need to lookup every
// prefix separately.
bool found = false;
while (i <= j) {
int k = (i+j) / 2;
PgfSequence* seq = gu_seq_index(ps->concr->sequences, PgfSequence, k);
PgfCohortSpot start = {0, ps->sentence+state->end_offset};
PgfCohortSpot current = start;
size_t sym_idx = 0;
int cmp = pgf_symbols_cmp(&current, seq->syms, &sym_idx, ps->case_sensitive);
if (cmp < 0) {
j = k-1;
} else if (cmp > 0) {
ptrdiff_t len = current.ptr - start.ptr;
if (min <= len)
if (pgf_parsing_scan_helper(ps, state, i, k-1, min, len))
found = true;
if (len+1 <= max)
if (pgf_parsing_scan_helper(ps, state, k+1, j, len+1, max))
found = true;
break;
} else {
ptrdiff_t len = current.ptr - start.ptr;
if (min <= len)
if (pgf_parsing_scan_helper(ps, state, i, k-1, min, len))
found = true;
// Here we do bottom-up prediction for all lexical categories.
// The epsilon productions will be predicted in top-down
// fashion while parsing.
if (seq->idx != NULL && len > 0) {
found = true;
// A new state will mark the end of the current match
PgfParseState* new_state =
pgf_new_parse_state(ps, (size_t) (current.ptr - ps->sentence), BIND_NONE);
// Bottom-up prediction for lexical rules
size_t n_entries = gu_buf_length(seq->idx);
for (size_t i = 0; i < n_entries; i++) {
PgfProductionIdxEntry* entry =
gu_buf_index(seq->idx, PgfProductionIdxEntry, i);
PgfItemConts* conts =
pgf_parsing_get_conts(state,
entry->ccat, entry->lin_idx,
ps->pool);
// Create the new category if it doesn't exist yet
PgfCCat* tmp_ccat = pgf_parsing_get_completed(new_state, conts);
PgfCCat* ccat = tmp_ccat;
if (ccat == NULL) {
ccat = pgf_parsing_create_completed(ps, new_state, conts, INFINITY);
}
// Add the production
if (ccat->prods == NULL || ccat->n_synprods >= gu_seq_length(ccat->prods)) {
ccat->prods = gu_realloc_seq(ccat->prods, PgfProduction, ccat->n_synprods+1);
}
GuVariantInfo i;
i.tag = PGF_PRODUCTION_APPLY;
i.data = entry->papp;
PgfProduction prod = gu_variant_close(i);
gu_seq_set(ccat->prods, PgfProduction, ccat->n_synprods++, prod);
// Update the category's probability to be minimum
if (ccat->viterbi_prob > entry->papp->fun->ep->prob)
ccat->viterbi_prob = entry->papp->fun->ep->prob;
#ifdef PGF_PARSER_DEBUG
GuPool* tmp_pool = gu_new_pool();
GuOut* out = gu_file_out(stderr, tmp_pool);
GuExn* err = gu_exn(tmp_pool);
if (tmp_ccat == NULL) {
gu_printf(out, err, "[");
pgf_print_range(state, new_state, out, err);
gu_puts("; ", out, err);
pgf_print_fid(conts->ccat->fid, out, err);
gu_printf(out, err, "; %d; ",
conts->lin_idx);
pgf_print_fid(ccat->fid, out, err);
gu_puts("] ", out, err);
pgf_print_fid(ccat->fid, out, err);
gu_printf(out, err, ".chunk_count=%d\n", ccat->chunk_count);
}
pgf_print_production(ccat->fid, prod, out, err);
gu_pool_free(tmp_pool);
#endif
}
}
if (len <= max)
if (pgf_parsing_scan_helper(ps, state, k+1, j, len, max))
found = true;
break;
}
}
return found;
}
static void
pgf_parsing_scan(PgfParsing *ps)
{
size_t len = strlen(ps->sentence);
PgfParseState* state =
pgf_new_parse_state(ps, 0, BIND_SOFT);
while (state != NULL && state->end_offset < len) {
if (state->needs_bind) {
// We have encountered two tokens without space in between.
// Those can be accepted only if there is a BIND token
// in between. We encode this by having one more state
// at the same offset. A transition between these two
// states is possible only with the BIND token.
state =
pgf_new_parse_state(ps, state->end_offset, BIND_HARD);
}
if (!pgf_parsing_scan_helper
(ps, state,
0, gu_seq_length(ps->concr->sequences)-1,
1, len-state->end_offset)) {
// skip one character and try again
GuString s = ps->sentence+state->end_offset;
gu_utf8_decode((const uint8_t**) &s);
pgf_new_parse_state(ps, s-ps->sentence, BIND_NONE);
}
if (state == ps->before)
state = ps->after;
else
state = state->next;
}
}
static void
pgf_parsing_add_transition(PgfParsing* ps, PgfToken tok, PgfItem* item)
{
@@ -1262,8 +1058,9 @@ pgf_parsing_add_transition(PgfParsing* ps, PgfToken tok, PgfItem* item)
if (!ps->before->needs_bind && cmp_string(&current, tok, ps->case_sensitive) == 0) {
PgfParseState* state =
pgf_new_parse_state(ps, (current.ptr - ps->sentence),
BIND_NONE);
pgf_parsing_push_item(state, item);
BIND_NONE,
item->inside_prob+item->conts->outside_prob);
gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
} else {
pgf_item_free(ps, item);
}
@@ -1273,17 +1070,18 @@ pgf_parsing_add_transition(PgfParsing* ps, PgfToken tok, PgfItem* item)
static void
pgf_parsing_predict_lexeme(PgfParsing* ps, PgfItemConts* conts,
PgfProductionIdxEntry* entry,
size_t offset)
size_t offset, size_t sym_idx)
{
GuVariantInfo i = { PGF_PRODUCTION_APPLY, entry->papp };
PgfProduction prod = gu_variant_close(i);
PgfItem* item =
pgf_new_item(ps, conts, prod);
PgfSymbols* syms = entry->papp->fun->lins[conts->lin_idx]->syms;
item->sym_idx = gu_seq_length(syms);
item->sym_idx = sym_idx;
pgf_item_set_curr_symbol(item, ps->pool);
prob_t prob = item->inside_prob+item->conts->outside_prob;
PgfParseState* state =
pgf_new_parse_state(ps, offset, BIND_NONE);
pgf_new_parse_state(ps, offset, BIND_NONE, prob);
if (state->viterbi_prob > prob) {
state->viterbi_prob = prob;
}
@@ -1337,36 +1135,34 @@ pgf_parsing_td_predict(PgfParsing* ps,
pgf_parsing_push_production(ps, ps->before, conts, prod);
}
// Top-down prediction for epsilon lexical rules if any
PgfSequence* seq = gu_seq_index(ps->concr->sequences, PgfSequence, 0);
if (gu_seq_length(seq->syms) == 0 && seq->idx != NULL) {
// Bottom-up prediction for lexical and epsilon rules
size_t n_idcs = gu_buf_length(ps->before->lexicon_idx);
for (size_t i = 0; i < n_idcs; i++) {
PgfLexiconIdxEntry* lentry =
gu_buf_index(ps->before->lexicon_idx, PgfLexiconIdxEntry, i);
PgfProductionIdxEntry key;
key.ccat = ccat;
key.lin_idx = lin_idx;
key.papp = NULL;
PgfProductionIdxEntry* value =
gu_seq_binsearch(gu_buf_data_seq(seq->idx),
gu_seq_binsearch(gu_buf_data_seq(lentry->idx),
pgf_production_idx_entry_order,
PgfProductionIdxEntry, &key);
if (value != NULL) {
GuVariantInfo i = { PGF_PRODUCTION_APPLY, value->papp };
PgfProduction prod = gu_variant_close(i);
pgf_parsing_push_production(ps, ps->before, conts, prod);
pgf_parsing_predict_lexeme(ps, conts, value, lentry->offset, lentry->sym_idx);
PgfProductionIdxEntry* start =
gu_buf_data(seq->idx);
gu_buf_data(lentry->idx);
PgfProductionIdxEntry* end =
start + gu_buf_length(seq->idx)-1;
start + gu_buf_length(lentry->idx)-1;
PgfProductionIdxEntry* left = value-1;
while (left >= start &&
value->ccat->fid == left->ccat->fid &&
value->lin_idx == left->lin_idx) {
GuVariantInfo i = { PGF_PRODUCTION_APPLY, left->papp };
PgfProduction prod = gu_variant_close(i);
pgf_parsing_push_production(ps, ps->before, conts, prod);
pgf_parsing_predict_lexeme(ps, conts, left, lentry->offset, lentry->sym_idx);
left--;
}
@@ -1374,9 +1170,7 @@ pgf_parsing_td_predict(PgfParsing* ps,
while (right <= end &&
value->ccat->fid == right->ccat->fid &&
value->lin_idx == right->lin_idx) {
GuVariantInfo i = { PGF_PRODUCTION_APPLY, right->papp };
PgfProduction prod = gu_variant_close(i);
pgf_parsing_push_production(ps, ps->before, conts, prod);
pgf_parsing_predict_lexeme(ps, conts, right, lentry->offset, lentry->sym_idx);
right++;
}
}
@@ -1415,7 +1209,7 @@ pgf_parsing_pre(PgfParsing* ps, PgfItem* item, PgfSymbols* syms)
} else {
item->alt = 0;
pgf_item_advance(item, ps->pool);
pgf_parsing_push_item(ps->before, item);
gu_buf_heap_push(ps->before->agenda, pgf_item_prob_order, &item);
}
}
@@ -1514,28 +1308,40 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
if (callback != NULL) {
ep = callback->match(callback, ps->concr,
slit->r,
parg->ccat->cnccat->labels[slit->r],
ps->sentence, &offset,
ps->out_pool);
}
}
if (ep != NULL) {
PgfSymbols* syms =
pgf_collect_extern_tok(ps, start, offset);
size_t n_lins = conts->ccat->cnccat->n_lins;
PgfProduction prod;
PgfProductionExtern* pext =
gu_new_variant(PGF_PRODUCTION_EXTERN,
PgfProductionExtern,
&prod, ps->pool);
pext->ep = ep;
pext->lins = NULL;
gu_new_flex_variant(PGF_PRODUCTION_EXTERN,
PgfProductionExtern,
lins, n_lins,
&prod, ps->pool);
pext->ep = ep;
pext->n_lins = n_lins;
for (size_t i = 0; i < n_lins; i++) {
pext->lins[i] = NULL;
}
pext->lins[conts->lin_idx] = syms;
PgfItem* item =
pgf_new_item(ps, conts, prod);
item->curr_sym = pgf_collect_extern_tok(ps,start,offset);
item->sym_idx = pgf_item_symbols_length(item);
item->curr_sym = gu_null_variant;
item->sym_idx = gu_seq_length(syms);
PgfParseState* state =
pgf_new_parse_state(ps, offset, BIND_NONE);
pgf_parsing_push_item(state, item);
pgf_new_parse_state(ps, offset, BIND_NONE,
item->inside_prob+item->conts->outside_prob);
gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
match = true;
}
}
@@ -1578,10 +1384,11 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
if (ps->before->start_offset == ps->before->end_offset &&
ps->before->needs_bind) {
PgfParseState* state =
pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD);
pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD,
item->inside_prob+item->conts->outside_prob);
if (state != NULL) {
pgf_item_advance(item, ps->pool);
pgf_parsing_push_item(state, item);
gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
} else {
pgf_item_free(ps, item);
}
@@ -1595,10 +1402,11 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
if (ps->before->start_offset == ps->before->end_offset) {
if (ps->before->needs_bind) {
PgfParseState* state =
pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD);
pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD,
item->inside_prob+item->conts->outside_prob);
if (state != NULL) {
pgf_item_advance(item, ps->pool);
pgf_parsing_push_item(state, item);
gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
} else {
pgf_item_free(ps, item);
}
@@ -1607,12 +1415,13 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
}
} else {
pgf_item_advance(item, ps->pool);
pgf_parsing_push_item(ps->before, item);
gu_buf_heap_push(ps->before->agenda, pgf_item_prob_order, &item);
}
break;
}
case PGF_SYMBOL_CAPIT:
case PGF_SYMBOL_ALL_CAPIT: {
printf("PGF_SYMBOL_CAPIT\n");
pgf_item_advance(item, ps->pool);
pgf_parsing_symbol(ps, item, item->curr_sym);
break;
@@ -1857,7 +1666,8 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat,
ps->heuristic_factor = heuristic_factor;
}
pgf_parsing_scan(ps);
PgfParseState* state =
pgf_new_parse_state(ps, 0, BIND_SOFT, 0);
int fidString = -1;
PgfCCat* start_ccat = gu_new(PgfCCat, ps->pool);
@@ -1879,7 +1689,7 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat,
#endif
PgfItemConts* conts =
pgf_parsing_get_conts(ps->before, start_ccat, 0, ps->pool);
pgf_parsing_get_conts(state, start_ccat, 0, ps->pool);
gu_buf_push(conts->items, PgfItem*, NULL);
size_t n_ccats = gu_seq_length(cnccat->cats);
@@ -2218,6 +2028,8 @@ pgf_process_generated_cat(PgfParsing* ps,
children[i] = pcoerce->coerce;
break;
}
case PGF_PRODUCTION_EXTERN:
just_coercions = false;
}
}
@@ -2363,6 +2175,104 @@ pgf_parse_with_heuristics(PgfConcr* concr, PgfType* typ, GuString sentence,
return &ps->en;
}
PGF_API PgfParsing*
pgf_parse_to_chart(PgfConcr* concr, PgfType* typ, GuString sentence,
double heuristics,
PgfCallbacksMap* callbacks,
size_t n_roots,
GuExn* err,
GuPool* pool, GuPool* out_pool)
{
if (concr->sequences == NULL ||
concr->cnccats == NULL) {
GuExnData* err_data = gu_raise(err, PgfExn);
if (err_data) {
err_data->data = "The concrete syntax is not loaded";
return NULL;
}
}
// Begin parsing a sentence with the specified category
PgfParsing* ps =
pgf_parsing_init(concr, typ->cid, sentence, heuristics, callbacks, NULL, err, pool, out_pool);
if (ps == NULL) {
return NULL;
}
#ifdef PGF_COUNTS_DEBUG
pgf_parsing_print_counts(ps);
#endif
while (gu_buf_length(ps->expr_queue) < n_roots) {
if (!pgf_parsing_proceed(ps)) {
break;
}
#ifdef PGF_COUNTS_DEBUG
pgf_parsing_print_counts(ps);
#endif
}
return ps;
}
PGF_API PgfCCats*
pgf_get_parse_roots(PgfParsing* ps, GuPool* pool)
{
size_t n_cats = 0;
size_t n_states = gu_buf_length(ps->expr_queue);
GuSeq* roots = gu_new_seq(PgfCCat*, n_states, pool);
for (size_t i = 0; i < n_states; i++) {
PgfCCat* ccat = gu_buf_get(ps->expr_queue, PgfExprState*, i)->answers->ccat;
bool found = false;
for (size_t j = 0; j < n_cats; j++) {
if (gu_seq_get(roots, PgfCCat*, j) == ccat) {
found = true;
break;
}
}
if (!found) {
gu_seq_set(roots, PgfCCat*, n_cats, ccat);
n_cats++;
}
}
roots->len = n_cats;
return roots;
}
PGF_API GuSeq*
pgf_ccat_to_range(PgfParsing* ps, PgfCCat* ccat, GuPool* pool)
{
PgfParseState* state = ps->before;
GuBuf* buf = gu_new_buf(PgfParseRange, pool);
while (ccat->conts != NULL) {
size_t start = ccat->conts->state->end_offset;
size_t end = start;
while (state != NULL) {
if (pgf_parsing_get_completed(state, ccat->conts) == ccat) {
if (state->start_offset >= start)
end = state->start_offset;
break;
}
state = state->next;
}
if (start != end) {
PgfParseRange* range = gu_buf_extend(buf);
range->start = start;
range->end = end;
range->field = ccat->cnccat->labels[ccat->conts->lin_idx];
}
ccat = ccat->conts->ccat;
}
return gu_buf_data_seq(buf);
}
PGF_API PgfExprEnum*
pgf_parse_with_oracle(PgfConcr* concr, PgfType* typ,
GuString sentence,

View File

@@ -6,7 +6,7 @@
typedef struct {
int start, end;
PgfCId cat;
size_t lin_idx;
GuString ann;
} PgfPhrase;
typedef struct {
@@ -46,14 +46,14 @@ pgf_metrics_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
}
static void
pgf_metrics_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin_index, PgfCId fun)
pgf_metrics_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
gu_buf_push(state->marks, int, state->pos);
}
static void
pgf_metrics_lzn_end_phrase1(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin_idx, PgfCId fun)
pgf_metrics_lzn_end_phrase1(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
@@ -65,7 +65,7 @@ pgf_metrics_lzn_end_phrase1(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin
phrase->start = start;
phrase->end = end;
phrase->cat = cat;
phrase->lin_idx = lin_idx;
phrase->ann = ann;
gu_buf_push(state->phrases, PgfPhrase*, phrase);
}
}
@@ -85,7 +85,7 @@ pgf_metrics_symbol_bind(PgfLinFuncs** funcs)
}
static void
pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin_idx, PgfCId fun)
pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
@@ -100,7 +100,7 @@ pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin
if (phrase->start == start &&
phrase->end == end &&
strcmp(phrase->cat, cat) == 0 &&
phrase->lin_idx == lin_idx) {
strcmp(phrase->ann, ann) == 0) {
state->matches++;
break;
}

View File

@@ -220,6 +220,20 @@ pgf_category_prob(PgfPGF* pgf, PgfCId catname)
return abscat->prob;
}
PGF_API GuString*
pgf_category_fields(PgfConcr* concr, PgfCId catname, size_t *n_lins)
{
PgfCncCat* cnccat =
gu_map_get(concr->cnccats, catname, PgfCncCat*);
if (!cnccat) {
*n_lins = 0;
return NULL;
}
*n_lins = cnccat->n_lins;
return &cnccat->labels;
}
PGF_API GuString
pgf_language_code(PgfConcr* concr)
{

View File

@@ -90,6 +90,9 @@ pgf_category_context(PgfPGF *gr, PgfCId catname);
PGF_API_DECL prob_t
pgf_category_prob(PgfPGF* pgf, PgfCId catname);
PGF_API GuString*
pgf_category_fields(PgfConcr* concr, PgfCId catname, size_t *n_lins);
PGF_API_DECL void
pgf_iter_functions(PgfPGF* pgf, GuMapItor* itor, GuExn* err);
@@ -163,8 +166,8 @@ pgf_lookup_morpho(PgfConcr *concr, GuString sentence,
PgfMorphoCallback* callback, GuExn* err);
typedef struct {
size_t pos;
GuString ptr;
size_t pos; // position in Unicode characters
GuString ptr; // pointer into the string
} PgfCohortSpot;
typedef struct {
@@ -203,6 +206,12 @@ pgf_parse_with_heuristics(PgfConcr* concr, PgfType* typ,
GuExn* err,
GuPool* pool, GuPool* out_pool);
typedef struct {
size_t start;
size_t end;
GuString field;
} PgfParseRange;
typedef struct PgfOracleCallback PgfOracleCallback;
struct PgfOracleCallback {
@@ -243,11 +252,11 @@ typedef struct PgfLiteralCallback PgfLiteralCallback;
struct PgfLiteralCallback {
PgfExprProb* (*match)(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx,
GuString ann,
GuString sentence, size_t* poffset,
GuPool *out_pool);
GuEnum* (*predict)(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx,
GuString ann,
GuString prefix,
GuPool *out_pool);
};

View File

@@ -114,7 +114,7 @@ pgf_morpho_iter(PgfProductionIdx* idx,
PgfCId lemma = entry->papp->fun->absfun->name;
GuString analysis = entry->ccat->cnccat->labels[entry->lin_idx];
prob_t prob = entry->ccat->cnccat->abscat->prob +
entry->papp->fun->absfun->ep.prob;
callback->callback(callback,
@@ -234,12 +234,13 @@ typedef struct {
GuEnum en;
PgfConcr* concr;
GuString sentence;
GuString current;
size_t len;
PgfMorphoCallback* callback;
GuExn* err;
bool case_sensitive;
GuBuf* spots;
GuBuf* skip_spots;
GuBuf* empty_buf;
GuBuf* found;
} PgfCohortsState;
@@ -255,6 +256,23 @@ cmp_cohort_spot(GuOrder* self, const void* a, const void* b)
static GuOrder
pgf_cohort_spot_order[1] = {{ cmp_cohort_spot }};
static void
pgf_lookup_cohorts_report_skip(PgfCohortsState *state,
PgfCohortSpot* spot)
{
size_t n_spots = gu_buf_length(state->skip_spots);
for (size_t i = 0; i < n_spots; i++) {
PgfCohortSpot* skip_spot =
gu_buf_index(state->skip_spots, PgfCohortSpot, i);
PgfCohortRange* range = gu_buf_insert(state->found, 0);
range->start = *skip_spot;
range->end = *spot;
range->buf = state->empty_buf;
}
gu_buf_flush(state->skip_spots);
}
static void
pgf_lookup_cohorts_helper(PgfCohortsState *state, PgfCohortSpot* spot,
int i, int j, ptrdiff_t min, ptrdiff_t max)
@@ -291,18 +309,23 @@ pgf_lookup_cohorts_helper(PgfCohortsState *state, PgfCohortSpot* spot,
pgf_lookup_cohorts_helper(state, spot, i, k-1, min, len);
if (seq->idx != NULL && gu_buf_length(seq->idx) > 0) {
// Report unknown words
pgf_lookup_cohorts_report_skip(state, spot);
// Report the actual hit
PgfCohortRange* range = gu_buf_insert(state->found, 0);
range->start = *spot;
range->end = current;
range->buf = seq->idx;
}
while (*current.ptr != 0) {
if (!skip_space(&current.ptr, &current.pos))
break;
}
// Schedule the next search spot
while (*current.ptr != 0) {
if (!skip_space(&current.ptr, &current.pos))
break;
}
gu_buf_heap_push(state->spots, pgf_cohort_spot_order, &current);
gu_buf_heap_push(state->spots, pgf_cohort_spot_order, &current);
}
if (len <= max)
pgf_lookup_cohorts_helper(state, spot, k+1, j, len, max);
@@ -318,29 +341,67 @@ pgf_lookup_cohorts_enum_next(GuEnum* self, void* to, GuPool* pool)
PgfCohortsState* state = gu_container(self, PgfCohortsState, en);
while (gu_buf_length(state->found) == 0 &&
gu_buf_length(state->spots) > 0) {
gu_buf_length(state->spots) > 0) {
PgfCohortSpot spot;
gu_buf_heap_pop(state->spots, pgf_cohort_spot_order, &spot);
if (spot.ptr == state->current)
continue;
GuString next_ptr = state->sentence+state->len;
while (gu_buf_length(state->spots) > 0) {
GuString ptr =
gu_buf_index(state->spots, PgfCohortSpot, 0)->ptr;
if (ptr > spot.ptr) {
next_ptr = ptr;
break;
}
gu_buf_heap_pop(state->spots, pgf_cohort_spot_order, &spot);
}
if (*spot.ptr == 0)
break;
bool needs_report = true;
while (next_ptr > spot.ptr) {
pgf_lookup_cohorts_helper
(state, &spot,
0, gu_seq_length(state->concr->sequences)-1,
1, (state->sentence+state->len)-spot.ptr);
pgf_lookup_cohorts_helper
(state, &spot,
0, gu_seq_length(state->concr->sequences)-1,
1, (state->sentence+state->len)-spot.ptr);
if (gu_buf_length(state->found) == 0) {
// skip one character and try again
gu_utf8_decode((const uint8_t**) &spot.ptr);
spot.pos++;
gu_buf_heap_push(state->spots, pgf_cohort_spot_order, &spot);
// got a hit -> exit
if (gu_buf_length(state->found) > 0)
break;
if (needs_report) {
// no hit, but the word must be reported as unknown.
gu_buf_push(state->skip_spots, PgfCohortSpot, spot);
needs_report = false;
}
// skip one character
const uint8_t* ptr = (const uint8_t*) spot.ptr;
GuUCS c = gu_utf8_decode(&ptr);
if (gu_ucs_is_space(c)) {
// We have encounter a space and we must report
// a new unknown word.
pgf_lookup_cohorts_report_skip(state, &spot);
spot.ptr = (GuString) ptr;
spot.pos++;
// Schedule the next search spot
while (*spot.ptr != 0) {
if (!skip_space(&spot.ptr, &spot.pos))
break;
}
gu_buf_heap_push(state->spots, pgf_cohort_spot_order, &spot);
break;
} else {
spot.ptr = (GuString) ptr;
spot.pos++;
}
}
}
PgfCohortSpot end_spot = {state->len, state->sentence+state->len};
pgf_lookup_cohorts_report_skip(state, &end_spot);
PgfCohortRange* pRes = (PgfCohortRange*)to;
if (gu_buf_length(state->found) == 0) {
@@ -349,15 +410,19 @@ pgf_lookup_cohorts_enum_next(GuEnum* self, void* to, GuPool* pool)
pRes->end.pos = 0;
pRes->end.ptr = NULL;
pRes->buf = NULL;
state->current = NULL;
return;
} else do {
} else for (;;) {
*pRes = gu_buf_pop(state->found, PgfCohortRange);
state->current = pRes->start.ptr;
pgf_morpho_iter(pRes->buf, state->callback, state->err);
} while (gu_buf_length(state->found) > 0 &&
gu_buf_index_last(state->found, PgfCohortRange)->end.ptr == pRes->end.ptr);
if (gu_buf_length(state->found) <= 0)
break;
PgfCohortRange* last =
gu_buf_index_last(state->found, PgfCohortRange);
if (last->start.ptr != pRes->start.ptr ||
last->end.ptr != pRes->end.ptr)
break;
}
}
PGF_API GuEnum*
@@ -374,15 +439,17 @@ pgf_lookup_cohorts(PgfConcr *concr, GuString sentence,
}
PgfCohortsState* state = gu_new(PgfCohortsState, pool);
state->en.next = pgf_lookup_cohorts_enum_next;
state->concr = concr;
state->sentence= sentence;
state->len = strlen(sentence);
state->callback= callback;
state->err = err;
state->case_sensitive = pgf_is_case_sensitive(concr);
state->spots = gu_new_buf(PgfCohortSpot, pool);
state->found = gu_new_buf(PgfCohortRange, pool);
state->en.next = pgf_lookup_cohorts_enum_next;
state->concr = concr;
state->sentence = sentence;
state->len = strlen(sentence);
state->callback = callback;
state->err = err;
state->case_sensitive= pgf_is_case_sensitive(concr);
state->spots = gu_new_buf(PgfCohortSpot, pool);
state->skip_spots = gu_new_buf(PgfCohortSpot, pool);
state->empty_buf = gu_new_buf(PgfProductionIdxEntry, pool);
state->found = gu_new_buf(PgfCohortRange, pool);
PgfCohortSpot spot = {0,sentence};
while (*spot.ptr != 0) {

File diff suppressed because it is too large Load Diff

View File

@@ -1,94 +0,0 @@
#ifndef SG_SG_H_
#define SG_SG_H_
typedef long long int SgId;
#include <gu/exn.h>
#include <pgf/pgf.h>
typedef struct SgSG SgSG;
SgSG*
sg_open(const char *filename, GuExn* err);
void
sg_close(SgSG *sg, GuExn* err);
void
sg_begin_trans(SgSG* sg, GuExn* err);
void
sg_commit(SgSG* sg, GuExn* err);
void
sg_rollback(SgSG* sg, GuExn* err);
SgId
sg_insert_expr(SgSG *sg, PgfExpr expr, int wrFlag, GuExn* err);
PgfExpr
sg_get_expr(SgSG *sg, SgId key, GuPool* out_pool, GuExn* err);
typedef struct SgQueryExprResult SgQueryExprResult;
SgQueryExprResult*
sg_query_expr(SgSG *sg, PgfExpr expr, GuPool* pool, GuExn* err);
PgfExpr
sg_query_next(SgSG *sg, SgQueryExprResult* ctxt, SgId* pKey, GuPool* pool, GuExn* err);
void
sg_query_close(SgSG* sg, SgQueryExprResult* ctxt, GuExn* err);
void
sg_update_fts_index(SgSG* sg, PgfPGF* pgf, GuExn* err);
GuSeq*
sg_query_linearization(SgSG *sg, GuString tok, GuPool* pool, GuExn* err);
typedef PgfExpr SgTriple[3];
SgId
sg_insert_triple(SgSG *sg, SgTriple triple, GuExn* err);
int
sg_get_triple(SgSG *sg, SgId key, SgTriple triple,
GuPool* out_pool, GuExn* err);
typedef struct SgTripleResult SgTripleResult;
SgTripleResult*
sg_query_triple(SgSG *sg, SgTriple triple, GuExn* err);
int
sg_triple_result_fetch(SgTripleResult* tres, SgId* pKey, SgTriple triple,
GuPool* out_pool, GuExn* err);
void
sg_triple_result_get_query(SgTripleResult* tres, SgTriple triple);
void
sg_triple_result_close(SgTripleResult* tres, GuExn* err);
typedef struct SgQueryResult SgQueryResult;
SgQueryResult*
sg_query(SgSG *sg, size_t n_triples, SgTriple* triples, GuExn* err);
size_t
sg_query_result_columns(SgQueryResult* qres);
int
sg_query_result_fetch_columns(SgQueryResult* qres, PgfExpr* res,
GuPool* out_pool, GuExn* err);
PgfExpr
sg_query_result_fetch_expr(SgQueryResult* qres, PgfExpr expr,
GuPool* out_pool, GuExn* err);
void
sg_query_result_close(SgQueryResult* qres, GuExn* err);
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -1,705 +0,0 @@
/*
** 2001 September 15
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This header file defines the interface that the sqlite B-Tree file
** subsystem. See comments in the source code for a detailed description
** of what each interface routine does.
*/
#ifndef _BTREE_H_
#define _BTREE_H_
/*
** The SQLITE_THREADSAFE macro must be defined as 0, 1, or 2.
** 0 means mutexes are permanently disable and the library is never
** threadsafe. 1 means the library is serialized which is the highest
** level of threadsafety. 2 means the library is multithreaded - multiple
** threads can use SQLite as long as no two threads try to use the same
** database connection at the same time.
**
** Older versions of SQLite used an optional THREADSAFE macro.
** We support that for legacy.
*/
#if !defined(SQLITE_THREADSAFE)
# if defined(THREADSAFE)
# define SQLITE_THREADSAFE THREADSAFE
# else
# define SQLITE_THREADSAFE 1 /* IMP: R-07272-22309 */
# endif
#endif
/*
** CAPI3REF: 64-Bit Integer Types
** KEYWORDS: sqlite_int64 sqlite_uint64
**
** Because there is no cross-platform way to specify 64-bit integer types
** SQLite includes typedefs for 64-bit signed and unsigned integers.
**
** The sqlite3_int64 and sqlite3_uint64 are the preferred type definitions.
** The sqlite_int64 and sqlite_uint64 types are supported for backwards
** compatibility only.
**
** ^The sqlite3_int64 and sqlite_int64 types can store integer values
** between -9223372036854775808 and +9223372036854775807 inclusive. ^The
** sqlite3_uint64 and sqlite_uint64 types can store integer values
** between 0 and +18446744073709551615 inclusive.
*/
#ifdef SQLITE_INT64_TYPE
typedef SQLITE_INT64_TYPE sqlite_int64;
typedef unsigned SQLITE_INT64_TYPE sqlite_uint64;
#elif defined(_MSC_VER) || defined(__BORLANDC__)
typedef __int64 sqlite_int64;
typedef unsigned __int64 sqlite_uint64;
#else
typedef long long int sqlite_int64;
typedef unsigned long long int sqlite_uint64;
#endif
typedef sqlite_int64 sqlite3_int64;
typedef sqlite_uint64 sqlite3_uint64;
/*
** Integers of known sizes. These typedefs might change for architectures
** where the sizes very. Preprocessor macros are available so that the
** types can be conveniently redefined at compile-type. Like this:
**
** cc '-DUINTPTR_TYPE=long long int' ...
*/
#ifndef UINT32_TYPE
# ifdef HAVE_UINT32_T
# define UINT32_TYPE uint32_t
# else
# define UINT32_TYPE unsigned int
# endif
#endif
#ifndef UINT16_TYPE
# ifdef HAVE_UINT16_T
# define UINT16_TYPE uint16_t
# else
# define UINT16_TYPE unsigned short int
# endif
#endif
#ifndef INT16_TYPE
# ifdef HAVE_INT16_T
# define INT16_TYPE int16_t
# else
# define INT16_TYPE short int
# endif
#endif
#ifndef UINT8_TYPE
# ifdef HAVE_UINT8_T
# define UINT8_TYPE uint8_t
# else
# define UINT8_TYPE unsigned char
# endif
#endif
#ifndef INT8_TYPE
# ifdef HAVE_INT8_T
# define INT8_TYPE int8_t
# else
# define INT8_TYPE signed char
# endif
#endif
#ifndef LONGDOUBLE_TYPE
# define LONGDOUBLE_TYPE long double
#endif
typedef sqlite_int64 i64; /* 8-byte signed integer */
typedef sqlite_uint64 u64; /* 8-byte unsigned integer */
typedef UINT32_TYPE u32; /* 4-byte unsigned integer */
typedef UINT16_TYPE u16; /* 2-byte unsigned integer */
typedef INT16_TYPE i16; /* 2-byte signed integer */
typedef UINT8_TYPE u8; /* 1-byte unsigned integer */
typedef INT8_TYPE i8; /* 1-byte signed integer */
/* TODO: This definition is just included so other modules compile. It
** needs to be revisited.
*/
#define SQLITE_N_BTREE_META 16
/*
** If defined as non-zero, auto-vacuum is enabled by default. Otherwise
** it must be turned on for each database using "PRAGMA auto_vacuum = 1".
*/
#ifndef SQLITE_DEFAULT_AUTOVACUUM
#define SQLITE_DEFAULT_AUTOVACUUM 0
#endif
#define BTREE_AUTOVACUUM_NONE 0 /* Do not do auto-vacuum */
#define BTREE_AUTOVACUUM_FULL 1 /* Do full auto-vacuum */
#define BTREE_AUTOVACUUM_INCR 2 /* Incremental vacuum */
/*
** CAPI3REF: Initialize The SQLite Library
**
** ^The sqlite3BtreeInitialize() routine initializes the
** SQLite library. ^The sqlite3BtreeShutdown() routine
** deallocates any resources that were allocated by sqlite3BtreeInitialize().
** These routines are designed to aid in process initialization and
** shutdown on embedded systems. Workstation applications using
** SQLite normally do not need to invoke either of these routines.
**
** A call to sqlite3BtreeInitialize() is an "effective" call if it is
** the first time sqlite3BtreeInitialize() is invoked during the lifetime of
** the process, or if it is the first time sqlite3BtreeInitialize() is invoked
** following a call to sqlite3BtreeShutdown(). ^(Only an effective call
** of sqlite3BtreeInitialize() does any initialization. All other calls
** are harmless no-ops.)^
**
** A call to sqlite3BtreeShutdown() is an "effective" call if it is the first
** call to sqlite3BtreeShutdown() since the last sqlite3BtreeInitialize(). ^(Only
** an effective call to sqlite3BtreeShutdown() does any deinitialization.
** All other valid calls to sqlite3BtreeShutdown() are harmless no-ops.)^
**
** The sqlite3BtreeInitialize() interface is threadsafe, but sqlite3BtreeShutdown()
** is not. The sqlite3BtreeShutdown() interface must only be called from a
** single thread. All open [database connections] must be closed and all
** other SQLite resources must be deallocated prior to invoking
** sqlite3BtreeShutdown().
**
** Among other things, ^sqlite3BtreeInitialize() will invoke
** sqlite3_os_init(). Similarly, ^sqlite3BtreeShutdown()
** will invoke sqlite3_os_end().
**
** ^The sqlite3BtreeInitialize() routine returns [SQLITE_OK] on success.
** ^If for some reason, sqlite3BtreeInitialize() is unable to initialize
** the library (perhaps it is unable to allocate a needed resource such
** as a mutex) it returns an [error code] other than [SQLITE_OK].
**
** ^The sqlite3BtreeInitialize() routine is called internally by many other
** SQLite interfaces so that an application usually does not need to
** invoke sqlite3BtreeInitialize() directly. For example, [sqlite3_open()]
** calls sqlite3BtreeInitialize() so the SQLite library will be automatically
** initialized when [sqlite3_open()] is called if it has not be initialized
** already. ^However, if SQLite is compiled with the [SQLITE_OMIT_AUTOINIT]
** compile-time option, then the automatic calls to sqlite3BtreeInitialize()
** are omitted and the application must call sqlite3BtreeInitialize() directly
** prior to using any other SQLite interface. For maximum portability,
** it is recommended that applications always invoke sqlite3BtreeInitialize()
** directly prior to using any other SQLite interface. Future releases
** of SQLite may require this. In other words, the behavior exhibited
** when SQLite is compiled with [SQLITE_OMIT_AUTOINIT] might become the
** default behavior in some future release of SQLite.
**
** The sqlite3_os_init() routine does operating-system specific
** initialization of the SQLite library. The sqlite3_os_end()
** routine undoes the effect of sqlite3_os_init(). Typical tasks
** performed by these routines include allocation or deallocation
** of static resources, initialization of global variables,
** setting up a default [sqlite3_vfs] module, or setting up
** a default configuration using [sqlite3_config()].
**
** The application should never invoke either sqlite3_os_init()
** or sqlite3_os_end() directly. The application should only invoke
** sqlite3BtreeInitialize() and sqlite3BtreeShutdown(). The sqlite3_os_init()
** interface is called automatically by sqlite3BtreeInitialize() and
** sqlite3_os_end() is called by sqlite3BtreeShutdown(). Appropriate
** implementations for sqlite3_os_init() and sqlite3_os_end()
** are built into SQLite when it is compiled for Unix, Windows, or OS/2.
** When [custom builds | built for other platforms]
** (using the [SQLITE_OS_OTHER=1] compile-time
** option) the application must supply a suitable implementation for
** sqlite3_os_init() and sqlite3_os_end(). An application-supplied
** implementation of sqlite3_os_init() or sqlite3_os_end()
** must return [SQLITE_OK] on success and some other [error code] upon
** failure.
*/
int sqlite3BtreeInitialize(void);
int sqlite3BtreeShutdown(void);
/*
** CAPI3REF: Result Codes
** KEYWORDS: {result code definitions}
**
** Many SQLite functions return an integer result code from the set shown
** here in order to indicate success or failure.
**
** New error codes may be added in future versions of SQLite.
**
** See also: [extended result code definitions]
*/
#define SQLITE_OK 0 /* Successful result */
/* beginning-of-error-codes */
#define SQLITE_ERROR 1 /* SQL error or missing database */
#define SQLITE_INTERNAL 2 /* Internal logic error in SQLite */
#define SQLITE_PERM 3 /* Access permission denied */
#define SQLITE_ABORT 4 /* Callback routine requested an abort */
#define SQLITE_BUSY 5 /* The database file is locked */
#define SQLITE_LOCKED 6 /* A table in the database is locked */
#define SQLITE_NOMEM 7 /* A malloc() failed */
#define SQLITE_READONLY 8 /* Attempt to write a readonly database */
#define SQLITE_INTERRUPT 9 /* Operation terminated by sqlite3_interrupt()*/
#define SQLITE_IOERR 10 /* Some kind of disk I/O error occurred */
#define SQLITE_CORRUPT 11 /* The database disk image is malformed */
#define SQLITE_NOTFOUND 12 /* Unknown opcode in sqlite3_file_control() */
#define SQLITE_FULL 13 /* Insertion failed because database is full */
#define SQLITE_CANTOPEN 14 /* Unable to open the database file */
#define SQLITE_PROTOCOL 15 /* Database lock protocol error */
#define SQLITE_EMPTY 16 /* Database is empty */
#define SQLITE_SCHEMA 17 /* The database schema changed */
#define SQLITE_TOOBIG 18 /* String or BLOB exceeds size limit */
#define SQLITE_CONSTRAINT 19 /* Abort due to constraint violation */
#define SQLITE_MISMATCH 20 /* Data type mismatch */
#define SQLITE_MISUSE 21 /* Library used incorrectly */
#define SQLITE_NOLFS 22 /* Uses OS features not supported on host */
#define SQLITE_AUTH 23 /* Authorization denied */
#define SQLITE_FORMAT 24 /* Auxiliary database format error */
#define SQLITE_RANGE 25 /* 2nd parameter to sqlite3_bind out of range */
#define SQLITE_NOTADB 26 /* File opened that is not a database file */
#define SQLITE_NOTICE 27 /* Notifications from sqlite3_log() */
#define SQLITE_WARNING 28 /* Warnings from sqlite3_log() */
#define SQLITE_ROW 100 /* sqlite3_step() has another row ready */
#define SQLITE_DONE 101 /* sqlite3_step() has finished executing */
/* end-of-error-codes */
/*
** CAPI3REF: Extended Result Codes
** KEYWORDS: {extended result code definitions}
**
** In its default configuration, SQLite API routines return one of 30 integer
** [result codes]. However, experience has shown that many of
** these result codes are too coarse-grained. They do not provide as
** much information about problems as programmers might like. In an effort to
** address this, newer versions of SQLite (version 3.3.8 and later) include
** support for additional result codes that provide more detailed information
** about errors. These [extended result codes] are enabled or disabled
** on a per database connection basis using the
** [sqlite3_extended_result_codes()] API. Or, the extended code for
** the most recent error can be obtained using
** [sqlite3_extended_errcode()].
*/
#define SQLITE_IOERR_READ (SQLITE_IOERR | (1<<8))
#define SQLITE_IOERR_SHORT_READ (SQLITE_IOERR | (2<<8))
#define SQLITE_IOERR_WRITE (SQLITE_IOERR | (3<<8))
#define SQLITE_IOERR_FSYNC (SQLITE_IOERR | (4<<8))
#define SQLITE_IOERR_DIR_FSYNC (SQLITE_IOERR | (5<<8))
#define SQLITE_IOERR_TRUNCATE (SQLITE_IOERR | (6<<8))
#define SQLITE_IOERR_FSTAT (SQLITE_IOERR | (7<<8))
#define SQLITE_IOERR_UNLOCK (SQLITE_IOERR | (8<<8))
#define SQLITE_IOERR_RDLOCK (SQLITE_IOERR | (9<<8))
#define SQLITE_IOERR_DELETE (SQLITE_IOERR | (10<<8))
#define SQLITE_IOERR_BLOCKED (SQLITE_IOERR | (11<<8))
#define SQLITE_IOERR_NOMEM (SQLITE_IOERR | (12<<8))
#define SQLITE_IOERR_ACCESS (SQLITE_IOERR | (13<<8))
#define SQLITE_IOERR_CHECKRESERVEDLOCK (SQLITE_IOERR | (14<<8))
#define SQLITE_IOERR_LOCK (SQLITE_IOERR | (15<<8))
#define SQLITE_IOERR_CLOSE (SQLITE_IOERR | (16<<8))
#define SQLITE_IOERR_DIR_CLOSE (SQLITE_IOERR | (17<<8))
#define SQLITE_IOERR_SHMOPEN (SQLITE_IOERR | (18<<8))
#define SQLITE_IOERR_SHMSIZE (SQLITE_IOERR | (19<<8))
#define SQLITE_IOERR_SHMLOCK (SQLITE_IOERR | (20<<8))
#define SQLITE_IOERR_SHMMAP (SQLITE_IOERR | (21<<8))
#define SQLITE_IOERR_SEEK (SQLITE_IOERR | (22<<8))
#define SQLITE_IOERR_DELETE_NOENT (SQLITE_IOERR | (23<<8))
#define SQLITE_IOERR_MMAP (SQLITE_IOERR | (24<<8))
#define SQLITE_IOERR_GETTEMPPATH (SQLITE_IOERR | (25<<8))
#define SQLITE_IOERR_CONVPATH (SQLITE_IOERR | (26<<8))
#define SQLITE_IOERR_VNODE (SQLITE_IOERR | (27<<8))
#define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8))
#define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8))
#define SQLITE_BUSY_SNAPSHOT (SQLITE_BUSY | (2<<8))
#define SQLITE_CANTOPEN_NOTEMPDIR (SQLITE_CANTOPEN | (1<<8))
#define SQLITE_CANTOPEN_ISDIR (SQLITE_CANTOPEN | (2<<8))
#define SQLITE_CANTOPEN_FULLPATH (SQLITE_CANTOPEN | (3<<8))
#define SQLITE_CANTOPEN_CONVPATH (SQLITE_CANTOPEN | (4<<8))
#define SQLITE_CORRUPT_VTAB (SQLITE_CORRUPT | (1<<8))
#define SQLITE_READONLY_RECOVERY (SQLITE_READONLY | (1<<8))
#define SQLITE_READONLY_CANTLOCK (SQLITE_READONLY | (2<<8))
#define SQLITE_READONLY_ROLLBACK (SQLITE_READONLY | (3<<8))
#define SQLITE_READONLY_DBMOVED (SQLITE_READONLY | (4<<8))
#define SQLITE_ABORT_ROLLBACK (SQLITE_ABORT | (2<<8))
#define SQLITE_CONSTRAINT_CHECK (SQLITE_CONSTRAINT | (1<<8))
#define SQLITE_CONSTRAINT_COMMITHOOK (SQLITE_CONSTRAINT | (2<<8))
#define SQLITE_CONSTRAINT_FOREIGNKEY (SQLITE_CONSTRAINT | (3<<8))
#define SQLITE_CONSTRAINT_FUNCTION (SQLITE_CONSTRAINT | (4<<8))
#define SQLITE_CONSTRAINT_NOTNULL (SQLITE_CONSTRAINT | (5<<8))
#define SQLITE_CONSTRAINT_PRIMARYKEY (SQLITE_CONSTRAINT | (6<<8))
#define SQLITE_CONSTRAINT_TRIGGER (SQLITE_CONSTRAINT | (7<<8))
#define SQLITE_CONSTRAINT_UNIQUE (SQLITE_CONSTRAINT | (8<<8))
#define SQLITE_CONSTRAINT_VTAB (SQLITE_CONSTRAINT | (9<<8))
#define SQLITE_CONSTRAINT_ROWID (SQLITE_CONSTRAINT |(10<<8))
#define SQLITE_NOTICE_RECOVER_WAL (SQLITE_NOTICE | (1<<8))
#define SQLITE_NOTICE_RECOVER_ROLLBACK (SQLITE_NOTICE | (2<<8))
#define SQLITE_WARNING_AUTOINDEX (SQLITE_WARNING | (1<<8))
#define SQLITE_AUTH_USER (SQLITE_AUTH | (1<<8))
/* Reserved: 0x00F00000 */
/*
** Forward declarations of structure
*/
typedef struct Btree Btree;
typedef struct BtCursor BtCursor;
typedef struct BtShared BtShared;
typedef struct Mem Mem;
typedef struct KeyInfo KeyInfo;
typedef struct UnpackedRecord UnpackedRecord;
int sqlite3BtreeOpen(
const char *zVfs, /* VFS to use with this b-tree */
const char *zFilename, /* Name of database file to open */
Btree **ppBtree, /* Return open Btree* here */
int flags, /* Flags */
int vfsFlags /* Flags passed through to VFS open */
);
/* The flags parameter to sqlite3BtreeOpen can be the bitwise or of the
** following values.
**
** NOTE: These values must match the corresponding PAGER_ values in
** pager.h.
*/
#define BTREE_OMIT_JOURNAL 1 /* Do not create or use a rollback journal */
#define BTREE_MEMORY 2 /* This is an in-memory DB */
#define BTREE_SINGLE 4 /* The file contains at most 1 b-tree */
#define BTREE_UNORDERED 8 /* Use of a hash implementation is OK */
/*
** CAPI3REF: Flags For File Open Operations
**
** These bit values are intended for use in the
** 3rd parameter to the [sqlite3_open_v2()] interface and
** in the 4th parameter to the [sqlite3_vfs.xOpen] method.
*/
#define SQLITE_OPEN_READONLY 0x00000001 /* Ok for sqlite3_open_v2() */
#define SQLITE_OPEN_READWRITE 0x00000002 /* Ok for sqlite3_open_v2() */
#define SQLITE_OPEN_CREATE 0x00000004 /* Ok for sqlite3_open_v2() */
#define SQLITE_OPEN_DELETEONCLOSE 0x00000008 /* VFS only */
#define SQLITE_OPEN_EXCLUSIVE 0x00000010 /* VFS only */
#define SQLITE_OPEN_AUTOPROXY 0x00000020 /* VFS only */
#define SQLITE_OPEN_URI 0x00000040 /* Ok for sqlite3_open_v2() */
#define SQLITE_OPEN_MEMORY 0x00000080 /* Ok for sqlite3_open_v2() */
#define SQLITE_OPEN_MAIN_DB 0x00000100 /* VFS only */
#define SQLITE_OPEN_TEMP_DB 0x00000200 /* VFS only */
#define SQLITE_OPEN_TRANSIENT_DB 0x00000400 /* VFS only */
#define SQLITE_OPEN_MAIN_JOURNAL 0x00000800 /* VFS only */
#define SQLITE_OPEN_TEMP_JOURNAL 0x00001000 /* VFS only */
#define SQLITE_OPEN_SUBJOURNAL 0x00002000 /* VFS only */
#define SQLITE_OPEN_MASTER_JOURNAL 0x00004000 /* VFS only */
#define SQLITE_OPEN_NOMUTEX 0x00008000 /* Ok for sqlite3_open_v2() */
#define SQLITE_OPEN_FULLMUTEX 0x00010000 /* Ok for sqlite3_open_v2() */
#define SQLITE_OPEN_SHAREDCACHE 0x00020000 /* Ok for sqlite3_open_v2() */
#define SQLITE_OPEN_PRIVATECACHE 0x00040000 /* Ok for sqlite3_open_v2() */
#define SQLITE_OPEN_WAL 0x00080000 /* VFS only */
int sqlite3BtreeClose(Btree*);
int sqlite3BtreeSetCacheSize(Btree*,int);
#if SQLITE_MAX_MMAP_SIZE>0
int sqlite3BtreeSetMmapLimit(Btree*,sqlite3_int64);
#endif
int sqlite3BtreeSetPagerFlags(Btree*,unsigned);
int sqlite3BtreeSyncDisabled(Btree*);
int sqlite3BtreeSetPageSize(Btree *p, int nPagesize, int nReserve, int eFix);
int sqlite3BtreeGetPageSize(Btree*);
int sqlite3BtreeMaxPageCount(Btree*,int);
u32 sqlite3BtreeLastPage(Btree*);
int sqlite3BtreeSecureDelete(Btree*,int);
int sqlite3BtreeGetOptimalReserve(Btree*);
int sqlite3BtreeGetReserveNoMutex(Btree *p);
int sqlite3BtreeSetAutoVacuum(Btree *, int);
int sqlite3BtreeGetAutoVacuum(Btree *);
int sqlite3BtreeBeginTrans(Btree*,int);
int sqlite3BtreeCommitPhaseOne(Btree*, const char *zMaster);
int sqlite3BtreeCommitPhaseTwo(Btree*, int);
int sqlite3BtreeCommit(Btree*);
int sqlite3BtreeRollback(Btree*,int,int);
int sqlite3BtreeBeginStmt(Btree*,int);
int sqlite3BtreeCreateTable(Btree*, int*, int flags);
int sqlite3BtreeIsInTrans(Btree*);
int sqlite3BtreeIsInReadTrans(Btree*);
int sqlite3BtreeIsInBackup(Btree*);
void *sqlite3BtreeSchema(Btree *, int, void(*)(void *));
int sqlite3BtreeSchemaLocked(Btree *pBtree);
int sqlite3BtreeLockTable(Btree *pBtree, int iTab, u8 isWriteLock);
int sqlite3BtreeSavepoint(Btree *, int, int);
int sqlite3BtreeFileFormat(Btree *);
const char *sqlite3BtreeGetFilename(Btree *);
const char *sqlite3BtreeGetJournalname(Btree *);
int sqlite3BtreeCopyFile(Btree *, Btree *);
int sqlite3BtreeIncrVacuum(Btree *);
/* The flags parameter to sqlite3BtreeCreateTable can be the bitwise OR
** of the flags shown below.
**
** Every SQLite table must have either BTREE_INTKEY or BTREE_BLOBKEY set.
** With BTREE_INTKEY, the table key is a 64-bit integer and arbitrary data
** is stored in the leaves. (BTREE_INTKEY is used for SQL tables.) With
** BTREE_BLOBKEY, the key is an arbitrary BLOB and no content is stored
** anywhere - the key is the content. (BTREE_BLOBKEY is used for SQL
** indices.)
*/
#define BTREE_INTKEY 1 /* Table has only 64-bit signed integer keys */
#define BTREE_BLOBKEY 2 /* Table has keys only - no data */
int sqlite3BtreeDropTable(Btree*, int, int*);
int sqlite3BtreeClearTable(Btree*, int, int*);
int sqlite3BtreeClearTableOfCursor(BtCursor*);
int sqlite3BtreeTripAllCursors(Btree*, int, int);
void sqlite3BtreeGetMeta(Btree *pBtree, int idx, u32 *pValue);
int sqlite3BtreeUpdateMeta(Btree*, int idx, u32 value);
int sqlite3BtreeNewDb(Btree *p);
/*
** The second parameter to sqlite3BtreeGetMeta or sqlite3BtreeUpdateMeta
** should be one of the following values. The integer values are assigned
** to constants so that the offset of the corresponding field in an
** SQLite database header may be found using the following formula:
**
** offset = 36 + (idx * 4)
**
** For example, the free-page-count field is located at byte offset 36 of
** the database file header. The incr-vacuum-flag field is located at
** byte offset 64 (== 36+4*7).
**
** The BTREE_DATA_VERSION value is not really a value stored in the header.
** It is a read-only number computed by the pager. But we merge it with
** the header value access routines since its access pattern is the same.
** Call it a "virtual meta value".
*/
#define BTREE_FREE_PAGE_COUNT 0
#define BTREE_SCHEMA_VERSION 1
#define BTREE_FILE_FORMAT 2
#define BTREE_DEFAULT_CACHE_SIZE 3
#define BTREE_LARGEST_ROOT_PAGE 4
#define BTREE_TEXT_ENCODING 5
#define BTREE_USER_VERSION 6
#define BTREE_INCR_VACUUM 7
#define BTREE_APPLICATION_ID 8
#define BTREE_DATA_VERSION 15 /* A virtual meta-value */
/*
** An instance of the following structure holds information about a
** single index record that has already been parsed out into individual
** values.
**
** A record is an object that contains one or more fields of data.
** Records are used to store the content of a table row and to store
** the key of an index. A blob encoding of a record is created by
** the OP_MakeRecord opcode of the VDBE and is disassembled by the
** OP_Column opcode.
**
** This structure holds a record that has already been disassembled
** into its constituent fields.
**
** The r1 and r2 member variables are only used by the optimized comparison
** functions vdbeRecordCompareInt() and vdbeRecordCompareString().
*/
struct UnpackedRecord {
KeyInfo *pKeyInfo; /* Collation and sort-order information */
u16 nField; /* Number of entries in apMem[] */
i8 default_rc; /* Comparison result if keys are equal */
u8 errCode; /* Error detected by xRecordCompare (CORRUPT or NOMEM) */
Mem *aMem; /* Values */
int r1; /* Value to return if (lhs > rhs) */
int r2; /* Value to return if (rhs < lhs) */
};
/* One or more of the following flags are set to indicate the validOK
** representations of the value stored in the Mem struct.
**
** If the MEM_Null flag is set, then the value is an SQL NULL value.
** No other flags may be set in this case.
**
** If the MEM_Str flag is set then Mem.z points at a string representation.
** Usually this is encoded in the same unicode encoding as the main
** database (see below for exceptions). If the MEM_Term flag is also
** set, then the string is nul terminated. The MEM_Int and MEM_Real
** flags may coexist with the MEM_Str flag.
*/
#define MEM_Null 0x0001 /* Value is NULL */
#define MEM_Str 0x0002 /* Value is a string */
#define MEM_Int 0x0004 /* Value is an integer */
#define MEM_Real 0x0008 /* Value is a real number */
#define MEM_Blob 0x0010 /* Value is a BLOB */
#define MEM_Term 0x0200 /* String rep is nul terminated */
#define MEM_Dyn 0x0400 /* Need to call Mem.xDel() on Mem.z */
#define MEM_Static 0x0800 /* Mem.z points to a static string */
#define MEM_Ephem 0x1000 /* Mem.z points to an ephemeral string */
#define MEM_Zero 0x4000 /* Mem.i contains count of 0s appended to blob */
/*
** Internally, the vdbe manipulates nearly all SQL values as Mem
** structures. Each Mem struct may cache multiple representations (string,
** integer etc.) of the same value.
*/
struct Mem {
union MemValue {
double r; /* Real value used when MEM_Real is set in flags */
i64 i; /* Integer value used when MEM_Int is set in flags */
int nZero; /* Used when bit MEM_Zero is set in flags */
} u;
u16 flags; /* Some combination of MEM_Null, MEM_Str, MEM_Dyn, etc. */
u8 enc; /* SQLITE_UTF8, SQLITE_UTF16BE, SQLITE_UTF16LE */
u8 eSubtype; /* Subtype for this value */
int n; /* Number of characters in string value, excluding '\0' */
char *z; /* String or BLOB value */
/* ShallowCopy only needs to copy the information above */
char *zMalloc; /* Space to hold MEM_Str or MEM_Blob if szMalloc>0 */
int szMalloc; /* Size of the zMalloc allocation */
u32 uTemp; /* Transient storage for serial_type in OP_MakeRecord */
Btree *pBtree; /* The associated database connection */
void (*xDel)(void*);/* Destructor for Mem.z - only valid if MEM_Dyn */
#ifdef SQLITE_DEBUG
Mem *pScopyFrom; /* This Mem is a shallow copy of pScopyFrom */
void *pFiller; /* So that sizeof(Mem) is a multiple of 8 */
#endif
};
/*
** Values that may be OR'd together to form the second argument of an
** sqlite3BtreeCursorHints() call.
**
** The BTREE_BULKLOAD flag is set on index cursors when the index is going
** to be filled with content that is already in sorted order.
**
** The BTREE_SEEK_EQ flag is set on cursors that will get OP_SeekGE or
** OP_SeekLE opcodes for a range search, but where the range of entries
** selected will all have the same key. In other words, the cursor will
** be used only for equality key searches.
**
*/
#define BTREE_BULKLOAD 0x00000001 /* Used to full index in sorted order */
#define BTREE_SEEK_EQ 0x00000002 /* EQ seeks only - no range seeks */
int sqlite3BtreeCursor(
Btree*, /* BTree containing table to open */
int iTable, /* Index of root page */
int wrFlag, /* 1 for writing. 0 for read-only */
int N, int X, /* index of N key columns and X extra columns */
BtCursor **ppCursor /* Space to write cursor pointer */
);
int sqlite3BtreeCursorSize(void);
int sqlite3BtreeCloseCursor(BtCursor*);
void sqlite3BtreeInitUnpackedRecord(
UnpackedRecord *pUnKey,
BtCursor* pCur,
int nField,
int default_rc,
Mem* pMem);
int sqlite3BtreeMovetoUnpacked(
BtCursor*,
UnpackedRecord *pUnKey,
i64 intKey,
int bias,
int *pRes
);
int sqlite3BtreeCursorHasMoved(BtCursor*);
int sqlite3BtreeCursorRestore(BtCursor*, int*);
int sqlite3BtreeDelete(BtCursor*, int);
int sqlite3BtreeInsert(BtCursor*, const void *pKey, i64 nKey,
const void *pData, int nData,
int nZero, int bias, int seekResult);
int sqlite3BtreeFirst(BtCursor*, int *pRes);
int sqlite3BtreeLast(BtCursor*, int *pRes);
int sqlite3BtreeNext(BtCursor*, int *pRes);
int sqlite3BtreeEof(BtCursor*);
int sqlite3BtreePrevious(BtCursor*, int *pRes);
int sqlite3BtreeKeySize(BtCursor*, i64 *pSize);
int sqlite3BtreeKey(BtCursor*, u32 offset, u32 amt, void*);
const void *sqlite3BtreeKeyFetch(BtCursor*, u32 *pAmt);
const void *sqlite3BtreeDataFetch(BtCursor*, u32 *pAmt);
int sqlite3BtreeDataSize(BtCursor*, u32 *pSize);
int sqlite3BtreeData(BtCursor*, u32 offset, u32 amt, void*);
char *sqlite3BtreeIntegrityCheck(Btree*, int *aRoot, int nRoot, int, int*);
struct Pager *sqlite3BtreePager(Btree*);
int sqlite3BtreePutData(BtCursor*, u32 offset, u32 amt, void*);
void sqlite3BtreeIncrblobCursor(BtCursor *);
void sqlite3BtreeClearCursor(BtCursor *);
int sqlite3BtreeSetVersion(Btree *pBt, int iVersion);
void sqlite3BtreeCursorHints(BtCursor *, unsigned int mask);
#ifdef SQLITE_DEBUG
int sqlite3BtreeCursorHasHint(BtCursor*, unsigned int mask);
#endif
int sqlite3BtreeIsReadonly(Btree *pBt);
#ifndef NDEBUG
int sqlite3BtreeCursorIsValid(BtCursor*);
#endif
#ifndef SQLITE_OMIT_BTREECOUNT
int sqlite3BtreeCount(BtCursor *, i64 *);
#endif
#ifdef SQLITE_TEST
int sqlite3BtreeCursorInfo(BtCursor*, int*, int);
void sqlite3BtreeCursorList(Btree*);
#endif
#ifndef SQLITE_OMIT_WAL
int sqlite3BtreeCheckpoint(Btree*, int, int *, int *);
#endif
/*
** If we are not using shared cache, then there is no need to
** use mutexes to access the BtShared structures. So make the
** Enter and Leave procedures no-ops.
*/
#ifndef SQLITE_OMIT_SHARED_CACHE
void sqlite3BtreeEnter(Btree*);
#else
# define sqlite3BtreeEnter(X)
#endif
#if !defined(SQLITE_OMIT_SHARED_CACHE) && SQLITE_THREADSAFE
int sqlite3BtreeSharable(Btree*);
void sqlite3BtreeLeave(Btree*);
void sqlite3BtreeEnterCursor(BtCursor*);
void sqlite3BtreeLeaveCursor(BtCursor*);
#else
# define sqlite3BtreeSharable(X) 0
# define sqlite3BtreeLeave(X)
# define sqlite3BtreeEnterCursor(X)
# define sqlite3BtreeLeaveCursor(X)
#endif
u32 sqlite3BtreeSerialType(Mem *pMem, int file_format);
u32 sqlite3BtreeSerialTypeLen(u32);
u32 sqlite3BtreeSerialGet(const unsigned char*, u32, Mem *);
u32 sqlite3BtreeSerialPut(u8*, Mem*, u32);
/*
** Routines to read and write variable-length integers. These used to
** be defined locally, but now we use the varint routines in the util.c
** file.
*/
int sqlite3BtreePutVarint(unsigned char*, u64);
u8 sqlite3BtreeGetVarint(const unsigned char *, u64 *);
u8 sqlite3BtreeGetVarint32(const unsigned char *, u32 *);
int sqlite3BtreeVarintLen(u64 v);
/*
** The common case is for a varint to be a single byte. They following
** macros handle the common case without a procedure call, but then call
** the procedure for larger varints.
*/
#define getVarint32(A,B) \
(u8)((*(A)<(u8)0x80)?((B)=(u32)*(A)),1:sqlite3BtreeGetVarint32((A),(u32 *)&(B)))
#define putVarint32(A,B) \
(u8)(((u32)(B)<(u32)0x80)?(*(A)=(unsigned char)(B)),1:\
sqlite3BtreePutVarint((A),(B)))
#define getVarint sqlite3BtreeGetVarint
#define putVarint sqlite3BtreePutVarint
int sqlite3BtreeIdxRowid(Btree*, BtCursor*, i64*);
int sqlite3BtreeRecordCompare(int,const void*,UnpackedRecord*);
const char *sqlite3BtreeErrName(int rc);
#endif /* _BTREE_H_ */

View File

@@ -0,0 +1,22 @@
## 1.3.0
- Add completion support.
## 1.2.1
- Remove deprecated `pgf_print_expr_tuple`.
- Added an API for cloning expressions/types/literals.
## 1.2.0
- Stop `pgf-shell` from being built by default.
- parseToChart also returns the category.
- bugfix in bracketedLinearize.
## 1.1.0
- Remove SG library.
## 1.0.0
- Everything up until 2020-07-11.

View File

@@ -0,0 +1,10 @@
# Instructions for uploading to Hackage
You will need a Hackage account for steps 4 & 5.
1. Bump the version number in `pgf2.cabal`
2. Add details in `CHANGELOG.md`
3. Run `stack sdist` (or `cabal sdist`)
4. Visit `https://hackage.haskell.org/upload` and upload the file `./.stack-work/dist/x86_64-osx/Cabal-2.2.0.1/pgf2-x.y.z.tar.gz` (or Cabal equivalent)
5. If successful, upload documentation with `./stack-haddock-upload.sh pgf2 x.y.z` (compilation on Hackage's servers will fail because of missing C libraries)
6. Commit and push to this repository (`gf-core`)

165
src/runtime/haskell/LICENSE Normal file
View File

@@ -0,0 +1,165 @@
GNU LESSER GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
This version of the GNU Lesser General Public License incorporates
the terms and conditions of version 3 of the GNU General Public
License, supplemented by the additional permissions listed below.
0. Additional Definitions.
As used herein, "this License" refers to version 3 of the GNU Lesser
General Public License, and the "GNU GPL" refers to version 3 of the GNU
General Public License.
"The Library" refers to a covered work governed by this License,
other than an Application or a Combined Work as defined below.
An "Application" is any work that makes use of an interface provided
by the Library, but which is not otherwise based on the Library.
Defining a subclass of a class defined by the Library is deemed a mode
of using an interface provided by the Library.
A "Combined Work" is a work produced by combining or linking an
Application with the Library. The particular version of the Library
with which the Combined Work was made is also called the "Linked
Version".
The "Minimal Corresponding Source" for a Combined Work means the
Corresponding Source for the Combined Work, excluding any source code
for portions of the Combined Work that, considered in isolation, are
based on the Application, and not on the Linked Version.
The "Corresponding Application Code" for a Combined Work means the
object code and/or source code for the Application, including any data
and utility programs needed for reproducing the Combined Work from the
Application, but excluding the System Libraries of the Combined Work.
1. Exception to Section 3 of the GNU GPL.
You may convey a covered work under sections 3 and 4 of this License
without being bound by section 3 of the GNU GPL.
2. Conveying Modified Versions.
If you modify a copy of the Library, and, in your modifications, a
facility refers to a function or data to be supplied by an Application
that uses the facility (other than as an argument passed when the
facility is invoked), then you may convey a copy of the modified
version:
a) under this License, provided that you make a good faith effort to
ensure that, in the event an Application does not supply the
function or data, the facility still operates, and performs
whatever part of its purpose remains meaningful, or
b) under the GNU GPL, with none of the additional permissions of
this License applicable to that copy.
3. Object Code Incorporating Material from Library Header Files.
The object code form of an Application may incorporate material from
a header file that is part of the Library. You may convey such object
code under terms of your choice, provided that, if the incorporated
material is not limited to numerical parameters, data structure
layouts and accessors, or small macros, inline functions and templates
(ten or fewer lines in length), you do both of the following:
a) Give prominent notice with each copy of the object code that the
Library is used in it and that the Library and its use are
covered by this License.
b) Accompany the object code with a copy of the GNU GPL and this license
document.
4. Combined Works.
You may convey a Combined Work under terms of your choice that,
taken together, effectively do not restrict modification of the
portions of the Library contained in the Combined Work and reverse
engineering for debugging such modifications, if you also do each of
the following:
a) Give prominent notice with each copy of the Combined Work that
the Library is used in it and that the Library and its use are
covered by this License.
b) Accompany the Combined Work with a copy of the GNU GPL and this license
document.
c) For a Combined Work that displays copyright notices during
execution, include the copyright notice for the Library among
these notices, as well as a reference directing the user to the
copies of the GNU GPL and this license document.
d) Do one of the following:
0) Convey the Minimal Corresponding Source under the terms of this
License, and the Corresponding Application Code in a form
suitable for, and under terms that permit, the user to
recombine or relink the Application with a modified version of
the Linked Version to produce a modified Combined Work, in the
manner specified by section 6 of the GNU GPL for conveying
Corresponding Source.
1) Use a suitable shared library mechanism for linking with the
Library. A suitable mechanism is one that (a) uses at run time
a copy of the Library already present on the user's computer
system, and (b) will operate properly with a modified version
of the Library that is interface-compatible with the Linked
Version.
e) Provide Installation Information, but only if you would otherwise
be required to provide such information under section 6 of the
GNU GPL, and only to the extent that such information is
necessary to install and execute a modified version of the
Combined Work produced by recombining or relinking the
Application with a modified version of the Linked Version. (If
you use option 4d0, the Installation Information must accompany
the Minimal Corresponding Source and Corresponding Application
Code. If you use option 4d1, you must provide the Installation
Information in the manner specified by section 6 of the GNU GPL
for conveying Corresponding Source.)
5. Combined Libraries.
You may place library facilities that are a work based on the
Library side by side in a single library together with other library
facilities that are not Applications and are not covered by this
License, and convey such a combined library under terms of your
choice, if you do both of the following:
a) Accompany the combined library with a copy of the same work based
on the Library, uncombined with any other library facilities,
conveyed under the terms of this License.
b) Give prominent notice with the combined library that part of it
is a work based on the Library, and explaining where to find the
accompanying uncombined form of the same work.
6. Revised Versions of the GNU Lesser General Public License.
The Free Software Foundation may publish revised and/or new versions
of the GNU Lesser General Public License from time to time. Such new
versions will be similar in spirit to the present version, but may
differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the
Library as you received it specifies that a certain numbered version
of the GNU Lesser General Public License "or any later version"
applies to it, you have the option of following the terms and
conditions either of that published version or of any later version
published by the Free Software Foundation. If the Library as you
received it does not specify a version number of the GNU Lesser
General Public License, you may choose any version of the GNU Lesser
General Public License ever published by the Free Software Foundation.
If the Library as you received it specifies that a proxy can decide
whether future versions of the GNU Lesser General Public License shall
apply, that proxy's public statement of acceptance of any version is
permanent authorization for you to choose that version for the
Library.

View File

@@ -158,7 +158,7 @@ parse_ pgf lang cat dp s =
PGF2.ParseIncomplete -> (ParseIncomplete, PGF2.Leaf s)
complete pgf lang cat s prefix =
let compls = Map.fromListWith (++) [(tok,[CId fun]) | (tok,_,fun,_) <- PGF2.complete (lookConcr pgf lang) cat s prefix]
let compls = Map.fromListWith (++) [(tok,[CId fun]) | PGF2.ParseOk res <- [PGF2.complete (lookConcr pgf lang) cat s prefix], (tok,_,fun,_) <- res]
in (PGF2.Leaf [],s,compls)
hasLinearization pgf lang (CId f) = PGF2.hasLinearization (lookConcr pgf lang) f

View File

@@ -15,6 +15,7 @@
#include <pgf/pgf.h>
#include <pgf/linearizer.h>
#include <pgf/data.h>
#include <gu/enum.h>
#include <gu/exn.h>
@@ -38,30 +39,28 @@ module PGF2 (-- * PGF
mkMeta,unMeta,
exprHash, exprSize, exprFunctions, exprSubstitute,
treeProbability,
-- ** Types
Type, Hypo, BindType(..), startCat,
readType, showType, showContext,
mkType, unType,
-- ** Type checking
-- | Dynamically-built expressions should always be type-checked before using in other functions,
-- as the exceptions thrown by using invalid expressions may not catchable.
checkExpr, inferExpr, checkType,
-- ** Computing
compute,
-- * Concrete syntax
ConcName,Concr,languages,concreteName,languageCode,
-- ** Linearization
linearize,linearizeAll,tabularLinearize,tabularLinearizeAll,bracketedLinearize,
FId, LIndex, BracketedString(..), showBracketedString, flattenBracketedString,
printName,
linearize,linearizeAll,tabularLinearize,tabularLinearizeAll,bracketedLinearize,bracketedLinearizeAll,
FId, BracketedString(..), showBracketedString, flattenBracketedString,
printName, categoryFields,
alignWords, gizaAlignment,
-- ** Parsing
ParseOutput(..), parse, parseWithHeuristics, complete,
ParseOutput(..), parse, parseWithHeuristics,
parseToChart, PArg(..),
complete,
-- ** Sentence Lookup
lookupSentence,
@@ -71,6 +70,7 @@ module PGF2 (-- * PGF
-- ** Morphological Analysis
MorphoAnalysis, lookupMorpho, lookupCohorts, fullFormLexicon,
filterBest, filterLongest,
-- ** Visualizations
GraphvizOptions(..), graphvizDefaults,
graphvizAbstractTree, graphvizParseTree,
@@ -88,11 +88,12 @@ module PGF2 (-- * PGF
readProbabilitiesFromFile
) where
import Prelude hiding (fromEnum)
import Prelude hiding (fromEnum,(<>))
import Control.Exception(Exception,throwIO)
import Control.Monad(forM_)
import System.IO.Unsafe(unsafePerformIO,unsafeInterleaveIO)
import System.Random
import System.IO(fixIO)
import Text.PrettyPrint
import PGF2.Expr
import PGF2.Type
@@ -103,12 +104,12 @@ import Foreign.C
import Data.Typeable
import qualified Data.Map as Map
import Data.IORef
import Data.Char(isUpper,isSpace)
import Data.Char(isUpper,isSpace,isPunctuation)
import Data.List(isSuffixOf,maximumBy,nub,mapAccumL,intersperse,groupBy,find)
import Data.Maybe(fromMaybe)
import Data.Function(on)
import Data.Maybe(maybe)
-----------------------------------------------------------------------
-- Functions that take a PGF.
-- PGF has many Concrs.
@@ -188,7 +189,7 @@ languageCode c = unsafePerformIO $ do
else fmap Just (peekUtf8CString c_code)
-- | Generates an exhaustive possibly infinite list of
-- all abstract syntax expressions of the given type.
-- all abstract syntax expressions of the given type.
-- The expressions are ordered by their probability.
generateAll :: PGF -> Type -> [(Expr,Float)]
generateAll p (Type ctype _) =
@@ -450,6 +451,7 @@ graphvizParseTree c opts e =
c_opts <- newGraphvizOptions tmpPl opts
pgf_graphviz_parse_tree (concr c) (expr e) c_opts out exn
touchExpr e
touchConcr c
s <- gu_string_buf_freeze sb tmpPl
peekUtf8CString s
@@ -915,21 +917,21 @@ newGraphvizOptions pool opts = do
-- Functions using Concr
-- Morpho analyses, parsing & linearization
-- | This triple is returned by all functions that deal with
-- | This triple is returned by all functions that deal with
-- the grammar's lexicon. Its first element is the name of an abstract
-- lexical function which can produce a given word or
-- lexical function which can produce a given word or
-- a multiword expression (i.e. this is the lemma).
-- After that follows a string which describes
-- After that follows a string which describes
-- the particular inflection form.
--
-- The last element is a logarithm from the
-- the probability of the function. The probability is not
-- the probability of the function. The probability is not
-- conditionalized on the category of the function. This makes it
-- possible to compare the likelihood of two functions even if they
-- have different types.
-- have different types.
type MorphoAnalysis = (Fun,String,Float)
-- | 'lookupMorpho' takes a string which must be a single word or
-- | 'lookupMorpho' takes a string which must be a single word or
-- a multiword expression. It then computes the list of all possible
-- morphological analyses.
lookupMorpho :: Concr -> String -> [MorphoAnalysis]
@@ -954,7 +956,7 @@ lookupMorpho (Concr concr master) sent =
-- The list is sorted first by the @start@ position and after than
-- by the @end@ position. This can be used for instance if you want to
-- filter only the longest matches.
lookupCohorts :: Concr -> String -> [(Int,[MorphoAnalysis],Int)]
lookupCohorts :: Concr -> String -> [(Int,String,[MorphoAnalysis],Int)]
lookupCohorts lang@(Concr concr master) sent =
unsafePerformIO $
do pl <- gu_new_pool
@@ -965,9 +967,9 @@ lookupCohorts lang@(Concr concr master) sent =
c_sent <- newUtf8CString sent pl
enum <- pgf_lookup_cohorts concr c_sent cback pl nullPtr
fpl <- newForeignPtr gu_pool_finalizer pl
fromCohortRange enum fpl fptr ref
fromCohortRange enum fpl fptr 0 sent ref
where
fromCohortRange enum fpl fptr ref =
fromCohortRange enum fpl fptr i sent ref =
allocaBytes (#size PgfCohortRange) $ \ptr ->
withForeignPtr fpl $ \pl ->
do gu_enum_next enum ptr pl
@@ -981,8 +983,80 @@ lookupCohorts lang@(Concr concr master) sent =
end <- (#peek PgfCohortRange, end.pos) ptr
ans <- readIORef ref
writeIORef ref []
cohs <- unsafeInterleaveIO (fromCohortRange enum fpl fptr ref)
return ((start,ans,end):cohs)
let sent' = drop (start-i) sent
tok = take (end-start) sent'
cohs <- unsafeInterleaveIO (fromCohortRange enum fpl fptr start sent' ref)
return ((start,tok,ans,end):cohs)
filterBest :: [(Int,String,[MorphoAnalysis],Int)] -> [(Int,String,[MorphoAnalysis],Int)]
filterBest ans =
reverse (iterate (maxBound :: Int) [(0,0,[],ans)] [] [])
where
iterate v0 [] [] res = res
iterate v0 [] new res = iterate v0 new [] res
iterate v0 ((_,v,conf, []):old) new res =
case compare v0 v of
LT -> res
EQ -> iterate v0 old new (merge conf res)
GT -> iterate v old new conf
iterate v0 ((_,v,conf,an:ans):old) new res = iterate v0 old (insert (v+valueOf an) conf an ans [] new) res
valueOf (_,_,[],_) = 2
valueOf _ = 1
insert v conf an@(start,_,_,end) ans l_new [] =
match start v conf ans ((end,v,comb conf an,filter end ans):l_new) []
insert v conf an@(start,_,_,end) ans l_new (new@(end0,v0,conf0,ans0):r_new) =
case compare end0 end of
LT -> insert v conf an ans (new:l_new) r_new
EQ -> case compare v0 v of
LT -> match start v conf ans ((end,v, conf0,ans0): l_new) r_new
EQ -> match start v conf ans ((end,v,merge (comb conf an) conf0,ans0): l_new) r_new
GT -> match start v conf ans ((end,v,comb conf an, ans0): l_new) r_new
GT -> match start v conf ans ((end,v,comb conf an, filter end ans):new:l_new) r_new
match start0 v conf (an@(start,_,_,end):ans) l_new r_new
| start0 == start = insert v conf an ans l_new r_new
match start0 v conf ans l_new r_new = revOn l_new r_new
comb ((start0,w0,an0,end0):conf) (start,w,an,end)
| end0 == start && (unk w0 an0 || unk w an) = (start0,w0++w,[],end):conf
comb conf an = an:conf
filter end [] = []
filter end (next@(start,_,_,_):ans)
| end <= start = next:ans
| otherwise = filter end ans
revOn [] ys = ys
revOn (x:xs) ys = revOn xs (x:ys)
merge [] ans = ans
merge ans [] = ans
merge (an1@(start1,_,_,end1):ans1) (an2@(start2,_,_,end2):ans2) =
case compare (start1,end1) (start2,end2) of
GT -> an1 : merge ans1 (an2:ans2)
EQ -> an1 : merge ans1 ans2
LT -> an2 : merge (an1:ans1) ans2
filterLongest :: [(Int,String,[MorphoAnalysis],Int)] -> [(Int,String,[MorphoAnalysis],Int)]
filterLongest [] = []
filterLongest (an:ans) = longest an ans
where
longest prev [] = [prev]
longest prev@(start0,_,_,end0) (next@(start,_,_,end):ans)
| start0 == start = longest next ans
| otherwise = filter prev (next:ans)
filter prev [] = [prev]
filter prev@(start0,w0,an0,end0) (next@(start,w,an,end):ans)
| end0 == start && (unk w0 an0 || unk w an)
= filter (start0,w0++w,[],end) ans
| end0 <= start = prev : longest next ans
| otherwise = filter prev ans
unk w [] | any (not . isPunctuation) w = True
unk _ _ = False
fullFormLexicon :: Concr -> [(String, [MorphoAnalysis])]
fullFormLexicon lang =
@@ -1020,32 +1094,32 @@ getAnalysis ref self c_lemma c_anal prob exn = do
writeIORef ref ((lemma, anal, prob):ans)
-- | This data type encodes the different outcomes which you could get from the parser.
data ParseOutput
data ParseOutput a
= ParseFailed Int String -- ^ The integer is the position in number of unicode characters where the parser failed.
-- The string is the token where the parser have failed.
| ParseOk [(Expr,Float)] -- ^ If the parsing and the type checking are successful we get a list of abstract syntax trees.
-- The list should be non-empty.
| ParseOk a -- ^ If the parsing and the type checking are successful
-- we get the abstract syntax trees as either a list or a chart.
| ParseIncomplete -- ^ The sentence is not complete.
parse :: Concr -> Type -> String -> ParseOutput
parse :: Concr -> Type -> String -> ParseOutput [(Expr,Float)]
parse lang ty sent = parseWithHeuristics lang ty sent (-1.0) []
parseWithHeuristics :: Concr -- ^ the language with which we parse
-> Type -- ^ the start category
-> String -- ^ the input sentence
-> Double -- ^ the heuristic factor.
-- A negative value tells the parser
-- to lookup up the default from
-> Double -- ^ the heuristic factor.
-- A negative value tells the parser
-- to lookup up the default from
-- the grammar flags
-> [(Cat, Int -> Int -> Maybe (Expr,Float,Int))]
-> [(Cat, String -> Int -> Maybe (Expr,Float,Int))]
-- ^ a list of callbacks for literal categories.
-- The arguments of the callback are:
-- the index of the constituent for the literal category;
-- the input sentence; the current offset in the sentence.
-- If a literal has been recognized then the output should
-- be Just (expr,probability,end_offset)
-> ParseOutput
parseWithHeuristics lang (Type ctype _) sent heuristic callbacks =
-> ParseOutput [(Expr,Float)]
parseWithHeuristics lang (Type ctype touchType) sent heuristic callbacks =
unsafePerformIO $
do exprPl <- gu_new_pool
parsePl <- gu_new_pool
@@ -1085,7 +1159,137 @@ parseWithHeuristics lang (Type ctype _) sent heuristic callbacks =
exprs <- fromPgfExprEnum enum parseFPl (touchConcr lang >> touchForeignPtr exprFPl)
return (ParseOk exprs)
mkCallbacksMap :: Ptr PgfConcr -> [(String, Int -> Int -> Maybe (Expr,Float,Int))] -> Ptr GuPool -> IO (Ptr PgfCallbacksMap)
parseToChart :: Concr -- ^ the language with which we parse
-> Type -- ^ the start category
-> String -- ^ the input sentence
-> Double -- ^ the heuristic factor.
-- A negative value tells the parser
-- to lookup up the default from
-- the grammar flags
-> [(Cat, String -> Int -> Maybe (Expr,Float,Int))]
-- ^ a list of callbacks for literal categories.
-- The arguments of the callback are:
-- the index of the constituent for the literal category;
-- the input sentence; the current offset in the sentence.
-- If a literal has been recognized then the output should
-- be Just (expr,probability,end_offset)
-> Int -- ^ the maximal number of roots
-> ParseOutput ([FId],Map.Map FId ([(Int,Int,String)],[(Expr,[PArg],Float)],Cat))
parseToChart lang (Type ctype touchType) sent heuristic callbacks roots =
unsafePerformIO $
withGuPool $ \parsePl -> do
do exn <- gu_new_exn parsePl
sent <- newUtf8CString sent parsePl
callbacks_map <- mkCallbacksMap (concr lang) callbacks parsePl
ps <- pgf_parse_to_chart (concr lang) ctype sent heuristic callbacks_map (fromIntegral roots) exn parsePl parsePl
touchType
failed <- gu_exn_is_raised exn
if failed
then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
if is_parse_error
then do c_err <- (#peek GuExn, data.data) exn
c_incomplete <- (#peek PgfParseError, incomplete) c_err
if (c_incomplete :: CInt) == 0
then do c_offset <- (#peek PgfParseError, offset) c_err
token_ptr <- (#peek PgfParseError, token_ptr) c_err
token_len <- (#peek PgfParseError, token_len) c_err
tok <- peekUtf8CStringLen token_ptr token_len
touchConcr lang
return (ParseFailed (fromIntegral (c_offset :: CInt)) tok)
else do touchConcr lang
return ParseIncomplete
else do is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
if is_exn
then do c_msg <- (#peek GuExn, data.data) exn
msg <- peekUtf8CString c_msg
touchConcr lang
throwIO (PGFError msg)
else do touchConcr lang
throwIO (PGFError "Parsing failed")
else do c_roots <- pgf_get_parse_roots ps parsePl
let get_range c_ccat = pgf_ccat_to_range ps c_ccat parsePl
c_len <- (#peek GuSeq, len) c_roots
chart <- peekCCats get_range Map.empty (c_len :: CSizeT) (c_roots `plusPtr` (#offset GuSeq, data))
touchConcr lang
return (ParseOk chart)
where
peekCCats get_range chart 0 ptr = return ([],chart)
peekCCats get_range chart len ptr = do
(root, chart) <- deRef (peekCCat get_range chart) ptr
(roots,chart) <- peekCCats get_range chart (len-1) (ptr `plusPtr` (#size PgfCCat*))
return (root:roots,chart)
peekCCat get_range chart c_ccat = do
fid <- peekFId c_ccat
c_total_cats <- (#peek PgfConcr, total_cats) (concr lang)
if Map.member fid chart || fid < c_total_cats
then return (fid,chart)
else do c_cnccat <- (#peek PgfCCat, cnccat) c_ccat
c_abscat <- (#peek PgfCCat, cnccat) c_cnccat
c_name <- (#peek PgfCCat, cnccat) c_abscat
cat <- peekUtf8CString c_name
range <- get_range c_ccat >>= peekSequence peekRange (#size PgfParseRange)
c_prods <- (#peek PgfCCat, prods) c_ccat
if c_prods == nullPtr
then do return (fid,Map.insert fid (range,[],cat) chart)
else do c_len <- (#peek PgfCCat, n_synprods) c_ccat
(prods,chart) <- fixIO (\res -> peekProductions (Map.insert fid (range,fst res,cat) chart)
(fromIntegral (c_len :: CSizeT))
(c_prods `plusPtr` (#offset GuSeq, data)))
return (fid,chart)
where
peekProductions chart 0 ptr = return ([],chart)
peekProductions chart len ptr = do
(ps1,chart) <- deRef (peekProduction chart) ptr
(ps2,chart) <- peekProductions chart (len-1) (ptr `plusPtr` (#size GuVariant))
return (ps1++ps2,chart)
peekProduction chart p = do
tag <- gu_variant_tag p
dt <- gu_variant_data p
case tag of
(#const PGF_PRODUCTION_APPLY) -> do { c_cncfun <- (#peek PgfProductionApply, fun) dt ;
c_absfun <- (#peek PgfCncFun, absfun) c_cncfun ;
expr <- (#peek PgfAbsFun, ep.expr) c_absfun ;
p <- (#peek PgfAbsFun, ep.prob) c_absfun ;
c_args <- (#peek PgfProductionApply, args) dt ;
c_len <- (#peek GuSeq, len) c_args ;
(pargs,chart) <- peekPArgs chart (c_len :: CSizeT) (c_args `plusPtr` (#offset GuSeq, data)) ;
return ([(Expr expr (touchConcr lang), pargs, p)],chart) }
(#const PGF_PRODUCTION_COERCE) -> do { c_coerce <- (#peek PgfProductionCoerce, coerce) dt ;
(fid,chart) <- peekCCat get_range chart c_coerce ;
return (maybe [] snd3 (Map.lookup fid chart),chart) }
(#const PGF_PRODUCTION_EXTERN) -> do { c_ep <- (#peek PgfProductionExtern, ep) dt ;
expr <- (#peek PgfExprProb, expr) c_ep ;
p <- (#peek PgfExprProb, prob) c_ep ;
return ([(Expr expr (touchConcr lang), [], p)],chart) }
_ -> error ("Unknown production type "++show tag++" in the grammar")
snd3 (_,x,_) = x
peekPArgs chart 0 ptr = return ([],chart)
peekPArgs chart len ptr = do
(a, chart) <- peekPArg chart ptr
(as,chart) <- peekPArgs chart (len-1) (ptr `plusPtr` (#size PgfPArg))
return (a:as,chart)
peekPArg chart ptr = do
c_hypos <- (#peek PgfPArg, hypos) ptr
hypos <- if c_hypos /= nullPtr
then do res <- peekSequence (deRef peekFId) (#size int) c_hypos
return [(fid,fid) | fid <- res]
else return []
c_ccat <- (#peek PgfPArg, ccat) ptr
(fid,chart) <- peekCCat get_range chart c_ccat
return (PArg hypos fid,chart)
peekRange ptr = do
s <- (#peek PgfParseRange, start) ptr
e <- (#peek PgfParseRange, end) ptr
f <- (#peek PgfParseRange, field) ptr >>= peekCString
return ((fromIntegral :: CSizeT -> Int) s, (fromIntegral :: CSizeT -> Int) e, f)
mkCallbacksMap :: Ptr PgfConcr -> [(String, String -> Int -> Maybe (Expr,Float,Int))] -> Ptr GuPool -> IO (Ptr PgfCallbacksMap)
mkCallbacksMap concr callbacks pool = do
callbacks_map <- pgf_new_callbacks_map concr pool
forM_ callbacks $ \(cat,match) -> do
@@ -1095,23 +1299,15 @@ mkCallbacksMap concr callbacks pool = do
hspgf_callbacks_map_add_literal concr callbacks_map ccat match predict pool
return callbacks_map
where
match_callback match clin_idx poffset out_pool = do
match_callback match c_ann poffset out_pool = do
coffset <- peek poffset
case match (fromIntegral clin_idx) (fromIntegral coffset) of
ann <- peekUtf8CString c_ann
case match ann (fromIntegral coffset) of
Nothing -> return nullPtr
Just (e,prob,offset') -> do poke poffset (fromIntegral offset')
-- here we copy the expression to out_pool
c_e <- withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
(sb,out) <- newOut tmpPl
let printCtxt = nullPtr
pgf_print_expr (expr e) printCtxt 1 out exn
c_str <- gu_string_buf_freeze sb tmpPl
guin <- gu_string_in c_str tmpPl
pgf_read_expr guin out_pool tmpPl exn
c_e <- pgf_clone_expr (expr e) out_pool
ep <- gu_malloc out_pool (#size PgfExprProb)
(#poke PgfExprProb, expr) ep c_e
@@ -1120,26 +1316,6 @@ mkCallbacksMap concr callbacks pool = do
predict_callback _ _ _ = return nullPtr
complete :: Concr -- ^ the language with which we do word completion
-> Type -- ^ the start category
-> String -- ^ the input sentence
-> String -- ^ prefix for the word to be completed
-> [(String, Cat, Fun, Float)]
complete lang (Type ctype _) sent prefix =
unsafePerformIO $
do pl <- gu_new_pool
exn <- gu_new_exn pl
sent <- newUtf8CString sent pl
prefix <- newUtf8CString prefix pl
enum <- pgf_complete (concr lang) ctype sent prefix exn pl
failed <- gu_exn_is_raised exn
if failed
then do gu_pool_free pl
return []
else do fpl <- newForeignPtr gu_pool_finalizer pl
tokens <- fromPgfTokenEnum enum fpl
return tokens
lookupSentence :: Concr -- ^ the language with which we parse
-> Type -- ^ the start category
-> String -- ^ the input sentence
@@ -1158,7 +1334,7 @@ lookupSentence lang (Type ctype _) sent =
-- | The oracle is a triple of functions.
-- The first two take a category name and a linearization field name
-- and they should return True/False when the corresponding
-- and they should return True/False when the corresponding
-- prediction or completion is appropriate. The third function
-- is the oracle for literals.
type Oracle = (Maybe (Cat -> String -> Int -> Bool)
@@ -1170,7 +1346,7 @@ parseWithOracle :: Concr -- ^ the language with which we parse
-> Cat -- ^ the start category
-> String -- ^ the input sentence
-> Oracle
-> ParseOutput
-> ParseOutput [(Expr,Float)]
parseWithOracle lang cat sent (predict,complete,literal) =
unsafePerformIO $
do parsePl <- gu_new_pool
@@ -1246,6 +1422,67 @@ parseWithOracle lang cat sent (predict,complete,literal) =
return ep
Nothing -> do return nullPtr
-- | Returns possible completions of the current partial input.
complete :: Concr -- ^ the language with which we parse
-> Type -- ^ the start category
-> String -- ^ the input sentence (excluding token being completed)
-> String -- ^ prefix (partial token being completed)
-> ParseOutput [(String, Fun, Cat, Float)] -- ^ (token, category, function, probability)
complete lang (Type ctype _) sent pfx =
unsafePerformIO $ do
parsePl <- gu_new_pool
exn <- gu_new_exn parsePl
sent <- newUtf8CString sent parsePl
pfx <- newUtf8CString pfx parsePl
enum <- pgf_complete (concr lang) ctype sent pfx exn parsePl
failed <- gu_exn_is_raised exn
if failed
then do
is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
if is_parse_error
then do
c_err <- (#peek GuExn, data.data) exn
c_offset <- (#peek PgfParseError, offset) c_err
token_ptr <- (#peek PgfParseError, token_ptr) c_err
token_len <- (#peek PgfParseError, token_len) c_err
tok <- peekUtf8CStringLen token_ptr token_len
gu_pool_free parsePl
return (ParseFailed (fromIntegral (c_offset :: CInt)) tok)
else do
is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
if is_exn
then do
c_msg <- (#peek GuExn, data.data) exn
msg <- peekUtf8CString c_msg
gu_pool_free parsePl
throwIO (PGFError msg)
else do
gu_pool_free parsePl
throwIO (PGFError "Parsing failed")
else do
fpl <- newForeignPtr gu_pool_finalizer parsePl
ParseOk <$> fromCompletions enum fpl
where
fromCompletions :: Ptr GuEnum -> ForeignPtr GuPool -> IO [(String, Cat, Fun, Float)]
fromCompletions enum fpl =
withGuPool $ \tmpPl -> do
cmpEntry <- alloca $ \ptr ->
withForeignPtr fpl $ \pl ->
do gu_enum_next enum ptr pl
peek ptr
if cmpEntry == nullPtr
then do
finalizeForeignPtr fpl
touchConcr lang
return []
else do
tok <- peekUtf8CString =<< (#peek PgfTokenProb, tok) cmpEntry
cat <- peekUtf8CString =<< (#peek PgfTokenProb, cat) cmpEntry
fun <- peekUtf8CString =<< (#peek PgfTokenProb, fun) cmpEntry
prob <- (#peek PgfTokenProb, prob) cmpEntry
toks <- unsafeInterleaveIO (fromCompletions enum fpl)
return ((tok, cat, fun, prob) : toks)
-- | Returns True if there is a linearization defined for that function in that language
hasLinearization :: Concr -> Fun -> Bool
hasLinearization lang id = unsafePerformIO $
@@ -1319,7 +1556,7 @@ linearizeAll lang e = unsafePerformIO $
-- | Generates a table of linearizations for an expression
tabularLinearize :: Concr -> Expr -> [(String, String)]
tabularLinearize lang e =
tabularLinearize lang e =
case tabularLinearizeAll lang e of
(lins:_) -> lins
_ -> []
@@ -1331,6 +1568,7 @@ tabularLinearizeAll lang e = unsafePerformIO $
exn <- gu_new_exn tmpPl
cts <- pgf_lzr_concretize (concr lang) (expr e) exn tmpPl
failed <- gu_exn_is_raised exn
touchConcr lang
if failed
then throwExn exn
else collect cts exn tmpPl
@@ -1368,45 +1606,58 @@ tabularLinearizeAll lang e = unsafePerformIO $
ss <- collectTable lang ctree (lin_idx+1) labels exn tmpPl
return ((label,s):ss)
throwExn exn = do
is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
if is_exn
then do c_msg <- (#peek GuExn, data.data) exn
msg <- peekUtf8CString c_msg
throwIO (PGFError msg)
else do throwIO (PGFError "The abstract tree cannot be linearized")
categoryFields :: Concr -> Cat -> Maybe [String]
categoryFields lang cat =
unsafePerformIO $ do
withGuPool $ \tmpPl -> do
p_n_lins <- gu_malloc tmpPl (#size size_t)
c_cat <- newUtf8CString cat tmpPl
c_fields <- pgf_category_fields (concr lang) c_cat p_n_lins
if c_fields == nullPtr
then do touchConcr lang
return Nothing
else do len <- peek p_n_lins
fs <- peekFields len c_fields
touchConcr lang
return (Just fs)
where
peekFields 0 ptr = return []
peekFields len ptr = do
f <- peek ptr >>= peekUtf8CString
fs <- peekFields (len-1) (ptr `plusPtr` (#size GuString))
return (f:fs)
type FId = Int
type LIndex = Int
-- | BracketedString represents a sentence that is linearized
-- as usual but we also want to retain the ''brackets'' that
-- mark the beginning and the end of each constituent.
data BracketedString
= Leaf String -- ^ this is the leaf i.e. a single token
| Bracket Cat {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex Fun [BracketedString]
| BIND -- ^ the surrounding tokens must be bound together
| Bracket Cat {-# UNPACK #-} !FId String Fun [BracketedString]
-- ^ this is a bracket. The 'Cat' is the category of
-- the phrase. The 'FId' is an unique identifier for
-- every phrase in the sentence. For context-free grammars
-- i.e. without discontinuous constituents this identifier
-- is also unique for every bracket. When there are discontinuous
-- is also unique for every bracket. When there are discontinuous
-- phrases then the identifiers are unique for every phrase but
-- not for every bracket since the bracket represents a constituent.
-- The different constituents could still be distinguished by using
-- the constituent index i.e. 'LIndex'. If the grammar is reduplicating
-- the analysis string. If the grammar is reduplicating
-- then the constituent indices will be the same for all brackets
-- that represents the same constituent.
-- The 'Fun' is the name of the abstract function that generated
-- this phrase.
-- | Renders the bracketed string as a string where
-- | Renders the bracketed string as a string where
-- the brackets are shown as @(S ...)@ where
-- @S@ is the category.
showBracketedString :: BracketedString -> String
showBracketedString = render . ppBracketedString
ppBracketedString (Leaf t) = text t
ppBracketedString (Bracket cat fid index _ bss) = parens (text cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
ppBracketedString BIND = text "&+"
ppBracketedString (Bracket cat fid _ _ bss) = parens (text cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
-- | Extracts the sequence of tokens from the bracketed string
flattenBracketedString :: BracketedString -> [String]
@@ -1415,7 +1666,7 @@ flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString
bracketedLinearize :: Concr -> Expr -> [BracketedString]
bracketedLinearize lang e = unsafePerformIO $
withGuPool $ \pl ->
withGuPool $ \pl ->
do exn <- gu_new_exn pl
cts <- pgf_lzr_concretize (concr lang) (expr e) exn pl
failed <- gu_exn_is_raised exn
@@ -1428,27 +1679,8 @@ bracketedLinearize lang e = unsafePerformIO $
return []
else do ctree <- pgf_lzr_wrap_linref ctree pl
ref <- newIORef ([],[])
allocaBytes (#size PgfLinFuncs) $ \pLinFuncs ->
alloca $ \ppLinFuncs -> do
fptr_symbol_token <- wrapSymbolTokenCallback (symbol_token ref)
fptr_begin_phrase <- wrapPhraseCallback (begin_phrase ref)
fptr_end_phrase <- wrapPhraseCallback (end_phrase ref)
fptr_symbol_ne <- wrapSymbolNonExistCallback (symbol_ne exn)
fptr_symbol_meta <- wrapSymbolMetaCallback (symbol_meta ref)
(#poke PgfLinFuncs, symbol_token) pLinFuncs fptr_symbol_token
(#poke PgfLinFuncs, begin_phrase) pLinFuncs fptr_begin_phrase
(#poke PgfLinFuncs, end_phrase) pLinFuncs fptr_end_phrase
(#poke PgfLinFuncs, symbol_ne) pLinFuncs fptr_symbol_ne
(#poke PgfLinFuncs, symbol_bind) pLinFuncs nullPtr
(#poke PgfLinFuncs, symbol_capit) pLinFuncs nullPtr
(#poke PgfLinFuncs, symbol_meta) pLinFuncs fptr_symbol_meta
poke ppLinFuncs pLinFuncs
pgf_lzr_linearize (concr lang) ctree 0 ppLinFuncs pl
freeHaskellFunPtr fptr_symbol_token
freeHaskellFunPtr fptr_begin_phrase
freeHaskellFunPtr fptr_end_phrase
freeHaskellFunPtr fptr_symbol_ne
freeHaskellFunPtr fptr_symbol_meta
withBracketLinFuncs ref exn $ \ppLinFuncs ->
pgf_lzr_linearize (concr lang) ctree 0 ppLinFuncs pl
failed <- gu_exn_is_raised exn
if failed
then do is_nonexist <- gu_exn_caught exn gu_exn_type_PgfLinNonExist
@@ -1457,41 +1689,105 @@ bracketedLinearize lang e = unsafePerformIO $
else throwExn exn
else do (_,bs) <- readIORef ref
return (reverse bs)
bracketedLinearizeAll :: Concr -> Expr -> [[BracketedString]]
bracketedLinearizeAll lang e = unsafePerformIO $
withGuPool $ \pl ->
do exn <- gu_new_exn pl
cts <- pgf_lzr_concretize (concr lang) (expr e) exn pl
failed <- gu_exn_is_raised exn
if failed
then do touchExpr e
throwExn exn
else do ref <- newIORef ([],[])
bss <- withBracketLinFuncs ref exn $ \ppLinFuncs ->
collect ref cts ppLinFuncs exn pl
touchExpr e
return bss
where
collect ref cts ppLinFuncs exn pl = withGuPool $ \tmpPl -> do
ctree <- alloca $ \ptr -> do gu_enum_next cts ptr tmpPl
peek ptr
if ctree == nullPtr
then return []
else do ctree <- pgf_lzr_wrap_linref ctree pl
pgf_lzr_linearize (concr lang) ctree 0 ppLinFuncs pl
failed <- gu_exn_is_raised exn
if failed
then do is_nonexist <- gu_exn_caught exn gu_exn_type_PgfLinNonExist
if is_nonexist
then collect ref cts ppLinFuncs exn pl
else throwExn exn
else do (_,bs) <- readIORef ref
writeIORef ref ([],[])
bss <- collect ref cts ppLinFuncs exn pl
return (reverse bs : bss)
withBracketLinFuncs ref exn f =
allocaBytes (#size PgfLinFuncs) $ \pLinFuncs ->
alloca $ \ppLinFuncs -> do
fptr_symbol_token <- wrapSymbolTokenCallback (symbol_token ref)
fptr_begin_phrase <- wrapPhraseCallback (begin_phrase ref)
fptr_end_phrase <- wrapPhraseCallback (end_phrase ref)
fptr_symbol_ne <- wrapSymbolNonExistCallback (symbol_ne exn)
fptr_symbol_bind <- wrapSymbolBindCallback (symbol_bind ref)
fptr_symbol_meta <- wrapSymbolMetaCallback (symbol_meta ref)
(#poke PgfLinFuncs, symbol_token) pLinFuncs fptr_symbol_token
(#poke PgfLinFuncs, begin_phrase) pLinFuncs fptr_begin_phrase
(#poke PgfLinFuncs, end_phrase) pLinFuncs fptr_end_phrase
(#poke PgfLinFuncs, symbol_ne) pLinFuncs fptr_symbol_ne
(#poke PgfLinFuncs, symbol_bind) pLinFuncs fptr_symbol_bind
(#poke PgfLinFuncs, symbol_capit) pLinFuncs nullPtr
(#poke PgfLinFuncs, symbol_meta) pLinFuncs fptr_symbol_meta
poke ppLinFuncs pLinFuncs
res <- f ppLinFuncs
freeHaskellFunPtr fptr_symbol_token
freeHaskellFunPtr fptr_begin_phrase
freeHaskellFunPtr fptr_end_phrase
freeHaskellFunPtr fptr_symbol_ne
freeHaskellFunPtr fptr_symbol_bind
freeHaskellFunPtr fptr_symbol_meta
return res
where
symbol_token ref _ c_token = do
(stack,bs) <- readIORef ref
token <- peekUtf8CString c_token
writeIORef ref (stack,Leaf token : bs)
begin_phrase ref _ c_cat c_fid c_lindex c_fun = do
begin_phrase ref _ c_cat c_fid c_ann c_fun = do
(stack,bs) <- readIORef ref
writeIORef ref (bs:stack,[])
end_phrase ref _ c_cat c_fid c_lindex c_fun = do
end_phrase ref _ c_cat c_fid c_ann c_fun = do
(bs':stack,bs) <- readIORef ref
if null bs
then writeIORef ref (stack, bs')
else do cat <- peekUtf8CString c_cat
let fid = fromIntegral c_fid
let lindex = fromIntegral c_lindex
ann <- peekUtf8CString c_ann
fun <- peekUtf8CString c_fun
writeIORef ref (stack, Bracket cat fid lindex fun (reverse bs) : bs')
writeIORef ref (stack, Bracket cat fid ann fun (reverse bs) : bs')
symbol_ne exn _ = do
gu_exn_raise exn gu_exn_type_PgfLinNonExist
return ()
symbol_bind ref _ = do
(stack,bs) <- readIORef ref
writeIORef ref (stack,BIND : bs)
return ()
symbol_meta ref _ meta_id = do
(stack,bs) <- readIORef ref
writeIORef ref (stack,Leaf "?" : bs)
throwExn exn = do
is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
if is_exn
then do c_msg <- (#peek GuExn, data.data) exn
msg <- peekUtf8CString c_msg
throwIO (PGFError msg)
else do throwIO (PGFError "The abstract tree cannot be linearized")
throwExn exn = do
is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
if is_exn
then do c_msg <- (#peek GuExn, data.data) exn
msg <- peekUtf8CString c_msg
throwIO (PGFError msg)
else do throwIO (PGFError "The abstract tree cannot be linearized")
alignWords :: Concr -> Expr -> [(String, [Int])]
alignWords lang e = unsafePerformIO $
@@ -1684,13 +1980,13 @@ instance Exception PGFError
-----------------------------------------------------------------------
type LiteralCallback =
PGF -> (ConcName,Concr) -> String -> Int -> Int -> Maybe (Expr,Float,Int)
PGF -> (ConcName,Concr) -> String -> String -> Int -> Maybe (Expr,Float,Int)
-- | Callbacks for the App grammar
literalCallbacks :: [(AbsName,[(Cat,LiteralCallback)])]
literalCallbacks = [("App",[("PN",nerc),("Symb",chunk)])]
-- | Named entity recognition for the App grammar
-- | Named entity recognition for the App grammar
-- (based on ../java/org/grammaticalframework/pgf/NercLiteralCallback.java)
nerc :: LiteralCallback
nerc pgf (lang,concr) sentence lin_idx offset =

View File

@@ -13,7 +13,7 @@ import Data.Maybe(fromJust)
type Cat = String -- ^ Name of syntactic category
type Fun = String -- ^ Name of function
data BindType =
data BindType =
Explicit
| Implicit
deriving (Show, Eq, Ord)
@@ -32,7 +32,7 @@ instance Show Expr where
show = showExpr []
instance Eq Expr where
(Expr e1 e1_touch) == (Expr e2 e2_touch) =
(Expr e1 e1_touch) == (Expr e2 e2_touch) =
unsafePerformIO $ do
res <- pgf_expr_eq e1 e2
e1_touch >> e2_touch
@@ -107,9 +107,9 @@ unApp (Expr expr touch) =
appl <- pgf_expr_unapply expr pl
if appl == nullPtr
then return Nothing
else do
else do
fun <- peekCString =<< (#peek PgfApplication, fun) appl
arity <- (#peek PgfApplication, n_args) appl :: IO CInt
arity <- (#peek PgfApplication, n_args) appl :: IO CInt
c_args <- peekArray (fromIntegral arity) (appl `plusPtr` (#offset PgfApplication, args))
return $ Just (fun, [Expr c_arg touch | c_arg <- c_args])
@@ -145,7 +145,9 @@ unStr (Expr expr touch) =
touch
return (Just s)
-- | Constructs an expression from an integer literal
-- | Constructs an expression from an integer literal.
-- Note that the C runtime does not support long integers, and you may run into overflow issues with large values.
-- See [here](https://github.com/GrammaticalFramework/gf-core/issues/109) for more details.
mkInt :: Int -> Expr
mkInt val =
unsafePerformIO $ do

View File

@@ -6,6 +6,7 @@ module PGF2.FFI where
#include <gu/hash.h>
#include <gu/utf8.h>
#include <pgf/pgf.h>
#include <pgf/data.h>
import Foreign ( alloca, peek, poke, peekByteOff )
import Foreign.C
@@ -102,7 +103,7 @@ foreign import ccall unsafe "gu/file.h gu_file_in"
foreign import ccall safe "gu/enum.h gu_enum_next"
gu_enum_next :: Ptr a -> Ptr (Ptr b) -> Ptr GuPool -> IO ()
foreign import ccall unsafe "gu/string.h gu_string_buf_freeze"
gu_string_buf_freeze :: Ptr GuStringBuf -> Ptr GuPool -> IO CString
@@ -237,6 +238,16 @@ newSequence elem_size pokeElem values pool = do
pokeElem ptr x
pokeElems (ptr `plusPtr` (fromIntegral elem_size)) xs
type FId = Int
data PArg = PArg [(FId,FId)] {-# UNPACK #-} !FId deriving (Eq,Ord,Show)
peekFId :: Ptr a -> IO FId
peekFId c_ccat = do
c_fid <- (#peek PgfCCat, fid) c_ccat
return (fromIntegral (c_fid :: CInt))
deRef peekValue ptr = peek ptr >>= peekValue
------------------------------------------------------------------
-- libpgf API
@@ -245,6 +256,7 @@ data PgfApplication
data PgfConcr
type PgfExpr = Ptr ()
data PgfExprProb
data PgfTokenProb
data PgfExprParser
data PgfFullFormEntry
data PgfMorphoCallback
@@ -261,6 +273,7 @@ data PgfAbsCat
data PgfCCat
data PgfCncFun
data PgfProductionApply
data PgfParsing
foreign import ccall "pgf/pgf.h pgf_read"
pgf_read :: CString -> Ptr GuPool -> Ptr GuExn -> IO (Ptr PgfPGF)
@@ -310,6 +323,9 @@ foreign import ccall "pgf/pgf.h pgf_category_context"
foreign import ccall "pgf/pgf.h pgf_category_prob"
pgf_category_prob :: Ptr PgfPGF -> CString -> IO (#type prob_t)
foreign import ccall "pgf/pgf.h pgf_category_fields"
pgf_category_fields :: Ptr PgfConcr -> CString -> Ptr CSize -> IO (Ptr CString)
foreign import ccall "pgf/pgf.h pgf_iter_functions"
pgf_iter_functions :: Ptr PgfPGF -> Ptr GuMapItor -> Ptr GuExn -> IO ()
@@ -347,8 +363,9 @@ foreign import ccall "pgf/pgf.h pgf_lzr_get_table"
pgf_lzr_get_table :: Ptr PgfConcr -> Ptr PgfCncTree -> Ptr CSizeT -> Ptr (Ptr CString) -> IO ()
type SymbolTokenCallback = Ptr (Ptr PgfLinFuncs) -> CString -> IO ()
type PhraseCallback = Ptr (Ptr PgfLinFuncs) -> CString -> CInt -> CSizeT -> CString -> IO ()
type PhraseCallback = Ptr (Ptr PgfLinFuncs) -> CString -> CInt -> CString -> CString -> IO ()
type NonExistCallback = Ptr (Ptr PgfLinFuncs) -> IO ()
type BindCallback = Ptr (Ptr PgfLinFuncs) -> IO ()
type MetaCallback = Ptr (Ptr PgfLinFuncs) -> CInt -> IO ()
foreign import ccall "wrapper"
@@ -360,27 +377,36 @@ foreign import ccall "wrapper"
foreign import ccall "wrapper"
wrapSymbolNonExistCallback :: NonExistCallback -> IO (FunPtr NonExistCallback)
foreign import ccall "wrapper"
wrapSymbolBindCallback :: BindCallback -> IO (FunPtr BindCallback)
foreign import ccall "wrapper"
wrapSymbolMetaCallback :: MetaCallback -> IO (FunPtr MetaCallback)
foreign import ccall "pgf/pgf.h pgf_align_words"
pgf_align_words :: Ptr PgfConcr -> PgfExpr -> Ptr GuExn -> Ptr GuPool -> IO (Ptr GuSeq)
foreign import ccall "pgf/pgf.h pgf_parse_to_chart"
pgf_parse_to_chart :: Ptr PgfConcr -> PgfType -> CString -> Double -> Ptr PgfCallbacksMap -> CSizeT -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr PgfParsing)
foreign import ccall "pgf/pgf.h pgf_get_parse_roots"
pgf_get_parse_roots :: Ptr PgfParsing -> Ptr GuPool -> IO (Ptr GuSeq)
foreign import ccall "pgf/pgf.h pgf_ccat_to_range"
pgf_ccat_to_range :: Ptr PgfParsing -> Ptr PgfCCat -> Ptr GuPool -> IO (Ptr GuSeq)
foreign import ccall "pgf/pgf.h pgf_parse_with_heuristics"
pgf_parse_with_heuristics :: Ptr PgfConcr -> PgfType -> CString -> Double -> Ptr PgfCallbacksMap -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum)
foreign import ccall "pgf/pgf.h pgf_complete"
pgf_complete :: Ptr PgfConcr -> PgfType -> CString -> CString -> Ptr GuExn -> Ptr GuPool -> IO (Ptr GuEnum)
foreign import ccall "pgf/pgf.h pgf_lookup_sentence"
pgf_lookup_sentence :: Ptr PgfConcr -> PgfType -> CString -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum)
type LiteralMatchCallback = CSizeT -> Ptr CSizeT -> Ptr GuPool -> IO (Ptr PgfExprProb)
type LiteralMatchCallback = CString -> Ptr CSizeT -> Ptr GuPool -> IO (Ptr PgfExprProb)
foreign import ccall "wrapper"
wrapLiteralMatchCallback :: LiteralMatchCallback -> IO (FunPtr LiteralMatchCallback)
type LiteralPredictCallback = CSizeT -> CString -> Ptr GuPool -> IO (Ptr PgfExprProb)
type LiteralPredictCallback = CString -> CString -> Ptr GuPool -> IO (Ptr PgfExprProb)
foreign import ccall "wrapper"
wrapLiteralPredictCallback :: LiteralPredictCallback -> IO (FunPtr LiteralPredictCallback)
@@ -406,6 +432,9 @@ foreign import ccall
foreign import ccall "pgf/pgf.h pgf_parse_with_oracle"
pgf_parse_with_oracle :: Ptr PgfConcr -> CString -> CString -> Ptr PgfOracleCallback -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum)
foreign import ccall "pgf/pgf.h pgf_complete"
pgf_complete :: Ptr PgfConcr -> PgfType -> CString -> CString -> Ptr GuExn -> Ptr GuPool -> IO (Ptr GuEnum)
foreign import ccall "pgf/pgf.h pgf_lookup_morpho"
pgf_lookup_morpho :: Ptr PgfConcr -> CString -> Ptr PgfMorphoCallback -> Ptr GuExn -> IO ()
@@ -500,9 +529,6 @@ foreign import ccall "pgf/expr.h pgf_compute"
foreign import ccall "pgf/expr.h pgf_print_expr"
pgf_print_expr :: PgfExpr -> Ptr PgfPrintContext -> CInt -> Ptr GuOut -> Ptr GuExn -> IO ()
foreign import ccall "pgf/expr.h pgf_print_expr_tuple"
pgf_print_expr_tuple :: CSizeT -> Ptr PgfExpr -> Ptr PgfPrintContext -> Ptr GuOut -> Ptr GuExn -> IO ()
foreign import ccall "pgf/expr.h pgf_print_type"
pgf_print_type :: PgfType -> Ptr PgfPrintContext -> CInt -> Ptr GuOut -> Ptr GuExn -> IO ()
@@ -518,12 +544,6 @@ foreign import ccall "pgf/pgf.h pgf_print"
foreign import ccall "pgf/expr.h pgf_read_expr"
pgf_read_expr :: Ptr GuIn -> Ptr GuPool -> Ptr GuPool -> Ptr GuExn -> IO PgfExpr
foreign import ccall "pgf/expr.h pgf_read_expr_tuple"
pgf_read_expr_tuple :: Ptr GuIn -> CSizeT -> Ptr PgfExpr -> Ptr GuPool -> Ptr GuExn -> IO CInt
foreign import ccall "pgf/expr.h pgf_read_expr_matrix"
pgf_read_expr_matrix :: Ptr GuIn -> CSizeT -> Ptr GuPool -> Ptr GuExn -> IO (Ptr GuSeq)
foreign import ccall "pgf/expr.h pgf_read_type"
pgf_read_type :: Ptr GuIn -> Ptr GuPool -> Ptr GuPool -> Ptr GuExn -> IO PgfType
@@ -544,3 +564,6 @@ foreign import ccall "pgf/data.h pgf_lzr_index"
foreign import ccall "pgf/data.h pgf_production_is_lexical"
pgf_production_is_lexical :: Ptr PgfProductionApply -> Ptr GuBuf -> Ptr GuPool -> IO (#type bool)
foreign import ccall "pgf/expr.h pgf_clone_expr"
pgf_clone_expr :: PgfExpr -> Ptr GuPool -> IO PgfExpr

View File

@@ -2,7 +2,7 @@
module PGF2.Internal(-- * Access the internal structures
FId,isPredefFId,
FunId,SeqId,Token,Production(..),PArg(..),Symbol(..),Literal(..),
FunId,SeqId,LIndex,Token,Production(..),PArg(..),Symbol(..),Literal(..),
globalFlags, abstrFlags, concrFlags,
concrTotalCats, concrCategories, concrProductions,
concrTotalFuns, concrFunction,
@@ -42,7 +42,8 @@ import Control.Exception(Exception,throwIO)
import Control.Monad(foldM,when)
import qualified Data.Map as Map
type Token = String
type Token = String
type LIndex = Int
data Symbol
= SymCat {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex
| SymLit {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex
@@ -60,7 +61,7 @@ data Production
= PApply {-# UNPACK #-} !FunId [PArg]
| PCoerce {-# UNPACK #-} !FId
deriving (Eq,Ord,Show)
data PArg = PArg [(FId,FId)] {-# UNPACK #-} !FId deriving (Eq,Ord,Show)
type FunId = Int
type SeqId = Int
data Literal =
@@ -229,10 +230,6 @@ concrProductions c fid = unsafePerformIO $ do
fid <- peekFId c_ccat
return (PArg [(fid,fid) | fid <- hypos] fid)
peekFId c_ccat = do
c_fid <- (#peek PgfCCat, fid) c_ccat
return (fromIntegral (c_fid :: CInt))
concrTotalFuns :: Concr -> FunId
concrTotalFuns c = unsafePerformIO $ do
c_cncfuns <- (#peek PgfConcr, cncfuns) (concr c)
@@ -320,8 +317,6 @@ concrSequence c seqid = unsafePerformIO $ do
forms <- peekForms (len-1) (ptr `plusPtr` (#size PgfAlternative))
return ((form,prefixes):forms)
deRef peekValue ptr = peek ptr >>= peekValue
fidString, fidInt, fidFloat, fidVar, fidStart :: FId
fidString = (-1)
fidInt = (-2)

View File

@@ -1,26 +0,0 @@
This is a binding to the new GF runtime in C.
The files are:
PGF2.hsc -- a user API similar to Python and Java APIs
PGF2/FFI.hs -- an internal module with FFI definitions for
-- the relevant C functions
HOW TO COMPILE:
cabal install
HOW TO USE:
- Import PGF to the Haskell program that you're writing.
The Cabal infrastructure will make sure to tell the compiler
where to find the relevant modules. Example:
module Main where
import PGF2
import qualified Data.Map as Map
main = do
pgf <- readPGF "Foo.pgf"
let Just english = Map.lookup "FooEng" (languages pgf)

View File

@@ -0,0 +1,56 @@
# PGF2
This is a Haskell binding to the PGF runtime written in C.
The exposed modules are:
- `PGF2`: a user API similar to Python and Java APIs
- `PGF2.Internal`: an internal module with FFI definitions for the relevant C functions
## How to compile
**Important:** You must have the C runtime already installed and available on your system.
See <https://github.com/GrammaticalFramework/gf-core/blob/master/src/runtime/c/INSTALL>
Once the runtine is installed, you can install the library to your global Cabal installation:
```
cabal install pgf2 --extra-lib-dirs=/usr/local/lib
```
or add it to your `stack.yaml` file:
```yaml
extra-deps:
- pgf2
extra-lib-dirs:
- /usr/local/lib
```
## How to use
Simply import `PGF2` in your Haskell program.
The Cabal infrastructure will make sure to tell the compiler where to find the relevant modules.
## Example
```haskell
module Main where
import PGF2
import qualified Data.Map as Map
main = do
pgf <- readPGF "App12.pgf"
let Just eng = Map.lookup "AppEng" (languages pgf)
-- Parsing
let res = parse eng (startCat pgf) "this is a small theatre"
let ParseOk ((tree,prob):rest) = res
print tree
-- Linearisation
let Just expr = readExpr "AdjCN (PositA red_A) (UseN theatre_N)"
let s = linearize eng expr
print s
```

View File

@@ -1,349 +0,0 @@
{-# LANGUAGE DeriveDataTypeable, ExistentialQuantification #-}
#include <pgf/pgf.h>
#include <gu/exn.h>
#include <sg/sg.h>
module SG( SG, openSG, closeSG
, beginTrans, commit, rollback, inTransaction
, SgId
, insertExpr, getExpr, queryExpr
, updateFtsIndex
, queryLinearization
, readTriple, showTriple
, insertTriple, getTriple
, queryTriple
, query
) where
import Foreign hiding (unsafePerformIO)
import Foreign.C
import SG.FFI
import PGF2.FFI
import PGF2.Expr
import Data.Typeable
import Control.Exception(Exception,SomeException,catch,throwIO)
import System.IO.Unsafe(unsafePerformIO,unsafeInterleaveIO)
-----------------------------------------------------------------------
-- Global database operations and types
newtype SG = SG {sg :: Ptr SgSG}
openSG :: FilePath -> IO SG
openSG fpath =
withCString fpath $ \c_fpath ->
withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
sg <- sg_open c_fpath exn
failed <- gu_exn_is_raised exn
if failed
then do is_errno <- gu_exn_caught exn gu_exn_type_GuErrno
if is_errno
then do perrno <- (#peek GuExn, data.data) exn
errno <- peek perrno
ioError (errnoToIOError "openSG" (Errno errno) Nothing (Just fpath))
else do is_sgerr <- gu_exn_caught exn gu_exn_type_SgError
if is_sgerr
then do c_msg <- (#peek GuExn, data.data) exn
msg <- peekUtf8CString c_msg
throwIO (SGError msg)
else throwIO (SGError "The database cannot be opened")
else return (SG sg)
closeSG :: SG -> IO ()
closeSG (SG sg) =
withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
sg <- sg_close sg exn
handle_sg_exn exn
beginTrans :: SG -> IO ()
beginTrans (SG sg) =
withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
sg <- sg_begin_trans sg exn
handle_sg_exn exn
commit :: SG -> IO ()
commit (SG sg) =
withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
sg <- sg_commit sg exn
handle_sg_exn exn
rollback :: SG -> IO ()
rollback (SG sg) =
withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
sg <- sg_rollback sg exn
handle_sg_exn exn
inTransaction :: SG -> IO a -> IO a
inTransaction sg f =
catch (beginTrans sg >> f >>= \x -> commit sg >> return x)
(\e -> rollback sg >> throwIO (e :: SomeException))
-----------------------------------------------------------------------
-- Expressions
insertExpr :: SG -> Expr -> IO SgId
insertExpr (SG sg) (Expr expr touch) =
withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
id <- sg_insert_expr sg expr 1 exn
touch
handle_sg_exn exn
return id
getExpr :: SG -> SgId -> IO (Maybe Expr)
getExpr (SG sg) id = do
exprPl <- gu_new_pool
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
c_expr <- sg_get_expr sg id exprPl exn
handle_sg_exn exn
if c_expr == nullPtr
then do touchForeignPtr exprFPl
return Nothing
else do return $ Just (Expr c_expr (touchForeignPtr exprFPl))
queryExpr :: SG -> Expr -> IO [(SgId,Expr)]
queryExpr (SG sg) (Expr query touch) =
withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
res <- sg_query_expr sg query tmpPl exn
touch
handle_sg_exn exn
fetchResults res exn
where
fetchResults res exn = do
exprPl <- gu_new_pool
(key,c_expr) <- alloca $ \pKey -> do
c_expr <- sg_query_next sg res pKey exprPl exn
key <- peek pKey
return (key,c_expr)
failed <- gu_exn_is_raised exn
if failed
then do gu_pool_free exprPl
sg_query_close sg res exn
handle_sg_exn exn
return []
else if c_expr == nullPtr
then do gu_pool_free exprPl
sg_query_close sg res exn
return []
else do exprFPl <- newForeignPtr gu_pool_finalizer exprPl
rest <- fetchResults res exn
return ((key,Expr c_expr (touchForeignPtr exprFPl)) : rest)
updateFtsIndex :: SG -> PGF -> IO ()
updateFtsIndex (SG sg) p = do
withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
sg_update_fts_index sg (pgf p) exn
handle_sg_exn exn
queryLinearization :: SG -> String -> IO [Expr]
queryLinearization (SG sg) query = do
exprPl <- gu_new_pool
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
(withGuPool $ \tmpPl -> do
c_query <- newUtf8CString query tmpPl
exn <- gu_new_exn tmpPl
seq <- sg_query_linearization sg c_query tmpPl exn
handle_sg_exn exn
len <- (#peek GuSeq, len) seq
ids <- peekArray (fromIntegral (len :: CInt)) (seq `plusPtr` (#offset GuSeq, data))
getExprs exprFPl exprPl exn ids)
where
getExprs exprFPl exprPl exn [] = return []
getExprs exprFPl exprPl exn (id:ids) = do
c_expr <- sg_get_expr sg id exprPl exn
handle_sg_exn exn
if c_expr == nullPtr
then getExprs exprFPl exprPl exn ids
else do let e = Expr c_expr (touchForeignPtr exprFPl)
es <- getExprs exprFPl exprPl exn ids
return (e:es)
-----------------------------------------------------------------------
-- Triples
readTriple :: String -> Maybe (Expr,Expr,Expr)
readTriple str =
unsafePerformIO $
do exprPl <- gu_new_pool
withGuPool $ \tmpPl ->
withTriple $ \triple ->
do c_str <- newUtf8CString str tmpPl
guin <- gu_string_in c_str tmpPl
exn <- gu_new_exn tmpPl
ok <- pgf_read_expr_tuple guin 3 triple exprPl exn
status <- gu_exn_is_raised exn
if (ok == 1 && not status)
then do c_expr1 <- peekElemOff triple 0
c_expr2 <- peekElemOff triple 1
c_expr3 <- peekElemOff triple 2
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
let touch = touchForeignPtr exprFPl
return $ Just (Expr c_expr1 touch,Expr c_expr2 touch,Expr c_expr3 touch)
else do gu_pool_free exprPl
return Nothing
showTriple :: Expr -> Expr -> Expr -> String
showTriple (Expr expr1 touch1) (Expr expr2 touch2) (Expr expr3 touch3) =
unsafePerformIO $
withGuPool $ \tmpPl ->
withTriple $ \triple -> do
(sb,out) <- newOut tmpPl
let printCtxt = nullPtr
exn <- gu_new_exn tmpPl
pokeElemOff triple 0 expr1
pokeElemOff triple 1 expr2
pokeElemOff triple 2 expr3
pgf_print_expr_tuple 3 triple printCtxt out exn
touch1 >> touch2 >> touch3
s <- gu_string_buf_freeze sb tmpPl
peekUtf8CString s
insertTriple :: SG -> Expr -> Expr -> Expr -> IO SgId
insertTriple (SG sg) (Expr expr1 touch1) (Expr expr2 touch2) (Expr expr3 touch3) =
withGuPool $ \tmpPl ->
withTriple $ \triple -> do
exn <- gu_new_exn tmpPl
pokeElemOff triple 0 expr1
pokeElemOff triple 1 expr2
pokeElemOff triple 2 expr3
id <- sg_insert_triple sg triple exn
touch1 >> touch2 >> touch3
handle_sg_exn exn
return id
getTriple :: SG -> SgId -> IO (Maybe (Expr,Expr,Expr))
getTriple (SG sg) id = do
exprPl <- gu_new_pool
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
let touch = touchForeignPtr exprFPl
withGuPool $ \tmpPl ->
withTriple $ \triple -> do
exn <- gu_new_exn tmpPl
res <- sg_get_triple sg id triple exprPl exn
handle_sg_exn exn
if res /= 0
then do c_expr1 <- peekElemOff triple 0
c_expr2 <- peekElemOff triple 1
c_expr3 <- peekElemOff triple 2
return (Just (Expr c_expr1 touch
,Expr c_expr2 touch
,Expr c_expr3 touch
))
else do touch
return Nothing
queryTriple :: SG -> Maybe Expr -> Maybe Expr -> Maybe Expr -> IO [(SgId,Expr,Expr,Expr)]
queryTriple (SG sg) mb_expr1 mb_expr2 mb_expr3 =
withGuPool $ \tmpPl ->
withTriple $ \triple -> do
exn <- gu_new_exn tmpPl
pokeElemOff triple 0 (toCExpr mb_expr1)
pokeElemOff triple 1 (toCExpr mb_expr2)
pokeElemOff triple 2 (toCExpr mb_expr3)
res <- sg_query_triple sg triple exn
handle_sg_exn exn
unsafeInterleaveIO (fetchResults res)
where
toCExpr Nothing = nullPtr
toCExpr (Just (Expr expr _)) = expr
fromCExpr c_expr touch Nothing = Expr c_expr touch
fromCExpr c_expr touch (Just e) = e
fetchResults res = do
exprPl <- gu_new_pool
alloca $ \pKey ->
withGuPool $ \tmpPl ->
withTriple $ \triple -> do
exn <- gu_new_exn tmpPl
r <- sg_triple_result_fetch res pKey triple exprPl exn
failed <- gu_exn_is_raised exn
if failed
then do gu_pool_free exprPl
sg_triple_result_close res exn
handle_sg_exn exn
return []
else if r == 0
then do gu_pool_free exprPl
sg_triple_result_close res exn
return []
else do exprFPl <- newForeignPtr gu_pool_finalizer exprPl
let touch = touchForeignPtr exprFPl
c_expr1 <- peekElemOff triple 0
c_expr2 <- peekElemOff triple 1
c_expr3 <- peekElemOff triple 2
key <- peek pKey
rest <- unsafeInterleaveIO (fetchResults res)
return ((key,fromCExpr c_expr1 touch mb_expr1
,fromCExpr c_expr2 touch mb_expr2
,fromCExpr c_expr3 touch mb_expr3) : rest)
query :: SG -> String -> IO [[Expr]]
query (SG sg) str =
withGuPool $ \tmpPl ->
do c_str <- newUtf8CString str tmpPl
guin <- gu_string_in c_str tmpPl
exn <- gu_new_exn tmpPl
seq <- pgf_read_expr_matrix guin 3 tmpPl exn
if seq /= nullPtr
then do count <- (#peek GuSeq, len) seq
q <- sg_query sg (count `div` 3) (seq `plusPtr` (#offset GuSeq, data)) exn
handle_sg_exn exn
n_cols <- sg_query_result_columns q
unsafeInterleaveIO (fetchResults q n_cols)
else return []
where
fetchResults q n_cols =
withGuPool $ \tmpPl -> do
exn <- gu_new_exn tmpPl
pExprs <- gu_malloc tmpPl ((#size PgfExpr) * n_cols)
exprPl <- gu_new_pool
res <- sg_query_result_fetch q pExprs exprPl exn
failed <- gu_exn_is_raised exn
if failed
then do gu_pool_free exprPl
sg_query_result_close q exn
handle_sg_exn exn
return []
else if res /= 0
then do exprFPl <- newForeignPtr gu_pool_finalizer exprPl
let touch = touchForeignPtr exprFPl
row <- fmap (map (flip Expr touch)) $ peekArray (fromIntegral n_cols) pExprs
rows <- unsafeInterleaveIO (fetchResults q n_cols)
return (row:rows)
else do gu_pool_free exprPl
sg_query_result_close q exn
return []
-----------------------------------------------------------------------
-- Exceptions
newtype SGError = SGError String
deriving (Show, Typeable)
instance Exception SGError
handle_sg_exn exn = do
failed <- gu_exn_is_raised exn
if failed
then do is_sgerr <- gu_exn_caught exn gu_exn_type_SgError
if is_sgerr
then do c_msg <- (#peek GuExn, data.data) exn
msg <- peekUtf8CString c_msg
throwIO (SGError msg)
else throwIO (SGError "Unknown database error")
else return ()
-----------------------------------------------------------------------

View File

@@ -1,84 +0,0 @@
{-# LANGUAGE ForeignFunctionInterface, MagicHash #-}
module SG.FFI where
import Foreign
import Foreign.C
import PGF2.FFI
import GHC.Ptr
import Data.Int
data SgSG
data SgQueryExprResult
data SgTripleResult
data SgQueryResult
type SgId = Int64
foreign import ccall "sg/sg.h sg_open"
sg_open :: CString -> Ptr GuExn -> IO (Ptr SgSG)
foreign import ccall "sg/sg.h sg_close"
sg_close :: Ptr SgSG -> Ptr GuExn -> IO ()
foreign import ccall "sg/sg.h sg_begin_trans"
sg_begin_trans :: Ptr SgSG -> Ptr GuExn -> IO ()
foreign import ccall "sg/sg.h sg_commit"
sg_commit :: Ptr SgSG -> Ptr GuExn -> IO ()
foreign import ccall "sg/sg.h sg_rollback"
sg_rollback :: Ptr SgSG -> Ptr GuExn -> IO ()
foreign import ccall "sg/sg.h sg_insert_expr"
sg_insert_expr :: Ptr SgSG -> PgfExpr -> CInt -> Ptr GuExn -> IO SgId
foreign import ccall "sg/sg.h sg_get_expr"
sg_get_expr :: Ptr SgSG -> SgId -> Ptr GuPool -> Ptr GuExn -> IO PgfExpr
foreign import ccall "sg/sg.h sg_query_expr"
sg_query_expr :: Ptr SgSG -> PgfExpr -> Ptr GuPool -> Ptr GuExn -> IO (Ptr SgQueryExprResult)
foreign import ccall "sg/sg.h sg_query_next"
sg_query_next :: Ptr SgSG -> Ptr SgQueryExprResult -> Ptr SgId -> Ptr GuPool -> Ptr GuExn -> IO PgfExpr
foreign import ccall "sg/sg.h sg_query_close"
sg_query_close :: Ptr SgSG -> Ptr SgQueryExprResult -> Ptr GuExn -> IO ()
foreign import ccall "sg/sg.h sg_update_fts_index"
sg_update_fts_index :: Ptr SgSG -> Ptr PgfPGF -> Ptr GuExn -> IO ()
foreign import ccall "sg/sg.h sg_query_linearization"
sg_query_linearization :: Ptr SgSG -> CString -> Ptr GuPool -> Ptr GuExn -> IO (Ptr GuSeq)
foreign import ccall "sg/sg.h sg_insert_triple"
sg_insert_triple :: Ptr SgSG -> SgTriple -> Ptr GuExn -> IO SgId
foreign import ccall "sg/sg.h sg_get_triple"
sg_get_triple :: Ptr SgSG -> SgId -> SgTriple -> Ptr GuPool -> Ptr GuExn -> IO CInt
foreign import ccall "sg/sg.h sg_query_triple"
sg_query_triple :: Ptr SgSG -> SgTriple -> Ptr GuExn -> IO (Ptr SgTripleResult)
foreign import ccall "sg/sg.h sg_triple_result_fetch"
sg_triple_result_fetch :: Ptr SgTripleResult -> Ptr SgId -> SgTriple -> Ptr GuPool -> Ptr GuExn -> IO CInt
foreign import ccall "sg/sg.h sg_triple_result_close"
sg_triple_result_close :: Ptr SgTripleResult -> Ptr GuExn -> IO ()
foreign import ccall "sg/sg.h sg_query"
sg_query :: Ptr SgSG -> CSizeT -> Ptr PgfExpr -> Ptr GuExn -> IO (Ptr SgQueryResult)
foreign import ccall "sg/sg.h sg_query_result_columns"
sg_query_result_columns :: Ptr SgQueryResult -> IO CSizeT
foreign import ccall "sg/sg.h sg_query_result_fetch"
sg_query_result_fetch :: Ptr SgQueryResult -> Ptr PgfExpr -> Ptr GuPool -> Ptr GuExn -> IO CInt
foreign import ccall "sg/sg.h sg_query_result_close"
sg_query_result_close :: Ptr SgQueryResult -> Ptr GuExn -> IO ()
type SgTriple = Ptr PgfExpr
withTriple :: (SgTriple -> IO a) -> IO a
withTriple = allocaArray 3
gu_exn_type_SgError = Ptr "SgError"# :: CString

View File

@@ -14,10 +14,10 @@ extra-source-files: README
cabal-version: >=1.10
library
exposed-modules: PGF2, PGF2.Internal, SG,
exposed-modules: PGF2, PGF2.Internal,
-- backwards compatibility API:
PGF
other-modules: PGF2.FFI, PGF2.Expr, PGF2.Type, SG.FFI
other-modules: PGF2.FFI, PGF2.Expr, PGF2.Type
build-depends: base >=4.3, containers, pretty, array, random
-- hs-source-dirs:
default-language: Haskell2010
@@ -27,11 +27,3 @@ library
cc-options: -std=c99
default-language: Haskell2010
c-sources: utils.c
executable pgf-shell
main-is: pgf-shell.hs
hs-source-dirs: examples
build-depends: base, pgf2, containers, mtl, lifted-base
default-language: Haskell2010
if impl(ghc>=7.0)
ghc-options: -rtsopts

View File

@@ -0,0 +1,3 @@
resolver: lts-6.35 # ghc 7.10.3
allow-newer: true

View File

@@ -0,0 +1 @@
resolver: lts-9.21 # ghc 8.0.2

View File

@@ -0,0 +1 @@
resolver: lts-18.0 # ghc 8.10.4

View File

@@ -0,0 +1,31 @@
#!/bin/bash
# Author: Dimitri Sabadie <dimitri.sabadie@gmail.com>
# 2015
if [ $# -lt 2 ]; then
echo "Usage: ./stack-haddock-upload.sh NAME VERSION"
exit 1
fi
dist=`stack path --dist-dir --stack-yaml ./stack.yaml 2> /dev/null`
echo -e "\033[1;36mGenerating documentation...\033[0m"
stack haddock 2> /dev/null
if [ "$?" -eq "0" ]; then
docdir=$dist/doc/html
cd $docdir || exit
doc=$1-$2-docs
echo -e "Compressing documentation from \033[1;34m$docdir\033[0m for \033[1;35m$1\033[0m-\033[1;33m$2\033[1;30m"
cp -r $1 $doc
tar -c -v -z --format=ustar -f $doc.tar.gz $doc
echo -e "\033[1;32mUploading to Hackage...\033[0m"
read -p "Hackage username: " username
read -p "Hackage password: " -s password
echo ""
curl -X PUT -H 'Content-Type: application/x-tar' -H 'Content-Encoding: gzip' --data-binary "@$doc.tar.gz" "https://$username:$password@hackage.haskell.org/package/$1-$2/docs"
exit $?
else
echo -e "\033[1;31mNot in a stack-powered project\033[0m"
fi

View File

@@ -0,0 +1,3 @@
# This is mainly here so that I can run `stack sdist` for uploading to Hackage
resolver: lts-12.26 # ghc 8.4.4

View File

@@ -4,7 +4,7 @@
typedef struct {
PgfLiteralCallback callback;
PgfExprProb* (*match)(size_t lin_idx, size_t* poffset,
PgfExprProb* (*match)(GuString ann, size_t* poffset,
GuPool *out_pool);
GuFinalizer fin;
} HSPgfLiteralCallback;
@@ -37,7 +37,7 @@ hspgf_hs2offset(GuString sentence, size_t hs_offset)
static PgfExprProb*
hspgf_match_callback(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx,
GuString ann,
GuString sentence, size_t* poffset,
GuPool *out_pool)
{
@@ -46,7 +46,7 @@ hspgf_match_callback(PgfLiteralCallback* self, PgfConcr* concr,
size_t hs_offset =
hspgf_offset2hs(sentence, *poffset);
PgfExprProb* ep =
callback->match(lin_idx, &hs_offset, out_pool);
callback->match(ann, &hs_offset, out_pool);
*poffset = hspgf_hs2offset(sentence, hs_offset);
return ep;

View File

@@ -1,8 +1,7 @@
INSTALL_PATH = /usr/local
C_SOURCES = jpgf.c jsg.c jni_utils.c
JAVA_SOURCES = $(wildcard org/grammaticalframework/pgf/*.java) \
$(wildcard org/grammaticalframework/sg/*.java)
C_SOURCES = jpgf.c jni_utils.c
JAVA_SOURCES = $(wildcard org/grammaticalframework/pgf/*.java)
JNI_INCLUDES = $(if $(wildcard /usr/lib/jvm/default-java/include/.*), -I/usr/lib/jvm/default-java/include -I/usr/lib/jvm/default-java/include/linux, \
$(if $(wildcard /usr/lib/jvm/java-1.11.0-openjdk-amd64/include/.*), -I/usr/lib/jvm/java-1.11.0-openjdk-amd64/include/ -I/usr/lib/jvm/java-1.11.0-openjdk-amd64/include/linux, \
@@ -28,13 +27,13 @@ LIBTOOL = $(if $(shell command -v glibtool 2>/dev/null), glibtool, libtool) --t
all: libjpgf.la jpgf.jar
libjpgf.la: $(patsubst %.c, %.lo, $(C_SOURCES))
$(LIBTOOL) --mode=link $(GCC) $(CFLAGS) -g -O -o libjpgf.la -shared $^ -rpath $(INSTALL_PATH)/lib -lgu -lpgf -lsg $(WINDOWS_LDFLAGS)
$(LIBTOOL) --mode=link $(GCC) $(CFLAGS) -g -O -o libjpgf.la -shared $^ -rpath $(INSTALL_PATH)/lib -lgu -lpgf $(WINDOWS_LDFLAGS)
%.lo : %.c
$(LIBTOOL) --mode=compile $(GCC) $(CFLAGS) -g -O -c $(JNI_INCLUDES) $(WINDOWS_CCFLAGS) -std=c99 -shared $< -o $@
jpgf.jar: $(patsubst %.java, %.class, $(JAVA_SOURCES))
jar -cf $@ org/grammaticalframework/pgf/*.class org/grammaticalframework/sg/*.class
jar -cf $@ org/grammaticalframework/pgf/*.class
%.class : %.java
javac $<
@@ -45,7 +44,7 @@ install: libjpgf.la jpgf.jar
doc:
javadoc org.grammaticalframework.pgf org.grammaticalframework.sg -d java-api
javadoc org.grammaticalframework.pgf -d java-api
clean:
rm -f *.lo

View File

@@ -456,7 +456,7 @@ typedef struct {
static PgfExprProb*
jpgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx,
GuString ann,
GuString sentence, size_t* poffset,
GuPool *out_pool)
{
@@ -465,8 +465,9 @@ jpgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr,
JNIEnv *env;
(*cachedJVM)->AttachCurrentThread(cachedJVM, (void**)&env, NULL);
size_t joffset = gu2j_string_offset(sentence, *poffset);
jobject result = (*env)->CallObjectMethod(env, callback->jcallback, callback->match_methodId, lin_idx, joffset);
jstring jann = gu2j_string(env, ann);
size_t joffset = gu2j_string_offset(sentence, *poffset);
jobject result = (*env)->CallObjectMethod(env, callback->jcallback, callback->match_methodId, jann, joffset);
if (result == NULL)
return NULL;
@@ -485,39 +486,8 @@ jpgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr,
PgfExprProb* ep = gu_new(PgfExprProb, out_pool);
ep->expr = gu_variant_from_ptr(get_ref(env, jexpr));
ep->expr = pgf_clone_expr(ep->expr, out_pool);
ep->prob = prob;
{
// This is an uggly hack. We first show the expression ep->expr
// and then we read it back but in out_pool. The whole purpose
// of this is to copy the expression from the temporary pool
// that was created in the Java binding to the parser pool.
// There should be a real copying function or even better
// there must be a way to avoid copying at all.
GuPool* tmp_pool = gu_local_pool();
GuExn* err = gu_exn(tmp_pool);
GuStringBuf* sbuf = gu_new_string_buf(tmp_pool);
GuOut* out = gu_string_buf_out(sbuf);
pgf_print_expr(ep->expr, NULL, 0, out, err);
GuString str = gu_string_buf_data(sbuf);
size_t len = gu_string_buf_length(sbuf);
GuIn* in = gu_data_in((uint8_t*) str, len, tmp_pool);
ep->expr = pgf_read_expr(in, out_pool, tmp_pool, err);
if (!gu_ok(err) || gu_variant_is_null(ep->expr)) {
throw_string_exception(env, "org/grammaticalframework/pgf/PGFError", "The expression cannot be parsed");
gu_pool_free(tmp_pool);
return NULL;
}
gu_pool_free(tmp_pool);
}
return ep;
}
@@ -534,7 +504,7 @@ jpgf_token_prob_enum_fin(GuFinalizer* self)
static GuEnum*
jpgf_literal_callback_predict(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx,
GuString ann,
GuString prefix,
GuPool *out_pool)
{
@@ -543,8 +513,9 @@ jpgf_literal_callback_predict(PgfLiteralCallback* self, PgfConcr* concr,
JNIEnv *env;
(*cachedJVM)->AttachCurrentThread(cachedJVM, (void**)&env, NULL);
jstring jann = gu2j_string(env, ann);
jstring jprefix = gu2j_string(env, prefix);
jobject jiterator = (*env)->CallObjectMethod(env, callback->jcallback, callback->predict_methodId, lin_idx, jprefix);
jobject jiterator = (*env)->CallObjectMethod(env, callback->jcallback, callback->predict_methodId, jann, jprefix);
if (jiterator == NULL)
return NULL;
@@ -582,8 +553,8 @@ JNIEXPORT void JNICALL Java_org_grammaticalframework_pgf_Parser_addLiteralCallba
callback->fin.fn = jpgf_literal_callback_fin;
jclass callback_class = (*env)->GetObjectClass(env, jcallback);
callback->match_methodId = (*env)->GetMethodID(env, callback_class, "match", "(II)Lorg/grammaticalframework/pgf/LiteralCallback$CallbackResult;");
callback->predict_methodId = (*env)->GetMethodID(env, callback_class, "predict", "(ILjava/lang/String;)Ljava/util/Iterator;");
callback->match_methodId = (*env)->GetMethodID(env, callback_class, "match", "(Ljava/lang/String;I)Lorg/grammaticalframework/pgf/LiteralCallback$CallbackResult;");
callback->predict_methodId = (*env)->GetMethodID(env, callback_class, "predict", "(Ljava/lang/String;Ljava/lang/String;)Ljava/util/Iterator;");
gu_pool_finally(pool, &callback->fin);
@@ -964,7 +935,7 @@ pgf_bracket_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
}
static void
pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun)
pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
@@ -973,7 +944,7 @@ pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t li
}
static void
pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun)
pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
JNIEnv* env = state->env;
@@ -983,6 +954,7 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lind
if (gu_buf_length(state->list) > 0) {
jstring jcat = gu2j_string(env, cat);
jstring jfun = gu2j_string(env, fun);
jstring jann = gu2j_string(env, ann);
size_t len = gu_buf_length(state->list);
jobjectArray jchildren = (*env)->NewObjectArray(env, len, state->object_class, NULL);
@@ -998,10 +970,11 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lind
jcat,
jfun,
fid,
lindex,
jann,
jchildren);
(*env)->DeleteLocalRef(env, jchildren);
(*env)->DeleteLocalRef(env, jann);
(*env)->DeleteLocalRef(env, jfun);
(*env)->DeleteLocalRef(env, jcat);
@@ -1051,7 +1024,7 @@ Java_org_grammaticalframework_pgf_Concr_bracketedLinearize(JNIEnv* env, jobject
jclass bracket_class = (*env)->FindClass(env, "org/grammaticalframework/pgf/Bracket");
if (!bracket_class)
return NULL;
jmethodID bracket_constrId = (*env)->GetMethodID(env, bracket_class, "<init>", "(Ljava/lang/String;Ljava/lang/String;II[Ljava/lang/Object;)V");
jmethodID bracket_constrId = (*env)->GetMethodID(env, bracket_class, "<init>", "(Ljava/lang/String;Ljava/lang/String;ILjava/lang/String;[Ljava/lang/Object;)V");
if (!bracket_constrId)
return NULL;

View File

@@ -1,339 +0,0 @@
#include <jni.h>
#include <sg/sg.h>
#include <pgf/expr.h>
#include <pgf/linearizer.h>
#include "jni_utils.h"
JNIEXPORT jobject JNICALL
Java_org_grammaticalframework_sg_SG_openSG(JNIEnv *env, jclass cls, jstring path)
{
GuPool* tmp_pool = gu_local_pool();
// Create an exception frame that catches all errors.
GuExn* err = gu_exn(tmp_pool);
const char *fpath = (*env)->GetStringUTFChars(env, path, 0);
// Read the PGF grammar.
SgSG* sg = sg_open(fpath, err);
(*env)->ReleaseStringUTFChars(env, path, fpath);
if (!gu_ok(err)) {
GuString msg;
if (gu_exn_caught(err, SgError)) {
msg = (GuString) gu_exn_caught_data(err);
} else {
msg = "The database cannot be opened";
}
throw_string_exception(env, "org/grammaticalframework/sg/SGError", msg);
gu_pool_free(tmp_pool);
return NULL;
}
gu_pool_free(tmp_pool);
jmethodID constrId = (*env)->GetMethodID(env, cls, "<init>", "(J)V");
return (*env)->NewObject(env, cls, constrId, p2l(sg));
}
JNIEXPORT void JNICALL
Java_org_grammaticalframework_sg_SG_close(JNIEnv *env, jobject self)
{
GuPool* tmp_pool = gu_local_pool();
// Create an exception frame that catches all errors.
GuExn* err = gu_exn(tmp_pool);
sg_close(get_ref(env, self), err);
if (!gu_ok(err)) {
GuString msg;
if (gu_exn_caught(err, SgError)) {
msg = (GuString) gu_exn_caught_data(err);
} else {
msg = "The database cannot be closed";
}
throw_string_exception(env, "org/grammaticalframework/sg/SGError", msg);
gu_pool_free(tmp_pool);
return;
}
gu_pool_free(tmp_pool);
}
JNIEXPORT jobjectArray JNICALL
Java_org_grammaticalframework_sg_SG_readTriple(JNIEnv *env, jclass cls, jstring s)
{
GuPool* pool = gu_new_pool();
GuPool* tmp_pool = gu_local_pool();
GuString buf = j2gu_string(env, s, tmp_pool);
GuIn* in = gu_data_in((uint8_t*) buf, strlen(buf), tmp_pool);
GuExn* err = gu_exn(tmp_pool);
const int len = 3;
PgfExpr exprs[len];
int res = pgf_read_expr_tuple(in, 3, exprs, pool, err);
if (!gu_ok(err) || res == 0) {
throw_string_exception(env, "org/grammaticalframework/pgf/PGFError", "The expression cannot be parsed");
gu_pool_free(tmp_pool);
gu_pool_free(pool);
return NULL;
}
gu_pool_free(tmp_pool);
jclass pool_class = (*env)->FindClass(env, "org/grammaticalframework/pgf/Pool");
jmethodID pool_constrId = (*env)->GetMethodID(env, pool_class, "<init>", "(J)V");
jobject jpool = (*env)->NewObject(env, pool_class, pool_constrId, p2l(pool));
jclass expr_class = (*env)->FindClass(env, "org/grammaticalframework/pgf/Expr");
jmethodID expr_constrId = (*env)->GetMethodID(env, expr_class, "<init>", "(Lorg/grammaticalframework/pgf/Pool;Ljava/lang/Object;J)V");
jobjectArray array = (*env)->NewObjectArray(env, len, expr_class, NULL);
for (int i = 0; i < len; i++) {
jobject obj = (*env)->NewObject(env, expr_class, expr_constrId, jpool, NULL, p2l(gu_variant_to_ptr(exprs[i])));
(*env)->SetObjectArrayElement(env, array, i, obj);
(*env)->DeleteLocalRef(env, obj);
}
return array;
}
JNIEXPORT jobject JNICALL
Java_org_grammaticalframework_sg_SG_queryTriple(JNIEnv *env, jobject self,
jobject jsubj,
jobject jpred,
jobject jobj)
{
SgSG *sg = get_ref(env, self);
GuPool* tmp_pool = gu_local_pool();
GuExn* err = gu_exn(tmp_pool);
SgTriple triple;
triple[0] = (jsubj == NULL) ? gu_null_variant
: gu_variant_from_ptr((void*) get_ref(env, jsubj));
triple[1] = (jpred == NULL) ? gu_null_variant
: gu_variant_from_ptr((void*) get_ref(env, jpred));
triple[2] = (jobj == NULL) ? gu_null_variant
: gu_variant_from_ptr((void*) get_ref(env, jobj));
SgTripleResult* res = sg_query_triple(sg, triple, err);
if (!gu_ok(err)) {
GuString msg;
if (gu_exn_caught(err, SgError)) {
msg = (GuString) gu_exn_caught_data(err);
} else {
msg = "The query failed";
}
throw_string_exception(env, "org/grammaticalframework/sg/SGError", msg);
gu_pool_free(tmp_pool);
return NULL;
}
gu_pool_free(tmp_pool);
jclass res_class = (*env)->FindClass(env, "org/grammaticalframework/sg/TripleResult");
jmethodID constrId = (*env)->GetMethodID(env, res_class, "<init>", "(JLorg/grammaticalframework/pgf/Expr;Lorg/grammaticalframework/pgf/Expr;Lorg/grammaticalframework/pgf/Expr;)V");
jobject jres = (*env)->NewObject(env, res_class, constrId, p2l(res), jsubj, jpred, jobj);
return jres;
}
JNIEXPORT jboolean JNICALL
Java_org_grammaticalframework_sg_TripleResult_hasNext(JNIEnv *env, jobject self)
{
SgTripleResult *res = get_ref(env, self);
GuPool* tmp_pool = gu_local_pool();
GuPool* out_pool = gu_new_pool();
GuExn* err = gu_exn(tmp_pool);
SgId key;
SgTriple triple;
int r = sg_triple_result_fetch(res, &key, triple, out_pool, err);
if (!gu_ok(err)) {
GuString msg;
if (gu_exn_caught(err, SgError)) {
msg = (GuString) gu_exn_caught_data(err);
} else {
msg = "The fetch failed";
}
throw_string_exception(env, "org/grammaticalframework/sg/SGError", msg);
gu_pool_free(out_pool);
gu_pool_free(tmp_pool);
return JNI_FALSE;
}
gu_pool_free(tmp_pool);
if (r) {
SgTriple orig_triple;
sg_triple_result_get_query(res, orig_triple);
jclass pool_class = (*env)->FindClass(env, "org/grammaticalframework/pgf/Pool");
jmethodID pool_constrId = (*env)->GetMethodID(env, pool_class, "<init>", "(J)V");
jobject jpool = (*env)->NewObject(env, pool_class, pool_constrId, p2l(out_pool));
jclass expr_class = (*env)->FindClass(env, "org/grammaticalframework/pgf/Expr");
jmethodID constrId = (*env)->GetMethodID(env, expr_class, "<init>", "(Lorg/grammaticalframework/pgf/Pool;Ljava/lang/Object;J)V");
jclass result_class = (*env)->GetObjectClass(env, self);
jfieldID keyId = (*env)->GetFieldID(env, result_class, "key", "J");
(*env)->SetLongField(env, self, keyId, key);
if (triple[0] != orig_triple[0]) {
jfieldID subjId = (*env)->GetFieldID(env, result_class, "subj", "Lorg/grammaticalframework/pgf/Expr;");
jobject jsubj = (*env)->NewObject(env, expr_class, constrId, jpool, jpool, p2l(gu_variant_to_ptr(triple[0])));
(*env)->SetObjectField(env, self, subjId, jsubj);
}
if (triple[1] != orig_triple[1]) {
jfieldID predId = (*env)->GetFieldID(env, result_class, "pred", "Lorg/grammaticalframework/pgf/Expr;");
jobject jpred = (*env)->NewObject(env, expr_class, constrId, jpool, jpool, p2l(gu_variant_to_ptr(triple[1])));
(*env)->SetObjectField(env, self, predId, jpred);
}
if (triple[2] != orig_triple[2]) {
jfieldID objId = (*env)->GetFieldID(env, result_class, "obj", "Lorg/grammaticalframework/pgf/Expr;");
jobject jobj = (*env)->NewObject(env, expr_class, constrId, jpool, jpool, p2l(gu_variant_to_ptr(triple[2])));
(*env)->SetObjectField(env, self, objId, jobj);
}
return JNI_TRUE;
} else {
gu_pool_free(out_pool);
return JNI_FALSE;
}
}
JNIEXPORT void JNICALL
Java_org_grammaticalframework_sg_TripleResult_close(JNIEnv *env, jobject self)
{
SgTripleResult *res = get_ref(env, self);
GuPool* tmp_pool = gu_local_pool();
GuExn* err = gu_exn(tmp_pool);
sg_triple_result_close(res, err);
if (!gu_ok(err)) {
GuString msg;
if (gu_exn_caught(err, SgError)) {
msg = (GuString) gu_exn_caught_data(err);
} else {
msg = "Closing the result failed";
}
throw_string_exception(env, "org/grammaticalframework/sg/SGError", msg);
}
gu_pool_free(tmp_pool);
}
JNIEXPORT void JNICALL
Java_org_grammaticalframework_sg_SG_beginTrans(JNIEnv *env, jobject self)
{
GuPool* tmp_pool = gu_local_pool();
// Create an exception frame that catches all errors.
GuExn* err = gu_exn(tmp_pool);
sg_begin_trans(get_ref(env, self), err);
if (!gu_ok(err)) {
GuString msg;
if (gu_exn_caught(err, SgError)) {
msg = (GuString) gu_exn_caught_data(err);
} else {
msg = "The transaction cannot be started";
}
throw_string_exception(env, "org/grammaticalframework/sg/SGError", msg);
gu_pool_free(tmp_pool);
return;
}
gu_pool_free(tmp_pool);
}
JNIEXPORT void JNICALL
Java_org_grammaticalframework_sg_SG_commit(JNIEnv *env, jobject self)
{
GuPool* tmp_pool = gu_local_pool();
// Create an exception frame that catches all errors.
GuExn* err = gu_exn(tmp_pool);
sg_commit(get_ref(env, self), err);
if (!gu_ok(err)) {
GuString msg;
if (gu_exn_caught(err, SgError)) {
msg = (GuString) gu_exn_caught_data(err);
} else {
msg = "The transaction cannot be commited";
}
throw_string_exception(env, "org/grammaticalframework/sg/SGError", msg);
gu_pool_free(tmp_pool);
return;
}
gu_pool_free(tmp_pool);
}
JNIEXPORT void JNICALL
Java_org_grammaticalframework_sg_SG_rollback(JNIEnv *env, jobject self)
{
GuPool* tmp_pool = gu_local_pool();
// Create an exception frame that catches all errors.
GuExn* err = gu_exn(tmp_pool);
sg_rollback(get_ref(env, self), err);
if (!gu_ok(err)) {
GuString msg;
if (gu_exn_caught(err, SgError)) {
msg = (GuString) gu_exn_caught_data(err);
} else {
msg = "The transaction cannot be rolled back";
}
throw_string_exception(env, "org/grammaticalframework/sg/SGError", msg);
gu_pool_free(tmp_pool);
return;
}
gu_pool_free(tmp_pool);
}
JNIEXPORT jlong JNICALL
Java_org_grammaticalframework_sg_SG_insertTriple(JNIEnv *env, jobject self,
jobject jsubj,
jobject jpred,
jobject jobj)
{
SgSG *sg = get_ref(env, self);
GuPool* tmp_pool = gu_local_pool();
GuExn* err = gu_exn(tmp_pool);
SgTriple triple;
triple[0] = gu_variant_from_ptr((void*) get_ref(env, jsubj));
triple[1] = gu_variant_from_ptr((void*) get_ref(env, jpred));
triple[2] = gu_variant_from_ptr((void*) get_ref(env, jobj));
SgId id = sg_insert_triple(sg, triple, err);
if (!gu_ok(err)) {
GuString msg;
if (gu_exn_caught(err, SgError)) {
msg = (GuString) gu_exn_caught_data(err);
} else {
msg = "The insertion failed";
}
throw_string_exception(env, "org/grammaticalframework/sg/SGError", msg);
gu_pool_free(tmp_pool);
return 0;
}
gu_pool_free(tmp_pool);
return id;
}

View File

@@ -14,18 +14,18 @@ public class Bracket {
* where they all will have the same id */
public final int fid;
public final int lindex;
public final String ann;
/** The children of the bracket. Every element is either a string
* if this is a leaf in the parse tree, or a {@link Bracket} object.
*/
public final Object[] children;
public Bracket(String cat, String fun, int fid, int lindex, Object[] children) {
public Bracket(String cat, String fun, int fid, String ann, Object[] children) {
this.cat = cat;
this.fun = fun;
this.fid = fid;
this.lindex = lindex;
this.ann = ann;
this.children = children;
}
}

View File

@@ -3,9 +3,9 @@ package org.grammaticalframework.pgf;
import java.util.Iterator;
public interface LiteralCallback {
public CallbackResult match(int lin_idx, int start_offset);
public CallbackResult match(String ann, int start_offset);
public Iterator<TokenProb> predict(int lin_idx, String prefix);
public Iterator<TokenProb> predict(String ann, String prefix);
public static class CallbackResult {
private ExprProb ep;

View File

@@ -19,7 +19,7 @@ public class NercLiteralCallback implements LiteralCallback {
this.sentence = sentence;
}
public CallbackResult match(int lin_idx, int offset) {
public CallbackResult match(String ann, int offset) {
StringBuilder sbuilder = new StringBuilder();
int i = 0;
@@ -83,7 +83,7 @@ public class NercLiteralCallback implements LiteralCallback {
return null;
}
public Iterator<TokenProb> predict(int lin_idx, String prefix) {
public Iterator<TokenProb> predict(String ann, String prefix) {
return Collections.<TokenProb>emptyList().iterator();
}
}

View File

@@ -15,7 +15,7 @@ public class UnknownLiteralCallback implements LiteralCallback {
this.sentence = sentence;
}
public CallbackResult match(int lin_idx, int offset) {
public CallbackResult match(String ann, int offset) {
if (offset < sentence.length() &&
!Character.isUpperCase(sentence.charAt(offset))) {
int start_offset = offset;
@@ -35,7 +35,7 @@ public class UnknownLiteralCallback implements LiteralCallback {
return null;
}
public Iterator<TokenProb> predict(int lin_idx, String prefix) {
public Iterator<TokenProb> predict(String ann, String prefix) {
return Collections.<TokenProb>emptyList().iterator();
}
}

View File

@@ -1,56 +0,0 @@
package org.grammaticalframework.sg;
import java.io.Closeable;
import org.grammaticalframework.pgf.*;
/** This class represents a connection to a semantic graph database.
* The semantic graph is a graph represented as a set of tripples
* of abstract expressions. The graph can be used for instance to store
* semantic information for entities in a GF grammar.
*/
public class SG implements Closeable {
/** Opens a new database file. */
public static native SG openSG(String path) throws SGError;
/** Closes an already opened database. */
public native void close() throws SGError;
/** Reads a triple in the format &lt;expr,expr,expr&gt; and returns it as an array. */
public static native Expr[] readTriple(String s) throws PGFError;
/** Simple triple queries.
* Each of the arguments subj, pred and obj could be null.
* A null argument is interpreted as a wild card.
* If one of the arguments is not null then only triples with matching values
* will be retrieved.
*
* @return An iterator over the matching triples.
*/
public native TripleResult queryTriple(Expr subj, Expr pred, Expr obj) throws SGError;
/** Starts a new transaction. */
public native void beginTrans() throws SGError;
/** Commits the transaction. */
public native void commit() throws SGError;
/** Rollbacks all changes made in the current transaction. */
public native void rollback() throws SGError;
/** Inserts a new triple.
* @return an unique id that identifies this triple in the database
*/
public native long insertTriple(Expr subj, Expr pred, Expr obj) throws SGError;
//////////////////////////////////////////////////////////////////
// private stuff
private long ref;
private SG(long ref) {
this.ref = ref;
}
static {
System.loadLibrary("jpgf");
}
}

View File

@@ -1,11 +0,0 @@
package org.grammaticalframework.sg;
/** This exception is thrown if an error occurs in the semantic graph.
*/
public class SGError extends RuntimeException {
private static final long serialVersionUID = -6098784400143861939L;
public SGError(String message) {
super(message);
}
}

View File

@@ -1,53 +0,0 @@
package org.grammaticalframework.sg;
import java.io.Closeable;
import org.grammaticalframework.pgf.Expr;
/** This class is used to iterate over a list of triples.
* To move to the next triple, call {@link TripleResult#hasNext}.
* When you do not need the iterator anymore then call {@link TripleResult#close}
* to release the allocated resources.
*/
public class TripleResult implements Closeable {
public native boolean hasNext();
/** Closes the iterator and releases the allocated resources. */
public native void close();
/** Each triple has an unique integer key. You can get the key for
* the current triple by calling {@link TripleResult#getKey}.
*/
public long getKey() {
return key;
}
/** This is the first element of the current triple. */
public Expr getSubject() {
return subj;
}
/** This is the second element of the current triple. */
public Expr getPredicate() {
return pred;
}
/** This is the third element of the current triple. */
public Expr getObject() {
return obj;
}
//////////////////////////////////////////////////////////////////
// private stuff
private long ref;
private long key;
private Expr subj;
private Expr pred;
private Expr obj;
private TripleResult(long ref, Expr subj, Expr pred, Expr obj) {
this.ref = ref;
this.subj = subj;
this.pred = pred;
this.obj = obj;
}
}

View File

@@ -5,6 +5,7 @@
#include <gu/mem.h>
#include <gu/map.h>
#include <gu/file.h>
#include <gu/utf8.h>
#include <pgf/pgf.h>
#include <pgf/linearizer.h>
@@ -1307,8 +1308,8 @@ static PyObject*
Concr_printName(ConcrObject* self, PyObject *args)
{
GuString id;
if (!PyArg_ParseTuple(args, "s", &id))
return NULL;
if (!PyArg_ParseTuple(args, "s", &id))
return NULL;
GuString name = pgf_print_name(self->concr, id);
if (name == NULL)
@@ -1346,9 +1347,42 @@ typedef struct {
GuFinalizer fin;
} PyPgfLiteralCallback;
#if PY_MAJOR_VERSION >= 3
static size_t
utf8_to_unicode_offset(GuString sentence, size_t offset)
{
const uint8_t* start = (uint8_t*) sentence;
const uint8_t* end = start+offset;
size_t chars = 0;
while (start < end) {
gu_utf8_decode(&start);
chars++;
}
return chars;
}
static size_t
unicode_to_utf8_offset(GuString sentence, size_t chars)
{
const uint8_t* start = (uint8_t*) sentence;
const uint8_t* end = start;
while (chars > 0) {
GuUCS ucs = gu_utf8_decode(&end);
if (ucs == 0)
break;
chars--;
}
return (end-start);
}
#endif
static PgfExprProb*
pypgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx,
GuString ann,
GuString sentence, size_t* poffset,
GuPool *out_pool)
{
@@ -1356,10 +1390,18 @@ pypgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr,
gu_container(self, PyPgfLiteralCallback, callback);
PyObject* result =
PyObject_CallFunction(callback->pycallback, "ii",
lin_idx, *poffset);
if (result == NULL)
PyObject_CallFunction(callback->pycallback, "si",
ann,
#if PY_MAJOR_VERSION >= 3
utf8_to_unicode_offset(sentence, *poffset)
#else
*poffset
#endif
);
if (result == NULL) {
PyErr_Print();
return NULL;
}
if (result == Py_None) {
Py_DECREF(result);
@@ -1369,40 +1411,17 @@ pypgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr,
PgfExprProb* ep = gu_new(PgfExprProb, out_pool);
ExprObject* pyexpr;
#if PY_MAJOR_VERSION >= 3
int chars;
if (!PyArg_ParseTuple(result, "Ofi", &pyexpr, &ep->prob, &chars))
return NULL;
*poffset = unicode_to_utf8_offset(sentence, chars);
#else
if (!PyArg_ParseTuple(result, "Ofi", &pyexpr, &ep->prob, poffset))
return NULL;
#endif
ep->expr = pyexpr->expr;
{
// This is an uggly hack. We first show the expression ep->expr
// and then we read it back but in out_pool. The whole purpose
// of this is to copy the expression from the temporary pool
// that was created in the Java binding to the parser pool.
// There should be a real copying function or even better
// there must be a way to avoid copying at all.
GuPool* tmp_pool = gu_local_pool();
GuExn* err = gu_exn(tmp_pool);
GuStringBuf* sbuf = gu_new_string_buf(tmp_pool);
GuOut* out = gu_string_buf_out(sbuf);
pgf_print_expr(ep->expr, NULL, 0, out, err);
GuIn* in = gu_data_in((uint8_t*) gu_string_buf_data(sbuf),
gu_string_buf_length(sbuf),
tmp_pool);
ep->expr = pgf_read_expr(in, out_pool, tmp_pool, err);
if (!gu_ok(err) || gu_variant_is_null(ep->expr)) {
PyErr_SetString(PGFError, "The expression cannot be parsed");
gu_pool_free(tmp_pool);
return NULL;
}
gu_pool_free(tmp_pool);
}
ep->expr = pgf_clone_expr(pyexpr->expr, out_pool);
Py_DECREF(result);
@@ -1411,7 +1430,7 @@ pypgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr,
static GuEnum*
pypgf_literal_callback_predict(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx,
GuString ann,
GuString prefix,
GuPool *out_pool)
{
@@ -1490,7 +1509,7 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
int max_count = -1;
double heuristics = -1;
PyObject* py_callbacks = NULL;
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|OidO!", kwlist,
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|OidO!", kwlist,
&sentence, &start, &max_count,
&heuristics,
&PyList_Type, &py_callbacks))
@@ -1586,10 +1605,10 @@ Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds)
PyObject* start = NULL;
GuString prefix = "";
int max_count = -1;
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|Osi", kwlist,
&sentence, &start,
&prefix, &max_count))
return NULL;
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|Osi", kwlist,
&sentence, &start,
&prefix, &max_count))
return NULL;
IterObject* pyres = (IterObject*)
pgf_IterType.tp_alloc(&pgf_IterType, 0);
@@ -1681,9 +1700,9 @@ Concr_lookupSentence(ConcrObject* self, PyObject *args, PyObject *keywds)
const char *sentence = NULL;
PyObject* start = NULL;
int max_count = -1;
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|O", kwlist,
&sentence, &start, &max_count))
return NULL;
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|O", kwlist,
&sentence, &start, &max_count))
return NULL;
IterObject* pyres = (IterObject*)
pgf_IterType.tp_alloc(&pgf_IterType, 0);
@@ -1934,7 +1953,7 @@ typedef struct {
PyObject_HEAD
PyObject* cat;
int fid;
int lindex;
PyObject* ann;
PyObject* fun;
PyObject* children;
} BracketObject;
@@ -2009,8 +2028,8 @@ static PyMemberDef Bracket_members[] = {
"the abstract function for this bracket"},
{"fid", T_INT, offsetof(BracketObject, fid), 0,
"an id which identifies this bracket in the bracketed string. If there are discontinuous phrases this id will be shared for all brackets belonging to the same phrase."},
{"lindex", T_INT, offsetof(BracketObject, lindex), 0,
"the constituent index"},
{"ann", T_OBJECT_EX, offsetof(BracketObject, ann), 0,
"the analysis of the constituent"},
{"children", T_OBJECT_EX, offsetof(BracketObject, children), 0,
"a list with the children of this bracket"},
{NULL} /* Sentinel */
@@ -2058,6 +2077,58 @@ static PyTypeObject pgf_BracketType = {
0, /*tp_new */
};
typedef struct {
PyObject_HEAD
} BINDObject;
static PyObject *
BIND_repr(BINDObject *self)
{
return PyString_FromString("&+");
}
static PyTypeObject pgf_BINDType = {
PyVarObject_HEAD_INIT(NULL, 0)
//0, /*ob_size*/
"pgf.BIND", /*tp_name*/
sizeof(BINDObject), /*tp_basicsize*/
0, /*tp_itemsize*/
0, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash */
0, /*tp_call*/
(reprfunc) BIND_repr, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
"a marker for BIND in a bracketed string", /*tp_doc*/
0, /*tp_traverse */
0, /*tp_clear */
0, /*tp_richcompare */
0, /*tp_weaklistoffset */
0, /*tp_iter */
0, /*tp_iternext */
0, /*tp_methods */
0, /*tp_members */
0, /*tp_getset */
0, /*tp_base */
0, /*tp_dict */
0, /*tp_descr_get */
0, /*tp_descr_set */
0, /*tp_dictoffset */
0, /*tp_init */
0, /*tp_alloc */
0, /*tp_new */
};
typedef struct {
PgfLinFuncs* funcs;
GuBuf* stack;
@@ -2075,7 +2146,7 @@ pgf_bracket_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
}
static void
pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun)
pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
@@ -2084,7 +2155,7 @@ pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t li
}
static void
pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun)
pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
@@ -2096,7 +2167,7 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lind
if (bracket != NULL) {
bracket->cat = PyString_FromString(cat);
bracket->fid = fid;
bracket->lindex = lindex;
bracket->ann = PyString_FromString(ann);
bracket->fun = PyString_FromString(fun);
bracket->children = state->list;
PyList_Append(parent, (PyObject*) bracket);
@@ -2109,6 +2180,16 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lind
state->list = parent;
}
static void
pgf_bracket_lzn_symbol_bind(PgfLinFuncs** funcs)
{
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
PyObject* bind = pgf_BINDType.tp_alloc(&pgf_BINDType, 0);
PyList_Append(state->list, bind);
Py_DECREF(bind);
}
static void
pgf_bracket_lzn_symbol_meta(PgfLinFuncs** funcs, PgfMetaId meta_id)
{
@@ -2120,7 +2201,7 @@ static PgfLinFuncs pgf_bracket_lin_funcs = {
.begin_phrase = pgf_bracket_lzn_begin_phrase,
.end_phrase = pgf_bracket_lzn_end_phrase,
.symbol_ne = NULL,
.symbol_bind = NULL,
.symbol_bind = pgf_bracket_lzn_symbol_bind,
.symbol_capit = NULL,
.symbol_meta = pgf_bracket_lzn_symbol_meta
};
@@ -2349,8 +2430,8 @@ pypgf_collect_morpho(PgfMorphoCallback* self,
static PyObject*
Concr_lookupMorpho(ConcrObject* self, PyObject *args) {
GuString sent;
if (!PyArg_ParseTuple(args, "s", &sent))
return NULL;
if (!PyArg_ParseTuple(args, "s", &sent))
return NULL;
GuPool *tmp_pool = gu_local_pool();
GuExn* err = gu_exn(tmp_pool);
@@ -2375,6 +2456,129 @@ Concr_lookupMorpho(ConcrObject* self, PyObject *args) {
return analyses;
}
#define PGF_MORPHOCALLBACK_NAME "pgf.MorphoCallback"
static void
pypgf_morphocallback_destructor(PyObject *capsule)
{
PyMorphoCallback* callback =
PyCapsule_GetPointer(capsule, PGF_MORPHOCALLBACK_NAME);
Py_XDECREF(callback->analyses);
}
static PyObject*
Iter_fetch_cohort(IterObject* self)
{
PgfCohortRange range =
gu_next(self->res, PgfCohortRange, self->pool);
if (range.buf == NULL)
return NULL;
PyObject* py_start = PyLong_FromSize_t(range.start.pos);
if (py_start == NULL)
return NULL;
PyObject* py_end = PyLong_FromSize_t(range.end.pos);
if (py_end == NULL) {
Py_DECREF(py_start);
return NULL;
}
PyMorphoCallback* callback =
PyCapsule_GetPointer(PyTuple_GetItem(self->container, 0),
PGF_MORPHOCALLBACK_NAME);
PyObject* py_slice =
PySlice_New(py_start, py_end, NULL);
if (py_slice == NULL) {
Py_DECREF(py_start);
Py_DECREF(py_end);
return NULL;
}
PyObject* py_w =
PyObject_GetItem(PyTuple_GetItem(self->container, 1), py_slice);
PyObject* res =
PyTuple_Pack(4, py_start, py_w, callback->analyses, py_end);
Py_DECREF(callback->analyses);
callback->analyses = PyList_New(0);
Py_DECREF(py_w);
Py_DECREF(py_slice);
Py_DECREF(py_end);
Py_DECREF(py_start);
return res;
}
static PyObject*
Concr_lookupCohorts(ConcrObject* self, PyObject *args)
{
PyObject* py_sent = NULL;
if (!PyArg_ParseTuple(args, "U", &py_sent))
return NULL;
IterObject* pyres = (IterObject*)
pgf_IterType.tp_alloc(&pgf_IterType, 0);
if (pyres == NULL)
return NULL;
pyres->pool = gu_new_pool();
pyres->source = (PyObject*) self->grammar;
Py_XINCREF(pyres->source);
PyMorphoCallback* callback = gu_new(PyMorphoCallback,pyres->pool);
callback->fn.callback = pypgf_collect_morpho;
callback->analyses = PyList_New(0);
PyObject* capsule =
PyCapsule_New(callback, PGF_MORPHOCALLBACK_NAME,
pypgf_morphocallback_destructor);
if (capsule == NULL) {
Py_DECREF(pyres);
return NULL;
}
#if PY_MAJOR_VERSION >= 3
PyObject* bytes = PyUnicode_AsUTF8String(py_sent);
if (!bytes)
return NULL;
GuString sent = PyBytes_AsString(bytes);
if (!sent) {
Py_DECREF(bytes);
return NULL;
}
#else
GuString sent = PyString_AsString(py_sent);
if (!sent)
return NULL;
#endif
pyres->container =
#if PY_MAJOR_VERSION >= 3
PyTuple_Pack(3, capsule, py_sent, bytes);
Py_DECREF(bytes);
#else
PyTuple_Pack(2, capsule, py_sent);
#endif
pyres->max_count = -1;
pyres->counter = 0;
pyres->fetch = Iter_fetch_cohort;
Py_DECREF(capsule);
GuExn* err = gu_new_exn(pyres->pool);
pyres->res = pgf_lookup_cohorts(self->concr, sent,
&callback->fn, pyres->pool, err);
if (pyres->res == NULL) {
Py_DECREF(pyres);
return NULL;
}
return (PyObject*) pyres;
}
static PyObject*
Iter_fetch_fullform(IterObject* self)
{
@@ -2445,9 +2649,9 @@ Concr_fullFormLexicon(ConcrObject* self, PyObject *args)
static PyObject*
Concr_load(ConcrObject* self, PyObject *args)
{
const char *fpath;
if (!PyArg_ParseTuple(args, "s", &fpath))
return NULL;
const char *fpath;
if (!PyArg_ParseTuple(args, "s", &fpath))
return NULL;
GuPool* tmp_pool = gu_local_pool();
@@ -2517,7 +2721,7 @@ static PyMethodDef Concr_methods[] = {
{"parse", (PyCFunction)Concr_parse, METH_VARARGS | METH_KEYWORDS,
"Parses a string and returns an iterator over the abstract trees for this sentence\n\n"
"Named arguments:\n"
"- sentence (string) or tokens (list of strings)\n"
"- sentence (string)\n"
"- cat (string); OPTIONAL, default: the startcat of the grammar\n"
"- n (int), max. trees; OPTIONAL, default: extract all trees\n"
"- heuristics (double >= 0.0); OPTIONAL, default: taken from the flags in the grammar\n"
@@ -2560,6 +2764,9 @@ static PyMethodDef Concr_methods[] = {
{"lookupMorpho", (PyCFunction)Concr_lookupMorpho, METH_VARARGS,
"Looks up a word in the lexicon of the grammar"
},
{"lookupCohorts", (PyCFunction)Concr_lookupCohorts, METH_VARARGS,
"Takes a sentence and returns all matches for lexical items from the grammar in that sentence"
},
{"fullFormLexicon", (PyCFunction)Concr_fullFormLexicon, METH_VARARGS,
"Enumerates all words in the lexicon (useful for extracting full form lexicons)"
},
@@ -2850,8 +3057,8 @@ static PyObject*
PGF_functionsByCat(PGFObject* self, PyObject *args)
{
PgfCId catname;
if (!PyArg_ParseTuple(args, "s", &catname))
return NULL;
if (!PyArg_ParseTuple(args, "s", &catname))
return NULL;
PyObject* functions = PyList_New(0);
if (functions == NULL) {
@@ -2907,9 +3114,9 @@ PGF_generateAll(PGFObject* self, PyObject *args, PyObject *keywds)
PyObject* start = NULL;
int max_count = -1;
if (!PyArg_ParseTupleAndKeywords(args, keywds, "O|i", kwlist,
&start, &max_count))
return NULL;
if (!PyArg_ParseTupleAndKeywords(args, keywds, "O|i", kwlist,
&start, &max_count))
return NULL;
IterObject* pyres = (IterObject*)
pgf_IterType.tp_alloc(&pgf_IterType, 0);
@@ -3171,12 +3378,12 @@ static PyObject*
PGF_embed(PGFObject* self, PyObject *args)
{
PgfCId modname;
if (!PyArg_ParseTuple(args, "s", &modname))
return NULL;
if (!PyArg_ParseTuple(args, "s", &modname))
return NULL;
PyObject *m = PyImport_AddModule(modname);
if (m == NULL)
return NULL;
PyObject *m = PyImport_AddModule(modname);
if (m == NULL)
return NULL;
GuPool* tmp_pool = gu_local_pool();
@@ -3304,9 +3511,9 @@ static PyTypeObject pgf_PGFType = {
static PGFObject*
pgf_readPGF(PyObject *self, PyObject *args)
{
const char *fpath;
if (!PyArg_ParseTuple(args, "s", &fpath))
return NULL;
const char *fpath;
if (!PyArg_ParseTuple(args, "s", &fpath))
return NULL;
PGFObject* py_pgf = (PGFObject*) pgf_PGFType.tp_alloc(&pgf_PGFType, 0);
py_pgf->pool = gu_new_pool();
@@ -3337,9 +3544,9 @@ pgf_readPGF(PyObject *self, PyObject *args)
static ExprObject*
pgf_readExpr(PyObject *self, PyObject *args) {
Py_ssize_t len;
const uint8_t *buf;
if (!PyArg_ParseTuple(args, "s#", &buf, &len))
return NULL;
const uint8_t *buf;
if (!PyArg_ParseTuple(args, "s#", &buf, &len))
return NULL;
ExprObject* pyexpr = (ExprObject*) pgf_ExprType.tp_alloc(&pgf_ExprType, 0);
if (pyexpr == NULL)
@@ -3367,9 +3574,9 @@ pgf_readExpr(PyObject *self, PyObject *args) {
static TypeObject*
pgf_readType(PyObject *self, PyObject *args) {
Py_ssize_t len;
const uint8_t *buf;
if (!PyArg_ParseTuple(args, "s#", &buf, &len))
return NULL;
const uint8_t *buf;
if (!PyArg_ParseTuple(args, "s#", &buf, &len))
return NULL;
TypeObject* pytype = (TypeObject*) pgf_TypeType.tp_alloc(&pgf_TypeType, 0);
if (pytype == NULL)
@@ -3424,20 +3631,23 @@ MOD_INIT(pgf)
{
PyObject *m;
if (PyType_Ready(&pgf_PGFType) < 0)
return MOD_ERROR_VAL;
if (PyType_Ready(&pgf_PGFType) < 0)
return MOD_ERROR_VAL;
if (PyType_Ready(&pgf_ConcrType) < 0)
return MOD_ERROR_VAL;
if (PyType_Ready(&pgf_ConcrType) < 0)
return MOD_ERROR_VAL;
if (PyType_Ready(&pgf_BracketType) < 0)
return MOD_ERROR_VAL;
if (PyType_Ready(&pgf_BracketType) < 0)
return MOD_ERROR_VAL;
if (PyType_Ready(&pgf_ExprType) < 0)
return MOD_ERROR_VAL;
if (PyType_Ready(&pgf_BINDType) < 0)
return MOD_ERROR_VAL;
if (PyType_Ready(&pgf_TypeType) < 0)
return MOD_ERROR_VAL;
if (PyType_Ready(&pgf_ExprType) < 0)
return MOD_ERROR_VAL;
if (PyType_Ready(&pgf_TypeType) < 0)
return MOD_ERROR_VAL;
if (PyType_Ready(&pgf_IterType) < 0)
return MOD_ERROR_VAL;
@@ -3467,10 +3677,20 @@ MOD_INIT(pgf)
PyModule_AddObject(m, "Type", (PyObject *) &pgf_TypeType);
Py_INCREF(&pgf_TypeType);
PyModule_AddObject(m, "PGF", (PyObject *) &pgf_PGFType);
Py_INCREF(&pgf_PGFType);
PyModule_AddObject(m, "Concr", (PyObject *) &pgf_ConcrType);
Py_INCREF(&pgf_ConcrType);
PyModule_AddObject(m, "Iter", (PyObject *) &pgf_IterType);
Py_INCREF(&pgf_IterType);
PyModule_AddObject(m, "Bracket", (PyObject *) &pgf_BracketType);
Py_INCREF(&pgf_BracketType);
PyModule_AddObject(m, "BIND", (PyObject *) &pgf_BINDType);
Py_INCREF(&pgf_BINDType);
return MOD_SUCCESS_VAL(m);
}

View File

@@ -17,7 +17,13 @@ pgf_module = Extension('pgf',
setup (name = 'pgf',
version = '1.0',
description = 'A binding to the PGF engine',
description = 'Python bindings to the Grammatical Framework\'s PGF runtime',
long_description="""\
Grammatical Framework (GF) is a programming language for multilingual grammar applications.
This package provides Python bindings to GF runtime, which allows you to \
parse and generate text using GF grammars compiled into the PGF format.
""",
url='https://www.grammaticalframework.org/',
author='Krasimir Angelov',
author_email='kr.angelov@gmail.com',
license='BSD',