1
0
forked from GitHub/gf-core

bring the Haskell binding a bit closer to the pure Haskell API

This commit is contained in:
krasimir
2017-01-26 12:48:22 +00:00
parent 0bbf27a5ed
commit 74b0df2b6b
15 changed files with 166 additions and 552 deletions

View File

@@ -92,19 +92,7 @@ libsg_la_SOURCES = \
sg/sg.c
libsg_la_LIBADD = libgu.la libpgf.la
bin_PROGRAMS = \
utils/pgf-print \
utils/pgf-translate \
utils/pgf-parse
utils_pgf_print_SOURCES = utils/pgf-print.c
utils_pgf_print_LDADD = libpgf.la libgu.la
utils_pgf_translate_SOURCES = utils/pgf-translate.c
utils_pgf_translate_LDADD = libpgf.la libgu.la
utils_pgf_parse_SOURCES = utils/pgf-parse.c
utils_pgf_parse_LDADD = libpgf.la libgu.la
bin_PROGRAMS =
AUTOMAKE_OPTIONS = foreign subdir-objects dist-bzip2
ACLOCAL_AMFLAGS = -I m4

View File

@@ -2106,16 +2106,16 @@ pgf_parsing_last_token(PgfParsing* ps, GuPool* pool)
}
GuEnum*
pgf_parse(PgfConcr* concr, PgfCId cat, GuString sentence,
pgf_parse(PgfConcr* concr, PgfType* typ, GuString sentence,
GuExn* err,
GuPool* pool, GuPool* out_pool)
{
PgfCallbacksMap* callbacks = pgf_new_callbacks_map(concr, out_pool);
return pgf_parse_with_heuristics(concr, cat, sentence, -1.0, callbacks, err, pool, out_pool);
return pgf_parse_with_heuristics(concr, typ, sentence, -1.0, callbacks, err, pool, out_pool);
}
GuEnum*
pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat, GuString sentence,
pgf_parse_with_heuristics(PgfConcr* concr, PgfType* typ, GuString sentence,
double heuristics,
PgfCallbacksMap* callbacks,
GuExn* err,
@@ -2132,7 +2132,7 @@ pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat, GuString sentence,
// Begin parsing a sentence with the specified category
PgfParsing* ps =
pgf_parsing_init(concr, cat, 0, sentence, heuristics, callbacks, NULL, err, pool, out_pool);
pgf_parsing_init(concr, typ->cid, 0, sentence, heuristics, callbacks, NULL, err, pool, out_pool);
if (ps == NULL) {
return NULL;
}
@@ -2159,7 +2159,7 @@ pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat, GuString sentence,
}
PgfExprEnum*
pgf_parse_with_oracle(PgfConcr* concr, PgfCId cat,
pgf_parse_with_oracle(PgfConcr* concr, PgfType* typ,
GuString sentence,
PgfOracleCallback* oracle,
GuExn* err,
@@ -2177,7 +2177,7 @@ pgf_parse_with_oracle(PgfConcr* concr, PgfCId cat,
// Begin parsing a sentence with the specified category
PgfCallbacksMap* callbacks = pgf_new_callbacks_map(concr, out_pool);
PgfParsing* ps =
pgf_parsing_init(concr, cat, 0, sentence, -1, callbacks, oracle, err, pool, out_pool);
pgf_parsing_init(concr, typ->cid, 0, sentence, -1, callbacks, oracle, err, pool, out_pool);
if (ps == NULL) {
return NULL;
}
@@ -2223,7 +2223,7 @@ pgf_parser_completions_next(GuEnum* self, void* to, GuPool* pool)
}
GuEnum*
pgf_complete(PgfConcr* concr, PgfCId cat, GuString sentence,
pgf_complete(PgfConcr* concr, PgfType* type, GuString sentence,
GuString prefix, GuExn *err, GuPool* pool)
{
if (concr->sequences == NULL ||
@@ -2239,7 +2239,7 @@ pgf_complete(PgfConcr* concr, PgfCId cat, GuString sentence,
PgfCallbacksMap* callbacks =
pgf_new_callbacks_map(concr, pool);
PgfParsing* ps =
pgf_parsing_init(concr, cat, 0, sentence, -1.0, callbacks, NULL, err, pool, pool);
pgf_parsing_init(concr, type->cid, 0, sentence, -1.0, callbacks, NULL, err, pool, pool);
if (ps == NULL) {
return NULL;
}

View File

@@ -129,7 +129,7 @@ static PgfLinFuncs pgf_metrics_lin_funcs2 = {
};
bool
pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfType* type,
double *precision, double *recall, double *exact)
{
GuPool* pool = gu_new_pool();
@@ -174,7 +174,7 @@ pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
gu_string_buf_freeze(sbuf, pool);
GuEnum* en_trees =
pgf_parse(concr, cat, sentence,
pgf_parse(concr, type, sentence,
state.err, pool, pool);
PgfExprProb* ep = gu_next(en_trees, PgfExprProb*, pool);
if (ep == NULL) {

View File

@@ -86,24 +86,35 @@ pgf_iter_categories(PgfPGF* pgf, GuMapItor* itor, GuExn* err)
}
}
PgfCId
pgf_start_cat(PgfPGF* pgf)
PgfType*
pgf_start_cat(PgfPGF* pgf, GuPool* pool)
{
PgfFlag* flag =
gu_seq_binsearch(pgf->abstract.aflags, pgf_flag_order, PgfFlag, "startcat");
if (flag == NULL)
return "S";
GuVariantInfo i = gu_variant_open(flag->value);
switch (i.tag) {
case PGF_LITERAL_STR: {
PgfLiteralStr *lstr = (PgfLiteralStr *) i.data;
return lstr->val;
}
if (flag != NULL) {
GuVariantInfo i = gu_variant_open(flag->value);
switch (i.tag) {
case PGF_LITERAL_STR: {
PgfLiteralStr *lstr = (PgfLiteralStr *) i.data;
GuPool* tmp_pool = gu_local_pool();
GuIn* in = gu_string_in(lstr->val,tmp_pool);
GuExn* err = gu_new_exn(tmp_pool);
PgfType *type = pgf_read_type(in, pool, err);
if (!gu_ok(err))
break;
gu_pool_free(tmp_pool);
return type;
}
}
}
return "S";
PgfType* type = gu_new_flex(pool, PgfType, exprs, 0);
type->hypos = gu_empty_seq();
type->cid = "S";
type->n_exprs = 0;
return type;
}
GuString

View File

@@ -53,8 +53,8 @@ pgf_language_code(PgfConcr* concr);
void
pgf_iter_categories(PgfPGF* pgf, GuMapItor* itor, GuExn* err);
PgfCId
pgf_start_cat(PgfPGF* pgf);
PgfType*
pgf_start_cat(PgfPGF* pgf, GuPool* pool);
void
pgf_iter_functions(PgfPGF* pgf, GuMapItor* itor, GuExn* err);
@@ -89,7 +89,7 @@ pgf_align_words(PgfConcr* concr, PgfExpr expr,
GuExn* err, GuPool* pool);
bool
pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfType* type,
double *precision, double *recall, double *exact);
PgfExpr
@@ -97,11 +97,11 @@ pgf_compute(PgfPGF* pgf, PgfExpr expr, GuExn* err,
GuPool* pool, GuPool* out_pool);
PgfExprEnum*
pgf_generate_all(PgfPGF* pgf, PgfCId cat,
pgf_generate_all(PgfPGF* pgf, PgfType* ty,
GuExn* err, GuPool* pool, GuPool* out_pool);
PgfExprEnum*
pgf_parse(PgfConcr* concr, PgfCId cat, GuString sentence,
pgf_parse(PgfConcr* concr, PgfType* typ, GuString sentence,
GuExn* err, GuPool* pool, GuPool* out_pool);
typedef struct PgfMorphoCallback PgfMorphoCallback;
@@ -134,7 +134,7 @@ pgf_lookup_word_prefix(PgfConcr *concr, GuString prefix,
typedef GuMap PgfCallbacksMap;
PgfExprEnum*
pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat,
pgf_parse_with_heuristics(PgfConcr* concr, PgfType* typ,
GuString sentence, double heuristics,
PgfCallbacksMap* callbacks,
GuExn* err,
@@ -159,7 +159,7 @@ struct PgfOracleCallback {
};
PgfExprEnum*
pgf_parse_with_oracle(PgfConcr* concr, PgfCId cat,
pgf_parse_with_oracle(PgfConcr* concr, PgfType* typ,
GuString sentence,
PgfOracleCallback* oracle,
GuExn* err,
@@ -172,7 +172,7 @@ typedef struct {
} PgfTokenProb;
GuEnum*
pgf_complete(PgfConcr* concr, PgfCId cat, GuString string,
pgf_complete(PgfConcr* concr, PgfType* type, GuString string,
GuString prefix, GuExn* err, GuPool* pool);
typedef struct PgfLiteralCallback PgfLiteralCallback;

View File

@@ -454,7 +454,7 @@ pgf_new_reasoner(PgfPGF* pgf, GuExn* err, GuPool* pool, GuPool* out_pool)
}
PgfExprEnum*
pgf_generate_all(PgfPGF* pgf, PgfCId cat, GuExn* err, GuPool* pool, GuPool* out_pool)
pgf_generate_all(PgfPGF* pgf, PgfType* typ, GuExn* err, GuPool* pool, GuPool* out_pool)
{
PgfReasoner* rs = pgf_new_reasoner(pgf, err, pool, out_pool);
@@ -462,9 +462,9 @@ pgf_generate_all(PgfPGF* pgf, PgfCId cat, GuExn* err, GuPool* pool, GuPool* out_
answers->parents = gu_new_buf(PgfExprState*, rs->pool);
answers->exprs = rs->exprs;
answers->outside_prob = 0;
gu_map_put(rs->table, cat, PgfAnswers*, answers);
gu_map_put(rs->table, typ->cid, PgfAnswers*, answers);
PgfAbsCat* abscat = gu_seq_binsearch(rs->abstract->cats, pgf_abscat_order, PgfAbsCat, cat);
PgfAbsCat* abscat = gu_seq_binsearch(rs->abstract->cats, pgf_abscat_order, PgfAbsCat, typ->cid);
if (abscat != NULL) {
rs->start = gu_new(PgfClosure, rs->pool);
rs->start->code = abscat->predicate;

View File

@@ -1,133 +0,0 @@
#include <gu/variant.h>
#include <gu/map.h>
#include <gu/enum.h>
#include <gu/file.h>
#include <pgf/pgf.h>
#include <pgf/data.h>
#include <pgf/literals.h>
#include <pgf/linearizer.h>
#include <pgf/expr.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
#include <time.h>
/*
 * pgf-parse: read sentences from stdin, one per line, parse each one with
 * the given grammar / start category / concrete language and print the
 * first tree returned by the parser together with its probability and
 * the CPU time spent.
 *
 * usage: pgf-parse pgf-file start-cat cnc-lang [heuristics]
 *
 * Returns EXIT_SUCCESS on normal termination (EOF on stdin) and
 * EXIT_FAILURE on a usage error, a malformed heuristics argument, a
 * grammar that cannot be read, an unknown language or a stdin read error.
 */
int main(int argc, char* argv[]) {
    // Set the character locale, so we can produce proper output.
    setlocale(LC_CTYPE, "");

    // Create the pool that is used to allocate everything
    GuPool* pool = gu_new_pool();
    int status = EXIT_SUCCESS;

    if (argc < 4 || argc > 5) {
        fprintf(stderr, "usage: %s pgf-file start-cat cnc-lang [heuristics]\n(0.0 <= heuristics < 1.0, default: 0.95)\n", argv[0]);
        status = EXIT_FAILURE;
        goto fail;
    }

    char* filename = argv[1];
    GuString cat = argv[2];
    GuString lang = argv[3];

    double heuristics = 0.95;
    if (argc == 5) {
        // atof() would silently turn a malformed argument into 0.0, so
        // use strtod() and insist that the whole argument is a number.
        // The documented range is not enforced here.
        char* rest = NULL;
        heuristics = strtod(argv[4], &rest);
        if (rest == argv[4] || *rest != '\0') {
            fprintf(stderr, "Invalid heuristics value: %s\n", argv[4]);
            status = EXIT_FAILURE;
            goto fail;
        }
    }

    // Create an exception frame that catches all errors.
    GuExn* err = gu_new_exn(pool);

    clock_t load_start = clock();

    // Read the PGF grammar.
    PgfPGF* pgf = pgf_read(filename, pool, err);

    // If an error occurred, it shows in the exception frame
    if (!gu_ok(err)) {
        fprintf(stderr, "Reading PGF failed\n");
        status = EXIT_FAILURE;
        goto fail;
    }

    // Look up the concrete syntax we are going to parse with
    PgfConcr* concr = pgf_get_language(pgf, lang);
    if (!concr) {
        fprintf(stderr, "Unknown language\n");
        status = EXIT_FAILURE;
        goto fail;
    }

    clock_t load_end = clock();
    double load_secs = ((double) (load_end - load_start)) / CLOCKS_PER_SEC;
    fprintf(stderr, "(%.0f ms) Ready to parse [heuristics=%.2f]!\n", 1000.0 * load_secs, heuristics);

    // Create an output stream for stdout
    GuOut* out = gu_file_out(stdout, pool);

    // We will keep the latest results in the 'ppool' and
    // we will iterate over them by using 'result'.
    GuPool* ppool = NULL;

    // The interactive PARSING loop.
    // XXX: This currently reads stdin directly, so it doesn't support
    // encodings properly. TODO: use a locale reader for input
    for (int ctr = 0; true; ctr++) {
        // We release the last results. Because this happens before any
        // 'break' below, no per-sentence pool is alive when the loop ends.
        if (ppool != NULL) {
            gu_pool_free(ppool);
            ppool = NULL;
        }

        // No prompt is printed; input is expected to be piped in.
        char buf[4096];
        char* line = fgets(buf, sizeof(buf), stdin);
        if (line == NULL) {
            if (ferror(stdin)) {
                fprintf(stderr, "Input error\n");
                status = EXIT_FAILURE;
            }
            break;
        } else if (strcmp(line, "") == 0) {
            // End nicely on empty input
            break;
        } else if (strcmp(line, "\n") == 0) {
            // Empty line -> skip
            continue;
        }

        // We create a temporary pool for parsing a single
        // sentence, so our memory usage doesn't increase over time.
        ppool = gu_new_pool();

        clock_t parse_start = clock();

        GuExn* parse_err = gu_new_exn(ppool);
        PgfCallbacksMap* callbacks = pgf_new_callbacks_map(concr, ppool);
        // NOTE(review): the start category is passed as a plain name here;
        // confirm this matches the pgf_parse_with_heuristics prototype of
        // the library version this tool is built against.
        GuEnum* result = pgf_parse_with_heuristics(concr, cat, line, heuristics, callbacks, parse_err, ppool, ppool);

        // Only the first tree is fetched; a parse error is reported the
        // same way as "no tree" below.
        PgfExprProb* ep = NULL;
        if (gu_ok(parse_err))
            ep = gu_next(result, PgfExprProb*, ppool);

        clock_t parse_end = clock();
        double parse_secs = ((double) (parse_end - parse_start)) / CLOCKS_PER_SEC;

        gu_printf(out, err, "%d (%.0f ms): ", ctr, 1000.0 * parse_secs);
        if (ep != NULL) {
            gu_printf(out, err, "[%.4f] (", ep->prob);
            pgf_print_expr(ep->expr, NULL, 0, out, err);
            gu_printf(out, err, ")\n");
        } else {
            gu_printf(out, err, "---\n");
        }
        gu_out_flush(out, err);
    }

fail:
    gu_pool_free(pool);
    return status;
}

View File

@@ -1,36 +0,0 @@
#include <pgf/pgf.h>
#include <pgf/data.h>
#include <gu/file.h>
#include <gu/utf8.h>
#include <locale.h>
#include <stdlib.h>
/*
 * pgf-print: load the PGF grammar named on the command line and print it
 * to stdout.
 *
 * usage: pgf-print pgf
 *
 * Returns EXIT_FAILURE on a usage error, 1 when the grammar cannot be
 * read and 0 otherwise.
 */
int main(int argc, char* argv[]) {
    // Set the character locale, so we can produce proper output.
    setlocale(LC_CTYPE, "");

    if (argc != 2) {
        fprintf(stderr, "usage: %s pgf\n", argv[0]);
        return EXIT_FAILURE;
    }

    // Everything below is allocated from this single pool.
    GuPool* pool = gu_new_pool();
    GuExn* err = gu_exn(pool);

    int status;
    PgfPGF* pgf = pgf_read(argv[1], pool, err);
    if (gu_ok(err)) {
        // Grammar loaded: print it through a stream wrapping stdout.
        GuOut* out = gu_file_out(stdout, pool);
        pgf_print(pgf, out, err);
        gu_out_flush(out, err);
        status = 0;
    } else {
        fprintf(stderr, "Reading PGF failed\n");
        status = 1;
    }

    gu_pool_free(pool);
    return status;
}

View File

@@ -1,203 +0,0 @@
#include <gu/variant.h>
#include <gu/map.h>
#include <gu/enum.h>
#include <gu/file.h>
#include <gu/exn.h>
#include <pgf/pgf.h>
#include <pgf/literals.h>
#include <pgf/linearizer.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
#include <time.h>
/*
 * Print one parse result: first the abstract syntax tree with its
 * probability, then one line per concrete tree obtained by concretizing
 * the expression in 'to_concr'.
 *
 * ep       - the expression/probability pair to print
 * to_concr - the concrete syntax used for linearization
 * out      - output stream; flushed before returning
 * err      - exception frame used for all output and linearization calls
 * ppool    - pool used for the enumeration and the linearization
 */
static void
print_result(PgfExprProb* ep, PgfConcr* to_concr,
             GuOut* out, GuExn* err, GuPool* ppool)
{
    // Write out the abstract syntax tree
    gu_printf(out, err, " [%f] ", ep->prob);
    pgf_print_expr(ep->expr, NULL, 0, out, err);
    gu_putc('\n', out, err);

    // Enumerate the concrete syntax trees corresponding
    // to the abstract tree.
    GuEnum* cts = pgf_lzr_concretize(to_concr, ep->expr, err, ppool);
    while (true) {
        PgfCncTree ctree =
            gu_next(cts, PgfCncTree, ppool);
        if (gu_variant_is_null(ctree)) {
            // A null variant marks the end of the enumeration.
            break;
        }
        gu_putc(' ', out, err);
        // Linearize the concrete tree as a simple
        // sequence of strings.
        pgf_lzr_linearize_simple(to_concr, ctree, 0, out, err, ppool);

        if (gu_exn_caught(err, PgfLinNonExist)) {
            // encountered nonExist. Unfortunately there
            // might be some output printed already. The
            // right solution should be to use GuStringBuf.
            gu_exn_clear(err);
        }
        gu_putc('\n', out, err);
        gu_out_flush(out, err);
    }

    // The flush above only runs when at least one concrete tree was
    // produced. Flush here as well so the abstract tree line is not left
    // waiting for some later flush when there is no linearization.
    gu_out_flush(out, err);
}
/*
 * pgf-translate: interactive translation loop. Each non-empty input line
 * is parsed with 'from-lang' and the first tree is printed and linearized
 * with 'to-lang'. An empty line shows the next tree for the last sentence.
 *
 * usage: pgf-translate pgf cat from-lang to-lang
 *
 * Returns EXIT_SUCCESS on normal termination (EOF on stdin) and
 * EXIT_FAILURE on a usage error, a grammar that cannot be read, an
 * unknown language or a stdin read error.
 */
int main(int argc, char* argv[]) {
    // Set the character locale, so we can produce proper output.
    setlocale(LC_CTYPE, "");

    // Create the pool that is used to allocate everything
    GuPool* pool = gu_new_pool();
    int status = EXIT_SUCCESS;

    // We will keep the latest results in the 'ppool' and
    // we will iterate over them by using 'result'.
    // 'ppool' is declared before the first goto so that the cleanup code
    // at the end can always inspect it.
    GuPool* ppool = NULL;
    GuEnum* result = NULL;

    if (argc < 5) {
        fprintf(stderr, "usage: %s pgf cat from-lang to-lang\n", argv[0]);
        status = EXIT_FAILURE;
        goto fail;
    }

    GuString filename = argv[1];
    GuString cat = argv[2];
    GuString from_lang = argv[3];
    GuString to_lang = argv[4];

    // Create an exception frame that catches all errors.
    GuExn* err = gu_new_exn(pool);

    // Read the PGF grammar.
    PgfPGF* pgf = pgf_read(filename, pool, err);

    // If an error occurred, it shows in the exception frame
    if (!gu_ok(err)) {
        fprintf(stderr, "Reading PGF failed\n");
        status = EXIT_FAILURE;
        goto fail;
    }

    // Look up the source and destination concrete syntaxes
    PgfConcr* from_concr = pgf_get_language(pgf, from_lang);
    PgfConcr* to_concr = pgf_get_language(pgf, to_lang);
    if (!from_concr || !to_concr) {
        fprintf(stderr, "Unknown language\n");
        status = EXIT_FAILURE;
        goto fail_concr;
    }

    // Register callbacks for the literal categories PN and Symb
    PgfCallbacksMap* callbacks =
        pgf_new_callbacks_map(from_concr, pool);
    pgf_callbacks_map_add_literal(from_concr, callbacks,
                                  "PN", &pgf_nerc_literal_callback);
    pgf_callbacks_map_add_literal(from_concr, callbacks,
                                  "Symb", &pgf_unknown_literal_callback);

    // Create an output stream for stdout
    GuOut* out = gu_file_out(stdout, pool);

    // The interactive translation loop.
    // XXX: This currently reads stdin directly, so it doesn't support
    // encodings properly. TODO: use a locale reader for input
    while (true) {
        fprintf(stdout, "> ");
        fflush(stdout);
        char buf[4096];
        char* line = fgets(buf, sizeof(buf), stdin);
        if (line == NULL) {
            if (ferror(stdin)) {
                fprintf(stderr, "Input error\n");
                status = EXIT_FAILURE;
            }
            break;
        } else if (strcmp(line, "") == 0) {
            // End nicely on empty input
            break;
        } else if (strcmp(line, "\n") == 0) {
            // Empty line -> show the next tree for the last sentence
            if (result != NULL) {
                clock_t start = clock();

                PgfExprProb* ep = gu_next(result, PgfExprProb*, ppool);

                clock_t end = clock();

                double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
                printf("%.2f sec\n", cpu_time_used);

                // The enumerator will return null at the
                // end of the results.
                if (ep == NULL) {
                    goto fail_parse;
                }

                print_result(ep, to_concr, out, err, ppool);
            }
            continue;
        }

        // We release the last results
        if (ppool != NULL) {
            gu_pool_free(ppool);
            ppool = NULL;
            result = NULL;
        }

        // We create a temporary pool for translating a single
        // sentence, so our memory usage doesn't increase over time.
        ppool = gu_new_pool();

        clock_t start = clock();

        GuExn* parse_err = gu_new_exn(ppool);
        // NOTE(review): the start category is passed as a plain name here;
        // confirm this matches the pgf_parse_with_heuristics prototype of
        // the library version this tool is built against.
        result =
            pgf_parse_with_heuristics(from_concr, cat, line,
                                      -1, callbacks,
                                      parse_err, ppool, ppool);
        if (!gu_ok(parse_err)) {
            if (gu_exn_caught(parse_err, PgfExn)) {
                GuString msg = gu_exn_caught_data(parse_err);
                gu_string_write(msg, out, err);
                gu_putc('\n', out, err);
            } else if (gu_exn_caught(parse_err, PgfParseError)) {
                gu_puts("Unexpected token: \"", out, err);
                GuString tok = gu_exn_caught_data(parse_err);
                gu_string_write(tok, out, err);
                gu_puts("\"\n", out, err);
            }

            goto fail_parse;
        }

        PgfExprProb* ep = gu_next(result, PgfExprProb*, ppool);

        clock_t end = clock();

        double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
        printf("%.2f sec\n", cpu_time_used);

        // The enumerator will return null at the end of the results.
        if (ep == NULL) {
            goto fail_parse;
        }

        print_result(ep, to_concr, out, err, ppool);

        continue;
    fail_parse:
        // Free all resources allocated during parsing and linearization
        gu_pool_free(ppool);
        ppool = NULL;
        result = NULL;
    }

fail_concr:
fail:
    // The loop keeps the pool of the last sentence alive so that further
    // trees can be requested; release it when leaving via 'break'.
    if (ppool != NULL) {
        gu_pool_free(ppool);
    }
    gu_pool_free(pool);
    return status;
}