From 3467a54965e532c07d43784609ddc05b54e958e5 Mon Sep 17 00:00:00 2001 From: krasimir Date: Thu, 26 Jan 2017 12:48:22 +0000 Subject: [PATCH] bring the Haskell binding a bit closer to the pure Haskell API --- src/runtime/c/Makefile.am | 14 +- src/runtime/c/pgf/parser.c | 16 +- src/runtime/c/pgf/parseval.c | 4 +- src/runtime/c/pgf/pgf.c | 35 ++-- src/runtime/c/pgf/pgf.h | 16 +- src/runtime/c/pgf/reasoner.c | 6 +- src/runtime/c/utils/pgf-parse.c | 133 --------------- src/runtime/c/utils/pgf-print.c | 36 ---- src/runtime/c/utils/pgf-translate.c | 203 ----------------------- src/runtime/haskell-bind/PGF2.hsc | 58 ++----- src/runtime/haskell-bind/PGF2/Expr.hsc | 95 +++-------- src/runtime/haskell-bind/PGF2/FFI.hs | 19 ++- src/runtime/haskell-bind/pgf2-bind.cabal | 2 +- src/runtime/java/jpgf.c | 27 ++- src/runtime/python/pypgf.c | 54 ++++-- 15 files changed, 166 insertions(+), 552 deletions(-) delete mode 100644 src/runtime/c/utils/pgf-parse.c delete mode 100644 src/runtime/c/utils/pgf-print.c delete mode 100644 src/runtime/c/utils/pgf-translate.c diff --git a/src/runtime/c/Makefile.am b/src/runtime/c/Makefile.am index af377ab62..a429f5f92 100644 --- a/src/runtime/c/Makefile.am +++ b/src/runtime/c/Makefile.am @@ -92,19 +92,7 @@ libsg_la_SOURCES = \ sg/sg.c libsg_la_LIBADD = libgu.la libpgf.la -bin_PROGRAMS = \ - utils/pgf-print \ - utils/pgf-translate \ - utils/pgf-parse - -utils_pgf_print_SOURCES = utils/pgf-print.c -utils_pgf_print_LDADD = libpgf.la libgu.la - -utils_pgf_translate_SOURCES = utils/pgf-translate.c -utils_pgf_translate_LDADD = libpgf.la libgu.la - -utils_pgf_parse_SOURCES = utils/pgf-parse.c -utils_pgf_parse_LDADD = libpgf.la libgu.la +bin_PROGRAMS = AUTOMAKE_OPTIONS = foreign subdir-objects dist-bzip2 ACLOCAL_AMFLAGS = -I m4 diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index 8843a5f37..fb2fbfc22 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -2106,16 +2106,16 @@ pgf_parsing_last_token(PgfParsing* ps, GuPool* pool) } GuEnum* -pgf_parse(PgfConcr* concr, PgfCId cat, GuString sentence, +pgf_parse(PgfConcr* concr, PgfType* typ, GuString sentence, GuExn* err, GuPool* pool, GuPool* out_pool) { PgfCallbacksMap* callbacks = pgf_new_callbacks_map(concr, out_pool); - return pgf_parse_with_heuristics(concr, cat, sentence, -1.0, callbacks, err, pool, out_pool); + return pgf_parse_with_heuristics(concr, typ, sentence, -1.0, callbacks, err, pool, out_pool); } GuEnum* -pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat, GuString sentence, +pgf_parse_with_heuristics(PgfConcr* concr, PgfType* typ, GuString sentence, double heuristics, PgfCallbacksMap* callbacks, GuExn* err, @@ -2132,7 +2132,7 @@ pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat, GuString sentence, // Begin parsing a sentence with the specified category PgfParsing* ps = - pgf_parsing_init(concr, cat, 0, sentence, heuristics, callbacks, NULL, err, pool, out_pool); + pgf_parsing_init(concr, typ->cid, 0, sentence, heuristics, callbacks, NULL, err, pool, out_pool); if (ps == NULL) { return NULL; } @@ -2159,7 +2159,7 @@ pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat, GuString sentence, } PgfExprEnum* -pgf_parse_with_oracle(PgfConcr* concr, PgfCId cat, +pgf_parse_with_oracle(PgfConcr* concr, PgfType* typ, GuString sentence, PgfOracleCallback* oracle, GuExn* err, @@ -2177,7 +2177,7 @@ pgf_parse_with_oracle(PgfConcr* concr, PgfCId cat, // Begin parsing a sentence with the specified category PgfCallbacksMap* callbacks = pgf_new_callbacks_map(concr, out_pool); PgfParsing* ps = - pgf_parsing_init(concr, cat, 0, sentence, -1, callbacks, oracle, err, pool, out_pool); + pgf_parsing_init(concr, typ->cid, 0, sentence, -1, callbacks, oracle, err, pool, out_pool); if (ps == NULL) { return NULL; } @@ -2223,7 +2223,7 @@ pgf_parser_completions_next(GuEnum* self, void* to, GuPool* pool) } GuEnum* -pgf_complete(PgfConcr* concr, PgfCId cat, GuString sentence, +pgf_complete(PgfConcr* concr, PgfType* type, GuString sentence, GuString prefix, GuExn *err, GuPool* pool) { if (concr->sequences == NULL || @@ -2239,7 +2239,7 @@ pgf_complete(PgfConcr* concr, PgfCId cat, GuString sentence, PgfCallbacksMap* callbacks = pgf_new_callbacks_map(concr, pool); PgfParsing* ps = - pgf_parsing_init(concr, cat, 0, sentence, -1.0, callbacks, NULL, err, pool, pool); + pgf_parsing_init(concr, type->cid, 0, sentence, -1.0, callbacks, NULL, err, pool, pool); if (ps == NULL) { return NULL; } diff --git a/src/runtime/c/pgf/parseval.c b/src/runtime/c/pgf/parseval.c index cbea3d429..7ef41577d 100644 --- a/src/runtime/c/pgf/parseval.c +++ b/src/runtime/c/pgf/parseval.c @@ -129,7 +129,7 @@ static PgfLinFuncs pgf_metrics_lin_funcs2 = { }; bool -pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat, +pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfType* type, double *precision, double *recall, double *exact) { GuPool* pool = gu_new_pool(); @@ -174,7 +174,7 @@ pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat, gu_string_buf_freeze(sbuf, pool); GuEnum* en_trees = - pgf_parse(concr, cat, sentence, + pgf_parse(concr, type, sentence, state.err, pool, pool); PgfExprProb* ep = gu_next(en_trees, PgfExprProb*, pool); if (ep == NULL) { diff --git a/src/runtime/c/pgf/pgf.c b/src/runtime/c/pgf/pgf.c index 370b9411b..fe9c1d140 100644 --- a/src/runtime/c/pgf/pgf.c +++ b/src/runtime/c/pgf/pgf.c @@ -86,24 +86,35 @@ pgf_iter_categories(PgfPGF* pgf, GuMapItor* itor, GuExn* err) } } -PgfCId -pgf_start_cat(PgfPGF* pgf) +PgfType* +pgf_start_cat(PgfPGF* pgf, GuPool* pool) { PgfFlag* flag = gu_seq_binsearch(pgf->abstract.aflags, pgf_flag_order, PgfFlag, "startcat"); - if (flag == NULL) - return "S"; - - GuVariantInfo i = gu_variant_open(flag->value); - switch (i.tag) { - case PGF_LITERAL_STR: { - PgfLiteralStr *lstr = (PgfLiteralStr *) i.data; - return lstr->val; - } + if (flag != NULL) { + GuVariantInfo i = gu_variant_open(flag->value); + switch (i.tag) { + case PGF_LITERAL_STR: { + PgfLiteralStr *lstr = (PgfLiteralStr *) i.data; + + GuPool* tmp_pool = gu_local_pool(); + GuIn* in = gu_string_in(lstr->val,tmp_pool); + GuExn* err = gu_new_exn(tmp_pool); + PgfType *type = pgf_read_type(in, pool, err); + if (!gu_ok(err)) + break; + gu_pool_free(tmp_pool); + return type; + } + } } - return "S"; + PgfType* type = gu_new_flex(pool, PgfType, exprs, 0); + type->hypos = gu_empty_seq(); + type->cid = "S"; + type->n_exprs = 0; + return type; } GuString diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h index 365d20d73..e5679a5e6 100644 --- a/src/runtime/c/pgf/pgf.h +++ b/src/runtime/c/pgf/pgf.h @@ -53,8 +53,8 @@ pgf_language_code(PgfConcr* concr); void pgf_iter_categories(PgfPGF* pgf, GuMapItor* itor, GuExn* err); -PgfCId -pgf_start_cat(PgfPGF* pgf); +PgfType* +pgf_start_cat(PgfPGF* pgf, GuPool* pool); void pgf_iter_functions(PgfPGF* pgf, GuMapItor* itor, GuExn* err); @@ -89,7 +89,7 @@ pgf_align_words(PgfConcr* concr, PgfExpr expr, GuExn* err, GuPool* pool); bool -pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat, +pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfType* type, double *precision, double *recall, double *exact); PgfExpr @@ -97,11 +97,11 @@ pgf_compute(PgfPGF* pgf, PgfExpr expr, GuExn* err, GuPool* pool, GuPool* out_pool); PgfExprEnum* -pgf_generate_all(PgfPGF* pgf, PgfCId cat, +pgf_generate_all(PgfPGF* pgf, PgfType* ty, GuExn* err, GuPool* pool, GuPool* out_pool); PgfExprEnum* -pgf_parse(PgfConcr* concr, PgfCId cat, GuString sentence, +pgf_parse(PgfConcr* concr, PgfType* typ, GuString sentence, GuExn* err, GuPool* pool, GuPool* out_pool); typedef struct PgfMorphoCallback PgfMorphoCallback; @@ -134,7 +134,7 @@ pgf_lookup_word_prefix(PgfConcr *concr, GuString prefix, typedef GuMap PgfCallbacksMap; PgfExprEnum* -pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat, +pgf_parse_with_heuristics(PgfConcr* concr, PgfType* typ, GuString sentence, double heuristics, PgfCallbacksMap* callbacks, GuExn* err, @@ -159,7 +159,7 @@ struct PgfOracleCallback { }; PgfExprEnum* -pgf_parse_with_oracle(PgfConcr* concr, PgfCId cat, +pgf_parse_with_oracle(PgfConcr* concr, PgfType* typ, GuString sentence, PgfOracleCallback* oracle, GuExn* err, @@ -172,7 +172,7 @@ typedef struct { } PgfTokenProb; GuEnum* -pgf_complete(PgfConcr* concr, PgfCId cat, GuString string, +pgf_complete(PgfConcr* concr, PgfType* type, GuString string, GuString prefix, GuExn* err, GuPool* pool); typedef struct PgfLiteralCallback PgfLiteralCallback; diff --git a/src/runtime/c/pgf/reasoner.c b/src/runtime/c/pgf/reasoner.c index 75f7ee0c6..5d604a4cc 100644 --- a/src/runtime/c/pgf/reasoner.c +++ b/src/runtime/c/pgf/reasoner.c @@ -454,7 +454,7 @@ pgf_new_reasoner(PgfPGF* pgf, GuExn* err, GuPool* pool, GuPool* out_pool) } PgfExprEnum* -pgf_generate_all(PgfPGF* pgf, PgfCId cat, GuExn* err, GuPool* pool, GuPool* out_pool) +pgf_generate_all(PgfPGF* pgf, PgfType* typ, GuExn* err, GuPool* pool, GuPool* out_pool) { PgfReasoner* rs = pgf_new_reasoner(pgf, err, pool, out_pool); @@ -462,9 +462,9 @@ pgf_generate_all(PgfPGF* pgf, PgfCId cat, GuExn* err, GuPool* pool, GuPool* out_ answers->parents = gu_new_buf(PgfExprState*, rs->pool); answers->exprs = rs->exprs; answers->outside_prob = 0; - gu_map_put(rs->table, cat, PgfAnswers*, answers); + gu_map_put(rs->table, typ->cid, PgfAnswers*, answers); - PgfAbsCat* abscat = gu_seq_binsearch(rs->abstract->cats, pgf_abscat_order, PgfAbsCat, cat); + PgfAbsCat* abscat = gu_seq_binsearch(rs->abstract->cats, pgf_abscat_order, PgfAbsCat, typ->cid); if (abscat != NULL) { rs->start = gu_new(PgfClosure, rs->pool); rs->start->code = abscat->predicate; diff --git a/src/runtime/c/utils/pgf-parse.c b/src/runtime/c/utils/pgf-parse.c deleted file mode 100644 index 088fe409d..000000000 --- a/src/runtime/c/utils/pgf-parse.c +++ /dev/null @@ -1,133 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -int main(int argc, char* argv[]) { - // Set the character locale, so we can produce proper output. - setlocale(LC_CTYPE, ""); - - // Create the pool that is used to allocate everything - GuPool* pool = gu_new_pool(); - int status = EXIT_SUCCESS; - if (argc < 4 || argc > 5) { - fprintf(stderr, "usage: %s pgf-file start-cat cnc-lang [heuristics]\n(0.0 <= heuristics < 1.0, default: 0.95)\n", argv[0]); - status = EXIT_FAILURE; - goto fail; - } - char* filename = argv[1]; - GuString cat = argv[2]; - GuString lang = argv[3]; - - double heuristics = 0.95; - if (argc == 5) { - heuristics = atof(argv[4]); - } - - // Create an exception frame that catches all errors. - GuExn* err = gu_new_exn(pool); - - - clock_t start = clock(); - - // Read the PGF grammar. - PgfPGF* pgf = pgf_read(filename, pool, err); - - // If an error occured, it shows in the exception frame - if (!gu_ok(err)) { - fprintf(stderr, "Reading PGF failed\n"); - status = EXIT_FAILURE; - goto fail; - } - - // Look up the source and destination concrete categories - PgfConcr* concr = pgf_get_language(pgf, lang); - if (!concr) { - fprintf(stderr, "Unknown language\n"); - status = EXIT_FAILURE; - goto fail; - } - - clock_t end = clock(); - double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; - - fprintf(stderr, "(%.0f ms) Ready to parse [heuristics=%.2f]!\n", 1000.0 * cpu_time_used, heuristics); - - // Create an output stream for stdout - GuOut* out = gu_file_out(stdout, pool); - - // We will keep the latest results in the 'ppool' and - // we will iterate over them by using 'result'. - GuPool* ppool = NULL; - - // The interactive PARSING loop. - // XXX: This currently reads stdin directly, so it doesn't support - // encodings properly. TODO: use a locale reader for input - for (int ctr = 0; true; ctr++) { - // We release the last results - if (ppool != NULL) { - gu_pool_free(ppool); - ppool = NULL; - } - - /* fprintf(stdout, "> "); */ - /* fflush(stdout); */ - char buf[4096]; - char* line = fgets(buf, sizeof(buf), stdin); - if (line == NULL) { - if (ferror(stdin)) { - fprintf(stderr, "Input error\n"); - status = EXIT_FAILURE; - } - break; - } else if (strcmp(line, "") == 0) { - // End nicely on empty input - break; - } else if (strcmp(line, "\n") == 0) { - // Empty line -> skip - continue; - } - - // We create a temporary pool for translating a single - // sentence, so our memory usage doesn't increase over time. - ppool = gu_new_pool(); - - clock_t start = clock(); - - GuExn* parse_err = gu_new_exn(ppool); - PgfCallbacksMap* callbacks = pgf_new_callbacks_map(concr, ppool); - GuEnum* result = pgf_parse_with_heuristics(concr, cat, line, heuristics, callbacks, parse_err, ppool, ppool); - - PgfExprProb* ep = NULL; - if (gu_ok(parse_err)) - ep = gu_next(result, PgfExprProb*, ppool); - - clock_t end = clock(); - double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; - - gu_printf(out, err, "%d (%.0f ms): ", ctr, 1000.0 * cpu_time_used); - if (ep != NULL) { - gu_printf(out, err, "[%.4f] (", ep->prob); - pgf_print_expr(ep->expr, NULL, 0, out, err); - gu_printf(out, err, ")\n"); - } else { - gu_printf(out, err, "---\n"); - } - gu_out_flush(out, err); - } - - fail: - gu_pool_free(pool); - return status; -} - diff --git a/src/runtime/c/utils/pgf-print.c b/src/runtime/c/utils/pgf-print.c deleted file mode 100644 index 07b343a4d..000000000 --- a/src/runtime/c/utils/pgf-print.c +++ /dev/null @@ -1,36 +0,0 @@ -#include -#include - -#include -#include - -#include -#include - -int main(int argc, char* argv[]) { - // Set the character locale, so we can produce proper output. - setlocale(LC_CTYPE, ""); - - if (argc != 2) { - fprintf(stderr, "usage: %s pgf\n", argv[0]); - return EXIT_FAILURE; - } - char* filename = argv[1]; - - GuPool* pool = gu_new_pool(); - GuExn* err = gu_exn(pool); - PgfPGF* pgf = pgf_read(filename, pool, err); - int status = 0; - if (!gu_ok(err)) { - fprintf(stderr, "Reading PGF failed\n"); - status = 1; - goto fail_read; - } - GuOut* out = gu_file_out(stdout, pool); - pgf_print(pgf, out, err); - gu_out_flush(out, err); -fail_read: - gu_pool_free(pool); - return status; -} - diff --git a/src/runtime/c/utils/pgf-translate.c b/src/runtime/c/utils/pgf-translate.c deleted file mode 100644 index 00506e4cc..000000000 --- a/src/runtime/c/utils/pgf-translate.c +++ /dev/null @@ -1,203 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static void -print_result(PgfExprProb* ep, PgfConcr* to_concr, - GuOut* out, GuExn* err, GuPool* ppool) -{ - // Write out the abstract syntax tree - gu_printf(out, err, " [%f] ", ep->prob); - pgf_print_expr(ep->expr, NULL, 0, out, err); - gu_putc('\n', out, err); - - // Enumerate the concrete syntax trees corresponding - // to the abstract tree. - GuEnum* cts = pgf_lzr_concretize(to_concr, ep->expr, err, ppool); - while (true) { - PgfCncTree ctree = - gu_next(cts, PgfCncTree, ppool); - if (gu_variant_is_null(ctree)) { - break; - } - gu_putc(' ', out, err); - // Linearize the concrete tree as a simple - // sequence of strings. - pgf_lzr_linearize_simple(to_concr, ctree, 0, out, err, ppool); - - if (gu_exn_caught(err, PgfLinNonExist)) { - // encountered nonExist. Unfortunately there - // might be some output printed already. The - // right solution should be to use GuStringBuf. - gu_exn_clear(err); - } - gu_putc('\n', out, err); - gu_out_flush(out, err); - } -} - -int main(int argc, char* argv[]) { - // Set the character locale, so we can produce proper output. - setlocale(LC_CTYPE, ""); - - // Create the pool that is used to allocate everything - GuPool* pool = gu_new_pool(); - int status = EXIT_SUCCESS; - if (argc < 5) { - fprintf(stderr, "usage: %s pgf cat from-lang to-lang\n", argv[0]); - status = EXIT_FAILURE; - goto fail; - } - - GuString filename = argv[1]; - GuString cat = argv[2]; - GuString from_lang = argv[3]; - GuString to_lang = argv[4]; - - // Create an exception frame that catches all errors. - GuExn* err = gu_new_exn(pool); - - // Read the PGF grammar. - PgfPGF* pgf = pgf_read(filename, pool, err); - - // If an error occured, it shows in the exception frame - if (!gu_ok(err)) { - fprintf(stderr, "Reading PGF failed\n"); - status = EXIT_FAILURE; - goto fail; - } - - // Look up the source and destination concrete categories - PgfConcr* from_concr = pgf_get_language(pgf, from_lang); - PgfConcr* to_concr = pgf_get_language(pgf, to_lang); - if (!from_concr || !to_concr) { - fprintf(stderr, "Unknown language\n"); - status = EXIT_FAILURE; - goto fail_concr; - } - - // Register a callback for the literal category Symbol - PgfCallbacksMap* callbacks = - pgf_new_callbacks_map(from_concr, pool); - pgf_callbacks_map_add_literal(from_concr, callbacks, - "PN", &pgf_nerc_literal_callback); - pgf_callbacks_map_add_literal(from_concr, callbacks, - "Symb", &pgf_unknown_literal_callback); - - // Create an output stream for stdout - GuOut* out = gu_file_out(stdout, pool); - - // We will keep the latest results in the 'ppool' and - // we will iterate over them by using 'result'. - GuPool* ppool = NULL; - GuEnum* result = NULL; - - // The interactive translation loop. - // XXX: This currently reads stdin directly, so it doesn't support - // encodings properly. TODO: use a locale reader for input - while (true) { - fprintf(stdout, "> "); - fflush(stdout); - char buf[4096]; - char* line = fgets(buf, sizeof(buf), stdin); - if (line == NULL) { - if (ferror(stdin)) { - fprintf(stderr, "Input error\n"); - status = EXIT_FAILURE; - } - break; - } else if (strcmp(line, "") == 0) { - // End nicely on empty input - break; - } else if (strcmp(line, "\n") == 0) { - // Empty line -> show the next tree for the last sentence - - if (result != NULL) { - clock_t start = clock(); - - PgfExprProb* ep = gu_next(result, PgfExprProb*, ppool); - - clock_t end = clock(); - double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; - printf("%.2f sec\n", cpu_time_used); - - // The enumerator will return a null variant at the - // end of the results. - if (ep == NULL) { - goto fail_parse; - } - - print_result(ep, to_concr, out, err, ppool); - } - continue; - } - - // We release the last results - if (ppool != NULL) { - gu_pool_free(ppool); - ppool = NULL; - result = NULL; - } - - // We create a temporary pool for translating a single - // sentence, so our memory usage doesn't increase over time. - ppool = gu_new_pool(); - - clock_t start = clock(); - - GuExn* parse_err = gu_new_exn(ppool); - result = - pgf_parse_with_heuristics(from_concr, cat, line, - -1, callbacks, - parse_err, ppool, ppool); - if (!gu_ok(parse_err)) { - if (gu_exn_caught(parse_err, PgfExn)) { - GuString msg = gu_exn_caught_data(parse_err); - gu_string_write(msg, out, err); - gu_putc('\n', out, err); - } else if (gu_exn_caught(parse_err, PgfParseError)) { - gu_puts("Unexpected token: \"", out, err); - GuString tok = gu_exn_caught_data(parse_err); - gu_string_write(tok, out, err); - gu_puts("\"\n", out, err); - } - - goto fail_parse; - } - - PgfExprProb* ep = gu_next(result, PgfExprProb*, ppool); - - clock_t end = clock(); - double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; - printf("%.2f sec\n", cpu_time_used); - - // The enumerator will return null at the end of the results. - if (ep == NULL) { - goto fail_parse; - } - - print_result(ep, to_concr, out, err, ppool); - - continue; - fail_parse: - // Free all resources allocated during parsing and linearization - gu_pool_free(ppool); - ppool = NULL; - result = NULL; - } -fail_concr: -fail: - gu_pool_free(pool); - return status; -} - diff --git a/src/runtime/haskell-bind/PGF2.hsc b/src/runtime/haskell-bind/PGF2.hsc index 5d0484c1e..a368d9ccd 100644 --- a/src/runtime/haskell-bind/PGF2.hsc +++ b/src/runtime/haskell-bind/PGF2.hsc @@ -38,7 +38,9 @@ module PGF2 (-- * PGF mkFloat,unFloat, mkMeta,unMeta, -- ** Types - Type(..), Hypo, BindType(..), startCat, showType, + Type, Hypo, BindType(..), startCat, + readType, showType, + mkType, unType, -- * Concrete syntax ConcName,Concr,languages, @@ -69,6 +71,7 @@ import Control.Monad(forM_) import System.IO.Unsafe(unsafePerformIO,unsafeInterleaveIO) import Text.PrettyPrint import PGF2.Expr +import PGF2.Type import PGF2.FFI import Foreign hiding ( Pool, newPool, unsafePerformIO ) @@ -141,13 +144,12 @@ languages p = -- all abstract syntax expressions of the given type. -- The expressions are ordered by their probability. generateAll :: PGF -> Type -> [(Expr,Float)] -generateAll p (DTyp _ cat _) = +generateAll p (Type ctype _) = unsafePerformIO $ do genPl <- gu_new_pool exprPl <- gu_new_pool - cat <- newUtf8CString cat genPl exn <- gu_new_exn genPl - enum <- pgf_generate_all (pgf p) cat exn genPl exprPl + enum <- pgf_generate_all (pgf p) ctype exn genPl exprPl genFPl <- newForeignPtr gu_pool_finalizer genPl exprFPl <- newForeignPtr gu_pool_finalizer exprPl fromPgfExprEnum enum genFPl (p,exprFPl) @@ -164,9 +166,9 @@ abstractName p = unsafePerformIO (peekUtf8CString =<< pgf_abstract_name (pgf p)) -- definition is just for convenience. startCat :: PGF -> Type startCat p = unsafePerformIO $ do - cat <- pgf_start_cat (pgf p) - cat <- peekUtf8CString cat - return (DTyp [] cat []) + typPl <- gu_new_pool + c_type <- pgf_start_cat (pgf p) typPl + return (Type c_type typPl) loadConcr :: Concr -> FilePath -> IO () loadConcr c fpath = @@ -199,36 +201,7 @@ functionType p fn = c_type <- pgf_function_type (pgf p) c_fn if c_type == nullPtr then throwIO (PGFError ("Function '"++fn++"' is not defined")) - else peekType c_type - where - peekType c_type = do - cid <- (#peek PgfType, cid) c_type >>= peekUtf8CString - c_hypos <- (#peek PgfType, hypos) c_type - n_hypos <- (#peek GuSeq, len) c_hypos - hs <- peekHypos (c_hypos `plusPtr` (#offset GuSeq, data)) 0 n_hypos - n_exprs <- (#peek PgfType, n_exprs) c_type - es <- peekExprs (c_type `plusPtr` (#offset PgfType, exprs)) 0 n_exprs - return (DTyp hs cid es) - - peekHypos :: Ptr a -> Int -> Int -> IO [Hypo] - peekHypos c_hypo i n - | i < n = do cid <- (#peek PgfHypo, cid) c_hypo >>= peekUtf8CString - ty <- (#peek PgfHypo, type) c_hypo >>= peekType - bt <- fmap toBindType ((#peek PgfHypo, bind_type) c_hypo) - hs <- peekHypos (plusPtr c_hypo (#size PgfHypo)) (i+1) n - return ((bt,cid,ty) : hs) - | otherwise = return [] - - toBindType :: CInt -> BindType - toBindType (#const PGF_BIND_TYPE_EXPLICIT) = Explicit - toBindType (#const PGF_BIND_TYPE_IMPLICIT) = Implicit - - peekExprs ptr i n - | i < n = do e <- peekElemOff ptr i - es <- peekExprs ptr (i+1) n - return (Expr e p : es) - | otherwise = return [] - + else return (Type c_type (pgfMaster p)) ----------------------------------------------------------------------------- -- Graphviz @@ -326,15 +299,14 @@ parseWithHeuristics :: Concr -- ^ the language with which we parse -- If a literal has been recognized then the output should -- be Just (expr,probability,end_offset) -> Either String [(Expr,Float)] -parseWithHeuristics lang (DTyp _ cat _) sent heuristic callbacks = +parseWithHeuristics lang (Type ctype _) sent heuristic callbacks = unsafePerformIO $ do exprPl <- gu_new_pool parsePl <- gu_new_pool exn <- gu_new_exn parsePl - cat <- newUtf8CString cat parsePl sent <- newUtf8CString sent parsePl callbacks_map <- mkCallbacksMap (concr lang) callbacks parsePl - enum <- pgf_parse_with_heuristics (concr lang) cat sent heuristic callbacks_map exn parsePl exprPl + enum <- pgf_parse_with_heuristics (concr lang) ctype sent heuristic callbacks_map exn parsePl exprPl failed <- gu_exn_is_raised exn if failed then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError @@ -574,7 +546,7 @@ showBracketedString :: BracketedString -> String showBracketedString = render . ppBracketedString ppBracketedString (Leaf t) = text t -ppBracketedString (Bracket cat fid index _ bss) = parens (ppCId cat <> colon <> int fid <+> hsep (map ppBracketedString bss)) +ppBracketedString (Bracket cat fid index _ bss) = parens (text cat <> colon <> int fid <+> hsep (map ppBracketedString bss)) -- | Extracts the sequence of tokens from the bracketed string flattenBracketedString :: BracketedString -> [String] @@ -657,7 +629,7 @@ functionsByCat p cat = -- with the \'cat\' keyword. categories :: PGF -> [Cat] categories pgf = -- !!! quick hack - nub [cat | f<-functions pgf, let DTyp _ cat _=functionType pgf f] + nub [cat | f<-functions pgf, let (_, cat, _) = unType (functionType pgf f)] categoryContext :: PGF -> Cat -> Maybe [Hypo] categoryContext pgf cat = Nothing -- !!! not implemented yet TODO @@ -729,7 +701,7 @@ nerc pgf (lang,concr) sentence lin_idx offset = Just (y,xs') -> (y:ys,xs'') where (ys,xs'') = consume munch xs' - functionCat f = case functionType pgf f of DTyp _ cat _ -> cat + functionCat f = case unType (functionType pgf f) of (_,cat,_) -> cat -- | Callback to parse arbitrary words as chunks (from -- ../java/org/grammaticalframework/pgf/UnknownLiteralCallback.java) diff --git a/src/runtime/haskell-bind/PGF2/Expr.hsc b/src/runtime/haskell-bind/PGF2/Expr.hsc index c18e97a13..84559e5a0 100644 --- a/src/runtime/haskell-bind/PGF2/Expr.hsc +++ b/src/runtime/haskell-bind/PGF2/Expr.hsc @@ -6,20 +6,22 @@ module PGF2.Expr where import System.IO.Unsafe(unsafePerformIO) import Foreign hiding (unsafePerformIO) import Foreign.C -import qualified Text.PrettyPrint as PP import PGF2.FFI -import Data.List(mapAccumL) -- | An data type that represents -- identifiers for functions and categories in PGF. type CId = String -ppCId = PP.text wildCId = "_" :: CId type Cat = CId -- ^ Name of syntactic category type Fun = CId -- ^ Name of function +data BindType = + Explicit + | Implicit + deriving Show + ----------------------------------------------------------------------------- -- Expressions @@ -177,19 +179,16 @@ readExpr str = unsafePerformIO $ do exprPl <- gu_new_pool withGuPool $ \tmpPl -> - withCString str $ \c_str -> - do guin <- gu_string_in c_str tmpPl - exn <- gu_new_exn tmpPl - c_expr <- pgf_read_expr guin exprPl exn - status <- gu_exn_is_raised exn - if (not status && c_expr /= nullPtr) - then do exprFPl <- newForeignPtr gu_pool_finalizer exprPl - return $ Just (Expr c_expr exprFPl) - else do gu_pool_free exprPl - return Nothing - -ppExpr :: Int -> [CId] -> Expr -> PP.Doc -ppExpr d xs e = ppParens (d>0) (PP.text (showExpr xs e)) -- just a quick hack !!! + do c_str <- newUtf8CString str tmpPl + guin <- gu_string_in c_str tmpPl + exn <- gu_new_exn tmpPl + c_expr <- pgf_read_expr guin exprPl exn + status <- gu_exn_is_raised exn + if (not status && c_expr /= nullPtr) + then do exprFPl <- newForeignPtr gu_pool_finalizer exprPl + return $ Just (Expr c_expr exprFPl) + else do gu_pool_free exprPl + return Nothing -- | renders an expression as a 'String'. The list -- of identifiers is the list of all free variables @@ -200,62 +199,16 @@ showExpr scope e = unsafePerformIO $ withGuPool $ \tmpPl -> do (sb,out) <- newOut tmpPl - let printCtxt = nullPtr + printCtxt <- newPrintCtxt scope tmpPl exn <- gu_new_exn tmpPl pgf_print_expr (expr e) printCtxt 1 out exn s <- gu_string_buf_freeze sb tmpPl - peekCString s + peekUtf8CString s - ------------------------------------------------------------------------------ --- Types - -data Type = - DTyp [Hypo] CId [Expr] - deriving Show - -data BindType = - Explicit - | Implicit - deriving Show - --- | 'Hypo' represents a hypothesis in a type i.e. in the type A -> B, A is the hypothesis -type Hypo = (BindType,CId,Type) - --- | renders type as 'String'. -showType :: Type -> String -showType = PP.render . ppType 0 [] - -ppType :: Int -> [CId] -> Type -> PP.Doc -ppType d scope (DTyp hyps cat args) - | null hyps = ppRes scope cat args - | otherwise = let (scope',hdocs) = mapAccumL (ppHypo 1) scope hyps - in ppParens (d > 0) (foldr (\hdoc doc -> hdoc PP.<+> PP.text "->" PP.<+> doc) (ppRes scope cat args) hdocs) - where - ppRes scope cat es - | null es = ppCId cat - | otherwise = ppParens (d > 3) (ppCId cat PP.<+> PP.hsep (map (ppExpr 4 scope) es)) - -ppHypo :: Int -> [CId]-> (BindType,CId,Type) -> ([CId],PP.Doc) -ppHypo d scope (Explicit,x,typ) = - if x == wildCId - then (scope, ppType d scope typ) - else let y = freshName x scope - in (y:scope, PP.parens (ppCId x PP.<+> PP.char ':' PP.<+> ppType 0 scope typ)) -ppHypo d scope (Implicit,x,typ) = - if x == wildCId - then (scope,PP.parens (PP.braces (ppCId x) PP.<+> PP.char ':' PP.<+> ppType 0 scope typ)) - else let y = freshName x scope - in (y:scope,PP.parens (PP.braces (ppCId x) PP.<+> PP.char ':' PP.<+> ppType 0 scope typ)) - -freshName :: CId -> [CId] -> CId -freshName x xs0 = loop 1 x - where - xs = wildCId : xs0 - - loop i y - | elem y xs = loop (i+1) (x++show i) - | otherwise = y - -ppParens True = PP.parens -ppParens False = id +newPrintCtxt :: [String] -> Ptr GuPool -> IO (Ptr PgfPrintContext) +newPrintCtxt [] pool = return nullPtr +newPrintCtxt (x:xs) pool = do + pctxt <- gu_malloc pool (#size PgfPrintContext) + newUtf8CString x pool >>= (#poke PgfPrintContext, name) pctxt + newPrintCtxt xs pool >>= (#poke PgfPrintContext, next) pctxt + return pctxt diff --git a/src/runtime/haskell-bind/PGF2/FFI.hs b/src/runtime/haskell-bind/PGF2/FFI.hs index 9051b1465..949c46471 100644 --- a/src/runtime/haskell-bind/PGF2/FFI.hs +++ b/src/runtime/haskell-bind/PGF2/FFI.hs @@ -85,6 +85,9 @@ foreign import ccall unsafe "gu/utf8.h gu_utf8_decode" foreign import ccall unsafe "gu/utf8.h gu_utf8_encode" gu_utf8_encode :: Int32 -> Ptr CString -> IO () +foreign import ccall unsafe "gu/seq.h gu_make_seq" + gu_make_seq :: CInt -> CInt -> Ptr GuPool -> IO (Ptr GuSeq) + withGuPool :: (Ptr GuPool -> IO a) -> IO a withGuPool f = bracket gu_new_pool gu_pool_free f @@ -133,7 +136,7 @@ data PgfExprProb data PgfFullFormEntry data PgfMorphoCallback data PgfPrintContext -data PgfType +type PgfType = Ptr () data PgfCallbacksMap data PgfOracleCallback data PgfCncTree @@ -166,7 +169,7 @@ foreign import ccall "pgf/pgf.h pgf_iter_categories" pgf_iter_categories :: Ptr PgfPGF -> Ptr GuMapItor -> Ptr GuExn -> IO () foreign import ccall "pgf/pgf.h pgf_start_cat" - pgf_start_cat :: Ptr PgfPGF -> IO CString + pgf_start_cat :: Ptr PgfPGF -> Ptr GuPool -> IO PgfType foreign import ccall "pgf/pgf.h pgf_iter_functions" pgf_iter_functions :: Ptr PgfPGF -> Ptr GuMapItor -> Ptr GuExn -> IO () @@ -175,7 +178,7 @@ foreign import ccall "pgf/pgf.h pgf_iter_functions_by_cat" pgf_iter_functions_by_cat :: Ptr PgfPGF -> CString -> Ptr GuMapItor -> Ptr GuExn -> IO () foreign import ccall "pgf/pgf.h pgf_function_type" - pgf_function_type :: Ptr PgfPGF -> CString -> IO (Ptr PgfType) + pgf_function_type :: Ptr PgfPGF -> CString -> IO PgfType foreign import ccall "pgf/pgf.h pgf_print_name" pgf_print_name :: Ptr PgfConcr -> CString -> IO CString @@ -199,7 +202,7 @@ foreign import ccall "pgf/pgf.h pgf_align_words" pgf_align_words :: Ptr PgfConcr -> PgfExpr -> Ptr GuExn -> Ptr GuPool -> IO (Ptr GuSeq) foreign import ccall "pgf/pgf.h pgf_parse_with_heuristics" - pgf_parse_with_heuristics :: Ptr PgfConcr -> CString -> CString -> Double -> Ptr PgfCallbacksMap -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum) + pgf_parse_with_heuristics :: Ptr PgfConcr -> PgfType -> CString -> Double -> Ptr PgfCallbacksMap -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum) type LiteralMatchCallback = CInt -> Ptr CInt -> Ptr GuPool -> IO (Ptr PgfExprProb) @@ -293,8 +296,11 @@ foreign import ccall "pgf/expr.h pgf_print_expr" foreign import ccall "pgf/expr.h pgf_print_expr_tuple" pgf_print_expr_tuple :: CInt -> Ptr PgfExpr -> Ptr PgfPrintContext -> Ptr GuOut -> Ptr GuExn -> IO () +foreign import ccall "pgf/expr.h pgf_print_type" + pgf_print_type :: PgfType -> Ptr PgfPrintContext -> CInt -> Ptr GuOut -> Ptr GuExn -> IO () + foreign import ccall "pgf/pgf.h pgf_generate_all" - pgf_generate_all :: Ptr PgfPGF -> CString -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum) + pgf_generate_all :: Ptr PgfPGF -> PgfType -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum) foreign import ccall "pgf/pgf.h pgf_print" pgf_print :: Ptr PgfPGF -> Ptr GuOut -> Ptr GuExn -> IO () @@ -308,6 +314,9 @@ foreign import ccall "pgf/expr.h pgf_read_expr_tuple" foreign import ccall "pgf/expr.h pgf_read_expr_matrix" pgf_read_expr_matrix :: Ptr GuIn -> CInt -> Ptr GuPool -> Ptr GuExn -> IO (Ptr GuSeq) +foreign import ccall "pgf/expr.h pgf_read_type" + pgf_read_type :: Ptr GuIn -> Ptr GuPool -> Ptr GuExn -> IO PgfType + foreign import ccall "pgf/graphviz.h pgf_graphviz_abstract_tree" pgf_graphviz_abstract_tree :: Ptr PgfPGF -> PgfExpr -> Ptr GuOut -> Ptr GuExn -> IO () diff --git a/src/runtime/haskell-bind/pgf2-bind.cabal b/src/runtime/haskell-bind/pgf2-bind.cabal index 4e60264a5..dfde308b9 100644 --- a/src/runtime/haskell-bind/pgf2-bind.cabal +++ b/src/runtime/haskell-bind/pgf2-bind.cabal @@ -18,7 +18,7 @@ cabal-version: >=1.10 library exposed-modules: PGF2, SG - other-modules: PGF2.FFI, PGF2.Expr, SG.FFI + other-modules: PGF2.FFI, PGF2.Expr, PGF2.Type, SG.FFI build-depends: base >=4.3, bytestring >=0.9, containers, pretty -- hs-source-dirs: diff --git a/src/runtime/java/jpgf.c b/src/runtime/java/jpgf.c index a732e571e..58ec9db39 100644 --- a/src/runtime/java/jpgf.c +++ b/src/runtime/java/jpgf.c @@ -156,7 +156,11 @@ Java_org_grammaticalframework_pgf_PGF_getAbstractName(JNIEnv* env, jobject self) JNIEXPORT jstring JNICALL Java_org_grammaticalframework_pgf_PGF_getStartCat(JNIEnv* env, jobject self) { - return gu2j_string(env, pgf_start_cat(get_ref(env, self))); + GuPool* tmp_pool = gu_local_pool(); + PgfType* type = pgf_start_cat(get_ref(env, self), tmp_pool); + jstring jcat = gu2j_string(env, type->cid); + gu_pool_free(tmp_pool); + return jcat; } JNIEXPORT jobject JNICALL @@ -570,8 +574,13 @@ Java_org_grammaticalframework_pgf_Parser_parseWithHeuristics GuString s = j2gu_string(env, js, pool); GuExn* parse_err = gu_new_exn(pool); + PgfType* type = gu_new_flex(pool, PgfType, exprs, 0); + type->hypos = gu_empty_seq(); + type->cid = startCat; + type->n_exprs = 0; + GuEnum* res = - pgf_parse_with_heuristics(get_ref(env, jconcr), startCat, s, heuristics, l2p(callbacksRef), parse_err, pool, out_pool); + pgf_parse_with_heuristics(get_ref(env, jconcr), type, s, heuristics, l2p(callbacksRef), parse_err, pool, out_pool); if (!gu_ok(parse_err)) { if (gu_exn_caught(parse_err, PgfExn)) { @@ -606,8 +615,13 @@ Java_org_grammaticalframework_pgf_Completer_complete(JNIEnv* env, jclass clazz, GuString prefix = j2gu_string(env, jprefix, pool); GuExn* parse_err = gu_new_exn(pool); + PgfType* type = gu_new_flex(pool, PgfType, exprs, 0); + type->hypos = gu_empty_seq(); + type->cid = startCat; + type->n_exprs = 0; + GuEnum* res = - pgf_complete(get_ref(env, jconcr), startCat, s, prefix, parse_err, pool); + pgf_complete(get_ref(env, jconcr), type, s, prefix, parse_err, pool); if (!gu_ok(parse_err)) { if (gu_exn_caught(parse_err, PgfExn)) { @@ -1290,8 +1304,13 @@ Java_org_grammaticalframework_pgf_Generator_generateAll(JNIEnv* env, jclass claz GuString startCat = j2gu_string(env, jstartCat, pool); GuExn* err = gu_exn(pool); + PgfType* type = gu_new_flex(pool, PgfType, exprs, 0); + type->hypos = gu_empty_seq(); + type->cid = startCat; + type->n_exprs = 0; + GuEnum* res = - pgf_generate_all(get_ref(env, jpgf), startCat, err, pool, out_pool); + pgf_generate_all(get_ref(env, jpgf), type, err, pool, out_pool); if (res == NULL) { throw_string_exception(env, "org/grammaticalframework/pgf/PGFError", "The generation failed"); gu_pool_free(pool); diff --git a/src/runtime/python/pypgf.c b/src/runtime/python/pypgf.c index 4c95c46df..0b4c6b657 100644 --- a/src/runtime/python/pypgf.c +++ b/src/runtime/python/pypgf.c @@ -1461,7 +1461,7 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds) static char *kwlist[] = {"sentence", "cat", "n", "heuristics", "callbacks", NULL}; const char *sentence = NULL; - PgfCId catname = pgf_start_cat(self->grammar->pgf); + PgfCId catname = NULL; int max_count = -1; double heuristics = -1; PyObject* py_callbacks = NULL; @@ -1500,8 +1500,18 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds) sentence = gu_string_copy(sentence, pyres->pool); + PgfType* type; + if (catname == NULL) { + type = pgf_start_cat(self->grammar->pgf, pyres->pool); + } else { + type = gu_new_flex(pyres->pool, PgfType, exprs, 0); + type->hypos = gu_empty_seq(); + type->cid = catname; + type->n_exprs = 0; + } + pyres->res = - pgf_parse_with_heuristics(self->concr, catname, sentence, + pgf_parse_with_heuristics(self->concr, type, sentence, heuristics, callbacks, parse_err, pyres->pool, out_pool); @@ -1530,7 +1540,7 @@ Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds) static char *kwlist[] = {"sentence", "cat", "prefix", "n", NULL}; const char *sentence = NULL; - GuString catname = pgf_start_cat(self->grammar->pgf); + GuString catname = NULL; GuString prefix = ""; int max_count = -1; if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|ssi", kwlist, @@ -1557,9 +1567,19 @@ Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds) GuPool *tmp_pool = gu_local_pool(); GuExn* parse_err = gu_new_exn(tmp_pool); - + + PgfType* type; + if (catname == NULL) { + type = pgf_start_cat(self->grammar->pgf, pyres->pool); + } else { + type = gu_new_flex(pyres->pool, PgfType, exprs, 0); + type->hypos = gu_empty_seq(); + type->cid = catname; + type->n_exprs = 0; + } + pyres->res = - pgf_complete(self->concr, catname, sentence, prefix, parse_err, pyres->pool); + pgf_complete(self->concr, type, sentence, prefix, parse_err, pyres->pool); if (!gu_ok(parse_err)) { Py_DECREF(pyres); @@ -1594,13 +1614,18 @@ Concr_parseval(ConcrObject* self, PyObject *args) { double precision = 0; double recall = 0; double exact = 0; - - if (!pgf_parseval(self->concr, pyexpr->expr, cat, + + PgfType* type = gu_new_flex(tmp_pool, PgfType, exprs, 0); + type->hypos = gu_empty_seq(); + type->cid = cat; + type->n_exprs = 0; + + if (!pgf_parseval(self->concr, pyexpr->expr, type, &precision, &recall, &exact)) return NULL; gu_pool_free(tmp_pool); - + return Py_BuildValue("ddd", precision, recall, exact); } @@ -2606,7 +2631,11 @@ PGF_getCategories(PGFObject *self, void *closure) static PyObject* PGF_getStartCat(PGFObject *self, void *closure) { - return PyString_FromString(pgf_start_cat(self->pgf)); + GuPool* tmp_pool = gu_local_pool(); + PgfType* type = pgf_start_cat(self->pgf, tmp_pool); + PyObject* pycat = PyString_FromString(type->cid); + gu_pool_free(tmp_pool); + return pycat; } static void @@ -2743,8 +2772,13 @@ PGF_generateAll(PGFObject* self, PyObject *args, PyObject *keywds) GuExn* err = gu_exn(pyres->pool); + PgfType* type = gu_new_flex(pyres->pool, PgfType, exprs, 0); + type->hypos = gu_empty_seq(); + type->cid = catname; + type->n_exprs = 0; + pyres->res = - pgf_generate_all(self->pgf, catname, err, pyres->pool, out_pool); + pgf_generate_all(self->pgf, type, err, pyres->pool, out_pool); if (pyres->res == NULL) { Py_DECREF(pyres); return NULL;