1
0
forked from GitHub/gf-core

bring the Haskell binding a bit closer to the pure Haskell API

This commit is contained in:
krasimir
2017-01-26 12:48:22 +00:00
parent 17163ae881
commit 3467a54965
15 changed files with 166 additions and 552 deletions

View File

@@ -92,19 +92,7 @@ libsg_la_SOURCES = \
sg/sg.c
libsg_la_LIBADD = libgu.la libpgf.la
bin_PROGRAMS = \
utils/pgf-print \
utils/pgf-translate \
utils/pgf-parse
utils_pgf_print_SOURCES = utils/pgf-print.c
utils_pgf_print_LDADD = libpgf.la libgu.la
utils_pgf_translate_SOURCES = utils/pgf-translate.c
utils_pgf_translate_LDADD = libpgf.la libgu.la
utils_pgf_parse_SOURCES = utils/pgf-parse.c
utils_pgf_parse_LDADD = libpgf.la libgu.la
bin_PROGRAMS =
AUTOMAKE_OPTIONS = foreign subdir-objects dist-bzip2
ACLOCAL_AMFLAGS = -I m4

View File

@@ -2106,16 +2106,16 @@ pgf_parsing_last_token(PgfParsing* ps, GuPool* pool)
}
GuEnum*
pgf_parse(PgfConcr* concr, PgfCId cat, GuString sentence,
pgf_parse(PgfConcr* concr, PgfType* typ, GuString sentence,
GuExn* err,
GuPool* pool, GuPool* out_pool)
{
PgfCallbacksMap* callbacks = pgf_new_callbacks_map(concr, out_pool);
return pgf_parse_with_heuristics(concr, cat, sentence, -1.0, callbacks, err, pool, out_pool);
return pgf_parse_with_heuristics(concr, typ, sentence, -1.0, callbacks, err, pool, out_pool);
}
GuEnum*
pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat, GuString sentence,
pgf_parse_with_heuristics(PgfConcr* concr, PgfType* typ, GuString sentence,
double heuristics,
PgfCallbacksMap* callbacks,
GuExn* err,
@@ -2132,7 +2132,7 @@ pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat, GuString sentence,
// Begin parsing a sentence with the specified category
PgfParsing* ps =
pgf_parsing_init(concr, cat, 0, sentence, heuristics, callbacks, NULL, err, pool, out_pool);
pgf_parsing_init(concr, typ->cid, 0, sentence, heuristics, callbacks, NULL, err, pool, out_pool);
if (ps == NULL) {
return NULL;
}
@@ -2159,7 +2159,7 @@ pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat, GuString sentence,
}
PgfExprEnum*
pgf_parse_with_oracle(PgfConcr* concr, PgfCId cat,
pgf_parse_with_oracle(PgfConcr* concr, PgfType* typ,
GuString sentence,
PgfOracleCallback* oracle,
GuExn* err,
@@ -2177,7 +2177,7 @@ pgf_parse_with_oracle(PgfConcr* concr, PgfCId cat,
// Begin parsing a sentence with the specified category
PgfCallbacksMap* callbacks = pgf_new_callbacks_map(concr, out_pool);
PgfParsing* ps =
pgf_parsing_init(concr, cat, 0, sentence, -1, callbacks, oracle, err, pool, out_pool);
pgf_parsing_init(concr, typ->cid, 0, sentence, -1, callbacks, oracle, err, pool, out_pool);
if (ps == NULL) {
return NULL;
}
@@ -2223,7 +2223,7 @@ pgf_parser_completions_next(GuEnum* self, void* to, GuPool* pool)
}
GuEnum*
pgf_complete(PgfConcr* concr, PgfCId cat, GuString sentence,
pgf_complete(PgfConcr* concr, PgfType* type, GuString sentence,
GuString prefix, GuExn *err, GuPool* pool)
{
if (concr->sequences == NULL ||
@@ -2239,7 +2239,7 @@ pgf_complete(PgfConcr* concr, PgfCId cat, GuString sentence,
PgfCallbacksMap* callbacks =
pgf_new_callbacks_map(concr, pool);
PgfParsing* ps =
pgf_parsing_init(concr, cat, 0, sentence, -1.0, callbacks, NULL, err, pool, pool);
pgf_parsing_init(concr, type->cid, 0, sentence, -1.0, callbacks, NULL, err, pool, pool);
if (ps == NULL) {
return NULL;
}

View File

@@ -129,7 +129,7 @@ static PgfLinFuncs pgf_metrics_lin_funcs2 = {
};
bool
pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfType* type,
double *precision, double *recall, double *exact)
{
GuPool* pool = gu_new_pool();
@@ -174,7 +174,7 @@ pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
gu_string_buf_freeze(sbuf, pool);
GuEnum* en_trees =
pgf_parse(concr, cat, sentence,
pgf_parse(concr, type, sentence,
state.err, pool, pool);
PgfExprProb* ep = gu_next(en_trees, PgfExprProb*, pool);
if (ep == NULL) {

View File

@@ -86,24 +86,35 @@ pgf_iter_categories(PgfPGF* pgf, GuMapItor* itor, GuExn* err)
}
}
PgfCId
pgf_start_cat(PgfPGF* pgf)
PgfType*
pgf_start_cat(PgfPGF* pgf, GuPool* pool)
{
PgfFlag* flag =
gu_seq_binsearch(pgf->abstract.aflags, pgf_flag_order, PgfFlag, "startcat");
if (flag == NULL)
return "S";
GuVariantInfo i = gu_variant_open(flag->value);
switch (i.tag) {
case PGF_LITERAL_STR: {
PgfLiteralStr *lstr = (PgfLiteralStr *) i.data;
return lstr->val;
}
if (flag != NULL) {
GuVariantInfo i = gu_variant_open(flag->value);
switch (i.tag) {
case PGF_LITERAL_STR: {
PgfLiteralStr *lstr = (PgfLiteralStr *) i.data;
GuPool* tmp_pool = gu_local_pool();
GuIn* in = gu_string_in(lstr->val,tmp_pool);
GuExn* err = gu_new_exn(tmp_pool);
PgfType *type = pgf_read_type(in, pool, err);
if (!gu_ok(err))
break;
gu_pool_free(tmp_pool);
return type;
}
}
}
return "S";
PgfType* type = gu_new_flex(pool, PgfType, exprs, 0);
type->hypos = gu_empty_seq();
type->cid = "S";
type->n_exprs = 0;
return type;
}
GuString

View File

@@ -53,8 +53,8 @@ pgf_language_code(PgfConcr* concr);
void
pgf_iter_categories(PgfPGF* pgf, GuMapItor* itor, GuExn* err);
PgfCId
pgf_start_cat(PgfPGF* pgf);
PgfType*
pgf_start_cat(PgfPGF* pgf, GuPool* pool);
void
pgf_iter_functions(PgfPGF* pgf, GuMapItor* itor, GuExn* err);
@@ -89,7 +89,7 @@ pgf_align_words(PgfConcr* concr, PgfExpr expr,
GuExn* err, GuPool* pool);
bool
pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfType* type,
double *precision, double *recall, double *exact);
PgfExpr
@@ -97,11 +97,11 @@ pgf_compute(PgfPGF* pgf, PgfExpr expr, GuExn* err,
GuPool* pool, GuPool* out_pool);
PgfExprEnum*
pgf_generate_all(PgfPGF* pgf, PgfCId cat,
pgf_generate_all(PgfPGF* pgf, PgfType* ty,
GuExn* err, GuPool* pool, GuPool* out_pool);
PgfExprEnum*
pgf_parse(PgfConcr* concr, PgfCId cat, GuString sentence,
pgf_parse(PgfConcr* concr, PgfType* typ, GuString sentence,
GuExn* err, GuPool* pool, GuPool* out_pool);
typedef struct PgfMorphoCallback PgfMorphoCallback;
@@ -134,7 +134,7 @@ pgf_lookup_word_prefix(PgfConcr *concr, GuString prefix,
typedef GuMap PgfCallbacksMap;
PgfExprEnum*
pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat,
pgf_parse_with_heuristics(PgfConcr* concr, PgfType* typ,
GuString sentence, double heuristics,
PgfCallbacksMap* callbacks,
GuExn* err,
@@ -159,7 +159,7 @@ struct PgfOracleCallback {
};
PgfExprEnum*
pgf_parse_with_oracle(PgfConcr* concr, PgfCId cat,
pgf_parse_with_oracle(PgfConcr* concr, PgfType* typ,
GuString sentence,
PgfOracleCallback* oracle,
GuExn* err,
@@ -172,7 +172,7 @@ typedef struct {
} PgfTokenProb;
GuEnum*
pgf_complete(PgfConcr* concr, PgfCId cat, GuString string,
pgf_complete(PgfConcr* concr, PgfType* type, GuString string,
GuString prefix, GuExn* err, GuPool* pool);
typedef struct PgfLiteralCallback PgfLiteralCallback;

View File

@@ -454,7 +454,7 @@ pgf_new_reasoner(PgfPGF* pgf, GuExn* err, GuPool* pool, GuPool* out_pool)
}
PgfExprEnum*
pgf_generate_all(PgfPGF* pgf, PgfCId cat, GuExn* err, GuPool* pool, GuPool* out_pool)
pgf_generate_all(PgfPGF* pgf, PgfType* typ, GuExn* err, GuPool* pool, GuPool* out_pool)
{
PgfReasoner* rs = pgf_new_reasoner(pgf, err, pool, out_pool);
@@ -462,9 +462,9 @@ pgf_generate_all(PgfPGF* pgf, PgfCId cat, GuExn* err, GuPool* pool, GuPool* out_
answers->parents = gu_new_buf(PgfExprState*, rs->pool);
answers->exprs = rs->exprs;
answers->outside_prob = 0;
gu_map_put(rs->table, cat, PgfAnswers*, answers);
gu_map_put(rs->table, typ->cid, PgfAnswers*, answers);
PgfAbsCat* abscat = gu_seq_binsearch(rs->abstract->cats, pgf_abscat_order, PgfAbsCat, cat);
PgfAbsCat* abscat = gu_seq_binsearch(rs->abstract->cats, pgf_abscat_order, PgfAbsCat, typ->cid);
if (abscat != NULL) {
rs->start = gu_new(PgfClosure, rs->pool);
rs->start->code = abscat->predicate;

View File

@@ -1,133 +0,0 @@
#include <gu/variant.h>
#include <gu/map.h>
#include <gu/enum.h>
#include <gu/file.h>
#include <pgf/pgf.h>
#include <pgf/data.h>
#include <pgf/literals.h>
#include <pgf/linearizer.h>
#include <pgf/expr.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
#include <time.h>
int main(int argc, char* argv[]) {
// Set the character locale, so we can produce proper output.
setlocale(LC_CTYPE, "");
// Create the pool that is used to allocate everything
GuPool* pool = gu_new_pool();
int status = EXIT_SUCCESS;
if (argc < 4 || argc > 5) {
fprintf(stderr, "usage: %s pgf-file start-cat cnc-lang [heuristics]\n(0.0 <= heuristics < 1.0, default: 0.95)\n", argv[0]);
status = EXIT_FAILURE;
goto fail;
}
char* filename = argv[1];
GuString cat = argv[2];
GuString lang = argv[3];
double heuristics = 0.95;
if (argc == 5) {
heuristics = atof(argv[4]);
}
// Create an exception frame that catches all errors.
GuExn* err = gu_new_exn(pool);
clock_t start = clock();
// Read the PGF grammar.
PgfPGF* pgf = pgf_read(filename, pool, err);
// If an error occured, it shows in the exception frame
if (!gu_ok(err)) {
fprintf(stderr, "Reading PGF failed\n");
status = EXIT_FAILURE;
goto fail;
}
// Look up the source and destination concrete categories
PgfConcr* concr = pgf_get_language(pgf, lang);
if (!concr) {
fprintf(stderr, "Unknown language\n");
status = EXIT_FAILURE;
goto fail;
}
clock_t end = clock();
double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
fprintf(stderr, "(%.0f ms) Ready to parse [heuristics=%.2f]!\n", 1000.0 * cpu_time_used, heuristics);
// Create an output stream for stdout
GuOut* out = gu_file_out(stdout, pool);
// We will keep the latest results in the 'ppool' and
// we will iterate over them by using 'result'.
GuPool* ppool = NULL;
// The interactive PARSING loop.
// XXX: This currently reads stdin directly, so it doesn't support
// encodings properly. TODO: use a locale reader for input
for (int ctr = 0; true; ctr++) {
// We release the last results
if (ppool != NULL) {
gu_pool_free(ppool);
ppool = NULL;
}
/* fprintf(stdout, "> "); */
/* fflush(stdout); */
char buf[4096];
char* line = fgets(buf, sizeof(buf), stdin);
if (line == NULL) {
if (ferror(stdin)) {
fprintf(stderr, "Input error\n");
status = EXIT_FAILURE;
}
break;
} else if (strcmp(line, "") == 0) {
// End nicely on empty input
break;
} else if (strcmp(line, "\n") == 0) {
// Empty line -> skip
continue;
}
// We create a temporary pool for translating a single
// sentence, so our memory usage doesn't increase over time.
ppool = gu_new_pool();
clock_t start = clock();
GuExn* parse_err = gu_new_exn(ppool);
PgfCallbacksMap* callbacks = pgf_new_callbacks_map(concr, ppool);
GuEnum* result = pgf_parse_with_heuristics(concr, cat, line, heuristics, callbacks, parse_err, ppool, ppool);
PgfExprProb* ep = NULL;
if (gu_ok(parse_err))
ep = gu_next(result, PgfExprProb*, ppool);
clock_t end = clock();
double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
gu_printf(out, err, "%d (%.0f ms): ", ctr, 1000.0 * cpu_time_used);
if (ep != NULL) {
gu_printf(out, err, "[%.4f] (", ep->prob);
pgf_print_expr(ep->expr, NULL, 0, out, err);
gu_printf(out, err, ")\n");
} else {
gu_printf(out, err, "---\n");
}
gu_out_flush(out, err);
}
fail:
gu_pool_free(pool);
return status;
}

View File

@@ -1,36 +0,0 @@
#include <pgf/pgf.h>
#include <pgf/data.h>
#include <gu/file.h>
#include <gu/utf8.h>
#include <locale.h>
#include <stdlib.h>
int main(int argc, char* argv[]) {
// Set the character locale, so we can produce proper output.
setlocale(LC_CTYPE, "");
if (argc != 2) {
fprintf(stderr, "usage: %s pgf\n", argv[0]);
return EXIT_FAILURE;
}
char* filename = argv[1];
GuPool* pool = gu_new_pool();
GuExn* err = gu_exn(pool);
PgfPGF* pgf = pgf_read(filename, pool, err);
int status = 0;
if (!gu_ok(err)) {
fprintf(stderr, "Reading PGF failed\n");
status = 1;
goto fail_read;
}
GuOut* out = gu_file_out(stdout, pool);
pgf_print(pgf, out, err);
gu_out_flush(out, err);
fail_read:
gu_pool_free(pool);
return status;
}

View File

@@ -1,203 +0,0 @@
#include <gu/variant.h>
#include <gu/map.h>
#include <gu/enum.h>
#include <gu/file.h>
#include <gu/exn.h>
#include <pgf/pgf.h>
#include <pgf/literals.h>
#include <pgf/linearizer.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
#include <time.h>
static void
print_result(PgfExprProb* ep, PgfConcr* to_concr,
GuOut* out, GuExn* err, GuPool* ppool)
{
// Write out the abstract syntax tree
gu_printf(out, err, " [%f] ", ep->prob);
pgf_print_expr(ep->expr, NULL, 0, out, err);
gu_putc('\n', out, err);
// Enumerate the concrete syntax trees corresponding
// to the abstract tree.
GuEnum* cts = pgf_lzr_concretize(to_concr, ep->expr, err, ppool);
while (true) {
PgfCncTree ctree =
gu_next(cts, PgfCncTree, ppool);
if (gu_variant_is_null(ctree)) {
break;
}
gu_putc(' ', out, err);
// Linearize the concrete tree as a simple
// sequence of strings.
pgf_lzr_linearize_simple(to_concr, ctree, 0, out, err, ppool);
if (gu_exn_caught(err, PgfLinNonExist)) {
// encountered nonExist. Unfortunately there
// might be some output printed already. The
// right solution should be to use GuStringBuf.
gu_exn_clear(err);
}
gu_putc('\n', out, err);
gu_out_flush(out, err);
}
}
int main(int argc, char* argv[]) {
// Set the character locale, so we can produce proper output.
setlocale(LC_CTYPE, "");
// Create the pool that is used to allocate everything
GuPool* pool = gu_new_pool();
int status = EXIT_SUCCESS;
if (argc < 5) {
fprintf(stderr, "usage: %s pgf cat from-lang to-lang\n", argv[0]);
status = EXIT_FAILURE;
goto fail;
}
GuString filename = argv[1];
GuString cat = argv[2];
GuString from_lang = argv[3];
GuString to_lang = argv[4];
// Create an exception frame that catches all errors.
GuExn* err = gu_new_exn(pool);
// Read the PGF grammar.
PgfPGF* pgf = pgf_read(filename, pool, err);
// If an error occured, it shows in the exception frame
if (!gu_ok(err)) {
fprintf(stderr, "Reading PGF failed\n");
status = EXIT_FAILURE;
goto fail;
}
// Look up the source and destination concrete categories
PgfConcr* from_concr = pgf_get_language(pgf, from_lang);
PgfConcr* to_concr = pgf_get_language(pgf, to_lang);
if (!from_concr || !to_concr) {
fprintf(stderr, "Unknown language\n");
status = EXIT_FAILURE;
goto fail_concr;
}
// Register a callback for the literal category Symbol
PgfCallbacksMap* callbacks =
pgf_new_callbacks_map(from_concr, pool);
pgf_callbacks_map_add_literal(from_concr, callbacks,
"PN", &pgf_nerc_literal_callback);
pgf_callbacks_map_add_literal(from_concr, callbacks,
"Symb", &pgf_unknown_literal_callback);
// Create an output stream for stdout
GuOut* out = gu_file_out(stdout, pool);
// We will keep the latest results in the 'ppool' and
// we will iterate over them by using 'result'.
GuPool* ppool = NULL;
GuEnum* result = NULL;
// The interactive translation loop.
// XXX: This currently reads stdin directly, so it doesn't support
// encodings properly. TODO: use a locale reader for input
while (true) {
fprintf(stdout, "> ");
fflush(stdout);
char buf[4096];
char* line = fgets(buf, sizeof(buf), stdin);
if (line == NULL) {
if (ferror(stdin)) {
fprintf(stderr, "Input error\n");
status = EXIT_FAILURE;
}
break;
} else if (strcmp(line, "") == 0) {
// End nicely on empty input
break;
} else if (strcmp(line, "\n") == 0) {
// Empty line -> show the next tree for the last sentence
if (result != NULL) {
clock_t start = clock();
PgfExprProb* ep = gu_next(result, PgfExprProb*, ppool);
clock_t end = clock();
double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
printf("%.2f sec\n", cpu_time_used);
// The enumerator will return a null variant at the
// end of the results.
if (ep == NULL) {
goto fail_parse;
}
print_result(ep, to_concr, out, err, ppool);
}
continue;
}
// We release the last results
if (ppool != NULL) {
gu_pool_free(ppool);
ppool = NULL;
result = NULL;
}
// We create a temporary pool for translating a single
// sentence, so our memory usage doesn't increase over time.
ppool = gu_new_pool();
clock_t start = clock();
GuExn* parse_err = gu_new_exn(ppool);
result =
pgf_parse_with_heuristics(from_concr, cat, line,
-1, callbacks,
parse_err, ppool, ppool);
if (!gu_ok(parse_err)) {
if (gu_exn_caught(parse_err, PgfExn)) {
GuString msg = gu_exn_caught_data(parse_err);
gu_string_write(msg, out, err);
gu_putc('\n', out, err);
} else if (gu_exn_caught(parse_err, PgfParseError)) {
gu_puts("Unexpected token: \"", out, err);
GuString tok = gu_exn_caught_data(parse_err);
gu_string_write(tok, out, err);
gu_puts("\"\n", out, err);
}
goto fail_parse;
}
PgfExprProb* ep = gu_next(result, PgfExprProb*, ppool);
clock_t end = clock();
double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
printf("%.2f sec\n", cpu_time_used);
// The enumerator will return null at the end of the results.
if (ep == NULL) {
goto fail_parse;
}
print_result(ep, to_concr, out, err, ppool);
continue;
fail_parse:
// Free all resources allocated during parsing and linearization
gu_pool_free(ppool);
ppool = NULL;
result = NULL;
}
fail_concr:
fail:
gu_pool_free(pool);
return status;
}

View File

@@ -38,7 +38,9 @@ module PGF2 (-- * PGF
mkFloat,unFloat,
mkMeta,unMeta,
-- ** Types
Type(..), Hypo, BindType(..), startCat, showType,
Type, Hypo, BindType(..), startCat,
readType, showType,
mkType, unType,
-- * Concrete syntax
ConcName,Concr,languages,
@@ -69,6 +71,7 @@ import Control.Monad(forM_)
import System.IO.Unsafe(unsafePerformIO,unsafeInterleaveIO)
import Text.PrettyPrint
import PGF2.Expr
import PGF2.Type
import PGF2.FFI
import Foreign hiding ( Pool, newPool, unsafePerformIO )
@@ -141,13 +144,12 @@ languages p =
-- all abstract syntax expressions of the given type.
-- The expressions are ordered by their probability.
generateAll :: PGF -> Type -> [(Expr,Float)]
generateAll p (DTyp _ cat _) =
generateAll p (Type ctype _) =
unsafePerformIO $
do genPl <- gu_new_pool
exprPl <- gu_new_pool
cat <- newUtf8CString cat genPl
exn <- gu_new_exn genPl
enum <- pgf_generate_all (pgf p) cat exn genPl exprPl
enum <- pgf_generate_all (pgf p) ctype exn genPl exprPl
genFPl <- newForeignPtr gu_pool_finalizer genPl
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
fromPgfExprEnum enum genFPl (p,exprFPl)
@@ -164,9 +166,9 @@ abstractName p = unsafePerformIO (peekUtf8CString =<< pgf_abstract_name (pgf p))
-- definition is just for convenience.
startCat :: PGF -> Type
startCat p = unsafePerformIO $ do
cat <- pgf_start_cat (pgf p)
cat <- peekUtf8CString cat
return (DTyp [] cat [])
typPl <- gu_new_pool
c_type <- pgf_start_cat (pgf p) typPl
return (Type c_type typPl)
loadConcr :: Concr -> FilePath -> IO ()
loadConcr c fpath =
@@ -199,36 +201,7 @@ functionType p fn =
c_type <- pgf_function_type (pgf p) c_fn
if c_type == nullPtr
then throwIO (PGFError ("Function '"++fn++"' is not defined"))
else peekType c_type
where
peekType c_type = do
cid <- (#peek PgfType, cid) c_type >>= peekUtf8CString
c_hypos <- (#peek PgfType, hypos) c_type
n_hypos <- (#peek GuSeq, len) c_hypos
hs <- peekHypos (c_hypos `plusPtr` (#offset GuSeq, data)) 0 n_hypos
n_exprs <- (#peek PgfType, n_exprs) c_type
es <- peekExprs (c_type `plusPtr` (#offset PgfType, exprs)) 0 n_exprs
return (DTyp hs cid es)
peekHypos :: Ptr a -> Int -> Int -> IO [Hypo]
peekHypos c_hypo i n
| i < n = do cid <- (#peek PgfHypo, cid) c_hypo >>= peekUtf8CString
ty <- (#peek PgfHypo, type) c_hypo >>= peekType
bt <- fmap toBindType ((#peek PgfHypo, bind_type) c_hypo)
hs <- peekHypos (plusPtr c_hypo (#size PgfHypo)) (i+1) n
return ((bt,cid,ty) : hs)
| otherwise = return []
toBindType :: CInt -> BindType
toBindType (#const PGF_BIND_TYPE_EXPLICIT) = Explicit
toBindType (#const PGF_BIND_TYPE_IMPLICIT) = Implicit
peekExprs ptr i n
| i < n = do e <- peekElemOff ptr i
es <- peekExprs ptr (i+1) n
return (Expr e p : es)
| otherwise = return []
else return (Type c_type (pgfMaster p))
-----------------------------------------------------------------------------
-- Graphviz
@@ -326,15 +299,14 @@ parseWithHeuristics :: Concr -- ^ the language with which we parse
-- If a literal has been recognized then the output should
-- be Just (expr,probability,end_offset)
-> Either String [(Expr,Float)]
parseWithHeuristics lang (DTyp _ cat _) sent heuristic callbacks =
parseWithHeuristics lang (Type ctype _) sent heuristic callbacks =
unsafePerformIO $
do exprPl <- gu_new_pool
parsePl <- gu_new_pool
exn <- gu_new_exn parsePl
cat <- newUtf8CString cat parsePl
sent <- newUtf8CString sent parsePl
callbacks_map <- mkCallbacksMap (concr lang) callbacks parsePl
enum <- pgf_parse_with_heuristics (concr lang) cat sent heuristic callbacks_map exn parsePl exprPl
enum <- pgf_parse_with_heuristics (concr lang) ctype sent heuristic callbacks_map exn parsePl exprPl
failed <- gu_exn_is_raised exn
if failed
then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
@@ -574,7 +546,7 @@ showBracketedString :: BracketedString -> String
showBracketedString = render . ppBracketedString
ppBracketedString (Leaf t) = text t
ppBracketedString (Bracket cat fid index _ bss) = parens (ppCId cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
ppBracketedString (Bracket cat fid index _ bss) = parens (text cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
-- | Extracts the sequence of tokens from the bracketed string
flattenBracketedString :: BracketedString -> [String]
@@ -657,7 +629,7 @@ functionsByCat p cat =
-- with the \'cat\' keyword.
categories :: PGF -> [Cat]
categories pgf = -- !!! quick hack
nub [cat | f<-functions pgf, let DTyp _ cat _=functionType pgf f]
nub [cat | f<-functions pgf, let (_, cat, _) = unType (functionType pgf f)]
categoryContext :: PGF -> Cat -> Maybe [Hypo]
categoryContext pgf cat = Nothing -- !!! not implemented yet TODO
@@ -729,7 +701,7 @@ nerc pgf (lang,concr) sentence lin_idx offset =
Just (y,xs') -> (y:ys,xs'')
where (ys,xs'') = consume munch xs'
functionCat f = case functionType pgf f of DTyp _ cat _ -> cat
functionCat f = case unType (functionType pgf f) of (_,cat,_) -> cat
-- | Callback to parse arbitrary words as chunks (from
-- ../java/org/grammaticalframework/pgf/UnknownLiteralCallback.java)

View File

@@ -6,20 +6,22 @@ module PGF2.Expr where
import System.IO.Unsafe(unsafePerformIO)
import Foreign hiding (unsafePerformIO)
import Foreign.C
import qualified Text.PrettyPrint as PP
import PGF2.FFI
import Data.List(mapAccumL)
-- | An data type that represents
-- identifiers for functions and categories in PGF.
type CId = String
ppCId = PP.text
wildCId = "_" :: CId
type Cat = CId -- ^ Name of syntactic category
type Fun = CId -- ^ Name of function
data BindType =
Explicit
| Implicit
deriving Show
-----------------------------------------------------------------------------
-- Expressions
@@ -177,19 +179,16 @@ readExpr str =
unsafePerformIO $
do exprPl <- gu_new_pool
withGuPool $ \tmpPl ->
withCString str $ \c_str ->
do guin <- gu_string_in c_str tmpPl
exn <- gu_new_exn tmpPl
c_expr <- pgf_read_expr guin exprPl exn
status <- gu_exn_is_raised exn
if (not status && c_expr /= nullPtr)
then do exprFPl <- newForeignPtr gu_pool_finalizer exprPl
return $ Just (Expr c_expr exprFPl)
else do gu_pool_free exprPl
return Nothing
ppExpr :: Int -> [CId] -> Expr -> PP.Doc
ppExpr d xs e = ppParens (d>0) (PP.text (showExpr xs e)) -- just a quick hack !!!
do c_str <- newUtf8CString str tmpPl
guin <- gu_string_in c_str tmpPl
exn <- gu_new_exn tmpPl
c_expr <- pgf_read_expr guin exprPl exn
status <- gu_exn_is_raised exn
if (not status && c_expr /= nullPtr)
then do exprFPl <- newForeignPtr gu_pool_finalizer exprPl
return $ Just (Expr c_expr exprFPl)
else do gu_pool_free exprPl
return Nothing
-- | renders an expression as a 'String'. The list
-- of identifiers is the list of all free variables
@@ -200,62 +199,16 @@ showExpr scope e =
unsafePerformIO $
withGuPool $ \tmpPl ->
do (sb,out) <- newOut tmpPl
let printCtxt = nullPtr
printCtxt <- newPrintCtxt scope tmpPl
exn <- gu_new_exn tmpPl
pgf_print_expr (expr e) printCtxt 1 out exn
s <- gu_string_buf_freeze sb tmpPl
peekCString s
peekUtf8CString s
-----------------------------------------------------------------------------
-- Types
data Type =
DTyp [Hypo] CId [Expr]
deriving Show
data BindType =
Explicit
| Implicit
deriving Show
-- | 'Hypo' represents a hypothesis in a type i.e. in the type A -> B, A is the hypothesis
type Hypo = (BindType,CId,Type)
-- | renders type as 'String'.
showType :: Type -> String
showType = PP.render . ppType 0 []
ppType :: Int -> [CId] -> Type -> PP.Doc
ppType d scope (DTyp hyps cat args)
| null hyps = ppRes scope cat args
| otherwise = let (scope',hdocs) = mapAccumL (ppHypo 1) scope hyps
in ppParens (d > 0) (foldr (\hdoc doc -> hdoc PP.<+> PP.text "->" PP.<+> doc) (ppRes scope cat args) hdocs)
where
ppRes scope cat es
| null es = ppCId cat
| otherwise = ppParens (d > 3) (ppCId cat PP.<+> PP.hsep (map (ppExpr 4 scope) es))
ppHypo :: Int -> [CId]-> (BindType,CId,Type) -> ([CId],PP.Doc)
ppHypo d scope (Explicit,x,typ) =
if x == wildCId
then (scope, ppType d scope typ)
else let y = freshName x scope
in (y:scope, PP.parens (ppCId x PP.<+> PP.char ':' PP.<+> ppType 0 scope typ))
ppHypo d scope (Implicit,x,typ) =
if x == wildCId
then (scope,PP.parens (PP.braces (ppCId x) PP.<+> PP.char ':' PP.<+> ppType 0 scope typ))
else let y = freshName x scope
in (y:scope,PP.parens (PP.braces (ppCId x) PP.<+> PP.char ':' PP.<+> ppType 0 scope typ))
freshName :: CId -> [CId] -> CId
freshName x xs0 = loop 1 x
where
xs = wildCId : xs0
loop i y
| elem y xs = loop (i+1) (x++show i)
| otherwise = y
ppParens True = PP.parens
ppParens False = id
newPrintCtxt :: [String] -> Ptr GuPool -> IO (Ptr PgfPrintContext)
newPrintCtxt [] pool = return nullPtr
newPrintCtxt (x:xs) pool = do
pctxt <- gu_malloc pool (#size PgfPrintContext)
newUtf8CString x pool >>= (#poke PgfPrintContext, name) pctxt
newPrintCtxt xs pool >>= (#poke PgfPrintContext, next) pctxt
return pctxt

View File

@@ -85,6 +85,9 @@ foreign import ccall unsafe "gu/utf8.h gu_utf8_decode"
foreign import ccall unsafe "gu/utf8.h gu_utf8_encode"
gu_utf8_encode :: Int32 -> Ptr CString -> IO ()
foreign import ccall unsafe "gu/seq.h gu_make_seq"
gu_make_seq :: CInt -> CInt -> Ptr GuPool -> IO (Ptr GuSeq)
withGuPool :: (Ptr GuPool -> IO a) -> IO a
withGuPool f = bracket gu_new_pool gu_pool_free f
@@ -133,7 +136,7 @@ data PgfExprProb
data PgfFullFormEntry
data PgfMorphoCallback
data PgfPrintContext
data PgfType
type PgfType = Ptr ()
data PgfCallbacksMap
data PgfOracleCallback
data PgfCncTree
@@ -166,7 +169,7 @@ foreign import ccall "pgf/pgf.h pgf_iter_categories"
pgf_iter_categories :: Ptr PgfPGF -> Ptr GuMapItor -> Ptr GuExn -> IO ()
foreign import ccall "pgf/pgf.h pgf_start_cat"
pgf_start_cat :: Ptr PgfPGF -> IO CString
pgf_start_cat :: Ptr PgfPGF -> Ptr GuPool -> IO PgfType
foreign import ccall "pgf/pgf.h pgf_iter_functions"
pgf_iter_functions :: Ptr PgfPGF -> Ptr GuMapItor -> Ptr GuExn -> IO ()
@@ -175,7 +178,7 @@ foreign import ccall "pgf/pgf.h pgf_iter_functions_by_cat"
pgf_iter_functions_by_cat :: Ptr PgfPGF -> CString -> Ptr GuMapItor -> Ptr GuExn -> IO ()
foreign import ccall "pgf/pgf.h pgf_function_type"
pgf_function_type :: Ptr PgfPGF -> CString -> IO (Ptr PgfType)
pgf_function_type :: Ptr PgfPGF -> CString -> IO PgfType
foreign import ccall "pgf/pgf.h pgf_print_name"
pgf_print_name :: Ptr PgfConcr -> CString -> IO CString
@@ -199,7 +202,7 @@ foreign import ccall "pgf/pgf.h pgf_align_words"
pgf_align_words :: Ptr PgfConcr -> PgfExpr -> Ptr GuExn -> Ptr GuPool -> IO (Ptr GuSeq)
foreign import ccall "pgf/pgf.h pgf_parse_with_heuristics"
pgf_parse_with_heuristics :: Ptr PgfConcr -> CString -> CString -> Double -> Ptr PgfCallbacksMap -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum)
pgf_parse_with_heuristics :: Ptr PgfConcr -> PgfType -> CString -> Double -> Ptr PgfCallbacksMap -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum)
type LiteralMatchCallback = CInt -> Ptr CInt -> Ptr GuPool -> IO (Ptr PgfExprProb)
@@ -293,8 +296,11 @@ foreign import ccall "pgf/expr.h pgf_print_expr"
foreign import ccall "pgf/expr.h pgf_print_expr_tuple"
pgf_print_expr_tuple :: CInt -> Ptr PgfExpr -> Ptr PgfPrintContext -> Ptr GuOut -> Ptr GuExn -> IO ()
foreign import ccall "pgf/expr.h pgf_print_type"
pgf_print_type :: PgfType -> Ptr PgfPrintContext -> CInt -> Ptr GuOut -> Ptr GuExn -> IO ()
foreign import ccall "pgf/pgf.h pgf_generate_all"
pgf_generate_all :: Ptr PgfPGF -> CString -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum)
pgf_generate_all :: Ptr PgfPGF -> PgfType -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum)
foreign import ccall "pgf/pgf.h pgf_print"
pgf_print :: Ptr PgfPGF -> Ptr GuOut -> Ptr GuExn -> IO ()
@@ -308,6 +314,9 @@ foreign import ccall "pgf/expr.h pgf_read_expr_tuple"
foreign import ccall "pgf/expr.h pgf_read_expr_matrix"
pgf_read_expr_matrix :: Ptr GuIn -> CInt -> Ptr GuPool -> Ptr GuExn -> IO (Ptr GuSeq)
foreign import ccall "pgf/expr.h pgf_read_type"
pgf_read_type :: Ptr GuIn -> Ptr GuPool -> Ptr GuExn -> IO PgfType
foreign import ccall "pgf/graphviz.h pgf_graphviz_abstract_tree"
pgf_graphviz_abstract_tree :: Ptr PgfPGF -> PgfExpr -> Ptr GuOut -> Ptr GuExn -> IO ()

View File

@@ -18,7 +18,7 @@ cabal-version: >=1.10
library
exposed-modules: PGF2, SG
other-modules: PGF2.FFI, PGF2.Expr, SG.FFI
other-modules: PGF2.FFI, PGF2.Expr, PGF2.Type, SG.FFI
build-depends: base >=4.3, bytestring >=0.9,
containers, pretty
-- hs-source-dirs:

View File

@@ -156,7 +156,11 @@ Java_org_grammaticalframework_pgf_PGF_getAbstractName(JNIEnv* env, jobject self)
JNIEXPORT jstring JNICALL
Java_org_grammaticalframework_pgf_PGF_getStartCat(JNIEnv* env, jobject self)
{
return gu2j_string(env, pgf_start_cat(get_ref(env, self)));
GuPool* tmp_pool = gu_local_pool();
PgfType* type = pgf_start_cat(get_ref(env, self), tmp_pool);
jstring jcat = gu2j_string(env, type->cid);
gu_pool_free(tmp_pool);
return jcat;
}
JNIEXPORT jobject JNICALL
@@ -570,8 +574,13 @@ Java_org_grammaticalframework_pgf_Parser_parseWithHeuristics
GuString s = j2gu_string(env, js, pool);
GuExn* parse_err = gu_new_exn(pool);
PgfType* type = gu_new_flex(pool, PgfType, exprs, 0);
type->hypos = gu_empty_seq();
type->cid = startCat;
type->n_exprs = 0;
GuEnum* res =
pgf_parse_with_heuristics(get_ref(env, jconcr), startCat, s, heuristics, l2p(callbacksRef), parse_err, pool, out_pool);
pgf_parse_with_heuristics(get_ref(env, jconcr), type, s, heuristics, l2p(callbacksRef), parse_err, pool, out_pool);
if (!gu_ok(parse_err)) {
if (gu_exn_caught(parse_err, PgfExn)) {
@@ -606,8 +615,13 @@ Java_org_grammaticalframework_pgf_Completer_complete(JNIEnv* env, jclass clazz,
GuString prefix = j2gu_string(env, jprefix, pool);
GuExn* parse_err = gu_new_exn(pool);
PgfType* type = gu_new_flex(pool, PgfType, exprs, 0);
type->hypos = gu_empty_seq();
type->cid = startCat;
type->n_exprs = 0;
GuEnum* res =
pgf_complete(get_ref(env, jconcr), startCat, s, prefix, parse_err, pool);
pgf_complete(get_ref(env, jconcr), type, s, prefix, parse_err, pool);
if (!gu_ok(parse_err)) {
if (gu_exn_caught(parse_err, PgfExn)) {
@@ -1290,8 +1304,13 @@ Java_org_grammaticalframework_pgf_Generator_generateAll(JNIEnv* env, jclass claz
GuString startCat = j2gu_string(env, jstartCat, pool);
GuExn* err = gu_exn(pool);
PgfType* type = gu_new_flex(pool, PgfType, exprs, 0);
type->hypos = gu_empty_seq();
type->cid = startCat;
type->n_exprs = 0;
GuEnum* res =
pgf_generate_all(get_ref(env, jpgf), startCat, err, pool, out_pool);
pgf_generate_all(get_ref(env, jpgf), type, err, pool, out_pool);
if (res == NULL) {
throw_string_exception(env, "org/grammaticalframework/pgf/PGFError", "The generation failed");
gu_pool_free(pool);

View File

@@ -1461,7 +1461,7 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
static char *kwlist[] = {"sentence", "cat", "n", "heuristics", "callbacks", NULL};
const char *sentence = NULL;
PgfCId catname = pgf_start_cat(self->grammar->pgf);
PgfCId catname = NULL;
int max_count = -1;
double heuristics = -1;
PyObject* py_callbacks = NULL;
@@ -1500,8 +1500,18 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
sentence = gu_string_copy(sentence, pyres->pool);
PgfType* type;
if (catname == NULL) {
type = pgf_start_cat(self->grammar->pgf, pyres->pool);
} else {
type = gu_new_flex(pyres->pool, PgfType, exprs, 0);
type->hypos = gu_empty_seq();
type->cid = catname;
type->n_exprs = 0;
}
pyres->res =
pgf_parse_with_heuristics(self->concr, catname, sentence,
pgf_parse_with_heuristics(self->concr, type, sentence,
heuristics, callbacks, parse_err,
pyres->pool, out_pool);
@@ -1530,7 +1540,7 @@ Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds)
static char *kwlist[] = {"sentence", "cat", "prefix", "n", NULL};
const char *sentence = NULL;
GuString catname = pgf_start_cat(self->grammar->pgf);
GuString catname = NULL;
GuString prefix = "";
int max_count = -1;
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|ssi", kwlist,
@@ -1557,9 +1567,19 @@ Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds)
GuPool *tmp_pool = gu_local_pool();
GuExn* parse_err = gu_new_exn(tmp_pool);
PgfType* type;
if (catname == NULL) {
type = pgf_start_cat(self->grammar->pgf, pyres->pool);
} else {
type = gu_new_flex(pyres->pool, PgfType, exprs, 0);
type->hypos = gu_empty_seq();
type->cid = catname;
type->n_exprs = 0;
}
pyres->res =
pgf_complete(self->concr, catname, sentence, prefix, parse_err, pyres->pool);
pgf_complete(self->concr, type, sentence, prefix, parse_err, pyres->pool);
if (!gu_ok(parse_err)) {
Py_DECREF(pyres);
@@ -1594,13 +1614,18 @@ Concr_parseval(ConcrObject* self, PyObject *args) {
double precision = 0;
double recall = 0;
double exact = 0;
if (!pgf_parseval(self->concr, pyexpr->expr, cat,
PgfType* type = gu_new_flex(tmp_pool, PgfType, exprs, 0);
type->hypos = gu_empty_seq();
type->cid = cat;
type->n_exprs = 0;
if (!pgf_parseval(self->concr, pyexpr->expr, type,
&precision, &recall, &exact))
return NULL;
gu_pool_free(tmp_pool);
return Py_BuildValue("ddd", precision, recall, exact);
}
@@ -2606,7 +2631,11 @@ PGF_getCategories(PGFObject *self, void *closure)
static PyObject*
PGF_getStartCat(PGFObject *self, void *closure)
{
return PyString_FromString(pgf_start_cat(self->pgf));
GuPool* tmp_pool = gu_local_pool();
PgfType* type = pgf_start_cat(self->pgf, tmp_pool);
PyObject* pycat = PyString_FromString(type->cid);
gu_pool_free(tmp_pool);
return pycat;
}
static void
@@ -2743,8 +2772,13 @@ PGF_generateAll(PGFObject* self, PyObject *args, PyObject *keywds)
GuExn* err = gu_exn(pyres->pool);
PgfType* type = gu_new_flex(pyres->pool, PgfType, exprs, 0);
type->hypos = gu_empty_seq();
type->cid = catname;
type->n_exprs = 0;
pyres->res =
pgf_generate_all(self->pgf, catname, err, pyres->pool, out_pool);
pgf_generate_all(self->pgf, type, err, pyres->pool, out_pool);
if (pyres->res == NULL) {
Py_DECREF(pyres);
return NULL;