From 14e721dda9a38762695ac5435c24818265629b02 Mon Sep 17 00:00:00 2001 From: "kr.angelov" Date: Thu, 13 Dec 2012 14:44:33 +0000 Subject: [PATCH] a top-level API for parsing in the C runtime --- src/runtime/c/pgf/data.c | 1 - src/runtime/c/pgf/data.h | 10 ---- src/runtime/c/pgf/expr.c | 2 +- src/runtime/c/pgf/expr.h | 10 +++- src/runtime/c/pgf/lexer.c | 20 +++++--- src/runtime/c/pgf/lexer.h | 9 +++- src/runtime/c/pgf/pgf.c | 74 +++++++++++++++++++++++++++++ src/runtime/c/pgf/pgf.h | 23 ++++++--- src/runtime/c/utils/pgf-chunk.c | 39 +-------------- src/runtime/c/utils/pgf-translate.c | 41 ++++++---------- 10 files changed, 138 insertions(+), 91 deletions(-) diff --git a/src/runtime/c/pgf/data.c b/src/runtime/c/pgf/data.c index dbb0b1899..d5607031b 100644 --- a/src/runtime/c/pgf/data.c +++ b/src/runtime/c/pgf/data.c @@ -1,5 +1,4 @@ #include "data.h" -#include "expr.h" #include #include #include diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h index f5435cee5..267823127 100644 --- a/src/runtime/c/pgf/data.h +++ b/src/runtime/c/pgf/data.h @@ -27,7 +27,6 @@ #include #include #include -#include typedef struct PgfCCat PgfCCat; typedef PgfCCat* PgfCCatId; @@ -123,15 +122,6 @@ struct PgfPGF { extern GU_DECLARE_TYPE(PgfPGF, struct); -typedef float prob_t; - -typedef struct { - prob_t prob; - PgfExpr expr; -} PgfExprProb; - -extern GU_DECLARE_TYPE(PgfExprProb, struct); - struct PgfFunDecl { PgfType* type; int arity; diff --git a/src/runtime/c/pgf/expr.c b/src/runtime/c/pgf/expr.c index 8f2fc875e..a90e9b474 100644 --- a/src/runtime/c/pgf/expr.c +++ b/src/runtime/c/pgf/expr.c @@ -1,4 +1,4 @@ -#include "expr.h" +#include "pgf.h" #include #include #include diff --git a/src/runtime/c/pgf/expr.h b/src/runtime/c/pgf/expr.h index f4d5881c7..4c1bddbae 100644 --- a/src/runtime/c/pgf/expr.h +++ b/src/runtime/c/pgf/expr.h @@ -5,7 +5,6 @@ #include #include #include -#include /// Abstract syntax trees /// @file @@ -125,6 +124,15 @@ typedef struct { PgfExpr expr; } PgfExprImplArg; +typedef float prob_t; + +typedef struct { + prob_t prob; + PgfExpr expr; +} PgfExprProb; + +extern GU_DECLARE_TYPE(PgfExprProb, struct); + int pgf_expr_arity(PgfExpr expr); diff --git a/src/runtime/c/pgf/lexer.c b/src/runtime/c/pgf/lexer.c index 05372eca0..acb4cd0c4 100644 --- a/src/runtime/c/pgf/lexer.c +++ b/src/runtime/c/pgf/lexer.c @@ -1,11 +1,13 @@ #include -#include +#include #include #include struct PgfLexer { GuReader* rdr; + GuPool* pool; GuUCS ucs; + PgfToken tok; }; PgfLexer* @@ -13,17 +15,17 @@ pgf_new_lexer(GuReader *rdr, GuPool *pool) { PgfLexer* lexer = gu_new(PgfLexer, pool); lexer->rdr = rdr; + lexer->pool = pool; lexer->ucs = ' '; + lexer->tok = gu_empty_string; return lexer; } PgfToken -pgf_lexer_next_token(PgfLexer *lexer, GuExn* err, GuPool *pool) +pgf_lexer_read_token(PgfLexer *lexer, GuExn* err) { GuPool* tmp_pool = gu_new_pool(); - PgfToken tok; - GuStringBuf* buf = gu_string_buf(tmp_pool); GuWriter* wtr = gu_string_buf_writer(buf); @@ -109,8 +111,14 @@ pgf_lexer_next_token(PgfLexer *lexer, GuExn* err, GuPool *pool) } stop: - tok = gu_string_buf_freeze(buf, pool); + lexer->tok = gu_string_buf_freeze(buf, lexer->pool); gu_pool_free(tmp_pool); - return tok; + return lexer->tok; +} + +PgfToken +pgf_lexer_current_token(PgfLexer *lexer) +{ + return lexer->tok; } diff --git a/src/runtime/c/pgf/lexer.h b/src/runtime/c/pgf/lexer.h index 9bead9c7e..6f01d4d10 100644 --- a/src/runtime/c/pgf/lexer.h +++ b/src/runtime/c/pgf/lexer.h @@ -2,7 +2,9 @@ #define PGF_LEXER_H_ #include -#include + +/// A single lexical token +typedef GuString PgfToken; typedef struct PgfLexer PgfLexer; @@ -10,6 +12,9 @@ PgfLexer* pgf_new_lexer(GuReader *rdr, GuPool *pool); PgfToken -pgf_lexer_next_token(PgfLexer *lexer, GuExn* err, GuPool *pool); +pgf_lexer_read_token(PgfLexer *lexer, GuExn* err); + +PgfToken +pgf_lexer_current_token(PgfLexer *lexer); #endif // PGF_LEXER_H_ diff --git a/src/runtime/c/pgf/pgf.c b/src/runtime/c/pgf/pgf.c index 6e54193dd..ceeff23bf 100644 --- a/src/runtime/c/pgf/pgf.c +++ b/src/runtime/c/pgf/pgf.c @@ -2,8 +2,12 @@ #include #include #include +#include +#include +#include #include #include +#include #include #include @@ -167,3 +171,73 @@ pgf_print_name(PgfConcr* concr, PgfCId id) name = id; return name; } + +void +pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err) +{ + GuPool* tmp_pool = gu_local_pool(); + + GuEnum* cts = + pgf_lzr_concretize(concr, expr, tmp_pool); + PgfCncTree ctree = gu_next(cts, PgfCncTree, tmp_pool); + if (!gu_variant_is_null(ctree)) { + pgf_lzr_linearize_simple(concr, ctree, 0, wtr, err); + } + + gu_pool_free(tmp_pool); +} + +GuEnum* +pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool) +{ + // Begin parsing a sentence of the specified category + PgfParseState* state = + pgf_parser_init_state(concr, cat, 0, pool); + if (state == NULL) { + return NULL; + } + + // Tokenization + GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), pool); + PgfToken tok = pgf_lexer_read_token(lexer, lex_err); + while (!gu_exn_is_raised(lex_err)) { + // feed the token to get a new parse state + state = pgf_parser_next_state(state, tok, pool); + if (state == NULL) { + return NULL; + } + + tok = pgf_lexer_read_token(lexer, lex_err); + } + + // Now begin enumerating the resulting syntax trees + return pgf_parse_result(state, pool); +} + +void +pgf_print_chunks(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool) +{ + // Begin parsing a sentence of the specified category + PgfParseState* state = + pgf_parser_init_state(concr, cat, 0, pool); + if (state == NULL) { + printf("\n"); + return; + } + + // Tokenization + GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), pool); + PgfToken tok = pgf_lexer_read_token(lexer, lex_err); + while (!gu_exn_is_raised(lex_err)) { + // feed the token to get a new parse state + state = pgf_parser_next_state(state, tok, pool); + if (state == NULL) { + printf("\n"); + return; + } + + tok = pgf_lexer_read_token(lexer, lex_err); + } + + pgf_parse_print_chunks(state); +} diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h index 40b290617..9963534b5 100644 --- a/src/runtime/c/pgf/pgf.h +++ b/src/runtime/c/pgf/pgf.h @@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -37,19 +38,21 @@ extern GU_DECLARE_TYPE(PgfCId, typedef); extern GU_DECLARE_TYPE(PgfExn, abstract); - -/// A single lexical token -typedef GuString PgfToken; - /// @name PGF Grammar objects /// @{ typedef struct PgfPGF PgfPGF; +extern GU_DECLARE_TYPE(PgfPGF, struct); + typedef struct PgfConcr PgfConcr; +extern GU_DECLARE_TYPE(PgfConcr, struct); + /**< A representation of a PGF grammar. */ +#include +#include PgfPGF* pgf_read(const char* fpath, @@ -103,8 +106,16 @@ pgf_iter_functions_by_cat(PgfPGF* pgf, PgfCId catname, GuString pgf_print_name(PgfConcr*, PgfCId id); -#include -extern GU_DECLARE_TYPE(PgfPGF, struct); +void +pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err); + +GuEnum* +pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool); + +// an experimental function. Please don't use it +void +pgf_print_chunks(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool); + /// @} diff --git a/src/runtime/c/utils/pgf-chunk.c b/src/runtime/c/utils/pgf-chunk.c index d5e203368..575534cd3 100644 --- a/src/runtime/c/utils/pgf-chunk.c +++ b/src/runtime/c/utils/pgf-chunk.c @@ -70,14 +70,6 @@ int main(int argc, char* argv[]) { pgf_parser_add_literal(from_concr, gu_str_string("Symb", pool), &pgf_nerc_literal_callback); - // Create an output stream for stdout - GuOut* out = gu_file_out(stdout, pool); - - // Locale-encoding writers are currently unsupported - // GuWriter* wtr = gu_locale_writer(out, pool); - // Use a writer with hard-coded utf-8 encoding for now. - GuWriter* wtr = gu_new_utf8_writer(out, pool); - // We will keep the latest results in the 'ppool' and // we will iterate over them by using 'result'. GuPool* ppool = NULL; @@ -103,42 +95,15 @@ int main(int argc, char* argv[]) { // sentence, so our memory usage doesn't increase over time. ppool = gu_new_pool(); - // Begin parsing a sentence of the specified category - PgfParseState* state = - pgf_parser_init_state(from_concr, cat, 0, ppool); - if (state == NULL) { - fprintf(stderr, "Couldn't begin parsing\n"); - status = EXIT_FAILURE; - break; - } - GuReader *rdr = gu_string_reader(gu_str_string(line, ppool), ppool); PgfLexer *lexer = pgf_new_lexer(rdr, ppool); - // Tokenization - GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), ppool); - PgfToken tok = pgf_lexer_next_token(lexer, lex_err, ppool); - while (!gu_exn_is_raised(lex_err)) { - // feed the token to get a new parse state - state = pgf_parser_next_state(state, tok, ppool); - if (!state) { - gu_puts("Unexpected token: \"", wtr, err); - gu_string_write(tok, wtr, err); - gu_puts("\"\n", wtr, err); - goto fail_parse; - } - - tok = pgf_lexer_next_token(lexer, lex_err, ppool); - } - - pgf_parse_print_chunks(state); - continue; - fail_parse: + pgf_print_chunks(from_concr, cat, lexer, ppool); + // Free all resources allocated during parsing and linearization gu_pool_free(ppool); - ppool = NULL; } fail_concr: fail: diff --git a/src/runtime/c/utils/pgf-translate.c b/src/runtime/c/utils/pgf-translate.c index 2cf1dcfe7..03b3635f0 100644 --- a/src/runtime/c/utils/pgf-translate.c +++ b/src/runtime/c/utils/pgf-translate.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -160,40 +159,29 @@ int main(int argc, char* argv[]) { // sentence, so our memory usage doesn't increase over time. ppool = gu_new_pool(); - clock_t start = clock(); - - // Begin parsing a sentence of the specified category - PgfParseState* state = - pgf_parser_init_state(from_concr, cat, 0, ppool); - if (state == NULL) { - fprintf(stderr, "Couldn't begin parsing\n"); - status = EXIT_FAILURE; - break; - } - GuReader *rdr = gu_string_reader(gu_str_string(line, ppool), ppool); PgfLexer *lexer = pgf_new_lexer(rdr, ppool); - // Tokenization - GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), ppool); - PgfToken tok = pgf_lexer_next_token(lexer, lex_err, ppool); - while (!gu_exn_is_raised(lex_err)) { - // feed the token to get a new parse state - state = pgf_parser_next_state(state, tok, ppool); - if (!state) { + clock_t start = clock(); + + GuEnum* result = + pgf_parse(from_concr, cat, lexer, ppool); + if (result == NULL) { + PgfToken tok = + pgf_lexer_current_token(lexer); + + if (gu_string_eq(tok, gu_empty_string)) + gu_puts("Couldn't begin parsing", wtr, err); + else { gu_puts("Unexpected token: \"", wtr, err); gu_string_write(tok, wtr, err); gu_puts("\"\n", wtr, err); - goto fail_parse; } - - tok = pgf_lexer_next_token(lexer, lex_err, ppool); - } - // Now begin enumerating the resulting syntax trees - result = pgf_parse_result(state, ppool); + goto fail_parse; + } PgfExprProb* ep = gu_next(result, PgfExprProb*, ppool); @@ -201,8 +189,7 @@ int main(int argc, char* argv[]) { double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; printf("%.2f sec\n", cpu_time_used); - // The enumerator will return a null variant at the - // end of the results. + // The enumerator will return null at the end of the results. if (ep == NULL) { goto fail_parse; }