forked from GitHub/gf-core
a top-level API for parsing in the C runtime
This commit is contained in:
@@ -1,5 +1,4 @@
|
|||||||
#include "data.h"
|
#include "data.h"
|
||||||
#include "expr.h"
|
|
||||||
#include <gu/type.h>
|
#include <gu/type.h>
|
||||||
#include <gu/variant.h>
|
#include <gu/variant.h>
|
||||||
#include <gu/assert.h>
|
#include <gu/assert.h>
|
||||||
|
|||||||
@@ -27,7 +27,6 @@
|
|||||||
#include <gu/type.h>
|
#include <gu/type.h>
|
||||||
#include <gu/seq.h>
|
#include <gu/seq.h>
|
||||||
#include <pgf/pgf.h>
|
#include <pgf/pgf.h>
|
||||||
#include <pgf/expr.h>
|
|
||||||
|
|
||||||
typedef struct PgfCCat PgfCCat;
|
typedef struct PgfCCat PgfCCat;
|
||||||
typedef PgfCCat* PgfCCatId;
|
typedef PgfCCat* PgfCCatId;
|
||||||
@@ -123,15 +122,6 @@ struct PgfPGF {
|
|||||||
|
|
||||||
extern GU_DECLARE_TYPE(PgfPGF, struct);
|
extern GU_DECLARE_TYPE(PgfPGF, struct);
|
||||||
|
|
||||||
typedef float prob_t;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
prob_t prob;
|
|
||||||
PgfExpr expr;
|
|
||||||
} PgfExprProb;
|
|
||||||
|
|
||||||
extern GU_DECLARE_TYPE(PgfExprProb, struct);
|
|
||||||
|
|
||||||
struct PgfFunDecl {
|
struct PgfFunDecl {
|
||||||
PgfType* type;
|
PgfType* type;
|
||||||
int arity;
|
int arity;
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
#include "expr.h"
|
#include "pgf.h"
|
||||||
#include <gu/intern.h>
|
#include <gu/intern.h>
|
||||||
#include <gu/assert.h>
|
#include <gu/assert.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
|||||||
@@ -5,7 +5,6 @@
|
|||||||
#include <gu/write.h>
|
#include <gu/write.h>
|
||||||
#include <gu/variant.h>
|
#include <gu/variant.h>
|
||||||
#include <gu/seq.h>
|
#include <gu/seq.h>
|
||||||
#include <pgf/pgf.h>
|
|
||||||
|
|
||||||
/// Abstract syntax trees
|
/// Abstract syntax trees
|
||||||
/// @file
|
/// @file
|
||||||
@@ -125,6 +124,15 @@ typedef struct {
|
|||||||
PgfExpr expr;
|
PgfExpr expr;
|
||||||
} PgfExprImplArg;
|
} PgfExprImplArg;
|
||||||
|
|
||||||
|
typedef float prob_t;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
prob_t prob;
|
||||||
|
PgfExpr expr;
|
||||||
|
} PgfExprProb;
|
||||||
|
|
||||||
|
extern GU_DECLARE_TYPE(PgfExprProb, struct);
|
||||||
|
|
||||||
int
|
int
|
||||||
pgf_expr_arity(PgfExpr expr);
|
pgf_expr_arity(PgfExpr expr);
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +1,13 @@
|
|||||||
#include <gu/list.h>
|
#include <gu/list.h>
|
||||||
#include <pgf/lexer.h>
|
#include <pgf/pgf.h>
|
||||||
#include <pgf/data.h>
|
#include <pgf/data.h>
|
||||||
#include <wctype.h>
|
#include <wctype.h>
|
||||||
|
|
||||||
struct PgfLexer {
|
struct PgfLexer {
|
||||||
GuReader* rdr;
|
GuReader* rdr;
|
||||||
|
GuPool* pool;
|
||||||
GuUCS ucs;
|
GuUCS ucs;
|
||||||
|
PgfToken tok;
|
||||||
};
|
};
|
||||||
|
|
||||||
PgfLexer*
|
PgfLexer*
|
||||||
@@ -13,17 +15,17 @@ pgf_new_lexer(GuReader *rdr, GuPool *pool)
|
|||||||
{
|
{
|
||||||
PgfLexer* lexer = gu_new(PgfLexer, pool);
|
PgfLexer* lexer = gu_new(PgfLexer, pool);
|
||||||
lexer->rdr = rdr;
|
lexer->rdr = rdr;
|
||||||
|
lexer->pool = pool;
|
||||||
lexer->ucs = ' ';
|
lexer->ucs = ' ';
|
||||||
|
lexer->tok = gu_empty_string;
|
||||||
return lexer;
|
return lexer;
|
||||||
}
|
}
|
||||||
|
|
||||||
PgfToken
|
PgfToken
|
||||||
pgf_lexer_next_token(PgfLexer *lexer, GuExn* err, GuPool *pool)
|
pgf_lexer_read_token(PgfLexer *lexer, GuExn* err)
|
||||||
{
|
{
|
||||||
GuPool* tmp_pool = gu_new_pool();
|
GuPool* tmp_pool = gu_new_pool();
|
||||||
|
|
||||||
PgfToken tok;
|
|
||||||
|
|
||||||
GuStringBuf* buf = gu_string_buf(tmp_pool);
|
GuStringBuf* buf = gu_string_buf(tmp_pool);
|
||||||
GuWriter* wtr = gu_string_buf_writer(buf);
|
GuWriter* wtr = gu_string_buf_writer(buf);
|
||||||
|
|
||||||
@@ -109,8 +111,14 @@ pgf_lexer_next_token(PgfLexer *lexer, GuExn* err, GuPool *pool)
|
|||||||
}
|
}
|
||||||
|
|
||||||
stop:
|
stop:
|
||||||
tok = gu_string_buf_freeze(buf, pool);
|
lexer->tok = gu_string_buf_freeze(buf, lexer->pool);
|
||||||
|
|
||||||
gu_pool_free(tmp_pool);
|
gu_pool_free(tmp_pool);
|
||||||
return tok;
|
return lexer->tok;
|
||||||
|
}
|
||||||
|
|
||||||
|
PgfToken
|
||||||
|
pgf_lexer_current_token(PgfLexer *lexer)
|
||||||
|
{
|
||||||
|
return lexer->tok;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,7 +2,9 @@
|
|||||||
#define PGF_LEXER_H_
|
#define PGF_LEXER_H_
|
||||||
|
|
||||||
#include <gu/read.h>
|
#include <gu/read.h>
|
||||||
#include <pgf/data.h>
|
|
||||||
|
/// A single lexical token
|
||||||
|
typedef GuString PgfToken;
|
||||||
|
|
||||||
typedef struct PgfLexer PgfLexer;
|
typedef struct PgfLexer PgfLexer;
|
||||||
|
|
||||||
@@ -10,6 +12,9 @@ PgfLexer*
|
|||||||
pgf_new_lexer(GuReader *rdr, GuPool *pool);
|
pgf_new_lexer(GuReader *rdr, GuPool *pool);
|
||||||
|
|
||||||
PgfToken
|
PgfToken
|
||||||
pgf_lexer_next_token(PgfLexer *lexer, GuExn* err, GuPool *pool);
|
pgf_lexer_read_token(PgfLexer *lexer, GuExn* err);
|
||||||
|
|
||||||
|
PgfToken
|
||||||
|
pgf_lexer_current_token(PgfLexer *lexer);
|
||||||
|
|
||||||
#endif // PGF_LEXER_H_
|
#endif // PGF_LEXER_H_
|
||||||
|
|||||||
@@ -2,8 +2,12 @@
|
|||||||
#include <pgf/data.h>
|
#include <pgf/data.h>
|
||||||
#include <pgf/expr.h>
|
#include <pgf/expr.h>
|
||||||
#include <pgf/reader.h>
|
#include <pgf/reader.h>
|
||||||
|
#include <pgf/linearize.h>
|
||||||
|
#include <pgf/parser.h>
|
||||||
|
#include <pgf/lexer.h>
|
||||||
#include <gu/file.h>
|
#include <gu/file.h>
|
||||||
#include <gu/string.h>
|
#include <gu/string.h>
|
||||||
|
#include <gu/enum.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
@@ -167,3 +171,73 @@ pgf_print_name(PgfConcr* concr, PgfCId id)
|
|||||||
name = id;
|
name = id;
|
||||||
return name;
|
return name;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err)
|
||||||
|
{
|
||||||
|
GuPool* tmp_pool = gu_local_pool();
|
||||||
|
|
||||||
|
GuEnum* cts =
|
||||||
|
pgf_lzr_concretize(concr, expr, tmp_pool);
|
||||||
|
PgfCncTree ctree = gu_next(cts, PgfCncTree, tmp_pool);
|
||||||
|
if (!gu_variant_is_null(ctree)) {
|
||||||
|
pgf_lzr_linearize_simple(concr, ctree, 0, wtr, err);
|
||||||
|
}
|
||||||
|
|
||||||
|
gu_pool_free(tmp_pool);
|
||||||
|
}
|
||||||
|
|
||||||
|
GuEnum*
|
||||||
|
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
|
||||||
|
{
|
||||||
|
// Begin parsing a sentence of the specified category
|
||||||
|
PgfParseState* state =
|
||||||
|
pgf_parser_init_state(concr, cat, 0, pool);
|
||||||
|
if (state == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tokenization
|
||||||
|
GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), pool);
|
||||||
|
PgfToken tok = pgf_lexer_read_token(lexer, lex_err);
|
||||||
|
while (!gu_exn_is_raised(lex_err)) {
|
||||||
|
// feed the token to get a new parse state
|
||||||
|
state = pgf_parser_next_state(state, tok, pool);
|
||||||
|
if (state == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
tok = pgf_lexer_read_token(lexer, lex_err);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now begin enumerating the resulting syntax trees
|
||||||
|
return pgf_parse_result(state, pool);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
pgf_print_chunks(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
|
||||||
|
{
|
||||||
|
// Begin parsing a sentence of the specified category
|
||||||
|
PgfParseState* state =
|
||||||
|
pgf_parser_init_state(concr, cat, 0, pool);
|
||||||
|
if (state == NULL) {
|
||||||
|
printf("\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tokenization
|
||||||
|
GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), pool);
|
||||||
|
PgfToken tok = pgf_lexer_read_token(lexer, lex_err);
|
||||||
|
while (!gu_exn_is_raised(lex_err)) {
|
||||||
|
// feed the token to get a new parse state
|
||||||
|
state = pgf_parser_next_state(state, tok, pool);
|
||||||
|
if (state == NULL) {
|
||||||
|
printf("\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
tok = pgf_lexer_read_token(lexer, lex_err);
|
||||||
|
}
|
||||||
|
|
||||||
|
pgf_parse_print_chunks(state);
|
||||||
|
}
|
||||||
|
|||||||
@@ -28,6 +28,7 @@
|
|||||||
#include <gu/exn.h>
|
#include <gu/exn.h>
|
||||||
#include <gu/mem.h>
|
#include <gu/mem.h>
|
||||||
#include <gu/map.h>
|
#include <gu/map.h>
|
||||||
|
#include <gu/enum.h>
|
||||||
#include <gu/string.h>
|
#include <gu/string.h>
|
||||||
|
|
||||||
|
|
||||||
@@ -37,19 +38,21 @@ extern GU_DECLARE_TYPE(PgfCId, typedef);
|
|||||||
|
|
||||||
extern GU_DECLARE_TYPE(PgfExn, abstract);
|
extern GU_DECLARE_TYPE(PgfExn, abstract);
|
||||||
|
|
||||||
|
|
||||||
/// A single lexical token
|
|
||||||
typedef GuString PgfToken;
|
|
||||||
|
|
||||||
/// @name PGF Grammar objects
|
/// @name PGF Grammar objects
|
||||||
/// @{
|
/// @{
|
||||||
|
|
||||||
typedef struct PgfPGF PgfPGF;
|
typedef struct PgfPGF PgfPGF;
|
||||||
|
extern GU_DECLARE_TYPE(PgfPGF, struct);
|
||||||
|
|
||||||
typedef struct PgfConcr PgfConcr;
|
typedef struct PgfConcr PgfConcr;
|
||||||
|
extern GU_DECLARE_TYPE(PgfConcr, struct);
|
||||||
|
|
||||||
|
|
||||||
/**< A representation of a PGF grammar.
|
/**< A representation of a PGF grammar.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <pgf/expr.h>
|
||||||
|
#include <pgf/lexer.h>
|
||||||
|
|
||||||
PgfPGF*
|
PgfPGF*
|
||||||
pgf_read(const char* fpath,
|
pgf_read(const char* fpath,
|
||||||
@@ -103,8 +106,16 @@ pgf_iter_functions_by_cat(PgfPGF* pgf, PgfCId catname,
|
|||||||
GuString
|
GuString
|
||||||
pgf_print_name(PgfConcr*, PgfCId id);
|
pgf_print_name(PgfConcr*, PgfCId id);
|
||||||
|
|
||||||
#include <gu/type.h>
|
void
|
||||||
extern GU_DECLARE_TYPE(PgfPGF, struct);
|
pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err);
|
||||||
|
|
||||||
|
GuEnum*
|
||||||
|
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
|
||||||
|
|
||||||
|
// an experimental function. Please don't use it
|
||||||
|
void
|
||||||
|
pgf_print_chunks(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
|
||||||
|
|
||||||
|
|
||||||
/// @}
|
/// @}
|
||||||
|
|
||||||
|
|||||||
@@ -70,14 +70,6 @@ int main(int argc, char* argv[]) {
|
|||||||
pgf_parser_add_literal(from_concr, gu_str_string("Symb", pool),
|
pgf_parser_add_literal(from_concr, gu_str_string("Symb", pool),
|
||||||
&pgf_nerc_literal_callback);
|
&pgf_nerc_literal_callback);
|
||||||
|
|
||||||
// Create an output stream for stdout
|
|
||||||
GuOut* out = gu_file_out(stdout, pool);
|
|
||||||
|
|
||||||
// Locale-encoding writers are currently unsupported
|
|
||||||
// GuWriter* wtr = gu_locale_writer(out, pool);
|
|
||||||
// Use a writer with hard-coded utf-8 encoding for now.
|
|
||||||
GuWriter* wtr = gu_new_utf8_writer(out, pool);
|
|
||||||
|
|
||||||
// We will keep the latest results in the 'ppool' and
|
// We will keep the latest results in the 'ppool' and
|
||||||
// we will iterate over them by using 'result'.
|
// we will iterate over them by using 'result'.
|
||||||
GuPool* ppool = NULL;
|
GuPool* ppool = NULL;
|
||||||
@@ -103,42 +95,15 @@ int main(int argc, char* argv[]) {
|
|||||||
// sentence, so our memory usage doesn't increase over time.
|
// sentence, so our memory usage doesn't increase over time.
|
||||||
ppool = gu_new_pool();
|
ppool = gu_new_pool();
|
||||||
|
|
||||||
// Begin parsing a sentence of the specified category
|
|
||||||
PgfParseState* state =
|
|
||||||
pgf_parser_init_state(from_concr, cat, 0, ppool);
|
|
||||||
if (state == NULL) {
|
|
||||||
fprintf(stderr, "Couldn't begin parsing\n");
|
|
||||||
status = EXIT_FAILURE;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
GuReader *rdr =
|
GuReader *rdr =
|
||||||
gu_string_reader(gu_str_string(line, ppool), ppool);
|
gu_string_reader(gu_str_string(line, ppool), ppool);
|
||||||
PgfLexer *lexer =
|
PgfLexer *lexer =
|
||||||
pgf_new_lexer(rdr, ppool);
|
pgf_new_lexer(rdr, ppool);
|
||||||
|
|
||||||
// Tokenization
|
pgf_print_chunks(from_concr, cat, lexer, ppool);
|
||||||
GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), ppool);
|
|
||||||
PgfToken tok = pgf_lexer_next_token(lexer, lex_err, ppool);
|
|
||||||
while (!gu_exn_is_raised(lex_err)) {
|
|
||||||
// feed the token to get a new parse state
|
|
||||||
state = pgf_parser_next_state(state, tok, ppool);
|
|
||||||
if (!state) {
|
|
||||||
gu_puts("Unexpected token: \"", wtr, err);
|
|
||||||
gu_string_write(tok, wtr, err);
|
|
||||||
gu_puts("\"\n", wtr, err);
|
|
||||||
goto fail_parse;
|
|
||||||
}
|
|
||||||
|
|
||||||
tok = pgf_lexer_next_token(lexer, lex_err, ppool);
|
|
||||||
}
|
|
||||||
|
|
||||||
pgf_parse_print_chunks(state);
|
|
||||||
continue;
|
|
||||||
fail_parse:
|
|
||||||
// Free all resources allocated during parsing and linearization
|
// Free all resources allocated during parsing and linearization
|
||||||
gu_pool_free(ppool);
|
gu_pool_free(ppool);
|
||||||
ppool = NULL;
|
|
||||||
}
|
}
|
||||||
fail_concr:
|
fail_concr:
|
||||||
fail:
|
fail:
|
||||||
|
|||||||
@@ -9,7 +9,6 @@
|
|||||||
#include <pgf/lexer.h>
|
#include <pgf/lexer.h>
|
||||||
#include <pgf/literals.h>
|
#include <pgf/literals.h>
|
||||||
#include <pgf/linearize.h>
|
#include <pgf/linearize.h>
|
||||||
#include <pgf/expr.h>
|
|
||||||
#include <pgf/edsl.h>
|
#include <pgf/edsl.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
@@ -160,49 +159,37 @@ int main(int argc, char* argv[]) {
|
|||||||
// sentence, so our memory usage doesn't increase over time.
|
// sentence, so our memory usage doesn't increase over time.
|
||||||
ppool = gu_new_pool();
|
ppool = gu_new_pool();
|
||||||
|
|
||||||
clock_t start = clock();
|
|
||||||
|
|
||||||
// Begin parsing a sentence of the specified category
|
|
||||||
PgfParseState* state =
|
|
||||||
pgf_parser_init_state(from_concr, cat, 0, ppool);
|
|
||||||
if (state == NULL) {
|
|
||||||
fprintf(stderr, "Couldn't begin parsing\n");
|
|
||||||
status = EXIT_FAILURE;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
GuReader *rdr =
|
GuReader *rdr =
|
||||||
gu_string_reader(gu_str_string(line, ppool), ppool);
|
gu_string_reader(gu_str_string(line, ppool), ppool);
|
||||||
PgfLexer *lexer =
|
PgfLexer *lexer =
|
||||||
pgf_new_lexer(rdr, ppool);
|
pgf_new_lexer(rdr, ppool);
|
||||||
|
|
||||||
// Tokenization
|
clock_t start = clock();
|
||||||
GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), ppool);
|
|
||||||
PgfToken tok = pgf_lexer_next_token(lexer, lex_err, ppool);
|
GuEnum* result =
|
||||||
while (!gu_exn_is_raised(lex_err)) {
|
pgf_parse(from_concr, cat, lexer, ppool);
|
||||||
// feed the token to get a new parse state
|
if (result == NULL) {
|
||||||
state = pgf_parser_next_state(state, tok, ppool);
|
PgfToken tok =
|
||||||
if (!state) {
|
pgf_lexer_current_token(lexer);
|
||||||
|
|
||||||
|
if (gu_string_eq(tok, gu_empty_string))
|
||||||
|
gu_puts("Couldn't begin parsing", wtr, err);
|
||||||
|
else {
|
||||||
gu_puts("Unexpected token: \"", wtr, err);
|
gu_puts("Unexpected token: \"", wtr, err);
|
||||||
gu_string_write(tok, wtr, err);
|
gu_string_write(tok, wtr, err);
|
||||||
gu_puts("\"\n", wtr, err);
|
gu_puts("\"\n", wtr, err);
|
||||||
|
}
|
||||||
|
|
||||||
goto fail_parse;
|
goto fail_parse;
|
||||||
}
|
}
|
||||||
|
|
||||||
tok = pgf_lexer_next_token(lexer, lex_err, ppool);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now begin enumerating the resulting syntax trees
|
|
||||||
result = pgf_parse_result(state, ppool);
|
|
||||||
|
|
||||||
PgfExprProb* ep = gu_next(result, PgfExprProb*, ppool);
|
PgfExprProb* ep = gu_next(result, PgfExprProb*, ppool);
|
||||||
|
|
||||||
clock_t end = clock();
|
clock_t end = clock();
|
||||||
double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
|
double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
|
||||||
printf("%.2f sec\n", cpu_time_used);
|
printf("%.2f sec\n", cpu_time_used);
|
||||||
|
|
||||||
// The enumerator will return a null variant at the
|
// The enumerator will return null at the end of the results.
|
||||||
// end of the results.
|
|
||||||
if (ep == NULL) {
|
if (ep == NULL) {
|
||||||
goto fail_parse;
|
goto fail_parse;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user