a top-level API for parsing in the C runtime

This commit is contained in:
kr.angelov
2012-12-13 14:44:33 +00:00
parent 60942c440a
commit 2ba632dc9f
10 changed files with 138 additions and 91 deletions

View File

@@ -1,5 +1,4 @@
#include "data.h"
#include "expr.h"
#include <gu/type.h>
#include <gu/variant.h>
#include <gu/assert.h>

View File

@@ -27,7 +27,6 @@
#include <gu/type.h>
#include <gu/seq.h>
#include <pgf/pgf.h>
#include <pgf/expr.h>
typedef struct PgfCCat PgfCCat;
typedef PgfCCat* PgfCCatId;
@@ -123,15 +122,6 @@ struct PgfPGF {
extern GU_DECLARE_TYPE(PgfPGF, struct);
typedef float prob_t;
typedef struct {
prob_t prob;
PgfExpr expr;
} PgfExprProb;
extern GU_DECLARE_TYPE(PgfExprProb, struct);
struct PgfFunDecl {
PgfType* type;
int arity;

View File

@@ -1,4 +1,4 @@
#include "expr.h"
#include "pgf.h"
#include <gu/intern.h>
#include <gu/assert.h>
#include <ctype.h>

View File

@@ -5,7 +5,6 @@
#include <gu/write.h>
#include <gu/variant.h>
#include <gu/seq.h>
#include <pgf/pgf.h>
/// Abstract syntax trees
/// @file
@@ -125,6 +124,15 @@ typedef struct {
PgfExpr expr;
} PgfExprImplArg;
typedef float prob_t;
/// An expression paired with a value of type prob_t.
typedef struct {
// NOTE(review): the scale of this value (plain probability vs. some
// kind of cost) is not visible here — confirm before relying on it.
prob_t prob;
// The expression that the value above is attached to.
PgfExpr expr;
} PgfExprProb;
extern GU_DECLARE_TYPE(PgfExprProb, struct);
int
pgf_expr_arity(PgfExpr expr);

View File

@@ -1,11 +1,13 @@
#include <gu/list.h>
#include <pgf/lexer.h>
#include <pgf/pgf.h>
#include <pgf/data.h>
#include <wctype.h>
/// State of a lexer: its input reader, the pool used for token strings,
/// and the token produced by the most recent read.
struct PgfLexer {
// Input source; stored from the reader passed to pgf_new_lexer.
GuReader* rdr;
// Pool passed to pgf_new_lexer; token strings are frozen into it.
GuPool* pool;
// Set to ' ' when the lexer is created.
// NOTE(review): presumably the current lookahead character — confirm
// against the body of pgf_lexer_read_token.
GuUCS ucs;
// Token stored by the last pgf_lexer_read_token call; it is
// gu_empty_string until the first token has been read.
PgfToken tok;
};
PgfLexer*
@@ -13,17 +15,17 @@ pgf_new_lexer(GuReader *rdr, GuPool *pool)
{
PgfLexer* lexer = gu_new(PgfLexer, pool);
lexer->rdr = rdr;
lexer->pool = pool;
lexer->ucs = ' ';
lexer->tok = gu_empty_string;
return lexer;
}
PgfToken
pgf_lexer_next_token(PgfLexer *lexer, GuExn* err, GuPool *pool)
pgf_lexer_read_token(PgfLexer *lexer, GuExn* err)
{
GuPool* tmp_pool = gu_new_pool();
PgfToken tok;
GuStringBuf* buf = gu_string_buf(tmp_pool);
GuWriter* wtr = gu_string_buf_writer(buf);
@@ -109,8 +111,14 @@ pgf_lexer_next_token(PgfLexer *lexer, GuExn* err, GuPool *pool)
}
stop:
tok = gu_string_buf_freeze(buf, pool);
lexer->tok = gu_string_buf_freeze(buf, lexer->pool);
gu_pool_free(tmp_pool);
return tok;
return lexer->tok;
}
/// Return the token currently stored in the lexer.
///
/// This only reads the lexer's `tok` field; no input is consumed.
PgfToken
pgf_lexer_current_token(PgfLexer *lexer)
{
	PgfToken current = lexer->tok;
	return current;
}

View File

@@ -2,7 +2,9 @@
#define PGF_LEXER_H_
#include <gu/read.h>
#include <pgf/data.h>
/// A single lexical token
typedef GuString PgfToken;
typedef struct PgfLexer PgfLexer;
@@ -10,6 +12,9 @@ PgfLexer*
pgf_new_lexer(GuReader *rdr, GuPool *pool);
PgfToken
pgf_lexer_next_token(PgfLexer *lexer, GuExn* err, GuPool *pool);
pgf_lexer_read_token(PgfLexer *lexer, GuExn* err);
PgfToken
pgf_lexer_current_token(PgfLexer *lexer);
#endif // PGF_LEXER_H_

View File

@@ -2,8 +2,12 @@
#include <pgf/data.h>
#include <pgf/expr.h>
#include <pgf/reader.h>
#include <pgf/linearize.h>
#include <pgf/parser.h>
#include <pgf/lexer.h>
#include <gu/file.h>
#include <gu/string.h>
#include <gu/enum.h>
#include <stdio.h>
#include <math.h>
@@ -167,3 +171,73 @@ pgf_print_name(PgfConcr* concr, PgfCId id)
name = id;
return name;
}
/// Linearize the first concrete syntax tree obtained for an expression.
///
/// @param concr  concrete syntax to linearize in
/// @param expr   abstract expression to linearize
/// @param wtr    writer that receives the output
/// @param err    exception frame handed on to the linearizer
///
/// Nothing is written when the concretizer yields a null tree.
void
pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err)
{
	// All intermediate data lives in a scratch pool released on exit.
	GuPool* scratch = gu_local_pool();

	// Only the first tree of the enumeration is ever looked at.
	GuEnum* trees = pgf_lzr_concretize(concr, expr, scratch);
	PgfCncTree first = gu_next(trees, PgfCncTree, scratch);

	if (gu_variant_is_null(first)) {
		gu_pool_free(scratch);
		return;
	}

	pgf_lzr_linearize_simple(concr, first, 0, wtr, err);
	gu_pool_free(scratch);
}
GuEnum*
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
{
// Begin parsing a sentence of the specified category
PgfParseState* state =
pgf_parser_init_state(concr, cat, 0, pool);
if (state == NULL) {
return NULL;
}
// Tokenization
GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), pool);
PgfToken tok = pgf_lexer_read_token(lexer, lex_err);
while (!gu_exn_is_raised(lex_err)) {
// feed the token to get a new parse state
state = pgf_parser_next_state(state, tok, pool);
if (state == NULL) {
return NULL;
}
tok = pgf_lexer_read_token(lexer, lex_err);
}
// Now begin enumerating the resulting syntax trees
return pgf_parse_result(state, pool);
}
void
pgf_print_chunks(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
{
// Begin parsing a sentence of the specified category
PgfParseState* state =
pgf_parser_init_state(concr, cat, 0, pool);
if (state == NULL) {
printf("\n");
return;
}
// Tokenization
GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), pool);
PgfToken tok = pgf_lexer_read_token(lexer, lex_err);
while (!gu_exn_is_raised(lex_err)) {
// feed the token to get a new parse state
state = pgf_parser_next_state(state, tok, pool);
if (state == NULL) {
printf("\n");
return;
}
tok = pgf_lexer_read_token(lexer, lex_err);
}
pgf_parse_print_chunks(state);
}

View File

@@ -28,6 +28,7 @@
#include <gu/exn.h>
#include <gu/mem.h>
#include <gu/map.h>
#include <gu/enum.h>
#include <gu/string.h>
@@ -37,19 +38,21 @@ extern GU_DECLARE_TYPE(PgfCId, typedef);
extern GU_DECLARE_TYPE(PgfExn, abstract);
/// A single lexical token
typedef GuString PgfToken;
/// @name PGF Grammar objects
/// @{
typedef struct PgfPGF PgfPGF;
extern GU_DECLARE_TYPE(PgfPGF, struct);
typedef struct PgfConcr PgfConcr;
extern GU_DECLARE_TYPE(PgfConcr, struct);
/**< A representation of a PGF grammar.
*/
#include <pgf/expr.h>
#include <pgf/lexer.h>
PgfPGF*
pgf_read(const char* fpath,
@@ -103,8 +106,16 @@ pgf_iter_functions_by_cat(PgfPGF* pgf, PgfCId catname,
GuString
pgf_print_name(PgfConcr*, PgfCId id);
#include <gu/type.h>
extern GU_DECLARE_TYPE(PgfPGF, struct);
/// Linearize an expression in the given concrete syntax.
///
/// The first concrete tree obtained for `expr` is written to `wtr`;
/// nothing is written if that tree is null. `err` is passed on to the
/// linearizer.
void
pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err);
/// Parse the tokens read from `lexer` as a phrase of category `cat`.
///
/// Tokens are read until the lexer raises an exception. Returns an
/// enumeration of the parse results allocated in `pool`, or NULL if
/// parsing could not start or a token could not be consumed.
GuEnum*
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
// An experimental function. Please don't use it.
// It runs the parser over the tokens read from `lexer` and prints the
// chunks of the final parse state; if parsing cannot start or a token
// cannot be consumed, it prints a single newline instead.
void
pgf_print_chunks(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
/// @}