This patch removes Gregoire's parse_tokens function from the Python binding and adds another implementation that builds on the existing lexer API in the C runtime. It is now possible to write incremental lexers in Python.

This commit is contained in:
kr.angelov
2013-02-01 09:29:43 +00:00
parent eca4a28563
commit e9b5557c6c
8 changed files with 113 additions and 137 deletions

View File

@@ -3,26 +3,15 @@
#include <pgf/data.h>
#include <wctype.h>
struct PgfLexer {
typedef struct {
PgfLexer base;
GuReader* rdr;
GuPool* pool;
GuUCS ucs;
PgfToken tok;
};
// Create a lexer with gu_new from `pool` and attach it to the reader `rdr`.
PgfLexer*
pgf_new_lexer(GuReader *rdr, GuPool *pool)
{
	PgfLexer* lx = gu_new(PgfLexer, pool);

	// Remember the input source and the pool handed in by the caller.
	lx->pool = pool;
	lx->rdr = rdr;

	// Initial state: a space as the current character, an empty token.
	lx->tok = gu_empty_string;
	lx->ucs = ' ';

	return lx;
}
} PgfSimpleLexer;
static void
pgf_lexer_read_ucs(PgfLexer *lexer, GuExn* err)
pgf_lexer_read_ucs(PgfSimpleLexer *lexer, GuExn* err)
{
lexer->ucs = gu_read_ucs(lexer->rdr, err);
if (gu_exn_is_raised(err)) {
@@ -31,9 +20,10 @@ pgf_lexer_read_ucs(PgfLexer *lexer, GuExn* err)
}
}
PgfToken
pgf_lexer_read_token(PgfLexer *lexer, GuExn* err)
static PgfToken
pgf_simple_lexer_read_token(PgfLexer *base, GuExn* err)
{
PgfSimpleLexer* lexer = (PgfSimpleLexer*) base;
GuPool* tmp_pool = gu_new_pool();
GuStringBuf* buf = gu_string_buf(tmp_pool);
@@ -107,10 +97,28 @@ pgf_lexer_read_token(PgfLexer *lexer, GuExn* err)
}
stop:
lexer->tok = gu_string_buf_freeze(buf, lexer->pool);
lexer->base.tok = gu_string_buf_freeze(buf, lexer->pool);
gu_pool_free(tmp_pool);
return lexer->tok;
return lexer->base.tok;
}
// Create a PgfSimpleLexer with gu_new from `pool`, reading from `rdr`,
// and hand it back through its embedded PgfLexer base.
PgfLexer*
pgf_new_simple_lexer(GuReader *rdr, GuPool *pool)
{
	PgfSimpleLexer* simple = gu_new(PgfSimpleLexer, pool);

	// State private to the simple lexer.
	simple->rdr = rdr;
	simple->pool = pool;
	simple->ucs = ' ';

	// Generic part: the token-reading callback and an empty current token.
	simple->base.tok = gu_empty_string;
	simple->base.read_token = pgf_simple_lexer_read_token;

	// `base` is the first member, so this is the same address as `simple`.
	return &simple->base;
}
// Read the next token by calling the read_token callback stored in `lexer`,
// passing the lexer itself and `err` through unchanged.
PgfToken
pgf_lexer_read_token(PgfLexer *lexer, GuExn* err)
{
	PgfToken next = lexer->read_token(lexer, err);
	return next;
}
PgfToken

View File

@@ -6,10 +6,13 @@
/// A single lexical token
typedef GuString PgfToken;
typedef struct PgfLexer PgfLexer;
/// Common base of all lexers. A concrete lexer embeds this structure
/// and supplies its own read_token implementation.
typedef struct PgfLexer {
	/// Callback that reads the next token. It is invoked with the
	/// lexer itself and the exception frame (see pgf_lexer_read_token).
	/// The parameter list is spelled out so that assignments and calls
	/// are type-checked; an empty `()` declarator would leave them
	/// unchecked and means `(void)` in C23.
	PgfToken (*read_token)(struct PgfLexer* self, GuExn* err);
	/// The most recently read token (the empty string before any read).
	PgfToken tok;
} PgfLexer;
PgfLexer*
pgf_new_lexer(GuReader *rdr, GuPool *pool);
pgf_new_simple_lexer(GuReader *rdr, GuPool *pool);
PgfToken
pgf_lexer_read_token(PgfLexer *lexer, GuExn* err);

View File

@@ -223,37 +223,13 @@ pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
tok = pgf_lexer_read_token(lexer, lex_err);
}
if (gu_exn_caught(lex_err) != gu_type(GuEOF))
return NULL;
// Now begin enumerating the resulting syntax trees
return pgf_parse_result(state, pool);
}
// Variant of pgf_parse that takes its input as an array of `len`
// C strings instead of reading tokens from a lexer.
// Returns NULL when the initial parse state cannot be created or when
// some token yields no next state.
GuEnum*
pgf_parse_tokens(PgfConcr* concr, PgfCId cat, char **tokens, int len, GuPool* pool)
{
	// Begin parsing a sentence of the specified category
	PgfParseState* ps = pgf_parser_init_state(concr, cat, 0, pool);

	// Feed the tokens one at a time; stop as soon as the state is lost.
	int idx = 0;
	while (ps != NULL && idx < len) {
		PgfToken word = gu_str_string(tokens[idx], pool);
		ps = pgf_parser_next_state(ps, word, pool);
		idx++;
	}

	if (ps == NULL) {
		return NULL;
	}

	// Now begin enumerating the resulting syntax trees
	return pgf_parse_result(ps, pool);
}
void
pgf_print_chunks(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
{

View File

@@ -115,9 +115,6 @@ pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err);
PgfExprEnum*
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
PgfExprEnum*
pgf_parse_tokens(PgfConcr* concr, PgfCId cat, char* tokens[], int len, GuPool* pool);
PgfExprEnum*
pgf_generate(PgfPGF* pgf, PgfCId cat, GuPool* pool);

View File

@@ -98,7 +98,7 @@ int main(int argc, char* argv[]) {
GuReader *rdr =
gu_string_reader(gu_str_string(line, ppool), ppool);
PgfLexer *lexer =
pgf_new_lexer(rdr, ppool);
pgf_new_simple_lexer(rdr, ppool);
pgf_print_chunks(from_concr, cat, lexer, ppool);

View File

@@ -123,7 +123,7 @@ int main(int argc, char* argv[]) {
}
GuReader *rdr = gu_string_reader(gu_str_string(line, ppool), ppool);
PgfLexer *lexer = pgf_new_lexer(rdr, ppool);
PgfLexer *lexer = pgf_new_simple_lexer(rdr, ppool);
GuEnum* result = pgf_parse(concr, cat, lexer, ppool);
PgfExprProb* ep = NULL;

View File

@@ -164,7 +164,7 @@ int main(int argc, char* argv[]) {
GuReader *rdr =
gu_string_reader(gu_str_string(line, ppool), ppool);
PgfLexer *lexer =
pgf_new_lexer(rdr, ppool);
pgf_new_simple_lexer(rdr, ppool);
clock_t start = clock();