mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-22 19:22:50 -06:00
This patch removes Gregoire's parse_tokens function from the Python binding and adds another implementation which builds on the existing lexer API in the C runtime. It is now possible to write incremental lexers in Python.
This commit is contained in:
@@ -3,26 +3,15 @@
|
||||
#include <pgf/data.h>
|
||||
#include <wctype.h>
|
||||
|
||||
struct PgfLexer {
|
||||
typedef struct {
|
||||
PgfLexer base;
|
||||
GuReader* rdr;
|
||||
GuPool* pool;
|
||||
GuUCS ucs;
|
||||
PgfToken tok;
|
||||
};
|
||||
|
||||
PgfLexer*
|
||||
pgf_new_lexer(GuReader *rdr, GuPool *pool)
|
||||
{
|
||||
PgfLexer* lexer = gu_new(PgfLexer, pool);
|
||||
lexer->rdr = rdr;
|
||||
lexer->pool = pool;
|
||||
lexer->ucs = ' ';
|
||||
lexer->tok = gu_empty_string;
|
||||
return lexer;
|
||||
}
|
||||
} PgfSimpleLexer;
|
||||
|
||||
static void
|
||||
pgf_lexer_read_ucs(PgfLexer *lexer, GuExn* err)
|
||||
pgf_lexer_read_ucs(PgfSimpleLexer *lexer, GuExn* err)
|
||||
{
|
||||
lexer->ucs = gu_read_ucs(lexer->rdr, err);
|
||||
if (gu_exn_is_raised(err)) {
|
||||
@@ -31,9 +20,10 @@ pgf_lexer_read_ucs(PgfLexer *lexer, GuExn* err)
|
||||
}
|
||||
}
|
||||
|
||||
PgfToken
|
||||
pgf_lexer_read_token(PgfLexer *lexer, GuExn* err)
|
||||
static PgfToken
|
||||
pgf_simple_lexer_read_token(PgfLexer *base, GuExn* err)
|
||||
{
|
||||
PgfSimpleLexer* lexer = (PgfSimpleLexer*) base;
|
||||
GuPool* tmp_pool = gu_new_pool();
|
||||
|
||||
GuStringBuf* buf = gu_string_buf(tmp_pool);
|
||||
@@ -107,10 +97,28 @@ pgf_lexer_read_token(PgfLexer *lexer, GuExn* err)
|
||||
}
|
||||
|
||||
stop:
|
||||
lexer->tok = gu_string_buf_freeze(buf, lexer->pool);
|
||||
lexer->base.tok = gu_string_buf_freeze(buf, lexer->pool);
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
return lexer->tok;
|
||||
return lexer->base.tok;
|
||||
}
|
||||
|
||||
PgfLexer*
|
||||
pgf_new_simple_lexer(GuReader *rdr, GuPool *pool)
|
||||
{
|
||||
PgfSimpleLexer* lexer = gu_new(PgfSimpleLexer, pool);
|
||||
lexer->base.read_token = pgf_simple_lexer_read_token;
|
||||
lexer->base.tok = gu_empty_string;
|
||||
lexer->rdr = rdr;
|
||||
lexer->pool = pool;
|
||||
lexer->ucs = ' ';
|
||||
return ((PgfLexer*) lexer);
|
||||
}
|
||||
|
||||
PgfToken
|
||||
pgf_lexer_read_token(PgfLexer *lexer, GuExn* err)
|
||||
{
|
||||
return lexer->read_token(lexer, err);
|
||||
}
|
||||
|
||||
PgfToken
|
||||
|
||||
@@ -6,10 +6,13 @@
|
||||
/// A single lexical token
|
||||
typedef GuString PgfToken;
|
||||
|
||||
typedef struct PgfLexer PgfLexer;
|
||||
typedef struct {
|
||||
PgfToken (*read_token)();
|
||||
PgfToken tok;
|
||||
} PgfLexer;
|
||||
|
||||
PgfLexer*
|
||||
pgf_new_lexer(GuReader *rdr, GuPool *pool);
|
||||
pgf_new_simple_lexer(GuReader *rdr, GuPool *pool);
|
||||
|
||||
PgfToken
|
||||
pgf_lexer_read_token(PgfLexer *lexer, GuExn* err);
|
||||
|
||||
@@ -223,37 +223,13 @@ pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
|
||||
tok = pgf_lexer_read_token(lexer, lex_err);
|
||||
}
|
||||
|
||||
if (gu_exn_caught(lex_err) != gu_type(GuEOF))
|
||||
return NULL;
|
||||
|
||||
// Now begin enumerating the resulting syntax trees
|
||||
return pgf_parse_result(state, pool);
|
||||
}
|
||||
|
||||
// Same as previous but accept a list of tokens as input instead of a
|
||||
// lexer
|
||||
GuEnum*
|
||||
pgf_parse_tokens(PgfConcr* concr, PgfCId cat, char **tokens, int len, GuPool* pool)
|
||||
{
|
||||
// Begin parsing a sentence of the specified category
|
||||
PgfParseState* state =
|
||||
pgf_parser_init_state(concr, cat, 0, pool);
|
||||
if (state == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Parsing
|
||||
PgfToken tok;
|
||||
for (int i = 0; i < len; i++) {
|
||||
tok = gu_str_string(tokens[i], pool);
|
||||
|
||||
state = pgf_parser_next_state(state, tok, pool);
|
||||
if (state == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Now begin enumerating the resulting syntax trees
|
||||
return pgf_parse_result(state, pool);
|
||||
}
|
||||
|
||||
void
|
||||
pgf_print_chunks(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
|
||||
{
|
||||
|
||||
@@ -115,9 +115,6 @@ pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err);
|
||||
PgfExprEnum*
|
||||
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
|
||||
|
||||
PgfExprEnum*
|
||||
pgf_parse_tokens(PgfConcr* concr, PgfCId cat, char* tokens[], int len, GuPool* pool);
|
||||
|
||||
PgfExprEnum*
|
||||
pgf_generate(PgfPGF* pgf, PgfCId cat, GuPool* pool);
|
||||
|
||||
|
||||
@@ -98,7 +98,7 @@ int main(int argc, char* argv[]) {
|
||||
GuReader *rdr =
|
||||
gu_string_reader(gu_str_string(line, ppool), ppool);
|
||||
PgfLexer *lexer =
|
||||
pgf_new_lexer(rdr, ppool);
|
||||
pgf_new_simple_lexer(rdr, ppool);
|
||||
|
||||
pgf_print_chunks(from_concr, cat, lexer, ppool);
|
||||
|
||||
|
||||
@@ -123,7 +123,7 @@ int main(int argc, char* argv[]) {
|
||||
}
|
||||
|
||||
GuReader *rdr = gu_string_reader(gu_str_string(line, ppool), ppool);
|
||||
PgfLexer *lexer = pgf_new_lexer(rdr, ppool);
|
||||
PgfLexer *lexer = pgf_new_simple_lexer(rdr, ppool);
|
||||
GuEnum* result = pgf_parse(concr, cat, lexer, ppool);
|
||||
|
||||
PgfExprProb* ep = NULL;
|
||||
|
||||
@@ -164,7 +164,7 @@ int main(int argc, char* argv[]) {
|
||||
GuReader *rdr =
|
||||
gu_string_reader(gu_str_string(line, ppool), ppool);
|
||||
PgfLexer *lexer =
|
||||
pgf_new_lexer(rdr, ppool);
|
||||
pgf_new_simple_lexer(rdr, ppool);
|
||||
|
||||
clock_t start = clock();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user