From c1b2246fa93fffa7c03fc2ec2a165315500a23fe Mon Sep 17 00:00:00 2001
From: "kr.angelov"
Date: Fri, 9 Mar 2012 09:14:44 +0000
Subject: [PATCH] libpgf: added simple lexer

---
 src/runtime/c/Makefile.am           |   3 +
 src/runtime/c/pgf/lexer.c           | 103 ++++++++++++++++++++++++++++
 src/runtime/c/pgf/lexer.h           |  15 ++++
 src/runtime/c/pgf/parser.c          |   4 +-
 src/runtime/c/utils/pgf-translate.c |  23 ++++---
 5 files changed, 138 insertions(+), 10 deletions(-)
 create mode 100644 src/runtime/c/pgf/lexer.c
 create mode 100644 src/runtime/c/pgf/lexer.h

diff --git a/src/runtime/c/Makefile.am b/src/runtime/c/Makefile.am
index 710dd7330..35d79afea 100644
--- a/src/runtime/c/Makefile.am
+++ b/src/runtime/c/Makefile.am
@@ -47,6 +47,7 @@ pgfinclude_HEADERS = \
 	pgf/expr.h \
 	pgf/linearize.h \
 	pgf/parser.h \
+	pgf/lexer.h \
 	pgf/pgf.h
 
 libgu_la_SOURCES = \
@@ -87,6 +88,8 @@ libpgf_la_SOURCES = \
 	pgf/expr.h \
 	pgf/parser.c \
 	pgf/parser.h \
+	pgf/lexer.c \
+	pgf/lexer.h \
 	pgf/reader.c \
 	pgf/linearize.c \
 	pgf/printer.c
diff --git a/src/runtime/c/pgf/lexer.c b/src/runtime/c/pgf/lexer.c
new file mode 100644
index 000000000..fd196bc1b
--- /dev/null
+++ b/src/runtime/c/pgf/lexer.c
@@ -0,0 +1,103 @@
+#include
+#include
+#include
+#include
+
+struct PgfLexer {	/* lexer state: an input reader plus one code point of lookahead */
+	GuReader* rdr;	/* reader that code points are pulled from */
+	GuUCS ucs;	/* last code point read from rdr that is not yet part of a returned token */
+};
+
+PgfLexer*
+pgf_new_lexer(GuReader *rdr, GuPool *pool)	/* allocate a lexer with gu_new on pool and attach it to rdr */
+{
+	PgfLexer* lexer = gu_new(PgfLexer, pool);
+	lexer->rdr = rdr;
+	lexer->ucs = ' ';	/* start on a space: the whitespace loop of the first pgf_lexer_next_token call then reads the first code point from rdr */
+	return lexer;
+}
+
+PgfToken
+pgf_lexer_next_token(PgfLexer *lexer, GuExn* err, GuPool *pool)	/* collect the next word, number or single code point from lexer->rdr and return it as a token; stops early once err is raised */
+{
+	GuPool* tmp_pool = gu_new_pool();	/* scratch pool backing buf; released with gu_pool_free before returning */
+
+	PgfToken tok;
+
+	GuStringBuf* buf = gu_string_buf(tmp_pool);
+	GuWriter* wtr = gu_string_buf_writer(buf);	/* code points of the current token are written through wtr into buf */
+
+	while (iswspace(lexer->ucs)) {	/* skip whitespace in front of the token */
+		lexer->ucs = gu_read_ucs(lexer->rdr, err);
+		if (gu_exn_is_raised(err))
+			goto stop;
+	}
+
+	if (iswalpha(lexer->ucs) ||	/* word token: starts with an alphabetic code point, an apostrophe or an underscore */
+	    lexer->ucs == '\'' ||
+	    lexer->ucs == '_') {
+		do {
+			gu_ucs_write(lexer->ucs, wtr, err);
+			if (gu_exn_is_raised(err))
+				goto stop;
+			lexer->ucs = gu_read_ucs(lexer->rdr, err);
+			if (gu_exn_is_raised(err))
+				goto stop;
+		} while (iswalnum(lexer->ucs) ||	/* continues over alphanumerics, apostrophes and underscores */
+		         lexer->ucs == '\'' ||
+		         lexer->ucs == '_');
+	} else if (iswdigit(lexer->ucs) || lexer->ucs == '-') {	/* number token: optional minus sign, digits, then optionally a dot and further digits */
+		if (lexer->ucs == '-') {
+			gu_ucs_write(lexer->ucs, wtr, err);
+			if (gu_exn_is_raised(err))
+				goto stop;
+			lexer->ucs = gu_read_ucs(lexer->rdr, err);
+			if (gu_exn_is_raised(err))
+				goto stop;
+
+			if (!iswdigit(lexer->ucs))	/* a minus sign not followed by a digit is returned as a token on its own */
+				goto stop;
+		}
+
+		do {
+			gu_ucs_write(lexer->ucs, wtr, err);
+			if (gu_exn_is_raised(err))
+				goto stop;
+			lexer->ucs = gu_read_ucs(lexer->rdr, err);
+			if (gu_exn_is_raised(err))
+				goto stop;
+		} while (iswdigit(lexer->ucs));
+
+		if (lexer->ucs == '.') {	/* the dot is appended even when no digit follows it */
+			gu_ucs_write(lexer->ucs, wtr, err);
+			if (gu_exn_is_raised(err))
+				goto stop;
+
+			lexer->ucs = gu_read_ucs(lexer->rdr, err);
+			if (gu_exn_is_raised(err))
+				goto stop;
+
+			while (iswdigit(lexer->ucs)) {
+				gu_ucs_write(lexer->ucs, wtr, err);
+				if (gu_exn_is_raised(err))
+					goto stop;
+				lexer->ucs = gu_read_ucs(lexer->rdr, err);
+				if (gu_exn_is_raised(err))
+					goto stop;
+			}
+		}
+	} else {	/* any other code point forms a single-code-point token */
+		gu_ucs_write(lexer->ucs, wtr, err);
+		if (gu_exn_is_raised(err))
+			goto stop;
+		lexer->ucs = gu_read_ucs(lexer->rdr, err);
+		if (gu_exn_is_raised(err))
+			goto stop;
+	}
+
+stop:	/* reached on normal completion and whenever err was raised */
+	tok = gu_string_buf_freeze(buf, pool);	/* the token is built from whatever was written to buf so far, using the caller pool */
+
+	gu_pool_free(tmp_pool);
+	return tok;	/* NOTE(review): when err is raised mid-token the partial token is still returned, but the pgf-translate loop in this patch tests the exception before using the token, so input that ends without trailing whitespace may lose its last token - confirm */
+}
diff --git a/src/runtime/c/pgf/lexer.h b/src/runtime/c/pgf/lexer.h
new file mode 100644
index 000000000..9bead9c7e
--- /dev/null
+++ b/src/runtime/c/pgf/lexer.h
@@ -0,0 +1,15 @@
+#ifndef PGF_LEXER_H_
+#define PGF_LEXER_H_
+
+#include
+#include
+
+typedef struct PgfLexer PgfLexer;	/* opaque handle; the struct itself is defined in lexer.c */
+
+PgfLexer*
+pgf_new_lexer(GuReader *rdr, GuPool *pool);	/* create a lexer allocated on pool that reads from rdr */
+
+PgfToken
+pgf_lexer_next_token(PgfLexer *lexer, GuExn* err, GuPool *pool);	/* return the next token; callers check err to learn that reading stopped */
+
+#endif // PGF_LEXER_H_
diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c
index 8190a9df1..5cd6e2bda 100644
--- a/src/runtime/c/pgf/parser.c
+++ b/src/runtime/c/pgf/parser.c
@@ -953,8 +953,8 @@ typedef struct {
 	GuPool *pool;
 } PgfParseTokenCallback;
 
-static
-void pgf_match_token(PgfLexCallback* self, PgfToken tok, PgfItem* item)
+static void
+pgf_match_token(PgfLexCallback* self, PgfToken tok, PgfItem* item)
 {
 	PgfParseTokenCallback *clo = (PgfParseTokenCallback *) self;
 
diff --git a/src/runtime/c/utils/pgf-translate.c b/src/runtime/c/utils/pgf-translate.c
index 751bcf15b..b2a8bae59 100644
--- a/src/runtime/c/utils/pgf-translate.c
+++ b/src/runtime/c/utils/pgf-translate.c
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -121,20 +122,26 @@ int main(int argc, char* argv[]) {
 				status = EXIT_FAILURE;
 				break;
 			}
+
+			GuReader *rdr =	/* wrap the input line in a string reader for the lexer */
+				gu_string_reader(gu_str_string(line, pool), pool);
+			PgfLexer *lexer =
+				pgf_new_lexer(rdr, pool);
 
 			// naive tokenization
-			char* tok = strtok(line, " \n");
-			while (tok) {
-				GuString tok_s = gu_str_string(tok, pool);
-				gu_debug("parsing token \"%s\"", tok);
+			GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), pool);	/* separate exception object for the lexer; the loop below runs until it is raised */
+			PgfToken tok = pgf_lexer_next_token(lexer, lex_err, pool);
+			while (!gu_exn_is_raised(lex_err)) {	/* a token returned together with a raised lex_err is not fed to the parser */
 				// feed the token to get a new parse state
-				parse = pgf_parse_token(parse, tok_s, robust_mode, ppool);
+				parse = pgf_parse_token(parse, tok, robust_mode, ppool);
 				if (!parse) {
-					fprintf(stderr,
-						"Unexpected token: \"%s\"\n", tok);
+					gu_puts("Unexpected token: \"", wtr, err);	/* the rejected token is now reported through wtr instead of fprintf to stderr */
+					gu_string_write(tok, wtr, err);
+					gu_puts("\"\n", wtr, err);
 					goto fail_parse;
 				}
-				tok = strtok(NULL, " \n");
+
+				tok = pgf_lexer_next_token(lexer, lex_err, pool);
 			}
 
 			if (robust_mode) {