libpgf: added simple lexer

This commit is contained in:
kr.angelov
2012-03-09 09:14:44 +00:00
parent f5737e8222
commit c1b2246fa9
5 changed files with 138 additions and 10 deletions

View File

@@ -47,6 +47,7 @@ pgfinclude_HEADERS = \
pgf/expr.h \
pgf/linearize.h \
pgf/parser.h \
pgf/lexer.h \
pgf/pgf.h
libgu_la_SOURCES = \
@@ -87,6 +88,8 @@ libpgf_la_SOURCES = \
pgf/expr.h \
pgf/parser.c \
pgf/parser.h \
pgf/lexer.c \
pgf/lexer.h \
pgf/reader.c \
pgf/linearize.c \
pgf/printer.c

103
src/runtime/c/pgf/lexer.c Normal file
View File

@@ -0,0 +1,103 @@
#include <gu/list.h>
#include <pgf/lexer.h>
#include <pgf/data.h>
#include <wctype.h>
// Lexer state: a character source plus a single character of lookahead.
struct PgfLexer {
// Reader from which characters are pulled one at a time.
GuReader* rdr;
// The most recently read character that has not yet been consumed
// into a token; pgf_lexer_next_token() inspects it to decide how to
// continue and replaces it with each subsequent read.
GuUCS ucs;
};
/* Create a lexer that pulls its characters from RDR.
 *
 * The lexer object is obtained with gu_new() from POOL. No character is
 * read from RDR here: the lookahead slot is seeded with a blank, which
 * pgf_lexer_next_token() skips as whitespace before it starts reading
 * real input.
 */
PgfLexer*
pgf_new_lexer(GuReader *rdr, GuPool *pool)
{
	PgfLexer* self = gu_new(PgfLexer, pool);

	/* Blank lookahead: forces the first token request to read from rdr. */
	self->ucs = ' ';
	self->rdr = rdr;

	return self;
}
PgfToken
pgf_lexer_next_token(PgfLexer *lexer, GuExn* err, GuPool *pool)
{
GuPool* tmp_pool = gu_new_pool();
PgfToken tok;
GuStringBuf* buf = gu_string_buf(tmp_pool);
GuWriter* wtr = gu_string_buf_writer(buf);
while (iswspace(lexer->ucs)) {
lexer->ucs = gu_read_ucs(lexer->rdr, err);
if (gu_exn_is_raised(err))
goto stop;
}
if (iswalpha(lexer->ucs) ||
lexer->ucs == '\'' ||
lexer->ucs == '_') {
do {
gu_ucs_write(lexer->ucs, wtr, err);
if (gu_exn_is_raised(err))
goto stop;
lexer->ucs = gu_read_ucs(lexer->rdr, err);
if (gu_exn_is_raised(err))
goto stop;
} while (iswalnum(lexer->ucs) ||
lexer->ucs == '\'' ||
lexer->ucs == '_');
} else if (iswdigit(lexer->ucs) || lexer->ucs == '-') {
if (lexer->ucs == '-') {
gu_ucs_write(lexer->ucs, wtr, err);
if (gu_exn_is_raised(err))
goto stop;
lexer->ucs = gu_read_ucs(lexer->rdr, err);
if (gu_exn_is_raised(err))
goto stop;
if (!iswdigit(lexer->ucs))
goto stop;
}
do {
gu_ucs_write(lexer->ucs, wtr, err);
if (gu_exn_is_raised(err))
goto stop;
lexer->ucs = gu_read_ucs(lexer->rdr, err);
if (gu_exn_is_raised(err))
goto stop;
} while (iswdigit(lexer->ucs));
if (lexer->ucs == '.') {
gu_ucs_write(lexer->ucs, wtr, err);
if (gu_exn_is_raised(err))
goto stop;
lexer->ucs = gu_read_ucs(lexer->rdr, err);
if (gu_exn_is_raised(err))
goto stop;
while (iswdigit(lexer->ucs)) {
gu_ucs_write(lexer->ucs, wtr, err);
if (gu_exn_is_raised(err))
goto stop;
lexer->ucs = gu_read_ucs(lexer->rdr, err);
if (gu_exn_is_raised(err))
goto stop;
}
}
} else {
gu_ucs_write(lexer->ucs, wtr, err);
if (gu_exn_is_raised(err))
goto stop;
lexer->ucs = gu_read_ucs(lexer->rdr, err);
if (gu_exn_is_raised(err))
goto stop;
}
stop:
tok = gu_string_buf_freeze(buf, pool);
gu_pool_free(tmp_pool);
return tok;
}

15
src/runtime/c/pgf/lexer.h Normal file
View File

@@ -0,0 +1,15 @@
// Public interface of the simple PGF lexer: splits a character stream
// read through a GuReader into PgfToken values.
#ifndef PGF_LEXER_H_
#define PGF_LEXER_H_
#include <gu/read.h>
#include <pgf/data.h>
// Opaque lexer state; the struct is defined in lexer.c.
typedef struct PgfLexer PgfLexer;
// Create a lexer that reads its characters from rdr. The lexer object
// itself is created from pool.
PgfLexer*
pgf_new_lexer(GuReader *rdr, GuPool *pool);
// Read the next token from lexer. The returned token string is frozen
// into pool. Failures while reading or writing are reported through
// err; a (possibly partial or empty) token is still returned in that
// case, so check err before using the result.
PgfToken
pgf_lexer_next_token(PgfLexer *lexer, GuExn* err, GuPool *pool);
#endif // PGF_LEXER_H_

View File

@@ -953,8 +953,8 @@ typedef struct {
GuPool *pool;
} PgfParseTokenCallback;
static
void pgf_match_token(PgfLexCallback* self, PgfToken tok, PgfItem* item)
static void
pgf_match_token(PgfLexCallback* self, PgfToken tok, PgfItem* item)
{
PgfParseTokenCallback *clo = (PgfParseTokenCallback *) self;

View File

@@ -7,6 +7,7 @@
#include <pgf/pgf.h>
#include <pgf/data.h>
#include <pgf/parser.h>
#include <pgf/lexer.h>
#include <pgf/linearize.h>
#include <pgf/expr.h>
#include <pgf/edsl.h>
@@ -121,20 +122,26 @@ int main(int argc, char* argv[]) {
status = EXIT_FAILURE;
break;
}
GuReader *rdr =
gu_string_reader(gu_str_string(line, pool), pool);
PgfLexer *lexer =
pgf_new_lexer(rdr, pool);
// naive tokenization
char* tok = strtok(line, " \n");
while (tok) {
GuString tok_s = gu_str_string(tok, pool);
gu_debug("parsing token \"%s\"", tok);
GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), pool);
PgfToken tok = pgf_lexer_next_token(lexer, lex_err, pool);
while (!gu_exn_is_raised(lex_err)) {
// feed the token to get a new parse state
parse = pgf_parse_token(parse, tok_s, robust_mode, ppool);
parse = pgf_parse_token(parse, tok, robust_mode, ppool);
if (!parse) {
fprintf(stderr,
"Unexpected token: \"%s\"\n", tok);
gu_puts("Unexpected token: \"", wtr, err);
gu_string_write(tok, wtr, err);
gu_puts("\"\n", wtr, err);
goto fail_parse;
}
tok = strtok(NULL, " \n");
tok = pgf_lexer_next_token(lexer, lex_err, pool);
}
if (robust_mode) {