The GF syntax for identifiers is extended with quoted forms, i.e. you can write, for instance, 'ab.c', and everything between the quotes is then the identifier. This includes Unicode characters and non-ASCII symbols. This is useful for automatically generated GF grammars.

This commit is contained in:
kr.angelov
2013-11-22 13:30:18 +00:00
parent 1d2786f7da
commit 8bcc70eac8
8 changed files with 165 additions and 54 deletions

View File

@@ -1,5 +1,6 @@
#include "pgf.h"
#include <gu/assert.h>
#include <gu/utf8.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
@@ -166,6 +167,45 @@ pgf_expr_parser_getc(PgfExprParser* parser)
}
}
/*
 * Tells whether `ucs` is accepted as the first character of an unquoted
 * identifier: an underscore, an ASCII letter, or a code in the range
 * 192..255 other than 215 and 247.
 */
static bool
pgf_is_ident_first(GuUCS ucs)
{
    if (ucs == '_') {
        return true;
    }
    if (ucs >= 'a' && ucs <= 'z') {
        return true;
    }
    if (ucs >= 'A' && ucs <= 'Z') {
        return true;
    }
    /* Everything else must lie in 192..255 ... */
    if (ucs < 192 || ucs > 255) {
        return false;
    }
    /* ... with 215 and 247 carved out of that range. */
    return ucs != 215 && ucs != 247;
}
/*
 * Tells whether `ucs` is accepted after the first character of an unquoted
 * identifier: an underscore, an apostrophe, an ASCII digit, an ASCII
 * letter, or a code in the range 192..255 other than 215 and 247.
 */
static bool
pgf_is_ident_rest(GuUCS ucs)
{
    if (ucs == '_' || ucs == '\'') {
        return true;
    }
    if (ucs >= '0' && ucs <= '9') {
        return true;
    }
    if (ucs >= 'a' && ucs <= 'z') {
        return true;
    }
    if (ucs >= 'A' && ucs <= 'Z') {
        return true;
    }
    /* Everything else must lie in 192..255 ... */
    if (ucs < 192 || ucs > 255) {
        return false;
    }
    /* ... with 215 and 247 carved out of that range. */
    return ucs != 215 && ucs != 247;
}
/*
 * Tells whether `id` can be written without quotes: its first decoded
 * character must satisfy pgf_is_ident_first() and every following one,
 * up to the terminating 0, must satisfy pgf_is_ident_rest().
 */
static bool
pgf_is_normal_ident(PgfCId id)
{
    const uint8_t* cursor = (const uint8_t*) id;

    /* The leading character obeys the stricter "first" rule. */
    if (!pgf_is_ident_first(gu_utf8_decode(&cursor))) {
        return false;
    }

    /* Walk the remaining characters until the decoder yields 0. */
    GuUCS ch;
    while ((ch = gu_utf8_decode(&cursor)) != 0) {
        if (!pgf_is_ident_rest(ch)) {
            return false;
        }
    }

    return true;
}
static void
pgf_expr_parser_token(PgfExprParser* parser)
{
@@ -227,20 +267,32 @@ pgf_expr_parser_token(PgfExprParser* parser)
pgf_expr_parser_getc(parser);
parser->token_tag = PGF_TOKEN_COLON;
break;
case '_':
case '\'':
pgf_expr_parser_getc(parser);
parser->token_tag = PGF_TOKEN_WILD;
GuBuf* chars = gu_new_buf(char, parser->tmp_pool);
while (parser->ch != '\'' && parser->ch != EOF) {
if (parser->ch == '\\') {
pgf_expr_parser_getc(parser);
}
gu_buf_push(chars, char, parser->ch);
pgf_expr_parser_getc(parser);
}
if (parser->ch == '\'') {
pgf_expr_parser_getc(parser);
gu_buf_push(chars, char, 0);
parser->token_tag = PGF_TOKEN_IDENT;
parser->token_value = chars;
}
break;
default: {
GuBuf* chars = gu_new_buf(char, parser->tmp_pool);
if (isalpha(parser->ch)) {
while (isalnum(parser->ch) ||
parser->ch == '_' ||
parser->ch == '\'') {
if (pgf_is_ident_first(parser->ch)) {
do {
gu_buf_push(chars, char, parser->ch);
pgf_expr_parser_getc(parser);
}
} while (pgf_is_ident_rest(parser->ch));
gu_buf_push(chars, char, 0);
parser->token_tag = PGF_TOKEN_IDENT;
parser->token_value = chars;
@@ -268,7 +320,7 @@ pgf_expr_parser_token(PgfExprParser* parser)
}
} else if (parser->ch == '"') {
pgf_expr_parser_getc(parser);
while (parser->ch != '"' && parser->ch != EOF) {
gu_buf_push(chars, char, parser->ch);
pgf_expr_parser_getc(parser);
@@ -924,6 +976,30 @@ pgf_expr_hash(GuHash h, PgfExpr e)
return h;
}
/*
 * Writes the identifier `id` to `out`.
 *
 * An identifier accepted by pgf_is_normal_ident() is written verbatim.
 * Any other identifier is wrapped in single quotes, with each apostrophe
 * and each backslash inside it preceded by a backslash.
 */
void
pgf_print_cid(PgfCId id,
              GuOut* out, GuExn* err)
{
    if (pgf_is_normal_ident(id)) {
        gu_string_write(id, out, err);
        return;
    }

    gu_putc('\'', out, err);

    const uint8_t* cursor = (const uint8_t*) id;
    GuUCS ch;
    while ((ch = gu_utf8_decode(&cursor)) != 0) {
        switch (ch) {
        case '\'':
            gu_puts("\\\'", out, err);
            break;
        case '\\':
            gu_puts("\\\\", out, err);
            break;
        default:
            gu_out_utf8(ch, out, err);
            break;
        }
    }

    gu_putc('\'', out, err);
}
void
pgf_print_literal(PgfLiteral lit,
GuOut* out, GuExn* err)
@@ -973,7 +1049,7 @@ pgf_print_expr(PgfExpr expr, PgfPrintContext* ctxt, int prec,
if (abs->bind_type == PGF_BIND_TYPE_IMPLICIT) {
gu_putc('{', out, err);
}
gu_string_write(abs->id, out, err);
pgf_print_cid(abs->id, out, err);
if (abs->bind_type == PGF_BIND_TYPE_IMPLICIT) {
gu_putc('}', out, err);
}
@@ -1028,7 +1104,7 @@ pgf_print_expr(PgfExpr expr, PgfPrintContext* ctxt, int prec,
break;
case PGF_EXPR_FUN: {
PgfExprFun* fun = ei.data;
gu_string_write(fun->fun, out, err);
pgf_print_cid(fun->fun, out, err);
break;
}
case PGF_EXPR_VAR: {
@@ -1043,7 +1119,7 @@ pgf_print_expr(PgfExpr expr, PgfPrintContext* ctxt, int prec,
if (c == NULL) {
gu_printf(out, err, "#%d", evar->var);
} else {
gu_string_write(c->name, out, err);
pgf_print_cid(c->name, out, err);
}
break;
}
@@ -1074,7 +1150,7 @@ pgf_print_hypo(PgfHypo *hypo, PgfPrintContext* ctxt, int prec,
{
if (hypo->bind_type == PGF_BIND_TYPE_IMPLICIT) {
gu_puts("({", out, err);
gu_string_write(hypo->cid, out, err);
pgf_print_cid(hypo->cid, out, err);
gu_puts("} : ", out, err);
pgf_print_type(hypo->type, ctxt, 0, out, err);
gu_puts(")", out, err);
@@ -1083,7 +1159,7 @@ pgf_print_hypo(PgfHypo *hypo, PgfPrintContext* ctxt, int prec,
if (strcmp(hypo->cid, "_") != 0) {
gu_puts("(", out, err);
gu_string_write(hypo->cid, out, err);
pgf_print_cid(hypo->cid, out, err);
gu_puts(" : ", out, err);
pgf_print_type(hypo->type, ctxt, 0, out, err);
gu_puts(")", out, err);
@@ -1117,7 +1193,7 @@ pgf_print_type(PgfType *type, PgfPrintContext* ctxt, int prec,
gu_puts(" -> ", out, err);
}
gu_string_write(type->cid, out, err);
pgf_print_cid(type->cid, out, err);
for (size_t i = 0; i < type->n_exprs; i++) {
gu_puts(" ", out, err);
@@ -1143,7 +1219,7 @@ pgf_print_type(PgfType *type, PgfPrintContext* ctxt, int prec,
if (prec > 3) gu_putc(')', out, err);
} else {
gu_string_write(type->cid, out, err);
pgf_print_cid(type->cid, out, err);
}
}

View File

@@ -176,6 +176,9 @@ struct PgfPrintContext {
PgfPrintContext* next;
};
/*
 * Writes the identifier `id` to `out`. Identifiers that do not have the
 * plain unquoted form are written between single quotes, with any
 * apostrophe or backslash inside them escaped by a backslash.
 */
void
pgf_print_cid(PgfCId id, GuOut* out, GuExn* err);
void
pgf_print_literal(PgfLiteral lit, GuOut* out, GuExn* err);

View File

@@ -16,7 +16,7 @@ pgf_print_flag(GuMapItor* fn, const void* key, void* value,
GuOut *out = clo->out;
gu_puts(" flag ", out, err);
gu_string_write(flag, out, err);
pgf_print_cid(flag, out, err);
gu_puts(" = ", out, err);
pgf_print_literal(lit, out, err);
gu_puts(";\n", out, err);
@@ -32,7 +32,7 @@ pgf_print_cat(GuMapItor* fn, const void* key, void* value,
GuOut *out = clo->out;
gu_puts(" cat ", out, err);
gu_string_write(name, out, err);
pgf_print_cid(name, out, err);
PgfPrintContext* ctxt = NULL;
size_t n_hypos = gu_seq_length(cat->context);
@@ -61,7 +61,7 @@ pgf_print_absfun(GuMapItor* fn, const void* key, void* value,
GuOut *out = clo->out;
gu_puts((fun->defns == NULL) ? " data " : " fun ", out, err);
gu_string_write(name, out, err);
pgf_print_cid(name, out, err);
gu_puts(" : ", out, err);
pgf_print_type(fun->type, NULL, 0, out, err);
gu_printf(out, err, " ; -- %f\n", fun->ep.prob);
@@ -70,7 +70,7 @@ static void
pgf_print_abstract(PgfAbstr* abstr, GuOut* out, GuExn* err)
{
gu_puts("abstract ", out, err);
gu_string_write(abstr->name, out, err);
pgf_print_cid(abstr->name, out, err);
gu_puts(" {\n", out, err);
PgfPrintFn clo1 = { { pgf_print_flag }, out };
@@ -205,7 +205,7 @@ pgf_print_cncfun(PgfCncFun *cncfun, PgfSequences* sequences,
if (cncfun->absfun != NULL) {
gu_puts(" [", out, err);
gu_string_write(cncfun->absfun->name, out, err);
pgf_print_cid(cncfun->absfun->name, out, err);
gu_puts("]", out, err);
}
@@ -311,7 +311,7 @@ pgf_print_cnccat(GuMapItor* fn, const void* key, void* value,
GuOut *out = clo->out;
gu_puts(" ", out, err);
gu_string_write(name, out, err);
pgf_print_cid(name, out, err);
gu_puts(" :=\n", out, err);
PgfCCat *start = gu_seq_get(cnccat->cats, PgfCCat*, 0);
@@ -335,7 +335,7 @@ pgf_print_concrete(PgfCId cncname, PgfConcr* concr,
GuOut* out, GuExn* err)
{
gu_puts("concrete ", out, err);
gu_string_write(cncname, out, err);
pgf_print_cid(cncname, out, err);
gu_puts(" {\n", out, err);
PgfPrintFn clo1 = { { pgf_print_flag }, out };