mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-22 11:19:32 -06:00
The GF syntax for identifiers is extended with quoted forms: you can write, for instance, 'ab.c', and everything between the quotes is treated as the identifier. This includes Unicode characters and non-ASCII symbols. This is useful for automatically generated GF grammars.
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
#include "pgf.h"
|
||||
#include <gu/assert.h>
|
||||
#include <gu/utf8.h>
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
@@ -166,6 +167,45 @@ pgf_expr_parser_getc(PgfExprParser* parser)
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
pgf_is_ident_first(GuUCS ucs)
|
||||
{
|
||||
return (ucs == '_') ||
|
||||
(ucs >= 'a' && ucs <= 'z') ||
|
||||
(ucs >= 'A' && ucs <= 'Z') ||
|
||||
(ucs >= 192 && ucs <= 255 && ucs != 247 && ucs != 215);
|
||||
}
|
||||
|
||||
static bool
|
||||
pgf_is_ident_rest(GuUCS ucs)
|
||||
{
|
||||
return (ucs == '_') ||
|
||||
(ucs == '\'') ||
|
||||
(ucs >= '0' && ucs <= '9') ||
|
||||
(ucs >= 'a' && ucs <= 'z') ||
|
||||
(ucs >= 'A' && ucs <= 'Z') ||
|
||||
(ucs >= 192 && ucs <= 255 && ucs != 247 && ucs != 215);
|
||||
}
|
||||
|
||||
static bool
|
||||
pgf_is_normal_ident(PgfCId id)
|
||||
{
|
||||
const uint8_t* p = (const uint8_t*) id;
|
||||
GuUCS ucs = gu_utf8_decode(&p);
|
||||
if (!pgf_is_ident_first(ucs))
|
||||
return false;
|
||||
|
||||
for (;;) {
|
||||
ucs = gu_utf8_decode(&p);
|
||||
if (ucs == 0)
|
||||
break;
|
||||
if (!pgf_is_ident_rest(ucs))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_expr_parser_token(PgfExprParser* parser)
|
||||
{
|
||||
@@ -227,20 +267,32 @@ pgf_expr_parser_token(PgfExprParser* parser)
|
||||
pgf_expr_parser_getc(parser);
|
||||
parser->token_tag = PGF_TOKEN_COLON;
|
||||
break;
|
||||
case '_':
|
||||
case '\'':
|
||||
pgf_expr_parser_getc(parser);
|
||||
parser->token_tag = PGF_TOKEN_WILD;
|
||||
|
||||
GuBuf* chars = gu_new_buf(char, parser->tmp_pool);
|
||||
while (parser->ch != '\'' && parser->ch != EOF) {
|
||||
if (parser->ch == '\\') {
|
||||
pgf_expr_parser_getc(parser);
|
||||
}
|
||||
gu_buf_push(chars, char, parser->ch);
|
||||
pgf_expr_parser_getc(parser);
|
||||
}
|
||||
if (parser->ch == '\'') {
|
||||
pgf_expr_parser_getc(parser);
|
||||
gu_buf_push(chars, char, 0);
|
||||
parser->token_tag = PGF_TOKEN_IDENT;
|
||||
parser->token_value = chars;
|
||||
}
|
||||
break;
|
||||
default: {
|
||||
GuBuf* chars = gu_new_buf(char, parser->tmp_pool);
|
||||
|
||||
if (isalpha(parser->ch)) {
|
||||
while (isalnum(parser->ch) ||
|
||||
parser->ch == '_' ||
|
||||
parser->ch == '\'') {
|
||||
if (pgf_is_ident_first(parser->ch)) {
|
||||
do {
|
||||
gu_buf_push(chars, char, parser->ch);
|
||||
pgf_expr_parser_getc(parser);
|
||||
}
|
||||
} while (pgf_is_ident_rest(parser->ch));
|
||||
gu_buf_push(chars, char, 0);
|
||||
parser->token_tag = PGF_TOKEN_IDENT;
|
||||
parser->token_value = chars;
|
||||
@@ -268,7 +320,7 @@ pgf_expr_parser_token(PgfExprParser* parser)
|
||||
}
|
||||
} else if (parser->ch == '"') {
|
||||
pgf_expr_parser_getc(parser);
|
||||
|
||||
|
||||
while (parser->ch != '"' && parser->ch != EOF) {
|
||||
gu_buf_push(chars, char, parser->ch);
|
||||
pgf_expr_parser_getc(parser);
|
||||
@@ -924,6 +976,30 @@ pgf_expr_hash(GuHash h, PgfExpr e)
|
||||
return h;
|
||||
}
|
||||
|
||||
void
|
||||
pgf_print_cid(PgfCId id,
|
||||
GuOut* out, GuExn* err)
|
||||
{
|
||||
if (pgf_is_normal_ident(id))
|
||||
gu_string_write(id, out, err);
|
||||
else {
|
||||
gu_putc('\'', out, err);
|
||||
const uint8_t* p = (const uint8_t*) id;
|
||||
for (;;) {
|
||||
GuUCS ucs = gu_utf8_decode(&p);
|
||||
if (ucs == 0)
|
||||
break;
|
||||
if (ucs == '\'')
|
||||
gu_puts("\\\'", out, err);
|
||||
else if (ucs == '\\')
|
||||
gu_puts("\\\\", out, err);
|
||||
else
|
||||
gu_out_utf8(ucs, out, err);
|
||||
}
|
||||
gu_putc('\'', out, err);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
pgf_print_literal(PgfLiteral lit,
|
||||
GuOut* out, GuExn* err)
|
||||
@@ -973,7 +1049,7 @@ pgf_print_expr(PgfExpr expr, PgfPrintContext* ctxt, int prec,
|
||||
if (abs->bind_type == PGF_BIND_TYPE_IMPLICIT) {
|
||||
gu_putc('{', out, err);
|
||||
}
|
||||
gu_string_write(abs->id, out, err);
|
||||
pgf_print_cid(abs->id, out, err);
|
||||
if (abs->bind_type == PGF_BIND_TYPE_IMPLICIT) {
|
||||
gu_putc('}', out, err);
|
||||
}
|
||||
@@ -1028,7 +1104,7 @@ pgf_print_expr(PgfExpr expr, PgfPrintContext* ctxt, int prec,
|
||||
break;
|
||||
case PGF_EXPR_FUN: {
|
||||
PgfExprFun* fun = ei.data;
|
||||
gu_string_write(fun->fun, out, err);
|
||||
pgf_print_cid(fun->fun, out, err);
|
||||
break;
|
||||
}
|
||||
case PGF_EXPR_VAR: {
|
||||
@@ -1043,7 +1119,7 @@ pgf_print_expr(PgfExpr expr, PgfPrintContext* ctxt, int prec,
|
||||
if (c == NULL) {
|
||||
gu_printf(out, err, "#%d", evar->var);
|
||||
} else {
|
||||
gu_string_write(c->name, out, err);
|
||||
pgf_print_cid(c->name, out, err);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -1074,7 +1150,7 @@ pgf_print_hypo(PgfHypo *hypo, PgfPrintContext* ctxt, int prec,
|
||||
{
|
||||
if (hypo->bind_type == PGF_BIND_TYPE_IMPLICIT) {
|
||||
gu_puts("({", out, err);
|
||||
gu_string_write(hypo->cid, out, err);
|
||||
pgf_print_cid(hypo->cid, out, err);
|
||||
gu_puts("} : ", out, err);
|
||||
pgf_print_type(hypo->type, ctxt, 0, out, err);
|
||||
gu_puts(")", out, err);
|
||||
@@ -1083,7 +1159,7 @@ pgf_print_hypo(PgfHypo *hypo, PgfPrintContext* ctxt, int prec,
|
||||
|
||||
if (strcmp(hypo->cid, "_") != 0) {
|
||||
gu_puts("(", out, err);
|
||||
gu_string_write(hypo->cid, out, err);
|
||||
pgf_print_cid(hypo->cid, out, err);
|
||||
gu_puts(" : ", out, err);
|
||||
pgf_print_type(hypo->type, ctxt, 0, out, err);
|
||||
gu_puts(")", out, err);
|
||||
@@ -1117,7 +1193,7 @@ pgf_print_type(PgfType *type, PgfPrintContext* ctxt, int prec,
|
||||
gu_puts(" -> ", out, err);
|
||||
}
|
||||
|
||||
gu_string_write(type->cid, out, err);
|
||||
pgf_print_cid(type->cid, out, err);
|
||||
|
||||
for (size_t i = 0; i < type->n_exprs; i++) {
|
||||
gu_puts(" ", out, err);
|
||||
@@ -1143,7 +1219,7 @@ pgf_print_type(PgfType *type, PgfPrintContext* ctxt, int prec,
|
||||
|
||||
if (prec > 3) gu_putc(')', out, err);
|
||||
} else {
|
||||
gu_string_write(type->cid, out, err);
|
||||
pgf_print_cid(type->cid, out, err);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -176,6 +176,9 @@ struct PgfPrintContext {
|
||||
PgfPrintContext* next;
|
||||
};
|
||||
|
||||
void
|
||||
pgf_print_cid(PgfCId id, GuOut* out, GuExn* err);
|
||||
|
||||
void
|
||||
pgf_print_literal(PgfLiteral lit, GuOut* out, GuExn* err);
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ pgf_print_flag(GuMapItor* fn, const void* key, void* value,
|
||||
GuOut *out = clo->out;
|
||||
|
||||
gu_puts(" flag ", out, err);
|
||||
gu_string_write(flag, out, err);
|
||||
pgf_print_cid(flag, out, err);
|
||||
gu_puts(" = ", out, err);
|
||||
pgf_print_literal(lit, out, err);
|
||||
gu_puts(";\n", out, err);
|
||||
@@ -32,7 +32,7 @@ pgf_print_cat(GuMapItor* fn, const void* key, void* value,
|
||||
GuOut *out = clo->out;
|
||||
|
||||
gu_puts(" cat ", out, err);
|
||||
gu_string_write(name, out, err);
|
||||
pgf_print_cid(name, out, err);
|
||||
|
||||
PgfPrintContext* ctxt = NULL;
|
||||
size_t n_hypos = gu_seq_length(cat->context);
|
||||
@@ -61,7 +61,7 @@ pgf_print_absfun(GuMapItor* fn, const void* key, void* value,
|
||||
GuOut *out = clo->out;
|
||||
|
||||
gu_puts((fun->defns == NULL) ? " data " : " fun ", out, err);
|
||||
gu_string_write(name, out, err);
|
||||
pgf_print_cid(name, out, err);
|
||||
gu_puts(" : ", out, err);
|
||||
pgf_print_type(fun->type, NULL, 0, out, err);
|
||||
gu_printf(out, err, " ; -- %f\n", fun->ep.prob);
|
||||
@@ -70,7 +70,7 @@ static void
|
||||
pgf_print_abstract(PgfAbstr* abstr, GuOut* out, GuExn* err)
|
||||
{
|
||||
gu_puts("abstract ", out, err);
|
||||
gu_string_write(abstr->name, out, err);
|
||||
pgf_print_cid(abstr->name, out, err);
|
||||
gu_puts(" {\n", out, err);
|
||||
|
||||
PgfPrintFn clo1 = { { pgf_print_flag }, out };
|
||||
@@ -205,7 +205,7 @@ pgf_print_cncfun(PgfCncFun *cncfun, PgfSequences* sequences,
|
||||
|
||||
if (cncfun->absfun != NULL) {
|
||||
gu_puts(" [", out, err);
|
||||
gu_string_write(cncfun->absfun->name, out, err);
|
||||
pgf_print_cid(cncfun->absfun->name, out, err);
|
||||
gu_puts("]", out, err);
|
||||
}
|
||||
|
||||
@@ -311,7 +311,7 @@ pgf_print_cnccat(GuMapItor* fn, const void* key, void* value,
|
||||
GuOut *out = clo->out;
|
||||
|
||||
gu_puts(" ", out, err);
|
||||
gu_string_write(name, out, err);
|
||||
pgf_print_cid(name, out, err);
|
||||
gu_puts(" :=\n", out, err);
|
||||
|
||||
PgfCCat *start = gu_seq_get(cnccat->cats, PgfCCat*, 0);
|
||||
@@ -335,7 +335,7 @@ pgf_print_concrete(PgfCId cncname, PgfConcr* concr,
|
||||
GuOut* out, GuExn* err)
|
||||
{
|
||||
gu_puts("concrete ", out, err);
|
||||
gu_string_write(cncname, out, err);
|
||||
pgf_print_cid(cncname, out, err);
|
||||
gu_puts(" {\n", out, err);
|
||||
|
||||
PgfPrintFn clo1 = { { pgf_print_flag }, out };
|
||||
|
||||
Reference in New Issue
Block a user