1
0
forked from GitHub/gf-core

a complete Python API for reading, printing and manipulation of abstract trees and types. This includes dependent types, high-order abstract syntax and implicit arguments

This commit is contained in:
kr.angelov
2013-06-27 09:39:15 +00:00
parent f4f76e4638
commit 155afdf9b7
9 changed files with 1148 additions and 81 deletions

View File

@@ -88,7 +88,7 @@ GU_DEFINE_TYPE(PgfLiteral, GuVariant,
GU_DECLARE_TYPE(PgfType, struct);
GU_DEFINE_TYPE(PgfHypo, struct,
GU_MEMBER(PgfHypo, bindtype, PgfBindType),
GU_MEMBER(PgfHypo, bind_type, PgfBindType),
GU_MEMBER(PgfHypo, cid, PgfCId),
GU_MEMBER_P(PgfHypo, type, PgfType));
@@ -137,7 +137,16 @@ typedef struct PgfExprParser PgfExprParser;
typedef enum {
PGF_TOKEN_LPAR,
PGF_TOKEN_RPAR,
PGF_TOKEN_LCURLY,
PGF_TOKEN_RCURLY,
PGF_TOKEN_QUESTION,
PGF_TOKEN_LAMBDA,
PGF_TOKEN_RARROW,
PGF_TOKEN_LTRIANGLE,
PGF_TOKEN_RTRIANGLE,
PGF_TOKEN_COMMA,
PGF_TOKEN_COLON,
PGF_TOKEN_WILD,
PGF_TOKEN_IDENT,
PGF_TOKEN_INT,
PGF_TOKEN_FLT,
@@ -172,11 +181,6 @@ pgf_expr_parser_token(PgfExprParser* parser)
while (isspace(parser->ch)) {
pgf_expr_parser_getc(parser);
}
if (parser->tmp_pool != NULL) {
gu_pool_free(parser->tmp_pool);
parser->tmp_pool = NULL;
}
parser->token_tag = PGF_TOKEN_UNKNOWN;
parser->token_value = gu_null_seq;
@@ -193,12 +197,50 @@ pgf_expr_parser_token(PgfExprParser* parser)
pgf_expr_parser_getc(parser);
parser->token_tag = PGF_TOKEN_RPAR;
break;
case '{':
pgf_expr_parser_getc(parser);
parser->token_tag = PGF_TOKEN_LCURLY;
break;
case '}':
pgf_expr_parser_getc(parser);
parser->token_tag = PGF_TOKEN_RCURLY;
break;
case '<':
pgf_expr_parser_getc(parser);
parser->token_tag = PGF_TOKEN_LTRIANGLE;
break;
case '>':
pgf_expr_parser_getc(parser);
parser->token_tag = PGF_TOKEN_RTRIANGLE;
break;
case '?':
pgf_expr_parser_getc(parser);
parser->token_tag = PGF_TOKEN_QUESTION;
break;
case '\\':
pgf_expr_parser_getc(parser);
parser->token_tag = PGF_TOKEN_LAMBDA;
break;
case '-':
pgf_expr_parser_getc(parser);
if (parser->ch == '>') {
pgf_expr_parser_getc(parser);
parser->token_tag = PGF_TOKEN_RARROW;
}
break;
case ',':
pgf_expr_parser_getc(parser);
parser->token_tag = PGF_TOKEN_COMMA;
break;
case ':':
pgf_expr_parser_getc(parser);
parser->token_tag = PGF_TOKEN_COLON;
break;
case '_':
pgf_expr_parser_getc(parser);
parser->token_tag = PGF_TOKEN_WILD;
break;
default: {
parser->tmp_pool = gu_new_pool();
GuCharBuf* chars = gu_new_buf(char, parser->tmp_pool);
if (isalpha(parser->ch)) {
@@ -249,9 +291,22 @@ pgf_expr_parser_token(PgfExprParser* parser)
}
}
static bool
pgf_expr_parser_lookahead(PgfExprParser* parser, int ch)
{
while (isspace(parser->ch)) {
pgf_expr_parser_getc(parser);
}
return (parser->ch == ch);
}
static PgfExpr
pgf_expr_parser_expr(PgfExprParser* parser);
static PgfType*
pgf_expr_parser_type(PgfExprParser* parser);
static PgfExpr
pgf_expr_parser_term(PgfExprParser* parser)
{
@@ -266,6 +321,27 @@ pgf_expr_parser_term(PgfExprParser* parser)
return gu_null_variant;
}
}
case PGF_TOKEN_LTRIANGLE: {
pgf_expr_parser_token(parser);
PgfExpr expr = pgf_expr_parser_expr(parser);
if (parser->token_tag != PGF_TOKEN_COLON) {
return gu_null_variant;
}
pgf_expr_parser_token(parser);
PgfType* type = pgf_expr_parser_type(parser);
if (parser->token_tag != PGF_TOKEN_RTRIANGLE) {
return gu_null_variant;
}
pgf_expr_parser_token(parser);
return gu_new_variant_i(parser->expr_pool,
PGF_EXPR_TYPED,
PgfExprTyped,
expr, type);
}
case PGF_TOKEN_QUESTION: {
pgf_expr_parser_token(parser);
return gu_new_variant_i(parser->expr_pool,
@@ -333,41 +409,354 @@ pgf_expr_parser_term(PgfExprParser* parser)
}
}
static PgfExpr
pgf_expr_parser_arg(PgfExprParser* parser)
{
PgfExpr arg;
if (parser->token_tag == PGF_TOKEN_LCURLY) {
pgf_expr_parser_token(parser);
arg = pgf_expr_parser_expr(parser);
if (gu_variant_is_null(arg))
return gu_null_variant;
if (parser->token_tag != PGF_TOKEN_RCURLY) {
return gu_null_variant;
}
pgf_expr_parser_token(parser);
arg = gu_new_variant_i(parser->expr_pool,
PGF_EXPR_IMPL_ARG,
PgfExprImplArg,
arg);
} else {
arg = pgf_expr_parser_term(parser);
}
return arg;
}
static bool
pgf_expr_parser_bind(PgfExprParser* parser, GuBuf* binds)
{
PgfCId var;
PgfBindType bind_type = PGF_BIND_TYPE_EXPLICIT;
if (parser->token_tag == PGF_TOKEN_LCURLY) {
bind_type = PGF_BIND_TYPE_IMPLICIT;
pgf_expr_parser_token(parser);
}
for (;;) {
if (parser->token_tag == PGF_TOKEN_IDENT) {
char* str =
gu_chars_str(parser->token_value, parser->tmp_pool);
var = gu_str_string(str, parser->expr_pool);
pgf_expr_parser_token(parser);
} else if (parser->token_tag == PGF_TOKEN_WILD) {
var = gu_str_string("_", parser->expr_pool);
pgf_expr_parser_token(parser);
} else {
return false;
}
PgfExpr bind =
gu_new_variant_i(parser->expr_pool,
PGF_EXPR_ABS,
PgfExprAbs,
bind_type,
var,
gu_null_variant);
gu_buf_push(binds, PgfExpr, bind);
if (bind_type == PGF_BIND_TYPE_EXPLICIT ||
parser->token_tag != PGF_TOKEN_COMMA) {
break;
}
pgf_expr_parser_token(parser);
}
if (bind_type == PGF_BIND_TYPE_IMPLICIT) {
if (parser->token_tag != PGF_TOKEN_RCURLY)
return false;
pgf_expr_parser_token(parser);
}
return true;
}
static GuBuf*
pgf_expr_parser_binds(PgfExprParser* parser)
{
GuBuf* binds = gu_new_buf(PgfExpr, parser->tmp_pool);
for (;;) {
if (!pgf_expr_parser_bind(parser, binds))
return NULL;
if (parser->token_tag != PGF_TOKEN_COMMA)
break;
pgf_expr_parser_token(parser);
}
return binds;
}
static PgfExpr
pgf_expr_parser_expr(PgfExprParser* parser)
{
PgfExpr expr = pgf_expr_parser_term(parser);
if (gu_variant_is_null(expr))
return expr;
if (parser->token_tag == PGF_TOKEN_LAMBDA) {
pgf_expr_parser_token(parser);
GuBuf* binds = pgf_expr_parser_binds(parser);
if (binds == NULL)
return gu_null_variant;
while (true) {
PgfExpr arg = pgf_expr_parser_term(parser);
if (gu_variant_is_null(arg)) {
return expr;
if (parser->token_tag != PGF_TOKEN_RARROW) {
return gu_null_variant;
}
expr = gu_new_variant_i(parser->expr_pool,
PGF_EXPR_APP,
PgfExprApp,
expr, arg);
pgf_expr_parser_token(parser);
PgfExpr expr = pgf_expr_parser_expr(parser);
if (gu_variant_is_null(expr))
return gu_null_variant;
size_t n_binds = gu_buf_length(binds);
for (size_t i = n_binds; i > 0; i--) {
PgfExpr bind = gu_buf_get(binds, PgfExpr, i-1);
((PgfExprAbs*) gu_variant_data(bind))->body = expr;
expr = bind;
}
return expr;
} else {
PgfExpr expr = pgf_expr_parser_term(parser);
if (gu_variant_is_null(expr))
return gu_null_variant;
while (parser->token_tag != PGF_TOKEN_EOF &&
parser->token_tag != PGF_TOKEN_RPAR &&
parser->token_tag != PGF_TOKEN_RCURLY &&
parser->token_tag != PGF_TOKEN_RTRIANGLE &&
parser->token_tag != PGF_TOKEN_COLON) {
PgfExpr arg = pgf_expr_parser_arg(parser);
if (gu_variant_is_null(arg))
return gu_null_variant;
expr = gu_new_variant_i(parser->expr_pool,
PGF_EXPR_APP,
PgfExprApp,
expr, arg);
}
return expr;
}
}
static bool
pgf_expr_parser_hypos(PgfExprParser* parser, GuBuf* hypos)
{
PgfCId var;
PgfBindType bind_type = PGF_BIND_TYPE_EXPLICIT;
for (;;) {
if (bind_type == PGF_BIND_TYPE_EXPLICIT &&
parser->token_tag == PGF_TOKEN_LCURLY) {
bind_type = PGF_BIND_TYPE_IMPLICIT;
pgf_expr_parser_token(parser);
}
if (parser->token_tag == PGF_TOKEN_IDENT) {
char* str =
gu_chars_str(parser->token_value, parser->tmp_pool);
var = gu_str_string(str, parser->expr_pool);
pgf_expr_parser_token(parser);
} else if (parser->token_tag == PGF_TOKEN_WILD) {
var = gu_str_string("_", parser->expr_pool);
pgf_expr_parser_token(parser);
} else {
return false;
}
PgfHypo* hypo = gu_buf_extend(hypos);
hypo->bind_type = bind_type;
hypo->cid = var;
hypo->type = NULL;
if (bind_type == PGF_BIND_TYPE_IMPLICIT &&
parser->token_tag == PGF_TOKEN_RCURLY) {
bind_type = PGF_BIND_TYPE_EXPLICIT;
pgf_expr_parser_token(parser);
}
if (parser->token_tag != PGF_TOKEN_COMMA) {
break;
}
pgf_expr_parser_token(parser);
}
if (bind_type == PGF_BIND_TYPE_IMPLICIT)
return false;
return true;
}
static PgfType*
pgf_expr_parser_atom(PgfExprParser* parser)
{
if (parser->token_tag != PGF_TOKEN_IDENT)
return NULL;
char* str =
gu_chars_str(parser->token_value, parser->tmp_pool);
PgfCId cid = gu_str_string(str, parser->expr_pool);
pgf_expr_parser_token(parser);
GuBuf* args = gu_new_buf(PgfExpr, parser->tmp_pool);
while (parser->token_tag != PGF_TOKEN_EOF &&
parser->token_tag != PGF_TOKEN_RPAR &&
parser->token_tag != PGF_TOKEN_RTRIANGLE &&
parser->token_tag != PGF_TOKEN_RARROW) {
PgfExpr arg =
pgf_expr_parser_arg(parser);
if (gu_variant_is_null(arg))
return NULL;
gu_buf_push(args, PgfExpr, arg);
}
size_t n_exprs = gu_buf_length(args);
PgfType* type = gu_new_flex(parser->expr_pool, PgfType, exprs, n_exprs);
type->hypos = gu_empty_seq();
type->cid = cid;
type->n_exprs = n_exprs;
for (size_t i = 0; i < n_exprs; i++) {
type->exprs[i] = gu_buf_get(args, PgfExpr, i);
}
return type;
}
static PgfType*
pgf_expr_parser_type(PgfExprParser* parser)
{
PgfType* type = NULL;
GuBuf* hypos = gu_new_buf(PgfHypo, parser->expr_pool);
for (;;) {
if (parser->token_tag == PGF_TOKEN_LPAR) {
pgf_expr_parser_token(parser);
size_t n_start = gu_buf_length(hypos);
if ((parser->token_tag == PGF_TOKEN_IDENT &&
(pgf_expr_parser_lookahead(parser, ',') ||
pgf_expr_parser_lookahead(parser, ':'))) ||
(parser->token_tag == PGF_TOKEN_LCURLY) ||
(parser->token_tag == PGF_TOKEN_WILD)) {
if (!pgf_expr_parser_hypos(parser, hypos))
return NULL;
if (parser->token_tag != PGF_TOKEN_COLON)
return NULL;
pgf_expr_parser_token(parser);
} else {
PgfHypo* hypo = gu_buf_extend(hypos);
hypo->bind_type = PGF_BIND_TYPE_EXPLICIT;
hypo->cid = gu_str_string("_", parser->expr_pool);
hypo->type = NULL;
}
size_t n_end = gu_buf_length(hypos);
PgfType* type = pgf_expr_parser_type(parser);
if (type == NULL)
return NULL;
if (parser->token_tag != PGF_TOKEN_RPAR)
return NULL;
pgf_expr_parser_token(parser);
if (parser->token_tag != PGF_TOKEN_RARROW)
return NULL;
pgf_expr_parser_token(parser);
for (size_t i = n_start; i < n_end; i++) {
PgfHypo* hypo = gu_buf_index(hypos, PgfHypo, i);
hypo->type = type;
}
} else {
type = pgf_expr_parser_atom(parser);
if (type == NULL)
return NULL;
if (parser->token_tag != PGF_TOKEN_RARROW)
break;
pgf_expr_parser_token(parser);
PgfHypo* hypo = gu_buf_extend(hypos);
hypo->bind_type = PGF_BIND_TYPE_EXPLICIT;
hypo->cid = gu_str_string("_", parser->expr_pool);
hypo->type = type;
}
}
type->hypos = gu_buf_seq(hypos);
return type;
}
static PgfExprParser*
pgf_new_parser(GuIn* in, GuPool* pool, GuPool* tmp_pool, GuExn* err)
{
PgfExprParser* parser = gu_new(PgfExprParser, tmp_pool);
parser->err = err;
parser->in = in;
parser->expr_pool = pool;
parser->tmp_pool = tmp_pool;
parser->ch = ' ';
pgf_expr_parser_token(parser);
return parser;
}
PgfExpr
pgf_read_expr(GuIn* in, GuPool* pool, GuExn* err)
{
GuPool* tmp_pool = gu_new_pool();
PgfExprParser* parser = gu_new(PgfExprParser, tmp_pool);
parser->err = err;
parser->in = in;
parser->expr_pool = pool;
parser->tmp_pool = NULL;
parser->ch = ' ';
pgf_expr_parser_token(parser);
PgfExprParser* parser =
pgf_new_parser(in, pool, tmp_pool, err);
PgfExpr expr = pgf_expr_parser_expr(parser);
if (parser->token_tag != PGF_TOKEN_EOF)
return gu_null_variant;
gu_pool_free(tmp_pool);
return expr;
}
PgfType*
pgf_read_type(GuIn* in, GuPool* pool, GuExn* err)
{
GuPool* tmp_pool = gu_new_pool();
PgfExprParser* parser =
pgf_new_parser(in, pool, tmp_pool, err);
PgfType* type = pgf_expr_parser_type(parser);
if (parser->token_tag != PGF_TOKEN_EOF)
return NULL;
gu_pool_free(tmp_pool);
return type;
}
bool
pgf_literal_eq(PgfLiteral lit1, PgfLiteral lit2)
{
@@ -470,14 +859,56 @@ pgf_print_literal(PgfLiteral lit,
}
void
pgf_print_expr(PgfExpr expr, int prec,
GuWriter* wtr, GuExn* err)
pgf_print_expr(PgfExpr expr, PgfPrintContext* ctxt, int prec,
GuWriter* wtr, GuExn* err)
{
GuVariantInfo ei = gu_variant_open(expr);
switch (ei.tag) {
case PGF_EXPR_FUN: {
PgfExprFun* fun = ei.data;
gu_string_write(fun->fun, wtr, err);
case PGF_EXPR_ABS: {
PgfExprAbs* abs = ei.data;
if (prec > 1) {
gu_puts("(", wtr, err);
}
gu_putc('\\', wtr, err);
PgfPrintContext* new_ctxt = ctxt;
for (;;) {
if (abs->bind_type == PGF_BIND_TYPE_IMPLICIT) {
gu_putc('{', wtr, err);
}
gu_string_write(abs->id, wtr, err);
if (abs->bind_type == PGF_BIND_TYPE_IMPLICIT) {
gu_putc('}', wtr, err);
}
PgfPrintContext* c = malloc(sizeof(PgfPrintContext));
c->name = abs->id;
c->next = new_ctxt;
new_ctxt = c;
if (gu_variant_tag(abs->body) != PGF_EXPR_ABS)
break;
gu_putc(',', wtr, err);
abs = gu_variant_data(abs->body);
}
gu_puts(" -> ", wtr, err);
pgf_print_expr(abs->body, new_ctxt, 1, wtr, err);
while (new_ctxt != ctxt) {
PgfPrintContext* next = new_ctxt->next;
free(new_ctxt);
new_ctxt = next;
}
if (prec > 1) {
gu_puts(")", wtr, err);
}
break;
}
case PGF_EXPR_APP: {
@@ -485,9 +916,9 @@ pgf_print_expr(PgfExpr expr, int prec,
if (prec > 3) {
gu_puts("(", wtr, err);
}
pgf_print_expr(app->fun, 3, wtr, err);
pgf_print_expr(app->fun, ctxt, 3, wtr, err);
gu_puts(" ", wtr, err);
pgf_print_expr(app->arg, 4, wtr, err);
pgf_print_expr(app->arg, ctxt, 4, wtr, err);
if (prec > 3) {
gu_puts(")", wtr, err);
}
@@ -501,25 +932,57 @@ pgf_print_expr(PgfExpr expr, int prec,
case PGF_EXPR_META:
gu_putc('?', wtr, err);
break;
case PGF_EXPR_ABS:
case PGF_EXPR_VAR:
case PGF_EXPR_TYPED:
case PGF_EXPR_IMPL_ARG:
gu_impossible();
case PGF_EXPR_FUN: {
PgfExprFun* fun = ei.data;
gu_string_write(fun->fun, wtr, err);
break;
}
case PGF_EXPR_VAR: {
PgfExprVar* evar = ei.data;
int var = evar->var;
PgfPrintContext* c = ctxt;
while (c != NULL && var > 0) {
c = ctxt->next;
}
if (c == NULL) {
gu_printf(wtr, err, "#%d", evar->var);
} else {
gu_string_write(c->name, wtr, err);
}
break;
}
case PGF_EXPR_TYPED: {
PgfExprTyped* typed = ei.data;
gu_putc('<', wtr, err);
pgf_print_expr(typed->expr, ctxt, 0, wtr, err);
gu_puts(" : ", wtr, err);
pgf_print_type(typed->type, ctxt, 0, wtr, err);
gu_putc('>', wtr, err);
break;
}
case PGF_EXPR_IMPL_ARG: {
PgfExprImplArg* impl = ei.data;
gu_putc('{', wtr, err);
pgf_print_expr(impl->expr, ctxt, 0, wtr, err);
gu_putc('}', wtr, err);
break;
}
default:
gu_impossible();
}
}
void
pgf_print_hypo(PgfHypo *hypo, int prec, GuWriter *wtr, GuExn *err)
PgfPrintContext*
pgf_print_hypo(PgfHypo *hypo, PgfPrintContext* ctxt, int prec,
GuWriter *wtr, GuExn *err)
{
if (hypo->bindtype == PGF_BIND_TYPE_IMPLICIT) {
if (hypo->bind_type == PGF_BIND_TYPE_IMPLICIT) {
gu_puts("({", wtr, err);
gu_string_write(hypo->cid, wtr, err);
gu_puts("} : ", wtr, err);
pgf_print_type(hypo->type, 0, wtr, err);
pgf_print_type(hypo->type, ctxt, 0, wtr, err);
gu_puts(")", wtr, err);
} else {
GuPool* tmp_pool = gu_new_pool();
@@ -529,27 +992,34 @@ pgf_print_hypo(PgfHypo *hypo, int prec, GuWriter *wtr, GuExn *err)
gu_puts("(", wtr, err);
gu_string_write(hypo->cid, wtr, err);
gu_puts(" : ", wtr, err);
pgf_print_type(hypo->type, 0, wtr, err);
pgf_print_type(hypo->type, ctxt, 0, wtr, err);
gu_puts(")", wtr, err);
} else {
pgf_print_type(hypo->type, prec, wtr, err);
pgf_print_type(hypo->type, ctxt, prec, wtr, err);
}
gu_pool_free(tmp_pool);
}
PgfPrintContext* new_ctxt = malloc(sizeof(PgfPrintContext));
new_ctxt->name = hypo->cid;
new_ctxt->next = ctxt;
return new_ctxt;
}
void
pgf_print_type(PgfType *type, int prec, GuWriter *wtr, GuExn *err)
pgf_print_type(PgfType *type, PgfPrintContext* ctxt, int prec,
GuWriter *wtr, GuExn *err)
{
size_t n_hypos = gu_seq_length(type->hypos);
if (n_hypos > 0) {
if (prec > 0) gu_putc('(', wtr, err);
PgfPrintContext* new_ctxt = ctxt;
for (size_t i = 0; i < n_hypos; i++) {
PgfHypo *hypo = gu_seq_index(type->hypos, PgfHypo, i);
pgf_print_hypo(hypo, 1, wtr, err);
new_ctxt = pgf_print_hypo(hypo, new_ctxt, 1, wtr, err);
gu_puts(" -> ", wtr, err);
}
@@ -558,9 +1028,15 @@ pgf_print_type(PgfType *type, int prec, GuWriter *wtr, GuExn *err)
for (size_t i = 0; i < type->n_exprs; i++) {
gu_puts(" ", wtr, err);
pgf_print_expr(type->exprs[i], 4, wtr, err);
pgf_print_expr(type->exprs[i], new_ctxt, 4, wtr, err);
}
while (new_ctxt != ctxt) {
PgfPrintContext* next = new_ctxt->next;
free(new_ctxt);
new_ctxt = next;
}
if (prec > 0) gu_putc(')', wtr, err);
} else if (type->n_exprs > 0) {
if (prec > 3) gu_putc('(', wtr, err);
@@ -569,7 +1045,7 @@ pgf_print_type(PgfType *type, int prec, GuWriter *wtr, GuExn *err)
for (size_t i = 0; i < type->n_exprs; i++) {
gu_puts(" ", wtr, err);
pgf_print_expr(type->exprs[i], 4, wtr, err);
pgf_print_expr(type->exprs[i], ctxt, 4, wtr, err);
}
if (prec > 3) gu_putc(')', wtr, err);
@@ -577,3 +1053,38 @@ pgf_print_type(PgfType *type, int prec, GuWriter *wtr, GuExn *err)
gu_string_write(type->cid, wtr, err);
}
}
bool
pgf_type_eq(PgfType* t1, PgfType* t2)
{
if (gu_seq_length(t1->hypos) != gu_seq_length(t2->hypos))
return false;
size_t n_hypos = gu_seq_length(t1->hypos);
for (size_t i = 0; i < n_hypos; i++) {
PgfHypo *hypo1 = gu_seq_index(t1->hypos, PgfHypo, i);
PgfHypo *hypo2 = gu_seq_index(t1->hypos, PgfHypo, i);
if (hypo1->bind_type != hypo2->bind_type)
return false;
if (!gu_string_eq(hypo1->cid, hypo2->cid))
return false;
if (!pgf_type_eq(hypo1->type, hypo2->type))
return false;
}
if (!gu_string_eq(t1->cid, t2->cid))
return false;
if (t1->n_exprs != t2->n_exprs)
return false;
for (size_t i = 0; i < t1->n_exprs; i++) {
if (!pgf_expr_eq(t1->exprs[i], t2->exprs[i]))
return false;
}
return true;
}