Files
gf-core/src/runtime/c/pgf/reader.c
kr.angelov 584d589041 a partial support for def rules in the C runtime
The def rules are now compiled to byte code by the compiler and then to
native code by the JIT compiler in the runtime. Not all constructions
are implemented yet. The partial implementation is now in the repository
but it is not activated by default since this requires changes in the
PGF format. I will enable it only after it is complete.
2014-08-11 10:59:10 +00:00

1347 lines
31 KiB
C

#include "data.h"
#include "expr.h"
#include "literals.h"
#include "reader.h"
#include <gu/defs.h>
#include <gu/map.h>
#include <gu/seq.h>
#include <gu/assert.h>
#include <gu/in.h>
#include <gu/bits.h>
#include <gu/exn.h>
#include <gu/utf8.h>
#include <math.h>
#include <stdlib.h>
#ifdef __MINGW32__
#include <malloc.h>
#endif
//
// PgfReader
//
// Payload attached to a raised PgfReadTagExn (see pgf_read_len):
// the libgu type that was being read and the offending value.
typedef struct PgfReadTagExn PgfReadTagExn;
struct PgfReadTagExn {
GuType* type;
int tag;
};
// Exception type descriptors; both are passed to gu_raise() in this file.
static GU_DEFINE_TYPE(PgfReadTagExn, abstract, _);
static GU_DEFINE_TYPE(PgfReadExn, abstract, _);
// Reads a single byte from the reader's input and returns it as a tag.
// rdr->err is handed to gu_in_u8 for error reporting.
uint8_t
pgf_read_tag(PgfReader* rdr)
{
    uint8_t tag = gu_in_u8(rdr->in, rdr->err);
    return tag;
}
// Reads a variable-length unsigned integer: each byte carries 7 payload
// bits (least significant group first) and its high bit tells whether
// another byte follows.
//
// Returns 0 as soon as an exception is raised on rdr->err.
//
// Payload bits that do not fit into the 32-bit result are discarded,
// but the continuation bytes are still consumed so that the stream
// stays in sync.  The shift is done on an unsigned 32-bit value and is
// never performed with a count >= 32, which would be undefined.
uint32_t
pgf_read_uint(PgfReader* rdr)
{
    uint32_t u = 0;
    unsigned shift = 0;
    uint8_t b = 0;
    do {
        b = gu_in_u8(rdr->in, rdr->err);
        gu_return_on_exn(rdr->err, 0);
        if (shift < 32) {
            u |= ((uint32_t) (b & 0x7F)) << shift;
            shift += 7;
        }
    } while (b & 0x80);
    return u;
}
// Reads a variable-length unsigned value and converts it to a signed
// 32-bit integer with gu_decode_2c32.
int32_t
pgf_read_int(PgfReader* rdr)
{
    uint32_t raw = pgf_read_uint(rdr);
    int32_t value = gu_decode_2c32(raw, rdr->err);
    return value;
}
// Reads a length (a non-negative integer).
//
// Returns 0 whenever an exception is pending or gets raised here, so
// that callers may keep going (e.g. run an empty loop) and check
// rdr->err later.  A negative value raises PgfReadTagExn carrying the
// offending number.
size_t
pgf_read_len(PgfReader* rdr)
{
    int32_t raw_len = pgf_read_int(rdr);

    // Must yield 0 on failure: callers rely on it to proceed without
    // testing for the error right away.
    gu_return_on_exn(rdr->err, 0);

    if (GU_UNLIKELY(raw_len < 0)) {
        GuExnData* exn_data = gu_raise(rdr->err, PgfReadTagExn);
        if (exn_data) {
            PgfReadTagExn* payload = gu_new(PgfReadTagExn, exn_data->pool);
            payload->type = gu_type(GuLength);
            payload->tag = raw_len;
            exn_data->data = payload;
        }
        return 0;
    }

    return raw_len;
}
// Reads an identifier: a length followed by that many bytes, decoded
// by gu_string_read_latin1 and allocated from the given pool.
PgfCId
pgf_read_cid(PgfReader* rdr, GuPool* pool)
{
    size_t n_bytes = pgf_read_len(rdr);
    PgfCId cid = gu_string_read_latin1(n_bytes, pool, rdr->in, rdr->err);
    return cid;
}
// Reads a length-prefixed string with gu_string_read; the result is
// allocated from the reader's output pool.
GuString
pgf_read_string(PgfReader* rdr)
{
    GuLength n_chars = pgf_read_len(rdr);
    GuString str = gu_string_read(n_chars, rdr->opool, rdr->in, rdr->err);
    return str;
}
// Reads one double from the input via gu_in_f64be.
double
pgf_read_double(PgfReader* rdr)
{
    double value = gu_in_f64be(rdr->in, rdr->err);
    return value;
}
// Called when a tag byte matches none of the known cases.  No
// exception is raised on the reader; the situation is simply handed
// to gu_impossible().
static void
pgf_read_tag_error(PgfReader* rdr)
{
    (void) rdr;  // the reader is not consulted
    gu_impossible();
}
// Reads a literal: a tag byte followed by a string, an integer or a
// floating point value.  The literal is allocated from rdr->opool.
//
// Returns gu_null_variant if reading the characters of a string fails
// or if the declared string length is too large to size a buffer for;
// other read errors are left on rdr->err for the caller to check.
static PgfLiteral
pgf_read_literal(PgfReader* rdr)
{
    PgfLiteral lit = gu_null_variant;

    uint8_t tag = pgf_read_tag(rdr);
    switch (tag) {
    case PGF_LITERAL_STR: {
        // Largest scratch buffer that is still taken from the stack.
        enum { STACK_BUF_MAX = 1024 };

        GuLength len = pgf_read_len(rdr);

        // The scratch buffer reserves 6 bytes per character plus the
        // terminator; refuse lengths for which that would overflow.
        if (len > (SIZE_MAX - 1) / 6) {
            gu_raise(rdr->err, PgfReadExn);
            return gu_null_variant;
        }
        size_t buf_size = len*6+1;

        // The length comes from the file, so only small buffers may
        // live on the stack; larger ones come from the temporary pool.
        uint8_t* buf;
        if (buf_size <= STACK_BUF_MAX)
            buf = alloca(buf_size);
        else
            buf = gu_malloc(rdr->tmp_pool, buf_size);

        uint8_t* p = buf;
        for (size_t i = 0; i < len; i++) {
            gu_in_utf8_buf(&p, rdr->in, rdr->err);
            gu_return_on_exn(rdr->err, gu_null_variant);
        }
        *p++ = 0;

        PgfLiteralStr *lit_str =
            gu_new_flex_variant(PGF_LITERAL_STR,
                                PgfLiteralStr,
                                val, p-buf,
                                &lit, rdr->opool);
        strcpy((char*) lit_str->val, (char*) buf);
        break;
    }
    case PGF_LITERAL_INT: {
        PgfLiteralInt *lit_int =
            gu_new_variant(PGF_LITERAL_INT,
                           PgfLiteralInt,
                           &lit, rdr->opool);
        lit_int->val = pgf_read_int(rdr);
        break;
    }
    case PGF_LITERAL_FLT: {
        PgfLiteralFlt *lit_flt =
            gu_new_variant(PGF_LITERAL_FLT,
                           PgfLiteralFlt,
                           &lit, rdr->opool);
        lit_flt->val = pgf_read_double(rdr);
        break;
    }
    default:
        pgf_read_tag_error(rdr);
    }
    return lit;
}
// Reads a flag table: a count followed by (name, literal) pairs.
// The map and its contents are allocated from rdr->opool.
// Returns NULL as soon as an exception is raised.
static PgfFlags*
pgf_read_flags(PgfReader* rdr)
{
    PgfFlags* table = gu_map_type_new(PgfFlags, rdr->opool);

    GuLength n_flags = pgf_read_len(rdr);
    gu_return_on_exn(rdr->err, NULL);

    for (size_t k = 0; k < n_flags; k++) {
        PgfCId flag_name = pgf_read_cid(rdr, rdr->opool);
        gu_return_on_exn(rdr->err, NULL);

        PgfLiteral flag_value = pgf_read_literal(rdr);
        gu_return_on_exn(rdr->err, NULL);

        gu_map_put(table, flag_name, PgfLiteral, flag_value);
    }

    return table;
}
// Types and expressions refer to each other, hence the forward
// declaration.
static PgfType*
pgf_read_type_(PgfReader* rdr);

// Reads one expression: a tag byte selects the node kind, then the
// node's fields follow (recursively for sub-expressions).  Nodes are
// allocated from rdr->opool.
// Returns gu_null_variant as soon as an exception is raised.
static PgfExpr
pgf_read_expr_(PgfReader* rdr)
{
    PgfExpr result = gu_null_variant;

    uint8_t kind = pgf_read_tag(rdr);
    switch (kind) {
    case PGF_EXPR_ABS:{
        // Abstraction: bind type, bound variable, body.
        PgfExprAbs *abs_node =
            gu_new_variant(PGF_EXPR_ABS,
                           PgfExprAbs,
                           &result, rdr->opool);

        abs_node->bind_type = pgf_read_tag(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);

        abs_node->id = pgf_read_cid(rdr, rdr->opool);
        gu_return_on_exn(rdr->err, gu_null_variant);

        abs_node->body = pgf_read_expr_(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);
        break;
    }
    case PGF_EXPR_APP: {
        // Application: function expression, then argument expression.
        PgfExprApp *app_node =
            gu_new_variant(PGF_EXPR_APP,
                           PgfExprApp,
                           &result, rdr->opool);

        app_node->fun = pgf_read_expr_(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);

        app_node->arg = pgf_read_expr_(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);
        break;
    }
    case PGF_EXPR_LIT: {
        PgfExprLit *lit_node =
            gu_new_variant(PGF_EXPR_LIT,
                           PgfExprLit,
                           &result, rdr->opool);

        lit_node->lit = pgf_read_literal(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);
        break;
    }
    case PGF_EXPR_META: {
        PgfExprMeta *meta_node =
            gu_new_variant(PGF_EXPR_META,
                           PgfExprMeta,
                           &result, rdr->opool);

        meta_node->id = pgf_read_int(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);
        break;
    }
    case PGF_EXPR_FUN: {
        // The name is stored inline in the node, NUL-terminated.
        size_t name_len = pgf_read_len(rdr);

        PgfExprFun *fun_node =
            gu_new_flex_variant(PGF_EXPR_FUN,
                                PgfExprFun,
                                fun, name_len+1,
                                &result, rdr->opool);

        gu_in_bytes(rdr->in, (uint8_t*)fun_node->fun, name_len, rdr->err);
        fun_node->fun[name_len] = 0;
        gu_return_on_exn(rdr->err, gu_null_variant);
        break;
    }
    case PGF_EXPR_VAR: {
        PgfExprVar *var_node =
            gu_new_variant(PGF_EXPR_VAR,
                           PgfExprVar,
                           &result, rdr->opool);

        var_node->var = pgf_read_int(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);
        break;
    }
    case PGF_EXPR_TYPED: {
        // Expression followed by its type annotation.
        PgfExprTyped *typed_node =
            gu_new_variant(PGF_EXPR_TYPED,
                           PgfExprTyped,
                           &result, rdr->opool);

        typed_node->expr = pgf_read_expr_(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);

        typed_node->type = pgf_read_type_(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);
        break;
    }
    case PGF_EXPR_IMPL_ARG: {
        PgfExprImplArg *impl_node =
            gu_new_variant(PGF_EXPR_IMPL_ARG,
                           PgfExprImplArg,
                           &result, rdr->opool);

        impl_node->expr = pgf_read_expr_(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);
        break;
    }
    default:
        pgf_read_tag_error(rdr);
    }

    return result;
}
// Fills in one hypothesis in place: bind type, variable name, type.
// Stops at the first exception, leaving the remaining fields unset.
static void
pgf_read_hypo(PgfReader* rdr, PgfHypo* hypo)
{
    hypo->bind_type = pgf_read_tag(rdr);
    gu_return_on_exn(rdr->err, );

    hypo->cid = pgf_read_cid(rdr, rdr->opool);
    gu_return_on_exn(rdr->err, );

    hypo->type = pgf_read_type_(rdr);
    gu_return_on_exn(rdr->err, );
}
// Reads a type: a list of hypotheses, a category name and a list of
// argument expressions.  Everything is allocated from rdr->opool.
// Returns NULL when an exception is raised while reading hypotheses,
// the category name or an argument expression.
static PgfType*
pgf_read_type_(PgfReader* rdr)
{
    size_t hypo_count = pgf_read_len(rdr);
    gu_return_on_exn(rdr->err, NULL);

    GuSeq* hypo_seq = gu_new_seq(PgfHypo, hypo_count, rdr->opool);
    for (size_t h = 0; h < hypo_count; h++) {
        PgfHypo* slot = gu_seq_index(hypo_seq, PgfHypo, h);
        pgf_read_hypo(rdr, slot);
        gu_return_on_exn(rdr->err, NULL);
    }

    PgfCId cat_name = pgf_read_cid(rdr, rdr->opool);
    gu_return_on_exn(rdr->err, NULL);

    // pgf_read_len yields 0 on failure, so no check is made here; the
    // loop below is then empty.
    size_t expr_count = pgf_read_len(rdr);

    PgfType* result = gu_new_flex(rdr->opool, PgfType, exprs, expr_count);
    result->hypos = hypo_seq;
    result->cid = cat_name;
    result->n_exprs = expr_count;

    for (size_t e = 0; e < result->n_exprs; e++) {
        result->exprs[e] = pgf_read_expr_(rdr);
        gu_return_on_exn(rdr->err, NULL);
    }

    return result;
}
// Reads one pattern of a definitional equation: a tag byte selects
// the pattern kind, then its fields follow (recursively for nested
// patterns).  Patterns are allocated from rdr->opool.
// Returns gu_null_variant as soon as an exception is raised.
static PgfPatt
pgf_read_patt(PgfReader* rdr)
{
    PgfPatt patt = gu_null_variant;

    uint8_t tag = pgf_read_tag(rdr);
    switch (tag) {
    case PGF_PATT_APP: {
        // The constructor name and the argument count are read before
        // the node is allocated, so that the node can be sized to
        // hold all n_args trailing argument patterns.
        PgfCId ctor = pgf_read_cid(rdr, rdr->opool);
        gu_return_on_exn(rdr->err, gu_null_variant);

        size_t n_args = pgf_read_len(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);

        PgfPattApp *papp =
            gu_new_flex_variant(PGF_PATT_APP,
                                PgfPattApp,
                                args, n_args,
                                &patt, rdr->opool);
        papp->ctor = ctor;
        papp->n_args = n_args;

        for (size_t i = 0; i < n_args; i++) {
            papp->args[i] = pgf_read_patt(rdr);
            gu_return_on_exn(rdr->err, gu_null_variant);
        }
        break;
    }
    case PGF_PATT_VAR: {
        PgfPattVar *pvar =
            gu_new_variant(PGF_PATT_VAR,
                           PgfPattVar,
                           &patt, rdr->opool);

        pvar->var = pgf_read_cid(rdr, rdr->opool);
        gu_return_on_exn(rdr->err, gu_null_variant);
        break;
    }
    case PGF_PATT_AS: {
        // Variable name followed by the pattern it is bound to.
        PgfPattAs *pas =
            gu_new_variant(PGF_PATT_AS,
                           PgfPattAs,
                           &patt, rdr->opool);

        pas->var = pgf_read_cid(rdr, rdr->opool);
        gu_return_on_exn(rdr->err, gu_null_variant);

        pas->patt = pgf_read_patt(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);
        break;
    }
    case PGF_PATT_WILD: {
        // No payload; the variant is created for its tag only.
        PgfPattWild* pwild =
            gu_new_variant(PGF_PATT_WILD,
                           PgfPattWild,
                           &patt, rdr->opool);
        ((void) pwild);
        break;
    }
    case PGF_PATT_LIT: {
        PgfPattLit *plit =
            gu_new_variant(PGF_PATT_LIT,
                           PgfPattLit,
                           &patt, rdr->opool);

        plit->lit = pgf_read_literal(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);
        break;
    }
    case PGF_PATT_IMPL_ARG: {
        PgfPattImplArg *pimpl =
            gu_new_variant(PGF_PATT_IMPL_ARG,
                           PgfPattImplArg,
                           &patt, rdr->opool);

        pimpl->patt = pgf_read_patt(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);
        break;
    }
    case PGF_PATT_TILDE: {
        PgfPattTilde *ptilde =
            gu_new_variant(PGF_PATT_TILDE,
                           PgfPattTilde,
                           &patt, rdr->opool);

        ptilde->expr = pgf_read_expr_(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);
        break;
    }
    default:
        pgf_read_tag_error(rdr);
    }

    return patt;
}
// Reads one abstract function: name, type, arity, an optional list of
// definitional equations and a probability.
//
// The name is stored inside a PgfExprFun node (absfun->ep.expr) and
// absfun->name points at that same storage.  The probability is kept
// as a negative logarithm.
//
// Returns NULL when an exception is raised before the probability is
// read; an error on the final read is left on rdr->err.
static PgfAbsFun*
pgf_read_absfun(PgfReader* rdr, PgfAbstr* abstr)
{
    PgfAbsFun* absfun = gu_new(PgfAbsFun, rdr->opool);

    size_t len = pgf_read_len(rdr);

    PgfExprFun *efun =
        gu_new_flex_variant(PGF_EXPR_FUN,
                            PgfExprFun,
                            fun, len+1,
                            &absfun->ep.expr, rdr->opool);
    gu_in_bytes(rdr->in, (uint8_t*)efun->fun, len, rdr->err);
    efun->fun[len] = 0;
    absfun->name = efun->fun;
    gu_return_on_exn(rdr->err, NULL);

    absfun->type = pgf_read_type_(rdr);
    gu_return_on_exn(rdr->err, NULL);

    absfun->arity = pgf_read_int(rdr);

    uint8_t tag = pgf_read_tag(rdr);
    gu_return_on_exn(rdr->err, NULL);

    switch (tag) {
    case 0:
        // No equations for this function.
        absfun->defns = NULL;
        absfun->function = NULL;
        break;
    case 1: {
        GuLength length = pgf_read_len(rdr);
        gu_return_on_exn(rdr->err, NULL);

        absfun->defns = gu_new_seq(PgfEquation*, length, rdr->opool);
        PgfEquation** data = gu_seq_data(absfun->defns);
        for (size_t i = 0; i < length; i++) {
            GuLength n_patts = pgf_read_len(rdr);
            gu_return_on_exn(rdr->err, NULL);

            PgfEquation *equ =
                gu_malloc(rdr->opool,
                          sizeof(PgfEquation)+sizeof(PgfPatt)*n_patts);
            equ->n_patts = n_patts;
            for (GuLength j = 0; j < n_patts; j++) {
                equ->patts[j] = pgf_read_patt(rdr);
                gu_return_on_exn(rdr->err, NULL);
            }

            equ->body = pgf_read_expr_(rdr);
            gu_return_on_exn(rdr->err, NULL);

            data[i] = equ;
        }

        // The JIT call below is not active, so nothing else would
        // give this field a value; do not leave it uninitialised.
        absfun->function = NULL;
        // pgf_jit_function(rdr, abstr, absfun);
        break;
    }
    default:
        pgf_read_tag_error(rdr);
        break;
    }

    absfun->ep.prob = - log(pgf_read_double(rdr));

    return absfun;
}
// Reads the table of abstract functions into a string-keyed map
// (function name -> PgfAbsFun*), allocated from rdr->opool.
// Returns NULL as soon as an exception is raised.
static PgfCIdMap*
pgf_read_absfuns(PgfReader* rdr, PgfAbstr* abstr)
{
    GuMapType* map_type = (GuMapType*)
        GU_TYPE_LIT(GuStringMap, _,
                    gu_ptr_type(PgfAbsFun),
                    &gu_null_struct);
    PgfCIdMap* fun_map = gu_map_type_make(map_type, rdr->opool);

    size_t n_funs = pgf_read_len(rdr);
    gu_return_on_exn(rdr->err, NULL);

    for (size_t k = 0; k < n_funs; k++) {
        PgfAbsFun* fun = pgf_read_absfun(rdr, abstr);
        gu_return_on_exn(rdr->err, NULL);

        gu_map_put(fun_map, fun->name, PgfAbsFun*, fun);
    }

    return fun_map;
}
// Reads one abstract category: name, context (a list of hypotheses)
// and probability (kept as a negative logarithm).  pgf_jit_predicate
// is invoked for the category before the probability is read.
// The abscats argument is not used here.
// Returns NULL when the name or the context cannot be read.
static PgfAbsCat*
pgf_read_abscat(PgfReader* rdr, PgfAbstr* abstr, PgfCIdMap* abscats)
{
    PgfAbsCat* cat = gu_new(PgfAbsCat, rdr->opool);

    cat->name = pgf_read_cid(rdr, rdr->opool);
    gu_return_on_exn(rdr->err, NULL);

    size_t ctx_len = pgf_read_len(rdr);
    gu_return_on_exn(rdr->err, NULL);

    cat->context = gu_new_seq(PgfHypo, ctx_len, rdr->opool);
    for (size_t h = 0; h < ctx_len; h++) {
        PgfHypo* slot = gu_seq_index(cat->context, PgfHypo, h);
        pgf_read_hypo(rdr, slot);
        gu_return_on_exn(rdr->err, NULL);
    }

    pgf_jit_predicate(rdr, abstr, cat);

    cat->prob = - log(pgf_read_double(rdr));

    return cat;
}
// Reads the table of abstract categories into a string-keyed map
// (category name -> PgfAbsCat*), allocated from rdr->opool.
// Returns NULL as soon as an exception is raised.
static PgfCIdMap*
pgf_read_abscats(PgfReader* rdr, PgfAbstr* abstr)
{
    GuMapType* map_type = (GuMapType*)
        GU_TYPE_LIT(GuStringMap, _,
                    gu_ptr_type(PgfAbsCat),
                    &gu_null_struct);
    PgfCIdMap* cat_map = gu_map_type_make(map_type, rdr->opool);

    size_t n_cats = pgf_read_len(rdr);
    gu_return_on_exn(rdr->err, NULL);

    for (size_t k = 0; k < n_cats; k++) {
        PgfAbsCat* cat = pgf_read_abscat(rdr, abstr, cat_map);
        gu_return_on_exn(rdr->err, NULL);

        gu_map_put(cat_map, cat->name, PgfAbsCat*, cat);
    }

    return cat_map;
}
// Reads the abstract syntax into *abstract: name, flags, functions
// and categories, in that order.  Stops at the first exception.
//
// Afterwards a placeholder function named "_" is created in
// abstract->abs_lin_fun; pgf_read_lindefs and pgf_read_linrefs attach
// it to the concrete functions they list.
static void
pgf_read_abstract(PgfReader* rdr, PgfAbstr* abstract)
{
    abstract->name = pgf_read_cid(rdr, rdr->opool);
    gu_return_on_exn(rdr->err, );

    abstract->aflags = pgf_read_flags(rdr);
    gu_return_on_exn(rdr->err, );

    abstract->funs = pgf_read_absfuns(rdr, abstract);
    gu_return_on_exn(rdr->err, );

    abstract->cats = pgf_read_abscats(rdr, abstract);
    gu_return_on_exn(rdr->err, );

    abstract->abs_lin_fun = gu_new(PgfAbsFun, rdr->opool);
    abstract->abs_lin_fun->name = "_";
    abstract->abs_lin_fun->type = gu_new(PgfType, rdr->opool);
    abstract->abs_lin_fun->type->hypos = NULL;
    abstract->abs_lin_fun->type->cid = "_";
    abstract->abs_lin_fun->type->n_exprs = 0;
    abstract->abs_lin_fun->arity = 0;
    abstract->abs_lin_fun->defns = NULL;
    // Initialise every field that pgf_read_absfun sets, so that the
    // placeholder never carries an indeterminate pointer.
    abstract->abs_lin_fun->function = NULL;
    abstract->abs_lin_fun->ep.prob = INFINITY;
    abstract->abs_lin_fun->ep.expr = gu_null_variant;
}
// Reads the print-name table into a string-keyed map
// (identifier -> GuString) whose default value is the empty string.
// Returns NULL as soon as an exception is raised.
static PgfCIdMap*
pgf_read_printnames(PgfReader* rdr)
{
    GuMapType* map_type = (GuMapType*)
        GU_TYPE_LIT(GuStringMap, _,
                    gu_type(GuString),
                    &"");
    PgfCIdMap* name_map = gu_map_type_make(map_type, rdr->opool);

    size_t n_entries = pgf_read_len(rdr);
    gu_return_on_exn(rdr->err, NULL);

    for (size_t k = 0; k < n_entries; k++) {
        PgfCId key = pgf_read_cid(rdr, rdr->opool);
        gu_return_on_exn(rdr->err, NULL);

        GuString text = pgf_read_string(rdr);
        gu_return_on_exn(rdr->err, NULL);

        gu_map_put(name_map, key, GuString, text);
    }

    return name_map;
}
// Symbol lists and alternatives refer to each other, hence the
// forward declaration.
static PgfSymbols*
pgf_read_symbols(PgfReader* rdr);

// Fills in one alternative in place: its symbol sequence followed by
// the list of prefix strings.  Stops at the first exception.
static void
pgf_read_alternative(PgfReader* rdr, PgfAlternative* alt)
{
    alt->form = pgf_read_symbols(rdr);
    gu_return_on_exn(rdr->err,);

    size_t prefix_count = pgf_read_len(rdr);
    gu_return_on_exn(rdr->err,);

    alt->prefixes = gu_new_seq(GuString, prefix_count, rdr->opool);
    for (size_t k = 0; k < prefix_count; k++) {
        GuString one_prefix = pgf_read_string(rdr);
        gu_return_on_exn(rdr->err,);

        gu_seq_set(alt->prefixes, GuString, k, one_prefix);
    }
}
// Reads one symbol of a linearization sequence: a tag byte selects
// the symbol kind, then its fields follow.  Symbols are allocated
// from rdr->opool.
// Returns gu_null_variant as soon as an exception is raised, or when
// the declared token length is too large to size a buffer for.
static PgfSymbol
pgf_read_symbol(PgfReader* rdr)
{
    PgfSymbol sym = gu_null_variant;

    uint8_t tag = pgf_read_tag(rdr);
    switch (tag) {
    case PGF_SYMBOL_CAT: {
        PgfSymbolCat *sym_cat =
            gu_new_variant(PGF_SYMBOL_CAT,
                           PgfSymbolCat,
                           &sym, rdr->opool);

        sym_cat->d = pgf_read_int(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);

        sym_cat->r = pgf_read_int(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);
        break;
    }
    case PGF_SYMBOL_LIT: {
        PgfSymbolLit *sym_lit =
            gu_new_variant(PGF_SYMBOL_LIT,
                           PgfSymbolLit,
                           &sym, rdr->opool);

        sym_lit->d = pgf_read_int(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);

        sym_lit->r = pgf_read_int(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);
        break;
    }
    case PGF_SYMBOL_VAR: {
        PgfSymbolVar *sym_var =
            gu_new_variant(PGF_SYMBOL_VAR,
                           PgfSymbolVar,
                           &sym, rdr->opool);

        sym_var->d = pgf_read_int(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);

        sym_var->r = pgf_read_int(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);
        break;
    }
    case PGF_SYMBOL_KS: {
        // Largest scratch buffer that is still taken from the stack.
        enum { STACK_BUF_MAX = 1024 };

        GuLength len = pgf_read_len(rdr);

        // The scratch buffer reserves 6 bytes per character plus the
        // terminator; refuse lengths for which that would overflow.
        if (len > (SIZE_MAX - 1) / 6) {
            gu_raise(rdr->err, PgfReadExn);
            return gu_null_variant;
        }
        size_t buf_size = len*6+1;

        // The length comes from the file, so only small buffers may
        // live on the stack; larger ones come from the temporary pool.
        uint8_t* buf;
        if (buf_size <= STACK_BUF_MAX)
            buf = alloca(buf_size);
        else
            buf = gu_malloc(rdr->tmp_pool, buf_size);

        uint8_t* p = buf;
        for (size_t i = 0; i < len; i++) {
            gu_in_utf8_buf(&p, rdr->in, rdr->err);
            gu_return_on_exn(rdr->err, gu_null_variant);
        }
        *p++ = 0;

        PgfSymbolKS *sym_ks =
            gu_new_flex_variant(PGF_SYMBOL_KS,
                                PgfSymbolKS,
                                token, p-buf,
                                &sym, rdr->opool);
        strcpy((char*) sym_ks->token, (char*) buf);
        break;
    }
    case PGF_SYMBOL_KP: {
        // Default form, then a list of alternatives.
        PgfSymbols* default_form = pgf_read_symbols(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);

        size_t n_forms = pgf_read_len(rdr);
        gu_return_on_exn(rdr->err, gu_null_variant);

        PgfSymbolKP *sym_kp =
            gu_new_flex_variant(PGF_SYMBOL_KP,
                                PgfSymbolKP, forms, n_forms,
                                &sym, rdr->opool);
        sym_kp->default_form = default_form;
        sym_kp->n_forms = n_forms;

        for (size_t i = 0; i < sym_kp->n_forms; i++) {
            pgf_read_alternative(rdr, &sym_kp->forms[i]);
            gu_return_on_exn(rdr->err, gu_null_variant);
        }
        break;
    }
    case PGF_SYMBOL_NE: {
        // No payload follows the tag.
        gu_new_variant(PGF_SYMBOL_NE,
                       PgfSymbolNE,
                       &sym, rdr->opool);
        gu_return_on_exn(rdr->err, gu_null_variant);
        break;
    }
    case PGF_SYMBOL_BIND: {
        // No payload follows the tag.
        gu_new_variant(PGF_SYMBOL_BIND,
                       PgfSymbolBIND,
                       &sym, rdr->opool);
        gu_return_on_exn(rdr->err, gu_null_variant);
        break;
    }
    case PGF_SYMBOL_SOFT_BIND: {
        // Allocated with the PgfSymbolBIND struct under its own tag.
        gu_new_variant(PGF_SYMBOL_SOFT_BIND,
                       PgfSymbolBIND,
                       &sym, rdr->opool);
        gu_return_on_exn(rdr->err, gu_null_variant);
        break;
    }
    default:
        pgf_read_tag_error(rdr);
    }

    return sym;
}
// Reads a length-prefixed list of symbols into a new sequence
// allocated from rdr->opool.
// Returns NULL as soon as an exception is raised.
static PgfSymbols*
pgf_read_symbols(PgfReader* rdr)
{
    size_t n_syms = pgf_read_len(rdr);
    gu_return_on_exn(rdr->err, NULL);

    PgfSymbols* result = gu_new_seq(PgfSymbol, n_syms, rdr->opool);
    for (size_t k = 0; k < n_syms; k++) {
        PgfSymbol one = pgf_read_symbol(rdr);
        gu_return_on_exn(rdr->err, NULL);

        gu_seq_set(result, PgfSymbol, k, one);
    }

    return result;
}
// Reads the table of linearization sequences.  Each entry gets its
// symbol list from the file; its idx field starts out as NULL.
// Returns NULL as soon as an exception is raised.
static PgfSequences*
pgf_read_sequences(PgfReader* rdr)
{
    size_t n_seqs = pgf_read_len(rdr);
    gu_return_on_exn(rdr->err, NULL);

    PgfSequences* table = gu_new_seq(PgfSequence, n_seqs, rdr->opool);
    for (size_t k = 0; k < n_seqs; k++) {
        PgfSymbols* symbols = pgf_read_symbols(rdr);
        gu_return_on_exn(rdr->err, NULL);

        gu_seq_index(table, PgfSequence, k)->syms = symbols;
        gu_seq_index(table, PgfSequence, k)->idx = NULL;
    }

    return table;
}
// Reads one concrete function: the name of its abstract function
// followed by a list of sequence ids, one per linearization.
//
// The name is only needed for the lookup in abstr->funs, so it is
// read into the temporary pool.  If no abstract function of that name
// exists, absfun and ep are left NULL.
//
// Raises PgfReadExn and returns NULL for a sequence id outside
// concr->sequences; returns NULL on any other exception as well.
static PgfCncFun*
pgf_read_cncfun(PgfReader* rdr, PgfAbstr* abstr, PgfConcr* concr, int funid)
{
    PgfCId fun_name = pgf_read_cid(rdr, rdr->tmp_pool);
    gu_return_on_exn(rdr->err, NULL);

    size_t n_lins = pgf_read_len(rdr);
    gu_return_on_exn(rdr->err, NULL);

    PgfAbsFun* abs =
        gu_map_get(abstr->funs, fun_name, PgfAbsFun*);

    PgfCncFun* result = gu_new_flex(rdr->opool, PgfCncFun, lins, n_lins);
    result->absfun = abs;
    if (abs == NULL) {
        result->ep = NULL;
    } else {
        result->ep = &abs->ep;
    }
    result->funid = funid;
    result->n_lins = n_lins;

    for (size_t k = 0; k < n_lins; k++) {
        size_t seq_id = pgf_read_int(rdr);
        gu_return_on_exn(rdr->err, NULL);

        if (seq_id >= gu_seq_length(concr->sequences)) {
            gu_raise(rdr->err, PgfReadExn);
            return NULL;
        }

        result->lins[k] = gu_seq_index(concr->sequences, PgfSequence, seq_id);
    }

    return result;
}
// Reads the table of concrete functions; the position of a function
// in the table is passed on as its funid.
// Returns NULL as soon as an exception is raised.
static PgfCncFuns*
pgf_read_cncfuns(PgfReader* rdr, PgfAbstr* abstr, PgfConcr* concr)
{
    size_t n_funs = pgf_read_len(rdr);
    gu_return_on_exn(rdr->err, NULL);

    PgfCncFuns* table = gu_new_seq(PgfCncFun*, n_funs, rdr->opool);
    for (size_t id = 0; id < n_funs; id++) {
        PgfCncFun* one = pgf_read_cncfun(rdr, abstr, concr, id);
        gu_return_on_exn(rdr->err, NULL);

        gu_seq_set(table, PgfCncFun*, id, one);
    }

    return table;
}
// Reads a category id and returns the matching entry of
// concr->ccats.  An id seen for the first time gets a fresh, empty
// PgfCCat which is registered in the map.
// Returns NULL if the id cannot be read.
static PgfCCat*
pgf_read_fid(PgfReader* rdr, PgfConcr* concr)
{
    int fid = pgf_read_int(rdr);
    gu_return_on_exn(rdr->err, NULL);

    PgfCCat* found = gu_map_get(concr->ccats, &fid, PgfCCat*);
    if (found) {
        return found;
    }

    PgfCCat* fresh = gu_new(PgfCCat, rdr->opool);
    fresh->cnccat = NULL;
    fresh->lindefs = NULL;
    fresh->linrefs = NULL;
    fresh->n_synprods = 0;
    fresh->prods = NULL;
    fresh->viterbi_prob = 0;
    fresh->fid = fid;
    fresh->conts = NULL;
    fresh->answers = NULL;

    gu_map_put(concr->ccats, &fid, PgfCCat*, fresh);
    return fresh;
}
// Reads a concrete function id and returns the corresponding entry
// of concr->cncfuns.
// Raises PgfReadExn and returns NULL for an id outside the table;
// returns NULL if the id cannot be read.
static PgfCncFun*
pgf_read_funid(PgfReader* rdr, PgfConcr* concr)
{
    size_t id = pgf_read_int(rdr);
    gu_return_on_exn(rdr->err, NULL);

    if (id < gu_seq_length(concr->cncfuns)) {
        return gu_seq_get(concr->cncfuns, PgfCncFun*, id);
    }

    gu_raise(rdr->err, PgfReadExn);
    return NULL;
}
// Reads the lindef table: for each listed category, the concrete
// functions stored in ccat->lindefs.  Every function listed here has
// its absfun pointed at the abstract syntax' abs_lin_fun placeholder.
// Stops at the first exception.
static void
pgf_read_lindefs(PgfReader* rdr, PgfConcr* concr)
{
    size_t len = pgf_read_len(rdr);
    gu_return_on_exn(rdr->err, );

    for (size_t i = 0; i < len; i++) {
        PgfCCat* ccat = pgf_read_fid(rdr, concr);

        size_t n_funs = pgf_read_len(rdr);
        gu_return_on_exn(rdr->err, );

        ccat->lindefs = gu_new_seq(PgfCncFun*, n_funs, rdr->opool);
        for (size_t j = 0; j < n_funs; j++) {
            PgfCncFun* fun = pgf_read_funid(rdr, concr);
            // pgf_read_funid returns NULL on a read error or an
            // out-of-range id; bail out before dereferencing it.
            gu_return_on_exn(rdr->err, );

            fun->absfun = concr->abstr->abs_lin_fun;
            gu_seq_set(ccat->lindefs, PgfCncFun*, j, fun);
        }
    }
}
// Reads the linref table: for each listed category, the concrete
// functions stored in ccat->linrefs.  Every function listed here has
// its absfun pointed at the abstract syntax' abs_lin_fun placeholder.
// Stops at the first exception.
static void
pgf_read_linrefs(PgfReader* rdr, PgfConcr* concr)
{
    size_t len = pgf_read_len(rdr);
    gu_return_on_exn(rdr->err, );

    for (size_t i = 0; i < len; i++) {
        PgfCCat* ccat = pgf_read_fid(rdr, concr);

        size_t n_funs = pgf_read_len(rdr);
        gu_return_on_exn(rdr->err, );

        ccat->linrefs = gu_new_seq(PgfCncFun*, n_funs, rdr->opool);
        for (size_t j = 0; j < n_funs; j++) {
            PgfCncFun* fun = pgf_read_funid(rdr, concr);
            // pgf_read_funid returns NULL on a read error or an
            // out-of-range id; bail out before dereferencing it.
            gu_return_on_exn(rdr->err, );

            fun->absfun = concr->abstr->abs_lin_fun;
            gu_seq_set(ccat->linrefs, PgfCncFun*, j, fun);
        }
    }
}
// Fills in one production argument in place: a list of category ids
// stored in parg->hypos, followed by the id of the argument's own
// category.  Stops at the first exception.
static void
pgf_read_parg(PgfReader* rdr, PgfConcr* concr, PgfPArg* parg)
{
    size_t hypo_count = pgf_read_len(rdr);
    gu_return_on_exn(rdr->err, );

    parg->hypos = gu_new_seq(PgfCCat*, hypo_count, rdr->opool);
    for (size_t k = 0; k < hypo_count; k++) {
        gu_seq_set(parg->hypos, PgfCCat*, k, pgf_read_fid(rdr, concr));
        gu_return_on_exn(rdr->err, );
    }

    parg->ccat = pgf_read_fid(rdr, concr);
    gu_return_on_exn(rdr->err, );
}
// Reads a length-prefixed list of production arguments into a new
// sequence allocated from rdr->opool.
// Returns NULL as soon as an exception is raised, like the other
// list readers in this file, instead of continuing to read after a
// failed argument.
static PgfPArgs*
pgf_read_pargs(PgfReader* rdr, PgfConcr* concr)
{
    size_t len = pgf_read_len(rdr);
    gu_return_on_exn(rdr->err, NULL);

    PgfPArgs* pargs = gu_new_seq(PgfPArg, len, rdr->opool);
    for (size_t i = 0; i < len; i++) {
        PgfPArg* parg = gu_seq_index(pargs, PgfPArg, i);
        pgf_read_parg(rdr, concr, parg);
        gu_return_on_exn(rdr->err, NULL);
    }

    return pargs;
}
// Indexing hooks defined outside this file; both are called for
// every production that is read.
extern void
pgf_parser_index(PgfConcr* concr,
                 PgfCCat* ccat, PgfProduction prod,
                 GuPool *pool);

extern void
pgf_lzr_index(PgfConcr* concr,
              PgfCCat* ccat, PgfProduction prod,
              GuPool *pool);

// Reads one production of ccat and stores it in ccat->prods.
//
// The sequence is filled from both ends: applications that have
// arguments and coercions are placed at *top (which then grows),
// applications without arguments at *bot (which then shrinks).
// Returns without storing or indexing anything if an exception is
// raised.
static void
pgf_read_production(PgfReader* rdr, PgfConcr* concr,
                    PgfCCat* ccat, size_t* top, size_t* bot)
{
    PgfProduction production = gu_null_variant;

    uint8_t kind = pgf_read_tag(rdr);
    switch (kind) {
    case PGF_PRODUCTION_APPLY: {
        PgfProductionApply *apply =
            gu_new_variant(PGF_PRODUCTION_APPLY,
                           PgfProductionApply,
                           &production, rdr->opool);

        apply->fun = pgf_read_funid(rdr, concr);
        gu_return_on_exn(rdr->err, );

        apply->args = pgf_read_pargs(rdr, concr);
        gu_return_on_exn(rdr->err, );

        if (gu_seq_length(apply->args) > 0) {
            gu_seq_set(ccat->prods, PgfProduction, (*top)++, production);
        } else {
            gu_seq_set(ccat->prods, PgfProduction, (*bot)--, production);
        }
        break;
    }
    case PGF_PRODUCTION_COERCE: {
        PgfProductionCoerce *coercion =
            gu_new_variant(PGF_PRODUCTION_COERCE,
                           PgfProductionCoerce,
                           &production, rdr->opool);

        coercion->coerce = pgf_read_fid(rdr, concr);
        gu_return_on_exn(rdr->err, );

        gu_seq_set(ccat->prods, PgfProduction, (*top)++, production);
        break;
    }
    default:
        pgf_read_tag_error(rdr);
    }

    pgf_parser_index(concr, ccat, production, rdr->opool);
    pgf_lzr_index(concr, ccat, production, rdr->opool);
}
// Reads the production table: for each listed category, its
// productions.  ccat->n_synprods ends up as the number of productions
// that pgf_read_production placed at the front of ccat->prods.
// Stops at the first exception.
static void
pgf_read_ccats(PgfReader* rdr, PgfConcr* concr)
{
    size_t n_cats = pgf_read_len(rdr);
    gu_return_on_exn(rdr->err, );

    for (size_t c = 0; c < n_cats; c++) {
        PgfCCat* ccat = pgf_read_fid(rdr, concr);

        GuLength n_prods = pgf_read_len(rdr);
        gu_return_on_exn(rdr->err,);

        ccat->prods = gu_new_seq(PgfProduction, n_prods, rdr->opool);

        // Fill positions: front grows upwards, back grows downwards.
        size_t front = 0;
        size_t back = n_prods-1;
        for (size_t p = 0; p < n_prods; p++) {
            pgf_read_production(rdr, concr, ccat, &front, &back);
            gu_return_on_exn(rdr->err, );
        }

        ccat->n_synprods = front;
    }
}
// Reads one concrete category: the first and last category id it
// covers, and the labels of its linearization fields.
//
// Every id in [first, last] is looked up in concr->ccats (a fresh,
// empty PgfCCat is created and registered when missing) and linked to
// the new concrete category.  The abstract category of the given name
// must exist in abstr->cats.
//
// Returns NULL as soon as an exception is raised.  A range whose last
// id lies more than one below the first raises PgfReadExn.
static PgfCncCat*
pgf_read_cnccat(PgfReader* rdr, PgfAbstr* abstr, PgfConcr* concr, PgfCId name)
{
    int first = pgf_read_int(rdr);
    gu_return_on_exn(rdr->err, NULL);

    int last = pgf_read_int(rdr);
    gu_return_on_exn(rdr->err, NULL);

    size_t n_lins = pgf_read_len(rdr);
    gu_return_on_exn(rdr->err, NULL);

    // Computed in 64 bits so that neither overflow nor a negative
    // count can reach the allocation below.
    int64_t span = (int64_t) last + 1 - (int64_t) first;
    if (span < 0 || span > INT32_MAX) {
        gu_raise(rdr->err, PgfReadExn);
        return NULL;
    }
    int len = (int) span;

    PgfCncCat* cnccat =
        gu_malloc(rdr->opool, sizeof(PgfCncCat)+n_lins*sizeof(GuString));

    cnccat->abscat =
        gu_map_get(abstr->cats, name, PgfAbsCat*);
    gu_assert(cnccat->abscat != NULL);

    cnccat->cats = gu_new_seq(PgfCCat*, len, rdr->opool);

    for (int i = 0; i < len; i++) {
        int fid = first + i;
        PgfCCat* ccat = gu_map_get(concr->ccats, &fid, PgfCCat*);
        if (!ccat) {
            ccat = gu_new(PgfCCat, rdr->opool);
            ccat->cnccat = NULL;
            ccat->lindefs = NULL;
            ccat->linrefs = NULL;
            ccat->n_synprods = 0;
            ccat->prods = NULL;
            ccat->viterbi_prob = 0;
            ccat->fid = fid;
            ccat->conts = NULL;
            ccat->answers = NULL;

            gu_map_put(concr->ccats, &fid, PgfCCat*, ccat);
        }
        gu_seq_set(cnccat->cats, PgfCCat*, i, ccat);

        ccat->cnccat = cnccat;
    }

    cnccat->n_lins = n_lins;
    for (size_t i = 0; i < cnccat->n_lins; i++) {
        cnccat->labels[i] = pgf_read_string(rdr);
        gu_return_on_exn(rdr->err, NULL);
    }

    return cnccat;
}
// Reads the table of concrete categories into a string-keyed map
// (category name -> PgfCncCat*), allocated from rdr->opool.
// Returns NULL as soon as an exception is raised.
static PgfCIdMap*
pgf_read_cnccats(PgfReader* rdr, PgfAbstr* abstr, PgfConcr* concr)
{
    GuMapType* map_type = (GuMapType*)
        GU_TYPE_LIT(GuStringMap, _,
                    gu_ptr_type(PgfCncCat),
                    &gu_null_struct);
    PgfCIdMap* cat_map = gu_map_type_make(map_type, rdr->opool);

    size_t n_cats = pgf_read_len(rdr);
    gu_return_on_exn(rdr->err, NULL);

    for (size_t k = 0; k < n_cats; k++) {
        PgfCId cat_name = pgf_read_cid(rdr, rdr->opool);
        gu_return_on_exn(rdr->err, NULL);

        PgfCncCat* cat =
            pgf_read_cnccat(rdr, abstr, concr, cat_name);
        gu_return_on_exn(rdr->err, NULL);

        gu_map_put(cat_map, cat_name, PgfCncCat*, cat);
    }

    return cat_map;
}
// Gives ccat the concrete category of the category that the coercion
// production prod points to.  If ccat already has a different
// concrete category, or if prod is not a coercion, gu_impossible()
// is called.
static void
pgf_ccat_set_cnccat(PgfCCat* ccat, PgfProduction prod)
{
    GuVariantInfo info = gu_variant_open(prod);

    if (info.tag != PGF_PRODUCTION_COERCE) {
        // An application production shouldn't show up here with
        // current PGF; any other tag is unknown.
        // XXX: real error
        gu_impossible();
        return;
    }

    PgfProductionCoerce* coercion = info.data;
    PgfCncCat* target = coercion->coerce->cnccat;

    if (!ccat->cnccat) {
        ccat->cnccat = target;
    } else if (ccat->cnccat != target) {
        // XXX: real error
        gu_impossible();
    }
}
extern prob_t
pgf_ccat_set_viterbi_prob(PgfCCat* ccat);

// Map iteration callback applied to every entry of concr->ccats once
// a concrete syntax has been read.  For a category that has
// productions but no concrete category yet, the concrete category is
// derived from its productions.
static void
pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err)
{
    (void) (fn && key && err);

    PgfCCat* ccat = *((PgfCCat**) value);
    if (ccat->prods == NULL) {
        return;
    }

    size_t count = gu_seq_length(ccat->prods);
    for (size_t k = 0; k < count; k++) {
        PgfProduction production =
            gu_seq_get(ccat->prods, PgfProduction, k);

        if (ccat->cnccat) {
            continue;
        }
        pgf_ccat_set_cnccat(ccat, production);
    }

    // Viterbi probabilities are not computed here:
    // pgf_ccat_set_viterbi_prob(ccat);
}
// Reads the body of a concrete syntax into concr: print names,
// sequences, concrete functions, lindefs, linrefs, productions,
// concrete categories and the total category count, in that order.
//
// Returns at the first exception, leaving the fields that were not
// reached untouched.  In particular the final pass over concr->ccats
// only runs once everything has been read successfully, so it never
// sees partially read productions.
void
pgf_read_concrete_content(PgfReader* rdr, PgfConcr* concr)
{
    concr->printnames =
        pgf_read_printnames(rdr);
    gu_return_on_exn(rdr->err,);

    concr->sequences =
        pgf_read_sequences(rdr);
    gu_return_on_exn(rdr->err,);

    concr->pre_sequences = gu_new_buf(PgfSequence, rdr->opool);

    concr->cncfuns =
        pgf_read_cncfuns(rdr, concr->abstr, concr);
    gu_return_on_exn(rdr->err,);

    concr->ccats =
        gu_new_int_map(PgfCCat*, &gu_null_struct, rdr->opool);
    concr->fun_indices = gu_map_type_new(PgfCncFunOverloadMap, rdr->opool);
    concr->coerce_idx  = gu_map_type_new(PgfCncOverloadMap, rdr->opool);

    pgf_read_lindefs(rdr, concr);
    gu_return_on_exn(rdr->err,);

    pgf_read_linrefs(rdr, concr);
    gu_return_on_exn(rdr->err,);

    pgf_read_ccats(rdr, concr);
    gu_return_on_exn(rdr->err,);

    concr->cnccats = pgf_read_cnccats(rdr, concr->abstr, concr);
    gu_return_on_exn(rdr->err,);

    concr->callbacks = pgf_new_callbacks_map(concr, rdr->opool);

    concr->total_cats = pgf_read_int(rdr);
    gu_return_on_exn(rdr->err,);

    GuMapItor clo1 = { pgf_read_ccat_cb };
    gu_map_iter(concr->ccats, &clo1, NULL);
}
// Puts concr into the "content not loaded" state: every field that
// pgf_read_concrete_content fills in is cleared.
static void
pgf_read_concrete_init_header(PgfConcr* concr)
{
    concr->total_cats = 0;

    concr->printnames = NULL;
    concr->sequences = NULL;
    concr->pre_sequences = NULL;
    concr->cncfuns = NULL;

    concr->ccats = NULL;
    concr->cnccats = NULL;

    concr->fun_indices = NULL;
    concr->coerce_idx = NULL;
    concr->callbacks = NULL;
}
// Finalizer for a concrete syntax whose content is loaded on demand:
// if content is currently loaded (concr->pool is set), the header is
// reset and the content pool is freed.
static void
gu_concr_fini(GuFinalizer* fin)
{
    PgfConcr* concr = gu_container(fin, PgfConcr, fin);

    if (concr->pool == NULL) {
        return;  // nothing loaded
    }

    pgf_read_concrete_init_header(concr);
    gu_pool_free(concr->pool);
    concr->pool = NULL;
}
// Reads one concrete syntax: name and flags, and then either its
// whole content (with_content) or nothing more.  In the latter case
// the header is initialised as "not loaded" and a finalizer is
// registered on rdr->opool so that content loaded later gets
// released.
// Returns NULL as soon as an exception is raised.
static PgfConcr*
pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr, bool with_content)
{
    PgfConcr* result = gu_new(PgfConcr, rdr->opool);

    result->name =
        pgf_read_cid(rdr, rdr->opool);
    gu_return_on_exn(rdr->err, NULL);

    result->abstr = abstr;

    result->cflags =
        pgf_read_flags(rdr);
    gu_return_on_exn(rdr->err, NULL);

    result->pool = NULL;

    if (!with_content) {
        pgf_read_concrete_init_header(result);
        result->fin.fn = gu_concr_fini;
        gu_pool_finally(rdr->opool, &result->fin);
    } else {
        pgf_read_concrete_content(rdr, result);
        result->fin.fn = NULL;
    }
    gu_return_on_exn(rdr->err, NULL);

    return result;
}
// Loads the content of a concrete syntax that was read without it.
//
// Does nothing if concr was read together with its content
// (fin.fn == NULL) or if content is already loaded (pool != NULL).
// Otherwise the input must start with the name of this concrete
// syntax, followed by its flags (read and discarded) and its content.
//
// On success concr->pool owns the loaded content.  On any failure
// (read error, or a name that differs from concr->name, which raises
// PgfExn) the freshly created pool is released and concr is put back
// into the "not loaded" state, so that a later call can try again.
void
pgf_concrete_load(PgfConcr* concr, GuIn* in, GuExn* err)
{
    if (concr->fin.fn == NULL || concr->pool != NULL)
        return;     // already loaded

    GuPool* pool = gu_new_pool();
    GuPool* tmp_pool = gu_local_pool();

    PgfReader* rdr = pgf_new_reader(in, pool, tmp_pool, err);

    PgfCId name =
        pgf_read_cid(rdr, rdr->tmp_pool);
    if (gu_exn_is_raised(rdr->err))
        goto fail;

    if (strcmp(name, concr->name) != 0) {
        GuExnData* err_data = gu_raise(rdr->err, PgfExn);
        if (err_data) {
            err_data->data = "This file contains different concrete syntax";
        }
        // Give up whether or not a message could be attached.
        goto fail;
    }

    concr->pool = pool;

    pgf_read_flags(rdr);
    if (gu_exn_is_raised(rdr->err))
        goto fail;

    pgf_read_concrete_content(rdr, concr);
    if (gu_exn_is_raised(rdr->err))
        goto fail;

    gu_pool_free(tmp_pool);
    return;

fail:
    // Drop everything that points into the pool before freeing it.
    pgf_read_concrete_init_header(concr);
    concr->pool = NULL;
    gu_pool_free(tmp_pool);
    gu_pool_free(pool);
}
// Releases the content of a concrete syntax that is loaded on demand.
// Concrete syntaxes without a finalizer (fin.fn == NULL) are left
// alone.
void
pgf_concrete_unload(PgfConcr* concr)
{
    if (concr->fin.fn != NULL) {
        gu_concr_fini(&concr->fin);
    }
}
// Reads the table of concrete syntaxes into a string-keyed map
// (concrete name -> PgfConcr*), allocated from rdr->opool.
// Returns NULL as soon as an exception is raised.
static PgfCIdMap*
pgf_read_concretes(PgfReader* rdr, PgfAbstr* abstr, bool with_content)
{
    GuMapType* map_type = (GuMapType*)
        GU_TYPE_LIT(GuStringMap, _,
                    gu_ptr_type(PgfConcr),
                    &gu_null_struct);
    PgfCIdMap* concr_map = gu_map_type_make(map_type, rdr->opool);

    size_t n_concrs = pgf_read_len(rdr);
    gu_return_on_exn(rdr->err, NULL);

    for (size_t k = 0; k < n_concrs; k++) {
        PgfConcr* one = pgf_read_concrete(rdr, abstr, with_content);
        gu_return_on_exn(rdr->err, NULL);

        gu_map_put(concr_map, one->name, PgfConcr*, one);
    }

    return concr_map;
}
// Reads a whole grammar: major and minor version, global flags, the
// abstract syntax and the concrete syntaxes.  The concrete syntaxes
// are read with their content unless the global flag "split" is
// present.  The grammar lives in rdr->opool.
// Returns NULL as soon as an exception is raised.
PgfPGF*
pgf_read_pgf(PgfReader* rdr) {
    PgfPGF* grammar = gu_new(PgfPGF, rdr->opool);

    grammar->major_version = gu_in_u16be(rdr->in, rdr->err);
    gu_return_on_exn(rdr->err, NULL);

    grammar->minor_version = gu_in_u16be(rdr->in, rdr->err);
    gu_return_on_exn(rdr->err, NULL);

    grammar->gflags = pgf_read_flags(rdr);
    gu_return_on_exn(rdr->err, NULL);

    pgf_read_abstract(rdr, &grammar->abstract);
    gu_return_on_exn(rdr->err, NULL);

    bool load_content =
        gu_variant_is_null(gu_map_get(grammar->gflags, "split", PgfLiteral));

    grammar->concretes =
        pgf_read_concretes(rdr, &grammar->abstract, load_content);
    gu_return_on_exn(rdr->err, NULL);

    grammar->pool = rdr->opool;

    return grammar;
}
// Creates a reader over the given input.  The reader itself is
// allocated from tmp_pool; opool is where the data it reads ends up,
// and err receives raised exceptions.  The JIT state is created last,
// once all other fields are in place.
PgfReader*
pgf_new_reader(GuIn* in, GuPool* opool, GuPool* tmp_pool, GuExn* err)
{
    PgfReader* reader = gu_new(PgfReader, tmp_pool);

    reader->in = in;
    reader->err = err;
    reader->opool = opool;
    reader->tmp_pool = tmp_pool;

    reader->jit_state = pgf_new_jit(reader);

    return reader;
}
// Finishes reading: hands the grammar's abstract syntax to
// pgf_jit_done.  Does nothing when no grammar was produced.
void
pgf_reader_done(PgfReader* rdr, PgfPGF* pgf)
{
    if (pgf != NULL) {
        pgf_jit_done(rdr, &pgf->abstract);
    }
}