#include #include "data.h" #include "writer.h" PgfWriter::PgfWriter(FILE *out) { this->out = out; this->abstract = 0; } void PgfWriter::write_uint8(uint8_t b) { size_t n_items = fwrite(&b, sizeof(b), 1, out); if (ferror(out)) throw pgf_error("an error occured while writing out the grammar"); if (n_items != 1) throw pgf_error("couldn't write to the output file"); } void PgfWriter::write_u16be(uint16_t u) { uint8_t buf[2] = { (uint8_t) ((u>>8) & 0xFF) , (uint8_t) (u & 0xFF) }; size_t n_items = fwrite(&buf, sizeof(buf), 1, out); if (ferror(out)) throw pgf_error("an error occured while writing out the grammar"); if (n_items != 1) throw pgf_error("couldn't write to the output file"); } void PgfWriter::write_u64be(uint64_t u) { uint8_t buf[8] = { (uint8_t) ((u>>56) & 0xFF) , (uint8_t) ((u>>48) & 0xFF) , (uint8_t) ((u>>40) & 0xFF) , (uint8_t) ((u>>32) & 0xFF) , (uint8_t) ((u>>24) & 0xFF) , (uint8_t) ((u>>16) & 0xFF) , (uint8_t) ((u>>8) & 0xFF) , (uint8_t) (u & 0xFF) }; size_t n_items = fwrite(&buf, sizeof(buf), 1, out); if (ferror(out)) throw pgf_error("an error occured while writing out the grammar"); if (n_items != 1) throw pgf_error("couldn't write to the output file"); } void PgfWriter::write_double(double d) { int sign = signbit(d) > 0; unsigned rawexp; uint64_t mantissa; switch (::fpclassify(d)) { case FP_NAN: rawexp = 0x7ff; mantissa = 1; break; case FP_INFINITE: rawexp = 0x7ff; mantissa = 0; break; default: { int exp; mantissa = (uint64_t) scalbn(frexp(d, &exp), 53); mantissa &= ~ (1ULL << 52); exp -= 53; rawexp = exp + 1075; } } uint64_t u = (((uint64_t) sign) << 63) | (((uint64_t) rawexp & 0x7ff) << 52) | mantissa; write_u64be(u); } void PgfWriter::write_uint(uint64_t u) { for (;;) { uint8_t b = u & 0x7F; u = u >> 7; if (u == 0) { size_t n_items = fwrite(&b, sizeof(b), 1, out); if (ferror(out)) throw pgf_error("an error occured while writing out the grammar"); if (n_items != 1) throw pgf_error("couldn't write to the output file"); break; } else { b = b | 0x80; size_t n_items = fwrite(&b, sizeof(b), 1, out); if (ferror(out)) throw pgf_error("an error occured while writing out the grammar"); if (n_items != 1) throw pgf_error("couldn't write to the output file"); } } } void PgfWriter::write_name(PgfText *text) { write_len(text->size); size_t n_items = fwrite(&text->text, text->size, 1, out); if (ferror(out)) throw pgf_error("an error occured while writing out the grammar"); if (text->size != 0 && n_items != 1) throw pgf_error("couldn't write to the output file"); } void PgfWriter::write_text(PgfText *text) { size_t len = 0; const uint8_t* p = (const uint8_t*) &text->text; const uint8_t* e = p + text->size; while (p < e && pgf_utf8_decode(&p) != 0) len++; write_len(len); size_t n_items = fwrite(&text->text, text->size, 1, out); if (ferror(out)) throw pgf_error("an error occured while writing out the grammar"); if (text->size != 0 && n_items != 1) throw pgf_error("couldn't write to the output file"); } template void PgfWriter::write_namespace(Namespace nmsp, void (PgfWriter::*write_value)(ref)) { write_len(nmsp->sz); write_namespace_helper(nmsp, write_value); } template void PgfWriter::write_namespace_helper(Namespace nmsp, void (PgfWriter::*write_value)(ref)) { if (nmsp == 0) return; write_namespace_helper(nmsp->left, write_value); (this->*write_value)(nmsp->value); write_namespace_helper(nmsp->right, write_value); } template void PgfWriter::write_vector(ref> vec, void (PgfWriter::*write_value)(ref val)) { write_len(vec->len); for (size_t i = 0; i < vec->len; i++) { (this->*write_value)(vector_elem(vec,i)); } } void PgfWriter::write_literal(PgfLiteral literal) { auto tag = ref::get_tag(literal); write_tag(tag); switch (tag) { case PgfLiteralInt::tag: { auto lint = ref::untagged(literal); write_len(lint->size); for (size_t i = 0; i < lint->size; i++) { write_uint(lint->val[i]); } break; } case PgfLiteralStr::tag: { auto lstr = ref::untagged(literal); write_text(&lstr->val); break; } case PgfLiteralFlt::tag: { auto lflt = ref::untagged(literal); write_double(lflt->val); break; } default: throw pgf_error("Unknown literal tag"); } } void PgfWriter::write_expr(PgfExpr expr) { auto tag = ref::get_tag(expr); write_tag(tag); switch (tag) { case PgfExprAbs::tag: { auto eabs = ref::untagged(expr); write_tag(eabs->bind_type); write_name(&eabs->name); write_expr(eabs->body); break; } case PgfExprApp::tag: { auto eapp = ref::untagged(expr); write_expr(eapp->fun); write_expr(eapp->arg); break; } case PgfExprLit::tag: { auto elit = ref::untagged(expr); write_literal(elit->lit); break; } case PgfExprMeta::tag: { write_int(ref::untagged(expr)->id); break; } case PgfExprFun::tag: { write_name(&ref::untagged(expr)->name); break; } case PgfExprVar::tag: { write_int(ref::untagged(expr)->var); break; } case PgfExprTyped::tag: { auto etyped = ref::untagged(expr); write_expr(etyped->expr); write_type(etyped->type); break; } case PgfExprImplArg::tag: { write_expr(ref::untagged(expr)->expr); break; } default: throw pgf_error("Unknown expression tag"); } } void PgfWriter::write_hypo(ref hypo) { write_tag(hypo->bind_type); write_name(hypo->cid); write_type(hypo->type); } void PgfWriter::write_type(ref ty) { write_vector(ty->hypos, &PgfWriter::write_hypo); write_name(&ty->name); write_vector(ty->exprs, &PgfWriter::write_expr); } void PgfWriter::write_flag(ref flag) { write_name(&flag->name); write_literal(flag->value); } void PgfWriter::write_absfun(ref absfun) { write_name(&absfun->name); write_type(absfun->type); write_int(absfun->arity); if (absfun->bytecode == 0) write_tag(0); else { write_tag(1); write_len(0); } write_double(exp(-absfun->prob)); } static void count_funs_by_cat(Namespace funs, PgfText *cat, size_t *pcount) { if (funs == 0) return; count_funs_by_cat(funs->left, cat, pcount); if (textcmp(&funs->value->name, cat) == 0) { *pcount++; } count_funs_by_cat(funs->right, cat, pcount); } static void write_funs_by_cat(Namespace funs, PgfText *cat, PgfWriter *wtr) { if (funs == 0) return; write_funs_by_cat(funs->left, cat, wtr); if (textcmp(&funs->value->name, cat) == 0) { wtr->write_double(exp(-funs->value->prob)); wtr->write_name(&funs->value->name); } write_funs_by_cat(funs->right, cat, wtr); } void PgfWriter::write_abscat(ref abscat) { write_name(&abscat->name); write_vector(abscat->context, &PgfWriter::write_hypo); size_t n_count = 0; count_funs_by_cat(abstract->funs, &abscat->name, &n_count); write_len(n_count); write_funs_by_cat(abstract->funs, &abscat->name, this); write_double(exp(-abscat->prob)); } void PgfWriter::write_abstract(ref abstract) { this->abstract = abstract; write_name(abstract->name); write_namespace(abstract->aflags, &PgfWriter::write_flag); write_namespace(abstract->funs, &PgfWriter::write_absfun); write_namespace(abstract->cats, &PgfWriter::write_abscat); this->abstract = 0; } void PgfWriter::write_variable_range(ref var) { write_int(var->var); write_int(var->range); } void PgfWriter::write_lparam(ref lparam) { write_int(lparam->i0); write_len(lparam->n_terms); for (size_t i = 0; i < lparam->n_terms; i++) { write_int(lparam->terms[i].factor); write_int(lparam->terms[i].var); } } void PgfWriter::write_parg(ref parg) { write_lparam(parg->param); } void PgfWriter::write_presult(ref pres) { if (pres->vars != 0) write_vector(pres->vars, &PgfWriter::write_variable_range); else write_len(0); write_lparam(ref::from_ptr(&pres->param)); } void PgfWriter::write_symbol(PgfSymbol sym) { auto tag = ref::get_tag(sym); write_tag(tag); switch (tag) { case PgfSymbolCat::tag: { auto sym_cat = ref::untagged(sym); write_int(sym_cat->d); write_lparam(ref::from_ptr(&sym_cat->r)); break; } case PgfSymbolLit::tag: { auto sym_lit = ref::untagged(sym); write_int(sym_lit->d); write_lparam(ref::from_ptr(&sym_lit->r)); break; } case PgfSymbolVar::tag: { auto sym_var = ref::untagged(sym); write_int(sym_var->d); write_int(sym_var->r); break; } case PgfSymbolKS::tag: { auto sym_ks = ref::untagged(sym); write_text(&sym_ks->token); break; } case PgfSymbolKP::tag: { auto sym_kp = ref::untagged(sym); write_len(sym_kp->alts.len); for (size_t i = 0; i < sym_kp->alts.len; i++) { write_seq(sym_kp->alts.data[i].form); write_vector(sym_kp->alts.data[i].prefixes, &PgfWriter::write_text); } write_seq(sym_kp->default_form); break; } case PgfSymbolBIND::tag: case PgfSymbolSOFTBIND::tag: case PgfSymbolNE::tag: case PgfSymbolSOFTSPACE::tag: case PgfSymbolCAPIT::tag: case PgfSymbolALLCAPIT::tag: break; default: throw pgf_error("Unknown symbol tag"); } } void PgfWriter::write_seq(ref> seq) { write_vector(seq, &PgfWriter::write_symbol); } void PgfWriter::write_lincat(ref lincat) { write_name(&lincat->name); write_vector(lincat->fields, &PgfWriter::write_text); write_len(lincat->n_lindefs); write_vector(lincat->args, &PgfWriter::write_parg); write_vector(lincat->res, &PgfWriter::write_presult); write_vector(lincat->seqs, &PgfWriter::write_seq); } void PgfWriter::write_lin(ref lin) { write_name(&lin->name); write_vector(lin->args, &PgfWriter::write_parg); write_vector(lin->res, &PgfWriter::write_presult); write_vector(lin->seqs, &PgfWriter::write_seq); } void PgfWriter::write_printname(ref printname) { write_name(&printname->name); write_text(printname->printname); } void PgfWriter::write_concrete(ref concr) { write_name(&concr->name); write_namespace(concr->cflags, &PgfWriter::write_flag); write_namespace(concr->lincats, &PgfWriter::write_lincat); write_namespace(concr->lins, &PgfWriter::write_lin); write_namespace(concr->printnames, &PgfWriter::write_printname); } void PgfWriter::write_pgf(ref pgf) { write_u16be(pgf->major_version); write_u16be(pgf->minor_version); write_namespace(pgf->gflags, &PgfWriter::write_flag); write_abstract(ref::from_ptr(&pgf->abstract)); write_namespace(pgf->concretes, &PgfWriter::write_concrete); }