reading & writing grammars in the new format

This commit is contained in:
krangelov
2021-11-08 10:39:05 +01:00
parent 02b9915d11
commit d6cf023258
6 changed files with 335 additions and 3 deletions

View File

@@ -42,4 +42,24 @@ void PgfPGF::release(ref<PgfPGF> pgf)
// Releases the namespaces held by a concrete syntax by passing each of
// them to namespace_release(), in the order: flags, linearizations,
// linearization categories, print names.
void PgfConcr::release(ref<PgfConcr> concr)
{
    namespace_release(concr->cflags);

    namespace_release(concr->lins);
    namespace_release(concr->lincats);

    namespace_release(concr->printnames);
}
// Frees the three containers owned by a linearization: its argument
// list, its result list and its sequences.
//
// NOTE(review): only the containers themselves are passed to
// PgfDB::free(). Objects referenced from inside them (for example the
// `param` index that the reader allocates for every argument/result) are
// not freed in this function -- confirm they are reclaimed elsewhere.
void PgfConcrLin::release(ref<PgfConcrLin> lin)
{
    PgfDB::free(lin->args);
    PgfDB::free(lin->res);
    PgfDB::free(lin->seqs);
}
// Frees the `fields` container of a linearization category.
//
// NOTE(review): the individual entries stored in `fields` are not freed
// here -- confirm whether they need to be released separately.
void PgfConcrLincat::release(ref<PgfConcrLincat> lincat)
{
    PgfDB::free(lincat->fields);
}
// Frees the text stored in the `printname` field of a print-name entry.
void PgfConcrPrintname::release(ref<PgfConcrPrintname> printname)
{
    PgfDB::free(printname->printname);
}

View File

@@ -876,6 +876,9 @@ PgfConcrRevision pgf_create_concrete(PgfDB *db, PgfRevision revision,
concr->ref_count = 1;
concr->ref_count_ex = 1;
concr->cflags = 0;
concr->lins = 0;
concr->lincats = 0;
concr->printnames = 0;
concr->prev = 0;
concr->next = 0;
memcpy(&concr->name, name, sizeof(PgfText)+name->size+1);
@@ -912,6 +915,9 @@ PgfConcrRevision pgf_clone_concrete(PgfDB *db, PgfRevision revision,
clone->ref_count = 1;
clone->ref_count_ex = 1;
clone->cflags = concr->cflags;
clone->lins = concr->lins;
clone->lincats = concr->lincats;
clone->printnames = concr->printnames;
clone->prev = 0;
clone->next = 0;
memcpy(&clone->name, name, sizeof(PgfText)+name->size+1);

View File

@@ -6,6 +6,8 @@
// Creates a reader over the stream `in`. No abstract or concrete syntax
// is current yet, so both references start out as 0.
PgfReader::PgfReader(FILE *in)
{
    // `in` is shadowed by the parameter, so it must be qualified.
    this->in = in;

    abstract = 0;
    concrete = 0;
}
uint8_t PgfReader::read_uint8()
@@ -425,18 +427,198 @@ ref<PgfAbsCat> PgfReader::read_abscat()
// Fills in `abstract` from the input stream and remembers it as the
// reader's current abstract syntax.
//
// The components are consumed in stream order: name, flags, functions,
// categories. Each one is read into a local first, so the read has fully
// completed before `abstract` is dereferenced for the store.
void PgfReader::read_abstract(ref<PgfAbstr> abstract)
{
    this->abstract = abstract;

    auto name = read_name();
    abstract->name = name;

    auto aflags = read_namespace<PgfFlag>(&PgfReader::read_flag);
    abstract->aflags = aflags;

    auto funs = read_namespace<PgfAbsFun>(&PgfReader::read_absfun);
    abstract->funs = funs;

    auto cats = read_namespace<PgfAbsCat>(&PgfReader::read_abscat);
    abstract->cats = cats;
}
// Reads a linear index from the stream: an integer `i0`, a term count,
// and then one (factor, var) integer pair per term.
//
// Returns a freshly allocated PgfConcrLIndex whose allocation is extended
// by one `terms` element per term read.
ref<PgfConcrLIndex> PgfReader::read_lindex()
{
    size_t base  = read_int();
    size_t count = read_len();

    ref<PgfConcrLIndex> result =
        PgfDB::malloc<PgfConcrLIndex>(count*sizeof(PgfConcrLIndex::terms[0]));
    result->i0      = base;
    result->n_terms = count;

    size_t k = 0;
    while (k < count) {
        result->terms[k].factor = read_int();
        result->terms[k].var    = read_int();
        k++;
    }

    return result;
}
// Reads one linearization argument into `linarg`: a length-prefixed
// category name, resolved against the lincats of the current concrete
// syntax, followed by a linear index.
//
// Throws pgf_error when the stream ends or fails while the name is being
// read, or when the name is not found among the lincats.
//
// NOTE(review): the name buffer is taken from the stack with alloca() and
// its size comes straight from the input -- confirm that the length is
// bounded somewhere, otherwise a corrupt file can exhaust the stack.
void PgfReader::read_linarg(ref<PgfConcrLinArg> linarg)
{
    size_t n_bytes = read_len();

    // Temporary, stack-allocated key; it is only needed for the lookup.
    PgfText* cat_name = (PgfText*) alloca(sizeof(PgfText)+n_bytes+1);
    cat_name->size = n_bytes;

    // Hitting EOF in the middle of the name is reported as an encoding
    // error rather than as a normal end of the stream.
    fread(cat_name->text, n_bytes, 1, in);
    if (feof(in))
        throw pgf_error("utf8 decoding error");
    if (ferror(in))
        throw pgf_error("an error occured while reading the grammar");
    cat_name->text[n_bytes] = 0;

    linarg->lincat = namespace_lookup(this->concrete->lincats, cat_name);
    if (linarg->lincat == 0)
        throw pgf_error("Encountered an unknown category");

    // Finish reading (and allocating) the index before touching `linarg`.
    ref<PgfConcrLIndex> param = read_lindex();
    linarg->param = param;
}
// Reads one linearization result into `linres`: a length-prefixed
// category name, resolved against the lincats of the current concrete
// syntax, followed by a linear index.
//
// Throws pgf_error when the stream ends or fails while the name is being
// read, or when the name is not found among the lincats.
//
// NOTE(review): the name buffer is taken from the stack with alloca() and
// its size comes straight from the input -- confirm that the length is
// bounded somewhere, otherwise a corrupt file can exhaust the stack.
void PgfReader::read_linres(ref<PgfConcrLinRes> linres)
{
    size_t n_bytes = read_len();

    // Temporary, stack-allocated key; it is only needed for the lookup.
    PgfText* cat_name = (PgfText*) alloca(sizeof(PgfText)+n_bytes+1);
    cat_name->size = n_bytes;

    // Hitting EOF in the middle of the name is reported as an encoding
    // error rather than as a normal end of the stream.
    fread(cat_name->text, n_bytes, 1, in);
    if (feof(in))
        throw pgf_error("utf8 decoding error");
    if (ferror(in))
        throw pgf_error("an error occured while reading the grammar");
    cat_name->text[n_bytes] = 0;

    linres->lincat = namespace_lookup(this->concrete->lincats, cat_name);
    if (linres->lincat == 0)
        throw pgf_error("Encountered an unknown category");

    // Finish reading (and allocating) the index before touching `linres`.
    ref<PgfConcrLIndex> param = read_lindex();
    linres->param = param;
}
// Reads an indexed symbol of type `I` from the stream: an integer `d`,
// then an embedded linear index (`i0`, a term count and one
// (factor, var) integer pair per term), stored in the symbol's `r` field.
//
// The allocation for `I` is extended by one PgfConcrLIndex term per term
// read.
template<class I>
ref<I> PgfReader::read_symbol_idx()
{
    size_t arg   = read_int();
    size_t base  = read_int();
    size_t count = read_len();

    ref<I> result =
        PgfDB::malloc<I>(count*sizeof(PgfConcrLIndex::terms[0]));
    result->d         = arg;
    result->r.i0      = base;
    result->r.n_terms = count;

    size_t k = 0;
    while (k < count) {
        result->r.terms[k].factor = read_int();
        result->r.terms[k].var    = read_int();
        k++;
    }

    return result;
}
// Reads one symbol from the stream. A tag byte selects the symbol kind;
// the payload that follows depends on the kind:
//   - Cat / Lit : an indexed symbol (see read_symbol_idx)
//   - Var       : two integers, `d` and `r`
//   - KS        : a text token
//   - KP        : the symbol is allocated, but no payload is read here
//   - BIND, SOFTBIND, NE, SOFTSPACE, CAPIT, ALLCAPIT : no payload; the
//     symbol is just the tag applied to 0
//
// Returns the tagged symbol, or throws pgf_error on an unrecognised tag.
PgfSymbol PgfReader::read_symbol()
{
    uint8_t tag = read_tag();

    switch (tag) {
    case PgfSymbolCat::tag: {
        ref<PgfSymbolCat> cat = read_symbol_idx<PgfSymbolCat>();
        return ref<PgfSymbolCat>::tagged(cat);
    }
    case PgfSymbolLit::tag: {
        ref<PgfSymbolLit> lit = read_symbol_idx<PgfSymbolLit>();
        return ref<PgfSymbolLit>::tagged(lit);
    }
    case PgfSymbolVar::tag: {
        ref<PgfSymbolVar> var = PgfDB::malloc<PgfSymbolVar>();
        var->d = read_int();
        var->r = read_int();
        return ref<PgfSymbolVar>::tagged(var);
    }
    case PgfSymbolKS::tag: {
        ref<PgfSymbolKS> ks = read_text(&PgfSymbolKS::token);
        return ref<PgfSymbolKS>::tagged(ks);
    }
    case PgfSymbolKP::tag: {
        // NOTE(review): nothing is read into the new symbol, so its
        // fields are left as allocated -- looks unfinished, confirm.
        ref<PgfSymbolKP> kp = PgfDB::malloc<PgfSymbolKP>();
        return ref<PgfSymbolKP>::tagged(kp);
    }
    case PgfSymbolBIND::tag:
        return ref<PgfSymbolBIND>::tagged(0);
    case PgfSymbolSOFTBIND::tag:
        return ref<PgfSymbolSOFTBIND>::tagged(0);
    case PgfSymbolNE::tag:
        return ref<PgfSymbolNE>::tagged(0);
    case PgfSymbolSOFTSPACE::tag:
        return ref<PgfSymbolSOFTSPACE>::tagged(0);
    case PgfSymbolCAPIT::tag:
        return ref<PgfSymbolCAPIT>::tagged(0);
    case PgfSymbolALLCAPIT::tag:
        return ref<PgfSymbolALLCAPIT>::tagged(0);
    default:
        throw pgf_error("Unknown symbol tag");
    }
}
// Reads one linearization category: its name followed by a vector of
// field texts. The new object starts with a reference count of 1.
ref<PgfConcrLincat> PgfReader::read_lincat()
{
    ref<PgfConcrLincat> result = read_name(&PgfConcrLincat::name);
    result->ref_count = 1;

    // Read the vector completely before dereferencing `result` again.
    auto fields = read_vector(&PgfReader::read_text2);
    result->fields = fields;

    return result;
}
// Reads one linearization: its name followed by three vectors, in stream
// order -- arguments, results and sequences. The new object starts with a
// reference count of 1.
ref<PgfConcrLin> PgfReader::read_lin()
{
    ref<PgfConcrLin> result = read_name(&PgfConcrLin::name);
    result->ref_count = 1;

    // Each vector is read completely before `result` is dereferenced for
    // the store.
    auto args = read_vector(&PgfReader::read_linarg);
    result->args = args;

    auto res = read_vector(&PgfReader::read_linres);
    result->res = res;

    auto seqs = read_vector(&PgfReader::read_seq2);
    result->seqs = seqs;

    return result;
}
// Reads one print-name entry: the entry's name followed by the print-name
// text. The new object starts with a reference count of 1.
ref<PgfConcrPrintname> PgfReader::read_printname()
{
    ref<PgfConcrPrintname> result = read_name(&PgfConcrPrintname::name);
    result->ref_count = 1;

    // Read the text completely before dereferencing `result` again.
    auto text = read_text();
    result->printname = text;

    return result;
}
ref<PgfConcr> PgfReader::read_concrete()
{
ref<PgfConcr> concr = read_name(&PgfConcr::name);
this->concrete = concr;
concr->ref_count = 1;
concr->ref_count_ex = 0;
concr->cflags = read_namespace<PgfFlag>(&PgfReader::read_flag);
concr->lincats = read_namespace<PgfConcrLincat>(&PgfReader::read_lincat);
concr->lins = read_namespace<PgfConcrLin>(&PgfReader::read_lin);
concr->printnames = read_namespace<PgfConcrPrintname>(&PgfReader::read_printname);
concr->prev = 0;
concr->next = 0;
return concr;

View File

@@ -60,23 +60,39 @@ public:
ref<PgfFlag> read_flag();
PgfPatt read_patt();
// Adaptor: stores the result of read_patt() through `r`.
void read_patt2(ref<PgfPatt> r) { *r = read_patt(); };
void read_defn(ref<ref<PgfEquation>> defn);
ref<PgfAbsFun> read_absfun();
ref<PgfAbsCat> read_abscat();
// Fills in `abstract` (name, flags, functions, categories) from the stream.
void read_abstract(ref<PgfAbstr> abstract);
// Reads a linearization category: a name and a vector of field texts.
ref<PgfConcrLincat> read_lincat();
// Reads a linear index: i0, a term count and (factor, var) pairs.
ref<PgfConcrLIndex> read_lindex();
// Read a category name (looked up among the current concrete syntax's
// lincats) plus a linear index into an argument / result slot.
void read_linarg(ref<PgfConcrLinArg> linarg);
void read_linres(ref<PgfConcrLinRes> linres);
// Reads one tagged symbol.
PgfSymbol read_symbol();
// Reads a linearization: a name and its args, res and seqs vectors.
ref<PgfConcrLin> read_lin();
// Reads a print-name entry: a name and its text.
ref<PgfConcrPrintname> read_printname();
ref<PgfConcr> read_concrete();
ref<PgfPGF> read_pgf();
private:
// Input stream the grammar is read from.
FILE *in;
// Abstract syntax currently being read (0 until read_abstract() runs).
ref<PgfAbstr> abstract;
// Concrete syntax currently being read (0 until read_concrete() runs);
// read_linarg()/read_linres() resolve category names against its lincats.
ref<PgfConcr> concrete;
object read_name_internal(size_t struct_size);
object read_text_internal(size_t struct_size);
// Element adaptors: each one reads a single value and stores it through
// the reference it is given. read_text2, read_symbol2 and read_seq2 are
// passed to read_vector() as the per-element reader.
void read_patt2(ref<PgfPatt> r) { *r = read_patt(); };
void read_text2(ref<ref<PgfText>> r) { *r = read_text(); };
void read_symbol2(ref<PgfSymbol> r) { *r = read_symbol(); };
// A sequence is itself a vector of symbols.
void read_seq2(ref<ref<PgfSequence>> r) { *r = read_vector(&PgfReader::read_symbol2); }
// Shared reader for symbol types that carry a `d` field and an embedded
// linear index `r` (used for PgfSymbolCat and PgfSymbolLit).
template<class I>
ref<I> read_symbol_idx();
};
#endif

View File

@@ -385,10 +385,105 @@ void PgfWriter::write_abstract(ref<PgfAbstr> abstract)
this->abstract = 0;
}
// Writes one linearization category: its name, then its vector of field
// texts.
void PgfWriter::write_lincat(ref<PgfConcrLincat> lincat)
{
    write_name(&lincat->name);

    write_vector(lincat->fields, &PgfWriter::write_text);
}
// Writes a linear index: `i0`, the number of terms, and then the
// (factor, var) pair of every term in order.
void PgfWriter::write_lindex(ref<PgfConcrLIndex> lindex)
{
    write_int(lindex->i0);
    write_len(lindex->n_terms);

    size_t k = 0;
    while (k < lindex->n_terms) {
        write_int(lindex->terms[k].factor);
        write_int(lindex->terms[k].var);
        k++;
    }
}
// Writes one linearization argument: the name of its category followed
// by its linear index.
void PgfWriter::write_linarg(ref<PgfConcrLinArg> linarg)
{
    write_name(&linarg->lincat->name);

    write_lindex(linarg->param);
}
// Writes one linearization result: the name of its category followed by
// its linear index.
void PgfWriter::write_linres(ref<PgfConcrLinRes> linres)
{
    write_name(&linres->lincat->name);

    write_lindex(linres->param);
}
// Writes one symbol: its tag first, then a payload that depends on the
// symbol kind:
//   - Cat / Lit : `d` followed by the embedded linear index `r`
//   - Var       : the two integers `d` and `r`
//   - KS        : the token text
//   - KP        : no payload is written
//   - BIND, SOFTBIND, NE, SOFTSPACE, CAPIT, ALLCAPIT : no payload
//
// Throws pgf_error for an unrecognised tag. The tag has already been
// written to the output at that point.
void PgfWriter::write_symbol(PgfSymbol sym)
{
    auto tag = ref<PgfSymbol>::get_tag(sym);
    write_tag(tag);

    switch (tag) {
    case PgfSymbolCat::tag: {
        auto sym_cat = ref<PgfSymbolCat>::untagged(sym);
        write_int(sym_cat->d);
        write_lindex(ref<PgfConcrLIndex>::from_ptr(&sym_cat->r));
        break;
    }
    case PgfSymbolLit::tag: {
        auto sym_lit = ref<PgfSymbolLit>::untagged(sym);
        write_int(sym_lit->d);
        write_lindex(ref<PgfConcrLIndex>::from_ptr(&sym_lit->r));
        break;
    }
    case PgfSymbolVar::tag: {
        auto sym_var = ref<PgfSymbolVar>::untagged(sym);
        write_int(sym_var->d);
        write_int(sym_var->r);
        break;
    }
    case PgfSymbolKS::tag: {
        auto sym_ks = ref<PgfSymbolKS>::untagged(sym);
        write_text(&sym_ks->token);
        break;
    }
    case PgfSymbolKP::tag:
        // TODO: the contents of a KP symbol are not serialized; only the
        // tag is written. (The unused, misnamed local that used to be
        // declared here has been dropped.)
        break;
    case PgfSymbolBIND::tag:
    case PgfSymbolSOFTBIND::tag:
    case PgfSymbolNE::tag:
    case PgfSymbolSOFTSPACE::tag:
    case PgfSymbolCAPIT::tag:
    case PgfSymbolALLCAPIT::tag:
        // Payload-free symbols: the tag alone identifies them.
        break;
    default:
        throw pgf_error("Unknown symbol tag");
    }
}
// Writes a sequence as a vector of symbols, one write_symbol call per
// element.
void PgfWriter::write_seq(ref<PgfSequence> seq)
{
    write_vector(seq, &PgfWriter::write_symbol);
}
// Writes one linearization: its name, then its argument, result and
// sequence vectors, in that order.
void PgfWriter::write_lin(ref<PgfConcrLin> lin)
{
    write_name(&lin->name);

    write_vector(lin->args, &PgfWriter::write_linarg);
    write_vector(lin->res,  &PgfWriter::write_linres);
    write_vector(lin->seqs, &PgfWriter::write_seq);
}
// Writes one print-name entry: the entry's name followed by the
// print-name text.
void PgfWriter::write_printname(ref<PgfConcrPrintname> printname)
{
    write_name(&printname->name);

    write_text(printname->printname);
}
// Writes a concrete syntax: its name, then its four namespaces in the
// order flags, linearization categories, linearizations, print names.
void PgfWriter::write_concrete(ref<PgfConcr> concr)
{
    write_name(&concr->name);

    write_namespace<PgfFlag>(concr->cflags, &PgfWriter::write_flag);

    // Categories go out before the linearizations.
    write_namespace<PgfConcrLincat>(concr->lincats, &PgfWriter::write_lincat);
    write_namespace<PgfConcrLin>(concr->lins, &PgfWriter::write_lin);

    write_namespace<PgfConcrPrintname>(concr->printnames, &PgfWriter::write_printname);
}
void PgfWriter::write_pgf(ref<PgfPGF> pgf)

View File

@@ -33,7 +33,6 @@ public:
void write_type(ref<PgfDTyp> ty);
void write_patt(PgfPatt patt);
// Adaptor: writes the pattern that `r` refers to.
void write_patt(ref<PgfPatt> r) { write_patt(*r); };
void write_defn(ref<ref<PgfEquation>> r);
void write_flag(ref<PgfFlag> flag);
@@ -42,6 +41,15 @@ public:
void write_abscat(ref<PgfAbsCat> abscat);
void write_abstract(ref<PgfAbstr> abstract);
// Writes a linearization category: its name and its vector of fields.
void write_lincat(ref<PgfConcrLincat> lincat);
// Writes a linear index: i0, the term count and the (factor, var) pairs.
void write_lindex(ref<PgfConcrLIndex> lindex);
// Write the category name and the linear index of an argument / result.
void write_linarg(ref<PgfConcrLinArg> linarg);
void write_linres(ref<PgfConcrLinRes> linres);
// Writes a symbol's tag followed by its kind-specific payload.
void write_symbol(PgfSymbol sym);
// Writes a sequence as a vector of symbols.
void write_seq(ref<PgfSequence> seq);
// Writes a linearization: its name and its args, res and seqs vectors.
void write_lin(ref<PgfConcrLin> lin);
// Writes a print-name entry: its name and its text.
void write_printname(ref<PgfConcrPrintname> printname);
// Writes a concrete syntax: its name, flags, lincats, lins and printnames.
void write_concrete(ref<PgfConcr> concr);
void write_pgf(ref<PgfPGF> pgf);
@@ -50,6 +58,11 @@ private:
template<class V>
void write_namespace_helper(Namespace<V> nmsp, void (PgfWriter::*write_value)(ref<V>));
// Element adaptors: each one dereferences the reference it is given and
// forwards the value to the overload that does the actual writing.
void write_patt(ref<PgfPatt> r) { write_patt(*r); };
void write_text(ref<ref<PgfText>> r) { write_text(&(**r)); };
void write_seq(ref<ref<PgfSequence>> r) { write_seq(*r); };
void write_symbol(ref<PgfSymbol> r) { write_symbol(*r); };
// Output stream the grammar is written to.
FILE *out;
ref<PgfAbstr> abstract;
};