1
0
forked from GitHub/gf-core

first draft of an LR parser

This commit is contained in:
Krasimir Angelov
2023-05-10 12:01:48 +02:00
parent 54352b507a
commit 7eac9ea2ab
15 changed files with 1415 additions and 917 deletions

View File

@@ -1,5 +1,6 @@
#include "data.h"
#include "reader.h"
#include "parser.h"
#include <math.h>
#include <string.h>
@@ -650,14 +651,14 @@ ref<PgfSequence> PgfReader::read_seq()
return seq;
}
ref<Vector<ref<PgfSequence>>> PgfReader::read_seq_ids(object container)
ref<Vector<ref<PgfSequence>>> PgfReader::read_seq_ids(ref<PgfConcrLincat> lincat, object container)
{
size_t len = read_len();
ref<Vector<ref<PgfSequence>>> vec = vector_new<ref<PgfSequence>>(len);
for (size_t i = 0; i < len; i++) {
size_t seq_id = read_len();
ref<PgfSequence> seq = phrasetable_relink(concrete->phrasetable,
container, i,
lincat, container, i,
seq_id);
if (seq == 0) {
throw pgf_error("Invalid sequence id");
@@ -701,7 +702,7 @@ ref<PgfConcrLincat> PgfReader::read_lincat()
auto n_lindefs = read_len();
auto args = read_vector(&PgfReader::read_parg);
auto res = read_vector(&PgfReader::read_presult2);
auto seqs = read_seq_ids(lincat.tagged());
auto seqs = read_seq_ids(0, lincat.tagged());
lincat->abscat = namespace_lookup(abstract->cats, &lincat->name);
lincat->fields = fields;
@@ -712,130 +713,35 @@ ref<PgfConcrLincat> PgfReader::read_lincat()
return lincat;
}
// Reads the field list of a lincat: a length (read_len) followed by one text
// (read_text) per field, and returns the populated vector.
//
// NOTE(review): this view interleaves two variants of the function. One
// builds a vector of PgfLincatField records (lincat, name, and empty
// backrefs/epsilons); the other stores only the field name as a
// ref<PgfText>. Confirm which lines are live before editing.
ref<Vector<PgfLincatField>> PgfReader::read_lincat_fields(ref<PgfConcrLincat> lincat)
ref<Vector<ref<PgfText>>> PgfReader::read_lincat_fields(ref<PgfConcrLincat> lincat)
{
size_t len = read_len();
ref<Vector<PgfLincatField>> fields = vector_new<PgfLincatField>(len);
ref<Vector<ref<PgfText>>> fields = vector_new<ref<PgfText>>(len);
for (size_t i = 0; i < len; i++) {
auto name = read_text();
// Record variant: initialise every member of the field in place.
ref<PgfLincatField> field = vector_elem(fields,i);
field->lincat = lincat;
field->name = name;
field->backrefs = 0;
field->epsilons = 0;
// Name-only variant: the element is just the field's name.
*vector_elem(fields,i) = name;
}
return fields;
}
// Registers position `dot` of sequence `seq_index` of `lin` in the index kept
// on the lincat fields (their `epsilons` and `backrefs` vectors).
//
// - If `dot` is at or past the end of the sequence, an epsilon entry
//   (lin, seq_index) is appended to the field of lin's own lincat selected by
//   seq_index % n_fields. When that entry is the field's first epsilon, every
//   backref already recorded on the field is re-indexed at backref->dot+1.
// - Otherwise, if the symbol at `dot` is a PgfSymbolCat, a backref
//   (lin, seq_index, dot) is appended to every field of the argument's lincat
//   that the symbol's index expression can select. If any of those fields
//   already has epsilon entries, indexing continues at dot+1.
// - Symbols of any other kind are left unindexed (the switch has no other case).
//
// Throws pgf_error when the argument's category has no lincat in `concrete`.
static void add_to_index(ref<PgfConcr> concrete, ref<PgfConcrLin> lin, size_t seq_index, size_t dot)
{
// NOTE(review): n_fields is used as a divisor below; confirm a lincat can
// never have zero fields here.
size_t n_fields = lin->lincat->fields->len;
ref<PgfSequence> seq = *vector_elem(lin->seqs,seq_index);
// seq_index is split into a result index (quotient) and a field index
// (remainder) with respect to the number of fields of the lincat.
ref<PgfPResult> result = *vector_elem(lin->res, seq_index / n_fields);
ref<PgfLincatField> field = vector_elem(lin->lincat->fields, seq_index % n_fields);
if (dot >= seq->syms.len) {
// The dot is at the end of the sequence: append an epsilon entry.
ref<Vector<PgfLincatEpsilon>> epsilons = field->epsilons;
// Grow by one element (from 0 when the field has no vector yet). The ref
// returned by vector_resize is stored back into the field; presumably the
// vector may be reallocated by the resize -- TODO confirm.
epsilons =
vector_resize(epsilons, ((epsilons == 0) ? 0 : epsilons->len)+1,
PgfDB::get_txn_id());
field->epsilons = epsilons;
ref<PgfLincatEpsilon> epsilon =
vector_elem(epsilons,epsilons->len-1);
epsilon->lin = lin;
epsilon->seq_index = seq_index;
// Only when this is the field's first epsilon entry: advance every
// sequence already recorded as referring to this field past that reference.
if (epsilons->len == 1 && field->backrefs != 0) {
for (size_t i = 0; i < field->backrefs->len; i++) {
ref<PgfLincatBackref> backref = vector_elem(field->backrefs,i);
add_to_index(concrete,backref->lin,backref->seq_index,backref->dot+1);
}
}
} else {
PgfSymbol sym = *vector_elem(&seq->syms,dot);
switch (ref<PgfSymbol>::get_tag(sym)) {
case PgfSymbolCat::tag: {
auto sym_cat = ref<PgfSymbolCat>::untagged(sym);
// Resolve the lincat of argument number sym_cat->d of the abstract
// function's type.
ref<PgfHypo> hypo =
vector_elem(lin->absfun->type->hypos,sym_cat->d);
ref<PgfConcrLincat> lincat =
namespace_lookup(concrete->lincats,
&hypo->type->name);
if (lincat == 0)
throw pgf_error("Found a lin which uses a category without a lincat");
// For each term of the index expression, look up the range of its
// variable in result->vars; max_values is the product of those ranges.
// NOTE(review): ranges[i] is left unset when no entry of result->vars
// matches the term's variable -- confirm a match always exists.
size_t max_values = 1;
size_t *ranges = (size_t *)
alloca(sym_cat->r.n_terms*sizeof(size_t));
for (size_t i = 0; i < sym_cat->r.n_terms; i++) {
for (size_t j = 0; j < result->vars->len; j++) {
auto var_range = vector_elem(result->vars, j);
if (var_range->var == sym_cat->r.terms[i].var) {
ranges[i] = vector_elem(result->vars, j)->range;
max_values *= var_range->range;
break;
}
}
}
bool is_epsilon = false;
// Enumerate every combination of variable values: `values` is decoded as
// a mixed-radix number whose digits are the per-term variable values, and
// index = i0 + sum(factor * digit).
for (size_t values = 0; values < max_values; values++) {
size_t v = values;
size_t index = sym_cat->r.i0;
for (size_t i = 0; i < sym_cat->r.n_terms; i++) {
index += sym_cat->r.terms[i].factor * (v % ranges[i]);
v = v / ranges[i];
}
// Append a backref (lin, seq_index, dot) to the selected field of the
// argument's lincat, growing its backrefs vector by one.
ref<Vector<PgfLincatBackref>> backrefs =
vector_elem(lincat->fields,index)->backrefs;
backrefs =
vector_resize(backrefs, ((backrefs == 0) ? 0 : backrefs->len)+1,
PgfDB::get_txn_id());
vector_elem(lincat->fields,index)->backrefs = backrefs;
ref<PgfLincatBackref> backref =
vector_elem(backrefs,backrefs->len-1);
backref->lin = lin;
backref->seq_index = seq_index;
backref->dot = dot;
// Remember whether any selected field already has epsilon entries.
if (vector_elem(lincat->fields,index)->epsilons != 0)
is_epsilon = true;
}
// At least one selected field already has an epsilon entry, so the
// position after this symbol is indexed as well.
if (is_epsilon)
add_to_index(concrete,lin,seq_index,dot+1);
break;
}
}
}
};
// Reads one concrete linearization (PgfConcrLin): its name, argument vector,
// result vector and sequence ids, and links it to its abstract function and
// to the lincat of that function's result category.
//
// Throws pgf_error when no abstract function has the lin's name, or when the
// function's category has no lincat in the concrete syntax.
//
// NOTE(review): this view interleaves two variants of the function, so the
// args/res/seqs reads appear twice. One variant reads them before resolving
// the lincat and then calls add_to_index for every sequence; the other
// resolves the lincat first and passes it to read_seq_ids. Confirm which
// lines are live before editing.
ref<PgfConcrLin> PgfReader::read_lin()
{
ref<PgfConcrLin> lin = read_name(&PgfConcrLin::name);
// The abstract function is looked up by the lin's own name.
lin->absfun = namespace_lookup(abstract->funs, &lin->name);
if (lin->absfun == 0)
throw pgf_error("Found a lin without a fun");
// Variant A: read the payload first; read_seq_ids receives only the tagged lin.
auto args = read_vector(&PgfReader::read_parg);
auto res = read_vector(&PgfReader::read_presult2);
auto seqs = read_seq_ids(lin.tagged());
lin->args = args;
lin->res = res;
lin->seqs = seqs;
// The lincat is looked up by the name of the abstract function's type.
lin->lincat =
namespace_lookup(concrete->lincats, &lin->absfun->type->name);
if (lin->lincat == 0)
throw pgf_error("Found a lin which uses a category without a lincat");
// Variant A: index every sequence of the lin starting at dot position 0.
for (size_t seq_index = 0; seq_index < lin->seqs->len; seq_index++) {
add_to_index(concrete, lin, seq_index, 0);
}
// Variant B: read the payload after the lincat is known and pass the lincat
// to read_seq_ids.
auto args = read_vector(&PgfReader::read_parg);
auto res = read_vector(&PgfReader::read_presult2);
auto seqs = read_seq_ids(lin->lincat, lin.tagged());
lin->args = args;
lin->res = res;
lin->seqs = seqs;
return lin;
}
@@ -866,6 +772,9 @@ ref<PgfConcr> PgfReader::read_concrete()
auto printnames = read_namespace<PgfConcrPrintname>(&PgfReader::read_printname);
concrete->printnames = printnames;
PgfLRTableMaker maker(abstract, concrete);
concrete->lrtable = maker.make();
return concrete;
}