forked from GitHub/gf-core
first draft of an LR parser
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
#include "data.h"
|
||||
#include "reader.h"
|
||||
#include "parser.h"
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
@@ -650,14 +651,14 @@ ref<PgfSequence> PgfReader::read_seq()
|
||||
return seq;
|
||||
}
|
||||
|
||||
ref<Vector<ref<PgfSequence>>> PgfReader::read_seq_ids(object container)
|
||||
ref<Vector<ref<PgfSequence>>> PgfReader::read_seq_ids(ref<PgfConcrLincat> lincat, object container)
|
||||
{
|
||||
size_t len = read_len();
|
||||
ref<Vector<ref<PgfSequence>>> vec = vector_new<ref<PgfSequence>>(len);
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
size_t seq_id = read_len();
|
||||
ref<PgfSequence> seq = phrasetable_relink(concrete->phrasetable,
|
||||
container, i,
|
||||
lincat, container, i,
|
||||
seq_id);
|
||||
if (seq == 0) {
|
||||
throw pgf_error("Invalid sequence id");
|
||||
@@ -701,7 +702,7 @@ ref<PgfConcrLincat> PgfReader::read_lincat()
|
||||
auto n_lindefs = read_len();
|
||||
auto args = read_vector(&PgfReader::read_parg);
|
||||
auto res = read_vector(&PgfReader::read_presult2);
|
||||
auto seqs = read_seq_ids(lincat.tagged());
|
||||
auto seqs = read_seq_ids(0, lincat.tagged());
|
||||
|
||||
lincat->abscat = namespace_lookup(abstract->cats, &lincat->name);
|
||||
lincat->fields = fields;
|
||||
@@ -712,130 +713,35 @@ ref<PgfConcrLincat> PgfReader::read_lincat()
|
||||
return lincat;
|
||||
}
|
||||
|
||||
// Reads a length-prefixed list of lincat field names from the input:
// first the count via read_len(), then one read_text() per field.
// Returns the freshly allocated vector of fields.
//
// NOTE(review): this span shows two variants of the signature, of the
// `fields` declaration and of the per-element store (one filling
// PgfLincatField records, one storing the ref<PgfText> name directly).
// Presumably these are the before/after sides of the change; confirm
// which variant is live before editing.
ref<Vector<PgfLincatField>> PgfReader::read_lincat_fields(ref<PgfConcrLincat> lincat)
|
||||
ref<Vector<ref<PgfText>>> PgfReader::read_lincat_fields(ref<PgfConcrLincat> lincat)
|
||||
{
|
||||
size_t len = read_len();
|
||||
ref<Vector<PgfLincatField>> fields = vector_new<PgfLincatField>(len);
|
||||
ref<Vector<ref<PgfText>>> fields = vector_new<ref<PgfText>>(len);
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
auto name = read_text();
|
||||
|
||||
// Record variant: link the field back to its lincat, store the name,
// and start with no backrefs and no epsilons.
ref<PgfLincatField> field = vector_elem(fields,i);
|
||||
field->lincat = lincat;
|
||||
field->name = name;
|
||||
field->backrefs = 0;
|
||||
field->epsilons = 0;
|
||||
// Name-only variant: the vector element is the name itself.
*vector_elem(fields,i) = name;
|
||||
}
|
||||
return fields;
|
||||
}
|
||||
|
||||
// Registers position `dot` of sequence `seq_index` of `lin` in the
// per-field index stored on lincat fields.
//
// - If `dot` is at or past the end of the sequence's symbols, an entry
//   (lin, seq_index) is appended to the `epsilons` vector of the field
//   selected by seq_index % n_fields. When that is the first entry for
//   the field, every backref already recorded on the field is revisited
//   one symbol further (dot+1).
// - Otherwise, if the symbol at `dot` is a PgfSymbolCat, a backref
//   (lin, seq_index, dot) is appended to every field of the argument's
//   lincat that the symbol's linear index expression can select. If any
//   of those fields already has epsilons, the function recurses with
//   dot+1. Symbols with any other tag are ignored.
//
// Throws pgf_error when the argument category has no lincat in `concrete`.
static void add_to_index(ref<PgfConcr> concrete, ref<PgfConcrLin> lin, size_t seq_index, size_t dot)
|
||||
{
|
||||
size_t n_fields = lin->lincat->fields->len;
|
||||
ref<PgfSequence> seq = *vector_elem(lin->seqs,seq_index);
|
||||
// seq_index is split into a result index (quotient) and a field index
// (remainder) with respect to the number of fields of the lin's lincat.
ref<PgfPResult> result = *vector_elem(lin->res, seq_index / n_fields);
|
||||
ref<PgfLincatField> field = vector_elem(lin->lincat->fields, seq_index % n_fields);
|
||||
|
||||
// The dot has moved past the last symbol of the sequence.
if (dot >= seq->syms.len) {
|
||||
ref<Vector<PgfLincatEpsilon>> epsilons = field->epsilons;
|
||||
// Grow the (possibly null) vector by one slot and store it back,
// since vector_resize returns the vector to use from now on.
epsilons =
|
||||
vector_resize(epsilons, ((epsilons == 0) ? 0 : epsilons->len)+1,
|
||||
PgfDB::get_txn_id());
|
||||
field->epsilons = epsilons;
|
||||
ref<PgfLincatEpsilon> epsilon =
|
||||
vector_elem(epsilons,epsilons->len-1);
|
||||
epsilon->lin = lin;
|
||||
epsilon->seq_index = seq_index;
|
||||
|
||||
// Only the first epsilon entry of a field triggers re-processing of the
// backrefs that were recorded on it earlier.
if (epsilons->len == 1 && field->backrefs != 0) {
|
||||
for (size_t i = 0; i < field->backrefs->len; i++) {
|
||||
ref<PgfLincatBackref> backref = vector_elem(field->backrefs,i);
|
||||
add_to_index(concrete,backref->lin,backref->seq_index,backref->dot+1);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
PgfSymbol sym = *vector_elem(&seq->syms,dot);
|
||||
switch (ref<PgfSymbol>::get_tag(sym)) {
|
||||
case PgfSymbolCat::tag: {
|
||||
auto sym_cat = ref<PgfSymbolCat>::untagged(sym);
|
||||
|
||||
// Resolve the lincat of the argument that the symbol refers to
// (hypothesis number sym_cat->d of the abstract function's type).
ref<PgfHypo> hypo =
|
||||
vector_elem(lin->absfun->type->hypos,sym_cat->d);
|
||||
ref<PgfConcrLincat> lincat =
|
||||
namespace_lookup(concrete->lincats,
|
||||
&hypo->type->name);
|
||||
if (lincat == 0)
|
||||
throw pgf_error("Found a lin which uses a category without a lincat");
|
||||
|
||||
// For each term of the index expression, look up the range of its
// variable in result->vars; max_values is the product of those ranges.
// NOTE(review): ranges[i] is assigned only when a matching variable is
// found; if none matches, the alloca'd slot is read below without ever
// being set. Confirm that a match is guaranteed.
size_t max_values = 1;
|
||||
size_t *ranges = (size_t *)
|
||||
alloca(sym_cat->r.n_terms*sizeof(size_t));
|
||||
for (size_t i = 0; i < sym_cat->r.n_terms; i++) {
|
||||
for (size_t j = 0; j < result->vars->len; j++) {
|
||||
auto var_range = vector_elem(result->vars, j);
|
||||
if (var_range->var == sym_cat->r.terms[i].var) {
|
||||
ranges[i] = vector_elem(result->vars, j)->range;
|
||||
max_values *= var_range->range;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool is_epsilon = false;
|
||||
// Enumerate every combination of variable values: `values` is decoded
// digit by digit (digit i has radix ranges[i]) and each digit is scaled
// by the term's factor and added to the constant part i0.
for (size_t values = 0; values < max_values; values++) {
|
||||
size_t v = values;
|
||||
size_t index = sym_cat->r.i0;
|
||||
for (size_t i = 0; i < sym_cat->r.n_terms; i++) {
|
||||
index += sym_cat->r.terms[i].factor * (v % ranges[i]);
|
||||
v = v / ranges[i];
|
||||
}
|
||||
|
||||
// Append a backref (lin, seq_index, dot) to the selected field,
// growing its (possibly null) backrefs vector by one.
ref<Vector<PgfLincatBackref>> backrefs =
|
||||
vector_elem(lincat->fields,index)->backrefs;
|
||||
backrefs =
|
||||
vector_resize(backrefs, ((backrefs == 0) ? 0 : backrefs->len)+1,
|
||||
PgfDB::get_txn_id());
|
||||
vector_elem(lincat->fields,index)->backrefs = backrefs;
|
||||
ref<PgfLincatBackref> backref =
|
||||
vector_elem(backrefs,backrefs->len-1);
|
||||
backref->lin = lin;
|
||||
backref->seq_index = seq_index;
|
||||
backref->dot = dot;
|
||||
|
||||
if (vector_elem(lincat->fields,index)->epsilons != 0)
|
||||
is_epsilon = true;
|
||||
}
|
||||
|
||||
// At least one selectable field already has epsilon entries, so the
// position after this symbol is indexed as well.
if (is_epsilon)
|
||||
add_to_index(concrete,lin,seq_index,dot+1);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Reads one concrete linearization (lin): its name, argument vector,
// result vector and sequence ids. Resolves the abstract function of the
// same name and the lincat of that function's result category.
//
// Throws pgf_error when no abstract function has the lin's name, or when
// the result category has no lincat in the concrete syntax.
//
// NOTE(review): this span shows two variants of the args/res/seqs section.
// One reads them before the lincat lookup, calls read_seq_ids(lin.tagged())
// and then runs add_to_index over every sequence. The other reads them
// after the lookup and calls read_seq_ids(lin->lincat, lin.tagged()).
// Presumably these are the before/after sides of the change; confirm
// which variant is live before editing.
ref<PgfConcrLin> PgfReader::read_lin()
|
||||
{
|
||||
ref<PgfConcrLin> lin = read_name(&PgfConcrLin::name);
|
||||
lin->absfun = namespace_lookup(abstract->funs, &lin->name);
|
||||
if (lin->absfun == 0)
|
||||
throw pgf_error("Found a lin without a fun");
|
||||
|
||||
auto args = read_vector(&PgfReader::read_parg);
|
||||
auto res = read_vector(&PgfReader::read_presult2);
|
||||
auto seqs = read_seq_ids(lin.tagged());
|
||||
|
||||
lin->args = args;
|
||||
lin->res = res;
|
||||
lin->seqs = seqs;
|
||||
// The lincat is looked up by the name of the abstract function's type.
lin->lincat =
|
||||
namespace_lookup(concrete->lincats, &lin->absfun->type->name);
|
||||
if (lin->lincat == 0)
|
||||
throw pgf_error("Found a lin which uses a category without a lincat");
|
||||
|
||||
// Index every sequence of the lin starting at dot position 0.
for (size_t seq_index = 0; seq_index < lin->seqs->len; seq_index++) {
|
||||
add_to_index(concrete, lin, seq_index, 0);
|
||||
}
|
||||
// Variant that needs lin->lincat to be resolved first, because it is
// passed on to read_seq_ids.
auto args = read_vector(&PgfReader::read_parg);
|
||||
auto res = read_vector(&PgfReader::read_presult2);
|
||||
auto seqs = read_seq_ids(lin->lincat, lin.tagged());
|
||||
|
||||
lin->args = args;
|
||||
lin->res = res;
|
||||
lin->seqs = seqs;
|
||||
|
||||
return lin;
|
||||
}
|
||||
@@ -866,6 +772,9 @@ ref<PgfConcr> PgfReader::read_concrete()
|
||||
auto printnames = read_namespace<PgfConcrPrintname>(&PgfReader::read_printname);
|
||||
concrete->printnames = printnames;
|
||||
|
||||
PgfLRTableMaker maker(abstract, concrete);
|
||||
concrete->lrtable = maker.make();
|
||||
|
||||
return concrete;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user