From e2a79748530a51be36c38139237b084af9002806 Mon Sep 17 00:00:00 2001 From: Krasimir Angelov Date: Tue, 4 Oct 2022 11:44:22 +0200 Subject: [PATCH] partial support for epsilon rules --- src/runtime/c/pgf/data.h | 7 +++++ src/runtime/c/pgf/parser.cxx | 55 ++++++++++++++++++++++++++---------- src/runtime/c/pgf/pgf.cxx | 1 + src/runtime/c/pgf/reader.cxx | 15 +++++++++- 4 files changed, 62 insertions(+), 16 deletions(-) diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h index 94c2cbee2..fac40068c 100644 --- a/src/runtime/c/pgf/data.h +++ b/src/runtime/c/pgf/data.h @@ -211,11 +211,13 @@ struct PGF_INTERNAL_DECL PgfSymbolALLCAPIT { struct PGF_INTERNAL_DECL PgfConcrLincat; struct PGF_INTERNAL_DECL PgfLincatBackref; +struct PGF_INTERNAL_DECL PgfLincatEpsilon; struct PGF_INTERNAL_DECL PgfLincatField { ref lincat; ref name; ref> backrefs; + ref> epsilons; static void release(ref field); }; @@ -257,6 +259,11 @@ struct PGF_INTERNAL_DECL PgfLincatBackref { size_t dot; }; +struct PGF_INTERNAL_DECL PgfLincatEpsilon { + ref lin; + size_t seq_index; +}; + struct PGF_INTERNAL_DECL PgfConcrPrintname { ref printname; PgfText name; diff --git a/src/runtime/c/pgf/parser.cxx b/src/runtime/c/pgf/parser.cxx index 3950c980a..fb7050141 100644 --- a/src/runtime/c/pgf/parser.cxx +++ b/src/runtime/c/pgf/parser.cxx @@ -137,6 +137,19 @@ public: this->args[d] = choice; } + ParseItem(ParseItemConts *conts, PgfLincatEpsilon *epsilon, prob_t outside_prob) + { + this->outside_prob = outside_prob; + this->inside_prob = epsilon->lin->absfun->prob; + this->conts = conts; + this->lin = epsilon->lin; + this->seq_index = epsilon->seq_index; + this->dot = 0; + + size_t n_args = epsilon->lin->absfun->type->hypos->len; + memset(this->args, 0, sizeof(Choice*)*n_args); + } + ParseItem(ParseItem *item, size_t d, Choice *choice) { @@ -152,20 +165,33 @@ public: this->args[d] = choice; } - static void bu_predict(PgfLincatBackref *backref, State *state, Choice *choice) + static void bu_predict(ref field, State *state, Choice *choice) { - ref seq = - *vector_elem(backref->lin->seqs, backref->seq_index); - PgfSymbol sym = seq->syms.data[backref->dot]; - ref symcat = ref::untagged(sym); + for (size_t i = 0; i < field->backrefs->len; i++) { + ref backref = vector_elem(field->backrefs, i); - size_t index = backref->seq_index % backref->lin->lincat->fields->len; - ref field = vector_elem(backref->lin->lincat->fields, index); - ParseItemConts *conts = choice->conts->state->get_conts(field, 0); + ref seq = + *vector_elem(backref->lin->seqs, backref->seq_index); + PgfSymbol sym = seq->syms.data[backref->dot]; + ref symcat = ref::untagged(sym); - size_t n_args = backref->lin->absfun->type->hypos->len; - state->queue.push(new(n_args) ParseItem(conts, backref, - symcat->d, choice)); + size_t index = backref->seq_index % backref->lin->lincat->fields->len; + ref up_field = vector_elem(backref->lin->lincat->fields, index); + ParseItemConts *conts = choice->conts->state->get_conts(up_field, 0); + + size_t n_args = backref->lin->absfun->type->hypos->len; + state->queue.push(new(n_args) ParseItem(conts, backref, + symcat->d, choice)); + } + } + + static void eps_predict(ref field, State *state, ParseItemConts *conts, prob_t outside_prob) + { + for (size_t i = 0; i < field->epsilons->len; i++) { + ref epsilon = vector_elem(field->epsilons, i); + size_t n_args = epsilon->lin->absfun->type->hypos->len; + state->queue.push(new(n_args) ParseItem(conts, epsilon, outside_prob)); + } } void combine(State *state, Choice *choice) @@ -229,10 +255,7 @@ public: for (ParseItem *item : conts->items) { item->combine(parser->after,choice); } - for (size_t i = 0; i < conts->field->backrefs->len; i++) { - ref backref = vector_elem(conts->field->backrefs, i); - bu_predict(backref,parser->after,choice); - } + bu_predict(conts->field,parser->after,choice); } } @@ -254,6 +277,8 @@ public: ParseItemConts *conts = parser->after->get_conts(field, 0); conts->items.push_back(this); + + eps_predict(field, parser->after, conts, inside_prob+outside_prob); } } default:; diff --git a/src/runtime/c/pgf/pgf.cxx b/src/runtime/c/pgf/pgf.cxx index 4336bbc69..8b9a7c56c 100644 --- a/src/runtime/c/pgf/pgf.cxx +++ b/src/runtime/c/pgf/pgf.cxx @@ -1506,6 +1506,7 @@ public: vector_elem(db_fields, i)->lincat = lincat; vector_elem(db_fields, i)->name = name; vector_elem(db_fields, i)->backrefs = 0; + vector_elem(db_fields, i)->epsilons = 0; } lincat->fields = db_fields; diff --git a/src/runtime/c/pgf/reader.cxx b/src/runtime/c/pgf/reader.cxx index b27de8e6a..4e8c6e416 100644 --- a/src/runtime/c/pgf/reader.cxx +++ b/src/runtime/c/pgf/reader.cxx @@ -684,6 +684,7 @@ ref> PgfReader::read_lincat_fields(ref li field->lincat = lincat; field->name = read_text(); field->backrefs = 0; + field->epsilons = 0; } return fields; } @@ -717,7 +718,19 @@ ref PgfReader::read_lin() ref result = *vector_elem(lin->res, seq_index / n_fields); size_t dot = 0; - if (dot < seq->syms.len) { + if (dot >= seq->syms.len) { + size_t index = seq_index % n_fields; + ref> epsilons = + vector_elem(lin->lincat->fields,index)->epsilons; + epsilons = + vector_resize(epsilons, epsilons->len+1, + PgfDB::get_txn_id()); + vector_elem(lin->lincat->fields,index)->epsilons = epsilons; + ref epsilon = + vector_elem(epsilons,epsilons->len-1); + epsilon->lin = lin; + epsilon->seq_index = seq_index; + } else { PgfSymbol sym = *vector_elem(&seq->syms,dot); switch (ref::get_tag(sym)) { case PgfSymbolCat::tag: {