mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-23 19:42:50 -06:00
bugfix in the robust parser
This commit is contained in:
@@ -997,15 +997,18 @@ pgf_parsing_new_production(PgfItem* item, PgfExprProb *ep, GuPool *pool)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pgf_parsing_add_production(PgfParsing* ps,
|
pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep)
|
||||||
PgfParseState* before, PgfParseState* after,
|
|
||||||
PgfItemConts* conts, PgfProduction prod,
|
|
||||||
prob_t inside_prob)
|
|
||||||
{
|
{
|
||||||
PgfCCat* tmp_ccat = pgf_parsing_get_completed(before, conts);
|
PgfProduction prod =
|
||||||
|
pgf_parsing_new_production(item, ep, ps->pool);
|
||||||
|
#ifdef PGF_COUNTS_DEBUG
|
||||||
|
ps->prod_full_count++;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
PgfCCat* tmp_ccat = pgf_parsing_get_completed(ps->before, item->conts);
|
||||||
PgfCCat* ccat = tmp_ccat;
|
PgfCCat* ccat = tmp_ccat;
|
||||||
if (ccat == NULL) {
|
if (ccat == NULL) {
|
||||||
ccat = pgf_parsing_create_completed(ps, before, conts, inside_prob);
|
ccat = pgf_parsing_create_completed(ps, ps->before, item->conts, item->inside_prob);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ccat->prods == NULL || ccat->n_synprods >= gu_seq_length(ccat->prods)) {
|
if (ccat->prods == NULL || ccat->n_synprods >= gu_seq_length(ccat->prods)) {
|
||||||
@@ -1031,7 +1034,7 @@ pgf_parsing_add_production(PgfParsing* ps,
|
|||||||
|
|
||||||
if (tmp_ccat != NULL) {
|
if (tmp_ccat != NULL) {
|
||||||
PgfItemContss* contss =
|
PgfItemContss* contss =
|
||||||
pgf_parsing_get_contss(before, ccat, ps->pool);
|
pgf_parsing_get_contss(ps->before, ccat, ps->pool);
|
||||||
size_t n_contss = gu_seq_length(contss);
|
size_t n_contss = gu_seq_length(contss);
|
||||||
for (size_t i = 0; i < n_contss; i++) {
|
for (size_t i = 0; i < n_contss; i++) {
|
||||||
PgfItemConts* conts2 = gu_seq_get(contss, PgfItemConts*, i);
|
PgfItemConts* conts2 = gu_seq_get(contss, PgfItemConts*, i);
|
||||||
@@ -1041,13 +1044,13 @@ pgf_parsing_add_production(PgfParsing* ps,
|
|||||||
* production immediately to the agenda,
|
* production immediately to the agenda,
|
||||||
* i.e. process it. */
|
* i.e. process it. */
|
||||||
if (conts2) {
|
if (conts2) {
|
||||||
pgf_parsing_production(ps, before, conts2, prod);
|
pgf_parsing_production(ps, ps->before, conts2, prod);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// The category has already been created. If it has also been
|
// The category has already been created. If it has also been
|
||||||
// predicted already, then process a new item for this production.
|
// predicted already, then process a new item for this production.
|
||||||
PgfParseState* state = after;
|
PgfParseState* state = ps->after;
|
||||||
while (state != NULL) {
|
while (state != NULL) {
|
||||||
PgfItemContss* contss =
|
PgfItemContss* contss =
|
||||||
pgf_parsing_get_contss(state, ccat, ps->pool);
|
pgf_parsing_get_contss(state, ccat, ps->pool);
|
||||||
@@ -1071,26 +1074,14 @@ pgf_parsing_add_production(PgfParsing* ps,
|
|||||||
pgf_result_production(ps, ccat->answers, prod);
|
pgf_result_production(ps, ccat->answers, prod);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
size_t n_conts = gu_buf_length(conts->items);
|
size_t n_conts = gu_buf_length(item->conts->items);
|
||||||
for (size_t i = 0; i < n_conts; i++) {
|
for (size_t i = 0; i < n_conts; i++) {
|
||||||
PgfItem* cont = gu_buf_get(conts->items, PgfItem*, i);
|
PgfItem* cont = gu_buf_get(item->conts->items, PgfItem*, i);
|
||||||
pgf_parsing_combine(ps, before, after, cont, ccat, conts->lin_idx);
|
pgf_parsing_combine(ps, ps->before, ps->after, cont, ccat, item->conts->lin_idx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep)
|
|
||||||
{
|
|
||||||
PgfProduction prod =
|
|
||||||
pgf_parsing_new_production(item, ep, ps->pool);
|
|
||||||
#ifdef PGF_COUNTS_DEBUG
|
|
||||||
before->ps->prod_full_count++;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
pgf_parsing_add_production(ps, ps->before, ps->after, item->conts, prod, item->inside_prob);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pgf_symbols_cmp(GuString* psent, size_t sent_len, BIND_TYPE* pbind, PgfSymbols* syms)
|
pgf_symbols_cmp(GuString* psent, size_t sent_len, BIND_TYPE* pbind, PgfSymbols* syms)
|
||||||
{
|
{
|
||||||
@@ -1196,7 +1187,7 @@ pgf_parsing_lookahead(PgfParsing *ps, PgfParseState* state)
|
|||||||
PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
|
PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
|
||||||
entry->idx = seq->idx;
|
entry->idx = seq->idx;
|
||||||
entry->bind_type = bind_type;
|
entry->bind_type = bind_type;
|
||||||
entry->offset = (current - ps->sentence);
|
entry->offset = (current - ps->sentence);
|
||||||
}
|
}
|
||||||
i = k+1;
|
i = k+1;
|
||||||
goto next;
|
goto next;
|
||||||
@@ -1315,6 +1306,20 @@ pgf_parsing_add_transition(PgfParsing* ps, PgfToken tok, PgfItem* item)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
pgf_parsing_predict_lexeme(PgfParsing* ps,
|
||||||
|
PgfParseState* state, PgfItemConts* conts,
|
||||||
|
PgfProductionIdxEntry* entry)
|
||||||
|
{
|
||||||
|
GuVariantInfo i = { PGF_PRODUCTION_APPLY, entry->papp };
|
||||||
|
PgfProduction prod = gu_variant_close(i);
|
||||||
|
PgfItem* item =
|
||||||
|
pgf_new_item(ps, conts, prod);
|
||||||
|
PgfSymbols* syms = entry->papp->fun->lins[conts->lin_idx]->syms;
|
||||||
|
item->sym_idx = gu_seq_length(syms);
|
||||||
|
gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pgf_parsing_td_predict(PgfParsing* ps,
|
pgf_parsing_td_predict(PgfParsing* ps,
|
||||||
PgfItem* item, PgfCCat* ccat, size_t lin_idx)
|
PgfItem* item, PgfCCat* ccat, size_t lin_idx)
|
||||||
@@ -1357,11 +1362,28 @@ pgf_parsing_td_predict(PgfParsing* ps,
|
|||||||
PgfProductionIdxEntry, &key);
|
PgfProductionIdxEntry, &key);
|
||||||
|
|
||||||
if (value != NULL) {
|
if (value != NULL) {
|
||||||
GuVariantInfo i = { PGF_PRODUCTION_APPLY, value->papp };
|
pgf_parsing_predict_lexeme(ps, state, conts, value);
|
||||||
PgfProduction prod = gu_variant_close(i);
|
|
||||||
pgf_parsing_add_production(ps, state, state->next,
|
PgfProductionIdxEntry* start =
|
||||||
conts, prod,
|
gu_buf_data(lentry->idx);
|
||||||
value->papp->fun->absfun->ep.prob);
|
PgfProductionIdxEntry* end =
|
||||||
|
start + gu_buf_length(lentry->idx)-1;
|
||||||
|
|
||||||
|
PgfProductionIdxEntry* left = value-1;
|
||||||
|
while (left >= start &&
|
||||||
|
value->ccat->fid == left->ccat->fid &&
|
||||||
|
value->lin_idx == left->lin_idx) {
|
||||||
|
pgf_parsing_predict_lexeme(ps, state, conts, left);
|
||||||
|
left--;
|
||||||
|
}
|
||||||
|
|
||||||
|
PgfProductionIdxEntry* right = value+1;
|
||||||
|
while (right <= end &&
|
||||||
|
value->ccat->fid == right->ccat->fid &&
|
||||||
|
value->lin_idx == right->lin_idx) {
|
||||||
|
pgf_parsing_predict_lexeme(ps, state, conts, right);
|
||||||
|
right--;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2683,7 +2705,20 @@ pgf_parser_index(PgfConcr* concr,
|
|||||||
pgf_parser_index_pre(concr, seq, choice, pool);
|
pgf_parser_index_pre(concr, seq, choice, pool);
|
||||||
}
|
}
|
||||||
|
|
||||||
PgfProductionIdxEntry* entry = gu_buf_extend(seq->idx);
|
size_t i = gu_buf_length(seq->idx);
|
||||||
|
while (i > 0) {
|
||||||
|
PgfProductionIdxEntry* entry =
|
||||||
|
gu_buf_index(seq->idx, PgfProductionIdxEntry, i-1);
|
||||||
|
|
||||||
|
if (entry->ccat->fid < ccat->fid)
|
||||||
|
break;
|
||||||
|
if (entry->lin_idx <= lin_idx)
|
||||||
|
break;
|
||||||
|
|
||||||
|
i--;
|
||||||
|
}
|
||||||
|
|
||||||
|
PgfProductionIdxEntry* entry = gu_buf_insert(seq->idx, i);
|
||||||
entry->ccat = ccat;
|
entry->ccat = ccat;
|
||||||
entry->lin_idx = lin_idx;
|
entry->lin_idx = lin_idx;
|
||||||
entry->papp = papp;
|
entry->papp = papp;
|
||||||
|
|||||||
Reference in New Issue
Block a user