mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-23 19:42:50 -06:00
bugfix in the parser for lexical lookup
This commit is contained in:
@@ -1105,9 +1105,7 @@ pgf_parsing_lookahead(PgfParsing *ps, PgfParseState* state,
|
|||||||
if (seq->idx != NULL) {
|
if (seq->idx != NULL) {
|
||||||
PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
|
PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
|
||||||
entry->idx = seq->idx;
|
entry->idx = seq->idx;
|
||||||
entry->offset =
|
entry->offset = (size_t) (current - ps->sentence);
|
||||||
(gu_seq_length(seq->syms) == 0) ? state->start_offset
|
|
||||||
: (size_t) (current - ps->sentence);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (len+1 <= max)
|
if (len+1 <= max)
|
||||||
@@ -1179,9 +1177,23 @@ pgf_new_parse_state(PgfParsing* ps, size_t start_offset,
|
|||||||
if (ps->before == NULL && start_offset == 0)
|
if (ps->before == NULL && start_offset == 0)
|
||||||
state->needs_bind = false;
|
state->needs_bind = false;
|
||||||
|
|
||||||
pgf_parsing_lookahead(ps, state,
|
if (gu_seq_length(ps->concr->sequences) > 0) {
|
||||||
0, gu_seq_length(ps->concr->sequences)-1,
|
// Add epsilon lexical rules to the bottom up index
|
||||||
0, strlen(ps->sentence)-state->end_offset);
|
PgfSequence* seq = gu_seq_index(ps->concr->sequences, PgfSequence, 0);
|
||||||
|
if (gu_seq_length(seq->syms) == 0 && seq->idx != NULL) {
|
||||||
|
PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
|
||||||
|
entry->idx = seq->idx;
|
||||||
|
entry->offset = state->start_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add non-epsilon lexical rules to the bottom up index
|
||||||
|
if (!state->needs_bind) {
|
||||||
|
pgf_parsing_lookahead(ps, state,
|
||||||
|
0, gu_seq_length(ps->concr->sequences)-1,
|
||||||
|
1, strlen(ps->sentence)-state->end_offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
*pstate = state;
|
*pstate = state;
|
||||||
|
|
||||||
@@ -1269,44 +1281,43 @@ pgf_parsing_td_predict(PgfParsing* ps,
|
|||||||
pgf_parsing_production(ps, ps->before, conts, prod);
|
pgf_parsing_production(ps, ps->before, conts, prod);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!ps->before->needs_bind) {
|
// Bottom-up prediction for lexical and epsilon rules
|
||||||
// Bottom-up prediction for lexical and epsilon rules
|
size_t n_idcs = gu_buf_length(ps->before->lexicon_idx);
|
||||||
size_t n_idcs = gu_buf_length(ps->before->lexicon_idx);
|
for (size_t i = 0; i < n_idcs; i++) {
|
||||||
for (size_t i = 0; i < n_idcs; i++) {
|
PgfLexiconIdxEntry* lentry =
|
||||||
PgfLexiconIdxEntry* lentry =
|
gu_buf_index(ps->before->lexicon_idx, PgfLexiconIdxEntry, i);
|
||||||
gu_buf_index(ps->before->lexicon_idx, PgfLexiconIdxEntry, i);
|
|
||||||
PgfProductionIdxEntry key;
|
|
||||||
key.ccat = ccat;
|
|
||||||
key.lin_idx = lin_idx;
|
|
||||||
key.papp = NULL;
|
|
||||||
PgfProductionIdxEntry* value =
|
|
||||||
gu_seq_binsearch(gu_buf_data_seq(lentry->idx),
|
|
||||||
pgf_production_idx_entry_order,
|
|
||||||
PgfProductionIdxEntry, &key);
|
|
||||||
|
|
||||||
if (value != NULL) {
|
PgfProductionIdxEntry key;
|
||||||
pgf_parsing_predict_lexeme(ps, conts, value, lentry->offset);
|
key.ccat = ccat;
|
||||||
|
key.lin_idx = lin_idx;
|
||||||
|
key.papp = NULL;
|
||||||
|
PgfProductionIdxEntry* value =
|
||||||
|
gu_seq_binsearch(gu_buf_data_seq(lentry->idx),
|
||||||
|
pgf_production_idx_entry_order,
|
||||||
|
PgfProductionIdxEntry, &key);
|
||||||
|
|
||||||
PgfProductionIdxEntry* start =
|
if (value != NULL) {
|
||||||
gu_buf_data(lentry->idx);
|
pgf_parsing_predict_lexeme(ps, conts, value, lentry->offset);
|
||||||
PgfProductionIdxEntry* end =
|
|
||||||
start + gu_buf_length(lentry->idx)-1;
|
|
||||||
|
|
||||||
PgfProductionIdxEntry* left = value-1;
|
PgfProductionIdxEntry* start =
|
||||||
while (left >= start &&
|
gu_buf_data(lentry->idx);
|
||||||
value->ccat->fid == left->ccat->fid &&
|
PgfProductionIdxEntry* end =
|
||||||
value->lin_idx == left->lin_idx) {
|
start + gu_buf_length(lentry->idx)-1;
|
||||||
pgf_parsing_predict_lexeme(ps, conts, left, lentry->offset);
|
|
||||||
left--;
|
|
||||||
}
|
|
||||||
|
|
||||||
PgfProductionIdxEntry* right = value+1;
|
PgfProductionIdxEntry* left = value-1;
|
||||||
while (right <= end &&
|
while (left >= start &&
|
||||||
value->ccat->fid == right->ccat->fid &&
|
value->ccat->fid == left->ccat->fid &&
|
||||||
value->lin_idx == right->lin_idx) {
|
value->lin_idx == left->lin_idx) {
|
||||||
pgf_parsing_predict_lexeme(ps, conts, right, lentry->offset);
|
pgf_parsing_predict_lexeme(ps, conts, left, lentry->offset);
|
||||||
right++;
|
left--;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PgfProductionIdxEntry* right = value+1;
|
||||||
|
while (right <= end &&
|
||||||
|
value->ccat->fid == right->ccat->fid &&
|
||||||
|
value->lin_idx == right->lin_idx) {
|
||||||
|
pgf_parsing_predict_lexeme(ps, conts, right, lentry->offset);
|
||||||
|
right++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user