1
0
forked from GitHub/gf-core

bugfix in the parser for lexical lookup

This commit is contained in:
krasimir
2015-06-30 12:54:19 +00:00
parent 21df1ed2f5
commit 6f2afdd53e

View File

@@ -1105,9 +1105,7 @@ pgf_parsing_lookahead(PgfParsing *ps, PgfParseState* state,
 if (seq->idx != NULL) {
     PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
     entry->idx = seq->idx;
-    entry->offset =
-        (gu_seq_length(seq->syms) == 0) ? state->start_offset
-                                        : (size_t) (current - ps->sentence);
+    entry->offset = (size_t) (current - ps->sentence);
 }
 if (len+1 <= max)
@@ -1179,9 +1177,23 @@ pgf_new_parse_state(PgfParsing* ps, size_t start_offset,
 if (ps->before == NULL && start_offset == 0)
     state->needs_bind = false;
-pgf_parsing_lookahead(ps, state,
-                      0, gu_seq_length(ps->concr->sequences)-1,
-                      0, strlen(ps->sentence)-state->end_offset);
+if (gu_seq_length(ps->concr->sequences) > 0) {
+    // Add epsilon lexical rules to the bottom up index
+    PgfSequence* seq = gu_seq_index(ps->concr->sequences, PgfSequence, 0);
+    if (gu_seq_length(seq->syms) == 0 && seq->idx != NULL) {
+        PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
+        entry->idx = seq->idx;
+        entry->offset = state->start_offset;
+    }
+    // Add non-epsilon lexical rules to the bottom up index
+    if (!state->needs_bind) {
+        pgf_parsing_lookahead(ps, state,
+                              0, gu_seq_length(ps->concr->sequences)-1,
+                              1, strlen(ps->sentence)-state->end_offset);
+    }
+}
 *pstate = state;
@@ -1269,44 +1281,43 @@ pgf_parsing_td_predict(PgfParsing* ps,
     pgf_parsing_production(ps, ps->before, conts, prod);
 }
-if (!ps->before->needs_bind) {
-    // Bottom-up prediction for lexical and epsilon rules
-    size_t n_idcs = gu_buf_length(ps->before->lexicon_idx);
-    for (size_t i = 0; i < n_idcs; i++) {
-        PgfLexiconIdxEntry* lentry =
-            gu_buf_index(ps->before->lexicon_idx, PgfLexiconIdxEntry, i);
-        PgfProductionIdxEntry key;
-        key.ccat = ccat;
-        key.lin_idx = lin_idx;
-        key.papp = NULL;
-        PgfProductionIdxEntry* value =
-            gu_seq_binsearch(gu_buf_data_seq(lentry->idx),
-                             pgf_production_idx_entry_order,
-                             PgfProductionIdxEntry, &key);
-        if (value != NULL) {
-            pgf_parsing_predict_lexeme(ps, conts, value, lentry->offset);
-            PgfProductionIdxEntry* start =
-                gu_buf_data(lentry->idx);
-            PgfProductionIdxEntry* end =
-                start + gu_buf_length(lentry->idx)-1;
-            PgfProductionIdxEntry* left = value-1;
-            while (left >= start &&
-                   value->ccat->fid == left->ccat->fid &&
-                   value->lin_idx == left->lin_idx) {
-                pgf_parsing_predict_lexeme(ps, conts, left, lentry->offset);
-                left--;
-            }
-            PgfProductionIdxEntry* right = value+1;
-            while (right <= end &&
-                   value->ccat->fid == right->ccat->fid &&
-                   value->lin_idx == right->lin_idx) {
-                pgf_parsing_predict_lexeme(ps, conts, right, lentry->offset);
-                right++;
-            }
-        }
-    }
-}
+// Bottom-up prediction for lexical and epsilon rules
+size_t n_idcs = gu_buf_length(ps->before->lexicon_idx);
+for (size_t i = 0; i < n_idcs; i++) {
+    PgfLexiconIdxEntry* lentry =
+        gu_buf_index(ps->before->lexicon_idx, PgfLexiconIdxEntry, i);
+    PgfProductionIdxEntry key;
+    key.ccat = ccat;
+    key.lin_idx = lin_idx;
+    key.papp = NULL;
+    PgfProductionIdxEntry* value =
+        gu_seq_binsearch(gu_buf_data_seq(lentry->idx),
+                         pgf_production_idx_entry_order,
+                         PgfProductionIdxEntry, &key);
+    if (value != NULL) {
+        pgf_parsing_predict_lexeme(ps, conts, value, lentry->offset);
+        PgfProductionIdxEntry* start =
+            gu_buf_data(lentry->idx);
+        PgfProductionIdxEntry* end =
+            start + gu_buf_length(lentry->idx)-1;
+        PgfProductionIdxEntry* left = value-1;
+        while (left >= start &&
+               value->ccat->fid == left->ccat->fid &&
+               value->lin_idx == left->lin_idx) {
+            pgf_parsing_predict_lexeme(ps, conts, left, lentry->offset);
+            left--;
+        }
+        PgfProductionIdxEntry* right = value+1;
+        while (right <= end &&
+               value->ccat->fid == right->ccat->fid &&
+               value->lin_idx == right->lin_idx) {
+            pgf_parsing_predict_lexeme(ps, conts, right, lentry->offset);
+            right++;
+        }
+    }
+}