1
0
forked from GitHub/gf-core

In the statistical parser: move the outside probability from the parse items to their continuation. This way the value slot is shared between the many items that point to the same continuation.

This commit is contained in:
kr.angelov
2012-11-12 13:43:43 +00:00
parent 56f3ff8202
commit c28056c4e5

View File

@@ -26,6 +26,7 @@ struct PgfItemConts {
#ifdef PGF_PARSER_DEBUG #ifdef PGF_PARSER_DEBUG
unsigned short offset; unsigned short offset;
#endif #endif
prob_t outside_prob;
PgfItemBuf* items; PgfItemBuf* items;
int ref_count; // how many items point to this cont? int ref_count; // how many items point to this cont?
}; };
@@ -135,7 +136,6 @@ struct PgfItem {
uint8_t tok_idx; uint8_t tok_idx;
uint8_t alt; uint8_t alt;
prob_t inside_prob; prob_t inside_prob;
prob_t outside_prob;
}; };
GU_DEFINE_TYPE(PgfLeftcornerCatIdx, GuMap, GU_DEFINE_TYPE(PgfLeftcornerCatIdx, GuMap,
@@ -430,8 +430,8 @@ pgf_print_item(PgfItem* item, int offset, GuWriter* wtr, GuExn* err, GuPool* poo
pgf_print_item_seq(item, wtr, err, pool); pgf_print_item_seq(item, wtr, err, pool);
gu_printf(wtr, err, "; %f+%f=%f]\n", gu_printf(wtr, err, "; %f+%f=%f]\n",
item->inside_prob, item->inside_prob,
item->outside_prob, item->conts->outside_prob,
item->inside_prob+item->outside_prob); item->inside_prob+item->conts->outside_prob);
} }
#endif #endif
@@ -441,8 +441,8 @@ cmp_item_prob(GuOrder* self, const void* a, const void* b)
PgfItem *item1 = *((PgfItem **) a); PgfItem *item1 = *((PgfItem **) a);
PgfItem *item2 = *((PgfItem **) b); PgfItem *item2 = *((PgfItem **) b);
prob_t prob1 = item1->inside_prob + item1->outside_prob; prob_t prob1 = item1->inside_prob + item1->conts->outside_prob;
prob_t prob2 = item2->inside_prob + item2->outside_prob; prob_t prob2 = item2->inside_prob + item2->conts->outside_prob;
if (prob1 < prob2) if (prob1 < prob2)
return -1; return -1;
@@ -571,7 +571,6 @@ pgf_item_set_curr_symbol(PgfItem* item, GuPool* pool)
static PgfItem* static PgfItem*
pgf_new_item(PgfItemConts* conts, PgfProduction prod, pgf_new_item(PgfItemConts* conts, PgfProduction prod,
prob_t delta_prob,
GuPool* pool, PgfParsing* ps) GuPool* pool, PgfParsing* ps)
{ {
PgfItem* item; PgfItem* item;
@@ -641,16 +640,6 @@ pgf_new_item(PgfItemConts* conts, PgfProduction prod,
conts->ref_count++; conts->ref_count++;
item->outside_prob = 0;
if (gu_buf_length(conts->items) > 0) {
PgfItem* best_cont = gu_buf_get(conts->items, PgfItem*, 0);
if (best_cont != NULL)
item->outside_prob =
best_cont->inside_prob-conts->ccat->viterbi_prob+
best_cont->outside_prob;
}
item->outside_prob += delta_prob;
pgf_item_set_curr_symbol(item, pool); pgf_item_set_curr_symbol(item, pool);
#ifdef PGF_COUNTS_DEBUG #ifdef PGF_COUNTS_DEBUG
@@ -841,11 +830,10 @@ pgf_parsing_combine(PgfParseState* before, PgfParseState* after,
static void static void
pgf_parsing_production(PgfParseState* state, pgf_parsing_production(PgfParseState* state,
PgfItemConts* conts, PgfProduction prod, PgfItemConts* conts, PgfProduction prod)
prob_t delta_prob)
{ {
PgfItem* item = PgfItem* item =
pgf_new_item(conts, prod, delta_prob, state->pool, state->ps); pgf_new_item(conts, prod, state->pool, state->ps);
gu_buf_heap_push(state->agenda, &pgf_item_prob_order, &item); gu_buf_heap_push(state->agenda, &pgf_item_prob_order, &item);
} }
@@ -998,7 +986,7 @@ pgf_parsing_complete(PgfParseState* before, PgfParseState* after,
* production immediately to the agenda, * production immediately to the agenda,
* i.e. process it. */ * i.e. process it. */
if (conts2) { if (conts2) {
pgf_parsing_production(before, conts2, prod, 0); pgf_parsing_production(before, conts2, prod);
} }
} }
@@ -1017,7 +1005,7 @@ pgf_parsing_complete(PgfParseState* before, PgfParseState* after,
* production immediately to the agenda, * production immediately to the agenda,
* i.e. process it. */ * i.e. process it. */
if (conts2) { if (conts2) {
pgf_parsing_production(state, conts2, prod, 0); pgf_parsing_production(state, conts2, prod);
} }
} }
@@ -1097,19 +1085,24 @@ pgf_parsing_td_predict(PgfParseState* before, PgfParseState* after,
* of this category at the current position, * of this category at the current position,
* so predict it. */ * so predict it. */
conts->outside_prob =
item->inside_prob-conts->ccat->viterbi_prob+
item->conts->outside_prob +
delta_prob;
// Top-down prediction for syntactic rules // Top-down prediction for syntactic rules
PgfProductionSeq prods = ccat->prods; PgfProductionSeq prods = ccat->prods;
for (size_t i = 0; i < ccat->n_synprods; i++) { for (size_t i = 0; i < ccat->n_synprods; i++) {
PgfProduction prod = PgfProduction prod =
gu_seq_get(prods, PgfProduction, i); gu_seq_get(prods, PgfProduction, i);
pgf_parsing_production(before, conts, prod, delta_prob); pgf_parsing_production(before, conts, prod);
} }
if (ccat->cnccat->abscat->meta_prob != INFINITY && if (ccat->cnccat->abscat->meta_prob != INFINITY &&
ccat->conts == NULL /* grammar defined ccat */) { ccat->conts == NULL /* grammar defined ccat */) {
// Top-down prediction for meta rules // Top-down prediction for meta rules
PgfItem *item = PgfItem *item =
pgf_new_item(conts, before->ps->meta_prod, 0, before->pool, before->ps); pgf_new_item(conts, before->ps->meta_prod, before->pool, before->ps);
item->inside_prob = item->inside_prob =
ccat->cnccat->abscat->meta_prob; ccat->cnccat->abscat->meta_prob;
gu_buf_heap_push(before->agenda, &pgf_item_prob_order, &item); gu_buf_heap_push(before->agenda, &pgf_item_prob_order, &item);
@@ -1127,7 +1120,7 @@ pgf_parsing_td_predict(PgfParseState* before, PgfParseState* after,
PgfProduction prod = PgfProduction prod =
gu_seq_get(tok_prods, PgfProduction, i); gu_seq_get(tok_prods, PgfProduction, i);
pgf_parsing_production(before, conts, prod, 0); pgf_parsing_production(before, conts, prod);
} }
} }
} }
@@ -1143,7 +1136,7 @@ pgf_parsing_td_predict(PgfParseState* before, PgfParseState* after,
PgfProduction prod = PgfProduction prod =
gu_seq_get(eps_prods, PgfProduction, i); gu_seq_get(eps_prods, PgfProduction, i);
pgf_parsing_production(before, conts, prod, 0); pgf_parsing_production(before, conts, prod);
} }
} }
} else { } else {
@@ -1351,7 +1344,7 @@ pgf_parsing_symbol(PgfParseState* before, PgfParseState* after,
pext->ep = NULL; pext->ep = NULL;
pext->lins = gu_null_seq; pext->lins = gu_null_seq;
pgf_parsing_production(before, conts, prod, 0); pgf_parsing_production(before, conts, prod);
} }
} else { } else {
/* If it has already been completed, combine. */ /* If it has already been completed, combine. */
@@ -1567,7 +1560,7 @@ pgf_parsing_proceed(PgfParseState* state, void** output) {
while (st != NULL) { while (st != NULL) {
if (gu_buf_length(st->agenda) > 0) { if (gu_buf_length(st->agenda) > 0) {
PgfItem* item = gu_buf_get(st->agenda, PgfItem*, 0); PgfItem* item = gu_buf_get(st->agenda, PgfItem*, 0);
prob_t item_prob = item->inside_prob+item->outside_prob; prob_t item_prob = item->inside_prob+item->conts->outside_prob;
if (item_prob < best_prob) { if (item_prob < best_prob) {
best_prob = item_prob; best_prob = item_prob;
before = st; before = st;
@@ -1926,12 +1919,12 @@ pgf_parser_init_state(PgfConcr* concr, PgfCId cat, size_t lin_idx, GuPool* pool)
PgfProduction prod = PgfProduction prod =
gu_seq_get(ccat->prods, PgfProduction, i); gu_seq_get(ccat->prods, PgfProduction, i);
PgfItem* item = PgfItem* item =
pgf_new_item(conts, prod, 0, pool, ps); pgf_new_item(conts, prod, pool, ps);
gu_buf_heap_push(state->agenda, &pgf_item_prob_order, &item); gu_buf_heap_push(state->agenda, &pgf_item_prob_order, &item);
} }
PgfItem *item = PgfItem *item =
pgf_new_item(conts, ps->meta_prod, 0, pool, ps); pgf_new_item(conts, ps->meta_prod, pool, ps);
item->inside_prob = item->inside_prob =
ccat->cnccat->abscat->meta_prob; ccat->cnccat->abscat->meta_prob;
gu_buf_heap_push(state->agenda, &pgf_item_prob_order, &item); gu_buf_heap_push(state->agenda, &pgf_item_prob_order, &item);
@@ -2251,7 +2244,7 @@ pgf_parser_leftcorner_iter_cats(GuMapItor* fn, const void* key, void* value, GuE
PgfItemConts* conts = PgfItemConts* conts =
pgf_parsing_get_conts(clo->conts_map, ccat, lin_idx, 0, clo->tmp_pool); pgf_parsing_get_conts(clo->conts_map, ccat, lin_idx, 0, clo->tmp_pool);
PgfItem* item = PgfItem* item =
pgf_new_item(conts, prod, 0, clo->tmp_pool, NULL); pgf_new_item(conts, prod, clo->tmp_pool, NULL);
pgf_parser_leftcorner_item(clo, item); pgf_parser_leftcorner_item(clo, item);
} }