forked from GitHub/gf-core
the statistical parser now uses a baseline lexical estimation of the beam size
This commit is contained in:
@@ -55,6 +55,7 @@ typedef struct {
|
|||||||
int prod_full_count;
|
int prod_full_count;
|
||||||
#endif
|
#endif
|
||||||
PgfItem* free_item;
|
PgfItem* free_item;
|
||||||
|
prob_t beam_size;
|
||||||
} PgfParsing;
|
} PgfParsing;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@@ -75,6 +76,7 @@ GU_DEFINE_TYPE(PgfProductionIdx, GuMap,
|
|||||||
typedef struct {
|
typedef struct {
|
||||||
PgfToken tok;
|
PgfToken tok;
|
||||||
PgfProductionIdx* lexicon_idx;
|
PgfProductionIdx* lexicon_idx;
|
||||||
|
prob_t lexical_prob;
|
||||||
} PgfTokenState;
|
} PgfTokenState;
|
||||||
|
|
||||||
struct PgfParseState {
|
struct PgfParseState {
|
||||||
@@ -1483,8 +1485,12 @@ pgf_parsing_proceed(PgfParseState* state) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
delta_prob +=
|
prob_t state_delta =
|
||||||
(st->viterbi_prob-(st->next ? st->next->viterbi_prob : 0))*0.95;
|
(st->viterbi_prob-(st->next ? st->next->viterbi_prob : 0))*
|
||||||
|
state->ps->beam_size;
|
||||||
|
prob_t lexical_prob =
|
||||||
|
st->ts ? st->ts->lexical_prob : 0;
|
||||||
|
delta_prob += fmax(state_delta, lexical_prob);
|
||||||
st = st->next;
|
st = st->next;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1532,6 +1538,7 @@ pgf_new_parsing(PgfConcr* concr, GuPool* pool)
|
|||||||
ps->prod_full_count = 0;
|
ps->prod_full_count = 0;
|
||||||
#endif
|
#endif
|
||||||
ps->free_item = NULL;
|
ps->free_item = NULL;
|
||||||
|
ps->beam_size = 0.95;
|
||||||
|
|
||||||
PgfExprMeta *expr_meta =
|
PgfExprMeta *expr_meta =
|
||||||
gu_new_variant(PGF_EXPR_META,
|
gu_new_variant(PGF_EXPR_META,
|
||||||
@@ -1569,6 +1576,38 @@ pgf_new_parse_state(PgfParsing* ps,
|
|||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
GuMapItor fn;
|
||||||
|
PgfTokenState* ts;
|
||||||
|
} PgfLexiconFn;
|
||||||
|
|
||||||
|
static void
|
||||||
|
pgf_parser_compute_lexicon_prob(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||||
|
{
|
||||||
|
PgfTokenState* ts = ((PgfLexiconFn*) fn)->ts;
|
||||||
|
PgfProductionSeq prods = *((PgfProductionSeq*) value);
|
||||||
|
|
||||||
|
if (gu_seq_is_null(prods))
|
||||||
|
return;
|
||||||
|
|
||||||
|
size_t n_prods = gu_seq_length(prods);
|
||||||
|
for (size_t i = 0; i < n_prods; i++) {
|
||||||
|
PgfProduction prod =
|
||||||
|
gu_seq_get(prods, PgfProduction, i);
|
||||||
|
|
||||||
|
GuVariantInfo pi = gu_variant_open(prod);
|
||||||
|
switch (pi.tag) {
|
||||||
|
case PGF_PRODUCTION_APPLY: {
|
||||||
|
PgfProductionApply* papp = pi.data;
|
||||||
|
if (ts->lexical_prob > papp->fun->ep->prob) {
|
||||||
|
ts->lexical_prob = papp->fun->ep->prob;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static PgfTokenState*
|
static PgfTokenState*
|
||||||
pgf_new_token_state(PgfConcr *concr, PgfToken tok, GuPool* pool)
|
pgf_new_token_state(PgfConcr *concr, PgfToken tok, GuPool* pool)
|
||||||
{
|
{
|
||||||
@@ -1576,6 +1615,11 @@ pgf_new_token_state(PgfConcr *concr, PgfToken tok, GuPool* pool)
|
|||||||
ts->tok = tok;
|
ts->tok = tok;
|
||||||
ts->lexicon_idx = gu_map_get(concr->leftcorner_tok_idx,
|
ts->lexicon_idx = gu_map_get(concr->leftcorner_tok_idx,
|
||||||
&tok, PgfProductionIdx*);
|
&tok, PgfProductionIdx*);
|
||||||
|
ts->lexical_prob = INFINITY;
|
||||||
|
PgfLexiconFn clo = { { pgf_parser_compute_lexicon_prob }, ts };
|
||||||
|
gu_map_iter(ts->lexicon_idx, &clo.fn, NULL);
|
||||||
|
if (ts->lexical_prob == INFINITY)
|
||||||
|
ts->lexical_prob = 0;
|
||||||
return ts;
|
return ts;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1969,6 +2013,12 @@ pgf_parser_init_state(PgfConcr* concr, PgfCId cat, size_t lin_idx, GuPool* pool)
|
|||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
pgf_parser_set_beam_size(PgfParseState* state, double beam_size)
|
||||||
|
{
|
||||||
|
state->ps->beam_size = beam_size;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
pgf_parser_add_literal(PgfConcr *concr, PgfCId cat,
|
pgf_parser_add_literal(PgfConcr *concr, PgfCId cat,
|
||||||
PgfLiteralCallback* callback)
|
PgfLiteralCallback* callback)
|
||||||
|
|||||||
@@ -67,6 +67,9 @@ pgf_parser_next_state(PgfParseState* prev, PgfToken tok,
|
|||||||
* the pool used to create \parse.
|
* the pool used to create \parse.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/**
 * Sets the beam size used by the parser.
 *
 * The value is stored in the parsing record reached through \p state
 * (state->ps->beam_size).
 *
 * @param state      A parse state.
 * @param beam_size  The new beam size.
 */
void
|
||||||
|
pgf_parser_set_beam_size(PgfParseState* state, double beam_size);
|
||||||
|
|
||||||
void
|
void
|
||||||
pgf_parser_add_literal(PgfConcr *concr, PgfCId cat,
|
pgf_parser_add_literal(PgfConcr *concr, PgfCId cat,
|
||||||
PgfLiteralCallback* callback);
|
PgfLiteralCallback* callback);
|
||||||
|
|||||||
Reference in New Issue
Block a user