forked from GitHub/gf-core
first version of a parser which returns chunks in case of failure
This commit is contained in:
@@ -344,8 +344,9 @@ struct PgfCCat {
|
|||||||
PgfCncFuns* linrefs;
|
PgfCncFuns* linrefs;
|
||||||
size_t n_synprods;
|
size_t n_synprods;
|
||||||
PgfProductionSeq* prods;
|
PgfProductionSeq* prods;
|
||||||
float viterbi_prob;
|
prob_t viterbi_prob;
|
||||||
int fid;
|
int fid;
|
||||||
|
int chunk_count;
|
||||||
PgfItemConts* conts;
|
PgfItemConts* conts;
|
||||||
struct PgfAnswers* answers;
|
struct PgfAnswers* answers;
|
||||||
GuFinalizer fin[0];
|
GuFinalizer fin[0];
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ struct PgfItemConts {
|
|||||||
typedef GuSeq PgfItemContss;
|
typedef GuSeq PgfItemContss;
|
||||||
typedef GuMap PgfContsMap;
|
typedef GuMap PgfContsMap;
|
||||||
typedef GuMap PgfGenCatMap;
|
typedef GuMap PgfGenCatMap;
|
||||||
|
typedef GuMap PgfChunksMap;
|
||||||
|
|
||||||
typedef GuBuf PgfCCatBuf;
|
typedef GuBuf PgfCCatBuf;
|
||||||
|
|
||||||
@@ -48,7 +49,6 @@ typedef struct {
|
|||||||
#ifdef PGF_COUNTS_DEBUG
|
#ifdef PGF_COUNTS_DEBUG
|
||||||
int item_full_count;
|
int item_full_count;
|
||||||
int item_real_count;
|
int item_real_count;
|
||||||
int cont_full_count;
|
|
||||||
int ccat_full_count;
|
int ccat_full_count;
|
||||||
int prod_full_count;
|
int prod_full_count;
|
||||||
#endif
|
#endif
|
||||||
@@ -67,6 +67,7 @@ struct PgfParseState {
|
|||||||
PgfItemBuf* agenda;
|
PgfItemBuf* agenda;
|
||||||
PgfContsMap* conts_map;
|
PgfContsMap* conts_map;
|
||||||
PgfGenCatMap* generated_cats;
|
PgfGenCatMap* generated_cats;
|
||||||
|
PgfChunksMap* chunks_map;
|
||||||
|
|
||||||
bool needs_bind;
|
bool needs_bind;
|
||||||
size_t start_offset;
|
size_t start_offset;
|
||||||
@@ -78,14 +79,21 @@ struct PgfParseState {
|
|||||||
typedef struct PgfAnswers {
|
typedef struct PgfAnswers {
|
||||||
GuBuf* conts;
|
GuBuf* conts;
|
||||||
GuBuf* exprs;
|
GuBuf* exprs;
|
||||||
|
PgfCCat* ccat;
|
||||||
prob_t outside_prob;
|
prob_t outside_prob;
|
||||||
} PgfAnswers;
|
} PgfAnswers;
|
||||||
|
|
||||||
|
#define PGF_EXPR_CHUNK_STATE ((size_t) -1)
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
PgfAnswers* answers;
|
PgfAnswers* answers;
|
||||||
PgfExprProb ep;
|
PgfExprProb ep;
|
||||||
PgfPArgs* args;
|
union {
|
||||||
size_t arg_idx;
|
PgfPArgs* args;
|
||||||
|
PgfParseState* state;
|
||||||
|
};
|
||||||
|
size_t arg_idx; // if the value is PGF_EXPR_CHUNK_STATE, then
|
||||||
|
// the relevant value above is state, not args.
|
||||||
} PgfExprState;
|
} PgfExprState;
|
||||||
|
|
||||||
typedef struct PgfItemBase PgfItemBase;
|
typedef struct PgfItemBase PgfItemBase;
|
||||||
@@ -371,7 +379,9 @@ static void
|
|||||||
pgf_print_expr_state(PgfExprState* st,
|
pgf_print_expr_state(PgfExprState* st,
|
||||||
GuOut* out, GuExn* err, GuBuf* stack)
|
GuOut* out, GuExn* err, GuBuf* stack)
|
||||||
{
|
{
|
||||||
gu_buf_push(stack, int, (gu_seq_length(st->args) - st->arg_idx - 1));
|
gu_buf_push(stack, int,
|
||||||
|
(st->arg_idx != PGF_EXPR_CHUNK_STATE) ?
|
||||||
|
(gu_seq_length(st->args) - st->arg_idx - 1) : 0);
|
||||||
|
|
||||||
if (gu_buf_length(st->answers->conts) > 0) {
|
if (gu_buf_length(st->answers->conts) > 0) {
|
||||||
PgfExprState* cont = gu_buf_get(st->answers->conts, PgfExprState*, 0);
|
PgfExprState* cont = gu_buf_get(st->answers->conts, PgfExprState*, 0);
|
||||||
@@ -380,6 +390,10 @@ pgf_print_expr_state(PgfExprState* st,
|
|||||||
}
|
}
|
||||||
|
|
||||||
gu_puts(" (", out, err);
|
gu_puts(" (", out, err);
|
||||||
|
if (st->answers->ccat != NULL) {
|
||||||
|
pgf_print_fid(st->answers->ccat->fid,out,err);
|
||||||
|
gu_puts(":", out, err);
|
||||||
|
}
|
||||||
if (gu_variant_is_null(st->ep.expr))
|
if (gu_variant_is_null(st->ep.expr))
|
||||||
gu_puts("_", out, err);
|
gu_puts("_", out, err);
|
||||||
else
|
else
|
||||||
@@ -395,7 +409,8 @@ pgf_print_expr_state0(PgfExprState* st,
|
|||||||
st->answers->outside_prob,
|
st->answers->outside_prob,
|
||||||
st->answers->outside_prob+st->ep.prob);
|
st->answers->outside_prob+st->ep.prob);
|
||||||
|
|
||||||
size_t n_args = gu_seq_length(st->args);
|
size_t n_args = (st->arg_idx == PGF_EXPR_CHUNK_STATE) ?
|
||||||
|
0 : gu_seq_length(st->args);
|
||||||
|
|
||||||
GuBuf* stack = gu_new_buf(int, tmp_pool);
|
GuBuf* stack = gu_new_buf(int, tmp_pool);
|
||||||
if (n_args > 0)
|
if (n_args > 0)
|
||||||
@@ -423,7 +438,7 @@ pgf_print_expr_state0(PgfExprState* st,
|
|||||||
int count = gu_buf_get(stack, int, i);
|
int count = gu_buf_get(stack, int, i);
|
||||||
while (count-- > 0)
|
while (count-- > 0)
|
||||||
gu_puts(" ?", out, err);
|
gu_puts(" ?", out, err);
|
||||||
|
|
||||||
gu_puts(")", out, err);
|
gu_puts(")", out, err);
|
||||||
}
|
}
|
||||||
gu_puts("\n", out, err);
|
gu_puts("\n", out, err);
|
||||||
@@ -508,12 +523,6 @@ pgf_parsing_get_conts(PgfParseState* state,
|
|||||||
conts->outside_prob = 0;
|
conts->outside_prob = 0;
|
||||||
conts->ref_count = 0;
|
conts->ref_count = 0;
|
||||||
gu_seq_get(contss, PgfItemConts*, lin_idx) = conts;
|
gu_seq_get(contss, PgfItemConts*, lin_idx) = conts;
|
||||||
|
|
||||||
#ifdef PGF_COUNTS_DEBUG
|
|
||||||
if (state != NULL) {
|
|
||||||
state->ps->cont_full_count++;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
return conts;
|
return conts;
|
||||||
}
|
}
|
||||||
@@ -527,7 +536,7 @@ gu_ccat_fini(GuFinalizer* fin)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static PgfCCat*
|
static PgfCCat*
|
||||||
pgf_parsing_create_completed(PgfParsing* ps, PgfParseState* state,
|
pgf_parsing_create_completed(PgfParsing* ps, PgfParseState* state,
|
||||||
PgfItemConts* conts,
|
PgfItemConts* conts,
|
||||||
prob_t viterbi_prob)
|
prob_t viterbi_prob)
|
||||||
{
|
{
|
||||||
@@ -537,17 +546,20 @@ pgf_parsing_create_completed(PgfParsing* ps, PgfParseState* state,
|
|||||||
cat->linrefs = conts->ccat->linrefs;
|
cat->linrefs = conts->ccat->linrefs;
|
||||||
cat->viterbi_prob = viterbi_prob;
|
cat->viterbi_prob = viterbi_prob;
|
||||||
cat->fid = ps->max_fid++;
|
cat->fid = ps->max_fid++;
|
||||||
|
cat->chunk_count = (conts->ccat->fid == -5 ||
|
||||||
|
conts->state->end_offset == state->end_offset);
|
||||||
cat->conts = conts;
|
cat->conts = conts;
|
||||||
cat->answers = NULL;
|
cat->answers = NULL;
|
||||||
cat->prods = NULL;
|
cat->prods = NULL;
|
||||||
cat->n_synprods = 0;
|
cat->n_synprods = 0;
|
||||||
|
|
||||||
gu_map_put(state->generated_cats, conts, PgfCCat*, cat);
|
gu_map_put(state->generated_cats, conts, PgfCCat*, cat);
|
||||||
|
|
||||||
cat->fin[0].fn = gu_ccat_fini;
|
cat->fin[0].fn = gu_ccat_fini;
|
||||||
gu_pool_finally(ps->pool, cat->fin);
|
gu_pool_finally(ps->pool, cat->fin);
|
||||||
|
|
||||||
#ifdef PGF_COUNTS_DEBUG
|
#ifdef PGF_COUNTS_DEBUG
|
||||||
state->ps->ccat_full_count++;
|
ps->ccat_full_count++;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return cat;
|
return cat;
|
||||||
@@ -589,6 +601,19 @@ pgf_item_set_curr_symbol(PgfItem* item, GuPool* pool)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PGF_PRODUCTION_EXTERN: {
|
case PGF_PRODUCTION_EXTERN: {
|
||||||
|
PgfProductionExtern* pext = i.data;
|
||||||
|
|
||||||
|
PgfSymbols* syms;
|
||||||
|
if (pext->lins != NULL &&
|
||||||
|
(syms = gu_seq_get(pext->lins,PgfSymbols*,item->conts->lin_idx)) != NULL) {
|
||||||
|
if (item->sym_idx == gu_seq_length(syms)) {
|
||||||
|
item->curr_sym = gu_null_variant;
|
||||||
|
} else {
|
||||||
|
item->curr_sym = gu_seq_get(syms, PgfSymbol, item->sym_idx);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
item->curr_sym = gu_null_variant;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@@ -662,7 +687,7 @@ static PgfItem*
|
|||||||
pgf_item_copy(PgfItem* item, PgfParsing* ps)
|
pgf_item_copy(PgfItem* item, PgfParsing* ps)
|
||||||
{
|
{
|
||||||
PgfItem* copy;
|
PgfItem* copy;
|
||||||
if (ps == NULL || ps->free_item == NULL)
|
if (ps->free_item == NULL)
|
||||||
copy = gu_new(PgfItem, ps->pool);
|
copy = gu_new(PgfItem, ps->pool);
|
||||||
else {
|
else {
|
||||||
copy = ps->free_item;
|
copy = ps->free_item;
|
||||||
@@ -671,10 +696,8 @@ pgf_item_copy(PgfItem* item, PgfParsing* ps)
|
|||||||
memcpy(copy, item, sizeof(PgfItem));
|
memcpy(copy, item, sizeof(PgfItem));
|
||||||
|
|
||||||
#ifdef PGF_COUNTS_DEBUG
|
#ifdef PGF_COUNTS_DEBUG
|
||||||
if (ps != NULL) {
|
ps->item_full_count++;
|
||||||
ps->item_full_count++;
|
ps->item_real_count++;
|
||||||
ps->item_real_count++;
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
item->conts->ref_count++;
|
item->conts->ref_count++;
|
||||||
@@ -747,13 +770,17 @@ pgf_item_free(PgfParsing* ps, PgfItem* item)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pgf_result_predict(PgfParsing* ps,
|
pgf_result_predict(PgfParsing* ps,
|
||||||
PgfExprState* cont, PgfCCat* ccat);
|
PgfExprState* cont, PgfCCat* ccat,
|
||||||
|
prob_t outside_prob);
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pgf_result_production(PgfParsing* ps,
|
pgf_result_production(PgfParsing* ps,
|
||||||
PgfAnswers* answers, PgfProduction prod);
|
PgfAnswers* answers, PgfProduction prod);
|
||||||
|
|
||||||
|
static void
|
||||||
|
pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep);
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pgf_parsing_push_item(PgfParseState* state, PgfItem* item)
|
pgf_parsing_push_item(PgfParseState* state, PgfItem* item)
|
||||||
{
|
{
|
||||||
@@ -859,6 +886,10 @@ pgf_parsing_new_production(PgfItem* item, PgfExprProb *ep, GuPool *pool)
|
|||||||
default:
|
default:
|
||||||
gu_impossible();
|
gu_impossible();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef PGF_COUNTS_DEBUG
|
||||||
|
ps->prod_full_count++;
|
||||||
|
#endif
|
||||||
|
|
||||||
return prod;
|
return prod;
|
||||||
}
|
}
|
||||||
@@ -877,9 +908,6 @@ pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep)
|
|||||||
|
|
||||||
PgfProduction prod =
|
PgfProduction prod =
|
||||||
pgf_parsing_new_production(item, ep, ps->pool);
|
pgf_parsing_new_production(item, ep, ps->pool);
|
||||||
#ifdef PGF_COUNTS_DEBUG
|
|
||||||
ps->prod_full_count++;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
PgfCCat* tmp_ccat = pgf_parsing_get_completed(ps->before, item->conts);
|
PgfCCat* tmp_ccat = pgf_parsing_get_completed(ps->before, item->conts);
|
||||||
PgfCCat* ccat = tmp_ccat;
|
PgfCCat* ccat = tmp_ccat;
|
||||||
@@ -904,7 +932,9 @@ pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep)
|
|||||||
gu_printf(out, err, "; %d; ",
|
gu_printf(out, err, "; %d; ",
|
||||||
item->conts->lin_idx);
|
item->conts->lin_idx);
|
||||||
pgf_print_fid(ccat->fid, out, err);
|
pgf_print_fid(ccat->fid, out, err);
|
||||||
gu_puts("]\n", out, err);
|
gu_puts("] ", out, err);
|
||||||
|
pgf_print_fid(ccat->fid, out, err);
|
||||||
|
gu_printf(out, err, ".chunk_count=%d\n", ccat->chunk_count);
|
||||||
}
|
}
|
||||||
pgf_print_production(ccat->fid, prod, out, err);
|
pgf_print_production(ccat->fid, prod, out, err);
|
||||||
gu_pool_free(tmp_pool);
|
gu_pool_free(tmp_pool);
|
||||||
@@ -913,9 +943,29 @@ pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep)
|
|||||||
if (item->conts->ccat->fid == -5) {
|
if (item->conts->ccat->fid == -5) {
|
||||||
if (ps->before->end_offset == strlen(ps->sentence)) {
|
if (ps->before->end_offset == strlen(ps->sentence)) {
|
||||||
PgfPArg* parg = gu_seq_index(item->args, PgfPArg, 0);
|
PgfPArg* parg = gu_seq_index(item->args, PgfPArg, 0);
|
||||||
pgf_result_predict(ps, NULL, parg->ccat);
|
pgf_result_predict(ps, NULL, parg->ccat, 0);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
|
} else {
|
||||||
|
size_t i = gu_seq_length(item->args);
|
||||||
|
while (i > 0) {
|
||||||
|
PgfPArg* parg = gu_seq_index(item->args, PgfPArg, i-1);
|
||||||
|
|
||||||
|
if (pgf_parsing_get_completed(ps->before, parg->ccat->conts) != NULL) {
|
||||||
|
parg->ccat->chunk_count++;
|
||||||
|
|
||||||
|
#ifdef PGF_PARSER_DEBUG
|
||||||
|
GuPool* tmp_pool = gu_new_pool();
|
||||||
|
GuOut* out = gu_file_out(stderr, tmp_pool);
|
||||||
|
GuExn* err = gu_exn(tmp_pool);
|
||||||
|
pgf_print_fid(parg->ccat->fid, out, err);
|
||||||
|
gu_printf(out, err, ".chunk_count=%d\n", parg->ccat->chunk_count);
|
||||||
|
gu_pool_free(tmp_pool);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
i--;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tmp_ccat != NULL) {
|
if (tmp_ccat != NULL) {
|
||||||
@@ -1022,6 +1072,7 @@ pgf_new_parse_state(PgfParsing* ps, size_t start_offset,
|
|||||||
state->agenda = gu_new_buf(PgfItem*, ps->pool);
|
state->agenda = gu_new_buf(PgfItem*, ps->pool);
|
||||||
state->generated_cats = gu_new_addr_map(PgfItemConts*, PgfCCat*, &gu_null_struct, ps->pool);
|
state->generated_cats = gu_new_addr_map(PgfItemConts*, PgfCCat*, &gu_null_struct, ps->pool);
|
||||||
state->conts_map = gu_new_addr_map(PgfCCat*, PgfItemContss*, &gu_null_struct, ps->pool);
|
state->conts_map = gu_new_addr_map(PgfCCat*, PgfItemContss*, &gu_null_struct, ps->pool);
|
||||||
|
state->chunks_map = NULL;
|
||||||
state->needs_bind = (bind_type == BIND_NONE) &&
|
state->needs_bind = (bind_type == BIND_NONE) &&
|
||||||
(start_offset == end_offset);
|
(start_offset == end_offset);
|
||||||
state->start_offset = start_offset;
|
state->start_offset = start_offset;
|
||||||
@@ -1076,15 +1127,17 @@ pgf_parsing_scan_helper(PgfParsing *ps, PgfParseState* state,
|
|||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
ptrdiff_t len = current.ptr - start.ptr;
|
ptrdiff_t len = current.ptr - start.ptr;
|
||||||
found = true;
|
|
||||||
|
|
||||||
if (min <= len)
|
if (min <= len)
|
||||||
pgf_parsing_scan_helper(ps, state, i, k-1, min, len);
|
if (pgf_parsing_scan_helper(ps, state, i, k-1, min, len))
|
||||||
|
found = true;
|
||||||
|
|
||||||
// Here we do bottom-up prediction for all lexical categories.
|
// Here we do bottom-up prediction for all lexical categories.
|
||||||
// The epsilon productions will be predicted in top-down
|
// The epsilon productions will be predicted in top-down
|
||||||
// fashion while parsing.
|
// fashion while parsing.
|
||||||
if (seq->idx != NULL && len > 0) {
|
if (seq->idx != NULL && len > 0) {
|
||||||
|
found = true;
|
||||||
|
|
||||||
// A new state will mark the end of the current match
|
// A new state will mark the end of the current match
|
||||||
PgfParseState* new_state =
|
PgfParseState* new_state =
|
||||||
pgf_new_parse_state(ps, (size_t) (current.ptr - ps->sentence), BIND_NONE);
|
pgf_new_parse_state(ps, (size_t) (current.ptr - ps->sentence), BIND_NONE);
|
||||||
@@ -1133,7 +1186,9 @@ pgf_parsing_scan_helper(PgfParsing *ps, PgfParseState* state,
|
|||||||
gu_printf(out, err, "; %d; ",
|
gu_printf(out, err, "; %d; ",
|
||||||
conts->lin_idx);
|
conts->lin_idx);
|
||||||
pgf_print_fid(ccat->fid, out, err);
|
pgf_print_fid(ccat->fid, out, err);
|
||||||
gu_puts("]\n", out, err);
|
gu_puts("] ", out, err);
|
||||||
|
pgf_print_fid(ccat->fid, out, err);
|
||||||
|
gu_printf(out, err, ".chunk_count=%d\n", ccat->chunk_count);
|
||||||
}
|
}
|
||||||
pgf_print_production(ccat->fid, prod, out, err);
|
pgf_print_production(ccat->fid, prod, out, err);
|
||||||
gu_pool_free(tmp_pool);
|
gu_pool_free(tmp_pool);
|
||||||
@@ -1142,7 +1197,8 @@ pgf_parsing_scan_helper(PgfParsing *ps, PgfParseState* state,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (len <= max)
|
if (len <= max)
|
||||||
pgf_parsing_scan_helper(ps, state, k+1, j, len, max);
|
if (pgf_parsing_scan_helper(ps, state, k+1, j, len, max))
|
||||||
|
found = true;
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -1159,7 +1215,7 @@ pgf_parsing_scan(PgfParsing *ps)
|
|||||||
PgfParseState* state =
|
PgfParseState* state =
|
||||||
pgf_new_parse_state(ps, 0, BIND_SOFT);
|
pgf_new_parse_state(ps, 0, BIND_SOFT);
|
||||||
|
|
||||||
while (state->end_offset < len) {
|
while (state != NULL && state->end_offset < len) {
|
||||||
if (state->needs_bind) {
|
if (state->needs_bind) {
|
||||||
// We have encountered two tokens without space in between.
|
// We have encountered two tokens without space in between.
|
||||||
// Those can be accepted only if there is a BIND token
|
// Those can be accepted only if there is a BIND token
|
||||||
@@ -1546,79 +1602,6 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
pgf_parsing_item(PgfParsing* ps, PgfItem* item)
|
|
||||||
{
|
|
||||||
#ifdef PGF_PARSER_DEBUG
|
|
||||||
GuPool* tmp_pool = gu_new_pool();
|
|
||||||
GuOut* out = gu_file_out(stderr, tmp_pool);
|
|
||||||
GuExn* err = gu_exn(tmp_pool);
|
|
||||||
pgf_print_item(item, ps->before, out, err, tmp_pool);
|
|
||||||
gu_pool_free(tmp_pool);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
GuVariantInfo i = gu_variant_open(item->prod);
|
|
||||||
switch (i.tag) {
|
|
||||||
case PGF_PRODUCTION_APPLY: {
|
|
||||||
PgfProductionApply* papp = i.data;
|
|
||||||
PgfCncFun* fun = papp->fun;
|
|
||||||
PgfSymbols* syms = fun->lins[item->conts->lin_idx]->syms;
|
|
||||||
if (item->sym_idx == gu_seq_length(syms)) {
|
|
||||||
pgf_parsing_complete(ps, item, NULL);
|
|
||||||
pgf_item_free(ps, item);
|
|
||||||
} else {
|
|
||||||
pgf_parsing_symbol(ps, item, item->curr_sym);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case PGF_PRODUCTION_COERCE: {
|
|
||||||
PgfProductionCoerce* pcoerce = i.data;
|
|
||||||
switch (item->sym_idx) {
|
|
||||||
case 0:
|
|
||||||
if (pcoerce->coerce->prods == NULL) {
|
|
||||||
// empty category
|
|
||||||
pgf_item_free(ps, item);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
pgf_parsing_td_predict(ps, item,
|
|
||||||
pcoerce->coerce,
|
|
||||||
item->conts->lin_idx);
|
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
pgf_parsing_complete(ps, item, NULL);
|
|
||||||
pgf_item_free(ps, item);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
gu_impossible();
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case PGF_PRODUCTION_EXTERN: {
|
|
||||||
PgfProductionExtern* pext = i.data;
|
|
||||||
|
|
||||||
PgfSymbols* syms;
|
|
||||||
if (pext->lins != NULL &&
|
|
||||||
(syms = gu_seq_get(pext->lins,PgfSymbols*,item->conts->lin_idx)) != NULL) {
|
|
||||||
if (item->sym_idx == gu_seq_length(syms)) {
|
|
||||||
pgf_parsing_complete(ps, item, NULL);
|
|
||||||
pgf_item_free(ps, item);
|
|
||||||
} else {
|
|
||||||
PgfSymbol sym =
|
|
||||||
gu_seq_get(syms, PgfSymbol, item->sym_idx);
|
|
||||||
pgf_parsing_symbol(ps, item, sym);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
pgf_parsing_complete(ps, item, pext->ep);
|
|
||||||
pgf_item_free(ps, item);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
gu_impossible();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pgf_parsing_set_default_factors(PgfParsing* ps, PgfAbstr* abstr)
|
pgf_parsing_set_default_factors(PgfParsing* ps, PgfAbstr* abstr)
|
||||||
{
|
{
|
||||||
@@ -1654,7 +1637,6 @@ pgf_new_parsing(PgfConcr* concr, GuString sentence,
|
|||||||
#ifdef PGF_COUNTS_DEBUG
|
#ifdef PGF_COUNTS_DEBUG
|
||||||
ps->item_full_count = 0;
|
ps->item_full_count = 0;
|
||||||
ps->item_real_count = 0;
|
ps->item_real_count = 0;
|
||||||
ps->cont_full_count = 0;
|
|
||||||
ps->ccat_full_count = 0;
|
ps->ccat_full_count = 0;
|
||||||
ps->prod_full_count = 0;
|
ps->prod_full_count = 0;
|
||||||
#endif
|
#endif
|
||||||
@@ -1674,10 +1656,9 @@ pgf_new_parsing(PgfConcr* concr, GuString sentence,
|
|||||||
static void
|
static void
|
||||||
pgf_parsing_print_counts(PgfParsing* ps)
|
pgf_parsing_print_counts(PgfParsing* ps)
|
||||||
{
|
{
|
||||||
printf("%d\t%d\t%d\t%d\t%d\n",
|
printf("%d\t%d\t%d\t%d\n",
|
||||||
ps->item_full_count,
|
ps->item_full_count,
|
||||||
ps->item_real_count,
|
ps->item_real_count,
|
||||||
ps->cont_full_count,
|
|
||||||
ps->ccat_full_count,
|
ps->ccat_full_count,
|
||||||
ps->prod_full_count);
|
ps->prod_full_count);
|
||||||
}
|
}
|
||||||
@@ -1734,7 +1715,7 @@ pgf_result_production(PgfParsing* ps,
|
|||||||
st->args = gu_empty_seq();
|
st->args = gu_empty_seq();
|
||||||
st->arg_idx = 0;
|
st->arg_idx = 0;
|
||||||
|
|
||||||
pgf_result_predict(ps, st, ccat);
|
pgf_result_predict(ps, st, ccat, answers->outside_prob);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PGF_PRODUCTION_EXTERN: {
|
case PGF_PRODUCTION_EXTERN: {
|
||||||
@@ -1756,21 +1737,16 @@ pgf_result_production(PgfParsing* ps,
|
|||||||
|
|
||||||
static void
|
static void
|
||||||
pgf_result_predict(PgfParsing* ps,
|
pgf_result_predict(PgfParsing* ps,
|
||||||
PgfExprState* cont, PgfCCat* ccat)
|
PgfExprState* cont, PgfCCat* ccat,
|
||||||
|
prob_t outside_prob)
|
||||||
{
|
{
|
||||||
prob_t outside_prob = 0;
|
|
||||||
if (cont != NULL) {
|
|
||||||
cont->ep.prob -= ccat->viterbi_prob;
|
|
||||||
outside_prob =
|
|
||||||
cont->answers->outside_prob+cont->ep.prob;
|
|
||||||
}
|
|
||||||
|
|
||||||
PgfAnswers* answers = ccat->answers;
|
PgfAnswers* answers = ccat->answers;
|
||||||
if (answers == NULL) {
|
if (answers == NULL) {
|
||||||
answers = gu_new(PgfAnswers, ps->pool);
|
answers = gu_new(PgfAnswers, ps->pool);
|
||||||
answers->conts = gu_new_buf(PgfExprState*, ps->pool);
|
answers->conts = gu_new_buf(PgfExprState*, ps->pool);
|
||||||
answers->exprs = gu_new_buf(PgfExprProb*, ps->pool);
|
answers->exprs = gu_new_buf(PgfExprProb*, ps->pool);
|
||||||
answers->outside_prob = outside_prob;
|
answers->outside_prob = outside_prob;
|
||||||
|
answers->ccat = ccat;
|
||||||
|
|
||||||
ccat->answers = answers;
|
ccat->answers = answers;
|
||||||
}
|
}
|
||||||
@@ -1802,8 +1778,14 @@ pgf_result_predict(PgfParsing* ps,
|
|||||||
.fun = cont->ep.expr,
|
.fun = cont->ep.expr,
|
||||||
.arg = ep->expr);
|
.arg = ep->expr);
|
||||||
st->ep.prob = cont->ep.prob+ep->prob;
|
st->ep.prob = cont->ep.prob+ep->prob;
|
||||||
st->args = cont->args;
|
|
||||||
st->arg_idx = cont->arg_idx+1;
|
if (cont->arg_idx == PGF_EXPR_CHUNK_STATE) {
|
||||||
|
st->state = gu_map_get(cont->state->chunks_map, ccat, PgfParseState*);
|
||||||
|
st->arg_idx = PGF_EXPR_CHUNK_STATE;
|
||||||
|
} else {
|
||||||
|
st->args = cont->args;
|
||||||
|
st->arg_idx = cont->arg_idx+1;
|
||||||
|
}
|
||||||
|
|
||||||
gu_buf_heap_push(ps->expr_queue, &pgf_expr_state_order, &st);
|
gu_buf_heap_push(ps->expr_queue, &pgf_expr_state_order, &st);
|
||||||
}
|
}
|
||||||
@@ -1864,23 +1846,20 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat,
|
|||||||
start_ccat->linrefs = NULL;
|
start_ccat->linrefs = NULL;
|
||||||
start_ccat->viterbi_prob = 0;
|
start_ccat->viterbi_prob = 0;
|
||||||
start_ccat->fid = -5;
|
start_ccat->fid = -5;
|
||||||
|
start_ccat->chunk_count = 1;
|
||||||
start_ccat->conts = NULL;
|
start_ccat->conts = NULL;
|
||||||
start_ccat->answers = NULL;
|
start_ccat->answers = NULL;
|
||||||
start_ccat->prods = NULL;
|
start_ccat->prods = NULL;
|
||||||
start_ccat->n_synprods = 0;
|
start_ccat->n_synprods = 0;
|
||||||
|
|
||||||
#ifdef PGF_COUNTS_DEBUG
|
#ifdef PGF_COUNTS_DEBUG
|
||||||
state->ps->ccat_full_count++;
|
ps->ccat_full_count++;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
PgfItemConts* conts =
|
PgfItemConts* conts =
|
||||||
pgf_parsing_get_conts(ps->before, start_ccat, 0, ps->pool);
|
pgf_parsing_get_conts(ps->before, start_ccat, 0, ps->pool);
|
||||||
gu_buf_push(conts->items, PgfItem*, NULL);
|
gu_buf_push(conts->items, PgfItem*, NULL);
|
||||||
|
|
||||||
#ifdef PGF_COUNTS_DEBUG
|
|
||||||
ps->cont_full_count++;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
size_t n_ccats = gu_seq_length(cnccat->cats);
|
size_t n_ccats = gu_seq_length(cnccat->cats);
|
||||||
for (size_t i = 0; i < n_ccats; i++) {
|
for (size_t i = 0; i < n_ccats; i++) {
|
||||||
PgfCCat* ccat = gu_seq_get(cnccat->cats, PgfCCat*, i);
|
PgfCCat* ccat = gu_seq_get(cnccat->cats, PgfCCat*, i);
|
||||||
@@ -1970,7 +1949,21 @@ pgf_parsing_proceed(PgfParsing* ps)
|
|||||||
if (has_progress) {
|
if (has_progress) {
|
||||||
PgfItem* item;
|
PgfItem* item;
|
||||||
gu_buf_heap_pop(ps->before->agenda, pgf_item_prob_order, &item);
|
gu_buf_heap_pop(ps->before->agenda, pgf_item_prob_order, &item);
|
||||||
pgf_parsing_item(ps, item);
|
|
||||||
|
#ifdef PGF_PARSER_DEBUG
|
||||||
|
GuPool* tmp_pool = gu_new_pool();
|
||||||
|
GuOut* out = gu_file_out(stderr, tmp_pool);
|
||||||
|
GuExn* err = gu_exn(tmp_pool);
|
||||||
|
pgf_print_item(item, ps->before, out, err, tmp_pool);
|
||||||
|
gu_pool_free(tmp_pool);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (gu_variant_is_null(item->curr_sym)) {
|
||||||
|
pgf_parsing_complete(ps, item, NULL);
|
||||||
|
pgf_item_free(ps, item);
|
||||||
|
} else {
|
||||||
|
pgf_parsing_symbol(ps, item, item->curr_sym);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
while (ps->after != NULL) {
|
while (ps->after != NULL) {
|
||||||
@@ -1983,6 +1976,28 @@ pgf_parsing_proceed(PgfParsing* ps)
|
|||||||
return has_progress;
|
return has_progress;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
GuMapItor fn;
|
||||||
|
PgfParsing* ps;
|
||||||
|
PgfExprState* st;
|
||||||
|
} PgfChunkCatItor;
|
||||||
|
|
||||||
|
static void
|
||||||
|
pgf_iter_chunk_cat(GuMapItor* fn,
|
||||||
|
const void* key, void* value,
|
||||||
|
GuExn *err)
|
||||||
|
{
|
||||||
|
PgfChunkCatItor* clo = (PgfChunkCatItor*) fn;
|
||||||
|
PgfCCat* ccat = (PgfCCat*) key;
|
||||||
|
|
||||||
|
prob_t outside_prob =
|
||||||
|
clo->st->answers->outside_prob+
|
||||||
|
clo->st->ep.prob+
|
||||||
|
ccat->cnccat->abscat->prob;
|
||||||
|
|
||||||
|
pgf_result_predict(clo->ps, clo->st, ccat, outside_prob);
|
||||||
|
}
|
||||||
|
|
||||||
static PgfExprProb*
|
static PgfExprProb*
|
||||||
pgf_parse_result_next(PgfParsing* ps)
|
pgf_parse_result_next(PgfParsing* ps)
|
||||||
{
|
{
|
||||||
@@ -2005,11 +2020,28 @@ pgf_parse_result_next(PgfParsing* ps)
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (st->arg_idx < gu_seq_length(st->args)) {
|
if (st->arg_idx == PGF_EXPR_CHUNK_STATE) {
|
||||||
|
// here we look for chunks
|
||||||
|
|
||||||
|
if (st->state == ps->before) {
|
||||||
|
if (pgf_parse_result_is_new(st)) {
|
||||||
|
gu_buf_push(st->answers->exprs, PgfExprProb*, &st->ep);
|
||||||
|
return &st->ep;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
PgfChunkCatItor clo = { { pgf_iter_chunk_cat }, ps, st };
|
||||||
|
if (st->state->chunks_map != NULL)
|
||||||
|
gu_map_iter(st->state->chunks_map, &clo.fn, NULL);
|
||||||
|
}
|
||||||
|
} else if (st->arg_idx < gu_seq_length(st->args)) {
|
||||||
|
// here we handle normal unfinished expression states
|
||||||
|
|
||||||
PgfCCat* ccat =
|
PgfCCat* ccat =
|
||||||
gu_seq_index(st->args, PgfPArg, st->arg_idx)->ccat;
|
gu_seq_index(st->args, PgfPArg, st->arg_idx)->ccat;
|
||||||
|
|
||||||
if (ccat->fid < ps->concr->total_cats) {
|
if (ccat->fid < ps->concr->total_cats) {
|
||||||
|
// when argument was not used by the parser,
|
||||||
|
// we create a metavariable
|
||||||
PgfExpr meta = gu_new_variant_i(ps->out_pool,
|
PgfExpr meta = gu_new_variant_i(ps->out_pool,
|
||||||
PGF_EXPR_META, PgfExprMeta,
|
PGF_EXPR_META, PgfExprMeta,
|
||||||
.id = 0);
|
.id = 0);
|
||||||
@@ -2024,7 +2056,10 @@ pgf_parse_result_next(PgfParsing* ps)
|
|||||||
st->arg_idx++;
|
st->arg_idx++;
|
||||||
gu_buf_heap_push(ps->expr_queue, &pgf_expr_state_order, &st);
|
gu_buf_heap_push(ps->expr_queue, &pgf_expr_state_order, &st);
|
||||||
} else {
|
} else {
|
||||||
pgf_result_predict(ps, st, ccat);
|
prob_t outside_prob =
|
||||||
|
st->answers->outside_prob+
|
||||||
|
st->ep.prob-ccat->viterbi_prob;
|
||||||
|
pgf_result_predict(ps, st, ccat, outside_prob);
|
||||||
}
|
}
|
||||||
} else if (pgf_parse_result_is_new(st)) {
|
} else if (pgf_parse_result_is_new(st)) {
|
||||||
gu_buf_push(st->answers->exprs, PgfExprProb*, &st->ep);
|
gu_buf_push(st->answers->exprs, PgfExprProb*, &st->ep);
|
||||||
@@ -2032,7 +2067,7 @@ pgf_parse_result_next(PgfParsing* ps)
|
|||||||
size_t n_conts = gu_buf_length(st->answers->conts);
|
size_t n_conts = gu_buf_length(st->answers->conts);
|
||||||
for (size_t i = 0; i < n_conts; i++) {
|
for (size_t i = 0; i < n_conts; i++) {
|
||||||
PgfExprState* st2 = gu_buf_get(st->answers->conts, PgfExprState*, i);
|
PgfExprState* st2 = gu_buf_get(st->answers->conts, PgfExprState*, i);
|
||||||
|
|
||||||
if (st2 == NULL) {
|
if (st2 == NULL) {
|
||||||
return &st->ep;
|
return &st->ep;
|
||||||
}
|
}
|
||||||
@@ -2046,9 +2081,17 @@ pgf_parse_result_next(PgfParsing* ps)
|
|||||||
PGF_EXPR_APP, PgfExprApp,
|
PGF_EXPR_APP, PgfExprApp,
|
||||||
.fun = st2->ep.expr,
|
.fun = st2->ep.expr,
|
||||||
.arg = st->ep.expr);
|
.arg = st->ep.expr);
|
||||||
st3->ep.prob = st2->ep.prob + st->ep.prob;
|
if (st2->arg_idx == PGF_EXPR_CHUNK_STATE) {
|
||||||
st3->args = st2->args;
|
st3->ep.prob = st2->ep.prob+st->answers->ccat->cnccat->abscat->prob +
|
||||||
st3->arg_idx = st2->arg_idx+1;
|
st->ep.prob;
|
||||||
|
st3->state = gu_map_get(st2->state->chunks_map, st->answers->ccat, PgfParseState*);
|
||||||
|
st3->arg_idx = PGF_EXPR_CHUNK_STATE;
|
||||||
|
} else {
|
||||||
|
st3->ep.prob = st2->ep.prob-st->answers->ccat->viterbi_prob +
|
||||||
|
st->ep.prob;
|
||||||
|
st3->args = st2->args;
|
||||||
|
st3->arg_idx = st2->arg_idx+1;
|
||||||
|
}
|
||||||
|
|
||||||
gu_buf_heap_push(ps->expr_queue, &pgf_expr_state_order, &st3);
|
gu_buf_heap_push(ps->expr_queue, &pgf_expr_state_order, &st3);
|
||||||
}
|
}
|
||||||
@@ -2107,6 +2150,126 @@ pgf_parse(PgfConcr* concr, PgfType* typ, GuString sentence,
|
|||||||
return pgf_parse_with_heuristics(concr, typ, sentence, -1.0, callbacks, err, pool, out_pool);
|
return pgf_parse_with_heuristics(concr, typ, sentence, -1.0, callbacks, err, pool, out_pool);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
pgf_iter_generated_cats(PgfParsing* ps, PgfParseState* next_state);
|
||||||
|
|
||||||
|
static void
|
||||||
|
pgf_process_generated_cat(PgfParsing* ps,
|
||||||
|
PgfParseState* state, PgfParseState* next_state,
|
||||||
|
PgfCCat* ccat)
|
||||||
|
{
|
||||||
|
bool just_coercions = true;
|
||||||
|
|
||||||
|
PgfCCat* children[ccat->n_synprods];
|
||||||
|
for (size_t i = 0; i < ccat->n_synprods; i++) {
|
||||||
|
PgfProduction prod =
|
||||||
|
gu_seq_get(ccat->prods, PgfProduction, i);
|
||||||
|
|
||||||
|
children[i] = NULL;
|
||||||
|
|
||||||
|
GuVariantInfo inf = gu_variant_open(prod);
|
||||||
|
switch (inf.tag) {
|
||||||
|
case PGF_PRODUCTION_APPLY: {
|
||||||
|
PgfProductionApply* papp = inf.data;
|
||||||
|
|
||||||
|
size_t j = gu_seq_length(papp->args);
|
||||||
|
while (j > 0) {
|
||||||
|
PgfPArg* parg = gu_seq_index(papp->args, PgfPArg, j-1);
|
||||||
|
|
||||||
|
if (pgf_parsing_get_completed(state, parg->ccat->conts) != NULL &&
|
||||||
|
ccat->conts->state->end_offset == parg->ccat->conts->state->end_offset) {
|
||||||
|
children[i] = parg->ccat;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
j--;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (children[i] == NULL) {
|
||||||
|
just_coercions = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PGF_PRODUCTION_COERCE: {
|
||||||
|
PgfProductionCoerce* pcoerce = inf.data;
|
||||||
|
children[i] = pcoerce->coerce;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (just_coercions) {
|
||||||
|
ccat->chunk_count++;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < ccat->n_synprods; i++) {
|
||||||
|
children[i]->chunk_count--;
|
||||||
|
|
||||||
|
#ifdef PGF_PARSER_DEBUG
|
||||||
|
GuPool* tmp_pool = gu_new_pool();
|
||||||
|
GuOut* out = gu_file_out(stderr, tmp_pool);
|
||||||
|
GuExn* err = gu_exn(tmp_pool);
|
||||||
|
pgf_print_fid(children[i]->fid, out, err);
|
||||||
|
gu_printf(out, err, ".chunk_count=%d\n", children[i]->chunk_count);
|
||||||
|
gu_pool_free(tmp_pool);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (children[i]->chunk_count == 0) {
|
||||||
|
pgf_process_generated_cat(ps, state, next_state, children[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
PgfParseState* prev_state = ccat->conts->state;
|
||||||
|
if (prev_state->chunks_map == NULL) {
|
||||||
|
pgf_iter_generated_cats(ps, prev_state);
|
||||||
|
|
||||||
|
if (prev_state->chunks_map == NULL) {
|
||||||
|
prev_state->chunks_map =
|
||||||
|
gu_new_addr_map(PgfCCat*, PgfParseState*,
|
||||||
|
&gu_null_struct, ps->pool);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef PGF_PARSER_DEBUG
|
||||||
|
GuPool* tmp_pool = gu_new_pool();
|
||||||
|
GuOut* out = gu_file_out(stderr, tmp_pool);
|
||||||
|
GuExn* err = gu_exn(tmp_pool);
|
||||||
|
gu_printf(out, err, "[%d - ", prev_state->end_offset);
|
||||||
|
pgf_print_fid(ccat->fid, out, err);
|
||||||
|
gu_printf(out, err, " - %d]\n", next_state->start_offset);
|
||||||
|
gu_pool_free(tmp_pool);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
gu_map_put(prev_state->chunks_map, ccat, PgfParseState*, next_state);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
pgf_iter_generated_cats(PgfParsing* ps, PgfParseState* next_state)
|
||||||
|
{
|
||||||
|
size_t count = 0;
|
||||||
|
PgfParseState* state = next_state;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
size_t i = 0;
|
||||||
|
PgfCCat* ccat;
|
||||||
|
PgfItemConts* conts;
|
||||||
|
while (gu_map_next(state->generated_cats, &i, (void**)&conts, &ccat)) {
|
||||||
|
if (ccat->chunk_count > 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
count++;
|
||||||
|
|
||||||
|
pgf_process_generated_cat(ps, state, next_state, ccat);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (count > 0 || state->next == NULL)
|
||||||
|
break;
|
||||||
|
|
||||||
|
state = state->next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
PGF_API GuEnum*
|
PGF_API GuEnum*
|
||||||
pgf_parse_with_heuristics(PgfConcr* concr, PgfType* typ, GuString sentence,
|
pgf_parse_with_heuristics(PgfConcr* concr, PgfType* typ, GuString sentence,
|
||||||
double heuristics,
|
double heuristics,
|
||||||
@@ -2138,7 +2301,34 @@ pgf_parse_with_heuristics(PgfConcr* concr, PgfType* typ, GuString sentence,
|
|||||||
if (!pgf_parsing_proceed(ps)) {
|
if (!pgf_parsing_proceed(ps)) {
|
||||||
GuExnData* exn = gu_raise(err, PgfParseError);
|
GuExnData* exn = gu_raise(err, PgfParseError);
|
||||||
exn->data = (void*) pgf_parsing_new_exception(ps, exn->pool);
|
exn->data = (void*) pgf_parsing_new_exception(ps, exn->pool);
|
||||||
return NULL;
|
|
||||||
|
PgfExprState* st = gu_new(PgfExprState, ps->pool);
|
||||||
|
st->answers = gu_new(PgfAnswers, ps->pool);
|
||||||
|
st->answers->conts = gu_new_buf(PgfExprState*, ps->pool);
|
||||||
|
st->answers->exprs = gu_new_buf(PgfExprProb*, ps->pool);
|
||||||
|
st->answers->ccat = NULL;
|
||||||
|
st->answers->outside_prob = 0;
|
||||||
|
st->ep.expr =
|
||||||
|
gu_new_variant_i(ps->out_pool,
|
||||||
|
PGF_EXPR_META, PgfExprMeta,
|
||||||
|
.id = 0);
|
||||||
|
st->ep.prob = 0;
|
||||||
|
st->state = NULL;
|
||||||
|
st->arg_idx = PGF_EXPR_CHUNK_STATE;
|
||||||
|
|
||||||
|
pgf_iter_generated_cats(ps, ps->before);
|
||||||
|
|
||||||
|
PgfParseState* state = ps->before;
|
||||||
|
while (state != NULL) {
|
||||||
|
if (state->chunks_map != NULL)
|
||||||
|
st->state = state;
|
||||||
|
state = state->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (st->state != NULL) {
|
||||||
|
gu_buf_heap_push(ps->expr_queue, &pgf_expr_state_order, &st);
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef PGF_COUNTS_DEBUG
|
#ifdef PGF_COUNTS_DEBUG
|
||||||
|
|||||||
@@ -844,6 +844,7 @@ pgf_read_fid(PgfReader* rdr, PgfConcr* concr)
|
|||||||
ccat->prods = NULL;
|
ccat->prods = NULL;
|
||||||
ccat->viterbi_prob = 0;
|
ccat->viterbi_prob = 0;
|
||||||
ccat->fid = fid;
|
ccat->fid = fid;
|
||||||
|
ccat->chunk_count = 1;
|
||||||
ccat->conts = NULL;
|
ccat->conts = NULL;
|
||||||
ccat->answers = NULL;
|
ccat->answers = NULL;
|
||||||
|
|
||||||
@@ -1081,6 +1082,7 @@ pgf_read_cnccat(PgfReader* rdr, PgfAbstr* abstr, PgfConcr* concr, PgfCId name)
|
|||||||
ccat->prods = NULL;
|
ccat->prods = NULL;
|
||||||
ccat->viterbi_prob = 0;
|
ccat->viterbi_prob = 0;
|
||||||
ccat->fid = fid;
|
ccat->fid = fid;
|
||||||
|
ccat->chunk_count = 1;
|
||||||
ccat->conts = NULL;
|
ccat->conts = NULL;
|
||||||
ccat->answers = NULL;
|
ccat->answers = NULL;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user