mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-23 09:52:55 -06:00
Use a separated tag for meta productions in the robust parser. This cleans up the code a lot
This commit is contained in:
@@ -66,7 +66,12 @@ gu_make_buf(size_t elem_size, GuPool* pool)
|
|||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const GuWord gu_empty_seq[2] = {0, 0};
|
static const GuWord gu_empty_seq_[2] = {0, 0};
|
||||||
|
|
||||||
|
GuSeq
|
||||||
|
gu_empty_seq() {
|
||||||
|
return (GuSeq) { gu_tagged((void*)&gu_empty_seq_[1], 0) };
|
||||||
|
}
|
||||||
|
|
||||||
GuSeq
|
GuSeq
|
||||||
gu_make_seq(size_t elem_size, size_t length, GuPool* pool)
|
gu_make_seq(size_t elem_size, size_t length, GuPool* pool)
|
||||||
@@ -76,7 +81,7 @@ gu_make_seq(size_t elem_size, size_t length, GuPool* pool)
|
|||||||
void* buf = gu_malloc(pool, size);
|
void* buf = gu_malloc(pool, size);
|
||||||
return (GuSeq) { gu_tagged(buf, length) };
|
return (GuSeq) { gu_tagged(buf, length) };
|
||||||
} else if (size == 0) {
|
} else if (size == 0) {
|
||||||
return (GuSeq) { gu_tagged((void*)&gu_empty_seq[1], 0) };
|
return gu_empty_seq();
|
||||||
} else {
|
} else {
|
||||||
void* buf = gu_malloc_prefixed(pool,
|
void* buf = gu_malloc_prefixed(pool,
|
||||||
gu_alignof(GuWord),
|
gu_alignof(GuWord),
|
||||||
|
|||||||
@@ -9,6 +9,9 @@ typedef struct GuBuf GuBuf;
|
|||||||
|
|
||||||
typedef GuOpaque() GuSeq;
|
typedef GuOpaque() GuSeq;
|
||||||
|
|
||||||
|
GuSeq
|
||||||
|
gu_empty_seq();
|
||||||
|
|
||||||
GuSeq
|
GuSeq
|
||||||
gu_make_seq(size_t elem_size, size_t len, GuPool* pool);
|
gu_make_seq(size_t elem_size, size_t len, GuPool* pool);
|
||||||
|
|
||||||
|
|||||||
@@ -127,9 +127,12 @@ GU_DEFINE_TYPE(
|
|||||||
GU_MEMBER(PgfProductionCoerce, coerce, PgfCCatId)),
|
GU_MEMBER(PgfProductionCoerce, coerce, PgfCCatId)),
|
||||||
GU_CONSTRUCTOR_S(
|
GU_CONSTRUCTOR_S(
|
||||||
PGF_PRODUCTION_EXTERN, PgfProductionExtern,
|
PGF_PRODUCTION_EXTERN, PgfProductionExtern,
|
||||||
GU_MEMBER(PgfProductionExtern, fun, PgfFunId),
|
GU_MEMBER(PgfProductionExtern, callback, PgfLiteralCallback),
|
||||||
GU_MEMBER(PgfProductionExtern, args, PgfPArgs),
|
GU_MEMBER(PgfProductionExtern, lins, GuSeq)),
|
||||||
GU_MEMBER(PgfProductionExtern, callback, PgfLiteralCallback)));
|
GU_CONSTRUCTOR_S(
|
||||||
|
PGF_PRODUCTION_META, PgfProductionMeta,
|
||||||
|
GU_MEMBER(PgfProductionMeta, lins, GuSeq),
|
||||||
|
GU_MEMBER(PgfProductionMeta, args, PgfPArgs)));
|
||||||
|
|
||||||
GU_DEFINE_TYPE(PgfProductions, GuList, gu_type(PgfProduction));
|
GU_DEFINE_TYPE(PgfProductions, GuList, gu_type(PgfProduction));
|
||||||
GU_DEFINE_TYPE(PgfProductionSeq, GuSeq, gu_type(PgfProduction));
|
GU_DEFINE_TYPE(PgfProductionSeq, GuSeq, gu_type(PgfProduction));
|
||||||
|
|||||||
@@ -42,7 +42,6 @@ typedef struct PgfAbstr PgfAbstr;
|
|||||||
extern GU_DECLARE_TYPE(PgfAbstr, struct);
|
extern GU_DECLARE_TYPE(PgfAbstr, struct);
|
||||||
|
|
||||||
typedef struct PgfFunDecl PgfFunDecl;
|
typedef struct PgfFunDecl PgfFunDecl;
|
||||||
typedef struct PgfConcr PgfConcr;
|
|
||||||
|
|
||||||
typedef int PgfLength;
|
typedef int PgfLength;
|
||||||
typedef struct GuVariant PgfSymbol;
|
typedef struct GuVariant PgfSymbol;
|
||||||
@@ -293,7 +292,8 @@ typedef struct PgfSymbolKP
|
|||||||
typedef enum {
|
typedef enum {
|
||||||
PGF_PRODUCTION_APPLY,
|
PGF_PRODUCTION_APPLY,
|
||||||
PGF_PRODUCTION_COERCE,
|
PGF_PRODUCTION_COERCE,
|
||||||
PGF_PRODUCTION_EXTERN
|
PGF_PRODUCTION_EXTERN,
|
||||||
|
PGF_PRODUCTION_META
|
||||||
} PgfProductionTag;
|
} PgfProductionTag;
|
||||||
|
|
||||||
typedef struct PgfPArg PgfPArg;
|
typedef struct PgfPArg PgfPArg;
|
||||||
@@ -323,11 +323,17 @@ typedef struct PgfProductionCoerce
|
|||||||
} PgfProductionCoerce;
|
} PgfProductionCoerce;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
PgfFunId fun;
|
|
||||||
PgfPArgs args;
|
|
||||||
PgfLiteralCallback *callback;
|
PgfLiteralCallback *callback;
|
||||||
|
PgfExprProb *ep;
|
||||||
|
GuSeq lins;
|
||||||
} PgfProductionExtern;
|
} PgfProductionExtern;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
PgfExprProb *ep;
|
||||||
|
GuSeq lins;
|
||||||
|
PgfPArgs args;
|
||||||
|
} PgfProductionMeta;
|
||||||
|
|
||||||
extern GU_DECLARE_TYPE(PgfPatt, GuVariant);
|
extern GU_DECLARE_TYPE(PgfPatt, GuVariant);
|
||||||
|
|
||||||
extern GU_DECLARE_TYPE(PgfProduction, GuVariant);
|
extern GU_DECLARE_TYPE(PgfProduction, GuVariant);
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ typedef struct {
|
|||||||
PgfConcr* concr;
|
PgfConcr* concr;
|
||||||
PgfCCat* completed;
|
PgfCCat* completed;
|
||||||
PgfItem* target;
|
PgfItem* target;
|
||||||
|
PgfExpr meta_var;
|
||||||
PgfProduction meta_prod;
|
PgfProduction meta_prod;
|
||||||
int max_fid;
|
int max_fid;
|
||||||
int item_count;
|
int item_count;
|
||||||
@@ -46,7 +47,6 @@ struct PgfParseState {
|
|||||||
PgfItemBuf* agenda;
|
PgfItemBuf* agenda;
|
||||||
PgfContsMap* conts_map;
|
PgfContsMap* conts_map;
|
||||||
PgfGenCatMap* generated_cats;
|
PgfGenCatMap* generated_cats;
|
||||||
PgfExprProb* meta_ep;
|
|
||||||
int offset;
|
int offset;
|
||||||
|
|
||||||
PgfParsing* ps;
|
PgfParsing* ps;
|
||||||
@@ -161,8 +161,26 @@ pgf_item_sequence_length(PgfItem* item)
|
|||||||
PgfProductionExtern* pext = i.data;
|
PgfProductionExtern* pext = i.data;
|
||||||
PgfSequence seq;
|
PgfSequence seq;
|
||||||
|
|
||||||
if (pext->fun != NULL &&
|
if (!gu_seq_is_null(pext->lins) &&
|
||||||
!gu_seq_is_null(seq = pext->fun->lins[item->base->lin_idx])) {
|
!gu_seq_is_null(seq = gu_seq_get(pext->lins,PgfSequence,item->base->lin_idx))) {
|
||||||
|
return gu_seq_length(seq);
|
||||||
|
} else {
|
||||||
|
int seq_len = 0;
|
||||||
|
PgfSymbol sym = item->curr_sym;
|
||||||
|
while (!gu_variant_is_null(sym)) {
|
||||||
|
seq_len++;
|
||||||
|
sym = pgf_prev_extern_sym(sym);
|
||||||
|
}
|
||||||
|
|
||||||
|
return seq_len;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case PGF_PRODUCTION_META: {
|
||||||
|
PgfProductionMeta* pmeta = i.data;
|
||||||
|
PgfSequence seq;
|
||||||
|
|
||||||
|
if (!gu_seq_is_null(pmeta->lins) &&
|
||||||
|
!gu_seq_is_null(seq = gu_seq_get(pmeta->lins,PgfSequence,item->base->lin_idx))) {
|
||||||
return gu_seq_length(seq);
|
return gu_seq_length(seq);
|
||||||
} else {
|
} else {
|
||||||
int seq_len = 0;
|
int seq_len = 0;
|
||||||
@@ -222,8 +240,17 @@ pgf_item_sequence(PgfItem* item,
|
|||||||
case PGF_PRODUCTION_EXTERN: {
|
case PGF_PRODUCTION_EXTERN: {
|
||||||
PgfProductionExtern* pext = i.data;
|
PgfProductionExtern* pext = i.data;
|
||||||
|
|
||||||
if (pext->fun == NULL ||
|
if (gu_seq_is_null(pext->lins) ||
|
||||||
gu_seq_is_null(*seq = pext->fun->lins[item->base->lin_idx])) {
|
gu_seq_is_null(*seq = gu_seq_get(pext->lins, PgfSequence, item->base->lin_idx))) {
|
||||||
|
*seq = pgf_extern_seq_get(item, pool);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PGF_PRODUCTION_META: {
|
||||||
|
PgfProductionMeta* pmeta = i.data;
|
||||||
|
|
||||||
|
if (gu_seq_is_null(pmeta->lins) ||
|
||||||
|
gu_seq_is_null(*seq = gu_seq_get(pmeta->lins,PgfSequence,item->base->lin_idx))) {
|
||||||
*seq = pgf_extern_seq_get(item, pool);
|
*seq = pgf_extern_seq_get(item, pool);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@@ -284,9 +311,14 @@ pgf_print_production(int fid, PgfProduction prod,
|
|||||||
case PGF_PRODUCTION_EXTERN: {
|
case PGF_PRODUCTION_EXTERN: {
|
||||||
PgfProductionExtern* pext = i.data;
|
PgfProductionExtern* pext = i.data;
|
||||||
gu_printf(wtr,err,"<extern>(");
|
gu_printf(wtr,err,"<extern>(");
|
||||||
pgf_print_expr(pext->fun->ep->expr, 0, wtr, err);
|
pgf_print_expr(pext->ep->expr, 0, wtr, err);
|
||||||
gu_printf(wtr,err,")[");
|
gu_printf(wtr,err,")[]\n");
|
||||||
pgf_print_production_args(pext->args,wtr,err);
|
break;
|
||||||
|
}
|
||||||
|
case PGF_PRODUCTION_META: {
|
||||||
|
PgfProductionMeta* pmeta = i.data;
|
||||||
|
gu_printf(wtr,err,"<meta>[");
|
||||||
|
pgf_print_production_args(pmeta->args,wtr,err);
|
||||||
gu_printf(wtr,err,"]\n");
|
gu_printf(wtr,err,"]\n");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -349,9 +381,9 @@ pgf_print_item(PgfItem* item, GuWriter* wtr, GuExn* err, GuPool* pool)
|
|||||||
case PGF_PRODUCTION_EXTERN: {
|
case PGF_PRODUCTION_EXTERN: {
|
||||||
PgfProductionExtern* pext = i.data;
|
PgfProductionExtern* pext = i.data;
|
||||||
gu_printf(wtr, err, "<extern>");
|
gu_printf(wtr, err, "<extern>");
|
||||||
if (pext->fun != NULL) {
|
if (pext->ep != NULL) {
|
||||||
gu_printf(wtr, err, "(");
|
gu_printf(wtr, err, "(");
|
||||||
pgf_print_expr(pext->fun->ep->expr, 0, wtr, err);
|
pgf_print_expr(pext->ep->expr, 0, wtr, err);
|
||||||
gu_printf(wtr, err, ")");
|
gu_printf(wtr, err, ")");
|
||||||
}
|
}
|
||||||
gu_printf(wtr, err, "[");
|
gu_printf(wtr, err, "[");
|
||||||
@@ -359,6 +391,12 @@ pgf_print_item(PgfItem* item, GuWriter* wtr, GuExn* err, GuPool* pool)
|
|||||||
gu_printf(wtr, err, "]; ");
|
gu_printf(wtr, err, "]; ");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case PGF_PRODUCTION_META: {
|
||||||
|
gu_printf(wtr, err, "<meta>[");
|
||||||
|
pgf_print_production_args(item->args, wtr, err);
|
||||||
|
gu_printf(wtr, err, "]; ");
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
gu_impossible();
|
gu_impossible();
|
||||||
}
|
}
|
||||||
@@ -486,6 +524,9 @@ pgf_item_set_curr_symbol(PgfItem* item, GuPool* pool)
|
|||||||
case PGF_PRODUCTION_EXTERN: {
|
case PGF_PRODUCTION_EXTERN: {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case PGF_PRODUCTION_META: {
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
gu_impossible();
|
gu_impossible();
|
||||||
}
|
}
|
||||||
@@ -529,8 +570,20 @@ pgf_new_item(int pos, PgfCCat* ccat, size_t lin_idx,
|
|||||||
}
|
}
|
||||||
case PGF_PRODUCTION_EXTERN: {
|
case PGF_PRODUCTION_EXTERN: {
|
||||||
PgfProductionExtern* pext = pi.data;
|
PgfProductionExtern* pext = pi.data;
|
||||||
item->args = pext->args;
|
item->args = gu_empty_seq();
|
||||||
item->inside_prob = pext->fun ? pext->fun->ep->prob : 0;
|
item->inside_prob = pext->ep ? pext->ep->prob : 0;
|
||||||
|
|
||||||
|
int n_args = gu_seq_length(item->args);
|
||||||
|
for (int i = 0; i < n_args; i++) {
|
||||||
|
PgfPArg *arg = gu_seq_index(item->args, PgfPArg, i);
|
||||||
|
item->inside_prob += arg->ccat->viterbi_prob;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PGF_PRODUCTION_META: {
|
||||||
|
PgfProductionMeta* pmeta = pi.data;
|
||||||
|
item->args = pmeta->args;
|
||||||
|
item->inside_prob = pmeta->ep ? pmeta->ep->prob : 0;
|
||||||
|
|
||||||
int n_args = gu_seq_length(item->args);
|
int n_args = gu_seq_length(item->args);
|
||||||
for (int i = 0; i < n_args; i++) {
|
for (int i = 0; i < n_args; i++) {
|
||||||
@@ -621,10 +674,10 @@ pgf_parsing_combine(PgfParseState* before, PgfParseState* after,
|
|||||||
|
|
||||||
bool extend = false;
|
bool extend = false;
|
||||||
GuVariantInfo i = gu_variant_open(cont->base->prod);
|
GuVariantInfo i = gu_variant_open(cont->base->prod);
|
||||||
if (i.tag == PGF_PRODUCTION_EXTERN) {
|
if (i.tag == PGF_PRODUCTION_META) {
|
||||||
PgfProductionExtern* pext = i.data;
|
PgfProductionMeta* pmeta = i.data;
|
||||||
if (pext->fun == NULL ||
|
if (gu_seq_is_null(pmeta->lins) ||
|
||||||
gu_seq_is_null(pext->fun->lins[cont->base->lin_idx])) {
|
gu_seq_is_null(gu_seq_get(pmeta->lins,PgfSequence,cont->base->lin_idx))) {
|
||||||
extend = true;
|
extend = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -719,39 +772,72 @@ pgf_parsing_new_production(PgfItem* item, PgfExprProb *ep, GuPool *pool)
|
|||||||
}
|
}
|
||||||
case PGF_PRODUCTION_EXTERN: {
|
case PGF_PRODUCTION_EXTERN: {
|
||||||
PgfProductionExtern* pext = i.data;
|
PgfProductionExtern* pext = i.data;
|
||||||
PgfCncFun* fun = pext->fun;
|
|
||||||
|
|
||||||
if (fun == NULL ||
|
|
||||||
gu_seq_is_null(fun->lins[item->base->lin_idx])) {
|
|
||||||
|
|
||||||
|
if (gu_seq_is_null(pext->lins) ||
|
||||||
|
gu_seq_is_null(gu_seq_get(pext->lins,PgfSequence,item->base->lin_idx))) {
|
||||||
PgfSequence seq =
|
PgfSequence seq =
|
||||||
pgf_extern_seq_get(item, pool);
|
pgf_extern_seq_get(item, pool);
|
||||||
|
|
||||||
PgfCncCat *cnccat = item->base->ccat->cnccat;
|
size_t n_lins = item->base->ccat->cnccat->n_lins;
|
||||||
size_t size = GU_FLEX_SIZE(PgfCncFun, lins, cnccat->n_lins);
|
|
||||||
fun = gu_malloc(pool, size);
|
|
||||||
if (pext->fun == NULL) {
|
|
||||||
fun->name = gu_empty_string;
|
|
||||||
fun->ep = ep;
|
|
||||||
fun->funid = -1;
|
|
||||||
fun->n_lins = cnccat->n_lins;
|
|
||||||
|
|
||||||
for (size_t i = 0; i < fun->n_lins; i++) {
|
PgfProductionExtern* new_pext = (PgfProductionExtern*)
|
||||||
fun->lins[i] = gu_null_seq;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
memcpy(fun, pext->fun, size);
|
|
||||||
}
|
|
||||||
fun->lins[item->base->lin_idx] = seq;
|
|
||||||
}
|
|
||||||
|
|
||||||
PgfProductionExtern* new_pext =
|
|
||||||
gu_new_variant(PGF_PRODUCTION_EXTERN,
|
gu_new_variant(PGF_PRODUCTION_EXTERN,
|
||||||
PgfProductionExtern,
|
PgfProductionExtern,
|
||||||
&prod, pool);
|
&prod, pool);
|
||||||
new_pext->fun = fun;
|
|
||||||
new_pext->args = item->args;
|
|
||||||
new_pext->callback = pext->callback;
|
new_pext->callback = pext->callback;
|
||||||
|
new_pext->ep = ep;
|
||||||
|
new_pext->lins = gu_new_seq(PgfSequence, n_lins, pool);
|
||||||
|
|
||||||
|
if (gu_seq_is_null(pext->lins)) {
|
||||||
|
for (size_t i = 0; i < n_lins; i++) {
|
||||||
|
gu_seq_set(new_pext->lins,PgfSequence,i,
|
||||||
|
gu_null_seq);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (size_t i = 0; i < n_lins; i++) {
|
||||||
|
gu_seq_set(new_pext->lins,PgfSequence,i,
|
||||||
|
gu_seq_get(pext->lins,PgfSequence,i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
gu_seq_set(new_pext->lins,PgfSequence,item->base->lin_idx,seq);
|
||||||
|
} else {
|
||||||
|
prod = item->base->prod;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PGF_PRODUCTION_META: {
|
||||||
|
PgfProductionMeta* pmeta = i.data;
|
||||||
|
|
||||||
|
PgfProductionMeta* new_pmeta =
|
||||||
|
gu_new_variant(PGF_PRODUCTION_META,
|
||||||
|
PgfProductionMeta,
|
||||||
|
&prod, pool);
|
||||||
|
new_pmeta->ep = ep;
|
||||||
|
new_pmeta->lins = pmeta->lins;
|
||||||
|
new_pmeta->args = item->args;
|
||||||
|
|
||||||
|
if (gu_seq_is_null(pmeta->lins) ||
|
||||||
|
gu_seq_is_null(gu_seq_get(pmeta->lins,PgfSequence,item->base->lin_idx))) {
|
||||||
|
PgfSequence seq =
|
||||||
|
pgf_extern_seq_get(item, pool);
|
||||||
|
|
||||||
|
size_t n_lins = item->base->ccat->cnccat->n_lins;
|
||||||
|
|
||||||
|
new_pmeta->lins = gu_new_seq(PgfSequence, n_lins, pool);
|
||||||
|
|
||||||
|
if (gu_seq_is_null(pmeta->lins)) {
|
||||||
|
for (size_t i = 0; i < n_lins; i++) {
|
||||||
|
gu_seq_set(new_pmeta->lins,PgfSequence,i,
|
||||||
|
gu_null_seq);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (size_t i = 0; i < n_lins; i++) {
|
||||||
|
gu_seq_set(new_pmeta->lins,PgfSequence,i,
|
||||||
|
gu_seq_get(pmeta->lins,PgfSequence,i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
gu_seq_set(new_pmeta->lins,PgfSequence,item->base->lin_idx,seq);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@@ -944,6 +1030,41 @@ pgf_parsing_td_predict(PgfParseState* before, PgfParseState* after,
|
|||||||
gu_exit("<-");
|
gu_exit("<-");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
GuMapItor fn;
|
||||||
|
PgfParseState* before;
|
||||||
|
PgfParseState* after;
|
||||||
|
PgfItem* meta_item;
|
||||||
|
} PgfMetaPredictFn;
|
||||||
|
|
||||||
|
static void
|
||||||
|
pgf_parsing_meta_predict(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||||
|
{
|
||||||
|
(void) (err);
|
||||||
|
|
||||||
|
PgfCId abscat = *((PgfCId*) key);
|
||||||
|
float meta_prob = *((float*) value);
|
||||||
|
PgfMetaPredictFn* clo = (PgfMetaPredictFn*) fn;
|
||||||
|
PgfParseState* before = clo->before;
|
||||||
|
PgfParseState* after = clo->after;
|
||||||
|
PgfItem* meta_item = clo->meta_item;
|
||||||
|
|
||||||
|
PgfCncCat* cnccat =
|
||||||
|
gu_map_get(before->ps->concr->cnccats, &abscat, PgfCncCat*);
|
||||||
|
if (cnccat == NULL)
|
||||||
|
return;
|
||||||
|
|
||||||
|
size_t n_cats = gu_list_length(cnccat->cats);
|
||||||
|
for (size_t i = 0; i < n_cats; i++) {
|
||||||
|
PgfCCat* ccat = gu_list_index(cnccat->cats, i);
|
||||||
|
|
||||||
|
for (size_t lin_idx = 0; lin_idx < cnccat->n_lins; lin_idx++) {
|
||||||
|
pgf_parsing_td_predict(before, after,
|
||||||
|
meta_item, ccat, lin_idx, meta_prob);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static float
|
static float
|
||||||
pgf_parsing_bu_predict(PgfParseState* before, PgfParseState* after,
|
pgf_parsing_bu_predict(PgfParseState* before, PgfParseState* after,
|
||||||
PgfItemBuf* index, PgfItem* meta_item,
|
PgfItemBuf* index, PgfItem* meta_item,
|
||||||
@@ -1181,9 +1302,9 @@ pgf_parsing_symbol(PgfParseState* before, PgfParseState* after,
|
|||||||
gu_new_variant(PGF_PRODUCTION_EXTERN,
|
gu_new_variant(PGF_PRODUCTION_EXTERN,
|
||||||
PgfProductionExtern,
|
PgfProductionExtern,
|
||||||
&prod, before->pool);
|
&prod, before->pool);
|
||||||
pext->fun = NULL;
|
|
||||||
pext->args = gu_new_seq(PgfPArg, 0, before->pool);
|
|
||||||
pext->callback = callback;
|
pext->callback = callback;
|
||||||
|
pext->ep = NULL;
|
||||||
|
pext->lins = gu_null_seq;
|
||||||
|
|
||||||
pgf_parsing_production(before, parg->ccat, slit->r,
|
pgf_parsing_production(before, parg->ccat, slit->r,
|
||||||
prod, conts, 0);
|
prod, conts, 0);
|
||||||
@@ -1220,9 +1341,6 @@ pgf_parsing_symbol(PgfParseState* before, PgfParseState* after,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PgfParseState *meta_after = NULL;
|
|
||||||
static PgfLiteralCallback pgf_meta_callback;
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item)
|
pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item)
|
||||||
{
|
{
|
||||||
@@ -1268,11 +1386,10 @@ pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item)
|
|||||||
}
|
}
|
||||||
case PGF_PRODUCTION_EXTERN: {
|
case PGF_PRODUCTION_EXTERN: {
|
||||||
PgfProductionExtern* pext = i.data;
|
PgfProductionExtern* pext = i.data;
|
||||||
PgfCncFun* fun = pext->fun;
|
|
||||||
|
|
||||||
PgfSequence seq;
|
PgfSequence seq;
|
||||||
if (fun != NULL &&
|
if (!gu_seq_is_null(pext->lins) &&
|
||||||
!gu_seq_is_null(seq = fun->lins[item->base->lin_idx])) {
|
!gu_seq_is_null(seq = gu_seq_get(pext->lins,PgfSequence,item->base->lin_idx))) {
|
||||||
if (item->seq_idx == gu_seq_length(seq)) {
|
if (item->seq_idx == gu_seq_length(seq)) {
|
||||||
pgf_parsing_complete(before, after, item, NULL);
|
pgf_parsing_complete(before, after, item, NULL);
|
||||||
} else {
|
} else {
|
||||||
@@ -1284,25 +1401,16 @@ pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item)
|
|||||||
PgfToken tok = (after != NULL) ? after->ts->tok
|
PgfToken tok = (after != NULL) ? after->ts->tok
|
||||||
: gu_empty_string;
|
: gu_empty_string;
|
||||||
|
|
||||||
meta_after = after;
|
PgfExprProb *ep = NULL;
|
||||||
bool accepted =
|
bool accepted =
|
||||||
pext->callback->match(before->ps->concr, item,
|
pext->callback->match(before->ps->concr, item,
|
||||||
tok,
|
tok,
|
||||||
&before->meta_ep, before->pool);
|
&ep, before->pool);
|
||||||
meta_after = NULL;
|
|
||||||
|
|
||||||
if (before->meta_ep != NULL)
|
if (ep != NULL)
|
||||||
pgf_parsing_complete(before, after, item, before->meta_ep);
|
pgf_parsing_complete(before, after, item, ep);
|
||||||
|
|
||||||
if (accepted && after != NULL) {
|
if (accepted && after != NULL) {
|
||||||
if (pext->callback == &pgf_meta_callback) {
|
|
||||||
float meta_token_prob =
|
|
||||||
item->base->ccat->cnccat->abscat->meta_token_prob;
|
|
||||||
if (meta_token_prob == INFINITY)
|
|
||||||
break;
|
|
||||||
item->inside_prob += meta_token_prob;
|
|
||||||
}
|
|
||||||
|
|
||||||
PgfSymbol prev = item->curr_sym;
|
PgfSymbol prev = item->curr_sym;
|
||||||
PgfSymbolKS* sks = (PgfSymbolKS*)
|
PgfSymbolKS* sks = (PgfSymbolKS*)
|
||||||
gu_alloc_variant(PGF_SYMBOL_KS,
|
gu_alloc_variant(PGF_SYMBOL_KS,
|
||||||
@@ -1322,86 +1430,56 @@ pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item)
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
case PGF_PRODUCTION_META: {
|
||||||
gu_impossible();
|
PgfProductionMeta* pmeta = i.data;
|
||||||
|
|
||||||
|
PgfSequence seq;
|
||||||
|
if (!gu_seq_is_null(pmeta->lins) &&
|
||||||
|
!gu_seq_is_null(seq = gu_seq_get(pmeta->lins,PgfSequence,item->base->lin_idx))) {
|
||||||
|
if (item->seq_idx == gu_seq_length(seq)) {
|
||||||
|
pgf_parsing_complete(before, after, item, NULL);
|
||||||
|
} else {
|
||||||
|
PgfSymbol sym =
|
||||||
|
gu_seq_get(seq, PgfSymbol, item->seq_idx);
|
||||||
|
pgf_parsing_symbol(before, after, item, sym);
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
|
if (!gu_variant_is_null(item->curr_sym)) {
|
||||||
typedef struct {
|
PgfExprProb *ep = gu_new(PgfExprProb, before->pool);
|
||||||
GuMapItor fn;
|
ep->expr = before->ps->meta_var;
|
||||||
PgfParseState* before;
|
|
||||||
PgfParseState* after;
|
|
||||||
PgfItem* meta_item;
|
|
||||||
} PgfMetaPredictFn;
|
|
||||||
|
|
||||||
static void
|
|
||||||
pgf_parsing_meta_predict(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
|
||||||
{
|
|
||||||
(void) (err);
|
|
||||||
|
|
||||||
PgfCId abscat = *((PgfCId*) key);
|
|
||||||
float meta_prob = *((float*) value);
|
|
||||||
PgfMetaPredictFn* clo = (PgfMetaPredictFn*) fn;
|
|
||||||
PgfParseState* before = clo->before;
|
|
||||||
PgfParseState* after = clo->after;
|
|
||||||
PgfItem* meta_item = clo->meta_item;
|
|
||||||
{
|
|
||||||
GuPool* tmp_pool = gu_new_pool();
|
|
||||||
GuOut* out = gu_file_out(stdout, tmp_pool);
|
|
||||||
GuWriter* wtr = gu_new_utf8_writer(out, tmp_pool);
|
|
||||||
GuExn* err = gu_exn(NULL, type, tmp_pool);
|
|
||||||
gu_string_write(abscat, wtr, err);
|
|
||||||
gu_pool_free(tmp_pool);
|
|
||||||
}
|
|
||||||
PgfCncCat* cnccat =
|
|
||||||
gu_map_get(before->ps->concr->cnccats, &abscat, PgfCncCat*);
|
|
||||||
if (cnccat == NULL)
|
|
||||||
return;
|
|
||||||
|
|
||||||
size_t n_cats = gu_list_length(cnccat->cats);
|
|
||||||
for (size_t i = 0; i < n_cats; i++) {
|
|
||||||
PgfCCat* ccat = gu_list_index(cnccat->cats, i);
|
|
||||||
|
|
||||||
for (size_t lin_idx = 0; lin_idx < cnccat->n_lins; lin_idx++) {
|
|
||||||
pgf_parsing_td_predict(before, after,
|
|
||||||
meta_item, ccat, lin_idx, meta_prob);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool
|
|
||||||
pgf_match_meta(PgfConcr* concr, PgfItem *item, PgfToken tok,
|
|
||||||
PgfExprProb** out_ep, GuPool *pool)
|
|
||||||
{
|
|
||||||
size_t n_syms = pgf_item_sequence_length(item);
|
|
||||||
|
|
||||||
if (n_syms > 0) {
|
|
||||||
PgfExprProb *ep = gu_new(PgfExprProb, pool);
|
|
||||||
ep->prob = item->inside_prob;
|
ep->prob = item->inside_prob;
|
||||||
size_t n_args = gu_seq_length(item->args);
|
size_t n_args = gu_seq_length(item->args);
|
||||||
for (size_t i = 0; i < n_args; i++) {
|
for (size_t i = 0; i < n_args; i++) {
|
||||||
PgfPArg* arg = gu_seq_index(item->args, PgfPArg, i);
|
PgfPArg* arg = gu_seq_index(item->args, PgfPArg, i);
|
||||||
ep->prob -= arg->ccat->viterbi_prob;
|
ep->prob -= arg->ccat->viterbi_prob;
|
||||||
}
|
}
|
||||||
PgfExprMeta *expr_meta =
|
pgf_parsing_complete(before, after, item, ep);
|
||||||
gu_new_variant(PGF_EXPR_META,
|
|
||||||
PgfExprMeta,
|
|
||||||
&ep->expr, pool);
|
|
||||||
expr_meta->id = 0;
|
|
||||||
|
|
||||||
*out_ep = ep;
|
|
||||||
} else {
|
|
||||||
*out_ep = NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PgfParseState* after = meta_after;
|
|
||||||
if (after != NULL) {
|
if (after != NULL) {
|
||||||
if (after->ts->lexicon_idx == NULL)
|
if (after->ts->lexicon_idx == NULL) {
|
||||||
return true;
|
float meta_token_prob =
|
||||||
else {
|
item->base->ccat->cnccat->abscat->meta_token_prob;
|
||||||
PgfParseState* before =
|
if (meta_token_prob == INFINITY)
|
||||||
gu_container(out_ep, PgfParseState, meta_ep);
|
break;
|
||||||
|
item->inside_prob += meta_token_prob;
|
||||||
|
|
||||||
|
PgfSymbol prev = item->curr_sym;
|
||||||
|
PgfSymbolKS* sks = (PgfSymbolKS*)
|
||||||
|
gu_alloc_variant(PGF_SYMBOL_KS,
|
||||||
|
sizeof(PgfSymbolKS)+sizeof(PgfSymbol),
|
||||||
|
gu_alignof(PgfSymbolKS),
|
||||||
|
&item->curr_sym, after->pool);
|
||||||
|
*((PgfSymbol*)(sks+1)) = prev;
|
||||||
|
sks->tokens = gu_new_seq(PgfToken, 1, after->pool);
|
||||||
|
gu_seq_set(sks->tokens, PgfToken, 0, after->ts->tok);
|
||||||
|
|
||||||
|
item->seq_idx++;
|
||||||
|
#ifdef PGF_PARSER_DEBUG
|
||||||
|
item->end++;
|
||||||
|
#endif
|
||||||
|
pgf_parsing_add_transition(before, after, after->ts->tok, item);
|
||||||
|
} else {
|
||||||
PgfCIdMap* meta_child_probs =
|
PgfCIdMap* meta_child_probs =
|
||||||
item->base->ccat->cnccat->abscat->meta_child_probs;
|
item->base->ccat->cnccat->abscat->meta_child_probs;
|
||||||
if (meta_child_probs != NULL) {
|
if (meta_child_probs != NULL) {
|
||||||
@@ -1414,16 +1492,15 @@ pgf_match_meta(PgfConcr* concr, PgfItem *item, PgfToken tok,
|
|||||||
after->ts->lexicon_idx, item,
|
after->ts->lexicon_idx, item,
|
||||||
NULL);
|
NULL);
|
||||||
fprintf(stderr, "------------------------------------\n");*/
|
fprintf(stderr, "------------------------------------\n");*/
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
static PgfLiteralCallback pgf_meta_callback =
|
}
|
||||||
{ pgf_match_meta } ;
|
default:
|
||||||
|
gu_impossible();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pgf_parsing_proceed(PgfParseState* state, void** output) {
|
pgf_parsing_proceed(PgfParseState* state, void** output) {
|
||||||
@@ -1484,13 +1561,19 @@ pgf_new_parsing(PgfConcr* concr, GuPool* pool)
|
|||||||
ps->max_fid = concr->max_fid;
|
ps->max_fid = concr->max_fid;
|
||||||
ps->item_count = 0;
|
ps->item_count = 0;
|
||||||
|
|
||||||
PgfProductionExtern* pext =
|
PgfExprMeta *expr_meta =
|
||||||
gu_new_variant(PGF_PRODUCTION_EXTERN,
|
gu_new_variant(PGF_EXPR_META,
|
||||||
PgfProductionExtern,
|
PgfExprMeta,
|
||||||
|
&ps->meta_var, pool);
|
||||||
|
expr_meta->id = 0;
|
||||||
|
|
||||||
|
PgfProductionMeta* pmeta =
|
||||||
|
gu_new_variant(PGF_PRODUCTION_META,
|
||||||
|
PgfProductionMeta,
|
||||||
&ps->meta_prod, pool);
|
&ps->meta_prod, pool);
|
||||||
pext->fun = NULL;
|
pmeta->ep = NULL;
|
||||||
pext->args = gu_new_seq(PgfPArg, 0, pool);
|
pmeta->lins = gu_null_seq;
|
||||||
pext->callback = &pgf_meta_callback;
|
pmeta->args = gu_new_seq(PgfPArg, 0, pool);
|
||||||
|
|
||||||
return ps;
|
return ps;
|
||||||
}
|
}
|
||||||
@@ -1507,7 +1590,6 @@ pgf_new_parse_state(PgfParsing* ps,
|
|||||||
state->agenda = gu_new_buf(PgfItem*, pool);
|
state->agenda = gu_new_buf(PgfItem*, pool);
|
||||||
state->generated_cats = gu_map_type_new(PgfGenCatMap, pool);
|
state->generated_cats = gu_map_type_new(PgfGenCatMap, pool);
|
||||||
state->conts_map = gu_map_type_new(PgfContsMap, pool);
|
state->conts_map = gu_map_type_new(PgfContsMap, pool);
|
||||||
state->meta_ep = NULL;
|
|
||||||
state->offset = offset;
|
state->offset = offset;
|
||||||
state->ps = ps;
|
state->ps = ps;
|
||||||
state->ts = ts;
|
state->ts = ts;
|
||||||
@@ -1615,11 +1697,24 @@ pgf_result_cat_init(PgfParseResult* pr,
|
|||||||
|
|
||||||
PgfExprState *st = gu_new(PgfExprState, pr->tmp_pool);
|
PgfExprState *st = gu_new(PgfExprState, pr->tmp_pool);
|
||||||
st->cont = cont;
|
st->cont = cont;
|
||||||
st->expr = pext->fun->ep->expr;
|
st->expr = pext->ep->expr;
|
||||||
st->args = pext->args;
|
st->args = gu_empty_seq();
|
||||||
st->arg_idx = 0;
|
st->arg_idx = 0;
|
||||||
|
|
||||||
PgfExprQState q = {st, cont_prob + pext->fun->ep->prob};
|
PgfExprQState q = {st, cont_prob + pext->ep->prob};
|
||||||
|
gu_buf_heap_push(pr->pqueue, &pgf_expr_qstate_order, &q);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PGF_PRODUCTION_META: {
|
||||||
|
PgfProductionMeta* pmeta = pi.data;
|
||||||
|
|
||||||
|
PgfExprState *st = gu_new(PgfExprState, pr->tmp_pool);
|
||||||
|
st->cont = cont;
|
||||||
|
st->expr = pmeta->ep->expr;
|
||||||
|
st->args = pmeta->args;
|
||||||
|
st->arg_idx = 0;
|
||||||
|
|
||||||
|
PgfExprQState q = {st, cont_prob + pmeta->ep->prob};
|
||||||
size_t n_args = gu_seq_length(st->args);
|
size_t n_args = gu_seq_length(st->args);
|
||||||
for (size_t k = 0; k < n_args; k++) {
|
for (size_t k = 0; k < n_args; k++) {
|
||||||
PgfPArg* parg = gu_seq_index(st->args, PgfPArg, k);
|
PgfPArg* parg = gu_seq_index(st->args, PgfPArg, k);
|
||||||
@@ -1629,6 +1724,8 @@ pgf_result_cat_init(PgfParseResult* pr,
|
|||||||
gu_buf_heap_push(pr->pqueue, &pgf_expr_qstate_order, &q);
|
gu_buf_heap_push(pr->pqueue, &pgf_expr_qstate_order, &q);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
default:
|
||||||
|
gu_impossible();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1656,18 +1753,11 @@ pgf_parse_result_next(PgfParseResult* pr, GuPool* pool)
|
|||||||
PgfPArg* arg = gu_seq_index(q.st->args, PgfPArg, q.st->arg_idx);
|
PgfPArg* arg = gu_seq_index(q.st->args, PgfPArg, q.st->arg_idx);
|
||||||
float cont_prob = q.prob - arg->ccat->viterbi_prob;
|
float cont_prob = q.prob - arg->ccat->viterbi_prob;
|
||||||
if (arg->ccat->fid < pr->state->ps->concr->total_cats) {
|
if (arg->ccat->fid < pr->state->ps->concr->total_cats) {
|
||||||
PgfExpr arg_meta;
|
|
||||||
PgfExprMeta *expr_meta =
|
|
||||||
gu_new_variant(PGF_EXPR_META,
|
|
||||||
PgfExprMeta,
|
|
||||||
&arg_meta, pool);
|
|
||||||
expr_meta->id = 0;
|
|
||||||
|
|
||||||
q.st->expr =
|
q.st->expr =
|
||||||
gu_new_variant_i(pool, PGF_EXPR_APP,
|
gu_new_variant_i(pool, PGF_EXPR_APP,
|
||||||
PgfExprApp,
|
PgfExprApp,
|
||||||
.fun = q.st->expr,
|
.fun = q.st->expr,
|
||||||
.arg = arg_meta);
|
.arg = pr->state->ps->meta_var);
|
||||||
q.st->arg_idx++;
|
q.st->arg_idx++;
|
||||||
gu_buf_heap_push(pr->pqueue, &pgf_expr_qstate_order, &q);
|
gu_buf_heap_push(pr->pqueue, &pgf_expr_qstate_order, &q);
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
Reference in New Issue
Block a user