diff --git a/src/runtime/c/Makefile.am b/src/runtime/c/Makefile.am index 35d79afea..3d7353f03 100644 --- a/src/runtime/c/Makefile.am +++ b/src/runtime/c/Makefile.am @@ -48,6 +48,7 @@ pgfinclude_HEADERS = \ pgf/linearize.h \ pgf/parser.h \ pgf/lexer.h \ + pgf/literals.h \ pgf/pgf.h libgu_la_SOURCES = \ @@ -90,6 +91,8 @@ libpgf_la_SOURCES = \ pgf/parser.h \ pgf/lexer.c \ pgf/lexer.h \ + pgf/literals.c \ + pgf/literals.h \ pgf/reader.c \ pgf/linearize.c \ pgf/printer.c diff --git a/src/runtime/c/pgf/data.c b/src/runtime/c/pgf/data.c index 9f82c2e96..36729b23f 100644 --- a/src/runtime/c/pgf/data.c +++ b/src/runtime/c/pgf/data.c @@ -125,8 +125,10 @@ GU_DEFINE_TYPE( PGF_PRODUCTION_COERCE, PgfProductionCoerce, GU_MEMBER(PgfProductionCoerce, coerce, PgfCCatId)), GU_CONSTRUCTOR_S( - PGF_PRODUCTION_META, PgfProductionMeta, - GU_MEMBER(PgfProductionMeta, args, PgfPArgs))); + PGF_PRODUCTION_EXTERN, PgfProductionExtern, + GU_MEMBER(PgfProductionExtern, fun, PgfFunId), + GU_MEMBER(PgfProductionExtern, args, PgfPArgs), + GU_MEMBER(PgfProductionExtern, callback, PgfLiteralCallback))); GU_DEFINE_TYPE(PgfProductions, GuList, gu_type(PgfProduction)); GU_DEFINE_TYPE(PgfProductionSeq, GuSeq, gu_type(PgfProduction)); diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h index 4af0a6614..f1c107efd 100644 --- a/src/runtime/c/pgf/data.h +++ b/src/runtime/c/pgf/data.h @@ -209,6 +209,17 @@ extern GU_DECLARE_TYPE(PgfTransitions, GuStringMap); typedef GuMap PgfEpsilonIdx; extern GU_DECLARE_TYPE(PgfEpsilonIdx, GuMap); +typedef struct PgfLiteralCallback PgfLiteralCallback; +extern GU_DECLARE_TYPE(PgfLiteralCallback, struct); + +struct PgfLiteralCallback { + bool (*match)(PgfLiteralCallback* self, int lin_idx, PgfTokens toks, + PgfExprProb** out_ep, GuPool *pool); +}; + +typedef GuMap PgfCallbacksMap; +extern GU_DECLARE_TYPE(PgfCallbacksMap, GuMap); + struct PgfConcr { PgfFlags* cflags; PgfPrintNames* printnames; @@ -220,6 +231,7 @@ struct PgfConcr { PgfCncFuns* cncfuns; PgfSequences* sequences; PgfCIdMap* cnccats; + PgfCallbacksMap* callbacks; int total_cats; int max_fid; }; @@ -269,7 +281,7 @@ typedef struct PgfSymbolKP typedef enum { PGF_PRODUCTION_APPLY, PGF_PRODUCTION_COERCE, - PGF_PRODUCTION_META + PGF_PRODUCTION_EXTERN } PgfProductionTag; typedef struct PgfPArg PgfPArg; @@ -299,14 +311,10 @@ typedef struct PgfProductionCoerce } PgfProductionCoerce; typedef struct { - PgfExpr expr; // XXX - GuLength n_toks; - GuString toks[]; // XXX -} PgfProductionConst; - -typedef struct { + PgfFunId fun; PgfPArgs args; -} PgfProductionMeta; + PgfLiteralCallback *callback; +} PgfProductionExtern; extern GU_DECLARE_TYPE(PgfPatt, GuVariant); diff --git a/src/runtime/c/pgf/linearize.c b/src/runtime/c/pgf/linearize.c index 15b7ba3cc..e77eb13b9 100644 --- a/src/runtime/c/pgf/linearize.c +++ b/src/runtime/c/pgf/linearize.c @@ -27,6 +27,7 @@ #include #include #include +#include typedef GuStringMap PgfLinInfer; typedef GuSeq PgfProdSeq; @@ -338,29 +339,6 @@ finish: return ret; } -PgfCCat* -pgf_literal_cat(PgfLzn* lzn, PgfLiteral lit) -{ - int fid; - - switch (gu_variant_tag(lit)) { - case PGF_LITERAL_STR: - fid = -1; - break; - case PGF_LITERAL_INT: - fid = -2; - break; - case PGF_LITERAL_FLT: - fid = -3; - break; - default: - gu_impossible(); - return NULL; - } - - return gu_map_get(lzn->concr->ccats, &fid, PgfCCat*); -} - static PgfCCat* pgf_lzn_infer(PgfLzn* lzn, PgfExpr expr, GuPool* pool, PgfCncTree* ctree_out) { @@ -380,7 +358,7 @@ pgf_lzn_infer(PgfLzn* lzn, PgfExpr expr, GuPool* pool, PgfCncTree* ctree_out) PgfCncTreeLit, .lit = elit->lit); } - ret = pgf_literal_cat(lzn, elit->lit); + ret = pgf_literal_cat(lzn->concr, elit->lit); } default: // XXX: should we do something here? diff --git a/src/runtime/c/pgf/literals.c b/src/runtime/c/pgf/literals.c new file mode 100644 index 000000000..ffc5687fb --- /dev/null +++ b/src/runtime/c/pgf/literals.c @@ -0,0 +1,263 @@ +#include +#include +#include + +GU_DEFINE_TYPE(PgfLiteralCallback, struct); + +GU_DEFINE_TYPE(PgfCallbacksMap, GuMap, + gu_type(PgfCncCat), NULL, + gu_ptr_type(PgfLiteralCallback), &gu_null_struct); + + +static bool +pgf_match_string_lit(PgfLiteralCallback* self, int lin_idx, PgfTokens toks, + PgfExprProb** out_ep, GuPool *pool) +{ + gu_assert(lin_idx == 0); + + if (gu_seq_length(toks) == 1) { + *out_ep = NULL; + return true; + } else if (gu_seq_length(toks) == 2) { + PgfExprProb* ep = gu_new(PgfExprProb, pool); + ep->prob = 0; + + PgfExprLit *expr_lit = + gu_new_variant(PGF_EXPR_LIT, + PgfExprLit, + &ep->expr, pool); + PgfLiteralStr *lit_str = + gu_new_variant(PGF_LITERAL_STR, + PgfLiteralStr, + &expr_lit->lit, pool); + lit_str->val = gu_seq_get(toks, PgfToken, 0); + + *out_ep = ep; + return false; + } else { + *out_ep = NULL; + return false; + } +} + +static PgfLiteralCallback pgf_string_literal_callback = + { pgf_match_string_lit } ; + + + +static bool +pgf_match_int_lit(PgfLiteralCallback* self, int lin_idx, PgfTokens toks, + PgfExprProb** out_ep, GuPool *pool) +{ + gu_assert(lin_idx == 0); + + size_t n_toks = gu_seq_length(toks); + if (n_toks == 1) { + PgfToken tok = gu_seq_get(toks, PgfToken, 0); + + int val; + + *out_ep = NULL; + return gu_string_to_int(tok, &val); + } else if (n_toks == 2) { + PgfToken tok = gu_seq_get(toks, PgfToken, 0); + + int val; + if (!gu_string_to_int(tok, &val)) { + *out_ep = NULL; + return false; + } + + PgfExprProb* ep = gu_new(PgfExprProb, pool); + ep->prob = 0; + + PgfExprLit *expr_lit = + gu_new_variant(PGF_EXPR_LIT, + PgfExprLit, + &ep->expr, pool); + PgfLiteralInt *lit_int = + gu_new_variant(PGF_LITERAL_INT, + PgfLiteralInt, + &expr_lit->lit, pool); + lit_int->val = val; + + *out_ep = ep; + return false; + } else { + *out_ep = NULL; + return false; + } +} + +static PgfLiteralCallback pgf_int_literal_callback = + { pgf_match_int_lit } ; + + + +static bool +pgf_match_float_lit(PgfLiteralCallback* self, int lin_idx, PgfTokens toks, + PgfExprProb** out_ep, GuPool *pool) +{ + gu_assert(lin_idx == 0); + + size_t n_toks = gu_seq_length(toks); + if (n_toks == 1) { + PgfToken tok = gu_seq_get(toks, PgfToken, 0); + + double val; + + *out_ep = NULL; + return gu_string_to_double(tok, &val); + } else if (n_toks == 2) { + PgfToken tok = gu_seq_get(toks, PgfToken, 0); + + double val; + if (!gu_string_to_double(tok, &val)) { + *out_ep = NULL; + return false; + } + + PgfExprProb* ep = gu_new(PgfExprProb, pool); + ep->prob = 0; + + PgfExprLit *expr_lit = + gu_new_variant(PGF_EXPR_LIT, + PgfExprLit, + &ep->expr, pool); + PgfLiteralFlt *lit_flt = + gu_new_variant(PGF_LITERAL_FLT, + PgfLiteralFlt, + &expr_lit->lit, pool); + lit_flt->val = val; + + *out_ep = ep; + return false; + } else { + *out_ep = NULL; + return false; + } +} + +static PgfLiteralCallback pgf_float_literal_callback = + { pgf_match_float_lit } ; + + + +static bool +pgf_match_name_lit(PgfLiteralCallback* self, int lin_idx, PgfTokens toks, + PgfExprProb** out_ep, GuPool *pool) +{ + gu_assert(lin_idx == 0); + + size_t n_toks = gu_seq_length(toks); + + if (n_toks == 0) { + *out_ep = NULL; + return false; + } + + PgfToken tok = gu_seq_get(toks, PgfToken, n_toks-1); + + GuPool* tmp_pool = gu_new_pool(); + GuReader* rdr = gu_string_reader(tok, tmp_pool); + GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool); + + bool iscap = iswupper(gu_read_ucs(rdr, err)); + if (!iscap && n_toks > 1) { + GuStringBuf *sbuf = gu_string_buf(tmp_pool); + GuWriter* wtr = gu_string_buf_writer(sbuf); + + for (size_t i = 0; i < n_toks-1; i++) { + if (i > 0) + gu_putc(' ', wtr, err); + + tok = gu_seq_get(toks, PgfToken, i); + gu_string_write(tok, wtr, err); + } + + PgfExprProb* ep = gu_new(PgfExprProb, pool); + ep->prob = 0; + + PgfExprApp *expr_app = + gu_new_variant(PGF_EXPR_APP, + PgfExprApp, + &ep->expr, pool); + PgfExprFun *expr_fun = + gu_new_variant(PGF_EXPR_FUN, + PgfExprFun, + &expr_app->fun, pool); + expr_fun->fun = gu_str_string("MkSymb", pool); + PgfExprLit *expr_lit = + gu_new_variant(PGF_EXPR_LIT, + PgfExprLit, + &expr_app->arg, pool); + PgfLiteralStr *lit_str = + gu_new_variant(PGF_LITERAL_STR, + PgfLiteralStr, + &expr_lit->lit, pool); + lit_str->val = gu_string_buf_freeze(sbuf, pool); + + *out_ep = ep; + } + + gu_pool_free(tmp_pool); + + return iscap; +} + +PgfLiteralCallback pgf_nerc_literal_callback = + { pgf_match_name_lit } ; + + +PgfCallbacksMap* +pgf_new_callbacks_map(PgfConcr* concr, GuPool *pool) +{ + int fid; + PgfCCat* ccat; + + PgfCallbacksMap* callbacks = + gu_map_type_new(PgfCallbacksMap, pool); + + fid = -1; + ccat = gu_map_get(concr->ccats, &fid, PgfCCat*); + if (ccat != NULL) + gu_map_put(callbacks, ccat->cnccat, + PgfLiteralCallback*, &pgf_string_literal_callback); + + fid = -2; + ccat = gu_map_get(concr->ccats, &fid, PgfCCat*); + if (ccat != NULL) + gu_map_put(callbacks, ccat->cnccat, + PgfLiteralCallback*, &pgf_int_literal_callback); + + fid = -3; + ccat = gu_map_get(concr->ccats, &fid, PgfCCat*); + if (ccat != NULL) + gu_map_put(callbacks, ccat->cnccat, + PgfLiteralCallback*, &pgf_float_literal_callback); + + return callbacks; +} + +PgfCCat* +pgf_literal_cat(PgfConcr* concr, PgfLiteral lit) +{ + int fid; + + switch (gu_variant_tag(lit)) { + case PGF_LITERAL_STR: + fid = -1; + break; + case PGF_LITERAL_INT: + fid = -2; + break; + case PGF_LITERAL_FLT: + fid = -3; + break; + default: + gu_impossible(); + return NULL; + } + + return gu_map_get(concr->ccats, &fid, PgfCCat*); +} diff --git a/src/runtime/c/pgf/literals.h b/src/runtime/c/pgf/literals.h new file mode 100644 index 000000000..88f9304a1 --- /dev/null +++ b/src/runtime/c/pgf/literals.h @@ -0,0 +1,15 @@ +#ifndef PGF_LITERALS_H_ +#define PGF_LITERALS_H_ + +#include + +PgfCallbacksMap* +pgf_new_callbacks_map(PgfConcr* concr, GuPool *pool); + +// literal for named entities recognition +extern PgfLiteralCallback pgf_nerc_literal_callback; + +PgfCCat* +pgf_literal_cat(PgfConcr* concr, PgfLiteral lit); + +#endif // PGF_LITERALS_H_ diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index 5cd6e2bda..9b9496345 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -94,32 +94,73 @@ GU_DEFINE_TYPE(PgfTransitions, GuStringMap, typedef struct PgfParsing PgfParsing; -typedef struct { - PgfTokens tokens; - PgfExprProb ep; -} PgfLiteralCandidate; - typedef const struct PgfLexCallback PgfLexCallback; struct PgfLexCallback { void (*lex)(PgfLexCallback* self, PgfToken tok, PgfItem* item); - GuEnum *(*lit)(PgfLexCallback* self, PgfCCat* cat); }; struct PgfParsing { GuPool* pool; GuPool* tmp_pool; + PgfConcr* concr; PgfContsMap* conts_map; PgfGenCatMap* generated_cats; PgfCCatBuf* completed; PgfLexCallback* callback; PgfItemBuf *lexicon_idx; - PgfEpsilonIdx *epsilon_idx; PgfItemBuf *metas; + PgfToken tok; int max_fid; }; +static PgfSymbol +pgf_prev_extern_sym(PgfSymbol sym) +{ + GuVariantInfo i = gu_variant_open(sym); + switch (i.tag) { + case PGF_SYMBOL_CAT: + return *((PgfSymbol*) (((PgfSymbolCat*) i.data)+1)); + case PGF_SYMBOL_KP: + return *((PgfSymbol*) (((PgfSymbolKP*) i.data)+1)); + case PGF_SYMBOL_KS: + return *((PgfSymbol*) (((PgfSymbolKS*) i.data)+1)); + case PGF_SYMBOL_LIT: + return *((PgfSymbol*) (((PgfSymbolLit*) i.data)+1)); + case PGF_SYMBOL_VAR: + return *((PgfSymbol*) (((PgfSymbolVar*) i.data)+1)); + default: + gu_impossible(); + return gu_null_variant; + } +} + #ifdef PGF_PARSER_DEBUG +static void +pgf_print_production_args(PgfPArgs args, + GuWriter* wtr, GuExn* err) +{ + size_t n_args = gu_seq_length(args); + for (size_t j = 0; j < n_args; j++) { + if (j > 0) + gu_putc(',',wtr,err); + + PgfPArg arg = gu_seq_get(args, PgfPArg, j); + + if (arg.hypos != NULL && + gu_list_length(arg.hypos) > 0) { + size_t n_hypos = gu_list_length(arg.hypos); + for (size_t k = 0; k < n_hypos; k++) { + PgfCCat *hypo = gu_list_index(arg.hypos, k); + gu_printf(wtr,err,"C%d ",hypo->fid); + } + gu_printf(wtr,err,"-> "); + } + + gu_printf(wtr,err,"C%d",arg.ccat->fid); + } +} + static void pgf_print_production(int fid, PgfProduction prod, GuWriter *wtr, GuExn* err) { @@ -132,25 +173,7 @@ pgf_print_production(int fid, PgfProduction prod, GuWriter *wtr, GuExn* err) gu_printf(wtr,err,"F%d(",papp->fun->funid); pgf_print_expr(papp->fun->ep->expr, 0, wtr, err); gu_printf(wtr,err,")["); - size_t n_args = gu_seq_length(papp->args); - for (size_t j = 0; j < n_args; j++) { - if (j > 0) - gu_putc(',',wtr,err); - - PgfPArg arg = gu_seq_get(papp->args, PgfPArg, j); - - if (arg.hypos != NULL) { - size_t n_hypos = gu_list_length(arg.hypos); - for (size_t k = 0; k < n_hypos; k++) { - if (k > 0) - gu_putc(' ',wtr,err); - PgfCCat *hypo = gu_list_index(arg.hypos, k); - gu_printf(wtr,err,"C%d",hypo->fid); - } - } - - gu_printf(wtr,err,"C%d",arg.ccat->fid); - } + pgf_print_production_args(papp->args,wtr,err); gu_printf(wtr,err,"]\n"); break; } @@ -159,17 +182,12 @@ pgf_print_production(int fid, PgfProduction prod, GuWriter *wtr, GuExn* err) gu_printf(wtr,err,"_[C%d]\n",pcoerce->coerce->fid); break; } - case PGF_PRODUCTION_META: { - PgfProductionMeta* pmeta = i.data; - gu_printf(wtr,err,"?["); - size_t n_args = gu_seq_length(pmeta->args); - for (size_t j = 0; j < n_args; j++) { - if (j > 0) - gu_putc(',',wtr,err); - - PgfCCat *arg = gu_seq_get(pmeta->args, PgfCCat*, j); - gu_printf(wtr,err,"C%d",arg->fid); - } + case PGF_PRODUCTION_EXTERN: { + PgfProductionExtern* pext = i.data; + gu_printf(wtr,err,"("); + pgf_print_expr(pext->fun->ep->expr, 0, wtr, err); + gu_printf(wtr,err,")["); + pgf_print_production_args(pext->args,wtr,err); gu_printf(wtr,err,"]\n"); break; } @@ -181,6 +199,51 @@ pgf_print_production(int fid, PgfProduction prod, GuWriter *wtr, GuExn* err) void pgf_print_symbol(PgfSymbol sym, GuWriter *wtr, GuExn *err); +static int +pgf_print_extern_seq(PgfSymbol sym, int seq_idx, + GuWriter* wtr, GuExn* err) +{ + if (gu_variant_is_null(sym)) + return 0; + + PgfSymbol prev = pgf_prev_extern_sym(sym); + + int index = pgf_print_extern_seq(prev, seq_idx, wtr, err); + if (index == seq_idx) + gu_printf(wtr, err, " . "); + + pgf_print_symbol(sym, wtr, err); + + return index+1; +} + +static void +pgf_print_item_seq(PgfCncFun *fun, PgfItem *item, + GuWriter* wtr, GuExn* err) +{ + size_t index; + PgfSequence seq; + + gu_printf(wtr, err, "%d : ",item->base->lin_idx); + + if (fun != NULL && + !gu_seq_is_null(seq = fun->lins[item->base->lin_idx])) { + for (index = 0; index < gu_seq_length(seq); index++) { + if (item->seq_idx == index) + gu_printf(wtr, err, " . "); + + PgfSymbol *sym = gu_seq_index(seq, PgfSymbol, index); + pgf_print_symbol(*sym, wtr, err); + } + } else { + index = pgf_print_extern_seq(item->curr_sym, item->seq_idx, + wtr, err); + } + + if (item->seq_idx == index) + gu_printf(wtr, err, " ."); +} + static void pgf_print_item(PgfItem* item, GuWriter* wtr, GuExn* err) { @@ -194,23 +257,9 @@ pgf_print_item(PgfItem* item, GuWriter* wtr, GuExn* err) gu_printf(wtr, err, "F%d(", fun->funid); pgf_print_expr(fun->ep->expr, 0, wtr, err); gu_printf(wtr, err, ")["); - for (size_t i = 0; i < gu_seq_length(item->args); i++) { - PgfPArg arg = gu_seq_get(item->args, PgfPArg, i); - gu_printf(wtr, err, - ((i < gu_seq_length(item->args)-1) ? "C%d," : "C%d"), - arg.ccat->fid); - } - gu_printf(wtr, err, "]; %d : ",item->base->lin_idx); - PgfSequence seq = fun->lins[item->base->lin_idx]; - for (size_t i = 0; i < gu_seq_length(seq); i++) { - if (i == item->seq_idx) - gu_printf(wtr, err, " . "); - - PgfSymbol *sym = gu_seq_index(seq, PgfSymbol, i); - pgf_print_symbol(*sym, wtr, err); - } - if (item->seq_idx == gu_seq_length(seq)) - gu_printf(wtr, err, " ."); + pgf_print_production_args(item->args, wtr, err); + gu_printf(wtr, err, "]; "); + pgf_print_item_seq(fun, item, wtr, err); break; } case PGF_PRODUCTION_COERCE: { @@ -224,15 +273,18 @@ pgf_print_item(PgfItem* item, GuWriter* wtr, GuExn* err) gu_printf(wtr, err, " ."); break; } - case PGF_PRODUCTION_META: { - gu_printf(wtr, err, "?["); - for (size_t i = 0; i < gu_seq_length(item->args); i++) { - PgfPArg arg = gu_seq_get(item->args, PgfPArg, i); - gu_printf(wtr, err, - ((i < gu_seq_length(item->args)-1) ? "C%d," : "C%d"), - arg.ccat->fid); - } - gu_printf(wtr, err, "]; %d : %d",item->base->lin_idx, item->seq_idx); + case PGF_PRODUCTION_EXTERN: { + PgfProductionExtern* pext = i.data; + gu_printf(wtr, err, ""); + if (pext->fun != NULL) { + gu_printf(wtr, err, "("); + pgf_print_expr(pext->fun->ep->expr, 0, wtr, err); + gu_printf(wtr, err, ")"); + } + gu_printf(wtr, err, "["); + pgf_print_production_args(item->args, wtr, err); + gu_printf(wtr, err, "]; "); + pgf_print_item_seq(pext->fun, item, wtr, err); break; } default: @@ -311,42 +363,41 @@ pgf_parsing_get_completed(PgfParsing* parsing, PgfItemBuf* conts) return gu_map_get(parsing->generated_cats, conts, PgfCCat*); } -static PgfSymbol -pgf_item_base_symbol(PgfItemBase* ibase, size_t seq_idx, GuPool* pool) +static void +pgf_item_set_curr_symbol(PgfItem* item, GuPool* pool) { - GuVariantInfo i = gu_variant_open(ibase->prod); + GuVariantInfo i = gu_variant_open(item->base->prod); switch (i.tag) { case PGF_PRODUCTION_APPLY: { PgfProductionApply* papp = i.data; PgfCncFun* fun = papp->fun; - gu_assert(ibase->lin_idx < fun->n_lins); - PgfSequence seq = fun->lins[ibase->lin_idx]; - gu_assert(seq_idx <= gu_seq_length(seq)); - if (seq_idx == gu_seq_length(seq)) { - return gu_null_variant; + gu_assert(item->base->lin_idx < fun->n_lins); + PgfSequence seq = fun->lins[item->base->lin_idx]; + gu_assert(item->seq_idx <= gu_seq_length(seq)); + if (item->seq_idx == gu_seq_length(seq)) { + item->curr_sym = gu_null_variant; } else { - return gu_seq_get(seq, PgfSymbol, seq_idx); + item->curr_sym = gu_seq_get(seq, PgfSymbol, item->seq_idx); } break; } case PGF_PRODUCTION_COERCE: { - gu_assert(seq_idx <= 1); - if (seq_idx == 1) { - return gu_null_variant; + gu_assert(item->seq_idx <= 1); + if (item->seq_idx == 1) { + item->curr_sym = gu_null_variant; } else { - return gu_new_variant_i(pool, PGF_SYMBOL_CAT, + item->curr_sym = gu_new_variant_i(pool, PGF_SYMBOL_CAT, PgfSymbolCat, - .d = 0, .r = ibase->lin_idx); + .d = 0, .r = item->base->lin_idx); } break; } - case PGF_PRODUCTION_META: { - return gu_null_variant; + case PGF_PRODUCTION_EXTERN: { + break; } default: gu_impossible(); } - return gu_null_variant; } static PgfItem* @@ -375,19 +426,21 @@ pgf_new_item(PgfCCat* ccat, size_t lin_idx, parg->ccat = pcoerce->coerce; break; } - case PGF_PRODUCTION_META: { - PgfProductionMeta* pmeta = pi.data; - item->args = pmeta->args; + case PGF_PRODUCTION_EXTERN: { + PgfProductionExtern* pext = pi.data; + item->args = pext->args; break; } default: gu_impossible(); } item->base = base; - item->curr_sym = pgf_item_base_symbol(item->base, 0, pool); + item->curr_sym = gu_null_variant; item->seq_idx = 0; item->tok_idx = 0; item->alt = 0; + + pgf_item_set_curr_symbol(item, pool); return item; } @@ -425,14 +478,15 @@ static void pgf_item_advance(PgfItem* item, GuPool* pool) { item->seq_idx++; - item->curr_sym = pgf_item_base_symbol(item->base, item->seq_idx, pool); + pgf_item_set_curr_symbol(item, pool); } static void pgf_parsing_item(PgfParsing* parsing, PgfItem* item); static void -pgf_parsing_combine(PgfParsing* parsing, PgfItem* cont, PgfCCat* cat) +pgf_parsing_combine(PgfParsing* parsing, PgfItem* cont, + PgfCCat* cat, int lin_idx) { if (cont == NULL) { gu_buf_push(parsing->completed, PgfCCat*, cat); @@ -464,6 +518,16 @@ pgf_parsing_combine(PgfParsing* parsing, PgfItem* cont, PgfCCat* cat) nargs * sizeof(PgfPArg)); gu_seq_set(item->args, PgfPArg, nargs, ((PgfPArg) { .hypos = NULL, .ccat = cat })); + + PgfSymbol prev = item->curr_sym; + PgfSymbolCat* scat = (PgfSymbolCat*) + gu_alloc_variant(PGF_SYMBOL_CAT, + sizeof(PgfSymbolCat)+sizeof(PgfSymbol), + gu_alignof(PgfSymbolCat), + &item->curr_sym, parsing->pool); + *((PgfSymbol*)(scat+1)) = prev; + scat->d = nargs; + scat->r = lin_idx; } pgf_item_advance(item, parsing->pool); @@ -480,7 +544,7 @@ pgf_parsing_production(PgfParsing* parsing, PgfCCat* ccat, size_t lin_idx, } static PgfProduction -pgf_parsing_new_production(PgfItem* item, GuPool *pool) +pgf_parsing_new_production(PgfItem* item, PgfExprProb *ep, GuPool *pool) { GuVariantInfo i = gu_variant_open(item->base->prod); PgfProduction prod = gu_null_variant; @@ -505,12 +569,53 @@ pgf_parsing_new_production(PgfItem* item, GuPool *pool) new_pcoerce->coerce = parg->ccat; break; } - case PGF_PRODUCTION_META: { - PgfProductionMeta* new_pmeta = - gu_new_variant(PGF_PRODUCTION_META, - PgfProductionMeta, - &prod, pool); - new_pmeta->args = item->args; + case PGF_PRODUCTION_EXTERN: { + PgfProductionExtern* pext = i.data; + PgfCncFun* fun = pext->fun; + + if (fun == NULL || + gu_seq_is_null(fun->lins[item->base->lin_idx])) { + + int seq_len = 0; + PgfSymbol sym = item->curr_sym; + while (!gu_variant_is_null(sym)) { + seq_len++; + sym = pgf_prev_extern_sym(sym); + } + + PgfSequence seq = + gu_new_seq(PgfSymbol, seq_len, pool); + sym = item->curr_sym; + while (!gu_variant_is_null(sym)) { + gu_seq_set(seq, PgfSymbol, --seq_len, sym); + sym = pgf_prev_extern_sym(sym); + } + + PgfCncCat *cnccat = item->base->ccat->cnccat; + size_t size = GU_FLEX_SIZE(PgfCncFun, lins, cnccat->n_lins); + fun = gu_malloc(pool, size); + if (pext->fun == NULL) { + fun->name = gu_empty_string; + fun->ep = ep; + fun->funid = -1; + fun->n_lins = cnccat->n_lins; + + for (size_t i = 0; i < fun->n_lins; i++) { + fun->lins[i] = gu_null_seq; + } + } else { + memcpy(fun, pext->fun, size); + } + fun->lins[item->base->lin_idx] = seq; + } + + PgfProductionExtern* new_pext = + gu_new_variant(PGF_PRODUCTION_EXTERN, + PgfProductionExtern, + &prod, pool); + new_pext->fun = fun; + new_pext->args = item->args; + new_pext->callback = pext->callback; break; } default: @@ -521,10 +626,10 @@ pgf_parsing_new_production(PgfItem* item, GuPool *pool) } static void -pgf_parsing_complete(PgfParsing* parsing, PgfItem* item) +pgf_parsing_complete(PgfParsing* parsing, PgfItem* item, PgfExprProb *ep) { PgfProduction prod = - pgf_parsing_new_production(item, parsing->pool); + pgf_parsing_new_production(item, ep, parsing->pool); PgfItemBuf* conts = item->base->conts; PgfCCat* tmp_cat = pgf_parsing_get_completed(parsing, conts); @@ -575,7 +680,7 @@ pgf_parsing_complete(PgfParsing* parsing, PgfItem* item) size_t n_conts = gu_buf_length(conts); for (size_t i = 0; i < n_conts; i++) { PgfItem* cont = gu_buf_get(conts, PgfItem*, i); - pgf_parsing_combine(parsing, cont, cat); + pgf_parsing_combine(parsing, cont, cat, item->base->lin_idx); } } } @@ -624,8 +729,8 @@ pgf_parsing_td_predict(PgfParsing* parsing, PgfItem* item, // Bottom-up prediction for epsilon rules PgfCFCat cfc = {ccat->fid, lin_idx}; - PgfCCat* eps_ccat = gu_map_get(parsing->epsilon_idx, &cfc, PgfCCat*); - + PgfCCat* eps_ccat = gu_map_get(parsing->concr->epsilon_idx, + &cfc, PgfCCat*); if (eps_ccat != NULL) { size_t n_prods = gu_seq_length(eps_ccat->prods); for (size_t i = 0; i < n_prods; i++) { @@ -650,7 +755,7 @@ pgf_parsing_td_predict(PgfParsing* parsing, PgfItem* item, PgfCCat* completed = pgf_parsing_get_completed(parsing, conts); if (completed) { - pgf_parsing_combine(parsing, item, completed); + pgf_parsing_combine(parsing, item, completed, lin_idx); } } gu_exit(NULL); @@ -780,61 +885,44 @@ pgf_parsing_symbol(PgfParsing* parsing, PgfItem* item, PgfSymbol sym) { break; } case PGF_SYMBOL_LIT: { - PgfSymbolLit* slit = gu_variant_data(sym); - PgfPArg* parg = gu_seq_index(item->args, PgfPArg, slit->d); + if (!gu_string_eq(parsing->tok, gu_empty_string)) { + PgfSymbolLit* slit = gu_variant_data(sym); + PgfPArg* parg = gu_seq_index(item->args, PgfPArg, slit->d); + gu_assert(!parg->hypos || !parg->hypos->len); - PgfCncCat *cnccat = parg->ccat->cnccat; + if (parg->ccat->fid > 0 && + parg->ccat->fid >= parsing->concr->total_cats) + pgf_parsing_td_predict(parsing, item, parg->ccat, slit->r); + else { + PgfItemBuf* conts = + pgf_parsing_get_conts(parsing->conts_map, + parg->ccat, slit->r, + parsing->pool, parsing->tmp_pool); + gu_buf_push(conts, PgfItem*, item); - // the linearization category must be {s : Str} - gu_assert(cnccat->n_lins == 1); - gu_assert(gu_list_length(cnccat->cats) == 1); + if (gu_buf_length(conts) == 1) { + /* This is the first time when we encounter this + * literal category so we must call the callback */ - PgfItemBuf* conts = - pgf_parsing_get_conts(parsing->conts_map, - parg->ccat, slit->r, - parsing->pool, parsing->tmp_pool); - gu_buf_push(conts, PgfItem*, item); - if (gu_buf_length(conts) == 1) { - /* This is the first time when we encounter this - * literal category so we must call the callback */ - - GuEnum* en = parsing->callback->lit(parsing->callback, parg->ccat); - for (;;) { - PgfLiteralCandidate* candidate = - gu_next(en, PgfLiteralCandidate*, parsing->pool); - if (candidate == NULL) - break; + PgfLiteralCallback* callback = + gu_map_get(parsing->concr->callbacks, + parg->ccat->cnccat, + PgfLiteralCallback*); - PgfSymbol sym = gu_null_variant; - PgfSymbolKS* sks = - gu_new_variant(PGF_SYMBOL_KS, - PgfSymbolKS, - &sym, parsing->pool); - sks->tokens = candidate->tokens; + if (callback != NULL) { + PgfProduction prod; + PgfProductionExtern* pext = + gu_new_variant(PGF_PRODUCTION_EXTERN, + PgfProductionExtern, + &prod, parsing->pool); + pext->fun = NULL; + pext->args = gu_new_seq(PgfPArg, 0, parsing->pool); + pext->callback = callback; - PgfSequence seq = gu_new_seq(PgfSymbol, 1, parsing->pool); - gu_seq_set(seq, PgfSymbol, 0, sym); - - PgfCncFun* fun = - gu_malloc(parsing->pool, - sizeof(PgfCncFun)+ - sizeof(PgfSequence*)*cnccat->n_lins); - fun->name = gu_empty_string; - fun->ep = &candidate->ep; - fun->funid = -1; - fun->n_lins = cnccat->n_lins; - fun->lins[0] = seq; - - PgfProduction prod; - PgfProductionApply* papp = - gu_new_variant(PGF_PRODUCTION_APPLY, - PgfProductionApply, - &prod, parsing->pool); - papp->fun = fun; - papp->args = gu_new_seq(PgfPArg, 0, parsing->pool); - - pgf_parsing_production(parsing, parg->ccat, slit->r, - prod, conts); + pgf_parsing_production(parsing, parg->ccat, slit->r, + prod, conts); + } + } } } break; @@ -847,6 +935,55 @@ pgf_parsing_symbol(PgfParsing* parsing, PgfItem* item, PgfSymbol sym) { } } +static void +pgf_foo(PgfParsing* parsing, PgfItem* item, + PgfLiteralCallback* callback, + PgfExprProb** out_ep, + bool* out_accepted) +{ + PgfTokens toks; + if (gu_variant_is_null(item->curr_sym)) { + toks = gu_new_seq(PgfToken, 1, parsing->pool); + gu_seq_set(toks, PgfToken, 0, parsing->tok); + } else { + GuVariantInfo i = gu_variant_open(item->curr_sym); + gu_assert(i.tag == PGF_SYMBOL_KS); + PgfTokens old_toks = ((PgfSymbolKS*) i.data)->tokens; + + size_t n_toks = gu_seq_length(old_toks); + toks = gu_new_seq(PgfToken, n_toks+1, parsing->pool); + for (size_t i = 0; i < n_toks; i++) { + gu_seq_set(toks, PgfToken, i, + gu_seq_get(old_toks, PgfToken, i)); + } + gu_seq_set(toks, PgfToken, n_toks, parsing->tok); + } + + PgfExprProb *ep = NULL; + bool accepted = + callback->match(callback, + item->base->lin_idx, toks, &ep, + parsing->pool); + + if (accepted) { + if (gu_variant_is_null(item->curr_sym)) + item->seq_idx = 1; + + PgfSymbolKS* sks = (PgfSymbolKS*) + gu_alloc_variant(PGF_SYMBOL_KS, + sizeof(PgfSymbolKS)+sizeof(PgfSymbol), + gu_alignof(PgfSymbolKS), + &item->curr_sym, parsing->pool); + *((PgfSymbol*)(sks+1)) = gu_null_variant; + sks->tokens = toks; + + pgf_parsing_add_transition(parsing, parsing->tok, item); + } + + *out_ep = ep; + *out_accepted = accepted; +} + static void pgf_parsing_item(PgfParsing* parsing, PgfItem* item) { @@ -866,7 +1003,7 @@ pgf_parsing_item(PgfParsing* parsing, PgfItem* item) PgfCncFun* fun = papp->fun; PgfSequence seq = fun->lins[item->base->lin_idx]; if (item->seq_idx == gu_seq_length(seq)) { - pgf_parsing_complete(parsing, item); + pgf_parsing_complete(parsing, item, NULL); } else { PgfSymbol sym = gu_seq_get(seq, PgfSymbol, item->seq_idx); @@ -883,16 +1020,74 @@ pgf_parsing_item(PgfParsing* parsing, PgfItem* item) item->base->lin_idx); break; case 1: - pgf_parsing_complete(parsing, item); + pgf_parsing_complete(parsing, item, NULL); break; default: gu_impossible(); } break; } - case PGF_PRODUCTION_META: { - pgf_parsing_complete(parsing, item); - gu_buf_push(parsing->metas, PgfItem*, item); + case PGF_PRODUCTION_EXTERN: { + PgfProductionExtern* pext = i.data; + PgfCncFun* fun = pext->fun; + + PgfSequence seq; + if (fun != NULL && + !gu_seq_is_null(seq = fun->lins[item->base->lin_idx])) { + if (item->seq_idx == gu_seq_length(seq)) { + pgf_parsing_complete(parsing, item, NULL); + } else { + PgfSymbol sym = + gu_seq_get(seq, PgfSymbol, item->seq_idx); + pgf_parsing_symbol(parsing, item, sym); + } + } else { + PgfSymbol prev = gu_null_variant; + + PgfTokens toks; + if (gu_variant_is_null(item->curr_sym) || + gu_variant_tag(item->curr_sym) != PGF_SYMBOL_KS) { + toks = gu_new_seq(PgfToken, 1, parsing->pool); + gu_seq_set(toks, PgfToken, 0, parsing->tok); + prev = item->curr_sym; + } else { + PgfTokens old_toks = + ((PgfSymbolKS*) gu_variant_data(item->curr_sym))->tokens; + prev = pgf_prev_extern_sym(item->curr_sym); + + size_t n_toks = gu_seq_length(old_toks); + toks = gu_new_seq(PgfToken, n_toks+1, parsing->pool); + for (size_t i = 0; i < n_toks; i++) { + gu_seq_set(toks, PgfToken, i, + gu_seq_get(old_toks, PgfToken, i)); + } + gu_seq_set(toks, PgfToken, n_toks, parsing->tok); + } + + PgfExprProb *ep = NULL; + bool accepted = + pext->callback->match(pext->callback, + item->base->lin_idx, toks, &ep, + parsing->pool); + + if (ep != NULL) + pgf_parsing_complete(parsing, item, ep); + + if (accepted) { + if (gu_variant_is_null(item->curr_sym)) + item->seq_idx = 1; + + PgfSymbolKS* sks = (PgfSymbolKS*) + gu_alloc_variant(PGF_SYMBOL_KS, + sizeof(PgfSymbolKS)+sizeof(PgfSymbol), + gu_alignof(PgfSymbolKS), + &item->curr_sym, parsing->pool); + *((PgfSymbol*)(sks+1)) = prev; + sks->tokens = toks; + + pgf_parsing_add_transition(parsing, parsing->tok, item); + } + } break; } default: @@ -905,15 +1100,16 @@ pgf_new_parsing(PgfConcr* concr, PgfLexCallback* callback, int max_fid, GuPool* parse_pool, GuPool* out_pool) { PgfParsing* parsing = gu_new(PgfParsing, out_pool); + parsing->concr = concr; parsing->generated_cats = gu_map_type_new(PgfGenCatMap, out_pool); parsing->conts_map = gu_map_type_new(PgfContsMap, out_pool); parsing->completed = gu_new_buf(PgfCCat*, parse_pool); parsing->callback = callback; parsing->lexicon_idx = NULL; - parsing->epsilon_idx = concr->epsilon_idx; parsing->pool = parse_pool; parsing->tmp_pool = out_pool; parsing->metas = gu_new_buf(PgfItem*, out_pool); + parsing->tok = gu_empty_string; parsing->max_fid = max_fid; return parsing; } @@ -928,29 +1124,10 @@ pgf_new_parse(PgfConcr* concr, int max_fid, GuPool* pool) return parse; } -static void -pgf_lex_noop(PgfLexCallback* self, PgfToken tok, PgfItem* item) -{ -} - -static void -pgf_enum_null(GuEnum* self, void* to, GuPool* pool) -{ - *((PgfLiteralCandidate**) to) = NULL; -} - -static GuEnum* -pgf_lit_noop(PgfLexCallback* self, PgfCCat* ccat) -{ - static GuEnum en = { pgf_enum_null }; - return &en; -} - typedef struct { PgfLexCallback fn; PgfToken tok; PgfItemBuf* agenda; - GuPool *pool; } PgfParseTokenCallback; static void @@ -963,79 +1140,6 @@ pgf_match_token(PgfLexCallback* self, PgfToken tok, PgfItem* item) } } -typedef struct { - GuEnum en; - PgfLiteralCandidate candidate; - size_t idx; -} PgfLitEnum; - -static void -pgf_enum_lits(GuEnum* self, void* to, GuPool* pool) -{ - PgfLitEnum* en = (PgfLitEnum*) self; - - *((PgfLiteralCandidate**) to) = - (en->idx++ > 0) ? NULL : &en->candidate; -} - -static GuEnum* -pgf_match_lit(PgfLexCallback* self, PgfCCat* ccat) -{ - PgfParseTokenCallback *clo = (PgfParseTokenCallback *) self; - - PgfLiteral lit; - - switch (ccat->fid) { - case -1: { - PgfLiteralStr *lit_str = - gu_new_variant(PGF_LITERAL_STR, - PgfLiteralStr, - &lit, clo->pool); - lit_str->val = clo->tok; - break; - } - case -2: { - PgfLiteralInt *lit_int = - gu_new_variant(PGF_LITERAL_INT, - PgfLiteralInt, - &lit, clo->pool); - if (!gu_string_to_int(clo->tok, &lit_int->val)) - return pgf_lit_noop(self, ccat); - break; - } - case -3: { - PgfLiteralFlt *lit_flt = - gu_new_variant(PGF_LITERAL_FLT, - PgfLiteralFlt, - &lit, clo->pool); - if (!gu_string_to_double(clo->tok, &lit_flt->val)) - return pgf_lit_noop(self, ccat); - break; - } - default: - gu_impossible(); - } - - PgfTokens tokens = gu_new_seq(PgfToken, 1, clo->pool); - gu_seq_set(tokens, PgfToken, 0, clo->tok); - - PgfExpr expr = gu_null_variant; - PgfExprLit *expr_lit = - gu_new_variant(PGF_EXPR_LIT, - PgfExprLit, - &expr, clo->pool); - expr_lit->lit = lit; - - PgfLitEnum* en = gu_new(PgfLitEnum, clo->pool); - en->en.next = pgf_enum_lits; - en->candidate.tokens = tokens; - en->candidate.ep.prob = INFINITY; - en->candidate.ep.expr = expr; - en->idx = 0; - - return &en->en; -} - typedef struct { GuMapItor fn; PgfProduction prod; @@ -1062,7 +1166,7 @@ pgf_parsing_get_metas(GuMapItor* fn, const void* key, void* value, PgfItem *item = pgf_new_item(ccat, lin_idx, prod, conts, pool); gu_buf_push(metas, PgfItem*, item); - + #ifdef PGF_PARSER_DEBUG GuPool* tmp_pool = gu_new_pool(); GuOut* out = gu_file_out(stderr, tmp_pool); @@ -1075,16 +1179,35 @@ pgf_parsing_get_metas(GuMapItor* fn, const void* key, void* value, } } +static bool +pgf_match_meta(PgfLiteralCallback* self, int lin_idx, PgfTokens toks, + PgfExprProb** out_ep, GuPool *pool) +{ + PgfExprProb *ep = gu_new(PgfExprProb, pool); + ep->prob = 100000000000 + rand(); + PgfExprMeta *expr_meta = + gu_new_variant(PGF_EXPR_META, + PgfExprMeta, + &ep->expr, pool); + expr_meta->id = 0; + + *out_ep = ep; + return true; +} + +static PgfLiteralCallback pgf_meta_callback = + { pgf_match_meta } ; + PgfParse* pgf_parse_token(PgfParse* parse, PgfToken tok, bool robust, GuPool* pool) { PgfItemBuf* agenda = gu_new_buf(PgfItem*, pool); - PgfParseTokenCallback clo1 = {{ pgf_match_token, pgf_match_lit }, - tok, agenda, pool}; + PgfParseTokenCallback clo1 = {{ pgf_match_token }, tok, agenda }; GuPool* tmp_pool = gu_new_pool(); PgfParsing* parsing = pgf_new_parsing(parse->concr, &clo1.fn, parse->max_fid, pool, tmp_pool); + parsing->tok = tok; parsing->lexicon_idx = gu_map_get(parse->concr->lexicon_idx, &tok, GuBuf*); size_t n_items = gu_buf_length(parse->agenda); @@ -1095,11 +1218,13 @@ pgf_parse_token(PgfParse* parse, PgfToken tok, bool robust, GuPool* pool) if (robust) { PgfProduction prod; - PgfProductionMeta* pmeta = - gu_new_variant(PGF_PRODUCTION_META, - PgfProductionMeta, + PgfProductionExtern* pext = + gu_new_variant(PGF_PRODUCTION_EXTERN, + PgfProductionExtern, &prod, parsing->pool); - pmeta->args = gu_new_seq(PgfPArg, 0, parsing->pool); + pext->fun = NULL; + pext->args = gu_new_seq(PgfPArg, 0, parsing->pool); + pext->callback = &pgf_meta_callback; PgfGetMetaFn clo2 = { { pgf_parsing_get_metas }, prod, parsing->metas, pool }; gu_map_iter(parsing->conts_map, &clo2.fn, NULL); @@ -1108,19 +1233,22 @@ pgf_parse_token(PgfParse* parse, PgfToken tok, bool robust, GuPool* pool) size_t n_items = gu_buf_length(parsing->lexicon_idx); for (size_t i = 0; i < n_items; i++) { PgfItem* item = gu_buf_get(parsing->lexicon_idx, PgfItem*, i); - + if (!pgf_parsing_has_conts(parsing->conts_map, item->base->ccat, item->base->lin_idx)) { pgf_parsing_bu_predict(parsing, item, agenda); } } } else { + // We have unknown word + size_t n_items = gu_buf_length(parsing->metas); for (size_t i = 0; i < n_items; i++) { PgfItem* item = gu_buf_get(parsing->metas, PgfItem*, i); - - pgf_item_advance(item, parsing->pool); - pgf_parsing_add_transition(parsing, tok, item); + + PgfExprProb *ep; + bool accepted; + pgf_foo(parsing, item, pext->callback, &ep, &accepted); } } } @@ -1168,14 +1296,12 @@ pgf_production_to_expr(PgfConcr* concr, PgfProduction prod, PgfProductionCoerce* pcoerce = pi.data; return pgf_cat_to_expr(concr, pcoerce->coerce, visited, choice, pool); } - case PGF_PRODUCTION_META: { - PgfProductionMeta* pmeta = pi.data; - PgfExpr expr = gu_new_variant_i(pool, PGF_EXPR_META, - PgfExprMeta, - .id = 0); - size_t n_args = gu_seq_length(pmeta->args); + case PGF_PRODUCTION_EXTERN: { + PgfProductionExtern* pext = pi.data; + PgfExpr expr = pext->fun->ep->expr; + size_t n_args = gu_seq_length(pext->args); for (size_t i = 0; i < n_args; i++) { - PgfPArg* parg = gu_seq_index(pmeta->args, PgfPArg, i); + PgfPArg* parg = gu_seq_index(pext->args, PgfPArg, i); gu_assert(!parg->hypos || !parg->hypos->len); PgfExpr earg = pgf_cat_to_expr(concr, parg->ccat, visited, choice, pool); if (gu_variant_is_null(earg)) @@ -1260,8 +1386,13 @@ pgf_parse_result_enum_next(GuEnum* self, void* to, GuPool* pool) *(PgfExpr*)to = pgf_parse_result_next(pr, pool); } +static void +pgf_lex_noop(PgfLexCallback* self, PgfToken tok, PgfItem* item) +{ +} + static PgfLexCallback lex_callback_noop = - { pgf_lex_noop, pgf_lit_noop }; + { pgf_lex_noop }; PgfExprEnum* pgf_parse_result(PgfParse* parse, GuPool* pool) @@ -1332,19 +1463,13 @@ pgf_parse_best_result_init(PgfCCat *ccat, GuBuf *pqueue, tmp_pool, out_pool); break; } - case PGF_PRODUCTION_META: { - PgfProductionMeta* pmeta = pi.data; + case PGF_PRODUCTION_EXTERN: { + PgfProductionExtern* pext = pi.data; PgfExprState *st = gu_new(PgfExprState, tmp_pool); - st->ep.prob = 100000000000 + rand(); - PgfExprMeta *expr_meta = - gu_new_variant(PGF_EXPR_META, - PgfExprMeta, - &st->ep.expr, out_pool); - expr_meta->id = 0; - st->args = pmeta->args; + st->ep = *pext->fun->ep; + st->args = pext->args; st->arg_idx = 0; - gu_buf_heap_push(pqueue, &pgf_expr_prob_order, &st); break; } @@ -1500,6 +1625,19 @@ pgf_parser_parse(PgfConcr* concr, PgfCId cat, size_t lin_idx, GuPool* pool) return parse; } +void +pgf_parser_add_literal(PgfConcr *concr, PgfCId cat, + PgfLiteralCallback* callback) +{ + PgfCncCat* cnccat = + gu_map_get(concr->cnccats, &cat, PgfCncCat*); + if (cnccat == NULL) + return; + + gu_map_put(concr->callbacks, cnccat, + PgfLiteralCallback*, callback); +} + static void pgf_parser_bu_add_entry(PgfConcr* concr, PgfTokens tokens, PgfItem* item, @@ -1614,7 +1752,7 @@ pgf_parser_bu_item(PgfConcr* concr, PgfItem* item, } PgfProduction prod = - pgf_parsing_new_production(item, pool); + pgf_parsing_new_production(item, NULL, pool); GuBuf* prodbuf = gu_seq_buf(eps_ccat->prods); gu_buf_push(prodbuf, PgfProduction, prod); eps_ccat->n_synprods++; diff --git a/src/runtime/c/pgf/parser.h b/src/runtime/c/pgf/parser.h index 2744dd242..dc033385b 100644 --- a/src/runtime/c/pgf/parser.h +++ b/src/runtime/c/pgf/parser.h @@ -65,6 +65,9 @@ pgf_parse_token(PgfParse* parse, PgfToken tok, bool robust, GuPool* pool); * the pool used to create \parse. */ +void +pgf_parser_add_literal(PgfConcr *concr, PgfCId cat, + PgfLiteralCallback* callback); /** @} * @name Retrieving abstract syntax trees diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c index 76b67abcf..6b287f68e 100644 --- a/src/runtime/c/pgf/reader.c +++ b/src/runtime/c/pgf/reader.c @@ -19,6 +19,7 @@ #include "data.h" #include "expr.h" +#include "literals.h" #include #include #include @@ -766,6 +767,7 @@ pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool, concr->epsilon_idx = gu_map_type_new(PgfEpsilonIdx, pool); pgf_read_into_map(ccats_t, rdr, concr->ccats); concr->cnccats = pgf_read_new(rdr, gu_type(PgfCncCatMap), pool, NULL); + concr->callbacks = pgf_new_callbacks_map(concr, pool); concr->total_cats = pgf_read_int(rdr); concr->max_fid = concr->total_cats; diff --git a/src/runtime/c/utils/pgf-translate.c b/src/runtime/c/utils/pgf-translate.c index b2a8bae59..878e07992 100644 --- a/src/runtime/c/utils/pgf-translate.c +++ b/src/runtime/c/utils/pgf-translate.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -77,6 +78,10 @@ int main(int argc, char* argv[]) { status = EXIT_FAILURE; goto fail_concr; } + + // Register a callback for the literal category Symbol + pgf_parser_add_literal(from_concr, gu_str_string("Symb", pool), + &pgf_nerc_literal_callback); // Arbitrarily choose linearization index 0. Usually the initial // categories we are interested in only have one field.