From 8317e090362b4108b804ea4fec1211a83cfd903b Mon Sep 17 00:00:00 2001 From: Krasimir Angelov Date: Tue, 22 Aug 2017 20:18:20 +0200 Subject: [PATCH 1/3] the parser in the C runtime now respects linref too --- src/runtime/c/pgf/parser.c | 111 ++++++++++++++++++++++++++----------- 1 file changed, 79 insertions(+), 32 deletions(-) diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index dbc3a4831..65ea44450 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -311,7 +311,12 @@ pgf_print_production(int fid, PgfProduction prod, case PGF_PRODUCTION_APPLY: { PgfProductionApply* papp = i.data; gu_printf(out,err,"F%d(",papp->fun->funid); - pgf_print_expr(papp->fun->ep->expr, NULL, 0, out, err); + if (papp->fun->ep != NULL) { + pgf_print_expr(papp->fun->ep->expr, NULL, 0, out, err); + } else { + PgfPArg* parg = gu_seq_index(papp->args, PgfPArg, 0); + gu_printf(out,err,"linref %s", parg->ccat->cnccat->abscat->name); + } gu_printf(out,err,")["); pgf_print_production_args(papp->args,out,err); gu_printf(out,err,"]\n"); @@ -381,7 +386,12 @@ pgf_print_item(PgfItem* item, PgfParseState* state, GuOut* out, GuExn* err, GuPo PgfProductionApply* papp = i.data; PgfCncFun* fun = papp->fun; gu_printf(out, err, "F%d(", fun->funid); - pgf_print_expr(fun->ep->expr, NULL, 0, out, err); + if (fun->ep != NULL) { + pgf_print_expr(fun->ep->expr, NULL, 0, out, err); + } else { + PgfPArg* parg = gu_seq_index(item->args, PgfPArg, 0); + gu_printf(out,err,"linref %s", parg->ccat->cnccat->abscat->name); + } gu_printf(out, err, ")["); pgf_print_production_args(item->args, out, err); gu_printf(out, err, "]; "); @@ -563,6 +573,7 @@ pgf_parsing_get_conts(PgfParseState* state, GuPool *pool) { gu_require(lin_idx < ccat->cnccat->n_lins); + PgfItemContss* contss = pgf_parsing_get_contss(state, ccat, pool); if (contss == NULL) { @@ -622,7 +633,7 @@ pgf_parsing_create_completed(PgfParsing* ps, PgfParseState* state, cat->fin[0].fn = gu_ccat_fini; gu_pool_finally(ps->pool, 
cat->fin); -#ifdef PGF_COUNTS_DEBUG +#ifdef PGF_COUNTS_DEBUG state->ps->ccat_full_count++; #endif @@ -835,13 +846,6 @@ pgf_parsing_combine(PgfParsing* ps, PgfParseState* before, PgfParseState* after, PgfItem* cont, PgfCCat* cat, int lin_idx) { - if (cont == NULL) { - if (before->end_offset == strlen(ps->sentence)) { - pgf_result_predict(ps, NULL, cat); - } - return; - } - PgfItem* item = NULL; switch (gu_variant_tag(cont->curr_sym)) { case PGF_SYMBOL_CAT: { @@ -981,6 +985,14 @@ pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep) gu_pool_free(tmp_pool); #endif + if (item->conts->ccat->fid == -4) { + if (ps->before->end_offset == strlen(ps->sentence)) { + PgfPArg* parg = gu_seq_index(item->args, PgfPArg, 0); + pgf_result_predict(ps, NULL, parg->ccat); + } + return; + } + if (tmp_ccat != NULL) { PgfItemContss* contss = pgf_parsing_get_contss(ps->before, ccat, ps->pool); @@ -1275,6 +1287,7 @@ pgf_parsing_td_predict(PgfParsing* ps, PgfItemConts* conts = pgf_parsing_get_conts(ps->before, ccat, lin_idx, ps->pool); gu_buf_push(conts->items, PgfItem*, item); + if (gu_buf_length(conts->items) == 1) { /* First time we encounter this linearization * of this category at the current position, @@ -1885,9 +1898,8 @@ pgf_parse_result_is_new(PgfExprState* st) return true; } -// TODO: s/CId/Cat, add the cid to Cat, make Cat the key to CncCat static PgfParsing* -pgf_parsing_init(PgfConcr* concr, PgfCId cat, size_t lin_idx, +pgf_parsing_init(PgfConcr* concr, PgfCId cat, GuString sentence, double heuristic_factor, PgfCallbacksMap* callbacks, PgfOracleCallback* oracle, @@ -1901,8 +1913,6 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat, size_t lin_idx, return NULL; } - gu_assert(lin_idx < cnccat->n_lins); - PgfParsing* ps = pgf_new_parsing(concr, sentence, callbacks, oracle, pool, out_pool); @@ -1913,31 +1923,68 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat, size_t lin_idx, PgfParseState* state = pgf_new_parse_state(ps, 0, BIND_SOFT, 0); + int fidString = -1; + PgfCCat* 
start_ccat = gu_new(PgfCCat, ps->pool); + start_ccat->cnccat = gu_map_get(concr->ccats, &fidString, PgfCCat*)->cnccat; + start_ccat->lindefs = NULL; + start_ccat->linrefs = NULL; + start_ccat->viterbi_prob = 0; + start_ccat->fid = -4; + start_ccat->conts = NULL; + start_ccat->answers = NULL; + start_ccat->prods = NULL; + start_ccat->n_synprods = 0; + +#ifdef PGF_COUNTS_DEBUG + state->ps->ccat_full_count++; +#endif + + PgfItemConts* conts = + pgf_parsing_get_conts(state, start_ccat, 0, ps->pool); + gu_buf_push(conts->items, PgfItem*, NULL); + +#ifdef PGF_COUNTS_DEBUG + ps->cont_full_count++; +#endif + size_t n_ccats = gu_seq_length(cnccat->cats); for (size_t i = 0; i < n_ccats; i++) { PgfCCat* ccat = gu_seq_get(cnccat->cats, PgfCCat*, i); if (ccat != NULL) { - if (ccat->prods == NULL) { - // Empty category - continue; - } + PgfPArgs* args = gu_new_seq(PgfPArg, 1, ps->pool); + gu_seq_set(args, PgfPArg, 0, ((PgfPArg) { .hypos = NULL, .ccat = ccat })); - PgfItemConts* conts = - pgf_parsing_get_conts(state, ccat, lin_idx, ps->pool); - gu_buf_push(conts->items, PgfItem*, NULL); + size_t n_funs = gu_seq_length(ccat->linrefs); + for (size_t j = 0; j < n_funs; j++) { + PgfProduction prod = gu_null_variant; + PgfProductionApply* new_papp = + gu_new_variant(PGF_PRODUCTION_APPLY, + PgfProductionApply, + &prod, pool); + new_papp->fun = gu_seq_get(ccat->linrefs, PgfCncFun*, j); + new_papp->args = args; + + PgfItem* item = gu_new(PgfItem, ps->pool); + item->args = args; + item->inside_prob = ccat->viterbi_prob; /* assign, not accumulate: item was just allocated and inside_prob has no prior value */ + item->conts = conts; + item->prod = prod; + item->curr_sym = gu_null_variant; + item->sym_idx = 0; + item->alt_idx = 0; + item->alt = 0; + + conts->ref_count++; + + pgf_item_set_curr_symbol(item, ps->pool); #ifdef PGF_COUNTS_DEBUG - ps->cont_full_count++; + ps->item_full_count++; + ps->item_real_count++; #endif - size_t n_prods = gu_seq_length(ccat->prods); - for (size_t i = 0; i < n_prods; i++) { - PgfProduction prod = - gu_seq_get(ccat->prods, PgfProduction, i); - 
PgfItem* item = - pgf_new_item(ps, conts, prod); gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item); - } + } } } @@ -2133,7 +2180,7 @@ pgf_parse_with_heuristics(PgfConcr* concr, PgfType* typ, GuString sentence, // Begin parsing a sentence with the specified category PgfParsing* ps = - pgf_parsing_init(concr, typ->cid, 0, sentence, heuristics, callbacks, NULL, err, pool, out_pool); + pgf_parsing_init(concr, typ->cid, sentence, heuristics, callbacks, NULL, err, pool, out_pool); if (ps == NULL) { return NULL; } @@ -2178,7 +2225,7 @@ pgf_parse_with_oracle(PgfConcr* concr, PgfType* typ, // Begin parsing a sentence with the specified category PgfCallbacksMap* callbacks = pgf_new_callbacks_map(concr, out_pool); PgfParsing* ps = - pgf_parsing_init(concr, typ->cid, 0, sentence, -1, callbacks, oracle, err, pool, out_pool); + pgf_parsing_init(concr, typ->cid, sentence, -1, callbacks, oracle, err, pool, out_pool); if (ps == NULL) { return NULL; } @@ -2240,7 +2287,7 @@ pgf_complete(PgfConcr* concr, PgfType* type, GuString sentence, PgfCallbacksMap* callbacks = pgf_new_callbacks_map(concr, pool); PgfParsing* ps = - pgf_parsing_init(concr, type->cid, 0, sentence, -1.0, callbacks, NULL, err, pool, pool); + pgf_parsing_init(concr, type->cid, sentence, -1.0, callbacks, NULL, err, pool, pool); if (ps == NULL) { return NULL; } From 82153bf271ca8392c91ae7342a480709ac209058 Mon Sep 17 00:00:00 2001 From: Krasimir Angelov Date: Tue, 22 Aug 2017 20:28:59 +0200 Subject: [PATCH 2/3] fix the id for fidStart in the C runtime --- src/runtime/c/pgf/parser.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index 65ea44450..f91583033 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -985,7 +985,7 @@ pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep) gu_pool_free(tmp_pool); #endif - if (item->conts->ccat->fid == -4) { + if (item->conts->ccat->fid == -5) { if 
(ps->before->end_offset == strlen(ps->sentence)) { PgfPArg* parg = gu_seq_index(item->args, PgfPArg, 0); pgf_result_predict(ps, NULL, parg->ccat); @@ -1929,7 +1929,7 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat, start_ccat->lindefs = NULL; start_ccat->linrefs = NULL; start_ccat->viterbi_prob = 0; - start_ccat->fid = -4; + start_ccat->fid = -5; start_ccat->conts = NULL; start_ccat->answers = NULL; start_ccat->prods = NULL; From 00388039f14c04b37a52cb3fb471bfbfeda4d95c Mon Sep 17 00:00:00 2001 From: Krasimir Angelov Date: Tue, 22 Aug 2017 21:13:21 +0200 Subject: [PATCH 3/3] a nicer pretty printing for the PGF format --- src/runtime/c/pgf/parser.c | 46 +++++++++++++++--------- src/runtime/c/pgf/printer.c | 57 +++++++++++++++++++----------- src/runtime/haskell/PGF/Printer.hs | 12 ++++--- 3 files changed, 74 insertions(+), 41 deletions(-) diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index f91583033..ecfb7d2ea 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -238,6 +238,12 @@ pgf_extern_syms_get(PgfItem* item, GuPool* pool) return syms; } +PGF_INTERNAL void +pgf_print_fid(int fid, GuOut* out, GuExn* err); + +PGF_INTERNAL_DECL void +pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err); + #ifdef PGF_PARSER_DEBUG static void pgf_item_symbols(PgfItem* item, @@ -291,12 +297,13 @@ pgf_print_production_args(PgfPArgs* args, size_t n_hypos = gu_seq_length(arg.hypos); for (size_t k = 0; k < n_hypos; k++) { PgfCCat *hypo = gu_seq_get(arg.hypos, PgfCCat*, k); - gu_printf(out,err,"C%d ",hypo->fid); + pgf_print_fid(hypo->fid, out, err); + gu_putc(' ',out,err); } - gu_printf(out,err,"-> "); + gu_puts("-> ",out,err); } - - gu_printf(out,err,"C%d",arg.ccat->fid); + + pgf_print_fid(arg.ccat->fid, out, err); } } @@ -304,7 +311,8 @@ static void pgf_print_production(int fid, PgfProduction prod, GuOut *out, GuExn* err, GuPool* pool) { - gu_printf(out,err,"C%d -> ",fid); + pgf_print_fid(fid, out, err); + gu_puts(" -> ", out, err); 
GuVariantInfo i = gu_variant_open(prod); switch (i.tag) { @@ -324,7 +332,9 @@ pgf_print_production(int fid, PgfProduction prod, } case PGF_PRODUCTION_COERCE: { PgfProductionCoerce* pcoerce = i.data; - gu_printf(out,err,"_[C%d]\n",pcoerce->coerce->fid); + gu_puts("_[",out,err); + pgf_print_fid(pcoerce->coerce->fid, out, err); + gu_puts("]\n",out,err); break; } case PGF_PRODUCTION_EXTERN: { @@ -339,9 +349,6 @@ pgf_print_production(int fid, PgfProduction prod, } } -PGF_INTERNAL_DECL void -pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err); - static void pgf_print_item_seq(PgfItem *item, GuOut *out, GuExn* err, GuPool* pool) @@ -376,9 +383,11 @@ pgf_print_range(PgfParseState* start, PgfParseState* end, GuOut* out, GuExn* err static void pgf_print_item(PgfItem* item, PgfParseState* state, GuOut* out, GuExn* err, GuPool* pool) { - gu_printf(out, err, "["); + gu_putc('[', out, err); pgf_print_range(item->conts->state, state, out, err); - gu_printf(out, err, "; C%d -> ", item->conts->ccat->fid); + gu_puts("; ", out, err); + pgf_print_fid(item->conts->ccat->fid, out, err); + gu_puts(" -> ", out, err); GuVariantInfo i = gu_variant_open(item->prod); switch (i.tag) { @@ -398,8 +407,9 @@ pgf_print_item(PgfItem* item, PgfParseState* state, GuOut* out, GuExn* err, GuPo break; } case PGF_PRODUCTION_COERCE: { - gu_printf(out, err, "_[C%d]; ", - gu_seq_index(item->args, PgfPArg, 0)->ccat->fid); + gu_puts("_[", out, err); + pgf_print_fid(gu_seq_index(item->args, PgfPArg, 0)->ccat->fid, out, err); + gu_puts("]; ", out, err); break; } case PGF_PRODUCTION_EXTERN: { @@ -976,10 +986,12 @@ pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep) if (tmp_ccat == NULL) { gu_printf(out, err, "["); pgf_print_range(item->conts->state, ps->before, out, err); - gu_printf(out, err, "; C%d; %d; C%d]\n", - item->conts->ccat->fid, - item->conts->lin_idx, - ccat->fid); + gu_puts("; ", out, err); + pgf_print_fid(item->conts->ccat->fid, out, err); + gu_printf(out, err, "; %d; ", 
item->conts->lin_idx); + pgf_print_fid(ccat->fid, out, err); + gu_puts("]\n", out, err); } pgf_print_production(ccat->fid, prod, out, err, tmp_pool); gu_pool_free(tmp_pool); diff --git a/src/runtime/c/pgf/printer.c b/src/runtime/c/pgf/printer.c index 0976242e0..153130b54 100644 --- a/src/runtime/c/pgf/printer.c +++ b/src/runtime/c/pgf/printer.c @@ -77,6 +77,23 @@ pgf_print_abstract(PgfAbstr* abstr, GuOut* out, GuExn* err) gu_puts("}\n", out, err); } +PGF_INTERNAL void +pgf_print_fid(int fid, GuOut* out, GuExn* err) +{ + if (fid == -1) + gu_puts("CString", out, err); + else if (fid == -2) + gu_puts("CInt", out, err); + else if (fid == -3) + gu_puts("CFloat", out, err); + else if (fid == -4) + gu_puts("CVar", out, err); + else if (fid == -5) + gu_puts("CStart", out, err); + else + gu_printf(out, err, "C%d", fid); +} + static void pgf_print_productions(GuMapItor* fn, const void* key, void* value, GuExn* err) @@ -91,7 +108,9 @@ pgf_print_productions(GuMapItor* fn, const void* key, void* value, for (size_t i = 0; i < n_prods; i++) { PgfProduction prod = gu_seq_get(ccat->prods, PgfProduction, i); - gu_printf(out,err," C%d -> ",fid); + gu_puts(" ", out, err); + pgf_print_fid(fid, out, err); + gu_puts(" -> ", out, err); GuVariantInfo i = gu_variant_open(prod); switch (i.tag) { @@ -111,18 +130,20 @@ pgf_print_productions(GuMapItor* fn, const void* key, void* value, if (k > 0) gu_putc(' ',out,err); PgfCCat *hypo = gu_seq_get(arg.hypos, PgfCCat*, k); - gu_printf(out,err,"C%d",hypo->fid); + pgf_print_fid(hypo->fid, out, err); } } - - gu_printf(out,err,"C%d",arg.ccat->fid); + + pgf_print_fid(arg.ccat->fid, out, err); } gu_printf(out,err,"]\n"); break; } case PGF_PRODUCTION_COERCE: { PgfProductionCoerce* pcoerce = i.data; - gu_printf(out,err,"_[C%d]\n",pcoerce->coerce->fid); + gu_puts("_[", out, err); + pgf_print_fid(pcoerce->coerce->fid, out, err); + gu_puts("]\n", out, err); break; } default: @@ -142,17 +163,13 @@ pgf_print_lindefs(GuMapItor* fn, const void* key, void* value, 
GuOut *out = clo->out; if (ccat->lindefs != NULL) { - gu_printf(out,err," C%d -> ",fid); - size_t n_lindefs = gu_seq_length(ccat->lindefs); for (size_t i = 0; i < n_lindefs; i++) { - if (i > 0) gu_putc(' ', out, err); - PgfCncFun* fun = gu_seq_get(ccat->lindefs, PgfCncFun*, i); - gu_printf(out,err,"F%d",fun->funid); + gu_puts(" ",out,err); + pgf_print_fid(fid, out, err); + gu_printf(out,err," -> F%d[CVar]\n",fun->funid); } - - gu_putc('\n', out,err); } } @@ -166,17 +183,13 @@ pgf_print_linrefs(GuMapItor* fn, const void* key, void* value, GuOut *out = clo->out; if (ccat->linrefs != NULL) { - gu_puts(" ",out,err); - size_t n_linrefs = gu_seq_length(ccat->linrefs); for (size_t i = 0; i < n_linrefs; i++) { - if (i > 0) gu_putc(' ', out, err); - PgfCncFun* fun = gu_seq_get(ccat->linrefs, PgfCncFun*, i); - gu_printf(out,err,"F%d",fun->funid); + gu_printf(out,err," CVar -> F%d[",fun->funid); + pgf_print_fid(fid, out, err); + gu_puts("]\n", out, err); } - - gu_printf(out,err," -> C%d\n",fid); } } @@ -321,7 +334,11 @@ pgf_print_cnccat(GuMapItor* fn, const void* key, void* value, PgfCCat *start = gu_seq_get(cnccat->cats, PgfCCat*, 0); PgfCCat *end = gu_seq_get(cnccat->cats, PgfCCat*, gu_seq_length(cnccat->cats)-1); - gu_printf(out, err, " range [C%d..C%d]\n", start->fid, end->fid); + gu_puts(" range [", out, err); + pgf_print_fid(start->fid, out, err); + gu_puts("..", out, err); + pgf_print_fid(end->fid, out, err); + gu_puts("]\n", out, err); gu_puts(" labels [", out, err); for (size_t i = 0; i < cnccat->n_lins; i++) { diff --git a/src/runtime/haskell/PGF/Printer.hs b/src/runtime/haskell/PGF/Printer.hs index fbe9db596..43c270b13 100644 --- a/src/runtime/haskell/PGF/Printer.hs +++ b/src/runtime/haskell/PGF/Printer.hs @@ -47,9 +47,9 @@ ppCnc name cnc = text "productions" $$ nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions cnc), prod <- Set.toList set]) $$ text "lindefs" $$ - nest 2 (vcat (map ppFunList (IntMap.toList (lindefs cnc)))) $$ + nest 
2 (vcat (concatMap ppLinDefs (IntMap.toList (lindefs cnc)))) $$ text "linrefs" $$ - nest 2 (vcat (map ppFunList (IntMap.toList (linrefs cnc)))) $$ + nest 2 (vcat (concatMap ppLinRefs (IntMap.toList (linrefs cnc)))) $$ text "lin" $$ nest 2 (vcat (map ppCncFun (assocs (cncfuns cnc)))) $$ text "sequences" $$ @@ -75,8 +75,11 @@ ppProduction (fid,PConst _ _ ss) = ppCncFun (funid,CncFun fun arr) = ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun) -ppFunList (fid,funids) = - ppFId fid <+> text "->" <+> hcat (punctuate comma (map ppFunId funids)) +ppLinDefs (fid,funids) = + [ppFId fid <+> text "->" <+> ppFunId funid <> brackets (ppFId fidVar) | funid <- funids] + +ppLinRefs (fid,funids) = + [ppFId fidVar <+> text "->" <+> ppFunId funid <> brackets (ppFId fid) | funid <- funids] ppSeq (seqid,seq) = ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq)) @@ -109,6 +112,7 @@ ppFId fid | fid == fidInt = text "CInt" | fid == fidFloat = text "CFloat" | fid == fidVar = text "CVar" + | fid == fidStart = text "CStart" | otherwise = char 'C' <> int fid ppFunId funid = char 'F' <> int funid