This commit is contained in:
Ayberk Tosun
2017-08-23 11:38:13 +03:00
3 changed files with 153 additions and 73 deletions

View File

@@ -238,6 +238,12 @@ pgf_extern_syms_get(PgfItem* item, GuPool* pool)
return syms;
}
// Forward declarations of printing helpers; pgf_print_fid is called by the
// debug printers further down to render category ids.
// NOTE(review): this prototype is tagged PGF_INTERNAL while the next one is
// tagged PGF_INTERNAL_DECL -- confirm which macro is intended for declarations.
PGF_INTERNAL void
pgf_print_fid(int fid, GuOut* out, GuExn* err);
PGF_INTERNAL_DECL void
pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err);
#ifdef PGF_PARSER_DEBUG
static void
pgf_item_symbols(PgfItem* item,
@@ -291,12 +297,13 @@ pgf_print_production_args(PgfPArgs* args,
size_t n_hypos = gu_seq_length(arg.hypos);
for (size_t k = 0; k < n_hypos; k++) {
PgfCCat *hypo = gu_seq_get(arg.hypos, PgfCCat*, k);
gu_printf(out,err,"C%d ",hypo->fid);
pgf_print_fid(hypo->fid, out, err);
gu_putc(' ',out,err);
}
gu_printf(out,err,"-> ");
gu_puts("-> ",out,err);
}
gu_printf(out,err,"C%d",arg.ccat->fid);
pgf_print_fid(arg.ccat->fid, out, err);
}
}
@@ -304,14 +311,20 @@ static void
pgf_print_production(int fid, PgfProduction prod,
GuOut *out, GuExn* err, GuPool* pool)
{
gu_printf(out,err,"C%d -> ",fid);
pgf_print_fid(fid, out, err);
gu_puts(" -> ", out, err);
GuVariantInfo i = gu_variant_open(prod);
switch (i.tag) {
case PGF_PRODUCTION_APPLY: {
PgfProductionApply* papp = i.data;
gu_printf(out,err,"F%d(",papp->fun->funid);
pgf_print_expr(papp->fun->ep->expr, NULL, 0, out, err);
if (papp->fun->ep != NULL) {
pgf_print_expr(papp->fun->ep->expr, NULL, 0, out, err);
} else {
PgfPArg* parg = gu_seq_index(papp->args, PgfPArg, 0);
gu_printf(out,err,"linref %s", parg->ccat->cnccat->abscat->name);
}
gu_printf(out,err,")[");
pgf_print_production_args(papp->args,out,err);
gu_printf(out,err,"]\n");
@@ -319,7 +332,9 @@ pgf_print_production(int fid, PgfProduction prod,
}
case PGF_PRODUCTION_COERCE: {
PgfProductionCoerce* pcoerce = i.data;
gu_printf(out,err,"_[C%d]\n",pcoerce->coerce->fid);
// A coercion is printed as "_[<fid>]" followed by a newline.
gu_puts("_[",out,err);
pgf_print_fid(pcoerce->coerce->fid, out, err);
// Literal text goes through gu_puts(str, out, err); gu_printf expects
// (out, err, fmt, ...), so passing the string first would be an argument mix-up.
gu_puts("]\n",out,err);
break;
}
case PGF_PRODUCTION_EXTERN: {
@@ -334,9 +349,6 @@ pgf_print_production(int fid, PgfProduction prod,
}
}
PGF_INTERNAL_DECL void
pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err);
static void
pgf_print_item_seq(PgfItem *item,
GuOut *out, GuExn* err, GuPool* pool)
@@ -371,9 +383,11 @@ pgf_print_range(PgfParseState* start, PgfParseState* end, GuOut* out, GuExn* err
static void
pgf_print_item(PgfItem* item, PgfParseState* state, GuOut* out, GuExn* err, GuPool* pool)
{
gu_printf(out, err, "[");
gu_putc('[', out, err);
pgf_print_range(item->conts->state, state, out, err);
gu_printf(out, err, "; C%d -> ", item->conts->ccat->fid);
gu_puts("; ", out, err);
pgf_print_fid(item->conts->ccat->fid, out, err);
gu_puts(" -> ", out, err);
GuVariantInfo i = gu_variant_open(item->prod);
switch (i.tag) {
@@ -381,15 +395,21 @@ pgf_print_item(PgfItem* item, PgfParseState* state, GuOut* out, GuExn* err, GuPo
PgfProductionApply* papp = i.data;
PgfCncFun* fun = papp->fun;
gu_printf(out, err, "F%d(", fun->funid);
pgf_print_expr(fun->ep->expr, NULL, 0, out, err);
if (fun->ep != NULL) {
pgf_print_expr(fun->ep->expr, NULL, 0, out, err);
} else {
PgfPArg* parg = gu_seq_index(item->args, PgfPArg, 0);
gu_printf(out,err,"linref %s", parg->ccat->cnccat->abscat->name);
}
gu_printf(out, err, ")[");
pgf_print_production_args(item->args, out, err);
gu_printf(out, err, "]; ");
break;
}
case PGF_PRODUCTION_COERCE: {
gu_printf(out, err, "_[C%d]; ",
gu_seq_index(item->args, PgfPArg, 0)->ccat->fid);
gu_puts("_[", out, err);
pgf_print_fid(gu_seq_index(item->args, PgfPArg, 0)->ccat->fid, out, err);
gu_puts("]; ", out, err);
break;
}
case PGF_PRODUCTION_EXTERN: {
@@ -563,6 +583,7 @@ pgf_parsing_get_conts(PgfParseState* state,
GuPool *pool)
{
gu_require(lin_idx < ccat->cnccat->n_lins);
PgfItemContss* contss =
pgf_parsing_get_contss(state, ccat, pool);
if (contss == NULL) {
@@ -622,7 +643,7 @@ pgf_parsing_create_completed(PgfParsing* ps, PgfParseState* state,
cat->fin[0].fn = gu_ccat_fini;
gu_pool_finally(ps->pool, cat->fin);
#ifdef PGF_COUNTS_DEBUG
#ifdef PGF_COUNTS_DEBUG
state->ps->ccat_full_count++;
#endif
@@ -835,13 +856,6 @@ pgf_parsing_combine(PgfParsing* ps,
PgfParseState* before, PgfParseState* after,
PgfItem* cont, PgfCCat* cat, int lin_idx)
{
if (cont == NULL) {
if (before->end_offset == strlen(ps->sentence)) {
pgf_result_predict(ps, NULL, cat);
}
return;
}
PgfItem* item = NULL;
switch (gu_variant_tag(cont->curr_sym)) {
case PGF_SYMBOL_CAT: {
@@ -972,15 +986,25 @@ pgf_parsing_complete(PgfParsing* ps, PgfItem* item, PgfExprProb *ep)
if (tmp_ccat == NULL) {
gu_printf(out, err, "[");
pgf_print_range(item->conts->state, ps->before, out, err);
gu_printf(out, err, "; C%d; %d; C%d]\n",
item->conts->ccat->fid,
item->conts->lin_idx,
ccat->fid);
gu_puts("; ", out, err);
pgf_print_fid(item->conts->ccat->fid, out, err);
gu_printf(out, err, "; %d; ",
item->conts->lin_idx);
pgf_print_fid(ccat->fid, out, err);
gu_puts("]\n", out, err);
}
pgf_print_production(ccat->fid, prod, out, err, tmp_pool);
gu_pool_free(tmp_pool);
#endif
if (item->conts->ccat->fid == -5) {
if (ps->before->end_offset == strlen(ps->sentence)) {
PgfPArg* parg = gu_seq_index(item->args, PgfPArg, 0);
pgf_result_predict(ps, NULL, parg->ccat);
}
return;
}
if (tmp_ccat != NULL) {
PgfItemContss* contss =
pgf_parsing_get_contss(ps->before, ccat, ps->pool);
@@ -1275,6 +1299,7 @@ pgf_parsing_td_predict(PgfParsing* ps,
PgfItemConts* conts =
pgf_parsing_get_conts(ps->before, ccat, lin_idx, ps->pool);
gu_buf_push(conts->items, PgfItem*, item);
if (gu_buf_length(conts->items) == 1) {
/* First time we encounter this linearization
* of this category at the current position,
@@ -1885,9 +1910,8 @@ pgf_parse_result_is_new(PgfExprState* st)
return true;
}
// TODO: s/CId/Cat, add the cid to Cat, make Cat the key to CncCat
static PgfParsing*
pgf_parsing_init(PgfConcr* concr, PgfCId cat, size_t lin_idx,
pgf_parsing_init(PgfConcr* concr, PgfCId cat,
GuString sentence,
double heuristic_factor,
PgfCallbacksMap* callbacks, PgfOracleCallback* oracle,
@@ -1901,8 +1925,6 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat, size_t lin_idx,
return NULL;
}
gu_assert(lin_idx < cnccat->n_lins);
PgfParsing* ps =
pgf_new_parsing(concr, sentence, callbacks, oracle, pool, out_pool);
@@ -1913,31 +1935,68 @@ pgf_parsing_init(PgfConcr* concr, PgfCId cat, size_t lin_idx,
PgfParseState* state =
pgf_new_parse_state(ps, 0, BIND_SOFT, 0);
int fidString = -1;
PgfCCat* start_ccat = gu_new(PgfCCat, ps->pool);
start_ccat->cnccat = gu_map_get(concr->ccats, &fidString, PgfCCat*)->cnccat;
start_ccat->lindefs = NULL;
start_ccat->linrefs = NULL;
start_ccat->viterbi_prob = 0;
start_ccat->fid = -5;
start_ccat->conts = NULL;
start_ccat->answers = NULL;
start_ccat->prods = NULL;
start_ccat->n_synprods = 0;
#ifdef PGF_COUNTS_DEBUG
state->ps->ccat_full_count++;
#endif
PgfItemConts* conts =
pgf_parsing_get_conts(state, start_ccat, 0, ps->pool);
gu_buf_push(conts->items, PgfItem*, NULL);
#ifdef PGF_COUNTS_DEBUG
ps->cont_full_count++;
#endif
size_t n_ccats = gu_seq_length(cnccat->cats);
for (size_t i = 0; i < n_ccats; i++) {
PgfCCat* ccat = gu_seq_get(cnccat->cats, PgfCCat*, i);
if (ccat != NULL) {
if (ccat->prods == NULL) {
// Empty category
continue;
}
PgfPArgs* args = gu_new_seq(PgfPArg, 1, ps->pool);
gu_seq_set(args, PgfPArg, 0, ((PgfPArg) { .hypos = NULL, .ccat = ccat }));
PgfItemConts* conts =
pgf_parsing_get_conts(state, ccat, lin_idx, ps->pool);
gu_buf_push(conts->items, PgfItem*, NULL);
size_t n_funs = gu_seq_length(ccat->linrefs);
for (size_t j = 0; j < n_funs; j++) {
PgfProduction prod = gu_null_variant;
PgfProductionApply* new_papp =
gu_new_variant(PGF_PRODUCTION_APPLY,
PgfProductionApply,
&prod, pool);
new_papp->fun = gu_seq_get(ccat->linrefs, PgfCncFun*, j);
new_papp->args = args;
// Build the initial item for this linref production by hand.
PgfItem* item = gu_new(PgfItem, ps->pool);
item->args = args;
// Assign instead of accumulating: the item was allocated just above and
// inside_prob has not been given a value yet, so "+=" would read an
// unset field (assumes gu_new does not zero memory -- TODO confirm).
item->inside_prob = ccat->viterbi_prob;
item->conts = conts;
item->prod = prod;
item->curr_sym = gu_null_variant;
item->sym_idx = 0;
item->alt_idx = 0;
item->alt = 0;
// The item holds a reference to its continuation set.
conts->ref_count++;
pgf_item_set_curr_symbol(item, ps->pool);
#ifdef PGF_COUNTS_DEBUG
ps->cont_full_count++;
ps->item_full_count++;
ps->item_real_count++;
#endif
size_t n_prods = gu_seq_length(ccat->prods);
for (size_t i = 0; i < n_prods; i++) {
PgfProduction prod =
gu_seq_get(ccat->prods, PgfProduction, i);
PgfItem* item =
pgf_new_item(ps, conts, prod);
gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
}
}
}
}
@@ -2133,7 +2192,7 @@ pgf_parse_with_heuristics(PgfConcr* concr, PgfType* typ, GuString sentence,
// Begin parsing a sentence with the specified category
PgfParsing* ps =
pgf_parsing_init(concr, typ->cid, 0, sentence, heuristics, callbacks, NULL, err, pool, out_pool);
pgf_parsing_init(concr, typ->cid, sentence, heuristics, callbacks, NULL, err, pool, out_pool);
if (ps == NULL) {
return NULL;
}
@@ -2178,7 +2237,7 @@ pgf_parse_with_oracle(PgfConcr* concr, PgfType* typ,
// Begin parsing a sentence with the specified category
PgfCallbacksMap* callbacks = pgf_new_callbacks_map(concr, out_pool);
PgfParsing* ps =
pgf_parsing_init(concr, typ->cid, 0, sentence, -1, callbacks, oracle, err, pool, out_pool);
pgf_parsing_init(concr, typ->cid, sentence, -1, callbacks, oracle, err, pool, out_pool);
if (ps == NULL) {
return NULL;
}
@@ -2240,7 +2299,7 @@ pgf_complete(PgfConcr* concr, PgfType* type, GuString sentence,
PgfCallbacksMap* callbacks =
pgf_new_callbacks_map(concr, pool);
PgfParsing* ps =
pgf_parsing_init(concr, type->cid, 0, sentence, -1.0, callbacks, NULL, err, pool, pool);
pgf_parsing_init(concr, type->cid, sentence, -1.0, callbacks, NULL, err, pool, pool);
if (ps == NULL) {
return NULL;
}

View File

@@ -77,6 +77,23 @@ pgf_print_abstract(PgfAbstr* abstr, GuOut* out, GuExn* err)
gu_puts("}\n", out, err);
}
/*
 * Write the textual form of a category id to `out`.
 *
 * The ids -1 .. -5 are printed with symbolic names (CString, CInt,
 * CFloat, CVar, CStart); every other id is printed as "C<fid>".
 * Output errors are reported through `err` by the underlying writers.
 */
PGF_INTERNAL void
pgf_print_fid(int fid, GuOut* out, GuExn* err)
{
	switch (fid) {
	case -1:
		gu_puts("CString", out, err);
		break;
	case -2:
		gu_puts("CInt", out, err);
		break;
	case -3:
		gu_puts("CFloat", out, err);
		break;
	case -4:
		gu_puts("CVar", out, err);
		break;
	case -5:
		gu_puts("CStart", out, err);
		break;
	default:
		/* Ordinary category: numeric id with a "C" prefix. */
		gu_printf(out, err, "C%d", fid);
		break;
	}
}
static void
pgf_print_productions(GuMapItor* fn, const void* key, void* value,
GuExn* err)
@@ -91,7 +108,9 @@ pgf_print_productions(GuMapItor* fn, const void* key, void* value,
for (size_t i = 0; i < n_prods; i++) {
PgfProduction prod = gu_seq_get(ccat->prods, PgfProduction, i);
gu_printf(out,err," C%d -> ",fid);
gu_puts(" ", out, err);
pgf_print_fid(fid, out, err);
gu_puts(" -> ", out, err);
GuVariantInfo i = gu_variant_open(prod);
switch (i.tag) {
@@ -111,18 +130,20 @@ pgf_print_productions(GuMapItor* fn, const void* key, void* value,
if (k > 0)
gu_putc(' ',out,err);
PgfCCat *hypo = gu_seq_get(arg.hypos, PgfCCat*, k);
gu_printf(out,err,"C%d",hypo->fid);
pgf_print_fid(hypo->fid, out, err);
}
}
gu_printf(out,err,"C%d",arg.ccat->fid);
pgf_print_fid(arg.ccat->fid, out, err);
}
gu_printf(out,err,"]\n");
break;
}
case PGF_PRODUCTION_COERCE: {
PgfProductionCoerce* pcoerce = i.data;
gu_printf(out,err,"_[C%d]\n",pcoerce->coerce->fid);
gu_puts("_[", out, err);
pgf_print_fid(pcoerce->coerce->fid, out, err);
gu_puts("]\n", out, err);
break;
}
default:
@@ -142,17 +163,13 @@ pgf_print_lindefs(GuMapItor* fn, const void* key, void* value,
GuOut *out = clo->out;
if (ccat->lindefs != NULL) {
gu_printf(out,err," C%d -> ",fid);
size_t n_lindefs = gu_seq_length(ccat->lindefs);
for (size_t i = 0; i < n_lindefs; i++) {
if (i > 0) gu_putc(' ', out, err);
PgfCncFun* fun = gu_seq_get(ccat->lindefs, PgfCncFun*, i);
gu_printf(out,err,"F%d",fun->funid);
gu_puts(" ",out,err);
pgf_print_fid(fid, out, err);
gu_printf(out,err," -> F%d[CVar]\n",fun->funid);
}
gu_putc('\n', out,err);
}
}
@@ -166,17 +183,13 @@ pgf_print_linrefs(GuMapItor* fn, const void* key, void* value,
GuOut *out = clo->out;
if (ccat->linrefs != NULL) {
gu_puts(" ",out,err);
size_t n_linrefs = gu_seq_length(ccat->linrefs);
for (size_t i = 0; i < n_linrefs; i++) {
if (i > 0) gu_putc(' ', out, err);
PgfCncFun* fun = gu_seq_get(ccat->linrefs, PgfCncFun*, i);
gu_printf(out,err,"F%d",fun->funid);
gu_printf(out,err," CVar -> F%d[",fun->funid);
pgf_print_fid(fid, out, err);
gu_puts("]\n", out, err);
}
gu_printf(out,err," -> C%d\n",fid);
}
}
@@ -321,7 +334,11 @@ pgf_print_cnccat(GuMapItor* fn, const void* key, void* value,
PgfCCat *start = gu_seq_get(cnccat->cats, PgfCCat*, 0);
PgfCCat *end = gu_seq_get(cnccat->cats, PgfCCat*, gu_seq_length(cnccat->cats)-1);
gu_printf(out, err, " range [C%d..C%d]\n", start->fid, end->fid);
gu_puts(" range [", out, err);
pgf_print_fid(start->fid, out, err);
gu_puts("..", out, err);
pgf_print_fid(end->fid, out, err);
gu_puts("]\n", out, err);
gu_puts(" labels [", out, err);
for (size_t i = 0; i < cnccat->n_lins; i++) {

View File

@@ -47,9 +47,9 @@ ppCnc name cnc =
text "productions" $$
nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions cnc), prod <- Set.toList set]) $$
text "lindefs" $$
nest 2 (vcat (map ppFunList (IntMap.toList (lindefs cnc)))) $$
nest 2 (vcat (concatMap ppLinDefs (IntMap.toList (lindefs cnc)))) $$
text "linrefs" $$
nest 2 (vcat (map ppFunList (IntMap.toList (linrefs cnc)))) $$
nest 2 (vcat (concatMap ppLinRefs (IntMap.toList (linrefs cnc)))) $$
text "lin" $$
nest 2 (vcat (map ppCncFun (assocs (cncfuns cnc)))) $$
text "sequences" $$
@@ -75,8 +75,11 @@ ppProduction (fid,PConst _ _ ss) =
ppCncFun (funid,CncFun fun arr) =
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)
ppFunList (fid,funids) =
ppFId fid <+> text "->" <+> hcat (punctuate comma (map ppFunId funids))
-- | One document per lindef function of a category, each of the form
--   @<fid> -> <funid>[<fidVar>]@.
ppLinDefs (fid,funids) = map ppLinDef funids
  where
    ppLinDef funid = ppFId fid <+> text "->" <+> ppFunId funid <> brackets (ppFId fidVar)
-- | One document per linref function of a category, each of the form
--   @<fidVar> -> <funid>[<fid>]@.
ppLinRefs (fid,funids) = map ppLinRef funids
  where
    ppLinRef funid = ppFId fidVar <+> text "->" <+> ppFunId funid <> brackets (ppFId fid)
ppSeq (seqid,seq) =
ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))
@@ -109,6 +112,7 @@ ppFId fid
| fid == fidInt = text "CInt"
| fid == fidFloat = text "CFloat"
| fid == fidVar = text "CVar"
| fid == fidStart = text "CStart"
| otherwise = char 'C' <> int fid
ppFunId funid = char 'F' <> int funid