diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h index 185e9af05..94c2cbee2 100644 --- a/src/runtime/c/pgf/data.h +++ b/src/runtime/c/pgf/data.h @@ -209,9 +209,11 @@ struct PGF_INTERNAL_DECL PgfSymbolALLCAPIT { static const uint8_t tag = 10; }; +struct PGF_INTERNAL_DECL PgfConcrLincat; struct PGF_INTERNAL_DECL PgfLincatBackref; struct PGF_INTERNAL_DECL PgfLincatField { + ref lincat; ref name; ref> backrefs; diff --git a/src/runtime/c/pgf/parser.cxx b/src/runtime/c/pgf/parser.cxx index 71f47d4f2..8889f04fe 100644 --- a/src/runtime/c/pgf/parser.cxx +++ b/src/runtime/c/pgf/parser.cxx @@ -25,178 +25,56 @@ public: } }; -struct PGF_INTERNAL_DECL PgfParser::Choice { +struct PGF_INTERNAL_DECL PgfParser::Choice +{ + ParseItemConts* conts; size_t id; + prob_t viterbi_prob; + bool is_chunk; std::vector prods; - - Choice(size_t id) { - this->id = id; - } -}; + std::vector items; + std::vector> exprs; + Choice(ParseItemConts* conts, size_t id, prob_t prob) + { + this->conts = conts; + this->id = id; + this->viterbi_prob = prob; + this->is_chunk = true; + } + + void trace(State *state); +}; class PGF_INTERNAL_DECL PgfParser::Production { public: - static - void predict(Choice *choice, ref lin, size_t seq_index) - { - size_t n_args = lin->absfun->type->hypos->len; - - Production *prod = (Production*) - malloc(sizeof(Production)+sizeof(Choice*)*n_args); - prod->lin = lin; - prod->seq_index = seq_index; - memset(prod->args, 0, sizeof(Choice*)*n_args); - - prod->log(choice); - choice->prods.push_back(prod); - } - - void log(Choice *res) { -#ifdef PARSER_DEBUG - PgfPrinter printer(NULL,0,NULL); - printer.nprintf(10, "?%ld = ", res->id); - printer.puts(&lin->name); - - auto hypos = lin->absfun->type->hypos; - for (size_t i = 0; i < hypos->len; i++) { - if (args[i] == NULL) - printer.efun(&hypos->data[i].type->name); - else - printer.nprintf(10, " ?%ld", args[i]->id); - } - printer.puts("\n"); - printer.dump(); -#endif - } + void trace(Choice *res); ref lin; size_t seq_index; Choice *args[]; }; -struct PGF_INTERNAL_DECL PgfParser::ItemConts { +struct PGF_INTERNAL_DECL PgfParser::ParseItemConts { State *state; - std::vector items; -}; - -class PGF_INTERNAL_DECL PgfParser::Item -{ -public: - static - void combine(State *state, PgfLincatBackref *backref, Choice *choice) - { - ref seq = - *vector_elem(backref->lin->seqs, backref->seq_index); - - size_t index = backref->seq_index % backref->lin->lincat->fields->len; - ref field = vector_elem(backref->lin->lincat->fields, index); - -// state->get_conts(field, 0); - if (backref->dot+1 < seq->syms.len) { - size_t n_args = backref->lin->absfun->type->hypos->len; - - Item *item = (Item*) - malloc(sizeof(Item)+sizeof(Choice*)*n_args); - item->lin = backref->lin; - item->seq_index = backref->seq_index; - item->dot = backref->dot+1; - - memset(item->args, 0, sizeof(Choice*)*n_args); - ref seq = - *vector_elem(item->lin->seqs, backref->seq_index); - PgfSymbol sym = seq->syms.data[backref->dot]; - ref symcat = ref::untagged(sym); - item->args[symcat->d] = choice; - - item->log(); - } else { - Production::predict(choice, backref->lin, backref->seq_index); - } - } - - Production *complete() - { - size_t n_args = lin->absfun->type->hypos->len; - - Production *prod = (Production*) - malloc(sizeof(Production)+sizeof(Choice*)*n_args); - prod->lin = lin; - prod->seq_index = seq_index; - memcpy(prod->args, args, sizeof(Choice*)*n_args); - - return prod; - } - - void log() { -#ifdef PARSER_DEBUG - PgfPrinter printer(NULL,0,NULL); - - size_t index = seq_index / lin->lincat->fields->len; - ref res = *vector_elem(lin->res, index); - ref ty = lin->absfun->type; - - if (res->vars != 0) { - printer.lvar_ranges(res->vars); - printer.puts(" . "); - } - - printer.efun(&ty->name); - printer.puts("("); - printer.lparam(ref::from_ptr(&res->param)); - printer.puts(") -> "); - - printer.efun(&lin->name); - printer.puts("["); - size_t n_args = lin->args->len / lin->res->len; - for (size_t i = 0; i < n_args; i++) { - if (i > 0) - printer.puts(","); - - if (args[i] == NULL) - printer.parg(vector_elem(ty->hypos, i)->type, - vector_elem(lin->args, index*n_args + i)); - else - printer.nprintf(10, "?%ld", args[i]->id); - } - - printer.nprintf(10, "]; %ld : ", seq_index % lin->lincat->fields->len); - ref seq = *vector_elem(lin->seqs, seq_index); - for (size_t i = 0; i < seq->syms.len; i++) { - if (i > 0) - printer.puts(" "); - if (i == dot) - printer.puts(". "); - printer.symbol(*vector_elem(&seq->syms, i)); - } - printer.puts("\n"); - - printer.dump(); -#endif - } - - -private: - ItemConts *conts; - ref lin; - size_t seq_index; - size_t dot; - Choice *args[]; + ref field; + std::vector items; }; class PGF_INTERNAL_DECL PgfParser::State { public: - ItemConts *get_conts(ref field, size_t value) + ParseItemConts *get_conts(ref field, size_t value) { - ItemConts *conts; + ParseItemConts *conts; CFGCat cfg_cat = {field, value}; auto itr1 = contss.find(cfg_cat); if (itr1 == contss.end()) { - conts = new ItemConts(); + conts = new ParseItemConts(); conts->state = this; - contss.insert(std::pair(cfg_cat, conts)); + conts->field = field; + contss.insert(std::pair(cfg_cat, conts)); } else { conts = itr1->second; } @@ -207,78 +85,345 @@ public: size_t start, end; State *prev, *next; - std::map contss; - std::map choices; - std::priority_queue,PgfParser::ResultComparator> queue; + prob_t viterbi_prob; + + class ResultComparator : std::less { + public: + bool operator()(Item* &lhs, Item* &rhs) const + { + return lhs->get_prob() > rhs->get_prob(); + } + }; + + std::map contss; + std::map choices; + std::priority_queue,ResultComparator> queue; }; - -class PgfParser::ResultExpr : public Result +class PGF_INTERNAL_DECL PgfParser::ParseItem : public Item { -public: - ResultExpr(Production *prod) +public: + void* operator new(size_t size, size_t n_args) { - this->inside_prob = prod->lin->absfun->prob; - this->outside_prob = prod->lin->lincat->abscat->prob; - this->prod = prod; - this->arg_index = 0; + size += sizeof(Choice*)*n_args; + void *p = malloc(size); + if (p) memset(p, 0, size); + return p; } - virtual prob_t prob() + ParseItem(ParseItemConts *conts, ref lin, size_t seq_index) { - return inside_prob+outside_prob; + this->outside_prob = lin->lincat->abscat->prob; + this->inside_prob = lin->absfun->prob; + this->conts = conts; + this->lin = lin; + this->seq_index = seq_index; + this->dot = lin->seqs->data[seq_index]->syms.len; } - virtual PgfExpr expr(PgfUnmarshaller *u) + ParseItem(ParseItemConts *conts, PgfLincatBackref *backref, + size_t d, Choice *choice) { - return u->efun(&prod->lin->name); + this->outside_prob = backref->lin->lincat->abscat->prob; + this->inside_prob = backref->lin->absfun->prob + choice->viterbi_prob; + this->conts = conts; + this->lin = backref->lin; + this->seq_index = backref->seq_index; + this->dot = backref->dot+1; + this->args[d] = choice; } - virtual void proceed(PgfParser *parser, PgfUnmarshaller *u) + void bu_predict(PgfLincatBackref *backref, Choice *choice) { + ref seq = + *vector_elem(backref->lin->seqs, backref->seq_index); + PgfSymbol sym = seq->syms.data[backref->dot]; + ref symcat = ref::untagged(sym); + + size_t index = backref->seq_index % backref->lin->lincat->fields->len; + ref field = vector_elem(backref->lin->lincat->fields, index); + ParseItemConts *conts1 = conts->state->get_conts(field, 0); + + size_t n_args = backref->lin->absfun->type->hypos->len; + conts->state->queue.push(new(n_args) ParseItem(conts1, backref, + symcat->d, choice)); + } + + void complete(PgfParser *parser, ref seq) + { + // the last child as a non-chunk + size_t dot = seq->syms.len; + while (dot > 0) { + dot--; + PgfSymbol sym = *vector_elem(&seq->syms,dot); + if (ref::get_tag(sym) == PgfSymbolCat::tag) { + auto sym_cat = ref::untagged(sym); + Choice *last = args[sym_cat->d]; + if (last != NULL) { + if (last->conts == conts) + continue; + last->is_chunk = false; + } + } + break; + } + + // Create a new choice + Choice *choice; + auto itr2 = parser->after->choices.find(conts); + if (itr2 == parser->after->choices.end()) { + if (parser->after->choices.empty()) { + parser->after->viterbi_prob = conts->state->viterbi_prob+inside_prob+outside_prob; + } + choice = new Choice(conts, ++parser->last_choice_id, inside_prob); + choice->trace(parser->after); + parser->after->choices.insert(std::pair(conts, choice)); + } else { + choice = itr2->second; + } + + // Create a new production + size_t n_args = lin->absfun->type->hypos->len; + + Production *prod = (Production*) + malloc(sizeof(Production)+sizeof(Choice*)*n_args); + prod->lin = lin; + prod->seq_index = seq_index; + memcpy(prod->args, args, sizeof(Choice*)*n_args); + + prod->trace(choice); + choice->prods.push_back(prod); + + // Bottom up prediction if it has not been done already + if (itr2 == parser->after->choices.end()) { + for (size_t i = 0; i < conts->field->backrefs->len; i++) { + ref backref = vector_elem(conts->field->backrefs, i); + bu_predict(backref,choice); + } + } + } + + void symbol(PgfParser *parser, PgfSymbol sym) { + } + + virtual bool proceed(PgfParser *parser, PgfUnmarshaller *u) + { + ref seq = lin->seqs->data[seq_index]; + + if (dot >= seq->syms.len) { + complete(parser, seq); + } else { + PgfSymbol sym = *vector_elem(&seq->syms,dot); + symbol(parser, sym); + } + + return true; + } + + virtual void combine(PgfParser *parser, ParseItemConts *conts, PgfExpr expr, prob_t prob, PgfUnmarshaller *u) + { + } + + virtual void print1(PgfPrinter *printer, State *state, PgfMarshaller *m) + { +#ifdef PARSER_DEBUG + printer->nprintf(32,"%ld-%ld; ", conts->state->end, state->start); + + size_t index = seq_index / lin->lincat->fields->len; + ref res = *vector_elem(lin->res, index); + ref ty = lin->absfun->type; + + if (res->vars != 0) { + printer->lvar_ranges(res->vars); + printer->puts(" . "); + } + + printer->efun(&ty->name); + printer->puts("("); + printer->lparam(ref::from_ptr(&res->param)); + printer->puts(") -> "); + + printer->efun(&lin->name); + printer->puts("["); + size_t n_args = lin->args->len / lin->res->len; + for (size_t i = 0; i < n_args; i++) { + if (i > 0) + printer->puts(","); + + if (args[i] == NULL) + printer->parg(vector_elem(ty->hypos, i)->type, + vector_elem(lin->args, index*n_args + i)); + else + printer->nprintf(10, "?%ld", args[i]->id); + } + + printer->nprintf(10, "]; %ld : ", seq_index % lin->lincat->fields->len); + ref seq = *vector_elem(lin->seqs, seq_index); + for (size_t i = 0; i < seq->syms.len; i++) { + if (i > 0) + printer->puts(" "); + if (dot == i) + printer->puts(". "); + printer->symbol(*vector_elem(&seq->syms, i)); + } + + if (dot == seq->syms.len) + printer->puts(" . "); +#endif + } + + virtual void print2(PgfPrinter *printer, State *state, PgfMarshaller *m) + { + } + + virtual PgfExpr get_expr(PgfUnmarshaller *u) + { + return 0; } private: - prob_t inside_prob; - prob_t outside_prob; - - Production *prod; - size_t arg_index; + ParseItemConts *conts; + ref lin; + size_t seq_index; + size_t dot; + Choice *args[]; }; -class PgfParser::ResultMeta : public Result +class PgfParser::ExprItem : public Item { public: - ResultMeta(State *state, - PgfExpr arg, prob_t prob, - ResultMeta *next) + ExprItem(Choice *parent, Production *prod, prob_t outside_prob, PgfUnmarshaller *u) { - this->inside_prob = prob + (next ? next->inside_prob : 0); - this->state = state; - this->arg = arg; - this->next = next; - } + this->parent = parent; + this->outside_prob = outside_prob; + this->inside_prob = prod->lin->absfun->prob; + this->prod = prod; + this->arg_index = 0; + this->expr = u->efun(&prod->lin->name); - virtual prob_t prob() - { - return inside_prob; - } - - virtual PgfExpr expr(PgfUnmarshaller *u) - { - ResultMeta *res = this; - PgfExpr expr = u->emeta(0); - while (res->arg != 0) { - PgfExpr expr1 = u->eapp(expr, res->arg); - u->free_ref(expr); - expr = expr1; - res = res->next; + size_t n_args = prod->lin->absfun->type->hypos->len; + for (size_t i = 0; i < n_args; i++) { + if (prod->args[i] != NULL) + this->inside_prob += prod->args[i]->viterbi_prob; } + } + + ExprItem(ExprItem *prev, PgfExpr arg, prob_t prob, PgfUnmarshaller *u) + { + this->parent = prev->parent; + this->outside_prob = prev->outside_prob; + this->inside_prob = prev->inside_prob; + this->prod = prev->prod; + this->arg_index = prev->arg_index + 1; + this->expr = u->eapp(prev->expr,arg); + + this->inside_prob -= prod->args[prev->arg_index]->viterbi_prob; + this->inside_prob += prob; + } + + virtual bool proceed(PgfParser *parser, PgfUnmarshaller *u) + { + size_t n_args = prod->lin->absfun->type->hypos->len; + while (arg_index < n_args) { + Choice *choice = prod->args[arg_index]; + + if (choice != NULL) { + choice->items.push_back(this); + + if (choice->items.size() == 1) { + for (auto prod : choice->prods) { + parser->fetch_state->queue.push(new ExprItem(choice,prod,get_prob(),u)); + } + } else { + for (auto ep : choice->exprs) { + combine(parser,choice->conts,ep.first,ep.second,u); + } + } + return true; + } + + PgfExpr arg = u->emeta(0); + expr = u->eapp(expr,arg); + u->free_ref(arg); + arg_index++; + } + + parent->exprs.push_back(std::pair(expr,inside_prob)); + for (auto item : parent->items) { + item->combine(parser,parent->conts,expr,inside_prob,u); + } + + return true; + } + + virtual void combine(PgfParser *parser, ParseItemConts *conts, PgfExpr expr, prob_t prob, PgfUnmarshaller *u) + { + parser->fetch_state->queue.push(new ExprItem(this,expr,prob,u)); + } + + virtual void print1(PgfPrinter *printer, State *state, PgfMarshaller *m) + { +#ifdef PARSER_DEBUG + parent->items[0]->print1(printer,state,m); + + printer->puts(" "); + + size_t n_args = prod->lin->absfun->type->hypos->len; + if (n_args > 0) + printer->puts("("); + m->match_expr(printer,expr); +#endif + } + + virtual void print2(PgfPrinter *printer, State *state, PgfMarshaller *m) + { +#ifdef PARSER_DEBUG + size_t n_args = prod->lin->absfun->type->hypos->len; + for (size_t i = arg_index; i < n_args; i++) { + if (prod->args[i]) + printer->nprintf(10," ?%ld",prod->args[i]->id); + else + printer->puts(" ?"); + } + if (n_args > 0) + printer->puts(")"); + + parent->items[0]->print2(printer,state,m); +#endif + } + + virtual PgfExpr get_expr(PgfUnmarshaller *u) + { return expr; } - virtual void proceed(PgfParser *parser, PgfUnmarshaller *u) +private: + Choice *parent; + Production *prod; + size_t arg_index; + PgfExpr expr; +}; + +class PgfParser::MetaItem : public Item +{ +public: + MetaItem(State *state, + PgfExpr arg, + prob_t inside_prob, + MetaItem *next) { + this->outside_prob = state->viterbi_prob; + this->inside_prob = inside_prob; + this->state = state; + this->arg = arg; + this->next = next; + } + + virtual bool proceed(PgfParser *parser, PgfUnmarshaller *u) + { + if (state->prev == NULL) + return false; + if (state->choices.size() == 0) { State *prev = state; while (prev->prev != NULL && prev->choices.size() == 0) { @@ -290,36 +435,131 @@ public: token->size = size; memcpy(token->text,parser->sentence->text+prev->end,size); token->text[size] = 0; - + PgfExpr expr = u->elit(u->lstr(token)); - prev->queue.push(new ResultMeta(prev, - expr, 0, - this)); + prev->queue.push(new MetaItem(prev, expr, + inside_prob, + this)); } else { for (auto it : state->choices) { - ItemConts *conts = it.first; + ParseItemConts *conts = it.first; Choice *choice = it.second; - for (Production *prod : choice->prods) { - PgfExpr expr = u->efun(&prod->lin->name); - prob_t prob = prod->lin->absfun->prob + - prod->lin->lincat->abscat->prob; - conts->state->queue.push(new ResultMeta(conts->state, - expr, prob, - this)); + if (!choice->is_chunk) + continue; + + choice->items.push_back(this); + + if (choice->items.size() == 1) { + prob_t prob = conts->state->viterbi_prob+inside_prob; + for (Production *prod : choice->prods) { + conts->state->queue.push(new ExprItem(choice, + prod, prob+prod->lin->lincat->abscat->prob, u)); + } + } else { + for (auto ep : choice->exprs) { + combine(parser,conts,ep.first,ep.second,u); + } } } } + return false; + } + + virtual void combine(PgfParser *parser, ParseItemConts *conts, PgfExpr expr, prob_t prob, PgfUnmarshaller *u) + { + conts->state->queue.push(new MetaItem(conts->state, + expr, + this->inside_prob+conts->field->lincat->abscat->prob+prob, + this)); + } + + virtual void print1(PgfPrinter *printer, State *state, PgfMarshaller *m) + { +#ifdef PARSER_DEBUG + printer->nprintf(10, "<%ld> ?", state->end); +#endif + } + + virtual void print2(PgfPrinter *printer, State *state, PgfMarshaller *m) + { +#ifdef PARSER_DEBUG + MetaItem *res = this; + while (res->arg != 0) { + printer->puts(" "); + m->match_expr(printer, res->arg); + res = res->next; + } +#endif + } + + virtual PgfExpr get_expr(PgfUnmarshaller *u) + { + MetaItem *res = this; + PgfExpr expr = u->emeta(0); + while (res->arg != 0) { + PgfExpr expr1 = u->eapp(expr, res->arg); + u->free_ref(expr); + expr = expr1; + res = res->next; + } + return expr; } private: - prob_t inside_prob; State *state; PgfExpr arg; - ResultMeta *next; + MetaItem *next; }; -PgfParser::PgfParser(ref start, PgfText *sentence) +void PgfParser::Item::trace(State *state, PgfMarshaller *m) +{ +#ifdef PARSER_DEBUG + PgfPrinter printer(NULL,0,m); + printer.puts("["); + print1(&printer, state, m); + print2(&printer, state, m); + printer.nprintf(40,"; %f+%f=%f]\n",inside_prob,outside_prob,inside_prob+outside_prob); + printer.dump(); +#endif +} + +void PgfParser::Choice::trace(State *state) +{ +#ifdef PARSER_DEBUG + PgfPrinter printer(NULL,0,NULL); + printer.nprintf(40,"[%ld-%ld; ", conts->state->end, state->start); + printer.efun(&conts->field->lincat->name); + printer.puts("; "); + printer.puts(conts->field->name); + printer.nprintf(40,"; ?%ld; %f]\n", id, viterbi_prob); + printer.dump(); +#endif +} + +void PgfParser::Production::trace(PgfParser::Choice *res) { +#ifdef PARSER_DEBUG + PgfPrinter printer(NULL,0,NULL); + printer.nprintf(10, "?%ld = ", res->id); + printer.puts(&lin->name); + + printer.puts("["); + auto hypos = lin->absfun->type->hypos; + for (size_t i = 0; i < hypos->len; i++) { + if (i > 0) + printer.puts(","); + + if (args[i] == NULL) + printer.efun(&hypos->data[i].type->name); + else + printer.nprintf(10, "?%ld", args[i]->id); + } + printer.puts("]\n"); + printer.dump(); +#endif +} + +PgfParser::PgfParser(ref start, PgfText *sentence, PgfMarshaller *m) { this->start = start; this->sentence = textdup(sentence); @@ -327,6 +567,7 @@ PgfParser::PgfParser(ref start, PgfText *sentence) this->before = NULL; this->after = NULL; this->fetch_state = NULL; + this->m = m; } void PgfParser::space(size_t start, size_t end, PgfExn* err) @@ -344,6 +585,7 @@ void PgfParser::space(size_t start, size_t end, PgfExn* err) before->end = end; before->prev = prev; before->next = next; + before->viterbi_prob = prev ? prev->viterbi_prob : 0; if (prev != NULL) prev->next = before; if (next != NULL) next->prev = before; @@ -351,11 +593,6 @@ void PgfParser::space(size_t start, size_t end, PgfExn* err) before = next; before->end = end; } - - if (end == sentence->size) { - fetch_state = after; - fetch_state->queue.push(new ResultMeta(after,0,0,NULL)); - } } void PgfParser::start_matches(size_t end, PgfExn* err) @@ -373,6 +610,7 @@ void PgfParser::start_matches(size_t end, PgfExn* err) after->end = end; after->prev = prev; after->next = next; + after->viterbi_prob = prev ? prev->viterbi_prob : 0; if (prev != NULL) prev->next = after; if (next != NULL) next->prev = after; @@ -386,35 +624,27 @@ void PgfParser::match(ref lin, size_t seq_index, PgfExn* err) size_t index = seq_index % lin->lincat->fields->len; ref field = vector_elem(lin->lincat->fields, index); - ItemConts *conts = before->get_conts(field, 0); - - Choice *choice; - auto itr2 = after->choices.find(conts); - if (itr2 == after->choices.end()) { - choice = new Choice(++last_choice_id); - after->choices.insert(std::pair(conts, choice)); - } else { - choice = itr2->second; - } - - Production::predict(choice,lin,seq_index); -/* - if (itr2 == after->choices.end()) { - for (size_t i = 0; i < field->backrefs->len; i++) { - PgfLincatBackref *backref = vector_elem(field->backrefs, i); - Item::combine(before, backref, choice); - } - }*/ + ParseItemConts *conts = before->get_conts(field, 0); + before->queue.push(new(0) ParseItem(conts, lin, seq_index)); } void PgfParser::end_matches(size_t end, PgfExn* err) { - if (end == sentence->size) { - fetch_state = after; - fetch_state->queue.push(new ResultMeta(after,0,0,NULL)); + while (!before->queue.empty()) { + Item *item = before->queue.top(); + before->queue.pop(); + + item->trace(after,m); + item->proceed(this,NULL); } } +void PgfParser::prepare() +{ + fetch_state = after; + fetch_state->queue.push(new MetaItem(after,0,0,NULL)); +} + PgfExpr PgfParser::fetch(PgfDB *db, PgfUnmarshaller *u, prob_t *prob) { DB_scope scope(db, READER_SCOPE); @@ -423,29 +653,25 @@ PgfExpr PgfParser::fetch(PgfDB *db, PgfUnmarshaller *u, prob_t *prob) fetch_state = fetch_state->next; } - if (fetch_state == NULL) { - return 0; - } - - while (fetch_state->prev != NULL) { - if (!fetch_state->queue.empty()) { - Result *res = fetch_state->queue.top(); + while (fetch_state != NULL) { + while (!fetch_state->queue.empty()) { + Item *item = fetch_state->queue.top(); fetch_state->queue.pop(); - res->proceed(this,u); + + item->trace(after,m); + if (!item->proceed(this,u)) { + if (fetch_state->prev == NULL) { + *prob = item->get_prob(); + return item->get_expr(u); + } + break; + } } fetch_state = fetch_state->prev; } - if (fetch_state->queue.empty()) { - return 0; - } - - Result *res = fetch_state->queue.top(); - fetch_state->queue.pop(); - *prob = res->prob(); - - return res->expr(u); + return 0; } PgfParser::~PgfParser() diff --git a/src/runtime/c/pgf/parser.h b/src/runtime/c/pgf/parser.h index b0410fe60..c3caa27f2 100644 --- a/src/runtime/c/pgf/parser.h +++ b/src/runtime/c/pgf/parser.h @@ -3,13 +3,14 @@ class PGF_INTERNAL_DECL PgfParser : public PgfPhraseScanner, public PgfExprEnum { public: - PgfParser(ref start, PgfText *sentence); + PgfParser(ref start, PgfText *sentence, PgfMarshaller *m); void space(size_t start, size_t end, PgfExn* err); void start_matches(size_t end, PgfExn* err); void match(ref lin, size_t seq_index, PgfExn* err); void end_matches(size_t end, PgfExn* err); + void prepare(); PgfExpr fetch(PgfDB *db, PgfUnmarshaller *u, prob_t *prob); virtual ~PgfParser(); @@ -17,35 +18,40 @@ public: private: class CFGCat; class State; - class Item; - class ItemConts; class Choice; class Production; - class Result { + class ParseItemConts; + + class Item { public: - virtual prob_t prob() = 0; - virtual PgfExpr expr(PgfUnmarshaller *u) = 0; - virtual void proceed(PgfParser *parser, PgfUnmarshaller *u) = 0; + prob_t get_prob() { return inside_prob + outside_prob; }; + + virtual bool proceed(PgfParser *parser, PgfUnmarshaller *u) = 0; + virtual void combine(PgfParser *parser, ParseItemConts *conts, PgfExpr expr, prob_t inside_prob, PgfUnmarshaller *u) = 0; + virtual void print1(PgfPrinter *printer, State *state, PgfMarshaller *m) = 0; + virtual void print2(PgfPrinter *printer, State *state, PgfMarshaller *m) = 0; + virtual PgfExpr get_expr(PgfUnmarshaller *u) = 0; + + void trace(State *state, PgfMarshaller *m); + + protected: + prob_t inside_prob; + prob_t outside_prob; }; - class ResultExpr; - class ResultMeta; - - class ResultComparator : std::less { - public: - bool operator()(Result* &lhs, Result* &rhs) const - { - return lhs->prob() > rhs->prob(); - } - }; + class ParseItem; + class ExprItem; + class MetaItem; ref start; PgfText *sentence; size_t last_choice_id; - + State *before, *after, *fetch_state; + + PgfMarshaller *m; }; #endif diff --git a/src/runtime/c/pgf/pgf.cxx b/src/runtime/c/pgf/pgf.cxx index 028edee46..75508d27a 100644 --- a/src/runtime/c/pgf/pgf.cxx +++ b/src/runtime/c/pgf/pgf.cxx @@ -1492,22 +1492,23 @@ public: this->n_lindefs = n_lindefs; this->n_linrefs = n_linrefs; - ref> db_fields = vector_new(n_fields); - for (size_t i = 0; i < n_fields; i++) { - ref name = textdup_db(fields[i]); - vector_elem(db_fields, i)->name = name; - vector_elem(db_fields, i)->backrefs = 0; - } - ref lincat = PgfDB::malloc(abscat->name.size+1); memcpy(&lincat->name, &abscat->name, sizeof(PgfText)+abscat->name.size+1); lincat->abscat = abscat; lincat->args = args; lincat->res = res; lincat->seqs = seqs; - lincat->fields = db_fields; lincat->n_lindefs = n_lindefs; + ref> db_fields = vector_new(n_fields); + for (size_t i = 0; i < n_fields; i++) { + ref name = textdup_db(fields[i]); + vector_elem(db_fields, i)->lincat = lincat; + vector_elem(db_fields, i)->name = name; + vector_elem(db_fields, i)->backrefs = 0; + } + lincat->fields = db_fields; + this->container = lincat.tagged(); build->build(this, err); @@ -1540,6 +1541,7 @@ public: ref lin = PgfDB::malloc(absfun->name.size+1); memcpy(&lin->name, &absfun->name, sizeof(PgfText)+absfun->name.size+1); lin->absfun = absfun; + lin->lincat = lincat; lin->args = args; lin->res = res; lin->seqs = seqs; @@ -2383,10 +2385,11 @@ PgfExprEnum *pgf_parse(PgfDB *db, PgfConcrRevision revision, if (u.lincat == 0) return 0; - PgfParser *parser = new PgfParser(u.lincat, sentence); + PgfParser *parser = new PgfParser(u.lincat, sentence, m); phrasetable_lookup_cohorts(concr->phrasetable, sentence, case_sensitive, parser, err); + parser->prepare(); return parser; } PGF_API_END diff --git a/src/runtime/c/pgf/reader.cxx b/src/runtime/c/pgf/reader.cxx index be71d1dec..b27de8e6a 100644 --- a/src/runtime/c/pgf/reader.cxx +++ b/src/runtime/c/pgf/reader.cxx @@ -667,7 +667,7 @@ ref PgfReader::read_lincat() { ref lincat = read_name(&PgfConcrLincat::name); lincat->abscat = namespace_lookup(abstract->cats, &lincat->name); - lincat->fields = read_vector(&PgfReader::read_lincat_field); + lincat->fields = read_lincat_fields(lincat); lincat->n_lindefs = read_len(); lincat->args = read_vector(&PgfReader::read_parg); lincat->res = read_vector(&PgfReader::read_presult2); @@ -675,10 +675,17 @@ ref PgfReader::read_lincat() return lincat; } -void PgfReader::read_lincat_field(ref field) +ref> PgfReader::read_lincat_fields(ref lincat) { - field->name = read_text(); - field->backrefs = 0; + size_t len = read_len(); + ref> fields = vector_new(len); + for (size_t i = 0; i < len; i++) { + ref field = vector_elem(fields,i); + field->lincat = lincat; + field->name = read_text(); + field->backrefs = 0; + } + return fields; } ref PgfReader::read_lin() diff --git a/src/runtime/c/pgf/reader.h b/src/runtime/c/pgf/reader.h index 2e853dd1d..cf2c67e16 100644 --- a/src/runtime/c/pgf/reader.h +++ b/src/runtime/c/pgf/reader.h @@ -69,7 +69,7 @@ public: void merge_abstract(ref abstract); ref read_lincat(); - void read_lincat_field(ref field); + ref> read_lincat_fields(ref lincat); ref read_lparam(); void read_variable_range(ref var_info); void read_parg(ref parg);