bottom up prediction and maximal chunks

This commit is contained in:
Krasimir Angelov
2022-09-23 11:33:16 +02:00
parent f8cfed15b4
commit cd280272f3
6 changed files with 528 additions and 284 deletions

View File

@@ -209,9 +209,11 @@ struct PGF_INTERNAL_DECL PgfSymbolALLCAPIT {
static const uint8_t tag = 10;
};
struct PGF_INTERNAL_DECL PgfConcrLincat;
struct PGF_INTERNAL_DECL PgfLincatBackref;
struct PGF_INTERNAL_DECL PgfLincatField {
ref<PgfConcrLincat> lincat;
ref<PgfText> name;
ref<Vector<PgfLincatBackref>> backrefs;

View File

@@ -25,178 +25,56 @@ public:
}
};
struct PGF_INTERNAL_DECL PgfParser::Choice {
struct PGF_INTERNAL_DECL PgfParser::Choice
{
ParseItemConts* conts;
size_t id;
prob_t viterbi_prob;
bool is_chunk;
std::vector<Production*> prods;
Choice(size_t id) {
this->id = id;
}
};
std::vector<Item*> items;
std::vector<std::pair<PgfExpr,prob_t>> exprs;
Choice(ParseItemConts* conts, size_t id, prob_t prob)
{
this->conts = conts;
this->id = id;
this->viterbi_prob = prob;
this->is_chunk = true;
}
void trace(State *state);
};
class PGF_INTERNAL_DECL PgfParser::Production
{
public:
static
void predict(Choice *choice, ref<PgfConcrLin> lin, size_t seq_index)
{
size_t n_args = lin->absfun->type->hypos->len;
Production *prod = (Production*)
malloc(sizeof(Production)+sizeof(Choice*)*n_args);
prod->lin = lin;
prod->seq_index = seq_index;
memset(prod->args, 0, sizeof(Choice*)*n_args);
prod->log(choice);
choice->prods.push_back(prod);
}
void log(Choice *res) {
#ifdef PARSER_DEBUG
PgfPrinter printer(NULL,0,NULL);
printer.nprintf(10, "?%ld = ", res->id);
printer.puts(&lin->name);
auto hypos = lin->absfun->type->hypos;
for (size_t i = 0; i < hypos->len; i++) {
if (args[i] == NULL)
printer.efun(&hypos->data[i].type->name);
else
printer.nprintf(10, " ?%ld", args[i]->id);
}
printer.puts("\n");
printer.dump();
#endif
}
void trace(Choice *res);
ref<PgfConcrLin> lin;
size_t seq_index;
Choice *args[];
};
struct PGF_INTERNAL_DECL PgfParser::ItemConts {
struct PGF_INTERNAL_DECL PgfParser::ParseItemConts {
State *state;
std::vector<Item> items;
};
class PGF_INTERNAL_DECL PgfParser::Item
{
public:
static
void combine(State *state, PgfLincatBackref *backref, Choice *choice)
{
ref<PgfSequence> seq =
*vector_elem(backref->lin->seqs, backref->seq_index);
size_t index = backref->seq_index % backref->lin->lincat->fields->len;
ref<PgfLincatField> field = vector_elem(backref->lin->lincat->fields, index);
// state->get_conts(field, 0);
if (backref->dot+1 < seq->syms.len) {
size_t n_args = backref->lin->absfun->type->hypos->len;
Item *item = (Item*)
malloc(sizeof(Item)+sizeof(Choice*)*n_args);
item->lin = backref->lin;
item->seq_index = backref->seq_index;
item->dot = backref->dot+1;
memset(item->args, 0, sizeof(Choice*)*n_args);
ref<PgfSequence> seq =
*vector_elem(item->lin->seqs, backref->seq_index);
PgfSymbol sym = seq->syms.data[backref->dot];
ref<PgfSymbolCat> symcat = ref<PgfSymbolCat>::untagged(sym);
item->args[symcat->d] = choice;
item->log();
} else {
Production::predict(choice, backref->lin, backref->seq_index);
}
}
Production *complete()
{
size_t n_args = lin->absfun->type->hypos->len;
Production *prod = (Production*)
malloc(sizeof(Production)+sizeof(Choice*)*n_args);
prod->lin = lin;
prod->seq_index = seq_index;
memcpy(prod->args, args, sizeof(Choice*)*n_args);
return prod;
}
void log() {
#ifdef PARSER_DEBUG
PgfPrinter printer(NULL,0,NULL);
size_t index = seq_index / lin->lincat->fields->len;
ref<PgfPResult> res = *vector_elem(lin->res, index);
ref<PgfDTyp> ty = lin->absfun->type;
if (res->vars != 0) {
printer.lvar_ranges(res->vars);
printer.puts(" . ");
}
printer.efun(&ty->name);
printer.puts("(");
printer.lparam(ref<PgfLParam>::from_ptr(&res->param));
printer.puts(") -> ");
printer.efun(&lin->name);
printer.puts("[");
size_t n_args = lin->args->len / lin->res->len;
for (size_t i = 0; i < n_args; i++) {
if (i > 0)
printer.puts(",");
if (args[i] == NULL)
printer.parg(vector_elem(ty->hypos, i)->type,
vector_elem(lin->args, index*n_args + i));
else
printer.nprintf(10, "?%ld", args[i]->id);
}
printer.nprintf(10, "]; %ld : ", seq_index % lin->lincat->fields->len);
ref<PgfSequence> seq = *vector_elem(lin->seqs, seq_index);
for (size_t i = 0; i < seq->syms.len; i++) {
if (i > 0)
printer.puts(" ");
if (i == dot)
printer.puts(". ");
printer.symbol(*vector_elem(&seq->syms, i));
}
printer.puts("\n");
printer.dump();
#endif
}
private:
ItemConts *conts;
ref<PgfConcrLin> lin;
size_t seq_index;
size_t dot;
Choice *args[];
ref<PgfLincatField> field;
std::vector<ParseItem> items;
};
class PGF_INTERNAL_DECL PgfParser::State
{
public:
ItemConts *get_conts(ref<PgfLincatField> field, size_t value)
ParseItemConts *get_conts(ref<PgfLincatField> field, size_t value)
{
ItemConts *conts;
ParseItemConts *conts;
CFGCat cfg_cat = {field, value};
auto itr1 = contss.find(cfg_cat);
if (itr1 == contss.end()) {
conts = new ItemConts();
conts = new ParseItemConts();
conts->state = this;
contss.insert(std::pair<CFGCat,ItemConts*>(cfg_cat, conts));
conts->field = field;
contss.insert(std::pair<CFGCat,ParseItemConts*>(cfg_cat, conts));
} else {
conts = itr1->second;
}
@@ -207,78 +85,345 @@ public:
size_t start, end;
State *prev, *next;
std::map<CFGCat,ItemConts*> contss;
std::map<ItemConts*,Choice*> choices;
std::priority_queue<PgfParser::Result*,std::vector<PgfParser::Result*>,PgfParser::ResultComparator> queue;
prob_t viterbi_prob;
class ResultComparator : std::less<Item*> {
public:
bool operator()(Item* &lhs, Item* &rhs) const
{
return lhs->get_prob() > rhs->get_prob();
}
};
std::map<CFGCat,ParseItemConts*> contss;
std::map<ParseItemConts*,Choice*> choices;
std::priority_queue<Item*,std::vector<Item*>,ResultComparator> queue;
};
class PgfParser::ResultExpr : public Result
class PGF_INTERNAL_DECL PgfParser::ParseItem : public Item
{
public:
ResultExpr(Production *prod)
public:
void* operator new(size_t size, size_t n_args)
{
this->inside_prob = prod->lin->absfun->prob;
this->outside_prob = prod->lin->lincat->abscat->prob;
this->prod = prod;
this->arg_index = 0;
size += sizeof(Choice*)*n_args;
void *p = malloc(size);
if (p) memset(p, 0, size);
return p;
}
virtual prob_t prob()
ParseItem(ParseItemConts *conts, ref<PgfConcrLin> lin, size_t seq_index)
{
return inside_prob+outside_prob;
this->outside_prob = lin->lincat->abscat->prob;
this->inside_prob = lin->absfun->prob;
this->conts = conts;
this->lin = lin;
this->seq_index = seq_index;
this->dot = lin->seqs->data[seq_index]->syms.len;
}
virtual PgfExpr expr(PgfUnmarshaller *u)
ParseItem(ParseItemConts *conts, PgfLincatBackref *backref,
size_t d, Choice *choice)
{
return u->efun(&prod->lin->name);
this->outside_prob = backref->lin->lincat->abscat->prob;
this->inside_prob = backref->lin->absfun->prob + choice->viterbi_prob;
this->conts = conts;
this->lin = backref->lin;
this->seq_index = backref->seq_index;
this->dot = backref->dot+1;
this->args[d] = choice;
}
virtual void proceed(PgfParser *parser, PgfUnmarshaller *u)
// Bottom-up prediction step. `backref` names a sequence (and a dot position
// inside it) of some linearization; a new ParseItem is queued for that
// sequence with `choice` stored in the argument slot selected by the symbol
// found at the back-reference's dot.
void bu_predict(PgfLincatBackref *backref, Choice *choice)
{
    auto parent_lin = backref->lin;

    // The symbol at the back-reference's dot is read as a category symbol;
    // its `d` is the index of the argument slot that receives `choice`.
    ref<PgfSequence> parent_seq =
        *vector_elem(parent_lin->seqs, backref->seq_index);
    ref<PgfSymbolCat> cat_sym =
        ref<PgfSymbolCat>::untagged(parent_seq->syms.data[backref->dot]);

    // The continuations of the new item are looked up (or created) in the
    // state of this item's own continuations, keyed by the lincat field
    // that the sequence index maps to.
    size_t field_no = backref->seq_index % parent_lin->lincat->fields->len;
    ref<PgfLincatField> parent_field =
        vector_elem(parent_lin->lincat->fields, field_no);
    State *origin = conts->state;
    ParseItemConts *parent_conts = origin->get_conts(parent_field, 0);

    // The item is allocated with room for one Choice* per hypothesis.
    size_t arity = parent_lin->absfun->type->hypos->len;
    origin->queue.push(new(arity) ParseItem(parent_conts, backref,
                                            cat_sym->d, choice));
}
// Called by proceed() once the dot has reached the end of `seq`.
//
// Records the finished item as a Production of the Choice registered for
// `conts` in parser->after. The Choice is created the first time these
// continuations are completed in that state, and only then is bottom-up
// prediction run over the back-references of the continuation's field.
void complete(PgfParser *parser, ref<PgfSequence> seq)
{
    // Walk the symbols right-to-left. Filled-in category arguments that
    // share this item's continuations are skipped; the first other
    // filled-in category argument encountered is flagged as a non-chunk.
    // Any other kind of symbol, or an empty argument slot, ends the walk.
    for (size_t pos = seq->syms.len; pos > 0; ) {
        pos--;
        PgfSymbol sym = *vector_elem(&seq->syms,pos);
        if (ref<PgfSymbol>::get_tag(sym) != PgfSymbolCat::tag)
            break;

        auto sym_cat = ref<PgfSymbolCat>::untagged(sym);
        Choice *child = args[sym_cat->d];
        if (child == NULL)
            break;
        if (child->conts != conts) {
            child->is_chunk = false;
            break;
        }
    }

    // Find the choice for these continuations, creating it when missing.
    State *target = parser->after;
    auto found = target->choices.find(conts);
    const bool first_completion = (found == target->choices.end());

    Choice *choice;
    if (first_completion) {
        // The very first choice created in a state also fixes that
        // state's viterbi probability.
        if (target->choices.empty()) {
            target->viterbi_prob = conts->state->viterbi_prob+inside_prob+outside_prob;
        }
        choice = new Choice(conts, ++parser->last_choice_id, inside_prob);
        choice->trace(target);
        target->choices.insert(std::pair<ParseItemConts*,Choice*>(conts, choice));
    } else {
        choice = found->second;
    }

    // Create a new production carrying a copy of this item's arguments.
    size_t n_args = lin->absfun->type->hypos->len;
    Production *prod = (Production*)
        malloc(sizeof(Production)+sizeof(Choice*)*n_args);
    prod->lin = lin;
    prod->seq_index = seq_index;
    memcpy(prod->args, args, sizeof(Choice*)*n_args);
    prod->trace(choice);
    choice->prods.push_back(prod);

    // Bottom up prediction if it has not been done already.
    // Lincat fields are created with `backrefs = 0`, so a field that never
    // received back-references must be skipped instead of dereferenced.
    if (first_completion && conts->field->backrefs != 0) {
        for (size_t i = 0; i < conts->field->backrefs->len; i++) {
            ref<PgfLincatBackref> backref = vector_elem(conts->field->backrefs, i);
            bu_predict(backref,choice);
        }
    }
}
// Invoked by proceed() with the symbol at the dot when the item has not yet
// reached the end of its sequence. The body is empty, so such items are
// currently dropped without any further action.
void symbol(PgfParser *parser, PgfSymbol sym) {
}
// Processes this item after it has been taken from a queue: an item whose
// dot is at (or beyond) the end of its sequence is completed, any other
// item hands the symbol at the dot to symbol(). Always returns true.
virtual bool proceed(PgfParser *parser, PgfUnmarshaller *u)
{
    ref<PgfSequence> seq = lin->seqs->data[seq_index];

    const bool has_more_symbols = dot < seq->syms.len;
    if (has_more_symbols) {
        PgfSymbol next_sym = *vector_elem(&seq->syms,dot);
        symbol(parser, next_sym);
        return true;
    }

    complete(parser, seq);
    return true;
}
// ParseItem's implementation of the combine() hook. The body is empty:
// a ParseItem does nothing when it is offered an expression.
virtual void combine(PgfParser *parser, ParseItemConts *conts, PgfExpr expr, prob_t prob, PgfUnmarshaller *u)
{
}
// Debug rendering of this item; produces output only when PARSER_DEBUG is
// defined. Prints the span, the optional variable ranges, the result type
// with its parameter, the function name with its arguments (a choice id
// for filled slots, the declared argument otherwise), the field index and
// finally the sequence with the dot position marked.
virtual void print1(PgfPrinter *printer, State *state, PgfMarshaller *m)
{
#ifdef PARSER_DEBUG
    printer->nprintf(32,"%ld-%ld; ", conts->state->end, state->start);

    size_t res_no = seq_index / lin->lincat->fields->len;
    ref<PgfPResult> result = *vector_elem(lin->res, res_no);
    ref<PgfDTyp> fun_type = lin->absfun->type;

    if (result->vars != 0) {
        printer->lvar_ranges(result->vars);
        printer->puts(" . ");
    }

    printer->efun(&fun_type->name);
    printer->puts("(");
    printer->lparam(ref<PgfLParam>::from_ptr(&result->param));
    printer->puts(") -> ");

    printer->efun(&lin->name);
    printer->puts("[");
    size_t arity = lin->args->len / lin->res->len;
    for (size_t k = 0; k < arity; k++) {
        if (k > 0)
            printer->puts(",");

        if (args[k] != NULL) {
            printer->nprintf(10, "?%ld", args[k]->id);
        } else {
            printer->parg(vector_elem(fun_type->hypos, k)->type,
                          vector_elem(lin->args, res_no*arity + k));
        }
    }
    printer->nprintf(10, "]; %ld : ", seq_index % lin->lincat->fields->len);

    ref<PgfSequence> symbols = *vector_elem(lin->seqs, seq_index);
    for (size_t k = 0; k < symbols->syms.len; k++) {
        if (k > 0)
            printer->puts(" ");
        if (dot == k)
            printer->puts(". ");
        printer->symbol(*vector_elem(&symbols->syms, k));
    }

    // A dot at the very end is not covered by the loop above.
    if (dot == symbols->syms.len)
        printer->puts(" . ");
#endif
}
// Second half of the debug rendering (Item::trace calls print1, then print2).
// A ParseItem prints everything in print1, so this body is empty.
virtual void print2(PgfPrinter *printer, State *state, PgfMarshaller *m)
{
}
// A ParseItem carries no expression of its own; always returns 0.
virtual PgfExpr get_expr(PgfUnmarshaller *u)
{
return 0;
}
private:
prob_t inside_prob;
prob_t outside_prob;
Production *prod;
size_t arg_index;
ParseItemConts *conts;
ref<PgfConcrLin> lin;
size_t seq_index;
size_t dot;
Choice *args[];
};
class PgfParser::ResultMeta : public Result
class PgfParser::ExprItem : public Item
{
public:
ResultMeta(State *state,
PgfExpr arg, prob_t prob,
ResultMeta *next)
ExprItem(Choice *parent, Production *prod, prob_t outside_prob, PgfUnmarshaller *u)
{
this->inside_prob = prob + (next ? next->inside_prob : 0);
this->state = state;
this->arg = arg;
this->next = next;
}
this->parent = parent;
this->outside_prob = outside_prob;
this->inside_prob = prod->lin->absfun->prob;
this->prod = prod;
this->arg_index = 0;
this->expr = u->efun(&prod->lin->name);
virtual prob_t prob()
{
return inside_prob;
}
virtual PgfExpr expr(PgfUnmarshaller *u)
{
ResultMeta *res = this;
PgfExpr expr = u->emeta(0);
while (res->arg != 0) {
PgfExpr expr1 = u->eapp(expr, res->arg);
u->free_ref(expr);
expr = expr1;
res = res->next;
size_t n_args = prod->lin->absfun->type->hypos->len;
for (size_t i = 0; i < n_args; i++) {
if (prod->args[i] != NULL)
this->inside_prob += prod->args[i]->viterbi_prob;
}
}
// Builds the successor of `prev` after its current argument has been
// resolved to the expression `arg` with probability `prob`: the expression
// becomes `prev->expr` applied to `arg`, the argument index moves one step
// forward, and the inside probability swaps the argument choice's viterbi
// estimate for the actual `prob`.
ExprItem(ExprItem *prev, PgfExpr arg, prob_t prob, PgfUnmarshaller *u)
    : parent(prev->parent),
      prod(prev->prod),
      arg_index(prev->arg_index + 1),
      expr(u->eapp(prev->expr,arg))
{
    this->outside_prob = prev->outside_prob;

    // Same two-step update as before: first remove the estimate that was
    // counted for the argument slot just filled, then add the real value.
    prob_t adjusted = prev->inside_prob;
    adjusted -= prod->args[prev->arg_index]->viterbi_prob;
    adjusted += prob;
    this->inside_prob = adjusted;
}
// Advances the construction of the expression for `prod`.
//
// Argument slots without a Choice are filled with a fresh meta variable.
// On the first slot that has a Choice, this item registers itself in
// choice->items and returns: if it is the first item waiting on that
// choice, one ExprItem per production of the choice is queued; otherwise
// the expressions already recorded for the choice are combined with this
// item right away. When every argument has been consumed, the finished
// expression is stored in parent->exprs and offered to every item waiting
// on the parent choice. Always returns true.
virtual bool proceed(PgfParser *parser, PgfUnmarshaller *u)
{
    size_t n_args = prod->lin->absfun->type->hypos->len;
    while (arg_index < n_args) {
        Choice *choice = prod->args[arg_index];
        if (choice != NULL) {
            choice->items.push_back(this);
            if (choice->items.size() == 1) {
                // First consumer of this choice: start expanding it.
                for (Production *alternative : choice->prods) {
                    parser->fetch_state->queue.push(new ExprItem(choice,alternative,get_prob(),u));
                }
            } else {
                // The choice is already being expanded: reuse its results.
                for (auto ep : choice->exprs) {
                    combine(parser,choice->conts,ep.first,ep.second,u);
                }
            }
            return true;
        }

        // No choice for this slot: apply the expression to a meta variable.
        // The superseded application is released as well as the argument,
        // mirroring what MetaItem::get_expr does with its intermediates;
        // previously the old `expr` reference was simply overwritten.
        PgfExpr arg = u->emeta(0);
        PgfExpr applied = u->eapp(expr,arg);
        u->free_ref(arg);
        u->free_ref(expr);
        expr = applied;
        arg_index++;
    }

    parent->exprs.push_back(std::pair<PgfExpr,prob_t>(expr,inside_prob));
    for (auto item : parent->items) {
        item->combine(parser,parent->conts,expr,inside_prob,u);
    }
    return true;
}
// Offered an expression for the argument this item is waiting on: queues
// the successor item (this item extended with `expr`) on the parser's
// fetch state. `conts` is not used by this implementation.
virtual void combine(PgfParser *parser, ParseItemConts *conts, PgfExpr expr, prob_t prob, PgfUnmarshaller *u)
{
    ExprItem *successor = new ExprItem(this,expr,prob,u);
    parser->fetch_state->queue.push(successor);
}
// First half of the debug rendering (active only with PARSER_DEBUG):
// prints the first item waiting on the parent choice, then the expression
// built so far, opening a parenthesis when the function takes arguments.
virtual void print1(PgfPrinter *printer, State *state, PgfMarshaller *m)
{
#ifdef PARSER_DEBUG
    Item *waiting = parent->items[0];
    waiting->print1(printer,state,m);
    printer->puts(" ");

    size_t arity = prod->lin->absfun->type->hypos->len;
    if (arity > 0) {
        printer->puts("(");
    }
    m->match_expr(printer,expr);
#endif
}
// Second half of the debug rendering (active only with PARSER_DEBUG):
// lists the arguments still to be processed (a choice id, or a bare "?"
// for empty slots), closes the parenthesis opened by print1 and lets the
// first item waiting on the parent choice print its own second half.
virtual void print2(PgfPrinter *printer, State *state, PgfMarshaller *m)
{
#ifdef PARSER_DEBUG
    size_t arity = prod->lin->absfun->type->hypos->len;
    for (size_t k = arg_index; k < arity; k++) {
        Choice *pending = prod->args[k];
        if (!pending) {
            printer->puts(" ?");
        } else {
            printer->nprintf(10," ?%ld",pending->id);
        }
    }
    if (arity > 0) {
        printer->puts(")");
    }

    Item *waiting = parent->items[0];
    waiting->print2(printer,state,m);
#endif
}
// Returns the expression built so far for this item. The stored handle is
// returned as is; no new reference is created through `u`.
virtual PgfExpr get_expr(PgfUnmarshaller *u)
{
return expr;
}
virtual void proceed(PgfParser *parser, PgfUnmarshaller *u)
private:
Choice *parent;
Production *prod;
size_t arg_index;
PgfExpr expr;
};
class PgfParser::MetaItem : public Item
{
public:
// Creates a link in a chain of meta items. `arg` is the expression
// contributed by this link (0 for the initial item), `next` is the item
// this one was derived from, and the outside probability is taken from
// the viterbi probability of `state`.
MetaItem(State *state,
         PgfExpr arg,
         prob_t inside_prob,
         MetaItem *next)
    : state(state),
      arg(arg),
      next(next)
{
    this->outside_prob = state->viterbi_prob;
    this->inside_prob  = inside_prob;
}
virtual bool proceed(PgfParser *parser, PgfUnmarshaller *u)
{
if (state->prev == NULL)
return false;
if (state->choices.size() == 0) {
State *prev = state;
while (prev->prev != NULL && prev->choices.size() == 0) {
@@ -290,36 +435,131 @@ public:
token->size = size;
memcpy(token->text,parser->sentence->text+prev->end,size);
token->text[size] = 0;
PgfExpr expr = u->elit(u->lstr(token));
prev->queue.push(new ResultMeta(prev,
expr, 0,
this));
prev->queue.push(new MetaItem(prev, expr,
inside_prob,
this));
} else {
for (auto it : state->choices) {
ItemConts *conts = it.first;
ParseItemConts *conts = it.first;
Choice *choice = it.second;
for (Production *prod : choice->prods) {
PgfExpr expr = u->efun(&prod->lin->name);
prob_t prob = prod->lin->absfun->prob +
prod->lin->lincat->abscat->prob;
conts->state->queue.push(new ResultMeta(conts->state,
expr, prob,
this));
if (!choice->is_chunk)
continue;
choice->items.push_back(this);
if (choice->items.size() == 1) {
prob_t prob = conts->state->viterbi_prob+inside_prob;
for (Production *prod : choice->prods) {
conts->state->queue.push(new ExprItem(choice,
prod, prob+prod->lin->lincat->abscat->prob, u));
}
} else {
for (auto ep : choice->exprs) {
combine(parser,conts,ep.first,ep.second,u);
}
}
}
}
return false;
}
// Offered an expression for the chunk described by `conts`: queues, on the
// state of those continuations, a new MetaItem that chains `expr` in front
// of this item. Its inside probability is this item's inside probability
// plus the probability of the chunk's category plus `prob`.
virtual void combine(PgfParser *parser, ParseItemConts *conts, PgfExpr expr, prob_t prob, PgfUnmarshaller *u)
{
    State *chunk_start = conts->state;
    MetaItem *extended =
        new MetaItem(chunk_start,
                     expr,
                     this->inside_prob+conts->field->lincat->abscat->prob+prob,
                     this);
    chunk_start->queue.push(extended);
}
// First half of the debug rendering (active only with PARSER_DEBUG):
// prints the end position of `state` in angle brackets followed by " ?".
virtual void print1(PgfPrinter *printer, State *state, PgfMarshaller *m)
{
#ifdef PARSER_DEBUG
printer->nprintf(10, "<%ld> ?", state->end);
#endif
}
// Second half of the debug rendering (active only with PARSER_DEBUG):
// follows the chain of meta items starting at this one and prints every
// argument, each preceded by a space, until an item with arg == 0 is hit.
virtual void print2(PgfPrinter *printer, State *state, PgfMarshaller *m)
{
#ifdef PARSER_DEBUG
    for (MetaItem *link = this; link->arg != 0; link = link->next) {
        printer->puts(" ");
        m->match_expr(printer, link->arg);
    }
#endif
}
// Builds the expression for this chain of meta items: a meta variable
// applied in turn to the argument of every link, starting with this item
// and stopping at the first link whose arg is 0. Each intermediate
// application is released once it has been wrapped by the next one.
virtual PgfExpr get_expr(PgfUnmarshaller *u)
{
    PgfExpr result = u->emeta(0);
    for (MetaItem *link = this; link->arg != 0; link = link->next) {
        PgfExpr applied = u->eapp(result, link->arg);
        u->free_ref(result);
        result = applied;
    }
    return result;
}
private:
prob_t inside_prob;
State *state;
PgfExpr arg;
ResultMeta *next;
MetaItem *next;
};
PgfParser::PgfParser(ref<PgfConcrLincat> start, PgfText *sentence)
// Debug trace of an item (a no-op unless PARSER_DEBUG is defined): prints
// "[", the item's print1 and print2 output, and then the inside, outside
// and total probabilities, and dumps the line.
void PgfParser::Item::trace(State *state, PgfMarshaller *m)
{
#ifdef PARSER_DEBUG
    PgfPrinter out(NULL,0,m);

    out.puts("[");
    this->print1(&out, state, m);
    this->print2(&out, state, m);
    out.nprintf(40,"; %f+%f=%f]\n",inside_prob,outside_prob,inside_prob+outside_prob);

    out.dump();
#endif
}
// Debug trace of a choice (a no-op unless PARSER_DEBUG is defined): prints
// the span, the lincat name and field name of the continuations, the
// choice id and its viterbi probability, and dumps the line.
void PgfParser::Choice::trace(State *state)
{
#ifdef PARSER_DEBUG
    PgfPrinter out(NULL,0,NULL);
    ref<PgfLincatField> fld = conts->field;

    out.nprintf(40,"[%ld-%ld; ", conts->state->end, state->start);
    out.efun(&fld->lincat->name);
    out.puts("; ");
    out.puts(fld->name);
    out.nprintf(40,"; ?%ld; %f]\n", id, viterbi_prob);

    out.dump();
#endif
}
// Debug trace of a production (a no-op unless PARSER_DEBUG is defined):
// prints "?<id> = <function>[a1,...,an]" where each argument is either the
// id of the choice filling the slot or, for an empty slot, the name of the
// hypothesis type.
void PgfParser::Production::trace(PgfParser::Choice *res) {
#ifdef PARSER_DEBUG
    PgfPrinter out(NULL,0,NULL);

    out.nprintf(10, "?%ld = ", res->id);
    out.puts(&lin->name);

    out.puts("[");
    auto hypos = lin->absfun->type->hypos;
    bool need_comma = false;
    for (size_t k = 0; k < hypos->len; k++) {
        if (need_comma)
            out.puts(",");
        need_comma = true;

        if (args[k] != NULL) {
            out.nprintf(10, "?%ld", args[k]->id);
        } else {
            out.efun(&hypos->data[k].type->name);
        }
    }
    out.puts("]\n");

    out.dump();
#endif
}
PgfParser::PgfParser(ref<PgfConcrLincat> start, PgfText *sentence, PgfMarshaller *m)
{
this->start = start;
this->sentence = textdup(sentence);
@@ -327,6 +567,7 @@ PgfParser::PgfParser(ref<PgfConcrLincat> start, PgfText *sentence)
this->before = NULL;
this->after = NULL;
this->fetch_state = NULL;
this->m = m;
}
void PgfParser::space(size_t start, size_t end, PgfExn* err)
@@ -344,6 +585,7 @@ void PgfParser::space(size_t start, size_t end, PgfExn* err)
before->end = end;
before->prev = prev;
before->next = next;
before->viterbi_prob = prev ? prev->viterbi_prob : 0;
if (prev != NULL) prev->next = before;
if (next != NULL) next->prev = before;
@@ -351,11 +593,6 @@ void PgfParser::space(size_t start, size_t end, PgfExn* err)
before = next;
before->end = end;
}
if (end == sentence->size) {
fetch_state = after;
fetch_state->queue.push(new ResultMeta(after,0,0,NULL));
}
}
void PgfParser::start_matches(size_t end, PgfExn* err)
@@ -373,6 +610,7 @@ void PgfParser::start_matches(size_t end, PgfExn* err)
after->end = end;
after->prev = prev;
after->next = next;
after->viterbi_prob = prev ? prev->viterbi_prob : 0;
if (prev != NULL) prev->next = after;
if (next != NULL) next->prev = after;
@@ -386,35 +624,27 @@ void PgfParser::match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err)
size_t index = seq_index % lin->lincat->fields->len;
ref<PgfLincatField> field = vector_elem(lin->lincat->fields, index);
ItemConts *conts = before->get_conts(field, 0);
Choice *choice;
auto itr2 = after->choices.find(conts);
if (itr2 == after->choices.end()) {
choice = new Choice(++last_choice_id);
after->choices.insert(std::pair<ItemConts*,Choice*>(conts, choice));
} else {
choice = itr2->second;
}
Production::predict(choice,lin,seq_index);
/*
if (itr2 == after->choices.end()) {
for (size_t i = 0; i < field->backrefs->len; i++) {
PgfLincatBackref *backref = vector_elem(field->backrefs, i);
Item::combine(before, backref, choice);
}
}*/
ParseItemConts *conts = before->get_conts(field, 0);
before->queue.push(new(0) ParseItem(conts, lin, seq_index));
}
void PgfParser::end_matches(size_t end, PgfExn* err)
{
if (end == sentence->size) {
fetch_state = after;
fetch_state->queue.push(new ResultMeta(after,0,0,NULL));
while (!before->queue.empty()) {
Item *item = before->queue.top();
before->queue.pop();
item->trace(after,m);
item->proceed(this,NULL);
}
}
// Sets up result fetching: fetching starts from the `after` state, whose
// queue is seeded with an initial MetaItem (no argument, zero inside
// probability, no successor).
//
// `after` is initialised to NULL by the constructor, so when no state has
// been created there is nothing to seed; fetch_state is then left NULL
// instead of dereferencing a null pointer.
void PgfParser::prepare()
{
    fetch_state = after;
    if (after == NULL)
        return;

    fetch_state->queue.push(new MetaItem(after,0,0,NULL));
}
PgfExpr PgfParser::fetch(PgfDB *db, PgfUnmarshaller *u, prob_t *prob)
{
DB_scope scope(db, READER_SCOPE);
@@ -423,29 +653,25 @@ PgfExpr PgfParser::fetch(PgfDB *db, PgfUnmarshaller *u, prob_t *prob)
fetch_state = fetch_state->next;
}
if (fetch_state == NULL) {
return 0;
}
while (fetch_state->prev != NULL) {
if (!fetch_state->queue.empty()) {
Result *res = fetch_state->queue.top();
while (fetch_state != NULL) {
while (!fetch_state->queue.empty()) {
Item *item = fetch_state->queue.top();
fetch_state->queue.pop();
res->proceed(this,u);
item->trace(after,m);
if (!item->proceed(this,u)) {
if (fetch_state->prev == NULL) {
*prob = item->get_prob();
return item->get_expr(u);
}
break;
}
}
fetch_state = fetch_state->prev;
}
if (fetch_state->queue.empty()) {
return 0;
}
Result *res = fetch_state->queue.top();
fetch_state->queue.pop();
*prob = res->prob();
return res->expr(u);
return 0;
}
PgfParser::~PgfParser()

View File

@@ -3,13 +3,14 @@
class PGF_INTERNAL_DECL PgfParser : public PgfPhraseScanner, public PgfExprEnum {
public:
PgfParser(ref<PgfConcrLincat> start, PgfText *sentence);
PgfParser(ref<PgfConcrLincat> start, PgfText *sentence, PgfMarshaller *m);
void space(size_t start, size_t end, PgfExn* err);
void start_matches(size_t end, PgfExn* err);
void match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err);
void end_matches(size_t end, PgfExn* err);
void prepare();
PgfExpr fetch(PgfDB *db, PgfUnmarshaller *u, prob_t *prob);
virtual ~PgfParser();
@@ -17,35 +18,40 @@ public:
private:
class CFGCat;
class State;
class Item;
class ItemConts;
class Choice;
class Production;
class Result {
class ParseItemConts;
class Item {
public:
virtual prob_t prob() = 0;
virtual PgfExpr expr(PgfUnmarshaller *u) = 0;
virtual void proceed(PgfParser *parser, PgfUnmarshaller *u) = 0;
prob_t get_prob() { return inside_prob + outside_prob; };
virtual bool proceed(PgfParser *parser, PgfUnmarshaller *u) = 0;
virtual void combine(PgfParser *parser, ParseItemConts *conts, PgfExpr expr, prob_t inside_prob, PgfUnmarshaller *u) = 0;
virtual void print1(PgfPrinter *printer, State *state, PgfMarshaller *m) = 0;
virtual void print2(PgfPrinter *printer, State *state, PgfMarshaller *m) = 0;
virtual PgfExpr get_expr(PgfUnmarshaller *u) = 0;
void trace(State *state, PgfMarshaller *m);
protected:
prob_t inside_prob;
prob_t outside_prob;
};
class ResultExpr;
class ResultMeta;
class ResultComparator : std::less<Result*> {
public:
bool operator()(Result* &lhs, Result* &rhs) const
{
return lhs->prob() > rhs->prob();
}
};
class ParseItem;
class ExprItem;
class MetaItem;
ref<PgfConcrLincat> start;
PgfText *sentence;
size_t last_choice_id;
State *before, *after, *fetch_state;
PgfMarshaller *m;
};
#endif

View File

@@ -1492,22 +1492,23 @@ public:
this->n_lindefs = n_lindefs;
this->n_linrefs = n_linrefs;
ref<Vector<PgfLincatField>> db_fields = vector_new<PgfLincatField>(n_fields);
for (size_t i = 0; i < n_fields; i++) {
ref<PgfText> name = textdup_db(fields[i]);
vector_elem(db_fields, i)->name = name;
vector_elem(db_fields, i)->backrefs = 0;
}
ref<PgfConcrLincat> lincat = PgfDB::malloc<PgfConcrLincat>(abscat->name.size+1);
memcpy(&lincat->name, &abscat->name, sizeof(PgfText)+abscat->name.size+1);
lincat->abscat = abscat;
lincat->args = args;
lincat->res = res;
lincat->seqs = seqs;
lincat->fields = db_fields;
lincat->n_lindefs = n_lindefs;
ref<Vector<PgfLincatField>> db_fields = vector_new<PgfLincatField>(n_fields);
for (size_t i = 0; i < n_fields; i++) {
ref<PgfText> name = textdup_db(fields[i]);
vector_elem(db_fields, i)->lincat = lincat;
vector_elem(db_fields, i)->name = name;
vector_elem(db_fields, i)->backrefs = 0;
}
lincat->fields = db_fields;
this->container = lincat.tagged();
build->build(this, err);
@@ -1540,6 +1541,7 @@ public:
ref<PgfConcrLin> lin = PgfDB::malloc<PgfConcrLin>(absfun->name.size+1);
memcpy(&lin->name, &absfun->name, sizeof(PgfText)+absfun->name.size+1);
lin->absfun = absfun;
lin->lincat = lincat;
lin->args = args;
lin->res = res;
lin->seqs = seqs;
@@ -2383,10 +2385,11 @@ PgfExprEnum *pgf_parse(PgfDB *db, PgfConcrRevision revision,
if (u.lincat == 0)
return 0;
PgfParser *parser = new PgfParser(u.lincat, sentence);
PgfParser *parser = new PgfParser(u.lincat, sentence, m);
phrasetable_lookup_cohorts(concr->phrasetable,
sentence, case_sensitive,
parser, err);
parser->prepare();
return parser;
} PGF_API_END

View File

@@ -667,7 +667,7 @@ ref<PgfConcrLincat> PgfReader::read_lincat()
{
ref<PgfConcrLincat> lincat = read_name(&PgfConcrLincat::name);
lincat->abscat = namespace_lookup(abstract->cats, &lincat->name);
lincat->fields = read_vector(&PgfReader::read_lincat_field);
lincat->fields = read_lincat_fields(lincat);
lincat->n_lindefs = read_len();
lincat->args = read_vector(&PgfReader::read_parg);
lincat->res = read_vector(&PgfReader::read_presult2);
@@ -675,10 +675,17 @@ ref<PgfConcrLincat> PgfReader::read_lincat()
return lincat;
}
void PgfReader::read_lincat_field(ref<PgfLincatField> field)
ref<Vector<PgfLincatField>> PgfReader::read_lincat_fields(ref<PgfConcrLincat> lincat)
{
field->name = read_text();
field->backrefs = 0;
size_t len = read_len();
ref<Vector<PgfLincatField>> fields = vector_new<PgfLincatField>(len);
for (size_t i = 0; i < len; i++) {
ref<PgfLincatField> field = vector_elem(fields,i);
field->lincat = lincat;
field->name = read_text();
field->backrefs = 0;
}
return fields;
}
ref<PgfConcrLin> PgfReader::read_lin()

View File

@@ -69,7 +69,7 @@ public:
void merge_abstract(ref<PgfAbstr> abstract);
ref<PgfConcrLincat> read_lincat();
void read_lincat_field(ref<PgfLincatField> field);
ref<Vector<PgfLincatField>> read_lincat_fields(ref<PgfConcrLincat> lincat);
ref<PgfLParam> read_lparam();
void read_variable_range(ref<PgfVariableRange> var_info);
void read_parg(ref<PgfPArg> parg);