first rudimentary version of a parser

This commit is contained in:
Krasimir Angelov
2022-09-16 12:34:46 +02:00
parent bcb1076dda
commit 3e0cc91a02
24 changed files with 1009 additions and 477 deletions

View File

@@ -26,6 +26,8 @@ libpgf_la_SOURCES = \
pgf/typechecker.h \
pgf/linearizer.cxx \
pgf/linearizer.h \
pgf/parser.cxx \
pgf/parser.h \
pgf/graphviz.cxx \
pgf/graphviz.h \
pgf/data.cxx \

View File

@@ -47,9 +47,9 @@ void PgfConcr::release(ref<PgfConcr> concr)
void PgfConcrLincat::release(ref<PgfConcrLincat> lincat)
{
for (size_t i = 0; i < lincat->fields->len; i++) {
text_db_release(*vector_elem(lincat->fields, i));
PgfLincatField::release(vector_elem(lincat->fields, i));
}
Vector<ref<PgfText>>::release(lincat->fields);
Vector<PgfLincatField>::release(lincat->fields);
for (size_t i = 0; i < lincat->args->len; i++) {
PgfLParam::release(vector_elem(lincat->args, i)->param);
@@ -66,6 +66,13 @@ void PgfConcrLincat::release(ref<PgfConcrLincat> lincat)
PgfDB::free(lincat, lincat->name.size+1);
}
// Frees the database storage owned by a single lincat field: the name
// text and, when one is attached, the vector of back-references.
void PgfLincatField::release(ref<PgfLincatField> field)
{
    text_db_release(field->name);

    // A zero reference means that no back-reference vector exists.
    if (field->backrefs == 0)
        return;

    Vector<PgfLincatBackref>::release(field->backrefs);
}
void PgfLParam::release(ref<PgfLParam> param)
{
PgfDB::free(param, param->n_terms*sizeof(param->terms[0]));

View File

@@ -209,17 +209,25 @@ struct PGF_INTERNAL_DECL PgfSymbolALLCAPIT {
static const uint8_t tag = 10;
};
// Forward declaration, needed by the backrefs member below.
struct PGF_INTERNAL_DECL PgfLincatBackref;
// One field of a concrete category (lincat).
struct PGF_INTERNAL_DECL PgfLincatField {
// Text with the name of the field.
ref<PgfText> name;
// Vector of back-references attached to this field;
// a zero reference means that there is no such vector.
ref<Vector<PgfLincatBackref>> backrefs;
// Releases the storage referenced by a field.
static void release(ref<PgfLincatField> field);
};
struct PGF_INTERNAL_DECL PgfConcrLincat {
static const uint8_t tag = 0;
ref<PgfAbsCat> abscat;
ref<Vector<ref<PgfText>>> fields;
size_t n_lindefs;
ref<Vector<PgfPArg>> args;
ref<Vector<ref<PgfPResult>>> res;
ref<Vector<ref<PgfSequence>>> seqs;
ref<Vector<PgfLincatField>> fields;
PgfText name;
@@ -230,6 +238,7 @@ struct PGF_INTERNAL_DECL PgfConcrLin {
static const uint8_t tag = 1;
ref<PgfAbsFun> absfun;
ref<PgfConcrLincat> lincat;
ref<Vector<PgfPArg>> args;
ref<Vector<ref<PgfPResult>>> res;
@@ -240,6 +249,12 @@ struct PGF_INTERNAL_DECL PgfConcrLin {
static void release(ref<PgfConcrLin> lin);
};
// A position inside one of the sequences of a concrete linearization.
// NOTE(review): presumably this marks a place where the owning lincat
// field is used as an argument - confirm against the code that fills it.
struct PGF_INTERNAL_DECL PgfLincatBackref {
// The linearization that contains the sequence.
ref<PgfConcrLin> lin;
// Index of the sequence within lin->seqs.
size_t seq_index;
// Index of the symbol within that sequence.
size_t dot;
};
struct PGF_INTERNAL_DECL PgfConcrPrintname {
ref<PgfText> printname;
PgfText name;

View File

@@ -1,79 +0,0 @@
#ifndef HEAP_H
#define HEAP_H
// A binary min-heap stored in a growable C array.
// The element type A must provide operator>= and the smallest element
// is kept at index 0.
// NOTE(review): the storage is managed with realloc/free, so A presumably
// has to be trivially copyable - confirm for every instantiation.
template <class A>
class PGF_INTERNAL_DECL Heap {
public:
// Creates an empty heap without any allocated storage.
Heap() {
len = 0;
avail = 0;
values = NULL;
}
~Heap() { free(values); }
// Inserts a value, growing the array when it is full.
// Throws pgf_systemerror(errno) if the reallocation fails.
void push(A value) {
if (len >= avail) {
// NOTE(review): avail is updated before the realloc is checked, so
// it no longer matches the real capacity if the realloc fails.
avail = get_next_padovan(len+1);
A *new_values = (A *) realloc(values, sizeof(A)*avail);
if (new_values == NULL)
throw pgf_systemerror(errno);
values = new_values;
}
// Start from the first free slot and move the value towards the root.
siftdown(value, 0, len);
len++;
}
bool is_empty() { return (len == 0); }
// Returns the element at the root. There is no check for an empty heap.
A top() { return values[0]; }
// Removes and returns the element at the root.
// There is no check for an empty heap.
A pop() {
A top = values[0];
// Re-insert the last element starting from the root; the slot
// at len-1 is dropped afterwards by decrementing len.
siftup(&values[len-1],0);
len--;
return top;
}
private:
size_t len; // number of stored elements
size_t avail; // number of allocated slots
A *values; // the heap array
// Moves the hole at pos towards startpos for as long as the parent
// is greater than value, then stores value in the hole.
void siftdown(A value, size_t startpos, size_t pos) {
while (pos > startpos) {
size_t parentpos = (pos - 1) >> 1;
A parent = values[parentpos];
if (value >= parent)
break;
values[pos] = parent;
pos = parentpos;
}
values[pos] = value;
}
// Moves the hole at pos down to a leaf, always following the smaller
// child, and then places *pvalue by sifting it back towards startpos.
void siftup(A *pvalue, size_t pos) {
size_t startpos = pos;
size_t endpos = len;
size_t childpos = 2*pos + 1;
while (childpos < endpos) {
size_t rightpos = childpos + 1;
// Prefer the right child only if it is not greater than the left one.
if (rightpos < endpos &&
values[childpos] >= values[rightpos]) {
childpos = rightpos;
}
values[pos] = values[childpos];
pos = childpos;
childpos = 2*pos + 1;
}
siftdown(*pvalue, startpos, pos);
}
};
#endif

View File

@@ -287,11 +287,7 @@ void PgfLinearizer::TreeLinNode::check_category(PgfLinearizer *linearizer, PgfTe
void PgfLinearizer::TreeLinNode::linearize(PgfLinearizationOutputIface *out, PgfLinearizer *linearizer, size_t lindex)
{
PgfText *cat = &lin->absfun->type->name;
PgfText *field = NULL;
ref<PgfConcrLincat> lincat = namespace_lookup(linearizer->concr->lincats, cat);
if (lincat != 0) {
field = &(**vector_elem(lincat->fields, lindex));
}
PgfText *field = &*(vector_elem(lin->lincat->fields, lindex)->name);
if (linearizer->pre_stack == NULL)
out->begin_phrase(cat, fid, field, &lin->name);
@@ -393,7 +389,7 @@ void PgfLinearizer::TreeLindefNode::linearize_arg(PgfLinearizationOutputIface *o
void PgfLinearizer::TreeLindefNode::linearize(PgfLinearizationOutputIface *out, PgfLinearizer *linearizer, size_t lindex)
{
if (lincat != 0) {
PgfText *field = &(**vector_elem(lincat->fields, lindex));
PgfText *field = &*(vector_elem(lincat->fields, lindex)->name);
if (linearizer->pre_stack == NULL)
out->begin_phrase(&lincat->name, fid, field, linearizer->wild);
else {
@@ -546,7 +542,7 @@ void PgfLinearizer::TreeLitNode::linearize(PgfLinearizationOutputIface *out, Pgf
{
PgfText *field = NULL;
if (lincat != 0) {
field = &(**vector_elem(lincat->fields, lindex));
field = &*(vector_elem(lincat->fields, lindex)->name);
}
linearizer->flush_pre_stack(out, literal);

View File

@@ -98,6 +98,14 @@ class PGF_INTERNAL_DECL PgfLinearizer : public PgfUnmarshaller {
~TreeLitNode() { free(literal); };
};
// A tree node kind derived from TreeNode; it overrides the resolution,
// category checking, linearization and lincat lookup hooks.
// NOTE(review): judging by the name this node represents a sequence of
// chunks - confirm against the definitions in linearizer.cxx.
struct TreeChunksNode : public TreeNode {
TreeChunksNode(PgfLinearizer *linearizer);
virtual bool resolve(PgfLinearizer *linearizer);
virtual void check_category(PgfLinearizer *linearizer, PgfText *cat);
virtual void linearize(PgfLinearizationOutputIface *out, PgfLinearizer *linearizer, size_t lindex);
virtual ref<PgfConcrLincat> get_lincat(PgfLinearizer *linearizer);
};
TreeNode *prev;
TreeNode *next;
TreeNode *args;

View File

@@ -0,0 +1,455 @@
#include "data.h"
#include "printer.h"
#include "parser.h"
#include <type_traits>
#include <map>
#include <vector>
#include <queue>
// #define PARSER_DEBUG
// Key that pairs a lincat field with an integer value.
// It is totally ordered so that it can be used as a std::map key.
class PGF_INTERNAL_DECL PgfParser::CFGCat {
public:
    ref<PgfLincatField> field;
    size_t value;

    // Lexicographic order: compare the fields first and fall back
    // to the values only when the fields are equal.
    bool operator<(const CFGCat& other) const
    {
        if (field == other.field)
            return (value < other.value);
        return (field < other.field);
    }
};
struct PGF_INTERNAL_DECL PgfParser::Choice {
size_t id;
std::vector<Production*> prods;
Choice(size_t id) {
this->id = id;
}
};
// A production: a concrete linearization together with one of its
// sequences and one Choice slot per argument of the abstract function.
// Instances are allocated with malloc because of the trailing
// flexible array and must therefore be released with free().
class PGF_INTERNAL_DECL PgfParser::Production
{
public:
    // Allocates a production for (lin, seq_index) with all argument
    // slots set to NULL and appends it to choice->prods.
    // Throws pgf_systemerror(ENOMEM) if the allocation fails.
    static
    void predict(Choice *choice, ref<PgfConcrLin> lin, size_t seq_index)
    {
        size_t n_args = lin->absfun->type->hypos->len;
        Production *prod = (Production*)
            malloc(sizeof(Production)+sizeof(Choice*)*n_args);
        if (prod == NULL)
            throw pgf_systemerror(ENOMEM);
        prod->lin = lin;
        prod->seq_index = seq_index;
        memset(prod->args, 0, sizeof(Choice*)*n_args);
        prod->log(choice);

        // Do not leak the production if the vector fails to grow.
        try {
            choice->prods.push_back(prod);
        } catch (...) {
            free(prod);
            throw;
        }
    }

    // Prints the production when PARSER_DEBUG is defined;
    // otherwise it does nothing.
    void log(Choice *res) {
#ifdef PARSER_DEBUG
        PgfPrinter printer(NULL,0,NULL);
        // The ids are size_t, hence %zu instead of %ld.
        printer.nprintf(10, "?%zu = ", res->id);
        printer.puts(&lin->name);
        auto hypos = lin->absfun->type->hypos;
        for (size_t i = 0; i < hypos->len; i++) {
            if (args[i] == NULL)
                printer.efun(&hypos->data[i].type->name);
            else
                printer.nprintf(10, " ?%zu", args[i]->id);
        }
        printer.puts("\n");
        printer.dump();
#endif
    }

    ref<PgfConcrLin> lin;   // the linearization that is applied
    size_t seq_index;       // index of the sequence within lin->seqs
    Choice *args[];         // one slot per argument, NULL if not filled
};
// The items associated with one CFGCat key of a State
// (see State::get_conts, which creates these on demand).
struct PGF_INTERNAL_DECL PgfParser::ItemConts {
// The state whose get_conts call created this object.
State *state;
// NOTE(review): nothing in this file adds elements to this vector yet.
std::vector<Item> items;
};
// A partially recognized sequence: a linearization, one of its
// sequences, the position reached within that sequence (the dot) and
// the choices found so far for the arguments.
// Instances are allocated with malloc because of the trailing
// flexible array and must therefore be released with free().
class PGF_INTERNAL_DECL PgfParser::Item
{
public:
    // Advances the back-reference over the given choice.
    // If symbols remain after the dot, a new item is built and logged;
    // otherwise the sequence is complete and a production is predicted.
    // Throws pgf_systemerror(ENOMEM) if the allocation fails.
    static
    void combine(State *state, PgfLincatBackref *backref, Choice *choice)
    {
        ref<PgfSequence> seq =
            *vector_elem(backref->lin->seqs, backref->seq_index);

        // TODO: attach the result to the continuations of the state,
        // i.e. state->get_conts(field, 0) where field is the lincat
        // field number backref->seq_index % lincat->fields->len.

        if (backref->dot+1 < seq->syms.len) {
            size_t n_args = backref->lin->absfun->type->hypos->len;
            Item *item = (Item*)
                malloc(sizeof(Item)+sizeof(Choice*)*n_args);
            if (item == NULL)
                throw pgf_systemerror(ENOMEM);
            item->conts = NULL;
            item->lin = backref->lin;
            item->seq_index = backref->seq_index;
            item->dot = backref->dot+1;
            memset(item->args, 0, sizeof(Choice*)*n_args);

            // NOTE(review): this assumes that the symbol at the dot is a
            // category symbol and that symcat->d < n_args - confirm.
            PgfSymbol sym = seq->syms.data[backref->dot];
            ref<PgfSymbolCat> symcat = ref<PgfSymbolCat>::untagged(sym);
            item->args[symcat->d] = choice;

            item->log();

            // The item is not stored anywhere yet, so release it here
            // instead of leaking it.
            free(item);
        } else {
            Production::predict(choice, backref->lin, backref->seq_index);
        }
    }

    // Builds a production with the same linearization, sequence and
    // arguments as this item. The caller owns the result and must
    // release it with free().
    // Throws pgf_systemerror(ENOMEM) if the allocation fails.
    Production *complete()
    {
        size_t n_args = lin->absfun->type->hypos->len;
        Production *prod = (Production*)
            malloc(sizeof(Production)+sizeof(Choice*)*n_args);
        if (prod == NULL)
            throw pgf_systemerror(ENOMEM);
        prod->lin = lin;
        prod->seq_index = seq_index;
        memcpy(prod->args, args, sizeof(Choice*)*n_args);
        return prod;
    }

    // Prints the item when PARSER_DEBUG is defined;
    // otherwise it does nothing.
    void log() {
#ifdef PARSER_DEBUG
        PgfPrinter printer(NULL,0,NULL);

        size_t index = seq_index / lin->lincat->fields->len;
        ref<PgfPResult> res = *vector_elem(lin->res, index);
        ref<PgfDTyp> ty = lin->absfun->type;

        if (res->vars != 0) {
            printer.lvar_ranges(res->vars);
            printer.puts(" . ");
        }

        printer.efun(&ty->name);
        printer.puts("(");
        printer.lparam(ref<PgfLParam>::from_ptr(&res->param));
        printer.puts(") -> ");

        printer.efun(&lin->name);
        printer.puts("[");
        size_t n_args = lin->args->len / lin->res->len;
        for (size_t i = 0; i < n_args; i++) {
            if (i > 0)
                printer.puts(",");
            if (args[i] == NULL)
                printer.parg(vector_elem(ty->hypos, i)->type,
                             vector_elem(lin->args, index*n_args + i));
            else
                printer.nprintf(10, "?%zu", args[i]->id);
        }

        // The values are size_t, hence %zu instead of %ld.
        printer.nprintf(10, "]; %zu : ", seq_index % lin->lincat->fields->len);
        ref<PgfSequence> seq = *vector_elem(lin->seqs, seq_index);
        for (size_t i = 0; i < seq->syms.len; i++) {
            if (i > 0)
                printer.puts(" ");
            if (i == dot)
                printer.puts(". ");
            printer.symbol(*vector_elem(&seq->syms, i));
        }
        printer.puts("\n");
        printer.dump();
#endif
    }

private:
    ItemConts *conts;       // continuations of the item, NULL if unset
    ref<PgfConcrLin> lin;   // the linearization being recognized
    size_t seq_index;       // index of the sequence within lin->seqs
    size_t dot;             // position reached within the sequence
    Choice *args[];         // one slot per argument, NULL if not filled
};
// A node in the doubly linked list of parser states. Every state
// covers the range [start,end] and keeps its own continuations,
// choices and a priority queue with pending results.
class PGF_INTERNAL_DECL PgfParser::State
{
public:
    // Returns the continuations registered for (field,value),
    // creating and registering an empty set on the first request.
    ItemConts *get_conts(ref<PgfLincatField> field, size_t value)
    {
        CFGCat key = {field, value};

        auto found = contss.find(key);
        if (found != contss.end())
            return found->second;

        ItemConts *fresh = new ItemConts();
        fresh->state = this;
        contss.insert(std::pair<CFGCat,ItemConts*>(key, fresh));
        return fresh;
    }

public:
    size_t start, end;
    State *prev, *next;

    std::map<CFGCat,ItemConts*> contss;
    std::map<ItemConts*,Choice*> choices;
    std::priority_queue<PgfParser::Result*,std::vector<PgfParser::Result*>,PgfParser::ResultComparator> queue;
};
// A result built from a single production. Its probability is the sum
// of the probability of the abstract function and the probability of
// the abstract category of the lincat.
class PgfParser::ResultExpr : public Result
{
public:
    ResultExpr(Production *prod)
        : inside_prob(prod->lin->absfun->prob),
          outside_prob(prod->lin->lincat->abscat->prob),
          prod(prod),
          arg_index(0)
    {
    }

    virtual prob_t prob()
    {
        return inside_prob+outside_prob;
    }

    // Builds a function expression named after the linearization.
    virtual PgfExpr expr(PgfUnmarshaller *u)
    {
        return u->efun(&prod->lin->name);
    }

    // Intentionally empty: there is nothing to expand yet.
    virtual void proceed(PgfParser *parser, PgfUnmarshaller *u)
    {
    }

private:
    prob_t inside_prob;
    prob_t outside_prob;

    Production *prod;
    size_t arg_index;
};
// A result of the form "? arg1 arg2 ...": a meta variable applied to a
// chain of argument expressions. Every node of the chain holds one
// argument and a pointer to the node it was derived from; the chain
// ends with a node whose argument is 0.
class PgfParser::ResultMeta : public Result
{
public:
    // state - the state in whose queue the result is placed
    // arg   - the argument added by this node, or 0 for the initial node
    // prob  - the probability contributed by arg
    // next  - the node that this one extends, or NULL
    ResultMeta(State *state,
               PgfExpr arg, prob_t prob,
               ResultMeta *next)
    {
        this->inside_prob = prob + (next ? next->inside_prob : 0);
        this->state = state;
        this->arg = arg;
        this->next = next;
    }

    // The accumulated probability of the whole chain.
    virtual prob_t prob()
    {
        return inside_prob;
    }

    // Applies a fresh meta variable to the arguments in the chain,
    // starting from this node. The intermediate applications are
    // released with free_ref as soon as they have been wrapped.
    virtual PgfExpr expr(PgfUnmarshaller *u)
    {
        ResultMeta *res = this;
        PgfExpr expr = u->emeta(0);
        // Stop on the terminating node (arg == 0), and also when the
        // chain simply ends, instead of dereferencing a NULL pointer.
        while (res != NULL && res->arg != 0) {
            PgfExpr expr1 = u->eapp(expr, res->arg);
            u->free_ref(expr);
            expr = expr1;
            res = res->next;
        }
        return expr;
    }

    // Extends this result towards the beginning of the sentence.
    // If the state has no choices, the text between the closest earlier
    // state that has choices (or the first state) and this state becomes
    // a string literal argument. Otherwise one new result is queued for
    // every production of every choice of the state.
    virtual void proceed(PgfParser *parser, PgfUnmarshaller *u)
    {
        if (state->choices.size() == 0) {
            State *prev = state;
            while (prev->prev != NULL && prev->choices.size() == 0) {
                prev = prev->prev;
            }

            // NOTE(review): the token lives on the stack, so a very long
            // unrecognized span may need a large amount of stack space.
            size_t size = state->start-prev->end;
            PgfText *token = (PgfText *) alloca(sizeof(PgfText)+size+1);
            token->size = size;
            memcpy(token->text,parser->sentence->text+prev->end,size);
            token->text[size] = 0;

            // Release our reference to the literal once it is wrapped in
            // an expression, as expr() does with intermediate expressions.
            PgfLiteral lit = u->lstr(token);
            PgfExpr expr = u->elit(lit);
            u->free_ref(lit);

            prev->queue.push(new ResultMeta(prev,
                                            expr, 0,
                                            this));
        } else {
            for (const auto &it : state->choices) {
                ItemConts *conts = it.first;
                Choice *choice = it.second;
                for (Production *prod : choice->prods) {
                    PgfExpr expr = u->efun(&prod->lin->name);
                    prob_t prob = prod->lin->absfun->prob +
                                  prod->lin->lincat->abscat->prob;
                    conts->state->queue.push(new ResultMeta(conts->state,
                                                            expr, prob,
                                                            this));
                }
            }
        }
    }

private:
    prob_t inside_prob;   // accumulated probability of the chain
    State *state;         // state to which this result belongs
    PgfExpr arg;          // argument of this node, 0 for the initial node
    ResultMeta *next;     // the node that this one extends
};
// Creates a parser for the given start category. The parser keeps its
// own copy of the sentence and starts without any states.
PgfParser::PgfParser(ref<PgfConcrLincat> start, PgfText *sentence)
    : start(start),
      sentence(textdup(sentence)),
      last_choice_id(0),
      before(NULL),
      after(NULL),
      fetch_state(NULL)
{
}
// Scanner callback: selects (or creates) the state that starts at
// position `start`, makes it the current `before` state and records
// `end` as its end position. The search walks forward from the
// current `before` state.
// When `end` is the end of the sentence and some `after` state
// already exists, that state becomes the one from which results are
// fetched and it is seeded with an initial empty result.
void PgfParser::space(size_t start, size_t end, PgfExn* err)
{
    State *prev = NULL;
    State *next = before;
    while (next != NULL && next->start < start) {
        prev = next;
        next = next->next;
    }

    if (next == NULL || next->start != start) {
        before = new State();
        before->start = start;
        before->end = end;
        before->prev = prev;
        before->next = next;
        if (prev != NULL) prev->next = before;
        if (next != NULL) next->prev = before;
    } else {
        before = next;
        before->end = end;
    }

    // `after` is still NULL when no match has been started yet, for
    // instance for an empty sentence or one with only white space.
    // In that case there is nothing to fetch from, so fetch_state is
    // left untouched instead of dereferencing a NULL pointer.
    if (end == sentence->size && after != NULL) {
        fetch_state = after;
        fetch_state->queue.push(new ResultMeta(after,0,0,NULL));
    }
}
void PgfParser::start_matches(size_t end, PgfExn* err)
{
State *prev = NULL;
State *next = before;
while (next != NULL && next->start < end) {
prev = next;
next = next->next;
}
if (next == NULL || next->start != end) {
after = new State();
after->start = end;
after->end = end;
after->prev = prev;
after->next = next;
if (prev != NULL) prev->next = after;
if (next != NULL) next->prev = after;
} else {
after = next;
}
}
// Scanner callback: records that sequence `seq_index` of `lin` matches
// the text between the `before` and the `after` state. The production
// is added to the choice that the `after` state keeps for the
// continuations of the matching lincat field in the `before` state.
void PgfParser::match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err)
{
    size_t field_index = seq_index % lin->lincat->fields->len;
    ref<PgfLincatField> field =
        vector_elem(lin->lincat->fields, field_index);

    ItemConts *conts = before->get_conts(field, 0);

    // Look up the choice for these continuations; create it if needed.
    Choice *choice = NULL;
    auto found = after->choices.find(conts);
    if (found != after->choices.end()) {
        choice = found->second;
    } else {
        choice = new Choice(++last_choice_id);
        after->choices.insert(std::pair<ItemConts*,Choice*>(conts, choice));
    }

    Production::predict(choice,lin,seq_index);

    /* Not enabled yet: for a newly created choice, combine it with
       all back-references of the field.

    if (found == after->choices.end()) {
        for (size_t i = 0; i < field->backrefs->len; i++) {
            PgfLincatBackref *backref = vector_elem(field->backrefs, i);
            Item::combine(before, backref, choice);
        }
    }*/
}
// Scanner callback: when the matches end exactly at the end of the
// sentence, the `after` state becomes the state from which results
// are fetched and it is seeded with an initial empty result.
void PgfParser::end_matches(size_t end, PgfExn* err)
{
    if (end != sentence->size)
        return;

    fetch_state = after;
    fetch_state->queue.push(new ResultMeta(after,0,0,NULL));
}
// Returns the next expression and stores its probability in *prob,
// or returns 0 when there are no more results.
// The database is accessed under a reader scope for the whole call.
PgfExpr PgfParser::fetch(PgfDB *db, PgfUnmarshaller *u, prob_t *prob)
{
DB_scope scope(db, READER_SCOPE);
// Move forward to the first state that still has pending results.
while (fetch_state != NULL && fetch_state->queue.empty()) {
fetch_state = fetch_state->next;
}
if (fetch_state == NULL) {
return 0;
}
// Walk back to the first state. In every state on the way the best
// pending result is popped and expanded, which may queue new results
// in earlier states (see Result::proceed).
// NOTE(review): the popped results are not deleted here; ResultMeta
// objects may still be referenced through the `next` pointer of the
// results derived from them.
while (fetch_state->prev != NULL) {
if (!fetch_state->queue.empty()) {
Result *res = fetch_state->queue.top();
fetch_state->queue.pop();
res->proceed(this,u);
}
fetch_state = fetch_state->prev;
}
if (fetch_state->queue.empty()) {
return 0;
}
// The best result in the first state is a complete result.
// NOTE(review): `res` itself is never deleted after this point.
Result *res = fetch_state->queue.top();
fetch_state->queue.pop();
*prob = res->prob();
return res->expr(u);
}
// Releases the copy of the sentence and the list of states together
// with the continuations, choices and productions owned by the states.
// NOTE(review): Result objects, both the ones still pending in the
// queues and the ones that were already popped, are not released here
// since they are shared through ResultMeta::next - this remains a leak.
PgfParser::~PgfParser()
{
    free(sentence);

    // Any known state leads to the head of the list through `prev`.
    State *state = before;
    if (state == NULL)
        state = after;
    if (state == NULL)
        state = fetch_state;
    while (state != NULL && state->prev != NULL)
        state = state->prev;

    while (state != NULL) {
        State *next = state->next;

        for (auto &entry : state->choices) {
            Choice *choice = entry.second;
            // Productions are allocated with malloc in Production::predict.
            for (Production *prod : choice->prods)
                free(prod);
            delete choice;
        }

        for (auto &entry : state->contss)
            delete entry.second;

        delete state;
        state = next;
    }
}

View File

@@ -0,0 +1,51 @@
#ifndef PARSER_H
#define PARSER_H
// A parser which is driven by the phrase table scanner callbacks
// (space, start_matches, match, end_matches) and which afterwards
// serves as an enumeration of the resulting expressions (fetch).
class PGF_INTERNAL_DECL PgfParser : public PgfPhraseScanner, public PgfExprEnum {
public:
    // start    - the concrete category to parse for
    // sentence - the input text; the parser keeps its own copy
    PgfParser(ref<PgfConcrLincat> start, PgfText *sentence);

    void space(size_t start, size_t end, PgfExn* err);
    void start_matches(size_t end, PgfExn* err);
    void match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err);
    void end_matches(size_t end, PgfExn* err);

    // Returns the next expression and its probability,
    // or 0 when there are no more results.
    PgfExpr fetch(PgfDB *db, PgfUnmarshaller *u, prob_t *prob);

    virtual ~PgfParser();

private:
    class CFGCat;
    class State;
    class Item;
    class ItemConts;
    class Choice;
    class Production;

    // Interface for the entries of the result queues.
    class Result {
    public:
        // Results are handled through base class pointers, so the
        // destructor must be virtual to make deletion well defined.
        virtual ~Result() {}

        virtual prob_t prob() = 0;
        virtual PgfExpr expr(PgfUnmarshaller *u) = 0;
        virtual void proceed(PgfParser *parser, PgfUnmarshaller *u) = 0;
    };

    class ResultExpr;
    class ResultMeta;

    // Ordering for std::priority_queue: the result with the smallest
    // prob() value comes out first. The arguments are taken by value
    // so that the comparator can be called with const elements as well.
    class ResultComparator {
    public:
        bool operator()(Result *lhs, Result *rhs) const
        {
            return lhs->prob() > rhs->prob();
        }
    };

    ref<PgfConcrLincat> start;   // the start category
    PgfText *sentence;           // owned copy of the input

    size_t last_choice_id;       // last id handed out to a Choice

    // before/after - the states around the span that is currently
    // being matched; fetch_state - the state used by fetch().
    State *before, *after, *fetch_state;
};
#endif

View File

@@ -11,6 +11,7 @@
#include "printer.h"
#include "typechecker.h"
#include "linearizer.h"
#include "parser.h"
#include "graphviz.h"
static void
@@ -815,6 +816,35 @@ pgf_is_case_sensitive(ref<PgfConcr> concr)
return true;
}
// Adapter that forwards every match found by the phrase table lookup
// to a PgfMorphoCallback. All other scanner events are ignored.
class PGF_INTERNAL_DECL PgfMorphoScanner : public PgfPhraseScanner {
public:
    PgfMorphoScanner(PgfMorphoCallback* callback)
        : callback(callback)
    {
    }

    virtual void space(size_t start, size_t end, PgfExn* err) {}

    virtual void start_matches(size_t end, PgfExn* err) {}

    // Reports the abstract function, the name of the matching lincat
    // field and the sum of the category and function probabilities.
    virtual void match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err)
    {
        size_t field_index = seq_index % lin->lincat->fields->len;
        ref<PgfLincatField> field =
            vector_elem(lin->lincat->fields, field_index);

        callback->fn(callback,
                     &lin->absfun->name,
                     &(*field->name),
                     lin->lincat->abscat->prob+lin->absfun->prob,
                     err);
    }

    virtual void end_matches(size_t end, PgfExn* err) {}

private:
    PgfMorphoCallback* callback;
};
PGF_API
void pgf_lookup_morpho(PgfDB *db, PgfConcrRevision cnc_revision,
PgfText *sentence,
@@ -826,13 +856,45 @@ void pgf_lookup_morpho(PgfDB *db, PgfConcrRevision cnc_revision,
bool case_sensitive = pgf_is_case_sensitive(concr);
PgfMorphoScanner scanner(callback);
phrasetable_lookup(concr->phrasetable,
sentence, case_sensitive,
concr->lincats,
callback, err);
&scanner, err);
} PGF_API_END
}
// Adapter that turns the scanner events of the cohorts lookup into
// calls to a PgfCohortsCallback: every match is reported through the
// embedded morpho callback and every group of matches is closed with
// a call that carries the start and the end of the cohort.
class PGF_INTERNAL_DECL PgfCohortsScanner : public PgfPhraseScanner {
public:
    PgfCohortsScanner(PgfCohortsCallback* callback) {
        // Give match_start a defined value in case end_matches is ever
        // delivered before the first space event.
        this->match_start = 0;
        this->callback = callback;
    }

    // The end of the skipped span is where the next cohort starts.
    virtual void space(size_t start, size_t end, PgfExn* err)
    {
        match_start = end;
    }

    virtual void start_matches(size_t match_end, PgfExn* err)
    {
    }

    // Reports the abstract function, the name of the matching lincat
    // field and the sum of the category and function probabilities.
    virtual void match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err)
    {
        ref<PgfLincatField> field =
            vector_elem(lin->lincat->fields, seq_index % lin->lincat->fields->len);
        callback->morpho.fn(&callback->morpho, &lin->absfun->name, &(*field->name), lin->lincat->abscat->prob+lin->absfun->prob, err);
    }

    // Reports the boundaries of the cohort that has just been completed.
    virtual void end_matches(size_t match_end, PgfExn* err)
    {
        callback->fn(callback, match_start, match_end, err);
    }

private:
    size_t match_start;             // start position of the current cohort
    PgfCohortsCallback* callback;
};
PGF_API
void pgf_lookup_cohorts(PgfDB *db, PgfConcrRevision cnc_revision,
PgfText *sentence,
@@ -844,10 +906,10 @@ void pgf_lookup_cohorts(PgfDB *db, PgfConcrRevision cnc_revision,
bool case_sensitive = pgf_is_case_sensitive(concr);
PgfCohortsScanner scanner(callback);
phrasetable_lookup_cohorts(concr->phrasetable,
sentence, case_sensitive,
concr->lincats,
callback, err);
&scanner, err);
} PGF_API_END
}
@@ -885,7 +947,7 @@ PGF_API
PgfText *pgf_get_lincat_field_internal(object o, size_t i)
{
ref<PgfConcrLincat> lincat = o;
return &(**vector_elem(lincat->fields, i));
return &*(vector_elem(lincat->fields, i)->name);
}
PGF_API
@@ -903,22 +965,18 @@ PgfText *pgf_print_lindef_internal(PgfPhrasetableIds *seq_ids, object o, size_t
PgfInternalMarshaller m;
PgfPrinter printer(NULL,0,&m);
printer.efun(&lincat->name);
printer.puts(" : ");
ref<PgfPResult> res = *vector_elem(lincat->res, i);
if (res->vars != 0) {
printer.lvar_ranges(res->vars);
printer.puts(" . ");
}
printer.puts(" String(0) -> ");
printer.efun(&lincat->name);
printer.puts("(");
printer.lparam(ref<PgfLParam>::from_ptr(&res->param));
printer.puts(") = [");
printer.puts(") -> ");
printer.efun(&lincat->name);
printer.puts("[String(0)] = [");
size_t n_seqs = lincat->fields->len;
for (size_t j = 0; j < n_seqs; j++) {
@@ -942,20 +1000,19 @@ PgfText *pgf_print_linref_internal(PgfPhrasetableIds *seq_ids, object o, size_t
PgfInternalMarshaller m;
PgfPrinter printer(NULL,0,&m);
printer.efun(&lincat->name);
printer.puts(" : ");
ref<PgfPResult> res = *vector_elem(lincat->res, lincat->n_lindefs+i);
if (res->vars != 0) {
printer.lvar_ranges(res->vars);
printer.puts(" . ");
}
printer.puts("String(0) -> ");
printer.efun(&lincat->name);
printer.puts("[");
printer.efun(&lincat->name);
printer.puts("(");
printer.lparam(vector_elem(lincat->args, lincat->n_lindefs+i)->param);
printer.puts(") -> String(0) = [");
printer.puts(")] = [");
size_t n_seqs = lincat->fields->len;
ref<PgfSequence> seq = *vector_elem(lincat->seqs, lincat->n_lindefs*n_seqs+i);
@@ -970,37 +1027,33 @@ PGF_API
PgfText *pgf_print_lin_internal(PgfPhrasetableIds *seq_ids, object o, size_t i)
{
ref<PgfConcrLin> lin = o;
ref<PgfDTyp> ty = lin->absfun->type;
PgfInternalMarshaller m;
PgfPrinter printer(NULL,0,&m);
printer.efun(&lin->name);
printer.puts(" : ");
ref<PgfPResult> res = *vector_elem(lin->res, i);
ref<PgfDTyp> ty = lin->absfun->type;
if (res->vars != 0) {
printer.lvar_ranges(res->vars);
printer.puts(" . ");
}
size_t n_args = lin->args->len / lin->res->len;
for (size_t j = 0; j < n_args; j++) {
if (j > 0)
printer.puts(" * ");
printer.parg(vector_elem(ty->hypos, j)->type,
vector_elem(lin->args, i*n_args + j));
}
if (n_args > 0)
printer.puts(" -> ");
printer.efun(&ty->name);
printer.puts("(");
printer.lparam(ref<PgfLParam>::from_ptr(&res->param));
printer.puts(") = [");
printer.puts(") -> ");
printer.efun(&lin->name);
printer.puts("[");
size_t n_args = lin->args->len / lin->res->len;
for (size_t j = 0; j < n_args; j++) {
if (j > 0)
printer.puts(",");
printer.parg(vector_elem(ty->hypos, j)->type,
vector_elem(lin->args, i*n_args + j));
}
printer.puts("] = [");
size_t n_seqs = lin->seqs->len / lin->res->len;
for (size_t j = 0; j < n_seqs; j++) {
@@ -1439,10 +1492,11 @@ public:
this->n_lindefs = n_lindefs;
this->n_linrefs = n_linrefs;
ref<Vector<ref<PgfText>>> db_fields = vector_new<ref<PgfText>>(n_fields);
ref<Vector<PgfLincatField>> db_fields = vector_new<PgfLincatField>(n_fields);
for (size_t i = 0; i < n_fields; i++) {
ref<PgfText> field = textdup_db(fields[i]);
*vector_elem(db_fields, i) = field;
ref<PgfText> name = textdup_db(fields[i]);
vector_elem(db_fields, i)->name = name;
vector_elem(db_fields, i)->backrefs = 0;
}
ref<PgfConcrLincat> lincat = PgfDB::malloc<PgfConcrLincat>(abscat->name.size+1);
@@ -2098,7 +2152,7 @@ PgfText **pgf_category_fields(PgfDB *db, PgfConcrRevision revision,
if (fields == 0)
throw pgf_systemerror(ENOMEM);
for (size_t i = 0; i < n_fields; i++) {
fields[i] = textdup(lincat->fields->data[i]);
fields[i] = textdup(vector_elem(lincat->fields, i)->name);
}
*p_n_fields = n_fields;
return fields;
@@ -2188,7 +2242,7 @@ PgfText **pgf_tabular_linearize(PgfDB *db, PgfConcrRevision revision,
PgfText *text = out.get_text();
if (text != NULL) {
res[pos++] = textdup(&(*lincat->fields->data[i]));
res[pos++] = textdup(&*(vector_elem(lincat->fields,i)->name));
res[pos++] = text;
}
}
@@ -2227,7 +2281,7 @@ PgfText **pgf_tabular_linearize_all(PgfDB *db, PgfConcrRevision revision,
PgfText *text = out.get_text();
if (text != NULL) {
res[pos++] = textdup(&(*lincat->fields->data[i]));
res[pos++] = textdup(&*(vector_elem(lincat->fields, i)->name));
res[pos++] = text;
}
}
@@ -2240,7 +2294,7 @@ PgfText **pgf_tabular_linearize_all(PgfDB *db, PgfConcrRevision revision,
return NULL;
}
PGF_API_DECL
PGF_API
void pgf_bracketed_linearize(PgfDB *db, PgfConcrRevision revision,
PgfExpr expr, PgfPrintContext *ctxt,
PgfMarshaller *m,
@@ -2260,7 +2314,7 @@ void pgf_bracketed_linearize(PgfDB *db, PgfConcrRevision revision,
} PGF_API_END
}
PGF_API_DECL
PGF_API
void pgf_bracketed_linearize_all(PgfDB *db, PgfConcrRevision revision,
PgfExpr expr, PgfPrintContext *ctxt,
PgfMarshaller *m,
@@ -2281,6 +2335,70 @@ void pgf_bracketed_linearize_all(PgfDB *db, PgfConcrRevision revision,
} PGF_API_END
}
struct PGF_INTERNAL_DECL PgfLincatUnmarshaller : PgfUnmarshaller {
PgfLincatUnmarshaller(ref<PgfConcr> concr) {
this->concr = concr;
this->lincat = 0;
}
virtual PgfExpr eabs(PgfBindType btype, PgfText *name, PgfExpr body) { return 0; }
virtual PgfExpr eapp(PgfExpr fun, PgfExpr arg) { return 0; }
virtual PgfExpr elit(PgfLiteral lit) { return 0; }
virtual PgfExpr emeta(PgfMetaId meta) { return 0; }
virtual PgfExpr efun(PgfText *name) { return 0; }
virtual PgfExpr evar(int index) { return 0; }
virtual PgfExpr etyped(PgfExpr expr, PgfType typ) { return 0; }
virtual PgfExpr eimplarg(PgfExpr expr) { return 0; }
virtual PgfLiteral lint(size_t size, uintmax_t *v) { return 0; }
virtual PgfLiteral lflt(double v) { return 0; }
virtual PgfLiteral lstr(PgfText *v) { return 0; }
virtual PgfType dtyp(size_t n_hypos, PgfTypeHypo *hypos,
PgfText *cat,
size_t n_exprs, PgfExpr *exprs) {
lincat =
namespace_lookup(concr->lincats, cat);
return 0;
}
virtual void free_ref(object x) {};
ref<PgfConcr> concr;
ref<PgfConcrLincat> lincat;
};
// Parses a sentence in the given concrete syntax and returns an
// enumeration of the results. The caller releases the enumeration
// with pgf_free_expr_enum.
// Returns 0 if there is no concrete category for the type `ty`, and
// NULL if an error was reported through `err`.
PGF_API
PgfExprEnum *pgf_parse(PgfDB *db, PgfConcrRevision revision,
                       PgfType ty, PgfMarshaller *m,
                       PgfText *sentence,
                       PgfExn * err)
{
    PGF_API_BEGIN {
        DB_scope scope(db, READER_SCOPE);

        ref<PgfConcr> concr = db->revision2concr(revision);

        bool case_sensitive = pgf_is_case_sensitive(concr);

        // Find the concrete category for the start type.
        PgfLincatUnmarshaller u(concr);
        m->match_type(&u, ty);
        if (u.lincat == 0)
            return 0;

        PgfParser *parser = new PgfParser(u.lincat, sentence);
        try {
            phrasetable_lookup_cohorts(concr->phrasetable,
                                       sentence, case_sensitive,
                                       parser, err);
        } catch (...) {
            // Do not leak the parser when the lookup throws; the
            // exception itself is still handled by PGF_API_END.
            delete parser;
            throw;
        }

        // Never hand out a half-built parser together with an error.
        if (err->type != PGF_EXN_NONE) {
            delete parser;
            return NULL;
        }

        return parser;
    } PGF_API_END

    return NULL;
}
// Destroys an expression enumeration, such as the one returned
// from pgf_parse, through its virtual destructor.
PGF_API
void pgf_free_expr_enum(PgfExprEnum *en)
{
delete en;
}
PGF_API
PgfText *pgf_get_printname(PgfDB *db, PgfConcrRevision revision,
PgfText *fun, PgfExn* err)

View File

@@ -724,6 +724,31 @@ void pgf_bracketed_linearize_all(PgfDB *db, PgfConcrRevision revision,
PgfLinearizationOutputIface *out,
PgfExn* err);
/* An enumeration of expressions. For C++ it is an abstract class,
 * while for C it is declared as a structure which starts with
 * a pointer to a table of functions. */
#ifdef __cplusplus
struct PgfExprEnum {
/* Produces the next expression and stores its probability in *prob.
 * PgfParser::fetch returns 0 when there are no more expressions. */
virtual PgfExpr fetch(PgfDB *db, PgfUnmarshaller *u, prob_t *prob)=0;
virtual ~PgfExprEnum() {};
};
#else
typedef struct PgfExprEnum PgfExprEnum;
typedef struct PgfExprEnumVtbl PgfExprEnumVtbl;
/* NOTE(review): this layout relies on fetch being the first entry in
 * the virtual table generated by the C++ compiler - confirm for every
 * supported compiler. */
struct PgfExprEnumVtbl {
PgfExpr (*fetch)(PgfExprEnum *this, PgfDB *db, PgfUnmarshaller *u, prob_t *prob);
};
struct PgfExprEnum {
PgfExprEnumVtbl *vtbl;
};
#endif
PGF_API_DECL
PgfExprEnum *pgf_parse(PgfDB *db, PgfConcrRevision revision,
PgfType ty, PgfMarshaller *m,
PgfText *sentence,
PgfExn * err);
PGF_API_DECL
void pgf_free_expr_enum(PgfExprEnum *en);
PGF_API_DECL
PgfText *pgf_get_printname(PgfDB *db, PgfConcrRevision revision,
PgfText *fun, PgfExn* err);

View File

@@ -1,5 +1,5 @@
#include "data.h"
#include "heap.h"
#include <queue>
PgfPhrasetableIds::PgfPhrasetableIds()
{
@@ -231,10 +231,6 @@ int sequence_cmp(ref<PgfSequence> seq1, ref<PgfSequence> seq2)
struct PGF_INTERNAL_DECL PgfTextSpot {
size_t pos; // position in Unicode characters
const uint8_t *ptr; // pointer into the spot location
bool operator >= (PgfTextSpot const &obj) {
return pos >= obj.pos;
}
};
static
@@ -479,8 +475,7 @@ PGF_INTERNAL
void phrasetable_lookup(PgfPhrasetable table,
PgfText *sentence,
bool case_sensitive,
Namespace<PgfConcrLincat> lincats,
PgfMorphoCallback* callback, PgfExn* err)
PgfPhraseScanner *scanner, PgfExn* err)
{
if (table == 0)
return;
@@ -491,9 +486,9 @@ void phrasetable_lookup(PgfPhrasetable table,
const uint8_t *end = current.ptr+sentence->size;
int cmp = text_sequence_cmp(&current,end,table->value.seq,case_sensitive,true);
if (cmp < 0) {
phrasetable_lookup(table->left,sentence,case_sensitive,lincats,callback,err);
phrasetable_lookup(table->left,sentence,case_sensitive,scanner,err);
} else if (cmp > 0) {
phrasetable_lookup(table->right,sentence,case_sensitive,lincats,callback,err);
phrasetable_lookup(table->right,sentence,case_sensitive,scanner,err);
} else {
auto backrefs = table->value.backrefs;
if (backrefs != 0) {
@@ -502,13 +497,8 @@ void phrasetable_lookup(PgfPhrasetable table,
switch (ref<PgfConcrLin>::get_tag(backref.container)) {
case PgfConcrLin::tag: {
ref<PgfConcrLin> lin = ref<PgfConcrLin>::untagged(backref.container);
ref<PgfConcrLincat> lincat =
namespace_lookup(lincats, &lin->absfun->type->name);
if (lin->absfun->type->hypos->len == 0 && lincat != 0) {
ref<PgfText> field =
*vector_elem(lincat->fields, backref.seq_index % lincat->fields->len);
callback->fn(callback, &lin->absfun->name, &(*field), lincat->abscat->prob+lin->absfun->prob, err);
if (lin->absfun->type->hypos->len == 0) {
scanner->match(lin, backref.seq_index, err);
if (err->type != PGF_EXN_NONE)
return;
}
@@ -523,10 +513,10 @@ void phrasetable_lookup(PgfPhrasetable table,
}
if (!case_sensitive) {
phrasetable_lookup(table->left,sentence,false,lincats,callback,err);
phrasetable_lookup(table->left,sentence,false,scanner,err);
if (err->type != PGF_EXN_NONE)
return;
phrasetable_lookup(table->right,sentence,false,lincats,callback,err);
phrasetable_lookup(table->right,sentence,false,scanner,err);
if (err->type != PGF_EXN_NONE)
return;
}
@@ -534,18 +524,66 @@ void phrasetable_lookup(PgfPhrasetable table,
}
struct PGF_INTERNAL_DECL PgfCohortsState {
class PgfTextSpotComparator : std::less<PgfTextSpot> {
public:
bool operator()(PgfTextSpot &lhs, PgfTextSpot &rhs) const
{
return lhs.pos > rhs.pos;
}
};
PgfTextSpot spot;
Heap<PgfTextSpot> queue;
std::priority_queue<PgfTextSpot, std::vector<PgfTextSpot>, PgfTextSpotComparator> queue;
size_t last_pos;
size_t skip_pos;
bool skipping;
const uint8_t *end; // pointer into the end of the sentence
bool case_sensitive;
Namespace<PgfConcrLincat> lincats;
PgfCohortsCallback* callback;
PgfPhraseScanner *scanner;
PgfExn* err;
};
// Ends a skipping phase, if one is active. All queued spots which are
// before the current spot are removed from the queue and reported to
// the scanner, then a zero-length space is reported at the current
// spot, last_pos is reset and the skipping flag is cleared.
// The function returns early when the scanner reports an error from
// inside the loop; the caller is expected to check state->err.
static
void finish_skipping(PgfCohortsState *state) {
if (state->skipping) {
while (!state->queue.empty()) {
PgfTextSpot spot = state->queue.top();
// Spots at or after the current position are left in the queue.
if (spot.pos >= state->spot.pos)
break;
// Each distinct position is reported only once.
if (spot.pos != state->last_pos) {
if (state->last_pos > 0) {
state->scanner->space(spot.pos, spot.pos,
state->err);
if (state->err->type != PGF_EXN_NONE)
return;
}
// An empty group of matches which ends at the current spot.
state->scanner->start_matches(state->spot.pos,
state->err);
if (state->err->type != PGF_EXN_NONE)
return;
state->scanner->end_matches(state->spot.pos,
state->err);
if (state->err->type != PGF_EXN_NONE)
return;
state->last_pos = spot.pos;
}
state->queue.pop();
}
// NOTE(review): the error status is not checked after this call;
// the skipping state is reset in either case.
state->scanner->space(state->spot.pos, state->spot.pos,
state->err);
state->last_pos = 0;
state->skipping = false;
}
}
static
void phrasetable_lookup_prefixes(PgfCohortsState *state,
PgfPhrasetable table,
@@ -561,38 +599,38 @@ void phrasetable_lookup_prefixes(PgfCohortsState *state,
} else if (cmp > 0) {
ptrdiff_t len = current.ptr - state->spot.ptr;
if (min <= len)
phrasetable_lookup_prefixes(state,table->left,min,len);
if (min <= len-1)
phrasetable_lookup_prefixes(state,table->left,min,len-1);
if (len+1 <= max)
phrasetable_lookup_prefixes(state,table->right,len+1,max);
if (len <= max)
phrasetable_lookup_prefixes(state,table->right,len,max);
} else {
ptrdiff_t len = current.ptr - state->spot.ptr;
finish_skipping(state);
if (state->err->type != PGF_EXN_NONE)
return;
if (min <= len)
phrasetable_lookup_prefixes(state,table->left,min,len);
auto backrefs = table->value.backrefs;
if (len > 0 && backrefs != 0) {
if (state->skip_pos != (size_t) -1) {
state->callback->fn(state->callback,
state->skip_pos,
state->spot.pos,
state->err);
if (state->err->type != PGF_EXN_NONE)
return;
state->skip_pos = (size_t) -1;
}
if (state->last_pos != current.pos) {
if (state->last_pos > 0) {
state->scanner->end_matches(state->last_pos,
state->err);
if (state->err->type != PGF_EXN_NONE)
return;
}
if (state->last_pos > 0 && state->last_pos != current.pos) {
state->callback->fn(state->callback,
state->spot.pos,
state->last_pos,
state->err);
state->scanner->start_matches(current.pos,
state->err);
if (state->err->type != PGF_EXN_NONE)
return;
state->last_pos = current.pos;
}
state->last_pos = current.pos;
state->queue.push(current);
for (size_t i = 0; i < backrefs->len; i++) {
@@ -600,17 +638,10 @@ void phrasetable_lookup_prefixes(PgfCohortsState *state,
switch (ref<PgfConcrLin>::get_tag(backref.container)) {
case PgfConcrLin::tag: {
ref<PgfConcrLin> lin = ref<PgfConcrLin>::untagged(backref.container);
ref<PgfConcrLincat> lincat =
namespace_lookup(state->lincats, &lin->absfun->type->name);
if (lin->absfun->type->hypos->len == 0 && lincat != 0) {
ref<PgfText> field =
*vector_elem(lincat->fields, backref.seq_index % lincat->fields->len);
state->callback->morpho.fn(&state->callback->morpho,
&lin->absfun->name,
&(*field),
lincat->abscat->prob+lin->absfun->prob,
state->err);
if (lin->absfun->type->hypos->len == 0) {
state->scanner->match(lin,
backref.seq_index,
state->err);
if (state->err->type != PGF_EXN_NONE)
return;
}
@@ -633,8 +664,7 @@ PGF_INTERNAL
void phrasetable_lookup_cohorts(PgfPhrasetable table,
PgfText *sentence,
bool case_sensitive,
Namespace<PgfConcrLincat> lincats,
PgfCohortsCallback* callback, PgfExn* err)
PgfPhraseScanner *scanner, PgfExn* err)
{
PgfTextSpot spot;
spot.pos = 0;
@@ -645,15 +675,16 @@ void phrasetable_lookup_cohorts(PgfPhrasetable table,
state.spot.ptr = NULL;
state.queue.push(spot);
state.last_pos = 0;
state.skip_pos = (size_t) -1;
state.skipping = false;
state.end = (uint8_t *) &sentence->text[sentence->size];
state.case_sensitive = case_sensitive;
state.lincats = lincats;
state.callback = callback;
state.scanner = scanner;
state.err = err;
while (!state.queue.is_empty()) {
PgfTextSpot spot = state.queue.pop();
while (!state.queue.empty()) {
PgfTextSpot spot = state.queue.top();
state.queue.pop();
if (spot.pos != state.spot.pos) {
state.spot = spot;
@@ -667,36 +698,38 @@ void phrasetable_lookup_cohorts(PgfPhrasetable table,
state.spot.ptr = ptr;
}
state.skip_pos = (size_t) -1;
state.scanner->space(spot.pos,state.spot.pos,state.err);
if (state.err->type != PGF_EXN_NONE)
return;
while (state.spot.ptr < state.end) {
phrasetable_lookup_prefixes(&state, table, 1, sentence->size);
if (state.err->type != PGF_EXN_NONE)
return;
if (state.last_pos > 0) {
// We found at least one match.
// The last range is yet to be reported.
state.callback->fn(state.callback,
state.spot.pos,
state.last_pos,
state.err);
state.scanner->end_matches(state.last_pos,
state.err);
if (state.err->type != PGF_EXN_NONE)
return;
state.last_pos = 0;
break;
} else {
// We didn't find any matches at this position,
// therefore we must skip one character and try again.
if (state.skip_pos == (size_t) -1)
state.skip_pos = state.spot.pos;
// No matches were found, try the next position
if (!state.skipping) {
while (!state.queue.empty() &&
state.queue.top().pos < state.spot.pos) {
state.queue.pop();
}
state.queue.push(state.spot);
state.skipping = true;
}
const uint8_t *ptr = state.spot.ptr;
uint32_t ucs = pgf_utf8_decode(&ptr);
if (pgf_utf8_is_space(ucs)) {
state.callback->fn(state.callback,
state.skip_pos,
state.spot.pos,
state.err);
if (state.err->type != PGF_EXN_NONE)
return;
state.skip_pos = -1;
state.queue.push(state.spot);
break;
}
@@ -704,16 +737,10 @@ void phrasetable_lookup_cohorts(PgfPhrasetable table,
state.spot.ptr = ptr;
}
}
if (state.skip_pos != (size_t) -1) {
state.callback->fn(state.callback,
state.skip_pos,
state.spot.pos,
state.err);
if (state.err->type != PGF_EXN_NONE)
return;
state.skip_pos = (size_t) -1;
}
finish_skipping(&state);
if (state.err->type != PGF_EXN_NONE)
return;
state.spot = spot;
}
@@ -748,10 +775,10 @@ void phrasetable_iter(PgfConcr *concr,
ref<PgfConcrLincat> lincat =
namespace_lookup(concr->lincats, &lin->absfun->type->name);
if (lincat != 0) {
ref<PgfText> field =
*vector_elem(lincat->fields, backref.seq_index % lincat->fields->len);
ref<PgfLincatField> field =
vector_elem(lincat->fields, backref.seq_index % lincat->fields->len);
callback->fn(callback, &lin->absfun->name, &(*field), lincat->abscat->prob+lin->absfun->prob, err);
callback->fn(callback, &lin->absfun->name, &(*field->name), lincat->abscat->prob+lin->absfun->prob, err);
if (err->type != PGF_EXN_NONE)
return;
}

View File

@@ -68,19 +68,27 @@ PgfPhrasetable phrasetable_delete(PgfPhrasetable table,
PGF_INTERNAL_DECL
size_t phrasetable_size(PgfPhrasetable table);
class PgfConcrLin;
class PGF_INTERNAL_DECL PgfPhraseScanner {
public:
virtual void space(size_t start, size_t end, PgfExn* err)=0;
virtual void start_matches(size_t pos, PgfExn* err)=0;
virtual void match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err)=0;
virtual void end_matches(size_t pos, PgfExn* err)=0;
};
PGF_INTERNAL_DECL
void phrasetable_lookup(PgfPhrasetable table,
PgfText *sentence,
bool case_sensitive,
Namespace<struct PgfConcrLincat> lincats,
PgfMorphoCallback* callback, PgfExn* err);
PgfPhraseScanner *scanner, PgfExn* err);
PGF_INTERNAL_DECL
void phrasetable_lookup_cohorts(PgfPhrasetable table,
PgfText *sentence,
bool case_sensitive,
Namespace<PgfConcrLincat> lincats,
PgfCohortsCallback* callback, PgfExn* err);
PgfPhraseScanner *scanner, PgfExn* err);
PGF_INTERNAL_DECL
void phrasetable_iter(PgfConcr *concr,

View File

@@ -33,7 +33,7 @@ public:
PgfPrinter(PgfPrintContext *context, int priority,
PgfMarshaller *marshaller);
PgfPrinter() { free(res); }
~PgfPrinter() { free(res); }
// Push a new variable in the printing context. If the name
// collides with an existing variable, the variable is renamed
@@ -52,6 +52,12 @@ public:
PgfText *get_text();
// Debugging aid: prints the text returned by get_text() to stderr and
// then releases that buffer with free().
void dump() {
    PgfText *text = get_text();
    // NOTE(review): guard assumes get_text() can hand back a null
    // pointer (e.g. when nothing has been printed yet) -- confirm.
    if (text == nullptr)
        return;
    // "%.*s" takes its precision as an int, hence the cast.
    fprintf(stderr, "%.*s", (int) text->size, text->text);
    free(text);
}
void hypo(PgfTypeHypo *hypo, int prio);
void parg(ref<PgfDTyp> ty, ref<PgfPArg> parg);

View File

@@ -667,7 +667,7 @@ ref<PgfConcrLincat> PgfReader::read_lincat()
{
ref<PgfConcrLincat> lincat = read_name(&PgfConcrLincat::name);
lincat->abscat = namespace_lookup(abstract->cats, &lincat->name);
lincat->fields = read_vector(&PgfReader::read_text2);
lincat->fields = read_vector(&PgfReader::read_lincat_field);
lincat->n_lindefs = read_len();
lincat->args = read_vector(&PgfReader::read_parg);
lincat->res = read_vector(&PgfReader::read_presult2);
@@ -675,6 +675,12 @@ ref<PgfConcrLincat> PgfReader::read_lincat()
return lincat;
}
// Reads one lincat field record into *field. Only the field name comes
// from the input; the back-reference list starts out as 0 and is filled
// in later by read_lin.
void PgfReader::read_lincat_field(ref<PgfLincatField> field)
{
    // Read the name into a local before `field` is dereferenced. Writing
    // `field->name = read_text()` only guarantees that order since C++17;
    // with an older standard the target could be resolved first.
    // NOTE(review): this matters if read_text() allocates and the storage
    // behind ref<> can move while it does -- confirm. read_lin already
    // stores the result of vector_resize through a local in the same way.
    auto name = read_text();
    field->name = name;
    field->backrefs = 0;
}
// Reads one concrete lin. After the lin's own data is read, every
// sequence whose first symbol is a category argument gets registered as
// a back-reference in the matching field(s) of that argument's lincat.
// Throws pgf_error when the result category or an argument category has
// no lincat.
ref<PgfConcrLin> PgfReader::read_lin()
{
ref<PgfConcrLin> lin = read_name(&PgfConcrLin::name);
@@ -682,6 +688,76 @@ ref<PgfConcrLin> PgfReader::read_lin()
lin->args = read_vector(&PgfReader::read_parg);
lin->res = read_vector(&PgfReader::read_presult2);
lin->seqs = read_seq_ids(lin.tagged());
// The lincat of the lin's result category is looked up by name.
lin->lincat =
namespace_lookup(concrete->lincats, &lin->absfun->type->name);
if (lin->lincat == 0)
throw pgf_error("Found a lin which uses a category without a lincat");
// One lincat per argument (hypothesis) of the abstract function.
ref<Vector<PgfHypo>> hypos = lin->absfun->type->hypos;
// NOTE(review): variable-length array -- a compiler extension in C++,
// and hypos->len is 0 for functions without arguments.
ref<PgfConcrLincat> lincats[hypos->len];
for (size_t d = 0; d < hypos->len; d++) {
lincats[d] =
namespace_lookup(concrete->lincats,
&vector_elem(hypos,d)->type->name);
if (lincats[d] == 0)
throw pgf_error("Found a lin which uses a category without a lincat");
}
// NOTE(review): n_fields is used as a divisor below; a lincat with no
// fields would divide by zero -- confirm that cannot occur.
size_t n_fields = lin->lincat->fields->len;
for (size_t seq_index = 0; seq_index < lin->seqs->len; seq_index++) {
ref<PgfSequence> seq = *vector_elem(lin->seqs,seq_index);
// seq_index / n_fields selects the result record for this sequence.
ref<PgfPResult> result = *vector_elem(lin->res, seq_index / n_fields);
// Only the first symbol of the sequence (dot == 0) is inspected.
size_t dot = 0;
if (dot < seq->syms.len) {
PgfSymbol sym = *vector_elem(&seq->syms,dot);
// Only category symbols are handled; any other tag is ignored.
switch (ref<PgfSymbol>::get_tag(sym)) {
case PgfSymbolCat::tag: {
auto sym_cat = ref<PgfSymbolCat>::untagged(sym);
// NOTE(review): sym_cat->d is not checked against hypos->len.
ref<PgfConcrLincat> lincat = lincats[sym_cat->d];
// For each term of the parameter r, find the range of its variable in
// result->vars (1 when the variable is not listed). max_values is the
// product of those ranges.
size_t max_values = 1;
// NOTE(review): second variable-length array; n_terms may be 0.
size_t ranges[sym_cat->r.n_terms];
for (size_t i = 0; i < sym_cat->r.n_terms; i++) {
size_t range = 1;
for (size_t j = 0; j < result->vars->len; j++) {
auto var_range = vector_elem(result->vars, j);
if (var_range->var == sym_cat->r.terms[i].var) {
range = var_range->range;
break;
}
}
ranges[i] = range;
max_values *= range;
}
// Enumerate every combination of variable values: `values` is decoded
// as a mixed-radix number over `ranges`, and the field index is
// i0 + sum(factor[i] * digit[i]).
for (size_t values = 0; values < max_values; values++) {
size_t v = values;
size_t index = sym_cat->r.i0;
for (size_t i = 0; i < sym_cat->r.n_terms; i++) {
index += sym_cat->r.terms[i].factor * (v % ranges[i]);
v = v / ranges[i];
}
// NOTE(review): index is not checked against lincat->fields->len.
// NOTE(review): read_lincat_field sets backrefs to 0 and nothing
// visible here allocates it, yet backrefs->len is read below on the
// first registration for a field -- confirm that reading through a 0
// ref and calling vector_resize on it are well-defined.
ref<Vector<PgfLincatBackref>> backrefs =
vector_elem(lincat->fields,index)->backrefs;
backrefs =
vector_resize(backrefs, backrefs->len+1,
PgfDB::get_txn_id());
// Store the ref returned by vector_resize back into the field.
vector_elem(lincat->fields,index)->backrefs = backrefs;
// The new last element records where the category occurs: which lin,
// which sequence, and at which symbol position.
ref<PgfLincatBackref> backref =
vector_elem(backrefs,backrefs->len-1);
backref->lin = lin;
backref->seq_index = seq_index;
backref->dot = dot;
}
break;
}
}
}
}
return lin;
}

View File

@@ -69,6 +69,7 @@ public:
void merge_abstract(ref<PgfAbstr> abstract);
ref<PgfConcrLincat> read_lincat();
void read_lincat_field(ref<PgfLincatField> field);
ref<PgfLParam> read_lparam();
void read_variable_range(ref<PgfVariableRange> var_info);
void read_parg(ref<PgfPArg> parg);

View File

@@ -383,13 +383,18 @@ void PgfWriter::write_phrasetable_helper(PgfPhrasetable table)
void PgfWriter::write_lincat(ref<PgfConcrLincat> lincat)
{
write_name(&lincat->name);
write_vector(lincat->fields, &PgfWriter::write_text);
write_vector(lincat->fields, &PgfWriter::write_lincat_field);
write_len(lincat->n_lindefs);
write_vector(lincat->args, &PgfWriter::write_parg);
write_vector(lincat->res, &PgfWriter::write_presult);
write_vector(lincat->seqs, &PgfWriter::write_seq_id);
}
// Serializes one lincat field. Only the field's name is emitted; the
// back-reference list is not written, and the reader resets it to 0 and
// rebuilds it while reading the lins.
void PgfWriter::write_lincat_field(ref<PgfLincatField> field)
{
    auto field_name = field->name;
    write_text(field_name);
}
void PgfWriter::write_lin(ref<PgfConcrLin> lin)
{
write_name(&lin->name);

View File

@@ -39,6 +39,7 @@ public:
void write_abstract(ref<PgfAbstr> abstract);
void write_lincat(ref<PgfConcrLincat> lincat);
void write_lincat_field(ref<PgfLincatField> field);
void write_variable_range(ref<PgfVariableRange> var);
void write_lparam(ref<PgfLParam> lparam);
void write_parg(ref<PgfPArg> linarg);

View File

@@ -97,7 +97,7 @@ import Foreign
import Foreign.C
import Control.Monad(forM,forM_)
import Control.Exception(bracket,mask_,throwIO)
import System.IO.Unsafe(unsafePerformIO)
import System.IO.Unsafe(unsafePerformIO, unsafeInterleaveIO)
import System.Random
import qualified Data.Map as Map
import Data.IORef
@@ -673,7 +673,30 @@ data ParseOutput a
| ParseIncomplete -- ^ The sentence is not complete.
parse :: Concr -> Type -> String -> ParseOutput [(Expr,Float)]
parse lang ty sent = parseWithHeuristics lang ty sent (-1.0) []
parse c ty sent =
unsafePerformIO $
withForeignPtr (c_revision c) $ \c_revision ->
withForeignPtr marshaller $ \m ->
bracket (newStablePtr ty) freeStablePtr $ \c_ty ->
withText sent $ \c_sent -> do
c_enum <- withPgfExn "parse" (pgf_parse (c_db c) c_revision c_ty m c_sent)
c_fetch <- (#peek PgfExprEnumVtbl, fetch) =<< (#peek PgfExprEnum, vtbl) c_enum
exprs <- unsafeInterleaveIO (fetchLazy c_fetch c_enum)
return (ParseOk exprs)
where
fetchLazy c_fetch c_enum =
withForeignPtr (c_revision c) $ \c_revision ->
withForeignPtr unmarshaller $ \u ->
alloca $ \p_prob -> do
c_expr <- callFetch c_fetch c_enum (c_db c) u p_prob
if c_expr == castPtrToStablePtr nullPtr
then do pgf_free_expr_enum c_enum
return []
else do expr <- deRefStablePtr c_expr
freeStablePtr c_expr
prob <- peek p_prob
rest <- unsafeInterleaveIO (fetchLazy c_fetch c_enum)
return ((expr,prob) : rest)
parseWithHeuristics :: Concr -- ^ the language with which we parse
-> Type -- ^ the start category

View File

@@ -50,6 +50,7 @@ data PgfProbsCallback
data PgfMorphoCallback
data PgfCohortsCallback
data PgfPhrasetableIds
data PgfExprEnum
type Wrapper a = a -> IO (FunPtr a)
type Dynamic a = FunPtr a -> a
@@ -253,6 +254,12 @@ foreign import ccall pgf_bracketed_linearize :: Ptr PgfDB -> Ptr Concr -> Stable
foreign import ccall pgf_bracketed_linearize_all :: Ptr PgfDB -> Ptr Concr -> StablePtr Expr -> Ptr PgfPrintContext -> Ptr PgfMarshaller -> Ptr PgfLinearizationOutputIface -> Ptr PgfExn -> IO ()
foreign import ccall pgf_parse :: Ptr PgfDB -> Ptr Concr -> StablePtr Type -> Ptr PgfMarshaller -> Ptr PgfText -> Ptr PgfExn -> IO (Ptr PgfExprEnum)
foreign import ccall "dynamic" callFetch :: Dynamic (Ptr PgfExprEnum -> Ptr PgfDB -> Ptr PgfUnmarshaller -> Ptr (#type prob_t) -> IO (StablePtr Expr))
foreign import ccall pgf_free_expr_enum :: Ptr PgfExprEnum -> IO ()
foreign import ccall "wrapper" wrapSymbol0 :: Wrapper (Ptr PgfLinearizationOutputIface -> IO ())
foreign import ccall "wrapper" wrapSymbol1 :: Wrapper (Ptr PgfLinearizationOutputIface -> Ptr PgfText -> IO ())

View File

@@ -18,42 +18,42 @@ concrete basic_cnc {
lincat Float = [
"s"
]
lindef Float : String(0) -> Float(0) = [S0]
linref Float : Float(0) -> String(0) = [S0]
lindef Float(0) -> Float[String(0)] = [S0]
linref String(0) -> Float[Float(0)] = [S0]
lincat Int = [
"s"
]
lindef Int : String(0) -> Int(0) = [S0]
linref Int : Int(0) -> String(0) = [S0]
lindef Int(0) -> Int[String(0)] = [S0]
linref String(0) -> Int[Int(0)] = [S0]
lincat N = [
"s"
]
lindef N : String(0) -> N(0) = [S0]
linref N : ∀{i<2} . N(i) -> String(0) = [S0]
lindef N(0) -> N[String(0)] = [S0]
linref ∀{i<2} . String(0) -> N[N(i)] = [S0]
lincat P = [
"s"
]
lindef P : String(0) -> P(0) = [S0]
linref P : P(0) -> String(0) = [S0]
lindef P(0) -> P[String(0)] = [S0]
linref String(0) -> P[P(0)] = [S0]
lincat S = [
""
]
lindef S : String(0) -> S(0) = [S0]
linref S : S(0) -> String(0) = [S0]
lindef S(0) -> S[String(0)] = [S0]
linref String(0) -> S[S(0)] = [S0]
lincat String = [
"s"
]
lindef String : String(0) -> String(0) = [S0]
linref String : String(0) -> String(0) = [S0]
lin c : ∀{i<2} . N(i) -> S(0) = [S0]
lin floatLit : Float(0) -> S(0) = [S0]
lin ind : ∀{i<2} . P(0) * P(0) * N(i) -> P(0) = [S1]
lin intLit : Int(0) -> S(0) = [S0]
lin nat : ∀{i<2} . N(i) -> P(0) = [S5]
lin s : N(0) -> N(0) = [S2]
lin s : N(1) -> N(0) = [S4]
lin stringLit : String(0) -> S(0) = [S0]
lin z : N(1) = [S3]
lindef String(0) -> String[String(0)] = [S0]
linref String(0) -> String[String(0)] = [S0]
lin ∀{i<2} . S(0) -> c[N(i)] = [S0]
lin S(0) -> floatLit[Float(0)] = [S0]
lin ∀{i<2} . P(0) -> ind[P(0),P(0),N(i)] = [S1]
lin S(0) -> intLit[Int(0)] = [S0]
lin ∀{i<2} . P(0) -> nat[N(i)] = [S5]
lin N(0) -> s[N(0)] = [S2]
lin N(0) -> s[N(1)] = [S4]
lin S(0) -> stringLit[String(0)] = [S0]
lin N(1) -> z[] = [S3]
sequences {
S0 = <0,0>
S1 = <0,0> "&" "λ" SOFT_BIND <1,$0> SOFT_BIND "," SOFT_BIND <1,$1> "." <1,0>