1
0
forked from GitHub/gf-core

working fullFormLexicon. Slows down loading and compilation

This commit is contained in:
Krasimir Angelov
2022-02-08 16:38:30 +01:00
parent fc7c1249b0
commit 4d240f7260
13 changed files with 581 additions and 195 deletions

View File

@@ -39,6 +39,7 @@ void PgfConcr::release(ref<PgfConcr> concr)
namespace_release(concr->cflags);
namespace_release(concr->lins);
namespace_release(concr->lincats);
phrasetable_release(concr->phrasetable);
namespace_release(concr->printnames);
PgfDB::free(concr);
}
@@ -113,6 +114,11 @@ void PgfSequence::release(ref<PgfSequence> seq)
PgfDB::free(seq);
}
// Destroys a back-reference list once nobody uses it any more.
// No per-element cleanup is performed here; the block is simply handed
// to PgfDB::free.
void PgfSequenceBackrefs::release(ref<PgfSequenceBackrefs> backrefs)
{
PgfDB::free(backrefs);
}
void PgfConcrLin::release(ref<PgfConcrLin> lin)
{
for (size_t i = 0; i < lin->args->len; i++) {

View File

@@ -141,6 +141,19 @@ struct PGF_INTERNAL_DECL PgfSequence {
static void release(ref<PgfSequence> lincat);
};
// One back-reference from a sequence to a place where that sequence is used.
struct PGF_INTERNAL_DECL PgfSequenceBackref {
// Tagged object reference to the user of the sequence. Elsewhere in this
// change it is produced with ref<PgfConcrLincat>::tagged(...) or
// ref<PgfConcrLin>::tagged(...).
object container;
// Index of the sequence inside the container's `seqs` vector.
size_t seq_index;
};
// Reference-counted list of all back-references of one sequence.
struct PGF_INTERNAL_DECL PgfSequenceBackrefs {
// Number of owners of this list; PgfPhrasetableEntry::add_ref/release_ref
// increment and decrement it.
size_t ref_count;
// The back-references themselves. Allocation sites pass extra bytes to
// PgfDB::malloc to make room for the elements.
Vector<PgfSequenceBackref> list;
// Frees the list; meant to be called when ref_count has dropped to zero.
static
void release(ref<PgfSequenceBackrefs> backrefs);
};
struct PGF_INTERNAL_DECL PgfSymbolCat {
static const uint8_t tag = 0;
size_t d;
@@ -206,6 +219,8 @@ PGF_INTERNAL_DECL
void pgf_symbol_free(PgfSymbol sym);
struct PGF_INTERNAL_DECL PgfConcrLincat {
static const uint8_t tag = 0;
size_t ref_count;
ref<PgfAbsCat> abscat;
@@ -223,6 +238,8 @@ struct PGF_INTERNAL_DECL PgfConcrLincat {
};
struct PGF_INTERNAL_DECL PgfConcrLin {
static const uint8_t tag = 1;
size_t ref_count;
ref<PgfAbsFun> absfun;

View File

@@ -20,7 +20,7 @@ template <class V>
class Node;
template <class V>
using Namespace = ref<Node<V>>;
using Namespace = ref<Node<ref<V>>>;
template <class V>
class PGF_INTERNAL_DECL Node
@@ -29,12 +29,12 @@ public:
size_t ref_count;
size_t sz;
ref<V> value;
V value;
ref<Node> left;
ref<Node> right;
static
ref<Node> new_node(ref<V> value)
ref<Node> new_node(V value)
{
ref<Node> node = current_db->malloc<Node>();
node->ref_count = 1;
@@ -50,11 +50,11 @@ public:
}
static
ref<Node> new_node(ref<V> value, ref<Node> left, ref<Node> right)
ref<Node> new_node(V value, ref<Node> left, ref<Node> right)
{
ref<Node> node = current_db->malloc<Node>();
node->ref_count = 1;
node->sz = 1+namespace_size(left)+namespace_size(right);
node->sz = 1+Node::size(left)+Node::size(right);
node->value = value;
node->left = left;
node->right = right;
@@ -74,8 +74,17 @@ public:
#endif
}
static
void add_value_ref(ref<V> value)
// Adds one reference to a value that is stored in the node by value
// (i.e. not a ref<>). The bookkeeping is delegated to the value's own
// add_ref() member.
// NOTE(review): the DEBUG_NAMESPACE printf uses value.as_object() and
// value->name, which presumably only exist on ref<> values -- confirm that
// this overload still compiles for by-value types when debugging is on.
template <class A> static
void add_value_ref(A &value)
{
value.add_ref();
#ifdef DEBUG_NAMESPACE
printf("add_ref value %5ld %s (ref_count=%ld)\n", value.as_object(), value->name.text, value->ref_count);
#endif
}
template <class A> static
void add_value_ref(ref<A> value)
{
value->ref_count++;
#ifdef DEBUG_NAMESPACE
@@ -83,12 +92,34 @@ public:
#endif
}
// Drops one reference from a value that is stored in the node by value
// (i.e. not a ref<>). The bookkeeping, including any destruction, is
// delegated to the value's own release_ref() member.
// NOTE(review): the DEBUG_NAMESPACE printf uses value.as_object() and
// value->name, which presumably only exist on ref<> values -- confirm that
// this overload still compiles for by-value types when debugging is on.
template <class A> static
void release_value_ref(A &value)
{
#ifdef DEBUG_NAMESPACE
printf("release value %5ld %s (ref_count=%ld)\n", value.as_object(), value->name.text, value->ref_count-1);
#endif
value.release_ref();
}
// Drops one reference from a ref<>-typed value. When the value's ref_count
// reaches zero the value is destroyed through A::release.
template <class A> static
void release_value_ref(ref<A> value)
{
#ifdef DEBUG_NAMESPACE
printf("release value %5ld %s (ref_count=%ld)\n", value.as_object(), value->name.text, value->ref_count-1);
#endif
// Pre-decrement: the test sees the count *after* this reference is gone.
if (!(--value->ref_count)) {
A::release(value);
}
}
static
ref<Node> balanceL(ref<V> value, ref<Node> left, ref<Node> right)
ref<Node> balanceL(V value, ref<Node> left, ref<Node> right)
{
if (right == 0) {
if (left == 0) {
value->ref_count++;
add_value_ref(value);
return new_node(value);
} else {
if (left->left == 0) {
@@ -100,8 +131,8 @@ public:
add_value_ref(value);
add_value_ref(left->value);
add_value_ref(left->right->value);
Namespace<V> new_left = new_node(left->value);
Namespace<V> new_right = new_node(value);
ref<Node<V>> new_left = new_node(left->value);
ref<Node<V>> new_right = new_node(value);
return new_node(left->right->value,
new_left,
new_right);
@@ -109,7 +140,7 @@ public:
} else {
if (left->right == 0) {
add_value_ref(value);
Namespace<V> new_right = new_node(value);
ref<Node<V>> new_right = new_node(value);
add_value_ref(left->value);
add_node_ref(left->left);
return new_node(left->value,
@@ -121,7 +152,7 @@ public:
add_value_ref(left->value);
add_node_ref(left->left);
add_node_ref(left->right);
Namespace<V> new_right =
ref<Node<V>> new_right =
new_node(value,
left->right,
0);
@@ -137,11 +168,11 @@ public:
add_node_ref(left->right->left);
if (left->right->right != 0)
add_node_ref(left->right->right);
Namespace<V> new_left =
ref<Node<V>> new_left =
new_node(left->value,
left->left,
left->right->left);
Namespace<V> new_right =
ref<Node<V>> new_right =
new_node(value,
left->right->right,
0);
@@ -165,7 +196,7 @@ public:
add_node_ref(left->left);
add_node_ref(left->right);
add_node_ref(right);
Namespace<V> new_right =
ref<Node<V>> new_right =
new_node(value,
left->right,
right);
@@ -182,11 +213,11 @@ public:
if (left->right->right != 0)
add_node_ref(left->right->right);
add_node_ref(right);
Namespace<V> new_left =
ref<Node<V>> new_left =
new_node(left->value,
left->left,
left->right->left);
Namespace<V> new_right =
ref<Node<V>> new_right =
new_node(value,
left->right->right,
right);
@@ -205,7 +236,7 @@ public:
}
static
ref<Node> balanceR(ref<V> value, ref<Node> left, ref<Node> right)
ref<Node> balanceR(V value, ref<Node> left, ref<Node> right)
{
if (left == 0) {
if (right == 0) {
@@ -221,7 +252,7 @@ public:
add_value_ref(value);
add_value_ref(right->value);
add_node_ref(right->right);
Namespace<V> new_left =
ref<Node<V>> new_left =
new_node(value);
return new_node(right->value,
new_left,
@@ -232,9 +263,9 @@ public:
add_value_ref(value);
add_value_ref(right->value);
add_value_ref(right->left->value);
Namespace<V> new_left =
ref<Node<V>> new_left =
new_node(value);
Namespace<V> new_right =
ref<Node<V>> new_right =
new_node(right->value);
return new_node(right->left->value,
new_left,
@@ -245,7 +276,7 @@ public:
add_value_ref(right->value);
add_node_ref(right->left);
add_node_ref(right->right);
Namespace<V> new_left =
ref<Node<V>> new_left =
new_node(value,
0,
right->left);
@@ -261,11 +292,11 @@ public:
if (right->left->right != 0)
add_node_ref(right->left->right);
add_node_ref(right->right);
Namespace<V> new_left =
ref<Node<V>> new_left =
new_node(value,
0,
right->left->left);
Namespace<V> new_right =
ref<Node<V>> new_right =
new_node(right->value,
right->left->right,
right->right);
@@ -289,7 +320,7 @@ public:
add_node_ref(left);
add_node_ref(right->left);
add_node_ref(right->right);
Namespace<V> new_left =
ref<Node<V>> new_left =
new_node(value,
left,
right->left);
@@ -306,11 +337,11 @@ public:
if (right->left->right != 0)
add_node_ref(right->left->right);
add_node_ref(right->right);
Namespace<V> new_left =
ref<Node<V>> new_left =
new_node(value,
left,
right->left->left);
Namespace<V> new_right =
ref<Node<V>> new_right =
new_node(right->value,
right->left->right,
right->right);
@@ -327,6 +358,68 @@ public:
}
}
}
// Number of values stored in the subtree rooted at `node`.
// The empty subtree (0) has size 0; otherwise the cached `sz` is returned.
static
size_t size(ref<Node> node)
{
    return (node == 0) ? 0 : node->sz;
}
// Removes the leftmost value of the subtree rooted at `node`.
//
// The removed value is stored in *res (no reference is added to it here);
// *res is left untouched when `node` is empty. The result is the subtree
// without that value: either an existing subtree whose ref_count has been
// incremented, or the node produced by balanceR. The input tree itself is
// not modified.
static
ref<Node> pop_first(ref<Node> node, V *res)
{
    if (node == 0)
        return 0;

    if (node->left != 0) {
        // The minimum lives further down the left spine.
        ref<Node> shrunk_left = pop_first(node->left, res);
        ref<Node> rebuilt = balanceR(node->value, shrunk_left, node->right);
        // balanceR took what it needed from shrunk_left; drop our own reference.
        release(shrunk_left);
        return rebuilt;
    }

    // This node holds the minimum: hand out its value and share its right child.
    *res = node->value;
    if (node->right != 0)
        node->right->ref_count++;
    return node->right;
}
// Removes the rightmost value of the subtree rooted at `node`.
//
// The removed value is stored in *res (no reference is added to it here);
// *res is left untouched when `node` is empty. The result is the subtree
// without that value: either an existing subtree whose ref_count has been
// incremented, or the node produced by the rebalancing step. The input tree
// itself is not modified.
static
ref<Node> pop_last(ref<Node> node, V *res)
{
    if (node == 0) {
        return 0;
    } else if (node->right == 0) {
        // This node holds the maximum: hand out its value and share its left child.
        *res = node->value;
        if (node->left != 0)
            node->left->ref_count++;
        return node->left;
    } else {
        ref<Node> new_right = pop_last(node->right, res);
        // The right subtree has just lost an element, so only the left side
        // can have become too heavy. That is the case balanceL restores; it is
        // the same choice the delete functions make after shrinking the right
        // subtree. Using balanceR here could leave the tree unbalanced.
        ref<Node> new_node = balanceL(node->value, node->left, new_right);
        release(new_right);
        return new_node;
    }
}
// Drops one reference to `node`. When the last reference disappears the
// node lets go of both children and of its value, and is then freed.
// Releasing the empty subtree (0) is a no-op.
static
void release(ref<Node> node)
{
    if (node == 0)
        return;

#ifdef DEBUG_NAMESPACE
    printf("release node %6ld %s (ref_count=%ld)\n", node.as_object(), node->value->name.text, node->ref_count-1);
#endif

    node->ref_count--;
    if (node->ref_count != 0)
        return;     // somebody else still uses this node

    release(node->left);
    release(node->right);
    release_value_ref(node->value);
    PgfDB::free(node);
}
};
template <class V>
@@ -345,17 +438,17 @@ template <class V>
Namespace<V> namespace_insert(Namespace<V> map, ref<V> value)
{
if (map == 0)
return Node<V>::new_node(value);
return Node<ref<V>>::new_node(value);
int cmp = textcmp(&value->name,&map->value->name);
if (cmp < 0) {
Namespace<V> left = namespace_insert(map->left, value);
Namespace<V> node = Node<V>::balanceL(map->value,left,map->right);
Namespace<V> node = Node<ref<V>>::balanceL(map->value,left,map->right);
namespace_release(left);
return node;
} else if (cmp > 0) {
Namespace<V> right = namespace_insert(map->right, value);
Namespace<V> node = Node<V>::balanceR(map->value, map->left, right);
Namespace<V> node = Node<ref<V>>::balanceR(map->value, map->left, right);
namespace_release(right);
return node;
} else {
@@ -363,32 +456,37 @@ Namespace<V> namespace_insert(Namespace<V> map, ref<V> value)
map->left->ref_count++;
if (map->right != 0)
map->right->ref_count++;
return Node<V>::new_node(value,map->left,map->right);
return Node<ref<V>>::new_node(value,map->left,map->right);
}
}
template <class V>
Namespace<V> namespace_delete(Namespace<V> map, PgfText* name)
Namespace<V> namespace_delete(Namespace<V> map, PgfText* name,
ref<V> *pvalue = NULL)
{
if (map == 0)
if (map == 0) {
if (pvalue != NULL) *pvalue = 0;
return 0;
}
int cmp = textcmp(name,&map->value->name);
if (cmp < 0) {
Namespace<V> left = namespace_delete(map->left, name);
Namespace<V> left = namespace_delete(map->left, name, pvalue);
if (left == map->left)
return map;
Namespace<V> node = Node<V>::balanceR(map->value,left,map->right);
Namespace<V> node = Node<ref<V>>::balanceR(map->value,left,map->right);
namespace_release(left);
return node;
} else if (cmp > 0) {
Namespace<V> right = namespace_delete(map->right, name);
Namespace<V> right = namespace_delete(map->right, name, pvalue);
if (right == map->right)
return map;
Namespace<V> node = Node<V>::balanceL(map->value,map->left,right);
Namespace<V> node = Node<ref<V>>::balanceL(map->value,map->left,right);
namespace_release(right);
return node;
} else {
if (pvalue != NULL) *pvalue = map->value;
if (map->left == 0) {
if (map->right != 0)
map->right->ref_count++;
@@ -399,54 +497,20 @@ Namespace<V> namespace_delete(Namespace<V> map, PgfText* name)
return map->left;
} else if (map->left->sz > map->right->sz) {
ref<V> value;
Namespace<V> new_left = namespace_pop_last(map->left, &value);
Namespace<V> node = Node<V>::balanceR(value, new_left, map->right);
Namespace<V> new_left = Node<ref<V>>::pop_last(map->left, &value);
Namespace<V> node = Node<ref<V>>::balanceR(value, new_left, map->right);
namespace_release(new_left);
return node;
} else {
ref<V> value;
Namespace<V> new_right = namespace_pop_first(map->right, &value);
Namespace<V> node = Node<V>::balanceL(value, map->left, new_right);
Namespace<V> new_right = Node<ref<V>>::pop_first(map->right, &value);
Namespace<V> node = Node<ref<V>>::balanceL(value, map->left, new_right);
namespace_release(new_right);
return node;
}
}
}
template <class V>
Namespace<V> namespace_pop_first(Namespace<V> map, ref<V> *res)
{
if (map == 0) {
return 0;
} else if (map->left == 0) {
*res = map->value;
if (map->right != 0)
map->right->ref_count++;
return map->right;
} else {
Namespace<V> new_left = namespace_pop_first(map->left, res);
Namespace<V> node = Node<V>::balanceR(map->value, new_left, map->right);
namespace_release(new_left);
return node;
}
}
template <class V>
Namespace<V> namespace_pop_last(Namespace<V> map, ref<V> *res)
{
if (map == 0) {
return 0;
} else if (map->right == 0) {
*res = map->value;
if (map->left != 0)
map->left->ref_count++;
return map->left;
} else {
Namespace<V> new_right = namespace_pop_last(map->right, res);
return Node<V>::balanceR(map->value, map->left, new_right);
}
}
template <class V>
ref<V> namespace_lookup(Namespace<V> map, PgfText *name)
{
@@ -465,9 +529,7 @@ ref<V> namespace_lookup(Namespace<V> map, PgfText *name)
template <class V>
size_t namespace_size(Namespace<V> map)
{
if (map == 0)
return 0;
return map->sz;
return Node<ref<V>>::size(map);
}
template <class V>
@@ -492,27 +554,7 @@ void namespace_iter(Namespace<V> map, PgfItor* itor, PgfExn *err)
template <class V>
void namespace_release(Namespace<V> node)
{
if (node == 0)
return;
#ifdef DEBUG_NAMESPACE
printf("release node %6ld %s (ref_count=%ld)\n", node.as_object(), node->value->name.text, node->ref_count-1);
#endif
if (!(--node->ref_count)) {
namespace_release(node->left);
namespace_release(node->right);
#ifdef DEBUG_NAMESPACE
printf("release value %5ld %s (ref_count=%ld)\n", node->value.as_object(), node->value->name.text, node->value->ref_count-1);
#endif
if (!(--node->value->ref_count)) {
V::release(node->value);
}
PgfDB::free(node);
}
Node<ref<V>>::release(node);
}
#endif

View File

@@ -147,7 +147,7 @@ PgfDB *pgf_read_ngf(const char *fpath,
db->cleanup_revisions();
ref<PgfPGF> pgf = PgfDB::get_revision(master);
Node<PgfPGF>::add_value_ref(pgf);
Node<ref<PgfPGF>>::add_value_ref(pgf);
*revision = pgf.as_object();
}
@@ -828,16 +828,18 @@ void pgf_iter_lins(PgfDB *db, PgfConcrRevision cnc_revision,
PGF_API
PgfPhrasetableIds *pgf_iter_sequences(PgfDB *db, PgfConcrRevision cnc_revision,
PgfSequenceItor *itor, PgfExn *err)
PgfSequenceItor *itor,
PgfMorphoCallback *callback,
PgfExn *err)
{
PGF_API_BEGIN {
DB_scope scope(db, READER_SCOPE);
ref<PgfConcr> concr = PgfDB::revision2concr(cnc_revision);
PgfPhrasetableIds *seq_ids = new PgfPhrasetableIds();
seq_ids->start(concr);
PgfPhrasetableIds *seq_ids = new PgfPhrasetableIds();
seq_ids->start(concr);
phrasetable_iter(concr->phrasetable, itor, seq_ids, err);
phrasetable_iter(concr, concr->phrasetable, itor, callback, seq_ids, err);
return seq_ids;
} PGF_API_END
@@ -1108,25 +1110,25 @@ PgfRevision pgf_clone_revision(PgfDB *db, PgfRevision revision,
new_pgf->gflags = pgf->gflags;
if (pgf->gflags != 0)
Node<PgfFlag>::add_node_ref(pgf->gflags);
Node<ref<PgfFlag>>::add_node_ref(pgf->gflags);
new_pgf->abstract.name = textdup_db(&(*pgf->abstract.name));
new_pgf->abstract.aflags = pgf->abstract.aflags;
if (pgf->abstract.aflags != 0)
Node<PgfFlag>::add_node_ref(pgf->abstract.aflags);
Node<ref<PgfFlag>>::add_node_ref(pgf->abstract.aflags);
new_pgf->abstract.funs = pgf->abstract.funs;
if (pgf->abstract.funs != 0)
Node<PgfAbsFun>::add_node_ref(pgf->abstract.funs);
Node<ref<PgfAbsFun>>::add_node_ref(pgf->abstract.funs);
new_pgf->abstract.cats = pgf->abstract.cats;
if (pgf->abstract.cats != 0)
Node<PgfAbsCat>::add_node_ref(pgf->abstract.cats);
Node<ref<PgfAbsCat>>::add_node_ref(pgf->abstract.cats);
new_pgf->concretes = pgf->concretes;
if (pgf->concretes != 0)
Node<PgfConcr>::add_node_ref(pgf->concretes);
Node<ref<PgfConcr>>::add_node_ref(pgf->concretes);
new_pgf->prev = 0;
new_pgf->next = 0;
@@ -1170,7 +1172,7 @@ PgfRevision pgf_checkout_revision(PgfDB *db, PgfText *name,
DB_scope scope(db, WRITER_SCOPE);
ref<PgfPGF> pgf = PgfDB::get_revision(name);
if (pgf != 0) {
Node<PgfPGF>::add_value_ref(pgf);
Node<ref<PgfPGF>>::add_value_ref(pgf);
db->ref_count++;
}
return pgf.as_object();
@@ -1335,27 +1337,28 @@ PgfConcrRevision pgf_clone_concrete(PgfDB *db, PgfRevision revision,
clone->cflags = concr->cflags;
if (clone->cflags != 0)
Node<PgfFlag>::add_node_ref(clone->cflags);
Node<ref<PgfFlag>>::add_node_ref(clone->cflags);
clone->lins = concr->lins;
if (clone->lins != 0)
Node<PgfConcrLin>::add_node_ref(clone->lins);
Node<ref<PgfConcrLin>>::add_node_ref(clone->lins);
clone->lincats = concr->lincats;
if (clone->lincats != 0)
Node<PgfConcrLincat>::add_node_ref(clone->lincats);
Node<ref<PgfConcrLincat>>::add_node_ref(clone->lincats);
clone->phrasetable = concr->phrasetable;
if (clone->phrasetable != 0)
Node<PgfPhrasetableEntry>::add_node_ref(clone->phrasetable);
clone->printnames = concr->printnames;
if (clone->printnames != 0)
Node<PgfConcrPrintname>::add_node_ref(clone->printnames);
Node<ref<PgfConcrPrintname>>::add_node_ref(clone->printnames);
clone->prev = 0;
clone->next = 0;
memcpy(&clone->name, name, sizeof(PgfText)+name->size+1);
if (clone->cflags != 0)
Node<PgfFlag>::add_node_ref(clone->cflags);
PgfDB::link_transient_revision(clone);
Namespace<PgfConcr> concrs =
@@ -1394,6 +1397,8 @@ class PGF_INTERNAL PgfLinBuilder : public PgfLinBuilderIface
ref<Vector<ref<PgfPResult>>> res;
ref<Vector<ref<PgfSequence>>> seqs;
object container; // what are we building?
size_t var_index;
size_t arg_index;
size_t res_index;
@@ -1459,6 +1464,8 @@ public:
lincat->fields = db_fields;
lincat->n_lindefs = n_lindefs;
this->container = ref<PgfConcrLincat>::tagged(lincat);
build->build(this, err);
if (err->type == PGF_EXN_NONE && res_index != res->len) {
err->type = PGF_EXN_PGF_ERROR;
@@ -1500,6 +1507,8 @@ public:
lin->res = res;
lin->seqs = seqs;
this->container = ref<PgfConcrLin>::tagged(lin);
build->build(this, err);
if (err->type == PGF_EXN_NONE && res_index != res->len) {
err->type = PGF_EXN_PGF_ERROR;
@@ -1892,13 +1901,12 @@ public:
throw pgf_error(builder_error_msg);
PgfPhrasetable phrasetable =
phrasetable_internalize(concr->phrasetable, &seq);
if (phrasetable != concr->phrasetable) {
phrasetable_release(concr->phrasetable);
concr->phrasetable = phrasetable;
} else {
*vector_elem(seqs, seq_index) = seq;
}
phrasetable_internalize(concr->phrasetable,
container, seq_index,
&seq);
phrasetable_release(concr->phrasetable);
concr->phrasetable = phrasetable;
*vector_elem(seqs, seq_index) = seq;
res = seq;
@@ -1919,7 +1927,17 @@ public:
if (seq_index >= seqs->len)
throw pgf_error(builder_error_msg);
*vector_elem(seqs, seq_index) = seq_id;
ref<PgfSequence> seq = seq_id;
seq->ref_count++;
PgfPhrasetable phrasetable =
phrasetable_internalize(concr->phrasetable,
container, seq_index,
&seq);
phrasetable_release(concr->phrasetable);
concr->phrasetable = phrasetable;
*vector_elem(seqs, seq_index) = seq;
seq_index++;
} PGF_API_END
}
@@ -2055,8 +2073,19 @@ void pgf_drop_lincat(PgfDB *db,
ref<PgfConcr> concr = PgfDB::revision2concr(revision);
ref<PgfConcrLincat> lincat;
Namespace<PgfConcrLincat> lincats =
namespace_delete(concr->lincats, name);
namespace_delete(concr->lincats, name, &lincat);
if (lincat != 0) {
object container = ref<PgfConcrLincat>::tagged(lincat);
for (size_t i = 0; i < lincat->seqs->len; i++) {
ref<PgfSequence> seq = *vector_elem(lincat->seqs, i);
PgfPhrasetable new_phrasetable =
phrasetable_delete(concr->phrasetable,container,i,seq);
phrasetable_release(concr->phrasetable);
concr->phrasetable = new_phrasetable;
}
}
namespace_release(concr->lincats);
concr->lincats = lincats;
} PGF_API_END
@@ -2105,8 +2134,19 @@ void pgf_drop_lin(PgfDB *db,
ref<PgfConcr> concr = PgfDB::revision2concr(revision);
ref<PgfConcrLin> lin;
Namespace<PgfConcrLin> lins =
namespace_delete(concr->lins, name);
namespace_delete(concr->lins, name, &lin);
if (lin != 0) {
object container = ref<PgfConcrLin>::tagged(lin);
for (size_t i = 0; i < lin->seqs->len; i++) {
ref<PgfSequence> seq = *vector_elem(lin->seqs, i);
PgfPhrasetable new_phrasetable =
phrasetable_delete(concr->phrasetable,container,i,seq);
phrasetable_release(concr->phrasetable);
concr->phrasetable = new_phrasetable;
}
}
namespace_release(concr->lins);
concr->lins = lins;
} PGF_API_END

View File

@@ -400,12 +400,21 @@ typedef struct PgfPhrasetableIds PgfPhrasetableIds;
typedef struct PgfSequenceItor PgfSequenceItor;
struct PgfSequenceItor {
void (*fn)(PgfSequenceItor* self, size_t seq_id, object value, PgfExn *err);
int (*fn)(PgfSequenceItor* self, size_t seq_id, object value,
PgfExn *err);
};
PGF_API
// Callback object handed to pgf_iter_sequences. During the iteration `fn` is
// invoked with the callback object itself, two texts (named lemma and
// analysis), a probability, and the exception record to report errors in.
typedef struct PgfMorphoCallback PgfMorphoCallback;
struct PgfMorphoCallback {
void (*fn)(PgfMorphoCallback* self, PgfText *lemma, PgfText *analysis, prob_t prob,
PgfExn* err);
};
PGF_API_DECL
PgfPhrasetableIds *pgf_iter_sequences(PgfDB *db, PgfConcrRevision cnc_revision,
PgfSequenceItor *itor, PgfExn *err);
PgfSequenceItor *itor,
PgfMorphoCallback *callback,
PgfExn *err);
PGF_API_DECL
void pgf_get_lincat_counts_internal(object o, size_t *counts);

View File

@@ -11,7 +11,7 @@ PgfPhrasetableIds::PgfPhrasetableIds()
void PgfPhrasetableIds::start(ref<PgfConcr> concr)
{
next_id = 0;
n_pairs = namespace_size(concr->phrasetable);
n_pairs = phrasetable_size(concr->phrasetable);
size_t mem_size = sizeof(SeqIdPair)*n_pairs;
pairs = (SeqIdPair*) malloc(mem_size);
if (pairs == NULL)
@@ -72,6 +72,24 @@ void PgfPhrasetableIds::end()
pairs = NULL;
}
// Registers one more owner of this entry: the sequence always gains a
// reference, the back-reference list only when the entry has one.
void PgfPhrasetableEntry::add_ref()
{
    ++seq->ref_count;

    if (backrefs == 0)
        return;
    ++backrefs->ref_count;
}
// Removes one owner of this entry. The sequence and (when present) the
// back-reference list each lose a reference and are destroyed as soon as
// their own count reaches zero.
void PgfPhrasetableEntry::release_ref()
{
    seq->ref_count--;
    if (seq->ref_count == 0) {
        PgfSequence::release(seq);
    }

    if (backrefs != 0) {
        backrefs->ref_count--;
        if (backrefs->ref_count == 0) {
            PgfSequenceBackrefs::release(backrefs);
        }
    }
}
static
int lparam_cmp(PgfLParam *p1, PgfLParam *p2)
{
@@ -228,55 +246,97 @@ int sequence_cmp(ref<PgfSequence> seq1, ref<PgfSequence> seq2)
}
PGF_INTERNAL
PgfPhrasetable phrasetable_internalize(PgfPhrasetable table, ref<PgfSequence> *pseq)
PgfPhrasetable phrasetable_internalize(PgfPhrasetable table,
object container,
size_t seq_index,
ref<PgfSequence> *pseq)
{
if (table == 0) {
PgfPhrasetable table = Node<PgfSequence>::new_node(*pseq);
Node<PgfSequence>::add_value_ref(table->value);
return table;
PgfPhrasetableEntry entry;
entry.seq = *pseq;
entry.backrefs = PgfDB::malloc<PgfSequenceBackrefs>(sizeof(PgfSequenceBackref));
entry.backrefs->ref_count = 1;
entry.backrefs->list.len = 1;
entry.backrefs->list.data[0].container = container;
entry.backrefs->list.data[0].seq_index = seq_index;
entry.seq->ref_count++;
return Node<PgfPhrasetableEntry>::new_node(entry);
}
int cmp = sequence_cmp(*pseq,table->value);
int cmp = sequence_cmp(*pseq,table->value.seq);
if (cmp < 0) {
PgfPhrasetable left = phrasetable_internalize(table->left, pseq);
if (left == table->left)
return table;
else {
PgfPhrasetable node = Node<PgfSequence>::balanceL(table->value,left,table->right);
namespace_release(left);
return node;
}
PgfPhrasetable left = phrasetable_internalize(table->left,
container,
seq_index,
pseq);
PgfPhrasetable node = Node<PgfPhrasetableEntry>::balanceL(table->value,left,table->right);
phrasetable_release(left);
return node;
} else if (cmp > 0) {
PgfPhrasetable right = phrasetable_internalize(table->right, pseq);
if (right == table->right)
return table;
else {
PgfPhrasetable node = Node<PgfSequence>::balanceR(table->value, table->left, right);
phrasetable_release(right);
return node;
}
PgfPhrasetable right = phrasetable_internalize(table->right,
container,
seq_index,
pseq);
PgfPhrasetable node = Node<PgfPhrasetableEntry>::balanceR(table->value, table->left, right);
phrasetable_release(right);
return node;
} else {
if (!(--(*pseq)->ref_count)) {
PgfSequence::release(*pseq);
}
Node<PgfSequence>::add_value_ref(table->value);
table->value.seq->ref_count++;
*pseq = table->value.seq;
*pseq = table->value;
return table;
}
if (table->left != 0)
table->left->ref_count++;
if (table->right != 0)
table->right->ref_count++;
size_t len = (table->value.backrefs)
? table->value.backrefs->list.len
: 0;
PgfPhrasetableEntry entry;
entry.seq = table->value.seq;
entry.backrefs = PgfDB::malloc<PgfSequenceBackrefs>((len+1)*sizeof(PgfSequenceBackref));
entry.backrefs->ref_count = 1;
entry.backrefs->list.len = len+1;
memcpy(entry.backrefs->list.data, table->value.backrefs->list.data, len*sizeof(PgfSequenceBackref));
entry.backrefs->list.data[len].container = container;
entry.backrefs->list.data[len].seq_index = seq_index;
entry.seq->ref_count++;
return Node<PgfPhrasetableEntry>::new_node(entry,table->left,table->right);
}
}
PGF_INTERNAL_DECL
ref<PgfSequence> phrasetable_get(PgfPhrasetable table, size_t seq_id)
ref<PgfSequence> phrasetable_relink(PgfPhrasetable table,
object container,
size_t seq_index,
size_t seq_id)
{
while (table != 0) {
size_t left_sz = table->left->sz;
if (seq_id < left_sz)
table = table->left;
else if (seq_id == left_sz)
return table->value;
else {
else if (seq_id == left_sz) {
size_t len = (table->value.backrefs)
? table->value.backrefs->list.len
: 0;
ref<PgfSequenceBackrefs> backrefs =
PgfDB::malloc<PgfSequenceBackrefs>((len+1)*sizeof(PgfSequenceBackref));
backrefs->ref_count = 1;
backrefs->list.len = len+1;
memcpy(backrefs->list.data, table->value.backrefs->list.data, len*sizeof(PgfSequenceBackref));
backrefs->list.data[len].container = container;
backrefs->list.data[len].seq_index = seq_index;
if (table->value.backrefs != 0)
PgfSequenceBackrefs::release(table->value.backrefs);
table->value.backrefs = backrefs;
return table->value.seq;
} else {
table = table->right;
seq_id -= left_sz+1;
}
@@ -284,23 +344,140 @@ ref<PgfSequence> phrasetable_get(PgfPhrasetable table, size_t seq_id)
return 0;
}
// Removes the back-reference (container, seq_index) from the entry of `seq`
// in `table` and returns the resulting table.
//
// - If the entry has more than one back-reference, a new node is built that
//   carries a copy of the list without the removed element.
// - Otherwise (at most one back-reference, or no list at all) the entry
//   itself is removed from the tree.
// - If nothing changes (the sequence or the back-reference is not found),
//   `table` itself is returned without an additional reference. The
//   recursive calls rely on this to detect an unchanged subtree.
//
// The input tree is never modified; new nodes share untouched subtrees with
// it through reference counting.
PgfPhrasetable phrasetable_delete(PgfPhrasetable table,
                                  object container,
                                  size_t seq_index,
                                  ref<PgfSequence> seq)
{
    if (table == 0)
        return 0;

    int cmp = sequence_cmp(seq,table->value.seq);
    if (cmp < 0) {
        PgfPhrasetable left = phrasetable_delete(table->left,
                                                 container, seq_index,
                                                 seq);
        if (left == table->left)
            return table;
        PgfPhrasetable node = Node<PgfPhrasetableEntry>::balanceR(table->value,left,table->right);
        phrasetable_release(left);
        return node;
    } else if (cmp > 0) {
        PgfPhrasetable right = phrasetable_delete(table->right,
                                                  container, seq_index,
                                                  seq);
        if (right == table->right)
            return table;
        PgfPhrasetable node = Node<PgfPhrasetableEntry>::balanceL(table->value,table->left,right);
        phrasetable_release(right);
        return node;
    } else {
        PgfPhrasetableEntry entry;

        // Entries read from a file start without a back-reference list, so
        // the list must be checked before it is dereferenced.
        size_t len = (table->value.backrefs != 0)
                         ? table->value.backrefs->list.len
                         : 0;
        if (len > 1) {
            // Locate the back-reference first. Copying while searching would
            // overflow the (len-1)-element destination if it is not present.
            size_t pos = 0;
            while (pos < len) {
                PgfSequenceBackref *backref =
                    vector_elem(&table->value.backrefs->list, pos);
                if (backref->container == container &&
                    backref->seq_index == seq_index) {
                    break;
                }
                pos++;
            }
            if (pos == len)
                return table;   // not referenced from there: nothing to do

            entry.backrefs =
                PgfDB::malloc<PgfSequenceBackrefs>((len-1)*sizeof(PgfSequenceBackref));
            entry.backrefs->ref_count = 1;
            entry.backrefs->list.len  = len-1;

            // Copy every element except the one at `pos`.
            for (size_t i = 0, j = 0; i < len; i++) {
                if (i == pos)
                    continue;
                *vector_elem(&entry.backrefs->list, j) =
                    *vector_elem(&table->value.backrefs->list, i);
                j++;
            }

            entry.seq = table->value.seq;
            table->value.seq->ref_count++;

            // The new node shares both children with the old one, so each
            // child gains an owner (new_node itself does not add references).
            if (table->left != 0)
                table->left->ref_count++;
            if (table->right != 0)
                table->right->ref_count++;
            return Node<PgfPhrasetableEntry>::new_node(entry, table->left, table->right);
        } else {
            // Last user is gone: remove the entry from the tree.
            if (table->left == 0) {
                if (table->right != 0)
                    table->right->ref_count++;
                return table->right;
            } else if (table->right == 0) {
                if (table->left != 0)
                    table->left->ref_count++;
                return table->left;
            } else if (table->left->sz > table->right->sz) {
                // Replace the entry by its in-order predecessor.
                PgfPhrasetable new_left = Node<PgfPhrasetableEntry>::pop_last(table->left, &entry);
                PgfPhrasetable node = Node<PgfPhrasetableEntry>::balanceR(entry, new_left, table->right);
                phrasetable_release(new_left);
                return node;
            } else {
                // Replace the entry by its in-order successor.
                PgfPhrasetable new_right = Node<PgfPhrasetableEntry>::pop_first(table->right, &entry);
                PgfPhrasetable node = Node<PgfPhrasetableEntry>::balanceL(entry, table->left, new_right);
                phrasetable_release(new_right);
                return node;
            }
        }
    }
}
PGF_INTERNAL
void phrasetable_iter(PgfPhrasetable table, PgfSequenceItor* itor,
// Number of entries in the phrase table (0 for the empty table); forwards to
// Node<PgfPhrasetableEntry>::size.
size_t phrasetable_size(PgfPhrasetable table)
{
return Node<PgfPhrasetableEntry>::size(table);
}
PGF_INTERNAL
void phrasetable_iter(PgfConcr *concr,
PgfPhrasetable table,
PgfSequenceItor* itor,
PgfMorphoCallback *callback,
PgfPhrasetableIds *seq_ids, PgfExn *err)
{
if (table == 0)
return;
phrasetable_iter(table->left, itor, seq_ids, err);
phrasetable_iter(concr, table->left, itor, callback, seq_ids, err);
if (err->type != PGF_EXN_NONE)
return;
size_t seq_id = seq_ids->add(table->value);
itor->fn(itor, seq_id, table->value.as_object(), err);
size_t seq_id = seq_ids->add(table->value.seq);
int res = itor->fn(itor, seq_id, table->value.seq.as_object(), err);
if (err->type != PGF_EXN_NONE)
return;
phrasetable_iter(table->right, itor, seq_ids, err);
if (table->value.backrefs != 0 && res == 0 && callback != 0) {
for (size_t i = 0; i < table->value.backrefs->list.len; i++) {
PgfSequenceBackref backref = *vector_elem(&table->value.backrefs->list,i);
switch (ref<PgfConcrLin>::get_tag(backref.container)) {
case PgfConcrLin::tag: {
ref<PgfConcrLin> lin = ref<PgfConcrLin>::untagged(backref.container);
ref<PgfConcrLincat> lincat =
namespace_lookup(concr->lincats, &lin->absfun->type->name);
if (lincat != 0) {
ref<PgfText> field =
*vector_elem(lincat->fields, backref.seq_index % lincat->fields->len);
callback->fn(callback, &lin->absfun->name, &(*field), lincat->abscat->prob+lin->absfun->prob, err);
if (err->type != PGF_EXN_NONE)
return;
}
break;
}
case PgfConcrLincat::tag: {
//ignore
break;
}
}
}
}
phrasetable_iter(concr, table->right, itor, callback, seq_ids, err);
if (err->type != PGF_EXN_NONE)
return;
}
@@ -308,5 +485,5 @@ void phrasetable_iter(PgfPhrasetable table, PgfSequenceItor* itor,
PGF_INTERNAL
void phrasetable_release(PgfPhrasetable table)
{
namespace_release(table);
Node<PgfPhrasetableEntry>::release(table);
}

View File

@@ -1,9 +1,19 @@
#ifndef PHRASETABLE_H
#define PHRASETABLE_H
class PgfSequence;
struct PgfSequence;
struct PgfSequenceBackrefs;
// Value stored in each node of the phrase table: a sequence together with
// the list of places that use it.
struct PGF_INTERNAL_DECL PgfPhrasetableEntry {
// The sequence itself.
ref<PgfSequence> seq;
// Back-references to the users of `seq`; may be 0 when no list has been
// attached yet.
ref<PgfSequenceBackrefs> backrefs;
// Reference-count hooks for entries that are stored by value in a tree node.
void add_ref();
void release_ref();
};
class PgfSequenceItor;
typedef ref<Node<PgfSequence>> PgfPhrasetable;
typedef ref<Node<PgfPhrasetableEntry>> PgfPhrasetable;
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wattributes"
@@ -41,13 +51,30 @@ private:
#pragma GCC diagnostic pop
PGF_INTERNAL_DECL
PgfPhrasetable phrasetable_internalize(PgfPhrasetable table, ref<PgfSequence> *seq);
PgfPhrasetable phrasetable_internalize(PgfPhrasetable table,
object container,
size_t seq_index,
ref<PgfSequence> *seq);
PGF_INTERNAL_DECL
ref<PgfSequence> phrasetable_get(PgfPhrasetable table, size_t seq_id);
ref<PgfSequence> phrasetable_relink(PgfPhrasetable table,
object container,
size_t seq_index,
size_t seq_id);
PgfPhrasetable phrasetable_delete(PgfPhrasetable table,
object container,
size_t seq_index,
ref<PgfSequence> seq);
PGF_INTERNAL_DECL
void phrasetable_iter(PgfPhrasetable table, PgfSequenceItor* itor,
size_t phrasetable_size(PgfPhrasetable table);
PGF_INTERNAL_DECL
void phrasetable_iter(PgfConcr *concr,
PgfPhrasetable table,
PgfSequenceItor* itor,
PgfMorphoCallback *callback,
PgfPhrasetableIds *seq_ids, PgfExn *err);
PGF_INTERNAL_DECL

View File

@@ -160,7 +160,7 @@ Namespace<V> PgfReader::read_namespace(ref<V> (PgfReader::*read_value)(), size_t
ref<V> value = (this->*read_value)();
Namespace<V> right = read_namespace(read_value, len-half-1);
return Node<V>::new_node(value, left, right);
return Node<ref<V>>::new_node(value, left, right);
}
template<class V>
@@ -576,13 +576,42 @@ ref<PgfSequence> PgfReader::read_seq()
return seq;
}
void PgfReader::read_seq_id(ref<ref<PgfSequence>> r)
ref<Vector<ref<PgfSequence>>> PgfReader::read_seq_ids(object container)
{
size_t seq_id = read_len();
ref<PgfSequence> seq = phrasetable_get(concrete->phrasetable, seq_id);
if (seq == 0)
throw pgf_error("Invalid sequence id");
*r = seq;
size_t len = read_len();
ref<Vector<ref<PgfSequence>>> vec = vector_new<ref<PgfSequence>>(len);
for (size_t i = 0; i < len; i++) {
size_t seq_id = read_len();
ref<PgfSequence> seq = phrasetable_relink(concrete->phrasetable,
container, i,
seq_id);
if (seq == 0)
throw pgf_error("Invalid sequence id");
*vector_elem(vec,i) = seq;
}
return vec;
}
// Reads `len` sequences from the input and arranges them into a tree by
// recursive halving: first the left half, then the entry for this node, then
// the remaining entries as the right subtree. Entries are created without
// back-references. Returns 0 for len == 0.
PgfPhrasetable PgfReader::read_phrasetable(size_t len)
{
    if (len == 0)
        return 0;

    const size_t n_left  = len/2;
    const size_t n_right = len-n_left-1;

    // The reads below must stay in this order: it is the order in which the
    // data is consumed from the input.
    PgfPhrasetable left_subtree = read_phrasetable(n_left);

    PgfPhrasetableEntry entry;
    entry.seq      = read_seq();
    entry.backrefs = 0;

    PgfPhrasetable right_subtree = read_phrasetable(n_right);

    return Node<PgfPhrasetableEntry>::new_node(entry, left_subtree, right_subtree);
}
// Reads a complete phrase table: a length followed by that many sequences.
PgfPhrasetable PgfReader::read_phrasetable()
{
    const size_t n_entries = read_len();
    PgfPhrasetable table = read_phrasetable(n_entries);
    return table;
}
ref<PgfConcrLincat> PgfReader::read_lincat()
@@ -594,7 +623,7 @@ ref<PgfConcrLincat> PgfReader::read_lincat()
lincat->n_lindefs = read_len();
lincat->args = read_vector(&PgfReader::read_parg);
lincat->res = read_vector(&PgfReader::read_presult2);
lincat->seqs = read_vector(&PgfReader::read_seq_id);
lincat->seqs = read_seq_ids(ref<PgfConcrLincat>::tagged(lincat));
return lincat;
}
@@ -605,7 +634,7 @@ ref<PgfConcrLin> PgfReader::read_lin()
lin->absfun = namespace_lookup(abstract->funs, &lin->name);
lin->args = read_vector(&PgfReader::read_parg);
lin->res = read_vector(&PgfReader::read_presult2);
lin->seqs = read_vector(&PgfReader::read_seq_id);
lin->seqs = read_seq_ids(ref<PgfConcrLin>::tagged(lin));
return lin;
}
@@ -623,7 +652,7 @@ ref<PgfConcr> PgfReader::read_concrete()
concrete->ref_count = 1;
concrete->ref_count_ex = 0;
concrete->cflags = read_namespace<PgfFlag>(&PgfReader::read_flag);
concrete->phrasetable = read_namespace<PgfSequence>(&PgfReader::read_seq);
concrete->phrasetable = read_phrasetable();
concrete->lincats = read_namespace<PgfConcrLincat>(&PgfReader::read_lincat);
concrete->lins = read_namespace<PgfConcrLin>(&PgfReader::read_lin);
concrete->printnames = read_namespace<PgfConcrPrintname>(&PgfReader::read_printname);

View File

@@ -74,7 +74,9 @@ public:
ref<PgfPResult> read_presult();
PgfSymbol read_symbol();
ref<PgfSequence> read_seq();
void read_seq_id(ref<ref<PgfSequence>> r);
ref<Vector<ref<PgfSequence>>> read_seq_ids(object container);
PgfPhrasetable read_phrasetable(size_t len);
PgfPhrasetable read_phrasetable();
ref<PgfConcrLin> read_lin();
ref<PgfConcrPrintname> read_printname();

View File

@@ -418,6 +418,22 @@ void PgfWriter::write_seq(ref<PgfSequence> seq)
write_vector(ref<Vector<PgfSymbol>>::from_ptr(&seq->syms), &PgfWriter::write_symbol);
}
// Writes a phrasetable: first the value reported by phrasetable_size()
// as a length prefix, then the sequences of the entries as emitted by
// write_phrasetable_helper().
void PgfWriter::write_phrasetable(PgfPhrasetable table)
{
    auto n_entries = phrasetable_size(table);
    write_len(n_entries);

    write_phrasetable_helper(table);
}
// Writes the sequence of every entry in the (sub)tree rooted at `table`
// using an in-order traversal: left subtree, the node itself, right subtree.
// An empty table (0) writes nothing.
void PgfWriter::write_phrasetable_helper(PgfPhrasetable table)
{
    // The left subtree is handled recursively; the right subtree is
    // handled by continuing the loop with the right child.
    for (;;) {
        if (table == 0)
            return;

        write_phrasetable_helper(table->left);
        write_seq(table->value.seq);
        table = table->right;
    }
}
void PgfWriter::write_lincat(ref<PgfConcrLincat> lincat)
{
write_name(&lincat->name);
@@ -448,7 +464,7 @@ void PgfWriter::write_concrete(ref<PgfConcr> concr)
write_name(&concr->name);
write_namespace<PgfFlag>(concr->cflags, &PgfWriter::write_flag);
write_namespace<PgfSequence>(concr->phrasetable, &PgfWriter::write_seq);
write_phrasetable(concr->phrasetable);
write_namespace<PgfConcrLincat>(concr->lincats, &PgfWriter::write_lincat);
write_namespace<PgfConcrLin>(concr->lins, &PgfWriter::write_lin);
write_namespace<PgfConcrPrintname>(concr->printnames, &PgfWriter::write_printname);

View File

@@ -46,6 +46,7 @@ public:
void write_symbol(PgfSymbol sym);
void write_seq(ref<PgfSequence> seq);
void write_seq_id(ref<ref<PgfSequence>> r) { write_len(seq_ids.get(*r)); };
void write_phrasetable(PgfPhrasetable table);
void write_lin(ref<PgfConcrLin> lin);
void write_printname(ref<PgfConcrPrintname> printname);
@@ -56,6 +57,7 @@ public:
private:
template<class V>
void write_namespace_helper(Namespace<V> nmsp, void (PgfWriter::*write_value)(ref<V>));
void write_phrasetable_helper(PgfPhrasetable table);
void write_text(ref<ref<PgfText>> r) { write_text(&(**r)); };
void write_lparam(ref<ref<PgfLParam>> r) { write_lparam(*r); };

View File

@@ -294,7 +294,7 @@ showPGF p =
bracket (wrapSequenceItorCallback (getSequences ref)) freeHaskellFunPtr $ \fptr ->
withForeignPtr (c_revision c) $ \c_revision -> do
(#poke PgfSequenceItor, fn) itor fptr
withPgfExn "showPGF" (pgf_iter_sequences (a_db p) c_revision itor))
withPgfExn "showPGF" (pgf_iter_sequences (a_db p) c_revision itor nullPtr))
doc <- readIORef ref
return (seq_ids, doc)
where
@@ -303,6 +303,7 @@ showPGF p =
def <- bracket (pgf_print_sequence_internal seq_id val) free $ \c_text -> do
fmap text (peekText c_text)
modifyIORef ref $ (\doc -> doc $$ def)
return 0
-- | The abstract language name is the name of the top-level
-- abstract module
@@ -570,20 +571,33 @@ unk _ _ = False
fullFormLexicon :: Concr -> [(String, [MorphoAnalysis])]
fullFormLexicon c = unsafePerformIO $ do
ref <- newIORef []
(allocaBytes (#size PgfSequenceItor) $ \itor ->
bracket (wrapSequenceItorCallback (getSequences ref)) freeHaskellFunPtr $ \fptr ->
(allocaBytes (#size PgfSequenceItor) $ \itor1 ->
bracket (wrapSequenceItorCallback (getSequences ref)) freeHaskellFunPtr $ \fptr1 ->
allocaBytes (#size PgfMorphoCallback) $ \itor2 ->
bracket (wrapMorphoCallback (getMorphology ref)) freeHaskellFunPtr $ \fptr2 ->
withForeignPtr (c_revision c) $ \c_revision -> do
(#poke PgfSequenceItor, fn) itor fptr
seq_ids <- withPgfExn "fullFormLexicon" (pgf_iter_sequences (c_db c) c_revision itor)
(#poke PgfSequenceItor, fn) itor1 fptr1
(#poke PgfMorphoCallback, fn) itor2 fptr2
seq_ids <- withPgfExn "fullFormLexicon" (pgf_iter_sequences (c_db c) c_revision itor1 itor2)
pgf_release_phrasetable_ids seq_ids)
fmap reverse (readIORef ref)
where
getSequences ref itor seq_id val exn = do
getSequences ref _ seq_id val exn = do
bracket (pgf_sequence_get_text_internal val) free $ \c_text ->
if c_text == nullPtr
then return ()
else do lemma <- peekText c_text
modifyIORef ref $ (\lexicon -> (lemma, []) : lexicon)
then return 1
else do form <- peekText c_text
case form of
[] -> return 1
_ -> do modifyIORef ref $ (\lexicon -> (form, []) : lexicon)
return 0
getMorphology ref _ c_name c_field c_prob exn = do
name <- peekText c_name
field <- peekText c_field
let prob = realToFrac c_prob
ann = (name,field,prob)
modifyIORef ref (\((form,anns) : lexicon) -> (form,ann:anns) : lexicon)
-- | This data type encodes the different outcomes which you could get from the parser.

View File

@@ -46,6 +46,7 @@ data PgfLinBuilderIface
data PgfLinearizationOutputIface
data PgfGraphvizOptions
data PgfSequenceItor
data PgfMorphoCallback
data PgfPhrasetableIds
type Wrapper a = a -> IO (FunPtr a)
@@ -112,11 +113,15 @@ foreign import ccall pgf_iter_lincats :: Ptr PgfDB -> Ptr Concr -> Ptr PgfItor -
foreign import ccall pgf_iter_lins :: Ptr PgfDB -> Ptr Concr -> Ptr PgfItor -> Ptr PgfExn -> IO ()
type SequenceItorCallback = Ptr PgfSequenceItor -> CSize -> Ptr () -> Ptr PgfExn -> IO ()
type SequenceItorCallback = Ptr PgfSequenceItor -> CSize -> Ptr () -> Ptr PgfExn -> IO CInt
foreign import ccall "wrapper" wrapSequenceItorCallback :: Wrapper SequenceItorCallback
foreign import ccall pgf_iter_sequences :: Ptr PgfDB -> Ptr Concr -> Ptr PgfSequenceItor -> Ptr PgfExn -> IO (Ptr PgfPhrasetableIds)
type MorphoCallback = Ptr PgfMorphoCallback -> Ptr PgfText -> Ptr PgfText -> (#type prob_t) -> Ptr PgfExn -> IO ()
foreign import ccall "wrapper" wrapMorphoCallback :: Wrapper MorphoCallback
foreign import ccall pgf_iter_sequences :: Ptr PgfDB -> Ptr Concr -> Ptr PgfSequenceItor -> Ptr PgfMorphoCallback -> Ptr PgfExn -> IO (Ptr PgfPhrasetableIds)
foreign import ccall pgf_get_lincat_counts_internal :: Ptr () -> Ptr CSize -> IO ()