diff --git a/src/runtime/c/pgf/data.cxx b/src/runtime/c/pgf/data.cxx index faaf916b6..9ec6eb9e3 100644 --- a/src/runtime/c/pgf/data.cxx +++ b/src/runtime/c/pgf/data.cxx @@ -39,6 +39,7 @@ void PgfConcr::release(ref concr) namespace_release(concr->cflags); namespace_release(concr->lins); namespace_release(concr->lincats); + phrasetable_release(concr->phrasetable); namespace_release(concr->printnames); PgfDB::free(concr); } @@ -113,6 +114,11 @@ void PgfSequence::release(ref seq) PgfDB::free(seq); } +void PgfSequenceBackrefs::release(ref backrefs) +{ + PgfDB::free(backrefs); +} + void PgfConcrLin::release(ref lin) { for (size_t i = 0; i < lin->args->len; i++) { diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h index fe5adf4c4..40f3e1b92 100644 --- a/src/runtime/c/pgf/data.h +++ b/src/runtime/c/pgf/data.h @@ -141,6 +141,19 @@ struct PGF_INTERNAL_DECL PgfSequence { static void release(ref lincat); }; +struct PGF_INTERNAL_DECL PgfSequenceBackref { + object container; + size_t seq_index; +}; + +struct PGF_INTERNAL_DECL PgfSequenceBackrefs { + size_t ref_count; + Vector list; + + static + void release(ref backrefs); +}; + struct PGF_INTERNAL_DECL PgfSymbolCat { static const uint8_t tag = 0; size_t d; @@ -206,6 +219,8 @@ PGF_INTERNAL_DECL void pgf_symbol_free(PgfSymbol sym); struct PGF_INTERNAL_DECL PgfConcrLincat { + static const uint8_t tag = 0; + size_t ref_count; ref abscat; @@ -223,6 +238,8 @@ struct PGF_INTERNAL_DECL PgfConcrLincat { }; struct PGF_INTERNAL_DECL PgfConcrLin { + static const uint8_t tag = 1; + size_t ref_count; ref absfun; diff --git a/src/runtime/c/pgf/namespace.h b/src/runtime/c/pgf/namespace.h index 0bb1d7824..41cabf429 100644 --- a/src/runtime/c/pgf/namespace.h +++ b/src/runtime/c/pgf/namespace.h @@ -20,7 +20,7 @@ template class Node; template -using Namespace = ref>; +using Namespace = ref>>; template class PGF_INTERNAL_DECL Node @@ -29,12 +29,12 @@ public: size_t ref_count; size_t sz; - ref value; + V value; ref left; ref right; static - ref new_node(ref value) + ref new_node(V value) { ref node = current_db->malloc(); node->ref_count = 1; @@ -50,11 +50,11 @@ public: } static - ref new_node(ref value, ref left, ref right) + ref new_node(V value, ref left, ref right) { ref node = current_db->malloc(); node->ref_count = 1; - node->sz = 1+namespace_size(left)+namespace_size(right); + node->sz = 1+Node::size(left)+Node::size(right); node->value = value; node->left = left; node->right = right; @@ -74,8 +74,17 @@ public: #endif } - static - void add_value_ref(ref value) + template static + void add_value_ref(A &value) + { + value.add_ref(); +#ifdef DEBUG_NAMESPACE + printf("add_ref value %5ld %s (ref_count=%ld)\n", value.as_object(), value->name.text, value->ref_count); +#endif + } + + template static + void add_value_ref(ref value) { value->ref_count++; #ifdef DEBUG_NAMESPACE @@ -83,12 +92,34 @@ public: #endif } + template static + void release_value_ref(A &value) + { +#ifdef DEBUG_NAMESPACE + printf("release value %5ld %s (ref_count=%ld)\n", value.as_object(), value->name.text, value->ref_count-1); +#endif + + value.release_ref(); + } + + template static + void release_value_ref(ref value) + { +#ifdef DEBUG_NAMESPACE + printf("release value %5ld %s (ref_count=%ld)\n", value.as_object(), value->name.text, value->ref_count-1); +#endif + + if (!(--value->ref_count)) { + A::release(value); + } + } + static - ref balanceL(ref value, ref left, ref right) + ref balanceL(V value, ref left, ref right) { if (right == 0) { if (left == 0) { - value->ref_count++; + add_value_ref(value); return new_node(value); } else { if (left->left == 0) { @@ -100,8 +131,8 @@ public: add_value_ref(value); add_value_ref(left->value); add_value_ref(left->right->value); - Namespace new_left = new_node(left->value); - Namespace new_right = new_node(value); + ref> new_left = new_node(left->value); + ref> new_right = new_node(value); return new_node(left->right->value, new_left, new_right); @@ -109,7 +140,7 @@ public: } else { if (left->right == 0) { add_value_ref(value); - Namespace new_right = new_node(value); + ref> new_right = new_node(value); add_value_ref(left->value); add_node_ref(left->left); return new_node(left->value, @@ -121,7 +152,7 @@ public: add_value_ref(left->value); add_node_ref(left->left); add_node_ref(left->right); - Namespace new_right = + ref> new_right = new_node(value, left->right, 0); @@ -137,11 +168,11 @@ public: add_node_ref(left->right->left); if (left->right->right != 0) add_node_ref(left->right->right); - Namespace new_left = + ref> new_left = new_node(left->value, left->left, left->right->left); - Namespace new_right = + ref> new_right = new_node(value, left->right->right, 0); @@ -165,7 +196,7 @@ public: add_node_ref(left->left); add_node_ref(left->right); add_node_ref(right); - Namespace new_right = + ref> new_right = new_node(value, left->right, right); @@ -182,11 +213,11 @@ public: if (left->right->right != 0) add_node_ref(left->right->right); add_node_ref(right); - Namespace new_left = + ref> new_left = new_node(left->value, left->left, left->right->left); - Namespace new_right = + ref> new_right = new_node(value, left->right->right, right); @@ -205,7 +236,7 @@ public: } static - ref balanceR(ref value, ref left, ref right) + ref balanceR(V value, ref left, ref right) { if (left == 0) { if (right == 0) { @@ -221,7 +252,7 @@ public: add_value_ref(value); add_value_ref(right->value); add_node_ref(right->right); - Namespace new_left = + ref> new_left = new_node(value); return new_node(right->value, new_left, @@ -232,9 +263,9 @@ public: add_value_ref(value); add_value_ref(right->value); add_value_ref(right->left->value); - Namespace new_left = + ref> new_left = new_node(value); - Namespace new_right = + ref> new_right = new_node(right->value); return new_node(right->left->value, new_left, @@ -245,7 +276,7 @@ public: add_value_ref(right->value); add_node_ref(right->left); add_node_ref(right->right); - Namespace new_left = + ref> new_left = new_node(value, 0, right->left); @@ -261,11 +292,11 @@ public: if (right->left->right != 0) add_node_ref(right->left->right); add_node_ref(right->right); - Namespace new_left = + ref> new_left = new_node(value, 0, right->left->left); - Namespace new_right = + ref> new_right = new_node(right->value, right->left->right, right->right); @@ -289,7 +320,7 @@ public: add_node_ref(left); add_node_ref(right->left); add_node_ref(right->right); - Namespace new_left = + ref> new_left = new_node(value, left, right->left); @@ -306,11 +337,11 @@ public: if (right->left->right != 0) add_node_ref(right->left->right); add_node_ref(right->right); - Namespace new_left = + ref> new_left = new_node(value, left, right->left->left); - Namespace new_right = + ref> new_right = new_node(right->value, right->left->right, right->right); @@ -327,6 +358,68 @@ public: } } } + + static + size_t size(ref node) + { + if (node == 0) + return 0; + return node->sz; + } + + static + ref pop_first(ref node, V *res) + { + if (node == 0) { + return 0; + } else if (node->left == 0) { + *res = node->value; + if (node->right != 0) + node->right->ref_count++; + return node->right; + } else { + ref new_left = pop_first(node->left, res); + ref new_node = balanceR(node->value, new_left, node->right); + release(new_left); + return new_node; + } + } + + static + ref pop_last(ref node, V *res) + { + if (node == 0) { + return 0; + } else if (node->right == 0) { + *res = node->value; + if (node->left != 0) + node->left->ref_count++; + return node->left; + } else { + ref new_right = pop_last(node->right, res); + ref new_node = balanceR(node->value, node->left, new_right); + release(new_right); + return new_node; + } + } + + static + void release(ref node) + { + if (node == 0) + return; + +#ifdef DEBUG_NAMESPACE + printf("release node %6ld %s (ref_count=%ld)\n", node.as_object(), node->value->name.text, node->ref_count-1); +#endif + + if (!(--node->ref_count)) { + release(node->left); + release(node->right); + release_value_ref(node->value); + PgfDB::free(node); + } + } }; template @@ -345,17 +438,17 @@ template Namespace namespace_insert(Namespace map, ref value) { if (map == 0) - return Node::new_node(value); + return Node>::new_node(value); int cmp = textcmp(&value->name,&map->value->name); if (cmp < 0) { Namespace left = namespace_insert(map->left, value); - Namespace node = Node::balanceL(map->value,left,map->right); + Namespace node = Node>::balanceL(map->value,left,map->right); namespace_release(left); return node; } else if (cmp > 0) { Namespace right = namespace_insert(map->right, value); - Namespace node = Node::balanceR(map->value, map->left, right); + Namespace node = Node>::balanceR(map->value, map->left, right); namespace_release(right); return node; } else { @@ -363,32 +456,37 @@ Namespace namespace_insert(Namespace map, ref value) map->left->ref_count++; if (map->right != 0) map->right->ref_count++; - return Node::new_node(value,map->left,map->right); + return Node>::new_node(value,map->left,map->right); } } template -Namespace namespace_delete(Namespace map, PgfText* name) +Namespace namespace_delete(Namespace map, PgfText* name, + ref *pvalue = NULL) { - if (map == 0) + if (map == 0) { + if (pvalue != NULL) *pvalue = 0; return 0; + } int cmp = textcmp(name,&map->value->name); if (cmp < 0) { - Namespace left = namespace_delete(map->left, name); + Namespace left = namespace_delete(map->left, name, pvalue); if (left == map->left) return map; - Namespace node = Node::balanceR(map->value,left,map->right); + Namespace node = Node>::balanceR(map->value,left,map->right); namespace_release(left); return node; } else if (cmp > 0) { - Namespace right = namespace_delete(map->right, name); + Namespace right = namespace_delete(map->right, name, pvalue); if (right == map->right) return map; - Namespace node = Node::balanceL(map->value,map->left,right); + Namespace node = Node>::balanceL(map->value,map->left,right); namespace_release(right); return node; } else { + if (pvalue != NULL) *pvalue = map->value; + if (map->left == 0) { if (map->right != 0) map->right->ref_count++; @@ -399,54 +497,20 @@ Namespace namespace_delete(Namespace map, PgfText* name) return map->left; } else if (map->left->sz > map->right->sz) { ref value; - Namespace new_left = namespace_pop_last(map->left, &value); - Namespace node = Node::balanceR(value, new_left, map->right); + Namespace new_left = Node>::pop_last(map->left, &value); + Namespace node = Node>::balanceR(value, new_left, map->right); namespace_release(new_left); return node; } else { ref value; - Namespace new_right = namespace_pop_first(map->right, &value); - Namespace node = Node::balanceL(value, map->left, new_right); + Namespace new_right = Node>::pop_first(map->right, &value); + Namespace node = Node>::balanceL(value, map->left, new_right); namespace_release(new_right); return node; } } } -template -Namespace namespace_pop_first(Namespace map, ref *res) -{ - if (map == 0) { - return 0; - } else if (map->left == 0) { - *res = map->value; - if (map->right != 0) - map->right->ref_count++; - return map->right; - } else { - Namespace new_left = namespace_pop_first(map->left, res); - Namespace node = Node::balanceR(map->value, new_left, map->right); - namespace_release(new_left); - return node; - } -} - -template -Namespace namespace_pop_last(Namespace map, ref *res) -{ - if (map == 0) { - return 0; - } else if (map->right == 0) { - *res = map->value; - if (map->left != 0) - map->left->ref_count++; - return map->left; - } else { - Namespace new_right = namespace_pop_last(map->right, res); - return Node::balanceR(map->value, map->left, new_right); - } -} - template ref namespace_lookup(Namespace map, PgfText *name) { @@ -465,9 +529,7 @@ ref namespace_lookup(Namespace map, PgfText *name) template size_t namespace_size(Namespace map) { - if (map == 0) - return 0; - return map->sz; + return Node>::size(map); } template @@ -492,27 +554,7 @@ void namespace_iter(Namespace map, PgfItor* itor, PgfExn *err) template void namespace_release(Namespace node) { - if (node == 0) - return; - -#ifdef DEBUG_NAMESPACE - printf("release node %6ld %s (ref_count=%ld)\n", node.as_object(), node->value->name.text, node->ref_count-1); -#endif - - if (!(--node->ref_count)) { - namespace_release(node->left); - namespace_release(node->right); - -#ifdef DEBUG_NAMESPACE - printf("release value %5ld %s (ref_count=%ld)\n", node->value.as_object(), node->value->name.text, node->value->ref_count-1); -#endif - - if (!(--node->value->ref_count)) { - V::release(node->value); - } - - PgfDB::free(node); - } + Node>::release(node); } #endif diff --git a/src/runtime/c/pgf/pgf.cxx b/src/runtime/c/pgf/pgf.cxx index b176b1cee..c629d28fa 100644 --- a/src/runtime/c/pgf/pgf.cxx +++ b/src/runtime/c/pgf/pgf.cxx @@ -147,7 +147,7 @@ PgfDB *pgf_read_ngf(const char *fpath, db->cleanup_revisions(); ref pgf = PgfDB::get_revision(master); - Node::add_value_ref(pgf); + Node>::add_value_ref(pgf); *revision = pgf.as_object(); } @@ -828,16 +828,18 @@ void pgf_iter_lins(PgfDB *db, PgfConcrRevision cnc_revision, PGF_API PgfPhrasetableIds *pgf_iter_sequences(PgfDB *db, PgfConcrRevision cnc_revision, - PgfSequenceItor *itor, PgfExn *err) + PgfSequenceItor *itor, + PgfMorphoCallback *callback, + PgfExn *err) { PGF_API_BEGIN { DB_scope scope(db, READER_SCOPE); ref concr = PgfDB::revision2concr(cnc_revision); - PgfPhrasetableIds *seq_ids = new PgfPhrasetableIds(); - seq_ids->start(concr); + PgfPhrasetableIds *seq_ids = new PgfPhrasetableIds(); + seq_ids->start(concr); - phrasetable_iter(concr->phrasetable, itor, seq_ids, err); + phrasetable_iter(concr, concr->phrasetable, itor, callback, seq_ids, err); return seq_ids; } PGF_API_END @@ -1108,25 +1110,25 @@ PgfRevision pgf_clone_revision(PgfDB *db, PgfRevision revision, new_pgf->gflags = pgf->gflags; if (pgf->gflags != 0) - Node::add_node_ref(pgf->gflags); + Node>::add_node_ref(pgf->gflags); new_pgf->abstract.name = textdup_db(&(*pgf->abstract.name)); new_pgf->abstract.aflags = pgf->abstract.aflags; if (pgf->abstract.aflags != 0) - Node::add_node_ref(pgf->abstract.aflags); + Node>::add_node_ref(pgf->abstract.aflags); new_pgf->abstract.funs = pgf->abstract.funs; if (pgf->abstract.funs != 0) - Node::add_node_ref(pgf->abstract.funs); + Node>::add_node_ref(pgf->abstract.funs); new_pgf->abstract.cats = pgf->abstract.cats; if (pgf->abstract.cats != 0) - Node::add_node_ref(pgf->abstract.cats); + Node>::add_node_ref(pgf->abstract.cats); new_pgf->concretes = pgf->concretes; if (pgf->concretes != 0) - Node::add_node_ref(pgf->concretes); + Node>::add_node_ref(pgf->concretes); new_pgf->prev = 0; new_pgf->next = 0; @@ -1170,7 +1172,7 @@ PgfRevision pgf_checkout_revision(PgfDB *db, PgfText *name, DB_scope scope(db, WRITER_SCOPE); ref pgf = PgfDB::get_revision(name); if (pgf != 0) { - Node::add_value_ref(pgf); + Node>::add_value_ref(pgf); db->ref_count++; } return pgf.as_object(); @@ -1335,27 +1337,28 @@ PgfConcrRevision pgf_clone_concrete(PgfDB *db, PgfRevision revision, clone->cflags = concr->cflags; if (clone->cflags != 0) - Node::add_node_ref(clone->cflags); + Node>::add_node_ref(clone->cflags); clone->lins = concr->lins; if (clone->lins != 0) - Node::add_node_ref(clone->lins); + Node>::add_node_ref(clone->lins); clone->lincats = concr->lincats; if (clone->lincats != 0) - Node::add_node_ref(clone->lincats); + Node>::add_node_ref(clone->lincats); + + clone->phrasetable = concr->phrasetable; + if (clone->phrasetable != 0) + Node::add_node_ref(clone->phrasetable); clone->printnames = concr->printnames; if (clone->printnames != 0) - Node::add_node_ref(clone->printnames); + Node>::add_node_ref(clone->printnames); clone->prev = 0; clone->next = 0; memcpy(&clone->name, name, sizeof(PgfText)+name->size+1); - if (clone->cflags != 0) - Node::add_node_ref(clone->cflags); - PgfDB::link_transient_revision(clone); Namespace concrs = @@ -1394,6 +1397,8 @@ class PGF_INTERNAL PgfLinBuilder : public PgfLinBuilderIface ref>> res; ref>> seqs; + object container; // what are we building? + size_t var_index; size_t arg_index; size_t res_index; @@ -1459,6 +1464,8 @@ public: lincat->fields = db_fields; lincat->n_lindefs = n_lindefs; + this->container = ref::tagged(lincat); + build->build(this, err); if (err->type == PGF_EXN_NONE && res_index != res->len) { err->type = PGF_EXN_PGF_ERROR; @@ -1500,6 +1507,8 @@ public: lin->res = res; lin->seqs = seqs; + this->container = ref::tagged(lin); + build->build(this, err); if (err->type == PGF_EXN_NONE && res_index != res->len) { err->type = PGF_EXN_PGF_ERROR; @@ -1892,13 +1901,12 @@ public: throw pgf_error(builder_error_msg); PgfPhrasetable phrasetable = - phrasetable_internalize(concr->phrasetable, &seq); - if (phrasetable != concr->phrasetable) { - phrasetable_release(concr->phrasetable); - concr->phrasetable = phrasetable; - } else { - *vector_elem(seqs, seq_index) = seq; - } + phrasetable_internalize(concr->phrasetable, + container, seq_index, + &seq); + phrasetable_release(concr->phrasetable); + concr->phrasetable = phrasetable; + *vector_elem(seqs, seq_index) = seq; res = seq; @@ -1919,7 +1927,17 @@ public: if (seq_index >= seqs->len) throw pgf_error(builder_error_msg); - *vector_elem(seqs, seq_index) = seq_id; + ref seq = seq_id; + seq->ref_count++; + + PgfPhrasetable phrasetable = + phrasetable_internalize(concr->phrasetable, + container, seq_index, + &seq); + phrasetable_release(concr->phrasetable); + concr->phrasetable = phrasetable; + *vector_elem(seqs, seq_index) = seq; + seq_index++; } PGF_API_END } @@ -2055,8 +2073,19 @@ void pgf_drop_lincat(PgfDB *db, ref concr = PgfDB::revision2concr(revision); + ref lincat; Namespace lincats = - namespace_delete(concr->lincats, name); + namespace_delete(concr->lincats, name, &lincat); + if (lincat != 0) { + object container = ref::tagged(lincat); + for (size_t i = 0; i < lincat->seqs->len; i++) { + ref seq = *vector_elem(lincat->seqs, i); + PgfPhrasetable new_phrasetable = + phrasetable_delete(concr->phrasetable,container,i,seq); + phrasetable_release(concr->phrasetable); + concr->phrasetable = new_phrasetable; + } + } namespace_release(concr->lincats); concr->lincats = lincats; } PGF_API_END @@ -2105,8 +2134,19 @@ void pgf_drop_lin(PgfDB *db, ref concr = PgfDB::revision2concr(revision); + ref lin; Namespace lins = - namespace_delete(concr->lins, name); + namespace_delete(concr->lins, name, &lin); + if (lin != 0) { + object container = ref::tagged(lin); + for (size_t i = 0; i < lin->seqs->len; i++) { + ref seq = *vector_elem(lin->seqs, i); + PgfPhrasetable new_phrasetable = + phrasetable_delete(concr->phrasetable,container,i,seq); + phrasetable_release(concr->phrasetable); + concr->phrasetable = new_phrasetable; + } + } namespace_release(concr->lins); concr->lins = lins; } PGF_API_END diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h index 850f858a9..f04d456fa 100644 --- a/src/runtime/c/pgf/pgf.h +++ b/src/runtime/c/pgf/pgf.h @@ -400,12 +400,21 @@ typedef struct PgfPhrasetableIds PgfPhrasetableIds; typedef struct PgfSequenceItor PgfSequenceItor; struct PgfSequenceItor { - void (*fn)(PgfSequenceItor* self, size_t seq_id, object value, PgfExn *err); + int (*fn)(PgfSequenceItor* self, size_t seq_id, object value, + PgfExn *err); }; -PGF_API +typedef struct PgfMorphoCallback PgfMorphoCallback; +struct PgfMorphoCallback { + void (*fn)(PgfMorphoCallback* self, PgfText *lemma, PgfText *analysis, prob_t prob, + PgfExn* err); +}; + +PGF_API_DECL PgfPhrasetableIds *pgf_iter_sequences(PgfDB *db, PgfConcrRevision cnc_revision, - PgfSequenceItor *itor, PgfExn *err); + PgfSequenceItor *itor, + PgfMorphoCallback *callback, + PgfExn *err); PGF_API_DECL void pgf_get_lincat_counts_internal(object o, size_t *counts); diff --git a/src/runtime/c/pgf/phrasetable.cxx b/src/runtime/c/pgf/phrasetable.cxx index 71f540680..d9aca46e7 100644 --- a/src/runtime/c/pgf/phrasetable.cxx +++ b/src/runtime/c/pgf/phrasetable.cxx @@ -11,7 +11,7 @@ PgfPhrasetableIds::PgfPhrasetableIds() void PgfPhrasetableIds::start(ref concr) { next_id = 0; - n_pairs = namespace_size(concr->phrasetable); + n_pairs = phrasetable_size(concr->phrasetable); size_t mem_size = sizeof(SeqIdPair)*n_pairs; pairs = (SeqIdPair*) malloc(mem_size); if (pairs == NULL) @@ -72,6 +72,24 @@ void PgfPhrasetableIds::end() pairs = NULL; } +void PgfPhrasetableEntry::add_ref() +{ + seq->ref_count++; + if (backrefs != 0) + backrefs->ref_count++; +} + +void PgfPhrasetableEntry::release_ref() +{ + if (!(--seq->ref_count)) { + PgfSequence::release(seq); + } + + if (backrefs != 0 && !(--backrefs->ref_count)) { + PgfSequenceBackrefs::release(backrefs); + } +} + static int lparam_cmp(PgfLParam *p1, PgfLParam *p2) { @@ -228,55 +246,97 @@ int sequence_cmp(ref seq1, ref seq2) } PGF_INTERNAL -PgfPhrasetable phrasetable_internalize(PgfPhrasetable table, ref *pseq) +PgfPhrasetable phrasetable_internalize(PgfPhrasetable table, + object container, + size_t seq_index, + ref *pseq) { if (table == 0) { - PgfPhrasetable table = Node::new_node(*pseq); - Node::add_value_ref(table->value); - return table; + PgfPhrasetableEntry entry; + entry.seq = *pseq; + entry.backrefs = PgfDB::malloc(sizeof(PgfSequenceBackref)); + entry.backrefs->ref_count = 1; + entry.backrefs->list.len = 1; + entry.backrefs->list.data[0].container = container; + entry.backrefs->list.data[0].seq_index = seq_index; + entry.seq->ref_count++; + return Node::new_node(entry); } - int cmp = sequence_cmp(*pseq,table->value); + int cmp = sequence_cmp(*pseq,table->value.seq); if (cmp < 0) { - PgfPhrasetable left = phrasetable_internalize(table->left, pseq); - if (left == table->left) - return table; - else { - PgfPhrasetable node = Node::balanceL(table->value,left,table->right); - namespace_release(left); - return node; - } + PgfPhrasetable left = phrasetable_internalize(table->left, + container, + seq_index, + pseq); + PgfPhrasetable node = Node::balanceL(table->value,left,table->right); + phrasetable_release(left); + return node; } else if (cmp > 0) { - PgfPhrasetable right = phrasetable_internalize(table->right, pseq); - if (right == table->right) - return table; - else { - PgfPhrasetable node = Node::balanceR(table->value, table->left, right); - phrasetable_release(right); - return node; - } + PgfPhrasetable right = phrasetable_internalize(table->right, + container, + seq_index, + pseq); + PgfPhrasetable node = Node::balanceR(table->value, table->left, right); + phrasetable_release(right); + return node; } else { if (!(--(*pseq)->ref_count)) { PgfSequence::release(*pseq); } - Node::add_value_ref(table->value); + table->value.seq->ref_count++; + *pseq = table->value.seq; - *pseq = table->value; - return table; - } + if (table->left != 0) + table->left->ref_count++; + if (table->right != 0) + table->right->ref_count++; + + size_t len = (table->value.backrefs) + ? table->value.backrefs->list.len + : 0; + PgfPhrasetableEntry entry; + entry.seq = table->value.seq; + entry.backrefs = PgfDB::malloc((len+1)*sizeof(PgfSequenceBackref)); + entry.backrefs->ref_count = 1; + entry.backrefs->list.len = len+1; + memcpy(entry.backrefs->list.data, table->value.backrefs->list.data, len*sizeof(PgfSequenceBackref)); + entry.backrefs->list.data[len].container = container; + entry.backrefs->list.data[len].seq_index = seq_index; + entry.seq->ref_count++; + return Node::new_node(entry,table->left,table->right); + } } PGF_INTERNAL_DECL -ref phrasetable_get(PgfPhrasetable table, size_t seq_id) +ref phrasetable_relink(PgfPhrasetable table, + object container, + size_t seq_index, + size_t seq_id) { while (table != 0) { size_t left_sz = table->left->sz; if (seq_id < left_sz) table = table->left; - else if (seq_id == left_sz) - return table->value; - else { + else if (seq_id == left_sz) { + size_t len = (table->value.backrefs) + ? table->value.backrefs->list.len + : 0; + + ref backrefs = + PgfDB::malloc((len+1)*sizeof(PgfSequenceBackref)); + backrefs->ref_count = 1; + backrefs->list.len = len+1; + memcpy(backrefs->list.data, table->value.backrefs->list.data, len*sizeof(PgfSequenceBackref)); + backrefs->list.data[len].container = container; + backrefs->list.data[len].seq_index = seq_index; + if (table->value.backrefs != 0) + PgfSequenceBackrefs::release(table->value.backrefs); + table->value.backrefs = backrefs; + + return table->value.seq; + } else { table = table->right; seq_id -= left_sz+1; } @@ -284,23 +344,140 @@ ref phrasetable_get(PgfPhrasetable table, size_t seq_id) return 0; } +PgfPhrasetable phrasetable_delete(PgfPhrasetable table, + object container, + size_t seq_index, + ref seq) +{ + if (table == 0) + return 0; + + int cmp = sequence_cmp(seq,table->value.seq); + if (cmp < 0) { + PgfPhrasetable left = phrasetable_delete(table->left, + container, seq_index, + seq); + if (left == table->left) + return table; + PgfPhrasetable node = Node::balanceR(table->value,left,table->right); + phrasetable_release(left); + return node; + } else if (cmp > 0) { + PgfPhrasetable right = phrasetable_delete(table->right, + container, seq_index, + seq); + if (right == table->right) + return table; + PgfPhrasetable node = Node::balanceL(table->value,table->left,right); + phrasetable_release(right); + return node; + } else { + PgfPhrasetableEntry entry; + + size_t len = table->value.backrefs->list.len; + if (len > 1) { + entry.backrefs = + PgfDB::malloc((len-1)*sizeof(PgfSequenceBackref)); + entry.backrefs->ref_count = 1; + entry.backrefs->list.len = len-1; + size_t i = 0; + while (i < len) { + PgfSequenceBackref *backref = + vector_elem(&table->value.backrefs->list, i); + if (backref->container == container && + backref->seq_index == seq_index) { + break; + } + *vector_elem(&entry.backrefs->list, i) = *backref; + i++; + } + i++; + while (i < len) { + PgfSequenceBackref *backref = + vector_elem(&table->value.backrefs->list, i); + *vector_elem(&entry.backrefs->list, i-1) = *backref; + i++; + } + + entry.seq = table->value.seq; + table->value.seq->ref_count++; + return Node::new_node(entry, table->left, table->right); + } else { + if (table->left == 0) { + if (table->right != 0) + table->right->ref_count++; + return table->right; + } else if (table->right == 0) { + if (table->left != 0) + table->left->ref_count++; + return table->left; + } else if (table->left->sz > table->right->sz) { + PgfPhrasetable new_left = Node::pop_last(table->left, &entry); + PgfPhrasetable node = Node::balanceR(entry, new_left, table->right); + phrasetable_release(new_left); + return node; + } else { + PgfPhrasetable new_right = Node::pop_first(table->right, &entry); + PgfPhrasetable node = Node::balanceL(entry, table->left, new_right); + phrasetable_release(new_right); + return node; + } + } + } +} + PGF_INTERNAL -void phrasetable_iter(PgfPhrasetable table, PgfSequenceItor* itor, +size_t phrasetable_size(PgfPhrasetable table) +{ + return Node::size(table); +} + +PGF_INTERNAL +void phrasetable_iter(PgfConcr *concr, + PgfPhrasetable table, + PgfSequenceItor* itor, + PgfMorphoCallback *callback, PgfPhrasetableIds *seq_ids, PgfExn *err) { if (table == 0) return; - phrasetable_iter(table->left, itor, seq_ids, err); + phrasetable_iter(concr, table->left, itor, callback, seq_ids, err); if (err->type != PGF_EXN_NONE) return; - size_t seq_id = seq_ids->add(table->value); - itor->fn(itor, seq_id, table->value.as_object(), err); + size_t seq_id = seq_ids->add(table->value.seq); + int res = itor->fn(itor, seq_id, table->value.seq.as_object(), err); if (err->type != PGF_EXN_NONE) return; - phrasetable_iter(table->right, itor, seq_ids, err); + if (table->value.backrefs != 0 && res == 0 && callback != 0) { + for (size_t i = 0; i < table->value.backrefs->list.len; i++) { + PgfSequenceBackref backref = *vector_elem(&table->value.backrefs->list,i); + switch (ref::get_tag(backref.container)) { + case PgfConcrLin::tag: { + ref lin = ref::untagged(backref.container); + ref lincat = + namespace_lookup(concr->lincats, &lin->absfun->type->name); + if (lincat != 0) { + ref field = + *vector_elem(lincat->fields, backref.seq_index % lincat->fields->len); + + callback->fn(callback, &lin->absfun->name, &(*field), lincat->abscat->prob+lin->absfun->prob, err); + if (err->type != PGF_EXN_NONE) + return; + } + break; + } + case PgfConcrLincat::tag: { + //ignore + break; + } + } + } + } + + phrasetable_iter(concr, table->right, itor, callback, seq_ids, err); if (err->type != PGF_EXN_NONE) return; } @@ -308,5 +485,5 @@ void phrasetable_iter(PgfPhrasetable table, PgfSequenceItor* itor, PGF_INTERNAL void phrasetable_release(PgfPhrasetable table) { - namespace_release(table); + Node::release(table); } diff --git a/src/runtime/c/pgf/phrasetable.h b/src/runtime/c/pgf/phrasetable.h index bbfd7116b..987ba9d7f 100644 --- a/src/runtime/c/pgf/phrasetable.h +++ b/src/runtime/c/pgf/phrasetable.h @@ -1,9 +1,19 @@ #ifndef PHRASETABLE_H #define PHRASETABLE_H -class PgfSequence; +struct PgfSequence; +struct PgfSequenceBackrefs; + +struct PGF_INTERNAL_DECL PgfPhrasetableEntry { + ref seq; + ref backrefs; + + void add_ref(); + void release_ref(); +}; + class PgfSequenceItor; -typedef ref> PgfPhrasetable; +typedef ref> PgfPhrasetable; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wattributes" @@ -41,13 +51,30 @@ private: #pragma GCC diagnostic pop PGF_INTERNAL_DECL -PgfPhrasetable phrasetable_internalize(PgfPhrasetable table, ref *seq); +PgfPhrasetable phrasetable_internalize(PgfPhrasetable table, + object container, + size_t seq_index, + ref *seq); PGF_INTERNAL_DECL -ref phrasetable_get(PgfPhrasetable table, size_t seq_id); +ref phrasetable_relink(PgfPhrasetable table, + object container, + size_t seq_index, + size_t seq_id); + +PgfPhrasetable phrasetable_delete(PgfPhrasetable table, + object container, + size_t seq_index, + ref seq); PGF_INTERNAL_DECL -void phrasetable_iter(PgfPhrasetable table, PgfSequenceItor* itor, +size_t phrasetable_size(PgfPhrasetable table); + +PGF_INTERNAL_DECL +void phrasetable_iter(PgfConcr *concr, + PgfPhrasetable table, + PgfSequenceItor* itor, + PgfMorphoCallback *callback, PgfPhrasetableIds *seq_ids, PgfExn *err); PGF_INTERNAL_DECL diff --git a/src/runtime/c/pgf/reader.cxx b/src/runtime/c/pgf/reader.cxx index 0737888b5..cf939441e 100644 --- a/src/runtime/c/pgf/reader.cxx +++ b/src/runtime/c/pgf/reader.cxx @@ -160,7 +160,7 @@ Namespace PgfReader::read_namespace(ref (PgfReader::*read_value)(), size_t ref value = (this->*read_value)(); Namespace right = read_namespace(read_value, len-half-1); - return Node::new_node(value, left, right); + return Node>::new_node(value, left, right); } template @@ -576,13 +576,42 @@ ref PgfReader::read_seq() return seq; } -void PgfReader::read_seq_id(ref> r) +ref>> PgfReader::read_seq_ids(object container) { - size_t seq_id = read_len(); - ref seq = phrasetable_get(concrete->phrasetable, seq_id); - if (seq == 0) - throw pgf_error("Invalid sequence id"); - *r = seq; + size_t len = read_len(); + ref>> vec = vector_new>(len); + for (size_t i = 0; i < len; i++) { + size_t seq_id = read_len(); + ref seq = phrasetable_relink(concrete->phrasetable, + container, i, + seq_id); + if (seq == 0) + throw pgf_error("Invalid sequence id"); + *vector_elem(vec,i) = seq; + } + return vec; +} + +PgfPhrasetable PgfReader::read_phrasetable(size_t len) +{ + if (len == 0) + return 0; + + PgfPhrasetableEntry value; + + size_t half = len/2; + PgfPhrasetable left = read_phrasetable(half); + value.seq = read_seq(); + value.backrefs = 0; + PgfPhrasetable right = read_phrasetable(len-half-1); + + return Node::new_node(value, left, right); +} + +PgfPhrasetable PgfReader::read_phrasetable() +{ + size_t len = read_len(); + return read_phrasetable(len); } ref PgfReader::read_lincat() @@ -594,7 +623,7 @@ ref PgfReader::read_lincat() lincat->n_lindefs = read_len(); lincat->args = read_vector(&PgfReader::read_parg); lincat->res = read_vector(&PgfReader::read_presult2); - lincat->seqs = read_vector(&PgfReader::read_seq_id); + lincat->seqs = read_seq_ids(ref::tagged(lincat)); return lincat; } @@ -605,7 +634,7 @@ ref PgfReader::read_lin() lin->absfun = namespace_lookup(abstract->funs, &lin->name); lin->args = read_vector(&PgfReader::read_parg); lin->res = read_vector(&PgfReader::read_presult2); - lin->seqs = read_vector(&PgfReader::read_seq_id); + lin->seqs = read_seq_ids(ref::tagged(lin)); return lin; } @@ -623,7 +652,7 @@ ref PgfReader::read_concrete() concrete->ref_count = 1; concrete->ref_count_ex = 0; concrete->cflags = read_namespace(&PgfReader::read_flag); - concrete->phrasetable = read_namespace(&PgfReader::read_seq); + concrete->phrasetable = read_phrasetable(); concrete->lincats = read_namespace(&PgfReader::read_lincat); concrete->lins = read_namespace(&PgfReader::read_lin); concrete->printnames = read_namespace(&PgfReader::read_printname); diff --git a/src/runtime/c/pgf/reader.h b/src/runtime/c/pgf/reader.h index 87c678b61..fe5006eb2 100644 --- a/src/runtime/c/pgf/reader.h +++ b/src/runtime/c/pgf/reader.h @@ -74,7 +74,9 @@ public: ref read_presult(); PgfSymbol read_symbol(); ref read_seq(); - void read_seq_id(ref> r); + ref>> read_seq_ids(object container); + PgfPhrasetable read_phrasetable(size_t len); + PgfPhrasetable read_phrasetable(); ref read_lin(); ref read_printname(); diff --git a/src/runtime/c/pgf/writer.cxx b/src/runtime/c/pgf/writer.cxx index f1293bd18..ecb267c68 100644 --- a/src/runtime/c/pgf/writer.cxx +++ b/src/runtime/c/pgf/writer.cxx @@ -418,6 +418,22 @@ void PgfWriter::write_seq(ref seq) write_vector(ref>::from_ptr(&seq->syms), &PgfWriter::write_symbol); } +void PgfWriter::write_phrasetable(PgfPhrasetable table) +{ + write_len(phrasetable_size(table)); + write_phrasetable_helper(table); +} + +void PgfWriter::write_phrasetable_helper(PgfPhrasetable table) +{ + if (table == 0) + return; + + write_phrasetable_helper(table->left); + write_seq(table->value.seq); + write_phrasetable_helper(table->right); +} + void PgfWriter::write_lincat(ref lincat) { write_name(&lincat->name); @@ -448,7 +464,7 @@ void PgfWriter::write_concrete(ref concr) write_name(&concr->name); write_namespace(concr->cflags, &PgfWriter::write_flag); - write_namespace(concr->phrasetable, &PgfWriter::write_seq); + write_phrasetable(concr->phrasetable); write_namespace(concr->lincats, &PgfWriter::write_lincat); write_namespace(concr->lins, &PgfWriter::write_lin); write_namespace(concr->printnames, &PgfWriter::write_printname); diff --git a/src/runtime/c/pgf/writer.h b/src/runtime/c/pgf/writer.h index 4ee415fdd..1bf7c9370 100644 --- a/src/runtime/c/pgf/writer.h +++ b/src/runtime/c/pgf/writer.h @@ -46,6 +46,7 @@ public: void write_symbol(PgfSymbol sym); void write_seq(ref seq); void write_seq_id(ref> r) { write_len(seq_ids.get(*r)); }; + void write_phrasetable(PgfPhrasetable table); void write_lin(ref lin); void write_printname(ref printname); @@ -56,6 +57,7 @@ public: private: template void write_namespace_helper(Namespace nmsp, void (PgfWriter::*write_value)(ref)); + void write_phrasetable_helper(PgfPhrasetable table); void write_text(ref> r) { write_text(&(**r)); }; void write_lparam(ref> r) { write_lparam(*r); }; diff --git a/src/runtime/haskell/PGF2.hsc b/src/runtime/haskell/PGF2.hsc index 623167297..ad6300fc4 100644 --- a/src/runtime/haskell/PGF2.hsc +++ b/src/runtime/haskell/PGF2.hsc @@ -294,7 +294,7 @@ showPGF p = bracket (wrapSequenceItorCallback (getSequences ref)) freeHaskellFunPtr $ \fptr -> withForeignPtr (c_revision c) $ \c_revision -> do (#poke PgfSequenceItor, fn) itor fptr - withPgfExn "showPGF" (pgf_iter_sequences (a_db p) c_revision itor)) + withPgfExn "showPGF" (pgf_iter_sequences (a_db p) c_revision itor nullPtr)) doc <- readIORef ref return (seq_ids, doc) where @@ -303,6 +303,7 @@ showPGF p = def <- bracket (pgf_print_sequence_internal seq_id val) free $ \c_text -> do fmap text (peekText c_text) modifyIORef ref $ (\doc -> doc $$ def) + return 0 -- | The abstract language name is the name of the top-level -- abstract module @@ -570,20 +571,33 @@ unk _ _ = False fullFormLexicon :: Concr -> [(String, [MorphoAnalysis])] fullFormLexicon c = unsafePerformIO $ do ref <- newIORef [] - (allocaBytes (#size PgfSequenceItor) $ \itor -> - bracket (wrapSequenceItorCallback (getSequences ref)) freeHaskellFunPtr $ \fptr -> + (allocaBytes (#size PgfSequenceItor) $ \itor1 -> + bracket (wrapSequenceItorCallback (getSequences ref)) freeHaskellFunPtr $ \fptr1 -> + allocaBytes (#size PgfMorphoCallback) $ \itor2 -> + bracket (wrapMorphoCallback (getMorphology ref)) freeHaskellFunPtr $ \fptr2 -> withForeignPtr (c_revision c) $ \c_revision -> do - (#poke PgfSequenceItor, fn) itor fptr - seq_ids <- withPgfExn "fullFormLexicon" (pgf_iter_sequences (c_db c) c_revision itor) + (#poke PgfSequenceItor, fn) itor1 fptr1 + (#poke PgfMorphoCallback, fn) itor2 fptr2 + seq_ids <- withPgfExn "fullFormLexicon" (pgf_iter_sequences (c_db c) c_revision itor1 itor2) pgf_release_phrasetable_ids seq_ids) fmap reverse (readIORef ref) where - getSequences ref itor seq_id val exn = do + getSequences ref _ seq_id val exn = do bracket (pgf_sequence_get_text_internal val) free $ \c_text -> if c_text == nullPtr - then return () - else do lemma <- peekText c_text - modifyIORef ref $ (\lexicon -> (lemma, []) : lexicon) + then return 1 + else do form <- peekText c_text + case form of + [] -> return 1 + _ -> do modifyIORef ref $ (\lexicon -> (form, []) : lexicon) + return 0 + + getMorphology ref _ c_name c_field c_prob exn = do + name <- peekText c_name + field <- peekText c_field + let prob = realToFrac c_prob + ann = (name,field,prob) + modifyIORef ref (\((form,anns) : lexicon) -> (form,ann:anns) : lexicon) -- | This data type encodes the different outcomes which you could get from the parser. diff --git a/src/runtime/haskell/PGF2/FFI.hsc b/src/runtime/haskell/PGF2/FFI.hsc index b4530456f..6c0b26844 100644 --- a/src/runtime/haskell/PGF2/FFI.hsc +++ b/src/runtime/haskell/PGF2/FFI.hsc @@ -46,6 +46,7 @@ data PgfLinBuilderIface data PgfLinearizationOutputIface data PgfGraphvizOptions data PgfSequenceItor +data PgfMorphoCallback data PgfPhrasetableIds type Wrapper a = a -> IO (FunPtr a) @@ -112,11 +113,15 @@ foreign import ccall pgf_iter_lincats :: Ptr PgfDB -> Ptr Concr -> Ptr PgfItor - foreign import ccall pgf_iter_lins :: Ptr PgfDB -> Ptr Concr -> Ptr PgfItor -> Ptr PgfExn -> IO () -type SequenceItorCallback = Ptr PgfSequenceItor -> CSize -> Ptr () -> Ptr PgfExn -> IO () +type SequenceItorCallback = Ptr PgfSequenceItor -> CSize -> Ptr () -> Ptr PgfExn -> IO CInt foreign import ccall "wrapper" wrapSequenceItorCallback :: Wrapper SequenceItorCallback -foreign import ccall pgf_iter_sequences :: Ptr PgfDB -> Ptr Concr -> Ptr PgfSequenceItor -> Ptr PgfExn -> IO (Ptr PgfPhrasetableIds) +type MorphoCallback = Ptr PgfMorphoCallback -> Ptr PgfText -> Ptr PgfText -> (#type prob_t) -> Ptr PgfExn -> IO () + +foreign import ccall "wrapper" wrapMorphoCallback :: Wrapper MorphoCallback + +foreign import ccall pgf_iter_sequences :: Ptr PgfDB -> Ptr Concr -> Ptr PgfSequenceItor -> Ptr PgfMorphoCallback -> Ptr PgfExn -> IO (Ptr PgfPhrasetableIds) foreign import ccall pgf_get_lincat_counts_internal :: Ptr () -> Ptr CSize -> IO ()