forked from GitHub/gf-core
much faster grammar loading and dynamic updates
This commit is contained in:
@@ -143,6 +143,9 @@ static inline ssize_t get_mmap_size(size_t init_size, size_t page_size)
|
|||||||
return mmap_size;
|
return mmap_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PGF_INTERNAL_DECL
|
||||||
|
size_t get_next_padovan(size_t min);
|
||||||
|
|
||||||
static inline ssize_t get_mremap_size(size_t file_size, size_t block_size, size_t free_size, size_t page_size)
|
static inline ssize_t get_mremap_size(size_t file_size, size_t block_size, size_t free_size, size_t page_size)
|
||||||
{
|
{
|
||||||
size_t n_pages =
|
size_t n_pages =
|
||||||
|
|||||||
@@ -2264,13 +2264,7 @@ public:
|
|||||||
throw pgf_error(builder_error_msg);
|
throw pgf_error(builder_error_msg);
|
||||||
|
|
||||||
ref<PgfPhrasetableEntry> entry = seq_id;
|
ref<PgfPhrasetableEntry> entry = seq_id;
|
||||||
|
phrasetable_add_backref(entry,PgfDB::get_txn_id(),container,seq_index);
|
||||||
size_t len = entry->backrefs->len;
|
|
||||||
ref<Vector<PgfSequenceBackref>> backrefs =
|
|
||||||
vector_resize<PgfSequenceBackref>(entry->backrefs, len+1, PgfDB::get_txn_id());
|
|
||||||
backrefs->data[len].container = container;
|
|
||||||
backrefs->data[len].seq_index = seq_index;
|
|
||||||
entry->backrefs = backrefs;
|
|
||||||
|
|
||||||
*vector_elem(seqs, seq_index) = entry->seq;
|
*vector_elem(seqs, seq_index) = entry->seq;
|
||||||
|
|
||||||
|
|||||||
@@ -342,48 +342,27 @@ int backref_cmp(ref<PgfSequenceBackref> backref, ref<PgfConcrLincat> lincat, siz
|
|||||||
return cmp;
|
return cmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
PGF_INTERNAL_DECL
|
||||||
ref<Vector<PgfSequenceBackref>> phrasetable_update_backrefs(PgfPhrasetable table,
|
size_t get_next_padovan(size_t min);
|
||||||
ref<PgfConcrLincat> lincat,
|
|
||||||
|
PGF_INTERNAL_DECL
|
||||||
|
void phrasetable_add_backref(ref<PgfPhrasetableEntry> entry, txn_t txn_id,
|
||||||
object container,
|
object container,
|
||||||
size_t seq_index)
|
size_t seq_index)
|
||||||
{
|
{
|
||||||
size_t len = (table->value.backrefs != 0)
|
ref<Vector<PgfSequenceBackref>> backrefs = entry->backrefs;
|
||||||
? table->value.backrefs->len
|
|
||||||
: 0;
|
|
||||||
|
|
||||||
ref<Vector<PgfSequenceBackref>> backrefs =
|
size_t len = (backrefs != 0) ? backrefs->len : 0;
|
||||||
vector_resize<PgfSequenceBackref>(table->value.backrefs, len+1, table->txn_id);
|
if (entry->n_backrefs >= len) {
|
||||||
ssize_t i = 0;
|
size_t new_len = get_next_padovan(entry->n_backrefs+1);
|
||||||
ssize_t j = len-1;
|
backrefs = PgfDB::realloc<Vector<PgfSequenceBackref>>(backrefs,len*sizeof(PgfSequenceBackref),new_len*sizeof(PgfSequenceBackref),txn_id);
|
||||||
if (table->value.seq->syms.len == 0 && len > 0) {
|
backrefs->len = new_len;
|
||||||
// The backrefs for the epsilon sequence are sorted by lincat and r
|
}
|
||||||
|
backrefs->data[entry->n_backrefs].container = container;
|
||||||
|
backrefs->data[entry->n_backrefs].seq_index = seq_index;
|
||||||
|
|
||||||
size_t r = (lincat!=0) ? (seq_index % lincat->fields->len) : 0;
|
entry->n_backrefs++;
|
||||||
while (i <= j) {
|
entry->backrefs = backrefs;
|
||||||
ssize_t k = (i + j) / 2;
|
|
||||||
ref<PgfSequenceBackref> backref = vector_elem(backrefs, k);
|
|
||||||
|
|
||||||
int cmp = backref_cmp(backref, lincat, r);
|
|
||||||
if (cmp < 0) {
|
|
||||||
while (j >= k) {
|
|
||||||
backrefs->data[j+1] = backrefs->data[j];
|
|
||||||
j--;
|
|
||||||
}
|
|
||||||
} else if (cmp > 0) {
|
|
||||||
i = k+1;
|
|
||||||
} else {
|
|
||||||
while (j > k) {
|
|
||||||
backrefs->data[j+1] = backrefs->data[j];
|
|
||||||
j--;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
backrefs->data[j+1].container = container;
|
|
||||||
backrefs->data[j+1].seq_index = seq_index;
|
|
||||||
return backrefs;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PGF_INTERNAL
|
PGF_INTERNAL
|
||||||
@@ -397,6 +376,7 @@ PgfPhrasetable phrasetable_internalize(PgfPhrasetable table,
|
|||||||
if (table == 0) {
|
if (table == 0) {
|
||||||
PgfPhrasetableEntry entry;
|
PgfPhrasetableEntry entry;
|
||||||
entry.seq = seq;
|
entry.seq = seq;
|
||||||
|
entry.n_backrefs = 1;
|
||||||
entry.backrefs = vector_new<PgfSequenceBackref>(1);
|
entry.backrefs = vector_new<PgfSequenceBackref>(1);
|
||||||
entry.backrefs->data[0].container = container;
|
entry.backrefs->data[0].container = container;
|
||||||
entry.backrefs->data[0].seq_index = seq_index;
|
entry.backrefs->data[0].seq_index = seq_index;
|
||||||
@@ -427,24 +407,16 @@ PgfPhrasetable phrasetable_internalize(PgfPhrasetable table,
|
|||||||
} else {
|
} else {
|
||||||
PgfSequence::release(seq);
|
PgfSequence::release(seq);
|
||||||
|
|
||||||
size_t len = (table->value.backrefs)
|
|
||||||
? table->value.backrefs->len
|
|
||||||
: 0;
|
|
||||||
|
|
||||||
ref<Vector<PgfSequenceBackref>> backrefs =
|
|
||||||
phrasetable_update_backrefs(table,lincat,container,seq_index);
|
|
||||||
|
|
||||||
PgfPhrasetable new_table =
|
PgfPhrasetable new_table =
|
||||||
Node<PgfPhrasetableEntry>::upd_node(table, table->left, table->right);
|
Node<PgfPhrasetableEntry>::upd_node(table, table->left, table->right);
|
||||||
new_table->value.backrefs = backrefs;
|
|
||||||
*pentry = ref<PgfPhrasetableEntry>::from_ptr(&new_table->value);
|
*pentry = ref<PgfPhrasetableEntry>::from_ptr(&new_table->value);
|
||||||
|
phrasetable_add_backref(*pentry,table->txn_id,container,seq_index);
|
||||||
return new_table;
|
return new_table;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PGF_INTERNAL
|
PGF_INTERNAL
|
||||||
ref<PgfSequence> phrasetable_relink(PgfPhrasetable table,
|
ref<PgfSequence> phrasetable_relink(PgfPhrasetable table,
|
||||||
ref<PgfConcrLincat> lincat,
|
|
||||||
object container,
|
object container,
|
||||||
size_t seq_index,
|
size_t seq_index,
|
||||||
size_t seq_id)
|
size_t seq_id)
|
||||||
@@ -454,14 +426,8 @@ ref<PgfSequence> phrasetable_relink(PgfPhrasetable table,
|
|||||||
if (seq_id < left_sz)
|
if (seq_id < left_sz)
|
||||||
table = table->left;
|
table = table->left;
|
||||||
else if (seq_id == left_sz) {
|
else if (seq_id == left_sz) {
|
||||||
size_t len = (table->value.backrefs == 0)
|
auto entry = ref<PgfPhrasetableEntry>::from_ptr(&table->value);
|
||||||
? 0
|
phrasetable_add_backref(entry,table->txn_id,container,seq_index);
|
||||||
: table->value.backrefs->len;
|
|
||||||
|
|
||||||
ref<Vector<PgfSequenceBackref>> backrefs =
|
|
||||||
phrasetable_update_backrefs(table,lincat,container,seq_index);
|
|
||||||
table->value.backrefs = backrefs;
|
|
||||||
|
|
||||||
return table->value.seq;
|
return table->value.seq;
|
||||||
} else {
|
} else {
|
||||||
table = table->right;
|
table = table->right;
|
||||||
@@ -471,6 +437,7 @@ ref<PgfSequence> phrasetable_relink(PgfPhrasetable table,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PGF_INTERNAL
|
||||||
PgfPhrasetable phrasetable_delete(PgfPhrasetable table,
|
PgfPhrasetable phrasetable_delete(PgfPhrasetable table,
|
||||||
object container,
|
object container,
|
||||||
size_t seq_index,
|
size_t seq_index,
|
||||||
@@ -494,11 +461,12 @@ PgfPhrasetable phrasetable_delete(PgfPhrasetable table,
|
|||||||
return Node<PgfPhrasetableEntry>::balanceL(table);
|
return Node<PgfPhrasetableEntry>::balanceL(table);
|
||||||
} else {
|
} else {
|
||||||
size_t len = table->value.backrefs->len;
|
size_t len = table->value.backrefs->len;
|
||||||
if (len > 1) {
|
size_t n_backrefs = table->value.n_backrefs;
|
||||||
|
if (n_backrefs > 1) {
|
||||||
ref<Vector<PgfSequenceBackref>> backrefs =
|
ref<Vector<PgfSequenceBackref>> backrefs =
|
||||||
vector_resize<PgfSequenceBackref>(table->value.backrefs, len-1, table->txn_id);
|
PgfDB::realloc<Vector<PgfSequenceBackref>>(table->value.backrefs,len*sizeof(PgfSequenceBackref),n_backrefs*sizeof(PgfSequenceBackref),table->txn_id);
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
while (i < len-1) {
|
while (i < n_backrefs) {
|
||||||
ref<PgfSequenceBackref> backref =
|
ref<PgfSequenceBackref> backref =
|
||||||
vector_elem(backrefs, i);
|
vector_elem(backrefs, i);
|
||||||
if (backref->container == container &&
|
if (backref->container == container &&
|
||||||
@@ -508,13 +476,15 @@ PgfPhrasetable phrasetable_delete(PgfPhrasetable table,
|
|||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
i++;
|
i++;
|
||||||
while (i < len) {
|
while (i < n_backrefs) {
|
||||||
*vector_elem(backrefs, i-1) = *vector_elem(table->value.backrefs, i);
|
*vector_elem(backrefs, i-1) = *vector_elem(table->value.backrefs, i);
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
n_backrefs--;
|
||||||
|
|
||||||
PgfPhrasetable new_table =
|
PgfPhrasetable new_table =
|
||||||
Node<PgfPhrasetableEntry>::upd_node(table, table->left, table->right);
|
Node<PgfPhrasetableEntry>::upd_node(table, table->left, table->right);
|
||||||
|
new_table->value.n_backrefs = n_backrefs;
|
||||||
new_table->value.backrefs = backrefs;
|
new_table->value.backrefs = backrefs;
|
||||||
return new_table;
|
return new_table;
|
||||||
} else {
|
} else {
|
||||||
@@ -570,8 +540,7 @@ void phrasetable_lookup(PgfPhrasetable table,
|
|||||||
phrasetable_lookup(table->right,sentence,case_sensitive,scanner,err);
|
phrasetable_lookup(table->right,sentence,case_sensitive,scanner,err);
|
||||||
} else {
|
} else {
|
||||||
auto backrefs = table->value.backrefs;
|
auto backrefs = table->value.backrefs;
|
||||||
if (backrefs != 0) {
|
for (size_t i = 0; i < table->value.n_backrefs; i++) {
|
||||||
for (size_t i = 0; i < backrefs->len; i++) {
|
|
||||||
PgfSequenceBackref backref = *vector_elem<PgfSequenceBackref>(backrefs,i);
|
PgfSequenceBackref backref = *vector_elem<PgfSequenceBackref>(backrefs,i);
|
||||||
switch (ref<PgfConcrLin>::get_tag(backref.container)) {
|
switch (ref<PgfConcrLin>::get_tag(backref.container)) {
|
||||||
case PgfConcrLin::tag: {
|
case PgfConcrLin::tag: {
|
||||||
@@ -589,7 +558,6 @@ void phrasetable_lookup(PgfPhrasetable table,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (!case_sensitive) {
|
if (!case_sensitive) {
|
||||||
phrasetable_lookup(table->left,sentence,false,scanner,err);
|
phrasetable_lookup(table->left,sentence,false,scanner,err);
|
||||||
@@ -714,7 +682,7 @@ void phrasetable_lookup_prefixes(PgfCohortsState *state,
|
|||||||
}
|
}
|
||||||
state->queue.push(current);
|
state->queue.push(current);
|
||||||
|
|
||||||
for (size_t i = 0; i < backrefs->len; i++) {
|
for (size_t i = 0; i < table->value.n_backrefs; i++) {
|
||||||
PgfSequenceBackref backref = *vector_elem<PgfSequenceBackref>(backrefs,i);
|
PgfSequenceBackref backref = *vector_elem<PgfSequenceBackref>(backrefs,i);
|
||||||
switch (ref<PgfConcrLin>::get_tag(backref.container)) {
|
switch (ref<PgfConcrLin>::get_tag(backref.container)) {
|
||||||
case PgfConcrLin::tag: {
|
case PgfConcrLin::tag: {
|
||||||
@@ -849,7 +817,7 @@ void phrasetable_iter(PgfConcr *concr,
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
if (table->value.backrefs != 0 && res == 0 && callback != 0) {
|
if (table->value.backrefs != 0 && res == 0 && callback != 0) {
|
||||||
for (size_t i = 0; i < table->value.backrefs->len; i++) {
|
for (size_t i = 0; i < table->value.n_backrefs; i++) {
|
||||||
PgfSequenceBackref backref = *vector_elem<PgfSequenceBackref>(table->value.backrefs,i);
|
PgfSequenceBackref backref = *vector_elem<PgfSequenceBackref>(table->value.backrefs,i);
|
||||||
switch (ref<PgfConcrLin>::get_tag(backref.container)) {
|
switch (ref<PgfConcrLin>::get_tag(backref.container)) {
|
||||||
case PgfConcrLin::tag: {
|
case PgfConcrLin::tag: {
|
||||||
|
|||||||
@@ -6,6 +6,11 @@ struct PgfSequenceBackref;
|
|||||||
|
|
||||||
struct PGF_INTERNAL_DECL PgfPhrasetableEntry {
|
struct PGF_INTERNAL_DECL PgfPhrasetableEntry {
|
||||||
ref<PgfSequence> seq;
|
ref<PgfSequence> seq;
|
||||||
|
|
||||||
|
// Here n_backrefs tells us how many actual backrefs there are in
|
||||||
|
// the vector backrefs. On the other hand, backrefs->len tells us
|
||||||
|
// how big buffer we have allocated.
|
||||||
|
size_t n_backrefs;
|
||||||
ref<Vector<PgfSequenceBackref>> backrefs;
|
ref<Vector<PgfSequenceBackref>> backrefs;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -64,7 +69,6 @@ PgfPhrasetable phrasetable_internalize(PgfPhrasetable table,
|
|||||||
|
|
||||||
PGF_INTERNAL_DECL
|
PGF_INTERNAL_DECL
|
||||||
ref<PgfSequence> phrasetable_relink(PgfPhrasetable table,
|
ref<PgfSequence> phrasetable_relink(PgfPhrasetable table,
|
||||||
ref<PgfConcrLincat> lincat,
|
|
||||||
object container,
|
object container,
|
||||||
size_t seq_index,
|
size_t seq_index,
|
||||||
size_t seq_id);
|
size_t seq_id);
|
||||||
@@ -124,4 +128,11 @@ int text_sequence_cmp(PgfTextSpot *spot, const uint8_t *end,
|
|||||||
ref<PgfSequence> seq, size_t *p_i,
|
ref<PgfSequence> seq, size_t *p_i,
|
||||||
bool case_sensitive, SeqMatch sm);
|
bool case_sensitive, SeqMatch sm);
|
||||||
|
|
||||||
|
// The following is used internally in the grammar builder
|
||||||
|
|
||||||
|
PGF_INTERNAL_DECL
|
||||||
|
void phrasetable_add_backref(ref<PgfPhrasetableEntry> entry, txn_t txn_id,
|
||||||
|
object container,
|
||||||
|
size_t seq_index);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -632,14 +632,14 @@ ref<PgfSequence> PgfReader::read_seq()
|
|||||||
return seq;
|
return seq;
|
||||||
}
|
}
|
||||||
|
|
||||||
ref<Vector<ref<PgfSequence>>> PgfReader::read_seq_ids(ref<PgfConcrLincat> lincat, object container)
|
ref<Vector<ref<PgfSequence>>> PgfReader::read_seq_ids(object container)
|
||||||
{
|
{
|
||||||
size_t len = read_len();
|
size_t len = read_len();
|
||||||
ref<Vector<ref<PgfSequence>>> vec = vector_new<ref<PgfSequence>>(len);
|
ref<Vector<ref<PgfSequence>>> vec = vector_new<ref<PgfSequence>>(len);
|
||||||
for (size_t i = 0; i < len; i++) {
|
for (size_t i = 0; i < len; i++) {
|
||||||
size_t seq_id = read_len();
|
size_t seq_id = read_len();
|
||||||
ref<PgfSequence> seq = phrasetable_relink(concrete->phrasetable,
|
ref<PgfSequence> seq = phrasetable_relink(concrete->phrasetable,
|
||||||
lincat, container, i,
|
container, i,
|
||||||
seq_id);
|
seq_id);
|
||||||
if (seq == 0) {
|
if (seq == 0) {
|
||||||
throw pgf_error("Invalid sequence id");
|
throw pgf_error("Invalid sequence id");
|
||||||
@@ -659,6 +659,7 @@ PgfPhrasetable PgfReader::read_phrasetable(size_t len)
|
|||||||
size_t half = len/2;
|
size_t half = len/2;
|
||||||
PgfPhrasetable left = read_phrasetable(half);
|
PgfPhrasetable left = read_phrasetable(half);
|
||||||
value.seq = read_seq();
|
value.seq = read_seq();
|
||||||
|
value.n_backrefs = 0;
|
||||||
value.backrefs = 0;
|
value.backrefs = 0;
|
||||||
PgfPhrasetable right = read_phrasetable(len-half-1);
|
PgfPhrasetable right = read_phrasetable(len-half-1);
|
||||||
|
|
||||||
@@ -683,7 +684,7 @@ ref<PgfConcrLincat> PgfReader::read_lincat()
|
|||||||
auto n_lindefs = read_len();
|
auto n_lindefs = read_len();
|
||||||
auto args = read_vector(&PgfReader::read_parg);
|
auto args = read_vector(&PgfReader::read_parg);
|
||||||
auto res = read_vector(&PgfReader::read_presult2);
|
auto res = read_vector(&PgfReader::read_presult2);
|
||||||
auto seqs = read_seq_ids(0, lincat.tagged());
|
auto seqs = read_seq_ids(lincat.tagged());
|
||||||
|
|
||||||
lincat->abscat = namespace_lookup(abstract->cats, &lincat->name);
|
lincat->abscat = namespace_lookup(abstract->cats, &lincat->name);
|
||||||
lincat->fields = fields;
|
lincat->fields = fields;
|
||||||
@@ -718,7 +719,7 @@ ref<PgfConcrLin> PgfReader::read_lin()
|
|||||||
|
|
||||||
auto args = read_vector(&PgfReader::read_parg);
|
auto args = read_vector(&PgfReader::read_parg);
|
||||||
auto res = read_vector(&PgfReader::read_presult2);
|
auto res = read_vector(&PgfReader::read_presult2);
|
||||||
auto seqs = read_seq_ids(lin->lincat, lin.tagged());
|
auto seqs = read_seq_ids(lin.tagged());
|
||||||
|
|
||||||
lin->args = args;
|
lin->args = args;
|
||||||
lin->res = res;
|
lin->res = res;
|
||||||
|
|||||||
@@ -78,7 +78,7 @@ public:
|
|||||||
ref<PgfPResult> read_presult();
|
ref<PgfPResult> read_presult();
|
||||||
PgfSymbol read_symbol();
|
PgfSymbol read_symbol();
|
||||||
ref<PgfSequence> read_seq();
|
ref<PgfSequence> read_seq();
|
||||||
ref<Vector<ref<PgfSequence>>> read_seq_ids(ref<PgfConcrLincat> lincat, object container);
|
ref<Vector<ref<PgfSequence>>> read_seq_ids(object container);
|
||||||
PgfPhrasetable read_phrasetable(size_t len);
|
PgfPhrasetable read_phrasetable(size_t len);
|
||||||
PgfPhrasetable read_phrasetable();
|
PgfPhrasetable read_phrasetable();
|
||||||
ref<PgfConcrLin> read_lin();
|
ref<PgfConcrLin> read_lin();
|
||||||
|
|||||||
@@ -28,25 +28,6 @@ ref<C> vector_new(Vector<A> C::* field, size_t len)
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
PGF_INTERNAL_DECL size_t
|
|
||||||
get_next_padovan(size_t min);
|
|
||||||
|
|
||||||
/* Resize a vector by changing its length. If there is no enough space
|
|
||||||
* the implementation will create a copy, but whenever possible it will
|
|
||||||
* return the reference to the original vector. A copy is created also
|
|
||||||
* if txn_id is different from the current transaction. In this way
|
|
||||||
* it is safe to change the length. */
|
|
||||||
template <class A> inline PGF_INTERNAL
|
|
||||||
ref<Vector<A>> vector_resize(ref<Vector<A>> vec, size_t len, txn_t txn_id)
|
|
||||||
{
|
|
||||||
size_t new_len = get_next_padovan(len);
|
|
||||||
size_t old_len = get_next_padovan(vec->len);
|
|
||||||
|
|
||||||
vec = PgfDB::realloc<Vector<A>>(vec,old_len*sizeof(A),new_len*sizeof(A),txn_id);
|
|
||||||
vec->len = len;
|
|
||||||
return vec;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class A> inline PGF_INTERNAL
|
template <class A> inline PGF_INTERNAL
|
||||||
ref<A> vector_elem(ref<Vector<A>> v, size_t index)
|
ref<A> vector_elem(ref<Vector<A>> v, size_t index)
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user