mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
fix the handling of 'pre' in the C runtime
This commit is contained in:
@@ -255,7 +255,7 @@ gu_buf_sort(GuBuf *buf, GuOrder *order)
|
||||
}
|
||||
|
||||
void*
|
||||
gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, size_t field_offset, void *key)
|
||||
gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, void *key)
|
||||
{
|
||||
size_t i = 0;
|
||||
size_t j = seq->len-1;
|
||||
@@ -263,8 +263,8 @@ gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, size_t field_off
|
||||
while (i <= j) {
|
||||
size_t k = (i+j) / 2;
|
||||
uint8_t* elem_p = &seq->data[elem_size * k];
|
||||
int cmp = order->compare(order, key, elem_p + field_offset);
|
||||
|
||||
int cmp = order->compare(order, key, elem_p);
|
||||
|
||||
if (cmp < 0) {
|
||||
j = k-1;
|
||||
} else if (cmp > 0) {
|
||||
@@ -273,10 +273,36 @@ gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, size_t field_off
|
||||
return elem_p;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool
|
||||
gu_seq_binsearch_index_(GuSeq *seq, GuOrder *order, size_t elem_size,
|
||||
void *key, size_t *pindex)
|
||||
{
|
||||
size_t i = 0;
|
||||
size_t j = seq->len-1;
|
||||
|
||||
while (i <= j) {
|
||||
size_t k = (i+j) / 2;
|
||||
uint8_t* elem_p = &seq->data[elem_size * k];
|
||||
int cmp = order->compare(order, key, elem_p);
|
||||
|
||||
if (cmp < 0) {
|
||||
j = k-1;
|
||||
} else if (cmp > 0) {
|
||||
i = k+1;
|
||||
} else {
|
||||
*pindex = k;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
*pindex = j;
|
||||
return false;
|
||||
}
|
||||
|
||||
static void
|
||||
gu_heap_siftdown(GuBuf *buf, GuOrder *order,
|
||||
const void *value, int startpos, int pos)
|
||||
|
||||
@@ -114,11 +114,18 @@ gu_seq_resize_tail(GuSeq seq, ptrdiff_t change);
|
||||
void
|
||||
gu_buf_sort(GuBuf *buf, GuOrder *order);
|
||||
|
||||
#define gu_seq_binsearch(S, O, T, N, V) \
|
||||
((T*) gu_seq_binsearch_(S, O, sizeof(T), offsetof(T,N), V))
|
||||
#define gu_seq_binsearch(S, O, T, V) \
|
||||
((T*) gu_seq_binsearch_(S, O, sizeof(T), V))
|
||||
|
||||
void*
|
||||
gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, size_t field_offset, void *key);
|
||||
gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, void *key);
|
||||
|
||||
#define gu_seq_binsearch_index(S, O, T, V, PI) \
|
||||
gu_seq_binsearch_index_(S, O, sizeof(T), V, PI)
|
||||
|
||||
bool
|
||||
gu_seq_binsearch_index_(GuSeq *seq, GuOrder *order, size_t elem_size,
|
||||
void *key, size_t *pindex);
|
||||
|
||||
// Using a buffer as a heap
|
||||
void
|
||||
|
||||
@@ -237,6 +237,7 @@ struct PgfConcr {
|
||||
PgfCncOverloadMap* coerce_idx;
|
||||
PgfCncFuns* cncfuns;
|
||||
PgfSequences* sequences;
|
||||
GuBuf* pre_sequences;
|
||||
PgfCIdMap* cnccats;
|
||||
PgfCallbacksMap* callbacks;
|
||||
int total_cats;
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
#include <pgf/linearizer.h>
|
||||
#include <gu/seq.h>
|
||||
#include <gu/assert.h>
|
||||
|
||||
#include <gu/choice.h>
|
||||
#include <gu/file.h>
|
||||
#include <gu/utf8.h>
|
||||
#include <math.h>
|
||||
@@ -1176,7 +1176,21 @@ pgf_parsing_lookahead(PgfParsing *ps, PgfParseState* state)
|
||||
}
|
||||
}
|
||||
|
||||
next:
|
||||
next:;
|
||||
size_t n_pres = gu_buf_length(ps->concr->pre_sequences);
|
||||
for (size_t pi = 0; pi < n_pres; pi++) {
|
||||
PgfSequence* seq = gu_buf_index(ps->concr->pre_sequences, PgfSequence, pi);
|
||||
|
||||
GuString current = ps->sentence + state->end_offset;
|
||||
BIND_TYPE bind_type = state->needs_bind ? BIND_NONE : BIND_HARD;
|
||||
if (pgf_symbols_cmp(¤t, n, &bind_type, seq->syms) == 0) {
|
||||
PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
|
||||
entry->idx = seq->idx;
|
||||
entry->bind_type = bind_type;
|
||||
entry->offset = (current - ps->sentence);
|
||||
}
|
||||
}
|
||||
|
||||
j = s;
|
||||
n++;
|
||||
}
|
||||
@@ -2426,8 +2440,8 @@ pgf_lookup_morpho(PgfConcr *concr, GuString sentence,
|
||||
PgfMorphoCallback* callback, GuExn* err)
|
||||
{
|
||||
PgfSequence* seq = (PgfSequence*)
|
||||
gu_seq_binsearch_(concr->sequences, pgf_sequence_order,
|
||||
sizeof(PgfSequence), 0, (void*) sentence);
|
||||
gu_seq_binsearch(concr->sequences, pgf_sequence_order,
|
||||
PgfSequence, (void*) sentence);
|
||||
|
||||
if (seq != NULL && seq->idx != NULL)
|
||||
pgf_morpho_iter(seq->idx, callback, err);
|
||||
@@ -2493,11 +2507,76 @@ pgf_fullform_get_analyses(PgfFullFormEntry* entry,
|
||||
pgf_morpho_iter(entry->idx, callback, err);
|
||||
}
|
||||
|
||||
// The 'pre' construction needs a special handling since
|
||||
// it cannot be sorted alphabetically (a single pre contains
|
||||
// many alternative tokens).
|
||||
static GuBuf*
|
||||
pgf_parser_index_pre_(GuBuf* buf, PgfSymbols* syms,
|
||||
GuChoice* ch, GuPool *pool)
|
||||
{
|
||||
size_t n_syms = gu_seq_length(syms);
|
||||
for (size_t i = 0; i < n_syms; i++) {
|
||||
PgfSymbol sym = gu_seq_get(syms, PgfSymbol, i);
|
||||
GuVariantInfo inf = gu_variant_open(sym);
|
||||
if (inf.tag == PGF_SYMBOL_KP) {
|
||||
PgfSymbolKP* skp = inf.data;
|
||||
|
||||
if (buf == NULL) {
|
||||
// Since most of the sequences doesn't contain 'pre'
|
||||
// we create the buffer on demand. This minimizes
|
||||
// the overhead.
|
||||
buf = gu_new_buf(PgfSymbol, pool);
|
||||
gu_buf_extend_n(buf, i);
|
||||
for (size_t j = 0; j < i; j++) {
|
||||
PgfSymbol sym = gu_seq_get(syms, PgfSymbol, j);
|
||||
gu_buf_set(buf, PgfSymbol, j, sym);
|
||||
}
|
||||
}
|
||||
|
||||
int idx = gu_choice_next(ch, skp->n_forms+1);
|
||||
if (idx == 0) {
|
||||
buf = pgf_parser_index_pre_(buf, skp->default_form, ch, pool);
|
||||
} else {
|
||||
buf = pgf_parser_index_pre_(buf, skp->forms[idx-1].form, ch, pool);
|
||||
}
|
||||
} else {
|
||||
if (buf != NULL) {
|
||||
gu_buf_push(buf, PgfSymbol, sym);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_parser_index_pre(PgfConcr* concr, PgfSequence* seq,
|
||||
GuChoice* ch, GuPool *pool)
|
||||
{
|
||||
do {
|
||||
GuChoiceMark mark = gu_choice_mark(ch);
|
||||
|
||||
GuBuf* buf =
|
||||
pgf_parser_index_pre_(NULL, seq->syms, ch, pool);
|
||||
|
||||
if (buf != NULL) {
|
||||
PgfSequence* pre_seq = gu_buf_extend(concr->pre_sequences);
|
||||
pre_seq->syms = gu_buf_data_seq(buf);
|
||||
pre_seq->idx = seq->idx;
|
||||
}
|
||||
|
||||
gu_choice_reset(ch, mark);
|
||||
} while (gu_choice_advance(ch));
|
||||
}
|
||||
|
||||
void
|
||||
pgf_parser_index(PgfConcr* concr,
|
||||
PgfCCat* ccat, PgfProduction prod,
|
||||
GuPool *pool)
|
||||
{
|
||||
GuPool* tmp_pool = gu_local_pool();
|
||||
GuChoice* choice = gu_new_choice(tmp_pool); // we need this for the pres
|
||||
|
||||
for (size_t lin_idx = 0; lin_idx < ccat->cnccat->n_lins; lin_idx++) {
|
||||
GuVariantInfo i = gu_variant_open(prod);
|
||||
switch (i.tag) {
|
||||
@@ -2510,6 +2589,7 @@ pgf_parser_index(PgfConcr* concr,
|
||||
PgfSequence* seq = papp->fun->lins[lin_idx];
|
||||
if (seq->idx == NULL) {
|
||||
seq->idx = gu_new_buf(PgfProductionIdxEntry, pool);
|
||||
pgf_parser_index_pre(concr, seq, choice, pool);
|
||||
}
|
||||
|
||||
PgfProductionIdxEntry* entry = gu_buf_extend(seq->idx);
|
||||
@@ -2525,6 +2605,8 @@ pgf_parser_index(PgfConcr* concr,
|
||||
gu_impossible();
|
||||
}
|
||||
}
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
}
|
||||
|
||||
prob_t
|
||||
|
||||
@@ -1142,7 +1142,9 @@ pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr)
|
||||
concr->sequences =
|
||||
pgf_read_sequences(rdr);
|
||||
gu_return_on_exn(rdr->err, NULL);
|
||||
|
||||
|
||||
concr->pre_sequences = gu_new_buf(PgfSequence, rdr->opool);
|
||||
|
||||
concr->cncfuns =
|
||||
pgf_read_cncfuns(rdr, abstr, concr);
|
||||
gu_return_on_exn(rdr->err, NULL);
|
||||
|
||||
Reference in New Issue
Block a user