fix the handling of 'pre' in the C runtime

This commit is contained in:
kr.angelov
2013-10-28 12:35:37 +00:00
parent fad9947aab
commit 841880144d
5 changed files with 130 additions and 12 deletions

View File

@@ -255,7 +255,7 @@ gu_buf_sort(GuBuf *buf, GuOrder *order)
}
void*
gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, size_t field_offset, void *key)
gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, void *key)
{
size_t i = 0;
size_t j = seq->len-1;
@@ -263,8 +263,8 @@ gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, size_t field_off
while (i <= j) {
size_t k = (i+j) / 2;
uint8_t* elem_p = &seq->data[elem_size * k];
int cmp = order->compare(order, key, elem_p + field_offset);
int cmp = order->compare(order, key, elem_p);
if (cmp < 0) {
j = k-1;
} else if (cmp > 0) {
@@ -273,10 +273,36 @@ gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, size_t field_off
return elem_p;
}
}
return NULL;
}
bool
gu_seq_binsearch_index_(GuSeq *seq, GuOrder *order, size_t elem_size,
void *key, size_t *pindex)
{
size_t i = 0;
size_t j = seq->len-1;
while (i <= j) {
size_t k = (i+j) / 2;
uint8_t* elem_p = &seq->data[elem_size * k];
int cmp = order->compare(order, key, elem_p);
if (cmp < 0) {
j = k-1;
} else if (cmp > 0) {
i = k+1;
} else {
*pindex = k;
return true;
}
}
*pindex = j;
return false;
}
static void
gu_heap_siftdown(GuBuf *buf, GuOrder *order,
const void *value, int startpos, int pos)

View File

@@ -114,11 +114,18 @@ gu_seq_resize_tail(GuSeq seq, ptrdiff_t change);
void
gu_buf_sort(GuBuf *buf, GuOrder *order);
#define gu_seq_binsearch(S, O, T, N, V) \
((T*) gu_seq_binsearch_(S, O, sizeof(T), offsetof(T,N), V))
#define gu_seq_binsearch(S, O, T, V) \
((T*) gu_seq_binsearch_(S, O, sizeof(T), V))
void*
gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, size_t field_offset, void *key);
gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, void *key);
#define gu_seq_binsearch_index(S, O, T, V, PI) \
gu_seq_binsearch_index_(S, O, sizeof(T), V, PI)
bool
gu_seq_binsearch_index_(GuSeq *seq, GuOrder *order, size_t elem_size,
void *key, size_t *pindex);
// Using a buffer as a heap
void

View File

@@ -237,6 +237,7 @@ struct PgfConcr {
PgfCncOverloadMap* coerce_idx;
PgfCncFuns* cncfuns;
PgfSequences* sequences;
GuBuf* pre_sequences;
PgfCIdMap* cnccats;
PgfCallbacksMap* callbacks;
int total_cats;

View File

@@ -2,7 +2,7 @@
#include <pgf/linearizer.h>
#include <gu/seq.h>
#include <gu/assert.h>
#include <gu/choice.h>
#include <gu/file.h>
#include <gu/utf8.h>
#include <math.h>
@@ -1176,7 +1176,21 @@ pgf_parsing_lookahead(PgfParsing *ps, PgfParseState* state)
}
}
next:
next:;
size_t n_pres = gu_buf_length(ps->concr->pre_sequences);
for (size_t pi = 0; pi < n_pres; pi++) {
PgfSequence* seq = gu_buf_index(ps->concr->pre_sequences, PgfSequence, pi);
GuString current = ps->sentence + state->end_offset;
BIND_TYPE bind_type = state->needs_bind ? BIND_NONE : BIND_HARD;
if (pgf_symbols_cmp(&current, n, &bind_type, seq->syms) == 0) {
PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx);
entry->idx = seq->idx;
entry->bind_type = bind_type;
entry->offset = (current - ps->sentence);
}
}
j = s;
n++;
}
@@ -2426,8 +2440,8 @@ pgf_lookup_morpho(PgfConcr *concr, GuString sentence,
PgfMorphoCallback* callback, GuExn* err)
{
PgfSequence* seq = (PgfSequence*)
gu_seq_binsearch_(concr->sequences, pgf_sequence_order,
sizeof(PgfSequence), 0, (void*) sentence);
gu_seq_binsearch(concr->sequences, pgf_sequence_order,
PgfSequence, (void*) sentence);
if (seq != NULL && seq->idx != NULL)
pgf_morpho_iter(seq->idx, callback, err);
@@ -2493,11 +2507,76 @@ pgf_fullform_get_analyses(PgfFullFormEntry* entry,
pgf_morpho_iter(entry->idx, callback, err);
}
// The 'pre' construction needs a special handling since
// it cannot be sorted alphabetically (a single pre contains
// many alternative tokens).
static GuBuf*
pgf_parser_index_pre_(GuBuf* buf, PgfSymbols* syms,
GuChoice* ch, GuPool *pool)
{
size_t n_syms = gu_seq_length(syms);
for (size_t i = 0; i < n_syms; i++) {
PgfSymbol sym = gu_seq_get(syms, PgfSymbol, i);
GuVariantInfo inf = gu_variant_open(sym);
if (inf.tag == PGF_SYMBOL_KP) {
PgfSymbolKP* skp = inf.data;
if (buf == NULL) {
// Since most of the sequences doesn't contain 'pre'
// we create the buffer on demand. This minimizes
// the overhead.
buf = gu_new_buf(PgfSymbol, pool);
gu_buf_extend_n(buf, i);
for (size_t j = 0; j < i; j++) {
PgfSymbol sym = gu_seq_get(syms, PgfSymbol, j);
gu_buf_set(buf, PgfSymbol, j, sym);
}
}
int idx = gu_choice_next(ch, skp->n_forms+1);
if (idx == 0) {
buf = pgf_parser_index_pre_(buf, skp->default_form, ch, pool);
} else {
buf = pgf_parser_index_pre_(buf, skp->forms[idx-1].form, ch, pool);
}
} else {
if (buf != NULL) {
gu_buf_push(buf, PgfSymbol, sym);
}
}
}
return buf;
}
static void
pgf_parser_index_pre(PgfConcr* concr, PgfSequence* seq,
GuChoice* ch, GuPool *pool)
{
do {
GuChoiceMark mark = gu_choice_mark(ch);
GuBuf* buf =
pgf_parser_index_pre_(NULL, seq->syms, ch, pool);
if (buf != NULL) {
PgfSequence* pre_seq = gu_buf_extend(concr->pre_sequences);
pre_seq->syms = gu_buf_data_seq(buf);
pre_seq->idx = seq->idx;
}
gu_choice_reset(ch, mark);
} while (gu_choice_advance(ch));
}
void
pgf_parser_index(PgfConcr* concr,
PgfCCat* ccat, PgfProduction prod,
GuPool *pool)
{
GuPool* tmp_pool = gu_local_pool();
GuChoice* choice = gu_new_choice(tmp_pool); // we need this for the pres
for (size_t lin_idx = 0; lin_idx < ccat->cnccat->n_lins; lin_idx++) {
GuVariantInfo i = gu_variant_open(prod);
switch (i.tag) {
@@ -2510,6 +2589,7 @@ pgf_parser_index(PgfConcr* concr,
PgfSequence* seq = papp->fun->lins[lin_idx];
if (seq->idx == NULL) {
seq->idx = gu_new_buf(PgfProductionIdxEntry, pool);
pgf_parser_index_pre(concr, seq, choice, pool);
}
PgfProductionIdxEntry* entry = gu_buf_extend(seq->idx);
@@ -2525,6 +2605,8 @@ pgf_parser_index(PgfConcr* concr,
gu_impossible();
}
}
gu_pool_free(tmp_pool);
}
prob_t

View File

@@ -1142,7 +1142,9 @@ pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr)
concr->sequences =
pgf_read_sequences(rdr);
gu_return_on_exn(rdr->err, NULL);
concr->pre_sequences = gu_new_buf(PgfSequence, rdr->opool);
concr->cncfuns =
pgf_read_cncfuns(rdr, abstr, concr);
gu_return_on_exn(rdr->err, NULL);