From 841880144db75a25cbeb2258bdd9784cc2f3540d Mon Sep 17 00:00:00 2001 From: "kr.angelov" Date: Mon, 28 Oct 2013 12:35:37 +0000 Subject: [PATCH] fix the handling of 'pre' in the C runtime --- src/runtime/c/gu/seq.c | 34 ++++++++++++-- src/runtime/c/gu/seq.h | 13 ++++-- src/runtime/c/pgf/data.h | 1 + src/runtime/c/pgf/parser.c | 90 ++++++++++++++++++++++++++++++++++++-- src/runtime/c/pgf/reader.c | 4 +- 5 files changed, 130 insertions(+), 12 deletions(-) diff --git a/src/runtime/c/gu/seq.c b/src/runtime/c/gu/seq.c index b798d2923..8ec6480cc 100644 --- a/src/runtime/c/gu/seq.c +++ b/src/runtime/c/gu/seq.c @@ -255,7 +255,7 @@ gu_buf_sort(GuBuf *buf, GuOrder *order) } void* -gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, size_t field_offset, void *key) +gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, void *key) { size_t i = 0; size_t j = seq->len-1; @@ -263,8 +263,8 @@ gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, size_t field_off while (i <= j) { size_t k = (i+j) / 2; uint8_t* elem_p = &seq->data[elem_size * k]; - int cmp = order->compare(order, key, elem_p + field_offset); - + int cmp = order->compare(order, key, elem_p); + if (cmp < 0) { j = k-1; } else if (cmp > 0) { @@ -273,10 +273,36 @@ gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, size_t field_off return elem_p; } } - + return NULL; } +bool +gu_seq_binsearch_index_(GuSeq *seq, GuOrder *order, size_t elem_size, + void *key, size_t *pindex) +{ + size_t i = 0; + size_t j = seq->len-1; + + while (i <= j) { + size_t k = (i+j) / 2; + uint8_t* elem_p = &seq->data[elem_size * k]; + int cmp = order->compare(order, key, elem_p); + + if (cmp < 0) { + j = k-1; + } else if (cmp > 0) { + i = k+1; + } else { + *pindex = k; + return true; + } + } + + *pindex = j; + return false; +} + static void gu_heap_siftdown(GuBuf *buf, GuOrder *order, const void *value, int startpos, int pos) diff --git a/src/runtime/c/gu/seq.h b/src/runtime/c/gu/seq.h index cb18a90b5..2b0020bab 100644 --- a/src/runtime/c/gu/seq.h +++ b/src/runtime/c/gu/seq.h @@ -114,11 +114,18 @@ gu_seq_resize_tail(GuSeq seq, ptrdiff_t change); void gu_buf_sort(GuBuf *buf, GuOrder *order); -#define gu_seq_binsearch(S, O, T, N, V) \ - ((T*) gu_seq_binsearch_(S, O, sizeof(T), offsetof(T,N), V)) +#define gu_seq_binsearch(S, O, T, V) \ + ((T*) gu_seq_binsearch_(S, O, sizeof(T), V)) void* -gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, size_t field_offset, void *key); +gu_seq_binsearch_(GuSeq *seq, GuOrder *order, size_t elem_size, void *key); + +#define gu_seq_binsearch_index(S, O, T, V, PI) \ + gu_seq_binsearch_index_(S, O, sizeof(T), V, PI) + +bool +gu_seq_binsearch_index_(GuSeq *seq, GuOrder *order, size_t elem_size, + void *key, size_t *pindex); // Using a buffer as a heap void diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h index 163d45d95..ea932d111 100644 --- a/src/runtime/c/pgf/data.h +++ b/src/runtime/c/pgf/data.h @@ -237,6 +237,7 @@ struct PgfConcr { PgfCncOverloadMap* coerce_idx; PgfCncFuns* cncfuns; PgfSequences* sequences; + GuBuf* pre_sequences; PgfCIdMap* cnccats; PgfCallbacksMap* callbacks; int total_cats; diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index 205655cdc..b857ae7d0 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -2,7 +2,7 @@ #include #include #include - +#include #include #include #include @@ -1176,7 +1176,21 @@ pgf_parsing_lookahead(PgfParsing *ps, PgfParseState* state) } } -next: +next:; + size_t n_pres = gu_buf_length(ps->concr->pre_sequences); + for (size_t pi = 0; pi < n_pres; pi++) { + PgfSequence* seq = gu_buf_index(ps->concr->pre_sequences, PgfSequence, pi); + + GuString current = ps->sentence + state->end_offset; + BIND_TYPE bind_type = state->needs_bind ? BIND_NONE : BIND_HARD; + if (pgf_symbols_cmp(¤t, n, &bind_type, seq->syms) == 0) { + PgfLexiconIdxEntry* entry = gu_buf_extend(state->lexicon_idx); + entry->idx = seq->idx; + entry->bind_type = bind_type; + entry->offset = (current - ps->sentence); + } + } + j = s; n++; } @@ -2426,8 +2440,8 @@ pgf_lookup_morpho(PgfConcr *concr, GuString sentence, PgfMorphoCallback* callback, GuExn* err) { PgfSequence* seq = (PgfSequence*) - gu_seq_binsearch_(concr->sequences, pgf_sequence_order, - sizeof(PgfSequence), 0, (void*) sentence); + gu_seq_binsearch(concr->sequences, pgf_sequence_order, + PgfSequence, (void*) sentence); if (seq != NULL && seq->idx != NULL) pgf_morpho_iter(seq->idx, callback, err); @@ -2493,11 +2507,76 @@ pgf_fullform_get_analyses(PgfFullFormEntry* entry, pgf_morpho_iter(entry->idx, callback, err); } +// The 'pre' construction needs a special handling since +// it cannot be sorted alphabetically (a single pre contains +// many alternative tokens). +static GuBuf* +pgf_parser_index_pre_(GuBuf* buf, PgfSymbols* syms, + GuChoice* ch, GuPool *pool) +{ + size_t n_syms = gu_seq_length(syms); + for (size_t i = 0; i < n_syms; i++) { + PgfSymbol sym = gu_seq_get(syms, PgfSymbol, i); + GuVariantInfo inf = gu_variant_open(sym); + if (inf.tag == PGF_SYMBOL_KP) { + PgfSymbolKP* skp = inf.data; + + if (buf == NULL) { + // Since most of the sequences doesn't contain 'pre' + // we create the buffer on demand. This minimizes + // the overhead. + buf = gu_new_buf(PgfSymbol, pool); + gu_buf_extend_n(buf, i); + for (size_t j = 0; j < i; j++) { + PgfSymbol sym = gu_seq_get(syms, PgfSymbol, j); + gu_buf_set(buf, PgfSymbol, j, sym); + } + } + + int idx = gu_choice_next(ch, skp->n_forms+1); + if (idx == 0) { + buf = pgf_parser_index_pre_(buf, skp->default_form, ch, pool); + } else { + buf = pgf_parser_index_pre_(buf, skp->forms[idx-1].form, ch, pool); + } + } else { + if (buf != NULL) { + gu_buf_push(buf, PgfSymbol, sym); + } + } + } + + return buf; +} + +static void +pgf_parser_index_pre(PgfConcr* concr, PgfSequence* seq, + GuChoice* ch, GuPool *pool) +{ + do { + GuChoiceMark mark = gu_choice_mark(ch); + + GuBuf* buf = + pgf_parser_index_pre_(NULL, seq->syms, ch, pool); + + if (buf != NULL) { + PgfSequence* pre_seq = gu_buf_extend(concr->pre_sequences); + pre_seq->syms = gu_buf_data_seq(buf); + pre_seq->idx = seq->idx; + } + + gu_choice_reset(ch, mark); + } while (gu_choice_advance(ch)); +} + void pgf_parser_index(PgfConcr* concr, PgfCCat* ccat, PgfProduction prod, GuPool *pool) { + GuPool* tmp_pool = gu_local_pool(); + GuChoice* choice = gu_new_choice(tmp_pool); // we need this for the pres + for (size_t lin_idx = 0; lin_idx < ccat->cnccat->n_lins; lin_idx++) { GuVariantInfo i = gu_variant_open(prod); switch (i.tag) { @@ -2510,6 +2589,7 @@ pgf_parser_index(PgfConcr* concr, PgfSequence* seq = papp->fun->lins[lin_idx]; if (seq->idx == NULL) { seq->idx = gu_new_buf(PgfProductionIdxEntry, pool); + pgf_parser_index_pre(concr, seq, choice, pool); } PgfProductionIdxEntry* entry = gu_buf_extend(seq->idx); @@ -2525,6 +2605,8 @@ pgf_parser_index(PgfConcr* concr, gu_impossible(); } } + + gu_pool_free(tmp_pool); } prob_t diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c index 1e29c119c..41619a0b8 100644 --- a/src/runtime/c/pgf/reader.c +++ b/src/runtime/c/pgf/reader.c @@ -1142,7 +1142,9 @@ pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr) concr->sequences = pgf_read_sequences(rdr); gu_return_on_exn(rdr->err, NULL); - + + concr->pre_sequences = gu_new_buf(PgfSequence, rdr->opool); + concr->cncfuns = pgf_read_cncfuns(rdr, abstr, concr); gu_return_on_exn(rdr->err, NULL);