1
0
forked from GitHub/gf-core

Added pgf_lookup_word_prefix, which makes it possible to do simple word prediction.

This commit is contained in:
kr.angelov
2014-03-07 21:24:20 +00:00
parent 752a0a3607
commit a77dc568bb
2 changed files with 40 additions and 1 deletions

View File

@@ -2581,6 +2581,7 @@ pgf_lookup_morpho(PgfConcr *concr, GuString sentence,
// Iteration state shared by the full-form lexicon enumeration and the
// word-prefix lookup; both install gu_fullform_enum_next as the
// `next` callback of the embedded enumeration.
typedef struct {
// Enumeration header; callers receive a pointer to this member.
GuEnum en;
// The sequences being walked (taken from concr->sequences).
PgfSequences* sequences;
// gu_fullform_enum_next ends the enumeration at the first sequence
// whose tokens do not start with this string. The full-form lexicon
// sets it to the empty string.
GuString prefix;
// Index into `sequences` of the next sequence to examine.
size_t seq_idx;
} PgfFullFormState;
@@ -2600,6 +2601,12 @@ gu_fullform_enum_next(GuEnum* self, void* to, GuPool* pool)
while (st->seq_idx < n_seqs) {
PgfSymbols* syms = gu_seq_index(st->sequences, PgfSequence, st->seq_idx)->syms;
GuString tokens = pgf_get_tokens(syms, 0, pool);
if (!gu_string_is_prefix(st->prefix, tokens)) {
st->seq_idx = n_seqs;
break;
}
if (strlen(tokens) > 0 &&
gu_seq_index(st->sequences, PgfSequence, st->seq_idx)->idx != NULL) {
entry = gu_new(PgfFullFormEntry, pool);
@@ -2609,7 +2616,7 @@ gu_fullform_enum_next(GuEnum* self, void* to, GuPool* pool)
st->seq_idx++;
break;
}
st->seq_idx++;
}
}
@@ -2623,6 +2630,7 @@ pgf_fullform_lexicon(PgfConcr *concr, GuPool* pool)
PgfFullFormState* st = gu_new(PgfFullFormState, pool);
st->en.next = gu_fullform_enum_next;
st->sequences = concr->sequences;
st->prefix = "";
st->seq_idx = 0;
return &st->en;
}
@@ -2640,6 +2648,33 @@ pgf_fullform_get_analyses(PgfFullFormEntry* entry,
pgf_morpho_iter(entry->idx, callback, err);
}
// Create an enumeration over the lexicon entries of `concr` whose
// tokens start with `prefix` (useful for simple word prediction).
//
// concr  - the concrete syntax whose sequences are searched
// prefix - the string that the tokens must begin with
// pool   - the pool from which the enumeration state is allocated
// err    - receives a PgfExn if the concrete syntax is not loaded
//
// Returns the enumeration, or NULL when the concrete syntax is not
// loaded. The entries are produced by gu_fullform_enum_next, which
// ends the enumeration at the first sequence whose tokens do not
// start with `prefix`.
GuEnum*
pgf_lookup_word_prefix(PgfConcr *concr, GuString prefix,
                       GuPool* pool, GuExn* err)
{
	if (concr->sequences == NULL) {
		GuExnData* err_data = gu_raise(err, PgfExn);
		if (err_data) {
			err_data->data = "The concrete syntax is not loaded";
		}
		// Return even when gu_raise handed back no exception data:
		// without sequences there is nothing to search, and going on
		// would hand a NULL sequence table to the code below.
		return NULL;
	}

	PgfFullFormState* state = gu_new(PgfFullFormState, pool);
	state->en.next   = gu_fullform_enum_next;
	state->sequences = concr->sequences;
	state->prefix    = prefix;
	state->seq_idx   = 0;

	// Position the enumeration at the first candidate sequence.
	// NOTE(review): on a miss the index is advanced by one; presumably
	// the search leaves it just before the insertion point - confirm
	// against gu_seq_binsearch_index.
	if (!gu_seq_binsearch_index(concr->sequences, pgf_sequence_order,
	                            PgfSequence, (void*) prefix,
	                            &state->seq_idx)) {
		state->seq_idx++;
	}

	return &state->en;
}
// The 'pre' construction needs special handling since
// it cannot be sorted alphabetically (a single pre contains
// many alternative tokens).

View File

@@ -131,6 +131,10 @@ void
pgf_fullform_get_analyses(PgfFullFormEntry* entry,
PgfMorphoCallback* callback, GuExn* err);
// Create an enumeration over the lexicon entries of `concr` whose
// tokens start with `prefix`; this makes simple word prediction
// possible. The enumeration state is allocated from `pool`.
// A PgfExn is raised through `err` if the concrete syntax is
// not loaded.
GuEnum*
pgf_lookup_word_prefix(PgfConcr *concr, GuString prefix,
GuPool* pool, GuExn* err);
PgfExprEnum*
pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat,
GuString sentence, double heuristics,