From a77dc568bb334bd9ca1530475a2ae894db3fd85c Mon Sep 17 00:00:00 2001
From: "kr.angelov"
Date: Fri, 7 Mar 2014 21:24:20 +0000
Subject: [PATCH] added pgf_lookup_word_prefix which makes it possible to do simple word prediction

---
 src/runtime/c/pgf/parser.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
 src/runtime/c/pgf/pgf.h    |  4 ++++
 2 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c
index 8f58026ed..74e3fb989 100644
--- a/src/runtime/c/pgf/parser.c
+++ b/src/runtime/c/pgf/parser.c
@@ -2581,6 +2581,7 @@ pgf_lookup_morpho(PgfConcr *concr, GuString sentence,
 typedef struct {
 	GuEnum en;
 	PgfSequences* sequences;
+	GuString prefix;
 	size_t seq_idx;
 } PgfFullFormState;
 
@@ -2600,6 +2601,12 @@ gu_fullform_enum_next(GuEnum* self, void* to, GuPool* pool)
 		while (st->seq_idx < n_seqs) {
 			PgfSymbols* syms = gu_seq_index(st->sequences, PgfSequence, st->seq_idx)->syms;
 			GuString tokens = pgf_get_tokens(syms, 0, pool);
+
+			if (!gu_string_is_prefix(st->prefix, tokens)) {
+				st->seq_idx = n_seqs;
+				break;
+			}
+
 			if (strlen(tokens) > 0 &&
 			    gu_seq_index(st->sequences, PgfSequence, st->seq_idx)->idx != NULL) {
 				entry = gu_new(PgfFullFormEntry, pool);
@@ -2609,7 +2616,7 @@ gu_fullform_enum_next(GuEnum* self, void* to, GuPool* pool)
 				st->seq_idx++;
 				break;
 			}
-			
+
 			st->seq_idx++;
 		}
 	}
@@ -2623,6 +2630,7 @@ pgf_fullform_lexicon(PgfConcr *concr, GuPool* pool)
 	PgfFullFormState* st = gu_new(PgfFullFormState, pool);
 	st->en.next = gu_fullform_enum_next;
 	st->sequences = concr->sequences;
+	st->prefix = "";
 	st->seq_idx = 0;
 	return &st->en;
 }
@@ -2640,6 +2648,41 @@ pgf_fullform_get_analyses(PgfFullFormEntry* entry,
 	pgf_morpho_iter(entry->idx, callback, err);
 }
 
+// Returns an enumeration over the full-form entries, starting at the
+// position that a binary search for prefix selects in concr->sequences
+// and ending at the first sequence whose tokens do not start with prefix.
+// Raises PgfExn on err and returns NULL when concr->sequences is NULL.
+GuEnum*
+pgf_lookup_word_prefix(PgfConcr *concr, GuString prefix,
+                       GuPool* pool, GuExn* err)
+{
+	if (concr->sequences == NULL) {
+		GuExnData* err_data = gu_raise(err, PgfExn);
+		if (err_data) {
+			err_data->data = "The concrete syntax is not loaded";
+		}
+		// Bail out even when gu_raise() yields no data slot;
+		// falling through would search a NULL sequence table.
+		return NULL;
+	}
+
+	PgfFullFormState* state = gu_new(PgfFullFormState, pool);
+	state->en.next = gu_fullform_enum_next;
+	state->sequences = concr->sequences;
+	state->prefix = prefix;
+	state->seq_idx = 0;
+
+	if (!gu_seq_binsearch_index(concr->sequences, pgf_sequence_order,
+	                            PgfSequence, (void*) prefix,
+	                            &state->seq_idx)) {
+		// NOTE(review): assumes a failed search leaves seq_idx one
+		// position before the insertion point - confirm in libgu.
+		state->seq_idx++;
+	}
+
+	return &state->en;
+}
+
 // The 'pre' construction needs a special handling since
 // it cannot be sorted alphabetically (a single pre contains
 // many alternative tokens).
diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h
index 20e6b1fa8..87ca3ae28 100644
--- a/src/runtime/c/pgf/pgf.h
+++ b/src/runtime/c/pgf/pgf.h
@@ -131,6 +131,10 @@ void
 pgf_fullform_get_analyses(PgfFullFormEntry* entry,
                           PgfMorphoCallback* callback, GuExn* err);
 
+GuEnum*
+pgf_lookup_word_prefix(PgfConcr *concr, GuString prefix,
+                       GuPool* pool, GuExn* err);
+
 PgfExprEnum*
 pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat, GuString sentence,
                           double heuristics,