1
0
forked from GitHub/gf-core

word completion in the C runtime now returns multi-word expressions and the category for every expression

This commit is contained in:
kr.angelov
2013-08-19 15:34:19 +00:00
parent 043ab3e07a
commit 2d4f17e30c
4 changed files with 67 additions and 35 deletions

View File

@@ -10,6 +10,7 @@ typedef GuSeq PgfTokens; // -> PgfToken
typedef struct {
prob_t prob;
PgfCId cat;
PgfToken tok;
} PgfTokenProb;

View File

@@ -1814,6 +1814,49 @@ typedef struct {
PgfParseState* state;
} PgfPrefixTokenState;
static GuString
pgf_get_tokens(PgfSequence seq,
uint16_t seq_idx, uint8_t tok_idx,
GuPool* pool)
{
GuPool* tmp_pool = gu_new_pool();
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
GuStringBuf* sbuf = gu_string_buf(tmp_pool);
GuWriter* wtr = gu_string_buf_writer(sbuf);
// collect the tokens in the production
size_t len = gu_seq_length(seq);
for (size_t i = seq_idx; i < len; i++) {
PgfSymbol sym = gu_seq_get(seq, PgfSymbol, i);
GuVariantInfo i = gu_variant_open(sym);
switch (i.tag) {
case PGF_SYMBOL_KS: {
PgfSymbolKS* symks = i.data;
size_t len = gu_seq_length(symks->tokens);
for (size_t i = tok_idx; i < len; i++) {
if (i > 0) {
gu_putc(' ', wtr, err);
}
PgfToken tok = gu_seq_get(symks->tokens, PgfToken, i);
gu_string_write(tok, wtr, err);
}
tok_idx = 0;
}
default:
continue;
}
}
GuString tokens = gu_string_buf_freeze(sbuf, pool);
gu_pool_free(tmp_pool);
return tokens;
}
static bool
pgf_prefix_match_token(PgfTokenState* ts0, PgfToken tok, PgfItem* item)
{
@@ -1821,8 +1864,23 @@ pgf_prefix_match_token(PgfTokenState* ts0, PgfToken tok, PgfItem* item)
gu_container(ts0, PgfPrefixTokenState, ts);
if (gu_string_is_prefix(ts->prefix, tok)) {
size_t lin_idx;
PgfSequence seq;
pgf_item_sequence(item, &lin_idx, &seq, ts->pool);
uint16_t seq_idx = item->seq_idx;
uint8_t tok_idx = item->tok_idx;
// go one token back
if (tok_idx > 0)
tok_idx--;
else
seq_idx--;
ts->tp = gu_new(PgfTokenProb, ts->pool);
ts->tp->tok = tok;
ts->tp->tok =
pgf_get_tokens(seq, seq_idx, tok_idx, ts->pool);
ts->tp->cat = item->conts->ccat->cnccat->abscat->name;
ts->tp->prob = item->inside_prob+item->conts->outside_prob;
}
@@ -2446,37 +2504,9 @@ pgf_fullform_iter(GuMapItor* fn, const void* key, void* value, GuExn* err)
switch (i.tag) {
case PGF_PRODUCTION_APPLY: {
PgfProductionApply* papp = i.data;
GuPool* tmp_pool = gu_new_pool();
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
GuStringBuf* sbuf = gu_string_buf(tmp_pool);
GuWriter* wtr = gu_string_buf_writer(sbuf);
// collect the tokens in the production
PgfSequence seq = papp->fun->lins[cfc.lin_idx];
size_t len = gu_seq_length(seq);
for (size_t i = 0; i < len; i++) {
PgfSymbol sym = gu_seq_get(seq, PgfSymbol, i);
GuVariantInfo i = gu_variant_open(sym);
switch (i.tag) {
case PGF_SYMBOL_KS: {
PgfSymbolKS* symks = i.data;
size_t len = gu_seq_length(symks->tokens);
for (size_t i = 0; i < len; i++) {
if (i > 0) {
gu_putc(' ', wtr, err);
}
PgfToken tok = gu_seq_get(symks->tokens, PgfToken, i);
gu_string_write(tok, wtr, err);
}
}
default:
continue;
}
}
GuString tokens = gu_string_buf_freeze(sbuf, st->pool);
GuString tokens = pgf_get_tokens(seq, 0, 0, st->pool);
// create a new production index with keys that
// are multiword units

View File

@@ -920,9 +920,10 @@ Iter_fetch_token(IterObject* self)
if (tp == NULL)
return NULL;
PyObject* ty_tok = gu2py_string(tp->tok);
PyObject* res = Py_BuildValue("(f,O)", tp->prob, ty_tok);
Py_DECREF(ty_tok);
PyObject* py_tok = gu2py_string(tp->tok);
PyObject* py_cat = gu2py_string(tp->cat);
PyObject* res = Py_BuildValue("(f,O,O)", tp->prob, py_tok, py_cat);
Py_DECREF(py_tok);
return res;
}

View File

@@ -24,7 +24,7 @@ class Completer():
if state == 0:
line = readline.get_line_buffer()
line = line[0:readline.get_begidx()]
self.i = source_lang.getCompletions(line, prefix=prefix)
self.i = source_lang.complete(line, prefix=prefix)
self.tokens = sets.Set()
if len(self.tokens) > 50:
@@ -32,7 +32,7 @@ class Completer():
while True:
try:
(p,t) = self.i.next()
(p,t,c) = self.i.next()
if t not in self.tokens:
self.tokens.add(t)
return t