mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
word completion in the C runtime. The runtime/python/test.py example is now using readline with word completion
This commit is contained in:
@@ -311,6 +311,58 @@ gu_string_to_double(GuString s, double *res)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
gu_string_is_prefix(GuString s1, GuString s2)
|
||||
{
|
||||
GuWord w1 = s1.w_;
|
||||
uint8_t buf1[sizeof(GuWord)];
|
||||
size_t sz1;
|
||||
char* str1;
|
||||
if (w1 & 1) {
|
||||
sz1 = (w1 & 0xff) >> 1;
|
||||
gu_assert(sz1 <= sizeof(GuWord));
|
||||
size_t i = sz1;
|
||||
while (i > 0) {
|
||||
w1 >>= 8;
|
||||
buf1[--i] = w1 & 0xff;
|
||||
}
|
||||
str1 = (char*) buf1;
|
||||
} else {
|
||||
uint8_t* p = (void*) w1;
|
||||
sz1 = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
|
||||
str1 = (char*) &p[1];
|
||||
}
|
||||
|
||||
GuWord w2 = s2.w_;
|
||||
uint8_t buf2[sizeof(GuWord)];
|
||||
size_t sz2;
|
||||
char* str2;
|
||||
if (w2 & 1) {
|
||||
sz2 = (w2 & 0xff) >> 1;
|
||||
gu_assert(sz2 <= sizeof(GuWord));
|
||||
size_t i = sz2;
|
||||
while (i > 0) {
|
||||
w2 >>= 8;
|
||||
buf2[--i] = w2 & 0xff;
|
||||
}
|
||||
str2 = (char*) buf2;
|
||||
} else {
|
||||
uint8_t* p = (void*) w2;
|
||||
sz2 = (p[0] == 0) ? ((size_t*) p)[-1] : p[0];
|
||||
str2 = (char*) &p[1];
|
||||
}
|
||||
|
||||
while (sz1 > 0 && sz2 > 0) {
|
||||
if (*str1 != *str2)
|
||||
return false;
|
||||
|
||||
str1++; sz1--;
|
||||
str2++; sz2--;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
GuWord
|
||||
gu_string_hash(GuString s)
|
||||
{
|
||||
|
||||
@@ -69,6 +69,10 @@ gu_string_to_int(GuString s, int *res);
|
||||
bool
|
||||
gu_string_to_double(GuString s, double *res);
|
||||
|
||||
|
||||
bool
|
||||
gu_string_is_prefix(GuString s1, GuString s2);
|
||||
|
||||
#endif // GU_STRING_H_
|
||||
|
||||
#if defined(GU_HASH_H_) && !defined(GU_STRING_H_HASH_)
|
||||
|
||||
@@ -2,11 +2,17 @@
|
||||
#define PGF_LEXER_H_
|
||||
|
||||
#include <gu/read.h>
|
||||
#include <pgf/expr.h>
|
||||
|
||||
/// A single lexical token
|
||||
typedef GuString PgfToken;
|
||||
typedef GuSeq PgfTokens; // -> PgfToken
|
||||
|
||||
typedef struct {
|
||||
prob_t prob;
|
||||
PgfToken tok;
|
||||
} PgfTokenProb;
|
||||
|
||||
typedef struct {
|
||||
PgfToken (*read_token)();
|
||||
PgfToken tok;
|
||||
|
||||
@@ -46,7 +46,6 @@ typedef struct {
|
||||
PgfConcr* concr;
|
||||
GuPool* pool;
|
||||
GuBuf* expr_queue;
|
||||
PgfItem* target;
|
||||
PgfExpr meta_var;
|
||||
PgfProduction meta_prod;
|
||||
int max_fid;
|
||||
@@ -76,11 +75,18 @@ GU_DEFINE_TYPE(PgfProductionIdx, GuMap,
|
||||
gu_type(PgfCFCat), &pgf_cfcat_hasher,
|
||||
gu_type(PgfProductionSeq), &gu_null_seq);
|
||||
|
||||
typedef struct PgfTokenState PgfTokenState;
|
||||
|
||||
typedef struct {
|
||||
PgfToken tok;
|
||||
PgfProductionIdx* lexicon_idx;
|
||||
bool (*match_token)(PgfTokenState* ts, PgfToken tok, PgfItem* item);
|
||||
PgfToken (*get_token)(PgfTokenState* ts);
|
||||
PgfProductionIdx* (*get_lexicon_idx)(PgfTokenState* ts);
|
||||
} PgfTokenFn;
|
||||
|
||||
struct PgfTokenState {
|
||||
PgfTokenFn* fn;
|
||||
prob_t lexical_prob;
|
||||
} PgfTokenState;
|
||||
};
|
||||
|
||||
struct PgfParseState {
|
||||
PgfParseState* next;
|
||||
@@ -785,9 +791,8 @@ static void
|
||||
pgf_parsing_add_transition(PgfParseState* before, PgfParseState* after,
|
||||
PgfToken tok, PgfItem* item)
|
||||
{
|
||||
if (gu_string_eq(tok, after->ts->tok)) {
|
||||
if (after->ts->fn->match_token(after->ts, tok, item)) {
|
||||
if (after->next == NULL) {
|
||||
after->ps->target = item;
|
||||
after->viterbi_prob =
|
||||
item->inside_prob+item->conts->outside_prob;
|
||||
}
|
||||
@@ -1076,20 +1081,31 @@ pgf_parsing_td_predict(PgfParseState* before, PgfParseState* after,
|
||||
item->inside_prob-conts->ccat->viterbi_prob+
|
||||
item->conts->outside_prob;
|
||||
|
||||
size_t n_prods = ccat->n_synprods;
|
||||
PgfProductionIdx* lexicon_idx = NULL;
|
||||
if (after != NULL) {
|
||||
lexicon_idx = after->ts->fn->get_lexicon_idx(after->ts);
|
||||
|
||||
// we don't know the current token.
|
||||
// probably we just compute the list of completions
|
||||
if (lexicon_idx == NULL)
|
||||
n_prods = gu_seq_length(ccat->prods);
|
||||
}
|
||||
|
||||
// Top-down prediction for syntactic rules
|
||||
PgfProductionSeq prods = ccat->prods;
|
||||
for (size_t i = 0; i < ccat->n_synprods; i++) {
|
||||
for (size_t i = 0; i < n_prods; i++) {
|
||||
PgfProduction prod =
|
||||
gu_seq_get(prods, PgfProduction, i);
|
||||
gu_seq_get(ccat->prods, PgfProduction, i);
|
||||
pgf_parsing_production(before, conts, prod);
|
||||
}
|
||||
|
||||
// Bottom-up prediction for lexical rules
|
||||
if (after != NULL && after->ts->lexicon_idx != NULL) {
|
||||
|
||||
if (lexicon_idx != NULL) {
|
||||
PgfCFCat cfc = {ccat, lin_idx};
|
||||
PgfProductionSeq tok_prods =
|
||||
gu_map_get(after->ts->lexicon_idx, &cfc, PgfProductionSeq);
|
||||
|
||||
gu_map_get(lexicon_idx, &cfc, PgfProductionSeq);
|
||||
|
||||
if (!gu_seq_is_null(tok_prods)) {
|
||||
size_t n_prods = gu_seq_length(tok_prods);
|
||||
for (size_t i = 0; i < n_prods; i++) {
|
||||
@@ -1141,20 +1157,24 @@ static void
|
||||
pgf_parsing_meta_scan(PgfParseState* before, PgfParseState* after,
|
||||
PgfItem* meta_item, prob_t meta_prob)
|
||||
{
|
||||
PgfItem* item = pgf_item_copy(meta_item, before->ps->pool, before->ps);
|
||||
item->inside_prob += meta_prob;
|
||||
PgfToken tok = after->ts->fn->get_token(after->ts);
|
||||
|
||||
if (!gu_string_eq(tok, gu_empty_string)) {
|
||||
PgfItem* item = pgf_item_copy(meta_item, before->ps->pool, before->ps);
|
||||
item->inside_prob += meta_prob;
|
||||
|
||||
PgfSymbol prev = item->curr_sym;
|
||||
PgfSymbolKS* sks = (PgfSymbolKS*)
|
||||
gu_alloc_variant(PGF_SYMBOL_KS,
|
||||
sizeof(PgfSymbolKS)+sizeof(PgfSymbol),
|
||||
gu_alignof(PgfSymbolKS),
|
||||
&item->curr_sym, after->ps->pool);
|
||||
*((PgfSymbol*)(sks+1)) = prev;
|
||||
sks->tokens = gu_new_seq(PgfToken, 1, after->ps->pool);
|
||||
gu_seq_set(sks->tokens, PgfToken, 0, after->ts->tok);
|
||||
PgfSymbol prev = item->curr_sym;
|
||||
PgfSymbolKS* sks = (PgfSymbolKS*)
|
||||
gu_alloc_variant(PGF_SYMBOL_KS,
|
||||
sizeof(PgfSymbolKS)+sizeof(PgfSymbol),
|
||||
gu_alignof(PgfSymbolKS),
|
||||
&item->curr_sym, after->ps->pool);
|
||||
*((PgfSymbol*)(sks+1)) = prev;
|
||||
sks->tokens = gu_new_seq(PgfToken, 1, after->ps->pool);
|
||||
gu_seq_set(sks->tokens, PgfToken, 0, tok);
|
||||
|
||||
gu_buf_heap_push(before->agenda, &pgf_item_prob_order, &item);
|
||||
gu_buf_heap_push(before->agenda, &pgf_item_prob_order, &item);
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
@@ -1468,8 +1488,9 @@ pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item)
|
||||
pgf_parsing_symbol(before, after, item, sym);
|
||||
}
|
||||
} else {
|
||||
PgfToken tok = (after != NULL) ? after->ts->tok
|
||||
: gu_empty_string;
|
||||
PgfToken tok = (after != NULL)
|
||||
? after->ts->fn->get_token(after->ts)
|
||||
: gu_empty_string;
|
||||
|
||||
PgfExprProb *ep = NULL;
|
||||
bool accepted =
|
||||
@@ -1563,7 +1584,7 @@ pgf_parsing_proceed(PgfParseState* state)
|
||||
before = st;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
prob_t state_delta =
|
||||
(st->viterbi_prob-(st->next ? st->next->viterbi_prob : 0))*
|
||||
state->ps->beam_size;
|
||||
@@ -1623,7 +1644,6 @@ pgf_new_parsing(PgfConcr* concr, GuPool* pool)
|
||||
ps->concr = concr;
|
||||
ps->pool = pool;
|
||||
ps->expr_queue = gu_new_buf(PgfExprState*, pool);
|
||||
ps->target = NULL;
|
||||
ps->max_fid = concr->total_cats;
|
||||
#ifdef PGF_COUNTS_DEBUG
|
||||
ps->item_full_count = 0;
|
||||
@@ -1702,20 +1722,14 @@ pgf_parser_compute_lexicon_prob(GuMapItor* fn, const void* key, void* value, GuE
|
||||
}
|
||||
}
|
||||
|
||||
#define pgf_new_token_state(ty, pool) \
|
||||
(ty*) pgf_new_token_state_(&pgf_tsfn_##ty, (PgfTokenState*) gu_new(ty, pool))
|
||||
|
||||
static PgfTokenState*
|
||||
pgf_new_token_state(PgfConcr *concr, PgfToken tok, GuPool* pool)
|
||||
pgf_new_token_state_(PgfTokenFn* fn, PgfTokenState* ts)
|
||||
{
|
||||
PgfTokenState* ts = gu_new(PgfTokenState, pool);
|
||||
ts->tok = tok;
|
||||
ts->lexicon_idx = gu_map_get(concr->leftcorner_tok_idx,
|
||||
&tok, PgfProductionIdx*);
|
||||
ts->lexical_prob = INFINITY;
|
||||
if (ts->lexicon_idx != NULL) {
|
||||
PgfLexiconFn clo = { { pgf_parser_compute_lexicon_prob }, ts };
|
||||
gu_map_iter(ts->lexicon_idx, &clo.fn, NULL);
|
||||
}
|
||||
if (ts->lexical_prob == INFINITY)
|
||||
ts->lexical_prob = 0;
|
||||
ts->fn = fn;
|
||||
ts->lexical_prob = INFINITY;
|
||||
return ts;
|
||||
}
|
||||
|
||||
@@ -1731,6 +1745,34 @@ void pgf_parsing_print_counts(PgfParsing* ps)
|
||||
}
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
PgfTokenState ts;
|
||||
PgfToken tok;
|
||||
PgfProductionIdx *lexicon_idx;
|
||||
} PgfRealTokenState;
|
||||
|
||||
static bool
|
||||
pgf_real_match_token(PgfTokenState* ts, PgfToken tok, PgfItem* item)
|
||||
{
|
||||
return gu_string_eq(gu_container(ts, PgfRealTokenState, ts)->tok, tok);
|
||||
}
|
||||
|
||||
static PgfToken
|
||||
pgf_real_get_token(PgfTokenState* ts) {
|
||||
return gu_container(ts, PgfRealTokenState, ts)->tok;
|
||||
}
|
||||
|
||||
static PgfProductionIdx*
|
||||
pgf_real_get_lexicon_idx(PgfTokenState* ts) {
|
||||
return gu_container(ts, PgfRealTokenState, ts)->lexicon_idx;
|
||||
}
|
||||
|
||||
static PgfTokenFn pgf_tsfn_PgfRealTokenState = {
|
||||
pgf_real_match_token,
|
||||
pgf_real_get_token,
|
||||
pgf_real_get_lexicon_idx
|
||||
};
|
||||
|
||||
PgfParseState*
|
||||
pgf_parser_next_state(PgfParseState* prev, PgfToken tok)
|
||||
{
|
||||
@@ -1738,21 +1780,102 @@ pgf_parser_next_state(PgfParseState* prev, PgfToken tok)
|
||||
pgf_parsing_print_counts(prev->ps);
|
||||
#endif
|
||||
|
||||
PgfTokenState* ts =
|
||||
pgf_new_token_state(prev->ps->concr,tok,prev->ps->pool);
|
||||
PgfParseState* state =
|
||||
pgf_new_parse_state(prev->ps, prev, ts, prev->ps->pool);
|
||||
PgfRealTokenState* ts =
|
||||
pgf_new_token_state(PgfRealTokenState, prev->ps->pool);
|
||||
ts->tok = tok;
|
||||
ts->lexicon_idx = gu_map_get(prev->ps->concr->leftcorner_tok_idx,
|
||||
&tok, PgfProductionIdx*);
|
||||
if (ts->lexicon_idx != NULL) {
|
||||
PgfLexiconFn clo = { { pgf_parser_compute_lexicon_prob }, &ts->ts };
|
||||
gu_map_iter(ts->lexicon_idx, &clo.fn, NULL);
|
||||
}
|
||||
if (ts->ts.lexical_prob == INFINITY)
|
||||
ts->ts.lexical_prob = 0;
|
||||
|
||||
state->ps->target = NULL;
|
||||
while (state->ps->target == NULL) {
|
||||
PgfParseState* state =
|
||||
pgf_new_parse_state(prev->ps, prev, &ts->ts, prev->ps->pool);
|
||||
|
||||
while (gu_buf_length(state->agenda) == 0) {
|
||||
if (!pgf_parsing_proceed(state))
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
PgfTokenState ts;
|
||||
GuEnum en;
|
||||
GuString prefix;
|
||||
PgfTokenProb* tp;
|
||||
GuPool* pool;
|
||||
PgfParseState* state;
|
||||
} PgfPrefixTokenState;
|
||||
|
||||
static bool
|
||||
pgf_prefix_match_token(PgfTokenState* ts0, PgfToken tok, PgfItem* item)
|
||||
{
|
||||
PgfPrefixTokenState* ts =
|
||||
gu_container(ts0, PgfPrefixTokenState, ts);
|
||||
|
||||
if (gu_string_is_prefix(ts->prefix, tok)) {
|
||||
ts->tp = gu_new(PgfTokenProb, ts->pool);
|
||||
ts->tp->tok = tok;
|
||||
ts->tp->prob = item->inside_prob+item->conts->outside_prob;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static PgfToken
|
||||
pgf_prefix_get_token(PgfTokenState* ts) {
|
||||
return gu_empty_string;
|
||||
}
|
||||
|
||||
static PgfProductionIdx*
|
||||
pgf_prefix_get_lexicon_idx(PgfTokenState* ts) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static PgfTokenFn pgf_tsfn_PgfPrefixTokenState = {
|
||||
pgf_prefix_match_token,
|
||||
pgf_prefix_get_token,
|
||||
pgf_prefix_get_lexicon_idx
|
||||
};
|
||||
|
||||
static void
|
||||
pgf_parser_completions_next(GuEnum* self, void* to, GuPool* pool)
|
||||
{
|
||||
PgfPrefixTokenState* ts =
|
||||
gu_container(self, PgfPrefixTokenState, en);
|
||||
|
||||
ts->tp = NULL;
|
||||
ts->pool = pool;
|
||||
while (ts->tp == NULL) {
|
||||
if (!pgf_parsing_proceed(ts->state))
|
||||
break;
|
||||
}
|
||||
if (state->ps->target != NULL) {
|
||||
return state;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
*((PgfTokenProb**)to) = ts->tp;
|
||||
}
|
||||
|
||||
GuEnum*
|
||||
pgf_parser_completions(PgfParseState* prev, GuString prefix,
|
||||
GuPool* pool)
|
||||
{
|
||||
#ifdef PGF_COUNTS_DEBUG
|
||||
pgf_parsing_print_counts(prev->ps);
|
||||
#endif
|
||||
|
||||
PgfPrefixTokenState* ts =
|
||||
pgf_new_token_state(PgfPrefixTokenState, pool);
|
||||
ts->en.next = pgf_parser_completions_next;
|
||||
ts->prefix = prefix;
|
||||
ts->tp = NULL;
|
||||
ts->state =
|
||||
pgf_new_parse_state(prev->ps, prev, &ts->ts, pool);
|
||||
|
||||
return &ts->en;
|
||||
}
|
||||
|
||||
static int
|
||||
|
||||
@@ -66,6 +66,10 @@ pgf_parser_next_state(PgfParseState* prev, PgfToken tok);
|
||||
* the pool used to create \parse.
|
||||
*/
|
||||
|
||||
GuEnum*
|
||||
pgf_parser_completions(PgfParseState* prev, GuString prefix,
|
||||
GuPool* pool);
|
||||
|
||||
void
|
||||
pgf_parser_set_beam_size(PgfParseState* state, double beam_size);
|
||||
|
||||
|
||||
@@ -236,6 +236,37 @@ pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
|
||||
return pgf_parse_result(state, pool);
|
||||
}
|
||||
|
||||
GuEnum*
|
||||
pgf_get_completions(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
|
||||
GuString prefix, GuPool* pool)
|
||||
{
|
||||
// Begin parsing a sentence of the specified category
|
||||
PgfParseState* state =
|
||||
pgf_parser_init_state(concr, cat, 0, pool);
|
||||
if (state == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Tokenization
|
||||
GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), pool);
|
||||
PgfToken tok = pgf_lexer_read_token(lexer, lex_err);
|
||||
while (!gu_exn_is_raised(lex_err)) {
|
||||
// feed the token to get a new parse state
|
||||
state = pgf_parser_next_state(state, tok);
|
||||
if (state == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tok = pgf_lexer_read_token(lexer, lex_err);
|
||||
}
|
||||
|
||||
if (gu_exn_caught(lex_err) != gu_type(GuEOF))
|
||||
return NULL;
|
||||
|
||||
// Now begin enumerating the resulting syntax trees
|
||||
return pgf_parser_completions(state, prefix, pool);
|
||||
}
|
||||
|
||||
void
|
||||
pgf_print_chunks(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
|
||||
{
|
||||
|
||||
@@ -116,6 +116,10 @@ pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err);
|
||||
PgfExprEnum*
|
||||
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
|
||||
|
||||
GuEnum*
|
||||
pgf_get_completions(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
|
||||
GuString prefix, GuPool* pool);
|
||||
|
||||
bool
|
||||
pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
|
||||
double *precision, double *recall, double *exact);
|
||||
|
||||
@@ -432,62 +432,19 @@ Expr_getattro(ExprObject *self, PyObject *attr_name) {
|
||||
return PyObject_GenericGetAttr((PyObject*)self, attr_name);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
typedef struct IterObject {
|
||||
PyObject_HEAD
|
||||
PGFObject* grammar;
|
||||
GuPool* pool;
|
||||
int max_count;
|
||||
int counter;
|
||||
GuEnum* res;
|
||||
} ExprIterObject;
|
||||
PyObject* (*fetch)(struct IterObject* self);
|
||||
} IterObject;
|
||||
|
||||
static ExprIterObject*
|
||||
ExprIter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||
PyObject*
|
||||
Iter_fetch_expr(IterObject* self)
|
||||
{
|
||||
ExprIterObject* self = (ExprIterObject *)type->tp_alloc(type, 0);
|
||||
if (self != NULL) {
|
||||
self->grammar = NULL;
|
||||
self->pool = NULL;
|
||||
self->max_count = -1;
|
||||
self->counter = 0;
|
||||
self->res = NULL;
|
||||
}
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
static void
|
||||
ExprIter_dealloc(ExprIterObject* self)
|
||||
{
|
||||
if (self->pool != NULL)
|
||||
gu_pool_free(self->pool);
|
||||
|
||||
Py_XDECREF(self->grammar);
|
||||
|
||||
self->ob_type->tp_free((PyObject*)self);
|
||||
}
|
||||
|
||||
static int
|
||||
ExprIter_init(ExprIterObject *self, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
ExprIter_iter(ExprIterObject *self)
|
||||
{
|
||||
Py_INCREF(self);
|
||||
return (PyObject*) self;
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
ExprIter_iternext(ExprIterObject *self)
|
||||
{
|
||||
if (self->max_count >= 0 && self->counter >= self->max_count) {
|
||||
return NULL;
|
||||
}
|
||||
self->counter++;
|
||||
|
||||
PgfExprProb* ep = gu_next(self->res, PgfExprProb*, self->pool);
|
||||
if (ep == NULL)
|
||||
return NULL;
|
||||
@@ -506,17 +463,81 @@ ExprIter_iternext(ExprIterObject *self)
|
||||
return res;
|
||||
}
|
||||
|
||||
static PyMethodDef ExprIter_methods[] = {
|
||||
PyObject*
|
||||
Iter_fetch_token(IterObject* self)
|
||||
{
|
||||
PgfTokenProb* tp = gu_next(self->res, PgfTokenProb*, self->pool);
|
||||
if (tp == NULL)
|
||||
return NULL;
|
||||
|
||||
PyObject* ty_tok = gu2py_string(tp->tok);
|
||||
PyObject* res = Py_BuildValue("(f,O)", tp->prob, ty_tok);
|
||||
Py_DECREF(ty_tok);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static IterObject*
|
||||
Iter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
IterObject* self = (IterObject *)type->tp_alloc(type, 0);
|
||||
if (self != NULL) {
|
||||
self->grammar = NULL;
|
||||
self->pool = NULL;
|
||||
self->max_count = -1;
|
||||
self->counter = 0;
|
||||
self->res = NULL;
|
||||
}
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
static void
|
||||
Iter_dealloc(IterObject* self)
|
||||
{
|
||||
if (self->pool != NULL)
|
||||
gu_pool_free(self->pool);
|
||||
|
||||
Py_XDECREF(self->grammar);
|
||||
|
||||
self->ob_type->tp_free((PyObject*)self);
|
||||
}
|
||||
|
||||
static int
|
||||
Iter_init(IterObject *self, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
Iter_iter(IterObject *self)
|
||||
{
|
||||
Py_INCREF(self);
|
||||
return (PyObject*) self;
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
Iter_iternext(IterObject *self)
|
||||
{
|
||||
if (self->max_count >= 0 && self->counter >= self->max_count) {
|
||||
return NULL;
|
||||
}
|
||||
self->counter++;
|
||||
|
||||
return self->fetch(self);
|
||||
}
|
||||
|
||||
static PyMethodDef Iter_methods[] = {
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
static PyTypeObject pgf_ExprIterType = {
|
||||
PyObject_HEAD_INIT(NULL)
|
||||
0, /*ob_size*/
|
||||
"pgf.ExprIter", /*tp_name*/
|
||||
sizeof(ExprIterObject), /*tp_basicsize*/
|
||||
"pgf.Iter", /*tp_name*/
|
||||
sizeof(IterObject), /*tp_basicsize*/
|
||||
0, /*tp_itemsize*/
|
||||
(destructor)ExprIter_dealloc, /*tp_dealloc*/
|
||||
(destructor)Iter_dealloc, /*tp_dealloc*/
|
||||
0, /*tp_print*/
|
||||
0, /*tp_getattr*/
|
||||
0, /*tp_setattr*/
|
||||
@@ -537,9 +558,9 @@ static PyTypeObject pgf_ExprIterType = {
|
||||
0, /*tp_clear */
|
||||
0, /*tp_richcompare */
|
||||
0, /*tp_weaklistoffset */
|
||||
(getiterfunc) ExprIter_iter, /*tp_iter */
|
||||
(iternextfunc) ExprIter_iternext, /*tp_iternext */
|
||||
ExprIter_methods, /*tp_methods */
|
||||
(getiterfunc) Iter_iter, /*tp_iter */
|
||||
(iternextfunc) Iter_iternext, /*tp_iternext */
|
||||
Iter_methods, /*tp_methods */
|
||||
0, /*tp_members */
|
||||
0, /*tp_getset */
|
||||
0, /*tp_base */
|
||||
@@ -547,9 +568,9 @@ static PyTypeObject pgf_ExprIterType = {
|
||||
0, /*tp_descr_get */
|
||||
0, /*tp_descr_set */
|
||||
0, /*tp_dictoffset */
|
||||
(initproc)ExprIter_init, /*tp_init */
|
||||
(initproc)Iter_init, /*tp_init */
|
||||
0, /*tp_alloc */
|
||||
(newfunc) ExprIter_new, /*tp_new */
|
||||
(newfunc) Iter_new, /*tp_new */
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
@@ -640,7 +661,7 @@ pypgf_new_python_lexer(PyObject* pylexer, GuPool* pool)
|
||||
return ((PgfLexer*) lexer);
|
||||
}
|
||||
|
||||
static ExprIterObject*
|
||||
static IterObject*
|
||||
Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||
{
|
||||
static char *kwlist[] = {"sentence", "tokens", "cat", "n", NULL};
|
||||
@@ -667,7 +688,7 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ExprIterObject* pyres = (ExprIterObject*)
|
||||
IterObject* pyres = (IterObject*)
|
||||
pgf_ExprIterType.tp_alloc(&pgf_ExprIterType, 0);
|
||||
if (pyres == NULL) {
|
||||
Py_XDECREF(py_lexer);
|
||||
@@ -680,6 +701,7 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||
pyres->pool = gu_new_pool();
|
||||
pyres->max_count = max_count;
|
||||
pyres->counter = 0;
|
||||
pyres->fetch = Iter_fetch_expr;
|
||||
|
||||
GuPool *tmp_pool = gu_local_pool();
|
||||
GuString catname =
|
||||
@@ -723,6 +745,98 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||
return pyres;
|
||||
}
|
||||
|
||||
static IterObject*
|
||||
Concr_getCompletions(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||
{
|
||||
static char *kwlist[] = {"sentence", "tokens", "cat",
|
||||
"prefix", "n", NULL};
|
||||
|
||||
size_t len;
|
||||
const uint8_t *buf = NULL;
|
||||
PyObject* py_lexer = NULL;
|
||||
const char *catname_s = NULL;
|
||||
const char *prefix_s = NULL;
|
||||
int max_count = -1;
|
||||
if (!PyArg_ParseTupleAndKeywords(args, keywds, "|s#Ossi", kwlist,
|
||||
&buf, &len, &py_lexer, &catname_s,
|
||||
&prefix_s, &max_count))
|
||||
return NULL;
|
||||
|
||||
if ((buf == NULL && py_lexer == NULL) ||
|
||||
(buf != NULL && py_lexer != NULL)) {
|
||||
PyErr_SetString(PyExc_TypeError, "either the sentence or the tokens argument must be provided");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (py_lexer != NULL) {
|
||||
// get an iterator out of the iterable object
|
||||
py_lexer = PyObject_GetIter(py_lexer);
|
||||
if (py_lexer == NULL)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
IterObject* pyres = (IterObject*)
|
||||
pgf_ExprIterType.tp_alloc(&pgf_ExprIterType, 0);
|
||||
if (pyres == NULL) {
|
||||
Py_XDECREF(py_lexer);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pyres->grammar = self->grammar;
|
||||
Py_XINCREF(pyres->grammar);
|
||||
|
||||
pyres->pool = gu_new_pool();
|
||||
pyres->max_count = max_count;
|
||||
pyres->counter = 0;
|
||||
pyres->fetch = Iter_fetch_token;
|
||||
|
||||
GuPool *tmp_pool = gu_local_pool();
|
||||
|
||||
GuString catname =
|
||||
(catname_s == NULL) ? pgf_start_cat(self->grammar->pgf, tmp_pool)
|
||||
: gu_str_string(catname_s, tmp_pool);
|
||||
|
||||
GuString prefix =
|
||||
(prefix_s == NULL) ? gu_empty_string
|
||||
: gu_str_string(prefix_s, pyres->pool);
|
||||
|
||||
PgfLexer *lexer = NULL;
|
||||
if (buf != NULL) {
|
||||
GuIn* in = gu_data_in(buf, len, tmp_pool);
|
||||
GuReader* rdr = gu_new_utf8_reader(in, tmp_pool);
|
||||
lexer = pgf_new_simple_lexer(rdr, tmp_pool);
|
||||
}
|
||||
if (py_lexer != NULL) {
|
||||
lexer = pypgf_new_python_lexer(py_lexer, tmp_pool);
|
||||
}
|
||||
|
||||
pyres->res =
|
||||
pgf_get_completions(self->concr, catname, lexer, prefix, pyres->pool);
|
||||
|
||||
if (pyres->res == NULL) {
|
||||
Py_DECREF(pyres);
|
||||
pyres = NULL;
|
||||
|
||||
PgfToken tok =
|
||||
pgf_lexer_current_token(lexer);
|
||||
|
||||
if (gu_string_eq(tok, gu_empty_string))
|
||||
PyErr_SetString(PGFError, "The sentence cannot be parsed");
|
||||
else {
|
||||
PyObject* py_tok = gu2py_string(tok);
|
||||
PyObject_SetAttrString(ParseError, "token", py_tok);
|
||||
PyErr_Format(ParseError, "Unexpected token: \"%s\"",
|
||||
PyString_AsString(py_tok));
|
||||
Py_DECREF(py_tok);
|
||||
}
|
||||
}
|
||||
|
||||
Py_XDECREF(py_lexer);
|
||||
gu_pool_free(tmp_pool);
|
||||
|
||||
return pyres;
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
Concr_parseval(ConcrObject* self, PyObject *args) {
|
||||
ExprObject* pyexpr = NULL;
|
||||
@@ -747,6 +861,26 @@ Concr_parseval(ConcrObject* self, PyObject *args) {
|
||||
return Py_BuildValue("ddd", precision, recall, exact);
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
Concr_addLiteral(ConcrObject* self, PyObject *args) {
|
||||
ExprObject* pyexpr = NULL;
|
||||
const char* s_cat = NULL;
|
||||
if (!PyArg_ParseTuple(args, "sO!", &s_cat, &pgf_ExprType, &pyexpr))
|
||||
return NULL;
|
||||
/*
|
||||
PgfLiteralCallback* callback = NULL;
|
||||
|
||||
GuPool* tmp_pool = gu_local_pool();
|
||||
|
||||
PgfCId cat = gu_str_string(s_cat, tmp_pool);
|
||||
|
||||
pgf_parser_add_literal(self->concr, cat, callback);
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
*/
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
Concr_linearize(ConcrObject* self, PyObject *args)
|
||||
{
|
||||
@@ -1045,9 +1179,15 @@ static PyMethodDef Concr_methods[] = {
|
||||
{"parse", (PyCFunction)Concr_parse, METH_VARARGS | METH_KEYWORDS,
|
||||
"Parses a string and returns an iterator over the abstract trees for this sentence"
|
||||
},
|
||||
{"getCompletions", (PyCFunction)Concr_getCompletions, METH_VARARGS | METH_KEYWORDS,
|
||||
"Parses a partial string and returns a list with the top n possible next tokens"
|
||||
},
|
||||
{"parseval", (PyCFunction)Concr_parseval, METH_VARARGS,
|
||||
"Computes precision, recall and exact match for the parser on a given abstract tree"
|
||||
},
|
||||
{"addLiteral", (PyCFunction)Concr_addLiteral, METH_VARARGS,
|
||||
"adds callbacks for custom literals in the grammar"
|
||||
},
|
||||
{"linearize", (PyCFunction)Concr_linearize, METH_VARARGS,
|
||||
"Takes an abstract tree and linearizes it to a string"
|
||||
},
|
||||
@@ -1335,7 +1475,7 @@ PGF_functionsByCat(PGFObject* self, PyObject *args)
|
||||
return functions;
|
||||
}
|
||||
|
||||
static ExprIterObject*
|
||||
static IterObject*
|
||||
PGF_generate(PGFObject* self, PyObject *args, PyObject *keywds)
|
||||
{
|
||||
static char *kwlist[] = {"cat", "n", NULL};
|
||||
@@ -1346,7 +1486,7 @@ PGF_generate(PGFObject* self, PyObject *args, PyObject *keywds)
|
||||
&catname_s, &max_count))
|
||||
return NULL;
|
||||
|
||||
ExprIterObject* pyres = (ExprIterObject*)
|
||||
IterObject* pyres = (IterObject*)
|
||||
pgf_ExprIterType.tp_alloc(&pgf_ExprIterType, 0);
|
||||
if (pyres == NULL) {
|
||||
return NULL;
|
||||
@@ -1358,6 +1498,7 @@ PGF_generate(PGFObject* self, PyObject *args, PyObject *keywds)
|
||||
pyres->pool = gu_new_pool();
|
||||
pyres->max_count = max_count;
|
||||
pyres->counter = 0;
|
||||
pyres->fetch = Iter_fetch_expr;
|
||||
|
||||
GuPool *tmp_pool = gu_local_pool();
|
||||
GuString catname = gu_str_string(catname_s, tmp_pool);
|
||||
|
||||
@@ -1,26 +1,63 @@
|
||||
import sys
|
||||
import pgf
|
||||
import sys
|
||||
import sets
|
||||
import readline
|
||||
|
||||
sys.stdout.write("loading...")
|
||||
sys.stdout.flush();
|
||||
gr = pgf.readPGF("../../../treebanks/PennTreebank/ParseEngAbs.pgf")
|
||||
sys.stdout.write("\n")
|
||||
|
||||
source_lang = gr.languages["ParseEng"]
|
||||
target_lang = gr.languages["ParseBul"]
|
||||
|
||||
we = pgf.readExpr("UttImpSg PPos (ImpVP (UseV try_V))")
|
||||
print gr.languages["ParseEng"].linearize(we)
|
||||
print source_lang.linearize(we)
|
||||
|
||||
sys.stdout.write("start cat: "+gr.startCat+"\n\n")
|
||||
|
||||
class Completer(object):
    """readline completer backed by the grammar's getCompletions().

    `lang` is the concrete-syntax object whose getCompletions(line,
    prefix=...) method yields (probability, token) pairs.
    """

    def __init__(self, lang):
        self.gr = lang

    def complete(self, prefix, state):
        """Return the state-th distinct completion of `prefix`, or None.

        readline calls this with state = 0, 1, 2, ... until None is
        returned. On state 0 the part of the line before the word being
        completed is handed to the grammar; later calls keep pulling
        from the same iterator and skip tokens that were already
        returned.
        """
        if state == 0:
            line = readline.get_line_buffer()
            line = line[0:readline.get_begidx()]
            # Use the language given to the constructor rather than a
            # module-level global.
            self.i = self.gr.getCompletions(line, prefix=prefix)
            self.tokens = set()

        # Stop once more than 50 distinct tokens have been offered.
        if len(self.tokens) > 50:
            return None

        while True:
            try:
                (p, t) = next(self.i)
            except StopIteration:
                return None
            if t not in self.tokens:
                self.tokens.add(t)
                return t
|
||||
|
||||
completer = Completer(source_lang)
|
||||
readline.set_completer(completer.complete)
|
||||
readline.parse_and_bind("tab: complete")
|
||||
|
||||
while True:
|
||||
sys.stdout.write("> ")
|
||||
line = sys.stdin.readline();
|
||||
if line == '':
|
||||
try:
|
||||
line = raw_input("> ");
|
||||
except EOFError:
|
||||
sys.stdout.write("\n")
|
||||
break;
|
||||
readline.set_completer(None)
|
||||
break
|
||||
except KeyboardInterrupt:
|
||||
sys.stdout.write("\n")
|
||||
readline.set_completer(None)
|
||||
break
|
||||
|
||||
try:
|
||||
for (p,e) in gr.languages["ParseEng"].parse(line, n=5):
|
||||
for (p,e) in source_lang.parse(line, n=1):
|
||||
sys.stdout.write("["+str(p)+"] "+str(e)+"\n")
|
||||
print gr.languages["ParseEngBul"].linearize(e)
|
||||
print target_lang.linearize(e)
|
||||
except pgf.ParseError as e:
|
||||
print e.message
|
||||
|
||||
Reference in New Issue
Block a user