a major redesign in the C runtime. The parser and the linearizer now fully support BIND. The following things are still broken: parseval, word completion, handling 'pre', the robust mode

This commit is contained in:
kr.angelov
2013-10-09 12:08:51 +00:00
parent 0736a9a2ea
commit b058fdd17d
20 changed files with 1220 additions and 1763 deletions

View File

@@ -39,7 +39,6 @@ pgfinclude_HEADERS = \
pgf/reader.h \
pgf/linearizer.h \
pgf/parser.h \
pgf/lexer.h \
pgf/literals.h \
pgf/graphviz.h \
pgf/pgf.h
@@ -93,8 +92,6 @@ libpgf_la_SOURCES = \
pgf/parser.h \
pgf/jit.c \
pgf/parseval.c \
pgf/lexer.c \
pgf/lexer.h \
pgf/literals.c \
pgf/literals.h \
pgf/reader.h \

View File

@@ -91,7 +91,7 @@ gu_exn_clear(GuExn* err) {
GuType*
gu_exn_caught(GuExn* err);
inline const void*
static inline const void*
gu_exn_caught_data(GuExn* err)
{
return err->data.data;

View File

@@ -37,7 +37,7 @@ gu_utf8_decode(const uint8_t** utf8);
void
gu_in_utf8_buf(uint8_t** buf, GuIn* in, GuExn* err);
bool
static inline bool
gu_is_space(uint8_t c) {
return (c == '\t' || c == '\n' || c == '\v' ||
c == '\f' || c == '\r' || c == ' ');

View File

@@ -128,14 +128,16 @@ typedef struct {
extern GU_DECLARE_TYPE(PgfCncCat, abstract);
typedef GuString PgfToken;
typedef GuSeq PgfTokens;
bool
pgf_tokens_equal(PgfTokens* t1, PgfTokens* t2);
typedef GuSeq PgfSequence; // -> PgfSymbol
typedef GuSeq PgfSequences;
typedef GuSeq PgfSymbols;
typedef struct {
PgfSequence* form;
PgfSymbols* form;
/**< The form of this variant as a list of tokens. */
GuStrings* prefixes;
@@ -154,19 +156,8 @@ extern GU_DECLARE_TYPE(PgfCncFunOverloadMap, GuStringMap);
typedef GuMap PgfCncOverloadMap;
extern GU_DECLARE_TYPE(PgfCncOverloadMap, GuMap);
typedef GuMap PgfProductionIdx;
extern GU_DECLARE_TYPE(PgfProductionIdx, GuMap);
typedef GuMap PgfLeftcornerTokIdx;
extern GU_DECLARE_TYPE(PgfLeftcornerTokIdx, GuMap);
typedef struct PgfItem PgfItem;
typedef struct {
bool (*match)(PgfConcr* concr, PgfItem* item, PgfToken tok,
PgfExprProb** out_ep, GuPool *pool);
} PgfLiteralCallback;
typedef GuMap PgfCallbacksMap;
extern GU_DECLARE_TYPE(PgfCallbacksMap, GuMap);
@@ -197,7 +188,7 @@ typedef struct PgfSymbolKP
/** A prefix-dependent symbol. The form that this symbol takes
* depends on the form of a prefix of the following symbol. */
{
PgfSequence* default_form;
PgfSymbols* default_form;
/**< Default form that this symbol takes if none of the
* variant forms is triggered. */
@@ -213,6 +204,21 @@ typedef struct {
typedef struct {
} PgfSymbolBIND;
typedef struct {
PgfExprProb* (*match)(PgfConcr* concr, PgfSymbol* psym, size_t lin_idx,
GuString sentence, size_t* poffset,
GuPool *pool, GuPool *out_pool);
} PgfLiteralCallback;
typedef GuBuf PgfProductionIdx;
typedef struct {
PgfSymbols* syms; // -> PgfSymbol
PgfProductionIdx* idx;
} PgfSequence;
typedef GuSeq PgfSequences;
typedef struct {
PgfAbsFun* absfun;
PgfExprProb *ep;
@@ -230,8 +236,6 @@ struct PgfConcr {
GuMap* ccats;
PgfCncFunOverloadMap* fun_indices;
PgfCncOverloadMap* coerce_idx;
PgfProductionIdx* epsilon_idx;
PgfLeftcornerTokIdx* leftcorner_tok_idx;
PgfCncFuns* cncfuns;
PgfSequences* sequences;
PgfCIdMap* cnccats;
@@ -274,7 +278,6 @@ typedef struct PgfProductionCoerce
} PgfProductionCoerce;
typedef struct {
PgfLiteralCallback *callback;
PgfExprProb *ep;
GuSeq* lins;
} PgfProductionExtern;
@@ -287,8 +290,11 @@ typedef struct {
typedef GuSeq PgfProductionSeq;
extern GU_DECLARE_TYPE(PgfProductionSeq, abstract);
typedef GuBuf PgfProductionBuf;
extern GU_DECLARE_TYPE(PgfProductionBuf, abstract);
typedef struct {
PgfCCat* ccat;
size_t lin_idx;
PgfProductionApply* papp;
} PgfProductionIdxEntry;
struct PgfCCat {
PgfCncCat* cnccat;

View File

@@ -1,128 +0,0 @@
#include <gu/utf8.h>
#include <pgf/pgf.h>
#include <pgf/data.h>
#include <wctype.h>
// State of the built-in lexer created by pgf_new_simple_lexer.
typedef struct {
// Generic lexer header. It is the first member because the code casts
// between PgfLexer* and PgfSimpleLexer*.
PgfLexer base;
// Input stream that code points are read from.
GuIn* in;
// Pool in which the returned tokens are frozen.
GuPool* pool;
// One code point of lookahead: read from `in` but not yet consumed
// into a token. Starts out as ' '.
GuUCS ucs;
} PgfSimpleLexer;
static void
pgf_lexer_read_ucs(PgfSimpleLexer *lexer, GuExn* err)
{
    /* Advance the one-character lookahead. */
    lexer->ucs = gu_in_utf8(lexer->in, err);
    if (!gu_exn_is_raised(err))
        return;

    /* Reading failed: swallow the error and pretend a blank was read,
     * which terminates whatever token is currently being collected. */
    gu_exn_clear(err);
    lexer->ucs = ' ';
}
// Read the next token from the lexer's input.
//
// Leading whitespace is skipped first. Three token shapes are then
// recognised from the lookahead character:
//   - a word: starts with a letter, '\'' or '_' and continues with
//     alphanumerics, '\'' or '_'; a '.' that follows fewer than four
//     characters is kept inside the word;
//   - a number: optional '-', digits, optionally '.' and more digits;
//     a '-' not followed by a digit is returned on its own;
//   - anything else: a single code point.
//
// The token is stored in base.tok, allocated in the lexer's pool, and
// returned. An error raised while skipping whitespace is left set in
// `err`; errors from pgf_lexer_read_ucs are cleared there and replaced
// by a blank lookahead.
static PgfToken
pgf_simple_lexer_read_token(PgfLexer *base, GuExn* err)
{
PgfSimpleLexer* lexer = (PgfSimpleLexer*) base;
// The token is accumulated in a scratch pool and copied out at `stop`.
GuPool* tmp_pool = gu_new_pool();
GuStringBuf* buf = gu_string_buf(tmp_pool);
GuOut* out = gu_string_buf_out(buf);
// Skip whitespace. This read does not clear the exception, so a failure
// here reaches the caller with nothing written to the buffer.
while (iswspace(lexer->ucs)) {
lexer->ucs = gu_in_utf8(lexer->in, err);
if (gu_exn_is_raised(err))
goto stop;
}
if (iswalpha(lexer->ucs) ||
lexer->ucs == '\'' ||
lexer->ucs == '_') {
// Number of characters emitted since the start of the word or since
// the last embedded '.'.
int counter = 0;
do {
gu_out_utf8(lexer->ucs, out, err);
if (gu_exn_is_raised(err))
goto stop;
counter++;
pgf_lexer_read_ucs(lexer, err);
if (lexer->ucs == '.' && counter < 4) {
// perhaps an abbreviation
gu_out_utf8(lexer->ucs, out, err);
if (gu_exn_is_raised(err))
goto stop;
counter = 0;
pgf_lexer_read_ucs(lexer, err);
}
} while (iswalnum(lexer->ucs) ||
lexer->ucs == '\'' ||
lexer->ucs == '_');
} else if (iswdigit(lexer->ucs) || lexer->ucs == '-') {
if (lexer->ucs == '-') {
gu_out_utf8(lexer->ucs, out, err);
if (gu_exn_is_raised(err))
goto stop;
pgf_lexer_read_ucs(lexer, err);
// A '-' that is not followed by a digit is a token by itself.
if (!iswdigit(lexer->ucs))
goto stop;
}
// Integer part.
do {
gu_out_utf8(lexer->ucs, out, err);
if (gu_exn_is_raised(err))
goto stop;
pgf_lexer_read_ucs(lexer, err);
} while (iswdigit(lexer->ucs));
// Optional fractional part; the '.' is kept even when no digits follow.
if (lexer->ucs == '.') {
gu_out_utf8(lexer->ucs, out, err);
if (gu_exn_is_raised(err))
goto stop;
pgf_lexer_read_ucs(lexer, err);
while (iswdigit(lexer->ucs)) {
gu_out_utf8(lexer->ucs, out, err);
if (gu_exn_is_raised(err))
goto stop;
pgf_lexer_read_ucs(lexer, err);
}
}
} else {
// Any other code point is a single-character token.
gu_out_utf8(lexer->ucs, out, err);
if (gu_exn_is_raised(err))
goto stop;
pgf_lexer_read_ucs(lexer, err);
}
stop:
// Copy whatever was collected into the lexer's pool, then drop the
// scratch pool.
lexer->base.tok = gu_string_buf_freeze(buf, lexer->pool);
gu_pool_free(tmp_pool);
return lexer->base.tok;
}
/* Create a simple lexer that reads from `in`. The lexer object and all
 * tokens it returns are allocated in `pool`. */
PgfLexer*
pgf_new_simple_lexer(GuIn *in, GuPool *pool)
{
    PgfSimpleLexer* self = gu_new(PgfSimpleLexer, pool);

    /* Lexer-specific state; the lookahead starts out as a blank so the
     * first read begins by skipping whitespace. */
    self->in   = in;
    self->pool = pool;
    self->ucs  = ' ';

    /* Generic interface part. */
    self->base.tok        = "";
    self->base.read_token = pgf_simple_lexer_read_token;

    /* `base` is the first member, so this is the same address as `self`. */
    return &self->base;
}
/* Read the next token by dispatching to the lexer's read_token callback. */
PgfToken
pgf_lexer_read_token(PgfLexer *lexer, GuExn* err)
{
    PgfToken next = (*lexer->read_token)(lexer, err);
    return next;
}
/* Return the token stored in the lexer, without reading any input. */
PgfToken
pgf_lexer_current_token(PgfLexer *lexer)
{
    PgfToken current = lexer->tok;
    return current;
}

View File

@@ -1,31 +0,0 @@
#ifndef PGF_LEXER_H_
#define PGF_LEXER_H_
#include <gu/in.h>
#include <pgf/expr.h>
/// A single lexical token
typedef GuString PgfToken;
typedef GuSeq PgfTokens; // -> PgfToken
/// A token together with a category identifier and a probability value.
// NOTE(review): no use of this type is visible here; confirm the meaning
// of `prob` against the code that fills it.
typedef struct {
prob_t prob;
PgfCId cat;
PgfToken tok;
} PgfTokenProb;
/// Generic lexer interface: a token-reading callback plus the current token.
typedef struct {
// Callback that produces the next token.
// NOTE(review): declared with an empty parameter list; the simple lexer's
// implementation and pgf_lexer_read_token both use (PgfLexer*, GuExn*).
// Consider giving it a full prototype once all assigners are checked.
PgfToken (*read_token)();
// Token returned by pgf_lexer_current_token.
PgfToken tok;
} PgfLexer;
/// Create a simple lexer that reads from \p in; the lexer is allocated in \p pool.
PgfLexer*
pgf_new_simple_lexer(GuIn *in, GuPool *pool);
/// Read the next token through the lexer's read_token callback.
PgfToken
pgf_lexer_read_token(PgfLexer *lexer, GuExn* err);
/// Return the token currently stored in the lexer.
PgfToken
pgf_lexer_current_token(PgfLexer *lexer);
#endif // PGF_LEXER_H_

View File

@@ -455,14 +455,13 @@ pgf_lzr_concretize(PgfConcr* concr, PgfExpr expr, GuPool* pool)
}
void
pgf_lzr_linearize_sequence(PgfConcr* concr, PgfCncTreeApp* fapp,
PgfSequence* seq, uint16_t seq_idx,
PgfLinFuncs** fnsp)
pgf_lzr_linearize_symbols(PgfConcr* concr, PgfCncTreeApp* fapp,
PgfSymbols* syms, uint16_t sym_idx,
PgfLinFuncs** fnsp)
{
size_t nsyms = gu_seq_length(seq);
PgfSymbol* syms = gu_seq_data(seq);
for (size_t i = seq_idx; i < nsyms; i++) {
PgfSymbol sym = syms[i];
size_t nsyms = gu_seq_length(syms);
for (size_t i = sym_idx; i < nsyms; i++) {
PgfSymbol sym = gu_seq_get(syms, PgfSymbol, i);
GuVariantInfo sym_i = gu_variant_open(sym);
switch (sym_i.tag) {
case PGF_SYMBOL_CAT:
@@ -488,7 +487,7 @@ pgf_lzr_linearize_sequence(PgfConcr* concr, PgfCncTreeApp* fapp,
case PGF_SYMBOL_KP: {
// TODO: correct prefix-dependencies
PgfSymbolKP* kp = sym_i.data;
pgf_lzr_linearize_sequence(concr, fapp, kp->default_form, 0, fnsp);
pgf_lzr_linearize_symbols(concr, fapp, kp->default_form, 0, fnsp);
break;
}
case PGF_SYMBOL_NE: {
@@ -528,9 +527,7 @@ pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs
}
gu_require(lin_idx < fun->n_lins);
PgfSequence* seq = fun->lins[lin_idx];
pgf_lzr_linearize_sequence(concr, fapp, seq, 0, fnsp);
pgf_lzr_linearize_symbols(concr, fapp, fun->lins[lin_idx]->syms, 0, fnsp);
if (fns->end_phrase) {
fns->end_phrase(fnsp,
@@ -681,7 +678,7 @@ pgf_lzr_linearize_simple(PgfConcr* concr, PgfCncTree ctree,
}
GuString
pgf_get_tokens(PgfSequence* seq, uint16_t seq_idx, GuPool* pool)
pgf_get_tokens(PgfSymbols* syms, uint16_t sym_idx, GuPool* pool)
{
GuPool* tmp_pool = gu_new_pool();
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
@@ -695,7 +692,7 @@ pgf_get_tokens(PgfSequence* seq, uint16_t seq_idx, GuPool* pool)
.err = err
};
pgf_lzr_linearize_sequence(NULL, NULL, seq, seq_idx, &flin.funcs);
pgf_lzr_linearize_symbols(NULL, NULL, syms, sym_idx, &flin.funcs);
GuString tokens = gu_ok(err) ? gu_string_buf_freeze(sbuf, pool)
: "";

View File

@@ -76,5 +76,5 @@ pgf_lzr_linearize_simple(PgfConcr* concr, PgfCncTree ctree,
#ifdef PGF_PARSER_H_
// Used internally in the parser
GuString
pgf_get_tokens(PgfSequence* seq, uint16_t seq_idx, GuPool* pool);
pgf_get_tokens(PgfSymbols* sym, uint16_t sym_idx, GuPool* pool);
#endif

View File

@@ -11,48 +11,41 @@ GU_DEFINE_TYPE(PgfCallbacksMap, GuMap,
gu_ptr_type(PgfLiteralCallback), &gu_null_struct);
static bool
pgf_match_string_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
PgfExprProb** out_ep, GuPool *pool)
static PgfExprProb*
pgf_match_string_lit(PgfConcr* concr, PgfSymbol* psym, size_t lin_idx,
GuString sentence, size_t* poffset,
GuPool *pool, GuPool *out_pool)
{
GuPool* tmp_pool = gu_new_pool();
size_t lin_idx;
PgfSequence* seq;
pgf_item_sequence(item, &lin_idx, &seq, tmp_pool);
gu_assert(lin_idx == 0);
bool accepted = false;
int n_syms = gu_seq_length(seq);
if (n_syms == 0) {
*out_ep = NULL;
accepted = true;
} else if (n_syms == 1) {
PgfExprProb* ep = gu_new(PgfExprProb, pool);
size_t offset = *poffset;
while (!gu_is_space(sentence[offset]))
offset++;
size_t len = offset - *poffset;
if (len > 0) {
PgfExprProb* ep = gu_new(PgfExprProb, out_pool);
ep->prob = 0;
PgfSymbolKS* sks =
gu_variant_data(gu_seq_get(seq, PgfSymbol, 0));
PgfExprLit *expr_lit =
gu_new_variant(PGF_EXPR_LIT,
PgfExprLit,
&ep->expr, pool);
&ep->expr, out_pool);
PgfLiteralStr *lit_str =
gu_new_flex_variant(PGF_LITERAL_STR,
PgfLiteralStr,
val, strlen(sks->token)+1,
&expr_lit->lit, pool);
strcpy(lit_str->val, sks->token);
val, len+1,
&expr_lit->lit, out_pool);
memcpy(lit_str->val, sentence+*poffset, len);
lit_str->val[len] = 0;
*out_ep = ep;
accepted = false;
pgf_add_extern_tok(psym, lit_str->val, pool);
*poffset = offset;
return ep;
} else {
*out_ep = NULL;
return NULL;
}
gu_pool_free(tmp_pool);
return accepted;
}
static PgfLiteralCallback pgf_string_literal_callback =
@@ -60,55 +53,46 @@ static PgfLiteralCallback pgf_string_literal_callback =
static bool
pgf_match_int_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
PgfExprProb** out_ep, GuPool *pool)
static PgfExprProb*
pgf_match_int_lit(PgfConcr* concr, PgfSymbol* psym, size_t lin_idx,
GuString sentence, size_t* poffset,
GuPool *pool, GuPool *out_pool)
{
GuPool* tmp_pool = gu_new_pool();
size_t lin_idx;
PgfSequence* seq;
pgf_item_sequence(item, &lin_idx, &seq, tmp_pool);
gu_assert(lin_idx == 0);
bool accepted = false;
int n_syms = gu_seq_length(seq);
if (n_syms == 0) {
int val;
size_t offset = *poffset;
while (!gu_is_space(sentence[offset]))
offset++;
*out_ep = NULL;
accepted = gu_string_to_int(tok, &val);
} else if (n_syms == 1) {
PgfSymbolKS* sks =
gu_variant_data(gu_seq_get(seq, PgfSymbol, 0));
size_t len = offset - *poffset;
if (len > 0) {
PgfToken tok = gu_malloc(pool, len+1);
memcpy((char*) tok, sentence+*poffset, len);
((char*) tok)[len] = 0;
int val;
if (!gu_string_to_int(sks->token, &val)) {
*out_ep = NULL;
} else {
PgfExprProb* ep = gu_new(PgfExprProb, pool);
ep->prob = 0;
if (!gu_string_to_int(tok, &val))
return NULL;
PgfExprLit *expr_lit =
gu_new_variant(PGF_EXPR_LIT,
PgfExprLit,
&ep->expr, pool);
PgfLiteralInt *lit_int =
gu_new_variant(PGF_LITERAL_INT,
PgfLiteralInt,
&expr_lit->lit, pool);
lit_int->val = val;
PgfExprProb* ep = gu_new(PgfExprProb, pool);
ep->prob = 0;
*out_ep = ep;
}
accepted = false;
PgfExprLit *expr_lit =
gu_new_variant(PGF_EXPR_LIT,
PgfExprLit,
&ep->expr, pool);
PgfLiteralInt *lit_int =
gu_new_variant(PGF_LITERAL_INT,
PgfLiteralInt,
&expr_lit->lit, pool);
lit_int->val = val;
pgf_add_extern_tok(psym, tok, pool);
*poffset = offset;
return ep;
} else {
*out_ep = NULL;
return NULL;
}
gu_pool_free(tmp_pool);
return accepted;
}
static PgfLiteralCallback pgf_int_literal_callback =
@@ -116,55 +100,46 @@ static PgfLiteralCallback pgf_int_literal_callback =
static bool
pgf_match_float_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
PgfExprProb** out_ep, GuPool *pool)
static PgfExprProb*
pgf_match_float_lit(PgfConcr* concr, PgfSymbol* psym, size_t lin_idx,
GuString sentence, size_t* poffset,
GuPool *pool, GuPool *out_pool)
{
GuPool* tmp_pool = gu_new_pool();
size_t lin_idx;
PgfSequence* seq;
pgf_item_sequence(item, &lin_idx, &seq, tmp_pool);
gu_assert(lin_idx == 0);
bool accepted = false;
int n_syms = gu_seq_length(seq);
if (n_syms == 0) {
double val;
size_t offset = *poffset;
while (!gu_is_space(sentence[offset]))
offset++;
*out_ep = NULL;
accepted = gu_string_to_double(tok, &val);
} else if (n_syms == 1) {
PgfSymbolKS* sks =
gu_variant_data(gu_seq_get(seq, PgfSymbol, 0));
size_t len = offset - *poffset;
if (len > 0) {
PgfToken tok = gu_malloc(pool, len+1);
memcpy((char*) tok, sentence+*poffset, len);
((char*) tok)[len] = 0;
double val;
if (!gu_string_to_double(sks->token, &val)) {
*out_ep = NULL;
} else {
PgfExprProb* ep = gu_new(PgfExprProb, pool);
ep->prob = 0;
if (!gu_string_to_double(tok, &val))
return NULL;
PgfExprLit *expr_lit =
gu_new_variant(PGF_EXPR_LIT,
PgfExprLit,
&ep->expr, pool);
PgfLiteralFlt *lit_flt =
gu_new_variant(PGF_LITERAL_FLT,
PgfLiteralFlt,
&expr_lit->lit, pool);
lit_flt->val = val;
PgfExprProb* ep = gu_new(PgfExprProb, pool);
ep->prob = 0;
*out_ep = ep;
}
accepted = false;
PgfExprLit *expr_lit =
gu_new_variant(PGF_EXPR_LIT,
PgfExprLit,
&ep->expr, pool);
PgfLiteralFlt *lit_flt =
gu_new_variant(PGF_LITERAL_FLT,
PgfLiteralFlt,
&expr_lit->lit, pool);
lit_flt->val = val;
pgf_add_extern_tok(psym, tok, pool);
*poffset = offset;
return ep;
} else {
*out_ep = NULL;
return NULL;
}
gu_pool_free(tmp_pool);
return accepted;
}
static PgfLiteralCallback pgf_float_literal_callback =
@@ -172,45 +147,49 @@ static PgfLiteralCallback pgf_float_literal_callback =
static bool
pgf_match_name_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
PgfExprProb** out_ep, GuPool *pool)
static PgfExprProb*
pgf_match_name_lit(PgfConcr* concr, PgfSymbol* psym, size_t lin_idx,
GuString sentence, size_t* poffset,
GuPool *pool, GuPool *out_pool)
{
GuPool* tmp_pool = gu_new_pool();
size_t lin_idx;
PgfSequence* seq;
pgf_item_sequence(item, &lin_idx, &seq, tmp_pool);
gu_assert(lin_idx == 0);
GuPool* tmp_pool = gu_new_pool();
GuStringBuf *sbuf = gu_string_buf(tmp_pool);
GuOut* out = gu_string_buf_out(sbuf);
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
bool iscap = false;
if (strcmp(tok, "-") == 0) {
iscap = true;
} else if (*tok) {
GuIn* in = gu_string_in(tok, tmp_pool);
iscap = iswupper(gu_in_utf8(in, err));
}
size_t n_syms = gu_seq_length(seq);
if (!iscap && n_syms > 0) {
GuStringBuf *sbuf = gu_string_buf(tmp_pool);
GuOut* out = gu_string_buf_out(sbuf);
size_t offset = *poffset;
for (size_t i = 0; i < n_syms; i++) {
if (i > 0)
gu_putc(' ', out, err);
PgfSymbol sym = gu_seq_get(seq, PgfSymbol, i);
gu_assert(gu_variant_tag(sym) == PGF_SYMBOL_KS);
PgfSymbolKS* sks = gu_variant_data(sym);
gu_string_write(sks->token, out, err);
int i = 0;
while (iswupper(sentence[offset])) {
size_t len = 0;
while (!gu_is_space(sentence[offset+len])) {
len++;
}
PgfExprProb* ep = gu_new(PgfExprProb, pool);
PgfToken tok = gu_malloc(pool, len+1);
memcpy((char*) tok, sentence+offset, len);
((char*) tok)[len] = 0;
pgf_add_extern_tok(psym, tok, pool);
if (i > 0)
gu_putc(' ', out, err);
gu_string_write(tok, out, err);
i++;
offset += len;
*poffset = offset;
while (gu_is_space(sentence[offset]))
offset++;
}
PgfExprProb* ep = NULL;
if (i > 0) {
ep = gu_new(PgfExprProb, pool);
ep->prob = 0;
PgfExprApp *expr_app =
@@ -235,14 +214,11 @@ pgf_match_name_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
val, strlen(val)+1,
&expr_lit->lit, pool);
strcpy(lit_str->val, val);
*out_ep = ep;
} else {
*out_ep = NULL;
}
gu_pool_free(tmp_pool);
return iscap;
return ep;
}
PgfLiteralCallback pgf_nerc_literal_callback =

File diff suppressed because it is too large Load Diff

View File

@@ -5,119 +5,14 @@
#include <pgf/data.h>
#include <pgf/expr.h>
/// Parsing
/** @file
*
* @todo Querying the parser for expected continuations
*
* @todo Literals and custom categories
*
* @todo HOAS, dependent types...
*/
void
pgf_add_extern_tok(PgfSymbol* psym, PgfToken tok, GuPool* pool);
typedef struct PgfParseState PgfParseState;
/** @}
*
* @name Parsing a sentence
*
* The progress of parsing is controlled by the client code. Firstly, the
* parsing of a sentence is initiated with #pgf_parser_parse. This returns an
* initial #PgfParse object, which represents the state of the parsing. A new
* parse state is obtained by feeding a token with #pgf_parse_token. The old
* parse state is unaffected by this, so backtracking - and even branching -
* can be accomplished by retaining the earlier #PgfParse objects.
*
* @{
*/
/// Begin parsing
PgfParseState*
pgf_parser_init_state(PgfConcr* concr, PgfCId cat, size_t lin_idx,
double heuristics,
GuPool* pool, GuPool* out_pool);
/**<
* @param parser The parser to use
*
* @param cat The identifier of the abstract category to parse
*
* @param lin_idx The index of the field of the concrete category to parse
*
* @pool
*
* @return An initial parsing state.
*/
/// Feed a token to the parser
PgfParseState*
pgf_parser_next_state(PgfParseState* prev, PgfToken tok);
/**<
* @param parse The current parse state
*
* @param tok The token to feed
*
* @pool
*
* @return A new parse state obtained by feeding \p tok as an input to \p
* parse, or \c NULL if the token was unexpected.
*
* @note The new parse state partially depends on the old one, so it doesn't
* make sense to use a \p pool argument with a longer lifetime than that of
* the pool used to create \p parse.
*/
GuEnum*
pgf_parser_completions(PgfParseState* prev, GuString prefix);
void
pgf_add_extern_cat(PgfSymbol* psym, int d, int r, GuPool* pool);
void
pgf_parser_add_literal(PgfConcr *concr, PgfCId cat,
PgfLiteralCallback* callback);
/** @}
* @name Retrieving abstract syntax trees
*
* After the desired tokens have been fed to the parser, the resulting parse
* state can be queried for completed results. The #pgf_parse_result function
* returns an enumeration (#GuEnum) of possible abstract syntax trees whose
* linearization is the sequence of tokens fed so far.
*
* @{
*/
/// Retrieve the current parses from the parse state.
PgfExprEnum*
pgf_parse_result(PgfParseState* state);
/**<
* @param parse A parse state
*
* @pool
*
* @return An enumeration of #PgfExpr elements representing the abstract
* syntax trees that would linearize to the sequence of tokens fed to produce
* \p parse. The enumeration may yield zero, one or more abstract syntax
* trees, depending on whether the parse was unsuccessful, unambiguously
* successful, or ambiguously successful.
*/
// Use this procedure only at your own risk.
// It is dirty and it will probably be removed or replaced
// with something else. Currently it is here only for experimental
// purposes.
void
pgf_parse_print_chunks(PgfParseState* state);
size_t
pgf_item_lin_idx(PgfItem* item);
void
pgf_item_sequence(PgfItem* item,
size_t* lin_idx, PgfSequence** seq,
GuPool* pool);
int
pgf_item_sequence_length(PgfItem* item);
/** @} */
#endif // PGF_PARSER_H_

View File

@@ -1,8 +1,9 @@
#include <pgf/pgf.h>
#include <pgf/data.h>
#include <pgf/linearizer.h>
#include <pgf/parser.h>
typedef struct {
/*typedef struct {
int start, end;
PgfCId cat;
int lin_idx;
@@ -123,24 +124,50 @@ pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_id
}
static PgfLinFuncs pgf_metrics_lin_funcs1 = {
v v v v v v v
.symbol_tokens = pgf_metrics_lzn_symbol_tokens,
.expr_literal = pgf_metrics_lzn_expr_literal,
.begin_phrase = pgf_metrics_lzn_begin_phrase,
.end_phrase = pgf_metrics_lzn_end_phrase1
=============
.symbol_token = pgf_metrics_lzn_symbol_token,
.expr_literal = pgf_metrics_lzn_expr_literal,
.begin_phrase = pgf_metrics_lzn_begin_phrase,
.end_phrase = pgf_metrics_lzn_end_phrase1
*************
.symbol_tokens = pgf_metrics_lzn_symbol_tokens,
.expr_literal = pgf_metrics_lzn_expr_literal,
.begin_phrase = pgf_metrics_lzn_begin_phrase,
.end_phrase = pgf_metrics_lzn_end_phrase1,
.symbol_glue = NULL
^ ^ ^ ^ ^ ^ ^
};
static PgfLinFuncs pgf_metrics_lin_funcs2 = {
v v v v v v v
.symbol_tokens = pgf_metrics_lzn_symbol_tokens,
.expr_literal = pgf_metrics_lzn_expr_literal,
.begin_phrase = pgf_metrics_lzn_begin_phrase,
.end_phrase = pgf_metrics_lzn_end_phrase2
=============
.symbol_token = pgf_metrics_lzn_symbol_token,
.expr_literal = pgf_metrics_lzn_expr_literal,
.begin_phrase = pgf_metrics_lzn_begin_phrase,
.end_phrase = pgf_metrics_lzn_end_phrase2
*************
.symbol_tokens = pgf_metrics_lzn_symbol_tokens,
.expr_literal = pgf_metrics_lzn_expr_literal,
.begin_phrase = pgf_metrics_lzn_begin_phrase,
.end_phrase = pgf_metrics_lzn_end_phrase2,
.symbol_glue = NULL
^ ^ ^ ^ ^ ^ ^
};
*/
bool
pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
double *precision, double *recall, double *exact)
{
GuPool* pool = gu_new_pool();
/* GuPool* pool = gu_new_pool();
GuEnum* en_lins1 =
pgf_lzr_concretize(concr, expr, pool);
@@ -190,6 +217,6 @@ pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
*precision = ((double) state.matches)/((double) state.found);
*recall = ((double) state.matches)/((double) gu_buf_length(state.phrases));
*exact = pgf_expr_eq(expr, ep->expr) ? 1 : 0;
*/
return true;
}

View File

@@ -4,7 +4,6 @@
#include <pgf/reader.h>
#include <pgf/linearizer.h>
#include <pgf/parser.h>
#include <pgf/lexer.h>
#include <gu/file.h>
#include <gu/string.h>
#include <gu/enum.h>
@@ -61,7 +60,8 @@ pgf_load_meta_child_probs(PgfPGF* pgf, const char* fpath,
PgfAbsCat* abscat1 =
gu_map_get(pgf->abstract.cats, cat1, PgfAbsCat*);
if (abscat1 == NULL) {
gu_raise(err, PgfExn);
GuExnData* exn = gu_raise(err, PgfExn);
exn->data = "Unknown category name";
goto close;
}
@@ -73,6 +73,8 @@ pgf_load_meta_child_probs(PgfPGF* pgf, const char* fpath,
PgfAbsCat* abscat2 = gu_map_get(pgf->abstract.cats, cat2, PgfAbsCat*);
if (abscat2 == NULL) {
gu_raise(err, PgfExn);
GuExnData* exn = gu_raise(err, PgfExn);
exn->data = "Unknown category name";
goto close;
}
@@ -228,73 +230,3 @@ pgf_linearize(PgfConcr* concr, PgfExpr expr, GuOut* out, GuExn* err)
gu_pool_free(tmp_pool);
}
GuEnum*
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
GuPool* pool, GuPool* out_pool)
{
return pgf_parse_with_heuristics(concr, cat, lexer, -1.0, pool, out_pool);
}
GuEnum*
pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
double heuristics,
GuPool* pool, GuPool* out_pool)
{
// Begin parsing a sentence of the specified category
PgfParseState* state =
pgf_parser_init_state(concr, cat, 0, heuristics, pool, out_pool);
if (state == NULL) {
return NULL;
}
// Tokenization
GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), pool);
PgfToken tok = pgf_lexer_read_token(lexer, lex_err);
while (!gu_exn_is_raised(lex_err)) {
// feed the token to get a new parse state
state = pgf_parser_next_state(state, tok);
if (state == NULL) {
return NULL;
}
tok = pgf_lexer_read_token(lexer, lex_err);
}
if (gu_exn_caught(lex_err) != gu_type(GuEOF))
return NULL;
// Now begin enumerating the resulting syntax trees
return pgf_parse_result(state);
}
GuEnum*
pgf_complete(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
GuString prefix, GuPool* pool)
{
// Begin parsing a sentence of the specified category
PgfParseState* state =
pgf_parser_init_state(concr, cat, 0, -1, pool, pool);
if (state == NULL) {
return NULL;
}
// Tokenization
GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), pool);
PgfToken tok = pgf_lexer_read_token(lexer, lex_err);
while (!gu_exn_is_raised(lex_err)) {
// feed the token to get a new parse state
state = pgf_parser_next_state(state, tok);
if (state == NULL) {
return NULL;
}
tok = pgf_lexer_read_token(lexer, lex_err);
}
if (gu_exn_caught(lex_err) != gu_type(GuEOF))
return NULL;
// Now begin enumerating the resulting syntax trees
return pgf_parser_completions(state, prefix);
}

View File

@@ -37,6 +37,7 @@ extern GU_DECLARE_TYPE(PgfCId, typedef);
extern GU_DECLARE_TYPE(PgfExn, abstract);
extern GU_DECLARE_TYPE(PgfParseError, abstract);
/// @name PGF Grammar objects
/// @{
@@ -50,7 +51,6 @@ typedef struct PgfConcr PgfConcr;
*/
#include <pgf/expr.h>
#include <pgf/lexer.h>
#include <pgf/graphviz.h>
/// An enumeration of #PgfExpr elements.
@@ -120,8 +120,16 @@ pgf_print_name(PgfConcr*, PgfCId id);
void
pgf_linearize(PgfConcr* concr, PgfExpr expr, GuOut* out, GuExn* err);
bool
pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
double *precision, double *recall, double *exact);
PgfExprEnum*
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
pgf_generate_all(PgfPGF* pgf, PgfCId cat, GuPool* pool);
PgfExprEnum*
pgf_parse(PgfConcr* concr, PgfCId cat, GuString sentence,
GuExn* err,
GuPool* pool, GuPool* out_pool);
typedef struct PgfMorphoCallback PgfMorphoCallback;
@@ -132,10 +140,10 @@ struct PgfMorphoCallback {
};
void
pgf_lookup_morpho(PgfConcr *concr, PgfLexer *lexer,
pgf_lookup_morpho(PgfConcr *concr, GuString sentence,
PgfMorphoCallback* callback, GuExn* err);
typedef GuMapKeyValue PgfFullFormEntry;
typedef struct PgfFullFormEntry PgfFullFormEntry;
GuEnum*
pgf_fullform_lexicon(PgfConcr *concr, GuPool* pool);
@@ -148,20 +156,14 @@ pgf_fullform_get_analyses(PgfFullFormEntry* entry,
PgfMorphoCallback* callback, GuExn* err);
PgfExprEnum*
pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
double heuristics,
pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat,
GuString sentence, double heuristics,
GuExn* err,
GuPool* pool, GuPool* out_pool);
GuEnum*
pgf_complete(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
GuString prefix, GuPool* pool);
bool
pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
double *precision, double *recall, double *exact);
PgfExprEnum*
pgf_generate_all(PgfPGF* pgf, PgfCId cat, GuPool* pool);
pgf_complete(PgfConcr* concr, PgfCId cat, GuString string,
GuString prefix, GuExn* err, GuPool* pool);
/// @}

View File

@@ -169,21 +169,14 @@ pgf_print_cncfun(PgfCncFun *cncfun, PgfSequences* sequences,
GuOut *out, GuExn *err)
{
gu_printf(out,err," F%d := (", cncfun->funid);
size_t n_seqs = gu_seq_length(sequences);
for (size_t i = 0; i < cncfun->n_lins; i++) {
if (i > 0) gu_putc(',', out, err);
PgfSequence* seq = cncfun->lins[i];
for (size_t seqid = 0; seqid < n_seqs; seqid++) {
if (gu_seq_data(gu_seq_get(sequences, PgfSequence*, seqid)) == gu_seq_data(seq)) {
gu_printf(out,err,"S%d", seqid);
break;
}
}
PgfSequence* seq = cncfun->lins[i];
gu_printf(out,err,"S%d", (seq - ((PgfSequence*) gu_seq_data(sequences))));
}
gu_puts(")", out, err);
if (cncfun->absfun != NULL) {
@@ -204,7 +197,7 @@ pgf_print_token(PgfToken tok, GuOut *out, GuExn *err)
}
static void
pgf_print_sequence(PgfSequence* seq, GuOut *out, GuExn *err);
pgf_print_symbols(PgfSymbols* syms, GuOut *out, GuExn *err);
void
pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err)
@@ -224,11 +217,11 @@ pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err)
PgfSymbolKP* skp = gu_variant_data(sym);
gu_puts("pre {", out, err);
pgf_print_sequence(skp->default_form, out, err);
pgf_print_symbols(skp->default_form, out, err);
for (size_t i = 0; i < skp->n_forms; i++) {
gu_puts("; ", out, err);
pgf_print_sequence(skp->forms[i].form, out, err);
pgf_print_symbols(skp->forms[i].form, out, err);
gu_puts(" / ", out, err);
size_t n_prefixes = gu_seq_length(skp->forms[i].prefixes);
@@ -269,13 +262,13 @@ pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err)
}
static void
pgf_print_sequence(PgfSequence* seq, GuOut *out, GuExn *err)
pgf_print_symbols(PgfSymbols* syms, GuOut *out, GuExn *err)
{
int n_syms = gu_seq_length(seq);
int n_syms = gu_seq_length(syms);
for (int i = 0; i < n_syms; i++) {
if (i > 0) gu_putc(' ', out, err);
PgfSymbol sym = gu_seq_get(seq, PgfSymbol, i);
PgfSymbol sym = gu_seq_get(syms, PgfSymbol, i);
pgf_print_symbol(sym, out, err);
}
}
@@ -338,10 +331,9 @@ pgf_print_concrete(PgfCId cncname, PgfConcr* concr,
gu_puts(" sequences\n", out, err);
size_t n_seqs = gu_seq_length(concr->sequences);
for (size_t i = 0; i < n_seqs; i++) {
PgfSequence* seq = gu_seq_get(concr->sequences, PgfSequence*, i);
gu_printf(out,err," S%d := ", i);
pgf_print_sequence(seq, out, err);
PgfSymbols* syms = gu_seq_index(concr->sequences, PgfSequence, i)->syms;
pgf_print_symbols(syms, out, err);
gu_putc('\n', out, err);
}

View File

@@ -606,13 +606,13 @@ pgf_read_printnames(PgfReader* rdr)
return printnames;
}
static PgfSequence*
pgf_read_sequence(PgfReader* rdr);
static PgfSymbols*
pgf_read_symbols(PgfReader* rdr);
static void
pgf_read_alternative(PgfReader* rdr, PgfAlternative* alt)
{
alt->form = pgf_read_sequence(rdr);
alt->form = pgf_read_symbols(rdr);
gu_return_on_exn(rdr->err,);
size_t n_prefixes = pgf_read_len(rdr);
@@ -692,7 +692,7 @@ pgf_read_symbol(PgfReader* rdr)
break;
}
case PGF_SYMBOL_KP: {
PgfSequence* default_form = pgf_read_sequence(rdr);
PgfSymbols* default_form = pgf_read_symbols(rdr);
gu_return_on_exn(rdr->err, gu_null_variant);
size_t n_forms = pgf_read_len(rdr);
@@ -732,21 +732,21 @@ pgf_read_symbol(PgfReader* rdr)
return sym;
}
static PgfSequence*
pgf_read_sequence(PgfReader* rdr)
static PgfSymbols*
pgf_read_symbols(PgfReader* rdr)
{
size_t len = pgf_read_len(rdr);
gu_return_on_exn(rdr->err, NULL);
PgfSequence* seq = gu_new_seq(PgfSymbol, len, rdr->opool);
PgfSymbols* syms = gu_new_seq(PgfSymbol, len, rdr->opool);
for (size_t i = 0; i < len; i++) {
PgfSymbol sym = pgf_read_symbol(rdr);
gu_return_on_exn(rdr->err, NULL);
gu_seq_set(seq, PgfSymbol, i, sym);
gu_seq_set(syms, PgfSymbol, i, sym);
}
return seq;
return syms;
}
static PgfSequences*
@@ -755,12 +755,14 @@ pgf_read_sequences(PgfReader* rdr)
size_t len = pgf_read_len(rdr);
gu_return_on_exn(rdr->err, NULL);
PgfSequences* seqs = gu_new_seq(PgfSequence*, len, rdr->opool);
PgfSequences* seqs = gu_new_seq(PgfSequence, len, rdr->opool);
for (size_t i = 0; i < len; i++) {
PgfSequence* seq = pgf_read_sequence(rdr);
PgfSymbols* syms = pgf_read_symbols(rdr);
gu_return_on_exn(rdr->err, NULL);
gu_seq_set(seqs, PgfSequence*, i, seq);
gu_seq_index(seqs, PgfSequence, i)->syms = syms;
gu_seq_index(seqs, PgfSequence, i)->idx = NULL;
}
return seqs;
@@ -793,7 +795,7 @@ pgf_read_cncfun(PgfReader* rdr, PgfAbstr* abstr, PgfConcr* concr, int funid)
return NULL;
}
cncfun->lins[i] = gu_seq_get(concr->sequences, PgfSequence*, seqid);
cncfun->lins[i] = gu_seq_index(concr->sequences, PgfSequence, seqid);
}
return cncfun;
@@ -1149,8 +1151,6 @@ pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr)
gu_new_int_map(PgfCCat*, &gu_null_struct, rdr->opool);
concr->fun_indices = gu_map_type_new(PgfCncFunOverloadMap, rdr->opool);
concr->coerce_idx = gu_map_type_new(PgfCncOverloadMap, rdr->opool);
concr->epsilon_idx = gu_map_type_new(PgfProductionIdx, rdr->opool);
concr->leftcorner_tok_idx = gu_map_type_new(PgfLeftcornerTokIdx,rdr->opool);
pgf_read_lindefs(rdr, concr);
pgf_read_ccats(rdr, concr);
concr->cnccats = pgf_read_cnccats(rdr, abstr, concr);
@@ -1200,7 +1200,7 @@ pgf_read_pgf(PgfReader* rdr) {
pgf_read_abstract(rdr, &pgf->abstract);
gu_return_on_exn(rdr->err, NULL);
pgf->concretes = pgf_read_concretes(rdr, &pgf->abstract);
gu_return_on_exn(rdr->err, NULL);

View File

@@ -110,12 +110,11 @@ int main(int argc, char* argv[]) {
clock_t start = clock();
GuIn *in = gu_string_in(line, ppool);
PgfLexer *lexer = pgf_new_simple_lexer(in, ppool);
GuEnum* result = pgf_parse_with_heuristics(concr, cat, lexer, heuristics, ppool, ppool);
GuExn* parse_err = gu_new_exn(NULL, gu_kind(type), ppool);
GuEnum* result = pgf_parse_with_heuristics(concr, cat, line, heuristics, parse_err, ppool, ppool);
PgfExprProb* ep = NULL;
if (result != NULL)
if (gu_ok(parse_err))
ep = gu_next(result, PgfExprProb*, ppool);
clock_t end = clock();

View File

@@ -2,6 +2,7 @@
#include <gu/map.h>
#include <gu/enum.h>
#include <gu/file.h>
#include <gu/exn.h>
#include <pgf/pgf.h>
#include <pgf/parser.h>
#include <pgf/literals.h>
@@ -153,23 +154,19 @@ int main(int argc, char* argv[]) {
// sentence, so our memory usage doesn't increase over time.
ppool = gu_new_pool();
GuIn *in =
gu_string_in(line, ppool);
PgfLexer *lexer =
pgf_new_simple_lexer(in, ppool);
clock_t start = clock();
GuExn* parse_err = gu_new_exn(NULL, gu_kind(type), ppool);
result =
pgf_parse(from_concr, cat, lexer, ppool, ppool);
if (result == NULL) {
PgfToken tok =
pgf_lexer_current_token(lexer);
if (*tok == 0)
gu_puts("Couldn't begin parsing", out, err);
else {
pgf_parse(from_concr, cat, line, parse_err, ppool, ppool);
if (!gu_ok(parse_err)) {
if (gu_exn_caught(parse_err) == gu_type(PgfExn)) {
GuString msg = gu_exn_caught_data(parse_err);
gu_string_write(msg, out, err);
gu_putc('\n', out, err);
} else if (gu_exn_caught(parse_err) == gu_type(PgfParseError)) {
gu_puts("Unexpected token: \"", out, err);
GuString tok = gu_exn_caught_data(parse_err);
gu_string_write(tok, out, err);
gu_puts("\"\n", out, err);
}

View File

@@ -1,6 +1,5 @@
#include <pgf/pgf.h>
#include <pgf/reader.h>
#include <pgf/lexer.h>
#include <gu/mem.h>
#include <gu/exn.h>
#include <gu/utf8.h>
@@ -19,7 +18,7 @@ gu2j_string(JNIEnv *env, GuString s) {
jchar* dst = utf16;
while (s-utf8 < len) {
GuUCS ucs = gu_utf8_decode((const uint8_t**) &s);
if (ucs <= 0xFFFF) {
*dst++ = ucs;
} else {
@@ -281,21 +280,21 @@ Java_org_grammaticalframework_pgf_Parser_parse
GuString startCat = j2gu_string(env, jstartCat, pool);
GuString s = j2gu_string(env, js, pool);
GuIn* in = gu_string_in(s, pool);
PgfLexer *lexer = pgf_new_simple_lexer(in, pool);
GuExn* parse_err = gu_new_exn(NULL, gu_kind(type), pool);
GuEnum* res =
pgf_parse(get_ref(env, concr), startCat, lexer, pool, out_pool);
pgf_parse(get_ref(env, concr), startCat, s, parse_err, pool, out_pool);
if (res == NULL) {
PgfToken tok =
pgf_lexer_current_token(lexer);
if (*tok == 0)
throw_string_exception(env, "org/grammaticalframework/pgf/PGFError", "The sentence cannot be parsed");
else
throw_jstring_exception(env, "org/grammaticalframework/pgf/ParseError", gu2j_string(env, tok));
if (!gu_ok(parse_err)) {
if (gu_exn_caught(parse_err) == gu_type(PgfExn)) {
GuString msg = (GuString) gu_exn_caught_data(parse_err);
jstring jmsg = gu2j_string(env, msg);
throw_string_exception(env, "org/grammaticalframework/pgf/PGFError", jmsg);
} else if (gu_exn_caught(parse_err) == gu_type(PgfParseError)) {
GuString tok = (GuString) gu_exn_caught_data(parse_err);
jstring jtok = gu2j_string(env, tok);
throw_jstring_exception(env, "org/grammaticalframework/pgf/ParseError", jtok);
}
gu_pool_free(pool);
gu_pool_free(out_pool);

View File

@@ -1046,48 +1046,6 @@ Concr_printName(ConcrObject* self, PyObject *args)
return PyString_FromString(pgf_print_name(self->concr, name));
}
// Lexer adapter whose tokens are pulled from a Python iterator.
typedef struct {
// Must stay the first member: the lexer code casts between
// PgfLexer* and PgfPythonLexer* in both directions.
PgfLexer base;
// Object handed to PyIter_Next to obtain each token. The constructor
// stores the pointer as given, without taking an extra reference.
PyObject* pylexer;
// Pool into which each token string is copied before it is returned.
GuPool* pool;
} PgfPythonLexer;
GU_DEFINE_TYPE(PyPgfLexerExn, abstract, _);
// Reads the next token from the Python iterator wrapped by the lexer.
//
// base: the lexer; must be the `base` member of a PgfPythonLexer.
// err:  receives GuEOF when the iterator is exhausted, or PyPgfLexerExn
//       when the iterator (or the string conversion) failed on the
//       Python side.
//
// Returns the new current token, copied into the lexer's pool, or the
// empty string when an exception was raised. The same value is left in
// base->tok.
static PgfToken
pypgf_python_lexer_read_token(PgfLexer *base, GuExn* err)
{
	PgfPythonLexer* lexer = (PgfPythonLexer*) base;
	lexer->base.tok = "";

	PyObject* item = PyIter_Next(lexer->pylexer);
	if (item == NULL) {
		// PyIter_Next returns NULL both at the end of the iteration and
		// on failure; only a failure leaves a Python error pending.
		if (PyErr_Occurred() != NULL) {
			gu_raise(err, PyPgfLexerExn);
		} else {
			gu_raise(err, GuEOF);
		}
		return lexer->base.tok;
	}

	const char* str = PyString_AsString(item);
	if (str == NULL) {
		gu_raise(err, PyPgfLexerExn);
	} else {
		lexer->base.tok = gu_string_copy(str, lexer->pool);
	}

	// PyIter_Next hands back a new reference. It can only be released
	// here, after the copy, because str points into item's own buffer.
	Py_DECREF(item);

	return lexer->base.tok;
}
// Allocates a PgfPythonLexer in `pool` that reads its tokens from
// `pylexer`, and returns it through its generic PgfLexer interface.
// The current token starts out as the empty string. `pylexer` is stored
// as given; no additional reference is taken here.
static PgfLexer*
pypgf_new_python_lexer(PyObject* pylexer, GuPool* pool)
{
	PgfPythonLexer* self = gu_new(PgfPythonLexer, pool);

	// Python-specific state.
	self->pylexer = pylexer;
	self->pool = pool;

	// Generic lexer interface; `base` is the first member, so its
	// address is the address of the whole object.
	PgfLexer* base = &self->base;
	base->tok = "";
	base->read_token = pypgf_python_lexer_read_token;

	return base;
}
#if ( (PY_VERSION_HEX < 0x02070000) \
|| ((PY_VERSION_HEX >= 0x03000000) \
&& (PY_VERSION_HEX < 0x03010000)) )
@@ -1114,35 +1072,19 @@ void pypgf_container_descructor(PyObject *capsule)
static IterObject*
Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
{
static char *kwlist[] = {"sentence", "tokens", "cat", "n", "heuristics", NULL};
static char *kwlist[] = {"sentence", "cat", "n", "heuristics", NULL};
int len;
const uint8_t *buf = NULL;
PyObject* py_lexer = NULL;
const char *sentence = NULL;
PgfCId catname = pgf_start_cat(self->grammar->pgf);
int max_count = -1;
double heuristics = -1;
if (!PyArg_ParseTupleAndKeywords(args, keywds, "|s#Osid", kwlist,
&buf, &len, &py_lexer, &catname, &max_count, &heuristics))
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|sid", kwlist,
&sentence, &catname, &max_count, &heuristics))
return NULL;
if ((buf == NULL && py_lexer == NULL) ||
(buf != NULL && py_lexer != NULL)) {
PyErr_SetString(PyExc_TypeError, "either the sentence or the tokens argument must be provided");
return NULL;
}
if (py_lexer != NULL) {
// get an iterator out of the iterable object
py_lexer = PyObject_GetIter(py_lexer);
if (py_lexer == NULL)
return NULL;
}
IterObject* pyres = (IterObject*)
pgf_IterType.tp_alloc(&pgf_IterType, 0);
if (pyres == NULL) {
Py_XDECREF(py_lexer);
return NULL;
}
@@ -1160,30 +1102,22 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
pyres->counter = 0;
pyres->fetch = Iter_fetch_expr;
PgfLexer *lexer = NULL;
if (buf != NULL) {
GuIn* in = gu_data_in(buf, len, pyres->pool);
lexer = pgf_new_simple_lexer(in, pyres->pool);
}
if (py_lexer != NULL) {
lexer = pypgf_new_python_lexer(py_lexer, pyres->pool);
}
GuExn* parse_err = gu_new_exn(NULL, gu_kind(type), pyres->pool);
pyres->res =
pgf_parse_with_heuristics(self->concr, catname, lexer,
heuristics, pyres->pool, out_pool);
pgf_parse_with_heuristics(self->concr, catname, sentence,
heuristics, parse_err,
pyres->pool, out_pool);
if (pyres->res == NULL) {
PgfToken tok =
pgf_lexer_current_token(lexer);
if (*tok == 0)
PyErr_SetString(PGFError, "The sentence cannot be parsed");
else {
if (!gu_ok(parse_err)) {
if (gu_exn_caught(parse_err) == gu_type(PgfExn)) {
GuString msg = (GuString) gu_exn_caught_data(parse_err);
PyErr_SetString(PGFError, msg);
} else if (gu_exn_caught(parse_err) == gu_type(PgfParseError)) {
GuString tok = (GuString) gu_exn_caught_data(parse_err);
PyObject* py_tok = PyString_FromString(tok);
PyObject_SetAttrString(ParseError, "token", py_tok);
PyErr_Format(ParseError, "Unexpected token: \"%s\"",
PyString_AsString(py_tok));
PyErr_Format(ParseError, "Unexpected token: \"%s\"", tok);
Py_DECREF(py_tok);
}
@@ -1191,45 +1125,26 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
pyres = NULL;
}
Py_XDECREF(py_lexer);
return pyres;
}
static IterObject*
Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds)
{
static char *kwlist[] = {"sentence", "tokens", "cat",
"prefix", "n", NULL};
static char *kwlist[] = {"sentence", "cat", "prefix", "n", NULL};
int len;
const uint8_t *buf = NULL;
PyObject* py_lexer = NULL;
const char *sentence = NULL;
GuString catname = pgf_start_cat(self->grammar->pgf);
GuString prefix = "";
int max_count = -1;
if (!PyArg_ParseTupleAndKeywords(args, keywds, "|s#Ossi", kwlist,
&buf, &len, &py_lexer, &catname,
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|ssi", kwlist,
&sentence, &catname,
&prefix, &max_count))
return NULL;
if ((buf == NULL && py_lexer == NULL) ||
(buf != NULL && py_lexer != NULL)) {
PyErr_SetString(PyExc_TypeError, "either the sentence or the tokens argument must be provided");
return NULL;
}
if (py_lexer != NULL) {
// get an iterator out of the iterable object
py_lexer = PyObject_GetIter(py_lexer);
if (py_lexer == NULL)
return NULL;
}
IterObject* pyres = (IterObject*)
pgf_IterType.tp_alloc(&pgf_IterType, 0);
if (pyres == NULL) {
Py_XDECREF(py_lexer);
return NULL;
}
@@ -1245,37 +1160,27 @@ Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds)
GuPool *tmp_pool = gu_local_pool();
PgfLexer *lexer = NULL;
if (buf != NULL) {
GuIn* in = gu_data_in(buf, len, tmp_pool);
lexer = pgf_new_simple_lexer(in, tmp_pool);
}
if (py_lexer != NULL) {
lexer = pypgf_new_python_lexer(py_lexer, tmp_pool);
}
GuExn* parse_err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
pyres->res =
pgf_complete(self->concr, catname, lexer, prefix, pyres->pool);
pgf_complete(self->concr, catname, sentence, prefix, parse_err, pyres->pool);
if (pyres->res == NULL) {
if (!gu_ok(parse_err)) {
Py_DECREF(pyres);
pyres = NULL;
PgfToken tok =
pgf_lexer_current_token(lexer);
if (*tok == 0)
PyErr_SetString(PGFError, "The sentence cannot be parsed");
else {
if (gu_exn_caught(parse_err) == gu_type(PgfExn)) {
GuString msg = (GuString) gu_exn_caught_data(parse_err);
PyErr_SetString(PGFError, msg);
} else if (gu_exn_caught(parse_err) == gu_type(PgfParseError)) {
GuString tok = (GuString) gu_exn_caught_data(parse_err);
PyObject* py_tok = PyString_FromString(tok);
PyObject_SetAttrString(ParseError, "token", py_tok);
PyErr_Format(ParseError, "Unexpected token: \"%s\"",
PyString_AsString(py_tok));
PyErr_Format(ParseError, "Unexpected token: \"%s\"", tok);
Py_DECREF(py_tok);
}
}
Py_XDECREF(py_lexer);
gu_pool_free(tmp_pool);
return pyres;
@@ -1671,56 +1576,21 @@ pypgf_collect_morpho(PgfMorphoCallback* self,
}
// Implementation of the Python-level "lookupMorpho" method.
//
// Runs pgf_lookup_morpho on the concrete syntax held by `self`, passing a
// callback built from pypgf_collect_morpho together with a freshly created
// Python list, and returns that list. Returns NULL when argument parsing
// fails or when the lookup leaves `err` in a non-ok state.
//
// NOTE(review): this span carries two overlapping revisions of the function:
// one taking "sentence"/"tokens" keywords and driving a PgfLexer, and one
// taking a single string. It has two signatures and duplicated declarations
// of tmp_pool and err, so it is not compilable as written -- confirm which
// revision is meant to remain.
static PyObject*
Concr_lookupMorpho(ConcrObject* self, PyObject *args, PyObject *keywds) {
static char *kwlist[] = {"sentence", "tokens", NULL};
int len;
const uint8_t *buf = NULL;
PyObject* py_lexer = NULL;
if (!PyArg_ParseTupleAndKeywords(args, keywds, "|s#O", kwlist,
&buf, &len, &py_lexer))
Concr_lookupMorpho(ConcrObject* self, PyObject *args) {
GuString sent;
if (!PyArg_ParseTuple(args, "s", &sent))
return NULL;
// Exactly one of the two inputs (raw sentence or token iterable) is accepted.
if ((buf == NULL && py_lexer == NULL) ||
(buf != NULL && py_lexer != NULL)) {
PyErr_SetString(PyExc_TypeError, "either the sentence or the tokens argument must be provided");
return NULL;
}
// Scratch pool for the lexer and the exception object; freed before returning.
GuPool* tmp_pool = gu_local_pool();
PgfLexer *lexer = NULL;
if (buf != NULL) {
GuIn* in = gu_data_in(buf, len, tmp_pool);
lexer = pgf_new_simple_lexer(in, tmp_pool);
}
if (py_lexer != NULL) {
// get an iterator out of the iterable object
py_lexer = PyObject_GetIter(py_lexer);
if (py_lexer == NULL) {
gu_pool_free(tmp_pool);
return NULL;
}
lexer = pypgf_new_python_lexer(py_lexer, tmp_pool);
}
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
GuPool *tmp_pool = gu_local_pool();
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
// NOTE(review): the result of PyList_New is used without a NULL check.
PyObject* analyses = PyList_New(0);
PyMorphoCallback callback = { { pypgf_collect_morpho }, analyses };
pgf_lookup_morpho(self->concr, lexer, &callback.fn, err);
Py_XDECREF(py_lexer);
pgf_lookup_morpho(self->concr, sent, &callback.fn, err);
// NOTE(review): err was allocated from tmp_pool, yet it is inspected below
// only after the pool has been freed -- confirm the exception object is
// still valid at that point, or move the check ahead of gu_pool_free.
gu_pool_free(tmp_pool);
if (!gu_ok(err)) {
// NOTE(review): no Python exception is set in this function before the
// NULL return -- verify that one is already pending on this path.
Py_DECREF(analyses);
return NULL;
}
return analyses;
}
@@ -1833,7 +1703,7 @@ static PyMethodDef Concr_methods[] = {
{"graphvizParseTree", (PyCFunction)Concr_graphvizParseTree, METH_VARARGS,
"Renders an abstract syntax tree as a parse tree in Graphviz format"
},
{"lookupMorpho", (PyCFunction)Concr_lookupMorpho, METH_VARARGS | METH_KEYWORDS,
{"lookupMorpho", (PyCFunction)Concr_lookupMorpho, METH_VARARGS,
"Looks up a word in the lexicon of the grammar"
},
{"fullFormLexicon", (PyCFunction)Concr_fullFormLexicon, METH_VARARGS,