mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
A major redesign of the C runtime. The parser and the linearizer now fully support BIND. The following things are still broken: parseval, word completion, handling of 'pre', and the robust mode.
This commit is contained in:
@@ -39,7 +39,6 @@ pgfinclude_HEADERS = \
|
||||
pgf/reader.h \
|
||||
pgf/linearizer.h \
|
||||
pgf/parser.h \
|
||||
pgf/lexer.h \
|
||||
pgf/literals.h \
|
||||
pgf/graphviz.h \
|
||||
pgf/pgf.h
|
||||
@@ -93,8 +92,6 @@ libpgf_la_SOURCES = \
|
||||
pgf/parser.h \
|
||||
pgf/jit.c \
|
||||
pgf/parseval.c \
|
||||
pgf/lexer.c \
|
||||
pgf/lexer.h \
|
||||
pgf/literals.c \
|
||||
pgf/literals.h \
|
||||
pgf/reader.h \
|
||||
|
||||
@@ -91,7 +91,7 @@ gu_exn_clear(GuExn* err) {
|
||||
GuType*
|
||||
gu_exn_caught(GuExn* err);
|
||||
|
||||
inline const void*
|
||||
static inline const void*
|
||||
gu_exn_caught_data(GuExn* err)
|
||||
{
|
||||
return err->data.data;
|
||||
|
||||
@@ -37,7 +37,7 @@ gu_utf8_decode(const uint8_t** utf8);
|
||||
void
|
||||
gu_in_utf8_buf(uint8_t** buf, GuIn* in, GuExn* err);
|
||||
|
||||
bool
|
||||
static inline bool
|
||||
gu_is_space(uint8_t c) {
|
||||
return (c == '\t' || c == '\n' || c == '\v' ||
|
||||
c == '\f' || c == '\r' || c == ' ');
|
||||
|
||||
@@ -128,14 +128,16 @@ typedef struct {
|
||||
|
||||
extern GU_DECLARE_TYPE(PgfCncCat, abstract);
|
||||
|
||||
typedef GuString PgfToken;
|
||||
typedef GuSeq PgfTokens;
|
||||
|
||||
bool
|
||||
pgf_tokens_equal(PgfTokens* t1, PgfTokens* t2);
|
||||
|
||||
typedef GuSeq PgfSequence; // -> PgfSymbol
|
||||
typedef GuSeq PgfSequences;
|
||||
typedef GuSeq PgfSymbols;
|
||||
|
||||
typedef struct {
|
||||
PgfSequence* form;
|
||||
PgfSymbols* form;
|
||||
/**< The form of this variant as a list of tokens. */
|
||||
|
||||
GuStrings* prefixes;
|
||||
@@ -154,19 +156,8 @@ extern GU_DECLARE_TYPE(PgfCncFunOverloadMap, GuStringMap);
|
||||
typedef GuMap PgfCncOverloadMap;
|
||||
extern GU_DECLARE_TYPE(PgfCncOverloadMap, GuMap);
|
||||
|
||||
typedef GuMap PgfProductionIdx;
|
||||
extern GU_DECLARE_TYPE(PgfProductionIdx, GuMap);
|
||||
|
||||
typedef GuMap PgfLeftcornerTokIdx;
|
||||
extern GU_DECLARE_TYPE(PgfLeftcornerTokIdx, GuMap);
|
||||
|
||||
typedef struct PgfItem PgfItem;
|
||||
|
||||
typedef struct {
|
||||
bool (*match)(PgfConcr* concr, PgfItem* item, PgfToken tok,
|
||||
PgfExprProb** out_ep, GuPool *pool);
|
||||
} PgfLiteralCallback;
|
||||
|
||||
typedef GuMap PgfCallbacksMap;
|
||||
extern GU_DECLARE_TYPE(PgfCallbacksMap, GuMap);
|
||||
|
||||
@@ -197,7 +188,7 @@ typedef struct PgfSymbolKP
|
||||
/** A prefix-dependent symbol. The form that this symbol takes
|
||||
* depends on the form of a prefix of the following symbol. */
|
||||
{
|
||||
PgfSequence* default_form;
|
||||
PgfSymbols* default_form;
|
||||
/**< Default form that this symbol takes if none of the
|
||||
* variant forms is triggered. */
|
||||
|
||||
@@ -213,6 +204,21 @@ typedef struct {
|
||||
typedef struct {
|
||||
} PgfSymbolBIND;
|
||||
|
||||
typedef struct {
|
||||
PgfExprProb* (*match)(PgfConcr* concr, PgfSymbol* psym, size_t lin_idx,
|
||||
GuString sentence, size_t* poffset,
|
||||
GuPool *pool, GuPool *out_pool);
|
||||
} PgfLiteralCallback;
|
||||
|
||||
typedef GuBuf PgfProductionIdx;
|
||||
|
||||
typedef struct {
|
||||
PgfSymbols* syms; // -> PgfSymbol
|
||||
PgfProductionIdx* idx;
|
||||
} PgfSequence;
|
||||
|
||||
typedef GuSeq PgfSequences;
|
||||
|
||||
typedef struct {
|
||||
PgfAbsFun* absfun;
|
||||
PgfExprProb *ep;
|
||||
@@ -230,8 +236,6 @@ struct PgfConcr {
|
||||
GuMap* ccats;
|
||||
PgfCncFunOverloadMap* fun_indices;
|
||||
PgfCncOverloadMap* coerce_idx;
|
||||
PgfProductionIdx* epsilon_idx;
|
||||
PgfLeftcornerTokIdx* leftcorner_tok_idx;
|
||||
PgfCncFuns* cncfuns;
|
||||
PgfSequences* sequences;
|
||||
PgfCIdMap* cnccats;
|
||||
@@ -274,7 +278,6 @@ typedef struct PgfProductionCoerce
|
||||
} PgfProductionCoerce;
|
||||
|
||||
typedef struct {
|
||||
PgfLiteralCallback *callback;
|
||||
PgfExprProb *ep;
|
||||
GuSeq* lins;
|
||||
} PgfProductionExtern;
|
||||
@@ -287,8 +290,11 @@ typedef struct {
|
||||
typedef GuSeq PgfProductionSeq;
|
||||
extern GU_DECLARE_TYPE(PgfProductionSeq, abstract);
|
||||
|
||||
typedef GuBuf PgfProductionBuf;
|
||||
extern GU_DECLARE_TYPE(PgfProductionBuf, abstract);
|
||||
typedef struct {
|
||||
PgfCCat* ccat;
|
||||
size_t lin_idx;
|
||||
PgfProductionApply* papp;
|
||||
} PgfProductionIdxEntry;
|
||||
|
||||
struct PgfCCat {
|
||||
PgfCncCat* cnccat;
|
||||
|
||||
@@ -1,128 +0,0 @@
|
||||
#include <gu/utf8.h>
|
||||
#include <pgf/pgf.h>
|
||||
#include <pgf/data.h>
|
||||
#include <wctype.h>
|
||||
|
||||
typedef struct {
|
||||
PgfLexer base;
|
||||
GuIn* in;
|
||||
GuPool* pool;
|
||||
GuUCS ucs;
|
||||
} PgfSimpleLexer;
|
||||
|
||||
static void
|
||||
pgf_lexer_read_ucs(PgfSimpleLexer *lexer, GuExn* err)
|
||||
{
|
||||
lexer->ucs = gu_in_utf8(lexer->in, err);
|
||||
if (gu_exn_is_raised(err)) {
|
||||
gu_exn_clear(err);
|
||||
lexer->ucs = ' ';
|
||||
}
|
||||
}
|
||||
|
||||
static PgfToken
|
||||
pgf_simple_lexer_read_token(PgfLexer *base, GuExn* err)
|
||||
{
|
||||
PgfSimpleLexer* lexer = (PgfSimpleLexer*) base;
|
||||
GuPool* tmp_pool = gu_new_pool();
|
||||
|
||||
GuStringBuf* buf = gu_string_buf(tmp_pool);
|
||||
GuOut* out = gu_string_buf_out(buf);
|
||||
|
||||
while (iswspace(lexer->ucs)) {
|
||||
lexer->ucs = gu_in_utf8(lexer->in, err);
|
||||
if (gu_exn_is_raised(err))
|
||||
goto stop;
|
||||
}
|
||||
|
||||
if (iswalpha(lexer->ucs) ||
|
||||
lexer->ucs == '\'' ||
|
||||
lexer->ucs == '_') {
|
||||
int counter = 0;
|
||||
do {
|
||||
gu_out_utf8(lexer->ucs, out, err);
|
||||
if (gu_exn_is_raised(err))
|
||||
goto stop;
|
||||
counter++;
|
||||
pgf_lexer_read_ucs(lexer, err);
|
||||
|
||||
if (lexer->ucs == '.' && counter < 4) {
|
||||
// perhaps an abbreviation
|
||||
gu_out_utf8(lexer->ucs, out, err);
|
||||
if (gu_exn_is_raised(err))
|
||||
goto stop;
|
||||
counter = 0;
|
||||
pgf_lexer_read_ucs(lexer, err);
|
||||
}
|
||||
} while (iswalnum(lexer->ucs) ||
|
||||
lexer->ucs == '\'' ||
|
||||
lexer->ucs == '_');
|
||||
} else if (iswdigit(lexer->ucs) || lexer->ucs == '-') {
|
||||
if (lexer->ucs == '-') {
|
||||
gu_out_utf8(lexer->ucs, out, err);
|
||||
if (gu_exn_is_raised(err))
|
||||
goto stop;
|
||||
|
||||
pgf_lexer_read_ucs(lexer, err);
|
||||
if (!iswdigit(lexer->ucs))
|
||||
goto stop;
|
||||
}
|
||||
|
||||
do {
|
||||
gu_out_utf8(lexer->ucs, out, err);
|
||||
if (gu_exn_is_raised(err))
|
||||
goto stop;
|
||||
|
||||
pgf_lexer_read_ucs(lexer, err);
|
||||
} while (iswdigit(lexer->ucs));
|
||||
|
||||
if (lexer->ucs == '.') {
|
||||
gu_out_utf8(lexer->ucs, out, err);
|
||||
if (gu_exn_is_raised(err))
|
||||
goto stop;
|
||||
|
||||
pgf_lexer_read_ucs(lexer, err);
|
||||
while (iswdigit(lexer->ucs)) {
|
||||
gu_out_utf8(lexer->ucs, out, err);
|
||||
if (gu_exn_is_raised(err))
|
||||
goto stop;
|
||||
pgf_lexer_read_ucs(lexer, err);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
gu_out_utf8(lexer->ucs, out, err);
|
||||
if (gu_exn_is_raised(err))
|
||||
goto stop;
|
||||
pgf_lexer_read_ucs(lexer, err);
|
||||
}
|
||||
|
||||
stop:
|
||||
lexer->base.tok = gu_string_buf_freeze(buf, lexer->pool);
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
return lexer->base.tok;
|
||||
}
|
||||
|
||||
PgfLexer*
|
||||
pgf_new_simple_lexer(GuIn *in, GuPool *pool)
|
||||
{
|
||||
PgfSimpleLexer* lexer = gu_new(PgfSimpleLexer, pool);
|
||||
lexer->base.read_token = pgf_simple_lexer_read_token;
|
||||
lexer->base.tok = "";
|
||||
lexer->in = in;
|
||||
lexer->pool = pool;
|
||||
lexer->ucs = ' ';
|
||||
return ((PgfLexer*) lexer);
|
||||
}
|
||||
|
||||
PgfToken
|
||||
pgf_lexer_read_token(PgfLexer *lexer, GuExn* err)
|
||||
{
|
||||
return lexer->read_token(lexer, err);
|
||||
}
|
||||
|
||||
PgfToken
|
||||
pgf_lexer_current_token(PgfLexer *lexer)
|
||||
{
|
||||
return lexer->tok;
|
||||
}
|
||||
@@ -1,31 +0,0 @@
|
||||
#ifndef PGF_LEXER_H_
|
||||
#define PGF_LEXER_H_
|
||||
|
||||
#include <gu/in.h>
|
||||
#include <pgf/expr.h>
|
||||
|
||||
/// A single lexical token
|
||||
typedef GuString PgfToken;
|
||||
typedef GuSeq PgfTokens; // -> PgfToken
|
||||
|
||||
typedef struct {
|
||||
prob_t prob;
|
||||
PgfCId cat;
|
||||
PgfToken tok;
|
||||
} PgfTokenProb;
|
||||
|
||||
typedef struct {
|
||||
PgfToken (*read_token)();
|
||||
PgfToken tok;
|
||||
} PgfLexer;
|
||||
|
||||
PgfLexer*
|
||||
pgf_new_simple_lexer(GuIn *in, GuPool *pool);
|
||||
|
||||
PgfToken
|
||||
pgf_lexer_read_token(PgfLexer *lexer, GuExn* err);
|
||||
|
||||
PgfToken
|
||||
pgf_lexer_current_token(PgfLexer *lexer);
|
||||
|
||||
#endif // PGF_LEXER_H_
|
||||
@@ -455,14 +455,13 @@ pgf_lzr_concretize(PgfConcr* concr, PgfExpr expr, GuPool* pool)
|
||||
}
|
||||
|
||||
void
|
||||
pgf_lzr_linearize_sequence(PgfConcr* concr, PgfCncTreeApp* fapp,
|
||||
PgfSequence* seq, uint16_t seq_idx,
|
||||
PgfLinFuncs** fnsp)
|
||||
pgf_lzr_linearize_symbols(PgfConcr* concr, PgfCncTreeApp* fapp,
|
||||
PgfSymbols* syms, uint16_t sym_idx,
|
||||
PgfLinFuncs** fnsp)
|
||||
{
|
||||
size_t nsyms = gu_seq_length(seq);
|
||||
PgfSymbol* syms = gu_seq_data(seq);
|
||||
for (size_t i = seq_idx; i < nsyms; i++) {
|
||||
PgfSymbol sym = syms[i];
|
||||
size_t nsyms = gu_seq_length(syms);
|
||||
for (size_t i = sym_idx; i < nsyms; i++) {
|
||||
PgfSymbol sym = gu_seq_get(syms, PgfSymbol, i);
|
||||
GuVariantInfo sym_i = gu_variant_open(sym);
|
||||
switch (sym_i.tag) {
|
||||
case PGF_SYMBOL_CAT:
|
||||
@@ -488,7 +487,7 @@ pgf_lzr_linearize_sequence(PgfConcr* concr, PgfCncTreeApp* fapp,
|
||||
case PGF_SYMBOL_KP: {
|
||||
// TODO: correct prefix-dependencies
|
||||
PgfSymbolKP* kp = sym_i.data;
|
||||
pgf_lzr_linearize_sequence(concr, fapp, kp->default_form, 0, fnsp);
|
||||
pgf_lzr_linearize_symbols(concr, fapp, kp->default_form, 0, fnsp);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_NE: {
|
||||
@@ -528,9 +527,7 @@ pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs
|
||||
}
|
||||
|
||||
gu_require(lin_idx < fun->n_lins);
|
||||
|
||||
PgfSequence* seq = fun->lins[lin_idx];
|
||||
pgf_lzr_linearize_sequence(concr, fapp, seq, 0, fnsp);
|
||||
pgf_lzr_linearize_symbols(concr, fapp, fun->lins[lin_idx]->syms, 0, fnsp);
|
||||
|
||||
if (fns->end_phrase) {
|
||||
fns->end_phrase(fnsp,
|
||||
@@ -681,7 +678,7 @@ pgf_lzr_linearize_simple(PgfConcr* concr, PgfCncTree ctree,
|
||||
}
|
||||
|
||||
GuString
|
||||
pgf_get_tokens(PgfSequence* seq, uint16_t seq_idx, GuPool* pool)
|
||||
pgf_get_tokens(PgfSymbols* syms, uint16_t sym_idx, GuPool* pool)
|
||||
{
|
||||
GuPool* tmp_pool = gu_new_pool();
|
||||
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
|
||||
@@ -695,7 +692,7 @@ pgf_get_tokens(PgfSequence* seq, uint16_t seq_idx, GuPool* pool)
|
||||
.err = err
|
||||
};
|
||||
|
||||
pgf_lzr_linearize_sequence(NULL, NULL, seq, seq_idx, &flin.funcs);
|
||||
pgf_lzr_linearize_symbols(NULL, NULL, syms, sym_idx, &flin.funcs);
|
||||
|
||||
GuString tokens = gu_ok(err) ? gu_string_buf_freeze(sbuf, pool)
|
||||
: "";
|
||||
|
||||
@@ -76,5 +76,5 @@ pgf_lzr_linearize_simple(PgfConcr* concr, PgfCncTree ctree,
|
||||
#ifdef PGF_PARSER_H_
|
||||
// Used internally in the parser
|
||||
GuString
|
||||
pgf_get_tokens(PgfSequence* seq, uint16_t seq_idx, GuPool* pool);
|
||||
pgf_get_tokens(PgfSymbols* sym, uint16_t sym_idx, GuPool* pool);
|
||||
#endif
|
||||
|
||||
@@ -11,48 +11,41 @@ GU_DEFINE_TYPE(PgfCallbacksMap, GuMap,
|
||||
gu_ptr_type(PgfLiteralCallback), &gu_null_struct);
|
||||
|
||||
|
||||
static bool
|
||||
pgf_match_string_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
|
||||
PgfExprProb** out_ep, GuPool *pool)
|
||||
static PgfExprProb*
|
||||
pgf_match_string_lit(PgfConcr* concr, PgfSymbol* psym, size_t lin_idx,
|
||||
GuString sentence, size_t* poffset,
|
||||
GuPool *pool, GuPool *out_pool)
|
||||
{
|
||||
GuPool* tmp_pool = gu_new_pool();
|
||||
|
||||
size_t lin_idx;
|
||||
PgfSequence* seq;
|
||||
pgf_item_sequence(item, &lin_idx, &seq, tmp_pool);
|
||||
gu_assert(lin_idx == 0);
|
||||
|
||||
bool accepted = false;
|
||||
int n_syms = gu_seq_length(seq);
|
||||
if (n_syms == 0) {
|
||||
*out_ep = NULL;
|
||||
accepted = true;
|
||||
} else if (n_syms == 1) {
|
||||
PgfExprProb* ep = gu_new(PgfExprProb, pool);
|
||||
size_t offset = *poffset;
|
||||
while (!gu_is_space(sentence[offset]))
|
||||
offset++;
|
||||
|
||||
size_t len = offset - *poffset;
|
||||
if (len > 0) {
|
||||
PgfExprProb* ep = gu_new(PgfExprProb, out_pool);
|
||||
ep->prob = 0;
|
||||
|
||||
PgfSymbolKS* sks =
|
||||
gu_variant_data(gu_seq_get(seq, PgfSymbol, 0));
|
||||
|
||||
PgfExprLit *expr_lit =
|
||||
gu_new_variant(PGF_EXPR_LIT,
|
||||
PgfExprLit,
|
||||
&ep->expr, pool);
|
||||
&ep->expr, out_pool);
|
||||
|
||||
PgfLiteralStr *lit_str =
|
||||
gu_new_flex_variant(PGF_LITERAL_STR,
|
||||
PgfLiteralStr,
|
||||
val, strlen(sks->token)+1,
|
||||
&expr_lit->lit, pool);
|
||||
strcpy(lit_str->val, sks->token);
|
||||
val, len+1,
|
||||
&expr_lit->lit, out_pool);
|
||||
memcpy(lit_str->val, sentence+*poffset, len);
|
||||
lit_str->val[len] = 0;
|
||||
|
||||
*out_ep = ep;
|
||||
accepted = false;
|
||||
pgf_add_extern_tok(psym, lit_str->val, pool);
|
||||
*poffset = offset;
|
||||
return ep;
|
||||
} else {
|
||||
*out_ep = NULL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
return accepted;
|
||||
}
|
||||
|
||||
static PgfLiteralCallback pgf_string_literal_callback =
|
||||
@@ -60,55 +53,46 @@ static PgfLiteralCallback pgf_string_literal_callback =
|
||||
|
||||
|
||||
|
||||
static bool
|
||||
pgf_match_int_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
|
||||
PgfExprProb** out_ep, GuPool *pool)
|
||||
static PgfExprProb*
|
||||
pgf_match_int_lit(PgfConcr* concr, PgfSymbol* psym, size_t lin_idx,
|
||||
GuString sentence, size_t* poffset,
|
||||
GuPool *pool, GuPool *out_pool)
|
||||
{
|
||||
GuPool* tmp_pool = gu_new_pool();
|
||||
|
||||
size_t lin_idx;
|
||||
PgfSequence* seq;
|
||||
pgf_item_sequence(item, &lin_idx, &seq, tmp_pool);
|
||||
gu_assert(lin_idx == 0);
|
||||
|
||||
bool accepted = false;
|
||||
int n_syms = gu_seq_length(seq);
|
||||
if (n_syms == 0) {
|
||||
int val;
|
||||
size_t offset = *poffset;
|
||||
while (!gu_is_space(sentence[offset]))
|
||||
offset++;
|
||||
|
||||
*out_ep = NULL;
|
||||
accepted = gu_string_to_int(tok, &val);
|
||||
} else if (n_syms == 1) {
|
||||
PgfSymbolKS* sks =
|
||||
gu_variant_data(gu_seq_get(seq, PgfSymbol, 0));
|
||||
size_t len = offset - *poffset;
|
||||
if (len > 0) {
|
||||
PgfToken tok = gu_malloc(pool, len+1);
|
||||
memcpy((char*) tok, sentence+*poffset, len);
|
||||
((char*) tok)[len] = 0;
|
||||
|
||||
int val;
|
||||
if (!gu_string_to_int(sks->token, &val)) {
|
||||
*out_ep = NULL;
|
||||
} else {
|
||||
PgfExprProb* ep = gu_new(PgfExprProb, pool);
|
||||
ep->prob = 0;
|
||||
if (!gu_string_to_int(tok, &val))
|
||||
return NULL;
|
||||
|
||||
PgfExprLit *expr_lit =
|
||||
gu_new_variant(PGF_EXPR_LIT,
|
||||
PgfExprLit,
|
||||
&ep->expr, pool);
|
||||
PgfLiteralInt *lit_int =
|
||||
gu_new_variant(PGF_LITERAL_INT,
|
||||
PgfLiteralInt,
|
||||
&expr_lit->lit, pool);
|
||||
lit_int->val = val;
|
||||
PgfExprProb* ep = gu_new(PgfExprProb, pool);
|
||||
ep->prob = 0;
|
||||
|
||||
*out_ep = ep;
|
||||
}
|
||||
|
||||
accepted = false;
|
||||
PgfExprLit *expr_lit =
|
||||
gu_new_variant(PGF_EXPR_LIT,
|
||||
PgfExprLit,
|
||||
&ep->expr, pool);
|
||||
PgfLiteralInt *lit_int =
|
||||
gu_new_variant(PGF_LITERAL_INT,
|
||||
PgfLiteralInt,
|
||||
&expr_lit->lit, pool);
|
||||
lit_int->val = val;
|
||||
|
||||
pgf_add_extern_tok(psym, tok, pool);
|
||||
*poffset = offset;
|
||||
return ep;
|
||||
} else {
|
||||
*out_ep = NULL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
return accepted;
|
||||
}
|
||||
|
||||
static PgfLiteralCallback pgf_int_literal_callback =
|
||||
@@ -116,55 +100,46 @@ static PgfLiteralCallback pgf_int_literal_callback =
|
||||
|
||||
|
||||
|
||||
static bool
|
||||
pgf_match_float_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
|
||||
PgfExprProb** out_ep, GuPool *pool)
|
||||
static PgfExprProb*
|
||||
pgf_match_float_lit(PgfConcr* concr, PgfSymbol* psym, size_t lin_idx,
|
||||
GuString sentence, size_t* poffset,
|
||||
GuPool *pool, GuPool *out_pool)
|
||||
{
|
||||
GuPool* tmp_pool = gu_new_pool();
|
||||
|
||||
size_t lin_idx;
|
||||
PgfSequence* seq;
|
||||
pgf_item_sequence(item, &lin_idx, &seq, tmp_pool);
|
||||
gu_assert(lin_idx == 0);
|
||||
|
||||
bool accepted = false;
|
||||
int n_syms = gu_seq_length(seq);
|
||||
if (n_syms == 0) {
|
||||
double val;
|
||||
size_t offset = *poffset;
|
||||
while (!gu_is_space(sentence[offset]))
|
||||
offset++;
|
||||
|
||||
*out_ep = NULL;
|
||||
accepted = gu_string_to_double(tok, &val);
|
||||
} else if (n_syms == 1) {
|
||||
PgfSymbolKS* sks =
|
||||
gu_variant_data(gu_seq_get(seq, PgfSymbol, 0));
|
||||
size_t len = offset - *poffset;
|
||||
if (len > 0) {
|
||||
PgfToken tok = gu_malloc(pool, len+1);
|
||||
memcpy((char*) tok, sentence+*poffset, len);
|
||||
((char*) tok)[len] = 0;
|
||||
|
||||
double val;
|
||||
if (!gu_string_to_double(sks->token, &val)) {
|
||||
*out_ep = NULL;
|
||||
} else {
|
||||
PgfExprProb* ep = gu_new(PgfExprProb, pool);
|
||||
ep->prob = 0;
|
||||
if (!gu_string_to_double(tok, &val))
|
||||
return NULL;
|
||||
|
||||
PgfExprLit *expr_lit =
|
||||
gu_new_variant(PGF_EXPR_LIT,
|
||||
PgfExprLit,
|
||||
&ep->expr, pool);
|
||||
PgfLiteralFlt *lit_flt =
|
||||
gu_new_variant(PGF_LITERAL_FLT,
|
||||
PgfLiteralFlt,
|
||||
&expr_lit->lit, pool);
|
||||
lit_flt->val = val;
|
||||
PgfExprProb* ep = gu_new(PgfExprProb, pool);
|
||||
ep->prob = 0;
|
||||
|
||||
*out_ep = ep;
|
||||
}
|
||||
|
||||
accepted = false;
|
||||
PgfExprLit *expr_lit =
|
||||
gu_new_variant(PGF_EXPR_LIT,
|
||||
PgfExprLit,
|
||||
&ep->expr, pool);
|
||||
PgfLiteralFlt *lit_flt =
|
||||
gu_new_variant(PGF_LITERAL_FLT,
|
||||
PgfLiteralFlt,
|
||||
&expr_lit->lit, pool);
|
||||
lit_flt->val = val;
|
||||
|
||||
pgf_add_extern_tok(psym, tok, pool);
|
||||
*poffset = offset;
|
||||
return ep;
|
||||
} else {
|
||||
*out_ep = NULL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
return accepted;
|
||||
}
|
||||
|
||||
static PgfLiteralCallback pgf_float_literal_callback =
|
||||
@@ -172,45 +147,49 @@ static PgfLiteralCallback pgf_float_literal_callback =
|
||||
|
||||
|
||||
|
||||
static bool
|
||||
pgf_match_name_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
|
||||
PgfExprProb** out_ep, GuPool *pool)
|
||||
static PgfExprProb*
|
||||
pgf_match_name_lit(PgfConcr* concr, PgfSymbol* psym, size_t lin_idx,
|
||||
GuString sentence, size_t* poffset,
|
||||
GuPool *pool, GuPool *out_pool)
|
||||
{
|
||||
GuPool* tmp_pool = gu_new_pool();
|
||||
|
||||
size_t lin_idx;
|
||||
PgfSequence* seq;
|
||||
pgf_item_sequence(item, &lin_idx, &seq, tmp_pool);
|
||||
|
||||
gu_assert(lin_idx == 0);
|
||||
|
||||
GuPool* tmp_pool = gu_new_pool();
|
||||
GuStringBuf *sbuf = gu_string_buf(tmp_pool);
|
||||
GuOut* out = gu_string_buf_out(sbuf);
|
||||
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
|
||||
|
||||
bool iscap = false;
|
||||
if (strcmp(tok, "-") == 0) {
|
||||
iscap = true;
|
||||
} else if (*tok) {
|
||||
GuIn* in = gu_string_in(tok, tmp_pool);
|
||||
iscap = iswupper(gu_in_utf8(in, err));
|
||||
}
|
||||
|
||||
size_t n_syms = gu_seq_length(seq);
|
||||
if (!iscap && n_syms > 0) {
|
||||
GuStringBuf *sbuf = gu_string_buf(tmp_pool);
|
||||
GuOut* out = gu_string_buf_out(sbuf);
|
||||
size_t offset = *poffset;
|
||||
|
||||
for (size_t i = 0; i < n_syms; i++) {
|
||||
if (i > 0)
|
||||
gu_putc(' ', out, err);
|
||||
|
||||
PgfSymbol sym = gu_seq_get(seq, PgfSymbol, i);
|
||||
gu_assert(gu_variant_tag(sym) == PGF_SYMBOL_KS);
|
||||
PgfSymbolKS* sks = gu_variant_data(sym);
|
||||
|
||||
gu_string_write(sks->token, out, err);
|
||||
int i = 0;
|
||||
while (iswupper(sentence[offset])) {
|
||||
size_t len = 0;
|
||||
while (!gu_is_space(sentence[offset+len])) {
|
||||
len++;
|
||||
}
|
||||
|
||||
PgfExprProb* ep = gu_new(PgfExprProb, pool);
|
||||
PgfToken tok = gu_malloc(pool, len+1);
|
||||
memcpy((char*) tok, sentence+offset, len);
|
||||
((char*) tok)[len] = 0;
|
||||
|
||||
pgf_add_extern_tok(psym, tok, pool);
|
||||
|
||||
if (i > 0)
|
||||
gu_putc(' ', out, err);
|
||||
gu_string_write(tok, out, err);
|
||||
|
||||
i++;
|
||||
|
||||
offset += len;
|
||||
*poffset = offset;
|
||||
|
||||
while (gu_is_space(sentence[offset]))
|
||||
offset++;
|
||||
}
|
||||
|
||||
PgfExprProb* ep = NULL;
|
||||
if (i > 0) {
|
||||
ep = gu_new(PgfExprProb, pool);
|
||||
ep->prob = 0;
|
||||
|
||||
PgfExprApp *expr_app =
|
||||
@@ -235,14 +214,11 @@ pgf_match_name_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
|
||||
val, strlen(val)+1,
|
||||
&expr_lit->lit, pool);
|
||||
strcpy(lit_str->val, val);
|
||||
*out_ep = ep;
|
||||
} else {
|
||||
*out_ep = NULL;
|
||||
}
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
|
||||
return iscap;
|
||||
|
||||
return ep;
|
||||
}
|
||||
|
||||
PgfLiteralCallback pgf_nerc_literal_callback =
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -5,119 +5,14 @@
|
||||
#include <pgf/data.h>
|
||||
#include <pgf/expr.h>
|
||||
|
||||
/// Parsing
|
||||
/** @file
|
||||
*
|
||||
* @todo Querying the parser for expected continuations
|
||||
*
|
||||
* @todo Literals and custom categories
|
||||
*
|
||||
* @todo HOAS, dependent types...
|
||||
*/
|
||||
void
|
||||
pgf_add_extern_tok(PgfSymbol* psym, PgfToken tok, GuPool* pool);
|
||||
|
||||
typedef struct PgfParseState PgfParseState;
|
||||
|
||||
/** @}
|
||||
*
|
||||
* @name Parsing a sentence
|
||||
*
|
||||
* The progress of parsing is controlled by the client code. Firstly, the
|
||||
* parsing of a sentence is initiated with #pgf_parser_parse. This returns an
|
||||
* initial #PgfParse object, which represents the state of the parsing. A new
|
||||
* parse state is obtained by feeding a token with #pgf_parse_token. The old
|
||||
* parse state is unaffected by this, so backtracking - and even branching -
|
||||
* can be accomplished by retaining the earlier #PgfParse objects.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/// Begin parsing
|
||||
PgfParseState*
|
||||
pgf_parser_init_state(PgfConcr* concr, PgfCId cat, size_t lin_idx,
|
||||
double heuristics,
|
||||
GuPool* pool, GuPool* out_pool);
|
||||
/**<
|
||||
* @param parser The parser to use
|
||||
*
|
||||
* @param cat The identifier of the abstract category to parse
|
||||
*
|
||||
* @param lin_idx The index of the field of the concrete category to parse
|
||||
*
|
||||
* @pool
|
||||
*
|
||||
* @return An initial parsing state.
|
||||
*/
|
||||
|
||||
|
||||
/// Feed a token to the parser
|
||||
PgfParseState*
|
||||
pgf_parser_next_state(PgfParseState* prev, PgfToken tok);
|
||||
/**<
|
||||
* @param parse The current parse state
|
||||
*
|
||||
* @param tok The token to feed
|
||||
*
|
||||
* @pool
|
||||
*
|
||||
* @return A new parse state obtained by feeding \p tok as an input to \p
|
||||
* parse, or \c NULL if the token was unexpected.
|
||||
*
|
||||
* @note The new parse state partially depends on the old one, so it doesn't
|
||||
* make sense to use a \p pool argument with a longer lifetime than that of
|
||||
* the pool used to create \p parse.
|
||||
*/
|
||||
|
||||
GuEnum*
|
||||
pgf_parser_completions(PgfParseState* prev, GuString prefix);
|
||||
void
|
||||
pgf_add_extern_cat(PgfSymbol* psym, int d, int r, GuPool* pool);
|
||||
|
||||
void
|
||||
pgf_parser_add_literal(PgfConcr *concr, PgfCId cat,
|
||||
PgfLiteralCallback* callback);
|
||||
|
||||
/** @}
|
||||
* @name Retrieving abstract syntax trees
|
||||
*
|
||||
* After the desired tokens have been fed to the parser, the resulting parse
|
||||
* state can be queried for completed results. The #pgf_parse_result function
|
||||
* returns an enumeration (#GuEnum) of possible abstract syntax trees whose
|
||||
* linearization is the sequence of tokens fed so far.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/// Retrieve the current parses from the parse state.
|
||||
PgfExprEnum*
|
||||
pgf_parse_result(PgfParseState* state);
|
||||
/**<
|
||||
* @param parse A parse state
|
||||
*
|
||||
* @pool
|
||||
*
|
||||
* @return An enumeration of #PgfExpr elements representing the abstract
|
||||
* syntax trees that would linearize to the sequence of tokens fed to produce
|
||||
* \p parse. The enumeration may yield zero, one or more abstract syntax
|
||||
* trees, depending on whether the parse was unsuccessful, unambiguously
|
||||
* successful, or ambiguously successful.
|
||||
*/
|
||||
|
||||
// Use this procedure only at your own risk.
|
||||
// It is dirty and it will probably be removed or replaced
|
||||
// with something else. Currently it is here only for experimental
|
||||
// purposes.
|
||||
void
|
||||
pgf_parse_print_chunks(PgfParseState* state);
|
||||
|
||||
size_t
|
||||
pgf_item_lin_idx(PgfItem* item);
|
||||
|
||||
void
|
||||
pgf_item_sequence(PgfItem* item,
|
||||
size_t* lin_idx, PgfSequence** seq,
|
||||
GuPool* pool);
|
||||
|
||||
int
|
||||
pgf_item_sequence_length(PgfItem* item);
|
||||
|
||||
/** @} */
|
||||
|
||||
#endif // PGF_PARSER_H_
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
#include <pgf/pgf.h>
|
||||
#include <pgf/data.h>
|
||||
#include <pgf/linearizer.h>
|
||||
#include <pgf/parser.h>
|
||||
|
||||
typedef struct {
|
||||
/*typedef struct {
|
||||
int start, end;
|
||||
PgfCId cat;
|
||||
int lin_idx;
|
||||
@@ -123,24 +124,50 @@ pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_id
|
||||
}
|
||||
|
||||
static PgfLinFuncs pgf_metrics_lin_funcs1 = {
|
||||
v v v v v v v
|
||||
.symbol_tokens = pgf_metrics_lzn_symbol_tokens,
|
||||
.expr_literal = pgf_metrics_lzn_expr_literal,
|
||||
.begin_phrase = pgf_metrics_lzn_begin_phrase,
|
||||
.end_phrase = pgf_metrics_lzn_end_phrase1
|
||||
=============
|
||||
.symbol_token = pgf_metrics_lzn_symbol_token,
|
||||
.expr_literal = pgf_metrics_lzn_expr_literal,
|
||||
.begin_phrase = pgf_metrics_lzn_begin_phrase,
|
||||
.end_phrase = pgf_metrics_lzn_end_phrase1
|
||||
*************
|
||||
.symbol_tokens = pgf_metrics_lzn_symbol_tokens,
|
||||
.expr_literal = pgf_metrics_lzn_expr_literal,
|
||||
.begin_phrase = pgf_metrics_lzn_begin_phrase,
|
||||
.end_phrase = pgf_metrics_lzn_end_phrase1,
|
||||
.symbol_glue = NULL
|
||||
^ ^ ^ ^ ^ ^ ^
|
||||
};
|
||||
|
||||
static PgfLinFuncs pgf_metrics_lin_funcs2 = {
|
||||
v v v v v v v
|
||||
.symbol_tokens = pgf_metrics_lzn_symbol_tokens,
|
||||
.expr_literal = pgf_metrics_lzn_expr_literal,
|
||||
.begin_phrase = pgf_metrics_lzn_begin_phrase,
|
||||
.end_phrase = pgf_metrics_lzn_end_phrase2
|
||||
=============
|
||||
.symbol_token = pgf_metrics_lzn_symbol_token,
|
||||
.expr_literal = pgf_metrics_lzn_expr_literal,
|
||||
.begin_phrase = pgf_metrics_lzn_begin_phrase,
|
||||
.end_phrase = pgf_metrics_lzn_end_phrase2
|
||||
*************
|
||||
.symbol_tokens = pgf_metrics_lzn_symbol_tokens,
|
||||
.expr_literal = pgf_metrics_lzn_expr_literal,
|
||||
.begin_phrase = pgf_metrics_lzn_begin_phrase,
|
||||
.end_phrase = pgf_metrics_lzn_end_phrase2,
|
||||
.symbol_glue = NULL
|
||||
^ ^ ^ ^ ^ ^ ^
|
||||
};
|
||||
|
||||
*/
|
||||
bool
|
||||
pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
|
||||
double *precision, double *recall, double *exact)
|
||||
{
|
||||
GuPool* pool = gu_new_pool();
|
||||
/* GuPool* pool = gu_new_pool();
|
||||
|
||||
GuEnum* en_lins1 =
|
||||
pgf_lzr_concretize(concr, expr, pool);
|
||||
@@ -190,6 +217,6 @@ pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
|
||||
*precision = ((double) state.matches)/((double) state.found);
|
||||
*recall = ((double) state.matches)/((double) gu_buf_length(state.phrases));
|
||||
*exact = pgf_expr_eq(expr, ep->expr) ? 1 : 0;
|
||||
|
||||
*/
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
#include <pgf/reader.h>
|
||||
#include <pgf/linearizer.h>
|
||||
#include <pgf/parser.h>
|
||||
#include <pgf/lexer.h>
|
||||
#include <gu/file.h>
|
||||
#include <gu/string.h>
|
||||
#include <gu/enum.h>
|
||||
@@ -61,7 +60,8 @@ pgf_load_meta_child_probs(PgfPGF* pgf, const char* fpath,
|
||||
PgfAbsCat* abscat1 =
|
||||
gu_map_get(pgf->abstract.cats, cat1, PgfAbsCat*);
|
||||
if (abscat1 == NULL) {
|
||||
gu_raise(err, PgfExn);
|
||||
GuExnData* exn = gu_raise(err, PgfExn);
|
||||
exn->data = "Unknown category name";
|
||||
goto close;
|
||||
}
|
||||
|
||||
@@ -73,6 +73,8 @@ pgf_load_meta_child_probs(PgfPGF* pgf, const char* fpath,
|
||||
PgfAbsCat* abscat2 = gu_map_get(pgf->abstract.cats, cat2, PgfAbsCat*);
|
||||
if (abscat2 == NULL) {
|
||||
gu_raise(err, PgfExn);
|
||||
GuExnData* exn = gu_raise(err, PgfExn);
|
||||
exn->data = "Unknown category name";
|
||||
goto close;
|
||||
}
|
||||
|
||||
@@ -228,73 +230,3 @@ pgf_linearize(PgfConcr* concr, PgfExpr expr, GuOut* out, GuExn* err)
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
}
|
||||
|
||||
GuEnum*
|
||||
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
|
||||
GuPool* pool, GuPool* out_pool)
|
||||
{
|
||||
return pgf_parse_with_heuristics(concr, cat, lexer, -1.0, pool, out_pool);
|
||||
}
|
||||
|
||||
GuEnum*
|
||||
pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
|
||||
double heuristics,
|
||||
GuPool* pool, GuPool* out_pool)
|
||||
{
|
||||
// Begin parsing a sentence of the specified category
|
||||
PgfParseState* state =
|
||||
pgf_parser_init_state(concr, cat, 0, heuristics, pool, out_pool);
|
||||
if (state == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Tokenization
|
||||
GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), pool);
|
||||
PgfToken tok = pgf_lexer_read_token(lexer, lex_err);
|
||||
while (!gu_exn_is_raised(lex_err)) {
|
||||
// feed the token to get a new parse state
|
||||
state = pgf_parser_next_state(state, tok);
|
||||
if (state == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tok = pgf_lexer_read_token(lexer, lex_err);
|
||||
}
|
||||
|
||||
if (gu_exn_caught(lex_err) != gu_type(GuEOF))
|
||||
return NULL;
|
||||
|
||||
// Now begin enumerating the resulting syntax trees
|
||||
return pgf_parse_result(state);
|
||||
}
|
||||
|
||||
GuEnum*
|
||||
pgf_complete(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
|
||||
GuString prefix, GuPool* pool)
|
||||
{
|
||||
// Begin parsing a sentence of the specified category
|
||||
PgfParseState* state =
|
||||
pgf_parser_init_state(concr, cat, 0, -1, pool, pool);
|
||||
if (state == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Tokenization
|
||||
GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), pool);
|
||||
PgfToken tok = pgf_lexer_read_token(lexer, lex_err);
|
||||
while (!gu_exn_is_raised(lex_err)) {
|
||||
// feed the token to get a new parse state
|
||||
state = pgf_parser_next_state(state, tok);
|
||||
if (state == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tok = pgf_lexer_read_token(lexer, lex_err);
|
||||
}
|
||||
|
||||
if (gu_exn_caught(lex_err) != gu_type(GuEOF))
|
||||
return NULL;
|
||||
|
||||
// Now begin enumerating the resulting syntax trees
|
||||
return pgf_parser_completions(state, prefix);
|
||||
}
|
||||
|
||||
@@ -37,6 +37,7 @@ extern GU_DECLARE_TYPE(PgfCId, typedef);
|
||||
|
||||
|
||||
extern GU_DECLARE_TYPE(PgfExn, abstract);
|
||||
extern GU_DECLARE_TYPE(PgfParseError, abstract);
|
||||
|
||||
/// @name PGF Grammar objects
|
||||
/// @{
|
||||
@@ -50,7 +51,6 @@ typedef struct PgfConcr PgfConcr;
|
||||
*/
|
||||
|
||||
#include <pgf/expr.h>
|
||||
#include <pgf/lexer.h>
|
||||
#include <pgf/graphviz.h>
|
||||
|
||||
/// An enumeration of #PgfExpr elements.
|
||||
@@ -120,8 +120,16 @@ pgf_print_name(PgfConcr*, PgfCId id);
|
||||
void
|
||||
pgf_linearize(PgfConcr* concr, PgfExpr expr, GuOut* out, GuExn* err);
|
||||
|
||||
bool
|
||||
pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
|
||||
double *precision, double *recall, double *exact);
|
||||
|
||||
PgfExprEnum*
|
||||
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
|
||||
pgf_generate_all(PgfPGF* pgf, PgfCId cat, GuPool* pool);
|
||||
|
||||
PgfExprEnum*
|
||||
pgf_parse(PgfConcr* concr, PgfCId cat, GuString sentence,
|
||||
GuExn* err,
|
||||
GuPool* pool, GuPool* out_pool);
|
||||
|
||||
typedef struct PgfMorphoCallback PgfMorphoCallback;
|
||||
@@ -132,10 +140,10 @@ struct PgfMorphoCallback {
|
||||
};
|
||||
|
||||
void
|
||||
pgf_lookup_morpho(PgfConcr *concr, PgfLexer *lexer,
|
||||
pgf_lookup_morpho(PgfConcr *concr, GuString sentence,
|
||||
PgfMorphoCallback* callback, GuExn* err);
|
||||
|
||||
typedef GuMapKeyValue PgfFullFormEntry;
|
||||
typedef struct PgfFullFormEntry PgfFullFormEntry;
|
||||
|
||||
GuEnum*
|
||||
pgf_fullform_lexicon(PgfConcr *concr, GuPool* pool);
|
||||
@@ -148,20 +156,14 @@ pgf_fullform_get_analyses(PgfFullFormEntry* entry,
|
||||
PgfMorphoCallback* callback, GuExn* err);
|
||||
|
||||
PgfExprEnum*
|
||||
pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
|
||||
double heuristics,
|
||||
pgf_parse_with_heuristics(PgfConcr* concr, PgfCId cat,
|
||||
GuString sentence, double heuristics,
|
||||
GuExn* err,
|
||||
GuPool* pool, GuPool* out_pool);
|
||||
|
||||
GuEnum*
|
||||
pgf_complete(PgfConcr* concr, PgfCId cat, PgfLexer *lexer,
|
||||
GuString prefix, GuPool* pool);
|
||||
|
||||
bool
|
||||
pgf_parseval(PgfConcr* concr, PgfExpr expr, PgfCId cat,
|
||||
double *precision, double *recall, double *exact);
|
||||
|
||||
PgfExprEnum*
|
||||
pgf_generate_all(PgfPGF* pgf, PgfCId cat, GuPool* pool);
|
||||
pgf_complete(PgfConcr* concr, PgfCId cat, GuString string,
|
||||
GuString prefix, GuExn* err, GuPool* pool);
|
||||
|
||||
/// @}
|
||||
|
||||
|
||||
@@ -169,21 +169,14 @@ pgf_print_cncfun(PgfCncFun *cncfun, PgfSequences* sequences,
|
||||
GuOut *out, GuExn *err)
|
||||
{
|
||||
gu_printf(out,err," F%d := (", cncfun->funid);
|
||||
|
||||
size_t n_seqs = gu_seq_length(sequences);
|
||||
|
||||
|
||||
for (size_t i = 0; i < cncfun->n_lins; i++) {
|
||||
if (i > 0) gu_putc(',', out, err);
|
||||
PgfSequence* seq = cncfun->lins[i];
|
||||
|
||||
for (size_t seqid = 0; seqid < n_seqs; seqid++) {
|
||||
if (gu_seq_data(gu_seq_get(sequences, PgfSequence*, seqid)) == gu_seq_data(seq)) {
|
||||
gu_printf(out,err,"S%d", seqid);
|
||||
break;
|
||||
}
|
||||
}
|
||||
PgfSequence* seq = cncfun->lins[i];
|
||||
gu_printf(out,err,"S%d", (seq - ((PgfSequence*) gu_seq_data(sequences))));
|
||||
}
|
||||
|
||||
|
||||
gu_puts(")", out, err);
|
||||
|
||||
if (cncfun->absfun != NULL) {
|
||||
@@ -204,7 +197,7 @@ pgf_print_token(PgfToken tok, GuOut *out, GuExn *err)
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_print_sequence(PgfSequence* seq, GuOut *out, GuExn *err);
|
||||
pgf_print_symbols(PgfSymbols* syms, GuOut *out, GuExn *err);
|
||||
|
||||
void
|
||||
pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err)
|
||||
@@ -224,11 +217,11 @@ pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err)
|
||||
PgfSymbolKP* skp = gu_variant_data(sym);
|
||||
|
||||
gu_puts("pre {", out, err);
|
||||
pgf_print_sequence(skp->default_form, out, err);
|
||||
pgf_print_symbols(skp->default_form, out, err);
|
||||
|
||||
for (size_t i = 0; i < skp->n_forms; i++) {
|
||||
gu_puts("; ", out, err);
|
||||
pgf_print_sequence(skp->forms[i].form, out, err);
|
||||
pgf_print_symbols(skp->forms[i].form, out, err);
|
||||
gu_puts(" / ", out, err);
|
||||
|
||||
size_t n_prefixes = gu_seq_length(skp->forms[i].prefixes);
|
||||
@@ -269,13 +262,13 @@ pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err)
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_print_sequence(PgfSequence* seq, GuOut *out, GuExn *err)
|
||||
pgf_print_symbols(PgfSymbols* syms, GuOut *out, GuExn *err)
|
||||
{
|
||||
int n_syms = gu_seq_length(seq);
|
||||
int n_syms = gu_seq_length(syms);
|
||||
for (int i = 0; i < n_syms; i++) {
|
||||
if (i > 0) gu_putc(' ', out, err);
|
||||
|
||||
PgfSymbol sym = gu_seq_get(seq, PgfSymbol, i);
|
||||
PgfSymbol sym = gu_seq_get(syms, PgfSymbol, i);
|
||||
pgf_print_symbol(sym, out, err);
|
||||
}
|
||||
}
|
||||
@@ -338,10 +331,9 @@ pgf_print_concrete(PgfCId cncname, PgfConcr* concr,
|
||||
gu_puts(" sequences\n", out, err);
|
||||
size_t n_seqs = gu_seq_length(concr->sequences);
|
||||
for (size_t i = 0; i < n_seqs; i++) {
|
||||
PgfSequence* seq = gu_seq_get(concr->sequences, PgfSequence*, i);
|
||||
|
||||
gu_printf(out,err," S%d := ", i);
|
||||
pgf_print_sequence(seq, out, err);
|
||||
PgfSymbols* syms = gu_seq_index(concr->sequences, PgfSequence, i)->syms;
|
||||
pgf_print_symbols(syms, out, err);
|
||||
gu_putc('\n', out, err);
|
||||
}
|
||||
|
||||
|
||||
@@ -606,13 +606,13 @@ pgf_read_printnames(PgfReader* rdr)
|
||||
return printnames;
|
||||
}
|
||||
|
||||
static PgfSequence*
|
||||
pgf_read_sequence(PgfReader* rdr);
|
||||
static PgfSymbols*
|
||||
pgf_read_symbols(PgfReader* rdr);
|
||||
|
||||
static void
|
||||
pgf_read_alternative(PgfReader* rdr, PgfAlternative* alt)
|
||||
{
|
||||
alt->form = pgf_read_sequence(rdr);
|
||||
alt->form = pgf_read_symbols(rdr);
|
||||
gu_return_on_exn(rdr->err,);
|
||||
|
||||
size_t n_prefixes = pgf_read_len(rdr);
|
||||
@@ -692,7 +692,7 @@ pgf_read_symbol(PgfReader* rdr)
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_KP: {
|
||||
PgfSequence* default_form = pgf_read_sequence(rdr);
|
||||
PgfSymbols* default_form = pgf_read_symbols(rdr);
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
|
||||
size_t n_forms = pgf_read_len(rdr);
|
||||
@@ -732,21 +732,21 @@ pgf_read_symbol(PgfReader* rdr)
|
||||
return sym;
|
||||
}
|
||||
|
||||
static PgfSequence*
|
||||
pgf_read_sequence(PgfReader* rdr)
|
||||
static PgfSymbols*
|
||||
pgf_read_symbols(PgfReader* rdr)
|
||||
{
|
||||
size_t len = pgf_read_len(rdr);
|
||||
gu_return_on_exn(rdr->err, NULL);
|
||||
|
||||
PgfSequence* seq = gu_new_seq(PgfSymbol, len, rdr->opool);
|
||||
PgfSymbols* syms = gu_new_seq(PgfSymbol, len, rdr->opool);
|
||||
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
PgfSymbol sym = pgf_read_symbol(rdr);
|
||||
gu_return_on_exn(rdr->err, NULL);
|
||||
gu_seq_set(seq, PgfSymbol, i, sym);
|
||||
gu_seq_set(syms, PgfSymbol, i, sym);
|
||||
}
|
||||
|
||||
return seq;
|
||||
return syms;
|
||||
}
|
||||
|
||||
static PgfSequences*
|
||||
@@ -755,12 +755,14 @@ pgf_read_sequences(PgfReader* rdr)
|
||||
size_t len = pgf_read_len(rdr);
|
||||
gu_return_on_exn(rdr->err, NULL);
|
||||
|
||||
PgfSequences* seqs = gu_new_seq(PgfSequence*, len, rdr->opool);
|
||||
PgfSequences* seqs = gu_new_seq(PgfSequence, len, rdr->opool);
|
||||
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
PgfSequence* seq = pgf_read_sequence(rdr);
|
||||
PgfSymbols* syms = pgf_read_symbols(rdr);
|
||||
gu_return_on_exn(rdr->err, NULL);
|
||||
gu_seq_set(seqs, PgfSequence*, i, seq);
|
||||
|
||||
gu_seq_index(seqs, PgfSequence, i)->syms = syms;
|
||||
gu_seq_index(seqs, PgfSequence, i)->idx = NULL;
|
||||
}
|
||||
|
||||
return seqs;
|
||||
@@ -793,7 +795,7 @@ pgf_read_cncfun(PgfReader* rdr, PgfAbstr* abstr, PgfConcr* concr, int funid)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
cncfun->lins[i] = gu_seq_get(concr->sequences, PgfSequence*, seqid);
|
||||
cncfun->lins[i] = gu_seq_index(concr->sequences, PgfSequence, seqid);
|
||||
}
|
||||
|
||||
return cncfun;
|
||||
@@ -1149,8 +1151,6 @@ pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr)
|
||||
gu_new_int_map(PgfCCat*, &gu_null_struct, rdr->opool);
|
||||
concr->fun_indices = gu_map_type_new(PgfCncFunOverloadMap, rdr->opool);
|
||||
concr->coerce_idx = gu_map_type_new(PgfCncOverloadMap, rdr->opool);
|
||||
concr->epsilon_idx = gu_map_type_new(PgfProductionIdx, rdr->opool);
|
||||
concr->leftcorner_tok_idx = gu_map_type_new(PgfLeftcornerTokIdx,rdr->opool);
|
||||
pgf_read_lindefs(rdr, concr);
|
||||
pgf_read_ccats(rdr, concr);
|
||||
concr->cnccats = pgf_read_cnccats(rdr, abstr, concr);
|
||||
@@ -1200,7 +1200,7 @@ pgf_read_pgf(PgfReader* rdr) {
|
||||
|
||||
pgf_read_abstract(rdr, &pgf->abstract);
|
||||
gu_return_on_exn(rdr->err, NULL);
|
||||
|
||||
|
||||
pgf->concretes = pgf_read_concretes(rdr, &pgf->abstract);
|
||||
gu_return_on_exn(rdr->err, NULL);
|
||||
|
||||
|
||||
@@ -110,12 +110,11 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
clock_t start = clock();
|
||||
|
||||
GuIn *in = gu_string_in(line, ppool);
|
||||
PgfLexer *lexer = pgf_new_simple_lexer(in, ppool);
|
||||
GuEnum* result = pgf_parse_with_heuristics(concr, cat, lexer, heuristics, ppool, ppool);
|
||||
GuExn* parse_err = gu_new_exn(NULL, gu_kind(type), ppool);
|
||||
GuEnum* result = pgf_parse_with_heuristics(concr, cat, line, heuristics, parse_err, ppool, ppool);
|
||||
|
||||
PgfExprProb* ep = NULL;
|
||||
if (result != NULL)
|
||||
if (gu_ok(parse_err))
|
||||
ep = gu_next(result, PgfExprProb*, ppool);
|
||||
|
||||
clock_t end = clock();
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
#include <gu/map.h>
|
||||
#include <gu/enum.h>
|
||||
#include <gu/file.h>
|
||||
#include <gu/exn.h>
|
||||
#include <pgf/pgf.h>
|
||||
#include <pgf/parser.h>
|
||||
#include <pgf/literals.h>
|
||||
@@ -153,23 +154,19 @@ int main(int argc, char* argv[]) {
|
||||
// sentence, so our memory usage doesn't increase over time.
|
||||
ppool = gu_new_pool();
|
||||
|
||||
GuIn *in =
|
||||
gu_string_in(line, ppool);
|
||||
PgfLexer *lexer =
|
||||
pgf_new_simple_lexer(in, ppool);
|
||||
|
||||
clock_t start = clock();
|
||||
|
||||
GuExn* parse_err = gu_new_exn(NULL, gu_kind(type), ppool);
|
||||
result =
|
||||
pgf_parse(from_concr, cat, lexer, ppool, ppool);
|
||||
if (result == NULL) {
|
||||
PgfToken tok =
|
||||
pgf_lexer_current_token(lexer);
|
||||
|
||||
if (*tok == 0)
|
||||
gu_puts("Couldn't begin parsing", out, err);
|
||||
else {
|
||||
pgf_parse(from_concr, cat, line, parse_err, ppool, ppool);
|
||||
if (!gu_ok(parse_err)) {
|
||||
if (gu_exn_caught(parse_err) == gu_type(PgfExn)) {
|
||||
GuString msg = gu_exn_caught_data(parse_err);
|
||||
gu_string_write(msg, out, err);
|
||||
gu_putc('\n', out, err);
|
||||
} else if (gu_exn_caught(parse_err) == gu_type(PgfParseError)) {
|
||||
gu_puts("Unexpected token: \"", out, err);
|
||||
GuString tok = gu_exn_caught_data(parse_err);
|
||||
gu_string_write(tok, out, err);
|
||||
gu_puts("\"\n", out, err);
|
||||
}
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
#include <pgf/pgf.h>
|
||||
#include <pgf/reader.h>
|
||||
#include <pgf/lexer.h>
|
||||
#include <gu/mem.h>
|
||||
#include <gu/exn.h>
|
||||
#include <gu/utf8.h>
|
||||
@@ -19,7 +18,7 @@ gu2j_string(JNIEnv *env, GuString s) {
|
||||
jchar* dst = utf16;
|
||||
while (s-utf8 < len) {
|
||||
GuUCS ucs = gu_utf8_decode((const uint8_t**) &s);
|
||||
|
||||
|
||||
if (ucs <= 0xFFFF) {
|
||||
*dst++ = ucs;
|
||||
} else {
|
||||
@@ -281,21 +280,21 @@ Java_org_grammaticalframework_pgf_Parser_parse
|
||||
|
||||
GuString startCat = j2gu_string(env, jstartCat, pool);
|
||||
GuString s = j2gu_string(env, js, pool);
|
||||
|
||||
GuIn* in = gu_string_in(s, pool);
|
||||
PgfLexer *lexer = pgf_new_simple_lexer(in, pool);
|
||||
GuExn* parse_err = gu_new_exn(NULL, gu_kind(type), pool);
|
||||
|
||||
GuEnum* res =
|
||||
pgf_parse(get_ref(env, concr), startCat, lexer, pool, out_pool);
|
||||
pgf_parse(get_ref(env, concr), startCat, s, parse_err, pool, out_pool);
|
||||
|
||||
if (res == NULL) {
|
||||
PgfToken tok =
|
||||
pgf_lexer_current_token(lexer);
|
||||
|
||||
if (*tok == 0)
|
||||
throw_string_exception(env, "org/grammaticalframework/pgf/PGFError", "The sentence cannot be parsed");
|
||||
else
|
||||
throw_jstring_exception(env, "org/grammaticalframework/pgf/ParseError", gu2j_string(env, tok));
|
||||
if (!gu_ok(parse_err)) {
|
||||
if (gu_exn_caught(parse_err) == gu_type(PgfExn)) {
|
||||
GuString msg = (GuString) gu_exn_caught_data(parse_err);
|
||||
jstring jmsg = gu2j_string(env, msg);
|
||||
throw_string_exception(env, "org/grammaticalframework/pgf/PGFError", jmsg);
|
||||
} else if (gu_exn_caught(parse_err) == gu_type(PgfParseError)) {
|
||||
GuString tok = (GuString) gu_exn_caught_data(parse_err);
|
||||
jstring jtok = gu2j_string(env, tok);
|
||||
throw_jstring_exception(env, "org/grammaticalframework/pgf/ParseError", jtok);
|
||||
}
|
||||
|
||||
gu_pool_free(pool);
|
||||
gu_pool_free(out_pool);
|
||||
|
||||
@@ -1046,48 +1046,6 @@ Concr_printName(ConcrObject* self, PyObject *args)
|
||||
return PyString_FromString(pgf_print_name(self->concr, name));
|
||||
}
|
||||
|
||||
/* Lexer that pulls its tokens from a Python iterator. */
typedef struct {
|
||||
/* Embedded lexer interface; kept as the first member because the
 * code casts between PgfLexer* and PgfPythonLexer*. */
PgfLexer base;
|
||||
/* Python iterator that PyIter_Next() is called on for each token. */
PyObject* pylexer;
|
||||
/* Pool the token strings are copied into. */
GuPool* pool;
|
||||
} PgfPythonLexer;
|
||||
|
||||
GU_DEFINE_TYPE(PyPgfLexerExn, abstract, _);
|
||||
|
||||
static PgfToken
|
||||
pypgf_python_lexer_read_token(PgfLexer *base, GuExn* err)
|
||||
{
|
||||
PgfPythonLexer* lexer = (PgfPythonLexer*) base;
|
||||
lexer->base.tok = "";
|
||||
|
||||
PyObject* item = PyIter_Next(lexer->pylexer);
|
||||
if (item == NULL)
|
||||
if (PyErr_Occurred() != NULL)
|
||||
gu_raise(err, PyPgfLexerExn);
|
||||
else
|
||||
gu_raise(err, GuEOF);
|
||||
else {
|
||||
const char* str = PyString_AsString(item);
|
||||
if (str == NULL)
|
||||
gu_raise(err, PyPgfLexerExn);
|
||||
else
|
||||
lexer->base.tok = gu_string_copy(str, lexer->pool);
|
||||
}
|
||||
|
||||
return lexer->base.tok;
|
||||
}
|
||||
|
||||
static PgfLexer*
|
||||
pypgf_new_python_lexer(PyObject* pylexer, GuPool* pool)
|
||||
{
|
||||
PgfPythonLexer* lexer = gu_new(PgfPythonLexer, pool);
|
||||
lexer->base.read_token = pypgf_python_lexer_read_token;
|
||||
lexer->base.tok = "";
|
||||
lexer->pylexer = pylexer;
|
||||
lexer->pool = pool;
|
||||
return ((PgfLexer*) lexer);
|
||||
}
|
||||
|
||||
#if ( (PY_VERSION_HEX < 0x02070000) \
|
||||
|| ((PY_VERSION_HEX >= 0x03000000) \
|
||||
&& (PY_VERSION_HEX < 0x03010000)) )
|
||||
@@ -1114,35 +1072,19 @@ void pypgf_container_descructor(PyObject *capsule)
|
||||
static IterObject*
|
||||
Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||
{
|
||||
static char *kwlist[] = {"sentence", "tokens", "cat", "n", "heuristics", NULL};
|
||||
static char *kwlist[] = {"sentence", "cat", "n", "heuristics", NULL};
|
||||
|
||||
int len;
|
||||
const uint8_t *buf = NULL;
|
||||
PyObject* py_lexer = NULL;
|
||||
const char *sentence = NULL;
|
||||
PgfCId catname = pgf_start_cat(self->grammar->pgf);
|
||||
int max_count = -1;
|
||||
double heuristics = -1;
|
||||
if (!PyArg_ParseTupleAndKeywords(args, keywds, "|s#Osid", kwlist,
|
||||
&buf, &len, &py_lexer, &catname, &max_count, &heuristics))
|
||||
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|sid", kwlist,
|
||||
&sentence, &catname, &max_count, &heuristics))
|
||||
return NULL;
|
||||
|
||||
if ((buf == NULL && py_lexer == NULL) ||
|
||||
(buf != NULL && py_lexer != NULL)) {
|
||||
PyErr_SetString(PyExc_TypeError, "either the sentence or the tokens argument must be provided");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (py_lexer != NULL) {
|
||||
// get an iterator out of the iterable object
|
||||
py_lexer = PyObject_GetIter(py_lexer);
|
||||
if (py_lexer == NULL)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
IterObject* pyres = (IterObject*)
|
||||
pgf_IterType.tp_alloc(&pgf_IterType, 0);
|
||||
if (pyres == NULL) {
|
||||
Py_XDECREF(py_lexer);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -1160,30 +1102,22 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||
pyres->counter = 0;
|
||||
pyres->fetch = Iter_fetch_expr;
|
||||
|
||||
PgfLexer *lexer = NULL;
|
||||
if (buf != NULL) {
|
||||
GuIn* in = gu_data_in(buf, len, pyres->pool);
|
||||
lexer = pgf_new_simple_lexer(in, pyres->pool);
|
||||
}
|
||||
if (py_lexer != NULL) {
|
||||
lexer = pypgf_new_python_lexer(py_lexer, pyres->pool);
|
||||
}
|
||||
GuExn* parse_err = gu_new_exn(NULL, gu_kind(type), pyres->pool);
|
||||
|
||||
pyres->res =
|
||||
pgf_parse_with_heuristics(self->concr, catname, lexer,
|
||||
heuristics, pyres->pool, out_pool);
|
||||
pgf_parse_with_heuristics(self->concr, catname, sentence,
|
||||
heuristics, parse_err,
|
||||
pyres->pool, out_pool);
|
||||
|
||||
if (pyres->res == NULL) {
|
||||
PgfToken tok =
|
||||
pgf_lexer_current_token(lexer);
|
||||
|
||||
if (*tok == 0)
|
||||
PyErr_SetString(PGFError, "The sentence cannot be parsed");
|
||||
else {
|
||||
if (!gu_ok(parse_err)) {
|
||||
if (gu_exn_caught(parse_err) == gu_type(PgfExn)) {
|
||||
GuString msg = (GuString) gu_exn_caught_data(parse_err);
|
||||
PyErr_SetString(PGFError, msg);
|
||||
} else if (gu_exn_caught(parse_err) == gu_type(PgfParseError)) {
|
||||
GuString tok = (GuString) gu_exn_caught_data(parse_err);
|
||||
PyObject* py_tok = PyString_FromString(tok);
|
||||
PyObject_SetAttrString(ParseError, "token", py_tok);
|
||||
PyErr_Format(ParseError, "Unexpected token: \"%s\"",
|
||||
PyString_AsString(py_tok));
|
||||
PyErr_Format(ParseError, "Unexpected token: \"%s\"", tok);
|
||||
Py_DECREF(py_tok);
|
||||
}
|
||||
|
||||
@@ -1191,45 +1125,26 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||
pyres = NULL;
|
||||
}
|
||||
|
||||
Py_XDECREF(py_lexer);
|
||||
|
||||
return pyres;
|
||||
}
|
||||
|
||||
static IterObject*
|
||||
Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||
{
|
||||
static char *kwlist[] = {"sentence", "tokens", "cat",
|
||||
"prefix", "n", NULL};
|
||||
static char *kwlist[] = {"sentence", "cat", "prefix", "n", NULL};
|
||||
|
||||
int len;
|
||||
const uint8_t *buf = NULL;
|
||||
PyObject* py_lexer = NULL;
|
||||
const char *sentence = NULL;
|
||||
GuString catname = pgf_start_cat(self->grammar->pgf);
|
||||
GuString prefix = "";
|
||||
int max_count = -1;
|
||||
if (!PyArg_ParseTupleAndKeywords(args, keywds, "|s#Ossi", kwlist,
|
||||
&buf, &len, &py_lexer, &catname,
|
||||
if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|ssi", kwlist,
|
||||
&sentence, &catname,
|
||||
&prefix, &max_count))
|
||||
return NULL;
|
||||
|
||||
if ((buf == NULL && py_lexer == NULL) ||
|
||||
(buf != NULL && py_lexer != NULL)) {
|
||||
PyErr_SetString(PyExc_TypeError, "either the sentence or the tokens argument must be provided");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (py_lexer != NULL) {
|
||||
// get an iterator out of the iterable object
|
||||
py_lexer = PyObject_GetIter(py_lexer);
|
||||
if (py_lexer == NULL)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
IterObject* pyres = (IterObject*)
|
||||
pgf_IterType.tp_alloc(&pgf_IterType, 0);
|
||||
if (pyres == NULL) {
|
||||
Py_XDECREF(py_lexer);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -1245,37 +1160,27 @@ Concr_complete(ConcrObject* self, PyObject *args, PyObject *keywds)
|
||||
|
||||
GuPool *tmp_pool = gu_local_pool();
|
||||
|
||||
PgfLexer *lexer = NULL;
|
||||
if (buf != NULL) {
|
||||
GuIn* in = gu_data_in(buf, len, tmp_pool);
|
||||
lexer = pgf_new_simple_lexer(in, tmp_pool);
|
||||
}
|
||||
if (py_lexer != NULL) {
|
||||
lexer = pypgf_new_python_lexer(py_lexer, tmp_pool);
|
||||
}
|
||||
|
||||
GuExn* parse_err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
|
||||
|
||||
pyres->res =
|
||||
pgf_complete(self->concr, catname, lexer, prefix, pyres->pool);
|
||||
pgf_complete(self->concr, catname, sentence, prefix, parse_err, pyres->pool);
|
||||
|
||||
if (pyres->res == NULL) {
|
||||
if (!gu_ok(parse_err)) {
|
||||
Py_DECREF(pyres);
|
||||
pyres = NULL;
|
||||
|
||||
PgfToken tok =
|
||||
pgf_lexer_current_token(lexer);
|
||||
|
||||
if (*tok == 0)
|
||||
PyErr_SetString(PGFError, "The sentence cannot be parsed");
|
||||
else {
|
||||
if (gu_exn_caught(parse_err) == gu_type(PgfExn)) {
|
||||
GuString msg = (GuString) gu_exn_caught_data(parse_err);
|
||||
PyErr_SetString(PGFError, msg);
|
||||
} else if (gu_exn_caught(parse_err) == gu_type(PgfParseError)) {
|
||||
GuString tok = (GuString) gu_exn_caught_data(parse_err);
|
||||
PyObject* py_tok = PyString_FromString(tok);
|
||||
PyObject_SetAttrString(ParseError, "token", py_tok);
|
||||
PyErr_Format(ParseError, "Unexpected token: \"%s\"",
|
||||
PyString_AsString(py_tok));
|
||||
PyErr_Format(ParseError, "Unexpected token: \"%s\"", tok);
|
||||
Py_DECREF(py_tok);
|
||||
}
|
||||
}
|
||||
|
||||
Py_XDECREF(py_lexer);
|
||||
gu_pool_free(tmp_pool);
|
||||
|
||||
return pyres;
|
||||
@@ -1671,56 +1576,21 @@ pypgf_collect_morpho(PgfMorphoCallback* self,
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
Concr_lookupMorpho(ConcrObject* self, PyObject *args, PyObject *keywds) {
|
||||
static char *kwlist[] = {"sentence", "tokens", NULL};
|
||||
|
||||
int len;
|
||||
const uint8_t *buf = NULL;
|
||||
PyObject* py_lexer = NULL;
|
||||
if (!PyArg_ParseTupleAndKeywords(args, keywds, "|s#O", kwlist,
|
||||
&buf, &len, &py_lexer))
|
||||
Concr_lookupMorpho(ConcrObject* self, PyObject *args) {
|
||||
GuString sent;
|
||||
if (!PyArg_ParseTuple(args, "s", &sent))
|
||||
return NULL;
|
||||
|
||||
if ((buf == NULL && py_lexer == NULL) ||
|
||||
(buf != NULL && py_lexer != NULL)) {
|
||||
PyErr_SetString(PyExc_TypeError, "either the sentence or the tokens argument must be provided");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
GuPool* tmp_pool = gu_local_pool();
|
||||
|
||||
PgfLexer *lexer = NULL;
|
||||
if (buf != NULL) {
|
||||
GuIn* in = gu_data_in(buf, len, tmp_pool);
|
||||
lexer = pgf_new_simple_lexer(in, tmp_pool);
|
||||
}
|
||||
if (py_lexer != NULL) {
|
||||
// get an iterator out of the iterable object
|
||||
py_lexer = PyObject_GetIter(py_lexer);
|
||||
if (py_lexer == NULL) {
|
||||
gu_pool_free(tmp_pool);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
lexer = pypgf_new_python_lexer(py_lexer, tmp_pool);
|
||||
}
|
||||
|
||||
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
|
||||
GuPool *tmp_pool = gu_local_pool();
|
||||
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
|
||||
|
||||
PyObject* analyses = PyList_New(0);
|
||||
|
||||
PyMorphoCallback callback = { { pypgf_collect_morpho }, analyses };
|
||||
pgf_lookup_morpho(self->concr, lexer, &callback.fn, err);
|
||||
|
||||
Py_XDECREF(py_lexer);
|
||||
pgf_lookup_morpho(self->concr, sent, &callback.fn, err);
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
|
||||
if (!gu_ok(err)) {
|
||||
Py_DECREF(analyses);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return analyses;
|
||||
}
|
||||
|
||||
@@ -1833,7 +1703,7 @@ static PyMethodDef Concr_methods[] = {
|
||||
{"graphvizParseTree", (PyCFunction)Concr_graphvizParseTree, METH_VARARGS,
|
||||
"Renders an abstract syntax tree as a parse tree in Graphviz format"
|
||||
},
|
||||
{"lookupMorpho", (PyCFunction)Concr_lookupMorpho, METH_VARARGS | METH_KEYWORDS,
|
||||
{"lookupMorpho", (PyCFunction)Concr_lookupMorpho, METH_VARARGS,
|
||||
"Looks up a word in the lexicon of the grammar"
|
||||
},
|
||||
{"fullFormLexicon", (PyCFunction)Concr_fullFormLexicon, METH_VARARGS,
|
||||
|
||||
Reference in New Issue
Block a user