1
0
forked from GitHub/gf-core

libpgf: a new implementation for literals which also allows custom literals. the same mechanism is now used for the metavariables

This commit is contained in:
kr.angelov
2012-03-12 14:25:51 +00:00
parent c1b2246fa9
commit 2bf3f22fac
10 changed files with 728 additions and 311 deletions

View File

@@ -48,6 +48,7 @@ pgfinclude_HEADERS = \
pgf/linearize.h \ pgf/linearize.h \
pgf/parser.h \ pgf/parser.h \
pgf/lexer.h \ pgf/lexer.h \
pgf/literals.h \
pgf/pgf.h pgf/pgf.h
libgu_la_SOURCES = \ libgu_la_SOURCES = \
@@ -90,6 +91,8 @@ libpgf_la_SOURCES = \
pgf/parser.h \ pgf/parser.h \
pgf/lexer.c \ pgf/lexer.c \
pgf/lexer.h \ pgf/lexer.h \
pgf/literals.c \
pgf/literals.h \
pgf/reader.c \ pgf/reader.c \
pgf/linearize.c \ pgf/linearize.c \
pgf/printer.c pgf/printer.c

View File

@@ -125,8 +125,10 @@ GU_DEFINE_TYPE(
PGF_PRODUCTION_COERCE, PgfProductionCoerce, PGF_PRODUCTION_COERCE, PgfProductionCoerce,
GU_MEMBER(PgfProductionCoerce, coerce, PgfCCatId)), GU_MEMBER(PgfProductionCoerce, coerce, PgfCCatId)),
GU_CONSTRUCTOR_S( GU_CONSTRUCTOR_S(
PGF_PRODUCTION_META, PgfProductionMeta, PGF_PRODUCTION_EXTERN, PgfProductionExtern,
GU_MEMBER(PgfProductionMeta, args, PgfPArgs))); GU_MEMBER(PgfProductionExtern, fun, PgfFunId),
GU_MEMBER(PgfProductionExtern, args, PgfPArgs),
GU_MEMBER(PgfProductionExtern, callback, PgfLiteralCallback)));
GU_DEFINE_TYPE(PgfProductions, GuList, gu_type(PgfProduction)); GU_DEFINE_TYPE(PgfProductions, GuList, gu_type(PgfProduction));
GU_DEFINE_TYPE(PgfProductionSeq, GuSeq, gu_type(PgfProduction)); GU_DEFINE_TYPE(PgfProductionSeq, GuSeq, gu_type(PgfProduction));

View File

@@ -209,6 +209,17 @@ extern GU_DECLARE_TYPE(PgfTransitions, GuStringMap);
typedef GuMap PgfEpsilonIdx; typedef GuMap PgfEpsilonIdx;
extern GU_DECLARE_TYPE(PgfEpsilonIdx, GuMap); extern GU_DECLARE_TYPE(PgfEpsilonIdx, GuMap);
// Forward typedef so that the `match` member below can take a pointer to
// the callback structure itself.
typedef struct PgfLiteralCallback PgfLiteralCallback;
extern GU_DECLARE_TYPE(PgfLiteralCallback, struct);
// A pluggable recognizer for a literal category.
//
// `match` receives the linearization index, the tokens collected so far
// for the literal (the most recent token is the last element) and a pool
// for allocating the result.  As used by the parser and by the built-in
// callbacks in pgf/literals.c:
//   - the return value is true when the tokens are accepted and the parser
//     should keep extending the literal with the following token;
//   - *out_ep receives a PgfExprProb when a complete literal has been
//     recognized, and NULL otherwise.
struct PgfLiteralCallback {
bool (*match)(PgfLiteralCallback* self, int lin_idx, PgfTokens toks,
PgfExprProb** out_ep, GuPool *pool);
};
// Map from a concrete category (the parser looks it up with a PgfCncCat
// pointer) to the PgfLiteralCallback* registered for that category.
typedef GuMap PgfCallbacksMap;
extern GU_DECLARE_TYPE(PgfCallbacksMap, GuMap);
struct PgfConcr { struct PgfConcr {
PgfFlags* cflags; PgfFlags* cflags;
PgfPrintNames* printnames; PgfPrintNames* printnames;
@@ -220,6 +231,7 @@ struct PgfConcr {
PgfCncFuns* cncfuns; PgfCncFuns* cncfuns;
PgfSequences* sequences; PgfSequences* sequences;
PgfCIdMap* cnccats; PgfCIdMap* cnccats;
PgfCallbacksMap* callbacks;
int total_cats; int total_cats;
int max_fid; int max_fid;
}; };
@@ -269,7 +281,7 @@ typedef struct PgfSymbolKP
typedef enum { typedef enum {
PGF_PRODUCTION_APPLY, PGF_PRODUCTION_APPLY,
PGF_PRODUCTION_COERCE, PGF_PRODUCTION_COERCE,
PGF_PRODUCTION_META PGF_PRODUCTION_EXTERN
} PgfProductionTag; } PgfProductionTag;
typedef struct PgfPArg PgfPArg; typedef struct PgfPArg PgfPArg;
@@ -299,14 +311,10 @@ typedef struct PgfProductionCoerce
} PgfProductionCoerce; } PgfProductionCoerce;
typedef struct { typedef struct {
PgfExpr expr; // XXX PgfFunId fun;
GuLength n_toks;
GuString toks[]; // XXX
} PgfProductionConst;
typedef struct {
PgfPArgs args; PgfPArgs args;
} PgfProductionMeta; PgfLiteralCallback *callback;
} PgfProductionExtern;
extern GU_DECLARE_TYPE(PgfPatt, GuVariant); extern GU_DECLARE_TYPE(PgfPatt, GuVariant);

View File

@@ -27,6 +27,7 @@
#include <gu/string.h> #include <gu/string.h>
#include <gu/assert.h> #include <gu/assert.h>
#include <pgf/expr.h> #include <pgf/expr.h>
#include <pgf/literals.h>
typedef GuStringMap PgfLinInfer; typedef GuStringMap PgfLinInfer;
typedef GuSeq PgfProdSeq; typedef GuSeq PgfProdSeq;
@@ -338,29 +339,6 @@ finish:
return ret; return ret;
} }
/* Return the concrete category that holds literals of the same kind as
 * `lit`.  The built-in literal categories are stored in the concrete
 * syntax under fixed negative ids: -1 for strings, -2 for integers and
 * -3 for floats.  Any other literal tag is a programming error. */
PgfCCat*
pgf_literal_cat(PgfLzn* lzn, PgfLiteral lit)
{
	int literal_fid;
	int tag = gu_variant_tag(lit);

	if (tag == PGF_LITERAL_STR) {
		literal_fid = -1;
	} else if (tag == PGF_LITERAL_INT) {
		literal_fid = -2;
	} else if (tag == PGF_LITERAL_FLT) {
		literal_fid = -3;
	} else {
		gu_impossible();
		return NULL;
	}

	return gu_map_get(lzn->concr->ccats, &literal_fid, PgfCCat*);
}
static PgfCCat* static PgfCCat*
pgf_lzn_infer(PgfLzn* lzn, PgfExpr expr, GuPool* pool, PgfCncTree* ctree_out) pgf_lzn_infer(PgfLzn* lzn, PgfExpr expr, GuPool* pool, PgfCncTree* ctree_out)
{ {
@@ -380,7 +358,7 @@ pgf_lzn_infer(PgfLzn* lzn, PgfExpr expr, GuPool* pool, PgfCncTree* ctree_out)
PgfCncTreeLit, PgfCncTreeLit,
.lit = elit->lit); .lit = elit->lit);
} }
ret = pgf_literal_cat(lzn, elit->lit); ret = pgf_literal_cat(lzn->concr, elit->lit);
} }
default: default:
// XXX: should we do something here? // XXX: should we do something here?

View File

@@ -0,0 +1,263 @@
#include <gu/read.h>
#include <pgf/literals.h>
#include <wctype.h>
// Runtime type descriptor for PgfLiteralCallback, registered as a plain
// struct with no member descriptions.
GU_DEFINE_TYPE(PgfLiteralCallback, struct);
// Runtime type descriptor for PgfCallbacksMap: a GuMap whose key type is
// described as PgfCncCat and whose value type is a pointer to
// PgfLiteralCallback.
// NOTE(review): the NULL after the key type and the trailing
// &gu_null_struct are presumably the hasher and the default value used
// for missing keys -- confirm against the GuMap type definition.
GU_DEFINE_TYPE(PgfCallbacksMap, GuMap,
gu_type(PgfCncCat), NULL,
gu_ptr_type(PgfLiteralCallback), &gu_null_struct);
static bool
pgf_match_string_lit(PgfLiteralCallback* self, int lin_idx, PgfTokens toks,
PgfExprProb** out_ep, GuPool *pool)
{
gu_assert(lin_idx == 0);
if (gu_seq_length(toks) == 1) {
*out_ep = NULL;
return true;
} else if (gu_seq_length(toks) == 2) {
PgfExprProb* ep = gu_new(PgfExprProb, pool);
ep->prob = 0;
PgfExprLit *expr_lit =
gu_new_variant(PGF_EXPR_LIT,
PgfExprLit,
&ep->expr, pool);
PgfLiteralStr *lit_str =
gu_new_variant(PGF_LITERAL_STR,
PgfLiteralStr,
&expr_lit->lit, pool);
lit_str->val = gu_seq_get(toks, PgfToken, 0);
*out_ep = ep;
return false;
} else {
*out_ep = NULL;
return false;
}
}
static PgfLiteralCallback pgf_string_literal_callback =
{ pgf_match_string_lit } ;
static bool
pgf_match_int_lit(PgfLiteralCallback* self, int lin_idx, PgfTokens toks,
PgfExprProb** out_ep, GuPool *pool)
{
gu_assert(lin_idx == 0);
size_t n_toks = gu_seq_length(toks);
if (n_toks == 1) {
PgfToken tok = gu_seq_get(toks, PgfToken, 0);
int val;
*out_ep = NULL;
return gu_string_to_int(tok, &val);
} else if (n_toks == 2) {
PgfToken tok = gu_seq_get(toks, PgfToken, 0);
int val;
if (!gu_string_to_int(tok, &val)) {
*out_ep = NULL;
return false;
}
PgfExprProb* ep = gu_new(PgfExprProb, pool);
ep->prob = 0;
PgfExprLit *expr_lit =
gu_new_variant(PGF_EXPR_LIT,
PgfExprLit,
&ep->expr, pool);
PgfLiteralInt *lit_int =
gu_new_variant(PGF_LITERAL_INT,
PgfLiteralInt,
&expr_lit->lit, pool);
lit_int->val = val;
*out_ep = ep;
return false;
} else {
*out_ep = NULL;
return false;
}
}
static PgfLiteralCallback pgf_int_literal_callback =
{ pgf_match_int_lit } ;
static bool
pgf_match_float_lit(PgfLiteralCallback* self, int lin_idx, PgfTokens toks,
PgfExprProb** out_ep, GuPool *pool)
{
gu_assert(lin_idx == 0);
size_t n_toks = gu_seq_length(toks);
if (n_toks == 1) {
PgfToken tok = gu_seq_get(toks, PgfToken, 0);
double val;
*out_ep = NULL;
return gu_string_to_double(tok, &val);
} else if (n_toks == 2) {
PgfToken tok = gu_seq_get(toks, PgfToken, 0);
double val;
if (!gu_string_to_double(tok, &val)) {
*out_ep = NULL;
return false;
}
PgfExprProb* ep = gu_new(PgfExprProb, pool);
ep->prob = 0;
PgfExprLit *expr_lit =
gu_new_variant(PGF_EXPR_LIT,
PgfExprLit,
&ep->expr, pool);
PgfLiteralFlt *lit_flt =
gu_new_variant(PGF_LITERAL_FLT,
PgfLiteralFlt,
&expr_lit->lit, pool);
lit_flt->val = val;
*out_ep = ep;
return false;
} else {
*out_ep = NULL;
return false;
}
}
static PgfLiteralCallback pgf_float_literal_callback =
{ pgf_match_float_lit } ;
/* Literal callback for named-entity recognition.
 *
 * The decision is based on the first character of the LAST token in
 * `toks`:
 *   - if it is an upper-case letter the tokens are accepted (returns
 *     true) so that the name can keep growing; no expression is produced;
 *   - otherwise the function returns false, and if there is more than one
 *     token, the tokens before the last one are joined with single spaces
 *     and reported through *out_ep as the expression `MkSymb "<name>"`
 *     with probability 0.
 *
 * An empty token sequence is rejected.
 *
 * *out_ep is assigned on every path (NULL when no expression is
 * produced), matching the other built-in callbacks, so callers do not
 * have to pre-initialise it.
 *
 * NOTE(review): the exception object passed to the reader and writer is
 * never inspected, so a read failure on the last token is not detected
 * here. */
static bool
pgf_match_name_lit(PgfLiteralCallback* self, int lin_idx, PgfTokens toks,
                   PgfExprProb** out_ep, GuPool *pool)
{
	gu_assert(lin_idx == 0);

	/* Default result: no expression recognized. */
	*out_ep = NULL;

	size_t n_toks = gu_seq_length(toks);
	if (n_toks == 0) {
		return false;
	}

	PgfToken tok = gu_seq_get(toks, PgfToken, n_toks-1);

	/* Scratch pool for the reader, the exception frame and the string
	 * buffer; released before returning. */
	GuPool* tmp_pool = gu_new_pool();
	GuReader* rdr = gu_string_reader(tok, tmp_pool);
	GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);

	bool iscap = iswupper(gu_read_ucs(rdr, err));
	if (!iscap && n_toks > 1) {
		/* The name ended before the last token: join tokens
		 * 0 .. n_toks-2 with single spaces. */
		GuStringBuf *sbuf = gu_string_buf(tmp_pool);
		GuWriter* wtr = gu_string_buf_writer(sbuf);

		for (size_t i = 0; i < n_toks-1; i++) {
			if (i > 0)
				gu_putc(' ', wtr, err);

			tok = gu_seq_get(toks, PgfToken, i);
			gu_string_write(tok, wtr, err);
		}

		PgfExprProb* ep = gu_new(PgfExprProb, pool);
		ep->prob = 0;

		/* Build the application `MkSymb "<joined tokens>"`. */
		PgfExprApp *expr_app =
			gu_new_variant(PGF_EXPR_APP,
			               PgfExprApp,
			               &ep->expr, pool);
		PgfExprFun *expr_fun =
			gu_new_variant(PGF_EXPR_FUN,
			               PgfExprFun,
			               &expr_app->fun, pool);
		expr_fun->fun = gu_str_string("MkSymb", pool);
		PgfExprLit *expr_lit =
			gu_new_variant(PGF_EXPR_LIT,
			               PgfExprLit,
			               &expr_app->arg, pool);
		PgfLiteralStr *lit_str =
			gu_new_variant(PGF_LITERAL_STR,
			               PgfLiteralStr,
			               &expr_lit->lit, pool);
		/* The frozen string is allocated from the caller's pool, so it
		 * survives the release of tmp_pool below. */
		lit_str->val = gu_string_buf_freeze(sbuf, pool);

		*out_ep = ep;
	}

	gu_pool_free(tmp_pool);

	return iscap;
}

/* Exported callback object for named-entity recognition. */
PgfLiteralCallback pgf_nerc_literal_callback =
  { pgf_match_name_lit } ;
/* Create the callback map for `concr`, allocated from `pool`, and
 * register the built-in literal callbacks in it.
 *
 * The built-in literal categories are looked up in concr->ccats under
 * the ids -1 (string), -2 (integer) and -3 (float).  A callback is
 * registered, keyed by the category's cnccat, only for those ids that
 * are actually present. */
PgfCallbacksMap*
pgf_new_callbacks_map(PgfConcr* concr, GuPool *pool)
{
	struct {
		int fid;
		PgfLiteralCallback* callback;
	} builtins[] = {
		{ -1, &pgf_string_literal_callback },
		{ -2, &pgf_int_literal_callback },
		{ -3, &pgf_float_literal_callback },
	};
	size_t n_builtins = sizeof(builtins) / sizeof(builtins[0]);

	PgfCallbacksMap* map = gu_map_type_new(PgfCallbacksMap, pool);

	for (size_t k = 0; k < n_builtins; k++) {
		int fid = builtins[k].fid;
		PgfCCat* ccat = gu_map_get(concr->ccats, &fid, PgfCCat*);
		if (ccat == NULL)
			continue;

		gu_map_put(map, ccat->cnccat,
		           PgfLiteralCallback*, builtins[k].callback);
	}

	return map;
}
/* Return the concrete category of `concr` that holds literals of the
 * same kind as `lit`.  The lookup key is a fixed negative id: -1 for
 * strings, -2 for integers and -3 for floats.  Any other literal tag is
 * a programming error. */
PgfCCat*
pgf_literal_cat(PgfConcr* concr, PgfLiteral lit)
{
	int literal_fid;
	int tag = gu_variant_tag(lit);

	if (tag == PGF_LITERAL_STR) {
		literal_fid = -1;
	} else if (tag == PGF_LITERAL_INT) {
		literal_fid = -2;
	} else if (tag == PGF_LITERAL_FLT) {
		literal_fid = -3;
	} else {
		gu_impossible();
		return NULL;
	}

	return gu_map_get(concr->ccats, &literal_fid, PgfCCat*);
}

View File

@@ -0,0 +1,15 @@
// Public interface of the literal-recognition callbacks used by the parser.
#ifndef PGF_LITERALS_H_
#define PGF_LITERALS_H_
#include <pgf/data.h>
// Create a callback map, allocated from `pool`, in which the built-in
// string, integer and float callbacks are registered for those literal
// categories that are present in `concr`.
PgfCallbacksMap*
pgf_new_callbacks_map(PgfConcr* concr, GuPool *pool);
// literal for named entities recognition
// (not registered by pgf_new_callbacks_map)
extern PgfLiteralCallback pgf_nerc_literal_callback;
// Return the concrete category of `concr` that corresponds to the kind
// of `lit` (string, integer or float).
PgfCCat*
pgf_literal_cat(PgfConcr* concr, PgfLiteral lit);
#endif // PGF_LITERALS_H_

View File

@@ -94,32 +94,73 @@ GU_DEFINE_TYPE(PgfTransitions, GuStringMap,
typedef struct PgfParsing PgfParsing; typedef struct PgfParsing PgfParsing;
// One literal proposed by a lexer callback: the tokens that spell the
// literal and the expression/probability pair that the parser attaches
// to the production built for it.
typedef struct {
PgfTokens tokens;
PgfExprProb ep;
} PgfLiteralCandidate;
typedef const struct PgfLexCallback PgfLexCallback; typedef const struct PgfLexCallback PgfLexCallback;
struct PgfLexCallback { struct PgfLexCallback {
void (*lex)(PgfLexCallback* self, PgfToken tok, PgfItem* item); void (*lex)(PgfLexCallback* self, PgfToken tok, PgfItem* item);
GuEnum *(*lit)(PgfLexCallback* self, PgfCCat* cat);
}; };
struct PgfParsing { struct PgfParsing {
GuPool* pool; GuPool* pool;
GuPool* tmp_pool; GuPool* tmp_pool;
PgfConcr* concr;
PgfContsMap* conts_map; PgfContsMap* conts_map;
PgfGenCatMap* generated_cats; PgfGenCatMap* generated_cats;
PgfCCatBuf* completed; PgfCCatBuf* completed;
PgfLexCallback* callback; PgfLexCallback* callback;
PgfItemBuf *lexicon_idx; PgfItemBuf *lexicon_idx;
PgfEpsilonIdx *epsilon_idx;
PgfItemBuf *metas; PgfItemBuf *metas;
PgfToken tok;
int max_fid; int max_fid;
}; };
/* Return the symbol that precedes `sym` in the chain of symbols built
 * for an extern production.
 *
 * Symbols in such a chain are allocated with room for one extra
 * PgfSymbol directly after the symbol's own structure; that trailing
 * slot holds the link to the previous symbol.  The offset of the slot
 * depends on the concrete symbol type, hence the dispatch on the tag. */
static PgfSymbol
pgf_prev_extern_sym(PgfSymbol sym)
{
	GuVariantInfo info = gu_variant_open(sym);
	PgfSymbol* back_link;

	switch (info.tag) {
	case PGF_SYMBOL_CAT:
		back_link = (PgfSymbol*) (((PgfSymbolCat*) info.data) + 1);
		break;
	case PGF_SYMBOL_KP:
		back_link = (PgfSymbol*) (((PgfSymbolKP*) info.data) + 1);
		break;
	case PGF_SYMBOL_KS:
		back_link = (PgfSymbol*) (((PgfSymbolKS*) info.data) + 1);
		break;
	case PGF_SYMBOL_LIT:
		back_link = (PgfSymbol*) (((PgfSymbolLit*) info.data) + 1);
		break;
	case PGF_SYMBOL_VAR:
		back_link = (PgfSymbol*) (((PgfSymbolVar*) info.data) + 1);
		break;
	default:
		gu_impossible();
		return gu_null_variant;
	}

	return *back_link;
}
#ifdef PGF_PARSER_DEBUG #ifdef PGF_PARSER_DEBUG
/* Debug helper: print the argument list of a production as a
 * comma-separated list.  Each argument is printed as "C<fid>"; when the
 * argument has hypotheses they are printed first, each as "C<fid> ",
 * followed by "-> ". */
static void
pgf_print_production_args(PgfPArgs args,
                          GuWriter* wtr, GuExn* err)
{
	size_t count = gu_seq_length(args);

	for (size_t a = 0; a < count; a++) {
		if (a != 0)
			gu_putc(',',wtr,err);

		PgfPArg parg = gu_seq_get(args, PgfPArg, a);

		size_t n_hypos = 0;
		if (parg.hypos != NULL)
			n_hypos = gu_list_length(parg.hypos);

		if (n_hypos > 0) {
			for (size_t h = 0; h < n_hypos; h++) {
				PgfCCat *hypo = gu_list_index(parg.hypos, h);
				gu_printf(wtr,err,"C%d ",hypo->fid);
			}
			gu_printf(wtr,err,"-> ");
		}

		gu_printf(wtr,err,"C%d",parg.ccat->fid);
	}
}
static void static void
pgf_print_production(int fid, PgfProduction prod, GuWriter *wtr, GuExn* err) pgf_print_production(int fid, PgfProduction prod, GuWriter *wtr, GuExn* err)
{ {
@@ -132,25 +173,7 @@ pgf_print_production(int fid, PgfProduction prod, GuWriter *wtr, GuExn* err)
gu_printf(wtr,err,"F%d(",papp->fun->funid); gu_printf(wtr,err,"F%d(",papp->fun->funid);
pgf_print_expr(papp->fun->ep->expr, 0, wtr, err); pgf_print_expr(papp->fun->ep->expr, 0, wtr, err);
gu_printf(wtr,err,")["); gu_printf(wtr,err,")[");
size_t n_args = gu_seq_length(papp->args); pgf_print_production_args(papp->args,wtr,err);
for (size_t j = 0; j < n_args; j++) {
if (j > 0)
gu_putc(',',wtr,err);
PgfPArg arg = gu_seq_get(papp->args, PgfPArg, j);
if (arg.hypos != NULL) {
size_t n_hypos = gu_list_length(arg.hypos);
for (size_t k = 0; k < n_hypos; k++) {
if (k > 0)
gu_putc(' ',wtr,err);
PgfCCat *hypo = gu_list_index(arg.hypos, k);
gu_printf(wtr,err,"C%d",hypo->fid);
}
}
gu_printf(wtr,err,"C%d",arg.ccat->fid);
}
gu_printf(wtr,err,"]\n"); gu_printf(wtr,err,"]\n");
break; break;
} }
@@ -159,17 +182,12 @@ pgf_print_production(int fid, PgfProduction prod, GuWriter *wtr, GuExn* err)
gu_printf(wtr,err,"_[C%d]\n",pcoerce->coerce->fid); gu_printf(wtr,err,"_[C%d]\n",pcoerce->coerce->fid);
break; break;
} }
case PGF_PRODUCTION_META: { case PGF_PRODUCTION_EXTERN: {
PgfProductionMeta* pmeta = i.data; PgfProductionExtern* pext = i.data;
gu_printf(wtr,err,"?["); gu_printf(wtr,err,"<extern>(");
size_t n_args = gu_seq_length(pmeta->args); pgf_print_expr(pext->fun->ep->expr, 0, wtr, err);
for (size_t j = 0; j < n_args; j++) { gu_printf(wtr,err,")[");
if (j > 0) pgf_print_production_args(pext->args,wtr,err);
gu_putc(',',wtr,err);
PgfCCat *arg = gu_seq_get(pmeta->args, PgfCCat*, j);
gu_printf(wtr,err,"C%d",arg->fid);
}
gu_printf(wtr,err,"]\n"); gu_printf(wtr,err,"]\n");
break; break;
} }
@@ -181,6 +199,51 @@ pgf_print_production(int fid, PgfProduction prod, GuWriter *wtr, GuExn* err)
void void
pgf_print_symbol(PgfSymbol sym, GuWriter *wtr, GuExn *err); pgf_print_symbol(PgfSymbol sym, GuWriter *wtr, GuExn *err);
/* Debug helper: print the back-linked symbol chain that ends in `sym`
 * in left-to-right order, writing the dot marker " . " in front of the
 * symbol at position `seq_idx`.
 *
 * Returns the number of symbols printed, i.e. the length of the chain. */
static int
pgf_print_extern_seq(PgfSymbol sym, int seq_idx,
                     GuWriter* wtr, GuExn* err)
{
	int pos = 0;

	if (!gu_variant_is_null(sym)) {
		/* Print everything before this symbol first. */
		pos = pgf_print_extern_seq(pgf_prev_extern_sym(sym),
		                           seq_idx, wtr, err);

		if (pos == seq_idx)
			gu_printf(wtr, err, " . ");
		pgf_print_symbol(sym, wtr, err);

		pos++;
	}

	return pos;
}
/* Debug helper: print "<lin_idx> : " followed by the symbol sequence of
 * `item`, with a dot marking the current position.
 *
 * When `fun` provides a sequence for the item's linearization index,
 * that sequence is printed; otherwise the sequence is reconstructed
 * from the back-linked chain that starts at item->curr_sym.  A trailing
 * " ." is printed when the position is at the end of the sequence. */
static void
pgf_print_item_seq(PgfCncFun *fun, PgfItem *item,
                   GuWriter* wtr, GuExn* err)
{
	size_t pos;
	PgfSequence seq;
	bool has_stored_seq = false;

	gu_printf(wtr, err, "%d : ",item->base->lin_idx);

	if (fun != NULL) {
		seq = fun->lins[item->base->lin_idx];
		has_stored_seq = !gu_seq_is_null(seq);
	}

	if (has_stored_seq) {
		for (pos = 0; pos < gu_seq_length(seq); pos++) {
			if (item->seq_idx == pos)
				gu_printf(wtr, err, " . ");

			pgf_print_symbol(gu_seq_get(seq, PgfSymbol, pos), wtr, err);
		}
	} else {
		pos = pgf_print_extern_seq(item->curr_sym, item->seq_idx,
		                           wtr, err);
	}

	if (item->seq_idx == pos)
		gu_printf(wtr, err, " .");
}
static void static void
pgf_print_item(PgfItem* item, GuWriter* wtr, GuExn* err) pgf_print_item(PgfItem* item, GuWriter* wtr, GuExn* err)
{ {
@@ -194,23 +257,9 @@ pgf_print_item(PgfItem* item, GuWriter* wtr, GuExn* err)
gu_printf(wtr, err, "F%d(", fun->funid); gu_printf(wtr, err, "F%d(", fun->funid);
pgf_print_expr(fun->ep->expr, 0, wtr, err); pgf_print_expr(fun->ep->expr, 0, wtr, err);
gu_printf(wtr, err, ")["); gu_printf(wtr, err, ")[");
for (size_t i = 0; i < gu_seq_length(item->args); i++) { pgf_print_production_args(item->args, wtr, err);
PgfPArg arg = gu_seq_get(item->args, PgfPArg, i); gu_printf(wtr, err, "]; ");
gu_printf(wtr, err, pgf_print_item_seq(fun, item, wtr, err);
((i < gu_seq_length(item->args)-1) ? "C%d," : "C%d"),
arg.ccat->fid);
}
gu_printf(wtr, err, "]; %d : ",item->base->lin_idx);
PgfSequence seq = fun->lins[item->base->lin_idx];
for (size_t i = 0; i < gu_seq_length(seq); i++) {
if (i == item->seq_idx)
gu_printf(wtr, err, " . ");
PgfSymbol *sym = gu_seq_index(seq, PgfSymbol, i);
pgf_print_symbol(*sym, wtr, err);
}
if (item->seq_idx == gu_seq_length(seq))
gu_printf(wtr, err, " .");
break; break;
} }
case PGF_PRODUCTION_COERCE: { case PGF_PRODUCTION_COERCE: {
@@ -224,15 +273,18 @@ pgf_print_item(PgfItem* item, GuWriter* wtr, GuExn* err)
gu_printf(wtr, err, " ."); gu_printf(wtr, err, " .");
break; break;
} }
case PGF_PRODUCTION_META: { case PGF_PRODUCTION_EXTERN: {
gu_printf(wtr, err, "?["); PgfProductionExtern* pext = i.data;
for (size_t i = 0; i < gu_seq_length(item->args); i++) { gu_printf(wtr, err, "<extern>");
PgfPArg arg = gu_seq_get(item->args, PgfPArg, i); if (pext->fun != NULL) {
gu_printf(wtr, err, gu_printf(wtr, err, "(");
((i < gu_seq_length(item->args)-1) ? "C%d," : "C%d"), pgf_print_expr(pext->fun->ep->expr, 0, wtr, err);
arg.ccat->fid); gu_printf(wtr, err, ")");
} }
gu_printf(wtr, err, "]; %d : %d",item->base->lin_idx, item->seq_idx); gu_printf(wtr, err, "[");
pgf_print_production_args(item->args, wtr, err);
gu_printf(wtr, err, "]; ");
pgf_print_item_seq(pext->fun, item, wtr, err);
break; break;
} }
default: default:
@@ -311,42 +363,41 @@ pgf_parsing_get_completed(PgfParsing* parsing, PgfItemBuf* conts)
return gu_map_get(parsing->generated_cats, conts, PgfCCat*); return gu_map_get(parsing->generated_cats, conts, PgfCCat*);
} }
static PgfSymbol static void
pgf_item_base_symbol(PgfItemBase* ibase, size_t seq_idx, GuPool* pool) pgf_item_set_curr_symbol(PgfItem* item, GuPool* pool)
{ {
GuVariantInfo i = gu_variant_open(ibase->prod); GuVariantInfo i = gu_variant_open(item->base->prod);
switch (i.tag) { switch (i.tag) {
case PGF_PRODUCTION_APPLY: { case PGF_PRODUCTION_APPLY: {
PgfProductionApply* papp = i.data; PgfProductionApply* papp = i.data;
PgfCncFun* fun = papp->fun; PgfCncFun* fun = papp->fun;
gu_assert(ibase->lin_idx < fun->n_lins); gu_assert(item->base->lin_idx < fun->n_lins);
PgfSequence seq = fun->lins[ibase->lin_idx]; PgfSequence seq = fun->lins[item->base->lin_idx];
gu_assert(seq_idx <= gu_seq_length(seq)); gu_assert(item->seq_idx <= gu_seq_length(seq));
if (seq_idx == gu_seq_length(seq)) { if (item->seq_idx == gu_seq_length(seq)) {
return gu_null_variant; item->curr_sym = gu_null_variant;
} else { } else {
return gu_seq_get(seq, PgfSymbol, seq_idx); item->curr_sym = gu_seq_get(seq, PgfSymbol, item->seq_idx);
} }
break; break;
} }
case PGF_PRODUCTION_COERCE: { case PGF_PRODUCTION_COERCE: {
gu_assert(seq_idx <= 1); gu_assert(item->seq_idx <= 1);
if (seq_idx == 1) { if (item->seq_idx == 1) {
return gu_null_variant; item->curr_sym = gu_null_variant;
} else { } else {
return gu_new_variant_i(pool, PGF_SYMBOL_CAT, item->curr_sym = gu_new_variant_i(pool, PGF_SYMBOL_CAT,
PgfSymbolCat, PgfSymbolCat,
.d = 0, .r = ibase->lin_idx); .d = 0, .r = item->base->lin_idx);
} }
break; break;
} }
case PGF_PRODUCTION_META: { case PGF_PRODUCTION_EXTERN: {
return gu_null_variant; break;
} }
default: default:
gu_impossible(); gu_impossible();
} }
return gu_null_variant;
} }
static PgfItem* static PgfItem*
@@ -375,19 +426,21 @@ pgf_new_item(PgfCCat* ccat, size_t lin_idx,
parg->ccat = pcoerce->coerce; parg->ccat = pcoerce->coerce;
break; break;
} }
case PGF_PRODUCTION_META: { case PGF_PRODUCTION_EXTERN: {
PgfProductionMeta* pmeta = pi.data; PgfProductionExtern* pext = pi.data;
item->args = pmeta->args; item->args = pext->args;
break; break;
} }
default: default:
gu_impossible(); gu_impossible();
} }
item->base = base; item->base = base;
item->curr_sym = pgf_item_base_symbol(item->base, 0, pool); item->curr_sym = gu_null_variant;
item->seq_idx = 0; item->seq_idx = 0;
item->tok_idx = 0; item->tok_idx = 0;
item->alt = 0; item->alt = 0;
pgf_item_set_curr_symbol(item, pool);
return item; return item;
} }
@@ -425,14 +478,15 @@ static void
pgf_item_advance(PgfItem* item, GuPool* pool) pgf_item_advance(PgfItem* item, GuPool* pool)
{ {
item->seq_idx++; item->seq_idx++;
item->curr_sym = pgf_item_base_symbol(item->base, item->seq_idx, pool); pgf_item_set_curr_symbol(item, pool);
} }
static void static void
pgf_parsing_item(PgfParsing* parsing, PgfItem* item); pgf_parsing_item(PgfParsing* parsing, PgfItem* item);
static void static void
pgf_parsing_combine(PgfParsing* parsing, PgfItem* cont, PgfCCat* cat) pgf_parsing_combine(PgfParsing* parsing, PgfItem* cont,
PgfCCat* cat, int lin_idx)
{ {
if (cont == NULL) { if (cont == NULL) {
gu_buf_push(parsing->completed, PgfCCat*, cat); gu_buf_push(parsing->completed, PgfCCat*, cat);
@@ -464,6 +518,16 @@ pgf_parsing_combine(PgfParsing* parsing, PgfItem* cont, PgfCCat* cat)
nargs * sizeof(PgfPArg)); nargs * sizeof(PgfPArg));
gu_seq_set(item->args, PgfPArg, nargs, gu_seq_set(item->args, PgfPArg, nargs,
((PgfPArg) { .hypos = NULL, .ccat = cat })); ((PgfPArg) { .hypos = NULL, .ccat = cat }));
PgfSymbol prev = item->curr_sym;
PgfSymbolCat* scat = (PgfSymbolCat*)
gu_alloc_variant(PGF_SYMBOL_CAT,
sizeof(PgfSymbolCat)+sizeof(PgfSymbol),
gu_alignof(PgfSymbolCat),
&item->curr_sym, parsing->pool);
*((PgfSymbol*)(scat+1)) = prev;
scat->d = nargs;
scat->r = lin_idx;
} }
pgf_item_advance(item, parsing->pool); pgf_item_advance(item, parsing->pool);
@@ -480,7 +544,7 @@ pgf_parsing_production(PgfParsing* parsing, PgfCCat* ccat, size_t lin_idx,
} }
static PgfProduction static PgfProduction
pgf_parsing_new_production(PgfItem* item, GuPool *pool) pgf_parsing_new_production(PgfItem* item, PgfExprProb *ep, GuPool *pool)
{ {
GuVariantInfo i = gu_variant_open(item->base->prod); GuVariantInfo i = gu_variant_open(item->base->prod);
PgfProduction prod = gu_null_variant; PgfProduction prod = gu_null_variant;
@@ -505,12 +569,53 @@ pgf_parsing_new_production(PgfItem* item, GuPool *pool)
new_pcoerce->coerce = parg->ccat; new_pcoerce->coerce = parg->ccat;
break; break;
} }
case PGF_PRODUCTION_META: { case PGF_PRODUCTION_EXTERN: {
PgfProductionMeta* new_pmeta = PgfProductionExtern* pext = i.data;
gu_new_variant(PGF_PRODUCTION_META, PgfCncFun* fun = pext->fun;
PgfProductionMeta,
&prod, pool); if (fun == NULL ||
new_pmeta->args = item->args; gu_seq_is_null(fun->lins[item->base->lin_idx])) {
int seq_len = 0;
PgfSymbol sym = item->curr_sym;
while (!gu_variant_is_null(sym)) {
seq_len++;
sym = pgf_prev_extern_sym(sym);
}
PgfSequence seq =
gu_new_seq(PgfSymbol, seq_len, pool);
sym = item->curr_sym;
while (!gu_variant_is_null(sym)) {
gu_seq_set(seq, PgfSymbol, --seq_len, sym);
sym = pgf_prev_extern_sym(sym);
}
PgfCncCat *cnccat = item->base->ccat->cnccat;
size_t size = GU_FLEX_SIZE(PgfCncFun, lins, cnccat->n_lins);
fun = gu_malloc(pool, size);
if (pext->fun == NULL) {
fun->name = gu_empty_string;
fun->ep = ep;
fun->funid = -1;
fun->n_lins = cnccat->n_lins;
for (size_t i = 0; i < fun->n_lins; i++) {
fun->lins[i] = gu_null_seq;
}
} else {
memcpy(fun, pext->fun, size);
}
fun->lins[item->base->lin_idx] = seq;
}
PgfProductionExtern* new_pext =
gu_new_variant(PGF_PRODUCTION_EXTERN,
PgfProductionExtern,
&prod, pool);
new_pext->fun = fun;
new_pext->args = item->args;
new_pext->callback = pext->callback;
break; break;
} }
default: default:
@@ -521,10 +626,10 @@ pgf_parsing_new_production(PgfItem* item, GuPool *pool)
} }
static void static void
pgf_parsing_complete(PgfParsing* parsing, PgfItem* item) pgf_parsing_complete(PgfParsing* parsing, PgfItem* item, PgfExprProb *ep)
{ {
PgfProduction prod = PgfProduction prod =
pgf_parsing_new_production(item, parsing->pool); pgf_parsing_new_production(item, ep, parsing->pool);
PgfItemBuf* conts = item->base->conts; PgfItemBuf* conts = item->base->conts;
PgfCCat* tmp_cat = pgf_parsing_get_completed(parsing, conts); PgfCCat* tmp_cat = pgf_parsing_get_completed(parsing, conts);
@@ -575,7 +680,7 @@ pgf_parsing_complete(PgfParsing* parsing, PgfItem* item)
size_t n_conts = gu_buf_length(conts); size_t n_conts = gu_buf_length(conts);
for (size_t i = 0; i < n_conts; i++) { for (size_t i = 0; i < n_conts; i++) {
PgfItem* cont = gu_buf_get(conts, PgfItem*, i); PgfItem* cont = gu_buf_get(conts, PgfItem*, i);
pgf_parsing_combine(parsing, cont, cat); pgf_parsing_combine(parsing, cont, cat, item->base->lin_idx);
} }
} }
} }
@@ -624,8 +729,8 @@ pgf_parsing_td_predict(PgfParsing* parsing, PgfItem* item,
// Bottom-up prediction for epsilon rules // Bottom-up prediction for epsilon rules
PgfCFCat cfc = {ccat->fid, lin_idx}; PgfCFCat cfc = {ccat->fid, lin_idx};
PgfCCat* eps_ccat = gu_map_get(parsing->epsilon_idx, &cfc, PgfCCat*); PgfCCat* eps_ccat = gu_map_get(parsing->concr->epsilon_idx,
&cfc, PgfCCat*);
if (eps_ccat != NULL) { if (eps_ccat != NULL) {
size_t n_prods = gu_seq_length(eps_ccat->prods); size_t n_prods = gu_seq_length(eps_ccat->prods);
for (size_t i = 0; i < n_prods; i++) { for (size_t i = 0; i < n_prods; i++) {
@@ -650,7 +755,7 @@ pgf_parsing_td_predict(PgfParsing* parsing, PgfItem* item,
PgfCCat* completed = PgfCCat* completed =
pgf_parsing_get_completed(parsing, conts); pgf_parsing_get_completed(parsing, conts);
if (completed) { if (completed) {
pgf_parsing_combine(parsing, item, completed); pgf_parsing_combine(parsing, item, completed, lin_idx);
} }
} }
gu_exit(NULL); gu_exit(NULL);
@@ -780,61 +885,44 @@ pgf_parsing_symbol(PgfParsing* parsing, PgfItem* item, PgfSymbol sym) {
break; break;
} }
case PGF_SYMBOL_LIT: { case PGF_SYMBOL_LIT: {
PgfSymbolLit* slit = gu_variant_data(sym); if (!gu_string_eq(parsing->tok, gu_empty_string)) {
PgfPArg* parg = gu_seq_index(item->args, PgfPArg, slit->d); PgfSymbolLit* slit = gu_variant_data(sym);
PgfPArg* parg = gu_seq_index(item->args, PgfPArg, slit->d);
gu_assert(!parg->hypos || !parg->hypos->len);
PgfCncCat *cnccat = parg->ccat->cnccat; if (parg->ccat->fid > 0 &&
parg->ccat->fid >= parsing->concr->total_cats)
pgf_parsing_td_predict(parsing, item, parg->ccat, slit->r);
else {
PgfItemBuf* conts =
pgf_parsing_get_conts(parsing->conts_map,
parg->ccat, slit->r,
parsing->pool, parsing->tmp_pool);
gu_buf_push(conts, PgfItem*, item);
// the linearization category must be {s : Str} if (gu_buf_length(conts) == 1) {
gu_assert(cnccat->n_lins == 1); /* This is the first time when we encounter this
gu_assert(gu_list_length(cnccat->cats) == 1); * literal category so we must call the callback */
PgfItemBuf* conts = PgfLiteralCallback* callback =
pgf_parsing_get_conts(parsing->conts_map, gu_map_get(parsing->concr->callbacks,
parg->ccat, slit->r, parg->ccat->cnccat,
parsing->pool, parsing->tmp_pool); PgfLiteralCallback*);
gu_buf_push(conts, PgfItem*, item);
if (gu_buf_length(conts) == 1) {
/* This is the first time when we encounter this
* literal category so we must call the callback */
GuEnum* en = parsing->callback->lit(parsing->callback, parg->ccat);
for (;;) {
PgfLiteralCandidate* candidate =
gu_next(en, PgfLiteralCandidate*, parsing->pool);
if (candidate == NULL)
break;
PgfSymbol sym = gu_null_variant; if (callback != NULL) {
PgfSymbolKS* sks = PgfProduction prod;
gu_new_variant(PGF_SYMBOL_KS, PgfProductionExtern* pext =
PgfSymbolKS, gu_new_variant(PGF_PRODUCTION_EXTERN,
&sym, parsing->pool); PgfProductionExtern,
sks->tokens = candidate->tokens; &prod, parsing->pool);
pext->fun = NULL;
pext->args = gu_new_seq(PgfPArg, 0, parsing->pool);
pext->callback = callback;
PgfSequence seq = gu_new_seq(PgfSymbol, 1, parsing->pool); pgf_parsing_production(parsing, parg->ccat, slit->r,
gu_seq_set(seq, PgfSymbol, 0, sym); prod, conts);
}
PgfCncFun* fun = }
gu_malloc(parsing->pool,
sizeof(PgfCncFun)+
sizeof(PgfSequence*)*cnccat->n_lins);
fun->name = gu_empty_string;
fun->ep = &candidate->ep;
fun->funid = -1;
fun->n_lins = cnccat->n_lins;
fun->lins[0] = seq;
PgfProduction prod;
PgfProductionApply* papp =
gu_new_variant(PGF_PRODUCTION_APPLY,
PgfProductionApply,
&prod, parsing->pool);
papp->fun = fun;
papp->args = gu_new_seq(PgfPArg, 0, parsing->pool);
pgf_parsing_production(parsing, parg->ccat, slit->r,
prod, conts);
} }
} }
break; break;
@@ -847,6 +935,55 @@ pgf_parsing_symbol(PgfParsing* parsing, PgfItem* item, PgfSymbol sym) {
} }
} }
/* Offer the current token (parsing->tok) to a literal callback on behalf
 * of `item`.
 *
 * The token list handed to the callback is the token list of the item's
 * current symbol (which must be a PGF_SYMBOL_KS) with the current token
 * appended, or just the current token when the item has no current
 * symbol yet.
 *
 * When the callback accepts, the item's current symbol is replaced by a
 * fresh KS symbol that carries the extended token list and has no
 * previous-symbol link, and a transition on the current token is added
 * for the item.
 *
 * The callback's expression (possibly NULL) is stored in *out_ep and its
 * verdict in *out_accepted. */
static void
pgf_foo(PgfParsing* parsing, PgfItem* item,
        PgfLiteralCallback* callback,
        PgfExprProb** out_ep,
        bool* out_accepted)
{
	PgfTokens toks;

	if (!gu_variant_is_null(item->curr_sym)) {
		GuVariantInfo sym_info = gu_variant_open(item->curr_sym);
		gu_assert(sym_info.tag == PGF_SYMBOL_KS);

		PgfTokens prev_toks = ((PgfSymbolKS*) sym_info.data)->tokens;
		size_t n_prev = gu_seq_length(prev_toks);

		/* Copy the tokens seen so far and append the new one. */
		toks = gu_new_seq(PgfToken, n_prev+1, parsing->pool);
		for (size_t k = 0; k < n_prev; k++) {
			gu_seq_set(toks, PgfToken, k,
			           gu_seq_get(prev_toks, PgfToken, k));
		}
		gu_seq_set(toks, PgfToken, n_prev, parsing->tok);
	} else {
		toks = gu_new_seq(PgfToken, 1, parsing->pool);
		gu_seq_set(toks, PgfToken, 0, parsing->tok);
	}

	PgfExprProb *ep = NULL;
	bool accepted =
		callback->match(callback,
		                item->base->lin_idx, toks, &ep,
		                parsing->pool);

	if (accepted) {
		/* The first accepted token moves the dot past the start. */
		if (gu_variant_is_null(item->curr_sym))
			item->seq_idx = 1;

		/* The extra PgfSymbol after the structure is the
		 * previous-symbol slot. */
		PgfSymbolKS* new_ks = (PgfSymbolKS*)
			gu_alloc_variant(PGF_SYMBOL_KS,
			                 sizeof(PgfSymbolKS)+sizeof(PgfSymbol),
			                 gu_alignof(PgfSymbolKS),
			                 &item->curr_sym, parsing->pool);
		*((PgfSymbol*)(new_ks+1)) = gu_null_variant;
		new_ks->tokens = toks;

		pgf_parsing_add_transition(parsing, parsing->tok, item);
	}

	*out_ep = ep;
	*out_accepted = accepted;
}
static void static void
pgf_parsing_item(PgfParsing* parsing, PgfItem* item) pgf_parsing_item(PgfParsing* parsing, PgfItem* item)
{ {
@@ -866,7 +1003,7 @@ pgf_parsing_item(PgfParsing* parsing, PgfItem* item)
PgfCncFun* fun = papp->fun; PgfCncFun* fun = papp->fun;
PgfSequence seq = fun->lins[item->base->lin_idx]; PgfSequence seq = fun->lins[item->base->lin_idx];
if (item->seq_idx == gu_seq_length(seq)) { if (item->seq_idx == gu_seq_length(seq)) {
pgf_parsing_complete(parsing, item); pgf_parsing_complete(parsing, item, NULL);
} else { } else {
PgfSymbol sym = PgfSymbol sym =
gu_seq_get(seq, PgfSymbol, item->seq_idx); gu_seq_get(seq, PgfSymbol, item->seq_idx);
@@ -883,16 +1020,74 @@ pgf_parsing_item(PgfParsing* parsing, PgfItem* item)
item->base->lin_idx); item->base->lin_idx);
break; break;
case 1: case 1:
pgf_parsing_complete(parsing, item); pgf_parsing_complete(parsing, item, NULL);
break; break;
default: default:
gu_impossible(); gu_impossible();
} }
break; break;
} }
case PGF_PRODUCTION_META: { case PGF_PRODUCTION_EXTERN: {
pgf_parsing_complete(parsing, item); PgfProductionExtern* pext = i.data;
gu_buf_push(parsing->metas, PgfItem*, item); PgfCncFun* fun = pext->fun;
PgfSequence seq;
if (fun != NULL &&
!gu_seq_is_null(seq = fun->lins[item->base->lin_idx])) {
if (item->seq_idx == gu_seq_length(seq)) {
pgf_parsing_complete(parsing, item, NULL);
} else {
PgfSymbol sym =
gu_seq_get(seq, PgfSymbol, item->seq_idx);
pgf_parsing_symbol(parsing, item, sym);
}
} else {
PgfSymbol prev = gu_null_variant;
PgfTokens toks;
if (gu_variant_is_null(item->curr_sym) ||
gu_variant_tag(item->curr_sym) != PGF_SYMBOL_KS) {
toks = gu_new_seq(PgfToken, 1, parsing->pool);
gu_seq_set(toks, PgfToken, 0, parsing->tok);
prev = item->curr_sym;
} else {
PgfTokens old_toks =
((PgfSymbolKS*) gu_variant_data(item->curr_sym))->tokens;
prev = pgf_prev_extern_sym(item->curr_sym);
size_t n_toks = gu_seq_length(old_toks);
toks = gu_new_seq(PgfToken, n_toks+1, parsing->pool);
for (size_t i = 0; i < n_toks; i++) {
gu_seq_set(toks, PgfToken, i,
gu_seq_get(old_toks, PgfToken, i));
}
gu_seq_set(toks, PgfToken, n_toks, parsing->tok);
}
PgfExprProb *ep = NULL;
bool accepted =
pext->callback->match(pext->callback,
item->base->lin_idx, toks, &ep,
parsing->pool);
if (ep != NULL)
pgf_parsing_complete(parsing, item, ep);
if (accepted) {
if (gu_variant_is_null(item->curr_sym))
item->seq_idx = 1;
PgfSymbolKS* sks = (PgfSymbolKS*)
gu_alloc_variant(PGF_SYMBOL_KS,
sizeof(PgfSymbolKS)+sizeof(PgfSymbol),
gu_alignof(PgfSymbolKS),
&item->curr_sym, parsing->pool);
*((PgfSymbol*)(sks+1)) = prev;
sks->tokens = toks;
pgf_parsing_add_transition(parsing, parsing->tok, item);
}
}
break; break;
} }
default: default:
@@ -905,15 +1100,16 @@ pgf_new_parsing(PgfConcr* concr, PgfLexCallback* callback, int max_fid,
GuPool* parse_pool, GuPool* out_pool) GuPool* parse_pool, GuPool* out_pool)
{ {
PgfParsing* parsing = gu_new(PgfParsing, out_pool); PgfParsing* parsing = gu_new(PgfParsing, out_pool);
parsing->concr = concr;
parsing->generated_cats = gu_map_type_new(PgfGenCatMap, out_pool); parsing->generated_cats = gu_map_type_new(PgfGenCatMap, out_pool);
parsing->conts_map = gu_map_type_new(PgfContsMap, out_pool); parsing->conts_map = gu_map_type_new(PgfContsMap, out_pool);
parsing->completed = gu_new_buf(PgfCCat*, parse_pool); parsing->completed = gu_new_buf(PgfCCat*, parse_pool);
parsing->callback = callback; parsing->callback = callback;
parsing->lexicon_idx = NULL; parsing->lexicon_idx = NULL;
parsing->epsilon_idx = concr->epsilon_idx;
parsing->pool = parse_pool; parsing->pool = parse_pool;
parsing->tmp_pool = out_pool; parsing->tmp_pool = out_pool;
parsing->metas = gu_new_buf(PgfItem*, out_pool); parsing->metas = gu_new_buf(PgfItem*, out_pool);
parsing->tok = gu_empty_string;
parsing->max_fid = max_fid; parsing->max_fid = max_fid;
return parsing; return parsing;
} }
@@ -928,29 +1124,10 @@ pgf_new_parse(PgfConcr* concr, int max_fid, GuPool* pool)
return parse; return parse;
} }
/*
 * Lexer callback that does nothing: the token and item handed to it
 * are ignored.
 */
static void
pgf_lex_noop(PgfLexCallback* self, PgfToken tok, PgfItem* item)
{
	(void) self;
	(void) tok;
	(void) item;
}
/*
 * Enumeration step for an enumeration without elements: stores NULL
 * into the PgfLiteralCandidate* slot that `to` points at. Neither
 * `self` nor `pool` is used.
 */
static void
pgf_enum_null(GuEnum* self, void* to, GuPool* pool)
{
	PgfLiteralCandidate** slot = (PgfLiteralCandidate**) to;
	*slot = NULL;
}
/*
 * Literal lookup that never finds a candidate.
 *
 * Returns a pointer to a function-local static GuEnum initialised with
 * pgf_enum_null, so every call yields the same object. Both arguments
 * are ignored.
 */
static GuEnum*
pgf_lit_noop(PgfLexCallback* self, PgfCCat* ccat)
{
	static GuEnum empty_enum = { pgf_enum_null };

	(void) self;
	(void) ccat;
	return &empty_enum;
}
typedef struct { typedef struct {
PgfLexCallback fn; PgfLexCallback fn;
PgfToken tok; PgfToken tok;
PgfItemBuf* agenda; PgfItemBuf* agenda;
GuPool *pool;
} PgfParseTokenCallback; } PgfParseTokenCallback;
static void static void
@@ -963,79 +1140,6 @@ pgf_match_token(PgfLexCallback* self, PgfToken tok, PgfItem* item)
} }
} }
/*
 * Enumeration state built by pgf_match_lit to hand out a literal
 * candidate.
 *
 * `en` has to remain the first member: pgf_match_lit returns &en and
 * pgf_enum_lits casts that GuEnum* back to a PgfLitEnum*.
 */
typedef struct {
GuEnum en; /* enumeration header; pgf_match_lit sets en.next to pgf_enum_lits */
PgfLiteralCandidate candidate; /* the candidate yielded on the first step */
size_t idx; /* incremented on every step; starts at 0 */
} PgfLitEnum;
/*
 * Enumeration step for a PgfLitEnum.
 *
 * `self` is cast to PgfLitEnum* and `to` is treated as a
 * PgfLiteralCandidate** output slot. The step counter is incremented on
 * every call; the slot receives the address of the stored candidate
 * when the counter was 0 before the call, and NULL otherwise.
 * `pool` is not used.
 */
static void
pgf_enum_lits(GuEnum* self, void* to, GuPool* pool)
{
	PgfLitEnum* state = (PgfLitEnum*) self;
	PgfLiteralCandidate** slot = (PgfLiteralCandidate**) to;

	if (state->idx++ > 0) {
		*slot = NULL;
	} else {
		*slot = &state->candidate;
	}
}
/*
 * Try to read the current token as a literal of the category `ccat`.
 *
 * `self` is cast to PgfParseTokenCallback*, which supplies the token
 * (tok) and the allocation pool (pool). The literal kind is chosen by
 * ccat->fid:
 *   -1  PGF_LITERAL_STR, value is the token itself
 *   -2  PGF_LITERAL_INT, token converted with gu_string_to_int
 *   -3  PGF_LITERAL_FLT, token converted with gu_string_to_double
 * Any other fid reaches gu_impossible().
 *
 * When a numeric conversion fails the result of pgf_lit_noop is
 * returned. Otherwise the literal is wrapped in a PGF_EXPR_LIT
 * expression and a PgfLitEnum is returned whose candidate carries a
 * one-element token sequence, that expression and prob INFINITY.
 */
static GuEnum*
pgf_match_lit(PgfLexCallback* self, PgfCCat* ccat)
{
	PgfParseTokenCallback *cb = (PgfParseTokenCallback *) self;

	PgfLiteral lit;
	switch (ccat->fid) {
	case -1: {
		PgfLiteralStr *str_lit =
			gu_new_variant(PGF_LITERAL_STR, PgfLiteralStr,
			               &lit, cb->pool);
		str_lit->val = cb->tok;
		break;
	}
	case -2: {
		/* The variant is allocated before the conversion is attempted. */
		PgfLiteralInt *int_lit =
			gu_new_variant(PGF_LITERAL_INT, PgfLiteralInt,
			               &lit, cb->pool);
		if (!gu_string_to_int(cb->tok, &int_lit->val)) {
			return pgf_lit_noop(self, ccat);
		}
		break;
	}
	case -3: {
		/* The variant is allocated before the conversion is attempted. */
		PgfLiteralFlt *flt_lit =
			gu_new_variant(PGF_LITERAL_FLT, PgfLiteralFlt,
			               &lit, cb->pool);
		if (!gu_string_to_double(cb->tok, &flt_lit->val)) {
			return pgf_lit_noop(self, ccat);
		}
		break;
	}
	default:
		gu_impossible();
	}

	/* The candidate spans exactly the current token. */
	PgfTokens tok_seq = gu_new_seq(PgfToken, 1, cb->pool);
	gu_seq_set(tok_seq, PgfToken, 0, cb->tok);

	PgfExpr lit_expr = gu_null_variant;
	PgfExprLit *lit_node =
		gu_new_variant(PGF_EXPR_LIT, PgfExprLit,
		               &lit_expr, cb->pool);
	lit_node->lit = lit;

	PgfLitEnum* result = gu_new(PgfLitEnum, cb->pool);
	result->en.next = pgf_enum_lits;
	result->idx = 0;
	result->candidate.tokens = tok_seq;
	result->candidate.ep.expr = lit_expr;
	result->candidate.ep.prob = INFINITY;
	return &result->en;
}
typedef struct { typedef struct {
GuMapItor fn; GuMapItor fn;
PgfProduction prod; PgfProduction prod;
@@ -1062,7 +1166,7 @@ pgf_parsing_get_metas(GuMapItor* fn, const void* key, void* value,
PgfItem *item = PgfItem *item =
pgf_new_item(ccat, lin_idx, prod, conts, pool); pgf_new_item(ccat, lin_idx, prod, conts, pool);
gu_buf_push(metas, PgfItem*, item); gu_buf_push(metas, PgfItem*, item);
#ifdef PGF_PARSER_DEBUG #ifdef PGF_PARSER_DEBUG
GuPool* tmp_pool = gu_new_pool(); GuPool* tmp_pool = gu_new_pool();
GuOut* out = gu_file_out(stderr, tmp_pool); GuOut* out = gu_file_out(stderr, tmp_pool);
@@ -1075,16 +1179,35 @@ pgf_parsing_get_metas(GuMapItor* fn, const void* key, void* value,
} }
} }
/*
 * Literal callback that produces a metavariable.
 *
 * The arguments self, lin_idx and toks are not inspected. A new
 * PgfExprProb is allocated from `pool`; its prob is set to
 * 100000000000 plus a rand() value and its expr to a PGF_EXPR_META
 * node with id 0. The result is stored in *out_ep.
 *
 * Always returns true.
 */
static bool
pgf_match_meta(PgfLiteralCallback* self, int lin_idx, PgfTokens toks,
               PgfExprProb** out_ep, GuPool *pool)
{
	PgfExprProb *meta_ep = gu_new(PgfExprProb, pool);
	meta_ep->prob = 100000000000 + rand();

	PgfExprMeta *meta_node =
		gu_new_variant(PGF_EXPR_META, PgfExprMeta,
		               &meta_ep->expr, pool);
	meta_node->id = 0;

	*out_ep = meta_ep;
	return true;
}
static PgfLiteralCallback pgf_meta_callback =
{ pgf_match_meta } ;
PgfParse* PgfParse*
pgf_parse_token(PgfParse* parse, PgfToken tok, bool robust, GuPool* pool) pgf_parse_token(PgfParse* parse, PgfToken tok, bool robust, GuPool* pool)
{ {
PgfItemBuf* agenda = gu_new_buf(PgfItem*, pool); PgfItemBuf* agenda = gu_new_buf(PgfItem*, pool);
PgfParseTokenCallback clo1 = {{ pgf_match_token, pgf_match_lit }, PgfParseTokenCallback clo1 = {{ pgf_match_token }, tok, agenda };
tok, agenda, pool};
GuPool* tmp_pool = gu_new_pool(); GuPool* tmp_pool = gu_new_pool();
PgfParsing* parsing = pgf_new_parsing(parse->concr, &clo1.fn, parse->max_fid, pool, tmp_pool); PgfParsing* parsing = pgf_new_parsing(parse->concr, &clo1.fn, parse->max_fid, pool, tmp_pool);
parsing->tok = tok;
parsing->lexicon_idx = gu_map_get(parse->concr->lexicon_idx, &tok, GuBuf*); parsing->lexicon_idx = gu_map_get(parse->concr->lexicon_idx, &tok, GuBuf*);
size_t n_items = gu_buf_length(parse->agenda); size_t n_items = gu_buf_length(parse->agenda);
@@ -1095,11 +1218,13 @@ pgf_parse_token(PgfParse* parse, PgfToken tok, bool robust, GuPool* pool)
if (robust) { if (robust) {
PgfProduction prod; PgfProduction prod;
PgfProductionMeta* pmeta = PgfProductionExtern* pext =
gu_new_variant(PGF_PRODUCTION_META, gu_new_variant(PGF_PRODUCTION_EXTERN,
PgfProductionMeta, PgfProductionExtern,
&prod, parsing->pool); &prod, parsing->pool);
pmeta->args = gu_new_seq(PgfPArg, 0, parsing->pool); pext->fun = NULL;
pext->args = gu_new_seq(PgfPArg, 0, parsing->pool);
pext->callback = &pgf_meta_callback;
PgfGetMetaFn clo2 = { { pgf_parsing_get_metas }, prod, parsing->metas, pool }; PgfGetMetaFn clo2 = { { pgf_parsing_get_metas }, prod, parsing->metas, pool };
gu_map_iter(parsing->conts_map, &clo2.fn, NULL); gu_map_iter(parsing->conts_map, &clo2.fn, NULL);
@@ -1108,19 +1233,22 @@ pgf_parse_token(PgfParse* parse, PgfToken tok, bool robust, GuPool* pool)
size_t n_items = gu_buf_length(parsing->lexicon_idx); size_t n_items = gu_buf_length(parsing->lexicon_idx);
for (size_t i = 0; i < n_items; i++) { for (size_t i = 0; i < n_items; i++) {
PgfItem* item = gu_buf_get(parsing->lexicon_idx, PgfItem*, i); PgfItem* item = gu_buf_get(parsing->lexicon_idx, PgfItem*, i);
if (!pgf_parsing_has_conts(parsing->conts_map, if (!pgf_parsing_has_conts(parsing->conts_map,
item->base->ccat, item->base->lin_idx)) { item->base->ccat, item->base->lin_idx)) {
pgf_parsing_bu_predict(parsing, item, agenda); pgf_parsing_bu_predict(parsing, item, agenda);
} }
} }
} else { } else {
// We have unknown word
size_t n_items = gu_buf_length(parsing->metas); size_t n_items = gu_buf_length(parsing->metas);
for (size_t i = 0; i < n_items; i++) { for (size_t i = 0; i < n_items; i++) {
PgfItem* item = gu_buf_get(parsing->metas, PgfItem*, i); PgfItem* item = gu_buf_get(parsing->metas, PgfItem*, i);
pgf_item_advance(item, parsing->pool); PgfExprProb *ep;
pgf_parsing_add_transition(parsing, tok, item); bool accepted;
pgf_foo(parsing, item, pext->callback, &ep, &accepted);
} }
} }
} }
@@ -1168,14 +1296,12 @@ pgf_production_to_expr(PgfConcr* concr, PgfProduction prod,
PgfProductionCoerce* pcoerce = pi.data; PgfProductionCoerce* pcoerce = pi.data;
return pgf_cat_to_expr(concr, pcoerce->coerce, visited, choice, pool); return pgf_cat_to_expr(concr, pcoerce->coerce, visited, choice, pool);
} }
case PGF_PRODUCTION_META: { case PGF_PRODUCTION_EXTERN: {
PgfProductionMeta* pmeta = pi.data; PgfProductionExtern* pext = pi.data;
PgfExpr expr = gu_new_variant_i(pool, PGF_EXPR_META, PgfExpr expr = pext->fun->ep->expr;
PgfExprMeta, size_t n_args = gu_seq_length(pext->args);
.id = 0);
size_t n_args = gu_seq_length(pmeta->args);
for (size_t i = 0; i < n_args; i++) { for (size_t i = 0; i < n_args; i++) {
PgfPArg* parg = gu_seq_index(pmeta->args, PgfPArg, i); PgfPArg* parg = gu_seq_index(pext->args, PgfPArg, i);
gu_assert(!parg->hypos || !parg->hypos->len); gu_assert(!parg->hypos || !parg->hypos->len);
PgfExpr earg = pgf_cat_to_expr(concr, parg->ccat, visited, choice, pool); PgfExpr earg = pgf_cat_to_expr(concr, parg->ccat, visited, choice, pool);
if (gu_variant_is_null(earg)) if (gu_variant_is_null(earg))
@@ -1260,8 +1386,13 @@ pgf_parse_result_enum_next(GuEnum* self, void* to, GuPool* pool)
*(PgfExpr*)to = pgf_parse_result_next(pr, pool); *(PgfExpr*)to = pgf_parse_result_next(pr, pool);
} }
/*
 * Lexer callback that does nothing: the token and item handed to it
 * are ignored.
 */
static void
pgf_lex_noop(PgfLexCallback* self, PgfToken tok, PgfItem* item)
{
	(void) self;
	(void) tok;
	(void) item;
}
static PgfLexCallback lex_callback_noop = static PgfLexCallback lex_callback_noop =
{ pgf_lex_noop, pgf_lit_noop }; { pgf_lex_noop };
PgfExprEnum* PgfExprEnum*
pgf_parse_result(PgfParse* parse, GuPool* pool) pgf_parse_result(PgfParse* parse, GuPool* pool)
@@ -1332,19 +1463,13 @@ pgf_parse_best_result_init(PgfCCat *ccat, GuBuf *pqueue,
tmp_pool, out_pool); tmp_pool, out_pool);
break; break;
} }
case PGF_PRODUCTION_META: { case PGF_PRODUCTION_EXTERN: {
PgfProductionMeta* pmeta = pi.data; PgfProductionExtern* pext = pi.data;
PgfExprState *st = gu_new(PgfExprState, tmp_pool); PgfExprState *st = gu_new(PgfExprState, tmp_pool);
st->ep.prob = 100000000000 + rand(); st->ep = *pext->fun->ep;
PgfExprMeta *expr_meta = st->args = pext->args;
gu_new_variant(PGF_EXPR_META,
PgfExprMeta,
&st->ep.expr, out_pool);
expr_meta->id = 0;
st->args = pmeta->args;
st->arg_idx = 0; st->arg_idx = 0;
gu_buf_heap_push(pqueue, &pgf_expr_prob_order, &st); gu_buf_heap_push(pqueue, &pgf_expr_prob_order, &st);
break; break;
} }
@@ -1500,6 +1625,19 @@ pgf_parser_parse(PgfConcr* concr, PgfCId cat, size_t lin_idx, GuPool* pool)
return parse; return parse;
} }
/*
 * Register a literal callback for a category of a concrete syntax.
 *
 * `cat` is looked up in concr->cnccats. When no entry exists the call
 * returns without doing anything; otherwise `callback` is stored in
 * concr->callbacks under the PgfCncCat that was found.
 */
void
pgf_parser_add_literal(PgfConcr *concr, PgfCId cat,
                       PgfLiteralCallback* callback)
{
	PgfCncCat* target =
		gu_map_get(concr->cnccats, &cat, PgfCncCat*);

	if (target != NULL) {
		gu_map_put(concr->callbacks, target,
		           PgfLiteralCallback*, callback);
	}
}
static void static void
pgf_parser_bu_add_entry(PgfConcr* concr, PgfTokens tokens, pgf_parser_bu_add_entry(PgfConcr* concr, PgfTokens tokens,
PgfItem* item, PgfItem* item,
@@ -1614,7 +1752,7 @@ pgf_parser_bu_item(PgfConcr* concr, PgfItem* item,
} }
PgfProduction prod = PgfProduction prod =
pgf_parsing_new_production(item, pool); pgf_parsing_new_production(item, NULL, pool);
GuBuf* prodbuf = gu_seq_buf(eps_ccat->prods); GuBuf* prodbuf = gu_seq_buf(eps_ccat->prods);
gu_buf_push(prodbuf, PgfProduction, prod); gu_buf_push(prodbuf, PgfProduction, prod);
eps_ccat->n_synprods++; eps_ccat->n_synprods++;

View File

@@ -65,6 +65,9 @@ pgf_parse_token(PgfParse* parse, PgfToken tok, bool robust, GuPool* pool);
* the pool used to create \parse. * the pool used to create \parse.
*/ */
/// Register a literal callback for the category \p cat of \p concr.
/// The call has no effect when \p cat is not found among the concrete
/// categories of \p concr.
void
pgf_parser_add_literal(PgfConcr *concr, PgfCId cat,
PgfLiteralCallback* callback);
/** @} /** @}
* @name Retrieving abstract syntax trees * @name Retrieving abstract syntax trees

View File

@@ -19,6 +19,7 @@
#include "data.h" #include "data.h"
#include "expr.h" #include "expr.h"
#include "literals.h"
#include <gu/defs.h> #include <gu/defs.h>
#include <gu/map.h> #include <gu/map.h>
#include <gu/seq.h> #include <gu/seq.h>
@@ -766,6 +767,7 @@ pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool,
concr->epsilon_idx = gu_map_type_new(PgfEpsilonIdx, pool); concr->epsilon_idx = gu_map_type_new(PgfEpsilonIdx, pool);
pgf_read_into_map(ccats_t, rdr, concr->ccats); pgf_read_into_map(ccats_t, rdr, concr->ccats);
concr->cnccats = pgf_read_new(rdr, gu_type(PgfCncCatMap), pool, NULL); concr->cnccats = pgf_read_new(rdr, gu_type(PgfCncCatMap), pool, NULL);
concr->callbacks = pgf_new_callbacks_map(concr, pool);
concr->total_cats = pgf_read_int(rdr); concr->total_cats = pgf_read_int(rdr);
concr->max_fid = concr->total_cats; concr->max_fid = concr->total_cats;

View File

@@ -8,6 +8,7 @@
#include <pgf/data.h> #include <pgf/data.h>
#include <pgf/parser.h> #include <pgf/parser.h>
#include <pgf/lexer.h> #include <pgf/lexer.h>
#include <pgf/literals.h>
#include <pgf/linearize.h> #include <pgf/linearize.h>
#include <pgf/expr.h> #include <pgf/expr.h>
#include <pgf/edsl.h> #include <pgf/edsl.h>
@@ -77,6 +78,10 @@ int main(int argc, char* argv[]) {
status = EXIT_FAILURE; status = EXIT_FAILURE;
goto fail_concr; goto fail_concr;
} }
// Register a callback for the literal category Symbol
pgf_parser_add_literal(from_concr, gu_str_string("Symb", pool),
&pgf_nerc_literal_callback);
// Arbitrarily choose linearization index 0. Usually the initial // Arbitrarily choose linearization index 0. Usually the initial
// categories we are interested in only have one field. // categories we are interested in only have one field.