The linearizer API now allows metavariables to be detected. This is used, for instance, in the lookup, where the tokens produced from metavariables are interpreted as distinct from all other tokens.

This commit is contained in:
Krasimir Angelov
2017-08-07 16:39:19 +02:00
parent a4c19875ed
commit a8eeb49767
10 changed files with 132 additions and 19 deletions

View File

@@ -176,13 +176,20 @@ pgf_aligner_lzn_symbol_capit(PgfLinFuncs** funcs, PgfCapitState capit)
alin->capit = capit;
}
/* Linearizer callback for a metavariable: the aligner does not
 * distinguish metavariables, so it forwards the placeholder token "?"
 * to the ordinary token callback. */
static void
pgf_aligner_lzn_symbol_meta(PgfLinFuncs** funcs, PgfMetaId id)
{
    (void) id;  // the metavariable id is not used here

    pgf_aligner_lzn_symbol_token(funcs, "?");
}
static PgfLinFuncs pgf_file_lin_funcs = {
.symbol_token = pgf_aligner_lzn_symbol_token,
.begin_phrase = pgf_aligner_lzn_begin_phrase,
.end_phrase = pgf_aligner_lzn_end_phrase,
.symbol_ne = pgf_aligner_lzn_symbol_ne,
.symbol_bind = pgf_aligner_lzn_symbol_bind,
.symbol_capit = pgf_aligner_lzn_symbol_capit
.symbol_capit = pgf_aligner_lzn_symbol_capit,
.symbol_meta = pgf_aligner_lzn_symbol_meta
};
GuSeq*

View File

@@ -174,13 +174,26 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, int lindex,
state->parent = state->parent->parent;
}
/* Linearizer callback for a metavariable: appends a leaf node labelled
 * "?" to state->leaves, attached to the phrase node that is currently
 * open (state->parent). The meta_id argument is not used. */
static void
pgf_bracket_lzn_symbol_meta(PgfLinFuncs** funcs, PgfMetaId meta_id)
{
    PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);

    PgfParseNode* leaf = gu_new(PgfParseNode, state->pool);
    leaf->label  = "?";
    leaf->parent = state->parent;
    // The id is derived from the number of leaves collected so far.
    // NOTE(review): the 100000 offset presumably keeps leaf ids apart
    // from the ids of inner nodes -- confirm against the token callback.
    leaf->id     = 100000 + gu_buf_length(state->leaves);

    gu_buf_push(state->leaves, PgfParseNode*, leaf);
}
static PgfLinFuncs pgf_bracket_lin_funcs = {
.symbol_token = pgf_bracket_lzn_symbol_token,
.begin_phrase = pgf_bracket_lzn_begin_phrase,
.end_phrase = pgf_bracket_lzn_end_phrase,
.symbol_ne = NULL,
.symbol_bind = NULL,
.symbol_capit = NULL
.symbol_capit = NULL,
.symbol_meta = pgf_bracket_lzn_symbol_meta
};
static void

View File

@@ -111,7 +111,10 @@ pgf_print_cnc_tree(PgfCncTree ctree, GuOut* out, GuExn* err)
PgfCncTreeChunks* chunks = ti.data;
if (chunks->n_vars+chunks->n_args > 0) gu_putc('(', out, err);
pgf_print_cnc_tree_vars(chunks->n_vars, chunks->context, out, err);
gu_putc('?', out, err);
if (chunks->id > 0)
gu_printf(out, err, "?%d", chunks->id);
else
gu_putc('?', out, err);
for (size_t i = 0; i < chunks->n_args; i++) {
gu_putc(' ', out, err);
pgf_print_cnc_tree(chunks->args[i], out, err);
@@ -361,6 +364,7 @@ pgf_cnc_resolve(PgfCnc* cnc,
goto done;
}
case PGF_EXPR_META: {
PgfExprMeta* emeta = i.data;
size_t n_args = gu_buf_length(args);
PgfCncTree chunks_tree;
@@ -368,9 +372,10 @@ pgf_cnc_resolve(PgfCnc* cnc,
gu_new_flex_variant(PGF_CNC_TREE_CHUNKS,
PgfCncTreeChunks,
args, n_args, &chunks_tree, pool);
chunks->id = emeta->id;
chunks->n_vars = n_vars;
chunks->context = context;
chunks->n_args = n_args;
chunks->n_args = n_args;
for (size_t i = 0; i < n_args; i++) {
PgfExpr earg = gu_buf_get(args, PgfExpr, n_args-i-1);
@@ -773,13 +778,25 @@ pgf_lzr_cache_symbol_capit(PgfLinFuncs** funcs, PgfCapitState capit)
event->tag = (capit == PGF_CAPIT_ALL) ? PGF_CACHED_ALL_CAPIT : PGF_CACHED_CAPIT;
}
/* Linearizer callback for a metavariable on the caching layer: flushes
 * the cache with the default form of cache->kp and then hands the
 * metavariable on to the wrapped linearizer's callbacks, if that
 * linearizer provides a symbol_meta handler. */
static void
pgf_lzr_cache_symbol_meta(PgfLinFuncs** funcs, PgfMetaId id)
{
    PgfLzrCache* cache = gu_container(funcs, PgfLzrCache, funcs);

    pgf_lzr_cache_flush(cache, cache->kp->default_form);

    // Look up the downstream callbacks only after the flush has run.
    PgfLinFuncs** downstream = cache->lzr->funcs;
    if (!(*downstream)->symbol_meta)
        return;

    (*downstream)->symbol_meta(downstream, id);
}
static PgfLinFuncs pgf_lzr_cache_funcs = {
.symbol_token = pgf_lzr_cache_symbol_token,
.begin_phrase = pgf_lzr_cache_begin_phrase,
.end_phrase = pgf_lzr_cache_end_phrase,
.symbol_ne = pgf_lzr_cache_symbol_ne,
.symbol_bind = pgf_lzr_cache_symbol_bind,
.symbol_capit = pgf_lzr_cache_symbol_capit
.symbol_capit = pgf_lzr_cache_symbol_capit,
.symbol_meta = pgf_lzr_cache_symbol_meta
};
static void
@@ -943,8 +960,8 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx)
PgfCncTreeChunks* fchunks = cti.data;
if (fchunks->n_args == 0) {
if ((*lzr->funcs)->symbol_token) {
(*lzr->funcs)->symbol_token(lzr->funcs, "?");
if ((*lzr->funcs)->symbol_meta) {
(*lzr->funcs)->symbol_meta(lzr->funcs, fchunks->id);
}
} else {
for (size_t i = 0; i < fchunks->n_args; i++) {
@@ -1055,7 +1072,7 @@ pgf_file_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
gu_string_write(tok, flin->out, flin->err);
flin->capit = PGF_CAPIT_NONE;
break;
}
}
case PGF_CAPIT_ALL:
flin->capit = PGF_CAPIT_NEXT;
// continue
@@ -1092,13 +1109,33 @@ pgf_file_lzn_symbol_capit(PgfLinFuncs** funcs, PgfCapitState capit)
flin->capit = capit;
}
/* Linearizer callback for a metavariable in the simple text
 * linearizer: writes '?' to flin->out. A separating space is written
 * first unless a bind is pending, in which case the bind flag is
 * consumed instead. The id argument is not used. */
static void
pgf_file_lzn_symbol_meta(PgfLinFuncs** funcs, PgfMetaId id)
{
    PgfSimpleLin* flin = gu_container(funcs, PgfSimpleLin, funcs);

    // Do nothing once gu_ok() reports that flin->err is no longer ok.
    if (!gu_ok(flin->err)) {
        return;
    }

    if (!flin->bind) {
        gu_putc(' ', flin->out, flin->err);
        // A pending "capitalize next" request is dropped here.
        if (flin->capit == PGF_CAPIT_NEXT) {
            flin->capit = PGF_CAPIT_NONE;
        }
    } else {
        flin->bind = false;
    }

    gu_putc('?', flin->out, flin->err);
}
static PgfLinFuncs pgf_file_lin_funcs = {
.symbol_token = pgf_file_lzn_symbol_token,
.begin_phrase = NULL,
.end_phrase = NULL,
.symbol_ne = pgf_file_lzn_symbol_ne,
.symbol_bind = pgf_file_lzn_symbol_bind,
.symbol_capit = pgf_file_lzn_symbol_capit
.symbol_capit = pgf_file_lzn_symbol_capit,
.symbol_meta = pgf_file_lzn_symbol_meta
};
PGF_API void

View File

@@ -33,6 +33,8 @@ typedef struct {
} PgfCncTreeApp;
typedef struct {
PgfMetaId id;
size_t n_vars;
PgfPrintContext* context;
@@ -94,6 +96,9 @@ struct PgfLinFuncs
/// capitalization
void (*symbol_capit)(PgfLinFuncs** self, PgfCapitState capit);
/// meta variable
void (*symbol_meta)(PgfLinFuncs** self, PgfMetaId id);
};
/// Linearize a concrete syntax tree.

View File

@@ -634,6 +634,7 @@ pgf_lookup_extract(PgfLookupState* st, PgfCCat* ccat)
gu_new_flex_variant(PGF_CNC_TREE_CHUNKS,
PgfCncTreeChunks,
args, 0, &capp->args[0], st->pool);
chunks->id = 0;
chunks->n_vars = 0;
chunks->context = NULL;
chunks->n_args = 0;
@@ -736,7 +737,8 @@ pgf_lookup_compute_kernel_helper(GuBuf* sentence_tokens, GuBuf* expr_tokens,
PgfInputToken* sentence_token = gu_buf_index(sentence_tokens, PgfInputToken, l);
PgfInputToken* expr_token = gu_buf_index(expr_tokens, PgfInputToken, k-1);
if (strcmp(sentence_token->token, expr_token->token) == 0) {
if (sentence_token->token != NULL && expr_token->token != NULL &&
strcmp(sentence_token->token, expr_token->token) == 0) {
score += 1 + pgf_lookup_compute_kernel_helper(sentence_tokens, expr_tokens, matrix, l, k-1);
} else {
bool match = false;
@@ -803,10 +805,10 @@ pgf_lookup_ctree_to_expr(PgfCncTree ctree, PgfExprProb* ep,
PgfCncTreeChunks* fchunks = cti.data;
n_args = fchunks->n_args;
args = fchunks->args;
ep->expr = gu_new_variant_i(out_pool,
PGF_EXPR_META, PgfExprMeta,
.id = 0);
.id = fchunks->id);
ep->prob = 0;
break;
}
@@ -887,13 +889,23 @@ pgf_lookup_end_phrase(PgfLinFuncs** self, PgfCId cat, int fid, int lindex, PgfCI
st->curr_absfun = NULL;
}
/* Linearizer callback for a metavariable during lookup: appends an
 * entry to st->expr_tokens whose token is NULL and which has no
 * associated functions. The meta_id argument is not used. */
static void
pgf_lookup_symbol_meta(PgfLinFuncs** self, PgfMetaId meta_id)
{
    PgfLookupState* st = gu_container(self, PgfLookupState, funcs);

    PgfInputToken* slot = gu_buf_extend(st->expr_tokens);
    slot->n_funs = 0;
    slot->token  = NULL;  // NULL token marks the entry as a metavariable
}
static PgfLinFuncs pgf_lookup_lin_funcs = {
.symbol_token = pgf_lookup_symbol_token,
.begin_phrase = pgf_lookup_begin_phrase,
.end_phrase = pgf_lookup_end_phrase,
.symbol_ne = NULL,
.symbol_bind = NULL,
.symbol_capit = NULL
.symbol_capit = NULL,
.symbol_meta = pgf_lookup_symbol_meta
};
PGF_API GuEnum*

View File

@@ -59,7 +59,7 @@ pgf_metrics_lzn_end_phrase1(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_id
int start = gu_buf_pop(state->marks, int);
int end = state->pos;
if (start != end) {
PgfPhrase* phrase = gu_new(PgfPhrase, state->pool);
phrase->start = start;
@@ -110,13 +110,26 @@ pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_id
}
}
/* Linearizer callback for a metavariable in the metrics linearizer:
 * emits the separator via pgf_metrics_put_space(), writes '?' when an
 * output stream is attached, and advances the position counter by one.
 * The meta_id argument is not used. */
static void
pgf_metrics_lzn_symbol_meta(PgfLinFuncs** funcs, PgfMetaId meta_id)
{
    PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);

    pgf_metrics_put_space(state);

    // state->out may be absent; the position is advanced either way.
    if (state->out) {
        gu_putc('?', state->out, state->err);
    }

    state->pos++;
}
static PgfLinFuncs pgf_metrics_lin_funcs1 = {
.symbol_token = pgf_metrics_lzn_symbol_token,
.begin_phrase = pgf_metrics_lzn_begin_phrase,
.end_phrase = pgf_metrics_lzn_end_phrase1,
.symbol_ne = pgf_metrics_symbol_ne,
.symbol_bind = pgf_metrics_symbol_bind,
.symbol_capit = NULL
.symbol_capit = NULL,
.symbol_meta = pgf_metrics_lzn_symbol_meta
};
static PgfLinFuncs pgf_metrics_lin_funcs2 = {
@@ -125,7 +138,8 @@ static PgfLinFuncs pgf_metrics_lin_funcs2 = {
.end_phrase = pgf_metrics_lzn_end_phrase2,
.symbol_ne = pgf_metrics_symbol_ne,
.symbol_bind = pgf_metrics_symbol_bind,
.symbol_capit = NULL
.symbol_capit = NULL,
.symbol_meta = pgf_metrics_lzn_symbol_meta
};
PGF_API bool

View File

@@ -52,7 +52,8 @@ namespace PGFSharp
end_phrase = EndPhrase,
symbol_ne = null,
symbol_bind = null,
symbol_capit = null
symbol_capit = null,
symbol_meta = SymbolMeta
};
}
@@ -80,6 +81,10 @@ namespace PGFSharp
else
stack.Peek ().AddChild (b);
}
// Linearizer callback for a metavariable: adds a "?" string child to
// the bracket currently on top of the stack. meta_id is not used.
private void SymbolMeta(IntPtr self, int meta_id)
{
    var current = stack.Peek();
    current.AddChild(new StringChildBracket("?"));
}
public Bracket Build() {
return final;

View File

@@ -221,6 +221,9 @@ namespace PGFSharp
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public delegate void LinFuncSymbolCapitalization(IntPtr self);
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public delegate void LinFuncSymbolMeta(IntPtr self, int meta_id);
[StructLayout(LayoutKind.Sequential)]
public struct PgfLinFuncs
{
@@ -241,6 +244,9 @@ namespace PGFSharp
[MarshalAs(UnmanagedType.FunctionPtr)]
public LinFuncSymbolCapitalization symbol_capit;
[MarshalAs(UnmanagedType.FunctionPtr)]
public LinFuncSymbolMeta symbol_meta;
}
#endregion

View File

@@ -951,13 +951,20 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, int lindex,
state->list = parent;
}
/* Linearizer callback for a metavariable: the bracketed linearization
 * has no dedicated representation for it, so the placeholder token "?"
 * is forwarded to the ordinary token callback. */
static void
pgf_bracket_lzn_symbol_meta(PgfLinFuncs** funcs, PgfMetaId id)
{
    (void) id;  // the metavariable id is not used here

    pgf_bracket_lzn_symbol_token(funcs, "?");
}
static PgfLinFuncs pgf_bracket_lin_funcs = {
.symbol_token = pgf_bracket_lzn_symbol_token,
.begin_phrase = pgf_bracket_lzn_begin_phrase,
.end_phrase = pgf_bracket_lzn_end_phrase,
.symbol_ne = NULL,
.symbol_bind = NULL,
.symbol_capit = NULL
.symbol_capit = NULL,
.symbol_meta = pgf_bracket_lzn_symbol_meta
};
JNIEXPORT jobjectArray JNICALL

View File

@@ -2039,13 +2039,20 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, int lindex,
state->list = parent;
}
/* Linearizer callback for a metavariable: the bracketed linearization
 * has no dedicated representation for it, so the placeholder token "?"
 * is forwarded to the ordinary token callback. */
static void
pgf_bracket_lzn_symbol_meta(PgfLinFuncs** funcs, PgfMetaId meta_id)
{
    (void) meta_id;  // the metavariable id is not used here

    pgf_bracket_lzn_symbol_token(funcs, "?");
}
static PgfLinFuncs pgf_bracket_lin_funcs = {
.symbol_token = pgf_bracket_lzn_symbol_token,
.begin_phrase = pgf_bracket_lzn_begin_phrase,
.end_phrase = pgf_bracket_lzn_end_phrase,
.symbol_ne = NULL,
.symbol_bind = NULL,
.symbol_capit = NULL
.symbol_capit = NULL,
.symbol_meta = pgf_bracket_lzn_symbol_meta
};
static PyObject*