From fa6ee145d78a56308a9ce9c155653e522a417d8f Mon Sep 17 00:00:00 2001
From: krasimir
Date: Wed, 3 May 2017 16:28:20 +0000
Subject: [PATCH] first steps for sentence lookup in the C runtime

---
Hand-edited after export, so the lookup.c hunk no longer matches blob
9e6f58bb7. Changes relative to the original commit: PgfAbsProduction
uses a C99 flexible array member, the lexicon index stores each
function once per token, the duplicated debug dump lives in a single
helper, the symbol switch has an explicit default, and the code is
commented. The six #include targets were already missing from this
copy of the patch and are left as found.

 src/runtime/c/CMakeLists.txt |   1 +
 src/runtime/c/Makefile.am    |   1 +
 src/runtime/c/pgf/lookup.c   | 233 +++++++++++++++++++++++++++++++++++
 src/runtime/c/pgf/pgf.h      |   3 +
 4 files changed, 238 insertions(+)
 create mode 100644 src/runtime/c/pgf/lookup.c

diff --git a/src/runtime/c/CMakeLists.txt b/src/runtime/c/CMakeLists.txt
index 82dfe259a..45a13a067 100644
--- a/src/runtime/c/CMakeLists.txt
+++ b/src/runtime/c/CMakeLists.txt
@@ -49,6 +49,7 @@ set(libpgf_la_SOURCES
   pgf/expr.c
   pgf/expr.h
   pgf/parser.c
+  pgf/lookup.c
   pgf/jit.c
   pgf/parseval.c
   pgf/literals.c
diff --git a/src/runtime/c/Makefile.am b/src/runtime/c/Makefile.am
index 941c404ae..9f6ce9a76 100644
--- a/src/runtime/c/Makefile.am
+++ b/src/runtime/c/Makefile.am
@@ -68,6 +68,7 @@ libpgf_la_SOURCES = \
 	pgf/expr.c \
 	pgf/expr.h \
 	pgf/parser.c \
+	pgf/lookup.c \
 	pgf/jit.c \
 	pgf/parseval.c \
 	pgf/literals.c \
diff --git a/src/runtime/c/pgf/lookup.c b/src/runtime/c/pgf/lookup.c
new file mode 100644
index 000000000..9e6f58bb7
--- /dev/null
+++ b/src/runtime/c/pgf/lookup.c
@@ -0,0 +1,233 @@
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define PGF_LOOKUP_DEBUG
+
+// One entry of the bottom-up function index: fun takes the indexed
+// category as its argument number arg_idx.
+typedef struct {
+	PgfAbsFun* fun;
+	size_t arg_idx;
+} PgfAbsBottomUpEntry;
+
+// An application of fun. args has one slot per hypothesis of fun's type;
+// a slot holds the meta id of that argument, or 0 while it is unset.
+typedef struct {
+	PgfAbsFun* fun;
+	PgfMetaId args[];
+} PgfAbsProduction;
+
+#ifdef PGF_LOOKUP_DEBUG
+// Writes the production as "?id = fun ?arg1 ... ?argN" plus a newline.
+static void
+pgf_print_abs_production(PgfMetaId id,
+                         PgfAbsProduction* prod,
+                         GuOut* out, GuExn* err)
+{
+	gu_printf(out,err,"?%d = %s",id,prod->fun->name);
+	size_t n_hypos = gu_seq_length(prod->fun->type->hypos);
+	for (size_t i = 0; i < n_hypos; i++) {
+		gu_printf(out,err," ?%d", prod->args[i]);
+	}
+	gu_putc('\n',out,err);
+}
+#endif
+
+// Dumps the production to stderr when PGF_LOOKUP_DEBUG is defined and
+// does nothing otherwise, so callers need no #ifdef of their own.
+static void
+pgf_lookup_debug_production(PgfMetaId id, PgfAbsProduction* prod)
+{
+#ifdef PGF_LOOKUP_DEBUG
+	GuPool* tmp_pool = gu_new_pool();
+	GuOut* out = gu_file_out(stderr, tmp_pool);
+	GuExn* err = gu_exn(tmp_pool);
+	pgf_print_abs_production(id, prod, out, err);
+	gu_pool_free(tmp_pool);
+#else
+	(void) id;
+	(void) prod;
+#endif
+}
+
+// Appends fun to funs unless that pointer is already stored there, so
+// a function whose tokens repeat is indexed only once per token.
+static void
+pgf_lookup_add_fun(GuBuf* funs, PgfAbsFun* fun)
+{
+	size_t n_funs = gu_buf_length(funs);
+	for (size_t i = 0; i < n_funs; i++) {
+		if (gu_buf_get(funs, PgfAbsFun*, i) == fun) {
+			return;
+		}
+	}
+	gu_buf_push(funs, PgfAbsFun*, fun);
+}
+
+// Adds to lexicon_idx, under every token (PGF_SYMBOL_KS) found in syms
+// or in any form of a PGF_SYMBOL_KP, the abstract functions of all the
+// productions listed in idx. New buffers are allocated from pool.
+static void
+pgf_lookup_index_syms(GuMap* lexicon_idx, PgfSymbols* syms, PgfProductionIdx* idx, GuPool* pool) {
+	size_t n_syms = gu_seq_length(syms);
+	for (size_t j = 0; j < n_syms; j++) {
+		PgfSymbol sym = gu_seq_get(syms, PgfSymbol, j);
+		GuVariantInfo i = gu_variant_open(sym);
+		switch (i.tag) {
+		case PGF_SYMBOL_KP: {
+			PgfSymbolKP* skp = (PgfSymbolKP*) i.data;
+			pgf_lookup_index_syms(lexicon_idx, skp->default_form, idx, pool);
+			for (size_t k = 0; k < skp->n_forms; k++) {
+				pgf_lookup_index_syms(lexicon_idx, skp->forms[k].form, idx, pool);
+			}
+			break;
+		}
+		case PGF_SYMBOL_KS: {
+			PgfSymbolKS* sks = (PgfSymbolKS*) i.data;
+			GuBuf* funs = gu_map_get(lexicon_idx, sks->token, GuBuf*);
+			if (funs == NULL) {
+				funs = gu_new_buf(PgfAbsFun*, pool);
+				gu_map_put(lexicon_idx, sks->token, GuBuf*, funs);
+			}
+
+			size_t n_idx = gu_buf_length(idx);
+			for (size_t k = 0; k < n_idx; k++) {
+				PgfProductionIdxEntry* entry =
+					gu_buf_index(idx, PgfProductionIdxEntry, k);
+				pgf_lookup_add_fun(funs, entry->papp->fun->absfun);
+			}
+			break;
+		}
+		default:
+			// every other symbol kind is skipped
+			break;
+		}
+	}
+}
+
+// State shared while the spine is built.
+typedef struct {
+	GuMap* function_idx;	// category -> GuBuf of PgfAbsBottomUpEntry
+	GuMap* cat_ids;		// category -> meta id; 0 is read as "not assigned"
+	PgfMetaId next_id;	// last id handed out; ids start at 1
+	GuPool* pool;		// productions are allocated from here
+} PgfSpineBuilder;
+
+// Allocates a production for fun from the builder's pool with every
+// argument slot set to 0.
+static PgfAbsProduction*
+pgf_lookup_new_production(PgfSpineBuilder* builder, PgfAbsFun* fun) {
+	size_t n_hypos = gu_seq_length(fun->type->hypos);
+	PgfAbsProduction* prod = gu_new_flex(builder->pool, PgfAbsProduction, args, n_hypos);
+	prod->fun = fun;
+	for (size_t i = 0; i < n_hypos; i++) {
+		prod->args[i] = 0;
+	}
+	return prod;
+}
+
+// Returns the meta id of cat, assigning a fresh one on the first visit.
+// On that first visit it also walks every function that takes cat as an
+// argument, recursing into the function's result category. The id is
+// stored before recursing, so a category reached again gets its id back
+// instead of being expanded a second time.
+static PgfMetaId
+pgf_lookup_add_spine_nodes(PgfSpineBuilder* builder, PgfCId cat) {
+	PgfMetaId cat_id = gu_map_get(builder->cat_ids, cat, PgfMetaId);
+	if (cat_id != 0) {
+		return cat_id;
+	}
+
+	cat_id = ++builder->next_id;
+	gu_map_put(builder->cat_ids, cat, PgfMetaId, cat_id);
+
+	GuBuf* entries = gu_map_get(builder->function_idx, cat, GuBuf*);
+	if (entries != NULL) {
+		size_t n_entries = gu_buf_length(entries);
+		for (size_t i = 0; i < n_entries; i++) {
+			PgfAbsBottomUpEntry* entry = gu_buf_index(entries, PgfAbsBottomUpEntry, i);
+
+			PgfMetaId id = pgf_lookup_add_spine_nodes(builder, entry->fun->type->cid);
+
+			PgfAbsProduction* prod = pgf_lookup_new_production(builder, entry->fun);
+			prod->args[entry->arg_idx] = cat_id;
+
+			pgf_lookup_debug_production(id, prod);
+		}
+	}
+
+	return cat_id;
+}
+
+// Registers fun's result category in the spine and creates a production
+// for fun with all arguments unset. The production is only printed by
+// the debug helper; nothing stores it yet.
+static void
+pgf_lookup_add_spine_leaf(PgfSpineBuilder* builder, PgfAbsFun *fun)
+{
+	PgfMetaId id = pgf_lookup_add_spine_nodes(builder, fun->type->cid);
+	PgfAbsProduction* prod = pgf_lookup_new_production(builder, fun);
+
+	pgf_lookup_debug_production(id, prod);
+}
+
+// Looks sentence up as one token in the token index built from concr and
+// builds the spine for every abstract function indexed under it. Both
+// indices are rebuilt from concr on each call and allocated from pool.
+// No results are produced yet: out_pool is unused and NULL is returned.
+PGF_API GuEnum*
+pgf_lookup_sentence(PgfConcr* concr, GuString sentence, GuPool* pool, GuPool* out_pool)
+{
+	//// building search indices //
+	GuMap* lexicon_idx = gu_new_string_map(GuBuf*, &gu_null_struct, pool);
+	size_t n_seqs = gu_seq_length(concr->sequences);
+	for (size_t i = 0; i < n_seqs; i++) {
+		PgfSequence* seq = gu_seq_index(concr->sequences, PgfSequence, i);
+		if (seq->idx != NULL) {
+			pgf_lookup_index_syms(lexicon_idx, seq->syms, seq->idx, pool);
+		}
+	}
+
+	GuMap* function_idx = gu_new_string_map(GuBuf*, &gu_null_struct, pool);
+	size_t n_funs = gu_seq_length(concr->abstr->funs);
+	for (size_t i = 0; i < n_funs; i++) {
+		PgfAbsFun* fun = gu_seq_index(concr->abstr->funs, PgfAbsFun, i);
+
+		size_t n_hypos = gu_seq_length(fun->type->hypos);
+		for (size_t j = 0; j < n_hypos; j++) {
+			PgfHypo* hypo = gu_seq_index(fun->type->hypos, PgfHypo, j);
+
+			GuBuf* funs = gu_map_get(function_idx, hypo->type->cid, GuBuf*);
+			if (funs == NULL) {
+				funs = gu_new_buf(PgfAbsBottomUpEntry, pool);
+				gu_map_put(function_idx, hypo->type->cid, GuBuf*, funs);
+			}
+
+			PgfAbsBottomUpEntry* entry = gu_buf_extend(funs);
+			entry->fun = fun;
+			entry->arg_idx = j;
+		}
+	}
+	///////////////////////////////
+
+	PgfSpineBuilder builder;
+	builder.function_idx = function_idx;
+	builder.cat_ids = gu_new_string_map(PgfMetaId, &gu_null_struct, pool);
+	builder.next_id = 0;
+	builder.pool = pool;
+
+	GuBuf* funs = gu_map_get(lexicon_idx, sentence, GuBuf*);
+	if (funs != NULL) {
+		size_t n_funs = gu_buf_length(funs);
+		for (size_t i = 0; i < n_funs; i++) {
+			PgfAbsFun* absfun =
+				gu_buf_get(funs, PgfAbsFun*, i);
+			pgf_lookup_add_spine_leaf(&builder, absfun);
+		}
+	}
+	return NULL;
+}
diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h
index 2a1cdcb80..c5e9cd52a 100644
--- a/src/runtime/c/pgf/pgf.h
+++ b/src/runtime/c/pgf/pgf.h
@@ -126,6 +126,9 @@ PGF_API_DECL PgfExprEnum*
 pgf_parse(PgfConcr* concr, PgfType* typ, GuString sentence,
           GuExn* err, GuPool* pool, GuPool* out_pool);
 
+PGF_API_DECL GuEnum*
+pgf_lookup_sentence(PgfConcr* concr, GuString sentence, GuPool* pool, GuPool* out_pool);
+
 typedef struct PgfMorphoCallback PgfMorphoCallback;
 struct PgfMorphoCallback {
 	void (*callback)(PgfMorphoCallback* self,