From f1d2852c4d9a830a4cc890a40aada0ee7e7300a9 Mon Sep 17 00:00:00 2001 From: "kr.angelov" Date: Wed, 22 Feb 2012 21:27:54 +0000 Subject: [PATCH] libpgf: now we have both complete bottom up index for robust parsing and fast lexical lookup from the same index --- src/runtime/c/pgf/data.h | 9 +- src/runtime/c/pgf/parser.c | 428 ++++++++++++++++++++++++------------- src/runtime/c/pgf/reader.c | 69 ++++-- 3 files changed, 334 insertions(+), 172 deletions(-) diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h index cf0e00d82..36c5e509c 100644 --- a/src/runtime/c/pgf/data.h +++ b/src/runtime/c/pgf/data.h @@ -189,10 +189,11 @@ extern GU_DECLARE_TYPE(PgfFunIndices, GuStringMap); typedef GuMap PgfCoerceIdx; extern GU_DECLARE_TYPE(PgfCoerceIdx, GuMap); -typedef GuStringMap PgfLexiconIdx; -extern GU_DECLARE_TYPE(PgfLexiconIdx, GuStringMap); +typedef GuStringMap PgfTransitions; +extern GU_DECLARE_TYPE(PgfTransitions, GuStringMap); -typedef GuBuf PgfEpsilonIdx; +typedef GuMap PgfEpsilonIdx; +extern GU_DECLARE_TYPE(PgfEpsilonIdx, GuMap); struct PgfConcr { PgfFlags* cflags; @@ -200,7 +201,7 @@ struct PgfConcr { GuMap* ccats; PgfFunIndices* fun_indices; PgfCoerceIdx* coerce_idx; - PgfLexiconIdx* lexicon_idx; + PgfTransitions* lexicon_idx; PgfEpsilonIdx* epsilon_idx; PgfCncFuns* cncfuns; PgfSequences* sequences; diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index 861157f50..93ea2da79 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -11,10 +11,6 @@ typedef struct PgfItem PgfItem; typedef GuBuf PgfItemBuf; typedef GuList(PgfItemBuf*) PgfItemBufs; - -// GuString -> PgfItemBuf* -typedef GuMap PgfTransitions; - typedef GuBuf PgfCCatBuf; struct PgfParse { @@ -50,24 +46,40 @@ struct PgfItem { uint8_t alt; }; -typedef GuMap PgfContsMap; +typedef struct { + int fid; + int lin_idx; +} PgfCFCat; +static GU_DEFINE_TYPE(PgfCFCat, struct, + GU_MEMBER(PgfCFCat, fid, int), + GU_MEMBER(PgfCFCat, lin_idx, int)); + +extern GuHasher pgf_cfcat_hasher; static GU_DEFINE_TYPE(PgfItemBuf, abstract, _); static GU_DEFINE_TYPE(PgfItemBufs, abstract, _); -static GU_DEFINE_TYPE(PgfContsMap, GuMap, + +typedef GuMap PgfContsMap; +GU_DEFINE_TYPE(PgfContsMap, GuMap, gu_type(PgfCCat), NULL, gu_ptr_type(PgfItemBufs), &gu_null_struct); -static GU_DEFINE_TYPE(PgfGenCatMap, GuMap, +typedef GuMap PgfEpsilonIdx; +GU_DEFINE_TYPE(PgfEpsilonIdx, GuMap, + gu_type(PgfCFCat), &pgf_cfcat_hasher, + gu_ptr_type(PgfCCat), &gu_null_struct); + +typedef GuMap PgfGenCatMap; +GU_DEFINE_TYPE(PgfGenCatMap, GuMap, gu_type(PgfItemBuf), NULL, gu_ptr_type(PgfCCat), &gu_null_struct); -static GU_DEFINE_TYPE(PgfTransitions, GuStringMap, +// GuString -> PgfItemBuf* +typedef GuStringMap PgfTransitions; +GU_DEFINE_TYPE(PgfTransitions, GuStringMap, gu_ptr_type(PgfItemBuf), &gu_null_struct); -typedef GuMap PgfGenCatMap; - typedef struct PgfParsing PgfParsing; typedef const struct PgfLexCallback PgfLexCallback; @@ -83,12 +95,11 @@ struct PgfParsing { PgfGenCatMap* generated_cats; PgfCCatBuf* completed; PgfLexCallback* callback; - GuBuf *lexicon_idx, *epsilon_idx; + PgfItemBuf *lexicon_idx; + PgfEpsilonIdx *epsilon_idx; int max_fid; }; -GU_DEFINE_TYPE(PgfLexiconIdx, GuStringMap, gu_ptr_type(GuBuf), - &gu_null_struct); #ifdef PGF_PARSER_DEBUG static void @@ -190,93 +201,6 @@ pgf_print_item(PgfItem* item, GuWriter* wtr, GuExn* err) } #endif -static void -pgf_parser_bu_add_entry(PgfConcr* concr, PgfTokens tokens, - PgfCCat* ccat, size_t lin_idx, - PgfProduction prod, - GuPool *pool) -{ - PgfToken tok = gu_seq_get(tokens, PgfToken, 0); - - GuBuf* items = gu_map_get(concr->lexicon_idx, &tok, GuBuf*); - if (items == NULL) { - items = gu_new_buf(PgfItemBase*, pool); - gu_map_put(concr->lexicon_idx, &tok, GuBuf*, items); - } - - PgfItemBase* base = gu_new(PgfItemBase, pool); - base->ccat = ccat; - base->lin_idx = lin_idx; - base->prod = prod; - base->conts = NULL; - - gu_buf_push(items, PgfItemBase*, base); -} - -void -pgf_parser_bu_index(PgfConcr* concr, PgfCCat* ccat, PgfProduction prod, - GuPool *pool) -{ - GuVariantInfo i = gu_variant_open(prod); - switch (i.tag) { - case PGF_PRODUCTION_APPLY: { - PgfProductionApply* papp = i.data; - - for (size_t lin_idx = 0; lin_idx < papp->fun->n_lins; lin_idx++) { - PgfSequence seq = papp->fun->lins[lin_idx]; - if (gu_seq_length(seq) > 0) { - PgfSymbol sym = gu_seq_get(seq, PgfSymbol, 0); - GuVariantInfo i = gu_variant_open(sym); - switch (i.tag) { - case PGF_SYMBOL_CAT: { - PgfSymbolCat* scat = i.data; - break; - } - case PGF_SYMBOL_KS: { - PgfSymbolKS* sks = gu_variant_data(sym); - pgf_parser_bu_add_entry(concr, sks->tokens, - ccat, lin_idx, prod, pool); - break; - } - case PGF_SYMBOL_KP: { - PgfSymbolKP* skp = gu_variant_data(sym); - pgf_parser_bu_add_entry(concr, skp->default_form, - ccat, lin_idx, prod, pool); - for (size_t i = 0; i < skp->n_forms; i++) { - pgf_parser_bu_add_entry(concr, skp->forms[i].form, - ccat, lin_idx, prod, pool); - } - break; - } - case PGF_SYMBOL_LIT: - // XXX TODO proper support - break; - case PGF_SYMBOL_VAR: - // XXX TODO proper support - break; - default: - gu_impossible(); - } - } else { - PgfItemBase* base = gu_new(PgfItemBase, pool); - base->ccat = ccat; - base->lin_idx = lin_idx; - base->prod = prod; - base->conts = NULL; - - gu_buf_push(concr->epsilon_idx, PgfItemBase*, base); - } - } - break; - case PGF_PRODUCTION_COERCE: { - break; - } - default: - gu_impossible(); - } - } -} - static void pgf_parsing_add_transition(PgfParsing* parsing, PgfToken tok, PgfItem* item) { @@ -284,29 +208,31 @@ pgf_parsing_add_transition(PgfParsing* parsing, PgfToken tok, PgfItem* item) } static PgfItemBufs* -pgf_parsing_get_contss(PgfParsing* parsing, PgfCCat* cat) +pgf_parsing_get_contss(PgfContsMap* conts_map, PgfCCat* cat, GuPool *tmp_pool) { - PgfItemBufs* contss = gu_map_get(parsing->conts_map, cat, PgfItemBufs*); + PgfItemBufs* contss = gu_map_get(conts_map, cat, PgfItemBufs*); if (!contss) { size_t n_lins = cat->cnccat->n_lins; - contss = gu_new_list(PgfItemBufs, parsing->tmp_pool, n_lins); + contss = gu_new_list(PgfItemBufs, tmp_pool, n_lins); for (size_t i = 0; i < n_lins; i++) { gu_list_index(contss, i) = NULL; } - gu_map_put(parsing->conts_map, cat, PgfItemBufs*, contss); + gu_map_put(conts_map, cat, PgfItemBufs*, contss); } return contss; } - static PgfItemBuf* -pgf_parsing_get_conts(PgfParsing* parsing, PgfCCat* cat, size_t lin_idx) +pgf_parsing_get_conts(PgfContsMap* conts_map, + PgfCCat* ccat, size_t lin_idx, + GuPool *pool, GuPool *tmp_pool) { - gu_require(lin_idx < cat->cnccat->n_lins); - PgfItemBufs* contss = pgf_parsing_get_contss(parsing, cat); + gu_require(lin_idx < ccat->cnccat->n_lins); + PgfItemBufs* contss = + pgf_parsing_get_contss(conts_map, ccat, tmp_pool); PgfItemBuf* conts = gu_list_index(contss, lin_idx); if (!conts) { - conts = gu_new_buf(PgfItem*, parsing->pool); + conts = gu_new_buf(PgfItem*, pool); gu_list_index(contss, lin_idx) = conts; } return conts; @@ -411,6 +337,20 @@ pgf_item_copy(PgfItem* item, GuPool* pool) return copy; } +static PgfItem* +pgf_item_update_arg(PgfItem* item, size_t d, PgfCCat *ccat, GuPool* pool) +{ + PgfItem* new_item = pgf_item_copy(item, pool); + size_t nargs = gu_seq_length(item->args); + new_item->args = gu_new_seq(PgfPArg, nargs, pool); + memcpy(gu_seq_data(new_item->args), gu_seq_data(item->args), + nargs * sizeof(PgfPArg)); + gu_seq_set(new_item->args, PgfPArg, d, + ((PgfPArg) { .hypos = NULL, .ccat = ccat })); + + return new_item; +} + static void pgf_item_advance(PgfItem* item, GuPool* pool) { @@ -428,15 +368,12 @@ pgf_parsing_combine(PgfParsing* parsing, PgfItem* cont, PgfCCat* cat) gu_buf_push(parsing->completed, PgfCCat*, cat); return; } - PgfItem* item = pgf_item_copy(cont, parsing->pool); - size_t nargs = gu_seq_length(cont->args); - item->args = gu_new_seq(PgfPArg, nargs, parsing->pool); - memcpy(gu_seq_data(item->args), gu_seq_data(cont->args), - nargs * sizeof(PgfPArg)); - gu_assert(gu_variant_tag(item->curr_sym) == PGF_SYMBOL_CAT); - PgfSymbolCat* pcat = gu_variant_data(cont->curr_sym); - gu_seq_set(item->args, PgfPArg, pcat->d, - ((PgfPArg) { .hypos = NULL, .ccat = cat })); + + gu_assert(gu_variant_tag(cont->curr_sym) == PGF_SYMBOL_CAT); + PgfSymbolCat* scat = gu_variant_data(cont->curr_sym); + + PgfItem* item = + pgf_item_update_arg(cont, scat->d, cat, parsing->pool); pgf_item_advance(item, parsing->pool); pgf_parsing_item(parsing, item); } @@ -445,7 +382,7 @@ static void pgf_parsing_production(PgfParsing* parsing, PgfCCat* ccat, size_t lin_idx, PgfProduction prod, PgfItemBuf* conts) { - PgfItem* item = + PgfItem* item = pgf_new_item(ccat, lin_idx, prod, conts, parsing->pool); pgf_parsing_item(parsing, item); } @@ -509,7 +446,9 @@ pgf_parsing_complete(PgfParsing* parsing, PgfItem* item) if (tmp_cat != NULL) { // The category has already been created. If it has also been // predicted already, then process a new item for this production. - PgfItemBufs* contss = pgf_parsing_get_contss(parsing, cat); + PgfItemBufs* contss = + pgf_parsing_get_contss(parsing->conts_map, cat, + parsing->tmp_pool); size_t n_contss = gu_list_length(contss); for (size_t i = 0; i < n_contss; i++) { PgfItemBuf* conts2 = gu_list_index(contss, i); @@ -532,33 +471,6 @@ pgf_parsing_complete(PgfParsing* parsing, PgfItem* item) } } -static void -pgf_parsing_bu_predict(PgfParsing* parsing, GuBuf* items, - PgfCCat* ccat, size_t lin_idx, - PgfItemBuf* conts) -{ - if (items != NULL) { - size_t n_items = gu_buf_length(items); - for (size_t i = 0; i < n_items; i++) { - PgfItemBase* base = gu_buf_get(items, PgfItemBase*, i); - - if (base->ccat == ccat && base->lin_idx == lin_idx) { - GuVariantInfo i = gu_variant_open(base->prod); - switch (i.tag) { - case PGF_PRODUCTION_APPLY: { - PgfProductionApply* papp = i.data; - if (gu_seq_length(papp->args) == 0) { - pgf_parsing_production(parsing, ccat, lin_idx, - base->prod, conts); - } - break; - } - } - } - } - } -} - static void pgf_parsing_predict(PgfParsing* parsing, PgfItem* item, PgfCCat* ccat, size_t lin_idx) @@ -568,12 +480,16 @@ pgf_parsing_predict(PgfParsing* parsing, PgfItem* item, // Empty category return; } - PgfItemBuf* conts = pgf_parsing_get_conts(parsing, ccat, lin_idx); + PgfItemBuf* conts = + pgf_parsing_get_conts(parsing->conts_map, ccat, lin_idx, + parsing->pool, parsing->tmp_pool); gu_buf_push(conts, PgfItem*, item); if (gu_buf_length(conts) == 1) { /* First time we encounter this linearization * of this category at the current position, * so predict it. */ + + // Top-down prediction for syntactic rules PgfProductionSeq prods = ccat->prods; for (size_t i = 0; i < ccat->n_synprods; i++) { PgfProduction prod = @@ -582,10 +498,44 @@ pgf_parsing_predict(PgfParsing* parsing, PgfItem* item, prod, conts); } - pgf_parsing_bu_predict(parsing, parsing->lexicon_idx, - ccat, lin_idx, conts); - pgf_parsing_bu_predict(parsing, parsing->epsilon_idx, - ccat, lin_idx, conts); + // Bottom-up prediction for lexical rules + if (parsing->lexicon_idx != NULL) { + size_t n_items = gu_buf_length(parsing->lexicon_idx); + for (size_t i = 0; i < n_items; i++) { + PgfItem* item = gu_buf_get(parsing->lexicon_idx, PgfItem*, i); + + if (item->base->ccat == ccat && + item->base->lin_idx == lin_idx && + gu_seq_length(item->args) == 0) { + pgf_parsing_production(parsing, ccat, lin_idx, + item->base->prod, conts); + } + } + } + + // Bottom-up prediction for epsilon rules + PgfCFCat cfc = {ccat->fid, lin_idx}; + PgfCCat* eps_ccat = gu_map_get(parsing->epsilon_idx, &cfc, PgfCCat*); + + if (eps_ccat != NULL) { + size_t n_prods = gu_seq_length(eps_ccat->prods); + for (size_t i = 0; i < n_prods; i++) { + PgfProduction prod = + gu_seq_get(eps_ccat->prods, PgfProduction, i); + + GuVariantInfo i = gu_variant_open(prod); + switch (i.tag) { + case PGF_PRODUCTION_APPLY: { + PgfProductionApply* papp = i.data; + if (gu_seq_length(papp->args) == 0) { + pgf_parsing_production(parsing, ccat, lin_idx, + prod, conts); + } + break; + } + } + } + } } else { /* If it has already been completed, combine. */ PgfCCat* completed = @@ -965,3 +915,177 @@ pgf_parser_parse(PgfConcr* concr, PgfCId cat, size_t lin_idx, GuPool* pool) } return parse; } + +static void +pgf_parser_bu_add_entry(PgfConcr* concr, PgfTokens tokens, + PgfItem* item, + GuPool *pool) +{ + PgfToken tok = gu_seq_get(tokens, PgfToken, 0); + + GuBuf* items = gu_map_get(concr->lexicon_idx, &tok, GuBuf*); + if (items == NULL) { + items = gu_new_buf(PgfItemBase*, pool); + gu_map_put(concr->lexicon_idx, &tok, GuBuf*, items); + } + + gu_buf_push(items, PgfItem*, item); +} + +void +pgf_parser_bu_item(PgfConcr* concr, PgfItem* item, + PgfContsMap* conts_map, + GuPool *pool, GuPool *tmp_pool) +{ + GuVariantInfo i = gu_variant_open(item->base->prod); + switch (i.tag) { + case PGF_PRODUCTION_APPLY: { + PgfProductionApply* papp = i.data; + + PgfSequence seq = papp->fun->lins[item->base->lin_idx]; + if (item->seq_idx < gu_seq_length(seq)) { + GuVariantInfo i = gu_variant_open(item->curr_sym); + switch (i.tag) { + case PGF_SYMBOL_CAT: { + PgfSymbolCat* scat = i.data; + + // Place the item in the continuation map + PgfPArg* parg = + gu_seq_index(papp->args, PgfPArg, scat->d); + PgfItemBuf* conts_ = + pgf_parsing_get_conts(conts_map, + parg->ccat, scat->r, + pool, tmp_pool); + gu_buf_push(conts_, PgfItem*, item); + + // If the current category has epsilon rules + // then we must do the same for a new item where + // the dot is moved with one position. + PgfCFCat cfc = {parg->ccat->fid, scat->r}; + PgfCCat* eps_ccat = + gu_map_get(concr->epsilon_idx, &cfc, PgfCCat*); + + if (eps_ccat != NULL) { + PgfItem* new_item = + pgf_item_update_arg(item, scat->d, eps_ccat, pool); + pgf_item_advance(new_item, pool); + pgf_parser_bu_item(concr, new_item, conts_map, + pool, tmp_pool); + } + break; + } + case PGF_SYMBOL_KS: { + PgfSymbolKS* sks = i.data; + pgf_parser_bu_add_entry(concr, sks->tokens, + item, pool); + break; + } + case PGF_SYMBOL_KP: { + PgfSymbolKP* skp = i.data; + pgf_parser_bu_add_entry(concr, skp->default_form, + item, pool); + for (size_t i = 0; i < skp->n_forms; i++) { + pgf_parser_bu_add_entry(concr, skp->forms[i].form, + item, pool); + } + break; + } + case PGF_SYMBOL_LIT: + // XXX TODO proper support + break; + case PGF_SYMBOL_VAR: + // XXX TODO proper support + break; + default: + gu_impossible(); + } + } else { + PgfCFCat cfc = {item->base->ccat->fid, item->base->lin_idx}; + PgfCCat* tmp_ccat = + gu_map_get(concr->epsilon_idx, &cfc, PgfCCat*); + + PgfCCat* eps_ccat = tmp_ccat; + if (eps_ccat == NULL) { + eps_ccat = gu_new(PgfCCat, pool); + eps_ccat->cnccat = item->base->ccat->cnccat; + eps_ccat->fid = concr->max_fid++; + eps_ccat->prods = + gu_buf_seq(gu_new_buf(PgfProduction, pool)); + eps_ccat->n_synprods = 0; + gu_map_put(concr->epsilon_idx, &cfc, PgfCCat*, eps_ccat); + } + + GuBuf* prodbuf = gu_seq_buf(eps_ccat->prods); + gu_buf_push(prodbuf, PgfProduction, item->base->prod); + eps_ccat->n_synprods++; + + if (tmp_ccat == NULL) { + size_t n_items = gu_buf_length(item->base->conts); + for (size_t i = 0; i < n_items; i++) { + PgfItem* cont = + gu_buf_get(item->base->conts, PgfItem*, i); + + gu_assert(gu_variant_tag(cont->curr_sym) == PGF_SYMBOL_CAT); + PgfSymbolCat* scat = gu_variant_data(cont->curr_sym); + + PgfItem* new_item = pgf_item_copy(cont, pool); + pgf_item_update_arg(cont, scat->d, eps_ccat, pool); + pgf_item_advance(new_item, pool); + pgf_parser_bu_item(concr, new_item, conts_map, + pool, tmp_pool); + } + } + } + } + break; + case PGF_PRODUCTION_COERCE: { + PgfProductionCoerce* pcoerce = i.data; + + PgfItemBuf* conts_ = + pgf_parsing_get_conts(conts_map, + pcoerce->coerce, item->base->lin_idx, + pool, tmp_pool); + gu_buf_push(conts_, PgfItem*, item); + break; + } + default: + gu_impossible(); + } +} + +void +pgf_parser_bu_index(PgfConcr* concr, PgfCCat* ccat, PgfProduction prod, + PgfContsMap* conts_map, + GuPool *pool, GuPool *tmp_pool) +{ + for (size_t lin_idx = 0; lin_idx < ccat->cnccat->n_lins; lin_idx++) { + PgfItemBuf* conts = + pgf_parsing_get_conts(conts_map, ccat, lin_idx, + pool, tmp_pool); + PgfItem* item = + pgf_new_item(ccat, lin_idx, prod, conts, pool); + + pgf_parser_bu_item(concr, item, conts_map, pool, tmp_pool); + } +} + +bool +pgf_cfcat_eq_fn(GuEquality* self, const void* a, const void* b) +{ + PgfCFCat *x = (PgfCFCat *) a; + PgfCFCat *y = (PgfCFCat *) b; + + return (x->fid == y->fid && x->lin_idx == y->lin_idx); +} + +GuHash +pgf_cfcat_hash_fn(GuHasher* self, const void* a) +{ + PgfCFCat *x = (PgfCFCat *) a; + return ((x->fid << 16) ^ x->lin_idx); +} + +GuHasher pgf_cfcat_hasher = { + { pgf_cfcat_eq_fn }, + pgf_cfcat_hash_fn +}; diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c index e7de966f5..bff700bd5 100644 --- a/src/runtime/c/pgf/reader.c +++ b/src/runtime/c/pgf/reader.c @@ -34,6 +34,8 @@ typedef struct PgfIdContext PgfIdContext; +typedef GuMap PgfContsMap; + // // PgfReader // @@ -48,6 +50,7 @@ struct PgfReader { GuSymTable* symtab; PgfConcr* curr_concr; GuMap* curr_lindefs; + PgfContsMap* curr_conts_map; // used temporary for building the bu index for the parser GuTypeMap* read_to_map; GuTypeMap* read_new_map; void* curr_key; @@ -440,14 +443,6 @@ pgf_read_to_PgfCCatId(GuType* type, PgfReader* rdr, void* to) *pto = ccat; } -void -pgf_parser_bu_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod, - GuPool *pool); - -void -pgf_lzr_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod, - GuPool *pool); - static void pgf_read_to_PgfCCat(GuType* type, PgfReader* rdr, void* to) { @@ -488,9 +483,6 @@ pgf_read_to_PgfCCat(GuType* type, PgfReader* rdr, void* to) default: gu_impossible(); } - - pgf_parser_bu_index(rdr->curr_concr, ccat, prod, rdr->opool); - pgf_lzr_index(rdr->curr_concr, ccat, prod, rdr->opool); } ccat->n_synprods = top; @@ -668,6 +660,11 @@ pgf_ccat_set_cnccat(PgfCCat* ccat) return ccat->cnccat; } +typedef struct { + GuMapItor fn; + PgfReader* rdr; +} PgfIndexFn; + static void pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err) { @@ -677,6 +674,40 @@ pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err) pgf_ccat_set_cnccat(ccat); } +extern GU_DECLARE_TYPE(PgfContsMap, GuMap); + +void +pgf_parser_bu_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod, + PgfContsMap* conts_map, + GuPool *pool, GuPool *tmp_pool); + +void +pgf_lzr_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod, + GuPool *pool); + +static void +pgf_index_prods(GuMapItor* fn, const void* key, void* value, GuExn* err) +{ + (void) (key && err); + + PgfIndexFn* clo = (PgfIndexFn*) fn; + PgfCCat* ccat = *((PgfCCat**) value); + PgfReader *rdr = clo->rdr; + + if (gu_seq_is_null(ccat->prods)) + return; + + size_t n_prods = gu_seq_length(ccat->prods); + for (size_t i = 0; i < n_prods; i++) { + PgfProduction prod = gu_seq_get(ccat->prods, PgfProduction, i); + + pgf_parser_bu_index(rdr->curr_concr, ccat, prod, + rdr->curr_conts_map, + rdr->opool, rdr->tmp_pool); + pgf_lzr_index(rdr->curr_concr, ccat, prod, rdr->opool); + } +} + static void* pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool, size_t* size_out) @@ -695,19 +726,23 @@ pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool, GuMapType* lindefs_t = gu_type_cast(gu_type(PgfLinDefs), GuMap); rdr->curr_lindefs = gu_map_type_make(lindefs_t, rdr->tmp_pool); pgf_read_into_map(lindefs_t, rdr, rdr->curr_lindefs); + rdr->curr_conts_map = gu_map_type_new(PgfContsMap, rdr->tmp_pool); GuMapType* ccats_t = gu_type_cast(gu_type(PgfCCatMap), GuMap); concr->ccats = gu_new_int_map(PgfCCat*, &gu_null_struct, pool); concr->fun_indices = gu_map_type_new(PgfFunIndices, pool); - concr->coerce_idx = gu_map_type_new(PgfCoerceIdx, pool); - concr->lexicon_idx = gu_map_type_new(PgfLexiconIdx, pool); - concr->epsilon_idx = gu_new_buf(struct PgfItemBase*, pool); + concr->coerce_idx = gu_map_type_new(PgfCoerceIdx, pool); + concr->lexicon_idx = gu_map_type_new(PgfTransitions,pool); + concr->epsilon_idx = gu_map_type_new(PgfEpsilonIdx, pool); pgf_read_into_map(ccats_t, rdr, concr->ccats); concr->cnccats = pgf_read_new(rdr, gu_type(PgfCncCatMap), pool, NULL); concr->max_fid = pgf_read_int(rdr); - GuMapItor fn = { pgf_read_ccat_cb }; - gu_map_iter(concr->ccats, &fn, NULL); + PgfIndexFn clo1 = { { pgf_read_ccat_cb }, rdr }; + gu_map_iter(concr->ccats, &clo1.fn, NULL); + + PgfIndexFn clo2 = { { pgf_index_prods }, rdr }; + gu_map_iter(concr->ccats, &clo2.fn, NULL); // set the function ids int n_funs = gu_list_length(concr->cncfuns); @@ -822,6 +857,8 @@ pgf_new_reader(GuIn* in, GuPool* opool, GuPool* tmp_pool, GuExn* err) rdr->err = err; rdr->in = in; rdr->curr_concr = NULL; + rdr->curr_lindefs = NULL; + rdr->curr_conts_map = NULL; rdr->read_to_map = gu_new_type_map(&pgf_read_to_table, tmp_pool); rdr->read_new_map = gu_new_type_map(&pgf_read_new_table, tmp_pool); return rdr;