diff --git a/src/runtime/c/pgf/data.c b/src/runtime/c/pgf/data.c index 4a7741f90..bd5af87e5 100644 --- a/src/runtime/c/pgf/data.c +++ b/src/runtime/c/pgf/data.c @@ -4,10 +4,10 @@ #include #include -PgfCCat pgf_ccat_string = { NULL, NULL, GU_NULL_SEQ, -1 }; -PgfCCat pgf_ccat_int = { NULL, NULL, GU_NULL_SEQ, -2 }; -PgfCCat pgf_ccat_float = { NULL, NULL, GU_NULL_SEQ, -3 }; -PgfCCat pgf_ccat_var = { NULL, NULL, GU_NULL_SEQ, -4 }; +PgfCCat pgf_ccat_string = { NULL, NULL, 0, GU_NULL_SEQ, -1 }; +PgfCCat pgf_ccat_int = { NULL, NULL, 0, GU_NULL_SEQ, -2 }; +PgfCCat pgf_ccat_float = { NULL, NULL, 0, GU_NULL_SEQ, -3 }; +PgfCCat pgf_ccat_var = { NULL, NULL, 0, GU_NULL_SEQ, -4 }; PgfCCatId pgf_literal_cat(PgfLiteral lit) diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h index 45f060c23..cf0e00d82 100644 --- a/src/runtime/c/pgf/data.h +++ b/src/runtime/c/pgf/data.h @@ -173,6 +173,7 @@ struct PgfAlternative { struct PgfCCat { PgfCncCat* cnccat; PgfFunIds* lindefs; + size_t n_synprods; PgfProductionSeq prods; int fid; }; @@ -188,12 +189,19 @@ extern GU_DECLARE_TYPE(PgfFunIndices, GuStringMap); typedef GuMap PgfCoerceIdx; extern GU_DECLARE_TYPE(PgfCoerceIdx, GuMap); +typedef GuStringMap PgfLexiconIdx; +extern GU_DECLARE_TYPE(PgfLexiconIdx, GuStringMap); + +typedef GuBuf PgfEpsilonIdx; + struct PgfConcr { PgfFlags* cflags; PgfPrintNames* printnames; GuMap* ccats; PgfFunIndices* fun_indices; PgfCoerceIdx* coerce_idx; + PgfLexiconIdx* lexicon_idx; + PgfEpsilonIdx* epsilon_idx; PgfCncFuns* cncfuns; PgfSequences* sequences; PgfCIdMap* cnccats; diff --git a/src/runtime/c/pgf/linearize.c b/src/runtime/c/pgf/linearize.c index 77cf65813..aef966eba 100644 --- a/src/runtime/c/pgf/linearize.c +++ b/src/runtime/c/pgf/linearize.c @@ -104,14 +104,12 @@ static GU_DEFINE_TYPE(PgfInferMap, GuMap, gu_ptr_type(PgfCCatIds), pgf_lzr_cats_hasher, gu_ptr_type(PgfLinInfers), &gu_null_struct); -typedef GuStringMap PgfFunIndices; GU_DEFINE_TYPE(PgfFunIndices, GuStringMap, gu_ptr_type(PgfInferMap), &gu_null_struct); typedef GuBuf PgfCCatBuf; static GU_DEFINE_TYPE(PgfCCatBuf, GuBuf, gu_ptr_type(PgfCCat)); -typedef GuMap PgfCoerceIdx; GU_DEFINE_TYPE(PgfCoerceIdx, GuMap, gu_type(PgfCCat), NULL, gu_ptr_type(PgfCCatBuf), &gu_null_struct); @@ -155,7 +153,7 @@ pgf_lzr_add_infer_entry( void -pgf_lzr_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod, +pgf_lzr_index(PgfConcr* concr, PgfCCat* ccat, PgfProduction prod, GuPool *pool) { void* data = gu_variant_data(prod); @@ -164,27 +162,27 @@ pgf_lzr_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod, PgfProductionApply* papply = data; PgfInferMap* infer = gu_map_get(concr->fun_indices, &papply->fun->fun, - PgfInferMap*); - gu_debug("index: %s -> %d", papply->fun->fun, cat->fid); + PgfInferMap*); + gu_debug("index: %s -> %d", papply->fun->fun, ccat->fid); if (!infer) { infer = gu_map_type_new(PgfInferMap, pool); gu_map_put(concr->fun_indices, - &papply->fun->fun, PgfInferMap*, infer); + &papply->fun->fun, PgfInferMap*, infer); } - pgf_lzr_add_infer_entry(infer, cat, papply, pool); + pgf_lzr_add_infer_entry(infer, ccat, papply, pool); break; } case PGF_PRODUCTION_COERCE: { PgfProductionCoerce* pcoerce = data; PgfCCatBuf* cats = gu_map_get(concr->coerce_idx, pcoerce->coerce, - PgfCCatBuf*); + PgfCCatBuf*); if (!cats) { cats = gu_new_buf(PgfCCat*, pool); gu_map_put(concr->coerce_idx, - pcoerce->coerce, PgfCCatBuf*, cats); + pcoerce->coerce, PgfCCatBuf*, cats); } - gu_debug("coerce_idx: %d -> %d", pcoerce->coerce->fid, cat->fid); - gu_buf_push(cats, PgfCCat*, cat); + gu_debug("coerce_idx: %d -> %d", pcoerce->coerce->fid, ccat->fid); + gu_buf_push(cats, PgfCCat*, ccat); break; } default: diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index 5eb8d0e97..a87c62e90 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -12,7 +12,6 @@ typedef GuBuf PgfItemBuf; typedef GuList(PgfItemBuf*) PgfItemBufs; - // GuString -> PgfItemBuf* typedef GuMap PgfTransitions; @@ -84,9 +83,13 @@ struct PgfParsing { PgfGenCatMap* generated_cats; PgfCCatBuf* completed; PgfLexCallback* callback; + GuBuf *lexicon_idx, *epsilon_idx; int max_fid; }; +GU_DEFINE_TYPE(PgfLexiconIdx, GuStringMap, gu_ptr_type(GuBuf), + &gu_null_struct); + #ifdef PGF_PARSER_DEBUG static void pgf_print_production(int fid, PgfProduction prod, GuWriter *wtr, GuExn* err) @@ -187,6 +190,93 @@ pgf_print_item(PgfItem* item, GuWriter* wtr, GuExn* err) } #endif +static void +pgf_parser_bu_add_entry(PgfConcr* concr, PgfTokens tokens, + PgfCCat* ccat, size_t lin_idx, + PgfProduction prod, + GuPool *pool) +{ + PgfToken tok = gu_seq_get(tokens, PgfToken, 0); + + GuBuf* items = gu_map_get(concr->lexicon_idx, &tok, GuBuf*); + if (items == NULL) { + items = gu_new_buf(PgfItemBase*, pool); + gu_map_put(concr->lexicon_idx, &tok, GuBuf*, items); + } + + PgfItemBase* base = gu_new(PgfItemBase, pool); + base->ccat = ccat; + base->lin_idx = lin_idx; + base->prod = prod; + base->conts = NULL; + + gu_buf_push(items, PgfItemBase*, base); +} + +void +pgf_parser_bu_index(PgfConcr* concr, PgfCCat* ccat, PgfProduction prod, + GuPool *pool) +{ + GuVariantInfo i = gu_variant_open(prod); + switch (i.tag) { + case PGF_PRODUCTION_APPLY: { + PgfProductionApply* papp = i.data; + + for (size_t lin_idx = 0; lin_idx < papp->fun->n_lins; lin_idx++) { + PgfSequence seq = papp->fun->lins[lin_idx]; + if (gu_seq_length(seq) > 0) { + PgfSymbol sym = gu_seq_get(seq, PgfSymbol, 0); + GuVariantInfo i = gu_variant_open(sym); + switch (i.tag) { + case PGF_SYMBOL_CAT: { + PgfSymbolCat* scat = i.data; + break; + } + case PGF_SYMBOL_KS: { + PgfSymbolKS* sks = gu_variant_data(sym); + pgf_parser_bu_add_entry(concr, sks->tokens, + ccat, lin_idx, prod, pool); + break; + } + case PGF_SYMBOL_KP: { + PgfSymbolKP* skp = gu_variant_data(sym); + pgf_parser_bu_add_entry(concr, skp->default_form, + ccat, lin_idx, prod, pool); + for (size_t i = 0; i < skp->n_forms; i++) { + pgf_parser_bu_add_entry(concr, skp->forms[i].form, + ccat, lin_idx, prod, pool); + } + break; + } + case PGF_SYMBOL_LIT: + // XXX TODO proper support + break; + case PGF_SYMBOL_VAR: + // XXX TODO proper support + break; + default: + gu_impossible(); + } + } else { + PgfItemBase* base = gu_new(PgfItemBase, pool); + base->ccat = ccat; + base->lin_idx = lin_idx; + base->prod = prod; + base->conts = NULL; + + gu_buf_push(concr->epsilon_idx, PgfItemBase*, base); + } + } + break; + case PGF_PRODUCTION_COERCE: { + break; + } + default: + gu_impossible(); + } + } +} + static void pgf_parsing_add_transition(PgfParsing* parsing, PgfToken tok, PgfItem* item) { @@ -230,6 +320,7 @@ pgf_parsing_create_completed(PgfParsing* parsing, PgfItemBuf* conts, cat->cnccat = cnccat; cat->fid = parsing->max_fid++; cat->prods = gu_buf_seq(gu_new_buf(PgfProduction, parsing->pool)); + cat->n_synprods = 0; gu_map_put(parsing->generated_cats, conts, PgfCCat*, cat); return cat; } @@ -399,6 +490,7 @@ pgf_parsing_complete(PgfParsing* parsing, PgfItem* item) GuBuf* prodbuf = gu_seq_buf(cat->prods); gu_buf_push(prodbuf, PgfProduction, prod); + cat->n_synprods++; #ifdef PGF_PARSER_DEBUG GuPool* tmp_pool = gu_new_pool(); @@ -440,30 +532,60 @@ pgf_parsing_complete(PgfParsing* parsing, PgfItem* item) } } +static void +pgf_parsing_bu_predict(PgfParsing* parsing, GuBuf* items, + PgfCCat* ccat, size_t lin_idx, + PgfItemBuf* conts) +{ + if (items != NULL) { + size_t n_items = gu_buf_length(items); + for (size_t i = 0; i < n_items; i++) { + PgfItemBase* base = gu_buf_get(items, PgfItemBase*, i); + + if (base->ccat == ccat && base->lin_idx == lin_idx) { + GuVariantInfo i = gu_variant_open(base->prod); + switch (i.tag) { + case PGF_PRODUCTION_APPLY: { + PgfProductionApply* papp = i.data; + if (gu_seq_length(papp->args) == 0) { + pgf_parsing_production(parsing, ccat, lin_idx, + base->prod, conts); + } + break; + } + } + } + } + } +} static void pgf_parsing_predict(PgfParsing* parsing, PgfItem* item, - PgfCCat* cat, size_t lin_idx) + PgfCCat* ccat, size_t lin_idx) { - gu_enter("-> cat: %d", cat->fid); - if (gu_seq_is_null(cat->prods)) { + gu_enter("-> cat: %d", ccat->fid); + if (gu_seq_is_null(ccat->prods)) { // Empty category return; } - PgfItemBuf* conts = pgf_parsing_get_conts(parsing, cat, lin_idx); + PgfItemBuf* conts = pgf_parsing_get_conts(parsing, ccat, lin_idx); gu_buf_push(conts, PgfItem*, item); if (gu_buf_length(conts) == 1) { /* First time we encounter this linearization * of this category at the current position, * so predict it. */ - PgfProductionSeq prods = cat->prods; - size_t n_prods = gu_seq_length(prods); - for (size_t i = 0; i < n_prods; i++) { + PgfProductionSeq prods = ccat->prods; + for (size_t i = 0; i < ccat->n_synprods; i++) { PgfProduction prod = gu_seq_get(prods, PgfProduction, i); - pgf_parsing_production(parsing, cat, lin_idx, + pgf_parsing_production(parsing, ccat, lin_idx, prod, conts); } + + pgf_parsing_bu_predict(parsing, parsing->lexicon_idx, + ccat, lin_idx, conts); + pgf_parsing_bu_predict(parsing, parsing->epsilon_idx, + ccat, lin_idx, conts); } else { /* If it has already been completed, combine. */ PgfCCat* completed = @@ -618,7 +740,7 @@ pgf_parsing_item(PgfParsing* parsing, PgfItem* item) } static PgfParsing* -pgf_new_parsing(PgfLexCallback* callback, int max_fid, +pgf_new_parsing(PgfConcr* concr, PgfLexCallback* callback, int max_fid, GuPool* parse_pool, GuPool* out_pool) { PgfParsing* parsing = gu_new(PgfParsing, out_pool); @@ -626,6 +748,8 @@ pgf_new_parsing(PgfLexCallback* callback, int max_fid, parsing->conts_map = gu_map_type_new(PgfContsMap, out_pool); parsing->completed = gu_new_buf(PgfCCat*, parse_pool); parsing->callback = callback; + parsing->lexicon_idx = NULL; + parsing->epsilon_idx = concr->epsilon_idx; parsing->pool = parse_pool; parsing->tmp_pool = out_pool; parsing->max_fid = max_fid; @@ -666,7 +790,9 @@ pgf_parse_token(PgfParse* parse, PgfToken tok, GuPool* pool) PgfParseTokenCallback clo = {{ pgf_match_token }, tok, agenda}; GuPool* tmp_pool = gu_new_pool(); - PgfParsing* parsing = pgf_new_parsing(&clo.fn, parse->max_fid, pool, tmp_pool); + PgfParsing* parsing = pgf_new_parsing(parse->concr, &clo.fn, parse->max_fid, pool, tmp_pool); + parsing->lexicon_idx = gu_map_get(parse->concr->lexicon_idx, &tok, GuBuf*); + size_t n_items = gu_buf_length(parse->agenda); for (size_t i = 0; i < n_items; i++) { PgfItem* item = gu_buf_get(parse->agenda, PgfItem*, i); @@ -779,7 +905,7 @@ pgf_parse_result(PgfParse* parse, GuPool* pool) PgfLexCallback fn = { pgf_noop }; GuPool* tmp_pool = gu_new_pool(); - PgfParsing* parsing = pgf_new_parsing(&fn, parse->max_fid, pool, tmp_pool); + PgfParsing* parsing = pgf_new_parsing(parse->concr, &fn, parse->max_fid, pool, tmp_pool); size_t n_items = gu_buf_length(parse->agenda); for (size_t i = 0; i < n_items; i++) { PgfItem* item = gu_buf_get(parse->agenda, PgfItem*, i); diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c index 0fd6297ba..2472aa1b0 100644 --- a/src/runtime/c/pgf/reader.c +++ b/src/runtime/c/pgf/reader.c @@ -45,11 +45,8 @@ struct PgfReader { GuExn* err; GuPool* opool; GuSymTable* symtab; - PgfSequences* curr_sequences; - PgfCncFuns* curr_cncfuns; - GuMap* curr_ccats; + PgfConcr* curr_concr; GuMap* curr_lindefs; - GuMap* curr_coercions; GuTypeMap* read_to_map; GuTypeMap* read_new_map; void* curr_key; @@ -329,12 +326,8 @@ pgf_read_to_alias(GuType* type, PgfReader* rdr, void* to) } static void -pgf_read_into_map(GuMapType* mtype, PgfReader* rdr, GuMap* map, GuPool* pool) +pgf_read_into_map(GuMapType* mtype, PgfReader* rdr, GuMap* map) { - /* The parameter pool is the temporary pool used to store the - map. But the actual values need to be more persistent so we - store them in rdr->opool. */ - (void) pool; GuPool* tmp_pool = gu_new_pool(); void* key = NULL; GuLength len = pgf_read_len(rdr); @@ -368,7 +361,7 @@ pgf_read_new_GuMap(GuType* type, PgfReader* rdr, GuPool* pool, size_t* size_out) (void) size_out; GuMapType* mtype = (GuMapType*) type; GuMap* map = gu_map_type_make(mtype, pool); - pgf_read_into_map(mtype, rdr, map, pool); + pgf_read_into_map(mtype, rdr, map); gu_return_on_exn(rdr->err, NULL); return map; } @@ -432,7 +425,7 @@ pgf_read_to_PgfCCatId(GuType* type, PgfReader* rdr, void* to) int fid = pgf_read_int(rdr); gu_return_on_exn(rdr->err,); - PgfCCat* ccat = gu_map_get(rdr->curr_ccats, &fid, PgfCCat*); + PgfCCat* ccat = gu_map_get(rdr->curr_concr->ccats, &fid, PgfCCat*); if (!ccat) { ccat = gu_new(PgfCCat, rdr->opool); ccat->cnccat = NULL; @@ -440,12 +433,20 @@ pgf_read_to_PgfCCatId(GuType* type, PgfReader* rdr, void* to) ccat->prods = gu_null_seq; ccat->fid = fid; - gu_map_put(rdr->curr_ccats, &fid, PgfCCat*, ccat); + gu_map_put(rdr->curr_concr->ccats, &fid, PgfCCat*, ccat); } *pto = ccat; } +void +pgf_parser_bu_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod, + GuPool *pool); + +void +pgf_lzr_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod, + GuPool *pool); + static void pgf_read_to_PgfCCat(GuType* type, PgfReader* rdr, void* to) { @@ -453,11 +454,46 @@ pgf_read_to_PgfCCat(GuType* type, PgfReader* rdr, void* to) gu_enter("->"); int* fidp = rdr->curr_key; + GuLength n_prods = pgf_read_len(rdr); + gu_return_on_exn(rdr->err, ); + PgfCCat* ccat = to; ccat->cnccat = NULL; ccat->lindefs = gu_map_get(rdr->curr_lindefs, fidp, PgfFunIds*); - pgf_read_to(rdr, gu_type(PgfProductionSeq), &ccat->prods); + ccat->prods = gu_new_seq(PgfProduction, n_prods, rdr->opool); ccat->fid = *fidp; + + size_t top = 0; + size_t bot = n_prods-1; + for (size_t i = 0; i < n_prods; i++) { + PgfProduction prod; + pgf_read_to(rdr, gu_type(PgfProduction), &prod); + gu_return_on_exn(rdr->err, ); + + GuVariantInfo i = gu_variant_open(prod); + switch (i.tag) { + case PGF_PRODUCTION_APPLY: { + PgfProductionApply* papp = i.data; + if (gu_seq_length(papp->args) > 0) + gu_seq_set(ccat->prods, PgfProduction, top++, prod); + else + gu_seq_set(ccat->prods, PgfProduction, bot--, prod); + break; + } + case PGF_PRODUCTION_COERCE: { + gu_seq_set(ccat->prods, PgfProduction, top++, prod); + break; + } + default: + gu_impossible(); + } + + pgf_parser_bu_index(rdr->curr_concr, ccat, prod, rdr->opool); + pgf_lzr_index(rdr->curr_concr, ccat, prod, rdr->opool); + } + + ccat->n_synprods = top; + gu_exit("<-"); } @@ -467,10 +503,10 @@ static void* pgf_read_new_PgfCCat(GuType* type, PgfReader* rdr, GuPool* pool, size_t* size_out) { - PgfCCat* ccat = gu_map_get(rdr->curr_ccats, rdr->curr_key, PgfCCat*); + PgfCCat* ccat = gu_map_get(rdr->curr_concr->ccats, rdr->curr_key, PgfCCat*); if (!ccat) { ccat = gu_new(PgfCCat, pool); - gu_map_put(rdr->curr_ccats, rdr->curr_key, PgfCCat*, ccat); + gu_map_put(rdr->curr_concr->ccats, rdr->curr_key, PgfCCat*, ccat); } pgf_read_to_PgfCCat(type, rdr, ccat); *size_out = sizeof(PgfCCat); @@ -558,41 +594,17 @@ pgf_read_to_PgfEquationsM(GuType* type, PgfReader* rdr, void* to) } } - -static void* -pgf_read_new_idarray(GuType* type, PgfReader* rdr, GuPool* pool, - size_t* size_out) -{ - (void) type; - void* list = pgf_read_new_GuList(type, rdr, rdr->opool, size_out); - if (type == gu_type(PgfSequences)) { - rdr->curr_sequences = list; - } else if (type == gu_type(PgfCncFuns)) { - rdr->curr_cncfuns = list; - - // set the function ids - int n_funs = gu_list_length(rdr->curr_cncfuns); - for (int funid = 0; funid < n_funs; funid++) { - PgfCncFun* cncfun = gu_list_index(rdr->curr_cncfuns, funid); - cncfun->funid = funid; - } - } else { - gu_impossible(); - } - return list; -} - static void pgf_read_to_PgfSeqId(GuType* type, PgfReader* rdr, void* to) { (void) type; int32_t id = pgf_read_int(rdr); gu_return_on_exn(rdr->err,); - if (id < 0 || id >= gu_list_length(rdr->curr_sequences)) { + if (id < 0 || id >= gu_list_length(rdr->curr_concr->sequences)) { gu_raise(rdr->err, PgfReadExn); return; } - *(PgfSeqId*) to = gu_list_elems(rdr->curr_sequences)[id]; + *(PgfSeqId*) to = gu_list_elems(rdr->curr_concr->sequences)[id]; } @@ -602,11 +614,11 @@ pgf_read_to_PgfFunId(GuType* type, PgfReader* rdr, void* to) (void) type; int32_t id = pgf_read_int(rdr); gu_return_on_exn(rdr->err,); - if (id < 0 || id >= gu_list_length(rdr->curr_cncfuns)) { + if (id < 0 || id >= gu_list_length(rdr->curr_concr->cncfuns)) { gu_raise(rdr->err, PgfReadExn); return; } - *(PgfFunId*) to = gu_list_elems(rdr->curr_cncfuns)[id]; + *(PgfFunId*) to = gu_list_elems(rdr->curr_concr->cncfuns)[id]; } static GU_DEFINE_TYPE(PgfLinDefs, GuIntMap, gu_ptr_type(PgfFunIds), @@ -655,32 +667,13 @@ pgf_ccat_set_cnccat(PgfCCat* ccat) return ccat->cnccat; } -typedef struct { - GuMapItor fn; - PgfConcr* concr; - GuPool *pool; -} PgfIndexFn; - -void -pgf_lzr_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod, - GuPool *pool); - static void pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err) { (void) (key && err); - PgfIndexFn* clo = (PgfIndexFn*) fn; PgfCCat* ccat = *((PgfCCat**) value); pgf_ccat_set_cnccat(ccat); - - if (!gu_seq_is_null(ccat->prods)) { - size_t n_prods = gu_seq_length(ccat->prods); - for (size_t i = 0; i < n_prods; i++) { - PgfProduction prod = gu_seq_get(ccat->prods, PgfProduction, i); - pgf_lzr_index(clo->concr, ccat, prod, clo->pool); - } - } } static void* @@ -689,30 +682,39 @@ pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool, { (void) (type && size_out); PgfConcr* concr = gu_new(PgfConcr, pool); + rdr->curr_concr = concr; concr->cflags = pgf_read_new(rdr, gu_type(PgfFlags), pool, NULL); concr->printnames = pgf_read_new(rdr, gu_type(PgfPrintNames), pool, NULL); concr->sequences = - pgf_read_new(rdr, gu_type(PgfSequences), rdr->opool, NULL); + pgf_read_new(rdr, gu_type(PgfSequences), pool, NULL); concr->cncfuns = pgf_read_new(rdr, gu_type(PgfCncFuns), pool, NULL); GuMapType* lindefs_t = gu_type_cast(gu_type(PgfLinDefs), GuMap); rdr->curr_lindefs = gu_map_type_make(lindefs_t, pool); - pgf_read_into_map(lindefs_t, rdr, rdr->curr_lindefs, rdr->opool); + pgf_read_into_map(lindefs_t, rdr, rdr->curr_lindefs); GuMapType* ccats_t = gu_type_cast(gu_type(PgfCCatMap), GuMap); concr->ccats = gu_new_int_map(PgfCCat*, &gu_null_struct, pool); concr->fun_indices = gu_map_type_new(PgfFunIndices, pool); - concr->coerce_idx = gu_map_type_new(PgfCoerceIdx, pool); - rdr->curr_ccats = concr->ccats; - pgf_read_into_map(ccats_t, rdr, concr->ccats, rdr->opool); - concr->cnccats = pgf_read_new(rdr, gu_type(PgfCncCatMap), - rdr->opool, NULL); + concr->coerce_idx = gu_map_type_new(PgfCoerceIdx, pool); + concr->lexicon_idx = gu_map_type_new(PgfLexiconIdx, pool); + concr->epsilon_idx = gu_new_buf(struct PgfItemBase*, pool); + pgf_read_into_map(ccats_t, rdr, concr->ccats); + concr->cnccats = pgf_read_new(rdr, gu_type(PgfCncCatMap), pool, NULL); concr->max_fid = pgf_read_int(rdr); - PgfIndexFn clo = { { pgf_read_ccat_cb }, concr, pool }; - gu_map_iter(concr->ccats, &clo.fn, NULL); + GuMapItor fn = { pgf_read_ccat_cb }; + gu_map_iter(concr->ccats, &fn, NULL); + + // set the function ids + int n_funs = gu_list_length(concr->cncfuns); + for (int funid = 0; funid < n_funs; funid++) { + PgfCncFun* cncfun = gu_list_index(concr->cncfuns, funid); + cncfun->funid = funid; + } + return concr; } @@ -737,7 +739,7 @@ pgf_read_new_PgfCncCat(GuType* type, PgfReader* rdr, GuPool* pool, for (int i = 0; i < len; i++) { int fid = first + i; - PgfCCat* ccat = gu_map_get(rdr->curr_ccats, &fid, PgfCCat*); + PgfCCat* ccat = gu_map_get(rdr->curr_concr->ccats, &fid, PgfCCat*); if (!ccat) { ccat = gu_new(PgfCCat, rdr->opool); ccat->cnccat = NULL; @@ -745,7 +747,7 @@ pgf_read_new_PgfCncCat(GuType* type, PgfReader* rdr, GuPool* pool, ccat->prods = gu_null_seq; ccat->fid = fid; - gu_map_put(rdr->curr_ccats, &fid, PgfCCat*, ccat); + gu_map_put(rdr->curr_concr->ccats, &fid, PgfCCat*, ccat); } gu_list_index(cnccat->cats, i) = ccat; @@ -806,9 +808,7 @@ pgf_read_new_table = GU_TYPETABLE( PGF_READ_NEW(GuList), PGF_READ_NEW(PgfCCat), PGF_READ_NEW(PgfCncCat), - PGF_READ_NEW(PgfConcr), - PGF_READ_NEW_FN(PgfSequences, pgf_read_new_idarray), - PGF_READ_NEW_FN(PgfCncFuns, pgf_read_new_idarray) + PGF_READ_NEW(PgfConcr) ); static PgfReader* @@ -819,8 +819,7 @@ pgf_new_reader(GuIn* in, GuPool* opool, GuPool* tmp_pool, GuExn* err) rdr->symtab = gu_new_symtable(opool, tmp_pool); rdr->err = err; rdr->in = in; - rdr->curr_sequences = NULL; - rdr->curr_cncfuns = NULL; + rdr->curr_concr = NULL; rdr->read_to_map = gu_new_type_map(&pgf_read_to_table, tmp_pool); rdr->read_new_map = gu_new_type_map(&pgf_read_new_table, tmp_pool); return rdr;