mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 13:09:33 -06:00
libpgf: now we have both complete bottom up index for robust parsing and fast lexical lookup from the same index
This commit is contained in:
@@ -189,10 +189,11 @@ extern GU_DECLARE_TYPE(PgfFunIndices, GuStringMap);
|
||||
typedef GuMap PgfCoerceIdx;
|
||||
extern GU_DECLARE_TYPE(PgfCoerceIdx, GuMap);
|
||||
|
||||
typedef GuStringMap PgfLexiconIdx;
|
||||
extern GU_DECLARE_TYPE(PgfLexiconIdx, GuStringMap);
|
||||
typedef GuStringMap PgfTransitions;
|
||||
extern GU_DECLARE_TYPE(PgfTransitions, GuStringMap);
|
||||
|
||||
typedef GuBuf PgfEpsilonIdx;
|
||||
typedef GuMap PgfEpsilonIdx;
|
||||
extern GU_DECLARE_TYPE(PgfEpsilonIdx, GuMap);
|
||||
|
||||
struct PgfConcr {
|
||||
PgfFlags* cflags;
|
||||
@@ -200,7 +201,7 @@ struct PgfConcr {
|
||||
GuMap* ccats;
|
||||
PgfFunIndices* fun_indices;
|
||||
PgfCoerceIdx* coerce_idx;
|
||||
PgfLexiconIdx* lexicon_idx;
|
||||
PgfTransitions* lexicon_idx;
|
||||
PgfEpsilonIdx* epsilon_idx;
|
||||
PgfCncFuns* cncfuns;
|
||||
PgfSequences* sequences;
|
||||
|
||||
@@ -11,10 +11,6 @@ typedef struct PgfItem PgfItem;
|
||||
typedef GuBuf PgfItemBuf;
|
||||
typedef GuList(PgfItemBuf*) PgfItemBufs;
|
||||
|
||||
|
||||
// GuString -> PgfItemBuf*
|
||||
typedef GuMap PgfTransitions;
|
||||
|
||||
typedef GuBuf PgfCCatBuf;
|
||||
|
||||
struct PgfParse {
|
||||
@@ -50,24 +46,40 @@ struct PgfItem {
|
||||
uint8_t alt;
|
||||
};
|
||||
|
||||
typedef GuMap PgfContsMap;
|
||||
typedef struct {
|
||||
int fid;
|
||||
int lin_idx;
|
||||
} PgfCFCat;
|
||||
|
||||
static GU_DEFINE_TYPE(PgfCFCat, struct,
|
||||
GU_MEMBER(PgfCFCat, fid, int),
|
||||
GU_MEMBER(PgfCFCat, lin_idx, int));
|
||||
|
||||
extern GuHasher pgf_cfcat_hasher;
|
||||
|
||||
static GU_DEFINE_TYPE(PgfItemBuf, abstract, _);
|
||||
static GU_DEFINE_TYPE(PgfItemBufs, abstract, _);
|
||||
static GU_DEFINE_TYPE(PgfContsMap, GuMap,
|
||||
|
||||
typedef GuMap PgfContsMap;
|
||||
GU_DEFINE_TYPE(PgfContsMap, GuMap,
|
||||
gu_type(PgfCCat), NULL,
|
||||
gu_ptr_type(PgfItemBufs), &gu_null_struct);
|
||||
|
||||
static GU_DEFINE_TYPE(PgfGenCatMap, GuMap,
|
||||
typedef GuMap PgfEpsilonIdx;
|
||||
GU_DEFINE_TYPE(PgfEpsilonIdx, GuMap,
|
||||
gu_type(PgfCFCat), &pgf_cfcat_hasher,
|
||||
gu_ptr_type(PgfCCat), &gu_null_struct);
|
||||
|
||||
typedef GuMap PgfGenCatMap;
|
||||
GU_DEFINE_TYPE(PgfGenCatMap, GuMap,
|
||||
gu_type(PgfItemBuf), NULL,
|
||||
gu_ptr_type(PgfCCat), &gu_null_struct);
|
||||
|
||||
static GU_DEFINE_TYPE(PgfTransitions, GuStringMap,
|
||||
// GuString -> PgfItemBuf*
|
||||
typedef GuStringMap PgfTransitions;
|
||||
GU_DEFINE_TYPE(PgfTransitions, GuStringMap,
|
||||
gu_ptr_type(PgfItemBuf), &gu_null_struct);
|
||||
|
||||
typedef GuMap PgfGenCatMap;
|
||||
|
||||
typedef struct PgfParsing PgfParsing;
|
||||
|
||||
typedef const struct PgfLexCallback PgfLexCallback;
|
||||
@@ -83,12 +95,11 @@ struct PgfParsing {
|
||||
PgfGenCatMap* generated_cats;
|
||||
PgfCCatBuf* completed;
|
||||
PgfLexCallback* callback;
|
||||
GuBuf *lexicon_idx, *epsilon_idx;
|
||||
PgfItemBuf *lexicon_idx;
|
||||
PgfEpsilonIdx *epsilon_idx;
|
||||
int max_fid;
|
||||
};
|
||||
|
||||
GU_DEFINE_TYPE(PgfLexiconIdx, GuStringMap, gu_ptr_type(GuBuf),
|
||||
&gu_null_struct);
|
||||
|
||||
#ifdef PGF_PARSER_DEBUG
|
||||
static void
|
||||
@@ -190,93 +201,6 @@ pgf_print_item(PgfItem* item, GuWriter* wtr, GuExn* err)
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
pgf_parser_bu_add_entry(PgfConcr* concr, PgfTokens tokens,
|
||||
PgfCCat* ccat, size_t lin_idx,
|
||||
PgfProduction prod,
|
||||
GuPool *pool)
|
||||
{
|
||||
PgfToken tok = gu_seq_get(tokens, PgfToken, 0);
|
||||
|
||||
GuBuf* items = gu_map_get(concr->lexicon_idx, &tok, GuBuf*);
|
||||
if (items == NULL) {
|
||||
items = gu_new_buf(PgfItemBase*, pool);
|
||||
gu_map_put(concr->lexicon_idx, &tok, GuBuf*, items);
|
||||
}
|
||||
|
||||
PgfItemBase* base = gu_new(PgfItemBase, pool);
|
||||
base->ccat = ccat;
|
||||
base->lin_idx = lin_idx;
|
||||
base->prod = prod;
|
||||
base->conts = NULL;
|
||||
|
||||
gu_buf_push(items, PgfItemBase*, base);
|
||||
}
|
||||
|
||||
void
|
||||
pgf_parser_bu_index(PgfConcr* concr, PgfCCat* ccat, PgfProduction prod,
|
||||
GuPool *pool)
|
||||
{
|
||||
GuVariantInfo i = gu_variant_open(prod);
|
||||
switch (i.tag) {
|
||||
case PGF_PRODUCTION_APPLY: {
|
||||
PgfProductionApply* papp = i.data;
|
||||
|
||||
for (size_t lin_idx = 0; lin_idx < papp->fun->n_lins; lin_idx++) {
|
||||
PgfSequence seq = papp->fun->lins[lin_idx];
|
||||
if (gu_seq_length(seq) > 0) {
|
||||
PgfSymbol sym = gu_seq_get(seq, PgfSymbol, 0);
|
||||
GuVariantInfo i = gu_variant_open(sym);
|
||||
switch (i.tag) {
|
||||
case PGF_SYMBOL_CAT: {
|
||||
PgfSymbolCat* scat = i.data;
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_KS: {
|
||||
PgfSymbolKS* sks = gu_variant_data(sym);
|
||||
pgf_parser_bu_add_entry(concr, sks->tokens,
|
||||
ccat, lin_idx, prod, pool);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_KP: {
|
||||
PgfSymbolKP* skp = gu_variant_data(sym);
|
||||
pgf_parser_bu_add_entry(concr, skp->default_form,
|
||||
ccat, lin_idx, prod, pool);
|
||||
for (size_t i = 0; i < skp->n_forms; i++) {
|
||||
pgf_parser_bu_add_entry(concr, skp->forms[i].form,
|
||||
ccat, lin_idx, prod, pool);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_LIT:
|
||||
// XXX TODO proper support
|
||||
break;
|
||||
case PGF_SYMBOL_VAR:
|
||||
// XXX TODO proper support
|
||||
break;
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
} else {
|
||||
PgfItemBase* base = gu_new(PgfItemBase, pool);
|
||||
base->ccat = ccat;
|
||||
base->lin_idx = lin_idx;
|
||||
base->prod = prod;
|
||||
base->conts = NULL;
|
||||
|
||||
gu_buf_push(concr->epsilon_idx, PgfItemBase*, base);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case PGF_PRODUCTION_COERCE: {
|
||||
break;
|
||||
}
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_parsing_add_transition(PgfParsing* parsing, PgfToken tok, PgfItem* item)
|
||||
{
|
||||
@@ -284,29 +208,31 @@ pgf_parsing_add_transition(PgfParsing* parsing, PgfToken tok, PgfItem* item)
|
||||
}
|
||||
|
||||
static PgfItemBufs*
|
||||
pgf_parsing_get_contss(PgfParsing* parsing, PgfCCat* cat)
|
||||
pgf_parsing_get_contss(PgfContsMap* conts_map, PgfCCat* cat, GuPool *tmp_pool)
|
||||
{
|
||||
PgfItemBufs* contss = gu_map_get(parsing->conts_map, cat, PgfItemBufs*);
|
||||
PgfItemBufs* contss = gu_map_get(conts_map, cat, PgfItemBufs*);
|
||||
if (!contss) {
|
||||
size_t n_lins = cat->cnccat->n_lins;
|
||||
contss = gu_new_list(PgfItemBufs, parsing->tmp_pool, n_lins);
|
||||
contss = gu_new_list(PgfItemBufs, tmp_pool, n_lins);
|
||||
for (size_t i = 0; i < n_lins; i++) {
|
||||
gu_list_index(contss, i) = NULL;
|
||||
}
|
||||
gu_map_put(parsing->conts_map, cat, PgfItemBufs*, contss);
|
||||
gu_map_put(conts_map, cat, PgfItemBufs*, contss);
|
||||
}
|
||||
return contss;
|
||||
}
|
||||
|
||||
|
||||
static PgfItemBuf*
|
||||
pgf_parsing_get_conts(PgfParsing* parsing, PgfCCat* cat, size_t lin_idx)
|
||||
pgf_parsing_get_conts(PgfContsMap* conts_map,
|
||||
PgfCCat* ccat, size_t lin_idx,
|
||||
GuPool *pool, GuPool *tmp_pool)
|
||||
{
|
||||
gu_require(lin_idx < cat->cnccat->n_lins);
|
||||
PgfItemBufs* contss = pgf_parsing_get_contss(parsing, cat);
|
||||
gu_require(lin_idx < ccat->cnccat->n_lins);
|
||||
PgfItemBufs* contss =
|
||||
pgf_parsing_get_contss(conts_map, ccat, tmp_pool);
|
||||
PgfItemBuf* conts = gu_list_index(contss, lin_idx);
|
||||
if (!conts) {
|
||||
conts = gu_new_buf(PgfItem*, parsing->pool);
|
||||
conts = gu_new_buf(PgfItem*, pool);
|
||||
gu_list_index(contss, lin_idx) = conts;
|
||||
}
|
||||
return conts;
|
||||
@@ -411,6 +337,20 @@ pgf_item_copy(PgfItem* item, GuPool* pool)
|
||||
return copy;
|
||||
}
|
||||
|
||||
static PgfItem*
|
||||
pgf_item_update_arg(PgfItem* item, size_t d, PgfCCat *ccat, GuPool* pool)
|
||||
{
|
||||
PgfItem* new_item = pgf_item_copy(item, pool);
|
||||
size_t nargs = gu_seq_length(item->args);
|
||||
new_item->args = gu_new_seq(PgfPArg, nargs, pool);
|
||||
memcpy(gu_seq_data(new_item->args), gu_seq_data(item->args),
|
||||
nargs * sizeof(PgfPArg));
|
||||
gu_seq_set(new_item->args, PgfPArg, d,
|
||||
((PgfPArg) { .hypos = NULL, .ccat = ccat }));
|
||||
|
||||
return new_item;
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_item_advance(PgfItem* item, GuPool* pool)
|
||||
{
|
||||
@@ -428,15 +368,12 @@ pgf_parsing_combine(PgfParsing* parsing, PgfItem* cont, PgfCCat* cat)
|
||||
gu_buf_push(parsing->completed, PgfCCat*, cat);
|
||||
return;
|
||||
}
|
||||
PgfItem* item = pgf_item_copy(cont, parsing->pool);
|
||||
size_t nargs = gu_seq_length(cont->args);
|
||||
item->args = gu_new_seq(PgfPArg, nargs, parsing->pool);
|
||||
memcpy(gu_seq_data(item->args), gu_seq_data(cont->args),
|
||||
nargs * sizeof(PgfPArg));
|
||||
gu_assert(gu_variant_tag(item->curr_sym) == PGF_SYMBOL_CAT);
|
||||
PgfSymbolCat* pcat = gu_variant_data(cont->curr_sym);
|
||||
gu_seq_set(item->args, PgfPArg, pcat->d,
|
||||
((PgfPArg) { .hypos = NULL, .ccat = cat }));
|
||||
|
||||
gu_assert(gu_variant_tag(cont->curr_sym) == PGF_SYMBOL_CAT);
|
||||
PgfSymbolCat* scat = gu_variant_data(cont->curr_sym);
|
||||
|
||||
PgfItem* item =
|
||||
pgf_item_update_arg(cont, scat->d, cat, parsing->pool);
|
||||
pgf_item_advance(item, parsing->pool);
|
||||
pgf_parsing_item(parsing, item);
|
||||
}
|
||||
@@ -445,7 +382,7 @@ static void
|
||||
pgf_parsing_production(PgfParsing* parsing, PgfCCat* ccat, size_t lin_idx,
|
||||
PgfProduction prod, PgfItemBuf* conts)
|
||||
{
|
||||
PgfItem* item =
|
||||
PgfItem* item =
|
||||
pgf_new_item(ccat, lin_idx, prod, conts, parsing->pool);
|
||||
pgf_parsing_item(parsing, item);
|
||||
}
|
||||
@@ -509,7 +446,9 @@ pgf_parsing_complete(PgfParsing* parsing, PgfItem* item)
|
||||
if (tmp_cat != NULL) {
|
||||
// The category has already been created. If it has also been
|
||||
// predicted already, then process a new item for this production.
|
||||
PgfItemBufs* contss = pgf_parsing_get_contss(parsing, cat);
|
||||
PgfItemBufs* contss =
|
||||
pgf_parsing_get_contss(parsing->conts_map, cat,
|
||||
parsing->tmp_pool);
|
||||
size_t n_contss = gu_list_length(contss);
|
||||
for (size_t i = 0; i < n_contss; i++) {
|
||||
PgfItemBuf* conts2 = gu_list_index(contss, i);
|
||||
@@ -532,33 +471,6 @@ pgf_parsing_complete(PgfParsing* parsing, PgfItem* item)
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_parsing_bu_predict(PgfParsing* parsing, GuBuf* items,
|
||||
PgfCCat* ccat, size_t lin_idx,
|
||||
PgfItemBuf* conts)
|
||||
{
|
||||
if (items != NULL) {
|
||||
size_t n_items = gu_buf_length(items);
|
||||
for (size_t i = 0; i < n_items; i++) {
|
||||
PgfItemBase* base = gu_buf_get(items, PgfItemBase*, i);
|
||||
|
||||
if (base->ccat == ccat && base->lin_idx == lin_idx) {
|
||||
GuVariantInfo i = gu_variant_open(base->prod);
|
||||
switch (i.tag) {
|
||||
case PGF_PRODUCTION_APPLY: {
|
||||
PgfProductionApply* papp = i.data;
|
||||
if (gu_seq_length(papp->args) == 0) {
|
||||
pgf_parsing_production(parsing, ccat, lin_idx,
|
||||
base->prod, conts);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_parsing_predict(PgfParsing* parsing, PgfItem* item,
|
||||
PgfCCat* ccat, size_t lin_idx)
|
||||
@@ -568,12 +480,16 @@ pgf_parsing_predict(PgfParsing* parsing, PgfItem* item,
|
||||
// Empty category
|
||||
return;
|
||||
}
|
||||
PgfItemBuf* conts = pgf_parsing_get_conts(parsing, ccat, lin_idx);
|
||||
PgfItemBuf* conts =
|
||||
pgf_parsing_get_conts(parsing->conts_map, ccat, lin_idx,
|
||||
parsing->pool, parsing->tmp_pool);
|
||||
gu_buf_push(conts, PgfItem*, item);
|
||||
if (gu_buf_length(conts) == 1) {
|
||||
/* First time we encounter this linearization
|
||||
* of this category at the current position,
|
||||
* so predict it. */
|
||||
|
||||
// Top-down prediction for syntactic rules
|
||||
PgfProductionSeq prods = ccat->prods;
|
||||
for (size_t i = 0; i < ccat->n_synprods; i++) {
|
||||
PgfProduction prod =
|
||||
@@ -582,10 +498,44 @@ pgf_parsing_predict(PgfParsing* parsing, PgfItem* item,
|
||||
prod, conts);
|
||||
}
|
||||
|
||||
pgf_parsing_bu_predict(parsing, parsing->lexicon_idx,
|
||||
ccat, lin_idx, conts);
|
||||
pgf_parsing_bu_predict(parsing, parsing->epsilon_idx,
|
||||
ccat, lin_idx, conts);
|
||||
// Bottom-up prediction for lexical rules
|
||||
if (parsing->lexicon_idx != NULL) {
|
||||
size_t n_items = gu_buf_length(parsing->lexicon_idx);
|
||||
for (size_t i = 0; i < n_items; i++) {
|
||||
PgfItem* item = gu_buf_get(parsing->lexicon_idx, PgfItem*, i);
|
||||
|
||||
if (item->base->ccat == ccat &&
|
||||
item->base->lin_idx == lin_idx &&
|
||||
gu_seq_length(item->args) == 0) {
|
||||
pgf_parsing_production(parsing, ccat, lin_idx,
|
||||
item->base->prod, conts);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Bottom-up prediction for epsilon rules
|
||||
PgfCFCat cfc = {ccat->fid, lin_idx};
|
||||
PgfCCat* eps_ccat = gu_map_get(parsing->epsilon_idx, &cfc, PgfCCat*);
|
||||
|
||||
if (eps_ccat != NULL) {
|
||||
size_t n_prods = gu_seq_length(eps_ccat->prods);
|
||||
for (size_t i = 0; i < n_prods; i++) {
|
||||
PgfProduction prod =
|
||||
gu_seq_get(eps_ccat->prods, PgfProduction, i);
|
||||
|
||||
GuVariantInfo i = gu_variant_open(prod);
|
||||
switch (i.tag) {
|
||||
case PGF_PRODUCTION_APPLY: {
|
||||
PgfProductionApply* papp = i.data;
|
||||
if (gu_seq_length(papp->args) == 0) {
|
||||
pgf_parsing_production(parsing, ccat, lin_idx,
|
||||
prod, conts);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* If it has already been completed, combine. */
|
||||
PgfCCat* completed =
|
||||
@@ -965,3 +915,177 @@ pgf_parser_parse(PgfConcr* concr, PgfCId cat, size_t lin_idx, GuPool* pool)
|
||||
}
|
||||
return parse;
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_parser_bu_add_entry(PgfConcr* concr, PgfTokens tokens,
|
||||
PgfItem* item,
|
||||
GuPool *pool)
|
||||
{
|
||||
PgfToken tok = gu_seq_get(tokens, PgfToken, 0);
|
||||
|
||||
GuBuf* items = gu_map_get(concr->lexicon_idx, &tok, GuBuf*);
|
||||
if (items == NULL) {
|
||||
items = gu_new_buf(PgfItemBase*, pool);
|
||||
gu_map_put(concr->lexicon_idx, &tok, GuBuf*, items);
|
||||
}
|
||||
|
||||
gu_buf_push(items, PgfItem*, item);
|
||||
}
|
||||
|
||||
void
|
||||
pgf_parser_bu_item(PgfConcr* concr, PgfItem* item,
|
||||
PgfContsMap* conts_map,
|
||||
GuPool *pool, GuPool *tmp_pool)
|
||||
{
|
||||
GuVariantInfo i = gu_variant_open(item->base->prod);
|
||||
switch (i.tag) {
|
||||
case PGF_PRODUCTION_APPLY: {
|
||||
PgfProductionApply* papp = i.data;
|
||||
|
||||
PgfSequence seq = papp->fun->lins[item->base->lin_idx];
|
||||
if (item->seq_idx < gu_seq_length(seq)) {
|
||||
GuVariantInfo i = gu_variant_open(item->curr_sym);
|
||||
switch (i.tag) {
|
||||
case PGF_SYMBOL_CAT: {
|
||||
PgfSymbolCat* scat = i.data;
|
||||
|
||||
// Place the item in the continuation map
|
||||
PgfPArg* parg =
|
||||
gu_seq_index(papp->args, PgfPArg, scat->d);
|
||||
PgfItemBuf* conts_ =
|
||||
pgf_parsing_get_conts(conts_map,
|
||||
parg->ccat, scat->r,
|
||||
pool, tmp_pool);
|
||||
gu_buf_push(conts_, PgfItem*, item);
|
||||
|
||||
// If the current category has epsilon rules
|
||||
// then we must do the same for a new item where
|
||||
// the dot is moved with one position.
|
||||
PgfCFCat cfc = {parg->ccat->fid, scat->r};
|
||||
PgfCCat* eps_ccat =
|
||||
gu_map_get(concr->epsilon_idx, &cfc, PgfCCat*);
|
||||
|
||||
if (eps_ccat != NULL) {
|
||||
PgfItem* new_item =
|
||||
pgf_item_update_arg(item, scat->d, eps_ccat, pool);
|
||||
pgf_item_advance(new_item, pool);
|
||||
pgf_parser_bu_item(concr, new_item, conts_map,
|
||||
pool, tmp_pool);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_KS: {
|
||||
PgfSymbolKS* sks = i.data;
|
||||
pgf_parser_bu_add_entry(concr, sks->tokens,
|
||||
item, pool);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_KP: {
|
||||
PgfSymbolKP* skp = i.data;
|
||||
pgf_parser_bu_add_entry(concr, skp->default_form,
|
||||
item, pool);
|
||||
for (size_t i = 0; i < skp->n_forms; i++) {
|
||||
pgf_parser_bu_add_entry(concr, skp->forms[i].form,
|
||||
item, pool);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_LIT:
|
||||
// XXX TODO proper support
|
||||
break;
|
||||
case PGF_SYMBOL_VAR:
|
||||
// XXX TODO proper support
|
||||
break;
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
} else {
|
||||
PgfCFCat cfc = {item->base->ccat->fid, item->base->lin_idx};
|
||||
PgfCCat* tmp_ccat =
|
||||
gu_map_get(concr->epsilon_idx, &cfc, PgfCCat*);
|
||||
|
||||
PgfCCat* eps_ccat = tmp_ccat;
|
||||
if (eps_ccat == NULL) {
|
||||
eps_ccat = gu_new(PgfCCat, pool);
|
||||
eps_ccat->cnccat = item->base->ccat->cnccat;
|
||||
eps_ccat->fid = concr->max_fid++;
|
||||
eps_ccat->prods =
|
||||
gu_buf_seq(gu_new_buf(PgfProduction, pool));
|
||||
eps_ccat->n_synprods = 0;
|
||||
gu_map_put(concr->epsilon_idx, &cfc, PgfCCat*, eps_ccat);
|
||||
}
|
||||
|
||||
GuBuf* prodbuf = gu_seq_buf(eps_ccat->prods);
|
||||
gu_buf_push(prodbuf, PgfProduction, item->base->prod);
|
||||
eps_ccat->n_synprods++;
|
||||
|
||||
if (tmp_ccat == NULL) {
|
||||
size_t n_items = gu_buf_length(item->base->conts);
|
||||
for (size_t i = 0; i < n_items; i++) {
|
||||
PgfItem* cont =
|
||||
gu_buf_get(item->base->conts, PgfItem*, i);
|
||||
|
||||
gu_assert(gu_variant_tag(cont->curr_sym) == PGF_SYMBOL_CAT);
|
||||
PgfSymbolCat* scat = gu_variant_data(cont->curr_sym);
|
||||
|
||||
PgfItem* new_item = pgf_item_copy(cont, pool);
|
||||
pgf_item_update_arg(cont, scat->d, eps_ccat, pool);
|
||||
pgf_item_advance(new_item, pool);
|
||||
pgf_parser_bu_item(concr, new_item, conts_map,
|
||||
pool, tmp_pool);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case PGF_PRODUCTION_COERCE: {
|
||||
PgfProductionCoerce* pcoerce = i.data;
|
||||
|
||||
PgfItemBuf* conts_ =
|
||||
pgf_parsing_get_conts(conts_map,
|
||||
pcoerce->coerce, item->base->lin_idx,
|
||||
pool, tmp_pool);
|
||||
gu_buf_push(conts_, PgfItem*, item);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
pgf_parser_bu_index(PgfConcr* concr, PgfCCat* ccat, PgfProduction prod,
|
||||
PgfContsMap* conts_map,
|
||||
GuPool *pool, GuPool *tmp_pool)
|
||||
{
|
||||
for (size_t lin_idx = 0; lin_idx < ccat->cnccat->n_lins; lin_idx++) {
|
||||
PgfItemBuf* conts =
|
||||
pgf_parsing_get_conts(conts_map, ccat, lin_idx,
|
||||
pool, tmp_pool);
|
||||
PgfItem* item =
|
||||
pgf_new_item(ccat, lin_idx, prod, conts, pool);
|
||||
|
||||
pgf_parser_bu_item(concr, item, conts_map, pool, tmp_pool);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
pgf_cfcat_eq_fn(GuEquality* self, const void* a, const void* b)
|
||||
{
|
||||
PgfCFCat *x = (PgfCFCat *) a;
|
||||
PgfCFCat *y = (PgfCFCat *) b;
|
||||
|
||||
return (x->fid == y->fid && x->lin_idx == y->lin_idx);
|
||||
}
|
||||
|
||||
GuHash
|
||||
pgf_cfcat_hash_fn(GuHasher* self, const void* a)
|
||||
{
|
||||
PgfCFCat *x = (PgfCFCat *) a;
|
||||
return ((x->fid << 16) ^ x->lin_idx);
|
||||
}
|
||||
|
||||
GuHasher pgf_cfcat_hasher = {
|
||||
{ pgf_cfcat_eq_fn },
|
||||
pgf_cfcat_hash_fn
|
||||
};
|
||||
|
||||
@@ -34,6 +34,8 @@
|
||||
|
||||
typedef struct PgfIdContext PgfIdContext;
|
||||
|
||||
typedef GuMap PgfContsMap;
|
||||
|
||||
//
|
||||
// PgfReader
|
||||
//
|
||||
@@ -48,6 +50,7 @@ struct PgfReader {
|
||||
GuSymTable* symtab;
|
||||
PgfConcr* curr_concr;
|
||||
GuMap* curr_lindefs;
|
||||
PgfContsMap* curr_conts_map; // used temporary for building the bu index for the parser
|
||||
GuTypeMap* read_to_map;
|
||||
GuTypeMap* read_new_map;
|
||||
void* curr_key;
|
||||
@@ -440,14 +443,6 @@ pgf_read_to_PgfCCatId(GuType* type, PgfReader* rdr, void* to)
|
||||
*pto = ccat;
|
||||
}
|
||||
|
||||
void
|
||||
pgf_parser_bu_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod,
|
||||
GuPool *pool);
|
||||
|
||||
void
|
||||
pgf_lzr_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod,
|
||||
GuPool *pool);
|
||||
|
||||
static void
|
||||
pgf_read_to_PgfCCat(GuType* type, PgfReader* rdr, void* to)
|
||||
{
|
||||
@@ -488,9 +483,6 @@ pgf_read_to_PgfCCat(GuType* type, PgfReader* rdr, void* to)
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
|
||||
pgf_parser_bu_index(rdr->curr_concr, ccat, prod, rdr->opool);
|
||||
pgf_lzr_index(rdr->curr_concr, ccat, prod, rdr->opool);
|
||||
}
|
||||
|
||||
ccat->n_synprods = top;
|
||||
@@ -668,6 +660,11 @@ pgf_ccat_set_cnccat(PgfCCat* ccat)
|
||||
return ccat->cnccat;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
GuMapItor fn;
|
||||
PgfReader* rdr;
|
||||
} PgfIndexFn;
|
||||
|
||||
static void
|
||||
pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||
{
|
||||
@@ -677,6 +674,40 @@ pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||
pgf_ccat_set_cnccat(ccat);
|
||||
}
|
||||
|
||||
extern GU_DECLARE_TYPE(PgfContsMap, GuMap);
|
||||
|
||||
void
|
||||
pgf_parser_bu_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod,
|
||||
PgfContsMap* conts_map,
|
||||
GuPool *pool, GuPool *tmp_pool);
|
||||
|
||||
void
|
||||
pgf_lzr_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod,
|
||||
GuPool *pool);
|
||||
|
||||
static void
|
||||
pgf_index_prods(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||
{
|
||||
(void) (key && err);
|
||||
|
||||
PgfIndexFn* clo = (PgfIndexFn*) fn;
|
||||
PgfCCat* ccat = *((PgfCCat**) value);
|
||||
PgfReader *rdr = clo->rdr;
|
||||
|
||||
if (gu_seq_is_null(ccat->prods))
|
||||
return;
|
||||
|
||||
size_t n_prods = gu_seq_length(ccat->prods);
|
||||
for (size_t i = 0; i < n_prods; i++) {
|
||||
PgfProduction prod = gu_seq_get(ccat->prods, PgfProduction, i);
|
||||
|
||||
pgf_parser_bu_index(rdr->curr_concr, ccat, prod,
|
||||
rdr->curr_conts_map,
|
||||
rdr->opool, rdr->tmp_pool);
|
||||
pgf_lzr_index(rdr->curr_concr, ccat, prod, rdr->opool);
|
||||
}
|
||||
}
|
||||
|
||||
static void*
|
||||
pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool,
|
||||
size_t* size_out)
|
||||
@@ -695,19 +726,23 @@ pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool,
|
||||
GuMapType* lindefs_t = gu_type_cast(gu_type(PgfLinDefs), GuMap);
|
||||
rdr->curr_lindefs = gu_map_type_make(lindefs_t, rdr->tmp_pool);
|
||||
pgf_read_into_map(lindefs_t, rdr, rdr->curr_lindefs);
|
||||
rdr->curr_conts_map = gu_map_type_new(PgfContsMap, rdr->tmp_pool);
|
||||
GuMapType* ccats_t = gu_type_cast(gu_type(PgfCCatMap), GuMap);
|
||||
concr->ccats =
|
||||
gu_new_int_map(PgfCCat*, &gu_null_struct, pool);
|
||||
concr->fun_indices = gu_map_type_new(PgfFunIndices, pool);
|
||||
concr->coerce_idx = gu_map_type_new(PgfCoerceIdx, pool);
|
||||
concr->lexicon_idx = gu_map_type_new(PgfLexiconIdx, pool);
|
||||
concr->epsilon_idx = gu_new_buf(struct PgfItemBase*, pool);
|
||||
concr->coerce_idx = gu_map_type_new(PgfCoerceIdx, pool);
|
||||
concr->lexicon_idx = gu_map_type_new(PgfTransitions,pool);
|
||||
concr->epsilon_idx = gu_map_type_new(PgfEpsilonIdx, pool);
|
||||
pgf_read_into_map(ccats_t, rdr, concr->ccats);
|
||||
concr->cnccats = pgf_read_new(rdr, gu_type(PgfCncCatMap), pool, NULL);
|
||||
concr->max_fid = pgf_read_int(rdr);
|
||||
|
||||
GuMapItor fn = { pgf_read_ccat_cb };
|
||||
gu_map_iter(concr->ccats, &fn, NULL);
|
||||
PgfIndexFn clo1 = { { pgf_read_ccat_cb }, rdr };
|
||||
gu_map_iter(concr->ccats, &clo1.fn, NULL);
|
||||
|
||||
PgfIndexFn clo2 = { { pgf_index_prods }, rdr };
|
||||
gu_map_iter(concr->ccats, &clo2.fn, NULL);
|
||||
|
||||
// set the function ids
|
||||
int n_funs = gu_list_length(concr->cncfuns);
|
||||
@@ -822,6 +857,8 @@ pgf_new_reader(GuIn* in, GuPool* opool, GuPool* tmp_pool, GuExn* err)
|
||||
rdr->err = err;
|
||||
rdr->in = in;
|
||||
rdr->curr_concr = NULL;
|
||||
rdr->curr_lindefs = NULL;
|
||||
rdr->curr_conts_map = NULL;
|
||||
rdr->read_to_map = gu_new_type_map(&pgf_read_to_table, tmp_pool);
|
||||
rdr->read_new_map = gu_new_type_map(&pgf_read_new_table, tmp_pool);
|
||||
return rdr;
|
||||
|
||||
Reference in New Issue
Block a user