mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-23 03:32:51 -06:00
libpgf: added index for fast lexicon lookup. Still not perfect
This commit is contained in:
@@ -4,10 +4,10 @@
|
|||||||
#include <gu/variant.h>
|
#include <gu/variant.h>
|
||||||
#include <gu/assert.h>
|
#include <gu/assert.h>
|
||||||
|
|
||||||
PgfCCat pgf_ccat_string = { NULL, NULL, GU_NULL_SEQ, -1 };
|
PgfCCat pgf_ccat_string = { NULL, NULL, 0, GU_NULL_SEQ, -1 };
|
||||||
PgfCCat pgf_ccat_int = { NULL, NULL, GU_NULL_SEQ, -2 };
|
PgfCCat pgf_ccat_int = { NULL, NULL, 0, GU_NULL_SEQ, -2 };
|
||||||
PgfCCat pgf_ccat_float = { NULL, NULL, GU_NULL_SEQ, -3 };
|
PgfCCat pgf_ccat_float = { NULL, NULL, 0, GU_NULL_SEQ, -3 };
|
||||||
PgfCCat pgf_ccat_var = { NULL, NULL, GU_NULL_SEQ, -4 };
|
PgfCCat pgf_ccat_var = { NULL, NULL, 0, GU_NULL_SEQ, -4 };
|
||||||
|
|
||||||
PgfCCatId
|
PgfCCatId
|
||||||
pgf_literal_cat(PgfLiteral lit)
|
pgf_literal_cat(PgfLiteral lit)
|
||||||
|
|||||||
@@ -173,6 +173,7 @@ struct PgfAlternative {
|
|||||||
struct PgfCCat {
|
struct PgfCCat {
|
||||||
PgfCncCat* cnccat;
|
PgfCncCat* cnccat;
|
||||||
PgfFunIds* lindefs;
|
PgfFunIds* lindefs;
|
||||||
|
size_t n_synprods;
|
||||||
PgfProductionSeq prods;
|
PgfProductionSeq prods;
|
||||||
int fid;
|
int fid;
|
||||||
};
|
};
|
||||||
@@ -188,12 +189,19 @@ extern GU_DECLARE_TYPE(PgfFunIndices, GuStringMap);
|
|||||||
typedef GuMap PgfCoerceIdx;
|
typedef GuMap PgfCoerceIdx;
|
||||||
extern GU_DECLARE_TYPE(PgfCoerceIdx, GuMap);
|
extern GU_DECLARE_TYPE(PgfCoerceIdx, GuMap);
|
||||||
|
|
||||||
|
typedef GuStringMap PgfLexiconIdx;
|
||||||
|
extern GU_DECLARE_TYPE(PgfLexiconIdx, GuStringMap);
|
||||||
|
|
||||||
|
typedef GuBuf PgfEpsilonIdx;
|
||||||
|
|
||||||
struct PgfConcr {
|
struct PgfConcr {
|
||||||
PgfFlags* cflags;
|
PgfFlags* cflags;
|
||||||
PgfPrintNames* printnames;
|
PgfPrintNames* printnames;
|
||||||
GuMap* ccats;
|
GuMap* ccats;
|
||||||
PgfFunIndices* fun_indices;
|
PgfFunIndices* fun_indices;
|
||||||
PgfCoerceIdx* coerce_idx;
|
PgfCoerceIdx* coerce_idx;
|
||||||
|
PgfLexiconIdx* lexicon_idx;
|
||||||
|
PgfEpsilonIdx* epsilon_idx;
|
||||||
PgfCncFuns* cncfuns;
|
PgfCncFuns* cncfuns;
|
||||||
PgfSequences* sequences;
|
PgfSequences* sequences;
|
||||||
PgfCIdMap* cnccats;
|
PgfCIdMap* cnccats;
|
||||||
|
|||||||
@@ -104,14 +104,12 @@ static GU_DEFINE_TYPE(PgfInferMap, GuMap,
|
|||||||
gu_ptr_type(PgfCCatIds), pgf_lzr_cats_hasher,
|
gu_ptr_type(PgfCCatIds), pgf_lzr_cats_hasher,
|
||||||
gu_ptr_type(PgfLinInfers), &gu_null_struct);
|
gu_ptr_type(PgfLinInfers), &gu_null_struct);
|
||||||
|
|
||||||
typedef GuStringMap PgfFunIndices;
|
|
||||||
GU_DEFINE_TYPE(PgfFunIndices, GuStringMap, gu_ptr_type(PgfInferMap),
|
GU_DEFINE_TYPE(PgfFunIndices, GuStringMap, gu_ptr_type(PgfInferMap),
|
||||||
&gu_null_struct);
|
&gu_null_struct);
|
||||||
|
|
||||||
typedef GuBuf PgfCCatBuf;
|
typedef GuBuf PgfCCatBuf;
|
||||||
static GU_DEFINE_TYPE(PgfCCatBuf, GuBuf, gu_ptr_type(PgfCCat));
|
static GU_DEFINE_TYPE(PgfCCatBuf, GuBuf, gu_ptr_type(PgfCCat));
|
||||||
|
|
||||||
typedef GuMap PgfCoerceIdx;
|
|
||||||
GU_DEFINE_TYPE(PgfCoerceIdx, GuMap,
|
GU_DEFINE_TYPE(PgfCoerceIdx, GuMap,
|
||||||
gu_type(PgfCCat), NULL,
|
gu_type(PgfCCat), NULL,
|
||||||
gu_ptr_type(PgfCCatBuf), &gu_null_struct);
|
gu_ptr_type(PgfCCatBuf), &gu_null_struct);
|
||||||
@@ -155,7 +153,7 @@ pgf_lzr_add_infer_entry(
|
|||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
pgf_lzr_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod,
|
pgf_lzr_index(PgfConcr* concr, PgfCCat* ccat, PgfProduction prod,
|
||||||
GuPool *pool)
|
GuPool *pool)
|
||||||
{
|
{
|
||||||
void* data = gu_variant_data(prod);
|
void* data = gu_variant_data(prod);
|
||||||
@@ -164,27 +162,27 @@ pgf_lzr_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod,
|
|||||||
PgfProductionApply* papply = data;
|
PgfProductionApply* papply = data;
|
||||||
PgfInferMap* infer =
|
PgfInferMap* infer =
|
||||||
gu_map_get(concr->fun_indices, &papply->fun->fun,
|
gu_map_get(concr->fun_indices, &papply->fun->fun,
|
||||||
PgfInferMap*);
|
PgfInferMap*);
|
||||||
gu_debug("index: %s -> %d", papply->fun->fun, cat->fid);
|
gu_debug("index: %s -> %d", papply->fun->fun, ccat->fid);
|
||||||
if (!infer) {
|
if (!infer) {
|
||||||
infer = gu_map_type_new(PgfInferMap, pool);
|
infer = gu_map_type_new(PgfInferMap, pool);
|
||||||
gu_map_put(concr->fun_indices,
|
gu_map_put(concr->fun_indices,
|
||||||
&papply->fun->fun, PgfInferMap*, infer);
|
&papply->fun->fun, PgfInferMap*, infer);
|
||||||
}
|
}
|
||||||
pgf_lzr_add_infer_entry(infer, cat, papply, pool);
|
pgf_lzr_add_infer_entry(infer, ccat, papply, pool);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case PGF_PRODUCTION_COERCE: {
|
case PGF_PRODUCTION_COERCE: {
|
||||||
PgfProductionCoerce* pcoerce = data;
|
PgfProductionCoerce* pcoerce = data;
|
||||||
PgfCCatBuf* cats = gu_map_get(concr->coerce_idx, pcoerce->coerce,
|
PgfCCatBuf* cats = gu_map_get(concr->coerce_idx, pcoerce->coerce,
|
||||||
PgfCCatBuf*);
|
PgfCCatBuf*);
|
||||||
if (!cats) {
|
if (!cats) {
|
||||||
cats = gu_new_buf(PgfCCat*, pool);
|
cats = gu_new_buf(PgfCCat*, pool);
|
||||||
gu_map_put(concr->coerce_idx,
|
gu_map_put(concr->coerce_idx,
|
||||||
pcoerce->coerce, PgfCCatBuf*, cats);
|
pcoerce->coerce, PgfCCatBuf*, cats);
|
||||||
}
|
}
|
||||||
gu_debug("coerce_idx: %d -> %d", pcoerce->coerce->fid, cat->fid);
|
gu_debug("coerce_idx: %d -> %d", pcoerce->coerce->fid, ccat->fid);
|
||||||
gu_buf_push(cats, PgfCCat*, cat);
|
gu_buf_push(cats, PgfCCat*, ccat);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
|
|||||||
@@ -12,7 +12,6 @@ typedef GuBuf PgfItemBuf;
|
|||||||
typedef GuList(PgfItemBuf*) PgfItemBufs;
|
typedef GuList(PgfItemBuf*) PgfItemBufs;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// GuString -> PgfItemBuf*
|
// GuString -> PgfItemBuf*
|
||||||
typedef GuMap PgfTransitions;
|
typedef GuMap PgfTransitions;
|
||||||
|
|
||||||
@@ -84,9 +83,13 @@ struct PgfParsing {
|
|||||||
PgfGenCatMap* generated_cats;
|
PgfGenCatMap* generated_cats;
|
||||||
PgfCCatBuf* completed;
|
PgfCCatBuf* completed;
|
||||||
PgfLexCallback* callback;
|
PgfLexCallback* callback;
|
||||||
|
GuBuf *lexicon_idx, *epsilon_idx;
|
||||||
int max_fid;
|
int max_fid;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
GU_DEFINE_TYPE(PgfLexiconIdx, GuStringMap, gu_ptr_type(GuBuf),
|
||||||
|
&gu_null_struct);
|
||||||
|
|
||||||
#ifdef PGF_PARSER_DEBUG
|
#ifdef PGF_PARSER_DEBUG
|
||||||
static void
|
static void
|
||||||
pgf_print_production(int fid, PgfProduction prod, GuWriter *wtr, GuExn* err)
|
pgf_print_production(int fid, PgfProduction prod, GuWriter *wtr, GuExn* err)
|
||||||
@@ -187,6 +190,93 @@ pgf_print_item(PgfItem* item, GuWriter* wtr, GuExn* err)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static void
|
||||||
|
pgf_parser_bu_add_entry(PgfConcr* concr, PgfTokens tokens,
|
||||||
|
PgfCCat* ccat, size_t lin_idx,
|
||||||
|
PgfProduction prod,
|
||||||
|
GuPool *pool)
|
||||||
|
{
|
||||||
|
PgfToken tok = gu_seq_get(tokens, PgfToken, 0);
|
||||||
|
|
||||||
|
GuBuf* items = gu_map_get(concr->lexicon_idx, &tok, GuBuf*);
|
||||||
|
if (items == NULL) {
|
||||||
|
items = gu_new_buf(PgfItemBase*, pool);
|
||||||
|
gu_map_put(concr->lexicon_idx, &tok, GuBuf*, items);
|
||||||
|
}
|
||||||
|
|
||||||
|
PgfItemBase* base = gu_new(PgfItemBase, pool);
|
||||||
|
base->ccat = ccat;
|
||||||
|
base->lin_idx = lin_idx;
|
||||||
|
base->prod = prod;
|
||||||
|
base->conts = NULL;
|
||||||
|
|
||||||
|
gu_buf_push(items, PgfItemBase*, base);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
pgf_parser_bu_index(PgfConcr* concr, PgfCCat* ccat, PgfProduction prod,
|
||||||
|
GuPool *pool)
|
||||||
|
{
|
||||||
|
GuVariantInfo i = gu_variant_open(prod);
|
||||||
|
switch (i.tag) {
|
||||||
|
case PGF_PRODUCTION_APPLY: {
|
||||||
|
PgfProductionApply* papp = i.data;
|
||||||
|
|
||||||
|
for (size_t lin_idx = 0; lin_idx < papp->fun->n_lins; lin_idx++) {
|
||||||
|
PgfSequence seq = papp->fun->lins[lin_idx];
|
||||||
|
if (gu_seq_length(seq) > 0) {
|
||||||
|
PgfSymbol sym = gu_seq_get(seq, PgfSymbol, 0);
|
||||||
|
GuVariantInfo i = gu_variant_open(sym);
|
||||||
|
switch (i.tag) {
|
||||||
|
case PGF_SYMBOL_CAT: {
|
||||||
|
PgfSymbolCat* scat = i.data;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PGF_SYMBOL_KS: {
|
||||||
|
PgfSymbolKS* sks = gu_variant_data(sym);
|
||||||
|
pgf_parser_bu_add_entry(concr, sks->tokens,
|
||||||
|
ccat, lin_idx, prod, pool);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PGF_SYMBOL_KP: {
|
||||||
|
PgfSymbolKP* skp = gu_variant_data(sym);
|
||||||
|
pgf_parser_bu_add_entry(concr, skp->default_form,
|
||||||
|
ccat, lin_idx, prod, pool);
|
||||||
|
for (size_t i = 0; i < skp->n_forms; i++) {
|
||||||
|
pgf_parser_bu_add_entry(concr, skp->forms[i].form,
|
||||||
|
ccat, lin_idx, prod, pool);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PGF_SYMBOL_LIT:
|
||||||
|
// XXX TODO proper support
|
||||||
|
break;
|
||||||
|
case PGF_SYMBOL_VAR:
|
||||||
|
// XXX TODO proper support
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
gu_impossible();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
PgfItemBase* base = gu_new(PgfItemBase, pool);
|
||||||
|
base->ccat = ccat;
|
||||||
|
base->lin_idx = lin_idx;
|
||||||
|
base->prod = prod;
|
||||||
|
base->conts = NULL;
|
||||||
|
|
||||||
|
gu_buf_push(concr->epsilon_idx, PgfItemBase*, base);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case PGF_PRODUCTION_COERCE: {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
gu_impossible();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pgf_parsing_add_transition(PgfParsing* parsing, PgfToken tok, PgfItem* item)
|
pgf_parsing_add_transition(PgfParsing* parsing, PgfToken tok, PgfItem* item)
|
||||||
{
|
{
|
||||||
@@ -230,6 +320,7 @@ pgf_parsing_create_completed(PgfParsing* parsing, PgfItemBuf* conts,
|
|||||||
cat->cnccat = cnccat;
|
cat->cnccat = cnccat;
|
||||||
cat->fid = parsing->max_fid++;
|
cat->fid = parsing->max_fid++;
|
||||||
cat->prods = gu_buf_seq(gu_new_buf(PgfProduction, parsing->pool));
|
cat->prods = gu_buf_seq(gu_new_buf(PgfProduction, parsing->pool));
|
||||||
|
cat->n_synprods = 0;
|
||||||
gu_map_put(parsing->generated_cats, conts, PgfCCat*, cat);
|
gu_map_put(parsing->generated_cats, conts, PgfCCat*, cat);
|
||||||
return cat;
|
return cat;
|
||||||
}
|
}
|
||||||
@@ -399,6 +490,7 @@ pgf_parsing_complete(PgfParsing* parsing, PgfItem* item)
|
|||||||
|
|
||||||
GuBuf* prodbuf = gu_seq_buf(cat->prods);
|
GuBuf* prodbuf = gu_seq_buf(cat->prods);
|
||||||
gu_buf_push(prodbuf, PgfProduction, prod);
|
gu_buf_push(prodbuf, PgfProduction, prod);
|
||||||
|
cat->n_synprods++;
|
||||||
|
|
||||||
#ifdef PGF_PARSER_DEBUG
|
#ifdef PGF_PARSER_DEBUG
|
||||||
GuPool* tmp_pool = gu_new_pool();
|
GuPool* tmp_pool = gu_new_pool();
|
||||||
@@ -440,30 +532,60 @@ pgf_parsing_complete(PgfParsing* parsing, PgfItem* item)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
pgf_parsing_bu_predict(PgfParsing* parsing, GuBuf* items,
|
||||||
|
PgfCCat* ccat, size_t lin_idx,
|
||||||
|
PgfItemBuf* conts)
|
||||||
|
{
|
||||||
|
if (items != NULL) {
|
||||||
|
size_t n_items = gu_buf_length(items);
|
||||||
|
for (size_t i = 0; i < n_items; i++) {
|
||||||
|
PgfItemBase* base = gu_buf_get(items, PgfItemBase*, i);
|
||||||
|
|
||||||
|
if (base->ccat == ccat && base->lin_idx == lin_idx) {
|
||||||
|
GuVariantInfo i = gu_variant_open(base->prod);
|
||||||
|
switch (i.tag) {
|
||||||
|
case PGF_PRODUCTION_APPLY: {
|
||||||
|
PgfProductionApply* papp = i.data;
|
||||||
|
if (gu_seq_length(papp->args) == 0) {
|
||||||
|
pgf_parsing_production(parsing, ccat, lin_idx,
|
||||||
|
base->prod, conts);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pgf_parsing_predict(PgfParsing* parsing, PgfItem* item,
|
pgf_parsing_predict(PgfParsing* parsing, PgfItem* item,
|
||||||
PgfCCat* cat, size_t lin_idx)
|
PgfCCat* ccat, size_t lin_idx)
|
||||||
{
|
{
|
||||||
gu_enter("-> cat: %d", cat->fid);
|
gu_enter("-> cat: %d", ccat->fid);
|
||||||
if (gu_seq_is_null(cat->prods)) {
|
if (gu_seq_is_null(ccat->prods)) {
|
||||||
// Empty category
|
// Empty category
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
PgfItemBuf* conts = pgf_parsing_get_conts(parsing, cat, lin_idx);
|
PgfItemBuf* conts = pgf_parsing_get_conts(parsing, ccat, lin_idx);
|
||||||
gu_buf_push(conts, PgfItem*, item);
|
gu_buf_push(conts, PgfItem*, item);
|
||||||
if (gu_buf_length(conts) == 1) {
|
if (gu_buf_length(conts) == 1) {
|
||||||
/* First time we encounter this linearization
|
/* First time we encounter this linearization
|
||||||
* of this category at the current position,
|
* of this category at the current position,
|
||||||
* so predict it. */
|
* so predict it. */
|
||||||
PgfProductionSeq prods = cat->prods;
|
PgfProductionSeq prods = ccat->prods;
|
||||||
size_t n_prods = gu_seq_length(prods);
|
for (size_t i = 0; i < ccat->n_synprods; i++) {
|
||||||
for (size_t i = 0; i < n_prods; i++) {
|
|
||||||
PgfProduction prod =
|
PgfProduction prod =
|
||||||
gu_seq_get(prods, PgfProduction, i);
|
gu_seq_get(prods, PgfProduction, i);
|
||||||
pgf_parsing_production(parsing, cat, lin_idx,
|
pgf_parsing_production(parsing, ccat, lin_idx,
|
||||||
prod, conts);
|
prod, conts);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pgf_parsing_bu_predict(parsing, parsing->lexicon_idx,
|
||||||
|
ccat, lin_idx, conts);
|
||||||
|
pgf_parsing_bu_predict(parsing, parsing->epsilon_idx,
|
||||||
|
ccat, lin_idx, conts);
|
||||||
} else {
|
} else {
|
||||||
/* If it has already been completed, combine. */
|
/* If it has already been completed, combine. */
|
||||||
PgfCCat* completed =
|
PgfCCat* completed =
|
||||||
@@ -618,7 +740,7 @@ pgf_parsing_item(PgfParsing* parsing, PgfItem* item)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static PgfParsing*
|
static PgfParsing*
|
||||||
pgf_new_parsing(PgfLexCallback* callback, int max_fid,
|
pgf_new_parsing(PgfConcr* concr, PgfLexCallback* callback, int max_fid,
|
||||||
GuPool* parse_pool, GuPool* out_pool)
|
GuPool* parse_pool, GuPool* out_pool)
|
||||||
{
|
{
|
||||||
PgfParsing* parsing = gu_new(PgfParsing, out_pool);
|
PgfParsing* parsing = gu_new(PgfParsing, out_pool);
|
||||||
@@ -626,6 +748,8 @@ pgf_new_parsing(PgfLexCallback* callback, int max_fid,
|
|||||||
parsing->conts_map = gu_map_type_new(PgfContsMap, out_pool);
|
parsing->conts_map = gu_map_type_new(PgfContsMap, out_pool);
|
||||||
parsing->completed = gu_new_buf(PgfCCat*, parse_pool);
|
parsing->completed = gu_new_buf(PgfCCat*, parse_pool);
|
||||||
parsing->callback = callback;
|
parsing->callback = callback;
|
||||||
|
parsing->lexicon_idx = NULL;
|
||||||
|
parsing->epsilon_idx = concr->epsilon_idx;
|
||||||
parsing->pool = parse_pool;
|
parsing->pool = parse_pool;
|
||||||
parsing->tmp_pool = out_pool;
|
parsing->tmp_pool = out_pool;
|
||||||
parsing->max_fid = max_fid;
|
parsing->max_fid = max_fid;
|
||||||
@@ -666,7 +790,9 @@ pgf_parse_token(PgfParse* parse, PgfToken tok, GuPool* pool)
|
|||||||
PgfParseTokenCallback clo = {{ pgf_match_token }, tok, agenda};
|
PgfParseTokenCallback clo = {{ pgf_match_token }, tok, agenda};
|
||||||
|
|
||||||
GuPool* tmp_pool = gu_new_pool();
|
GuPool* tmp_pool = gu_new_pool();
|
||||||
PgfParsing* parsing = pgf_new_parsing(&clo.fn, parse->max_fid, pool, tmp_pool);
|
PgfParsing* parsing = pgf_new_parsing(parse->concr, &clo.fn, parse->max_fid, pool, tmp_pool);
|
||||||
|
parsing->lexicon_idx = gu_map_get(parse->concr->lexicon_idx, &tok, GuBuf*);
|
||||||
|
|
||||||
size_t n_items = gu_buf_length(parse->agenda);
|
size_t n_items = gu_buf_length(parse->agenda);
|
||||||
for (size_t i = 0; i < n_items; i++) {
|
for (size_t i = 0; i < n_items; i++) {
|
||||||
PgfItem* item = gu_buf_get(parse->agenda, PgfItem*, i);
|
PgfItem* item = gu_buf_get(parse->agenda, PgfItem*, i);
|
||||||
@@ -779,7 +905,7 @@ pgf_parse_result(PgfParse* parse, GuPool* pool)
|
|||||||
PgfLexCallback fn = { pgf_noop };
|
PgfLexCallback fn = { pgf_noop };
|
||||||
|
|
||||||
GuPool* tmp_pool = gu_new_pool();
|
GuPool* tmp_pool = gu_new_pool();
|
||||||
PgfParsing* parsing = pgf_new_parsing(&fn, parse->max_fid, pool, tmp_pool);
|
PgfParsing* parsing = pgf_new_parsing(parse->concr, &fn, parse->max_fid, pool, tmp_pool);
|
||||||
size_t n_items = gu_buf_length(parse->agenda);
|
size_t n_items = gu_buf_length(parse->agenda);
|
||||||
for (size_t i = 0; i < n_items; i++) {
|
for (size_t i = 0; i < n_items; i++) {
|
||||||
PgfItem* item = gu_buf_get(parse->agenda, PgfItem*, i);
|
PgfItem* item = gu_buf_get(parse->agenda, PgfItem*, i);
|
||||||
|
|||||||
@@ -45,11 +45,8 @@ struct PgfReader {
|
|||||||
GuExn* err;
|
GuExn* err;
|
||||||
GuPool* opool;
|
GuPool* opool;
|
||||||
GuSymTable* symtab;
|
GuSymTable* symtab;
|
||||||
PgfSequences* curr_sequences;
|
PgfConcr* curr_concr;
|
||||||
PgfCncFuns* curr_cncfuns;
|
|
||||||
GuMap* curr_ccats;
|
|
||||||
GuMap* curr_lindefs;
|
GuMap* curr_lindefs;
|
||||||
GuMap* curr_coercions;
|
|
||||||
GuTypeMap* read_to_map;
|
GuTypeMap* read_to_map;
|
||||||
GuTypeMap* read_new_map;
|
GuTypeMap* read_new_map;
|
||||||
void* curr_key;
|
void* curr_key;
|
||||||
@@ -329,12 +326,8 @@ pgf_read_to_alias(GuType* type, PgfReader* rdr, void* to)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pgf_read_into_map(GuMapType* mtype, PgfReader* rdr, GuMap* map, GuPool* pool)
|
pgf_read_into_map(GuMapType* mtype, PgfReader* rdr, GuMap* map)
|
||||||
{
|
{
|
||||||
/* The parameter pool is the temporary pool used to store the
|
|
||||||
map. But the actual values need to be more persistent so we
|
|
||||||
store them in rdr->opool. */
|
|
||||||
(void) pool;
|
|
||||||
GuPool* tmp_pool = gu_new_pool();
|
GuPool* tmp_pool = gu_new_pool();
|
||||||
void* key = NULL;
|
void* key = NULL;
|
||||||
GuLength len = pgf_read_len(rdr);
|
GuLength len = pgf_read_len(rdr);
|
||||||
@@ -368,7 +361,7 @@ pgf_read_new_GuMap(GuType* type, PgfReader* rdr, GuPool* pool, size_t* size_out)
|
|||||||
(void) size_out;
|
(void) size_out;
|
||||||
GuMapType* mtype = (GuMapType*) type;
|
GuMapType* mtype = (GuMapType*) type;
|
||||||
GuMap* map = gu_map_type_make(mtype, pool);
|
GuMap* map = gu_map_type_make(mtype, pool);
|
||||||
pgf_read_into_map(mtype, rdr, map, pool);
|
pgf_read_into_map(mtype, rdr, map);
|
||||||
gu_return_on_exn(rdr->err, NULL);
|
gu_return_on_exn(rdr->err, NULL);
|
||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
@@ -432,7 +425,7 @@ pgf_read_to_PgfCCatId(GuType* type, PgfReader* rdr, void* to)
|
|||||||
int fid = pgf_read_int(rdr);
|
int fid = pgf_read_int(rdr);
|
||||||
gu_return_on_exn(rdr->err,);
|
gu_return_on_exn(rdr->err,);
|
||||||
|
|
||||||
PgfCCat* ccat = gu_map_get(rdr->curr_ccats, &fid, PgfCCat*);
|
PgfCCat* ccat = gu_map_get(rdr->curr_concr->ccats, &fid, PgfCCat*);
|
||||||
if (!ccat) {
|
if (!ccat) {
|
||||||
ccat = gu_new(PgfCCat, rdr->opool);
|
ccat = gu_new(PgfCCat, rdr->opool);
|
||||||
ccat->cnccat = NULL;
|
ccat->cnccat = NULL;
|
||||||
@@ -440,12 +433,20 @@ pgf_read_to_PgfCCatId(GuType* type, PgfReader* rdr, void* to)
|
|||||||
ccat->prods = gu_null_seq;
|
ccat->prods = gu_null_seq;
|
||||||
ccat->fid = fid;
|
ccat->fid = fid;
|
||||||
|
|
||||||
gu_map_put(rdr->curr_ccats, &fid, PgfCCat*, ccat);
|
gu_map_put(rdr->curr_concr->ccats, &fid, PgfCCat*, ccat);
|
||||||
}
|
}
|
||||||
|
|
||||||
*pto = ccat;
|
*pto = ccat;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
pgf_parser_bu_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod,
|
||||||
|
GuPool *pool);
|
||||||
|
|
||||||
|
void
|
||||||
|
pgf_lzr_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod,
|
||||||
|
GuPool *pool);
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pgf_read_to_PgfCCat(GuType* type, PgfReader* rdr, void* to)
|
pgf_read_to_PgfCCat(GuType* type, PgfReader* rdr, void* to)
|
||||||
{
|
{
|
||||||
@@ -453,11 +454,46 @@ pgf_read_to_PgfCCat(GuType* type, PgfReader* rdr, void* to)
|
|||||||
gu_enter("->");
|
gu_enter("->");
|
||||||
int* fidp = rdr->curr_key;
|
int* fidp = rdr->curr_key;
|
||||||
|
|
||||||
|
GuLength n_prods = pgf_read_len(rdr);
|
||||||
|
gu_return_on_exn(rdr->err, );
|
||||||
|
|
||||||
PgfCCat* ccat = to;
|
PgfCCat* ccat = to;
|
||||||
ccat->cnccat = NULL;
|
ccat->cnccat = NULL;
|
||||||
ccat->lindefs = gu_map_get(rdr->curr_lindefs, fidp, PgfFunIds*);
|
ccat->lindefs = gu_map_get(rdr->curr_lindefs, fidp, PgfFunIds*);
|
||||||
pgf_read_to(rdr, gu_type(PgfProductionSeq), &ccat->prods);
|
ccat->prods = gu_new_seq(PgfProduction, n_prods, rdr->opool);
|
||||||
ccat->fid = *fidp;
|
ccat->fid = *fidp;
|
||||||
|
|
||||||
|
size_t top = 0;
|
||||||
|
size_t bot = n_prods-1;
|
||||||
|
for (size_t i = 0; i < n_prods; i++) {
|
||||||
|
PgfProduction prod;
|
||||||
|
pgf_read_to(rdr, gu_type(PgfProduction), &prod);
|
||||||
|
gu_return_on_exn(rdr->err, );
|
||||||
|
|
||||||
|
GuVariantInfo i = gu_variant_open(prod);
|
||||||
|
switch (i.tag) {
|
||||||
|
case PGF_PRODUCTION_APPLY: {
|
||||||
|
PgfProductionApply* papp = i.data;
|
||||||
|
if (gu_seq_length(papp->args) > 0)
|
||||||
|
gu_seq_set(ccat->prods, PgfProduction, top++, prod);
|
||||||
|
else
|
||||||
|
gu_seq_set(ccat->prods, PgfProduction, bot--, prod);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PGF_PRODUCTION_COERCE: {
|
||||||
|
gu_seq_set(ccat->prods, PgfProduction, top++, prod);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
gu_impossible();
|
||||||
|
}
|
||||||
|
|
||||||
|
pgf_parser_bu_index(rdr->curr_concr, ccat, prod, rdr->opool);
|
||||||
|
pgf_lzr_index(rdr->curr_concr, ccat, prod, rdr->opool);
|
||||||
|
}
|
||||||
|
|
||||||
|
ccat->n_synprods = top;
|
||||||
|
|
||||||
gu_exit("<-");
|
gu_exit("<-");
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -467,10 +503,10 @@ static void*
|
|||||||
pgf_read_new_PgfCCat(GuType* type, PgfReader* rdr, GuPool* pool,
|
pgf_read_new_PgfCCat(GuType* type, PgfReader* rdr, GuPool* pool,
|
||||||
size_t* size_out)
|
size_t* size_out)
|
||||||
{
|
{
|
||||||
PgfCCat* ccat = gu_map_get(rdr->curr_ccats, rdr->curr_key, PgfCCat*);
|
PgfCCat* ccat = gu_map_get(rdr->curr_concr->ccats, rdr->curr_key, PgfCCat*);
|
||||||
if (!ccat) {
|
if (!ccat) {
|
||||||
ccat = gu_new(PgfCCat, pool);
|
ccat = gu_new(PgfCCat, pool);
|
||||||
gu_map_put(rdr->curr_ccats, rdr->curr_key, PgfCCat*, ccat);
|
gu_map_put(rdr->curr_concr->ccats, rdr->curr_key, PgfCCat*, ccat);
|
||||||
}
|
}
|
||||||
pgf_read_to_PgfCCat(type, rdr, ccat);
|
pgf_read_to_PgfCCat(type, rdr, ccat);
|
||||||
*size_out = sizeof(PgfCCat);
|
*size_out = sizeof(PgfCCat);
|
||||||
@@ -558,41 +594,17 @@ pgf_read_to_PgfEquationsM(GuType* type, PgfReader* rdr, void* to)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void*
|
|
||||||
pgf_read_new_idarray(GuType* type, PgfReader* rdr, GuPool* pool,
|
|
||||||
size_t* size_out)
|
|
||||||
{
|
|
||||||
(void) type;
|
|
||||||
void* list = pgf_read_new_GuList(type, rdr, rdr->opool, size_out);
|
|
||||||
if (type == gu_type(PgfSequences)) {
|
|
||||||
rdr->curr_sequences = list;
|
|
||||||
} else if (type == gu_type(PgfCncFuns)) {
|
|
||||||
rdr->curr_cncfuns = list;
|
|
||||||
|
|
||||||
// set the function ids
|
|
||||||
int n_funs = gu_list_length(rdr->curr_cncfuns);
|
|
||||||
for (int funid = 0; funid < n_funs; funid++) {
|
|
||||||
PgfCncFun* cncfun = gu_list_index(rdr->curr_cncfuns, funid);
|
|
||||||
cncfun->funid = funid;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
gu_impossible();
|
|
||||||
}
|
|
||||||
return list;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pgf_read_to_PgfSeqId(GuType* type, PgfReader* rdr, void* to)
|
pgf_read_to_PgfSeqId(GuType* type, PgfReader* rdr, void* to)
|
||||||
{
|
{
|
||||||
(void) type;
|
(void) type;
|
||||||
int32_t id = pgf_read_int(rdr);
|
int32_t id = pgf_read_int(rdr);
|
||||||
gu_return_on_exn(rdr->err,);
|
gu_return_on_exn(rdr->err,);
|
||||||
if (id < 0 || id >= gu_list_length(rdr->curr_sequences)) {
|
if (id < 0 || id >= gu_list_length(rdr->curr_concr->sequences)) {
|
||||||
gu_raise(rdr->err, PgfReadExn);
|
gu_raise(rdr->err, PgfReadExn);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
*(PgfSeqId*) to = gu_list_elems(rdr->curr_sequences)[id];
|
*(PgfSeqId*) to = gu_list_elems(rdr->curr_concr->sequences)[id];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -602,11 +614,11 @@ pgf_read_to_PgfFunId(GuType* type, PgfReader* rdr, void* to)
|
|||||||
(void) type;
|
(void) type;
|
||||||
int32_t id = pgf_read_int(rdr);
|
int32_t id = pgf_read_int(rdr);
|
||||||
gu_return_on_exn(rdr->err,);
|
gu_return_on_exn(rdr->err,);
|
||||||
if (id < 0 || id >= gu_list_length(rdr->curr_cncfuns)) {
|
if (id < 0 || id >= gu_list_length(rdr->curr_concr->cncfuns)) {
|
||||||
gu_raise(rdr->err, PgfReadExn);
|
gu_raise(rdr->err, PgfReadExn);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
*(PgfFunId*) to = gu_list_elems(rdr->curr_cncfuns)[id];
|
*(PgfFunId*) to = gu_list_elems(rdr->curr_concr->cncfuns)[id];
|
||||||
}
|
}
|
||||||
|
|
||||||
static GU_DEFINE_TYPE(PgfLinDefs, GuIntMap, gu_ptr_type(PgfFunIds),
|
static GU_DEFINE_TYPE(PgfLinDefs, GuIntMap, gu_ptr_type(PgfFunIds),
|
||||||
@@ -655,32 +667,13 @@ pgf_ccat_set_cnccat(PgfCCat* ccat)
|
|||||||
return ccat->cnccat;
|
return ccat->cnccat;
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
GuMapItor fn;
|
|
||||||
PgfConcr* concr;
|
|
||||||
GuPool *pool;
|
|
||||||
} PgfIndexFn;
|
|
||||||
|
|
||||||
void
|
|
||||||
pgf_lzr_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod,
|
|
||||||
GuPool *pool);
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||||
{
|
{
|
||||||
(void) (key && err);
|
(void) (key && err);
|
||||||
PgfIndexFn* clo = (PgfIndexFn*) fn;
|
|
||||||
PgfCCat* ccat = *((PgfCCat**) value);
|
PgfCCat* ccat = *((PgfCCat**) value);
|
||||||
|
|
||||||
pgf_ccat_set_cnccat(ccat);
|
pgf_ccat_set_cnccat(ccat);
|
||||||
|
|
||||||
if (!gu_seq_is_null(ccat->prods)) {
|
|
||||||
size_t n_prods = gu_seq_length(ccat->prods);
|
|
||||||
for (size_t i = 0; i < n_prods; i++) {
|
|
||||||
PgfProduction prod = gu_seq_get(ccat->prods, PgfProduction, i);
|
|
||||||
pgf_lzr_index(clo->concr, ccat, prod, clo->pool);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void*
|
static void*
|
||||||
@@ -689,30 +682,39 @@ pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool,
|
|||||||
{
|
{
|
||||||
(void) (type && size_out);
|
(void) (type && size_out);
|
||||||
PgfConcr* concr = gu_new(PgfConcr, pool);
|
PgfConcr* concr = gu_new(PgfConcr, pool);
|
||||||
|
rdr->curr_concr = concr;
|
||||||
concr->cflags =
|
concr->cflags =
|
||||||
pgf_read_new(rdr, gu_type(PgfFlags), pool, NULL);
|
pgf_read_new(rdr, gu_type(PgfFlags), pool, NULL);
|
||||||
concr->printnames =
|
concr->printnames =
|
||||||
pgf_read_new(rdr, gu_type(PgfPrintNames), pool, NULL);
|
pgf_read_new(rdr, gu_type(PgfPrintNames), pool, NULL);
|
||||||
concr->sequences =
|
concr->sequences =
|
||||||
pgf_read_new(rdr, gu_type(PgfSequences), rdr->opool, NULL);
|
pgf_read_new(rdr, gu_type(PgfSequences), pool, NULL);
|
||||||
concr->cncfuns =
|
concr->cncfuns =
|
||||||
pgf_read_new(rdr, gu_type(PgfCncFuns), pool, NULL);
|
pgf_read_new(rdr, gu_type(PgfCncFuns), pool, NULL);
|
||||||
GuMapType* lindefs_t = gu_type_cast(gu_type(PgfLinDefs), GuMap);
|
GuMapType* lindefs_t = gu_type_cast(gu_type(PgfLinDefs), GuMap);
|
||||||
rdr->curr_lindefs = gu_map_type_make(lindefs_t, pool);
|
rdr->curr_lindefs = gu_map_type_make(lindefs_t, pool);
|
||||||
pgf_read_into_map(lindefs_t, rdr, rdr->curr_lindefs, rdr->opool);
|
pgf_read_into_map(lindefs_t, rdr, rdr->curr_lindefs);
|
||||||
GuMapType* ccats_t = gu_type_cast(gu_type(PgfCCatMap), GuMap);
|
GuMapType* ccats_t = gu_type_cast(gu_type(PgfCCatMap), GuMap);
|
||||||
concr->ccats =
|
concr->ccats =
|
||||||
gu_new_int_map(PgfCCat*, &gu_null_struct, pool);
|
gu_new_int_map(PgfCCat*, &gu_null_struct, pool);
|
||||||
concr->fun_indices = gu_map_type_new(PgfFunIndices, pool);
|
concr->fun_indices = gu_map_type_new(PgfFunIndices, pool);
|
||||||
concr->coerce_idx = gu_map_type_new(PgfCoerceIdx, pool);
|
concr->coerce_idx = gu_map_type_new(PgfCoerceIdx, pool);
|
||||||
rdr->curr_ccats = concr->ccats;
|
concr->lexicon_idx = gu_map_type_new(PgfLexiconIdx, pool);
|
||||||
pgf_read_into_map(ccats_t, rdr, concr->ccats, rdr->opool);
|
concr->epsilon_idx = gu_new_buf(struct PgfItemBase*, pool);
|
||||||
concr->cnccats = pgf_read_new(rdr, gu_type(PgfCncCatMap),
|
pgf_read_into_map(ccats_t, rdr, concr->ccats);
|
||||||
rdr->opool, NULL);
|
concr->cnccats = pgf_read_new(rdr, gu_type(PgfCncCatMap), pool, NULL);
|
||||||
concr->max_fid = pgf_read_int(rdr);
|
concr->max_fid = pgf_read_int(rdr);
|
||||||
|
|
||||||
PgfIndexFn clo = { { pgf_read_ccat_cb }, concr, pool };
|
GuMapItor fn = { pgf_read_ccat_cb };
|
||||||
gu_map_iter(concr->ccats, &clo.fn, NULL);
|
gu_map_iter(concr->ccats, &fn, NULL);
|
||||||
|
|
||||||
|
// set the function ids
|
||||||
|
int n_funs = gu_list_length(concr->cncfuns);
|
||||||
|
for (int funid = 0; funid < n_funs; funid++) {
|
||||||
|
PgfCncFun* cncfun = gu_list_index(concr->cncfuns, funid);
|
||||||
|
cncfun->funid = funid;
|
||||||
|
}
|
||||||
|
|
||||||
return concr;
|
return concr;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -737,7 +739,7 @@ pgf_read_new_PgfCncCat(GuType* type, PgfReader* rdr, GuPool* pool,
|
|||||||
|
|
||||||
for (int i = 0; i < len; i++) {
|
for (int i = 0; i < len; i++) {
|
||||||
int fid = first + i;
|
int fid = first + i;
|
||||||
PgfCCat* ccat = gu_map_get(rdr->curr_ccats, &fid, PgfCCat*);
|
PgfCCat* ccat = gu_map_get(rdr->curr_concr->ccats, &fid, PgfCCat*);
|
||||||
if (!ccat) {
|
if (!ccat) {
|
||||||
ccat = gu_new(PgfCCat, rdr->opool);
|
ccat = gu_new(PgfCCat, rdr->opool);
|
||||||
ccat->cnccat = NULL;
|
ccat->cnccat = NULL;
|
||||||
@@ -745,7 +747,7 @@ pgf_read_new_PgfCncCat(GuType* type, PgfReader* rdr, GuPool* pool,
|
|||||||
ccat->prods = gu_null_seq;
|
ccat->prods = gu_null_seq;
|
||||||
ccat->fid = fid;
|
ccat->fid = fid;
|
||||||
|
|
||||||
gu_map_put(rdr->curr_ccats, &fid, PgfCCat*, ccat);
|
gu_map_put(rdr->curr_concr->ccats, &fid, PgfCCat*, ccat);
|
||||||
}
|
}
|
||||||
gu_list_index(cnccat->cats, i) = ccat;
|
gu_list_index(cnccat->cats, i) = ccat;
|
||||||
|
|
||||||
@@ -806,9 +808,7 @@ pgf_read_new_table = GU_TYPETABLE(
|
|||||||
PGF_READ_NEW(GuList),
|
PGF_READ_NEW(GuList),
|
||||||
PGF_READ_NEW(PgfCCat),
|
PGF_READ_NEW(PgfCCat),
|
||||||
PGF_READ_NEW(PgfCncCat),
|
PGF_READ_NEW(PgfCncCat),
|
||||||
PGF_READ_NEW(PgfConcr),
|
PGF_READ_NEW(PgfConcr)
|
||||||
PGF_READ_NEW_FN(PgfSequences, pgf_read_new_idarray),
|
|
||||||
PGF_READ_NEW_FN(PgfCncFuns, pgf_read_new_idarray)
|
|
||||||
);
|
);
|
||||||
|
|
||||||
static PgfReader*
|
static PgfReader*
|
||||||
@@ -819,8 +819,7 @@ pgf_new_reader(GuIn* in, GuPool* opool, GuPool* tmp_pool, GuExn* err)
|
|||||||
rdr->symtab = gu_new_symtable(opool, tmp_pool);
|
rdr->symtab = gu_new_symtable(opool, tmp_pool);
|
||||||
rdr->err = err;
|
rdr->err = err;
|
||||||
rdr->in = in;
|
rdr->in = in;
|
||||||
rdr->curr_sequences = NULL;
|
rdr->curr_concr = NULL;
|
||||||
rdr->curr_cncfuns = NULL;
|
|
||||||
rdr->read_to_map = gu_new_type_map(&pgf_read_to_table, tmp_pool);
|
rdr->read_to_map = gu_new_type_map(&pgf_read_to_table, tmp_pool);
|
||||||
rdr->read_new_map = gu_new_type_map(&pgf_read_new_table, tmp_pool);
|
rdr->read_new_map = gu_new_type_map(&pgf_read_new_table, tmp_pool);
|
||||||
return rdr;
|
return rdr;
|
||||||
|
|||||||
Reference in New Issue
Block a user