mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-23 18:02:54 -06:00
a major refactoring in the robust parser: bottom-up filtering and garbage collection for the chart
This commit is contained in:
@@ -251,6 +251,12 @@ gu_map_find_key(GuMap* map, const void* key)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
gu_map_has(GuMap* ht, const void* key)
|
||||||
|
{
|
||||||
|
size_t idx;
|
||||||
|
return gu_map_lookup(ht, key, &idx);
|
||||||
|
}
|
||||||
|
|
||||||
void*
|
void*
|
||||||
gu_map_insert(GuMap* map, const void* key)
|
gu_map_insert(GuMap* map, const void* key)
|
||||||
|
|||||||
@@ -52,12 +52,8 @@ gu_map_find(GuMap* ht, const void* key);
|
|||||||
const void*
|
const void*
|
||||||
gu_map_find_key(GuMap* ht, const void* key);
|
gu_map_find_key(GuMap* ht, const void* key);
|
||||||
|
|
||||||
static inline bool
|
bool
|
||||||
gu_map_has(GuMap* ht, const void* key)
|
gu_map_has(GuMap* ht, const void* key);
|
||||||
{
|
|
||||||
return gu_map_find_key(ht, key) != NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void*
|
void*
|
||||||
gu_map_insert(GuMap* ht, const void* key);
|
gu_map_insert(GuMap* ht, const void* key);
|
||||||
|
|||||||
@@ -193,6 +193,8 @@ struct PgfAlternative {
|
|||||||
* form. */
|
* form. */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
typedef struct PgfItemConts PgfItemConts;
|
||||||
|
|
||||||
struct PgfCCat {
|
struct PgfCCat {
|
||||||
PgfCncCat* cnccat;
|
PgfCncCat* cnccat;
|
||||||
PgfFunIds* lindefs;
|
PgfFunIds* lindefs;
|
||||||
@@ -200,6 +202,7 @@ struct PgfCCat {
|
|||||||
PgfProductionSeq prods;
|
PgfProductionSeq prods;
|
||||||
float viterbi_prob;
|
float viterbi_prob;
|
||||||
int fid;
|
int fid;
|
||||||
|
PgfItemConts* conts;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern PgfCCat pgf_ccat_string, pgf_ccat_int, pgf_ccat_float, pgf_ccat_var;
|
extern PgfCCat pgf_ccat_string, pgf_ccat_int, pgf_ccat_float, pgf_ccat_var;
|
||||||
@@ -213,11 +216,14 @@ extern GU_DECLARE_TYPE(PgfFunIndices, GuStringMap);
|
|||||||
typedef GuMap PgfCoerceIdx;
|
typedef GuMap PgfCoerceIdx;
|
||||||
extern GU_DECLARE_TYPE(PgfCoerceIdx, GuMap);
|
extern GU_DECLARE_TYPE(PgfCoerceIdx, GuMap);
|
||||||
|
|
||||||
typedef GuStringMap PgfTransitions;
|
typedef GuMap PgfProductionIdx;
|
||||||
extern GU_DECLARE_TYPE(PgfTransitions, GuStringMap);
|
extern GU_DECLARE_TYPE(PgfProductionIdx, GuMap);
|
||||||
|
|
||||||
typedef GuMap PgfEpsilonIdx;
|
typedef GuMap PgfLeftcornerCatIdx;
|
||||||
extern GU_DECLARE_TYPE(PgfEpsilonIdx, GuMap);
|
extern GU_DECLARE_TYPE(PgfLeftcornerCatIdx, GuMap);
|
||||||
|
|
||||||
|
typedef GuMap PgfLeftcornerTokIdx;
|
||||||
|
extern GU_DECLARE_TYPE(PgfLeftcornerTokIdx, GuMap);
|
||||||
|
|
||||||
typedef struct PgfLiteralCallback PgfLiteralCallback;
|
typedef struct PgfLiteralCallback PgfLiteralCallback;
|
||||||
extern GU_DECLARE_TYPE(PgfLiteralCallback, struct);
|
extern GU_DECLARE_TYPE(PgfLiteralCallback, struct);
|
||||||
@@ -238,15 +244,14 @@ struct PgfConcr {
|
|||||||
GuMap* ccats;
|
GuMap* ccats;
|
||||||
PgfFunIndices* fun_indices;
|
PgfFunIndices* fun_indices;
|
||||||
PgfCoerceIdx* coerce_idx;
|
PgfCoerceIdx* coerce_idx;
|
||||||
PgfTransitions* lexicon_idx;
|
PgfProductionIdx* epsilon_idx;
|
||||||
PgfEpsilonIdx* epsilon_idx;
|
PgfLeftcornerCatIdx* leftcorner_cat_idx;
|
||||||
|
PgfLeftcornerTokIdx* leftcorner_tok_idx;
|
||||||
PgfCncFuns* cncfuns;
|
PgfCncFuns* cncfuns;
|
||||||
PgfSequences* sequences;
|
PgfSequences* sequences;
|
||||||
PgfCIdMap* cnccats;
|
PgfCIdMap* cnccats;
|
||||||
PgfCallbacksMap* callbacks;
|
PgfCallbacksMap* callbacks;
|
||||||
int total_cats;
|
int total_cats;
|
||||||
int max_fid;
|
|
||||||
int item_quota;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
extern GU_DECLARE_TYPE(PgfConcr, struct);
|
extern GU_DECLARE_TYPE(PgfConcr, struct);
|
||||||
|
|||||||
@@ -152,11 +152,29 @@ pgf_lzr_add_infer_entry(
|
|||||||
gu_buf_push(entries, PgfLinInferEntry, entry);
|
gu_buf_push(entries, PgfLinInferEntry, entry);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
GuMapItor fn;
|
||||||
|
PgfConcr* concr;
|
||||||
|
GuPool* pool;
|
||||||
|
} PgfLzrIndexFn;
|
||||||
|
|
||||||
void
|
static void
|
||||||
pgf_lzr_index(PgfConcr* concr, PgfCCat* ccat, PgfProduction prod,
|
pgf_lzr_index_iter(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||||
GuPool *pool)
|
|
||||||
{
|
{
|
||||||
|
(void) (key && err);
|
||||||
|
|
||||||
|
PgfLzrIndexFn* clo = (PgfLzrIndexFn*) fn;
|
||||||
|
PgfCCat* ccat = *((PgfCCat**) value);
|
||||||
|
PgfConcr *concr = clo->concr;
|
||||||
|
GuPool *pool = clo->pool;
|
||||||
|
|
||||||
|
if (gu_seq_is_null(ccat->prods))
|
||||||
|
return;
|
||||||
|
|
||||||
|
size_t n_prods = gu_seq_length(ccat->prods);
|
||||||
|
for (size_t i = 0; i < n_prods; i++) {
|
||||||
|
PgfProduction prod = gu_seq_get(ccat->prods, PgfProduction, i);
|
||||||
|
|
||||||
void* data = gu_variant_data(prod);
|
void* data = gu_variant_data(prod);
|
||||||
switch (gu_variant_tag(prod)) {
|
switch (gu_variant_tag(prod)) {
|
||||||
case PGF_PRODUCTION_APPLY: {
|
case PGF_PRODUCTION_APPLY: {
|
||||||
@@ -190,6 +208,15 @@ pgf_lzr_index(PgfConcr* concr, PgfCCat* ccat, PgfProduction prod,
|
|||||||
// Display warning?
|
// Display warning?
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
pgf_lzr_index(PgfConcr* concr, GuPool *pool)
|
||||||
|
{
|
||||||
|
PgfLzrIndexFn clo = { { pgf_lzr_index_iter }, concr, pool };
|
||||||
|
gu_map_iter(concr->ccats, &clo.fn, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef struct PgfLzn PgfLzn;
|
typedef struct PgfLzn PgfLzn;
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -74,13 +74,6 @@ pgf_load_meta_child_probs(PgfPGF*, const char* fpath, GuPool* pool);
|
|||||||
|
|
||||||
typedef struct PgfConcr PgfConcr;
|
typedef struct PgfConcr PgfConcr;
|
||||||
|
|
||||||
void
|
|
||||||
pgf_set_item_quota(PgfConcr* concr, int quota);
|
|
||||||
|
|
||||||
int
|
|
||||||
pgf_get_item_quota(PgfConcr* concr);
|
|
||||||
|
|
||||||
|
|
||||||
#include <gu/type.h>
|
#include <gu/type.h>
|
||||||
extern GU_DECLARE_TYPE(PgfPGF, struct);
|
extern GU_DECLARE_TYPE(PgfPGF, struct);
|
||||||
|
|
||||||
|
|||||||
@@ -36,8 +36,6 @@
|
|||||||
#include <gu/log.h>
|
#include <gu/log.h>
|
||||||
|
|
||||||
|
|
||||||
typedef GuMap PgfContsMap;
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// PgfReader
|
// PgfReader
|
||||||
//
|
//
|
||||||
@@ -53,7 +51,6 @@ struct PgfReader {
|
|||||||
PgfAbstr* curr_abstr;
|
PgfAbstr* curr_abstr;
|
||||||
PgfConcr* curr_concr;
|
PgfConcr* curr_concr;
|
||||||
GuMap* curr_lindefs;
|
GuMap* curr_lindefs;
|
||||||
PgfContsMap* curr_conts_map; // used temporary for building the bu index for the parser
|
|
||||||
GuTypeMap* read_to_map;
|
GuTypeMap* read_to_map;
|
||||||
GuTypeMap* read_new_map;
|
GuTypeMap* read_new_map;
|
||||||
void* curr_key;
|
void* curr_key;
|
||||||
@@ -445,6 +442,7 @@ pgf_read_to_PgfCCatId(GuType* type, PgfReader* rdr, void* to)
|
|||||||
ccat->prods = gu_null_seq;
|
ccat->prods = gu_null_seq;
|
||||||
ccat->viterbi_prob = 0;
|
ccat->viterbi_prob = 0;
|
||||||
ccat->fid = fid;
|
ccat->fid = fid;
|
||||||
|
ccat->conts = NULL;
|
||||||
|
|
||||||
gu_map_put(rdr->curr_concr->ccats, &fid, PgfCCat*, ccat);
|
gu_map_put(rdr->curr_concr->ccats, &fid, PgfCCat*, ccat);
|
||||||
}
|
}
|
||||||
@@ -468,6 +466,7 @@ pgf_read_to_PgfCCat(GuType* type, PgfReader* rdr, void* to)
|
|||||||
ccat->prods = gu_new_seq(PgfProduction, n_prods, rdr->opool);
|
ccat->prods = gu_new_seq(PgfProduction, n_prods, rdr->opool);
|
||||||
ccat->viterbi_prob = 0;
|
ccat->viterbi_prob = 0;
|
||||||
ccat->fid = *fidp;
|
ccat->fid = *fidp;
|
||||||
|
ccat->conts = NULL;
|
||||||
|
|
||||||
size_t top = 0;
|
size_t top = 0;
|
||||||
size_t bot = n_prods-1;
|
size_t bot = n_prods-1;
|
||||||
@@ -716,6 +715,9 @@ pgf_ccat_set_cnccat(PgfCCat* ccat)
|
|||||||
return ccat->cnccat;
|
return ccat->cnccat;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern float
|
||||||
|
pgf_ccat_set_viterbi_prob(PgfCCat* ccat);
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||||
{
|
{
|
||||||
@@ -723,41 +725,14 @@ pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
|||||||
PgfCCat* ccat = *((PgfCCat**) value);
|
PgfCCat* ccat = *((PgfCCat**) value);
|
||||||
|
|
||||||
pgf_ccat_set_cnccat(ccat);
|
pgf_ccat_set_cnccat(ccat);
|
||||||
|
// pgf_ccat_set_viterbi_prob(ccat);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern GU_DECLARE_TYPE(PgfContsMap, GuMap);
|
void
|
||||||
|
pgf_parser_index(PgfConcr* concr, GuPool *pool);
|
||||||
|
|
||||||
void
|
void
|
||||||
pgf_parser_bu_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod,
|
pgf_lzr_index(PgfConcr* concr, GuPool *pool);
|
||||||
PgfContsMap* conts_map,
|
|
||||||
GuPool *pool, GuPool *tmp_pool);
|
|
||||||
|
|
||||||
void
|
|
||||||
pgf_lzr_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod,
|
|
||||||
GuPool *pool);
|
|
||||||
|
|
||||||
static void
|
|
||||||
pgf_index_prods(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
|
||||||
{
|
|
||||||
(void) (key && err);
|
|
||||||
|
|
||||||
PgfIndexFn* clo = (PgfIndexFn*) fn;
|
|
||||||
PgfCCat* ccat = *((PgfCCat**) value);
|
|
||||||
PgfReader *rdr = clo->rdr;
|
|
||||||
|
|
||||||
if (gu_seq_is_null(ccat->prods))
|
|
||||||
return;
|
|
||||||
|
|
||||||
size_t n_prods = gu_seq_length(ccat->prods);
|
|
||||||
for (size_t i = 0; i < n_prods; i++) {
|
|
||||||
PgfProduction prod = gu_seq_get(ccat->prods, PgfProduction, i);
|
|
||||||
|
|
||||||
pgf_parser_bu_index(rdr->curr_concr, ccat, prod,
|
|
||||||
rdr->curr_conts_map,
|
|
||||||
rdr->opool, rdr->tmp_pool);
|
|
||||||
pgf_lzr_index(rdr->curr_concr, ccat, prod, rdr->opool);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void*
|
static void*
|
||||||
pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool,
|
pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool,
|
||||||
@@ -777,20 +752,18 @@ pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool,
|
|||||||
GuMapType* lindefs_t = gu_type_cast(gu_type(PgfLinDefs), GuMap);
|
GuMapType* lindefs_t = gu_type_cast(gu_type(PgfLinDefs), GuMap);
|
||||||
rdr->curr_lindefs = gu_map_type_make(lindefs_t, rdr->tmp_pool);
|
rdr->curr_lindefs = gu_map_type_make(lindefs_t, rdr->tmp_pool);
|
||||||
pgf_read_into_map(lindefs_t, rdr, rdr->curr_lindefs);
|
pgf_read_into_map(lindefs_t, rdr, rdr->curr_lindefs);
|
||||||
rdr->curr_conts_map = gu_map_type_new(PgfContsMap, rdr->tmp_pool);
|
|
||||||
GuMapType* ccats_t = gu_type_cast(gu_type(PgfCCatMap), GuMap);
|
GuMapType* ccats_t = gu_type_cast(gu_type(PgfCCatMap), GuMap);
|
||||||
concr->ccats =
|
concr->ccats =
|
||||||
gu_new_int_map(PgfCCat*, &gu_null_struct, pool);
|
gu_new_int_map(PgfCCat*, &gu_null_struct, pool);
|
||||||
concr->fun_indices = gu_map_type_new(PgfFunIndices, pool);
|
concr->fun_indices = gu_map_type_new(PgfFunIndices, pool);
|
||||||
concr->coerce_idx = gu_map_type_new(PgfCoerceIdx, pool);
|
concr->coerce_idx = gu_map_type_new(PgfCoerceIdx, pool);
|
||||||
concr->lexicon_idx = gu_map_type_new(PgfTransitions,pool);
|
concr->epsilon_idx = gu_map_type_new(PgfProductionIdx, pool);
|
||||||
concr->epsilon_idx = gu_map_type_new(PgfEpsilonIdx, pool);
|
concr->leftcorner_cat_idx = gu_map_type_new(PgfLeftcornerCatIdx,pool);
|
||||||
|
concr->leftcorner_tok_idx = gu_map_type_new(PgfLeftcornerTokIdx,pool);
|
||||||
pgf_read_into_map(ccats_t, rdr, concr->ccats);
|
pgf_read_into_map(ccats_t, rdr, concr->ccats);
|
||||||
concr->cnccats = pgf_read_new(rdr, gu_type(PgfCncCatMap), pool, NULL);
|
concr->cnccats = pgf_read_new(rdr, gu_type(PgfCncCatMap), pool, NULL);
|
||||||
concr->callbacks = pgf_new_callbacks_map(concr, pool);
|
concr->callbacks = pgf_new_callbacks_map(concr, pool);
|
||||||
concr->total_cats = pgf_read_int(rdr);
|
concr->total_cats = pgf_read_int(rdr);
|
||||||
concr->max_fid = concr->total_cats;
|
|
||||||
concr->item_quota = 2000000;
|
|
||||||
|
|
||||||
// set the function ids
|
// set the function ids
|
||||||
int n_funs = gu_list_length(concr->cncfuns);
|
int n_funs = gu_list_length(concr->cncfuns);
|
||||||
@@ -806,8 +779,8 @@ pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool,
|
|||||||
PgfIndexFn clo1 = { { pgf_read_ccat_cb }, rdr };
|
PgfIndexFn clo1 = { { pgf_read_ccat_cb }, rdr };
|
||||||
gu_map_iter(concr->ccats, &clo1.fn, NULL);
|
gu_map_iter(concr->ccats, &clo1.fn, NULL);
|
||||||
|
|
||||||
PgfIndexFn clo2 = { { pgf_index_prods }, rdr };
|
pgf_parser_index(concr, pool);
|
||||||
gu_map_iter(concr->ccats, &clo2.fn, NULL);
|
pgf_lzr_index(concr, pool);
|
||||||
|
|
||||||
return concr;
|
return concr;
|
||||||
}
|
}
|
||||||
@@ -844,6 +817,7 @@ pgf_read_new_PgfCncCat(GuType* type, PgfReader* rdr, GuPool* pool,
|
|||||||
ccat->prods = gu_null_seq;
|
ccat->prods = gu_null_seq;
|
||||||
ccat->viterbi_prob = 0;
|
ccat->viterbi_prob = 0;
|
||||||
ccat->fid = fid;
|
ccat->fid = fid;
|
||||||
|
ccat->conts = NULL;
|
||||||
|
|
||||||
gu_map_put(rdr->curr_concr->ccats, &fid, PgfCCat*, ccat);
|
gu_map_put(rdr->curr_concr->ccats, &fid, PgfCCat*, ccat);
|
||||||
}
|
}
|
||||||
@@ -922,7 +896,6 @@ pgf_new_reader(GuIn* in, GuPool* opool, GuPool* tmp_pool, GuExn* err)
|
|||||||
rdr->curr_abstr = NULL;
|
rdr->curr_abstr = NULL;
|
||||||
rdr->curr_concr = NULL;
|
rdr->curr_concr = NULL;
|
||||||
rdr->curr_lindefs = NULL;
|
rdr->curr_lindefs = NULL;
|
||||||
rdr->curr_conts_map = NULL;
|
|
||||||
rdr->read_to_map = gu_new_type_map(&pgf_read_to_table, tmp_pool);
|
rdr->read_to_map = gu_new_type_map(&pgf_read_to_table, tmp_pool);
|
||||||
rdr->read_new_map = gu_new_type_map(&pgf_read_new_table, tmp_pool);
|
rdr->read_new_map = gu_new_type_map(&pgf_read_new_table, tmp_pool);
|
||||||
return rdr;
|
return rdr;
|
||||||
@@ -989,13 +962,3 @@ pgf_load_meta_child_probs(PgfPGF* pgf, const char* fpath, GuPool* pool)
|
|||||||
fclose(fp);
|
fclose(fp);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
pgf_set_item_quota(PgfConcr* concr, int quota) {
|
|
||||||
concr->item_quota = quota > 0 ? quota : 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
|
||||||
pgf_get_item_quota(PgfConcr* concr) {
|
|
||||||
return concr->item_quota;
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -87,7 +87,7 @@ int main(int argc, char* argv[]) {
|
|||||||
goto fail_read;
|
goto fail_read;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!pgf_load_meta_child_probs(pgf, "../../../examples/PennTreebank/ParseEngAbs2.probs", pool)) {
|
if (!pgf_load_meta_child_probs(pgf, "../../../treebanks/PennTreebank/ParseEngAbs3.probs", pool)) {
|
||||||
fprintf(stderr, "Loading meta child probs failed\n");
|
fprintf(stderr, "Loading meta child probs failed\n");
|
||||||
status = EXIT_FAILURE;
|
status = EXIT_FAILURE;
|
||||||
goto fail_read;
|
goto fail_read;
|
||||||
|
|||||||
Reference in New Issue
Block a user