forked from GitHub/gf-core
A new unbiased statistical parser. It is still far from perfect; use it at your own risk.
This commit is contained in:
@@ -125,7 +125,7 @@ struct PgfPGF {
|
||||
extern GU_DECLARE_TYPE(PgfPGF, struct);
|
||||
|
||||
typedef struct {
|
||||
double prob;
|
||||
float prob;
|
||||
PgfExpr expr;
|
||||
} PgfExprProb;
|
||||
|
||||
@@ -148,6 +148,9 @@ struct PgfCatFun {
|
||||
struct PgfCat {
|
||||
// TODO: Add cid here
|
||||
PgfHypos context;
|
||||
|
||||
float meta_prob;
|
||||
|
||||
GuLength n_functions;
|
||||
PgfCatFun functions[]; // XXX: resolve to PgfFunDecl*?
|
||||
};
|
||||
@@ -189,6 +192,7 @@ struct PgfCCat {
|
||||
PgfFunIds* lindefs;
|
||||
size_t n_synprods;
|
||||
PgfProductionSeq prods;
|
||||
float viterbi_prob;
|
||||
int fid;
|
||||
};
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ pgf_lexer_next_token(PgfLexer *lexer, GuExn* err, GuPool *pool)
|
||||
if (gu_exn_is_raised(err))
|
||||
goto stop;
|
||||
|
||||
if (lexer->ucs == '.' && counter < 3) {
|
||||
if (lexer->ucs == '.' && counter < 4) {
|
||||
// perhaps an abreviation
|
||||
gu_ucs_write(lexer->ucs, wtr, err);
|
||||
if (gu_exn_is_raised(err))
|
||||
|
||||
@@ -235,6 +235,8 @@ pgf_match_name_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
|
||||
lit_str->val = gu_string_buf_freeze(sbuf, pool);
|
||||
|
||||
*out_ep = ep;
|
||||
} else {
|
||||
*out_ep = NULL;
|
||||
}
|
||||
|
||||
gu_pool_free(tmp_pool);
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -15,7 +15,7 @@
|
||||
* @todo HOAS, dependent types...
|
||||
*/
|
||||
|
||||
typedef struct PgfParse PgfParse;
|
||||
typedef struct PgfParseState PgfParseState;
|
||||
|
||||
/** @}
|
||||
*
|
||||
@@ -32,8 +32,9 @@ typedef struct PgfParse PgfParse;
|
||||
*/
|
||||
|
||||
/// Begin parsing
|
||||
PgfParse*
|
||||
pgf_parser_parse(PgfConcr* concr, PgfCId cat, size_t lin_idx, GuPool* pool);
|
||||
PgfParseState*
|
||||
pgf_parser_init_state(PgfConcr* concr, PgfCId cat, size_t lin_idx,
|
||||
GuPool* pool);
|
||||
/**<
|
||||
* @param parser The parser to use
|
||||
*
|
||||
@@ -48,8 +49,9 @@ pgf_parser_parse(PgfConcr* concr, PgfCId cat, size_t lin_idx, GuPool* pool);
|
||||
|
||||
|
||||
/// Feed a token to the parser
|
||||
PgfParse*
|
||||
pgf_parse_token(PgfParse* parse, PgfToken tok, bool robust, GuPool* pool);
|
||||
PgfParseState*
|
||||
pgf_parser_next_state(PgfParseState* prev, PgfToken tok,
|
||||
GuPool* pool);
|
||||
/**<
|
||||
* @param parse The current parse state
|
||||
*
|
||||
@@ -87,7 +89,7 @@ typedef GuEnum PgfExprEnum;
|
||||
|
||||
/// Retrieve the current parses from the parse state.
|
||||
PgfExprEnum*
|
||||
pgf_parse_result(PgfParse* parse, GuPool* pool);
|
||||
pgf_parse_result(PgfParseState* state, GuPool* pool);
|
||||
/**<
|
||||
* @param parse A parse state
|
||||
*
|
||||
@@ -101,7 +103,7 @@ pgf_parse_result(PgfParse* parse, GuPool* pool);
|
||||
*/
|
||||
|
||||
PgfExpr
|
||||
pgf_parse_best_result(PgfParse* parse, GuPool* pool);
|
||||
pgf_parse_best_result(PgfParseState* state, GuPool* pool);
|
||||
|
||||
|
||||
int
|
||||
|
||||
@@ -40,7 +40,7 @@ pgf_print_cat(GuMapItor* fn, const void* key, void* value,
|
||||
pgf_print_hypo(hypo, 4, wtr, err);
|
||||
}
|
||||
|
||||
gu_puts(" ;\n", wtr, err);
|
||||
gu_printf(wtr, err, " ; -- %f\n",cat->meta_prob);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -56,7 +56,7 @@ pgf_print_absfun(GuMapItor* fn, const void* key, void* value,
|
||||
gu_string_write(name, wtr, err);
|
||||
gu_puts(" : ", wtr, err);
|
||||
pgf_print_type(fun->type, 0, wtr, err);
|
||||
gu_puts(" ;\n", wtr, err);
|
||||
gu_printf(wtr, err, " ; -- %f\n", fun->ep.prob);
|
||||
}
|
||||
static void
|
||||
pgf_print_abstract(PgfCId absname, PgfAbstr* abstr,
|
||||
|
||||
@@ -34,7 +34,6 @@
|
||||
#define GU_LOG_ENABLE
|
||||
#include <gu/log.h>
|
||||
|
||||
typedef struct PgfIdContext PgfIdContext;
|
||||
|
||||
typedef GuMap PgfContsMap;
|
||||
|
||||
@@ -443,6 +442,7 @@ pgf_read_to_PgfCCatId(GuType* type, PgfReader* rdr, void* to)
|
||||
ccat->lindefs = gu_map_get(rdr->curr_lindefs, &fid, PgfFunIds*);
|
||||
ccat->n_synprods = 0;
|
||||
ccat->prods = gu_null_seq;
|
||||
ccat->viterbi_prob = 0;
|
||||
ccat->fid = fid;
|
||||
|
||||
gu_map_put(rdr->curr_concr->ccats, &fid, PgfCCat*, ccat);
|
||||
@@ -465,6 +465,7 @@ pgf_read_to_PgfCCat(GuType* type, PgfReader* rdr, void* to)
|
||||
ccat->cnccat = NULL;
|
||||
ccat->lindefs = gu_map_get(rdr->curr_lindefs, fidp, PgfFunIds*);
|
||||
ccat->prods = gu_new_seq(PgfProduction, n_prods, rdr->opool);
|
||||
ccat->viterbi_prob = 0;
|
||||
ccat->fid = *fidp;
|
||||
|
||||
size_t top = 0;
|
||||
@@ -600,7 +601,7 @@ pgf_read_new_PgfFunDecl(GuType* type, PgfReader* rdr, GuPool* pool, size_t* size
|
||||
}
|
||||
|
||||
absfun->ep.prob = - log(gu_in_f64be(rdr->in, rdr->err));
|
||||
|
||||
|
||||
PgfExprFun* expr_fun =
|
||||
gu_new_variant(PGF_EXPR_FUN,
|
||||
PgfExprFun,
|
||||
@@ -638,11 +639,33 @@ pgf_read_to_PgfFunId(GuType* type, PgfReader* rdr, void* to)
|
||||
*(PgfFunId*) to = gu_list_elems(rdr->curr_concr->cncfuns)[id];
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
GuMapItor fn;
|
||||
PgfReader* rdr;
|
||||
} PgfIndexFn;
|
||||
|
||||
static void
|
||||
pgf_compute_meta_probs(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||
{
|
||||
(void) (key && err);
|
||||
|
||||
PgfCat* cat = *((PgfCat**) value);
|
||||
|
||||
double mass = 0;
|
||||
for (size_t i = 0; i < cat->n_functions; i++) {
|
||||
mass += cat->functions[i].prob;
|
||||
}
|
||||
cat->meta_prob = - log(fabs(1 - mass));
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_read_to_PgfAbstr(GuType* type, PgfReader* rdr, void* to)
|
||||
{
|
||||
rdr->curr_abstr = to;
|
||||
pgf_read_to_struct(type, rdr, to);
|
||||
|
||||
PgfIndexFn clo = { { pgf_compute_meta_probs }, rdr };
|
||||
gu_map_iter(rdr->curr_abstr->cats, &clo.fn, NULL);
|
||||
}
|
||||
|
||||
static GU_DEFINE_TYPE(PgfLinDefs, GuIntMap, gu_ptr_type(PgfFunIds),
|
||||
@@ -691,11 +714,6 @@ pgf_ccat_set_cnccat(PgfCCat* ccat)
|
||||
return ccat->cnccat;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
GuMapItor fn;
|
||||
PgfReader* rdr;
|
||||
} PgfIndexFn;
|
||||
|
||||
static void
|
||||
pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||
{
|
||||
@@ -771,12 +789,6 @@ pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool,
|
||||
concr->total_cats = pgf_read_int(rdr);
|
||||
concr->max_fid = concr->total_cats;
|
||||
|
||||
PgfIndexFn clo1 = { { pgf_read_ccat_cb }, rdr };
|
||||
gu_map_iter(concr->ccats, &clo1.fn, NULL);
|
||||
|
||||
PgfIndexFn clo2 = { { pgf_index_prods }, rdr };
|
||||
gu_map_iter(concr->ccats, &clo2.fn, NULL);
|
||||
|
||||
// set the function ids
|
||||
int n_funs = gu_list_length(concr->cncfuns);
|
||||
for (int funid = 0; funid < n_funs; funid++) {
|
||||
@@ -788,6 +800,12 @@ pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool,
|
||||
cncfun->ep = (absfun == NULL) ? NULL : &absfun->ep;
|
||||
}
|
||||
|
||||
PgfIndexFn clo1 = { { pgf_read_ccat_cb }, rdr };
|
||||
gu_map_iter(concr->ccats, &clo1.fn, NULL);
|
||||
|
||||
PgfIndexFn clo2 = { { pgf_index_prods }, rdr };
|
||||
gu_map_iter(concr->ccats, &clo2.fn, NULL);
|
||||
|
||||
return concr;
|
||||
}
|
||||
|
||||
@@ -821,6 +839,7 @@ pgf_read_new_PgfCncCat(GuType* type, PgfReader* rdr, GuPool* pool,
|
||||
ccat->lindefs = gu_map_get(rdr->curr_lindefs, &fid, PgfFunIds*);
|
||||
ccat->n_synprods = 0;
|
||||
ccat->prods = gu_null_seq;
|
||||
ccat->viterbi_prob = 0;
|
||||
ccat->fid = fid;
|
||||
|
||||
gu_map_put(rdr->curr_concr->ccats, &fid, PgfCCat*, ccat);
|
||||
|
||||
Reference in New Issue
Block a user