libpgf: the linearization index is now created during grammar loading, which also makes the types PgfLzr and PgfParser redundant.

This commit is contained in:
kr.angelov
2012-02-18 16:22:40 +00:00
parent aed7cc429a
commit 47e5e8c966
7 changed files with 76 additions and 175 deletions

View File

@@ -182,13 +182,21 @@ extern PgfCCat pgf_ccat_string, pgf_ccat_int, pgf_ccat_float, pgf_ccat_var;
typedef PgfCIdMap PgfPrintNames;
extern GU_DECLARE_TYPE(PgfPrintNames, GuStringMap);
typedef GuStringMap PgfFunIndices;
extern GU_DECLARE_TYPE(PgfFunIndices, GuStringMap);
typedef GuMap PgfCoerceIdx;
extern GU_DECLARE_TYPE(PgfCoerceIdx, GuMap);
// Concrete syntax of a grammar. The reader (pgf_read_new_PgfConcr) creates
// ccats, fun_indices and coerce_idx and reads cnccats and max_fid.
struct PgfConcr {
PgfFlags* cflags;
PgfPrintNames* printnames;
// Concrete categories, created by the reader as an int-keyed map of PgfCCat*.
GuMap* ccats;
PgfCCatSeq extra_ccats;
// Linearization index: function name -> PgfInferMap*, filled by pgf_lzr_index.
PgfFunIndices* fun_indices;
// Coercion index: PgfCCat -> PgfCCatBuf* of the categories that coerce from
// it, filled by pgf_lzr_index.
PgfCoerceIdx* coerce_idx;
PgfCncFuns* cncfuns;
// NOTE(review): the next declaration appears twice in this view — presumably
// the old and new side of a whitespace-only change; confirm against the
// actual header, where a duplicate member would not compile.
PgfSequences* sequences;
PgfSequences* sequences;
PgfCIdMap* cnccats;
int max_fid;
};

View File

@@ -105,42 +105,28 @@ static GU_DEFINE_TYPE(PgfInferMap, GuMap,
gu_ptr_type(PgfLinInfers), &gu_null_struct);
typedef GuStringMap PgfFunIndices;
static GU_DEFINE_TYPE(PgfFunIndices, GuStringMap, gu_ptr_type(PgfInferMap),
GU_DEFINE_TYPE(PgfFunIndices, GuStringMap, gu_ptr_type(PgfInferMap),
&gu_null_struct);
typedef GuBuf PgfCCatBuf;
static GU_DEFINE_TYPE(PgfCCatBuf, GuBuf, gu_ptr_type(PgfCCat));
typedef GuMap PgfCoerceIdx;
static GU_DEFINE_TYPE(PgfCoerceIdx, GuMap,
GU_DEFINE_TYPE(PgfCoerceIdx, GuMap,
gu_type(PgfCCat), NULL,
gu_ptr_type(PgfCCatBuf), &gu_null_struct);
// Linearizer state; pgf_new_lzr fills in every field.
struct PgfLzr {
// Concrete syntax whose categories are indexed.
PgfConcr* cnc;
// Pool used for the index allocations made on behalf of this linearizer.
GuPool* pool;
// Function name -> PgfInferMap*.
PgfFunIndices* fun_indices;
// PgfCCat -> PgfCCatBuf*.
PgfCoerceIdx* coerce_idx;
};
// Type description for PgfLzr; note that the pool member is not listed.
GU_DEFINE_TYPE(
PgfLzr, struct,
GU_MEMBER_P(PgfLzr, cnc, PgfConcr),
GU_MEMBER_P(PgfLzr, fun_indices, PgfFunIndices),
GU_MEMBER_P(PgfLzr, coerce_idx, PgfCoerceIdx));
static void
pgf_lzr_add_infer_entry(PgfLzr* lzr,
pgf_lzr_add_infer_entry(
PgfInferMap* infer_table,
PgfCCat* cat,
PgfProductionApply* papply)
PgfProductionApply* papply,
GuPool *pool)
{
PgfPArgs args = papply->args;
size_t n_args = gu_seq_length(args);
PgfCCatIds* arg_cats = gu_new_list(PgfCCatIds, lzr->pool, n_args);
PgfCCatIds* arg_cats = gu_new_list(PgfCCatIds, pool, n_args);
for (size_t i = 0; i < n_args; i++) {
// XXX: What about the hypos in the args?
gu_list_index(arg_cats, i) = gu_seq_get(args, PgfPArg, i).ccat;
@@ -153,7 +139,7 @@ pgf_lzr_add_infer_entry(PgfLzr* lzr,
PgfLinInfers* entries =
gu_map_get(infer_table, &arg_cats, PgfLinInfers*);
if (!entries) {
entries = gu_new_buf(PgfLinInferEntry, lzr->pool);
entries = gu_new_buf(PgfLinInferEntry, pool);
gu_map_put(infer_table, &arg_cats, PgfLinInfers*, entries);
} else {
// XXX: arg_cats is duplicate, we ought to free it
@@ -168,32 +154,33 @@ pgf_lzr_add_infer_entry(PgfLzr* lzr,
}
static void
pgf_lzr_index(PgfLzr* lzr, PgfCCat* cat, PgfProduction prod)
void
pgf_lzr_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod,
GuPool *pool)
{
void* data = gu_variant_data(prod);
switch (gu_variant_tag(prod)) {
case PGF_PRODUCTION_APPLY: {
PgfProductionApply* papply = data;
PgfInferMap* infer =
gu_map_get(lzr->fun_indices, &papply->fun->fun,
gu_map_get(concr->fun_indices, &papply->fun->fun,
PgfInferMap*);
gu_debug("index: %s -> %d", papply->fun->fun, cat->fid);
if (!infer) {
infer = gu_map_type_new(PgfInferMap, lzr->pool);
gu_map_put(lzr->fun_indices,
infer = gu_map_type_new(PgfInferMap, pool);
gu_map_put(concr->fun_indices,
&papply->fun->fun, PgfInferMap*, infer);
}
pgf_lzr_add_infer_entry(lzr, infer, cat, papply);
pgf_lzr_add_infer_entry(infer, cat, papply, pool);
break;
}
case PGF_PRODUCTION_COERCE: {
PgfProductionCoerce* pcoerce = data;
PgfCCatBuf* cats = gu_map_get(lzr->coerce_idx, pcoerce->coerce,
PgfCCatBuf* cats = gu_map_get(concr->coerce_idx, pcoerce->coerce,
PgfCCatBuf*);
if (!cats) {
cats = gu_new_buf(PgfCCat*, lzr->pool);
gu_map_put(lzr->coerce_idx,
cats = gu_new_buf(PgfCCat*, pool);
gu_map_put(concr->coerce_idx,
pcoerce->coerce, PgfCCatBuf*, cats);
}
gu_debug("coerce_idx: %d -> %d", pcoerce->coerce->fid, cat->fid);
@@ -206,48 +193,10 @@ pgf_lzr_index(PgfLzr* lzr, PgfCCat* cat, PgfProduction prod)
}
}
// Closure handed to gu_map_iter by pgf_new_lzr. The GuMapItor must stay the
// first member: pgf_lzr_index_cnccat_cb casts the GuMapItor* it receives back
// to a PgfLzrIndexFn*.
typedef struct {
GuMapItor fn;
// Linearizer whose indices the callback fills.
PgfLzr* lzr;
} PgfLzrIndexFn;
/* Map-iteration callback: feeds every production of one concrete category
 * to pgf_lzr_index, using the linearizer carried by the enclosing
 * PgfLzrIndexFn closure. Categories without a production sequence are
 * only logged. key and err are unused. */
static void
pgf_lzr_index_cnccat_cb(GuMapItor* fn, const void* key, void* value,
	GuExn* err)
{
	PgfLzrIndexFn* self = (PgfLzrIndexFn*) fn;
	PgfCCat** slot = (PgfCCat**) value;
	PgfCCat* cat = *slot;
	gu_debug("ccat: %d", cat->fid);
	if (!gu_seq_is_null(cat->prods)) {
		size_t count = gu_seq_length(cat->prods);
		size_t idx = 0;
		while (idx < count) {
			PgfProduction current = gu_seq_get(cat->prods, PgfProduction, idx);
			pgf_lzr_index(self->lzr, cat, current);
			idx++;
		}
	}
}
/* Allocate a linearizer for cnc from pool, create its two (initially empty)
 * indices, and populate them by visiting every entry of cnc->ccats. */
PgfLzr*
pgf_new_lzr(PgfConcr* cnc, GuPool* pool)
{
	PgfLzr* result = gu_new(PgfLzr, pool);
	result->cnc = cnc;
	result->pool = pool;
	result->fun_indices = gu_map_type_new(PgfFunIndices, pool);
	result->coerce_idx = gu_map_type_new(PgfCoerceIdx, pool);

	PgfLzrIndexFn indexer = {
		.fn = { pgf_lzr_index_cnccat_cb },
		.lzr = result
	};
	gu_map_iter(cnc->ccats, &indexer.fn, NULL);
	// TODO: prune productions with zero linearizations
	return result;
}
typedef struct PgfLzn PgfLzn;
struct PgfLzn {
PgfLzr* lzr;
PgfConcr* concr;
GuChoice* ch;
PgfExpr expr;
GuEnum en;
@@ -282,7 +231,7 @@ pgf_lzn_pick_supercat(PgfLzn* lzn, PgfCCat* cat)
gu_enter("->");
while (true) {
PgfCCatBuf* supers =
gu_map_get(lzn->lzr->coerce_idx, cat, PgfCCatBuf*);
gu_map_get(lzn->concr->coerce_idx, cat, PgfCCatBuf*);
if (!supers) {
break;
}
@@ -347,7 +296,7 @@ pgf_lzn_infer_application(PgfLzn* lzn, PgfApplication* appl,
GuPool* pool, PgfCncTree* ctree_out)
{
PgfInferMap* infer =
gu_map_get(lzn->lzr->fun_indices, &appl->fun, PgfInferMap*);
gu_map_get(lzn->concr->fun_indices, &appl->fun, PgfInferMap*);
gu_enter("-> f: %s, n_args: %d", appl->fun, appl->n_args);
if (infer == NULL) {
gu_exit("<- couldn't find f");
@@ -458,10 +407,10 @@ pgf_cnc_tree_enum_next(GuEnum* self, void* to, GuPool* pool)
}
PgfCncTreeEnum*
pgf_lzr_concretize(PgfLzr* lzr, PgfExpr expr, GuPool* pool)
pgf_lzr_concretize(PgfConcr* concr, PgfExpr expr, GuPool* pool)
{
PgfLzn* lzn = gu_new(PgfLzn, pool);
lzn->lzr = lzr;
lzn->concr = concr;
lzn->expr = expr;
lzn->ch = gu_new_choice(pool);
lzn->en.next = pgf_cnc_tree_enum_next;
@@ -487,7 +436,7 @@ pgf_cnc_tree_dimension(PgfCncTree ctree)
}
void
pgf_lzr_linearize(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs** fnsp)
pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs** fnsp)
{
PgfLinFuncs* fns = *fnsp;
GuVariantInfo cti = gu_variant_open(ctree);
@@ -521,7 +470,7 @@ pgf_lzr_linearize(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs** f
PgfSymbolIdx* sidx = sym_i.data;
gu_assert((unsigned) sidx->d < fapp->n_args);
PgfCncTree argf = fapp->args[sidx->d];
pgf_lzr_linearize(lzr, argf, sidx->r, fnsp);
pgf_lzr_linearize(concr, argf, sidx->r, fnsp);
break;
}
case PGF_SYMBOL_KS: {
@@ -581,7 +530,7 @@ static PgfLinFuncs pgf_file_lin_funcs = {
};
void
pgf_lzr_linearize_simple(PgfLzr* lzr, PgfCncTree ctree,
pgf_lzr_linearize_simple(PgfConcr* concr, PgfCncTree ctree,
size_t lin_idx, GuWriter* wtr, GuExn* err)
{
PgfSimpleLin flin = {
@@ -589,5 +538,5 @@ pgf_lzr_linearize_simple(PgfLzr* lzr, PgfCncTree ctree,
.wtr = wtr,
.err = err
};
pgf_lzr_linearize(lzr, ctree, lin_idx, &flin.funcs);
pgf_lzr_linearize(concr, ctree, lin_idx, &flin.funcs);
}

View File

@@ -25,40 +25,6 @@
/// Linearization of abstract syntax trees.
/// @file
/** @name Linearizers
*
* Linearization begins by choosing a concrete category (#PgfConcr) for some
* grammar, and creating a new linearizer (#PgfLzr) which can then be used to
* linearize abstract syntax trees (#PgfExpr) of that grammar into the given
* concrete category.
*
* @{
*/
/// A linearizer.
typedef struct PgfLzr PgfLzr;
/**<
*
* A #PgfLzr object transforms abstract syntax trees of a PGF grammar
* into sequences of token events for a single concrete category of
* that grammar.
*
*/
GU_DECLARE_TYPE(PgfLzr, struct);
/// Create a new linearizer.
PgfLzr*
pgf_new_lzr(PgfConcr* cnc, GuPool* pool);
/**<
* @param cnc The concrete category to linearize to.
*
* @pool
*
* @return A new linearizer.
*/
/** @}
*
* @name Enumerating concrete syntax trees
@@ -80,7 +46,7 @@ typedef GuEnum PgfCncTreeEnum;
/// Begin enumerating concrete syntax variants.
PgfCncTreeEnum*
pgf_lzr_concretize(PgfLzr* lzr, PgfExpr expr, GuPool* pool);
pgf_lzr_concretize(PgfConcr* concr, PgfExpr expr, GuPool* pool);
/** @}
*
@@ -127,13 +93,13 @@ struct PgfLinFuncs
/// Linearize a concrete syntax tree.
void
pgf_lzr_linearize(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx,
pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx,
PgfLinFuncs** fnsp);
/// Linearize a concrete syntax tree as space-separated tokens.
void
pgf_lzr_linearize_simple(PgfLzr* lzr, PgfCncTree ctree,
pgf_lzr_linearize_simple(PgfConcr* concr, PgfCncTree ctree,
size_t lin_idx, GuWriter* wtr, GuExn* err);

View File

@@ -18,12 +18,8 @@ typedef GuMap PgfTransitions;
typedef GuBuf PgfCCatBuf;
// Parser handle: holds nothing but the concrete syntax it parses with
// (set by pgf_new_parser).
struct PgfParser {
PgfConcr* concr;
};
struct PgfParse {
PgfParser* parser;
PgfConcr* concr;
PgfItemBuf* agenda;
int max_fid;
};
@@ -650,10 +646,10 @@ pgf_new_parsing(PgfLexCallback* callback, int max_fid,
}
static PgfParse*
pgf_new_parse(PgfParser* parser, int max_fid, GuPool* pool)
pgf_new_parse(PgfConcr* concr, int max_fid, GuPool* pool)
{
PgfParse* parse = gu_new(PgfParse, pool);
parse->parser = parser;
parse->concr = concr;
parse->agenda = NULL;
parse->max_fid = max_fid;
return parse;
@@ -692,7 +688,7 @@ pgf_parse_token(PgfParse* parse, PgfToken tok, GuPool* pool)
PgfParse* next_parse = NULL;
if (gu_buf_length(agenda) > 0) {
next_parse = pgf_new_parse(parse->parser, parse->max_fid, pool);
next_parse = pgf_new_parse(parse->concr, parse->max_fid, pool);
next_parse->agenda = agenda;
next_parse->max_fid= parsing->max_fid;
}
@@ -805,7 +801,7 @@ pgf_parse_result(PgfParse* parse, GuPool* pool)
PgfExprEnum* en =
&gu_new_i(pool, PgfParseResult,
.concr = parse->parser->concr,
.concr = parse->concr,
.completed = parsing->completed,
.choice = gu_new_choice(pool),
.en.next = pgf_parse_result_enum_next)->en;
@@ -818,17 +814,17 @@ pgf_parse_result(PgfParse* parse, GuPool* pool)
// TODO: s/CId/Cat, add the cid to Cat, make Cat the key to CncCat
PgfParse*
pgf_parser_parse(PgfParser* parser, PgfCId cat, size_t lin_idx, GuPool* pool)
pgf_parser_parse(PgfConcr* concr, PgfCId cat, size_t lin_idx, GuPool* pool)
{
PgfCncCat* cnccat =
gu_map_get(parser->concr->cnccats, &cat, PgfCncCat*);
gu_map_get(concr->cnccats, &cat, PgfCncCat*);
if (!cnccat) {
// error ...
gu_impossible();
}
gu_assert(lin_idx < cnccat->n_lins);
PgfParse* parse = pgf_new_parse(parser, parser->concr->max_fid, pool);
PgfParse* parse = pgf_new_parse(concr, concr->max_fid, pool);
parse->agenda = gu_new_buf(PgfItem*, pool);
PgfItemBuf* conts = gu_new_buf(PgfItem*, pool);
@@ -856,12 +852,3 @@ pgf_parser_parse(PgfParser* parser, PgfCId cat, size_t lin_idx, GuPool* pool)
}
return parse;
}
/* Allocate a PgfParser from pool that refers to concr.
 * concr must not be NULL (checked with gu_require). */
PgfParser*
pgf_new_parser(PgfConcr* concr, GuPool* pool)
{
	gu_require(concr != NULL);

	PgfParser* created = gu_new(PgfParser, pool);
	created->concr = concr;
	return created;
}

View File

@@ -17,30 +17,6 @@
typedef struct PgfParse PgfParse;
/** @name Creating a new parser
*
* A #PgfParser object can parse sentences of a single concrete category into
* abstract syntax trees (#PgfExpr). The parser is created with
* #pgf_new_parser.
*
* @{
*/
/// A parser for a single concrete category
typedef struct PgfParser PgfParser;
/// Create a new parser
PgfParser*
pgf_new_parser(PgfConcr* concr, GuPool* pool);
/**<
* @param concr The concrete category whose sentences are to be parsed
*
* @pool
*
* @return A newly created parser for the concrete category \p concr
*/
/** @}
*
* @name Parsing a sentence
@@ -57,7 +33,7 @@ pgf_new_parser(PgfConcr* concr, GuPool* pool);
/// Begin parsing
PgfParse*
pgf_parser_parse(PgfParser* parser, PgfCId cat, size_t lin_idx, GuPool* pool);
pgf_parser_parse(PgfConcr* concr, PgfCId cat, size_t lin_idx, GuPool* pool);
/**<
* @param concr The concrete syntax (#PgfConcr) whose sentences are to be parsed
*

View File

@@ -655,13 +655,32 @@ pgf_ccat_set_cnccat(PgfCCat* ccat)
return ccat->cnccat;
}
// Closure handed to gu_map_iter while loading a concrete syntax. The
// GuMapItor must stay the first member: pgf_read_ccat_cb casts the
// GuMapItor* it receives back to a PgfIndexFn*.
typedef struct {
GuMapItor fn;
// Concrete syntax whose indices are being filled.
PgfConcr* concr;
// Pool passed on to pgf_lzr_index for the index allocations.
GuPool *pool;
} PgfIndexFn;
void
pgf_lzr_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod,
GuPool *pool);
static void
pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err)
{
(void) (key && err);
PgfCCat** ccatp = value;
pgf_ccat_set_cnccat(*ccatp);
PgfIndexFn* clo = (PgfIndexFn*) fn;
PgfCCat* ccat = *((PgfCCat**) value);
pgf_ccat_set_cnccat(ccat);
if (!gu_seq_is_null(ccat->prods)) {
size_t n_prods = gu_seq_length(ccat->prods);
for (size_t i = 0; i < n_prods; i++) {
PgfProduction prod = gu_seq_get(ccat->prods, PgfProduction, i);
pgf_lzr_index(clo->concr, ccat, prod, clo->pool);
}
}
}
static void*
@@ -684,14 +703,16 @@ pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool,
GuMapType* ccats_t = gu_type_cast(gu_type(PgfCCatMap), GuMap);
concr->ccats =
gu_new_int_map(PgfCCat*, &gu_null_struct, pool);
concr->fun_indices = gu_map_type_new(PgfFunIndices, pool);
concr->coerce_idx = gu_map_type_new(PgfCoerceIdx, pool);
rdr->curr_ccats = concr->ccats;
pgf_read_into_map(ccats_t, rdr, concr->ccats, rdr->opool);
pgf_read_into_map(ccats_t, rdr, concr->ccats, rdr->opool);
concr->cnccats = pgf_read_new(rdr, gu_type(PgfCncCatMap),
rdr->opool, NULL);
concr->max_fid = pgf_read_int(rdr);
GuMapItor fn = { pgf_read_ccat_cb };
gu_map_iter(concr->ccats, &fn, NULL);
PgfIndexFn clo = { { pgf_read_ccat_cb }, concr, pool };
gu_map_iter(concr->ccats, &clo.fn, NULL);
return concr;
}

View File

@@ -68,12 +68,6 @@ int main(int argc, char* argv[]) {
goto fail_concr;
}
// Create the parser for the source category
PgfParser* parser = pgf_new_parser(from_concr, pool);
// Create a linearizer for the destination category
PgfLzr* lzr = pgf_new_lzr(to_concr, pool);
// Arbitrarily choose linearization index 0. Usually the initial
// categories we are interested in only have one field.
int lin_idx = 0;
@@ -110,7 +104,7 @@ int main(int argc, char* argv[]) {
// Begin parsing a sentence of the specified category
PgfParse* parse =
pgf_parser_parse(parser, cat, lin_idx, pool);
pgf_parser_parse(from_concr, cat, lin_idx, pool);
if (parse == NULL) {
fprintf(stderr, "Couldn't begin parsing\n");
status = EXIT_FAILURE;
@@ -149,7 +143,7 @@ int main(int argc, char* argv[]) {
// Enumerate the concrete syntax trees corresponding
// to the abstract tree.
GuEnum* cts = pgf_lzr_concretize(lzr, expr, ppool);
GuEnum* cts = pgf_lzr_concretize(to_concr, expr, ppool);
while (true) {
PgfCncTree ctree =
gu_next(cts, PgfCncTree, ppool);
@@ -159,7 +153,7 @@ int main(int argc, char* argv[]) {
gu_puts(" ", wtr, err);
// Linearize the concrete tree as a simple
// sequence of strings.
pgf_lzr_linearize_simple(lzr, ctree, lin_idx,
pgf_lzr_linearize_simple(to_concr , ctree, lin_idx,
wtr, err);
gu_putc('\n', wtr, err);
gu_writer_flush(wtr, err);