From 47e5e8c9663c35495419ae59ab4d1cd9709ec0e2 Mon Sep 17 00:00:00 2001 From: "kr.angelov" Date: Sat, 18 Feb 2012 16:22:40 +0000 Subject: [PATCH] libpgf: now the linearization index is created during the grammar loading which also makes the types PgfLzr and PgfParser redundant. --- src/runtime/c/pgf/data.h | 10 ++- src/runtime/c/pgf/linearize.c | 103 +++++++--------------------- src/runtime/c/pgf/linearize.h | 40 +---------- src/runtime/c/pgf/parser.c | 29 +++----- src/runtime/c/pgf/parser.h | 26 +------ src/runtime/c/pgf/reader.c | 31 +++++++-- src/runtime/c/utils/pgf-translate.c | 12 +--- 7 files changed, 76 insertions(+), 175 deletions(-) diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h index ea466b0c3..d97b0b49d 100644 --- a/src/runtime/c/pgf/data.h +++ b/src/runtime/c/pgf/data.h @@ -182,13 +182,21 @@ extern PgfCCat pgf_ccat_string, pgf_ccat_int, pgf_ccat_float, pgf_ccat_var; typedef PgfCIdMap PgfPrintNames; extern GU_DECLARE_TYPE(PgfPrintNames, GuStringMap); +typedef GuStringMap PgfFunIndices; +extern GU_DECLARE_TYPE(PgfFunIndices, GuStringMap); + +typedef GuMap PgfCoerceIdx; +extern GU_DECLARE_TYPE(PgfCoerceIdx, GuMap); + struct PgfConcr { PgfFlags* cflags; PgfPrintNames* printnames; GuMap* ccats; PgfCCatSeq extra_ccats; + PgfFunIndices* fun_indices; + PgfCoerceIdx* coerce_idx; PgfCncFuns* cncfuns; - PgfSequences* sequences; + PgfSequences* sequences; PgfCIdMap* cnccats; int max_fid; }; diff --git a/src/runtime/c/pgf/linearize.c b/src/runtime/c/pgf/linearize.c index 56634d9b1..77cf65813 100644 --- a/src/runtime/c/pgf/linearize.c +++ b/src/runtime/c/pgf/linearize.c @@ -105,42 +105,28 @@ static GU_DEFINE_TYPE(PgfInferMap, GuMap, gu_ptr_type(PgfLinInfers), &gu_null_struct); typedef GuStringMap PgfFunIndices; -static GU_DEFINE_TYPE(PgfFunIndices, GuStringMap, gu_ptr_type(PgfInferMap), +GU_DEFINE_TYPE(PgfFunIndices, GuStringMap, gu_ptr_type(PgfInferMap), &gu_null_struct); typedef GuBuf PgfCCatBuf; static GU_DEFINE_TYPE(PgfCCatBuf, GuBuf, gu_ptr_type(PgfCCat)); typedef GuMap PgfCoerceIdx; -static GU_DEFINE_TYPE(PgfCoerceIdx, GuMap, +GU_DEFINE_TYPE(PgfCoerceIdx, GuMap, gu_type(PgfCCat), NULL, gu_ptr_type(PgfCCatBuf), &gu_null_struct); -struct PgfLzr { - PgfConcr* cnc; - GuPool* pool; - PgfFunIndices* fun_indices; - PgfCoerceIdx* coerce_idx; -}; - -GU_DEFINE_TYPE( - PgfLzr, struct, - GU_MEMBER_P(PgfLzr, cnc, PgfConcr), - GU_MEMBER_P(PgfLzr, fun_indices, PgfFunIndices), - GU_MEMBER_P(PgfLzr, coerce_idx, PgfCoerceIdx)); - - - static void -pgf_lzr_add_infer_entry(PgfLzr* lzr, +pgf_lzr_add_infer_entry( PgfInferMap* infer_table, PgfCCat* cat, - PgfProductionApply* papply) + PgfProductionApply* papply, + GuPool *pool) { PgfPArgs args = papply->args; size_t n_args = gu_seq_length(args); - PgfCCatIds* arg_cats = gu_new_list(PgfCCatIds, lzr->pool, n_args); + PgfCCatIds* arg_cats = gu_new_list(PgfCCatIds, pool, n_args); for (size_t i = 0; i < n_args; i++) { // XXX: What about the hypos in the args? gu_list_index(arg_cats, i) = gu_seq_get(args, PgfPArg, i).ccat; @@ -153,7 +139,7 @@ pgf_lzr_add_infer_entry(PgfLzr* lzr, PgfLinInfers* entries = gu_map_get(infer_table, &arg_cats, PgfLinInfers*); if (!entries) { - entries = gu_new_buf(PgfLinInferEntry, lzr->pool); + entries = gu_new_buf(PgfLinInferEntry, pool); gu_map_put(infer_table, &arg_cats, PgfLinInfers*, entries); } else { // XXX: arg_cats is duplicate, we ought to free it @@ -168,32 +154,33 @@ pgf_lzr_add_infer_entry(PgfLzr* lzr, } -static void -pgf_lzr_index(PgfLzr* lzr, PgfCCat* cat, PgfProduction prod) +void +pgf_lzr_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod, + GuPool *pool) { void* data = gu_variant_data(prod); switch (gu_variant_tag(prod)) { case PGF_PRODUCTION_APPLY: { PgfProductionApply* papply = data; PgfInferMap* infer = - gu_map_get(lzr->fun_indices, &papply->fun->fun, + gu_map_get(concr->fun_indices, &papply->fun->fun, PgfInferMap*); gu_debug("index: %s -> %d", papply->fun->fun, cat->fid); if (!infer) { - infer = gu_map_type_new(PgfInferMap, lzr->pool); - gu_map_put(lzr->fun_indices, + infer = gu_map_type_new(PgfInferMap, pool); + gu_map_put(concr->fun_indices, &papply->fun->fun, PgfInferMap*, infer); } - pgf_lzr_add_infer_entry(lzr, infer, cat, papply); + pgf_lzr_add_infer_entry(infer, cat, papply, pool); break; } case PGF_PRODUCTION_COERCE: { PgfProductionCoerce* pcoerce = data; - PgfCCatBuf* cats = gu_map_get(lzr->coerce_idx, pcoerce->coerce, + PgfCCatBuf* cats = gu_map_get(concr->coerce_idx, pcoerce->coerce, PgfCCatBuf*); if (!cats) { - cats = gu_new_buf(PgfCCat*, lzr->pool); - gu_map_put(lzr->coerce_idx, + cats = gu_new_buf(PgfCCat*, pool); + gu_map_put(concr->coerce_idx, pcoerce->coerce, PgfCCatBuf*, cats); } gu_debug("coerce_idx: %d -> %d", pcoerce->coerce->fid, cat->fid); @@ -206,48 +193,10 @@ pgf_lzr_index(PgfLzr* lzr, PgfCCat* cat, PgfProduction prod) } } -typedef struct { - GuMapItor fn; - PgfLzr* lzr; -} PgfLzrIndexFn; - -static void -pgf_lzr_index_cnccat_cb(GuMapItor* fn, const void* key, void* value, - GuExn* err) -{ - PgfLzrIndexFn* clo = (PgfLzrIndexFn*) fn; - PgfCCat *ccat = *((PgfCCat **) value); - - gu_debug("ccat: %d", ccat->fid); - if (gu_seq_is_null(ccat->prods)) { - return; - } - size_t n_prods = gu_seq_length(ccat->prods); - for (size_t i = 0; i < n_prods; i++) { - PgfProduction prod = gu_seq_get(ccat->prods, PgfProduction, i); - pgf_lzr_index(clo->lzr, ccat, prod); - } -} - - -PgfLzr* -pgf_new_lzr(PgfConcr* cnc, GuPool* pool) -{ - PgfLzr* lzr = gu_new(PgfLzr, pool); - lzr->cnc = cnc; - lzr->pool = pool; - lzr->fun_indices = gu_map_type_new(PgfFunIndices, pool); - lzr->coerce_idx = gu_map_type_new(PgfCoerceIdx, pool); - PgfLzrIndexFn clo = { { pgf_lzr_index_cnccat_cb }, lzr }; - gu_map_iter(cnc->ccats, &clo.fn, NULL); - // TODO: prune productions with zero linearizations - return lzr; -} - typedef struct PgfLzn PgfLzn; struct PgfLzn { - PgfLzr* lzr; + PgfConcr* concr; GuChoice* ch; PgfExpr expr; GuEnum en; @@ -282,7 +231,7 @@ pgf_lzn_pick_supercat(PgfLzn* lzn, PgfCCat* cat) gu_enter("->"); while (true) { PgfCCatBuf* supers = - gu_map_get(lzn->lzr->coerce_idx, cat, PgfCCatBuf*); + gu_map_get(lzn->concr->coerce_idx, cat, PgfCCatBuf*); if (!supers) { break; } @@ -347,7 +296,7 @@ pgf_lzn_infer_application(PgfLzn* lzn, PgfApplication* appl, GuPool* pool, PgfCncTree* ctree_out) { PgfInferMap* infer = - gu_map_get(lzn->lzr->fun_indices, &appl->fun, PgfInferMap*); + gu_map_get(lzn->concr->fun_indices, &appl->fun, PgfInferMap*); gu_enter("-> f: %s, n_args: %d", appl->fun, appl->n_args); if (infer == NULL) { gu_exit("<- couldn't find f"); @@ -458,10 +407,10 @@ pgf_cnc_tree_enum_next(GuEnum* self, void* to, GuPool* pool) } PgfCncTreeEnum* -pgf_lzr_concretize(PgfLzr* lzr, PgfExpr expr, GuPool* pool) +pgf_lzr_concretize(PgfConcr* concr, PgfExpr expr, GuPool* pool) { PgfLzn* lzn = gu_new(PgfLzn, pool); - lzn->lzr = lzr; + lzn->concr = concr; lzn->expr = expr; lzn->ch = gu_new_choice(pool); lzn->en.next = pgf_cnc_tree_enum_next; @@ -487,7 +436,7 @@ pgf_cnc_tree_dimension(PgfCncTree ctree) } void -pgf_lzr_linearize(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs** fnsp) +pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs** fnsp) { PgfLinFuncs* fns = *fnsp; GuVariantInfo cti = gu_variant_open(ctree); @@ -521,7 +470,7 @@ pgf_lzr_linearize(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs** f PgfSymbolIdx* sidx = sym_i.data; gu_assert((unsigned) sidx->d < fapp->n_args); PgfCncTree argf = fapp->args[sidx->d]; - pgf_lzr_linearize(lzr, argf, sidx->r, fnsp); + pgf_lzr_linearize(concr, argf, sidx->r, fnsp); break; } case PGF_SYMBOL_KS: { @@ -581,7 +530,7 @@ static PgfLinFuncs pgf_file_lin_funcs = { }; void -pgf_lzr_linearize_simple(PgfLzr* lzr, PgfCncTree ctree, +pgf_lzr_linearize_simple(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, GuWriter* wtr, GuExn* err) { PgfSimpleLin flin = { @@ -589,5 +538,5 @@ pgf_lzr_linearize_simple(PgfLzr* lzr, PgfCncTree ctree, .wtr = wtr, .err = err }; - pgf_lzr_linearize(lzr, ctree, lin_idx, &flin.funcs); + pgf_lzr_linearize(concr, ctree, lin_idx, &flin.funcs); } diff --git a/src/runtime/c/pgf/linearize.h b/src/runtime/c/pgf/linearize.h index db36343f2..c3a1cc2ca 100644 --- a/src/runtime/c/pgf/linearize.h +++ b/src/runtime/c/pgf/linearize.h @@ -25,40 +25,6 @@ /// Linearization of abstract syntax trees. /// @file -/** @name Linearizers - * - * Linearization begins by choosing a concrete category (#PgfConcr) for some - * grammar, and creating a new linearizer (#PgfLzr) which can then be used to - * linearize abstract syntax trees (#PgfExpr) of that grammar into the given - * concrete category. - * - * @{ - */ - - -/// A linearizer. -typedef struct PgfLzr PgfLzr; -/**< - * - * A #PgfLzr object transforms abstract syntax trees of a PGF grammar - * into sequences of token events for a single concrete category of - * that grammar. - * - */ -GU_DECLARE_TYPE(PgfLzr, struct); - - -/// Create a new linearizer. -PgfLzr* -pgf_new_lzr(PgfConcr* cnc, GuPool* pool); -/**< - * @param cnc The concrete category to linearize to. - * - * @pool - * - * @return A new linearizer. - */ - /** @} * * @name Enumerating concrete syntax trees @@ -80,7 +46,7 @@ typedef GuEnum PgfCncTreeEnum; /// Begin enumerating concrete syntax variants. PgfCncTreeEnum* -pgf_lzr_concretize(PgfLzr* lzr, PgfExpr expr, GuPool* pool); +pgf_lzr_concretize(PgfConcr* concr, PgfExpr expr, GuPool* pool); /** @} * @@ -127,13 +93,13 @@ struct PgfLinFuncs /// Linearize a concrete syntax tree. void -pgf_lzr_linearize(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx, +pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs** fnsp); /// Linearize a concrete syntax tree as space-separated tokens. void -pgf_lzr_linearize_simple(PgfLzr* lzr, PgfCncTree ctree, +pgf_lzr_linearize_simple(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, GuWriter* wtr, GuExn* err); diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index 4e27a54d6..7f98070b8 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -18,12 +18,8 @@ typedef GuMap PgfTransitions; typedef GuBuf PgfCCatBuf; -struct PgfParser { - PgfConcr* concr; -}; - struct PgfParse { - PgfParser* parser; + PgfConcr* concr; PgfItemBuf* agenda; int max_fid; }; @@ -650,10 +646,10 @@ pgf_new_parsing(PgfLexCallback* callback, int max_fid, } static PgfParse* -pgf_new_parse(PgfParser* parser, int max_fid, GuPool* pool) +pgf_new_parse(PgfConcr* concr, int max_fid, GuPool* pool) { PgfParse* parse = gu_new(PgfParse, pool); - parse->parser = parser; + parse->concr = concr; parse->agenda = NULL; parse->max_fid = max_fid; return parse; @@ -692,7 +688,7 @@ pgf_parse_token(PgfParse* parse, PgfToken tok, GuPool* pool) PgfParse* next_parse = NULL; if (gu_buf_length(agenda) > 0) { - next_parse = pgf_new_parse(parse->parser, parse->max_fid, pool); + next_parse = pgf_new_parse(parse->concr, parse->max_fid, pool); next_parse->agenda = agenda; next_parse->max_fid= parsing->max_fid; } @@ -805,7 +801,7 @@ pgf_parse_result(PgfParse* parse, GuPool* pool) PgfExprEnum* en = &gu_new_i(pool, PgfParseResult, - .concr = parse->parser->concr, + .concr = parse->concr, .completed = parsing->completed, .choice = gu_new_choice(pool), .en.next = pgf_parse_result_enum_next)->en; @@ -818,17 +814,17 @@ pgf_parse_result(PgfParse* parse, GuPool* pool) // TODO: s/CId/Cat, add the cid to Cat, make Cat the key to CncCat PgfParse* -pgf_parser_parse(PgfParser* parser, PgfCId cat, size_t lin_idx, GuPool* pool) +pgf_parser_parse(PgfConcr* concr, PgfCId cat, size_t lin_idx, GuPool* pool) { PgfCncCat* cnccat = - gu_map_get(parser->concr->cnccats, &cat, PgfCncCat*); + gu_map_get(concr->cnccats, &cat, PgfCncCat*); if (!cnccat) { // error ... gu_impossible(); } gu_assert(lin_idx < cnccat->n_lins); - PgfParse* parse = pgf_new_parse(parser, parser->concr->max_fid, pool); + PgfParse* parse = pgf_new_parse(concr, concr->max_fid, pool); parse->agenda = gu_new_buf(PgfItem*, pool); PgfItemBuf* conts = gu_new_buf(PgfItem*, pool); @@ -856,12 +852,3 @@ pgf_parser_parse(PgfParser* parser, PgfCId cat, size_t lin_idx, GuPool* pool) } return parse; } - -PgfParser* -pgf_new_parser(PgfConcr* concr, GuPool* pool) -{ - gu_require(concr != NULL); - PgfParser* parser = gu_new(PgfParser, pool); - parser->concr = concr; - return parser; -} diff --git a/src/runtime/c/pgf/parser.h b/src/runtime/c/pgf/parser.h index 127bed5dc..26cc61210 100644 --- a/src/runtime/c/pgf/parser.h +++ b/src/runtime/c/pgf/parser.h @@ -17,30 +17,6 @@ typedef struct PgfParse PgfParse; -/** @name Creating a new parser - * - * A #PgfParser object can parse sentences of a single concrete category into - * abstract syntax trees (#PgfExpr). The parser is created with - * #pgf_new_parser. - * - * @{ - */ - -/// A parser for a single concrete category -typedef struct PgfParser PgfParser; - - -/// Create a new parser -PgfParser* -pgf_new_parser(PgfConcr* concr, GuPool* pool); -/**< - * @param concr The concrete category whose sentences are to be parsed - * - * @pool - * - * @return A newly created parser for the concrete category \p concr - */ - /** @} * * @name Parsing a sentence @@ -57,7 +33,7 @@ pgf_new_parser(PgfConcr* concr, GuPool* pool); /// Begin parsing PgfParse* -pgf_parser_parse(PgfParser* parser, PgfCId cat, size_t lin_idx, GuPool* pool); +pgf_parser_parse(PgfConcr* concr, PgfCId cat, size_t lin_idx, GuPool* pool); /**< * @param parser The parser to use * diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c index 400ffee9a..0fd6297ba 100644 --- a/src/runtime/c/pgf/reader.c +++ b/src/runtime/c/pgf/reader.c @@ -655,13 +655,32 @@ pgf_ccat_set_cnccat(PgfCCat* ccat) return ccat->cnccat; } +typedef struct { + GuMapItor fn; + PgfConcr* concr; + GuPool *pool; +} PgfIndexFn; + +void +pgf_lzr_index(PgfConcr* concr, PgfCCat* cat, PgfProduction prod, + GuPool *pool); static void pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err) { (void) (key && err); - PgfCCat** ccatp = value; - pgf_ccat_set_cnccat(*ccatp); + PgfIndexFn* clo = (PgfIndexFn*) fn; + PgfCCat* ccat = *((PgfCCat**) value); + + pgf_ccat_set_cnccat(ccat); + + if (!gu_seq_is_null(ccat->prods)) { + size_t n_prods = gu_seq_length(ccat->prods); + for (size_t i = 0; i < n_prods; i++) { + PgfProduction prod = gu_seq_get(ccat->prods, PgfProduction, i); + pgf_lzr_index(clo->concr, ccat, prod, clo->pool); + } + } } static void* @@ -684,14 +703,16 @@ pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool, GuMapType* ccats_t = gu_type_cast(gu_type(PgfCCatMap), GuMap); concr->ccats = gu_new_int_map(PgfCCat*, &gu_null_struct, pool); + concr->fun_indices = gu_map_type_new(PgfFunIndices, pool); + concr->coerce_idx = gu_map_type_new(PgfCoerceIdx, pool); rdr->curr_ccats = concr->ccats; - pgf_read_into_map(ccats_t, rdr, concr->ccats, rdr->opool); + pgf_read_into_map(ccats_t, rdr, concr->ccats, rdr->opool); concr->cnccats = pgf_read_new(rdr, gu_type(PgfCncCatMap), rdr->opool, NULL); concr->max_fid = pgf_read_int(rdr); - GuMapItor fn = { pgf_read_ccat_cb }; - gu_map_iter(concr->ccats, &fn, NULL); + PgfIndexFn clo = { { pgf_read_ccat_cb }, concr, pool }; + gu_map_iter(concr->ccats, &clo.fn, NULL); return concr; } diff --git a/src/runtime/c/utils/pgf-translate.c b/src/runtime/c/utils/pgf-translate.c index 7837d3831..6d0a8804d 100644 --- a/src/runtime/c/utils/pgf-translate.c +++ b/src/runtime/c/utils/pgf-translate.c @@ -68,12 +68,6 @@ int main(int argc, char* argv[]) { goto fail_concr; } - // Create the parser for the source category - PgfParser* parser = pgf_new_parser(from_concr, pool); - - // Create a linearizer for the destination category - PgfLzr* lzr = pgf_new_lzr(to_concr, pool); - // Arbitrarily choose linearization index 0. Usually the initial // categories we are interested in only have one field. int lin_idx = 0; @@ -110,7 +104,7 @@ int main(int argc, char* argv[]) { // Begin parsing a sentence of the specified category PgfParse* parse = - pgf_parser_parse(parser, cat, lin_idx, pool); + pgf_parser_parse(from_concr, cat, lin_idx, pool); if (parse == NULL) { fprintf(stderr, "Couldn't begin parsing\n"); status = EXIT_FAILURE; @@ -149,7 +143,7 @@ int main(int argc, char* argv[]) { // Enumerate the concrete syntax trees corresponding // to the abstract tree. - GuEnum* cts = pgf_lzr_concretize(lzr, expr, ppool); + GuEnum* cts = pgf_lzr_concretize(to_concr, expr, ppool); while (true) { PgfCncTree ctree = gu_next(cts, PgfCncTree, ppool); @@ -159,7 +153,7 @@ int main(int argc, char* argv[]) { gu_puts(" ", wtr, err); // Linearize the concrete tree as a simple // sequence of strings. - pgf_lzr_linearize_simple(lzr, ctree, lin_idx, + pgf_lzr_linearize_simple(to_concr , ctree, lin_idx, wtr, err); gu_putc('\n', wtr, err); gu_writer_flush(wtr, err);