GuString is now an ordinary C string, which makes life easier. In addition, PgfSymbolKS, PgfExprFun and PgfLiteralStr now keep their strings as embedded flexible arrays. The latter change gives us the same compactness as the old representation, but it is a lot easier to use.

This commit is contained in:
kr.angelov
2013-10-04 12:04:39 +00:00
parent 27091048ce
commit e8335806af
25 changed files with 412 additions and 779 deletions

View File

@@ -15,7 +15,7 @@ pgf_tokens_equal(PgfTokens* t1, PgfTokens* t2)
for (size_t i = 0; i < len1; i++) {
GuString s1 = gu_seq_get(t1, PgfToken, i);
GuString s2 = gu_seq_get(t2, PgfToken, i);
if (!gu_string_eq(s1, s2)) {
if (strcmp(s1, s2) != 0) {
return false;
}
}
@@ -51,7 +51,9 @@ GU_DEFINE_TYPE(PgfMetaChildMap, GuMap,
GU_DEFINE_TYPE(PgfAbsCat, abstract);
static GuString empty_string = "";
GU_DEFINE_TYPE(
PgfPrintNames, PgfCIdMap, gu_type(GuString), &gu_empty_string);
PgfPrintNames, PgfCIdMap, gu_type(GuString), &empty_string);
GU_DEFINE_TYPE(PgfConcr, abstract);

View File

@@ -190,7 +190,7 @@ typedef struct {
typedef PgfSymbolIdx PgfSymbolCat, PgfSymbolLit, PgfSymbolVar;
typedef struct {
PgfToken token;
char token[0]; // a flexible array that contains the token
} PgfSymbolKS;
typedef struct PgfSymbolKP

View File

@@ -152,7 +152,7 @@ struct PgfExprParser {
GuPool* expr_pool;
GuPool* tmp_pool;
PGF_TOKEN_TAG token_tag;
GuCharBuf* token_value;
GuBuf* token_value;
int ch;
};
@@ -232,7 +232,7 @@ pgf_expr_parser_token(PgfExprParser* parser)
parser->token_tag = PGF_TOKEN_WILD;
break;
default: {
GuCharBuf* chars = gu_new_buf(char, parser->tmp_pool);
GuBuf* chars = gu_new_buf(char, parser->tmp_pool);
if (isalpha(parser->ch)) {
while (isalnum(parser->ch) ||
@@ -241,6 +241,7 @@ pgf_expr_parser_token(PgfExprParser* parser)
gu_buf_push(chars, char, parser->ch);
pgf_expr_parser_getc(parser);
}
gu_buf_push(chars, char, 0);
parser->token_tag = PGF_TOKEN_IDENT;
parser->token_value = chars;
} else if (isdigit(parser->ch)) {
@@ -257,9 +258,11 @@ pgf_expr_parser_token(PgfExprParser* parser)
gu_buf_push(chars, char, parser->ch);
pgf_expr_parser_getc(parser);
}
gu_buf_push(chars, char, 0);
parser->token_tag = PGF_TOKEN_FLT;
parser->token_value = chars;
} else {
gu_buf_push(chars, char, 0);
parser->token_tag = PGF_TOKEN_INT;
parser->token_value = chars;
}
@@ -273,6 +276,7 @@ pgf_expr_parser_token(PgfExprParser* parser)
if (parser->ch == '"') {
pgf_expr_parser_getc(parser);
gu_buf_push(chars, char, 0);
parser->token_tag = PGF_TOKEN_STR;
parser->token_value = chars;
}
@@ -341,18 +345,20 @@ pgf_expr_parser_term(PgfExprParser* parser)
0);
}
case PGF_TOKEN_IDENT: {
char* str =
gu_char_buf_str(parser->token_value, parser->tmp_pool);
PgfCId id = gu_str_string(str, parser->expr_pool);
PgfCId id = gu_buf_data(parser->token_value);
pgf_expr_parser_token(parser);
return gu_new_variant_i(parser->expr_pool,
PGF_EXPR_FUN,
PgfExprFun,
id);
PgfExpr e;
PgfExprFun* fun =
gu_new_flex_variant(PGF_EXPR_FUN,
PgfExprFun,
fun, strlen(id)+1,
&e, parser->expr_pool);
strcpy(fun->fun, id);
return e;
}
case PGF_TOKEN_INT: {
char* str =
gu_char_buf_str(parser->token_value, parser->tmp_pool);
gu_buf_data(parser->token_value);
int n = atoi(str);
pgf_expr_parser_token(parser);
PgfLiteral lit =
@@ -367,22 +373,23 @@ pgf_expr_parser_term(PgfExprParser* parser)
}
case PGF_TOKEN_STR: {
char* str =
gu_char_buf_str(parser->token_value, parser->tmp_pool);
GuString s = gu_str_string(str, parser->expr_pool);
gu_buf_data(parser->token_value);
pgf_expr_parser_token(parser);
PgfLiteral lit =
gu_new_variant_i(parser->expr_pool,
PGF_LITERAL_STR,
PgfLiteralStr,
s);
PgfLiteral lit;
PgfLiteralStr* plit =
gu_new_flex_variant(PGF_LITERAL_STR,
PgfLiteralStr,
val, strlen(str)+1,
&lit, parser->expr_pool);
strcpy(plit->val, str);
return gu_new_variant_i(parser->expr_pool,
PGF_EXPR_LIT,
PgfExprLit,
lit);
PGF_EXPR_LIT,
PgfExprLit,
lit);
}
case PGF_TOKEN_FLT: {
char* str =
gu_char_buf_str(parser->token_value, parser->tmp_pool);
gu_buf_data(parser->token_value);
double d = atof(str);
pgf_expr_parser_token(parser);
PgfLiteral lit =
@@ -442,12 +449,11 @@ pgf_expr_parser_bind(PgfExprParser* parser, GuBuf* binds)
for (;;) {
if (parser->token_tag == PGF_TOKEN_IDENT) {
char* str =
gu_char_buf_str(parser->token_value, parser->tmp_pool);
var = gu_str_string(str, parser->expr_pool);
var =
gu_string_copy(gu_buf_data(parser->token_value), parser->expr_pool);
pgf_expr_parser_token(parser);
} else if (parser->token_tag == PGF_TOKEN_WILD) {
var = gu_str_string("_", parser->expr_pool);
var = "_";
pgf_expr_parser_token(parser);
} else {
return false;
@@ -562,12 +568,11 @@ pgf_expr_parser_hypos(PgfExprParser* parser, GuBuf* hypos)
}
if (parser->token_tag == PGF_TOKEN_IDENT) {
char* str =
gu_char_buf_str(parser->token_value, parser->tmp_pool);
var = gu_str_string(str, parser->expr_pool);
var =
gu_string_copy(gu_buf_data(parser->token_value), parser->expr_pool);
pgf_expr_parser_token(parser);
} else if (parser->token_tag == PGF_TOKEN_WILD) {
var = gu_str_string("_", parser->expr_pool);
var = "_";
pgf_expr_parser_token(parser);
} else {
return false;
@@ -603,9 +608,8 @@ pgf_expr_parser_atom(PgfExprParser* parser)
if (parser->token_tag != PGF_TOKEN_IDENT)
return NULL;
char* str =
gu_char_buf_str(parser->token_value, parser->tmp_pool);
PgfCId cid = gu_str_string(str, parser->expr_pool);
PgfCId cid =
gu_string_copy(gu_buf_data(parser->token_value), parser->expr_pool);
pgf_expr_parser_token(parser);
GuBuf* args = gu_new_buf(PgfExpr, parser->tmp_pool);
@@ -663,7 +667,7 @@ pgf_expr_parser_type(PgfExprParser* parser)
} else {
PgfHypo* hypo = gu_buf_extend(hypos);
hypo->bind_type = PGF_BIND_TYPE_EXPLICIT;
hypo->cid = gu_str_string("_", parser->expr_pool);
hypo->cid = "_";
hypo->type = NULL;
}
@@ -699,7 +703,7 @@ pgf_expr_parser_type(PgfExprParser* parser)
PgfHypo* hypo = gu_buf_extend(hypos);
hypo->bind_type = PGF_BIND_TYPE_EXPLICIT;
hypo->cid = gu_str_string("_", parser->expr_pool);
hypo->cid = "_";
hypo->type = type;
}
}
@@ -761,7 +765,7 @@ pgf_literal_eq(PgfLiteral lit1, PgfLiteral lit2)
case PGF_LITERAL_STR: {
PgfLiteralStr* lit1 = ei1.data;
PgfLiteralStr* lit2 = ei2.data;
return gu_string_eq(lit1->val, lit2->val);
return strcmp(lit1->val, lit2->val) == 0;
}
case PGF_LITERAL_INT: {
PgfLiteralInt* lit1 = ei1.data;
@@ -793,7 +797,7 @@ pgf_expr_eq(PgfExpr e1, PgfExpr e2)
case PGF_EXPR_ABS: {
PgfExprAbs* abs1 = ei1.data;
PgfExprAbs* abs2 = ei2.data;
return gu_string_eq(abs1->id, abs2->id) &&
return strcmp(abs1->id, abs2->id) == 0 &&
pgf_expr_eq(abs1->body, abs2->body);
}
case PGF_EXPR_APP: {
@@ -815,7 +819,7 @@ pgf_expr_eq(PgfExpr e1, PgfExpr e2)
case PGF_EXPR_FUN: {
PgfExprFun* fun1 = ei1.data;
PgfExprFun* fun2 = ei2.data;
return gu_string_eq(fun1->fun, fun2->fun);
return strcmp(fun1->fun, fun2->fun) == 0;
}
case PGF_EXPR_VAR: {
PgfExprVar* var1 = ei1.data;
@@ -1076,9 +1080,8 @@ pgf_print_hypo(PgfHypo *hypo, PgfPrintContext* ctxt, int prec,
gu_puts(")", out, err);
} else {
GuPool* tmp_pool = gu_new_pool();
GuString tmp = gu_str_string("_", tmp_pool);
if (!gu_string_eq(hypo->cid, tmp)) {
if (strcmp(hypo->cid, "_") != 0) {
gu_puts("(", out, err);
gu_string_write(hypo->cid, out, err);
gu_puts(" : ", out, err);
@@ -1158,14 +1161,14 @@ pgf_type_eq(PgfType* t1, PgfType* t2)
if (hypo1->bind_type != hypo2->bind_type)
return false;
if (!gu_string_eq(hypo1->cid, hypo2->cid))
if (strcmp(hypo1->cid, hypo2->cid) != 0)
return false;
if (!pgf_type_eq(hypo1->type, hypo2->type))
return false;
}
if (!gu_string_eq(t1->cid, t2->cid))
if (strcmp(t1->cid, t2->cid) != 0)
return false;
if (t1->n_exprs != t2->n_exprs)

View File

@@ -37,7 +37,7 @@ typedef enum {
} PgfLiteralTag;
typedef struct {
GuString val;
char val[0]; // a flexible array that contains the value
} PgfLiteralStr;
typedef struct {
@@ -102,7 +102,7 @@ typedef struct {
} PgfExprMeta;
typedef struct {
PgfCId fun;
char fun[0];
} PgfExprFun;
typedef struct {

View File

@@ -112,7 +112,6 @@ typedef struct {
size_t level;
GuBuf* internals;
GuBuf* leaves;
GuString wildcard;
} PgfBracketLznState;
static void
@@ -167,7 +166,7 @@ pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, int linde
{
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
if (gu_string_eq(cat, state->wildcard))
if (strcmp(cat, "_") == 0)
return;
state->level++;
@@ -203,7 +202,7 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, int lindex,
{
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
if (gu_string_eq(cat, state->wildcard))
if (strcmp(cat, "_") == 0)
return;
state->level--;
@@ -281,7 +280,6 @@ pgf_graphviz_parse_tree(PgfConcr* concr, PgfExpr expr, GuOut* out, GuExn* err)
state.level = -1;
state.internals = gu_new_buf(GuBuf*, tmp_pool);
state.leaves = gu_new_buf(PgfParseNode*, tmp_pool);
state.wildcard = gu_str_string("_", tmp_pool);
pgf_lzr_linearize(concr, ctree, 0, &state.funcs);
size_t len = gu_buf_length(state.internals);

View File

@@ -201,7 +201,7 @@ pgf_jit_predicate(PgfJitState* state, PgfCIdMap* abscats,
// call the predicate for the category in hypo->type->cid
PgfAbsCat* arg =
gu_map_get(abscats, &hypo->type->cid, PgfAbsCat*);
gu_map_get(abscats, hypo->type->cid, PgfAbsCat*);
#ifdef PGF_JIT_DEBUG
gu_puts(" CALL ", wtr, err);
@@ -314,7 +314,7 @@ pgf_jit_done(PgfJitState* state, PgfAbstr* abstr)
PgfCallPatch* patch =
gu_buf_index(state->patches, PgfCallPatch, i);
PgfAbsCat* arg =
gu_map_get(abstr->cats, &patch->cid, PgfAbsCat*);
gu_map_get(abstr->cats, patch->cid, PgfAbsCat*);
gu_assert(arg != NULL);
jit_patch_calli(patch->ref,(jit_insn*) arg->predicate);

View File

@@ -108,7 +108,7 @@ pgf_new_simple_lexer(GuIn *in, GuPool *pool)
{
PgfSimpleLexer* lexer = gu_new(PgfSimpleLexer, pool);
lexer->base.read_token = pgf_simple_lexer_read_token;
lexer->base.tok = gu_empty_string;
lexer->base.tok = "";
lexer->in = in;
lexer->pool = pool;
lexer->ucs = ' ';

View File

@@ -45,12 +45,12 @@ pgf_lzr_index(PgfConcr* concr,
case PGF_PRODUCTION_APPLY: {
PgfProductionApply* papply = data;
PgfCncOverloadMap* overl_table =
gu_map_get(concr->fun_indices, &papply->fun->absfun->name,
gu_map_get(concr->fun_indices, papply->fun->absfun->name,
PgfCncOverloadMap*);
if (!overl_table) {
overl_table = gu_map_type_new(PgfCncOverloadMap, pool);
gu_map_put(concr->fun_indices,
&papply->fun->absfun->name, PgfCncOverloadMap*, overl_table);
papply->fun->absfun->name, PgfCncOverloadMap*, overl_table);
}
pgf_lzr_add_overl_entry(overl_table, ccat, papply, pool);
break;
@@ -227,11 +227,12 @@ pgf_lzn_resolve_def(PgfLzn* lzn, PgfCncFuns* lindefs, GuString s, GuPool* pool)
PgfCncTreeLit,
&lit, pool);
clit->fid = lzn->fid++;
clit->lit =
gu_new_variant_i(pool,
PGF_LITERAL_STR,
PgfLiteralStr,
s);
PgfLiteralStr* lit_str =
gu_new_flex_variant(PGF_LITERAL_STR,
PgfLiteralStr,
val, strlen(s),
&clit->lit, pool);
strcpy((char*) lit_str->val, (char*) s);
if (lindefs == NULL)
return lit;
@@ -322,8 +323,7 @@ pgf_lzn_resolve(PgfLzn* lzn, PgfExpr expr, PgfCCat* ccat, GuPool* pool)
goto done;
}
GuString s = gu_str_string("?", pool);
ret = pgf_lzn_resolve_def(lzn, ccat->lindefs, s, pool);
ret = pgf_lzn_resolve_def(lzn, ccat->lindefs, "?", pool);
goto done;
}
}
@@ -331,7 +331,7 @@ pgf_lzn_resolve(PgfLzn* lzn, PgfExpr expr, PgfCCat* ccat, GuPool* pool)
PgfExprFun* efun = i.data;
PgfCncOverloadMap* overl_table =
gu_map_get(lzn->concr->fun_indices, &efun->fun, PgfCncOverloadMap*);
gu_map_get(lzn->concr->fun_indices, efun->fun, PgfCncOverloadMap*);
if (overl_table == NULL) {
if (ccat != NULL && ccat->lindefs == NULL) {
goto done;
@@ -345,7 +345,7 @@ pgf_lzn_resolve(PgfLzn* lzn, PgfExpr expr, PgfCCat* ccat, GuPool* pool)
gu_putc('[', out, err);
gu_string_write(efun->fun, out, err);
gu_putc(']', out, err);
GuString s = gu_string_buf_freeze(sbuf, pool);
GuString s = gu_string_buf_freeze(sbuf, tmp_pool);
if (ccat != NULL) {
ret = pgf_lzn_resolve_def(lzn, ccat->lindefs, s, pool);
@@ -356,10 +356,11 @@ pgf_lzn_resolve(PgfLzn* lzn, PgfExpr expr, PgfCCat* ccat, GuPool* pool)
&ret, pool);
clit->fid = lzn->fid++;
PgfLiteralStr* lit =
gu_new_variant(PGF_LITERAL_STR,
PgfLiteralStr,
&clit->lit, pool);
lit->val = s;
gu_new_flex_variant(PGF_LITERAL_STR,
PgfLiteralStr,
val, strlen(s)+1,
&clit->lit, pool);
strcpy(lit->val, s);
}
gu_pool_free(tmp_pool);
@@ -557,7 +558,7 @@ pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs
if (fns->begin_phrase) {
fns->begin_phrase(fnsp,
cat, flit->fid, 0,
gu_empty_string);
"");
}
if (fns->expr_literal) {
@@ -567,7 +568,7 @@ pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs
if (fns->end_phrase) {
fns->end_phrase(fnsp,
cat, flit->fid, 0,
gu_empty_string);
"");
}
break;
@@ -697,7 +698,7 @@ pgf_get_tokens(PgfSequence* seq, uint16_t seq_idx, GuPool* pool)
pgf_lzr_linearize_sequence(NULL, NULL, seq, seq_idx, &flin.funcs);
GuString tokens = gu_ok(err) ? gu_string_buf_freeze(sbuf, pool)
: gu_empty_string;
: "";
gu_pool_free(tmp_pool);

View File

@@ -39,10 +39,11 @@ pgf_match_string_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
PgfExprLit,
&ep->expr, pool);
PgfLiteralStr *lit_str =
gu_new_variant(PGF_LITERAL_STR,
PgfLiteralStr,
&expr_lit->lit, pool);
lit_str->val = sks->token;
gu_new_flex_variant(PGF_LITERAL_STR,
PgfLiteralStr,
val, strlen(sks->token)+1,
&expr_lit->lit, pool);
strcpy(lit_str->val, sks->token);
*out_ep = ep;
accepted = false;
@@ -185,12 +186,10 @@ pgf_match_name_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
GuString hyp = gu_str_string("-", tmp_pool);
bool iscap = false;
if (gu_string_eq(tok, hyp)) {
if (strcmp(tok, "-") == 0) {
iscap = true;
} else if (!gu_string_eq(tok, gu_empty_string)) {
} else if (*tok) {
GuIn* in = gu_string_in(tok, tmp_pool);
iscap = iswupper(gu_in_utf8(in, err));
}
@@ -218,21 +217,24 @@ pgf_match_name_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
gu_new_variant(PGF_EXPR_APP,
PgfExprApp,
&ep->expr, pool);
GuString con = "MkSymb";
PgfExprFun *expr_fun =
gu_new_variant(PGF_EXPR_FUN,
PgfExprFun,
&expr_app->fun, pool);
expr_fun->fun = gu_str_string("MkSymb", pool);
gu_new_flex_variant(PGF_EXPR_FUN,
PgfExprFun,
fun, strlen(con)+1,
&expr_app->fun, pool);
strcpy(expr_fun->fun, con);
PgfExprLit *expr_lit =
gu_new_variant(PGF_EXPR_LIT,
PgfExprLit,
&expr_app->arg, pool);
GuString val = gu_string_buf_freeze(sbuf, tmp_pool);
PgfLiteralStr *lit_str =
gu_new_variant(PGF_LITERAL_STR,
PgfLiteralStr,
&expr_lit->lit, pool);
lit_str->val = gu_string_buf_freeze(sbuf, pool);
gu_new_flex_variant(PGF_LITERAL_STR,
PgfLiteralStr,
val, strlen(val)+1,
&expr_lit->lit, pool);
strcpy(lit_str->val, val);
*out_ep = ep;
} else {
*out_ep = NULL;

View File

@@ -145,24 +145,7 @@ GU_DEFINE_TYPE(PgfLeftcornerTokIdx, GuStringMap,
static PgfSymbol
pgf_prev_extern_sym(PgfSymbol sym)
{
GuVariantInfo i = gu_variant_open(sym);
switch (i.tag) {
case PGF_SYMBOL_CAT:
return *((PgfSymbol*) (((PgfSymbolCat*) i.data)+1));
case PGF_SYMBOL_KP:
return *((PgfSymbol*) (((PgfSymbolKP*) i.data)+1));
case PGF_SYMBOL_KS:
return *((PgfSymbol*) (((PgfSymbolKS*) i.data)+1));
case PGF_SYMBOL_LIT:
return *((PgfSymbol*) (((PgfSymbolLit*) i.data)+1));
case PGF_SYMBOL_VAR:
return *((PgfSymbol*) (((PgfSymbolVar*) i.data)+1));
case PGF_SYMBOL_NE:
return *((PgfSymbol*) (((PgfSymbolNE*) i.data)+1));
default:
gu_impossible();
return gu_null_variant;
}
return *(((PgfSymbol*) gu_variant_data(sym))-1);
}
size_t
@@ -1126,19 +1109,19 @@ pgf_parsing_meta_scan(PgfParseState* before, PgfParseState* after,
PgfItem* meta_item, prob_t meta_prob)
{
PgfToken tok = after->ts->fn->get_token(after->ts);
if (!gu_string_eq(tok, gu_empty_string)) {
if (*tok == 0) {
PgfItem* item = pgf_item_copy(meta_item, before->ps->pool, before->ps);
item->inside_prob += meta_prob;
PgfSymbol prev = item->curr_sym;
PgfSymbolKS* sks = (PgfSymbolKS*)
gu_alloc_variant(PGF_SYMBOL_KS,
sizeof(PgfSymbolKS)+sizeof(PgfSymbol),
sizeof(PgfSymbol)+sizeof(PgfSymbolKS)+strlen(tok)+1,
gu_alignof(PgfSymbolKS),
&item->curr_sym, after->ps->pool);
*((PgfSymbol*)(sks+1)) = prev;
sks->token = tok;
*(((PgfSymbol*) sks)-1) = prev;
strcpy((char*) sks->token, (char*) tok);
gu_buf_heap_push(before->agenda, &pgf_item_prob_order, &item);
}
@@ -1162,7 +1145,7 @@ pgf_parsing_meta_predict(GuMapItor* fn, const void* key, void* value, GuExn* err
PgfItem* meta_item = clo->meta_item;
PgfCncCat* cnccat =
gu_map_get(state->ps->concr->cnccats, &abscat->name, PgfCncCat*);
gu_map_get(state->ps->concr->cnccats, abscat->name, PgfCncCat*);
if (cnccat == NULL)
return;
@@ -1193,7 +1176,7 @@ pgf_parsing_meta_predict(GuMapItor* fn, const void* key, void* value, GuExn* err
sizeof(PgfSymbolCat)+sizeof(PgfSymbol),
gu_alignof(PgfSymbolCat),
&item->curr_sym, state->ps->pool);
*((PgfSymbol*)(scat+1)) = prev;
*(((PgfSymbol*)scat)-1) = prev;
scat->d = nargs;
scat->r = lin_idx;
@@ -1342,6 +1325,10 @@ pgf_parsing_symbol(PgfParseState* before, PgfParseState* after,
pgf_item_free(before, after, item);
break;
}
case PGF_SYMBOL_BIND: {
pgf_item_free(before, after, item);
break;
}
default:
gu_impossible();
}
@@ -1412,7 +1399,7 @@ pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item)
} else {
PgfToken tok = (after != NULL)
? after->ts->fn->get_token(after->ts)
: gu_empty_string;
: "";
PgfExprProb *ep = NULL;
bool accepted =
@@ -1428,11 +1415,11 @@ pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item)
PgfSymbol prev = item->curr_sym;
PgfSymbolKS* sks = (PgfSymbolKS*)
gu_alloc_variant(PGF_SYMBOL_KS,
sizeof(PgfSymbolKS)+sizeof(PgfSymbol),
sizeof(PgfSymbol)+sizeof(PgfSymbolKS)+strlen(tok)+1,
gu_alignof(PgfSymbolKS),
&item->curr_sym, after->ps->pool);
*((PgfSymbol*)(sks+1)) = prev;
sks->token = tok;
*(((PgfSymbol*) sks)-1) = prev;
strcpy((char*) sks->token, (char*) tok);
item->seq_idx++;
pgf_parsing_add_transition(before, after, tok, item);
@@ -1546,9 +1533,7 @@ pgf_parsing_proceed(PgfParseState* state)
static prob_t
pgf_parsing_default_beam_size(PgfConcr* concr)
{
GuPool* tmp_pool = gu_new_pool();
PgfCId flag_name = gu_str_string("beam_size", tmp_pool);
PgfLiteral lit = gu_map_get(concr->cflags, &flag_name, PgfLiteral);
PgfLiteral lit = gu_map_get(concr->cflags, "beam_size", PgfLiteral);
if (gu_variant_is_null(lit))
return 0;
@@ -1677,7 +1662,7 @@ typedef struct {
static bool
pgf_real_match_token(PgfTokenState* ts, PgfToken tok, PgfItem* item)
{
return gu_string_eq(gu_container(ts, PgfRealTokenState, ts)->tok, tok);
return strcmp(gu_container(ts, PgfRealTokenState, ts)->tok, tok) == 0;
}
static PgfToken
@@ -1707,7 +1692,7 @@ pgf_parser_next_state(PgfParseState* prev, PgfToken tok)
pgf_new_token_state(PgfRealTokenState, prev->ps->pool);
ts->tok = tok;
ts->lexicon_idx = gu_map_get(prev->ps->concr->leftcorner_tok_idx,
&tok, PgfProductionIdx*);
tok, PgfProductionIdx*);
if (ts->lexicon_idx != NULL) {
PgfLexiconFn clo = { { pgf_parser_compute_lexicon_prob }, &ts->ts };
gu_map_iter(ts->lexicon_idx, &clo.fn, NULL);
@@ -1758,7 +1743,7 @@ pgf_prefix_match_token(PgfTokenState* ts0, PgfToken tok, PgfItem* item)
static PgfToken
pgf_prefix_get_token(PgfTokenState* ts) {
return gu_empty_string;
return "";
}
static PgfProductionIdx*
@@ -2165,7 +2150,7 @@ pgf_parser_init_state(PgfConcr* concr, PgfCId cat, size_t lin_idx,
GuPool* pool, GuPool* out_pool)
{
PgfCncCat* cnccat =
gu_map_get(concr->cnccats, &cat, PgfCncCat*);
gu_map_get(concr->cnccats, cat, PgfCncCat*);
if (!cnccat)
return NULL;
@@ -2226,7 +2211,7 @@ pgf_parser_add_literal(PgfConcr *concr, PgfCId cat,
PgfLiteralCallback* callback)
{
PgfCncCat* cnccat =
gu_map_get(concr->cnccats, &cat, PgfCncCat*);
gu_map_get(concr->cnccats, cat, PgfCncCat*);
if (cnccat == NULL)
return;
@@ -2281,7 +2266,7 @@ pgf_morpho_iter(GuMapItor* fn, const void* key, void* value, GuExn* err)
PgfToken tok1 = symks->token;
PgfToken tok2 = gu_seq_get(clo->tokens, PgfToken, pos++);
if (!gu_string_eq(tok1, tok2))
if (strcmp(tok1, tok2) != 0)
goto cont;
}
default:
@@ -2320,7 +2305,7 @@ pgf_lookup_morpho(PgfConcr *concr, PgfLexer *lexer,
}
PgfProductionIdx* lexicon_idx =
gu_map_get(concr->leftcorner_tok_idx, &tok, PgfProductionIdx*);
gu_map_get(concr->leftcorner_tok_idx, tok, PgfProductionIdx*);
if (lexicon_idx == NULL) {
gu_pool_free(tmp_pool);
return;
@@ -2374,10 +2359,10 @@ pgf_fullform_iter(GuMapItor* fn, const void* key, void* value, GuExn* err)
// create a new production index with keys that
// are multiword units
PgfProductionIdx* lexicon_idx =
gu_map_get(st->new_idx, &tokens, PgfProductionIdx*);
gu_map_get(st->new_idx, tokens, PgfProductionIdx*);
if (lexicon_idx == NULL) {
lexicon_idx = gu_map_type_new(PgfProductionIdx, st->pool);
gu_map_put(st->new_idx, &tokens, PgfProductionIdx*, lexicon_idx);
gu_map_put(st->new_idx, tokens, PgfProductionIdx*, lexicon_idx);
}
PgfProductionBuf* prods =
@@ -2443,7 +2428,7 @@ pgf_fullform_lexicon(PgfConcr *concr, GuPool* pool)
GuString
pgf_fullform_get_string(PgfFullFormEntry* entry)
{
return *((GuString*) entry->key);
return (GuString) entry->key;
}
void
@@ -2462,10 +2447,10 @@ pgf_parser_index_token(PgfConcr* concr,
GuPool *pool)
{
PgfProductionIdx* set =
gu_map_get(concr->leftcorner_tok_idx, &tok, PgfProductionIdx*);
gu_map_get(concr->leftcorner_tok_idx, tok, PgfProductionIdx*);
if (set == NULL) {
set = gu_map_type_new(PgfProductionIdx, pool);
gu_map_put(concr->leftcorner_tok_idx, &tok, PgfProductionIdx*, set);
gu_map_put(concr->leftcorner_tok_idx, tok, PgfProductionIdx*, set);
}
PgfCFCat cfc = {ccat, lin_idx};
@@ -2527,6 +2512,7 @@ pgf_parser_index_symbol(PgfConcr* concr, PgfSymbol sym,
case PGF_SYMBOL_CAT:
case PGF_SYMBOL_LIT:
case PGF_SYMBOL_NE:
case PGF_SYMBOL_BIND:
case PGF_SYMBOL_VAR:
// Nothing to be done here
break;

View File

@@ -111,7 +111,7 @@ pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_id
if (phrase->start == start &&
phrase->end == end &&
gu_string_eq(phrase->cat, cat) &&
strcmp(phrase->cat, cat) == 0 &&
phrase->lin_idx == lin_idx) {
state->matches++;
break;

View File

@@ -49,30 +49,28 @@ pgf_load_meta_child_probs(PgfPGF* pgf, const char* fpath,
GuPool* tmp_pool = gu_new_pool();
for (;;) {
char cat1_s[21];
char cat2_s[21];
char cat1[21];
char cat2[21];
prob_t prob;
if (fscanf(fp, "%20s\t%20s\t%f", cat1_s, cat2_s, &prob) < 3)
if (fscanf(fp, "%20s\t%20s\t%f", cat1, cat2, &prob) < 3)
break;
prob = - log(prob);
GuString cat1 = gu_str_string(cat1_s, tmp_pool);
PgfAbsCat* abscat1 =
gu_map_get(pgf->abstract.cats, &cat1, PgfAbsCat*);
gu_map_get(pgf->abstract.cats, cat1, PgfAbsCat*);
if (abscat1 == NULL) {
gu_raise(err, PgfExn);
goto close;
}
if (strcmp(cat2_s, "*") == 0) {
if (strcmp(cat2, "*") == 0) {
abscat1->meta_prob = prob;
} else if (strcmp(cat2_s, "_") == 0) {
} else if (strcmp(cat2, "_") == 0) {
abscat1->meta_token_prob = prob;
} else {
GuString cat2 = gu_str_string(cat2_s, tmp_pool);
PgfAbsCat* abscat2 = gu_map_get(pgf->abstract.cats, &cat2, PgfAbsCat*);
PgfAbsCat* abscat2 = gu_map_get(pgf->abstract.cats, cat2, PgfAbsCat*);
if (abscat2 == NULL) {
gu_raise(err, PgfExn);
goto close;
@@ -107,7 +105,7 @@ pgf_iter_languages(PgfPGF* pgf, GuMapItor* fn, GuExn* err)
PgfConcr*
pgf_get_language(PgfPGF* pgf, PgfCId lang)
{
return gu_map_get(pgf->concretes, &lang, PgfConcr*);
return gu_map_get(pgf->concretes, lang, PgfConcr*);
}
GuString
@@ -123,16 +121,13 @@ pgf_iter_categories(PgfPGF* pgf, GuMapItor* fn, GuExn* err)
}
PgfCId
pgf_start_cat(PgfPGF* pgf, GuPool* pool)
pgf_start_cat(PgfPGF* pgf)
{
GuPool* tmp_pool = gu_local_pool();
GuString s = gu_str_string("startcat", tmp_pool);
PgfLiteral lit =
gu_map_get(pgf->abstract.aflags, &s, PgfLiteral);
gu_map_get(pgf->abstract.aflags, "startcat", PgfLiteral);
if (gu_variant_is_null(lit))
return gu_str_string("S", pool);
return "S";
GuVariantInfo i = gu_variant_open(lit);
switch (i.tag) {
@@ -142,20 +137,17 @@ pgf_start_cat(PgfPGF* pgf, GuPool* pool)
}
}
return gu_str_string("S", pool);
return "S";
}
GuString
pgf_language_code(PgfConcr* concr)
{
GuPool* tmp_pool = gu_local_pool();
GuString s = gu_str_string("language", tmp_pool);
PgfLiteral lit =
gu_map_get(concr->cflags, &s, PgfLiteral);
gu_map_get(concr->cflags, "language", PgfLiteral);
if (gu_variant_is_null(lit))
return gu_empty_string;
return "";
GuVariantInfo i = gu_variant_open(lit);
switch (i.tag) {
@@ -165,7 +157,7 @@ pgf_language_code(PgfConcr* concr)
}
}
return gu_empty_string;
return "";
}
void
@@ -188,8 +180,8 @@ pgf_filter_by_cat(GuMapItor* fn, const void* key, void* value, GuExn* err)
PgfFunByCatIter* clo = (PgfFunByCatIter*) fn;
PgfAbsFun* absfun = *((PgfAbsFun**) value);
if (gu_string_eq(absfun->type->cid, clo->catname)) {
clo->client_fn->fn(clo->client_fn, &absfun->name, NULL, err);
if (strcmp(absfun->type->cid, clo->catname) == 0) {
clo->client_fn->fn(clo->client_fn, absfun->name, NULL, err);
}
}
@@ -205,10 +197,10 @@ PgfType*
pgf_function_type(PgfPGF* pgf, PgfCId funname)
{
PgfAbsFun* absfun =
gu_map_get(pgf->abstract.funs, &funname, PgfAbsFun*);
gu_map_get(pgf->abstract.funs, funname, PgfAbsFun*);
if (absfun == NULL)
return NULL;
return absfun->type;
}
@@ -216,8 +208,8 @@ GuString
pgf_print_name(PgfConcr* concr, PgfCId id)
{
PgfCId name =
gu_map_get(concr->printnames, &id, PgfCId);
if (gu_string_eq(name, gu_empty_string))
gu_map_get(concr->printnames, id, PgfCId);
if (*name == 0)
name = id;
return name;
}
@@ -226,7 +218,7 @@ void
pgf_linearize(PgfConcr* concr, PgfExpr expr, GuOut* out, GuExn* err)
{
GuPool* tmp_pool = gu_local_pool();
GuEnum* cts =
pgf_lzr_concretize(concr, expr, tmp_pool);
PgfCncTree ctree = gu_next(cts, PgfCncTree, tmp_pool);

View File

@@ -102,7 +102,7 @@ void
pgf_iter_categories(PgfPGF* pgf, GuMapItor* fn, GuExn* err);
PgfCId
pgf_start_cat(PgfPGF* pgf, GuPool* pool);
pgf_start_cat(PgfPGF* pgf);
void
pgf_iter_functions(PgfPGF* pgf, GuMapItor* fn, GuExn* err);

View File

@@ -13,7 +13,7 @@
#include <gu/exn.h>
#include <gu/utf8.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
//
// PgfReader
@@ -116,11 +116,21 @@ pgf_read_literal(PgfReader* rdr)
uint8_t tag = pgf_read_tag(rdr);
switch (tag) {
case PGF_LITERAL_STR: {
GuLength len = pgf_read_len(rdr);
uint8_t* buf = alloca(len*6+1);
uint8_t* p = buf;
for (size_t i = 0; i < len; i++) {
gu_in_utf8_buf(&p, rdr->in, rdr->err);
gu_return_on_exn(rdr->err, gu_null_variant);
}
*p++ = 0;
PgfLiteralStr *lit_str =
gu_new_variant(PGF_LITERAL_STR,
PgfLiteralStr,
&lit, rdr->opool);
lit_str->val = pgf_read_string(rdr);
gu_new_flex_variant(PGF_LITERAL_STR,
PgfLiteralStr,
val, p-buf,
&lit, rdr->opool);
strcpy((char*) lit_str->val, (char*) buf);
break;
}
case PGF_LITERAL_INT: {
@@ -160,7 +170,7 @@ pgf_read_flags(PgfReader* rdr)
PgfLiteral value = pgf_read_literal(rdr);
gu_return_on_exn(rdr->err, NULL);
gu_map_put(flags, &name, PgfLiteral, value);
gu_map_put(flags, name, PgfLiteral, value);
}
return flags;
@@ -224,11 +234,16 @@ pgf_read_expr_(PgfReader* rdr)
break;
}
case PGF_EXPR_FUN: {
size_t len = pgf_read_len(rdr);
PgfExprFun *efun =
gu_new_variant(PGF_EXPR_FUN,
PgfExprFun,
&expr, rdr->opool);
efun->fun = pgf_read_cid(rdr, rdr->opool);
gu_new_flex_variant(PGF_EXPR_FUN,
PgfExprFun,
fun, len+1,
&expr, rdr->opool);
gu_in_bytes(rdr->in, (uint8_t*)efun->fun, len, rdr->err);
efun->fun[len] = 0;
gu_return_on_exn(rdr->err, gu_null_variant);
break;
}
@@ -403,7 +418,17 @@ pgf_read_absfun(PgfReader* rdr)
{
PgfAbsFun* absfun = gu_new(PgfAbsFun, rdr->opool);
absfun->name = pgf_read_cid(rdr, rdr->opool);
size_t len = pgf_read_len(rdr);
PgfExprFun *efun =
gu_new_flex_variant(PGF_EXPR_FUN,
PgfExprFun,
fun, len+1,
&absfun->ep.expr, rdr->opool);
gu_in_bytes(rdr->in, (uint8_t*)efun->fun, len, rdr->err);
efun->fun[len] = 0;
absfun->name = efun->fun;
gu_return_on_exn(rdr->err, NULL);
absfun->type = pgf_read_type_(rdr);
@@ -449,12 +474,6 @@ pgf_read_absfun(PgfReader* rdr)
absfun->ep.prob = - log(gu_in_f64be(rdr->in, rdr->err));
PgfExprFun* expr_fun =
gu_new_variant(PGF_EXPR_FUN,
PgfExprFun,
&absfun->ep.expr, rdr->opool);
expr_fun->fun = absfun->name;
return absfun;
}
@@ -474,7 +493,7 @@ pgf_read_absfuns(PgfReader* rdr)
PgfAbsFun* absfun = pgf_read_absfun(rdr);
gu_return_on_exn(rdr->err, NULL);
gu_map_put(absfuns, &absfun->name, PgfAbsFun*, absfun);
gu_map_put(absfuns, absfun->name, PgfAbsFun*, absfun);
}
return absfuns;
@@ -514,7 +533,8 @@ pgf_read_abscat(PgfReader* rdr, PgfAbstr* abstr, PgfCIdMap* abscats)
gu_return_on_exn(rdr->err, NULL);
PgfAbsFun* absfun =
gu_map_get(abstr->funs, &name, PgfAbsFun*);
gu_map_get(abstr->funs, name, PgfAbsFun*);
assert(absfun != NULL);
gu_buf_push(functions, PgfAbsFun*, absfun);
}
@@ -539,7 +559,7 @@ pgf_read_abscats(PgfReader* rdr, PgfAbstr* abstr)
PgfAbsCat* abscat = pgf_read_abscat(rdr, abstr, abscats);
gu_return_on_exn(rdr->err, NULL);
gu_map_put(abscats, &abscat->name, PgfAbsCat*, abscat);
gu_map_put(abscats, abscat->name, PgfAbsCat*, abscat);
}
return abscats;
@@ -567,7 +587,7 @@ pgf_read_printnames(PgfReader* rdr)
GuMapType* map_type = (GuMapType*)
GU_TYPE_LIT(GuStringMap, _,
gu_type(GuString),
&gu_empty_string);
&"");
PgfCIdMap* printnames = gu_map_type_make(map_type, rdr->opool);
size_t len = pgf_read_len(rdr);
@@ -580,7 +600,7 @@ pgf_read_printnames(PgfReader* rdr)
GuString printname = pgf_read_string(rdr);
gu_return_on_exn(rdr->err, NULL);
gu_map_put(printnames, &name, GuString, printname);
gu_map_put(printnames, name, GuString, printname);
}
return printnames;
@@ -654,12 +674,21 @@ pgf_read_symbol(PgfReader* rdr)
break;
}
case PGF_SYMBOL_KS: {
GuLength len = pgf_read_len(rdr);
uint8_t* buf = alloca(len*6+1);
uint8_t* p = buf;
for (size_t i = 0; i < len; i++) {
gu_in_utf8_buf(&p, rdr->in, rdr->err);
gu_return_on_exn(rdr->err, gu_null_variant);
}
*p++ = 0;
PgfSymbolKS *sym_ks =
gu_new_variant(PGF_SYMBOL_KS,
PgfSymbolKS,
&sym, rdr->opool);
sym_ks->token = pgf_read_string(rdr);
gu_return_on_exn(rdr->err, gu_null_variant);
gu_new_flex_variant(PGF_SYMBOL_KS,
PgfSymbolKS,
token, p-buf,
&sym, rdr->opool);
strcpy((char*) sym_ks->token, (char*) buf);
break;
}
case PGF_SYMBOL_KP: {
@@ -747,7 +776,7 @@ pgf_read_cncfun(PgfReader* rdr, PgfAbstr* abstr, PgfConcr* concr, int funid)
gu_return_on_exn(rdr->err, NULL);
PgfAbsFun* absfun =
gu_map_get(abstr->funs, &name, PgfAbsFun*);
gu_map_get(abstr->funs, name, PgfAbsFun*);
PgfCncFun* cncfun = gu_new_flex(rdr->opool, PgfCncFun, lins, len);
cncfun->absfun = absfun;
@@ -956,7 +985,7 @@ pgf_read_cnccat(PgfReader* rdr, PgfAbstr* abstr, PgfConcr* concr, PgfCId name)
gu_malloc(rdr->opool, sizeof(PgfCncCat)+n_lins*sizeof(GuString));
cnccat->abscat =
gu_map_get(abstr->cats, &name, PgfAbsCat*);
gu_map_get(abstr->cats, name, PgfAbsCat*);
gu_assert(cnccat->abscat != NULL);
int len = last + 1 - first;
@@ -1011,7 +1040,7 @@ pgf_read_cnccats(PgfReader* rdr, PgfAbstr* abstr, PgfConcr* concr)
pgf_read_cnccat(rdr, abstr, concr, name);
gu_return_on_exn(rdr->err, NULL);
gu_map_put(cnccats, &name, PgfCncCat*, cnccat);
gu_map_put(cnccats, name, PgfCncCat*, cnccat);
}
return cnccats;
@@ -1100,7 +1129,7 @@ pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr)
pgf_read_cid(rdr, rdr->opool);
gu_return_on_exn(rdr->err, NULL);
concr->cflags =
concr->cflags =
pgf_read_flags(rdr);
gu_return_on_exn(rdr->err, NULL);
@@ -1150,7 +1179,7 @@ pgf_read_concretes(PgfReader* rdr, PgfAbstr* abstr)
PgfConcr* concr = pgf_read_concrete(rdr, abstr);
gu_return_on_exn(rdr->err, NULL);
gu_map_put(concretes, &concr->name, PgfConcr*, concr);
gu_map_put(concretes, concr->name, PgfConcr*, concr);
}
return concretes;

View File

@@ -230,14 +230,14 @@ pgf_reasoner_try_first(PgfReasoner* rs, PgfExprState* parent, PgfAbsFun* absfun)
{
PgfCId cat = absfun->type->cid;
PgfAnswers* answers = gu_map_get(rs->table, &cat, PgfAnswers*);
PgfAnswers* answers = gu_map_get(rs->table, cat, PgfAnswers*);
if (answers == NULL) {
answers = gu_new(PgfAnswers, rs->tmp_pool);
answers->parents = gu_new_buf(PgfExprState*, rs->tmp_pool);
answers->exprs = gu_new_buf(PgfExprProb*, rs->tmp_pool);
answers->outside_prob = parent->base.prob;
gu_map_put(rs->table, &cat, PgfAnswers*, answers);
gu_map_put(rs->table, cat, PgfAnswers*, answers);
}
gu_buf_push(answers->parents, PgfExprState*, parent);
@@ -397,9 +397,9 @@ pgf_generate_all(PgfPGF* pgf, PgfCId cat, GuPool* pool)
answers->parents = gu_new_buf(PgfExprState*, rs->tmp_pool);
answers->exprs = rs->exprs;
answers->outside_prob = 0;
gu_map_put(rs->table, &cat, PgfAnswers*, answers);
gu_map_put(rs->table, cat, PgfAnswers*, answers);
PgfAbsCat* abscat = gu_map_get(rs->abstract->cats, &cat, PgfAbsCat*);
PgfAbsCat* abscat = gu_map_get(rs->abstract->cats, cat, PgfAbsCat*);
if (abscat != NULL) {
((PgfPredicate) abscat->predicate)(rs, NULL);
}