forked from GitHub/gf-core
a major refactoring in the C and the Haskell runtimes. Note incompatible change in the PGF format!!!
The following are the outcomes:
- Predef.nonExist is fully supported by both the Haskell and the C runtimes
- Predef.BIND is now an internal compiler defined token. For now
it behaves just as usual for the Haskell runtime, i.e. it generates &+.
However, the special treatment will let us to handle it properly in
the C runtime.
- This required a major change in the PGF format since both
nonExist and BIND may appear inside 'pre' and this was not supported
before.
This commit is contained in:
@@ -133,8 +133,8 @@ struct PgfItem {
|
||||
PgfPArgs* args;
|
||||
PgfSymbol curr_sym;
|
||||
uint16_t seq_idx;
|
||||
uint8_t tok_idx;
|
||||
uint8_t alt;
|
||||
uint8_t alt_idx; // position in the pre alternative
|
||||
uint8_t alt; // the number of the alternative
|
||||
prob_t inside_prob;
|
||||
};
|
||||
|
||||
@@ -694,7 +694,7 @@ pgf_new_item(PgfItemConts* conts, PgfProduction prod,
|
||||
item->prod = prod;
|
||||
item->curr_sym = gu_null_variant;
|
||||
item->seq_idx = 0;
|
||||
item->tok_idx = 0;
|
||||
item->alt_idx = 0;
|
||||
item->alt = 0;
|
||||
|
||||
conts->ref_count++;
|
||||
@@ -758,8 +758,12 @@ pgf_item_update_arg(PgfItem* item, size_t d, PgfCCat *new_ccat,
|
||||
static void
|
||||
pgf_item_advance(PgfItem* item, GuPool* pool)
|
||||
{
|
||||
item->seq_idx++;
|
||||
pgf_item_set_curr_symbol(item, pool);
|
||||
if (GU_LIKELY(item->alt == 0)) {
|
||||
item->seq_idx++;
|
||||
pgf_item_set_curr_symbol(item, pool);
|
||||
}
|
||||
else
|
||||
item->alt_idx++;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -1133,8 +1137,7 @@ pgf_parsing_meta_scan(PgfParseState* before, PgfParseState* after,
|
||||
gu_alignof(PgfSymbolKS),
|
||||
&item->curr_sym, after->ps->pool);
|
||||
*((PgfSymbol*)(sks+1)) = prev;
|
||||
sks->tokens = gu_new_seq(PgfToken, 1, after->ps->pool);
|
||||
gu_seq_set(sks->tokens, PgfToken, 0, tok);
|
||||
sks->token = tok;
|
||||
|
||||
gu_buf_heap_push(before->agenda, &pgf_item_prob_order, &item);
|
||||
}
|
||||
@@ -1218,76 +1221,54 @@ pgf_parsing_symbol(PgfParseState* before, PgfParseState* after,
|
||||
case PGF_SYMBOL_KS: {
|
||||
if (after != NULL) {
|
||||
PgfSymbolKS* sks = gu_variant_data(sym);
|
||||
gu_assert(item->tok_idx < gu_seq_length(sks->tokens));
|
||||
PgfToken tok =
|
||||
gu_seq_get(sks->tokens, PgfToken, item->tok_idx++);
|
||||
if (item->tok_idx == gu_seq_length(sks->tokens)) {
|
||||
item->tok_idx = 0;
|
||||
pgf_item_advance(item, after->ps->pool);
|
||||
}
|
||||
pgf_parsing_add_transition(before, after, tok, item);
|
||||
pgf_item_advance(item, after->ps->pool);
|
||||
pgf_parsing_add_transition(before, after, sks->token, item);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_KP: {
|
||||
if (after != NULL) {
|
||||
PgfSymbolKP* skp = gu_variant_data(sym);
|
||||
size_t idx = item->tok_idx;
|
||||
uint8_t alt = item->alt;
|
||||
gu_assert(idx < gu_seq_length(skp->default_form));
|
||||
if (idx == 0) {
|
||||
PgfToken tok;
|
||||
|
||||
PgfSymbol sym;
|
||||
if (item->alt == 0) {
|
||||
PgfItem* new_item;
|
||||
|
||||
tok = gu_seq_get(skp->default_form, PgfToken, 0);
|
||||
|
||||
new_item = pgf_item_copy(item, after->ps->pool, after->ps);
|
||||
new_item->tok_idx++;
|
||||
if (new_item->tok_idx == gu_seq_length(skp->default_form)) {
|
||||
new_item->tok_idx = 0;
|
||||
pgf_item_advance(new_item, after->ps->pool);
|
||||
}
|
||||
pgf_parsing_add_transition(before, after, tok, new_item);
|
||||
new_item->alt = 1;
|
||||
new_item->alt_idx = 0;
|
||||
sym = gu_seq_get(skp->default_form, PgfSymbol, new_item->alt_idx);
|
||||
pgf_parsing_symbol(before, after, new_item, sym);
|
||||
|
||||
for (size_t i = 0; i < skp->n_forms; i++) {
|
||||
// XXX: do nubbing properly
|
||||
PgfTokens* toks = skp->forms[i].form;
|
||||
PgfTokens* toks2 = skp->default_form;
|
||||
bool skip = pgf_tokens_equal(toks, toks2);
|
||||
PgfSequence* syms = skp->forms[i].form;
|
||||
PgfSequence* syms2 = skp->default_form;
|
||||
bool skip = false; /*pgf_tokens_equal(toks, toks2);
|
||||
for (size_t j = 0; j < i; j++) {
|
||||
PgfTokens* toks2 = skp->forms[j].form;
|
||||
skip |= pgf_tokens_equal(toks, toks2);
|
||||
}
|
||||
}*/
|
||||
if (!skip) {
|
||||
tok = gu_seq_get(toks, PgfToken, 0);
|
||||
new_item = pgf_item_copy(item, after->ps->pool, after->ps);
|
||||
new_item->tok_idx++;
|
||||
new_item->alt = i;
|
||||
if (new_item->tok_idx == gu_seq_length(toks)) {
|
||||
new_item->tok_idx = 0;
|
||||
pgf_item_advance(new_item, after->ps->pool);
|
||||
}
|
||||
pgf_parsing_add_transition(before, after, tok, new_item);
|
||||
new_item->alt = i+2;
|
||||
new_item->alt_idx = 0;
|
||||
sym = gu_seq_get(syms, PgfSymbol, new_item->alt_idx);
|
||||
pgf_parsing_symbol(before, after, new_item, sym);
|
||||
}
|
||||
}
|
||||
} else if (alt == 0) {
|
||||
PgfToken tok =
|
||||
gu_seq_get(skp->default_form, PgfToken, idx);
|
||||
item->tok_idx++;
|
||||
if (item->tok_idx == gu_seq_length(skp->default_form)) {
|
||||
item->tok_idx = 0;
|
||||
pgf_item_advance(item, after->ps->pool);
|
||||
}
|
||||
pgf_parsing_add_transition(before, after, tok, item);
|
||||
} else {
|
||||
gu_assert(alt <= skp->n_forms);
|
||||
PgfTokens* toks = skp->forms[alt - 1].form;
|
||||
PgfToken tok = gu_seq_get(toks, PgfToken, idx);
|
||||
item->tok_idx++;
|
||||
if (item->tok_idx == gu_seq_length(toks)) {
|
||||
item->tok_idx = 0;
|
||||
PgfSequence* syms =
|
||||
(item->alt == 1) ? skp->default_form :
|
||||
skp->forms[item->alt-2].form;
|
||||
|
||||
if (item->alt_idx < gu_seq_length(syms)) {
|
||||
sym = gu_seq_get(syms, PgfSymbol, item->alt_idx);
|
||||
pgf_parsing_symbol(before, after, item, sym);
|
||||
} else {
|
||||
item->alt = 0;
|
||||
pgf_item_advance(item, after->ps->pool);
|
||||
gu_buf_heap_push(before->agenda, &pgf_item_prob_order, &item);
|
||||
}
|
||||
pgf_parsing_add_transition(before, after, tok, item);
|
||||
}
|
||||
}
|
||||
break;
|
||||
@@ -1357,7 +1338,7 @@ pgf_parsing_symbol(PgfParseState* before, PgfParseState* after,
|
||||
// XXX TODO proper support
|
||||
break;
|
||||
case PGF_SYMBOL_NE: {
|
||||
// Nothing to be done here
|
||||
pgf_item_free(before, after, item);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@@ -1450,8 +1431,7 @@ pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item)
|
||||
gu_alignof(PgfSymbolKS),
|
||||
&item->curr_sym, after->ps->pool);
|
||||
*((PgfSymbol*)(sks+1)) = prev;
|
||||
sks->tokens = gu_new_seq(PgfToken, 1, after->ps->pool);
|
||||
gu_seq_set(sks->tokens, PgfToken, 0, tok);
|
||||
sks->token = tok;
|
||||
|
||||
item->seq_idx++;
|
||||
pgf_parsing_add_transition(before, after, tok, item);
|
||||
@@ -1755,9 +1735,7 @@ typedef struct {
|
||||
} PgfPrefixTokenState;
|
||||
|
||||
static GuString
|
||||
pgf_get_tokens(PgfSequence* seq,
|
||||
uint16_t seq_idx, uint8_t tok_idx,
|
||||
GuPool* pool)
|
||||
pgf_get_tokens(PgfSequence* seq, uint16_t seq_idx, GuPool* pool)
|
||||
{
|
||||
GuPool* tmp_pool = gu_new_pool();
|
||||
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
|
||||
@@ -1773,17 +1751,7 @@ pgf_get_tokens(PgfSequence* seq,
|
||||
switch (i.tag) {
|
||||
case PGF_SYMBOL_KS: {
|
||||
PgfSymbolKS* symks = i.data;
|
||||
size_t len = gu_seq_length(symks->tokens);
|
||||
for (size_t i = tok_idx; i < len; i++) {
|
||||
if (i > 0) {
|
||||
gu_putc(' ', out, err);
|
||||
}
|
||||
|
||||
PgfToken tok = gu_seq_get(symks->tokens, PgfToken, i);
|
||||
gu_string_write(tok, out, err);
|
||||
}
|
||||
|
||||
tok_idx = 0;
|
||||
gu_string_write(symks->token, out, err);
|
||||
}
|
||||
default:
|
||||
goto end;
|
||||
@@ -1809,18 +1777,9 @@ pgf_prefix_match_token(PgfTokenState* ts0, PgfToken tok, PgfItem* item)
|
||||
PgfSequence* seq;
|
||||
pgf_item_sequence(item, &lin_idx, &seq, ts->pool);
|
||||
|
||||
uint16_t seq_idx = item->seq_idx;
|
||||
uint8_t tok_idx = item->tok_idx;
|
||||
|
||||
// go one token back
|
||||
if (tok_idx > 0)
|
||||
tok_idx--;
|
||||
else
|
||||
seq_idx--;
|
||||
|
||||
ts->tp = gu_new(PgfTokenProb, ts->pool);
|
||||
ts->tp->tok =
|
||||
pgf_get_tokens(seq, seq_idx, tok_idx, ts->pool);
|
||||
pgf_get_tokens(seq, item->seq_idx-1, ts->pool);
|
||||
ts->tp->cat = item->conts->ccat->cnccat->abscat->name;
|
||||
ts->tp->prob = item->inside_prob+item->conts->outside_prob;
|
||||
}
|
||||
@@ -2346,17 +2305,15 @@ pgf_morpho_iter(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||
switch (i.tag) {
|
||||
case PGF_SYMBOL_KS: {
|
||||
PgfSymbolKS* symks = i.data;
|
||||
size_t len = gu_seq_length(symks->tokens);
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
if (pos >= gu_seq_length(clo->tokens))
|
||||
goto cont;
|
||||
|
||||
if (pos >= gu_seq_length(clo->tokens))
|
||||
goto cont;
|
||||
|
||||
PgfToken tok1 = gu_seq_get(symks->tokens, PgfToken, i);
|
||||
PgfToken tok2 = gu_seq_get(clo->tokens, PgfToken, pos++);
|
||||
|
||||
if (!gu_string_eq(tok1, tok2))
|
||||
goto cont;
|
||||
}
|
||||
PgfToken tok1 = symks->token;
|
||||
PgfToken tok2 = gu_seq_get(clo->tokens, PgfToken, pos++);
|
||||
|
||||
if (!gu_string_eq(tok1, tok2))
|
||||
goto cont;
|
||||
}
|
||||
default:
|
||||
continue;
|
||||
@@ -2443,7 +2400,7 @@ pgf_fullform_iter(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||
PgfProductionApply* papp = i.data;
|
||||
|
||||
PgfSequence* seq = papp->fun->lins[cfc.lin_idx];
|
||||
GuString tokens = pgf_get_tokens(seq, 0, 0, st->pool);
|
||||
GuString tokens = pgf_get_tokens(seq, 0, st->pool);
|
||||
|
||||
// create a new production index with keys that
|
||||
// are multiword units
|
||||
@@ -2531,12 +2488,10 @@ pgf_fullform_get_analyses(PgfFullFormEntry* entry,
|
||||
|
||||
static void
|
||||
pgf_parser_index_token(PgfConcr* concr,
|
||||
PgfTokens* tokens,
|
||||
PgfToken tok,
|
||||
PgfCCat* ccat, size_t lin_idx, PgfProduction prod,
|
||||
GuPool *pool)
|
||||
{
|
||||
PgfToken tok = gu_seq_get(tokens, PgfToken, 0);
|
||||
|
||||
PgfProductionIdx* set =
|
||||
gu_map_get(concr->leftcorner_tok_idx, &tok, PgfProductionIdx*);
|
||||
if (set == NULL) {
|
||||
@@ -2570,6 +2525,47 @@ pgf_parser_index_epsilon(PgfConcr* concr,
|
||||
gu_buf_push(prods, PgfProduction, prod);
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_parser_index_symbol(PgfConcr* concr, PgfSymbol sym,
|
||||
PgfCCat* ccat, size_t lin_idx, PgfProduction prod,
|
||||
GuPool *pool)
|
||||
{
|
||||
GuVariantInfo i = gu_variant_open(sym);
|
||||
switch (i.tag) {
|
||||
case PGF_SYMBOL_KS: {
|
||||
PgfSymbolKS* sks = i.data;
|
||||
pgf_parser_index_token(concr,
|
||||
sks->token,
|
||||
ccat, lin_idx, prod,
|
||||
pool);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_KP: {
|
||||
PgfSymbolKP* skp = i.data;
|
||||
PgfSymbol sym =
|
||||
gu_seq_get(skp->default_form, PgfSymbol, 0);
|
||||
pgf_parser_index_symbol(concr, sym,
|
||||
ccat, lin_idx, prod,
|
||||
pool);
|
||||
for (size_t i = 0; i < skp->n_forms; i++) {
|
||||
sym = gu_seq_get(skp->forms[i].form, PgfSymbol, 0);
|
||||
pgf_parser_index_symbol(concr, sym,
|
||||
ccat, lin_idx, prod,
|
||||
pool);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_CAT:
|
||||
case PGF_SYMBOL_LIT:
|
||||
case PGF_SYMBOL_NE:
|
||||
case PGF_SYMBOL_VAR:
|
||||
// Nothing to be done here
|
||||
break;
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
pgf_parser_index(PgfConcr* concr,
|
||||
PgfCCat* ccat, PgfProduction prod,
|
||||
@@ -2586,39 +2582,9 @@ pgf_parser_index(PgfConcr* concr,
|
||||
|
||||
PgfSequence* seq = papp->fun->lins[lin_idx];
|
||||
if (gu_seq_length(seq) > 0) {
|
||||
GuVariantInfo i = gu_variant_open(gu_seq_get(seq, PgfSymbol, 0));
|
||||
switch (i.tag) {
|
||||
case PGF_SYMBOL_KS: {
|
||||
PgfSymbolKS* sks = i.data;
|
||||
pgf_parser_index_token(concr,
|
||||
sks->tokens,
|
||||
ccat, lin_idx, prod,
|
||||
pool);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_KP: {
|
||||
PgfSymbolKP* skp = i.data;
|
||||
pgf_parser_index_token(concr,
|
||||
skp->default_form,
|
||||
ccat, lin_idx, prod,
|
||||
pool);
|
||||
for (size_t i = 0; i < skp->n_forms; i++) {
|
||||
pgf_parser_index_token(concr,
|
||||
skp->forms[i].form,
|
||||
ccat, lin_idx, prod,
|
||||
pool);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_CAT:
|
||||
case PGF_SYMBOL_LIT:
|
||||
case PGF_SYMBOL_NE:
|
||||
case PGF_SYMBOL_VAR:
|
||||
// Nothing to be done here
|
||||
break;
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
pgf_parser_index_symbol(concr, gu_seq_get(seq, PgfSymbol, 0),
|
||||
ccat, lin_idx, prod,
|
||||
pool);
|
||||
} else {
|
||||
pgf_parser_index_epsilon(concr,
|
||||
ccat, lin_idx, prod,
|
||||
|
||||
Reference in New Issue
Block a user