forked from GitHub/gf-core
added all orthographic primitives
This commit is contained in:
@@ -13,7 +13,7 @@ typedef struct {
|
||||
size_t n_matches;
|
||||
GuExn* err;
|
||||
bool bind;
|
||||
bool capit;
|
||||
PgfCapitState capit;
|
||||
GuPool* out_pool;
|
||||
GuPool* tmp_pool;
|
||||
} PgfAlignerLin;
|
||||
@@ -107,18 +107,38 @@ pgf_aligner_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
|
||||
gu_buf_flush(alin->parent_current);
|
||||
|
||||
pgf_aligner_push_parent(alin, fid);
|
||||
|
||||
if (alin->capit == PGF_CAPIT_NEXT)
|
||||
alin->capit = PGF_CAPIT_NONE;
|
||||
}
|
||||
|
||||
GuOut* out = gu_string_buf_out(alin->sbuf);
|
||||
|
||||
if (alin->capit) {
|
||||
switch (alin->capit) {
|
||||
case PGF_CAPIT_NONE:
|
||||
gu_string_write(tok, out, alin->err);
|
||||
break;
|
||||
case PGF_CAPIT_FIRST: {
|
||||
GuUCS c = gu_utf8_decode((const uint8_t**) &tok);
|
||||
c = gu_ucs_to_upper(c);
|
||||
gu_out_utf8(c, out, alin->err);
|
||||
alin->capit = false;
|
||||
gu_string_write(tok, out, alin->err);
|
||||
alin->capit = PGF_CAPIT_NONE;
|
||||
break;
|
||||
}
|
||||
case PGF_CAPIT_ALL:
|
||||
alin->capit = PGF_CAPIT_NEXT;
|
||||
// continue
|
||||
case PGF_CAPIT_NEXT: {
|
||||
const uint8_t* p = (uint8_t*) tok;
|
||||
while (*p) {
|
||||
GuUCS c = gu_utf8_decode(&p);
|
||||
c = gu_ucs_to_upper(c);
|
||||
gu_out_utf8(c, out, alin->err);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
gu_string_write(tok, out, alin->err);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -150,10 +170,10 @@ pgf_aligner_lzn_symbol_bind(PgfLinFuncs** funcs)
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_aligner_lzn_symbol_capit(PgfLinFuncs** funcs)
|
||||
pgf_aligner_lzn_symbol_capit(PgfLinFuncs** funcs, PgfCapitState capit)
|
||||
{
|
||||
PgfAlignerLin* alin = gu_container(funcs, PgfAlignerLin, funcs);
|
||||
alin->capit = true;
|
||||
alin->capit = capit;
|
||||
}
|
||||
|
||||
static PgfLinFuncs pgf_file_lin_funcs = {
|
||||
@@ -194,7 +214,7 @@ pgf_align_words(PgfConcr* concr, PgfExpr expr,
|
||||
.n_matches = 0,
|
||||
.err = err,
|
||||
.bind = true,
|
||||
.capit = false,
|
||||
.capit = PGF_CAPIT_NONE,
|
||||
.out_pool = pool,
|
||||
.tmp_pool = tmp_pool
|
||||
};
|
||||
|
||||
@@ -209,7 +209,9 @@ typedef enum {
|
||||
PGF_SYMBOL_KP,
|
||||
PGF_SYMBOL_BIND,
|
||||
PGF_SYMBOL_SOFT_BIND,
|
||||
PGF_SYMBOL_SOFT_SPACE,
|
||||
PGF_SYMBOL_CAPIT,
|
||||
PGF_SYMBOL_ALL_CAPIT,
|
||||
PGF_SYMBOL_NE
|
||||
} PgfSymbolTag;
|
||||
|
||||
|
||||
@@ -652,6 +652,7 @@ typedef enum {
|
||||
PGF_CACHED_END,
|
||||
PGF_CACHED_BIND,
|
||||
PGF_CACHED_CAPIT,
|
||||
PGF_CACHED_ALL_CAPIT,
|
||||
PGF_CACHED_NE
|
||||
} PgfLzrCachedTag;
|
||||
|
||||
@@ -718,7 +719,12 @@ pgf_lzr_cache_flush(PgfLzrCache* cache, PgfSymbols* form)
|
||||
break;
|
||||
case PGF_CACHED_CAPIT:
|
||||
if ((*cache->lzr->funcs)->symbol_capit) {
|
||||
(*cache->lzr->funcs)->symbol_capit(cache->lzr->funcs);
|
||||
(*cache->lzr->funcs)->symbol_capit(cache->lzr->funcs, PGF_CAPIT_FIRST);
|
||||
}
|
||||
break;
|
||||
case PGF_CACHED_ALL_CAPIT:
|
||||
if ((*cache->lzr->funcs)->symbol_capit) {
|
||||
(*cache->lzr->funcs)->symbol_capit(cache->lzr->funcs, PGF_CAPIT_ALL);
|
||||
}
|
||||
break;
|
||||
case PGF_CACHED_NE:
|
||||
@@ -797,11 +803,11 @@ pgf_lzr_cache_symbol_bind(PgfLinFuncs** funcs)
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_lzr_cache_symbol_capit(PgfLinFuncs** funcs)
|
||||
pgf_lzr_cache_symbol_capit(PgfLinFuncs** funcs, PgfCapitState capit)
|
||||
{
|
||||
PgfLzrCache* cache = gu_container(funcs, PgfLzrCache, funcs);
|
||||
PgfLzrCached* event = gu_buf_extend(cache->events);
|
||||
event->tag = PGF_CACHED_CAPIT;
|
||||
event->tag = (capit == PGF_CAPIT_ALL) ? PGF_CACHED_ALL_CAPIT : PGF_CACHED_CAPIT;
|
||||
}
|
||||
|
||||
static PgfLinFuncs pgf_lzr_cache_funcs = {
|
||||
@@ -921,9 +927,18 @@ pgf_lzr_linearize_symbols(PgfLzr* lzr, PgfCncTreeApp* fapp,
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_SOFT_SPACE: {
|
||||
// SOFT_SPACE should be just ignored in linearization
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_CAPIT:
|
||||
if ((*lzr->funcs)->symbol_capit) {
|
||||
(*lzr->funcs)->symbol_capit(lzr->funcs);
|
||||
(*lzr->funcs)->symbol_capit(lzr->funcs, PGF_CAPIT_FIRST);
|
||||
}
|
||||
break;
|
||||
case PGF_SYMBOL_ALL_CAPIT:
|
||||
if ((*lzr->funcs)->symbol_capit) {
|
||||
(*lzr->funcs)->symbol_capit(lzr->funcs, PGF_CAPIT_ALL);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
@@ -1045,20 +1060,11 @@ typedef struct PgfSimpleLin PgfSimpleLin;
|
||||
struct PgfSimpleLin {
|
||||
PgfLinFuncs* funcs;
|
||||
bool bind;
|
||||
bool capit;
|
||||
PgfCapitState capit;
|
||||
GuOut* out;
|
||||
GuExn* err;
|
||||
};
|
||||
|
||||
static void
|
||||
pgf_file_lzn_put_space(PgfSimpleLin* flin)
|
||||
{
|
||||
if (flin->bind)
|
||||
flin->bind = false;
|
||||
else
|
||||
gu_putc(' ', flin->out, flin->err);
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_file_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
|
||||
{
|
||||
@@ -1067,16 +1073,39 @@ pgf_file_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
|
||||
return;
|
||||
}
|
||||
|
||||
pgf_file_lzn_put_space(flin);
|
||||
if (flin->bind)
|
||||
flin->bind = false;
|
||||
else {
|
||||
gu_putc(' ', flin->out, flin->err);
|
||||
if (flin->capit == PGF_CAPIT_NEXT)
|
||||
flin->capit = PGF_CAPIT_NONE;
|
||||
}
|
||||
|
||||
if (flin->capit) {
|
||||
switch (flin->capit) {
|
||||
case PGF_CAPIT_NONE:
|
||||
gu_string_write(tok, flin->out, flin->err);
|
||||
break;
|
||||
case PGF_CAPIT_FIRST: {
|
||||
GuUCS c = gu_utf8_decode((const uint8_t**) &tok);
|
||||
c = gu_ucs_to_upper(c);
|
||||
gu_out_utf8(c, flin->out, flin->err);
|
||||
flin->capit = false;
|
||||
gu_string_write(tok, flin->out, flin->err);
|
||||
flin->capit = PGF_CAPIT_NONE;
|
||||
break;
|
||||
}
|
||||
case PGF_CAPIT_ALL:
|
||||
flin->capit = PGF_CAPIT_NEXT;
|
||||
// continue
|
||||
case PGF_CAPIT_NEXT: {
|
||||
const uint8_t* p = (uint8_t*) tok;
|
||||
while (*p) {
|
||||
GuUCS c = gu_utf8_decode(&p);
|
||||
c = gu_ucs_to_upper(c);
|
||||
gu_out_utf8(c, flin->out, flin->err);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
gu_string_write(tok, flin->out, flin->err);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -1094,10 +1123,10 @@ pgf_file_lzn_symbol_bind(PgfLinFuncs** funcs)
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_file_lzn_symbol_capit(PgfLinFuncs** funcs)
|
||||
pgf_file_lzn_symbol_capit(PgfLinFuncs** funcs, PgfCapitState capit)
|
||||
{
|
||||
PgfSimpleLin* flin = gu_container(funcs, PgfSimpleLin, funcs);
|
||||
flin->capit = true;
|
||||
flin->capit = capit;
|
||||
}
|
||||
|
||||
static PgfLinFuncs pgf_file_lin_funcs = {
|
||||
@@ -1117,7 +1146,7 @@ pgf_lzr_linearize_simple(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx,
|
||||
PgfSimpleLin flin = {
|
||||
.funcs = &pgf_file_lin_funcs,
|
||||
.bind = true,
|
||||
.capit = false,
|
||||
.capit = PGF_CAPIT_NONE,
|
||||
.out = out,
|
||||
.err = err
|
||||
};
|
||||
|
||||
@@ -38,6 +38,13 @@ pgf_lzr_wrap_linref(PgfCncTree ctree, GuPool* pool);
|
||||
|
||||
typedef struct PgfLinFuncs PgfLinFuncs;
|
||||
|
||||
typedef enum {
|
||||
PGF_CAPIT_NONE,
|
||||
PGF_CAPIT_FIRST,
|
||||
PGF_CAPIT_ALL,
|
||||
PGF_CAPIT_NEXT
|
||||
} PgfCapitState;
|
||||
|
||||
struct PgfLinFuncs
|
||||
{
|
||||
/// Output tokens
|
||||
@@ -56,7 +63,7 @@ struct PgfLinFuncs
|
||||
void (*symbol_bind)(PgfLinFuncs** self);
|
||||
|
||||
/// capitalization
|
||||
void (*symbol_capit)(PgfLinFuncs** self);
|
||||
void (*symbol_capit)(PgfLinFuncs** self, PgfCapitState capit);
|
||||
};
|
||||
|
||||
/// Linearize a concrete syntax tree.
|
||||
|
||||
@@ -134,8 +134,10 @@ pgf_prev_extern_sym(PgfSymbol sym)
|
||||
return *((PgfSymbol*) (((PgfSymbolVar*) i.data)+1));
|
||||
case PGF_SYMBOL_BIND:
|
||||
case PGF_SYMBOL_SOFT_BIND:
|
||||
case PGF_SYMBOL_SOFT_SPACE:
|
||||
return *((PgfSymbol*) (((PgfSymbolBIND*) i.data)+1));
|
||||
case PGF_SYMBOL_CAPIT:
|
||||
case PGF_SYMBOL_ALL_CAPIT:
|
||||
return *((PgfSymbol*) (((PgfSymbolCAPIT*) i.data)+1));
|
||||
case PGF_SYMBOL_NE:
|
||||
return *((PgfSymbol*) (((PgfSymbolNE*) i.data)+1));
|
||||
@@ -768,7 +770,6 @@ pgf_item_update_arg(PgfItem* item, size_t d, PgfCCat *new_ccat,
|
||||
static void
|
||||
pgf_item_advance(PgfItem* item, GuPool* pool)
|
||||
{
|
||||
|
||||
if (GU_LIKELY(item->alt == 0)) {
|
||||
item->sym_idx++;
|
||||
pgf_item_set_curr_symbol(item, pool);
|
||||
@@ -1063,7 +1064,11 @@ pgf_symbols_cmp(GuString* psent, BIND_TYPE* pbind, PgfSymbols* syms)
|
||||
*pbind = BIND_SOFT;
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_CAPIT: {
|
||||
case PGF_SYMBOL_SOFT_SPACE: {
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_CAPIT:
|
||||
case PGF_SYMBOL_ALL_CAPIT: {
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_NE: {
|
||||
@@ -1541,7 +1546,8 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_SOFT_BIND: {
|
||||
case PGF_SYMBOL_SOFT_BIND:
|
||||
case PGF_SYMBOL_SOFT_SPACE: {
|
||||
if (ps->before->start_offset == ps->before->end_offset) {
|
||||
if (ps->before->needs_bind) {
|
||||
PgfParseState* state =
|
||||
@@ -1562,7 +1568,8 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_CAPIT: {
|
||||
case PGF_SYMBOL_CAPIT:
|
||||
case PGF_SYMBOL_ALL_CAPIT: {
|
||||
pgf_item_advance(item, ps->pool);
|
||||
pgf_parsing_symbol(ps, item, item->curr_sym);
|
||||
break;
|
||||
|
||||
@@ -276,10 +276,18 @@ pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err)
|
||||
gu_puts("SOFT_BIND", out, err);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_SOFT_SPACE: {
|
||||
gu_puts("SOFT_SPACE", out, err);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_CAPIT: {
|
||||
gu_puts("CAPIT", out, err);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_ALL_CAPIT: {
|
||||
gu_puts("ALL_CAPIT", out, err);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
|
||||
@@ -708,6 +708,13 @@ pgf_read_symbol(PgfReader* rdr)
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_SOFT_SPACE: {
|
||||
gu_new_variant(PGF_SYMBOL_SOFT_SPACE,
|
||||
PgfSymbolBIND,
|
||||
&sym, rdr->opool);
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_CAPIT: {
|
||||
gu_new_variant(PGF_SYMBOL_CAPIT,
|
||||
PgfSymbolCAPIT,
|
||||
@@ -715,6 +722,13 @@ pgf_read_symbol(PgfReader* rdr)
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_ALL_CAPIT: {
|
||||
gu_new_variant(PGF_SYMBOL_ALL_CAPIT,
|
||||
PgfSymbolCAPIT,
|
||||
&sym, rdr->opool);
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
pgf_read_tag_error(rdr);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user