added all orthographic primitives

This commit is contained in:
krasimir
2015-05-11 13:01:39 +00:00
parent 13998e3287
commit 1e0d7be4f4
23 changed files with 180 additions and 52 deletions

View File

@@ -13,7 +13,7 @@ typedef struct {
size_t n_matches;
GuExn* err;
bool bind;
bool capit;
PgfCapitState capit;
GuPool* out_pool;
GuPool* tmp_pool;
} PgfAlignerLin;
@@ -107,18 +107,38 @@ pgf_aligner_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
gu_buf_flush(alin->parent_current);
pgf_aligner_push_parent(alin, fid);
if (alin->capit == PGF_CAPIT_NEXT)
alin->capit = PGF_CAPIT_NONE;
}
GuOut* out = gu_string_buf_out(alin->sbuf);
if (alin->capit) {
switch (alin->capit) {
case PGF_CAPIT_NONE:
gu_string_write(tok, out, alin->err);
break;
case PGF_CAPIT_FIRST: {
GuUCS c = gu_utf8_decode((const uint8_t**) &tok);
c = gu_ucs_to_upper(c);
gu_out_utf8(c, out, alin->err);
alin->capit = false;
gu_string_write(tok, out, alin->err);
alin->capit = PGF_CAPIT_NONE;
break;
}
case PGF_CAPIT_ALL:
alin->capit = PGF_CAPIT_NEXT;
// continue
case PGF_CAPIT_NEXT: {
const uint8_t* p = (uint8_t*) tok;
while (*p) {
GuUCS c = gu_utf8_decode(&p);
c = gu_ucs_to_upper(c);
gu_out_utf8(c, out, alin->err);
}
break;
}
}
gu_string_write(tok, out, alin->err);
}
static void
@@ -150,10 +170,10 @@ pgf_aligner_lzn_symbol_bind(PgfLinFuncs** funcs)
}
static void
pgf_aligner_lzn_symbol_capit(PgfLinFuncs** funcs)
pgf_aligner_lzn_symbol_capit(PgfLinFuncs** funcs, PgfCapitState capit)
{
PgfAlignerLin* alin = gu_container(funcs, PgfAlignerLin, funcs);
alin->capit = true;
alin->capit = capit;
}
static PgfLinFuncs pgf_file_lin_funcs = {
@@ -194,7 +214,7 @@ pgf_align_words(PgfConcr* concr, PgfExpr expr,
.n_matches = 0,
.err = err,
.bind = true,
.capit = false,
.capit = PGF_CAPIT_NONE,
.out_pool = pool,
.tmp_pool = tmp_pool
};

View File

@@ -209,7 +209,9 @@ typedef enum {
PGF_SYMBOL_KP,
PGF_SYMBOL_BIND,
PGF_SYMBOL_SOFT_BIND,
PGF_SYMBOL_SOFT_SPACE,
PGF_SYMBOL_CAPIT,
PGF_SYMBOL_ALL_CAPIT,
PGF_SYMBOL_NE
} PgfSymbolTag;

View File

@@ -652,6 +652,7 @@ typedef enum {
PGF_CACHED_END,
PGF_CACHED_BIND,
PGF_CACHED_CAPIT,
PGF_CACHED_ALL_CAPIT,
PGF_CACHED_NE
} PgfLzrCachedTag;
@@ -718,7 +719,12 @@ pgf_lzr_cache_flush(PgfLzrCache* cache, PgfSymbols* form)
break;
case PGF_CACHED_CAPIT:
if ((*cache->lzr->funcs)->symbol_capit) {
(*cache->lzr->funcs)->symbol_capit(cache->lzr->funcs);
(*cache->lzr->funcs)->symbol_capit(cache->lzr->funcs, PGF_CAPIT_FIRST);
}
break;
case PGF_CACHED_ALL_CAPIT:
if ((*cache->lzr->funcs)->symbol_capit) {
(*cache->lzr->funcs)->symbol_capit(cache->lzr->funcs, PGF_CAPIT_ALL);
}
break;
case PGF_CACHED_NE:
@@ -797,11 +803,11 @@ pgf_lzr_cache_symbol_bind(PgfLinFuncs** funcs)
}
static void
pgf_lzr_cache_symbol_capit(PgfLinFuncs** funcs)
pgf_lzr_cache_symbol_capit(PgfLinFuncs** funcs, PgfCapitState capit)
{
PgfLzrCache* cache = gu_container(funcs, PgfLzrCache, funcs);
PgfLzrCached* event = gu_buf_extend(cache->events);
event->tag = PGF_CACHED_CAPIT;
event->tag = (capit == PGF_CAPIT_ALL) ? PGF_CACHED_ALL_CAPIT : PGF_CACHED_CAPIT;
}
static PgfLinFuncs pgf_lzr_cache_funcs = {
@@ -921,9 +927,18 @@ pgf_lzr_linearize_symbols(PgfLzr* lzr, PgfCncTreeApp* fapp,
}
break;
}
case PGF_SYMBOL_SOFT_SPACE: {
// SOFT_SPACE should be just ignored in linearization
break;
}
case PGF_SYMBOL_CAPIT:
if ((*lzr->funcs)->symbol_capit) {
(*lzr->funcs)->symbol_capit(lzr->funcs);
(*lzr->funcs)->symbol_capit(lzr->funcs, PGF_CAPIT_FIRST);
}
break;
case PGF_SYMBOL_ALL_CAPIT:
if ((*lzr->funcs)->symbol_capit) {
(*lzr->funcs)->symbol_capit(lzr->funcs, PGF_CAPIT_ALL);
}
break;
default:
@@ -1045,20 +1060,11 @@ typedef struct PgfSimpleLin PgfSimpleLin;
struct PgfSimpleLin {
PgfLinFuncs* funcs;
bool bind;
bool capit;
PgfCapitState capit;
GuOut* out;
GuExn* err;
};
static void
pgf_file_lzn_put_space(PgfSimpleLin* flin)
{
if (flin->bind)
flin->bind = false;
else
gu_putc(' ', flin->out, flin->err);
}
static void
pgf_file_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
{
@@ -1067,16 +1073,39 @@ pgf_file_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
return;
}
pgf_file_lzn_put_space(flin);
if (flin->bind)
flin->bind = false;
else {
gu_putc(' ', flin->out, flin->err);
if (flin->capit == PGF_CAPIT_NEXT)
flin->capit = PGF_CAPIT_NONE;
}
if (flin->capit) {
switch (flin->capit) {
case PGF_CAPIT_NONE:
gu_string_write(tok, flin->out, flin->err);
break;
case PGF_CAPIT_FIRST: {
GuUCS c = gu_utf8_decode((const uint8_t**) &tok);
c = gu_ucs_to_upper(c);
gu_out_utf8(c, flin->out, flin->err);
flin->capit = false;
gu_string_write(tok, flin->out, flin->err);
flin->capit = PGF_CAPIT_NONE;
break;
}
case PGF_CAPIT_ALL:
flin->capit = PGF_CAPIT_NEXT;
// continue
case PGF_CAPIT_NEXT: {
const uint8_t* p = (uint8_t*) tok;
while (*p) {
GuUCS c = gu_utf8_decode(&p);
c = gu_ucs_to_upper(c);
gu_out_utf8(c, flin->out, flin->err);
}
break;
}
}
gu_string_write(tok, flin->out, flin->err);
}
static void
@@ -1094,10 +1123,10 @@ pgf_file_lzn_symbol_bind(PgfLinFuncs** funcs)
}
static void
pgf_file_lzn_symbol_capit(PgfLinFuncs** funcs)
pgf_file_lzn_symbol_capit(PgfLinFuncs** funcs, PgfCapitState capit)
{
PgfSimpleLin* flin = gu_container(funcs, PgfSimpleLin, funcs);
flin->capit = true;
flin->capit = capit;
}
static PgfLinFuncs pgf_file_lin_funcs = {
@@ -1117,7 +1146,7 @@ pgf_lzr_linearize_simple(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx,
PgfSimpleLin flin = {
.funcs = &pgf_file_lin_funcs,
.bind = true,
.capit = false,
.capit = PGF_CAPIT_NONE,
.out = out,
.err = err
};

View File

@@ -38,6 +38,13 @@ pgf_lzr_wrap_linref(PgfCncTree ctree, GuPool* pool);
typedef struct PgfLinFuncs PgfLinFuncs;
/// Capitalization state that a linearizer carries from one token to the next.
/// It is set through the symbol_capit callback and consumed by the
/// symbol_token callbacks.
typedef enum {
/// no capitalization pending: the token is written unchanged
PGF_CAPIT_NONE,
/// upper-case only the first character of the next token,
/// after which the state goes back to PGF_CAPIT_NONE
PGF_CAPIT_FIRST,
/// upper-case every character of the next token;
/// the token callbacks switch to PGF_CAPIT_NEXT when they see this value
PGF_CAPIT_ALL,
/// all-caps is still in effect: tokens keep being fully upper-cased,
/// and the token callbacks reset the state to PGF_CAPIT_NONE
/// as soon as a token is not bound to the preceding one
PGF_CAPIT_NEXT
} PgfCapitState;
struct PgfLinFuncs
{
/// Output tokens
@@ -56,7 +63,7 @@ struct PgfLinFuncs
void (*symbol_bind)(PgfLinFuncs** self);
/// capitalization
void (*symbol_capit)(PgfLinFuncs** self);
void (*symbol_capit)(PgfLinFuncs** self, PgfCapitState capit);
};
/// Linearize a concrete syntax tree.

View File

@@ -134,8 +134,10 @@ pgf_prev_extern_sym(PgfSymbol sym)
return *((PgfSymbol*) (((PgfSymbolVar*) i.data)+1));
case PGF_SYMBOL_BIND:
case PGF_SYMBOL_SOFT_BIND:
case PGF_SYMBOL_SOFT_SPACE:
return *((PgfSymbol*) (((PgfSymbolBIND*) i.data)+1));
case PGF_SYMBOL_CAPIT:
case PGF_SYMBOL_ALL_CAPIT:
return *((PgfSymbol*) (((PgfSymbolCAPIT*) i.data)+1));
case PGF_SYMBOL_NE:
return *((PgfSymbol*) (((PgfSymbolNE*) i.data)+1));
@@ -768,7 +770,6 @@ pgf_item_update_arg(PgfItem* item, size_t d, PgfCCat *new_ccat,
static void
pgf_item_advance(PgfItem* item, GuPool* pool)
{
if (GU_LIKELY(item->alt == 0)) {
item->sym_idx++;
pgf_item_set_curr_symbol(item, pool);
@@ -1063,7 +1064,11 @@ pgf_symbols_cmp(GuString* psent, BIND_TYPE* pbind, PgfSymbols* syms)
*pbind = BIND_SOFT;
break;
}
case PGF_SYMBOL_CAPIT: {
case PGF_SYMBOL_SOFT_SPACE: {
break;
}
case PGF_SYMBOL_CAPIT:
case PGF_SYMBOL_ALL_CAPIT: {
break;
}
case PGF_SYMBOL_NE: {
@@ -1541,7 +1546,8 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
}
break;
}
case PGF_SYMBOL_SOFT_BIND: {
case PGF_SYMBOL_SOFT_BIND:
case PGF_SYMBOL_SOFT_SPACE: {
if (ps->before->start_offset == ps->before->end_offset) {
if (ps->before->needs_bind) {
PgfParseState* state =
@@ -1562,7 +1568,8 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
}
break;
}
case PGF_SYMBOL_CAPIT: {
case PGF_SYMBOL_CAPIT:
case PGF_SYMBOL_ALL_CAPIT: {
pgf_item_advance(item, ps->pool);
pgf_parsing_symbol(ps, item, item->curr_sym);
break;

View File

@@ -276,10 +276,18 @@ pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err)
gu_puts("SOFT_BIND", out, err);
break;
}
case PGF_SYMBOL_SOFT_SPACE: {
gu_puts("SOFT_SPACE", out, err);
break;
}
case PGF_SYMBOL_CAPIT: {
gu_puts("CAPIT", out, err);
break;
}
case PGF_SYMBOL_ALL_CAPIT: {
gu_puts("ALL_CAPIT", out, err);
break;
}
default:
gu_impossible();
}

View File

@@ -708,6 +708,13 @@ pgf_read_symbol(PgfReader* rdr)
gu_return_on_exn(rdr->err, gu_null_variant);
break;
}
case PGF_SYMBOL_SOFT_SPACE: {
gu_new_variant(PGF_SYMBOL_SOFT_SPACE,
PgfSymbolBIND,
&sym, rdr->opool);
gu_return_on_exn(rdr->err, gu_null_variant);
break;
}
case PGF_SYMBOL_CAPIT: {
gu_new_variant(PGF_SYMBOL_CAPIT,
PgfSymbolCAPIT,
@@ -715,6 +722,13 @@ pgf_read_symbol(PgfReader* rdr)
gu_return_on_exn(rdr->err, gu_null_variant);
break;
}
case PGF_SYMBOL_ALL_CAPIT: {
gu_new_variant(PGF_SYMBOL_ALL_CAPIT,
PgfSymbolCAPIT,
&sym, rdr->opool);
gu_return_on_exn(rdr->err, gu_null_variant);
break;
}
default:
pgf_read_tag_error(rdr);
}

View File

@@ -213,8 +213,10 @@ instance Binary Symbol where
put (SymKP d vs) = putWord8 4 >> put (d,vs)
put SymBIND = putWord8 5
put SymSOFT_BIND = putWord8 6
put SymCAPIT = putWord8 7
put SymNE = putWord8 8
put SymSOFT_SPACE = putWord8 7
put SymCAPIT = putWord8 8
put SymALL_CAPIT = putWord8 9
put SymNE = putWord8 10
get = do tag <- getWord8
case tag of
0 -> liftM2 SymCat get get
@@ -224,8 +226,10 @@ instance Binary Symbol where
4 -> liftM2 (\d vs -> SymKP d vs) get get
5 -> return SymBIND
6 -> return SymSOFT_BIND
7 -> return SymCAPIT
8 -> return SymNE
7 -> return SymSOFT_SPACE
8 -> return SymCAPIT
9 -> return SymALL_CAPIT
10-> return SymNE
_ -> decodingError
instance Binary PArg where

View File

@@ -62,7 +62,9 @@ data Symbol
| SymKP [Symbol] [([Symbol],[String])]
| SymBIND -- the special BIND token
| SymSOFT_BIND -- the special SOFT_BIND token
| SymSOFT_SPACE -- the special SOFT_SPACE token
| SymCAPIT -- the special CAPIT token
| SymALL_CAPIT -- the special ALL_CAPIT token
| SymNE -- non exist (this should be last constructor to simplify the binary search in the runtime)
deriving (Eq,Ord,Show)
data Production

View File

@@ -21,14 +21,14 @@ table vs = let m = M.fromList (zip enumAll vs) in (M.!) m
type Str = [Tok] -- token sequence
-- | Tokens
data Tok = TK String | TP [([Prefix],Str)] Str | BIND | SOFT_BIND | CAPIT
data Tok = TK String | TP [([Prefix],Str)] Str | BIND | SOFT_BIND | SOFT_SPACE | CAPIT | ALL_CAPIT
deriving (Eq,Ord,Show)
type Prefix = String -- ^ To be matched with the prefix of a following token
-- | Render a token sequence as a 'String'
fromStr :: Str -> String
fromStr = from False False
fromStr = from False id
where
from space cap ts =
case ts of
@@ -36,16 +36,19 @@ fromStr = from False False
TK s:ts -> put s++from True cap ts
BIND:ts -> from False cap ts
SOFT_BIND:ts -> from False cap ts
CAPIT:ts -> from space True ts
SOFT_SPACE:ts -> from True cap ts
CAPIT:ts -> from space toUpper1 ts
ALL_CAPIT:ts -> from space toUpperAll ts
TP alts def:ts -> from space cap (pick alts def r++[TK r]) -- hmm
where r = fromStr ts
where
put s = [' '|space]++up s
up = if cap then toUpper1 else id
put s = [' '|space]++cap s
toUpper1 (c:s) = toUpper c:s
toUpper1 s = s
toUpperAll = map toUpper
pick alts def r = head ([str|(ps,str)<-alts,any (`isPrefixOf` r) ps]++[def])
-- *** Common record types

View File

@@ -220,7 +220,9 @@ computeSeq filter seq args = concatMap compute seq
compute SymNE = [LeafNE]
compute SymBIND = [LeafKS "&+"]
compute SymSOFT_BIND = []
compute SymSOFT_SPACE = []
compute SymCAPIT = [LeafKS "&|"]
compute SymALL_CAPIT = [LeafKS "&|"]
compute (SymKP syms alts) = [LeafKP (concatMap compute syms) [(concatMap compute syms,cs) | (syms,cs) <- alts]]
getArg d r

View File

@@ -240,7 +240,9 @@ splitLexicalRules cnc p_prods =
seq2prefix (SymNE :syms) = TrieMap.empty
seq2prefix (SymBIND :syms) = TrieMap.fromList [wf ["&+"]]
seq2prefix (SymSOFT_BIND :syms) = TrieMap.fromList [wf []]
seq2prefix (SymSOFT_SPACE :syms) = TrieMap.fromList [wf []]
seq2prefix (SymCAPIT :syms) = TrieMap.fromList [wf ["&|"]]
seq2prefix (SymALL_CAPIT :syms) = TrieMap.fromList [wf ["&|"]]
updateConcrete abs cnc =
let p_prods0 = filterProductions IntMap.empty IntSet.empty (productions cnc)

View File

@@ -311,13 +311,18 @@ process flit ftok cnc (item@(Active j ppos funid seqid args key0):items) acc cha
SymBIND -> let !acc' = ftok_ ["&+"] (Active j (ppos+1) funid seqid args key0) acc
in process flit ftok cnc items acc' chart
SymSOFT_BIND->process flit ftok cnc ((Active j (ppos+1) funid seqid args key0):items) acc chart
SymSOFT_SPACE->process flit ftok cnc ((Active j (ppos+1) funid seqid args key0):items) acc chart
SymCAPIT -> let !acc' = ftok_ ["&|"] (Active j (ppos+1) funid seqid args key0) acc
in process flit ftok cnc items acc' chart
SymALL_CAPIT->let !acc' = ftok_ ["&|"] (Active j (ppos+1) funid seqid args key0) acc
in process flit ftok cnc items acc' chart
SymKP syms vars
-> let to_tok (SymKS t) = [t]
to_tok SymBIND = ["&+"]
to_tok SymSOFT_BIND = []
to_tok SymSOFT_SPACE= []
to_tok SymCAPIT = ["&|"]
to_tok SymALL_CAPIT = ["&|"]
to_tok _ = []
!acc' = foldl (\acc syms -> ftok_ (concatMap to_tok syms) (Active j (ppos+1) funid seqid args key0) acc) acc

View File

@@ -95,7 +95,9 @@ ppSymbol (SymKS t) = doubleQuotes (text t)
ppSymbol SymNE = text "nonExist"
ppSymbol SymBIND = text "BIND"
ppSymbol SymSOFT_BIND = text "SOFT_BIND"
ppSymbol SymSOFT_SPACE= text "SOFT_SPACE"
ppSymbol SymCAPIT = text "CAPIT"
ppSymbol SymALL_CAPIT = text "ALL_CAPIT"
ppSymbol (SymKP syms alts) = text "pre" <+> braces (hsep (punctuate semi (hsep (map ppSymbol syms) : map ppAlt alts)))
ppAlt (syms,ps) = hsep (map ppSymbol syms) <+> char '/' <+> hsep (map (doubleQuotes . text) ps)