mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
added all orthographic primitives
This commit is contained in:
@@ -36,9 +36,12 @@ resource Predef = {
|
||||
oper toStr : (L : Type) -> L -> Str = variants {} ; -- find the "first" string
|
||||
oper mapStr : (L : Type) -> (Str -> Str) -> L -> L = variants {} ;
|
||||
-- map all strings in a data structure; experimental ---
|
||||
oper nonExist : Str = variants {} ; -- a placeholder for non-existant morphological forms
|
||||
oper BIND : Str = variants {} ; -- a token for gluing
|
||||
oper SOFT_BIND : Str = variants {} ; -- a token for soft gluing
|
||||
oper CAPIT : Str = variants {} ; -- a token for capitalization
|
||||
|
||||
oper nonExist : Str = variants {} ; -- a placeholder for non-existant morphological forms
|
||||
oper BIND : Str = variants {} ; -- a token for gluing
|
||||
oper SOFT_BIND : Str = variants {} ; -- a token for soft gluing
|
||||
oper SOFT_SPACE : Str = variants {} ; -- a token for soft space
|
||||
oper CAPIT : Str = variants {} ; -- a token for capitalization
|
||||
oper ALL_CAPIT : Str = variants {} ; -- a token for capitalization of abreviations
|
||||
|
||||
} ;
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
-- This file defines some prelude facilities usable in all grammars.
|
||||
|
||||
resource Prelude = Predef[nonExist, BIND, SOFT_BIND, CAPIT] ** open (Predef=Predef) in {
|
||||
resource Prelude = Predef[nonExist, BIND, SOFT_BIND, SOFT_SPACE, CAPIT, ALL_CAPIT] ** open (Predef=Predef) in {
|
||||
|
||||
oper
|
||||
|
||||
|
||||
@@ -79,7 +79,8 @@ predefList =
|
||||
-- Canonical values:
|
||||
(cPBool,PBool),(cPFalse,PFalse),(cPTrue,PTrue),(cInt,Int),
|
||||
(cInts,Ints),(cNonExist,NonExist)
|
||||
,(cBIND,BIND),(cSOFT_BIND,SOFT_BIND),(cCAPIT,CAPIT)]
|
||||
,(cBIND,BIND),(cSOFT_BIND,SOFT_BIND),(cSOFT_SPACE,SOFT_SPACE)
|
||||
,(cCAPIT,CAPIT),(cALL_CAPIT,ALL_CAPIT)]
|
||||
--- add more functions!!!
|
||||
|
||||
delta f vs =
|
||||
@@ -109,7 +110,9 @@ delta f vs =
|
||||
NonExist-> canonical
|
||||
BIND -> canonical
|
||||
SOFT_BIND->canonical
|
||||
SOFT_SPACE->canonical
|
||||
CAPIT -> canonical
|
||||
ALL_CAPIT->canonical
|
||||
where
|
||||
canonical = delay
|
||||
delay = return (VApp f vs) -- wrong number of arguments
|
||||
|
||||
@@ -52,5 +52,5 @@ data Predefined = Drop | Take | Tk | Dp | EqStr | Occur | Occurs | ToUpper
|
||||
| Error
|
||||
-- Canonical values below:
|
||||
| PBool | PFalse | PTrue | Int | Ints | NonExist
|
||||
| BIND | SOFT_BIND | CAPIT
|
||||
| BIND | SOFT_BIND | SOFT_SPACE | CAPIT | ALL_CAPIT
|
||||
deriving (Show,Eq,Ord,Ix,Bounded,Enum)
|
||||
|
||||
@@ -248,9 +248,11 @@ convert' va gId vs gr = ppT
|
||||
|
||||
ppPredef n =
|
||||
case predef n of
|
||||
Ok BIND -> single (c "BIND")
|
||||
Ok SOFT_BIND -> single (c "SOFT_BIND")
|
||||
Ok CAPIT -> single (c "CAPIT")
|
||||
Ok BIND -> single (c "BIND")
|
||||
Ok SOFT_BIND -> single (c "SOFT_BIND")
|
||||
Ok SOFT_SPACE -> single (c "SOFT_SPACE")
|
||||
Ok CAPIT -> single (c "CAPIT")
|
||||
Ok ALL_CAPIT -> single (c "ALL_CAPIT")
|
||||
_ -> Var n
|
||||
|
||||
ppP p =
|
||||
|
||||
@@ -385,14 +385,18 @@ convertTerm opts sel ctype (Alts s alts)= do CStr s <- convertTerm opts CNil cty
|
||||
_ -> fail (render ("not valid pattern in pre expression" <+> ppPatt Unqualified 0 p))
|
||||
|
||||
convertTerm opts sel ctype (Q (m,f))
|
||||
| m == cPredef &&
|
||||
f == cNonExist = return (CStr [SymNE])
|
||||
| m == cPredef &&
|
||||
f == cBIND = return (CStr [SymBIND])
|
||||
| m == cPredef &&
|
||||
f == cSOFT_BIND = return (CStr [SymSOFT_BIND])
|
||||
| m == cPredef &&
|
||||
f == cSOFT_SPACE = return (CStr [SymSOFT_SPACE])
|
||||
| m == cPredef &&
|
||||
f == cCAPIT = return (CStr [SymCAPIT])
|
||||
| m == cPredef &&
|
||||
f == cALL_CAPIT = return (CStr [SymALL_CAPIT])
|
||||
| m == cPredef &&
|
||||
f == cNonExist = return (CStr [SymNE])
|
||||
{-
|
||||
convertTerm opts sel@(CProj l _) ctype (ExtR t1 t2@(R rs2))
|
||||
| l `elem` map fst rs2 = convertTerm opts sel ctype t2
|
||||
|
||||
@@ -86,10 +86,12 @@ sym2js (SymLit n l) = new "SymLit" [JS.EInt n, JS.EInt l]
|
||||
sym2js (SymVar n l) = new "SymVar" [JS.EInt n, JS.EInt l]
|
||||
sym2js (SymKS t) = new "SymKS" [JS.EStr t]
|
||||
sym2js (SymKP ts alts) = new "SymKP" [JS.EArray (map sym2js ts), JS.EArray (map alt2js alts)]
|
||||
sym2js SymNE = new "SymNE" []
|
||||
sym2js SymBIND = new "SymKS" [JS.EStr "&+"]
|
||||
sym2js SymSOFT_BIND = new "SymKS" [JS.EStr "&+"]
|
||||
sym2js SymSOFT_SPACE = new "SymKS" [JS.EStr "&+"]
|
||||
sym2js SymCAPIT = new "SymKS" [JS.EStr "&|"]
|
||||
sym2js SymALL_CAPIT = new "SymKS" [JS.EStr "&|"]
|
||||
sym2js SymNE = new "SymNE" []
|
||||
|
||||
alt2js (ps,ts) = new "Alt" [JS.EArray (map sym2js ps), JS.EArray (map JS.EStr ts)]
|
||||
|
||||
|
||||
@@ -80,7 +80,9 @@ pySymbol (SymKP ts alts) = pyDict 0 pyStr id [("pre", pyList 0 pySymbol ts), ("a
|
||||
where alt2py (ps,ts) = pyTuple 0 (pyList 0 pyStr) [map pySymbol ps, ts]
|
||||
pySymbol SymBIND = pyStr "&+"
|
||||
pySymbol SymSOFT_BIND = pyStr "&+"
|
||||
pySymbol SymSOFT_SPACE = pyStr "&+"
|
||||
pySymbol SymCAPIT = pyStr "&|"
|
||||
pySymbol SymALL_CAPIT = pyStr "&|"
|
||||
pySymbol SymNE = pyDict 0 pyStr id [("nonExist", pyTuple 0 id [])]
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
||||
@@ -53,8 +53,12 @@ primitives = Map.fromList
|
||||
[] typeStr []))) Nothing)
|
||||
, (cSOFT_BIND, ResOper (Just (noLoc (mkProd -- Str
|
||||
[] typeStr []))) Nothing)
|
||||
, (cSOFT_SPACE,ResOper (Just (noLoc (mkProd -- Str
|
||||
[] typeStr []))) Nothing)
|
||||
, (cCAPIT , ResOper (Just (noLoc (mkProd -- Str
|
||||
[] typeStr []))) Nothing)
|
||||
, (cALL_CAPIT, ResOper (Just (noLoc (mkProd -- Str
|
||||
[] typeStr []))) Nothing)
|
||||
]
|
||||
where
|
||||
fun from to = oper (mkFunType from to)
|
||||
|
||||
@@ -32,7 +32,9 @@ cUndefinedType = identS "UndefinedType"
|
||||
cNonExist = identS "nonExist"
|
||||
cBIND = identS "BIND"
|
||||
cSOFT_BIND = identS "SOFT_BIND"
|
||||
cSOFT_SPACE = identS "SOFT_SPACE"
|
||||
cCAPIT = identS "CAPIT"
|
||||
cALL_CAPIT = identS "ALL_CAPIT"
|
||||
|
||||
isPredefCat :: Ident -> Bool
|
||||
isPredefCat c = elem c [cInt,cString,cFloat]
|
||||
|
||||
@@ -97,7 +97,9 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
|
||||
---- AR 3/6/2010
|
||||
symbolToCFSymbol SymBIND = [Terminal "&+"]
|
||||
symbolToCFSymbol SymSOFT_BIND = []
|
||||
symbolToCFSymbol SymSOFT_SPACE = []
|
||||
symbolToCFSymbol SymCAPIT = [Terminal "&|"]
|
||||
symbolToCFSymbol SymALL_CAPIT = [Terminal "&|"]
|
||||
symbolToCFSymbol SymNE = []
|
||||
|
||||
fixProfile :: Array DotPos Symbol -> Int -> Profile
|
||||
|
||||
@@ -13,7 +13,7 @@ typedef struct {
|
||||
size_t n_matches;
|
||||
GuExn* err;
|
||||
bool bind;
|
||||
bool capit;
|
||||
PgfCapitState capit;
|
||||
GuPool* out_pool;
|
||||
GuPool* tmp_pool;
|
||||
} PgfAlignerLin;
|
||||
@@ -107,18 +107,38 @@ pgf_aligner_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
|
||||
gu_buf_flush(alin->parent_current);
|
||||
|
||||
pgf_aligner_push_parent(alin, fid);
|
||||
|
||||
if (alin->capit == PGF_CAPIT_NEXT)
|
||||
alin->capit = PGF_CAPIT_NONE;
|
||||
}
|
||||
|
||||
GuOut* out = gu_string_buf_out(alin->sbuf);
|
||||
|
||||
if (alin->capit) {
|
||||
switch (alin->capit) {
|
||||
case PGF_CAPIT_NONE:
|
||||
gu_string_write(tok, out, alin->err);
|
||||
break;
|
||||
case PGF_CAPIT_FIRST: {
|
||||
GuUCS c = gu_utf8_decode((const uint8_t**) &tok);
|
||||
c = gu_ucs_to_upper(c);
|
||||
gu_out_utf8(c, out, alin->err);
|
||||
alin->capit = false;
|
||||
gu_string_write(tok, out, alin->err);
|
||||
alin->capit = PGF_CAPIT_NONE;
|
||||
break;
|
||||
}
|
||||
case PGF_CAPIT_ALL:
|
||||
alin->capit = PGF_CAPIT_NEXT;
|
||||
// continue
|
||||
case PGF_CAPIT_NEXT: {
|
||||
const uint8_t* p = (uint8_t*) tok;
|
||||
while (*p) {
|
||||
GuUCS c = gu_utf8_decode(&p);
|
||||
c = gu_ucs_to_upper(c);
|
||||
gu_out_utf8(c, out, alin->err);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
gu_string_write(tok, out, alin->err);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -150,10 +170,10 @@ pgf_aligner_lzn_symbol_bind(PgfLinFuncs** funcs)
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_aligner_lzn_symbol_capit(PgfLinFuncs** funcs)
|
||||
pgf_aligner_lzn_symbol_capit(PgfLinFuncs** funcs, PgfCapitState capit)
|
||||
{
|
||||
PgfAlignerLin* alin = gu_container(funcs, PgfAlignerLin, funcs);
|
||||
alin->capit = true;
|
||||
alin->capit = capit;
|
||||
}
|
||||
|
||||
static PgfLinFuncs pgf_file_lin_funcs = {
|
||||
@@ -194,7 +214,7 @@ pgf_align_words(PgfConcr* concr, PgfExpr expr,
|
||||
.n_matches = 0,
|
||||
.err = err,
|
||||
.bind = true,
|
||||
.capit = false,
|
||||
.capit = PGF_CAPIT_NONE,
|
||||
.out_pool = pool,
|
||||
.tmp_pool = tmp_pool
|
||||
};
|
||||
|
||||
@@ -209,7 +209,9 @@ typedef enum {
|
||||
PGF_SYMBOL_KP,
|
||||
PGF_SYMBOL_BIND,
|
||||
PGF_SYMBOL_SOFT_BIND,
|
||||
PGF_SYMBOL_SOFT_SPACE,
|
||||
PGF_SYMBOL_CAPIT,
|
||||
PGF_SYMBOL_ALL_CAPIT,
|
||||
PGF_SYMBOL_NE
|
||||
} PgfSymbolTag;
|
||||
|
||||
|
||||
@@ -652,6 +652,7 @@ typedef enum {
|
||||
PGF_CACHED_END,
|
||||
PGF_CACHED_BIND,
|
||||
PGF_CACHED_CAPIT,
|
||||
PGF_CACHED_ALL_CAPIT,
|
||||
PGF_CACHED_NE
|
||||
} PgfLzrCachedTag;
|
||||
|
||||
@@ -718,7 +719,12 @@ pgf_lzr_cache_flush(PgfLzrCache* cache, PgfSymbols* form)
|
||||
break;
|
||||
case PGF_CACHED_CAPIT:
|
||||
if ((*cache->lzr->funcs)->symbol_capit) {
|
||||
(*cache->lzr->funcs)->symbol_capit(cache->lzr->funcs);
|
||||
(*cache->lzr->funcs)->symbol_capit(cache->lzr->funcs, PGF_CAPIT_FIRST);
|
||||
}
|
||||
break;
|
||||
case PGF_CACHED_ALL_CAPIT:
|
||||
if ((*cache->lzr->funcs)->symbol_capit) {
|
||||
(*cache->lzr->funcs)->symbol_capit(cache->lzr->funcs, PGF_CAPIT_ALL);
|
||||
}
|
||||
break;
|
||||
case PGF_CACHED_NE:
|
||||
@@ -797,11 +803,11 @@ pgf_lzr_cache_symbol_bind(PgfLinFuncs** funcs)
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_lzr_cache_symbol_capit(PgfLinFuncs** funcs)
|
||||
pgf_lzr_cache_symbol_capit(PgfLinFuncs** funcs, PgfCapitState capit)
|
||||
{
|
||||
PgfLzrCache* cache = gu_container(funcs, PgfLzrCache, funcs);
|
||||
PgfLzrCached* event = gu_buf_extend(cache->events);
|
||||
event->tag = PGF_CACHED_CAPIT;
|
||||
event->tag = (capit == PGF_CAPIT_ALL) ? PGF_CACHED_ALL_CAPIT : PGF_CACHED_CAPIT;
|
||||
}
|
||||
|
||||
static PgfLinFuncs pgf_lzr_cache_funcs = {
|
||||
@@ -921,9 +927,18 @@ pgf_lzr_linearize_symbols(PgfLzr* lzr, PgfCncTreeApp* fapp,
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_SOFT_SPACE: {
|
||||
// SOFT_SPACE should be just ignored in linearization
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_CAPIT:
|
||||
if ((*lzr->funcs)->symbol_capit) {
|
||||
(*lzr->funcs)->symbol_capit(lzr->funcs);
|
||||
(*lzr->funcs)->symbol_capit(lzr->funcs, PGF_CAPIT_FIRST);
|
||||
}
|
||||
break;
|
||||
case PGF_SYMBOL_ALL_CAPIT:
|
||||
if ((*lzr->funcs)->symbol_capit) {
|
||||
(*lzr->funcs)->symbol_capit(lzr->funcs, PGF_CAPIT_ALL);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
@@ -1045,20 +1060,11 @@ typedef struct PgfSimpleLin PgfSimpleLin;
|
||||
struct PgfSimpleLin {
|
||||
PgfLinFuncs* funcs;
|
||||
bool bind;
|
||||
bool capit;
|
||||
PgfCapitState capit;
|
||||
GuOut* out;
|
||||
GuExn* err;
|
||||
};
|
||||
|
||||
static void
|
||||
pgf_file_lzn_put_space(PgfSimpleLin* flin)
|
||||
{
|
||||
if (flin->bind)
|
||||
flin->bind = false;
|
||||
else
|
||||
gu_putc(' ', flin->out, flin->err);
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_file_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
|
||||
{
|
||||
@@ -1067,16 +1073,39 @@ pgf_file_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
|
||||
return;
|
||||
}
|
||||
|
||||
pgf_file_lzn_put_space(flin);
|
||||
if (flin->bind)
|
||||
flin->bind = false;
|
||||
else {
|
||||
gu_putc(' ', flin->out, flin->err);
|
||||
if (flin->capit == PGF_CAPIT_NEXT)
|
||||
flin->capit = PGF_CAPIT_NONE;
|
||||
}
|
||||
|
||||
if (flin->capit) {
|
||||
switch (flin->capit) {
|
||||
case PGF_CAPIT_NONE:
|
||||
gu_string_write(tok, flin->out, flin->err);
|
||||
break;
|
||||
case PGF_CAPIT_FIRST: {
|
||||
GuUCS c = gu_utf8_decode((const uint8_t**) &tok);
|
||||
c = gu_ucs_to_upper(c);
|
||||
gu_out_utf8(c, flin->out, flin->err);
|
||||
flin->capit = false;
|
||||
gu_string_write(tok, flin->out, flin->err);
|
||||
flin->capit = PGF_CAPIT_NONE;
|
||||
break;
|
||||
}
|
||||
case PGF_CAPIT_ALL:
|
||||
flin->capit = PGF_CAPIT_NEXT;
|
||||
// continue
|
||||
case PGF_CAPIT_NEXT: {
|
||||
const uint8_t* p = (uint8_t*) tok;
|
||||
while (*p) {
|
||||
GuUCS c = gu_utf8_decode(&p);
|
||||
c = gu_ucs_to_upper(c);
|
||||
gu_out_utf8(c, flin->out, flin->err);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
gu_string_write(tok, flin->out, flin->err);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -1094,10 +1123,10 @@ pgf_file_lzn_symbol_bind(PgfLinFuncs** funcs)
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_file_lzn_symbol_capit(PgfLinFuncs** funcs)
|
||||
pgf_file_lzn_symbol_capit(PgfLinFuncs** funcs, PgfCapitState capit)
|
||||
{
|
||||
PgfSimpleLin* flin = gu_container(funcs, PgfSimpleLin, funcs);
|
||||
flin->capit = true;
|
||||
flin->capit = capit;
|
||||
}
|
||||
|
||||
static PgfLinFuncs pgf_file_lin_funcs = {
|
||||
@@ -1117,7 +1146,7 @@ pgf_lzr_linearize_simple(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx,
|
||||
PgfSimpleLin flin = {
|
||||
.funcs = &pgf_file_lin_funcs,
|
||||
.bind = true,
|
||||
.capit = false,
|
||||
.capit = PGF_CAPIT_NONE,
|
||||
.out = out,
|
||||
.err = err
|
||||
};
|
||||
|
||||
@@ -38,6 +38,13 @@ pgf_lzr_wrap_linref(PgfCncTree ctree, GuPool* pool);
|
||||
|
||||
typedef struct PgfLinFuncs PgfLinFuncs;
|
||||
|
||||
typedef enum {
|
||||
PGF_CAPIT_NONE,
|
||||
PGF_CAPIT_FIRST,
|
||||
PGF_CAPIT_ALL,
|
||||
PGF_CAPIT_NEXT
|
||||
} PgfCapitState;
|
||||
|
||||
struct PgfLinFuncs
|
||||
{
|
||||
/// Output tokens
|
||||
@@ -56,7 +63,7 @@ struct PgfLinFuncs
|
||||
void (*symbol_bind)(PgfLinFuncs** self);
|
||||
|
||||
/// capitalization
|
||||
void (*symbol_capit)(PgfLinFuncs** self);
|
||||
void (*symbol_capit)(PgfLinFuncs** self, PgfCapitState capit);
|
||||
};
|
||||
|
||||
/// Linearize a concrete syntax tree.
|
||||
|
||||
@@ -134,8 +134,10 @@ pgf_prev_extern_sym(PgfSymbol sym)
|
||||
return *((PgfSymbol*) (((PgfSymbolVar*) i.data)+1));
|
||||
case PGF_SYMBOL_BIND:
|
||||
case PGF_SYMBOL_SOFT_BIND:
|
||||
case PGF_SYMBOL_SOFT_SPACE:
|
||||
return *((PgfSymbol*) (((PgfSymbolBIND*) i.data)+1));
|
||||
case PGF_SYMBOL_CAPIT:
|
||||
case PGF_SYMBOL_ALL_CAPIT:
|
||||
return *((PgfSymbol*) (((PgfSymbolCAPIT*) i.data)+1));
|
||||
case PGF_SYMBOL_NE:
|
||||
return *((PgfSymbol*) (((PgfSymbolNE*) i.data)+1));
|
||||
@@ -768,7 +770,6 @@ pgf_item_update_arg(PgfItem* item, size_t d, PgfCCat *new_ccat,
|
||||
static void
|
||||
pgf_item_advance(PgfItem* item, GuPool* pool)
|
||||
{
|
||||
|
||||
if (GU_LIKELY(item->alt == 0)) {
|
||||
item->sym_idx++;
|
||||
pgf_item_set_curr_symbol(item, pool);
|
||||
@@ -1063,7 +1064,11 @@ pgf_symbols_cmp(GuString* psent, BIND_TYPE* pbind, PgfSymbols* syms)
|
||||
*pbind = BIND_SOFT;
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_CAPIT: {
|
||||
case PGF_SYMBOL_SOFT_SPACE: {
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_CAPIT:
|
||||
case PGF_SYMBOL_ALL_CAPIT: {
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_NE: {
|
||||
@@ -1541,7 +1546,8 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_SOFT_BIND: {
|
||||
case PGF_SYMBOL_SOFT_BIND:
|
||||
case PGF_SYMBOL_SOFT_SPACE: {
|
||||
if (ps->before->start_offset == ps->before->end_offset) {
|
||||
if (ps->before->needs_bind) {
|
||||
PgfParseState* state =
|
||||
@@ -1562,7 +1568,8 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_CAPIT: {
|
||||
case PGF_SYMBOL_CAPIT:
|
||||
case PGF_SYMBOL_ALL_CAPIT: {
|
||||
pgf_item_advance(item, ps->pool);
|
||||
pgf_parsing_symbol(ps, item, item->curr_sym);
|
||||
break;
|
||||
|
||||
@@ -276,10 +276,18 @@ pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err)
|
||||
gu_puts("SOFT_BIND", out, err);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_SOFT_SPACE: {
|
||||
gu_puts("SOFT_SPACE", out, err);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_CAPIT: {
|
||||
gu_puts("CAPIT", out, err);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_ALL_CAPIT: {
|
||||
gu_puts("ALL_CAPIT", out, err);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
|
||||
@@ -708,6 +708,13 @@ pgf_read_symbol(PgfReader* rdr)
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_SOFT_SPACE: {
|
||||
gu_new_variant(PGF_SYMBOL_SOFT_SPACE,
|
||||
PgfSymbolBIND,
|
||||
&sym, rdr->opool);
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_CAPIT: {
|
||||
gu_new_variant(PGF_SYMBOL_CAPIT,
|
||||
PgfSymbolCAPIT,
|
||||
@@ -715,6 +722,13 @@ pgf_read_symbol(PgfReader* rdr)
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_ALL_CAPIT: {
|
||||
gu_new_variant(PGF_SYMBOL_ALL_CAPIT,
|
||||
PgfSymbolCAPIT,
|
||||
&sym, rdr->opool);
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
pgf_read_tag_error(rdr);
|
||||
}
|
||||
|
||||
@@ -213,8 +213,10 @@ instance Binary Symbol where
|
||||
put (SymKP d vs) = putWord8 4 >> put (d,vs)
|
||||
put SymBIND = putWord8 5
|
||||
put SymSOFT_BIND = putWord8 6
|
||||
put SymCAPIT = putWord8 7
|
||||
put SymNE = putWord8 8
|
||||
put SymSOFT_SPACE = putWord8 7
|
||||
put SymCAPIT = putWord8 8
|
||||
put SymALL_CAPIT = putWord8 9
|
||||
put SymNE = putWord8 10
|
||||
get = do tag <- getWord8
|
||||
case tag of
|
||||
0 -> liftM2 SymCat get get
|
||||
@@ -224,8 +226,10 @@ instance Binary Symbol where
|
||||
4 -> liftM2 (\d vs -> SymKP d vs) get get
|
||||
5 -> return SymBIND
|
||||
6 -> return SymSOFT_BIND
|
||||
7 -> return SymCAPIT
|
||||
8 -> return SymNE
|
||||
7 -> return SymSOFT_SPACE
|
||||
8 -> return SymCAPIT
|
||||
9 -> return SymALL_CAPIT
|
||||
10-> return SymNE
|
||||
_ -> decodingError
|
||||
|
||||
instance Binary PArg where
|
||||
|
||||
@@ -62,7 +62,9 @@ data Symbol
|
||||
| SymKP [Symbol] [([Symbol],[String])]
|
||||
| SymBIND -- the special BIND token
|
||||
| SymSOFT_BIND -- the special SOFT_BIND token
|
||||
| SymSOFT_SPACE -- the special SOFT_SPACE token
|
||||
| SymCAPIT -- the special CAPIT token
|
||||
| SymALL_CAPIT -- the special ALL_CAPIT token
|
||||
| SymNE -- non exist (this should be last constructor to simplify the binary search in the runtime)
|
||||
deriving (Eq,Ord,Show)
|
||||
data Production
|
||||
|
||||
@@ -21,14 +21,14 @@ table vs = let m = M.fromList (zip enumAll vs) in (M.!) m
|
||||
type Str = [Tok] -- token sequence
|
||||
|
||||
-- | Tokens
|
||||
data Tok = TK String | TP [([Prefix],Str)] Str | BIND | SOFT_BIND | CAPIT
|
||||
data Tok = TK String | TP [([Prefix],Str)] Str | BIND | SOFT_BIND | SOFT_SPACE | CAPIT | ALL_CAPIT
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
type Prefix = String -- ^ To be matched with the prefix of a following token
|
||||
|
||||
-- | Render a token sequence as a 'String'
|
||||
fromStr :: Str -> String
|
||||
fromStr = from False False
|
||||
fromStr = from False id
|
||||
where
|
||||
from space cap ts =
|
||||
case ts of
|
||||
@@ -36,16 +36,19 @@ fromStr = from False False
|
||||
TK s:ts -> put s++from True cap ts
|
||||
BIND:ts -> from False cap ts
|
||||
SOFT_BIND:ts -> from False cap ts
|
||||
CAPIT:ts -> from space True ts
|
||||
SOFT_SPACE:ts -> from True cap ts
|
||||
CAPIT:ts -> from space toUpper1 ts
|
||||
ALL_CAPIT:ts -> from space toUpperAll ts
|
||||
TP alts def:ts -> from space cap (pick alts def r++[TK r]) -- hmm
|
||||
where r = fromStr ts
|
||||
where
|
||||
put s = [' '|space]++up s
|
||||
up = if cap then toUpper1 else id
|
||||
put s = [' '|space]++cap s
|
||||
|
||||
toUpper1 (c:s) = toUpper c:s
|
||||
toUpper1 s = s
|
||||
|
||||
toUpperAll = map toUpper
|
||||
|
||||
pick alts def r = head ([str|(ps,str)<-alts,any (`isPrefixOf` r) ps]++[def])
|
||||
|
||||
-- *** Common record types
|
||||
|
||||
@@ -220,7 +220,9 @@ computeSeq filter seq args = concatMap compute seq
|
||||
compute SymNE = [LeafNE]
|
||||
compute SymBIND = [LeafKS "&+"]
|
||||
compute SymSOFT_BIND = []
|
||||
compute SymSOFT_SPACE = []
|
||||
compute SymCAPIT = [LeafKS "&|"]
|
||||
compute SymALL_CAPIT = [LeafKS "&|"]
|
||||
compute (SymKP syms alts) = [LeafKP (concatMap compute syms) [(concatMap compute syms,cs) | (syms,cs) <- alts]]
|
||||
|
||||
getArg d r
|
||||
|
||||
@@ -240,7 +240,9 @@ splitLexicalRules cnc p_prods =
|
||||
seq2prefix (SymNE :syms) = TrieMap.empty
|
||||
seq2prefix (SymBIND :syms) = TrieMap.fromList [wf ["&+"]]
|
||||
seq2prefix (SymSOFT_BIND :syms) = TrieMap.fromList [wf []]
|
||||
seq2prefix (SymSOFT_SPACE :syms) = TrieMap.fromList [wf []]
|
||||
seq2prefix (SymCAPIT :syms) = TrieMap.fromList [wf ["&|"]]
|
||||
seq2prefix (SymALL_CAPIT :syms) = TrieMap.fromList [wf ["&|"]]
|
||||
|
||||
updateConcrete abs cnc =
|
||||
let p_prods0 = filterProductions IntMap.empty IntSet.empty (productions cnc)
|
||||
|
||||
@@ -311,13 +311,18 @@ process flit ftok cnc (item@(Active j ppos funid seqid args key0):items) acc cha
|
||||
SymBIND -> let !acc' = ftok_ ["&+"] (Active j (ppos+1) funid seqid args key0) acc
|
||||
in process flit ftok cnc items acc' chart
|
||||
SymSOFT_BIND->process flit ftok cnc ((Active j (ppos+1) funid seqid args key0):items) acc chart
|
||||
SymSOFT_SPACE->process flit ftok cnc ((Active j (ppos+1) funid seqid args key0):items) acc chart
|
||||
SymCAPIT -> let !acc' = ftok_ ["&|"] (Active j (ppos+1) funid seqid args key0) acc
|
||||
in process flit ftok cnc items acc' chart
|
||||
SymALL_CAPIT->let !acc' = ftok_ ["&|"] (Active j (ppos+1) funid seqid args key0) acc
|
||||
in process flit ftok cnc items acc' chart
|
||||
SymKP syms vars
|
||||
-> let to_tok (SymKS t) = [t]
|
||||
to_tok SymBIND = ["&+"]
|
||||
to_tok SymSOFT_BIND = []
|
||||
to_tok SymSOFT_SPACE= []
|
||||
to_tok SymCAPIT = ["&|"]
|
||||
to_tok SymALL_CAPIT = ["&|"]
|
||||
to_tok _ = []
|
||||
|
||||
!acc' = foldl (\acc syms -> ftok_ (concatMap to_tok syms) (Active j (ppos+1) funid seqid args key0) acc) acc
|
||||
|
||||
@@ -95,7 +95,9 @@ ppSymbol (SymKS t) = doubleQuotes (text t)
|
||||
ppSymbol SymNE = text "nonExist"
|
||||
ppSymbol SymBIND = text "BIND"
|
||||
ppSymbol SymSOFT_BIND = text "SOFT_BIND"
|
||||
ppSymbol SymSOFT_SPACE= text "SOFT_SPACE"
|
||||
ppSymbol SymCAPIT = text "CAPIT"
|
||||
ppSymbol SymALL_CAPIT = text "ALL_CAPIT"
|
||||
ppSymbol (SymKP syms alts) = text "pre" <+> braces (hsep (punctuate semi (hsep (map ppSymbol syms) : map ppAlt alts)))
|
||||
|
||||
ppAlt (syms,ps) = hsep (map ppSymbol syms) <+> char '/' <+> hsep (map (doubleQuotes . text) ps)
|
||||
|
||||
Reference in New Issue
Block a user