diff --git a/src/compiler/GF/Compile/Compute/Predef.hs b/src/compiler/GF/Compile/Compute/Predef.hs index 768dc2e6b..ab74f1f63 100644 --- a/src/compiler/GF/Compile/Compute/Predef.hs +++ b/src/compiler/GF/Compile/Compute/Predef.hs @@ -79,7 +79,8 @@ predefList = -- Canonical values: (cPBool,PBool),(cPFalse,PFalse),(cPTrue,PTrue),(cInt,Int), (cInts,Ints),(cNonExist,NonExist) - ,(cBIND,BIND),(cSOFT_BIND,SOFT_BIND),(cCAPIT,CAPIT)] + ,(cBIND,BIND),(cSOFT_BIND,SOFT_BIND),(cSOFT_SPACE,SOFT_SPACE) + ,(cCAPIT,CAPIT),(cALL_CAPIT,ALL_CAPIT)] --- add more functions!!! delta f vs = @@ -109,7 +110,9 @@ delta f vs = NonExist-> canonical BIND -> canonical SOFT_BIND->canonical + SOFT_SPACE->canonical CAPIT -> canonical + ALL_CAPIT->canonical where canonical = delay delay = return (VApp f vs) -- wrong number of arguments diff --git a/src/compiler/GF/Compile/Compute/Value.hs b/src/compiler/GF/Compile/Compute/Value.hs index 016c6572e..9bc258562 100644 --- a/src/compiler/GF/Compile/Compute/Value.hs +++ b/src/compiler/GF/Compile/Compute/Value.hs @@ -52,5 +52,5 @@ data Predefined = Drop | Take | Tk | Dp | EqStr | Occur | Occurs | ToUpper | Error -- Canonical values below: | PBool | PFalse | PTrue | Int | Ints | NonExist - | BIND | SOFT_BIND | CAPIT + | BIND | SOFT_BIND | SOFT_SPACE | CAPIT | ALL_CAPIT deriving (Show,Eq,Ord,Ix,Bounded,Enum) diff --git a/src/compiler/GF/Compile/ConcreteToHaskell.hs b/src/compiler/GF/Compile/ConcreteToHaskell.hs index 3dc71b3f5..6dc572b39 100644 --- a/src/compiler/GF/Compile/ConcreteToHaskell.hs +++ b/src/compiler/GF/Compile/ConcreteToHaskell.hs @@ -248,9 +248,11 @@ convert' va gId vs gr = ppT ppPredef n = case predef n of - Ok BIND -> single (c "BIND") - Ok SOFT_BIND -> single (c "SOFT_BIND") - Ok CAPIT -> single (c "CAPIT") + Ok BIND -> single (c "BIND") + Ok SOFT_BIND -> single (c "SOFT_BIND") + Ok SOFT_SPACE -> single (c "SOFT_SPACE") + Ok CAPIT -> single (c "CAPIT") + Ok ALL_CAPIT -> single (c "ALL_CAPIT") _ -> Var n ppP p = diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs index 4aefd3b5f..f0c256775 100644 --- a/src/compiler/GF/Compile/GeneratePMCFG.hs +++ b/src/compiler/GF/Compile/GeneratePMCFG.hs @@ -385,14 +385,18 @@ convertTerm opts sel ctype (Alts s alts)= do CStr s <- convertTerm opts CNil cty _ -> fail (render ("not valid pattern in pre expression" <+> ppPatt Unqualified 0 p)) convertTerm opts sel ctype (Q (m,f)) - | m == cPredef && - f == cNonExist = return (CStr [SymNE]) | m == cPredef && f == cBIND = return (CStr [SymBIND]) | m == cPredef && f == cSOFT_BIND = return (CStr [SymSOFT_BIND]) + | m == cPredef && + f == cSOFT_SPACE = return (CStr [SymSOFT_SPACE]) | m == cPredef && f == cCAPIT = return (CStr [SymCAPIT]) + | m == cPredef && + f == cALL_CAPIT = return (CStr [SymALL_CAPIT]) + | m == cPredef && + f == cNonExist = return (CStr [SymNE]) {- convertTerm opts sel@(CProj l _) ctype (ExtR t1 t2@(R rs2)) | l `elem` map fst rs2 = convertTerm opts sel ctype t2 diff --git a/src/compiler/GF/Compile/PGFtoJS.hs b/src/compiler/GF/Compile/PGFtoJS.hs index 6c391f717..1d53cbc3b 100644 --- a/src/compiler/GF/Compile/PGFtoJS.hs +++ b/src/compiler/GF/Compile/PGFtoJS.hs @@ -86,10 +86,12 @@ sym2js (SymLit n l) = new "SymLit" [JS.EInt n, JS.EInt l] sym2js (SymVar n l) = new "SymVar" [JS.EInt n, JS.EInt l] sym2js (SymKS t) = new "SymKS" [JS.EStr t] sym2js (SymKP ts alts) = new "SymKP" [JS.EArray (map sym2js ts), JS.EArray (map alt2js alts)] -sym2js SymNE = new "SymNE" [] sym2js SymBIND = new "SymKS" [JS.EStr "&+"] sym2js SymSOFT_BIND = new "SymKS" [JS.EStr "&+"] +sym2js SymSOFT_SPACE = new "SymKS" [JS.EStr "&+"] sym2js SymCAPIT = new "SymKS" [JS.EStr "&|"] +sym2js SymALL_CAPIT = new "SymKS" [JS.EStr "&|"] +sym2js SymNE = new "SymNE" [] alt2js (ps,ts) = new "Alt" [JS.EArray (map sym2js ps), JS.EArray (map JS.EStr ts)] diff --git a/src/compiler/GF/Compile/PGFtoPython.hs b/src/compiler/GF/Compile/PGFtoPython.hs index 10ff5a7cc..379a71598 100644 --- a/src/compiler/GF/Compile/PGFtoPython.hs +++ b/src/compiler/GF/Compile/PGFtoPython.hs @@ -80,7 +80,9 @@ pySymbol (SymKP ts alts) = pyDict 0 pyStr id [("pre", pyList 0 pySymbol ts), ("a where alt2py (ps,ts) = pyTuple 0 (pyList 0 pyStr) [map pySymbol ps, ts] pySymbol SymBIND = pyStr "&+" pySymbol SymSOFT_BIND = pyStr "&+" +pySymbol SymSOFT_SPACE = pyStr "&+" pySymbol SymCAPIT = pyStr "&|" +pySymbol SymALL_CAPIT = pyStr "&|" pySymbol SymNE = pyDict 0 pyStr id [("nonExist", pyTuple 0 id [])] ---------------------------------------------------------------------- diff --git a/src/compiler/GF/Compile/TypeCheck/Primitives.hs b/src/compiler/GF/Compile/TypeCheck/Primitives.hs index a318e74b0..d82cd1568 100644 --- a/src/compiler/GF/Compile/TypeCheck/Primitives.hs +++ b/src/compiler/GF/Compile/TypeCheck/Primitives.hs @@ -53,8 +53,12 @@ primitives = Map.fromList [] typeStr []))) Nothing) , (cSOFT_BIND, ResOper (Just (noLoc (mkProd -- Str [] typeStr []))) Nothing) + , (cSOFT_SPACE,ResOper (Just (noLoc (mkProd -- Str + [] typeStr []))) Nothing) , (cCAPIT , ResOper (Just (noLoc (mkProd -- Str [] typeStr []))) Nothing) + , (cALL_CAPIT, ResOper (Just (noLoc (mkProd -- Str + [] typeStr []))) Nothing) ] where fun from to = oper (mkFunType from to) diff --git a/src/compiler/GF/Grammar/Predef.hs b/src/compiler/GF/Grammar/Predef.hs index eec53788d..e330f583c 100644 --- a/src/compiler/GF/Grammar/Predef.hs +++ b/src/compiler/GF/Grammar/Predef.hs @@ -32,7 +32,9 @@ cUndefinedType = identS "UndefinedType" cNonExist = identS "nonExist" cBIND = identS "BIND" cSOFT_BIND = identS "SOFT_BIND" +cSOFT_SPACE = identS "SOFT_SPACE" cCAPIT = identS "CAPIT" +cALL_CAPIT = identS "ALL_CAPIT" isPredefCat :: Ident -> Bool isPredefCat c = elem c [cInt,cString,cFloat] diff --git a/src/compiler/GF/Speech/PGFToCFG.hs b/src/compiler/GF/Speech/PGFToCFG.hs index e5090fa0f..49c679aea 100644 --- a/src/compiler/GF/Speech/PGFToCFG.hs +++ b/src/compiler/GF/Speech/PGFToCFG.hs @@ -97,7 +97,9 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co ---- AR 3/6/2010 symbolToCFSymbol SymBIND = [Terminal "&+"] symbolToCFSymbol SymSOFT_BIND = [] + symbolToCFSymbol SymSOFT_SPACE = [] symbolToCFSymbol SymCAPIT = [Terminal "&|"] + symbolToCFSymbol SymALL_CAPIT = [Terminal "&|"] symbolToCFSymbol SymNE = [] fixProfile :: Array DotPos Symbol -> Int -> Profile diff --git a/src/runtime/c/pgf/aligner.c b/src/runtime/c/pgf/aligner.c index a3eb4e2c0..b23b3d2e9 100644 --- a/src/runtime/c/pgf/aligner.c +++ b/src/runtime/c/pgf/aligner.c @@ -13,7 +13,7 @@ typedef struct { size_t n_matches; GuExn* err; bool bind; - bool capit; + PgfCapitState capit; GuPool* out_pool; GuPool* tmp_pool; } PgfAlignerLin; @@ -107,18 +107,38 @@ pgf_aligner_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok) gu_buf_flush(alin->parent_current); pgf_aligner_push_parent(alin, fid); + + if (alin->capit == PGF_CAPIT_NEXT) + alin->capit = PGF_CAPIT_NONE; } GuOut* out = gu_string_buf_out(alin->sbuf); - if (alin->capit) { + switch (alin->capit) { + case PGF_CAPIT_NONE: + gu_string_write(tok, out, alin->err); + break; + case PGF_CAPIT_FIRST: { GuUCS c = gu_utf8_decode((const uint8_t**) &tok); c = gu_ucs_to_upper(c); gu_out_utf8(c, out, alin->err); - alin->capit = false; + gu_string_write(tok, out, alin->err); + alin->capit = PGF_CAPIT_NONE; + break; + } + case PGF_CAPIT_ALL: + alin->capit = PGF_CAPIT_NEXT; + // continue + case PGF_CAPIT_NEXT: { + const uint8_t* p = (uint8_t*) tok; + while (*p) { + GuUCS c = gu_utf8_decode(&p); + c = gu_ucs_to_upper(c); + gu_out_utf8(c, out, alin->err); + } + break; + } } - - gu_string_write(tok, out, alin->err); } static void @@ -150,10 +170,10 @@ pgf_aligner_lzn_symbol_bind(PgfLinFuncs** funcs) } static void -pgf_aligner_lzn_symbol_capit(PgfLinFuncs** funcs) +pgf_aligner_lzn_symbol_capit(PgfLinFuncs** funcs, PgfCapitState capit) { PgfAlignerLin* alin = gu_container(funcs, PgfAlignerLin, funcs); - alin->capit = true; + alin->capit = capit; } static PgfLinFuncs pgf_file_lin_funcs = { @@ -194,7 +214,7 @@ pgf_align_words(PgfConcr* concr, PgfExpr expr, .n_matches = 0, .err = err, .bind = true, - .capit = false, + .capit = PGF_CAPIT_NONE, .out_pool = pool, .tmp_pool = tmp_pool }; diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h index 5bdf1d38f..a8d37add7 100644 --- a/src/runtime/c/pgf/data.h +++ b/src/runtime/c/pgf/data.h @@ -209,7 +209,9 @@ typedef enum { PGF_SYMBOL_KP, PGF_SYMBOL_BIND, PGF_SYMBOL_SOFT_BIND, + PGF_SYMBOL_SOFT_SPACE, PGF_SYMBOL_CAPIT, + PGF_SYMBOL_ALL_CAPIT, PGF_SYMBOL_NE } PgfSymbolTag; diff --git a/src/runtime/c/pgf/linearizer.c b/src/runtime/c/pgf/linearizer.c index 409d60a2c..e7031950d 100644 --- a/src/runtime/c/pgf/linearizer.c +++ b/src/runtime/c/pgf/linearizer.c @@ -652,6 +652,7 @@ typedef enum { PGF_CACHED_END, PGF_CACHED_BIND, PGF_CACHED_CAPIT, + PGF_CACHED_ALL_CAPIT, PGF_CACHED_NE } PgfLzrCachedTag; @@ -718,7 +719,12 @@ pgf_lzr_cache_flush(PgfLzrCache* cache, PgfSymbols* form) break; case PGF_CACHED_CAPIT: if ((*cache->lzr->funcs)->symbol_capit) { - (*cache->lzr->funcs)->symbol_capit(cache->lzr->funcs); + (*cache->lzr->funcs)->symbol_capit(cache->lzr->funcs, PGF_CAPIT_FIRST); + } + break; + case PGF_CACHED_ALL_CAPIT: + if ((*cache->lzr->funcs)->symbol_capit) { + (*cache->lzr->funcs)->symbol_capit(cache->lzr->funcs, PGF_CAPIT_ALL); } break; case PGF_CACHED_NE: @@ -797,11 +803,11 @@ pgf_lzr_cache_symbol_bind(PgfLinFuncs** funcs) } static void -pgf_lzr_cache_symbol_capit(PgfLinFuncs** funcs) +pgf_lzr_cache_symbol_capit(PgfLinFuncs** funcs, PgfCapitState capit) { PgfLzrCache* cache = gu_container(funcs, PgfLzrCache, funcs); PgfLzrCached* event = gu_buf_extend(cache->events); - event->tag = PGF_CACHED_CAPIT; + event->tag = (capit == PGF_CAPIT_ALL) ? PGF_CACHED_ALL_CAPIT : PGF_CACHED_CAPIT; } static PgfLinFuncs pgf_lzr_cache_funcs = { @@ -921,9 +927,18 @@ pgf_lzr_linearize_symbols(PgfLzr* lzr, PgfCncTreeApp* fapp, } break; } + case PGF_SYMBOL_SOFT_SPACE: { + // SOFT_SPACE should be just ignored in linearization + break; + } case PGF_SYMBOL_CAPIT: if ((*lzr->funcs)->symbol_capit) { - (*lzr->funcs)->symbol_capit(lzr->funcs); + (*lzr->funcs)->symbol_capit(lzr->funcs, PGF_CAPIT_FIRST); + } + break; + case PGF_SYMBOL_ALL_CAPIT: + if ((*lzr->funcs)->symbol_capit) { + (*lzr->funcs)->symbol_capit(lzr->funcs, PGF_CAPIT_ALL); } break; default: @@ -1045,20 +1060,11 @@ typedef struct PgfSimpleLin PgfSimpleLin; struct PgfSimpleLin { PgfLinFuncs* funcs; bool bind; - bool capit; + PgfCapitState capit; GuOut* out; GuExn* err; }; -static void -pgf_file_lzn_put_space(PgfSimpleLin* flin) -{ - if (flin->bind) - flin->bind = false; - else - gu_putc(' ', flin->out, flin->err); -} - static void pgf_file_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok) { @@ -1067,16 +1073,39 @@ pgf_file_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok) return; } - pgf_file_lzn_put_space(flin); + if (flin->bind) + flin->bind = false; + else { + gu_putc(' ', flin->out, flin->err); + if (flin->capit == PGF_CAPIT_NEXT) + flin->capit = PGF_CAPIT_NONE; + } - if (flin->capit) { + switch (flin->capit) { + case PGF_CAPIT_NONE: + gu_string_write(tok, flin->out, flin->err); + break; + case PGF_CAPIT_FIRST: { GuUCS c = gu_utf8_decode((const uint8_t**) &tok); c = gu_ucs_to_upper(c); gu_out_utf8(c, flin->out, flin->err); - flin->capit = false; + gu_string_write(tok, flin->out, flin->err); + flin->capit = PGF_CAPIT_NONE; + break; + } + case PGF_CAPIT_ALL: + flin->capit = PGF_CAPIT_NEXT; + // continue + case PGF_CAPIT_NEXT: { + const uint8_t* p = (uint8_t*) tok; + while (*p) { + GuUCS c = gu_utf8_decode(&p); + c = gu_ucs_to_upper(c); + gu_out_utf8(c, flin->out, flin->err); + } + break; + } } - - gu_string_write(tok, flin->out, flin->err); } static void @@ -1094,10 +1123,10 @@ pgf_file_lzn_symbol_bind(PgfLinFuncs** funcs) } static void -pgf_file_lzn_symbol_capit(PgfLinFuncs** funcs) +pgf_file_lzn_symbol_capit(PgfLinFuncs** funcs, PgfCapitState capit) { PgfSimpleLin* flin = gu_container(funcs, PgfSimpleLin, funcs); - flin->capit = true; + flin->capit = capit; } static PgfLinFuncs pgf_file_lin_funcs = { @@ -1117,7 +1146,7 @@ pgf_lzr_linearize_simple(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfSimpleLin flin = { .funcs = &pgf_file_lin_funcs, .bind = true, - .capit = false, + .capit = PGF_CAPIT_NONE, .out = out, .err = err }; diff --git a/src/runtime/c/pgf/linearizer.h b/src/runtime/c/pgf/linearizer.h index 6cd2e1782..8178facdb 100644 --- a/src/runtime/c/pgf/linearizer.h +++ b/src/runtime/c/pgf/linearizer.h @@ -38,6 +38,13 @@ pgf_lzr_wrap_linref(PgfCncTree ctree, GuPool* pool); typedef struct PgfLinFuncs PgfLinFuncs; +typedef enum { + PGF_CAPIT_NONE, + PGF_CAPIT_FIRST, + PGF_CAPIT_ALL, + PGF_CAPIT_NEXT +} PgfCapitState; + struct PgfLinFuncs { /// Output tokens @@ -56,7 +63,7 @@ struct PgfLinFuncs void (*symbol_bind)(PgfLinFuncs** self); /// capitalization - void (*symbol_capit)(PgfLinFuncs** self); + void (*symbol_capit)(PgfLinFuncs** self, PgfCapitState capit); }; /// Linearize a concrete syntax tree. diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index 3848ad174..d03d8a275 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -134,8 +134,10 @@ pgf_prev_extern_sym(PgfSymbol sym) return *((PgfSymbol*) (((PgfSymbolVar*) i.data)+1)); case PGF_SYMBOL_BIND: case PGF_SYMBOL_SOFT_BIND: + case PGF_SYMBOL_SOFT_SPACE: return *((PgfSymbol*) (((PgfSymbolBIND*) i.data)+1)); case PGF_SYMBOL_CAPIT: + case PGF_SYMBOL_ALL_CAPIT: return *((PgfSymbol*) (((PgfSymbolCAPIT*) i.data)+1)); case PGF_SYMBOL_NE: return *((PgfSymbol*) (((PgfSymbolNE*) i.data)+1)); @@ -768,7 +770,6 @@ pgf_item_update_arg(PgfItem* item, size_t d, PgfCCat *new_ccat, static void pgf_item_advance(PgfItem* item, GuPool* pool) { - if (GU_LIKELY(item->alt == 0)) { item->sym_idx++; pgf_item_set_curr_symbol(item, pool); @@ -1063,7 +1064,11 @@ pgf_symbols_cmp(GuString* psent, BIND_TYPE* pbind, PgfSymbols* syms) *pbind = BIND_SOFT; break; } - case PGF_SYMBOL_CAPIT: { + case PGF_SYMBOL_SOFT_SPACE: { + break; + } + case PGF_SYMBOL_CAPIT: + case PGF_SYMBOL_ALL_CAPIT: { break; } case PGF_SYMBOL_NE: { @@ -1541,7 +1546,8 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym) } break; } - case PGF_SYMBOL_SOFT_BIND: { + case PGF_SYMBOL_SOFT_BIND: + case PGF_SYMBOL_SOFT_SPACE: { if (ps->before->start_offset == ps->before->end_offset) { if (ps->before->needs_bind) { PgfParseState* state = @@ -1562,7 +1568,8 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym) } break; } - case PGF_SYMBOL_CAPIT: { + case PGF_SYMBOL_CAPIT: + case PGF_SYMBOL_ALL_CAPIT: { pgf_item_advance(item, ps->pool); pgf_parsing_symbol(ps, item, item->curr_sym); break; diff --git a/src/runtime/c/pgf/printer.c b/src/runtime/c/pgf/printer.c index e3e014c4b..6b82986f3 100644 --- a/src/runtime/c/pgf/printer.c +++ b/src/runtime/c/pgf/printer.c @@ -276,10 +276,18 @@ pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err) gu_puts("SOFT_BIND", out, err); break; } + case PGF_SYMBOL_SOFT_SPACE: { + gu_puts("SOFT_SPACE", out, err); + break; + } case PGF_SYMBOL_CAPIT: { gu_puts("CAPIT", out, err); break; } + case PGF_SYMBOL_ALL_CAPIT: { + gu_puts("ALL_CAPIT", out, err); + break; + } default: gu_impossible(); } diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c index 1c250f559..606884332 100644 --- a/src/runtime/c/pgf/reader.c +++ b/src/runtime/c/pgf/reader.c @@ -708,6 +708,13 @@ pgf_read_symbol(PgfReader* rdr) gu_return_on_exn(rdr->err, gu_null_variant); break; } + case PGF_SYMBOL_SOFT_SPACE: { + gu_new_variant(PGF_SYMBOL_SOFT_SPACE, + PgfSymbolBIND, + &sym, rdr->opool); + gu_return_on_exn(rdr->err, gu_null_variant); + break; + } case PGF_SYMBOL_CAPIT: { gu_new_variant(PGF_SYMBOL_CAPIT, PgfSymbolCAPIT, @@ -715,6 +722,13 @@ pgf_read_symbol(PgfReader* rdr) gu_return_on_exn(rdr->err, gu_null_variant); break; } + case PGF_SYMBOL_ALL_CAPIT: { + gu_new_variant(PGF_SYMBOL_ALL_CAPIT, + PgfSymbolCAPIT, + &sym, rdr->opool); + gu_return_on_exn(rdr->err, gu_null_variant); + break; + } default: pgf_read_tag_error(rdr); } diff --git a/src/runtime/haskell/PGF/Binary.hs b/src/runtime/haskell/PGF/Binary.hs index b2695acbb..e1054447e 100644 --- a/src/runtime/haskell/PGF/Binary.hs +++ b/src/runtime/haskell/PGF/Binary.hs @@ -213,8 +213,10 @@ instance Binary Symbol where put (SymKP d vs) = putWord8 4 >> put (d,vs) put SymBIND = putWord8 5 put SymSOFT_BIND = putWord8 6 - put SymCAPIT = putWord8 7 - put SymNE = putWord8 8 + put SymSOFT_SPACE = putWord8 7 + put SymCAPIT = putWord8 8 + put SymALL_CAPIT = putWord8 9 + put SymNE = putWord8 10 get = do tag <- getWord8 case tag of 0 -> liftM2 SymCat get get @@ -224,8 +226,10 @@ instance Binary Symbol where 4 -> liftM2 (\d vs -> SymKP d vs) get get 5 -> return SymBIND 6 -> return SymSOFT_BIND - 7 -> return SymCAPIT - 8 -> return SymNE + 7 -> return SymSOFT_SPACE + 8 -> return SymCAPIT + 9 -> return SymALL_CAPIT + 10-> return SymNE _ -> decodingError instance Binary PArg where diff --git a/src/runtime/haskell/PGF/Data.hs b/src/runtime/haskell/PGF/Data.hs index 7d4750b13..a3d7c1f93 100644 --- a/src/runtime/haskell/PGF/Data.hs +++ b/src/runtime/haskell/PGF/Data.hs @@ -62,7 +62,9 @@ data Symbol | SymKP [Symbol] [([Symbol],[String])] | SymBIND -- the special BIND token | SymSOFT_BIND -- the special SOFT_BIND token + | SymSOFT_SPACE -- the special SOFT_SPACE token | SymCAPIT -- the special CAPIT token + | SymALL_CAPIT -- the special ALL_CAPIT token | SymNE -- non exist (this should be last constructor to simplify the binary search in the runtime) deriving (Eq,Ord,Show) data Production diff --git a/src/runtime/haskell/PGF/Haskell.hs b/src/runtime/haskell/PGF/Haskell.hs index 81e8cffaa..38c082be4 100644 --- a/src/runtime/haskell/PGF/Haskell.hs +++ b/src/runtime/haskell/PGF/Haskell.hs @@ -21,14 +21,14 @@ table vs = let m = M.fromList (zip enumAll vs) in (M.!) m type Str = [Tok] -- token sequence -- | Tokens -data Tok = TK String | TP [([Prefix],Str)] Str | BIND | SOFT_BIND | CAPIT +data Tok = TK String | TP [([Prefix],Str)] Str | BIND | SOFT_BIND | SOFT_SPACE | CAPIT | ALL_CAPIT deriving (Eq,Ord,Show) type Prefix = String -- ^ To be matched with the prefix of a following token -- | Render a token sequence as a 'String' fromStr :: Str -> String -fromStr = from False False +fromStr = from False id where from space cap ts = case ts of @@ -36,16 +36,19 @@ fromStr = from False False TK s:ts -> put s++from True cap ts BIND:ts -> from False cap ts SOFT_BIND:ts -> from False cap ts - CAPIT:ts -> from space True ts + SOFT_SPACE:ts -> from True cap ts + CAPIT:ts -> from space toUpper1 ts + ALL_CAPIT:ts -> from space toUpperAll ts TP alts def:ts -> from space cap (pick alts def r++[TK r]) -- hmm where r = fromStr ts where - put s = [' '|space]++up s - up = if cap then toUpper1 else id + put s = [' '|space]++cap s toUpper1 (c:s) = toUpper c:s toUpper1 s = s + toUpperAll = map toUpper + pick alts def r = head ([str|(ps,str)<-alts,any (`isPrefixOf` r) ps]++[def]) -- *** Common record types diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs index 8ca2a4f29..42d16683a 100644 --- a/src/runtime/haskell/PGF/Macros.hs +++ b/src/runtime/haskell/PGF/Macros.hs @@ -220,7 +220,9 @@ computeSeq filter seq args = concatMap compute seq compute SymNE = [LeafNE] compute SymBIND = [LeafKS "&+"] compute SymSOFT_BIND = [] + compute SymSOFT_SPACE = [] compute SymCAPIT = [LeafKS "&|"] + compute SymALL_CAPIT = [LeafKS "&|"] compute (SymKP syms alts) = [LeafKP (concatMap compute syms) [(concatMap compute syms,cs) | (syms,cs) <- alts]] getArg d r diff --git a/src/runtime/haskell/PGF/Optimize.hs b/src/runtime/haskell/PGF/Optimize.hs index dd876baaf..45b4311a5 100644 --- a/src/runtime/haskell/PGF/Optimize.hs +++ b/src/runtime/haskell/PGF/Optimize.hs @@ -240,7 +240,9 @@ splitLexicalRules cnc p_prods = seq2prefix (SymNE :syms) = TrieMap.empty seq2prefix (SymBIND :syms) = TrieMap.fromList [wf ["&+"]] seq2prefix (SymSOFT_BIND :syms) = TrieMap.fromList [wf []] + seq2prefix (SymSOFT_SPACE :syms) = TrieMap.fromList [wf []] seq2prefix (SymCAPIT :syms) = TrieMap.fromList [wf ["&|"]] + seq2prefix (SymALL_CAPIT :syms) = TrieMap.fromList [wf ["&|"]] updateConcrete abs cnc = let p_prods0 = filterProductions IntMap.empty IntSet.empty (productions cnc) diff --git a/src/runtime/haskell/PGF/Parse.hs b/src/runtime/haskell/PGF/Parse.hs index c62522c1e..2cfd91ca5 100644 --- a/src/runtime/haskell/PGF/Parse.hs +++ b/src/runtime/haskell/PGF/Parse.hs @@ -311,13 +311,18 @@ process flit ftok cnc (item@(Active j ppos funid seqid args key0):items) acc cha SymBIND -> let !acc' = ftok_ ["&+"] (Active j (ppos+1) funid seqid args key0) acc in process flit ftok cnc items acc' chart SymSOFT_BIND->process flit ftok cnc ((Active j (ppos+1) funid seqid args key0):items) acc chart + SymSOFT_SPACE->process flit ftok cnc ((Active j (ppos+1) funid seqid args key0):items) acc chart SymCAPIT -> let !acc' = ftok_ ["&|"] (Active j (ppos+1) funid seqid args key0) acc in process flit ftok cnc items acc' chart + SymALL_CAPIT->let !acc' = ftok_ ["&|"] (Active j (ppos+1) funid seqid args key0) acc + in process flit ftok cnc items acc' chart SymKP syms vars -> let to_tok (SymKS t) = [t] to_tok SymBIND = ["&+"] to_tok SymSOFT_BIND = [] + to_tok SymSOFT_SPACE= [] to_tok SymCAPIT = ["&|"] + to_tok SymALL_CAPIT = ["&|"] to_tok _ = [] !acc' = foldl (\acc syms -> ftok_ (concatMap to_tok syms) (Active j (ppos+1) funid seqid args key0) acc) acc diff --git a/src/runtime/haskell/PGF/Printer.hs b/src/runtime/haskell/PGF/Printer.hs index 5022cbb82..fbe9db596 100644 --- a/src/runtime/haskell/PGF/Printer.hs +++ b/src/runtime/haskell/PGF/Printer.hs @@ -95,7 +95,9 @@ ppSymbol (SymKS t) = doubleQuotes (text t) ppSymbol SymNE = text "nonExist" ppSymbol SymBIND = text "BIND" ppSymbol SymSOFT_BIND = text "SOFT_BIND" +ppSymbol SymSOFT_SPACE= text "SOFT_SPACE" ppSymbol SymCAPIT = text "CAPIT" +ppSymbol SymALL_CAPIT = text "ALL_CAPIT" ppSymbol (SymKP syms alts) = text "pre" <+> braces (hsep (punctuate semi (hsep (map ppSymbol syms) : map ppAlt alts))) ppAlt (syms,ps) = hsep (map ppSymbol syms) <+> char '/' <+> hsep (map (doubleQuotes . text) ps)