mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
Added Predef.SOFT_BIND. This special token allows zero or more spaces between the ordinary tokens on either side of it. It is also used in the English RGL to attach commas to the previous word.
This commit is contained in:
@@ -22,7 +22,7 @@ concrete NounEng of Noun = CatEng ** open MorphoEng, ResEng, Prelude in {
|
||||
} ;
|
||||
|
||||
RelNP np rs = {
|
||||
s = \\c => np.s ! c ++ "," ++ rs.s ! np.a ++ finalComma ;
|
||||
s = \\c => np.s ! c ++ frontComma ++ rs.s ! np.a ++ finalComma ;
|
||||
a = np.a
|
||||
} ;
|
||||
|
||||
@@ -32,7 +32,7 @@ concrete NounEng of Noun = CatEng ** open MorphoEng, ResEng, Prelude in {
|
||||
} ;
|
||||
|
||||
ExtAdvNP np adv = {
|
||||
s = \\c => np.s ! c ++ "," ++ adv.s ++ finalComma;
|
||||
s = \\c => np.s ! c ++ frontComma ++ adv.s ++ finalComma;
|
||||
a = np.a
|
||||
} ;
|
||||
|
||||
|
||||
@@ -69,7 +69,7 @@ lin pot3plus n m = {
|
||||
|
||||
oper
|
||||
commaIf : DTail -> Str = \t -> case t of {
|
||||
T3 => "," ;
|
||||
T3 => frontComma ;
|
||||
_ => []
|
||||
} ;
|
||||
|
||||
|
||||
@@ -90,8 +90,8 @@ lin
|
||||
compl = vp.s2 ! np.a
|
||||
in
|
||||
case o of {
|
||||
ODir => compl ++ "," ++ np.s ! npNom ++ verb.aux ++ vp.ad ++ verb.fin ++ verb.adv ++ verb.inf ;
|
||||
OQuest => verb.aux ++ compl ++ "," ++ np.s ! npNom ++ verb.adv ++ vp.ad ++ verb.fin ++ verb.inf
|
||||
ODir => compl ++ frontComma ++ np.s ! npNom ++ verb.aux ++ vp.ad ++ verb.fin ++ verb.adv ++ verb.inf ;
|
||||
OQuest => verb.aux ++ compl ++ frontComma ++ np.s ! npNom ++ verb.adv ++ vp.ad ++ verb.fin ++ verb.inf
|
||||
}
|
||||
} ;
|
||||
|
||||
@@ -102,7 +102,7 @@ lin
|
||||
compl = vp.s2 ! np.a
|
||||
in
|
||||
case o of {
|
||||
ODir => compl ++ "," ++ verb.aux ++ verb.adv ++ vp.ad ++ verb.fin ++ verb.inf ++ np.s ! npNom ;
|
||||
ODir => compl ++ frontComma ++ verb.aux ++ verb.adv ++ vp.ad ++ verb.fin ++ verb.inf ++ np.s ! npNom ;
|
||||
OQuest => verb.aux ++ compl ++ verb.adv ++ vp.ad ++ verb.fin ++ verb.inf ++ np.s ! npNom
|
||||
}
|
||||
} ;
|
||||
@@ -137,7 +137,7 @@ lin
|
||||
} ;
|
||||
|
||||
ApposNP np1 np2 = {
|
||||
s = \\c => np1.s ! c ++ "," ++ np2.s ! npNom ++ finalComma ;
|
||||
s = \\c => np1.s ! c ++ frontComma ++ np2.s ! npNom ++ finalComma ;
|
||||
a = np1.a
|
||||
} ;
|
||||
|
||||
|
||||
@@ -23,6 +23,6 @@ concrete PhraseEng of Phrase = CatEng ** open Prelude, ResEng in {
|
||||
PConjConj conj = {s = conj.s2} ; ---
|
||||
|
||||
NoVoc = {s = []} ;
|
||||
VocNP np = {s = "," ++ np.s ! npNom} ;
|
||||
VocNP np = {s = frontComma ++ np.s ! npNom} ;
|
||||
|
||||
}
|
||||
|
||||
@@ -543,6 +543,7 @@ resource ResEng = ParamX ** open Prelude in {
|
||||
}
|
||||
} ;
|
||||
|
||||
finalComma : Str = pre {"," | "." => []; "" => ","; _ => []} ;
|
||||
finalComma : Str = pre {"," | "." => []; "" => SOFT_BIND ++ ","; _ => []} ;
|
||||
frontComma : Str = SOFT_BIND ++ "," ;
|
||||
|
||||
}
|
||||
|
||||
@@ -57,11 +57,11 @@ concrete SentenceEng of Sentence = CatEng ** open Prelude, ResEng in {
|
||||
} ;
|
||||
|
||||
AdvS a s = {s = a.s ++ s.s} ;
|
||||
ExtAdvS a s = {s = a.s ++ "," ++ s.s} ;
|
||||
ExtAdvS a s = {s = a.s ++ frontComma ++ s.s} ;
|
||||
|
||||
SSubjS a s b = {s = a.s ++ "," ++ s.s ++ b.s} ;
|
||||
SSubjS a s b = {s = a.s ++ frontComma ++ s.s ++ b.s} ;
|
||||
|
||||
RelS s r = {s = s.s ++ "," ++ r.s ! agrP3 Sg} ;
|
||||
RelS s r = {s = s.s ++ frontComma ++ r.s ! agrP3 Sg} ;
|
||||
|
||||
oper
|
||||
ctr : CPolarity -> CPolarity = \x -> x ;
|
||||
|
||||
@@ -33,7 +33,7 @@ lin
|
||||
MkSymb s = s ;
|
||||
|
||||
BaseSymb = infixSS "and" ;
|
||||
ConsSymb = infixSS "," ;
|
||||
ConsSymb = infixSS frontComma ;
|
||||
|
||||
oper
|
||||
-- Note: this results in a space before 's, but there's
|
||||
|
||||
@@ -39,7 +39,7 @@ concrete VerbEng of Verb = CatEng ** open ResEng, Prelude in {
|
||||
UseComp comp = insertObj comp.s (predAux auxBe) ;
|
||||
|
||||
AdvVP vp adv = insertObj (\\_ => adv.s) vp ;
|
||||
ExtAdvVP vp adv = insertObj (\\_ => "," ++ adv.s ++ finalComma) vp ;
|
||||
ExtAdvVP vp adv = insertObj (\\_ => frontComma ++ adv.s ++ finalComma) vp ;
|
||||
AdVVP adv vp = insertAdV adv.s vp ;
|
||||
|
||||
AdvVPSlash vp adv = insertObj (\\_ => adv.s) vp ** {c2 = vp.c2 ; gapInMiddle = vp.gapInMiddle} ;
|
||||
|
||||
@@ -38,5 +38,6 @@ resource Predef = {
|
||||
-- map all strings in a data structure; experimental ---
|
||||
oper nonExist : Str = variants {} ; -- a placeholder for non-existant morphological forms
|
||||
oper BIND : Str = variants {} ; -- a token for gluing
|
||||
oper SOFT_BIND : Str = variants {} ; -- a token for soft gluing
|
||||
|
||||
} ;
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
-- This file defines some prelude facilities usable in all grammars.
|
||||
|
||||
resource Prelude = open (Predef=Predef) in {
|
||||
resource Prelude = Predef[nonExist, BIND, SOFT_BIND] ** open (Predef=Predef) in {
|
||||
|
||||
oper
|
||||
|
||||
@@ -33,10 +33,6 @@ oper
|
||||
|
||||
--2 Optional elements
|
||||
|
||||
-- Missing form.
|
||||
|
||||
nonExist : Str = Predef.nonExist;
|
||||
|
||||
-- Optional string with preference on the string vs. empty.
|
||||
|
||||
optStr : Str -> Str = \s -> variants {s ; []} ;
|
||||
@@ -114,7 +110,6 @@ oper
|
||||
|
||||
-- These should be hidden, and never changed since they are hardcoded in (un)lexers
|
||||
|
||||
BIND : Str = Predef.BIND;
|
||||
PARA : Str = "&-" ;
|
||||
CAPIT : Str = "&|" ;
|
||||
|
||||
|
||||
@@ -86,6 +86,8 @@ primitives = Map.fromList
|
||||
[] typeStr []))) Nothing)
|
||||
, (cBIND , ResOper (Just (noLoc (mkProd -- Str
|
||||
[] typeStr []))) Nothing)
|
||||
, (cSOFT_BIND, ResOper (Just (noLoc (mkProd -- Str
|
||||
[] typeStr []))) Nothing)
|
||||
]
|
||||
where
|
||||
fun from to = oper (mkFunType from to)
|
||||
|
||||
@@ -78,7 +78,8 @@ predefList =
|
||||
(cError,Error),
|
||||
-- Canonical values:
|
||||
(cPBool,PBool),(cPFalse,PFalse),(cPTrue,PTrue),(cInt,Int),
|
||||
(cInts,Ints),(cNonExist,NonExist),(cBIND,BIND)]
|
||||
(cInts,Ints),(cNonExist,NonExist)
|
||||
,(cBIND,BIND),(cSOFT_BIND,SOFT_BIND)]
|
||||
--- add more functions!!!
|
||||
|
||||
delta f vs =
|
||||
@@ -107,6 +108,7 @@ delta f vs =
|
||||
PTrue -> canonical
|
||||
NonExist-> canonical
|
||||
BIND -> canonical
|
||||
SOFT_BIND->canonical
|
||||
where
|
||||
canonical = delay
|
||||
delay = return (VApp f vs) -- wrong number of arguments
|
||||
|
||||
@@ -51,5 +51,6 @@ data Predefined = Drop | Take | Tk | Dp | EqStr | Occur | Occurs | ToUpper
|
||||
{- | Show | Read | ToStr | MapStr | EqVal -}
|
||||
| Error
|
||||
-- Canonical values below:
|
||||
| PBool | PFalse | PTrue | Int | Ints | NonExist | BIND
|
||||
| PBool | PFalse | PTrue | Int | Ints | NonExist
|
||||
| BIND | SOFT_BIND
|
||||
deriving (Show,Eq,Ord,Ix,Bounded,Enum)
|
||||
|
||||
@@ -414,6 +414,8 @@ convertTerm opts sel ctype (Q (m,f))
|
||||
f == cNonExist = return (CStr [SymNE])
|
||||
| m == cPredef &&
|
||||
f == cBIND = return (CStr [SymBIND])
|
||||
| m == cPredef &&
|
||||
f == cSOFT_BIND = return (CStr [SymSOFT_BIND])
|
||||
|
||||
convertTerm opts sel@(CProj l _) ctype (ExtR t1 t2@(R rs2))
|
||||
| l `elem` map fst rs2 = convertTerm opts sel ctype t2
|
||||
|
||||
@@ -89,6 +89,7 @@ sym2js (SymKS t) = new "SymKS" [JS.EStr t]
|
||||
sym2js (SymKP ts alts) = new "SymKP" [JS.EArray (map sym2js ts), JS.EArray (map alt2js alts)]
|
||||
sym2js SymNE = new "SymNE" []
|
||||
sym2js SymBIND = new "SymKS" [JS.EStr "&+"]
|
||||
sym2js SymSOFT_BIND = new "SymKS" [JS.EStr "&+"]
|
||||
|
||||
alt2js (ps,ts) = new "Alt" [JS.EArray (map sym2js ps), JS.EArray (map JS.EStr ts)]
|
||||
|
||||
|
||||
@@ -31,6 +31,7 @@ cOverload = identS "overload"
|
||||
cUndefinedType = identS "UndefinedType"
|
||||
cNonExist = identS "nonExist"
|
||||
cBIND = identS "BIND"
|
||||
cSOFT_BIND = identS "SOFT_BIND"
|
||||
|
||||
isPredefCat :: Ident -> Bool
|
||||
isPredefCat c = elem c [cInt,cString,cFloat]
|
||||
|
||||
@@ -167,6 +167,7 @@ typedef enum {
|
||||
PGF_SYMBOL_KS,
|
||||
PGF_SYMBOL_KP,
|
||||
PGF_SYMBOL_BIND,
|
||||
PGF_SYMBOL_SOFT_BIND,
|
||||
PGF_SYMBOL_NE
|
||||
} PgfSymbolTag;
|
||||
|
||||
|
||||
@@ -723,7 +723,8 @@ pgf_lzr_linearize_symbols(PgfConcr* concr, PgfCncTreeApp* fapp,
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_BIND: {
|
||||
case PGF_SYMBOL_BIND:
|
||||
case PGF_SYMBOL_SOFT_BIND: {
|
||||
if ((*fnsp)->symbol_bind) {
|
||||
(*fnsp)->symbol_bind(fnsp);
|
||||
}
|
||||
|
||||
@@ -145,6 +145,7 @@ pgf_prev_extern_sym(PgfSymbol sym)
|
||||
case PGF_SYMBOL_VAR:
|
||||
return *((PgfSymbol*) (((PgfSymbolVar*) i.data)+1));
|
||||
case PGF_SYMBOL_BIND:
|
||||
case PGF_SYMBOL_SOFT_BIND:
|
||||
return *((PgfSymbol*) (((PgfSymbolBIND*) i.data)+1));
|
||||
case PGF_SYMBOL_NE:
|
||||
return *((PgfSymbol*) (((PgfSymbolNE*) i.data)+1));
|
||||
@@ -1137,6 +1138,10 @@ pgf_symbols_cmp(GuString* psent, size_t sent_len, BIND_TYPE* pbind, PgfSymbols*
|
||||
*pbind = BIND_HARD;
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_SOFT_BIND: {
|
||||
*pbind = BIND_SOFT;
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_NE: {
|
||||
return -2;
|
||||
}
|
||||
@@ -1635,6 +1640,31 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_SOFT_BIND: {
|
||||
if (ps->before->start_offset == ps->before->end_offset) {
|
||||
if (ps->before->needs_bind) {
|
||||
PgfParseState* state =
|
||||
pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD);
|
||||
if (state != NULL) {
|
||||
if (state->next == NULL) {
|
||||
state->viterbi_prob =
|
||||
item->inside_prob+item->conts->outside_prob;
|
||||
}
|
||||
|
||||
pgf_item_advance(item, ps->pool);
|
||||
gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
|
||||
} else {
|
||||
pgf_item_free(ps, item);
|
||||
}
|
||||
} else {
|
||||
pgf_item_free(ps, item);
|
||||
}
|
||||
} else {
|
||||
pgf_item_advance(item, ps->pool);
|
||||
gu_buf_heap_push(ps->before->agenda, pgf_item_prob_order, &item);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
|
||||
@@ -280,6 +280,10 @@ pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err)
|
||||
gu_puts("BIND", out, err);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_SOFT_BIND: {
|
||||
gu_puts("SOFT_BIND", out, err);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
|
||||
@@ -723,6 +723,13 @@ pgf_read_symbol(PgfReader* rdr)
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_SOFT_BIND: {
|
||||
gu_new_variant(PGF_SYMBOL_SOFT_BIND,
|
||||
PgfSymbolBIND,
|
||||
&sym, rdr->opool);
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
pgf_read_tag_error(rdr);
|
||||
}
|
||||
|
||||
@@ -152,7 +152,8 @@ instance Binary Symbol where
|
||||
put (SymKS ts) = putWord8 3 >> put ts
|
||||
put (SymKP d vs) = putWord8 4 >> put (d,vs)
|
||||
put SymBIND = putWord8 5
|
||||
put SymNE = putWord8 6
|
||||
put SymSOFT_BIND = putWord8 6
|
||||
put SymNE = putWord8 7
|
||||
get = do tag <- getWord8
|
||||
case tag of
|
||||
0 -> liftM2 SymCat get get
|
||||
@@ -161,7 +162,8 @@ instance Binary Symbol where
|
||||
3 -> liftM SymKS get
|
||||
4 -> liftM2 (\d vs -> SymKP d vs) get get
|
||||
5 -> return SymBIND
|
||||
6 -> return SymNE
|
||||
6 -> return SymSOFT_BIND
|
||||
7 -> return SymNE
|
||||
_ -> decodingError
|
||||
|
||||
instance Binary PArg where
|
||||
|
||||
@@ -62,6 +62,7 @@ data Symbol
|
||||
| SymKS Token
|
||||
| SymKP [Symbol] [([Symbol],[String])]
|
||||
| SymBIND -- the special BIND token
|
||||
| SymSOFT_BIND -- the special SOFT_BIND token
|
||||
| SymNE -- non exist (this should be last constructor to simplify the binary search in the runtime)
|
||||
deriving (Eq,Ord,Show)
|
||||
data Production
|
||||
|
||||
@@ -160,6 +160,7 @@ data BracketedTokn
|
||||
| LeafKS Token
|
||||
| LeafNE
|
||||
| LeafBIND
|
||||
| LeafSOFT_BIND
|
||||
| LeafKP [BracketedTokn] [([BracketedTokn],[String])]
|
||||
deriving Eq
|
||||
|
||||
@@ -222,6 +223,7 @@ computeSeq filter seq args = concatMap compute seq
|
||||
compute (SymKS t) = [LeafKS t]
|
||||
compute SymNE = [LeafNE]
|
||||
compute SymBIND = [LeafKS "&+"]
|
||||
compute SymSOFT_BIND = []
|
||||
compute (SymKP syms alts) = [LeafKP (concatMap compute syms) [(concatMap compute syms,cs) | (syms,cs) <- alts]]
|
||||
|
||||
getArg d r
|
||||
|
||||
@@ -228,6 +228,7 @@ splitLexicalRules cnc p_prods =
|
||||
[seq2prefix (syms1 ++ syms) | (syms1,ps) <- alts])
|
||||
seq2prefix (SymNE :syms) = TrieMap.empty
|
||||
seq2prefix (SymBIND :syms) = TrieMap.fromList [wf ["&+"]]
|
||||
seq2prefix (SymSOFT_BIND :syms) = TrieMap.fromList [wf []]
|
||||
|
||||
updateConcrete abs cnc =
|
||||
let p_prods0 = filterProductions IntMap.empty IntSet.empty (productions cnc)
|
||||
|
||||
@@ -309,10 +309,12 @@ process flit ftok cnc (item@(Active j ppos funid seqid args key0):items) acc cha
|
||||
SymNE -> process flit ftok cnc items acc chart
|
||||
SymBIND -> let !acc' = ftok_ ["&+"] (Active j (ppos+1) funid seqid args key0) acc
|
||||
in process flit ftok cnc items acc' chart
|
||||
SymSOFT_BIND->process flit ftok cnc ((Active j (ppos+1) funid seqid args key0):items) acc chart
|
||||
SymKP syms vars
|
||||
-> let to_tok (SymKS t) = [t]
|
||||
to_tok SymBIND = ["&+"]
|
||||
to_tok _ = []
|
||||
-> let to_tok (SymKS t) = [t]
|
||||
to_tok SymBIND = ["&+"]
|
||||
to_tok SymSOFT_BIND = []
|
||||
to_tok _ = []
|
||||
|
||||
!acc' = foldl (\acc syms -> ftok_ (concatMap to_tok syms) (Active j (ppos+1) funid seqid args key0) acc) acc
|
||||
(syms:[syms' | (syms',_) <- vars])
|
||||
|
||||
@@ -92,6 +92,7 @@ ppSymbol (SymVar d r) = char '<' <> int d <> comma <> char '$' <> int r <> char
|
||||
ppSymbol (SymKS t) = doubleQuotes (text t)
|
||||
ppSymbol SymNE = text "nonExist"
|
||||
ppSymbol SymBIND = text "BIND"
|
||||
ppSymbol SymSOFT_BIND = text "SOFT_BIND"
|
||||
ppSymbol (SymKP syms alts) = text "pre" <+> braces (hsep (punctuate semi (hsep (map ppSymbol syms) : map ppAlt alts)))
|
||||
|
||||
ppAlt (syms,ps) = hsep (map ppSymbol syms) <+> char '/' <+> hsep (map (doubleQuotes . text) ps)
|
||||
|
||||
Reference in New Issue
Block a user