Added Predef.SOFT_BIND. This special token allows zero or more spaces between the ordinary tokens on either side of it. The English RGL also uses it to attach commas to the preceding word.

This commit is contained in:
kr.angelov
2013-11-12 09:54:57 +00:00
parent 2da15f558e
commit 1d7b6f44b6
27 changed files with 86 additions and 28 deletions

View File

@@ -22,7 +22,7 @@ concrete NounEng of Noun = CatEng ** open MorphoEng, ResEng, Prelude in {
} ; } ;
RelNP np rs = { RelNP np rs = {
s = \\c => np.s ! c ++ "," ++ rs.s ! np.a ++ finalComma ; s = \\c => np.s ! c ++ frontComma ++ rs.s ! np.a ++ finalComma ;
a = np.a a = np.a
} ; } ;
@@ -32,7 +32,7 @@ concrete NounEng of Noun = CatEng ** open MorphoEng, ResEng, Prelude in {
} ; } ;
ExtAdvNP np adv = { ExtAdvNP np adv = {
s = \\c => np.s ! c ++ "," ++ adv.s ++ finalComma; s = \\c => np.s ! c ++ frontComma ++ adv.s ++ finalComma;
a = np.a a = np.a
} ; } ;

View File

@@ -69,7 +69,7 @@ lin pot3plus n m = {
oper oper
commaIf : DTail -> Str = \t -> case t of { commaIf : DTail -> Str = \t -> case t of {
T3 => "," ; T3 => frontComma ;
_ => [] _ => []
} ; } ;

View File

@@ -90,8 +90,8 @@ lin
compl = vp.s2 ! np.a compl = vp.s2 ! np.a
in in
case o of { case o of {
ODir => compl ++ "," ++ np.s ! npNom ++ verb.aux ++ vp.ad ++ verb.fin ++ verb.adv ++ verb.inf ; ODir => compl ++ frontComma ++ np.s ! npNom ++ verb.aux ++ vp.ad ++ verb.fin ++ verb.adv ++ verb.inf ;
OQuest => verb.aux ++ compl ++ "," ++ np.s ! npNom ++ verb.adv ++ vp.ad ++ verb.fin ++ verb.inf OQuest => verb.aux ++ compl ++ frontComma ++ np.s ! npNom ++ verb.adv ++ vp.ad ++ verb.fin ++ verb.inf
} }
} ; } ;
@@ -102,7 +102,7 @@ lin
compl = vp.s2 ! np.a compl = vp.s2 ! np.a
in in
case o of { case o of {
ODir => compl ++ "," ++ verb.aux ++ verb.adv ++ vp.ad ++ verb.fin ++ verb.inf ++ np.s ! npNom ; ODir => compl ++ frontComma ++ verb.aux ++ verb.adv ++ vp.ad ++ verb.fin ++ verb.inf ++ np.s ! npNom ;
OQuest => verb.aux ++ compl ++ verb.adv ++ vp.ad ++ verb.fin ++ verb.inf ++ np.s ! npNom OQuest => verb.aux ++ compl ++ verb.adv ++ vp.ad ++ verb.fin ++ verb.inf ++ np.s ! npNom
} }
} ; } ;
@@ -137,7 +137,7 @@ lin
} ; } ;
ApposNP np1 np2 = { ApposNP np1 np2 = {
s = \\c => np1.s ! c ++ "," ++ np2.s ! npNom ++ finalComma ; s = \\c => np1.s ! c ++ frontComma ++ np2.s ! npNom ++ finalComma ;
a = np1.a a = np1.a
} ; } ;

View File

@@ -23,6 +23,6 @@ concrete PhraseEng of Phrase = CatEng ** open Prelude, ResEng in {
PConjConj conj = {s = conj.s2} ; --- PConjConj conj = {s = conj.s2} ; ---
NoVoc = {s = []} ; NoVoc = {s = []} ;
VocNP np = {s = "," ++ np.s ! npNom} ; VocNP np = {s = frontComma ++ np.s ! npNom} ;
} }

View File

@@ -543,6 +543,7 @@ resource ResEng = ParamX ** open Prelude in {
} }
} ; } ;
finalComma : Str = pre {"," | "." => []; "" => ","; _ => []} ; finalComma : Str = pre {"," | "." => []; "" => SOFT_BIND ++ ","; _ => []} ;
frontComma : Str = SOFT_BIND ++ "," ;
} }

View File

@@ -57,11 +57,11 @@ concrete SentenceEng of Sentence = CatEng ** open Prelude, ResEng in {
} ; } ;
AdvS a s = {s = a.s ++ s.s} ; AdvS a s = {s = a.s ++ s.s} ;
ExtAdvS a s = {s = a.s ++ "," ++ s.s} ; ExtAdvS a s = {s = a.s ++ frontComma ++ s.s} ;
SSubjS a s b = {s = a.s ++ "," ++ s.s ++ b.s} ; SSubjS a s b = {s = a.s ++ frontComma ++ s.s ++ b.s} ;
RelS s r = {s = s.s ++ "," ++ r.s ! agrP3 Sg} ; RelS s r = {s = s.s ++ frontComma ++ r.s ! agrP3 Sg} ;
oper oper
ctr : CPolarity -> CPolarity = \x -> x ; ctr : CPolarity -> CPolarity = \x -> x ;

View File

@@ -33,7 +33,7 @@ lin
MkSymb s = s ; MkSymb s = s ;
BaseSymb = infixSS "and" ; BaseSymb = infixSS "and" ;
ConsSymb = infixSS "," ; ConsSymb = infixSS frontComma ;
oper oper
-- Note: this results in a space before 's, but there's -- Note: this results in a space before 's, but there's

View File

@@ -39,7 +39,7 @@ concrete VerbEng of Verb = CatEng ** open ResEng, Prelude in {
UseComp comp = insertObj comp.s (predAux auxBe) ; UseComp comp = insertObj comp.s (predAux auxBe) ;
AdvVP vp adv = insertObj (\\_ => adv.s) vp ; AdvVP vp adv = insertObj (\\_ => adv.s) vp ;
ExtAdvVP vp adv = insertObj (\\_ => "," ++ adv.s ++ finalComma) vp ; ExtAdvVP vp adv = insertObj (\\_ => frontComma ++ adv.s ++ finalComma) vp ;
AdVVP adv vp = insertAdV adv.s vp ; AdVVP adv vp = insertAdV adv.s vp ;
AdvVPSlash vp adv = insertObj (\\_ => adv.s) vp ** {c2 = vp.c2 ; gapInMiddle = vp.gapInMiddle} ; AdvVPSlash vp adv = insertObj (\\_ => adv.s) vp ** {c2 = vp.c2 ; gapInMiddle = vp.gapInMiddle} ;

View File

@@ -38,5 +38,6 @@ resource Predef = {
-- map all strings in a data structure; experimental --- -- map all strings in a data structure; experimental ---
oper nonExist : Str = variants {} ; -- a placeholder for non-existant morphological forms oper nonExist : Str = variants {} ; -- a placeholder for non-existant morphological forms
oper BIND : Str = variants {} ; -- a token for gluing oper BIND : Str = variants {} ; -- a token for gluing
oper SOFT_BIND : Str = variants {} ; -- a token for soft gluing
} ; } ;

View File

@@ -2,7 +2,7 @@
-- This file defines some prelude facilities usable in all grammars. -- This file defines some prelude facilities usable in all grammars.
resource Prelude = open (Predef=Predef) in { resource Prelude = Predef[nonExist, BIND, SOFT_BIND] ** open (Predef=Predef) in {
oper oper
@@ -33,10 +33,6 @@ oper
--2 Optional elements --2 Optional elements
-- Missing form.
nonExist : Str = Predef.nonExist;
-- Optional string with preference on the string vs. empty. -- Optional string with preference on the string vs. empty.
optStr : Str -> Str = \s -> variants {s ; []} ; optStr : Str -> Str = \s -> variants {s ; []} ;
@@ -114,7 +110,6 @@ oper
-- These should be hidden, and never changed since they are hardcoded in (un)lexers -- These should be hidden, and never changed since they are hardcoded in (un)lexers
BIND : Str = Predef.BIND;
PARA : Str = "&-" ; PARA : Str = "&-" ;
CAPIT : Str = "&|" ; CAPIT : Str = "&|" ;

View File

@@ -86,6 +86,8 @@ primitives = Map.fromList
[] typeStr []))) Nothing) [] typeStr []))) Nothing)
, (cBIND , ResOper (Just (noLoc (mkProd -- Str , (cBIND , ResOper (Just (noLoc (mkProd -- Str
[] typeStr []))) Nothing) [] typeStr []))) Nothing)
, (cSOFT_BIND, ResOper (Just (noLoc (mkProd -- Str
[] typeStr []))) Nothing)
] ]
where where
fun from to = oper (mkFunType from to) fun from to = oper (mkFunType from to)

View File

@@ -78,7 +78,8 @@ predefList =
(cError,Error), (cError,Error),
-- Canonical values: -- Canonical values:
(cPBool,PBool),(cPFalse,PFalse),(cPTrue,PTrue),(cInt,Int), (cPBool,PBool),(cPFalse,PFalse),(cPTrue,PTrue),(cInt,Int),
(cInts,Ints),(cNonExist,NonExist),(cBIND,BIND)] (cInts,Ints),(cNonExist,NonExist)
,(cBIND,BIND),(cSOFT_BIND,SOFT_BIND)]
--- add more functions!!! --- add more functions!!!
delta f vs = delta f vs =
@@ -107,6 +108,7 @@ delta f vs =
PTrue -> canonical PTrue -> canonical
NonExist-> canonical NonExist-> canonical
BIND -> canonical BIND -> canonical
SOFT_BIND->canonical
where where
canonical = delay canonical = delay
delay = return (VApp f vs) -- wrong number of arguments delay = return (VApp f vs) -- wrong number of arguments

View File

@@ -51,5 +51,6 @@ data Predefined = Drop | Take | Tk | Dp | EqStr | Occur | Occurs | ToUpper
{- | Show | Read | ToStr | MapStr | EqVal -} {- | Show | Read | ToStr | MapStr | EqVal -}
| Error | Error
-- Canonical values below: -- Canonical values below:
| PBool | PFalse | PTrue | Int | Ints | NonExist | BIND | PBool | PFalse | PTrue | Int | Ints | NonExist
| BIND | SOFT_BIND
deriving (Show,Eq,Ord,Ix,Bounded,Enum) deriving (Show,Eq,Ord,Ix,Bounded,Enum)

View File

@@ -414,6 +414,8 @@ convertTerm opts sel ctype (Q (m,f))
f == cNonExist = return (CStr [SymNE]) f == cNonExist = return (CStr [SymNE])
| m == cPredef && | m == cPredef &&
f == cBIND = return (CStr [SymBIND]) f == cBIND = return (CStr [SymBIND])
| m == cPredef &&
f == cSOFT_BIND = return (CStr [SymSOFT_BIND])
convertTerm opts sel@(CProj l _) ctype (ExtR t1 t2@(R rs2)) convertTerm opts sel@(CProj l _) ctype (ExtR t1 t2@(R rs2))
| l `elem` map fst rs2 = convertTerm opts sel ctype t2 | l `elem` map fst rs2 = convertTerm opts sel ctype t2

View File

@@ -89,6 +89,7 @@ sym2js (SymKS t) = new "SymKS" [JS.EStr t]
sym2js (SymKP ts alts) = new "SymKP" [JS.EArray (map sym2js ts), JS.EArray (map alt2js alts)] sym2js (SymKP ts alts) = new "SymKP" [JS.EArray (map sym2js ts), JS.EArray (map alt2js alts)]
sym2js SymNE = new "SymNE" [] sym2js SymNE = new "SymNE" []
sym2js SymBIND = new "SymKS" [JS.EStr "&+"] sym2js SymBIND = new "SymKS" [JS.EStr "&+"]
sym2js SymSOFT_BIND = new "SymKS" [JS.EStr "&+"]
alt2js (ps,ts) = new "Alt" [JS.EArray (map sym2js ps), JS.EArray (map JS.EStr ts)] alt2js (ps,ts) = new "Alt" [JS.EArray (map sym2js ps), JS.EArray (map JS.EStr ts)]

View File

@@ -31,6 +31,7 @@ cOverload = identS "overload"
cUndefinedType = identS "UndefinedType" cUndefinedType = identS "UndefinedType"
cNonExist = identS "nonExist" cNonExist = identS "nonExist"
cBIND = identS "BIND" cBIND = identS "BIND"
cSOFT_BIND = identS "SOFT_BIND"
isPredefCat :: Ident -> Bool isPredefCat :: Ident -> Bool
isPredefCat c = elem c [cInt,cString,cFloat] isPredefCat c = elem c [cInt,cString,cFloat]

View File

@@ -167,6 +167,7 @@ typedef enum {
PGF_SYMBOL_KS, PGF_SYMBOL_KS,
PGF_SYMBOL_KP, PGF_SYMBOL_KP,
PGF_SYMBOL_BIND, PGF_SYMBOL_BIND,
PGF_SYMBOL_SOFT_BIND,
PGF_SYMBOL_NE PGF_SYMBOL_NE
} PgfSymbolTag; } PgfSymbolTag;

View File

@@ -723,7 +723,8 @@ pgf_lzr_linearize_symbols(PgfConcr* concr, PgfCncTreeApp* fapp,
} }
break; break;
} }
case PGF_SYMBOL_BIND: { case PGF_SYMBOL_BIND:
case PGF_SYMBOL_SOFT_BIND: {
if ((*fnsp)->symbol_bind) { if ((*fnsp)->symbol_bind) {
(*fnsp)->symbol_bind(fnsp); (*fnsp)->symbol_bind(fnsp);
} }

View File

@@ -145,6 +145,7 @@ pgf_prev_extern_sym(PgfSymbol sym)
case PGF_SYMBOL_VAR: case PGF_SYMBOL_VAR:
return *((PgfSymbol*) (((PgfSymbolVar*) i.data)+1)); return *((PgfSymbol*) (((PgfSymbolVar*) i.data)+1));
case PGF_SYMBOL_BIND: case PGF_SYMBOL_BIND:
case PGF_SYMBOL_SOFT_BIND:
return *((PgfSymbol*) (((PgfSymbolBIND*) i.data)+1)); return *((PgfSymbol*) (((PgfSymbolBIND*) i.data)+1));
case PGF_SYMBOL_NE: case PGF_SYMBOL_NE:
return *((PgfSymbol*) (((PgfSymbolNE*) i.data)+1)); return *((PgfSymbol*) (((PgfSymbolNE*) i.data)+1));
@@ -1137,6 +1138,10 @@ pgf_symbols_cmp(GuString* psent, size_t sent_len, BIND_TYPE* pbind, PgfSymbols*
*pbind = BIND_HARD; *pbind = BIND_HARD;
break; break;
} }
case PGF_SYMBOL_SOFT_BIND: {
*pbind = BIND_SOFT;
break;
}
case PGF_SYMBOL_NE: { case PGF_SYMBOL_NE: {
return -2; return -2;
} }
@@ -1635,6 +1640,31 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
} }
break; break;
} }
case PGF_SYMBOL_SOFT_BIND: {
if (ps->before->start_offset == ps->before->end_offset) {
if (ps->before->needs_bind) {
PgfParseState* state =
pgf_new_parse_state(ps, ps->before->end_offset, BIND_HARD);
if (state != NULL) {
if (state->next == NULL) {
state->viterbi_prob =
item->inside_prob+item->conts->outside_prob;
}
pgf_item_advance(item, ps->pool);
gu_buf_heap_push(state->agenda, pgf_item_prob_order, &item);
} else {
pgf_item_free(ps, item);
}
} else {
pgf_item_free(ps, item);
}
} else {
pgf_item_advance(item, ps->pool);
gu_buf_heap_push(ps->before->agenda, pgf_item_prob_order, &item);
}
break;
}
default: default:
gu_impossible(); gu_impossible();
} }

View File

@@ -280,6 +280,10 @@ pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err)
gu_puts("BIND", out, err); gu_puts("BIND", out, err);
break; break;
} }
case PGF_SYMBOL_SOFT_BIND: {
gu_puts("SOFT_BIND", out, err);
break;
}
default: default:
gu_impossible(); gu_impossible();
} }

View File

@@ -723,6 +723,13 @@ pgf_read_symbol(PgfReader* rdr)
gu_return_on_exn(rdr->err, gu_null_variant); gu_return_on_exn(rdr->err, gu_null_variant);
break; break;
} }
case PGF_SYMBOL_SOFT_BIND: {
gu_new_variant(PGF_SYMBOL_SOFT_BIND,
PgfSymbolBIND,
&sym, rdr->opool);
gu_return_on_exn(rdr->err, gu_null_variant);
break;
}
default: default:
pgf_read_tag_error(rdr); pgf_read_tag_error(rdr);
} }

View File

@@ -152,7 +152,8 @@ instance Binary Symbol where
put (SymKS ts) = putWord8 3 >> put ts put (SymKS ts) = putWord8 3 >> put ts
put (SymKP d vs) = putWord8 4 >> put (d,vs) put (SymKP d vs) = putWord8 4 >> put (d,vs)
put SymBIND = putWord8 5 put SymBIND = putWord8 5
put SymNE = putWord8 6 put SymSOFT_BIND = putWord8 6
put SymNE = putWord8 7
get = do tag <- getWord8 get = do tag <- getWord8
case tag of case tag of
0 -> liftM2 SymCat get get 0 -> liftM2 SymCat get get
@@ -161,7 +162,8 @@ instance Binary Symbol where
3 -> liftM SymKS get 3 -> liftM SymKS get
4 -> liftM2 (\d vs -> SymKP d vs) get get 4 -> liftM2 (\d vs -> SymKP d vs) get get
5 -> return SymBIND 5 -> return SymBIND
6 -> return SymNE 6 -> return SymSOFT_BIND
7 -> return SymNE
_ -> decodingError _ -> decodingError
instance Binary PArg where instance Binary PArg where

View File

@@ -62,6 +62,7 @@ data Symbol
| SymKS Token | SymKS Token
| SymKP [Symbol] [([Symbol],[String])] | SymKP [Symbol] [([Symbol],[String])]
| SymBIND -- the special BIND token | SymBIND -- the special BIND token
| SymSOFT_BIND -- the special SOFT_BIND token
| SymNE -- non exist (this should be last constructor to simplify the binary search in the runtime) | SymNE -- non exist (this should be last constructor to simplify the binary search in the runtime)
deriving (Eq,Ord,Show) deriving (Eq,Ord,Show)
data Production data Production

View File

@@ -160,6 +160,7 @@ data BracketedTokn
| LeafKS Token | LeafKS Token
| LeafNE | LeafNE
| LeafBIND | LeafBIND
| LeafSOFT_BIND
| LeafKP [BracketedTokn] [([BracketedTokn],[String])] | LeafKP [BracketedTokn] [([BracketedTokn],[String])]
deriving Eq deriving Eq
@@ -222,6 +223,7 @@ computeSeq filter seq args = concatMap compute seq
compute (SymKS t) = [LeafKS t] compute (SymKS t) = [LeafKS t]
compute SymNE = [LeafNE] compute SymNE = [LeafNE]
compute SymBIND = [LeafKS "&+"] compute SymBIND = [LeafKS "&+"]
compute SymSOFT_BIND = []
compute (SymKP syms alts) = [LeafKP (concatMap compute syms) [(concatMap compute syms,cs) | (syms,cs) <- alts]] compute (SymKP syms alts) = [LeafKP (concatMap compute syms) [(concatMap compute syms,cs) | (syms,cs) <- alts]]
getArg d r getArg d r

View File

@@ -228,6 +228,7 @@ splitLexicalRules cnc p_prods =
[seq2prefix (syms1 ++ syms) | (syms1,ps) <- alts]) [seq2prefix (syms1 ++ syms) | (syms1,ps) <- alts])
seq2prefix (SymNE :syms) = TrieMap.empty seq2prefix (SymNE :syms) = TrieMap.empty
seq2prefix (SymBIND :syms) = TrieMap.fromList [wf ["&+"]] seq2prefix (SymBIND :syms) = TrieMap.fromList [wf ["&+"]]
seq2prefix (SymSOFT_BIND :syms) = TrieMap.fromList [wf []]
updateConcrete abs cnc = updateConcrete abs cnc =
let p_prods0 = filterProductions IntMap.empty IntSet.empty (productions cnc) let p_prods0 = filterProductions IntMap.empty IntSet.empty (productions cnc)

View File

@@ -309,10 +309,12 @@ process flit ftok cnc (item@(Active j ppos funid seqid args key0):items) acc cha
SymNE -> process flit ftok cnc items acc chart SymNE -> process flit ftok cnc items acc chart
SymBIND -> let !acc' = ftok_ ["&+"] (Active j (ppos+1) funid seqid args key0) acc SymBIND -> let !acc' = ftok_ ["&+"] (Active j (ppos+1) funid seqid args key0) acc
in process flit ftok cnc items acc' chart in process flit ftok cnc items acc' chart
SymSOFT_BIND->process flit ftok cnc ((Active j (ppos+1) funid seqid args key0):items) acc chart
SymKP syms vars SymKP syms vars
-> let to_tok (SymKS t) = [t] -> let to_tok (SymKS t) = [t]
to_tok SymBIND = ["&+"] to_tok SymBIND = ["&+"]
to_tok _ = [] to_tok SymSOFT_BIND = []
to_tok _ = []
!acc' = foldl (\acc syms -> ftok_ (concatMap to_tok syms) (Active j (ppos+1) funid seqid args key0) acc) acc !acc' = foldl (\acc syms -> ftok_ (concatMap to_tok syms) (Active j (ppos+1) funid seqid args key0) acc) acc
(syms:[syms' | (syms',_) <- vars]) (syms:[syms' | (syms',_) <- vars])

View File

@@ -92,6 +92,7 @@ ppSymbol (SymVar d r) = char '<' <> int d <> comma <> char '$' <> int r <> char
ppSymbol (SymKS t) = doubleQuotes (text t) ppSymbol (SymKS t) = doubleQuotes (text t)
ppSymbol SymNE = text "nonExist" ppSymbol SymNE = text "nonExist"
ppSymbol SymBIND = text "BIND" ppSymbol SymBIND = text "BIND"
ppSymbol SymSOFT_BIND = text "SOFT_BIND"
ppSymbol (SymKP syms alts) = text "pre" <+> braces (hsep (punctuate semi (hsep (map ppSymbol syms) : map ppAlt alts))) ppSymbol (SymKP syms alts) = text "pre" <+> braces (hsep (punctuate semi (hsep (map ppSymbol syms) : map ppAlt alts)))
ppAlt (syms,ps) = hsep (map ppSymbol syms) <+> char '/' <+> hsep (map (doubleQuotes . text) ps) ppAlt (syms,ps) = hsep (map ppSymbol syms) <+> char '/' <+> hsep (map (doubleQuotes . text) ps)