lib/src/translator: a refactored wide-coverage translator grammar, structured as described in lib/doc/translate.html. Implemented for Eng and Swe. To do: more languages, makefile entries (to be converted from the entries for Parse), and some internal refactoring in translator/Extensions.gf.

This commit is contained in:
aarne
2014-01-20 20:59:26 +00:00
parent b8f3914209
commit f22647f73c
10 changed files with 195376 additions and 1 deletions

View File

@@ -10,7 +10,6 @@ abstract Extra = Cat ** {
GenNP : NP -> Quant ; -- this man's
GenIP : IP -> IQuant ; -- whose
GenRP : Num -> CN -> RP ; -- whose car
ComplBareVS : VS -> S -> VP ; -- know you go
CompBareCN : CN -> Comp ; -- (est) professeur
StrandRelSlash : RP -> ClSlash -> RCl ; -- that he lives in

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,100 @@
-- Abstract syntax of the translator's extension layer. It builds on Cat,
-- takes VPI from Extra, and declares the additional categories and functions
-- that ExtensionsEng and ExtensionsSwe linearize.
-- Lines starting with four dashes are the author's open questions and
-- candidate functions that are currently switched off.
abstract Extensions =
  Cat,
  Extra [VPI]
  ** {

---- from ExtraEngAbs

  cat
    ---- hard to merge VPI and VPS
    VPI ;        -- built from a VP by MkVPI
    [VPI] {2} ;  -- list of VPI, consumed by ConjVPI
    VPS ;        -- built from Temp, Pol and VP by MkVPS
    [VPS] {2} ;  -- list of VPS, consumed by ConjVPS

  fun
    MkVPI      : VP -> VPI ;
    ConjVPI    : Conj -> [VPI] -> VPI ;
    ComplVPIVV : VV -> VPI -> VP ;

    MkVPS   : Temp -> Pol -> VP -> VPS ;
    ConjVPS : Conj -> [VPS] -> VPS ;
    PredVPS : NP -> VPS -> S ;

    -- generalizing Grammar
    PassVPSlash      : VPSlash -> VP ;                -- be forced to sleep
    PassAgentVPSlash : VPSlash -> NP -> VP ;          -- be begged by her to go
    ComplVV          : VV -> Ant -> Pol -> VP -> VP ; -- want not to have slept

    ---- merge?
    SlashV2V    : V2V -> Ant -> Pol -> VP -> VPSlash ; -- force (her) not to have slept
    SlashVPIV2V : V2V -> Pol -> VPI -> VPSlash ;       -- force (her) not to sleep and dream

    -- new structures
    GenNP : NP -> Quant ;      -- this man's
    GenIP : IP -> IQuant ;     -- whose
    GenRP : Num -> CN -> RP ;  -- whose car(s)

    ---- should be covered by variants
    ---- ComplBareVS : VS -> S -> VP ; -- know you go
    ---- SlashBareV2S : V2S -> S -> VPSlash ; -- answer (to him) it is good
    ---- ComplSlashPartLast : VPSlash -> NP -> VP ;

    ---- merge VPS and VPI
    ---- MkVPS, BaseVPS, ConsVPS, ConjVPS, PredVPS,
    ---- VPIForm, VPIInf, VPIPresPart, MkVPI, BaseVPI, ConsVPI, ConjVPI, ComplVPIVV,

    ---- merge these two?
    CompoundCN : Num -> N -> CN -> CN ;
    DashCN     : N -> N -> N ;

    ---- merge ?
    ---- NominalizeVPSlashNP : VPSlash -> NP -> NP ; -- publishing of the document
    ---- EmbedPresPart : VP -> SC ; -- looking at Mary (is fun)
    GerundN : V -> N ;  -- sleeping

    ---- merge ?
    ---- PartVP : VP -> AP ; -- (man) looking at Mary
    GerundAP   : V -> AP ;   -- sleeping (man)
    PastPartAP : V2 -> AP ;  -- lost (opportunity)

    ---- why is this needed?
    OrdCompar : A -> Ord ;  -- (my) better (side)

    UseQuantPN    : Quant -> PN -> NP ;                       -- this John
    SlashSlashV2V : V2V -> Ant -> Pol -> VPSlash -> VPSlash ; ---- what is this?

    ---- eliminate as topicalizations
    PredVPosv : NP -> VP -> Cl ;
    PredVPovs : NP -> VP -> Cl ;

    ---- merge with IdRP?
    that_RP : RP ;
    who_RP  : RP ;

    EmptyRelSlash : ClSlash -> RCl ;  -- (the city) he lives in

    ---- overgenerating?
    VPSlashVS  : VS -> VP -> VPSlash ;            -- to believe her to sleep
    PastPartRS : Ant -> Pol -> VPSlash -> RS ;    -- (man) not seen by her
    PresPartRS : Ant -> Pol -> VP -> RS ;         -- (man) not having seen her
    ApposNP    : NP -> NP -> NP ;                 -- Mr Hollande, the president of France,

    ---- move to standard RGL?
    AdAdV       : AdA -> AdV -> AdV ;             -- almost always
    UttAdV      : AdV -> Utt ;                    -- always!
    PositAdVAdj : A -> AdV ;                      -- (that she) positively (sleeps)
    CompS       : S -> Comp ;                     -- (the fact is) that she sleeps
    CompQS      : QS -> Comp ;                    -- (the question is) who sleeps
    CompVP      : Ant -> Pol -> VP -> Comp ;      -- (she is) to go
}

View File

@@ -0,0 +1,145 @@
--# -path=.:../abstract
-- English concrete syntax of Extensions.
-- The first group of rules simply reuses the linearizations of ExtraEng
-- (opened as E); the second group is defined locally on top of ResEng.
concrete ExtensionsEng of Extensions =
  CatEng ** open MorphoEng, ResEng, ParadigmsEng, (S = SentenceEng), (E = ExtraEng), Prelude in {

  lincat
    VPI = E.VPI ;
    ListVPI = E.ListVPI ;
    VPS = E.VPS ;
    ListVPS = E.ListVPS ;

  -- Rules delegated unchanged to ExtraEng.
  lin
    MkVPI = E.MkVPI ;
    ConjVPI = E.ConjVPI ;
    ComplVPIVV = E.ComplVPIVV ;
    MkVPS = E.MkVPS ;
    ConjVPS = E.ConjVPS ;
    PredVPS = E.PredVPS ;
    BaseVPI = E.BaseVPI ;
    ConsVPI = E.ConsVPI ;
    BaseVPS = E.BaseVPS ;
    ConsVPS = E.ConsVPS ;
    GenNP = E.GenNP ;
    GenIP = E.GenIP ;
    GenRP = E.GenRP ;
    PassVPSlash = E.PassVPSlash ;
    PassAgentVPSlash = E.PassAgentVPSlash ;
    EmptyRelSlash = E.EmptyRelSlash ;

  -- Rules defined locally.
  lin
    -- The modifier noun takes its number from num; the head CN inflects
    -- normally and determines the gender.
    CompoundCN num noun cn = {
      s = \\n,c => num.s ! Nom ++ noun.s ! num.n ! Nom ++ cn.s ! n ! c ;
      g = cn.g
      } ;

    -- First noun fixed in singular nominative, "-" as a separate token,
    -- second noun inflects and determines the gender.
    DashCN noun1 noun2 = {
      s = \\n,c => noun1.s ! Sg ! Nom ++ "-" ++ noun2.s ! n ! c ;
      g = noun2.g
      } ;

    -- The present participle is used for every number and case.
    GerundN v = {
      s = \\n,c => v.s ! VPresPart ;
      g = Neutr
      } ;

    -- Present participle as a prenominal adjective, same form for every agreement.
    GerundAP v = {
      s = \\agr => v.s ! VPresPart ;
      isPre = True
      } ;

    -- Past participle as a prenominal adjective, same form for every agreement.
    PastPartAP v = {
      s = \\agr => v.s ! VPPart ;
      isPre = True
      } ;

    -- Comparative form of the adjective used as an ordinal.
    OrdCompar a = {s = \\c => a.s ! AAdj Compar c } ;

    -- Adverbial form of the adjective.
    PositAdVAdj a = {s = a.s ! AAdv} ;

    -- Singular quantifier form followed by the proper name; third person
    -- singular agreement with the gender of the name.
    UseQuantPN q pn = {s = \\c => q.s ! False ! Sg ++ pn.s ! npcase2case c ; a = agrgP3 Sg pn.g} ;

    -- The infinitival complement is built from the verb's complementizer
    -- v.c3, the (possibly empty) ant.s and p.s strings, and infVP.
    SlashV2V v ant p vp = insertObjc (\\a => v.c3 ++ ant.s ++ p.s ++
                                             infVP v.typ vp ant.a p.p a)
                                     (predVc v) ;

    -- Currently linearized exactly like SlashV2V; the VPSlash argument is
    -- passed to infVP as it is.
    SlashSlashV2V v ant p vp = insertObjc (\\a => v.c3 ++ ant.s ++ p.s ++
                                                  infVP v.typ vp ant.a p.p a)
                                          (predVc v) ;

    -- Only p.s is linearized here; the VPI is selected in its VVAux form.
    SlashVPIV2V v p vpi = insertObjc (\\a => p.s ++
                                             v.c3 ++
                                             vpi.s ! VVAux ! a)
                                     (predVc v) ;

    ComplVV v a p vp = insertObj (\\agr => a.s ++ p.s ++
                                          infVP v.typ vp a.a p.p agr)
                                 (predVV v) ;

    -- Complement first, then the subject, then the verb complex.
    -- In OQuest the auxiliary precedes the complement.
    PredVPosv np vp = {
      s = \\t,a,b,o =>
        let
          verb = vp.s ! t ! a ! b ! o ! np.a ;
          compl = vp.s2 ! np.a
        in
        case o of {
          ODir _ => compl ++ frontComma ++ np.s ! npNom ++ verb.aux ++ vp.ad ! np.a ++ verb.fin ++ verb.adv ++ verb.inf ;
          OQuest => verb.aux ++ compl ++ frontComma ++ np.s ! npNom ++ verb.adv ++ vp.ad ! np.a ++ verb.fin ++ verb.inf
          }
      } ;

    -- Complement first, then the verb complex, with the subject last.
    -- In OQuest the auxiliary precedes the complement.
    PredVPovs np vp = {
      s = \\t,a,b,o =>
        let
          verb = vp.s ! t ! a ! b ! o ! np.a ;
          compl = vp.s2 ! np.a
        in
        case o of {
          ODir _ => compl ++ frontComma ++ verb.aux ++ verb.adv ++ vp.ad ! np.a ++ verb.fin ++ verb.inf ++ np.s ! npNom ;
          OQuest => verb.aux ++ compl ++ verb.adv ++ vp.ad ! np.a ++ verb.fin ++ verb.inf ++ np.s ! npNom
          }
      } ;

    -- Invariant relative pronouns: the same string in every form.
    that_RP = {
      s = \\_ => "that" ;
      a = RNoAg
      } ;
    who_RP = {
      s = \\_ => "who" ;
      a = RNoAg
      } ;

    CompS s = {s = \\_ => "that" ++ s.s} ;
    CompQS qs = {s = \\_ => qs.s ! QIndir} ;
    CompVP ant p vp = {s = \\a => ant.s ++ p.s ++
                                  infVP VVInf vp ant.a p.p a} ;

    -- The VP is always rendered as a simultaneous, positive infinitive.
    VPSlashVS vs vp =
      insertObj (\\a => infVP VVInf vp Simul CPos a) (predV vs) **
      {c2 = ""; gapInMiddle = False} ;

    -- ant.s and pol.s are linearized (as in CompVP) so that neither argument
    -- is suppressed in the linearization. The values ant.a and pol.p are
    -- still not reflected in the participle form.
    PastPartRS ant pol vps = {
      s = \\agr => ant.s ++ pol.s ++ vps.ad ! agr ++ vps.ptp ++ vps.s2 ! agr ;
      c = npNom
      } ;

    -- Same treatment of ant and pol as in PastPartRS.
    PresPartRS ant pol vp = {
      s = \\agr => ant.s ++ pol.s ++ vp.ad ! agr ++ vp.prp ++ vp.p ++ vp.s2 ! agr;
      c = npNom
      } ;

    -- The apposition is always nominative and set off by commas; agreement
    -- comes from the first NP.
    ApposNP np1 np2 = {
      s = \\c => np1.s ! c ++ frontComma ++ np2.s ! npNom ++ finalComma ;
      a = np1.a
      } ;

    AdAdV = cc2 ;
    UttAdV adv = adv;
}

View File

@@ -0,0 +1,123 @@
--# -path=.:../abstract
-- Swedish concrete syntax of Extensions.
-- Rules in the first group are taken over from ExtraSwe (opened as E); the
-- rest are defined here. GenIP, GenRP and VPSlashVS are commented out and
-- SlashSlashV2V has no rule in this module.
concrete ExtensionsSwe of Extensions =
  CatSwe ** open MorphoSwe, ResSwe, ParadigmsSwe, (E = ExtraSwe), (G = GrammarSwe), SyntaxSwe, CommonScand, Prelude in {

  lincat
    VPI     = E.VPI ;
    ListVPI = E.ListVPI ;
    VPS     = E.VPS ;
    ListVPS = E.ListVPS ;

  lin
    -- reused from ExtraSwe
    MkVPI      = E.MkVPI ;
    ConjVPI    = E.ConjVPI ;
    ComplVPIVV = E.ComplVPIVV ;
    MkVPS      = E.MkVPS ;
    ConjVPS    = E.ConjVPS ;
    PredVPS    = E.PredVPS ;
    BaseVPI    = E.BaseVPI ;
    ConsVPI    = E.ConsVPI ;
    BaseVPS    = E.BaseVPS ;
    ConsVPS    = E.ConsVPS ;
    GenNP      = E.GenNP ;
    ---- GenIP = E.GenIP ;
    ---- GenRP = E.GenRP ;
    PassVPSlash      = E.PassVPSlash ;
    PassAgentVPSlash = E.PassAgentVPSlash ;
    EmptyRelSlash    = E.EmptyRelSlash ;

    -- defined locally

    -- compound form of the modifier noun glued (BIND) to the inflected head
    CompoundCN num modif headCN = {
      s = \\nb,sp,cs =>
            num.s ! headCN.g ++ modif.co ++ BIND ++ headCN.s ! nb ! sp ! cs ;
      g = headCN.g ;
      isMod = False
      } ;

    -- compound form of the first noun glued (BIND) to the second noun
    DashCN first second = {
      s = \\nb,sp,cs => first.co ++ BIND ++ second.s ! nb ! sp ! cs ;
      g = second.g ;
      co = first.co ++ BIND ++ second.co ---- add s if not already there
      } ;

    -- present participle forms of the verb used as a neuter noun
    GerundN verb = {
      s = \\nb,sp,cs => verb.s ! VI (VPtPres nb sp cs) ;
      g = Neutr ;
      co = verb.s ! VI (VPtPres Sg Indef Nom) ;
      } ;

    -- one invariant present participle form, placed before the noun
    GerundAP verb = {
      s = \\_ => verb.s ! VI (VPtPres Sg Indef Nom) ;
      isPre = True
      } ;

    -- past participle inflected by the adjectival form parameter
    PastPartAP verb = {
      s = \\aform => verb.s ! VI (VPtPret aform Nom) ;
      isPre = True
      } ;

    -- "mera" + weak positive form when a.isComp is True,
    -- otherwise the comparative form
    OrdCompar adj = {
      s = case adj.isComp of {
            True => "mera" ++ adj.s ! AF (APosit (Weak Sg)) Nom ;
            _    => adj.s ! AF ACompar Nom
            } ;
      isDet = True
      } ;

    PositAdVAdj adj = {s = adj.s ! G.adverbForm} ;

    UseQuantPN quant pname = {
      s = \\cs => quant.s ! Sg ! True ! False ! pname.g ++ pname.s ! caseNP cs ;
      a = agrP3 pname.g Sg
      } ;

    SlashV2V v2v ant pol vp =
      predV v2v ** {
        n3 = \\agr => v2v.c3.s ++ ant.s ++ pol.s ++ infVPPlus vp agr ant.a pol.p ;
        c2 = v2v.c2
        } ;

    SlashVPIV2V v2v pol vpi =
      predV v2v ** {
        n3 = \\agr => v2v.c3.s ++ pol.s ++ negation ! pol.p ++ vpi.s ! VPIInf ! agr ;
        c2 = v2v.c2
        } ;

    ComplVV vv ant pol vp =
      insertObjPost
        (\\agr => vv.c2.s ++ ant.s ++ pol.s ++ infVPPlus vp agr ant.a pol.p)
        (predV vv) ;

    -- both word-order variants currently fall back to the plain clause
    PredVPosv subj vp = mkCl subj vp ; ---- TODO restructure all this using Extra.Foc
    PredVPovs subj vp = mkCl subj vp ; ----

    that_RP = which_RP ; -- som
    who_RP  = which_RP ;

    CompS sent = {s = \\_ => "att" ++ sent.s ! Sub} ;
    CompQS quest = {s = \\_ => quest.s ! QIndir} ;
    CompVP ant pol vp = {
      s = \\agr => ant.s ++ pol.s ++ infVPPlus vp agr ant.a pol.p
      } ;

    -- VPSlashVS : VS -> VP -> VPSlash
    ---VPSlashVS vs vp =
    --- insertObj (\\a => infVP VVInf vp Simul CPos a) (predV vs) **
    --- {c2 = ""; gapInMiddle = False} ;

    -- both participial relatives are rendered as full relative clauses
    PastPartRS ant pol slash =
      mkRS ant pol (mkRCl which_RP <lin VP slash : VP> ) ; ---- maybe as participle construction?
    PresPartRS ant pol vp =
      mkRS ant pol (mkRCl which_RP vp) ; --- probably not as participle construction

    -- apposition in the nominative after a comma; agreement from the first NP
    ApposNP mainNP appos = {
      s = \\cs => mainNP.s ! cs ++ comma ++ appos.s ! NPNom ;
      a = mainNP.a
      } ;

    AdAdV = cc2 ;
    UttAdV adverb = adverb ;
}

View File

@@ -0,0 +1,34 @@
-- Top-level abstract syntax of the translator. It is assembled purely by
-- inheritance (with some functions excluded or selected) and sets the flags
-- for the start category and the search and metavariable parameters.
abstract Translate =

  -- modules in Grammar, excluding Structural
  Tense,
  Noun - [PPartNP],                    -- to be generalized
  Adjective,
  Numeral,
  Conjunction,
  Verb - [ SlashV2V, PassV2, ComplVV,  -- to be generalized
           UseCopula                   ---- overgenerating ??
         ],
  Adverb,
  Phrase,
  Sentence,
  Question,
  Relative,

  -- selected parts of Idiom and Symbol
  Idiom [ NP, VP, Cl, Tense,
          ProgrVP, ExistNP, SelfAdvVP, SelfAdVVP, SelfNP ],   ---- why only these?
  Symbol [ PN, Symb, String, CN, Card, NP,
           MkSymb, SymbPN, CNNumNP ],                         ---- why only these?

  -- translator-specific modules
  Construction,
  Extensions,
  Dictionary,
  Documentation

  ** {

  flags
    startcat = Phr ;
    heuristic_search_factor = 0.60 ;
    meta_prob = 1.0e-5 ;
    meta_token_prob = 1.1965149246222233e-9 ;
}

View File

@@ -0,0 +1,39 @@
--# -path=.:../abstract:../english
-- English concrete syntax of Translate. Most of it is inherited; only UseCl,
-- PPos and PNeg get their own linearizations here, the last two because they
-- are excluded from TenseX and UseCl because it is excluded from SentenceEng.
concrete TranslateEng of Translate =
  TenseX - [Pol, PNeg, PPos],
  CatEng,
  NounEng - [PPartNP],
  AdjectiveEng,
  NumeralEng,
  SymbolEng [ PN, Symb, String, CN, Card, NP, MkSymb, SymbPN, CNNumNP,
              addGenitiveS ],
  ConjunctionEng,
  VerbEng - [SlashV2V, PassV2, UseCopula, ComplVV],
  AdverbEng,
  PhraseEng,
  SentenceEng - [UseCl],   -- replaced by UseCl | ContractedUseCl
  QuestionEng,
  RelativeEng,
  IdiomEng [ NP, VP, Tense, Cl,
             ProgrVP, ExistNP, SelfAdvVP, SelfAdVVP, SelfNP ],
  ConstructionEng,
  DocumentationEng,
  ExtensionsEng,
  DictionaryEng
  ** open MorphoEng, ResEng, ParadigmsEng, (S = SentenceEng), (E = ExtraEng), Prelude in {

  flags
    literal = Symb ;

  -- exceptional linearizations
  lin
    -- the standard form and the contracted form as free variants
    UseCl temp pol cl =
      variants {
        S.UseCl temp pol cl ;
        E.ContractedUseCl temp pol cl
        } ;

    PPos = {s = [] ; p = CPos} ;

    -- both CNeg values as free variants, neither adding any string
    PNeg =
      variants {
        {s = [] ; p = CNeg True} ;
        {s = [] ; p = CNeg False}
        } ;
}

View File

@@ -0,0 +1,31 @@
--# -path=.:../swedish/:../scandinavian:../english/:../abstract
-- Swedish concrete syntax of Translate. Everything is inherited; the module
-- body only sets the literal flag.
concrete TranslateSwe of Translate =
  TenseSwe,
  NounSwe - [PPartNP],
  AdjectiveSwe,
  NumeralSwe,
  SymbolSwe [ PN, Symb, String, CN, Card, NP, MkSymb, SymbPN, CNNumNP ],
  ConjunctionSwe,
  VerbSwe - [SlashV2V, PassV2, UseCopula, ComplVV],
  AdverbSwe,
  PhraseSwe,
  SentenceSwe,
  QuestionSwe,
  RelativeSwe,
  IdiomSwe [ NP, VP, Tense, Cl,
             ProgrVP, ExistNP, SelfAdvVP, SelfAdVVP, SelfNP,
             neutr, sjalv ],
  ConstructionSwe,
  DocumentationSwe,
  ExtensionsSwe,
  DictionarySwe
  ** open MorphoSwe, ResSwe, ParadigmsSwe, SyntaxSwe, CommonScand, (E = ExtraSwe), Prelude in {

  flags
    literal = Symb ;
}