Estonian - the 29th complete RGL language. Copied from Kaarel Kaljurand's and Inari Listenmaa's repository in https://github.com/GF-Estonian/GF-Estonian, where later developments will continue to take place.

This commit is contained in:
aarne
2013-10-18 11:45:06 +00:00
parent 02cbf49516
commit 90ee7c7288
35 changed files with 111190 additions and 1 deletions

View File

@@ -37,6 +37,7 @@ langsCoding = [
(("danish", "Dan"),"Scand"),
(("dutch", "Dut"),""),
(("english", "Eng"),""),
(("estonian", "Est"),""),
(("finnish", "Fin"),""),
(("french", "Fre"),"Romance"),
(("german", "Ger"),""),
@@ -76,7 +77,7 @@ langsLangAll = langs
langsLang = langs `except` langsIncomplete
-- languages that have notpresent marked
langsPresent = langsLang `except` ["Chi","Gre","Heb","Jpn","Mlt","Nep","Pes","Snd","Tha","Thb"]
langsPresent = langsLang `except` ["Chi","Est","Gre","Heb","Jpn","Mlt","Nep","Pes","Snd","Tha","Thb"]
-- languages for which Lang can be compiled but which are incomplete
langsIncomplete = ["Amh","Ara","Heb","Lat","Tur","Thb"]

View File

@@ -0,0 +1,6 @@
--# -path=.:alltenses:prelude
-- Estonian instance of the Combinators functor: the interfaces Cat,
-- Structural and Constructors are bound to their Estonian modules.
resource CombinatorsEst = Combinators with
  (Cat          = CatEst),
  (Structural   = StructuralEst),
  (Constructors = ConstructorsEst) ;

View File

@@ -0,0 +1,3 @@
--# -path=.:alltenses:prelude
-- Estonian instance of the Constructors functor, built on GrammarEst.
resource ConstructorsEst =
  Constructors with
    (Grammar = GrammarEst) ;

View File

@@ -0,0 +1,5 @@
--# -path=.:present:mathematical:prelude
-- Estonian instance of the Symbolic functor: Symbol and Grammar are bound
-- to SymbolEst and GrammarEst.
resource SymbolicEst = Symbolic with
  (Symbol  = SymbolEst),
  (Grammar = GrammarEst) ;

4
lib/src/api/SyntaxEst.gf Normal file
View File

@@ -0,0 +1,4 @@
--# -path=.:alltenses:prelude
-- Estonian instance of the Syntax interface, assembled from the
-- constructor, category, structural-word and combinator modules.
instance SyntaxEst of Syntax =
  ConstructorsEst,
  CatEst,
  StructuralEst,
  CombinatorsEst ;

3
lib/src/api/TryEst.gf Normal file
View File

@@ -0,0 +1,3 @@
--# -path=.:alltenses:prelude
-- Convenience module combining syntax, lexicon and paradigms.
-- Five names are excluded from ParadigmsEst (mkAdv, mkAdN, mkDet, mkQuant, mkPConj).
resource TryEst =
  SyntaxEst,
  LexiconEst,
  ParadigmsEst - [mkAdv,mkAdN,mkDet,mkQuant,mkPConj] ;

View File

@@ -0,0 +1,64 @@
-- Estonian linearization rules for the abstract Adjective module.
-- Every rule builds a record with
--   s    : a table indexed by a Bool (called isMod below) and a noun form,
--   infl : an Infl value, either copied from the argument or fixed to Regular.
concrete AdjectiveEst of Adjective = CatEst ** open ResEst, Prelude in {

  -- gfc size from 2864336 to 6786 - i.e. factor 422
  flags optimize=all_subs ; coding=utf8;

  lin

    -- Positive degree; the Bool index is ignored.
    PositA adj = {
      infl = adj.infl ;
      s = \\_,form => adj.s ! Posit ! AN form
      } ;

    -- Comparative with an object of comparison: when isMod is True the object
    -- precedes in the elative, otherwise it follows after "kui" in the nominative.
    ComparA adj obj = {
      infl = Regular ; --adj.infl
      s = \\isMod,form =>
        let
          cmp : Str = adj.s ! Compar ! AN form
        in
          case isMod of {
            True => obj.s ! NPCase Elat ++ cmp ;          -- minust suurem
            _    => cmp ++ "kui" ++ obj.s ! NPCase Nom    -- suurem kui mina
            }
      } ;

    -- Comparison adverb around an AP, followed by the nominative object.
    CAdvAP cadv ap obj = {
      infl = ap.infl ;
      s = \\isMod,form =>
        cadv.s ++ ap.s ! isMod ! form ++ cadv.p ++ obj.s ! NPCase Nom
      } ;

    -- Comparative degree without an object.
    UseComparA adj = {
      infl = Regular ; --adj.infl
      s = \\_,form => adj.s ! Compar ! AN form
      } ;

    -- $SuperlA$ belongs to determiner syntax in $Noun$.

    -- An ordinal used as an AP; the Bool index is ignored.
    AdjOrd ord = {
      infl = Regular ;
      s = \\_ => ord.s
      } ;

    -- Two-place adjective with its complement; preOrPost places the
    -- complement relative to the adjective according to isMod.
    ComplA2 adj np = {
      infl = adj.infl ;
      s = \\isMod,form =>
        let
          complement : Str = appCompl True Pos adj.c2 np ;
          head : Str = adj.s ! Posit ! AN form
        in
          preOrPost isMod complement head
      } ;

    -- Two-place adjective with the third-person singular reflexive as complement.
    ReflA2 adj = {
      infl = adj.infl ;
      s = \\isMod,form =>
        let
          complement : Str = appCompl True Pos adj.c2 (reflPron (agrP3 Sg)) ;
          head : Str = adj.s ! Posit ! AN form
        in
          preOrPost isMod complement head
      } ;

    -- AP followed by a sentential complement.
    SentAP ap sc = {
      infl = ap.infl ;
      s = \\isMod,form => ap.s ! isMod ! form ++ sc.s
      } ;

    -- AP preceded by an adadjective.
    AdAP ada ap = {
      infl = ap.infl ;
      s = \\isMod,form => ada.s ++ ap.s ! isMod ! form
      } ;

    -- Two-place adjective used without its complement.
    UseA2 adj = {
      infl = adj.infl ;
      s = \\_,form => adj.s ! Posit ! AN form
      } ;

}

View File

@@ -0,0 +1,25 @@
-- Estonian linearization rules for the abstract Adverb module.
concrete AdverbEst of Adverb = CatEst ** open ResEst, Prelude in {

  flags coding=utf8;

  lin

    -- The adverbial form (AAdv) of the positive degree.
    PositAdvAdj adj = {s = adj.s ! Posit ! AAdv} ;

    -- Comparison adverb + adverbial adjective + nominative NP.
    ComparAdvAdj cadv adj obj = {
      s = cadv.s ++ adj.s ! Posit ! AAdv ++ cadv.p ++ obj.s ! NPCase Nom
      } ;

    -- Same, with a sentence as the object of comparison.
    ComparAdvAdjS cadv adj sent = {
      s = cadv.s ++ adj.s ! Posit ! AAdv ++ cadv.p ++ sent.s
      } ;

    -- The adposition goes before or after the NP according to prep.isPre;
    -- the NP takes the form prep.c.
    PrepNP prep np = {
      s = preOrPost prep.isPre prep.s (np.s ! prep.c)
      } ;

    AdAdv = cc2 ;

    -- The singular genitive of the positive degree serves as the adadjective.
    PositAdAAdj adj = {s = adj.s ! Posit ! AN (NCase Sg Gen)} ; -- älyttömän

    SubjS = cc2 ;
----b AdvSC s = s ;

    AdnCAdv cadv = {s = cadv.s ++ "kui"} ;

}

View File

@@ -0,0 +1,6 @@
--# -path=.:../abstract:../common:prelude
-- Top-level concrete syntax: LangEst together with the ExtraEst extensions.
-- The bracketed lists in the trailing comments are exclusions that are
-- currently switched off.
concrete AllEst of AllEstAbs =
  LangEst,  -- - [SlashV2VNP,SlashVV, TFut], ---- to speed up linking; to remove spurious parses
  ExtraEst  -- - [ProDrop, ProDropPoss, S_OSV, S_VSO, S_ASV] -- to exclude spurious parses
  ** {} ;

View File

@@ -0,0 +1,4 @@
-- Top-level abstract syntax: Lang extended with ExtraEstAbs.
abstract AllEstAbs =
  Lang,
  ExtraEstAbs
  ** {} ;

View File

@@ -0,0 +1,99 @@
-- Linearization types (lincat) for the categories of the abstract Cat module.
concrete CatEst of Cat = CommonX ** open HjkEst, ResEst, Prelude in {

  flags optimize=all_subs ; coding=utf8;

  lincat

  -- Tensed/Untensed

    S      = {s : Str} ; --TODO {s : Order => Str}, like in German?
    QS     = {s : Str} ;
    RS     = {s : Agr => Str ; c : NPForm} ;
    SSlash = {s : Str ; c2 : Compl} ;

  -- Sentence

    Cl      = {s : ResEst.Tense => Anteriority => Polarity => SType => Str} ;
    ClSlash = {s : ResEst.Tense => Anteriority => Polarity => Str ; c2 : Compl} ;
    Imp     = {s : Polarity => Agr => Str} ;

  -- Question

    QCl    = {s : ResEst.Tense => Anteriority => Polarity => Str} ;
    IP     = {s : NPForm => Str ; n : Number} ;
    IComp  = {s : Agr => Str} ;
    IDet   = {s : Case => Str ; n : Number ; isNum : Bool} ;
    IQuant = {s : Number => Case => Str} ;

  -- Relative

    RCl = {s : ResEst.Tense => Anteriority => Polarity => Agr => Str ; c : NPForm} ;
    RP  = {s : Number => NPForm => Str ; a : RAgr} ;

  -- Verb

    VP      = ResEst.VP ;
    VPSlash = ResEst.VP ** {c2 : Compl} ;
    Comp    = {s : Agr => Str} ;

  -- Adjective

    -- The $Bool$ in s tells whether usage is modifying (as opposed to
    -- predicative), e.g. "x on suurem kui y" vs. "y:st suurem arv".
    -- The $Infl$ in infl tells whether the adjective inflects as a
    -- modifier: e.g. "väsinud mehele" vs. "mees muutus väsinuks".
    AP = {s : Bool => NForm => Str ; infl : Infl} ;

  -- Noun

    CN   = {s : NForm => Str} ;
    Pron = {s : NPForm => Str ; a : Agr} ;
    NP   = {s : NPForm => Str ; a : Agr ; isPron : Bool} ;
    Det  = {
      s     : Case => Str ; -- minun kolme
      sp    : Case => Str ; -- se (substantival form)
      n     : Number ;      -- Pl (agreement feature for verb)
      isNum : Bool ;        -- True (a numeral is present)
      isDef : Bool          -- True (verb agrees in Pl, Nom is not Part) --I: actually, can we get rid of this?
      } ;
---- QuantSg, QuantPl = {s : Case => Str ; isDef : Bool} ;
    Ord    = {s : NForm => Str} ;
    Predet = {s : Number => NPForm => Str} ;
    Quant  = {s,sp : Number => Case => Str ; isDef : Bool} ;
    Card   = {s : Number => Case => Str ; n : Number} ;
    Num    = {s : Number => Case => Str ; isNum : Bool ; n : Number} ;

  -- Numeral

    Numeral = {s : CardOrd => Str ; n : Number} ;
    Digits  = {s : CardOrd => Str ; n : Number} ;

  -- Structural

    Conj = {s1,s2 : Str ; n : Number} ;
----b DConj = {s1,s2 : Str ; n : Number} ;
    Subj = {s : Str} ;
    Prep = Compl ;

  -- Open lexical classes, e.g. Lexicon

    -- All verb categories extend Verb1 (defined at the end of this module),
    -- which is Verb plus the field sc : NPForm.
    V, VS, VQ        = Verb1 ;
    V2, VA, V2Q, V2S = Verb1 ** {c2 : Compl} ;
    V2A              = Verb1 ** {c2, c3 : Compl} ;
    VV               = Verb1 ** {vi : InfForm} ; ---- infinitive form
    V2V              = Verb1 ** {c2 : Compl ; vi : InfForm} ; ---- infinitive form
    V3               = Verb1 ** {c2, c3 : Compl} ;

    A  = Adjective ** {infl : Infl} ;
    A2 = A ** {c2 : Compl} ;

    N  = Noun ;
    N2 = CommonNoun ** {c2 : Compl ; isPre : Bool ; lock_N2 : {}} ;
    N3 = CommonNoun ** {c2,c3 : Compl ; isPre,isPre2 : Bool ; lock_N3 : {}} ;
    PN = {s : Case => Str} ;

  -- sc is the subject case, i.e. "ma näen kassi"/"mul on kass"
  oper Verb1 = Verb ** { sc : NPForm} ;

}

View File

@@ -0,0 +1,109 @@
concrete ConjunctionEst of Conjunction =
CatEst ** open ResEst, Coordination, Prelude in {
flags optimize=all_subs ;
lin
-- Sentences and adverbs are plain string lists.
ConjS   = conjunctDistrSS ;
ConjAdv = conjunctDistrSS ;

-- NP coordination: the agreement of the list is combined with the number
-- contributed by the conjunction; the result is never a pronoun.
ConjNP conj nps = conjunctDistrTable NPForm conj nps ** {
  isPron = False ;
  a = conjAgr (Ag conj.n P3) nps.a -- P3 is the maximum
  } ;
-- ConjAP conj ss = conjunctDistrTable2 Bool NForm conj ss ** {
-- AP coordination. The strings come from conjunctDistrTableAdj, which already
-- resolves the per-element inflection, so the coordinated AP as a whole is
-- Regular. The field infl has type Infl (Regular | Participle | Invariable
-- are the values used in this module); the Bool value True that was given
-- here before does not belong to that type.
ConjAP conj ss = conjunctDistrTableAdj conj ss ** {
  infl = Regular ;
  lock_AP = <>
  } ;
-- Relative-clause coordination; the c field is taken from the list.
ConjRS conj rss = conjunctDistrTable Agr conj rss ** {c = rss.c} ;

-- These fun's are generated from the list cat's.
-- In each Cons rule the first argument is the new element and the second
-- is the list built so far.

BaseS = twoSS ;
ConsS = consrSS comma ;

BaseAdv = twoSS ;
ConsAdv = consrSS comma ;

BaseNP first second =
  twoTable NPForm first second ** {a = conjAgr first.a second.a} ;
ConsNP head rest =
  consrTable NPForm comma head rest ** {a = conjAgr head.a rest.a} ;

-- consrTableAdj takes the list before the new element.
BaseAP first second = twoTableAdj first second ;
ConsAP head rest    = consrTableAdj comma rest head ;
-- BaseAP x y = twoTable2 Bool NForm x y ;
-- ConsAP xs x = consrTable2 Bool NForm comma xs x ;

BaseRS first second = twoTable Agr first second ** {c = second.c} ;
ConsRS head rest    = consrTable Agr comma head rest ** {c = head.c} ;
lincat
-- List categories: s1 and s2 are the two slots that the conjunction rules
-- combine. AP lists keep whole AP-shaped records (with infl) in both slots.
[S]   = {s1,s2 : Str} ;
[Adv] = {s1,s2 : Str} ;
[NP]  = {s1,s2 : NPForm => Str ; a : Agr} ;
[AP]  = {s1,s2 : {s : Bool => NForm => Str ; infl : Infl }} ;
[RS]  = {s1,s2 : Agr => Str ; c : NPForm} ;
oper
--Modified from prelude/Coordination.gf generic functions
-- Two-element AP list: the two APs are stored unchanged in s1 and s2.
twoTableAdj : (_,_ : AP) -> [AP] = \first,second ->
  lin ListAP {
    lock_ListAP = <> ;
    s1 = first ;
    s2 = second
    } ;
-- Prepends the AP x to the AP list xs, using c as the separator.
--   s1 of the result = x ++ c ++ xs.s1
--   s2 of the result = xs.s2   (the slot that follows the conjunction)
-- Earlier this oper merged xs.s1 with xs.s2 and stored x as s2, so a list
-- built as ConsAP a (BaseAP b c) was linearized in the order
-- "b , c <conj> a" rather than "a , b <conj> c".
-- When isMod is True, an element whose infl is Participle or Invariable is
-- always taken in NCase Sg Nom, while a Regular element follows nf; when
-- isMod is False both elements follow nf. The merged s1 is marked Regular
-- because the per-element choice has already been made inside its table.
consrTableAdj : Str -> [AP] -> {s : Bool => NForm => Str ; infl : Infl} -> [AP] = \c,xs,x ->
  let
    ap1 = x ;      -- the new first element
    ap2 = xs.s1    -- the front part of the existing list
  in
  lin ListAP {
    s1 = {
      s = \\isMod,nf =>
        case isMod of {
          True => case <ap1.infl, ap2.infl> of {
            <(Participle|Invariable),(Participle|Invariable)> =>
              ap1.s ! isMod ! (NCase Sg Nom) ++ c ++ ap2.s ! isMod ! (NCase Sg Nom) ; --valmis ja täis kassid
            <(Participle|Invariable),Regular> =>
              ap1.s ! isMod ! (NCase Sg Nom) ++ c ++ ap2.s ! isMod ! nf ; --valmis ja suured kassid
            <Regular,(Participle|Invariable)> =>
              ap1.s ! isMod ! nf ++ c ++ ap2.s ! isMod ! (NCase Sg Nom) ; --suured ja valmis kassid
            _ => ap1.s ! isMod ! nf ++ c ++ ap2.s ! isMod ! nf --suured ja mustad kassid
            } ;
          False => ap1.s ! isMod ! nf ++ c ++ ap2.s ! isMod ! nf --kassid on valmid ja suured
          } ;
      infl = Regular ;
      lock_AP = <>
      } ;
    s2 = xs.s2 ;   -- the last element stays last
    lock_ListAP = <>
    } ;
-- Joins the two slots of an AP list with a distributed conjunction:
--   conj.s1 ++ <slot s1> ++ conj.s2 ++ <slot s2>
-- When isMod is True, a slot whose infl is Participle or Invariable is taken
-- in NCase Sg Nom while a Regular slot follows nf; when isMod is False both
-- slots follow nf. The result is always marked Regular.
conjunctDistrTableAdj : ConjunctionDistr -> [AP] -> AP = \conj,list ->
  let
    left  = list.s1 ;
    right = list.s2
  in
  lin AP {
    lock_AP = <> ;
    infl = Regular ;
    s = \\isMod,nf =>
      case isMod of {
        True => case <left.infl, right.infl> of {
          <(Participle|Invariable),(Participle|Invariable)> =>
            conj.s1 ++ left.s ! isMod ! (NCase Sg Nom) ++
            conj.s2 ++ right.s ! isMod ! (NCase Sg Nom) ;
          <(Participle|Invariable),Regular> =>
            conj.s1 ++ left.s ! isMod ! (NCase Sg Nom) ++
            conj.s2 ++ right.s ! isMod ! nf ;
          <Regular,(Participle|Invariable)> =>
            conj.s1 ++ left.s ! isMod ! nf ++
            conj.s2 ++ right.s ! isMod ! (NCase Sg Nom) ;
          _ =>
            conj.s1 ++ left.s ! isMod ! nf ++
            conj.s2 ++ right.s ! isMod ! nf
          } ;
        False =>
          conj.s1 ++ left.s ! isMod ! nf ++
          conj.s2 ++ right.s ! isMod ! nf
        }
    } ;
}

53105
lib/src/estonian/DictEst.gf Normal file

File diff suppressed because it is too large Load Diff

53095
lib/src/estonian/DictEstAbs.gf Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,239 @@
concrete ExtraEst of ExtraEstAbs = CatEst **
open ResEst, MorphoEst, Coordination, Prelude, NounEst, StructuralEst, (R = ParamX) in {
lin
-- Genitive NP used as a quantifier: both s and sp give the genitive of the
-- NP regardless of number and case.
GenNP np = {
  isDef = True ; --- "Jussin kolme autoa ovat" ; thus "...on" is missing
  isNum = False ;
  isNeg = False ;
  s,sp = \\_,_ => np.s ! NPCase Gen
  } ;

-- Genitive NP placed in front of a common noun.
GenCN owner head = {
  s = \\form => owner.s ! NPCase Gen ++ head.s ! form
  } ;
lincat
  -- Infinitival verb phrases and lists of them are plain strings.
  VPI   = {s : Str} ;
  [VPI] = {s1,s2 : Str} ;

lin
  BaseVPI = twoSS ;
  ConsVPI = consrSS comma ;
  ConjVPI = conjunctDistrSS ;

  -- Uses the InfDa infinitive form.
  MkVPI vp = {
    s = infVP (NPCase Nom) Pos (agrP3 Sg) vp InfDa
    } ;

  -- The infinitival VP string is inserted as the object of the VV.
  ComplVPIVV vv vpi =
    insertObj
      (\\_,_,_ => vpi.s)
      (predV vv) ;
lincat
  -- Finite verb phrases with tense and polarity already fixed.
  VPS = {
    s  : Agr => Str ;
    sc : NPForm ;  --- can be different for diff parts
    } ;
  [VPS] = {
    s1,s2 : Agr => Str ;
    sc    : NPForm ;  --- take the first: minä osaan kutoa ja täytyy virkata
    } ;

lin
  -- In both list constructors the subject case is that of the first argument.
  BaseVPS first second = twoTable Agr first second ** {sc = first.sc} ;
  ConsVPS head rest    = consrTable Agr comma head rest ** {sc = head.sc} ;

  ConjVPS conj list = conjunctDistrTable Agr conj list ** {sc = list.sc} ;

  -- Temp -> Pol -> VP -> VPS ;
  MkVPS temp pol vp = {
    sc = vp.sc ;
    s = \\agr =>
      let
        verb = vp.s ! VIFin temp.t ! temp.a ! pol.p ! agr
      in
        temp.s ++ pol.s ++
        verb.fin ++ verb.inf ++
        vp.s2 ! True ! pol.p ! agr ++
        vp.adv ! pol.p ++
        vp.ext
    } ;

  -- NP -> VPS -> S ;
  -- The subject takes the form required by the VPS and supplies the agreement.
  PredVPS subj vps = {
    s = subjForm subj vps.sc Pos ++ vps.s ! subj.a
    } ;
-- Adverb-initial existential clause: the adverb fills the subject slot of
-- mkClause and the NP (nominative) is inserted as the object of verbOlema.
AdvExistNP adv np =
  mkClause
    (\_ -> adv.s)
    np.a
    (insertObj
      (\\_,_,_ => np.s ! NPCase Nom)
      (predV (verbOlema ** {sc = NPCase Nom}))) ;

-- Relative existential clause: a preposition applied to the relative pronoun
-- fills the subject slot; the clause is read off in SDecl order.
RelExistNP prep rp np = {
  c = NPCase Nom ;
  s = \\tense,ant,pol,agr =>
    let
      num = complNumAgr agr ;
      clause = mkClause
        (\_ -> appCompl True Pos prep (rp2np num rp))
        np.a
        (insertObj
          (\\_,_,_ => np.s ! NPCase Nom)
          (predV (verbOlema ** {sc = NPCase Nom}))) ;
    in
      clause.s ! tense ! ant ! pol ! SDecl
  } ;

-- Like AdvExistNP, but with an arbitrary verb.
AdvPredNP adv verb np =
  mkClause
    (\_ -> adv.s)
    np.a
    (insertObj
      (\\_,_,_ => np.s ! NPCase Nom)
      (predV verb)) ;

-- Question formed from an interrogative complement and verbOlema.
ICompExistNP icomp np =
  let
    clause = mkClause
      (\_ -> icomp.s ! np.a)
      np.a
      (insertObj
        (\\_,_,_ => np.s ! NPCase Nom)
        (predV (verbOlema ** {sc = NPCase Nom}))) ;
  in {
    s = \\tense,ant,pol => clause.s ! tense ! ant ! pol ! SDecl
    } ;

-- Question formed from an interrogative adverb and a verb; the NP takes the
-- verb's subject case.
IAdvPredNP iadv verb np =
  let
    clause = mkClause
      (\_ -> iadv.s)
      np.a
      (insertObj
        (\\_,_,_ => np.s ! verb.sc)
        (predV verb)) ;
  in {
    s = \\tense,ant,pol => clause.s ! tense ! ant ! pol ! SDecl
    } ;
-- i_implicPron = mkPronoun [] "minun" "minua" "minuna" "minuun" Sg P1 ;
-- "what" with the partitive form in the nominative and accusative slots;
-- all other forms are delegated to whatSg_IP.
-- The form is Estonian "mida" (partitive of "mis"); the string used here
-- before, "mitä", is the Finnish word.
whatPart_IP = {
  s = table {
    NPCase Nom | NPAcc => "mida" ;
    c => whatSg_IP.s ! c
    } ;
  n = Sg
  } ;
-- Partitive NP from a common noun: the indefinite singular NP is built
-- first, then its nominative and accusative slots are replaced by its
-- partitive form.
PartCN cn =
  let
    indef = DetCN (DetQuant IndefArt NumSg) cn
  in {
    isPron = False ;
    isNeg = False ;
    a = indef.a ;
    s = table {
      NPCase Nom | NPAcc => indef.s ! NPCase Part ;
      other => indef.s ! other
      }
    } ;
-- "or" for questions. The function keeps its abstract name, but the word is
-- Estonian "või"; the string used here before, "vai", is the Finnish word.
-- NOTE(review): the literal is non-ASCII and this module declares no coding
-- flag; confirm the file is compiled as UTF-8.
vai_Conj = {s1 = [] ; s2 = "või" ; n = Sg} ;
-- The reflexive possessive "oma",
-- for "ta näeb oma koera" instead of *"tema koera".
OmaPoss = {
  isDef,isNeg,isNum = False ;
  s,sp = \\_,_ => "oma"
  } ;

-- Short singular pronouns, built directly with mkPronoun.
ma_Pron = mkPronoun "ma" "mu" "mind" Sg P1 ;
sa_Pron = mkPronoun "sa" "su" "sind" Sg P2;
ta_Pron = mkPronoun "ta" "ta" "teda" Sg P3 ;

-- Short plural pronouns: only the nominative differs from the
-- corresponding full pronoun; all other forms are looked up there.
me_Pron = {
  a = Ag Pl P1 ;
  s = table {
    NPCase Nom => "me" ;
    other => (we_Pron.s) ! other
    }
  } ;

te_Pron = {
  a = Ag Pl P2 ;
  s = table {
    NPCase Nom => "te" ;
    other => (youPl_Pron.s) ! other
    }
  } ;

nad_Pron = {
  a = Ag Pl P3 ;
  s = table {
    NPCase Nom => "nad" ;
    other => (they_Pron.s) ! other
    }
  } ;
---- copied from VerbEst.CompAP, should be shared
-- Interrogative complement from an AP: "kui" + the predicative AP.
-- The case is Nom in both numbers. (The Finnish source distinguished
-- Sg Nom "minä olen iso" from Pl Part "me olemme isoja"; here both
-- branches were Nom.)  --- definiteness of NP ?
ICompAP ap = {
  s = \\agr =>
    "kui" ++ ap.s ! False ! (NCase (complNumAgr agr) Nom)
  } ;

-- Interrogative adverb from an adverb: "kui" + adverb.
IAdvAdv adv = {
  s = "kui" ++ adv.s
  } ;
-- Pronoun with empty nominative and genitive; other forms are unchanged.
ProDrop pron = {
  a = pron.a ;
  ---- drop Gen only works in adjectival position
  s = table {
    NPCase (Nom | Gen) => [] ;
    other => pron.s ! other
    }
  } ;

-- Possessive quantifier from a pronoun: attributive "oma", substantival
-- form is the pronoun's genitive.
ProDropPoss pron = {
  isDef = True ; --- "minun kolme autoani ovat" ; thus "...on" is missing
  isNum = False ;
  isNeg = False ;
  s  = \\_,_ => "oma" ; --???
  sp = \\_,_ => pron.s ! NPCase Gen
  } ;
lincat
  ClPlus, ClPlusObj, ClPlusAdv = ClausePlus ;
  Part = {s : Str} ;

lin
  -- Each rule picks the clause parts for the given tense, anteriority and
  -- polarity and emits them in the order named by the rule; the particle
  -- string always comes right after the first clause part.

  -- subject - verb - object
  S_SVO part t p clp =
    let cl = clp.s ! t.t ! t.a ! p.p
    in {
      s = t.s ++ p.s ++
          cl.subj ++ part.s ++ cl.fin ++ cl.inf ++ cl.compl ++ cl.adv ++ cl.ext
      } ;

  -- object - subject - verb
  S_OSV part t p clp =
    let cl = clp.s ! t.t ! t.a ! p.p
    in {
      s = t.s ++ p.s ++
          cl.compl ++ part.s ++ cl.subj ++ cl.fin ++ cl.inf ++ cl.adv ++ cl.ext
      } ;

  -- verb - subject - object
  S_VSO part t p clp =
    let cl = clp.s ! t.t ! t.a ! p.p
    in {
      s = t.s ++ p.s ++
          cl.fin ++ part.s ++ cl.subj ++ cl.inf ++ cl.compl ++ cl.adv ++ cl.ext
      } ;

  -- adverb - subject - verb
  S_ASV part t p clp =
    let cl = clp.s ! t.t ! t.a ! p.p
    in {
      s = t.s ++ p.s ++
          cl.adv ++ part.s ++ cl.subj ++ cl.fin ++ cl.inf ++ cl.compl ++ cl.ext
      } ;

  -- object - verb - subject
  S_OVS part t p clp =
    let cl = clp.s ! t.t ! t.a ! p.p
    in {
      s = t.s ++ p.s ++
          cl.compl ++ part.s ++ cl.fin ++ cl.inf ++ cl.subj ++ cl.adv ++ cl.ext
      } ;
-- Plain clause with the extra word-order information.
PredClPlus subj vp =
  mkClausePlus (subjForm subj vp.sc) subj.a vp ;

-- Focus variants: insertKinClausePlus is called with 0 for the subject
-- and 1 for the verb.
PredClPlusFocSubj subj vp =
  insertKinClausePlus 0
    (mkClausePlus (subjForm subj vp.sc) subj.a vp) ;
PredClPlusFocVerb subj vp =
  insertKinClausePlus 1
    (mkClausePlus (subjForm subj vp.sc) subj.a vp) ;

-- Object insertion (slot 0); the Bool argument is False for the plain
-- rule and True for the Foc rule.
PredClPlusObj subj vps obj =
  insertObjClausePlus 0 False
    (\\pol => appCompl True pol vps.c2 obj)
    (mkClausePlus (subjForm subj vps.sc) subj.a vps) ;
PredClPlusFocObj subj vps obj =
  insertObjClausePlus 0 True
    (\\pol => appCompl True pol vps.c2 obj)
    (mkClausePlus (subjForm subj vps.sc) subj.a vps) ;

-- Adverb insertion (slot 1), with the same Bool convention.
PredClPlusAdv subj vp adv =
  insertObjClausePlus 1 False
    (\\_ => adv.s)
    (mkClausePlus (subjForm subj vp.sc) subj.a vp) ;
PredClPlusFocAdv subj vp adv =
  insertObjClausePlus 1 True
    (\\_ => adv.s)
    (mkClausePlus (subjForm subj vp.sc) subj.a vp) ;

-- The three ClPlus categories share one lincat, so these are identities.
ClPlusWithObj cl = cl ;
ClPlusWithAdv cl = cl ;

-- The empty discourse particle.
noPart = {s = []} ;
{-han_Part = mkPart "han" "hän" ;
pa_Part = mkPart "pa" "pä" ;
pas_Part = mkPart "pas" "päs" ;
ko_Part = mkPart "ko" "kö" ;
kos_Part = mkPart "kos" "kös" ;
kohan_Part = mkPart "kohan" "köhän" ;
pahan_Part = mkPart "pahan" "pähän" ;
-}
}

View File

@@ -0,0 +1,67 @@
-- Abstract syntax of the Estonian extensions: a restricted selection of
-- the shared Extra module plus the functions and categories declared below.
-- (ConjVPS used to be listed twice in the selection; the duplicate entry
-- has been dropped, the set of inherited names is unchanged.)
abstract ExtraEstAbs = Extra [
  GenNP,
  VPI,ListVPI,BaseVPI,ConsVPI,MkVPI,ComplVPIVV,ConjVPI,
  VPS,ListVPS,BaseVPS,ConsVPS,ConjVPS,MkVPS,PredVPS,Tense,Temp,Pol,S,
  VV,VP,Conj,NP,Quant,IAdv,IComp,ICompAP,IAdvAdv,Adv,AP, Pron, ProDrop] ** {

  fun
    GenCN : NP -> CN -> CN ; -- auton merkki
    AdvExistNP : Adv -> NP -> Cl ; -- kuvassa olemme me
    AdvPredNP : Adv -> V -> NP -> Cl ; -- kuvassa hymyilee Veikko
    ICompExistNP : IComp -> NP -> QCl ; -- missä/kuka on Veikko
    IAdvPredNP : IAdv -> V -> NP -> QCl ; -- mistä alkaa Ruotsi
    RelExistNP : Prep -> RP -> NP -> RCl ; -- jossa on jazzia
    -- i_implicPron : Pron ; -- (minä), minut, ...
    whatPart_IP : IP ;
    PartCN : CN -> NP ; -- olutta
    vai_Conj : Conj ; -- minä vai sinä? ("or" in question)

    --Short forms of the pronouns
    ma_Pron : Pron ;
    sa_Pron : Pron ;
    ta_Pron : Pron ;
    me_Pron : Pron ;
    te_Pron : Pron ;
    nad_Pron : Pron ;

    OmaPoss : Quant ; -- Reflexive possessive "oma"
    ProDropPoss : Pron -> Quant ; -- vaimoni --TODO Is this relevant in Estonian? Is the agreement of pronoun ever needed, or is it the same as oma?

  cat
    ClPlus ; -- clause with more variation
    ClPlusObj ; -- which has a focusable object
    ClPlusAdv ; -- which has a focusable adverb
    Part ; -- discourse particle

  fun
    S_SVO : Part -> Temp -> Pol -> ClPlus -> S ; -- mepäs juomme maitoa nyt
    S_OSV : Part -> Temp -> Pol -> ClPlusObj -> S ; -- maitoapas me juomme nyt
    S_VSO : Part -> Temp -> Pol -> ClPlus -> S ; -- juommepas me maitoa nyt
    S_ASV : Part -> Temp -> Pol -> ClPlusAdv -> S ; -- nytpäs me juomme maitoa
    -- S_SOV : Part -> Temp -> Pol -> ClPlus -> S ; -- mepäs maitoa juomme
    S_OVS : Part -> Temp -> Pol -> ClPlus -> S ; -- maitoapas juomme me
    -- S_VOS : Part -> Temp -> Pol -> ClPlus -> S ; -- juommepas maitoa me

    PredClPlus : NP -> VP -> ClPlus ; -- me nukumme
    PredClPlusFocSubj : NP -> VP -> ClPlus ; -- mekin nukumme
    PredClPlusFocVerb : NP -> VP -> ClPlus ; -- me nukummekin
    PredClPlusObj : NP -> VPSlash -> NP -> ClPlusObj ; -- maitoa me juomme
    PredClPlusFocObj : NP -> VPSlash -> NP -> ClPlusObj ; -- maitoakin me juomme
    PredClPlusAdv : NP -> VP -> Adv -> ClPlusAdv ; -- nyt me nukumme
    PredClPlusFocAdv : NP -> VP -> Adv -> ClPlusAdv ; -- nytkin me nukumme
    ClPlusWithObj : ClPlusObj -> ClPlus ; -- to make non-fronted obj focusable
    ClPlusWithAdv : ClPlusAdv -> ClPlus ; -- to make non-fronted adv focusable

    noPart, han_Part, pa_Part, pas_Part, ko_Part, kos_Part,
    kohan_Part, pahan_Part : Part ;

}

View File

@@ -0,0 +1,20 @@
-- The Estonian concrete syntax of Grammar: the union of the phrase-category
-- modules, with TextX and TenseX in the text and tense slots.
concrete GrammarEst of Grammar =
  NounEst,
  VerbEst,
  AdjectiveEst,
  AdverbEst,
  NumeralEst,
  SentenceEst,
  QuestionEst,
  RelativeEst,
  ConjunctionEst,
  PhraseEst,
  TextX,
  IdiomEst,
  StructuralEst,
  TenseX
  ** {

  -- NOTE(review): the unlexer is named "finnish"; confirm this is intended
  -- for Estonian.
  flags
    startcat = Phr ;
    unlexer = finnish ;
    lexer = text ;

} ;

618
lib/src/estonian/HjkEst.gf Normal file
View File

@@ -0,0 +1,618 @@
resource HjkEst = open ResEst, Prelude, Predef in {
-- Implementation of the noun inflection rules from
-- Heiki-Jaan Kaalep. "Eesti käänamissüsteemi seaduspärasused" (2012)
--
-- @author Kaarel Kaljurand
-- @version 2013-09-09
flags
coding = utf8 ;
-- TODO: change the name of this file and the names of the opers in this file
-- Stress/quantity class of a word.
--   S1: stress on the last syllable
--   S2: stress on the penultimate syllable
--   S3: stress not on the last 2 syllables
-- If the S2 word ends with a vowel then we distinguish between:
--   S21: 1st quantity: blo.gi, ta.la
--   S22: 2nd quantity: rat.su, vol.le
--   S23: 3rd quantity: aas.ta
param
  SylType =
      S1
    | S2 | S21 | S22 | S23
    | S3 ;
oper
-- Record wrapper around a noun-form table.
NFS = {s : NForm => Str} ;

-- Letters taken as a sign of a foreign word.
foreign : pattern Str = #("z" | "ž" | "š") ;

-- Foreign vowel endings
foreign_v : pattern Str =
  #("ko" | "po" | "to" | "fo" | "ka" | "pa" | "ta" | "fa" | "ku" | "pu" | "tu" | "fu") ;

-- Single vowel (the list also contains "w").
v : pattern Str =
  #("a" | "e" | "i" | "o" | "u" | "õ" | "ä" | "ö" | "ü" | "w") ;

-- Doubled vowel.
vv : pattern Str =
  #("aa" | "ee" | "ii" | "oo" | "uu" | "õõ" | "ää" | "öö" | "üü") ;

-- Single consonant.
c : pattern Str =
  #("m" | "n" | "p" | "b" | "t" | "d" | "k" | "g" | "f" | "v" | "s" | "h" | "l" | "j" | "r" | "z" | "ž" | "š" | "c" | "q") ;

-- Consonant subclasses used by the gradation rules.
lmnr : pattern Str = #("l" | "m" | "n" | "r") ;
kpt  : pattern Str = #("k" | "p" | "t" | "f" | "š") ;
gbd  : pattern Str = #("g" | "b" | "d") ;
-- Types that map singular nominative to the full paradigm.
-- VI and VII include gradation which is described separately.
hjk_type,
  hjk_type_I_koi,
  hjk_type_II_ema,
  hjk_type_III_ratsu,
  hjk_type_IVa_aasta,
  hjk_type_IVb_maakas,
  hjk_type_Va_otsene,
  hjk_type_Vb_oluline,
  hjk_type_VI_link,
  hjk_type_VI_imelik,
  hjk_type_VI_meeskond,
  hjk_type_VI_seminar,
  hjk_type_VII_touge
    : Str -> NForms ;
-- hjk_type_VII_touge : Str -> NFS ;

-- Two-argument types: singular nominative plus one more string.
--   IVb_audit  : the stem vowel
--   IVb_audit1 : the stem to which the endings are attached
--   VI_tukk    : the singular genitive
hjk_type_IVb_audit,
  hjk_type_IVb_audit1,
  hjk_type_VI_tukk
    : Str -> Str -> NForms ; --NFS
-- Definition of the mapping rules.
-- Verbatim from HJKEKS.
-- Every rule passes six forms to nForms6; in the examples visible in this
-- file they are sg nom, sg gen, sg part, sg illative, pl gen, pl part.

hjk_type_I_koi koi =
  nForms6 koi koi (koi + "d") (koi + "sse") (koi + "de") (koi + "sid") ;

hjk_type_II_ema ema =
  nForms6 ema ema ema (ema + "sse") (ema + "de") (ema + "sid") ;

hjk_type_III_ratsu ratsu =
  nForms6 ratsu ratsu (ratsu + "t") (ratsu + "sse") (ratsu + "de") (ratsu + "sid") ;

-- if ends with 'i' ('arvuti') then last form is 'arvut' + 'e' + 'id'
-- There are ~50 such words in the WordNet.
hjk_type_IVa_aasta aasta =
  let
    plStem : Str = case aasta of {
      _ + "i" => (init aasta) + "e" ;
      _       => aasta
      }
  in
    nForms6 aasta aasta (aasta + "t") (aasta + "sse") (aasta + "te") (plStem + "id") ;

-- (audit "a") can be used with comparative and superlative adjectives.
-- The stem vowel "i" is replaced by "e" in the last form only.
hjk_type_IVb_audit audit vowel =
  let
    plVowel = case vowel of {
      "i" => "e" ;
      _   => vowel
      } ;
    stem : Str = audit + vowel
  in
    nForms6 audit stem (stem + "t") (stem + "sse") (stem + "te") (audit + plVowel + "id") ;

-- TODO: clean this up
-- The nominative is given separately from the stem; the stem vowel is
-- fixed to "i" ("e" in the last form).
hjk_type_IVb_audit1 nom stem =
  nForms6 nom (stem + "i") (stem + "it") (stem + "isse") (stem + "ite") (stem + "eid") ;

-- The genitive is the nominative without its last letter.
hjk_type_IVb_maakas maakas =
  let
    maaka = init maakas
  in
    nForms6 maakas maaka (maaka + "t") (maaka + "sse") (maaka + "te") (maaka + "id") ;
--Maakas is for maakas:maaka:maakat, this is for hammas:hamba:hammast
--Not sure if this is already covered by some hjk_type,
--anyway the grades are explicit with two args, more reliable
dHammas : (_,_ : Str) -> NForms ;
dHammas nom gen =
  nForms6 nom gen (nom + "t") (gen + "sse") (nom + "te") (gen + "id") ;

-- Nominative and genitive given; the third form is the genitive without
-- its final letter plus "d".
dMeri : (_,_ : Str) -> NForms ;
dMeri nom gen =
  let
    stem = init gen
  in
    nForms6 nom gen (stem + "d") (gen + "sse") (gen + "de") (gen + "sid") ;
-- This rule handles the removal of -ne and -s endings, and the addition of 'e'
-- in the case of Cne-nouns (e.g. 'raudne').
-- vastus - vastuse - vastust
-- otsene - otsese - otsest
-- raudne - raudse - raudsEt - raudsesse - raudsEte - raudseid (additional 'e')
-- TODO: variant: vastusesse | vastusse
hjk_type_Va_otsene word =
  let
    -- stem for the "t"/"te" forms: keeps an extra "e" after consonant + "ne"
    longStem : Str = case word of {
      pre + cons@(#c) + "ne" => pre + cons + "se" ;
      pre + "ne"             => pre + "s" ;
      _                      => word
      } ;
    -- stem for the forms that add "e..." themselves
    shortStem : Str = case word of {
      pre + "ne" => pre + "s" ;
      _          => word
      }
  in
    nForms6
      word
      (shortStem + "e")
      (longStem + "t")
      (shortStem + "esse")
      (longStem + "te")
      (shortStem + "eid") ;

-- TODO: variant: olulisesse | olulisse
-- "-ne" becomes "-s", "-ke" becomes "-kes"; the last form ends in "-i".
hjk_type_Vb_oluline word =
  let
    stem : Str = case word of {
      pre + "ne" => pre + "s" ;
      pre + "ke" => pre + "kes" ;
      _          => word
      }
  in
    nForms6
      word
      (stem + "e")
      (stem + "t")
      (stem + "esse")
      (stem + "te")
      (stem + "i") ;
-- Stem vowel "i"; only the second form uses the weakened stem.
hjk_type_VI_link link =
  let
    weak : Str = weaker_noun link
  in
    nForms6 link (weak + "i") (link + "i") (link + "i") (link + "ide") (link + "e") ;

--like link but
-- gen form given (takes care of vowel and consonant gradation)
-- -sid for pl.part (todo: generate short forms depending on vowel?)
{- pl_part : Str =
case v_g of {
"i" => "e" ;
_ => v_g + "sid" } ;
-}
hjk_type_VI_tukk tukk gen =
  let
    vowel : Str = last gen ;
    strong : Str = tukk + vowel
  in
    nForms6 tukk gen strong strong (strong + "de") (strong + "sid") ;

-- Stem vowel "u"; the third, fourth and sixth forms use the strengthened stem.
hjk_type_VI_imelik imelik =
  let
    strong : Str = stronger_noun imelik
  in
    nForms6 imelik (imelik + "u") (strong + "u") (strong + "u") (imelik + "e") (strong + "e") ;

-- Stem vowel "a"; only the second form uses the weakened stem.
hjk_type_VI_meeskond meeskond =
  let
    weak : Str = weaker_noun meeskond
  in
    nForms6 meeskond (weak + "a") (meeskond + "a") (meeskond + "a") (meeskond + "ade") (meeskond + "i") ;

-- Stem vowel "i", no gradation.
hjk_type_VI_seminar seminar =
  nForms6 seminar (seminar + "i") (seminar + "i") (seminar + "i") (seminar + "ide") (seminar + "e") ;

-- The strong stem is formed by strengthening the word without its last
-- letter and adding "e".
hjk_type_VII_touge touge =
  let
    touke : Str = (stronger_noun (init touge)) + "e"
  in
    nForms6 touge touke (touge + "t") (touke + "sse") (touge + "te") (touke + "id") ;

--Identical to the above, just taking 2 arguments (nom + gen)
--There are 67 nouns in test cases where stronger_noun gets it wrong
--handles liige:liikme as well
hjk_type_VII_touge2 : (_,_ : Str) -> NForms ;
hjk_type_VII_touge2 nom gen =
  let
    -- the fifth form is built on the genitive when it ends in "me" or "mne",
    -- otherwise on the nominative
    plGenStem : Str = case gen of {
      _ + "me"  => gen ;
      _ + "mne" => gen ;
      _         => nom
      }
  in
    nForms6 nom gen (nom + "t") (gen + "sse") (plGenStem + "te") (gen + "id") ;
-- Use this only to weaken the verbs
-- Only the last two letters are inspected; the first matching rule rewrites
-- them, and a word that matches no rule is returned unchanged.
weaker : Str -> Str ;
weaker word =
  let
    front = Predef.tk 2 word ;
    tail  = Predef.dp 2 word
  in
    case tail of {
      "kk" => front + "k" ;
      "pp" => front + "p" ;
      "tt" => front + "t" ;
      "ff" => front + "f" ;
      ("üt"|"üs") => front + "ö" ; --süsi,söe ; ütlema,öelda
      --"ad" => front + "aj" ; --sada,saja; maybe remove
      vow@(#v) + "k" => front + vow + "g" ;
      vow@(#v) + "p" => front + vow + "b" ;
      vow@(#v) + "t" => front + vow + "d" ;
      vow@(#v) + "g" => front + vow ; --liuglema,liuelda
      vow@(#v) + "b" => front + vow + "v" ; --leib,leiva
      vow@(#v) + "d" => front + vow ; --hoidma,hoiab
      son@(#lmnr) + "k" => front + son + "g" ;
      son@(#lmnr) + "p" => front + son + "b" ;
      son@(#lmnr) + "t" => front + son + "d" ;
      son@(#lmnr) + "d" => front + son + son ;
      son@(#lmnr) + "b" => front + son + son ;
      son@("l"|"r") + "g" => front + son ; --algama,alata
      "sk" => front + "s" ;
      "h" + #kpt => front + "h" ;
      _ => word
      } ;
-- Weakening of nouns.
-- Only the very stable weakening that happens to nouns.
-- TODO: verify correctness/completeness based on some other implementation.
-- The rules look at the end of the word and are tried in order; a word that
-- matches none of them is returned unchanged.
weaker_noun : Str -> Str ;
weaker_noun word =
  case word of {
    pre + "kk" => pre + "k" ;
    pre + "pp" => pre + "p" ;
    pre + "tt" => pre + "t" ;
    pre + "ff" => pre + "f" ;
    pre + "šš" => pre + "š" ;
    pre + son@(#lmnr) + "ss" => pre + son + "s" ;
    pre + vow@(#v) + "k" => pre + vow + "g" ;
    pre + vow@(#v) + "p" => pre + vow + "b" ;
    pre + vow@(#v) + "t" => pre + vow + "d" ;
    pre + son@(#lmnr) + "k" => pre + son + "g" ;
    pre + son@(#lmnr) + "p" => pre + son + "b" ;
    pre + son@(#lmnr) + "t" => pre + son + "d" ;
    pre + "h" + #kpt => pre + "h" ;
    pre + "kond" => pre + "konn" ;
    _ => word
    } ;
-- Strengthening of nouns.
-- Input must not have the last vowel.
-- The first group of endings is listed only to shield those words from the
-- general rules further down: they are returned unchanged.
stronger_noun : Str -> Str ;
stronger_noun word =
  case word of {
    _ + "lg" => word ;
    _ + "hk" => word ; -- tahke
    _ + "tk" => word ; -- katke
    _ + "rs" => word ; -- morse
    pre + "rr" => pre + "rd" ; -- murre
    _ + "ks" => word ; -- makse
    _ + "us" => word ; -- lause
    _ + "sk" => word ; -- raske (?)
    _ + "ts" => word ; -- katse
    pre + "ps" => pre + "psm" ; -- ripse -> ripsme
    _ + "nt" => word ; -- tante
    _ + "st" => word ; -- TODO: sometimes stm: iste, kaste
    pre + stop@("k"|"p"|"t"|"s") => pre + stop + stop ;
    pre + "g" => pre + "k" ;
    pre + "d" => pre + "t" ;
    pre + "b" => pre + "p" ;
    pre + vow@(#v) + "v" => pre + vow + "b" ; -- works for 'iive' but not 'irve'
    pre + "mm" => pre + "mb" ; -- komme -> kombe
    pre + "nn" => pre + "nd" ;
    _ => word
    } ;
-- Strengthening of verbs.
-- Only the last two letters are rewritten; everything before them is kept.
stronger : Str -> Str ;
stronger word =
  let
    front = tk 2 word ;
    tail  = dp 2 word
  in
    front + case tail of {
      pre + stop@("k"|"p"|"t"|"s") + post => pre + stop + stop + post ;
      pre + "g" + post => pre + "k" + post ;
      pre + "d" + post => pre + "t" + post ;
      pre + "b" + post => pre + "p" + post ;
      _ => tail
      } ;
-- Mapping of singular nominative to HJKEKS types.
-- This implements the patterns from HJKEKS section 8 but
-- makes the rule ordering explicit, handles things like dropping 'e'
-- in 'reegel' -> 'reegli', etc.
-- Works ~90% correctly, ~100% correctly with input longer than 10 letters.
-- If this rule delivers an incorrect form, then use the 6-arg oper.
-- This is also needed if another legal form is desired,
-- e.g. palk -> palga (the default is palk -> palgi).
--
-- This rule does not cover:
-- - exceptional words (workaround: take these from the lexicon)
-- - compound words (workaround: mark the compound border manually)
-- - comparative and superlative adjective forms (workaround: use mkA instead)
-- - type VII (t6uge -> t6uke), as one needs to detect derivation from verb
-- - last syllable superlong (rostbiif)
--
-- How to read the rules: the word is paired with its syllable type
-- (syl_type x) and the pair is matched against the patterns from top to
-- bottom; the first pattern that matches selects the paradigm oper.
-- The order of the rules is therefore part of their meaning: moving a
-- rule changes which words it can still reach.
hjk_type x =
case <(syl_type x), x> of {
<S3, _ + "ke">
=> hjk_type_Vb_oluline x ;
<_, _ + "kond">
=> hjk_type_VI_meeskond x ;
-- Some S2 -ik words (voolik), we only cover words with double vowel
<_, _ + #vv + ("lik"|"nik"|"stik")>
=> hjk_type_IVb_audit x "u" ;
-- Other -ik words as in HJKEKS,
-- but added 'ndik' which fixes fractions ('kaheksandik')
-- and is wrong only for 'kandik'.
<_, _ + ("lik"|"nik"|"stik"|"ndik")>
=> hjk_type_VI_imelik x ;
-- Remaining -k words (but need to be S2)
-- but not 'konjak'
<S2, _ + ("a"|"e"|"i") + ("ng"|"k")>
=> hjk_type_IVb_audit x "u" ;
-- Other -ik words (not in HJKEKS)
-- including also: alevik, asemik, lobudik, hämarik, sarapik, põletik
<_, _ + ("vik"|"mik"|"dik"|"rik"|"pik"|"tik")>
=> hjk_type_VI_imelik x ;
-- kikas
<_, ? + #v + #c + #v + "s">
=> hjk_type_Va_otsene x ;
<_, _ + ("ngas"|"kas"|"jas"|"nud"|"tud")>
=> hjk_type_IVb_maakas x ;
<S1, _ + #v + #v>
=> hjk_type_I_koi x ;
-- 'statiiv' (not like 'karjuv')
-- (every word matched here would also match the S1 vowel-vowel-consonant
-- rule two rules below, which selects the same oper)
<S1, _ + #vv + #c>
=> hjk_type_VI_link x ;
<S3, _ + #c + #v + #lmnr>
=> hjk_type_VI_seminar x ;
<S1, _ + #v + #v + #c>
=> hjk_type_VI_link x ;
<_, _ + ("us"|"is")>
=> hjk_type_Vb_oluline x ;
<S3, _ + #v + #v + #c>
=> hjk_type_VI_link x ;
<(S1|S3), _ + #v + #c + #c>
=> hjk_type_VI_link x ;
<(S1|S3), _ + #v + #c + #c + #c>
=> hjk_type_VI_link x ;
<_, _ + "nna">
=> hjk_type_III_ratsu x ;
-- any syllable type except S21 and S22
<-(S21|S22), _ + ("nu"|"tu")>
=> hjk_type_IVa_aasta x ;
-- TODO: improve foreign detection
<S2, _ + #foreign + _ + "in">
=> hjk_type_IVb_audit x "i" ;
-- TODO: this is not in HJKEKS
-- 'absurd' vs 'ebard'
<S2, _ + #v + #lmnr + "d">
=> hjk_type_IVb_audit x "i" ;
-- sometimes 'a' (laurits) TODO: this is not in HJKEKS
<S2, _ + #v + #kpt + "s">
=> hjk_type_IVb_audit x "i" ;
-- TODO: next 3 rules: last syllable must be long
-- portfell, TODO: not 'karask'
<S2, _ + #v + #c + #c>
=> hjk_type_VI_link x ;
-- rostbiif, not viiul
<S2, _ + #c + #v + #v + #c>
=> hjk_type_VI_link x ;
-- impulss
<S2, _ + #v + #c + #c + #c>
=> hjk_type_VI_link x ;
-- TODO: sometimes masked by 'maakas'
<_, _ + #v + "s">
=> hjk_type_Va_otsene x ;
-- (the alternative "tav" is already covered by "v")
<_, _ + ("v"|"tav")>
=> hjk_type_IVb_audit x "a" ;
-- The choice between Va (pl part: -seid) and Vb (pl part: -si)
-- is based on checking the derivational ending.
-- We just check the ending of the word and require at least 2 letters
-- to precede the ending.
-- We added also -tine and -ldane (which occur with adjectives).
<_, _ + ? + ? + ("line"|"lane"|"mine"|"kene"|"tine"|"ldane")>
=> hjk_type_Vb_oluline x ;
-- k6ne
<S21, _ + "e">
=> hjk_type_III_ratsu x ;
-- Many adjectives end with "ne" (40% in WordNet)
-- We require them to be at least 5 letters long (excluding 'öine'),
-- to give a chance to VII_touge (next rule).
<_, _ + ? + ? + ? + "ne">
=> hjk_type_Va_otsene x ;
-- Note: this rule does not actually check the derivation from verb.
-- verb + e, TODO: masked by S21/e
<(S2|S22), _ + "e">
=> hjk_type_VII_touge x ;
-- ufo, pita, lito
<S21, _ + #foreign_v>
=> hjk_type_III_ratsu x ;
<S21, _ + #v>
=> hjk_type_II_ema x ;
<S22, _ + #v>
=> hjk_type_III_ratsu x ;
<S23, _ + #v>
=> hjk_type_IVa_aasta x ;
<S2, _ + "in">
=> hjk_type_IVb_audit x "a" ;
-- 'e' deletion
-- kringel -> kringli, amper -> ampri, meeter -> meetri, reegel -> reegli
-- kaabel-> kaabli (TODO: not: juubel -> juubli)
-- spikker -> spikri (TODO: not: pokker -> pokkeri)
-- Note: pintsel -> pintsli, but not pitser -> pitsri
-- Note: 'redel' and 'paber' do not lose the 'e'.
-- (a doubled consonant before the 'e' is also shortened: init kk)
<S2, y + kk@("kk"|"pp"|"tt"|"hh") + "e" + l@("l"|"r")>
=> hjk_type_IVb_audit1 x (y + (init kk) + l) ;
-- aaker -> aakri, teater -> teatri
<S2, y + vvkpt@(#v + #v + #kpt) + "e" + l@("l"|"r")>
=> hjk_type_IVb_audit1 x (y+vvkpt+l) ;
<S2, y + vv@(#vv) + gbd@(#gbd) + "e" + l@("l"|"r")>
=> hjk_type_IVb_audit1 x (y+vv+gbd+l) ;
-- Disabled, 50-50 correctness
--<S2, y + vv@(#vv) + lmnr@(#lmnr) + "e" + l@("l"|"r")>
-- => hjk_type_IVb_audit1 x (y+vv+lmnr+l) ; -- 50-50
<S2, y + vv@(#vv) + s@("s"|"v") + "e" + l@("l"|"r")>
=> hjk_type_IVb_audit1 x (y+vv+s+l) ;
<S2, y + n@("ht"|"hk"|"hv"|"nts"|"ld"|"lv"|"lb"|"ng"|"nd"|"mb"|"mp"|"nt"|"ps"|"ks"|"sk"|"st") + "e" + l@("l"|"r")>
=> hjk_type_IVb_audit1 x (y+n+l) ;
-- remaining S2 words in -el/-er keep the 'e'
<S2, y + "e" + l@("l"|"r")>
=> hjk_type_IVb_audit x "i" ;
-- TODO: sometimes masked by 'link'
<S2, _ + #c>
=> hjk_type_IVb_audit x "i" ;
<S3, _ + #v>
=> hjk_type_IVa_aasta x ;
-- verb + 'e'
<_, _ + "e">
=> hjk_type_VII_touge x ;
-- catch all that end with consonant
<_, _ + #c>
=> hjk_type_IVb_audit x "i" ;
-- TODO: not in HJKEKS
<_, _ + ("ia"|"ja")> --kündja, not gerilja
=> hjk_type_IVa_aasta x ;
--added by Inari 07.10.
-- NOTE(review): this rule cannot fire here: an S23 word ending in "la"
-- already matches the earlier <S23, _ + #v> rule, which selects the same oper.
<S23, _ + #c + ("la")> --haigla, not gorilla
=> hjk_type_IVa_aasta x ;
-- catch all
<_, _>
=> hjk_type_III_ratsu x
} ;
-- Assigns stress/quantity indicator (SylType) to the word based on
-- its character composition.
--
-- The word is matched against the patterns below from top to bottom and
-- the first matching branch decides the result, so the ORDER of the
-- branches is significant: more specific patterns must precede the more
-- general ones they overlap with.
-- The section headings ("all N-letters") are only a rough guide: every
-- pattern that starts with `_` also matches longer words, because `_`
-- matches an arbitrary (possibly empty) prefix.
-- The pattern macros #v, #c, #vv, #gbd, #kpt and #lmnr are defined
-- outside this part of the file.
--
-- Note: you cannot use recursion (circular definitions) in these rules
-- Note: patterns must be linear (GF book C.4.13), i.e. you cannot write
-- oi@(#v + #v) + oi => S2 ; -- oi-oi, ai-ai, oo-oo
syl_type : Str -> SylType ;
syl_type x =
case x of {
-- all 1-letters
? => S1 ;
-- all 2-letters
? + ? => S1 ;
-- all 3-letters
#v + #c + #v => S21 ;
#v + #v + #v => S22 ;
? + ? + ? => S1 ; -- koi, kae
-- all 4-letters
#c + #v + #v + #c => S1 ; -- siid
#c + #v + #c + #c => S1 ; -- link
#v + #c + #v + #c => S2 ;
#v + #vv + #c => S1 ; -- auul, ioon, oaas
#v + #v + #v + #c => S2 ; -- aiak (?)
#v + #v + #c + #v => S22 ; -- aine, aade; not: 6ige
#v + #c + #v + #v => S1 ; -- epee, oboe
#v + #c + #c + #v => S22 ; -- iste, iglu; not: 6htu
#c + #v + #c + #v => S21 ;
#c + #v + #v + #v => S22 ; -- muie, neiu, riie
? + ? + ? + ? => S1 ; -- fallback for all remaining 4-letter words
-- all 5-letters (the `_`-prefixed patterns also catch longer words)
_ + #c + "ia" => S2 ; -- aaria, minia, orgia, kirurgia, nostalgia
#v + #c + #c + #v + #v => S1 ; -- armee
#c + #v + #c + #v + #v => S1 ; -- depoo
#c + #c + #v + #c + #c => S1 ; -- tramm
#c + #v + #c + #c + #c => S1 ;
#c + #v + #vv + #c => S1 ; -- poeem
#c + #v + #v + #v + #c => S2 ; -- hoius, laius, maius
#c + #v + #c + #v + #c => S2 ; -- redel
#c + #v + #c + #gbd + "e" => S23 ; -- valge, k6rge; p6rge, hange
#c + #v + #v + #gbd + "e" => S22 ; -- haige, kauge; t6uge
#c + #v + #v + #c + #v => S22 ; -- lause; TODO: leitu, rootu (S23)
#c + #v + #c + #c + #v => S22 ; -- ratsu; not: surnu
#v + #c + #c + #c + #v => S23 ;
#v + #c + #c + #v + #c => S2 ; -- amper
#v + #c + #v + #c + #c => S2 ; -- avang
_ + #c + #vv + #c + #c => S1 ; -- loots (double vowel, otherwise the same as below)
#c + #v + #v + #c + #c => S2 ; -- laeng, loend
#c + #c + #v + #v + #c => S1 ; -- bluus, kruus, kreem
#v + #c + #v + #v + #c => S1 ; -- ukaas, TODO: not 'avaus'
#v + #v + #c + #v + #c => S2 ; -- aatom
#v + #v + #c + #c + #v => S23 ; -- aasta
#v + #v + #c + #v + #v => S1 ; -- aaloe (?)
#c + #c + #v + #c + #v => S21 ; -- blogi
_ + ? + #v + #vv + #c => S1 ; -- -ioos, kruiis
#c + #c + #v + #v + #v + #c => S2 ; -- flaier (a 6-letter pattern placed here)
_ + ? + #c + #v + #c + #v => S3 ; -- oluline
-- all 6-letters
#v + #c + #c + #v + #v + #c => S1 ; -- aplaus
#v + #c + #c + #v + #c + #c => S2 ; -- astang, ellips
#c + #vv + #c + #v + #v => S23 ; -- muumia, raadio, TODO: exclude 'vaarao'
#c + #v + #v + #c + #v + #v => S1 ; -- peoleo
#c + #v + #v + #c + #c + #v => S23 ; -- haigla --added by Inari, not sure if always correct
#c + #v + #c + #c + #c + #v => S23 ; -- vangla --added by Inari, not sure if always correct
#c + #v + #c + #vv + #c => S1 ; -- deviis (double vowel in the last syllable)
#v + #c + #v + #c + #v + #v => S1 ; -- agoraa
#c + #v + #c + #v + #c + #c => S2 ;
#c + #v + #c + #v + #c + #v => S3 ;
#v + #c + #v + #c + #c + #v => S3 ; -- yheksa
#c + #v + #c + #c + #v + #c => S2 ; -- rektor
#c + #v + #c + #v + #v + #c => S2 ; -- paleus
#c + #v + #v + #c + #v + #c => S2 ; -- meeter, reegel
#v + #v + #c + #c + #v + #c => S2 ; -- aastak
#v + #c + #c + #c + #v + #c => S2 ; -- andmik
#v + #c + #c + #v + #c + #v => S3 ;
_ + #v + #c + #v + #c + #v + #c => S3 ; -- alevik, elanik
-- all 7-letters (again, the `_`-prefixed patterns are open-ended)
_ + ? + ? + #c + #vv + #c => S1 ; -- double vowel in the last syllable: bensiin, benseen, bensool
#c + #v + #v + #c + #c + #v + #c => S2 ; -- jooksik
#c + #v + #c + #c + #c + #v + #c => S2 ; -- hurtsik
#c + #v + #c + #c + #v + #c + #c => S2 ; -- kitsend
#c + #v + #c + #c + #v + #v + #c => S2 ; -- pension
#c + #v + #c + #v + #c + #v + #c => S3 ; -- seminar
#c + #c + #v + #c + #c + #v + #c => S2 ; -- kringel, plastik
_ + #v + #c + #v + #kpt + #kpt + #v + #c => S2 ; -- elekter, adapter
_ + #c + #v + #lmnr + #gbd + #v + #c => S2 ; -- (k)alender, (dets)ember
_ + #c + #v + #lmnr + #kpt + #v + #c => S2 ; -- (re)porter
_ + #c + #v + "stik" => S3 ; -- kuristik (TODO: not logistik)
_ + #c + #v + "s" + #kpt + #v + #c => S2 ; -- (k)anister
#v + #c + #v + #c + #c + #v + #c => S3 ; -- apelsin
#v + #c + #c + #v + #c + #v + #c => S3 ; -- admiral
#c + #v + #c + #v + #c + #c + #v => S3 ; -- kaheksa
#c + #c + #v + #c + #v + #c + #c => S2 ; -- klopits
#c + #v + #v + #c + #v + #c + #c => S2 ; -- haarang
#c + #v + #v + #c + #v + #v + #c => S2 ; -- raadius, kauneim
_ + #c + #v + #v + #c + #v + #c => S2 ; -- araabik
_ + #lmnr + #gbd + #v + #c + #c + #v + #c => S3 ; -- (pa)lderjan, (ko)rgitser
-- other (longer words and suffix-based rules)
_ + #c + #v + #c + #c + #v + #c + #v + #c => S3 ; -- karneval
#c + #v + #c + #v + #c + #c + #v + #c => S3 ; -- ragastik (kalender is handled above)
_ + #v + #v + #c + #v + #c + #c + #v + #c => S3 ; -- ainestik
_ + #c + #c + #v + #c + #c + #v + #c + #c => S3 ; -- ampersand
_ + #c + #v + #c + #v + #c + #c => S1 ; -- dividend
_ + #v + #c + #c + #c + #v + #v => S1 ; -- displei
_ + #c + #v + #c + #c + #v + #v => S1 ; -- politsei
_ + #c + #v + #c + #v + #v => S1 ; -- defilee, kompanii
_ => S2 -- the default is S2, but the above rules should catch most of the words
} ;
}

View File

@@ -0,0 +1,79 @@
-- Estonian linearizations of the idiomatic constructions of the abstract
-- module Idiom: existentials, clefts, impersonal and generic clauses,
-- the progressive, and the first person plural imperative.
concrete IdiomEst of Idiom = CatEst **
  open MorphoEst, ParadigmsEst, Prelude in {

  flags optimize=all_subs ; coding=utf8;

  lin

    -- Existential with "olemas": the NP is nominative in positive clauses
    -- ("on olemas lammas") and partitive under negation
    -- ("ei ole olemas lammast").
    ExistNP np =
      let
        npCase : Polarity -> NPForm = \pol -> case pol of {
          Neg => NPCase Part ;
          Pos => NPCase Nom
          } ;
        existVP = insertObj (\\_,pol,_ => "olemas" ++ np.s ! npCase pol) (predV olla)
      in
        mkClause noSubj (agrP3 Sg) existVP ;

    -- Existential question: the interrogative pronoun is the nominative
    -- subject of "olemas olema"; only the declarative word order of the
    -- clause is used.
    ExistIP ip =
      let
        subjCase : NPForm = NPCase Nom ; ---- also partitive in Extra
        subj = subjForm (ip ** {isPron = False ; a = agrP3 ip.n}) subjCase ;
        existVP = insertObj (\\_,_,_ => "olemas") (predV olla) ;
        clause = mkClause subj (agrP3 Sg) existVP
      in
        {s = \\t,a,p => clause.s ! t ! a ! p ! SDecl} ;

    -- Cleft with a nominative NP followed by an extraposed relative
    -- clause: "see on Mati, kellest Liis lugu peab".
    CleftNP np rs =
      let
        focusVP = insertObj (\\_,_,_ => np.s ! NPCase Nom) (predV olla)
      in
        mkClause (\_ -> "see") (agrP3 Sg) (insertExtrapos (rs.s ! np.a) focusVP) ;

    -- Cleft with an adverbial; the sentence is always introduced by "kus".
    -- Est: "see on Toris, kus Mati elab" (?)
    CleftAdv ad s =
      let
        focusVP = insertObj (\\_,_,_ => ad.s) (predV olla)
      in
        mkClause (\_ -> "see") (agrP3 Sg) (insertExtrapos ("kus" ++ s.s) focusVP) ;

    -- Subjectless clause, third person singular agreement.
    ImpersCl vp = mkClause noSubj (agrP3 Sg) vp ;

    -- Generic clause: subjectless, and every verb form of the VP is
    -- replaced by its VIPass form; all other fields are passed through.
    GenericCl vp = mkClause noSubj (agrP3 Sg) {
      s   = \\_ => vp.s ! VIPass ;
      s2  = vp.s2 ;
      p   = vp.p ;
      adv = vp.adv ;
      ext = vp.ext ;
      sc  = vp.sc
      } ;

    -- Progressive: "olema" carries the inflection and the InfMas form of
    -- the original verb is appended after the complements.
    ProgrVP vp =
      let
        masForm = (vp.s ! VIInf InfMas ! Simul ! Pos ! agrP3 Sg).fin
      in {
        s   = (predV olla).s ;
        s2  = \\b,p,a => vp.s2 ! b ! p ! a ++ masForm ;
        p   = vp.p ;
        adv = vp.adv ;
        ext = vp.ext ;
        sc  = vp.sc
        } ;

    -- First person plural imperative, built from the VIPass form of the
    -- verb, because the imperative is not available in a $VP$.
    ImpPl1 vp =
      let
        verb = vp.s ! VIPass ! Simul ! Pos ! Ag Pl P1
      in {
        s = verb.fin ++ verb.inf ++ vp.s2 ! True ! Pos ! Ag Pl P1 ++ vp.p ++ vp.ext
        } ;

  oper
    -- The copula "olema" with a nominative subject.
    olla = verbOlema ** {sc = NPCase Nom} ;

    -- Empty subject, independent of polarity.
    noSubj : Polarity -> Str = \_ -> [] ;
}

View File

@@ -0,0 +1,15 @@
flags coding=utf8;
kayda_V = mkV "käima" ;
tuntea_V = mkV "tundma" ;
nahda_V = mkV "nägema" ;
tehda_V = mkV "tegema" ;
tietaa_V = mkV "teadma" ;
taitaa_V
siita_V

View File

@@ -0,0 +1,10 @@
-- Top-level Estonian grammar: the resource grammar GrammarEst combined
-- with the test lexicon LexiconEst.
-- TODO: include the full GrammarEst
-- The functions listed in the exclusion list below (Slash2V3, SlashV2A,
-- Slash3V3, SlashV2VNP, SlashVV) are currently left out only for
-- performance reasons.
-- NOTE(review): the flag `lexer = finnish` is presumably inherited from the
-- Finnish grammar this module was derived from -- confirm it is intended.
concrete LangEst of Lang =
GrammarEst - [Slash2V3,SlashV2A,Slash3V3,SlashV2VNP,SlashVV], ---- to speed up compilation
LexiconEst
** {
flags startcat = Phr ; unlexer = text ; lexer = finnish ;
} ;

View File

@@ -0,0 +1,378 @@
concrete LexiconEst of Lexicon = CatEst ** open MorphoEst, ParadigmsEst, Prelude in {
flags
optimize=values ; coding=utf8;
lin
airplane_N = mkN "lennuk" ;
alas_Interj = ss "paraku" ;
answer_V2S = mkV2 (mkV "vastama" "vastata") callative ;
apartment_N = mkN "korter" ;
apple_N = mkN "õun" ;
art_N = mkN "kunst" ;
ask_V2Q = mkV2 (mkV "küsima") (casePrep ablative) ;
baby_N = mkN "beebi" ;
bad_A = mkA (mkN "halb" "halva" "halba" "halba" "halbade" "halbu") "halvem" "halvim" ;
bank_N = mkN "pank" ;
beautiful_A = mkA (mkN "kaunis" "kauni" "kaunist" "kaunisse" "kaunite" "kauneid") ;
become_VA = mkVA (mkV "saama") ctranslative ; -- tema saab vanaks
beer_N = mkN "õlu" ;
beg_V2V = mkV2V (mkV "paluma") cpartitive ;
big_A = mkA (mkN "suur" "suure" "suurt" "suurde" "suurte" "suuri");
bike_N = mkN "ratas" ;
bird_N = mkN "lind" ;
black_A = mkA (mkN "must" "musta" "musta") ;
blue_A = mkA "sinine" ;
boat_N = mkN "paat" ;
book_N = mkN "raamat" "raamatu" "raamatut" "raamatusse" "raamatute" "raamatuid" ;
boot_N = mkN "saabas" ;
boss_N = mkN "boss" ;
boy_N = mkN "poiss" ;
bread_N = mkN "leib" ;
break_V2 = mkV2 (mkV "katki" (mkV "tegema" "teha")) ;
-- Superlative "laiim" replaces the former placeholder string "TODO",
-- which would otherwise have been linearized literally.
broad_A = mkA (mkN "lai" "laia" "laia" "laia" "laiade" "laiu") "laiem" "laiim" ;
brother_N2 = mkN2 (mkN "vend" "venna" "venda" "venda" "vendade" "vendi") ;
brown_A = mkA "pruun" ;
butter_N = mkN "või" ;
buy_V2 = mkV2 (mkV "ostma") ;
camera_N = mkN "kaamera" ;
cap_N = mkN "müts" ;
car_N = mkN "auto" ;
carpet_N = mkN "vaip" ;
cat_N = mkN "kass" ;
ceiling_N = mkN "lagi" ;
chair_N = mkN "tool" ;
cheese_N = mkN "juust" ;
child_N = mkN "laps" "lapse" "last" "lapsesse" "laste" "lapsi" ;
church_N = mkN "kirik" ;
city_N = mkN "linn" "linna" "linna" "linna" "linnade" "linnu" ;
clean_A = mkA (mkN "puhas" "puhta" "puhast" "puhtasse" "puhaste" "puhtaid");
clever_A = mkA "tark" ;
close_V2 = mkV2 (mkV "sulgema") ;
coat_N = mkN "mantel" ;
cold_A = mkA "külm" ;
come_V = mkV "tulema" ;
computer_N = mkN "arvuti" ;
country_N = mkN "maa" ;
cousin_N = mkN "nõbu" ;
cow_N = mkN "lehm" ;
die_V = mkV "surema" ;
dirty_A = mkA "räpane" ;
-- Using 'terminative' for the destination works better with nouns.
-- And also with placenames, as one does not need to decide between
-- illative (Tartusse) and allative (Põltsamaale).
-- Unfortunately, we cannot use a similar trick for the source (*Põltsamaast vs Põltsamaalt).
distance_N3 = mkN3 (mkN "kaugus") celative (casePrep terminative) ;
doctor_N = mkN "arst" ;
dog_N = mkN "koer" ;
door_N = mkN "uks" ;
drink_V2 = mkV2 (mkV "jooma") cpartitive ;
easy_A2V = mkA2 (mkA (mkN "lihtne")) callative ;
eat_V2 = mkV2 (mkV "sööma") cpartitive ;
empty_A = mkA "tühi" ;
enemy_N = mkN "vaenlane" ;
factory_N = mkN "tehas" ;
father_N2 = mkN2 (mkN "isa") ;
fear_VS = mkVS (mk2V "kartma" "karta") ;
find_V2 = mkV2 (mk2V "leidma" "leida") ;
fish_N = mkN "kala" ;
floor_N = mkN "põrand" ;
forget_V2 = mkV2 (mkV "unustama") ;
fridge_N = mkN "külm" (mkN "kapp") ;
friend_N = mkN "sõber" ;
fruit_N = mkN "puu" (mkN "vili") ;
fun_AV = mkAV (mkA (mkN "lõbus")) ;
garden_N = mkN "aed" ;
girl_N = mkN "tüdruk" ;
glove_N = mkN "kinnas" ;
gold_N = mkN "kuld" ;
good_A = mkA (mkN "hea") "parem" "parim" ;
go_V = mkV "minema" "minna" "läheb" "minnakse"
"minge" "läks" "läinud" "mindud" ;
green_A = mkA "roheline" ;
harbour_N = mkN "sadam" ;
hate_V2 = mkV2 (mkV "vihkama") cpartitive ;
hat_N = mkN "müts" ;
hear_V2 = mkV2 (mkV "kuulma") ;
hill_N = mkN "küngas" "künka" "küngast" "künkasse" "küngaste" "künkaid" ;
hope_VS = mkVS (mkV "lootma") ;
horse_N = mkN "hobune" ;
hot_A = mkA "kuum" ;
house_N = mkN "maja" ;
important_A = mkA "tähtis" ;
industry_N = mkN "tööstus" ;
iron_N = mkN "raud" ;
king_N = mkN "kuningas" ;
know_VS = mkVS know_V ;
know_VQ = mkVQ know_V ;
know_V2 = mkV2 (mkV "tundma") ;
lake_N = mkN "järv" ;
lamp_N = mkN "lamp" ;
learn_V2 = mkV2 (mkV "õppima") ;
leather_N = mkN "nahk" ;
leave_V2 = mkV2 (mkV "jätma") ;
like_V2 = mkV2 (mkV "lugu" (mkV "pidama" "pidada" "peab" "peetakse")) celative ;
listen_V2 = mkV2 (mkV "kuulama") partitive ;
live_V = mkV "elama" ;
long_A = mkA "pikk" ;
lose_V2 = mkV2 (mkV "kaotama") ;
love_N = mkN "armastus" ;
love_V2 = mkV2 (mkV "armastama") partitive ;
man_N = mkN "mees" "mehe" "meest" "mehesse" "meeste" "mehi" ;
married_A2 = mkA2 (mkA "abielus") (casePrep comitative) ;
meat_N = mkN "liha" ;
milk_N = mkN "piim" ;
moon_N = mkN "kuu" ;
mother_N2 = mkN2 (mkN "ema") ;
mountain_N = mkN "mägi" "mäe" "mäge" "mäkke" "mägede" "mägesid" ;
music_N = mkN "muusika" ;
narrow_A = mkA "kitsas" ;
new_A = mkA (mkN "uus" "uue" "uut" "uude" "uute" "uusi") "uuem" "uusim" ;
newspaper_N = mkN "aja" (mkN "leht") ;
oil_N = mkN "õli" ;
old_A = mkA (mkN "vana" "vana" "vana" "vanasse" "vanade" "vanasid") "vanem" "vanim" ;
open_V2 = mkV2 (mkV "avama") ;
paint_V2A = mkV2A (mkV "maalima") accPrep ctranslative ;
paper_N = mkN "paber" ;
paris_PN = mkPN (mkN "Pariis") ;
peace_N = mkN "rahu" ;
pen_N = mkN "pastakas" ;
planet_N = mkN "planeet" ;
plastic_N = mkN "kile" ;
play_V2 = mkV2 (mkV "mängima") cpartitive ;
policeman_N = mkN "politseinik" ;
priest_N = mkN "preester" ;
probable_AS = mkAS (mkA (mkN "tõenäoline")) ;
queen_N = mkN "kuninganna" ;
radio_N = mkN "raadio" ;
rain_V0 = mkV0 (mkV "sadama" "sadada" "sajab") ;
read_V2 = mkV2 (mkV "lugema" "lugeda" "loeb") ;
red_A = mkA "punane" ;
religion_N = mkN "usk" ;
restaurant_N = mkN "restoran" ;
river_N = mkN "jõgi" ;
rock_N = mkN "kalju" ;
roof_N = mkN "katus" ;
rubber_N = mkN "kumm" ;
run_V = mkV "jooksma" "joosta" ;
say_VS = mkVS (mkV "ütlema") ;
school_N = mkN "kool" ;
science_N = mkN "teadus" ;
sea_N = mkN "meri" ;
seek_V2 = mkV2 (mkV "otsima") cpartitive ;
see_V2 = mkV2 (mkV "nägema" "näha") ;
sell_V3 = mkV3 (mkV "müüma") accPrep callative ;
send_V3 = mkV3 (mkV "saatma") accPrep callative ;
sheep_N = mkN "lammas" ;
ship_N = mkN "laev" ;
shirt_N = mkN "särk" ;
shoe_N = mkN "king" ;
shop_N = mkN "kauplus" ;
short_A = mkA "lühike" ;
silver_N = mkN "hõbe" ;
sister_N = mkN "õde" "õe" "õde" "õesse" "õdede" "õdesid" ;
sleep_V = mkV "magama" ;
small_A = mkA (mkN "väike" "väikse" "väikest" "väiksesse" "väikeste" "väikseid") ;
snake_N = mkN "uss" ;
sock_N = mkN "sukk" ;
speak_V2 = mkV2 talk_V cpartitive ;
star_N = mkN "täht" ;
steel_N = mkN "teras" ;
stone_N = mkN "kivi" ;
stove_N = mkN "ahi" ;
student_N = mkN "tudeng" ;
stupid_A = mkA "loll" ;
sun_N = mkN "päike" ;
switch8off_V2 = mkV2 (mkV "välja" (mkV "lülitama")) ;
switch8on_V2 = mkV2 (mkV "sisse" (mkV "lülitama")) ;
table_N = mkN "laud" ;
talk_V3 = mkV3 talk_V callative celative ;
teacher_N = mkN "õpetaja" ;
teach_V2 = mkV2 (mkV "õpetama") ;
television_N = mkN "televisioon" ;
thick_A = mkA "paks" ;
thin_A = mkA "õhuke" ;
train_N = mkN "rong" ;
travel_V = mkV "reisima" ;
tree_N = mkN "puu" ;
ugly_A = mkA "kole" ;
understand_V2 = mkV2 (mkV "aru" (mkV "saama")) celative ;
university_N = mkN "ülikool" ;
village_N = mkN "küla" ;
wait_V2 = mkV2 (mkV "ootama") partitive ;
walk_V = mkV "kõndima" ;
warm_A = mkA (mkN "soe" "sooja" "sooja" "sooja" "soojade" "soojasid") "soojem" "soojim" ;
war_N = mkN "sõda" ;
watch_V2 = mkV2 (mkV "vaatama") cpartitive ;
water_N = mkN "vesi" ;
white_A = mkA "valge" ;
window_N = mkN "aken" ;
wine_N = mkN "vein" ;
win_V2 = mkV2 (mkV "võitma") ;
woman_N = mkN "naine" "naise" "naist" "naisesse" "naiste" "naisi" ;
wonder_VQ = mkVQ (mkV "arutlema" "arutleda") ; -- TODO
wood_N = mkN "puu" ;
write_V2 = mkV2 (mkV "kirjutama") ;
yellow_A = mkA "kollane" ;
young_A = mkA "noor" ;
do_V2 = mkV2 (mkV "tegema" "teha") ;
now_Adv = mkAdv "nüüd" ;
already_Adv = mkAdv "juba" ;
song_N = mkN "laul" ;
add_V3 = mkV3 (mkV "lisama") accPrep callative ;
number_N = mkN "number" ;
put_V2 = mkV2 (mkV "panema") ;
stop_V = mkV "peatuma" ;
jump_V = mkV "hüppama" ;
left_Ord = mkOrd1 (mkN "vasak") ;
right_Ord = mkOrd1 (mkN "parem") ;
far_Adv = mkAdv "kaugel" ;
correct_A = mkA "õige" ;
dry_A = mkA "kuiv" ;
dull_A = mkA "igav" ;
full_A = mkA (mkN "täis") "täiem" "täiim" Invariable ; -- 'täis' is one of the non-inflecting adjectives
heavy_A = mkA "raske" ;
near_A = mkA "lähedane" ;
rotten_A = mkA "mäda" ;
round_A = mkA "ümmargune" ;
sharp_A = mkA "terav" ;
smooth_A = mkA "sile" ;
straight_A = mkA "sirge" ;
wet_A = mkA "märg" ;
wide_A = mkA "lai" ;
animal_N = mkN "loom" ;
ashes_N = mkN "tuhk" ;
back_N = mkN "selg" ;
bark_N = mkN "koor" ;
belly_N = mkN "kõht" ;
blood_N = mkN "veri" ;
bone_N = mkN "luu" ;
breast_N = mkN "rind" ;
cloud_N = mkN "pilv" ;
day_N = mkN "päev" ;
dust_N = mkN "tolm" ;
ear_N = mkN "kõrv" ;
earth_N = mkN "maa" ;
egg_N = mkN "muna" ;
eye_N = mkN "silm" ;
fat_N = mkN "rasv" ;
feather_N = mkN "sulg" ;
fingernail_N = mkN "küüs" ;
fire_N = mkN "tuli" ;
flower_N = mkN "lill" ;
fog_N = mkN "udu" ;
foot_N = mkN "jalg" ;
forest_N = mkN "mets" ;
grass_N = mkN "rohi" ;
guts_N = mkN "soolestik" ;
hair_N = mkN "juuksed" ; -- TODO: plural
hand_N = mkN "käsi" ;
head_N = mkN "pea" ;
heart_N = mkN "süda" ;
horn_N = mkN "sarv" ;
husband_N = man_N ;
ice_N = mkN "jää" ;
knee_N = mkN "põlv" ;
leaf_N = mkN "leht" ;
leg_N = mkN "jalg" ;
liver_N = mkN "maks" ;
louse_N = mkN "täi" ;
mouth_N = mkN "suu" ;
name_N = mkN "nimi" ;
neck_N = mkN "kael" ;
night_N = mkN "öö" ;
nose_N = mkN "nina" ;
person_N = mkN "inimene" ;
rain_N = mkN "vihm" ;
road_N = mkN "tee" ;
root_N = mkN "juur" ;
rope_N = mkN "köis" ;
salt_N = mkN "sool" ;
sand_N = mkN "liiv" ;
seed_N = mkN "seeme" ;
skin_N = mkN "nahk" ;
sky_N = mkN "taevas" ;
smoke_N = mkN "suits" ;
snow_N = mkN "lumi" ;
stick_N = mkN "kepp" ;
tail_N = mkN "saba" ;
tongue_N = mkN "keel" ;
tooth_N = mkN "hammas" ;
wife_N = mkN "naine" ;
wind_N = mkN "tuul" ;
wing_N = mkN "tiib" ;
worm_N = mkN "uss" ;
year_N = mkN "aasta" ;
bite_V2 = mkV2 (mkV "purema") ;
blow_V = mkV "puhuma" ;
burn_V = mkV "põlema" ;
count_V2 = mkV2 (mkV "loendama") ;
cut_V2 = mkV2 (mk2V "lõikama" "lõigata") ;
dig_V = mkV "kaevama" ;
fall_V = mkV "kukkuma" ;
-- Uses the same explicit two-form verb (kartma, karta) as fear_VS, so that
-- both entries share one inflection instead of relying on the one-argument guess.
fear_V2 = mkV2 (mk2V "kartma" "karta") cpartitive ;
fight_V2 = mkV2 (mkV "võitlema") (postPrep partitive "vastu") ;
float_V = mkV "liuglema" ;
flow_V = mkV "voolama" ;
fly_V = mkV "lendama" ;
freeze_V = mkV "jäätuma" ;
give_V3 = mkV3 (mkV "andma") accPrep callative ;
hit_V2 = mkV2 (mkV "lööma") cpartitive ;
hold_V2 = mkV2 (mkV "hoidma") cpartitive ;
-- "jahti pidama": the verb "pidama" is given with the same explicit forms
-- (pidada, peab, peetakse) as in like_V2 ("lugu pidama").
hunt_V2 = mkV2 (mkV "jahti" (mkV "pidama" "pidada" "peab" "peetakse")) cpartitive ;
kill_V2 = mkV2 (mkV "tapma") ;
laugh_V = mkV "naerma" ;
lie_V = mkV "lamama" ; -- TODO: maybe: valetama
play_V = mkV "mängima" ;
pull_V2 = mkV2 (mkV "tõmbama") ;
push_V2 = mkV2 (mkV "suruma") ;
rub_V2 = mkV2 (mkV "hõõruma") cpartitive ;
scratch_V2 = mkV2 (mkV "kraapima") cpartitive ;
-- "õmblema" = to sew (the previous "külvama" means to sow seeds).
-- Four forms are given explicitly, as for think_V ("mõtlema").
sew_V = mkV "õmblema" "õmmelda" "õmbleb" "õmmeldakse" ;
sing_V = mkV "laulma" "laulda" "laulab" "lauldakse" ;
sit_V = mkV "istuma" ;
smell_V = mk2V "haistma" "haista";
spit_V = mkV "sülitama" ;
split_V2 = mkV2 (mkV "poolitama") ;
squeeze_V2 = mkV2 (mkV "pigistama") cpartitive ;
stab_V2 = mkV2 (mkV "pussitama") cpartitive ;
stand_V = mkV "seisma" "seista" "seisab" "seistakse" ;
suck_V2 = mkV2 (mkV "imema") cpartitive ;
swell_V = mkV "paistetama" ; -- TODO: üles paistetama
swim_V = mkV "ujuma" ;
think_V = mkV "mõtlema" "mõtelda" "mõtleb" "mõeldakse";
throw_V2 = mkV2 (mkV "viskama") ;
tie_V2 = mkV2 (mkV "siduma" "siduda" "seob") ;
turn_V = mkV "pöörama" ;
vomit_V = mkV "oksendama" ;
wash_V2 = mkV2 (mkV "pesema") ;
wipe_V2 = mkV2 (mkV "pühkima") ;
breathe_V = mkV "hingama" ;
grammar_N = mkN "grammatika" ;
language_N = mkN "keel" ;
rule_N = mkN "reegel" ;
john_PN = mkPN "Juhan" ;
question_N = mkN "küsimus" ;
ready_A = mkA (mkN "valmis" "valmi" "valmit" "valmisse" "valmite" "valmeid") "valmim" "valmeim" Invariable ; -- 'valmis' is one of the non-inflecting adjectives
reason_N = mkN "põhjus" ;
today_Adv = mkAdv "täna" ;
uncertain_A = mkA "ebakindel" ;
oper
-- Turns a noun into an ordinal by reusing the noun's inflection table.
mkOrd1 : N -> Ord = \noun -> {s = noun.s ; lock_Ord = <>} ;

-- Bare cases wrapped with casePrep, used as complement markers in the lexicon.
callative = casePrep allative ;
celative = casePrep elative ;
cpartitive = casePrep partitive ;
ctranslative = casePrep translative ;

-- Verbs shared by several lexical entries.
know_V = mkV "teadma" "teada" "teab" ;
talk_V = mkV "rääkima" "rääkida" "räägib" ;
} ;

View File

@@ -0,0 +1,11 @@
-- Constructors for structural words (conjunctions, subjunctions and
-- interrogative quantifiers) given as plain strings.
resource MakeStructuralEst = open CatEst, ParadigmsEst, MorphoEst, Prelude in {

  oper
    -- Conjunction from its two string parts and the number it contributes.
    mkConj : Str -> Str -> ParadigmsEst.Number -> Conj = \first,second,num ->
      {s1 = first ; s2 = second ; n = num ; lock_Conj = <>} ;

    -- Subjunction from a single string.
    mkSubj : Str -> Subj = \str ->
      {s = str ; lock_Subj = <>} ;

    -- Interrogative quantifier that uses the same string in every
    -- number and case.
    mkIQuant : Str -> IQuant = \str ->
      {s = \\_,_ => str ; lock_IQuant = <>} ; ----
}

View File

@@ -0,0 +1,612 @@
--1 A Simple Estonian Resource Morphology
--
-- Inari Listenmaa, Kaarel Kaljurand, based on Aarne Ranta's Finnish grammar
--
-- This resource morphology contains definitions needed in the resource
-- syntax. To build a lexicon, it is better to use $ParadigmsEst$, which
-- gives a higher-level access to this module.
resource MorphoEst = ResEst ** open Prelude, Predef, HjkEst in {
flags optimize=all ; coding=utf8;
oper
----------------------
-- morph. paradigms --
----------------------
--Noun paradigms in HjkEst
-- Comparative adjectives: the six noun forms are built by adding "a" to
-- the comparative (suurem -> suurema) and then the regular endings.
-- (could just use hjk_type_IVb_audit "suurem" "a")
-- TODO: confirm that all comparatives inflect this way
dSuurempi : Str -> NForms = \nom ->
  let
    gen = nom + "a"
  in
    nForms6 nom gen (gen + "t") (gen + "sse") (gen + "te") (gen + "id") ;
-- Superlatives are inflected with exactly the comparative paradigm.
-- TODO: confirm this
dSuurin : Str -> NForms = dSuurempi ;
--Verb paradigms
-- TS 49 (saama)
-- "d" in the da-infinitive, the passive and the tud-participle;
-- the imperfect sg3 drops the last stem vowel and ends in "i".
cSaama : Str -> VForms = \maInf ->
  let
    stem = Predef.tk 2 maInf ;        -- saa
    imperf = (init stem) + "i"        -- sai
  in
    vForms8
      maInf                -- ma-infinitive
      (stem + "da")        -- da-infinitive
      (stem + "b")         -- present sg3
      (stem + "dakse")     -- passive
      (stem + "ge")        -- imperative pl
      imperf               -- imperfect sg3
      (stem + "nud")       -- nud-participle
      (stem + "dud") ;     -- tud-participle
-- TS 49 (käima)
-- No d/t in the da-infinitive and the passive; imperfect sg3 ends in "s".
cKaima : Str -> VForms = \maInf ->
  let
    stem = Predef.tk 2 maInf          -- käi
  in
    vForms8
      maInf                -- ma-infinitive
      (stem + "a")         -- da-infinitive
      (stem + "b")         -- present sg3
      (stem + "akse")      -- passive
      (stem + "ge")        -- imperative pl
      (stem + "s")         -- imperfect sg3
      (stem + "nud")       -- nud-participle
      (stem + "dud") ;     -- tud-participle
-- TS 49 (jooma)
-- The stem vowel changes in the da-infinitive and the passive (o -> u,
-- ö -> ü) and in the imperfect sg3 (o/ö -> õ); no d/t in da/takse;
-- imperfect sg3 ends in "i".
cJooma : Str -> VForms = \maInf ->
  let
    stem = Predef.tk 2 maInf ;        -- joo
    onset = Predef.tk 4 maInf ;       -- j
    vowel = last stem ;               -- o
    -- <vowel used in the da-infinitive, vowel used in the imperfect>
    alt = case vowel of {
      "o" => <"u", "õ"> ;
      "ö" => <"ü", "õ"> ;
      _   => <vowel, vowel>
      } ;
    daInf = onset + alt.p1 + alt.p1 + "a" ;   -- juua
    imperf = onset + alt.p2 + "i"             -- jõi
  in
    vForms8
      maInf                -- ma-infinitive
      daInf                -- da-infinitive
      (stem + "b")         -- present sg3
      (daInf + "kse")      -- passive
      (stem + "ge")        -- imperative pl
      imperf               -- imperfect sg3
      (stem + "nud")       -- nud-participle
      (stem + "dud") ;     -- tud-participle
-- TS 50-52 (elama, muutuma, kirjutama), 53 (tegelema) alt forms
-- "t" in the passive and the tud-participle; no consonant gradation.
cElama : Str -> VForms = \maInf ->
  let
    stem = Predef.tk 2 maInf          -- ela
  in
    vForms8
      maInf                -- ma-infinitive
      (stem + "da")        -- da-infinitive
      (stem + "b")         -- present sg3
      (stem + "takse")     -- passive
      (stem + "ge")        -- imperative pl
      (stem + "s")         -- imperfect sg3
      (stem + "nud")       -- nud-participle
      (stem + "tud") ;     -- tud-participle
-- TS 53 (tegelema)
-- The vowel stem is used in present sg3 and imperfect sg3; all other
-- derived forms use the consonant stem, with "d" in da/dakse/dud and
-- "g" in the imperative. No consonant gradation.
cTegelema : Str -> VForms = \maInf ->
  let
    vowelStem = Predef.tk 2 maInf ;   -- tegele
    consStem = init vowelStem         -- tegel
  in
    vForms8
      maInf                    -- ma-infinitive
      (consStem + "da")        -- da-infinitive
      (vowelStem + "b")        -- present sg3
      (consStem + "dakse")     -- passive
      (consStem + "ge")        -- imperative pl
      (vowelStem + "s")        -- imperfect sg3
      (consStem + "nud")       -- nud-participle
      (consStem + "dud") ;     -- tud-participle
-- TS 54 (tulema)
-- The last stem consonant (l, r, n) is doubled in the da-infinitive and
-- the passive; "d" in the tud-participle, "g" in the imperative;
-- imperfect sg3 ends in "i".
cTulema : Str -> VForms = \maInf ->
  let
    stem = Predef.tk 3 maInf ;        -- tul
    doubled = stem + last stem        -- tull
  in
    vForms8
      maInf                -- ma-infinitive
      (doubled + "a")      -- da-infinitive
      (stem + "eb")        -- present sg3
      (doubled + "akse")   -- passive
      (stem + "ge")        -- imperative pl
      (stem + "i")         -- imperfect sg3
      (stem + "nud")       -- nud-participle
      (stem + "dud") ;     -- tud-participle
-- TS 55-56 (õppima, sündima)
-- "t" in the passive and the tud-participle; the weak grade of the stem
-- (computed with `weaker`) is used in present sg3, passive and
-- tud-participle.
cLeppima : Str -> VForms = \maInf ->
  let
    strong = Predef.tk 2 maInf ;                      -- leppi
    weak = (weaker (init strong)) + last strong       -- lepi
  in
    vForms8
      maInf                -- ma-infinitive
      (strong + "da")      -- da-infinitive
      (weak + "b")         -- present sg3
      (weak + "takse")     -- passive
      (strong + "ge")      -- imperative pl
      (strong + "s")       -- imperfect sg3
      (strong + "nud")     -- nud-participle
      (weak + "tud") ;     -- tud-participle
-- TS 57 (lugema)
-- Like TS 55-56, but with irregular gradation patterns that should not
-- be part of HjkEst.weaker; they are listed explicitly below.
-- Also covers marssima, valssima (consonant + "ssi" stems).
cLugema : Str -> VForms = \maInf ->
  let
    strong = Predef.tk 2 maInf ;      -- luge
    body = init strong ;              -- lug
    onset = Predef.tk 3 strong ;      -- l
    theme = last strong ;             -- e
    -- weak grade; the first matching pattern wins
    weak = case strong of {
      _ + ("aju"|"adu"|"agu") => onset + "ao" ;
      _ + "adi"               => onset + "ae" ;
      "haudu"                 => "hau" ;
      _ + ("idu"|"igu")       => onset + "eo" ;
      _ + "ida"               => onset + "ea" ;
      _ + "udu"               => onset + "oo" ;
      _ + ("uge"|"ude")       => onset + "oe" ;
      _ + #c + "ssi"          => (init body) + theme ;
      _                       => (weaker body) + theme
      }
  in
    vForms8
      maInf                -- ma-infinitive
      (strong + "da")      -- da-infinitive
      (weak + "b")         -- present sg3
      (weak + "takse")     -- passive
      (strong + "ge")      -- imperative pl
      (strong + "s")       -- imperfect sg3
      (strong + "nud")     -- nud-participle
      (weak + "tud") ;     -- tud-participle
-- TS 58 (muutma, saatma)
-- Like laskma (TS 62, 64) but without doubling the stem consonant
-- (muutma~muuta, not *muutta); like andma (TS 63) but with a different
-- passive (muudetakse vs. antakse).
cMuutma : Str -> VForms = \maInf ->
  let
    strong = Predef.tk 2 maInf ;      -- muut
    weak = weaker strong              -- muud
  in
    vForms8
      maInf                -- ma-infinitive
      (strong + "a")       -- da-infinitive
      (weak + "ab")        -- present sg3
      (weak + "etakse")    -- passive; always e?
      (strong + "ke")      -- imperative pl
      (strong + "is")      -- imperfect sg3
      (strong + "nud")     -- nud-participle
      (weak + "etud") ;    -- tud-participle; always e?
-- TS 59-60 (petma~petetakse, jätma~jäetakse)
-- The passive is given as the second argument; the tud-participle is
-- derived from it by replacing the last four characters with "ud".
cPetma : (_,_ : Str) -> VForms = \maInf,passive ->
  let
    weak = Predef.tk 2 maInf ;                    -- pet
    strong = stronger weak ;                      -- pett
    tudPart = (Predef.tk 4 passive) + "ud"        -- jaet + ud
  in
    vForms8
      maInf                -- ma-infinitive
      (strong + "a")       -- da-infinitive
      (weak + "ab")        -- present sg3
      passive              -- passive
      (weak + "ke")        -- imperative pl
      (strong + "is")      -- imperfect sg3
      (weak + "nud")       -- nud-participle
      tudPart ;            -- tud-participle
{- -- TS 60 (jatma)
-- weak stem in ma, strong in da ; irregular takse, tud
cJatma : (_ : Str) -> VForms = \jatma ->
let
jat = Predef.tk 2 jatma ;
jatt = stronger jat ;
ko = (weaker (weaker jat))
--weaker jät = jäd ; weaker (weaker jät) = jä
--weaker küt = kö ; weaker (weaker küt) = kö
--HjkEst.weaker takes care of kütma->köetud
in vForms8
jatma
(jatt + "a")
(jat + "ab")
(ko + "etakse") --always e?
(jat + "ke")
(jatt + "is")
(jat + "nud")
(ko + "etud") ;
-}
-- TS 61 (laulma; also veenma, naerma)
-- The present sg3, which shows the stem vowel (a/e), is given as the
-- second argument and used unchanged.
cKuulma : (_,_ : Str) -> VForms = \maInf,presSg3 ->
  let
    stem = Predef.tk 2 maInf          -- kuul
  in
    vForms8
      maInf                -- ma-infinitive
      (stem + "da")        -- da-infinitive
      presSg3              -- present sg3
      (stem + "dakse")     -- passive
      (stem + "ge")        -- imperative pl
      (stem + "is")        -- imperfect sg3
      (stem + "nud")       -- nud-participle
      (stem + "dud") ;     -- tud-participle
-- TS 62 (tõusma), 64 (mõksma)
-- The present sg3, which shows the stem vowel (a/e), is given as the
-- second argument. Does not give the alternative forms joosta, joostes
-- (see cJooksma).
cLaskma : (_,_ : Str) -> VForms = \maInf,presSg3 ->
  let
    strong = Predef.tk 2 maInf ;      -- lask
    weak = weaker strong              -- las; no effect on tõusma
  in
    vForms8
      maInf                -- ma-infinitive
      (weak + "ta")        -- da-infinitive
      presSg3              -- present sg3
      (weak + "takse")     -- passive
      (weak + "ke")        -- imperative pl
      (strong + "is")      -- imperfect sg3
      (strong + "nud")     -- nud-participle
      (weak + "tud") ;     -- tud-participle
-- TS 62 alt forms (jooksma)
-- The short stem replaces the last two characters of the full stem
-- with "s" (jooks -> joos).
cJooksma : Str -> VForms = \maInf ->
  let
    full = Predef.tk 2 maInf ;                -- jooks
    short = (Predef.tk 2 full) + "s"          -- joos
  in
    vForms8
      maInf                -- ma-infinitive
      (short + "ta")       -- da-infinitive
      (full + "eb")        -- present sg3
      (short + "takse")    -- passive
      (short + "ke")       -- imperative pl
      (full + "is")        -- imperfect sg3
      (full + "nud")       -- nud-participle
      (short + "tud") ;    -- tud-participle
-- TS 63 (andma, murdma, hoidma)
-- The present sg3 is given as the second argument and used unchanged
-- (andma~annab; tundma~tunneb), so no weak-grade stem has to be computed
-- here; the previously computed but unused locals `ann` and `te` have
-- been removed.
cAndma : (_,_ : Str) -> VForms = \andma,annab ->
  let
    and = Predef.tk 2 andma ;   -- and; murd(ma), hoid(ma)
    an = init and               -- an; mur(d), hoi(d)
  in vForms8
    andma                -- ma-infinitive
    (and + "a")          -- da-infinitive
    annab                -- present sg3
    (an + "takse")       -- passive
    (and + "ke")         -- imperative pl
    (and + "is")         -- imperfect sg3
    (and + "nud")        -- nud-participle
    (an + "tud") ;       -- tud-participle
-- TS 65 (pesema)
-- A consonant stem verb in disguise: only present sg3 keeps the stem
-- vowel.
cPesema : Str -> VForms = \maInf ->
  let
    vowelStem = Predef.tk 2 maInf ;   -- pese
    consStem = init vowelStem         -- pes
  in
    vForms8
      maInf                    -- ma-infinitive
      (consStem + "ta")        -- da-infinitive
      (vowelStem + "b")        -- present sg3
      (consStem + "takse")     -- passive
      (consStem + "ke")        -- imperative pl
      (consStem + "i")         -- imperfect sg3
      (consStem + "nud")       -- nud-participle
      (consStem + "tud") ;     -- tud-participle
-- TS 66 (nägema)
-- Uses four stems: the bare root (nä), and the root extended with
-- g (näg), h (näh) and i (näi).
cNagema : Str -> VForms = \maInf ->
  let
    gStem = init (Predef.tk 2 maInf) ;    -- näg
    root = init gStem ;                   -- nä
    hStem = root + "h" ;                  -- näh
    iStem = root + "i"                    -- näi
  in
    vForms8
      maInf                -- ma-infinitive
      (hStem + "a")        -- da-infinitive
      (root + "eb")        -- present sg3
      (hStem + "akse")     -- passive
      (hStem + "ke")       -- imperative pl
      (gStem + "i")        -- imperfect sg3
      (iStem + "nud")      -- nud-participle
      (hStem + "tud") ;    -- tud-participle
-- TS 67-68 (hüppama, tõmbama)
-- Strong stem in the ma-infinitive, present sg3 and imperfect sg3;
-- weak stem in the da-infinitive, passive, imperative and both
-- participles; "t" in da/takse, "k" in the imperative.
cHyppama : Str -> VForms = \maInf ->
  let
    strong = Predef.tk 2 maInf ;                      -- hyppa
    weak = (weaker (init strong)) + last strong       -- hypa
  in
    vForms8
      maInf                -- ma-infinitive
      (weak + "ta")        -- da-infinitive
      (strong + "b")       -- present sg3
      (weak + "takse")     -- passive
      (weak + "ke")        -- imperative pl
      (strong + "s")       -- imperfect sg3
      (weak + "nud")       -- nud-participle
      (weak + "tud") ;     -- tud-participle
-- TS 69 (õmblema)
-- The weak stem weakens the part before the final consonant + vowel and
-- then re-attaches them in swapped order (õmb-l-e -> õmm-e-l).
cOmblema : Str -> VForms = \maInf ->
  let
    strong = Predef.tk 2 maInf ;          -- õmble
    theme = last strong ;                 -- e
    liquid = last (init strong) ;         -- l
    body = Predef.tk 2 strong ;           -- õmb
    weakBody = case body of {
      "mõt" => "mõe" ; -- some "double weak" patterns; however weaker (weaker body) makes the coverage worse
      _     => weaker body
      } ;
    weak = weakBody + theme + liquid      -- õmmel
  in
    vForms8
      maInf                -- ma-infinitive
      (weak + "da")        -- da-infinitive
      (strong + "b")       -- present sg3
      (weak + "dakse")     -- passive
      (weak + "ge")        -- imperative pl
      (strong + "s")       -- imperfect sg3
      (weak + "nud")       -- nud-participle
      (weak + "dud") ;     -- tud-participle
-- 2-arg paradigm to distinguish between TS 50-52 and TS 55-57:
-- the second argument is the present sg3, whose stem (the form without
-- its last character) is used for present sg3, passive and tud-participle.
cSattumaPettuma : (_,_ : Str) -> VForms = \maInf,presSg3 ->
  let
    infStem = Predef.tk 2 maInf ;     -- pettu
    presStem = init presSg3           -- satu
  in
    vForms8
      maInf                    -- ma-infinitive
      (infStem + "da")         -- da-infinitive
      (presStem + "b")         -- present sg3
      (presStem + "takse")     -- passive
      (infStem + "ge")         -- imperative pl
      (infStem + "s")          -- imperfect sg3
      (infStem + "nud")        -- nud-participle
      (presStem + "tud") ;     -- tud-participle
-----------------
-- auxiliaries --
-----------------
{- Noun internal opers moved to ResEst
These used to be here:
NForms : Type = Predef.Ints 5 => Str ;
Noun : Type = {s: NForm => Str } ;
nForms6 : (x1,_,_,_,_,x6 : Str) -> NForms ;
n2nforms : Noun -> NForms ;
nForms2N : NForms -> Noun ;
-}
-- Adjective forms: one noun-form table and one adverb string for each
-- of the three degrees of comparison.
AForms : Type = {
  posit      : NForms ;
  compar     : NForms ;
  superl     : NForms ;
  adv_posit  : Str ;
  adv_compar : Str ;
  adv_superl : Str
  } ;
-- Builds an adjective from AForms: for every degree the nominal forms
-- come from the corresponding noun-form table and the adverbial form
-- from the corresponding adverb string.
aForms2A : AForms -> Adjective = \forms ->
  let
    positN = nForms2N forms.posit ;
    comparN = nForms2N forms.compar ;
    superlN = nForms2N forms.superl
  in {
    s = table {
      Posit  => table {AN nf => positN.s ! nf ;  AAdv => forms.adv_posit} ;
      Compar => table {AN nf => comparN.s ! nf ; AAdv => forms.adv_compar} ;
      Superl => table {AN nf => superlN.s ! nf ; AAdv => forms.adv_superl}
      } ;
    lock_A = <>
    } ;
-- Derives default comparison and adverb forms from the six noun forms
-- of an adjective.
--
-- Fixes over the version copied from the Finnish grammar:
--  * the table is only indexed with 1 (sg genitive); the old code used
--    index 8, which does not exist in the 6-form NForms built by nForms6;
--  * stems and suffixes are glued with `+`; the old `++` produced two
--    separate tokens ("suure m");
--  * the sg genitive is used as the vowel stem as it is (suure); the old
--    `init` removed the stem vowel (correct for Finnish "suuren" only);
--  * the comparative/superlative adverb suffixes are Estonian "-mini"
--    instead of Finnish "-mmin".
-- NOTE(review): dropping the stem vowel before "-im" is only a heuristic
-- for the short superlative (suurim, vanim); irregular adjectives must
-- give their comparison forms explicitly -- confirm against the callers.
nforms2aforms : NForms -> AForms = \nforms ->
  let
    suure = nforms ! 1 ;     -- sg genitive = vowel stem
    suur = init suure ;      -- stem without its final vowel
  in {
    posit = nforms ;
    compar = dSuurempi (suure + "m") ;
    superl = dSuurin (suur + "im") ;
    adv_posit = suure + "sti" ;
    adv_compar = suure + "mini" ;
    adv_superl = suur + "imini" ;
  } ;
{- Verb internal opers moved to ResEst
These used to be here:
VForms : Type = Predef.Ints 7 => Str ;
vForms8 : (x1,_,_,_,_,_,_,x8 : Str) -> VForms ;
regVForms : (x1,_,_,x4 : Str) -> VForms ;
vforms2V : VForms -> Verb ;
-}
-----------------------
-- for Structural
-----------------------
-- The case table of a common noun in a fixed number.
caseTable : Number -> CommonNoun -> Case => Str = \num,noun ->
  \\cas => noun.s ! NCase num cas ;
-- Makes a determiner from a common noun in the given number: the
-- noun's case forms become the determiner's forms, and the result is
-- passed through heavyDet. The determiner is marked as not containing
-- a numeral and as definite.
mkDet : Number -> CommonNoun -> {
    s,sp : Case => Str ; -- determiner forms
    n : Number ;         -- number (agreement feature for verb)
    isNum : Bool ;       -- whether a numeral is present
    isDef : Bool         -- whether the determiner is definite
  } = \num, cn -> heavyDet {
    s = caseTable num cn ;
    n = num ;
    isNum = False ;
    isDef = True --- does this hold for all new dets?
    } ;
-- Personal and relative pronouns.
-- Input forms: nominative, genitive, partitive (the Finnish version
-- needed five input forms), followed by number and person.
-- The remaining cases are formed by adding the case ending to the
-- genitive; the inner and outer local cases use the genitive as
-- shortened by ie_to_i.
-- NPAcc is kept (equal to the partitive): appCompl in ResEst relies on
-- it to choose the right case for different kinds of complements,
-- including the places where pronouns are treated differently from
-- nouns (PassVP).
mkPronoun : (_,_,_ : Str) -> Number -> Person ->
  {s : NPForm => Str ; a : Agr} =
  \nom, gen, part, num, pers ->
    let
      short = ie_to_i gen
    in {
      s = table {
        -- given forms
        NPCase Nom    => nom ;
        NPCase Gen    => gen ;
        NPCase Part   => part ;
        NPAcc         => part ;
        -- endings on the full genitive
        NPCase Transl => gen + "ks" ;
        NPCase Ess    => gen + "na" ;
        NPCase Abess  => gen + "ta" ;
        NPCase Comit  => gen + "ga" ;
        NPCase Termin => gen + "ni" ;
        -- endings on the shortened genitive
        NPCase Iness  => short + "s" ;
        NPCase Elat   => short + "st" ;
        NPCase Illat  => short + "sse" ;
        NPCase Adess  => short + "l" ;
        NPCase Ablat  => short + "lt" ;
        NPCase Allat  => short + "le"
        } ;
      a = Ag num pers
      } ;
-- Replaces "ie" by "i" in a string; strings without "ie" are
-- returned unchanged.
-- Used to get meisse/teisse instead of meiesse/teiesse.
ie_to_i : Str -> Str = \word ->
  case word of {
    before + "ie" + after => before + "i" + after ;
    _                     => word
    } ;
-- Demonstrative pronoun: same as mkPronoun in the third person, except
-- that the accusative equals the nominative.
-- The fourth and fifth string arguments are accepted but not used.
-- TODO: this does not seem to be called from anywhere
mkDemPronoun : (_,_,_,_,_ : Str) -> Number ->
  {s : NPForm => Str ; a : Agr} =
  \nom, gen, part, _, _, num ->
    let
      base = mkPronoun nom gen part num P3
    in {
      s = \\form => case form of {
        NPAcc => nom ;
        _     => base.s ! form
        } ;
      a = base.a
      } ;
-- The relative pronoun "kes" is inflected in number and case like a
-- common noun, built from six explicitly given forms.
oper
-- TODO: fix: Nom => kelled
-- TODO: mis
relPron : Number => Case => Str =
  let
    kesForms = nForms6 "kes" "kelle" "keda" "kellesse" "kelle" "keda" ;
    kesNoun = nForms2N kesForms
  in
    \\num,cas => kesNoun.s ! NCase num cas ;
-- A name-like record that is inflected in case only (no number).
ProperName = {s : Case => Str} ;
-- Case forms of "see"; every case form is listed explicitly.
-- TODO: generate using mkPronoun
pronSe : ProperName = {
  s = table {
    -- grammatical cases
    Nom => "see" ;      Gen => "selle" ;     Part => "seda" ;
    -- inner local cases
    Illat => "sellesse" ; Iness => "selles" ; Elat => "sellest" ;
    -- outer local cases
    Allat => "sellele" ;  Adess => "sellel" ; Ablat => "sellelt" ;
    -- remaining cases
    Transl => "selleks" ; Termin => "selleni" ; Ess => "sellena" ;
    Abess => "selleta" ;  Comit => "sellega"
    }
  } ;
-- Case forms of "need"; every case form is listed explicitly.
-- TODO: generate using mkPronoun
pronNe : ProperName = {
  s = table {
    -- grammatical cases
    Nom => "need" ;      Gen => "nende" ;     Part => "neid" ;
    -- inner local cases
    Illat => "nendesse" ; Iness => "nendes" ; Elat => "nendest" ;
    -- outer local cases
    Allat => "nendele" ;  Adess => "nendel" ; Ablat => "nendelt" ;
    -- remaining cases
    Transl => "nendeks" ; Termin => "nendeni" ; Ess => "nendena" ;
    Abess => "nendeta" ;  Comit => "nendega"
    }
  } ;
}

226
lib/src/estonian/NounEst.gf Normal file
View File

@@ -0,0 +1,226 @@
concrete NounEst of Noun = CatEst ** open ResEst, HjkEst, MorphoEst, Prelude in {
flags optimize=all_subs ; coding=utf8;
lin
-- Determiner + common noun.
-- Number is subtle: agreement (field a) always follows det.n, but after a
-- numeral determiner the noun itself is put in a singular form.
DetCN det cn =
  let
    -- number used for the noun form: singular after a numeral
    nounNum : Number = case det.isNum of {
      True => Sg ;
      _ => det.n
    } ;
    -- for a requested NP form: <case of the determiner, form of the noun>
    forms : NPForm -> Case * NForm = \npf ->
      let k = npform2case nounNum npf
      in
        case <npf, det.isNum> of {
          <NPAcc, True> => <Nom, NCase Sg Part> ; -- kolm kassi (as object)
          <NPCase Nom, True> => <Nom, NCase Sg Part> ; -- kolm kassi (as subject)
          -- In these four cases only the last word gets the case ending;
          -- the determiner stays in the genitive.
          <NPCase Comit, _> => <Gen, NCase nounNum Comit> ; -- kolme kassiga
          <NPCase Abess, _> => <Gen, NCase nounNum Abess> ; -- kolme kassita
          <NPCase Ess, _> => <Gen, NCase nounNum Ess> ; -- kolme kassina
          <NPCase Termin, _> => <Gen, NCase nounNum Termin> ; -- kolme kassini
          <_, True> => <k, NCase Sg k> ; -- kolmeks kassiks (all other cases)
          _ => <k, NCase nounNum k> -- kass, kassi, ... (det is not a number)
        }
  in {
    s = \\npf =>
      let dn = forms npf
      in det.s ! dn.p1 ++ cn.s ! dn.p2 ;
    -- agreement uses det.n directly, not the singular forced by numerals
    a = agrP3 det.n ;
    isPron = False
  } ;
-- Determiner used alone as a noun phrase (uses the det.sp forms).
DetNP det =
  let
    -- number used to resolve the case: singular for numerals
    caseNum : Number = case det.isNum of {
      True => Sg ;
      _ => det.n
    } ;
    -- agreement number: indefinite determiners agree in the singular
    agrNum : Number = case det.isDef of {
      False => Sg ;
      _ => det.n
    }
  in {
    s = \\npf => det.sp ! npform2case caseNum npf ;
    a = agrP3 agrNum ;
    isPron = False
  } ;
-- A proper name as a noun phrase: third person singular, never a pronoun.
UsePN pn = {
  isPron = False ;
  a = agrP3 Sg ;
  s = \\npf => pn.s ! npform2case Sg npf
} ;
-- A pronoun as a noun phrase: the pronoun record extended with isPron = True.
UsePron p = p ** {isPron = True} ;
-- Predeterminer + noun phrase: the predeterminer is selected by the number
-- derived from the NP's agreement and by the same NP form as the NP itself.
PredetNP pred np =
  let num : Number = complNumAgr np.a
  in {
    s = \\npf => pred.s ! num ! npf ++ np.s ! npf ;
    a = np.a ;
    isPron = np.isPron
  } ;
-- Noun phrase followed by the passive past participle of a two-place verb.
-- The participle is appended uninflected. An essive variant (genitive of
-- the participle + "na") was sketched earlier but never used in the output;
-- the let-bindings that computed it were dead code and have been removed.
PPartNP np v2 =
  let
    part : Str = v2.s ! (PastPart Pass)
  in {
    s = \\c => np.s ! c ++ part ;
    a = np.a ;
    isPron = np.isPron
  } ;
-- Noun phrase modified by a following adverb; agreement and pronoun status
-- are inherited from the noun phrase.
AdvNP np adv = {
  isPron = np.isPron ;
  a = np.a ;
  s = \\npf => np.s ! npf ++ adv.s
} ;
-- Quantifier + numeral + ordinal. The numeral is always taken in its Sg
-- row; the quantifier and the ordinal follow the numeral's number num.n.
DetQuantOrd quant num ord =
  let
    tail : Case => Str = \\cas => num.s ! Sg ! cas ++ ord.s ! NCase num.n cas
  in {
    s = \\cas => quant.s ! num.n ! cas ++ tail ! cas ;
    sp = \\cas => quant.sp ! num.n ! cas ++ tail ! cas ;
    n = num.n ;
    isNum = num.isNum ;
    isDef = quant.isDef
  } ;
-- Quantifier + numeral. The numeral is always taken in its Sg row; the
-- quantifier follows the numeral's number num.n.
DetQuant quant num =
  let
    numeral : Case => Str = \\cas => num.s ! Sg ! cas
  in {
    s = \\cas => quant.s ! num.n ! cas ++ numeral ! cas ;
    sp = \\cas => quant.sp ! num.n ! cas ++ numeral ! cas ;
    n = num.n ;
    isNum = num.isNum ;
    isDef = quant.isDef
  } ;
-- Possessive quantifier: the genitive of the pronoun, whatever the number
-- and case of the phrase. Always definite, never a numeral.
PossPron p =
  let
    gen : Number => Case => Str = \\_,_ => p.s ! NPCase Gen
  in {
    s = gen ;
    sp = gen ;
    isNum = False ;
    isDef = True
  } ;
-- Bare grammatical number: no visible numeral, only the number feature.
NumSg = {n = Sg ; isNum = False ; s = \\_,_ => []} ;
NumPl = {n = Pl ; isNum = False ; s = \\_,_ => []} ;
-- A cardinal as Num: only plural cardinals count as numerals (isNum),
-- cf. üks raamat / kaks raamatut.
NumCard card = card ** {
  isNum = case card.n of {
    Sg => False ;
    Pl => True
  }
} ;
-- Digit sequences as cardinals and ordinals: select the NCard / NOrd form.
NumDigits digits = {
  n = digits.n ;
  s = \\num,cas => digits.s ! NCard (NCase num cas)
} ;
OrdDigits digits = {s = \\nf => digits.s ! NOrd nf} ;
-- Verbal numerals as cardinals and ordinals: select the NCard / NOrd form.
NumNumeral num = {
  n = num.n ;
  s = \\number,cas => num.s ! NCard (NCase number cas)
} ;
OrdNumeral num = {s = \\nf => num.s ! NOrd nf} ;
-- Numeral-modifying adverb placed in front of the cardinal.
AdNum adn num = {
  n = num.n ;
  s = \\number,cas => adn.s ++ num.s ! number ! cas
} ;
-- Superlative as an ordinal, built analytically as "kõige" + comparative,
-- which is more robust than the synthetic form:
-- OrdSuperl a = {s = \\nc => a.s ! Superl ! AN nc} ;
OrdSuperl adj = {
  s = \\nf => "kõige" ++ adj.s ! Compar ! AN nf
} ;
-- Definite article: empty before a noun; standing alone (sp) it is
-- "see" in the singular and "need" in the plural.
DefArt = {
  isDef = True ;
  isNum = False ;
  sp = table {
    Pl => pronNe.s ;
    Sg => pronSe.s
  } ;
  s = \\_,_ => []
} ;
-- Indefinite article: empty before a noun (DetCN uses isDef instead);
-- standing alone (sp) it is the noun-like word "üks".
IndefArt =
  let
    yks = nForms2N (nForms6 "üks" "ühe" "üht" "ühesse" "ühtede" "ühtesid")
  in {
    s = \\_,_ => [] ;
    sp = \\num,cas => yks.s ! NCase num cas ;
    isNum = False ;
    isDef = False
  } ;
-- Mass noun phrase: the bare common noun, always singular, third person.
MassNP cn = {
  s = \\npf => cn.s ! NCase Sg (npform2case Sg npf) ;
  a = agrP3 Sg ;
  isPron = False
} ;
-- Nouns and relational nouns used as common nouns: the record is passed
-- through unchanged.
UseN n = n ;
UseN2 n = n ;
-- Three-place noun reduced to a two-place one, keeping the first complement
-- (c2 / isPre).
Use2N3 n3 = lin N2 {
  isPre = n3.isPre ;
  c2 = n3.c2 ;
  s = n3.s
} ;
-- Three-place noun reduced to a two-place one, keeping the second complement
-- (c3 / isPre2), which becomes the c2 / isPre of the result.
Use3N3 n3 = lin N2 {
  isPre = n3.isPre2 ;
  c2 = n3.c3 ;
  s = n3.s
} ;
-- Relational noun + complement; f.isPre decides on which side of the noun
-- the complement is placed (via preOrPost).
ComplN2 f x =
  let
    compl : Str = appCompl True Pos f.c2 x
  in {
    s = \\nf => preOrPost f.isPre (f.s ! nf) compl
  } ;
-- Three-place noun + its first complement; the remaining complement slot
-- (c3 / isPre2) becomes the c2 / isPre of the resulting N2.
ComplN3 f x =
  let
    compl : Str = appCompl True Pos f.c2 x
  in lin N2 {
    s = \\nf => preOrPost f.isPre (f.s ! nf) compl ;
    c2 = f.c3 ;
    isPre = f.isPre2
  } ;
-- Adjectival modification. The form of the adjective depends on ap.infl:
--  * Invariable / Participle: always sg nom (valmis kassile; väsinud kassile)
--  * Regular: agrees with the noun, except that in the essive, abessive,
--    comitative and terminative it stays in the genitive
--    (suure kassiga, not *suurega kassiga).
AdjCN ap cn = {
  s = \\nf =>
    let
      adjForm : NForm = case ap.infl of {
        (Invariable|Participle) => NCase Sg Nom ;
        Regular => case nf of {
          NCase num (Ess|Abess|Comit|Termin) => NCase num Gen ;
          _ => nf
        }
      }
    in ap.s ! True ! adjForm ++ cn.s ! nf
} ;
-- Common noun + relative clause; the clause agrees in third person with
-- the number of the noun form.
RelCN cn rs = {
  s = \\nf => cn.s ! nf ++ rs.s ! agrP3 (numN nf)
} ;
-- Noun phrase + relative clause, separated by a comma; the clause agrees
-- with the noun phrase.
RelNP np rs = {
  isPron = np.isPron ; ---- correct ?
  a = np.a ;
  s = \\npf => np.s ! npf ++ "," ++ rs.s ! np.a
} ;
-- Common noun followed by an adverb, an embedded sentence, or an apposed
-- noun phrase (the latter always in the nominative).
AdvCN cn adv = {
  s = \\form => cn.s ! form ++ adv.s
} ;
SentCN cn sent = {
  s = \\form => cn.s ! form ++ sent.s
} ;
ApposCN cn np = {
  s = \\form => cn.s ! form ++ np.s ! NPCase Nom
} ;
oper
-- Number of a noun form; any form that is not an NCase counts as singular.
numN : NForm -> Number = \form ->
  case form of {
    NCase num _ => num ;
    _ => Sg ---
  } ;
}

View File

@@ -0,0 +1,160 @@
concrete NumeralEst of Numeral = CatEst [Numeral,Digits] ** open Prelude, ParadigmsEst, MorphoEst in {
-- Notice: possessive forms are not used. They get wrong, since every
-- part is made to agree in them.
flags optimize=all_subs ; coding=utf8;
-- Linearization types of the numeral categories.
-- Categories below one million additionally depend on NumPlace (declared
-- further down in this module): whether the numeral stands alone or is a
-- multiplier in front of a larger unit.
lincat
Sub1000000 = {s : CardOrd => Str ; n : MorphoEst.Number} ;
Digit = {s : CardOrd => Str} ;
Sub10, Sub100, Sub1000 = {s : NumPlace => CardOrd => Str ; n : MorphoEst.Number} ;
lin
num x = x ;
-- The digits 2-9: each pairs a cardinal noun with an ordinal noun (see co).
-- The six-string entries are the forms passed to the six-argument mkN;
-- judging by the data the order is sg nom, sg gen, sg part, sg illat,
-- pl gen, pl part (TODO confirm against nForms6).
n2 = co (mkN "kaks" "kahe" "kahte" "kahte" "kahtede" "kahtesid")
(mkN "teine" "teise" "teist" "teisesse" "teiste" "teisi") ;
n3 = co (mkN "kolm" "kolme" "kolme" "kolme" "kolmede" "kolmi")
(mkN "kolmas" "kolmanda" "kolmandat" "kolmandasse" "kolmandate" "kolmandaid") ;
n4 = co (mkN "neli" "nelja" "nelja" "nelja" "neljade" "neljasid")
(mkN "neljas" "neljanda" "neljandat" "neljandasse" "neljandate" "neljandaid") ;
n5 = co (mkN "viis" "viie" "viit" "viide" "viite" "viisi")
(mkN "viies" "viienda" "viiendat" "viiendasse" "viiendate" "viiendaid") ;
n6 = co (mkN "kuus" "kuue" "kuut" "kuude" "kuute" "kuusi")
(mkN "kuues" "kuuenda" "kuuendat" "kuuendasse" "kuuendate" "kuuendaid") ;
n7 = co (mkN "seitse" "seitsme" "seitset" "seitsmesse" "seitsmete" "seitsmeid")
(mkN "seitsmes" "seitsmenda" "seitsmendat" "seitsmendasse" "seitsmendate" "seitsmendaid") ;
-- For 8 and 9 the cardinal is left to the one-argument mkN heuristic.
n8 = co (mkN "kaheksa")
(mkN "kaheksas" "kaheksanda" "kaheksandat" "kaheksandasse" "kaheksandate" "kaheksandaid") ;
n9 = co (mkN "üheksa")
(mkN "üheksas" "üheksanda" "üheksandat" "üheksandasse" "üheksandate" "üheksandaid") ;
-- The number 1: spelled out ("üks") when independent, silent when it is
-- an attribute (multiplier) of a larger unit.
pot01 = {
  n = Sg ;
  s = table {
    NumIndep => yksN.s ;
    NumAttr => \\_ => []
  }
} ;
-- Numbers below 100. Everything here is plural; the NumPlace argument is
-- ignored. Teens are digit + "teist", tens are digit + kymmendN, glued
-- together with BIND.
pot0 digit = {s = \\_ => digit.s ; n = Pl} ;
pot110 = {
  s = \\_ => kymmeN.s ;
  n = Pl
} ;
pot111 = {s = \\_,form => yksN.s ! form ++ BIND ++ "teist" ; n = Pl} ;
pot1to19 digit = {s = \\_,form => digit.s ! form ++ BIND ++ "teist" ; n = Pl} ;
pot0as1 sub10 = sub10 ;
pot1 digit = {s = \\_,form => digit.s ! form ++ BIND ++ kymmendN.s ! form ; n = Pl} ;
-- Tens followed by units: digit + kymmendN (bound), then the units as a
-- separate, independent numeral.
pot1plus tens units = {
  s = \\_,form =>
    tens.s ! form ++ BIND ++ kymmendN.s ! form ++ units.s ! NumIndep ! form ;
  n = Pl
} ;
pot1as2 sub100 = sub100 ;
-- Hundreds: attributive multiplier + sataaN, glued with BIND unless the
-- multiplier is the silent 1 (see nBIND).
pot2 mult = {
  s = \\_,form => mult.s ! NumAttr ! form ++ nBIND mult.n ++ sataaN.s ! mult.n ! form ;
  n = Pl
} ;
-- Hundreds followed by an independent number below 100.
pot2plus mult rest = {
  s = \\_,form =>
    mult.s ! NumAttr ! form ++ nBIND mult.n ++ sataaN.s ! mult.n ! form
    ++ rest.s ! NumIndep ! form ;
  n = Pl
} ;
-- A number below 1000 used on its own: fix the place to NumIndep.
pot2as3 sub1000 = {s = sub1000.s ! NumIndep ; n = sub1000.n} ;
-- Thousands: attributive multiplier followed by tuhattaN as a separate word.
pot3 mult = {
  s = \\form => mult.s ! NumAttr ! form ++ tuhattaN.s ! mult.n ! form ;
  n = Pl
} ;
-- Thousands followed by an independent number below 1000.
pot3plus mult rest = {
  s = \\form =>
    mult.s ! NumAttr ! form ++ tuhattaN.s ! mult.n ! form ++ rest.s ! NumIndep ! form ;
  n = Pl
} ;
oper
-- Combines a cardinal and an ordinal noun into one table over CardOrd.
co : (c,o : {s : NForm => Str}) -> {s : CardOrd => Str} = \card,ord -> {
  s = table {
    NOrd form => ord.s ! form ;
    NCard form => card.s ! form
  }
} ;
-- Glue token between a multiplier and its unit; empty for the singular,
-- because there is nothing to glue after the silent 1.
nBIND : Number -> Str = \num ->
  case num of {
    Sg => [] ;
    _ => BIND
  } ;
-- Position of a small numeral: NumIndep when it stands on its own,
-- NumAttr when it multiplies a larger unit (pot01 is silent as NumAttr).
param
NumPlace = NumIndep | NumAttr ;
oper
-- Base words for 1, 10 and 100: each pairs the cardinal noun with the
-- ordinal noun (see co).
yksN = co
(mkN "üks" "ühe" "ühte" "ühte" "ühtede" "ühtesid")
(mkN "esimene" "esimese" "esimest" "esimesse" "esimeste" "esimesi") ;
kymmeN = co
(mkN "kümme" "kümne" "kümmet" "kümnesse" "kümnete" "kümneid")
(mkN "kümnes" "kümnenda" "kümnendat" "kümnendasse" "kümnendate" "kümnendaid") ;
sadaN = co
(mkN "sada" "saja" "sadat" "sajasse" "sadade" "sadu")
(mkN "sajas" "sajanda" "sajandat" "sajandasse" "sajandate" "sajandaid") ;
-- Base word for 1000: cardinal "tuhat" paired with ordinal "tuhandes".
-- The ordinal's plural genitive is "tuhandendate" (parallel to "sajandate"
-- and "kümnendate"); "tuhandete" is the plural genitive of the cardinal.
tuhatN = co
  (mkN "tuhat" "tuhande" "tuhandet" "tuhandesse" "tuhandete" "tuhandeid")
  (mkN "tuhandes" "tuhandenda" "tuhandendat" "tuhandendasse" "tuhandendate" "tuhandendaid") ;
-- "ten" as the second part of the tens (kakskümmend, ...): like kymmeN,
-- except that the cardinal sg nom is "kümmend".
kymmendN = {
  s = table {
    NCard (NCase Sg Nom) => "kümmend" ;
    form => kymmeN.s ! form
  }
} ;
-- "hundred" as selected by the number of its multiplier. After a plural
-- multiplier the cardinal sg nom is given explicitly; all other forms come
-- from sadaN.
sataaN : {s : MorphoEst.Number => CardOrd => Str} = {
  s = table {
    Sg => sadaN.s ;
    Pl => table {
      NCard (NCase Sg Nom) => "sada" ;
      form => sadaN.s ! form
    }
  }
} ;
-- "thousand" as selected by the number of its multiplier, built the same
-- way on top of tuhatN.
tuhattaN = {
  s = table {
    Sg => tuhatN.s ;
    Pl => table {
      NCard (NCase Sg Nom) => "tuhat" ;
      form => tuhatN.s ! form
    }
  }
} ;
-- Numerals written with digits.
lincat
Dig = TDigit ;
lin
IDig dig = dig ;
-- A digit in front of a digit sequence: the leading digit is always in its
-- cardinal sg nom form, only the rest carries the requested form.
-- The result is always plural.
IIDig dig rest = {
  n = Pl ;
  s = \\form => dig.s ! NCard (NCase Sg Nom) ++ rest.s ! form
} ;
-- The ten digits. All use mkDig, except 1, which is built with mk3Dig
-- so that its number can be given as singular.
D_0 = mkDig "0" ;
D_1 = mk3Dig "1" "1." MorphoEst.Sg ;
D_2 = mkDig "2" ;
D_3 = mkDig "3" ;
D_4 = mkDig "4" ;
D_5 = mkDig "5" ;
D_6 = mkDig "6" ;
D_7 = mkDig "7" ;
D_8 = mkDig "8" ;
D_9 = mkDig "9" ;
oper
-- Type of a digit: a number feature plus cardinal/ordinal strings.
TDigit = {
  n : MorphoEst.Number ;
  s : CardOrd => Str
} ;
-- General constructor: cardinal string, ordinal string, number. The strings
-- do not vary with the noun form.
mk3Dig : Str -> Str -> MorphoEst.Number -> TDigit = \card,ord,num -> {
  n = num ;
  s = table {
    NOrd _ => ord ;
    NCard _ => card
  }
} ;
-- Plural digit with explicit cardinal and ordinal strings.
mk2Dig : Str -> Str -> TDigit = \card,ord -> mk3Dig card ord MorphoEst.Pl ;
-- Plural digit whose ordinal is the cardinal followed by a full stop.
mkDig : Str -> TDigit = \card -> mk3Dig card (card + ".") MorphoEst.Pl ;
}

View File

@@ -0,0 +1,786 @@
--1 Estonian Lexical Paradigms
--
-- Based on the Finnish Lexical Paradigms by Aarne Ranta 2003--2008
--
-- This is an API to the user of the resource grammar
-- for adding lexical items. It gives functions for forming
-- expressions of open categories: nouns, adjectives, verbs.
--
-- Closed categories (determiners, pronouns, conjunctions) are
-- accessed through the resource syntax API and $Structural.gf$.
--
-- The main difference with $MorphoEst.gf$ is that the types
-- referred to are compiled resource grammar types. We have moreover
-- had the design principle of always having existing forms, rather
-- than stems, as string arguments of the paradigms.
--
-- The structure of functions for each word class $C$ is the following:
-- there is a polymorphic constructor $mkC$, which takes one or
-- a few arguments. In Estonian, one argument is enough in ??? % of
-- cases on average.
resource ParadigmsEst = open
(Predef=Predef),
Prelude,
MorphoEst,
HjkEst,
CatEst
in {
flags optimize=noexpand ; coding=utf8;
--2 Parameters
--
-- To abstract over gender, number, and (some) case names,
-- we define the following identifiers. The application programmer
-- should always use these constants instead of the constructors
-- defined in $ResEst$.
oper
-- Abstract names for number, the 14 cases and the infinitive forms.
-- Their definitions (plain aliases of the MorphoEst constructors) follow
-- later in this module.
Number : Type ;
singular : Number ;
plural : Number ;
Case : Type ;
nominative : Case ; -- e.g. "karp"
genitive : Case ; -- e.g. "karbi"
partitive : Case ; -- e.g. "karpi"
illative : Case ; -- e.g. "karbisse/karpi"
inessive : Case ; -- e.g. "karbis"
elative : Case ; -- e.g. "karbist"
allative : Case ; -- e.g. "karbile"
adessive : Case ; -- e.g. "karbil"
ablative : Case ; -- e.g. "karbilt"
translative : Case ; -- e.g. "karbiks"
terminative : Case ; -- e.g. "karbini"
essive : Case ; -- e.g. "karbina"
abessive : Case ; -- e.g. "karbita"
comitative : Case ; -- e.g. "karbiga"
infDa : InfForm ; -- e.g. "lugeda"
infDes : InfForm ; -- presumably e.g. "lugedes" (TODO confirm)
infMa : InfForm ; -- e.g. "lugema"
infMas : InfForm ; -- e.g. "lugemas"
infMaks : InfForm ; -- e.g. "lugemaks"
infMast : InfForm ; -- e.g. "lugemast"
infMata : InfForm ; -- e.g. "lugemata"
-- The following type is used for defining *rection*, i.e. complements
-- of many-place verbs and adjectives. A complement can be defined by
-- just a case, or a pre/postposition and a case.
prePrep : Case -> Str -> Prep ; -- preposition, e.g. comitative "koos"
postPrep : Case -> Str -> Prep ; -- postposition, e.g. genitive "taga"
postGenPrep : Str -> Prep ; -- genitive postposition, e.g. "taga"
casePrep : Case -> Prep ; -- just case, e.g. adessive
-- Types of dictionary entries, accepted by mkN, mkA and mkV.
-- TODO build the dict
NW : Type ; -- Noun from DictEst (WordNet)
AW : Type ; -- Adjective from DictEst (WordNet)
VW : Type ; -- Verb from DictEst (WordNet)
AdvW : Type ; -- Adverb from DictEst (WordNet)
--2 Nouns
-- The worst case gives six forms.
-- In practice just a couple of forms are needed to select the paradigm.
oper
-- The regular noun heuristic takes just one form (singular
-- nominative) and analyses it to pick the correct paradigm.
--
-- If the one-argument paradigm does not give the correct result, one can
-- add the singular genitive, then the singular partitive, then the plural
-- partitive. The forms given explicitly always end up in the paradigm as
-- given; only the remaining forms are guessed.
--
-- This signature is documentation only: it lists exactly the variants that
-- the definition of mkN (further down) implements.
mkN : overload {
  mkN : (nisu : Str) -> N ; -- predictable nouns, covers 82%
  mkN : (link,lingi : Str) -> N ; -- sg nom, sg gen
  mkN : (tukk,tuku,tukku : Str) -> N ; -- sg nom, sg gen, sg part
  mkN : (pank,panga,panka,panku : Str) -> N ; -- sg nom,gen,part, pl.part
  -- mkN : (olo,n,a,na,oon,jen,ja,ina,issa,ihin : Str) -> N ; -- worst case, 10 forms
  mkN : (oun,ouna,ouna,ounasse,ounte,ounu : Str) -> N ; -- worst case, 6 forms
  -- mkN : (oun,ouna,ouna,ounasse,ounte,ounu,ountesse : Str) -> N ; -- 7 forms; not implemented
  mkN : (pika : Str) -> (juna : N) -> N ; -- compound with invariable prefix
  mkN : (oma : N) -> (tunto : N) -> N ; -- compound with inflecting prefix
  mkN : NW -> N ; -- noun from DictEst (WordNet)
} ;
-- Nouns used as functions need a case, of which the default is
-- the genitive.
mkN2 : overload {
mkN2 : N -> N2 ; -- relational noun with genitive
mkN2 : N -> Prep -> N2 -- relational noun with another prep.
} ;
mkN3 : N -> Prep -> Prep -> N3 ; -- relation with two complements
-- Proper names can be formed by using declensions for nouns.
-- Only the singular forms of the noun are used in the name.
mkPN : overload {
mkPN : Str -> PN ; -- predictable noun made into name
mkPN : N -> PN -- any noun made into name
} ;
--2 Adjectives
-- Non-comparison one-place adjectives are just like nouns.
-- The regular adjectives are based on the noun paradigms in the positive.
-- Comparison adjectives have three forms.
-- The comparative and the superlative
-- are always inflected in the same way, so the nominative of them is actually
-- enough (TODO: confirm).
--
-- The agreement type of the adjective is given by an $Infl$ value (not a
-- boolean). This signature is documentation only: it lists exactly the
-- variants that the definition of mkA (further down) implements.
mkA : overload {
  mkA : Str -> A ; -- regular noun made into adjective
  mkA : N -> A ; -- any noun made into adjective
  mkA : N -> (infl : Infl) -> A ; -- noun made into adjective, agreement type specified
  mkA : N -> (parem, parim : Str) -> A ; -- deviating comparison forms
  mkA : N -> (parem, parim : Str) -> (infl : Infl) -> A ; -- deviating comparison forms, agreement type specified
  mkA : AW -> A ; -- adjective from DictEst (WordNet)
} ;
-- Two-place adjectives: the adjective extended with the Prep that governs
-- its second argument.
mkA2 : A -> Prep -> A2 -- e.g. "jaollinen" casePrep adessive
  = \adj,prep -> adj ** {lock_A2 = <> ; c2 = prep} ;
genAttrA : Str -> A ; -- genitive attributes ; no agreement to head, no comparison forms.
--2 Verbs
--
-- The grammar does not cover the quotative mood and some nominal
-- forms. One way to see the coverage is to linearize a verb to
-- a table.
-- The worst case needs eight forms, as shown in the following.
--
-- This signature is documentation only: argument order and types follow
-- the definition of mkV (further down).
mkV : overload {
  mkV : (lugema : Str) -> V ; -- predictable verbs, covers n %
  mkV : (lugema,lugeda : Str) -> V ; -- ma-infinitive, da-infinitive
  mkV : (lugema,lugeda,loeb : Str) -> V ; -- also the b-form (present)
  mkV : (lugema,lugeda,loeb,loetakse : Str) -> V ; -- also the takse-form
  mkV : (tegema,teha,teeb,tehakse,tehke,tegi,teinud,tehtud : Str) -> V ; -- worst-case verb
  mkV : (aru : Str) -> (saama : V) -> V ; -- particle verbs (püsiühendid TODO)
  mkV : VW -> V ; -- verb from DictEst (WordNet)
} ;
-- All the patterns above have $nominative$ as subject case.
-- If another case is wanted, use the following.
-- NOTE: several example words in the comments of this section ("täytyä",
-- "puhua", "siirtää", "antaa") are Finnish, inherited from the Finnish
-- paradigms this module is based on.
caseV : Case -> V -> V ; -- deviating subj. case, e.g. genitive "täytyä"
-- The verbs "be" and "go" are special.
vOlema : V ; -- the verb "be"
vMinema : V ; -- the verb "go"
--3 Two-place verbs
--
-- Two-place verbs need an object case, and can have a pre- or postposition.
-- The default is direct (accusative) object. There is also a special case
-- with case only. The string-only argument case yields a regular verb with
-- accusative object.
mkV2 : overload {
mkV2 : Str -> V2 ; -- predictable direct transitive
mkV2 : V -> V2 ; -- direct transitive
mkV2 : V -> Case -> V2 ; -- complement just case
mkV2 : V -> Prep -> V2 ; -- complement pre/postposition
} ;
--3 Three-place verbs
--
-- Three-place (ditransitive) verbs need two prepositions, of which
-- the first one or both can be absent.
mkV3 : V -> Prep -> Prep -> V3 ; -- e.g. puhua, allative, elative
dirV3 : V -> Case -> V3 ; -- siirtää, (accusative), illative
dirdirV3 : V -> V3 ; -- antaa, (accusative), (allative)
--3 Other complement patterns
--
-- Verbs and adjectives can take complements such as sentences,
-- questions, verb phrases, and adjectives.
-- NOTE: most example verbs in the comments below ("sanoa", "alkaa",
-- "käskeä", "kieltää", "maistua", "maalata", "kysyä") are Finnish,
-- inherited from the Finnish paradigms this module is based on.
mkV0 : V -> V0 ; --%
mkVS : V -> VS ;
mkV2S : V -> Prep -> V2S ; -- e.g. "sanoa" allative
mkVV : V -> VV ; -- e.g. "alkaa"
mkVVf : V -> InfForm -> VV ; -- e.g. "hakkama" infMa
mkV2V : V -> Prep -> V2V ; -- e.g. "käskeä" genitive
mkV2Vf : V -> Prep -> InfForm -> V2V ; -- e.g. "kieltää" partitive infMast
mkVA : V -> Prep -> VA ; -- e.g. "maistua" ablative
mkV2A : V -> Prep -> Prep -> V2A ; -- e.g. "maalata" accusative translative
mkVQ : V -> VQ ;
mkV2Q : V -> Prep -> V2Q ; -- e.g. "kysyä" ablative
mkAS : A -> AS ; --%
mkA2S : A -> Prep -> A2S ; --%
mkAV : A -> AV ; --%
mkA2V : A -> Prep -> A2V ; --%
-- Notice: categories $AS, A2S, AV, A2V$ are just $A$,
-- and the second argument is given
-- as an adverb. Likewise
-- $V0$ is just $V$.
V0 : Type ; --%
AS, A2S, AV, A2V : Type ; --%
--.
-- The definitions should not bother the user of the API. So they are
-- hidden from the document.
-- The API names for number, case and infinitive forms are plain aliases of
-- the corresponding types and constructors.
Case = MorphoEst.Case ;
Number = MorphoEst.Number ;
singular = Sg ;
plural = Pl ;
nominative = Nom ;
genitive = Gen ;
partitive = Part ;
illative = Illat ;
inessive = Iness ;
elative = Elat ;
allative = Allat ;
adessive = Adess ;
ablative = Ablat ;
translative = Transl ;
terminative = Termin ;
essive = Ess ;
abessive = Abess ;
comitative = Comit ;
infDa = InfDa ; infMa = InfMa ; infMast = InfMast ;
infDes = InfDes ; infMas = InfMas ; infMaks = InfMaks ; infMata = InfMata ;
-- Prep constructors. A Prep holds the governed NP form (c), the adposition
-- string (s, possibly empty) and whether the string precedes the NP (isPre).
prePrep : Case -> Str -> Prep =
  \cas,word -> {s = word ; c = NPCase cas ; isPre = True ; lock_Prep = <>} ;
postPrep : Case -> Str -> Prep =
  \cas,word -> {s = word ; c = NPCase cas ; isPre = False ; lock_Prep = <>} ;
-- Postposition governing the genitive.
postGenPrep word = {
  s = word ; c = NPCase genitive ; isPre = False ; lock_Prep = <>} ;
-- Bare case without any adposition string.
casePrep : Case -> Prep =
  \cas -> {s = [] ; c = NPCase cas ; isPre = True ; lock_Prep = <>} ;
-- Direct object: the accusative NP form without any adposition string.
accPrep = {s = [] ; c = NPAcc ; isPre = True ; lock_Prep = <>} ;
-- Dictionary entry types: raw form tables (or a plain string for adverbs),
-- each tagged with its own lock field so that the types stay distinct.
NW = {s : NForms ; lock_NW : {}} ;
AW = {s : NForms ; lock_AW : {}} ;
VW = {s : VForms ; lock_VW : {}} ;
AdvW = {s : Str ; lock_AdvW : {}} ;
-- Implementation of the mkN overload; one variant per number of given forms.
-- The fourth form of the four-argument variant is the plural partitive.
mkN = overload {
  mkN : (sgNom : Str) -> N = mk1N ;
  mkN : (sgNom,sgGen : Str) -> N = mk2N ;
  mkN : (sgNom,sgGen,sgPart : Str) -> N = mk3N ;
  mkN : (sgNom,sgGen,sgPart,plPart : Str) -> N = mk4N ;
  mkN : (sgNom,sgGen,sgPart,sgIllat,plGen,plPart : Str) -> N = mk6N ;
  mkN : (prefix : Str) -> (head : N) -> N = mkStrN ;
  mkN : (prefix,head : N) -> N = mkNN ;
  mkN : (entry : NW) -> N = \entry -> nForms2N entry.s ;
} ;
-- Adjective forms (including comparative and superlative) derived from noun
-- forms: from a single string via hjk_type, or from an existing noun.
-- Both results are marked as regularly inflecting.
mk1A : Str -> A = \suur ->
  aForms2A (nforms2aforms (hjk_type suur)) ** {infl = Regular} ;
mkNA : N -> A = \suur ->
  aForms2A (nforms2aforms (n2nforms suur)) ** {infl = Regular} ;
-- One-argument noun: the paradigm is guessed by hjk_type from the sg nom.
mk1N : (link : Str) -> N = \sgNom ->
  nForms2N (hjk_type sgNom) ** {lock_N = <>} ;
-- Two-argument noun. The paradigm is guessed by nForms2, but the two forms
-- supplied by the user are then put back at indices 0 and 1, so that they
-- appear in the result even if the guess was wrong.
mk2N : (link,lingi : Str) -> N = \sgNom,sgGen ->
  let
    guessed : NForms = nForms2 sgNom sgGen ;
    forms : NForms = table {
      0 => sgNom ;
      1 => sgGen ;
      2 => guessed ! 2 ;
      3 => guessed ! 3 ;
      4 => guessed ! 4 ;
      5 => guessed ! 5
    } ;
  in nForms2N forms ** {lock_N = <>} ;
-- Three-argument noun. The paradigm is guessed by nForms3, but the three
-- forms supplied by the user are then put back at indices 0-2, so that they
-- appear in the result even if the guess was wrong.
mk3N : (tukk,tuku,tukku : Str) -> N = \sgNom,sgGen,sgPart ->
  let
    guessed : NForms = nForms3 sgNom sgGen sgPart ;
    forms : NForms = table {
      0 => sgNom ;
      1 => sgGen ;
      2 => sgPart ;
      3 => guessed ! 3 ;
      4 => guessed ! 4 ;
      5 => guessed ! 5
    } ;
  in nForms2N forms ** {lock_N = <>} ;
{- mk1N : (link : Str) -> N = \s -> nForms2N (hjk_type s) ** {lock_N = <> } ;
mk2N : (link,lingi : Str) -> N = \s,t -> nForms2N (nForms2 s t) ** {lock_N = <>} ;
mk3N : (tukk,tuku,tukku : Str) -> N = \s,t,u -> nForms2N (nForms3 s t u) ** {lock_N = <>} ;
--regular mk4N
mk4N : (paat,paadi,paati,paate : Str) -> N = \s,t,u,v -> nForms2N (nForms4 s t u v) ** {lock_N = <>} ;
-}
-- Experimental: make sure that the forms specified by the user end up in
-- the paradigm, even if the rest of the guess is wrong.
-- The fourth argument is the plural partitive: it is stored at index 5,
-- while indices 3 and 4 are taken from the guess made by nForms4.
mk4N : (paat,paadi,paati,paate : Str) -> N = \sgNom,sgGen,sgPart,plPart ->
  let
    guessed : NForms = nForms4 sgNom sgGen sgPart plPart ;
    forms : NForms = table {
      0 => sgNom ;
      1 => sgGen ;
      2 => sgPart ;
      3 => guessed ! 3 ;
      4 => guessed ! 4 ;
      5 => plPart
    } ;
  in nForms2N forms ** {lock_N = <>} ;
-- Worst case: all six forms are given and passed on to nForms6 unchanged.
mk6N : (oun,ouna,ouna,ounasse,ounte,ounu : Str) -> N =
  \f1,f2,f3,f4,f5,f6 -> nForms2N (nForms6 f1 f2 f3 f4 f5 f6) ** {lock_N = <>} ;
-- Compound with an invariable string prefix glued to every form of the head.
mkStrN : Str -> N -> N = \prefix,head -> {
  lock_N = <> ;
  s = \\form => prefix + head.s ! form
} ;
-- Compound of two nouns: both parts are inflected in the same form and
-- glued together.
mkNN : N -> N -> N = \first,second -> {
  lock_N = <> ;
  s = \\form => first.s ! form + second.s ! form
} ; ---- TODO: oma in possessive suffix forms
-- Two-argument noun heuristic: picks a paradigm from the pair of given
-- forms (link, lingi). The patterns are tried in order, so more specific
-- ones must stay above more general ones; anything unmatched falls back to
-- the one-argument heuristic hjk_type.
nForms2 : (_,_ : Str) -> NForms = \link,lingi ->
let
-- last character of the second form / the second form without it
i = last lingi ;
reegl = init lingi ;
in
case <link,lingi> of {
-- -el / -er nouns; the first two patterns catch second forms in
-- consonant + "li"/"ri", the next two those that keep the "e"
<_ + "el", _ + #c + "li"> => hjk_type_IVb_audit1 link reegl ;
<_ + "er", _ + #c + "ri"> => hjk_type_IVb_audit1 link reegl ;
<_ + #c + "el", _ + #c + "eli"> => hjk_type_IVb_audit link i ;
<_ + #c + "er", _ + #c + "eri"> => hjk_type_IVb_audit link i ;
-- e-final nouns whose second form has a different consonant pattern
<_ + "be", _ + "pe"> => hjk_type_VII_touge2 link lingi ;
<_ + "de", _ + "te"> => hjk_type_VII_touge2 link lingi ;
<_ + "ge", _ + "ke"> => hjk_type_VII_touge2 link lingi ;
<_ + "pe", _ + "ppe"> => hjk_type_VII_touge2 link lingi ;
<_ + "te", _ + "tte"> => hjk_type_VII_touge2 link lingi ;
<_ + "ke", _ + "kke"> => hjk_type_VII_touge2 link lingi ;
<_ + "nne", _ + "nde"> => hjk_type_VII_touge2 link lingi ;
--below 3 don't add much, could just delete
--t6uge recognition is easy, because that doesn't introduce lot of other errors
--but probably pointless to fill this with all cases
<_ + "e", _ + #c + "me"> => hjk_type_VII_touge2 link lingi ;
<_ + "se", _ + "ske"> => hjk_type_VII_touge2 link lingi ;
<_ + "re", _ + "rde"> => hjk_type_VII_touge2 link lingi ;
<_ + #v + "e", _+"de"> => hjk_type_VII_touge2 link lingi ; --riie:riide
--improved total count a little, but introduced new errors
--not recommended, not stable and productive word class
--<_ + "i", _ + "e"> => dMeri link lingi ;
--introduced a couple of errors, "aine" recognized as "kõne"
--<_ + "ne", _ + "ne"> => hjk_type_III_ratsu link ;
--heuristics to catch palk:palga but not maakas:maaka (for longer words, same with more ?s)
--didn't work, don't try this
--<? + ? + #c, ? + ? + #c + #v> => hjk_type_IVb_audit link i ;
_ => hjk_type link
} ;
-- Three-argument noun heuristic: picks a paradigm from the triple of given
-- forms (tukk, tuku, tukku). The patterns are tried in order; anything
-- unmatched falls back to the two-argument heuristic nForms2, which
-- ignores the third form.
nForms3 : (_,_,_ : Str) -> NForms = \tukk,tuku,tukku ->
-- last character of the second form
let u = last tuku ;
in case <tukk,tuku,tukku> of {
--cases handled reliabl(ish) by 1- and 2-arg opers
<_+"nd",_,_> => hjk_type tukk ;
<_+"el",_,_> => nForms2 tukk tuku ;
<_+"er",_,_> => nForms2 tukk tuku ;
--distinguish between hammas and maakas
<_+"as",_+"a",_+"ast"> => dHammas tukk tuku ;
<_+"es",_+"e",_+"est"> => dHammas tukk tuku ;
<_+"us",_+"u",_+"ust"> => dHammas tukk tuku ;
<_+"as",_,_+"at"> => hjk_type_IVb_maakas tukk ;
<_ + "ik", _ + "iku", _ + "ikku"> => hjk_type_VI_imelik tukk ; --imelik:_:imelikku caught here
<_ + #c, _ + #v, _ + #v> => hjk_type_VI_tukk tukk tuku ;
-- must stay above the general "third form in vowel + t" pattern below
<_ + "ud", _ + "u", _ + "ut"> => nForms2 tukk tuku ; -- -nud/-tud participles are not like 'voolik'
<_ + #c, _ + #v, _ + #v + "t"> => hjk_type_IVb_audit tukk u ; --voolik:_:voolikut caught here
_ => nForms2 tukk tuku
} ;
-- Four-argument noun heuristic: picks a paradigm from the four given forms
-- (paat, paadi, paati, paate). The fourth form is only used to tell apart
-- paradigms that agree in the first three; anything unmatched falls back to
-- the three-argument heuristic nForms3.
nForms4 : (_,_,_,_ : Str) -> NForms = \paat,paadi,paati,paate ->
case <paat,paadi,paati,paate> of {
-- distinguish between joonis and segadus
<_ +("ne"|"s"), _+"se", _+"st", _+"seid"> => hjk_type_Va_otsene paat ;
<_ +("ne"|"s"), _+"se", _+"st", _+"si"> => hjk_type_Vb_oluline paat ;
<_ +"ne", _+"se", _+"set", _+"seid"> => nForms3 paat paadi paati ; -- -ne adjectives ('algne') are not like 'tõuge'
--distinguish between kõne and aine
<_ +"e", _+"e", _+"et", _+"sid"> => hjk_type_III_ratsu paat ;
<_ +"e", _+"e", _+"et", _+"eid"> => hjk_type_VII_touge2 paat paadi ;
_ => nForms3 paat paadi paati
} ;
{-
--Version that uses pl gen instead of pl part
nForms4 : (_,_,_,_ : Str) -> NForms = \paat,paadi,paati,paatide ->
case <paat,paadi,paati,paatide> of {
-- pl gen can't distinguish between joonis and segadus
-- <_ +("ne"|"s"), _+"se", _+"st", _+"seid"> => hjk_type_Va_otsene paat ;
-- <_ +("ne"|"s"), _+"se", _+"st", _+"si"> => hjk_type_Vb_oluline paat ;
--pl gen can distinguish between kõne and aine
--plus side that any noun that is formed with 4-arg,
--the user given forms are inserted to the paradigm,
--and more forms are created from pl gen, none from pl part
<_ +"e", _+"e", _+"et", _+"de"> => hjk_type_III_ratsu paat ;
<_ +"e", _+"e", _+"et", _+"te"> => hjk_type_VII_touge2 paat paadi ;
_ => nForms3 paat paadi paati
} ;
-}
-- Relational nouns: the complement is governed by the genitive unless a
-- Prep is given.
mkN2 = overload {
  mkN2 : N -> N2 = \noun -> mmkN2 noun (casePrep genitive) ;
  mkN2 : N -> Prep -> N2 = mmkN2
} ;
-- Worker: attaches the Prep and computes the complement position from it.
mmkN2 : N -> Prep -> N2 = \noun,prep ->
  noun ** {lock_N2 = <> ; c2 = prep ; isPre = mkIsPre prep} ;
-- Three-place noun: two Preps, each with its own complement position.
mkN3 = \noun,prep1,prep2 -> noun ** {
  lock_N3 = <> ;
  c2 = prep1 ;
  c3 = prep2 ;
  isPre = mkIsPre prep1 ;
  isPre2 = mkIsPre prep2
} ;
-- Computes the isPre value of a noun complement from its Prep: False only
-- for a Prep that governs the genitive and has isPre = True (such as the
-- bare genitive made by casePrep), True in every other case.
mkIsPre : Prep -> Bool = \prep ->
  case <prep.c, prep.isPre> of {
    <NPCase Gen, True> => False ;
    _ => True
  } ;
-- Proper names keep only the singular forms of the underlying noun.
mkPN = overload {
  mkPN : Str -> PN = mkPN_1 ;
  mkPN : N -> PN = \noun -> {lock_PN = <> ; s = \\cas => noun.s ! NCase Sg cas} ;
} ;
-- From a string: the noun is first guessed with the one-argument mk1N.
mkPN_1 : Str -> PN = \name ->
  {lock_PN = <> ; s = \\cas => (mk1N name).s ! NCase Sg cas} ;
-- adjectives
-- Implementation of the mkA overload. Unless an Infl value is passed
-- explicitly, the result is marked as Regular.
mkA = overload {
mkA : Str -> A = mkA_1 ;
mkA : N -> A = \n -> noun2adjDeg n ** {infl = Regular ; lock_A = <>} ;
mkA : N -> (parem,parim : Str) -> A = regAdjective ;
mkA : N -> (infl : Infl) -> A = \n,infl -> noun2adjDeg n ** {infl = infl ; lock_A = <>} ;
-- TODO: temporary usage of regAdjective1
mkA : N -> (valmim,valmeim : Str) -> (infl : Infl) -> A =
\n,c,s,infl -> (regAdjective1 n c s) ** {infl = infl ; lock_A = <>} ;
-- NOTE(review): unlike the variants above, this one adds no lock_A field
mkA : (sana : AW) -> A = \w -> noun2adjDeg (nForms2N w.s) ** {infl = Regular} ;
} ;
-- Genitive attribute: one invariable string for every degree and form.
genAttrA attr = {lock_A = <> ; infl = Invariable ; s = \\_,_ => attr} ;
-- Regular adjective from a single string, via the one-argument noun heuristic.
mkA_1 : Str -> A = \word ->
  noun2adjDeg (mk1N word) ** {infl = Regular ; lock_A = <>} ;
-- auxiliaries
-- Assembles an adjective from its three degrees; marked as Regular.
mkAdjective : (_,_,_ : Adj) -> A = \posit,compar,superl -> {
  lock_A = <> ;
  infl = Regular ;
  s = table {
    Superl => superl.s ;
    Compar => compar.s ;
    Posit => posit.s
  }
} ;
-- Adjectives whose comparison forms are explicitly given.
-- Both comparison forms are inflected with the audit rule (with "a"),
-- which always works for them.
regAdjective : Noun -> Str -> Str -> A = \posit,compar,superl ->
  let
    degree : Str -> Adj = \form ->
      noun2adjComp False (nForms2N (hjk_type_IVb_audit form "a"))
  in mkAdjective (noun2adj posit) (degree compar) (degree superl) ;
-- TODO: this is a temporary hack that converts A ~> Adjective.
-- The caller needs this otherwise ** fails.
-- This should be cleaned up but I don't know how (K).
regAdjective1 : Noun -> Str -> Str -> Adjective = regAdjective ;
-- Adjectives whose comparison forms can be derived from the sg gen:
-- comparative = sg gen + "m"; superlative = sg gen, with a final "i"
-- turned into "e", + "im".
-- In case of comparative this fails only for 70 adjectives.
-- Superlative is more complex, and does not always exist,
-- e.g. lai -> laiem -> laiim? / laieim?
-- See also: http://www.eki.ee/books/ekk09/index.php?p=3&p1=4&id=208
-- Rather use "kõige" + Comp instead of the superlative.
noun2adjDeg : Noun -> Adjective = \noun ->
  let
    gen : Str = noun.s ! NCase Sg Gen ;
    superlStem : Str = case gen of {
      stem + "i" => stem + "e" ;
      _ => gen
    }
  in
    regAdjective noun (gen + "m") (superlStem + "im") ;
-- verbs
-- Implementation of the mkV overload; one variant per number of given
-- forms, plus particle verbs and dictionary entries.
mkV = overload {
mkV : (lugema : Str) -> V = mk1V ;
mkV : (lugema,lugeda : Str) -> V = mk2V ;
mkV : (lugema,lugeda,loeb : Str) -> V = mk3V ;
mkV : (lugema,lugeda,loeb,loetakse : Str) -> V = mk4V ;
mkV : (tegema,teha,teeb,tehakse,tehke,tegi,teinud,tehtud : Str) -> V = mk8V ;
-- note the argument order: the particle comes first, then the verb
mkV : (aru : Str) -> (saama : V) -> V = mkPV ; -- particle verbs
mkV : (sana : VW) -> V = \w -> vforms2V w.s ** {sc = NPCase Nom ; lock_V = <>} ;
} ;
-- Verb constructors by number of given forms. Each passes its arguments to
-- the matching vFormsN heuristic and marks the subject case as nominative.
mk1V : Str -> V = \ma ->
  vforms2V (vForms1 ma) ** {sc = NPCase Nom ; lock_V = <>} ;
mk2V : (_,_ : Str) -> V = \ma,da ->
  vforms2V (vForms2 ma da) ** {sc = NPCase Nom ; lock_V = <>} ;
mk3V : (_,_,_ : Str) -> V = \ma,da,b ->
  vforms2V (vForms3 ma da b) ** {sc = NPCase Nom ; lock_V = <>} ;
mk4V : (x1,_,_,x4 : Str) -> V = \f1,f2,f3,f4 ->
  vforms2V (vForms4 f1 f2 f3 f4) ** {sc = NPCase Nom ; lock_V = <>} ;
mk8V : (x1,_,_,_,_,_,_,x8 : Str) -> V = \f1,f2,f3,f4,f5,f6,f7,f8 ->
  vforms2V (vForms8 f1 f2 f3 f4 f5 f6 f7 f8) ** {sc = NPCase Nom ; lock_V = <>} ;
-- Particle verb: the forms and subject case of the base verb, with the
-- given string stored as the particle p.
mkPV : (aru : Str) -> (saama : V) -> V = \particle,verb -> {
  lock_V = <> ;
  sc = verb.sc ;
  p = particle ;
  s = verb.s
} ;
-- One-argument verb heuristic: picks a conjugation class from the
-- ma-infinitive alone. The patterns are tried in order, so the position of
-- each branch matters; anything unmatched ends up in the default class
-- (cElama). The "TS" numbers in the comments presumably refer to the
-- numbered inflection types of a reference dictionary (TODO confirm).
-- This used to be the last case: _ => Predef.error (["expected infinitive, found"] ++ ottaa)
-- regexp example: ("" | ?) + ("a" | "e" | "i") + _ + "aa" =>
vForms1 : Str -> VForms = \lugema ->
let
-- the infinitive without its last two characters, and the result of
-- applying weaker to that stem
luge = Predef.tk 2 lugema ;
loe = weaker luge ;
in
case lugema of {
-- TS 49
-- Small class of CVVma
? + ("ä"|"õ"|"i") + "ima" =>
cKaima lugema ; --käima,viima,võima
? + ("aa"|"ee"|"ää") + "ma" =>
cSaama lugema ; -- saama,jääma,keema
? + ("oo"|"öö"|"üü") + "ma" =>
cJooma lugema ; --jooma,looma,lööma,müüma,pooma,sööma,tooma
-- TS 53
-- NOTE(review): this requires consonant + vowel directly before "elema";
-- "tegelema" itself is "teg" + "elema" and does not seem to match.
-- Confirm the intended pattern.
_ + #c + #v + "elema" =>
cTegelema lugema ; --not aelema
-- TS 54
-- Small class, just list all members
("tule"|"sure"|"pane") + "ma" =>
cTulema lugema ;
-- TS 55-57
-- Consonant gradation
-- Regular (55-56)'leppima' and irregular (57) 'lugema'
-- For reliable results regarding consonant gradation, use mk3V
_ + "ndima" =>
cLeppima lugema ;
_ + #lmnr + ("k"|"p"|"t"|"b") + ("ima"|"uma") =>
cLeppima lugema ;
_ + ("sk"|"ps"|"ks"|"ts"|"pl") + ("ima") => --|"uma") =>
cLeppima lugema ;
_ + ("hk"|"hm"|"hn"|"hr"|"ht") + ("ima") => --most *hCuma are TS 51 (muutuma)
cLeppima lugema ;
_ + #c + "ssima" => --weaker *ss = *ss; should be weaker Css = Cs
cLugema lugema ;
_ + ("pp"|"kk"|"tt"|"ss"|"ff"|"nn"|"mm"|"ll"|"rr") + ("ima"|"uma") =>
cLeppima lugema ;
-- TS 59 (petma, tapma)
-- Use mk4V for TS 60 (jätma, võtma)
? + #v + ("tma"|"pma") =>
cPetma lugema (luge + "etakse") ;
-- TS 58 for rest that end tma (muutma,kartma,...)
_ + "tma" =>
cMuutma lugema ;
-- TS 61 (laulma,kuulma,naerma,möönma)
-- Default vowel e for lma, a for (r|n)ma.
-- Other vowel with mk3V.
_ + "lma" =>
cKuulma lugema (loe + "eb") ;
_ + ("r"|"n") + "ma" =>
cKuulma lugema (loe + "ab") ;
-- TS 63 (andma,hoidma)
-- Other vowel than a (tundma~tunneb) with mk3V
_ + "dma" =>
cAndma lugema (loe + "ab") ;
-- TS 62, 64 (tõusma,mõskma), default vowel e
-- 62 alt form (jooksma,joosta) with mk2V
-- Other vowel than e with mk3V
_ + #c + "ma" =>
cLaskma lugema (loe + "eb") ;
-- TS 65 (pesema)
#c + #v + "sema" =>
cPesema lugema ;
-- TS 66 (nägema)
-- Small class, just list all members
("nägema"|"tegema") =>
cNagema lugema ;
-- TS 67-68 with mk2V
-- no 100% way to distinguish from 50-52 that end in ama
-- TS 69
(?|"") + (?|"") + ? + "tlema" => --vestlema,mõtlema,ütlema; not õnnitlema
cOmblema lugema ;
_ + "tlema" =>
cElama lugema ;
_ + #c + "lema" =>
cOmblema lugema ;
-- TS 50-52
-- Default case
_ =>
cElama lugema
} ;
vForms2 : (_,_ : Str) -> VForms = \maInf,daInf ->
  -- Paradigm from two forms: the ma-infinitive and the da-infinitive.
  -- The da-infinitive settles the classes vForms1 cannot recognise:
  --   * TS 62 alternative form (-ksma ~ -sta)
  --   * TS 67-68 (hüppama ~ hüpata)
  --   * TS 66 (nägema ~ näha)
  --   * TS 54 (tulema ~ tulla)
  --   * TS 50-52 (õnnitlema) that would otherwise be read as TS 69 (mõtlema)
  -- Every other pair is passed on to vForms1, which only looks at the ma-infinitive.
  case <maInf,daInf> of {
    <_ + "ksma", _ + "sta">              => cJooksma maInf ;  -- TS 62, alternative forms
    <_,          _ + "ata">              => cHyppama maInf ;  -- TS 67-68
    <_,          _ + "ha">               => cNagema maInf ;   -- TS 66
    <_,          _ + ("rra"|"lla"|"nna")> => cTulema maInf ;  -- TS 54
    <_ + #c + "lema", _ + #c + "leda">   => cElama maInf ;    -- TS 50-52, not TS 69
    _                                    => vForms1 maInf
  } ;
vForms3 : (_,_,_ : Str) -> VForms = \maInf,daInf,pres3 ->
  -- Paradigm from three forms: ma-infinitive, da-infinitive and the b-form
  -- (3rd person singular present). Needed for
  --   * irregular gradation (taguma ~ taob),
  --   * gradation that cannot be detected (sattuma ~ satub ; pettuma ~ pettub),
  --   * a non-default vowel in the b-form for TS 58-64 (laulma ~ laulab).
  -- Branch order matters: the first matching row wins.
  case <maInf,daInf,pres3> of {
    -- the b-form is passed on so that its vowel is certain
    <_ + "dma", _ + "da",                 _> => cAndma maInf pres3 ;
    <_,         _ + #vv + #lmnr + "da",   _> => cKuulma maInf pres3 ;
    <_,         _ + #c + "ta",            _> => cLaskma maInf pres3 ;
    -- irregular gradation (TS 57)
    <_, _, (""|#c) + #c + #v + #v + "b">     => cLugema maInf ;
    -- consonant gradation: "-Clema" verbs must be caught before the "-Vma" row
    <_ + #c + "lema", _,        _>           => vForms2 maInf daInf ;
    <_ + #v + "ma",   _ + "da", _>           => cSattumaPettuma maInf pres3 ;
    _                                        => vForms2 maInf daInf
  } ;
vForms4 : (x1,_,_,x4 : Str) -> VForms = \maInf,daInf,pres3,passPres ->
  -- Four forms give the full paradigm of a regular verb
  -- (source: http://www.eki.ee/books/ekk09/index.php?p=3&p1=5&id=227):
  -- ma-infinitive, da-infinitive, b-form and the takse-form.
  -- regVForms covers the majority; the rows above the default filter out the
  -- known irregular classes first.
  -- TS 49 is deliberately not matched: käima (49) cannot be told apart from
  -- täima (50), and compounds such as taaslooma cannot be detected.
  case <maInf,daInf,pres3,passPres> of {
    <_, _ + ("kka"|"ppa"|"tta"), _, _ + "takse"> => cPetma maInf passPres ;
    <_ + "dma", _, _, _ + "takse">               => cAndma maInf pres3 ;
    <_ + ("ts"|"ks"|"sk") + "ma", _, _, _>       => cLaskma maInf pres3 ;
    <_, _ + ("lla"|"nna"|"rra"), _, _>           => cTulema maInf ;
    <_, _ + "ha", _, _>                          => cNagema maInf ;
    <_ + #v + "sema", _ + "sta", _, _>           => cPesema maInf ;
    _ => regVForms maInf daInf pres3 passPres
  } ;
-- A verb whose subject stands in the given case.
caseV subjCase verb = {lock_V = <> ; sc = NPCase subjCase ; p = verb.p ; s = verb.s} ;
-- 'olema' and 'minema' as lexical verbs with a nominative subject.
vOlema = verbOlema ** {lock_V = <> ; sc = NPCase Nom} ;
vMinema = verbMinema ** {lock_V = <> ; sc = NPCase Nom} ;
-- Two-place verbs: the complement is given as a preposition, as a bare case,
-- or defaults to the direct object (accPrep).
mk2V2 : V -> Prep -> V2 = \verb,prep -> verb ** {lock_V2 = <> ; c2 = prep} ;
caseV2 : V -> Case -> V2 = \verb,objCase -> mk2V2 verb (casePrep objCase) ;
dirV2 verb = mk2V2 verb accPrep ;
-- Adverbs, either from a plain string or from an AdvW (its s field is used).
mkAdv = overload {
  mkAdv : Str -> Adv = \str -> {lock_Adv = <> ; s = str} ;
  mkAdv : AdvW -> Adv = \advw -> {lock_Adv = <> ; s = advw.s}
} ;
-- Two-place verbs: from an infinitive string or a V (direct object), or from
-- a V together with the complement's case or preposition.
mkV2 = overload {
  mkV2 : Str -> V2 = \inf -> dirV2 (mk1V inf) ;
  mkV2 : V -> V2 = \verb -> dirV2 verb ;
  mkV2 : V -> Case -> V2 = \verb,objCase -> caseV2 verb objCase ;
  mkV2 : V -> Prep -> V2 = \verb,prep -> mk2V2 verb prep
} ;
mk2V2 : V -> Prep -> V2 ;
caseV2 : V -> Case -> V2 ;
dirV2 : V -> V2 ;
-- Three-place verbs: two complement prepositions; dirV3 has a direct object
-- plus a case-marked one, dirdirV3 uses the allative for the latter.
mkV3 verb prep2 prep3 = verb ** {lock_V3 = <> ; c2 = prep2 ; c3 = prep3} ;
dirV3 verb objCase = mkV3 verb accPrep (casePrep objCase) ;
dirdirV3 verb = dirV3 verb allative ;
-- Verbs with sentence, verb-phrase and question complements.
mkVS verb = verb ** {lock_VS = <>} ;
mkVV verb = mkVVf verb infDa ;   -- default complement infinitive: infDa
mkVVf verb infForm = verb ** {lock_VV = <> ; vi = infForm} ;
mkVQ verb = verb ** {lock_VQ = <>} ;
-- Categories that are only notational variants of V, A and A2.
V0 : Type = V ;
AS, A2S, AV : Type = A ;
A2V : Type = A2 ;
mkV0 verb = verb ** {lock_V = <>} ;
mkV2S verb prep = mk2V2 verb prep ** {lock_V2S = <>} ;
mkV2V verb prep = mkV2Vf verb prep infMa ;   -- default complement infinitive: infMa
mkV2Vf verb prep infForm = mk2V2 verb prep ** {lock_V2V = <> ; vi = infForm} ;
mkVA verb prep = verb ** {lock_VA = <> ; c2 = prep} ;
mkV2A verb prep2 prep3 = verb ** {lock_V2A = <> ; c2 = prep2 ; c3 = prep3} ;
mkV2Q verb prep = mk2V2 verb prep ** {lock_V2Q = <>} ;
mkAS adj = adj ** {lock_A = <>} ;
mkA2S adj prep = mkA2 adj prep ** {lock_A = <>} ;
mkAV adj = adj ** {lock_A = <>} ;
mkA2V adj prep = mkA2 adj prep ** {lock_A2 = <>} ;
} ;

View File

@@ -0,0 +1,27 @@
concrete PhraseEst of Phrase = CatEst ** open ResEst, (P = Prelude) in {

  -- Phrases and utterances. Bare noun phrases, nouns, adjectives and numerals
  -- are uttered in the nominative (singular where a number is needed);
  -- a bare verb phrase is uttered as a da-infinitive.
  lin
    PhrUtt pc u v = {s = pc.s ++ u.s ++ v.s} ;

    UttS sent = sent ;
    UttQS quest = {s = quest.s} ;

    -- Imperatives: singular, plural and polite addressee.
    UttImpSg p i = {s = p.s ++ i.s ! p.p ! Ag Sg P2} ;
    UttImpPl p i = {s = p.s ++ i.s ! p.p ! Ag Pl P2} ;
    UttImpPol p i = {s = p.s ++ i.s ! p.p ! AgPol} ;

    UttIP who = {s = who.s ! NPCase Nom} ;
    UttIAdv how = how ;
    UttNP thing = {s = thing.s ! NPCase Nom} ;
    UttVP doing = {s = infVP (NPCase Nom) Pos (agrP3 Sg) doing InfDa} ;
    UttAdv where = where ;
    UttCN cn = {s = cn.s ! NCase Sg Nom} ;
    UttAP ap = {s = ap.s ! P.False ! NCase Sg Nom} ;
    UttCard card = {s = card.s ! Sg ! Nom} ;

    NoPConj = {s = []} ;
    PConjConj c = {s = c.s2} ;

    NoVoc = {s = []} ;
    VocNP thing = {s = "," ++ thing.s ! NPCase Nom} ;
}

View File

@@ -0,0 +1,102 @@
concrete QuestionEst of Question = CatEst ** open ResEst, Prelude in {
  flags optimize=all_subs ; coding=utf8;

  lin
    -- Yes/no question: the clause in its SQuest form.
    QuestCl cl = {
      s = \\t,a,p => cl.s ! t ! a ! p ! SQuest
    } ;

    -- Subject question: the interrogative pronoun is the (3rd person) subject
    -- of an ordinary declarative clause.
    QuestVP ip vp =
      let
        cl = mkClause (subjForm (ip ** {isPron = False ; a = agrP3 ip.n}) vp.sc) (agrP3 ip.n) vp
      in {
        s = \\t,a,p => cl.s ! t ! a ! p ! SDecl
      } ;

    -- Object question: the interrogative pronoun, marked by the slash clause's
    -- complement c2, is fronted before the clause.
    QuestSlash ip slash = {
      s = \\t,a,p =>
        let
          cls = slash.s ! t ! a ! p ;
          who = appCompl True p slash.c2 (ip ** {a = agrP3 ip.n ; isPron = False})
        in
          who ++ cls
    } ;

    QuestIAdv iadv cl = {
      s = \\t,a,p => iadv.s ++ cl.s ! t ! a ! p ! SDecl
    } ;

    -- Copular question: interrogative complement followed by "NP + olema".
    QuestIComp icomp np = {
      s = \\t,a,p =>
        let
          vp = predV (verbOlema ** {sc = NPCase Nom}) ;
          cl = mkClause (subjForm np vp.sc) np.a vp ;
        in
          icomp.s ! np.a ++ cl.s ! t ! a ! p ! SDecl
    } ;

    PrepIP p ip = {s =
      appCompl True Pos p (ip ** {a = agrP3 ip.n ; isPron = False})} ;

    AdvIP ip adv = {
      s = \\c => ip.s ! c ++ adv.s ;
      n = ip.n
    } ;

    -- The computation of $ncase$ is a special case of that in $NounEst.DetCN$,
    -- since we don't have possessive suffixes or definiteness.
    --- It could still be nice to have a common oper...
    IdetCN idet cn = let n = idet.n in {
      s = \\c =>
        let
          k : Case = npform2case n c ;
          -- The determiner currently always stays in the nominative ("mis kassiga").
          -- Disabled alternative that would let it follow the noun's case:
          icase : Case = Nom ; --case k of { --mis kassiga
                               -- (Ess|Abess|Comit|Termin) => Gen ;
                               -- _ => k
                               -- } ;
          ncase : NForm = case <icase,idet.isNum> of {
            <Nom, True> => NCase Sg Part ; -- numeral-like determiner in the nominative: noun in singular partitive
            <_, True> => NCase Sg k ;      -- numeral-like determiner in another case: noun in singular, same case
            _ => NCase n k                 -- otherwise the noun takes the determiner's number and the requested case
          }
        in
          idet.s ! icase ++ cn.s ! ncase ;
      n = n
    } ;

    IdetIP idet = let n = idet.n in {
      s = \\c =>
        let
          k = npform2case n c ;
        in
          idet.s ! k ;
      n = n
    } ;

    -- Interrogative quantifier plus numeral. The quantifier is inflected in the
    -- numeral's number; the numeral itself is taken in its Sg forms.
    -- (An unused local that computed a noun form was removed here: the noun's
    -- form is chosen later, in IdetCN, from the n and isNum fields.)
    IdetQuant idet num =
      let
        n = num.n ;
        isn = num.isNum
      in {
        s = \\k => idet.s ! n ! k ++ num.s ! Sg ! k ;
        n = n ;
        isNum = isn
      } ;

    AdvIAdv i a = {s = i.s ++ a.s} ;

    CompIAdv a = {s = \\_ => a.s} ;
    CompIP ip = {s = \\_ => ip.s ! NPCase Nom} ;
}

View File

@@ -0,0 +1,50 @@
concrete RelativeEst of Relative = CatEst ** open Prelude, ResEst, MorphoEst in {
  flags optimize=all_subs ; coding=utf8;

  lin
    -- "nii et" + declarative clause; agreement of the head is ignored.
    RelCl clause = {
      c = NPCase Nom ;
      s = \\tense,ant,pol,_ => "nii" ++ "et" ++ clause.s ! tense ! ant ! pol ! SDecl
    } ;

    -- Relative pronoun as subject. The pronoun's own agreement is used when it
    -- has one (RAg); otherwise it inherits the agreement of the head noun.
    RelVP rp vp = {
      c = NPCase Nom ;
      s = \\tense,ant,pol,headAgr =>
        let
          agr = case rp.a of {
            RAg own => own ;
            RNoAg   => headAgr
          } ;
          subject = subjForm
            {s = rp.s ! (complNumAgr agr) ; a = agr ; isPron = False}
            vp.sc
        in
          (mkClause subject agr vp).s ! tense ! ant ! pol ! SDecl
    } ;

    -- Relative pronoun as complement of a slash clause: it is marked by the
    -- clause's c2 and placed in front of the clause.
    RelSlash rp slash = {
      c = slash.c2.c ;
      s = \\tense,ant,pol,headAgr =>
        appCompl True pol slash.c2 (rp2np (complNumAgr headAgr) rp)
          ++ slash.s ! tense ! ant ! pol
    } ;

    FunRP prep np rp = {
      a = RAg np.a ;
      s = \\num,npf => appCompl True Pos prep (rp2np num rp) ++ np.s ! npf  --- is npf OK?
    } ;

    IdRP = {
      a = RNoAg ;
      s = \\num,npf => relPron ! num ! npform2case num npf
    } ;
}

713
lib/src/estonian/ResEst.gf Normal file
View File

@@ -0,0 +1,713 @@
--1 Estonian auxiliary operations.
-- This module contains operations that are needed to make the
-- resource syntax work. To define everything that is needed to
-- implement $Test$, it moreover contains regular lexical
-- patterns needed for $Lex$.
resource ResEst = ParamX ** open Prelude in {
flags optimize=all ; coding=utf8;
--2 Parameters for $Noun$
-- This is the $Case$ as needed for both nouns and $NP$s.
param
Case = Nom | Gen | Part
| Illat | Iness | Elat | Allat | Adess | Ablat
| Transl | Ess | Termin | Abess | Comit;
NForm = NCase Number Case ;
-- Agreement of $NP$ has number*person and the polite second ("te olette valmis").
Agr = Ag Number Person | AgPol ;
oper
-- Number used for complement agreement: the polite form counts as singular.
complNumAgr : Agr -> Number = \agr -> case agr of {
  AgPol    => Sg ;
  Ag num _ => num
} ;
-- Number and person used for verb inflection: the polite form inflects as
-- second person plural.
verbAgr : Agr -> {n : Number ; p : Person} = \agr -> case agr of {
  AgPol       => {n = Pl ; p = P2} ;
  Ag num pers => {n = num ; p = pers}
} ;
oper
NP = {s : NPForm => Str ; a : Agr ; isPron : Bool} ;
--
--2 Adjectives
--
-- The major division is between the comparison degrees. A degree fixed,
-- an adjective is like common nouns, except for the adverbial form.
param
AForm = AN NForm | AAdv ;
Infl = Regular | Participle | Invariable ;
oper
Adjective : Type = {s : Degree => AForm => Str; lock_A : {}} ;
--2 Noun phrases
--
-- Two forms of *virtual accusative* are needed for nouns in singular,
-- the nominative and the genitive one ("loen raamatu"/"loe raamat").
-- For nouns in plural, only a nominative accusative exists in positive clauses.
-- Pronouns use the partitive as their accusative form ("mind", "sind"), in both
-- positive and negative, indicative and imperative clauses.
param
NPForm = NPCase Case | NPAcc ;
oper
-- Resolves an NPForm to a plain Case for the given number: the virtual
-- accusative is genitive in the singular and nominative in the plural.
npform2case : Number -> NPForm -> Case = \num,form ->
  -- the explicit tuple type is a workaround for gfc bug 9/11/2007
  case <<form,num> : NPForm * Number> of {
    <NPAcc,Pl>        => Nom ;
    <NPAcc,Sg>        => Gen ;   -- appCompl does the job
    <NPCase given,_>  => given
  } ;
--2 For $Verb$
-- A special form is needed for the negated plural imperative.
param
VForm =
Inf InfForm
| Presn Number Person
| Impf Number Person
| Condit Number Person
| Imper Number
| ImperP3
| ImperP1Pl
| ImpNegPl
| PassPresn Bool
| PassImpf Bool
| Quotative Voice
| PresPart Voice
| PastPart Voice
;
Voice = Act | Pass ;
InfForm =
InfDa -- lugeda
| InfDes -- lugedes
| InfMa -- lugema
| InfMas -- lugemas
| InfMast -- lugemast
| InfMata -- lugemata
| InfMaks -- lugemaks
;
SType = SDecl | SQuest | SInv ;
--2 For $Relative$
RAgr = RNoAg | RAg Agr ;
--2 For $Numeral$
CardOrd = NCard NForm | NOrd NForm ;
--2 Transformations between parameter types
oper
-- Third-person agreement in the given number.
agrP3 : Number -> Agr = \num -> Ag num P3 ;

-- Agreement of a coordination. Two ordinary agreements combine number and
-- person; if exactly one side is polite the result is plural, with the person
-- combined with P2; two polite sides stay polite (the second argument is returned).
conjAgr : Agr -> Agr -> Agr = \left,right -> case <left,right> of {
  <Ag n1 p1, Ag n2 p2> => Ag (conjNumber n1 n2) (conjPerson p1 p2) ;
  <Ag _ p1,  AgPol>    => Ag Pl (conjPerson p1 P2) ;
  <AgPol,    Ag _ p2>  => Ag Pl (conjPerson p2 P2) ;
  _                    => right
} ;
---
Compl : Type = {s : Str ; c : NPForm ; isPre : Bool} ;
-- Applies a complement marker (adposition string + case) to a noun phrase.
--   isFin : whether the governing verb form is finite
--   pol   : polarity of the clause
-- Only the virtual accusative (NPAcc) is context dependent; any other case
-- requested by the complement is used as it is.
appCompl : Bool -> Polarity -> Compl -> NP -> Str = \isFin,pol,compl,np ->
  let
    objForm : NPForm = case compl.c of {
      NPAcc => case pol of {
        Pos => case isFin of {
          False => case np.isPron of {
            True  => NPAcc ;        -- tuleb sind näha --TODO I: is this correct?
            False => NPCase Nom     -- tuleb see raamat lugeda
          } ;
          True => NPAcc             -- ma näen raamatu/sind
        } ;
        Neg => NPCase Part          -- ma ei näe raamatut/sind
      } ;
      other => other
    }
  in
    preOrPost compl.isPre compl.s (np.s ! objForm) ;
-- For $Verb$.
Verb : Type = {
s : VForm => Str ;
p : Str -- particle verbs
} ;
param
VIForm =
VIFin Tense
| VIInf InfForm
| VIPass
| VIImper
;
oper
VP = {
s : VIForm => Anteriority => Polarity => Agr => {fin, inf : Str} ;
s2 : Bool => Polarity => Agr => Str ; -- raamat/raamatu/raamatut
adv : Polarity => Str ; -- ainakin/ainakaan --TODO relevant for Est?
p : Str ; --uninflecting component in multi-word verbs
ext : Str ;
sc : NPForm ;
} ;
-- Builds a VP from a verb: s holds the finite and non-finite verb strings for
-- every form/anteriority/polarity/agreement; complements and adverbs start empty.
predV : (Verb ** {sc : NPForm}) -> VP = \verb -> {
  s = \\form,ant,pol,agr =>
    let
      numPers = verbAgr agr ;
      forms = verb.s ;
      -- past participle used in compound tenses: passive for VIPass, else active
      partic : Str = case form of {
        VIPass => forms ! PastPart Pass ;
        _      => forms ! PastPart Act
      } ;
      -- negation word, negated main-verb form, negated form of 'olema'
      negParts : Str * Str * Str = case <form,numPers.n> of {
        <VIFin Pres, _> => <"ei",   forms ! Imper Sg,        "ole"> ;
        <VIFin Fut,  _> => <"ei",   forms ! Imper Sg,        "ole"> ;
        <VIFin Cond, _> => <"ei",   forms ! Condit Sg P3,    "oleks"> ;
        <VIFin Past, _> => <"ei",   partic,                  "olnud"> ;
        <VIImper,   Sg> => <"ära",  forms ! Imper Sg,        "ole"> ;
        <VIImper,   Pl> => <"ärge", forms ! ImpNegPl,        "olge"> ;
        <VIPass,     _> => <"ei",   forms ! PassPresn False, "ole"> ;
        <VIInf i,    _> => <"ei",   forms ! Inf i,           "olla">
      } ;
      pair : Str -> Str -> {fin, inf : Str} = \f,rest -> {fin = f ; inf = rest} ;
      -- The negation word and the negated verb go together into fin
      -- (grouping changed from the Finnish original).
      build : VForm -> {fin, inf : Str} = \vform -> case <ant,pol> of {
        <Simul,Pos> => pair (forms ! vform) [] ;
        <Simul,Neg> => pair (negParts.p1 ++ negParts.p2) [] ;
        <Anter,Pos> => pair (verbOlema.s ! vform) partic ;
        <Anter,Neg> => pair (negParts.p1 ++ negParts.p3) partic
      }
    in case form of {
      VIFin Pres => build (Presn numPers.n numPers.p) ;
      VIFin Fut  => build (Presn numPers.n numPers.p) ;
      VIFin Past => build (Impf numPers.n numPers.p) ;
      VIFin Cond => build (Condit numPers.n numPers.p) ;
      VIImper    => build (Imper numPers.n) ;
      VIPass     => build (PassPresn True) ;
      VIInf i    => build (Inf i)
    } ;
  s2 = \\_,_,_ => [] ;
  adv = \\_ => [] ;
  ext = [] ;      -- relative clause
  p = verb.p ;    -- particle verbs
  sc = verb.sc
} ;
-- Adds an object after the complements already in the VP.
insertObj : (Bool => Polarity => Agr => Str) -> VP -> VP = \newObj,old -> {
  s2 = \\isFin,pol,agr => old.s2 ! isFin ! pol ! agr ++ newObj ! isFin ! pol ! agr ;
  s = old.s ;
  adv = old.adv ;
  p = old.p ;
  ext = old.ext ;
  sc = old.sc
} ;
-- Adds an object before the complements already in the VP.
insertObjPre : (Bool => Polarity => Agr => Str) -> VP -> VP = \newObj,old -> {
  s2 = \\isFin,pol,agr => newObj ! isFin ! pol ! agr ++ old.s2 ! isFin ! pol ! agr ;
  s = old.s ;
  adv = old.adv ;
  p = old.p ;
  ext = old.ext ;
  sc = old.sc
} ;
-- Appends a (polarity-dependent) adverb to the VP's adverb field.
insertAdv : (Polarity => Str) -> VP -> VP = \newAdv,old -> {
  adv = \\pol => old.adv ! pol ++ newAdv ! pol ;
  s = old.s ;
  s2 = old.s2 ;
  p = old.p ;
  ext = old.ext ;
  sc = old.sc
} ;
-- Appends an extraposed string to the VP's ext field.
insertExtrapos : Str -> VP -> VP = \extra,old -> {
  ext = old.ext ++ extra ;
  s = old.s ;
  s2 = old.s2 ;
  p = old.p ;
  adv = old.adv ;
  sc = old.sc
} ;
-- For $Sentence$.
Clause : Type = {
s : Tense => Anteriority => Polarity => SType => Str
} ;
ClausePlus : Type = {
s : Tense => Anteriority => Polarity => {subj,fin,inf,compl,adv,p,ext : Str}
} ;
-- The Finnish version of SQuest featured a word order change and
-- the question particle "ko". The Estonian version just prefixes the
-- declarative sentence with the yes/no-queryword "kas".
-- SQuest: "kas" + SDecl
-- It would be also correct to use the Finnish structure, just without the ko-particle.
-- Inari: added a third SType, SInv.
-- Not sure if SInv is needed, but keeping it for possible future use.
-- There's need for an inverted word order with auxiliary verbs; infVP handles that. ComplVV calls infVP, which inverts the word order for the complement VP, and puts it into the resulting VP's `compl' field.
-- SInv made by mkClause would be for cases where you just need to construct an inverted word order, and then call it from some other place; application grammar (TODO: api oper for SType) or ExtraEst.
-- Linearizes a clause in the three sentence types from the parts computed by
-- mkClausePlus.
mkClause : (Polarity -> Str) -> Agr -> VP -> Clause =
  \subj,agr,vp -> {
    s = \\tense,ant,pol =>
      let
        parts = (mkClausePlus subj agr vp).s ! tense ! ant ! pol ;
        -- everything that follows subject, finite verb and adverb in both orders
        rest = parts.compl ++ parts.p ++ parts.inf ++ parts.ext ;
        -- saan sinust aru 0
        -- ma olen täna sinust aru saanud
        direct = parts.subj ++ parts.fin ++ parts.adv ++ rest ;
        -- [sind näha] 0 tahtnud
        -- täna olen ma sinust aru saanud
        inverted = parts.adv ++ parts.fin ++ parts.subj ++ rest
      in
        table {
          SQuest => "kas" ++ direct ;
          SInv   => inverted ;
          SDecl  => direct
        }
  } ;
-- Computes the separate parts of a clause. With a nominative subject the verb
-- agrees with the subject and complements are selected as for a finite verb;
-- with any other subject case the verb is 3rd person singular and complements
-- are selected as for a non-finite verb. The complement field always sees the
-- subject's real agreement.
mkClausePlus : (Polarity -> Str) -> Agr -> VP -> ClausePlus =
  \subj,agr,vp -> {
    s = \\tense,ant,pol =>
      let
        verbAgreement : Agr = case vp.sc of {
          NPCase Nom => agr ;
          _          => agrP3 Sg
        } ;
        nomSubj : Bool = case vp.sc of {
          NPCase Nom => True ;
          _          => False
        } ;
        verb = vp.s ! VIFin tense ! ant ! pol ! verbAgreement
      in {
        subj  = subj pol ;
        fin   = verb.fin ;
        inf   = verb.inf ;
        compl = vp.s2 ! nomSubj ! pol ! agr ;
        adv   = vp.adv ! pol ;
        p     = vp.p ;
        ext   = vp.ext
      }
  } ;
-- Attaches the clitic "gi" to one part of a clause:
--   0 : after the subject
--   1 : after the finite verb
-- The result records contain exactly the fields declared in ClausePlus; the
-- former `h = c.h` entry was dropped because ClausePlus has no field `h`.
insertKinClausePlus : Predef.Ints 1 -> ClausePlus -> ClausePlus = \p,cl -> {
  s = \\t,a,b =>
    let
      c = cl.s ! t ! a ! b
    in
    case p of {
      0 => {subj = c.subj ++ gi ; fin = c.fin ; inf = c.inf ;
            compl = c.compl ; p = c.p ; adv = c.adv ; ext = c.ext} ;
      1 => {subj = c.subj ; fin = c.fin ++ gi ; inf = c.inf ;
            compl = c.compl ; p = c.p ; adv = c.adv ; ext = c.ext}
    }
} ;
-- Inserts an extra (polarity-dependent) element into a clause, optionally
-- followed by the clitic given by `kin`:
--   0 : the element becomes the complement; the old complement is moved in
--       front of the adverb
--   1 : the element becomes the adverb; the old adverb is moved in front of
--       the extraposed part
-- The result records contain exactly the fields declared in ClausePlus; the
-- former `h = c.h` entry was dropped because ClausePlus has no field `h`.
insertObjClausePlus : Predef.Ints 1 -> Bool -> (Polarity => Str) -> ClausePlus -> ClausePlus =
  \p,ifKin,obj,cl -> {
    s = \\t,a,b =>
      let
        c = cl.s ! t ! a ! b ;
        co = obj ! b ++ if_then_Str ifKin (kin b) [] ;
      in case p of {
        0 => {subj = c.subj ; fin = c.fin ; inf = c.inf ;
              compl = co ; p = c.p ; adv = c.compl ++ c.adv ; ext = c.ext} ;
        1 => {subj = c.subj ; fin = c.fin ; inf = c.inf ;
              compl = c.compl ; p = c.p ; adv = co ; ext = c.adv ++ c.ext}
      }
  } ;
-- The emphatic clitic; the same string is used for both polarities.
kin : Polarity -> Str = \_ -> "gi" ;
-- The allomorph "ki" depends only on phonetic rules ("üks+ki", "ühe+gi");
-- waiting for a post construction in GF.
gi : Str = "gi" ;
-- Prefixes a string with the "&+" token.
glueTok : Str -> Str = \str -> "&+" ++ str ;
-- Subject form of a noun phrase in the given subject case. This is also used
-- for subjects of passives: therefore isFin is False in the appCompl call.
subjForm : NP -> NPForm -> Polarity -> Str = \np,subjCase,pol ->
  appCompl False pol {isPre = True ; c = subjCase ; s = []} np ;
-- Infinitival verb phrase as a string, in inverted (complement-first) order.
infVP : NPForm -> Polarity -> Agr -> VP -> InfForm -> Str =
  \subjCase,pol,agr,vp,infForm ->
    let
      -- complements are selected as for a finite verb only with a nominative subject
      nomSubj = case subjCase of {
        NPCase Nom => True ;    -- mina tahan joosta
        _          => False     -- minul peab auto olema
      } ;
      -- the verb itself is always positive (no "ei") ...
      verb = vp.s ! VIInf infForm ! Simul ! Pos ! agr ;
      -- ... but the polarity still reaches the complement (case) and the adverb
      objs = vp.s2 ! nomSubj ! pol ! agr ;
      advs = vp.adv ! pol
    in
      -- inverted word order, e.g. in a relative clause:
      --   sinust kunagi aru saada tahtnud
      objs ++ advs ++ vp.p ++ verb.inf ++ verb.fin ++ vp.ext ;
--TODO adv placement?
--TODO inf ++ fin or fin ++ inf? does it ever become a case here?
-- The definitions below were moved here from $MorphoEst$ so that
-- auxiliary of predication can be defined.
-- The verb 'olema': built from its eight base forms, with the third person
-- present overridden by "on" in both numbers.
verbOlema : Verb =
  let
    regular = mkVerb
      "olema" "olla" "olen" "ollakse"
      "olge" "oli" "olnud" "oldud"
  in {
    p = [] ;
    s = table {
      Presn _ P3 => "on" ;
      form       => regular.s ! form
    }
  } ;
-- The verb 'minema': built from its eight base forms; the imperfect forms
-- other than the third person singular, and the singular imperative, are
-- listed explicitly.
verbMinema : Verb =
  let
    regular = mkVerb
      "minema" "minna" "läheb" "minnakse"
      "minge" "läks" "läinud" "mindud"
  in {
    p = [] ;
    s = table {
      Imper Sg   => "mine" ;
      Impf Sg P1 => "läksin" ;
      Impf Sg P2 => "läksid" ;
      Impf Pl P1 => "läksime" ;
      Impf Pl P2 => "läksite" ;
      Impf Pl P3 => "läksid" ;
      form       => regular.s ! form
    }
  } ;
--3 Verbs
--Auxiliary for internal use
-- Verb from eight base forms, in the order of the vForms8 table below.
mkVerb : (x1,_,_,_,_,_,_,x8 : Str) -> Verb =
  \maInf,daInf,pres3,passPres,imperPl,impf3,pastPartAct,pastPartPass ->
    vforms2V (vForms8 maInf daInf pres3 passPres imperPl impf3 pastPartAct pastPartPass) ;
--below moved here from MorphoEst
-- The eight base forms of a verb, indexed 0..7.
VForms : Type = Predef.Ints 7 => Str ;
vForms8 : (x1,_,_,_,_,_,_,x8 : Str) -> VForms =
  \maInf,daInf,pres3,passPres,imperPl,impf3,pastPartAct,pastPartPass ->
    table {
      0 => maInf ;          -- tulema
      1 => daInf ;          -- tulla
      2 => pres3 ;          -- tuleb
      3 => passPres ;       -- tullakse
      4 => imperPl ;        -- tulge
      5 => impf3 ;          -- tuli
      6 => pastPartAct ;    -- tulnud
      7 => pastPartPass     -- tuldud
    } ;
-- vforms2V: expands the eight base forms of a verb (see vForms8) into the full
-- inflection table over VForm. Stems are cut from the base forms with
-- init / Predef.tk and the personal endings are glued on.
-- The result also carries a nominative subject case, an empty particle and
-- the lock field of V.
-- Lines ending in "--# notpresent" are marked with that pragma comment and
-- must keep it on the same line.
vforms2V : VForms -> Verb = \vh ->
let
tulema = vh ! 0 ;
tulla = vh ! 1 ;
tuleb = vh ! 2 ;
tullakse = vh ! 3 ; --juuakse; loetakse
tulge = vh ! 4 ; --necessary for tulla, surra (otherwise *tulege, *surege)
tuli = vh ! 5 ; --necessary for jooma-juua-jõi
tulnud = vh ! 6 ;
tuldud = vh ! 7 ; --necessary for t/d in tuldi; loeti
tull_ = init tulla ; --juu(a); saad(a); tull(a);
tulles = tull_ + "es" ; --juues; saades; tulles;
-- present stem: the b-form without its last character
tule_ = init tuleb ;
-- ma-infinitive without its last two characters
lask_ = Predef.tk 2 tulema ;
laulev = case (last lask_) of { --sooma~soov ; laulma~laulev
("a"|"e"|"i"|"o"|"u"|"õ"|"ä"|"ö"|"ü") => lask_ + "v" ;
_ => lask_ + "ev" } ; --consonant stem in -ma, add e
--imperfect stem
-- chosen from the last three characters of the imperfect base form
kaisi_ = case (Predef.dp 3 tuli) of {
"sis" => lask_ + "i" ; --tõusin, tõusis
_ + "i" => tuli ; --jõin, jõi
_ => lask_ + "si" --käisin, käis; muutsin, muutis
};
tuld_ = Predef.tk 2 tuldud ; --d/t choice for tuldi etc.
tulgu = (init tulge) + "u" ;
in
{s = table {
Inf InfDa => tulla ;
Inf InfDes => tulles ;
Presn Sg P1 => tule_ + "n" ;
Presn Sg P2 => tule_ + "d" ;
Presn Sg P3 => tuleb ;
Presn Pl P1 => tule_ + "me" ;
Presn Pl P2 => tule_ + "te" ;
Presn Pl P3 => tule_ + "vad" ;
Impf Sg P1 => kaisi_ + "n" ; --# notpresent
Impf Sg P2 => kaisi_ + "d" ; --# notpresent
Impf Sg P3 => tuli ; --# notpresent
Impf Pl P1 => kaisi_ + "me" ; --# notpresent
Impf Pl P2 => kaisi_ + "te" ; --# notpresent
Impf Pl P3 => kaisi_ + "d" ; --# notpresent
Condit Sg P1 => tule_ + "ksin" ; --# notpresent
Condit Sg P2 => tule_ + "ksid" ; --# notpresent
Condit Sg P3 => tule_ + "ks"; --# notpresent
Condit Pl P1 => tule_ + "ksime" ; --# notpresent
Condit Pl P2 => tule_ + "ksite" ; --# notpresent
Condit Pl P3 => tule_ + "ksid" ; --# notpresent
Imper Sg => tule_ ; -- tule
Imper Pl => tulge ; -- tulge
ImperP3 => tulgu ; -- tulgu (ta/nad)
ImperP1Pl => tulge + "m" ; -- tulgem
ImpNegPl => tulge ; -- ärge tulge
PassPresn True => tullakse ;
PassPresn False => tuld_ + "a" ; --da or ta
PassImpf True => tuld_ + "i" ; --di or ti
PassImpf False => tuldud ;
Quotative Act => lask_ + "vat" ;
Quotative Pass => tuld_ + "avat" ; --d or t
PresPart Act => laulev ;
PresPart Pass => tuld_ + "av" ; --d or t
PastPart Act => tulnud ;
PastPart Pass => tuldud ;
Inf InfMa => tulema ;
Inf InfMas => tulema + "s" ;
Inf InfMast => tulema + "st" ;
Inf InfMata => tulema + "ta" ;
Inf InfMaks => tulema + "ks"
} ;
sc = NPCase Nom ;
p = [] ;
lock_V = <>
} ;
-- For regular verbs, paradigm from 4 base forms
-- (reference: "Analogy relations in the verb paradigm",
-- original title: Analoogiaseosed pöördsõna paradigmas)
-- http://www.eki.ee/books/ekk09/index.php?p=3&p1=5&id=227
-- The arguments are the ma-infinitive, the da-infinitive, the b-form and the
-- takse-form; the remaining four base forms of vForms8 are derived from them.
regVForms : (x1,_,_,x4 : Str) -> VForms = \vestlema,vestelda,vestleb,vesteldakse ->
let
-- ma-infinitive without its last two characters
vestle_ = Predef.tk 2 vestlema ;
-- da-infinitive without its last character, and without its last two
vesteld_ = init vestelda ;
vestel_ = init vesteld_ ;
-- b-form without its last character (not used below)
lase_ = init vestleb ;
-- takse-form without its last four characters
jaet_ = Predef.tk 4 vesteldakse ;
-- consonant of the imperative ending: k after t, otherwise g
g = case (last vesteld_) of { --doesn't work for anda~andke
"t" => "k" ;
_ => "g"
} ;
-- stem for the imperative and the active past participle
toit_ = case (last vestle_) of {
("t"|"d") => vesteld_ ; --toit(ma) -> toitke;
_ => vestel_ --vestle(ma) -> vestelge
} ;
-- stem for the imperfect: vowel stems as they are, consonant stems get "i"
laski_ = case (last vestle_) of {
("a"|"e"|"i"|"o"|"u"|"õ"|"ä"|"ö"|"ü")
=> vestle_ ; --vestle(ma) -> vestles
_ => vestle_ + "i" --lask(ma) -> laskis
} ;
in
vForms8
vestlema
vestelda
vestleb
vesteldakse
(toit_ + g + "e") --da: imperative, 3rd person singular and the plural forms
(laski_ + "s") --ma: indicative simple past forms
(toit_ + "nud") --da: past participle of the personal voice
(jaet_ + "ud"); --takse: the remaining forms of the impersonal voice
-- Regular verb from the four base forms expected by regVForms.
regVerb : (_,_,_,_ : Str) -> Verb = \maInf,daInf,pres3,passPres ->
  vforms2V (regVForms maInf daInf pres3 passPres) ;
-- Adjective from a noun declension (positive degree).
noun2adj : CommonNoun -> Adj = \noun -> noun2adjComp True noun ;
-- noun2adj : Noun -> Adj = noun2adjComp True ;
-- Adjective inflection from a noun declension: the AN forms are the noun's
-- own forms and the adverbial form AAdv is the singular ablative.
-- The Bool argument (positive degree or not) is kept for compatibility with
-- existing callers but is currently not used: both the "-sti" adverb and the
-- comparative "-in" adverb that it used to choose between are disabled, and
-- their unused stem computations have been removed.
-- TODO: AAdv is currently just Sg Ablat, which seems OK in most cases, although
-- ilus -> ilusti | ilusalt?
-- hea -> hästi
-- parem -> paremini
-- parim -> kõige paremini | parimalt?
noun2adjComp : Bool -> CommonNoun -> Adj = \isPos,tuore ->
  {s = table {
     AN f => tuore.s ! f ;
     AAdv => tuore.s ! NCase Sg Ablat
   }
  } ;
CommonNoun = {s : NForm => Str} ;
-- To form an adjective, it is usually enough to give a noun declension: the
-- adverbial form is regular.
Adj : Type = {s : AForm => Str} ;
-- Reflexive pronoun.
--- Possessive could be shared with the more general $NounFin.DetCN$.
--oper
--Estonian version started
-- Reflexive pronoun 'ise' as a noun phrase with the given agreement. Only the
-- singular forms of the declension are used, so the two plural base forms are
-- placeholders.
reflPron : Agr -> NP = \agr ->
  let
    self = nForms2N (nForms6 "ise" "enda" "ennast" "endasse" "IGNORE" "IGNORE")
  in {
    isPron = False ;   -- no special acc form
    a = agr ;
    s = table {
      NPCase c => self.s ! NCase Sg c ;
      NPAcc    => "ennast"
    }
  } ;
Noun = CommonNoun ** {lock_N : {}} ;
NForms : Type = Predef.Ints 5 => Str ;
-- Packs the six base forms of a noun into an NForms table:
-- 0-3 singular nominative, genitive, partitive, illative; 4-5 plural genitive, partitive.
nForms6 : (x1,_,_,_,_,x6 : Str) -> NForms =
  \sgNom,sgGen,sgPart,sgIllat,plGen,plPart -> table {
    0 => sgNom ;
    1 => sgGen ;
    2 => sgPart ;
    3 => sgIllat ;
    4 => plGen ;
    5 => plPart
  } ;
-- The inverse direction: reads the six base forms off a full declension.
n2nforms : CommonNoun -> NForms = \noun -> table {
  0 => noun.s ! NCase Sg Nom ;
  1 => noun.s ! NCase Sg Gen ;
  2 => noun.s ! NCase Sg Part ;
  3 => noun.s ! NCase Sg Illat ;
  4 => noun.s ! NCase Pl Gen ;
  5 => noun.s ! NCase Pl Part
} ;
-- Expands the six base forms (Sg Nom, Gen, Part, Illat; Pl Gen, Part) into the
-- full declension. All remaining singular cases add their ending to the
-- singular genitive, the plural nominative adds "d" to it, and all remaining
-- plural cases add their ending to the plural genitive.
-- http://www.eki.ee/books/ekk09/index.php?p=3&p1=5&id=226
nForms2N : NForms -> CommonNoun = \forms ->
  let
    sgNom   = forms ! 0 ;
    sgGen   = forms ! 1 ;
    sgPart  = forms ! 2 ;
    sgIllat = forms ! 3 ;
    plGen   = forms ! 4 ;
    plPart  = forms ! 5 ;
    -- endings of the cases formed from a genitive stem; the entries for
    -- Nom, Gen and Part are never selected below
    ending : Case => Str = table {
      Transl => "ks" ;
      Ess    => "na" ;
      Iness  => "s" ;
      Elat   => "st" ;
      Illat  => "sse" ;
      Adess  => "l" ;
      Ablat  => "lt" ;
      Allat  => "le" ;
      Abess  => "ta" ;
      Comit  => "ga" ;
      Termin => "ni" ;
      _      => []
    }
  in
    {s = table {
       NCase Sg Nom   => sgNom ;
       NCase Sg Gen   => sgGen ;
       NCase Sg Part  => sgPart ;
       NCase Sg Illat => sgIllat ;              -- given, not derived
       NCase Sg c     => sgGen + (ending ! c) ;
       NCase Pl Nom   => sgGen + "d" ;
       NCase Pl Gen   => plGen ;
       NCase Pl Part  => plPart ;
       NCase Pl c     => plGen + (ending ! c)
     }
    } ;
oper
-- Views a relative pronoun, in the given number, as a noun phrase.
rp2np : Number -> {s : Number => NPForm => Str ; a : RAgr} -> NP = \num,relPro -> {
  isPron = False ;   -- has no special accusative
  a = agrP3 Sg ;     -- does not matter (--- at least in Slash)
  s = relPro.s ! num
} ;
-- The complementizer used for embedded sentences.
etta_Conj : Str = "et" ;
-- Determiners: the stand-alone form sp is a copy of the attributive form s.
heavyDet : PDet -> PDet ** {sp : Case => Str} = \det -> det ** {sp = det.s} ;
PDet : Type = {
  s : Case => Str ;
  n : Number ;
  isNum : Bool ;
  isDef : Bool
} ;
-- Quantifiers: likewise, sp is a copy of s.
heavyQuant : PQuant -> PQuant ** {sp : Number => Case => Str} = \quant ->
  quant ** {sp = quant.s} ;
PQuant : Type =
  {s : Number => Case => Str ; isDef : Bool} ;
}

View File

@@ -0,0 +1,67 @@
concrete SentenceEst of Sentence = CatEst ** open Prelude, ResEst in {
  flags optimize=all_subs ; coding=utf8;

  lin
    -- Predication; a sentential subject is treated as 3rd person singular.
    PredVP subj vp = mkClause (subjForm subj vp.sc) subj.a vp ;
    PredSCVP subj vp = mkClause (\_ -> subj.s) (agrP3 Sg) vp ;

    -- Imperative: verb, complements (selected as for a non-finite verb, like
    -- in an infinitive), extraposed part and finally the particle:
    --   (ära) loe raamat(ut) läbi
    ImpVP vp = {
      s = \\pol,agr =>
        let
          verb = vp.s ! VIImper ! Simul ! pol ! agr ;
          objs = vp.s2 ! False ! pol ! agr
        in
          verb.fin ++ verb.inf ++ objs ++ vp.ext ++ vp.p
    } ;

    -- The object case is formed at the use site of $c2$, in $Relative$ and $Question$.
    SlashVP subj vp = {
      c2 = vp.c2 ;
      s = \\tense,ant,pol =>
        (mkClause (subjForm subj vp.sc) subj.a vp).s ! tense ! ant ! pol ! SDecl
    } ;

    AdvSlash slash adv = {
      c2 = slash.c2 ;
      s = \\tense,ant,pol => slash.s ! tense ! ant ! pol ++ adv.s
    } ;

    SlashPrep cl prep = {
      c2 = prep ;
      s = \\tense,ant,pol => cl.s ! tense ! ant ! pol ! SDecl
    } ;

    -- The embedded slash sentence is extraposed after the complementizer.
    SlashVS subj vs slash = {
      c2 = slash.c2 ;
      s = \\tense,ant,pol =>
        let
          vp = insertExtrapos (etta_Conj ++ slash.s) (predV vs)
        in
          (mkClause (subjForm subj vs.sc) subj.a vp).s ! tense ! ant ! pol ! SDecl
    } ;

    EmbedS sent = {s = etta_Conj ++ sent.s} ;
    EmbedQS quest = {s = quest.s} ;
    EmbedVP vp = {s = infVP (NPCase Nom) Pos (agrP3 Sg) vp InfDa} ; --- case,pol,agr,infform

    -- Fixing tense and polarity.
    UseCl temp pol cl = {s = temp.s ++ pol.s ++ cl.s ! temp.t ! temp.a ! pol.p ! SDecl} ;
    UseQCl temp pol cl = {s = temp.s ++ pol.s ++ cl.s ! temp.t ! temp.a ! pol.p} ;
    UseRCl temp pol cl = {
      c = cl.c ;
      s = \\agr => temp.s ++ pol.s ++ cl.s ! temp.t ! temp.a ! pol.p ! agr
    } ;
    UseSlash temp pol cl = {
      c2 = cl.c2 ;
      s = temp.s ++ pol.s ++ cl.s ! temp.t ! temp.a ! pol.p
    } ;

    AdvS adv sent = {s = adv.s ++ sent.s} ;
    ExtAdvS adv sent = {s = adv.s ++ "," ++ sent.s} ;

    -- The relative clause agrees as with a 3rd person singular head.
    RelS sent rel = {s = sent.s ++ "," ++ rel.s ! agrP3 Sg} ;
}

View File

@@ -0,0 +1,302 @@
concrete StructuralEst of Structural = CatEst **
open MorphoEst, ParadigmsEst, (X = ConstructX), MakeStructuralEst, Prelude in {
flags optimize=all ; coding=utf8 ;
lin
above_Prep = postGenPrep "peal" ;
after_Prep = postGenPrep "järel" ;
-- "kõik" is invariable in the nominative; the other cases come from the
-- declension of the noun "kõik" in the requested number.
all_Predet = {s = \\num,npf =>
  let
    forms = caseTable num (mkN "kõik")
  in
    case npform2case num npf of {
      Nom   => "kõik" ;
      other => forms ! other
    }
} ;
almost_AdA, almost_AdN = ss "peaaegu" ;
although_Subj = ss "kuigi" ;
always_AdV = ss "alati" ;
and_Conj = {s1 = [] ; s2 = "ja" ; n = Pl} ;
-- "sest" is the subordinating conjunction 'because'; "sellepärast" on its own
-- is the adverb 'therefore' (see therefore_PConj).
because_Subj = ss "sest" ;
before_Prep = prePrep partitive "enne" ;
behind_Prep = postGenPrep "taga" ;
between_Prep = postGenPrep "vahel" ;
both7and_DConj = sd2 "nii" "kui ka" ** {n = Pl} ;
but_PConj = ss "aga" ;
by8agent_Prep = postGenPrep "poolt" ;
by8means_Prep = casePrep adessive ;
can8know_VV = mkVV (mkV "oskama" "osata") ;
can_VV = mkVV (mkV "võima" "võida") ;
during_Prep = postGenPrep "ajal" ;
either7or_DConj = sd2 "kas" "või" ** {n = Sg} ;
everybody_NP = makeNP (mkN "igaüks") Sg ;
every_Det = mkDet Sg (mkN "iga") ;
everything_NP = makeNP ((mkN "kõik") ** {lock_N = <>}) Sg ;
everywhere_Adv = ss "kõikjal" ;
few_Det = mkDet Sg (mkN "mõni") ;
--- first_Ord = {s = \\n,c => (mkN "ensimmäinen").s ! NCase n c} ;
for_Prep = casePrep allative ;
from_Prep = casePrep elative ;
he_Pron = mkPronoun "tema" "tema" "teda" Sg P3 ;
here_Adv = ss "siin" ;
here7to_Adv = ss "siia" ;
here7from_Adv = ss "siit" ;
how_IAdv = ss "kuidas" ;
how8much_IAdv = ss "kui palju" ;
how8many_IDet = {
s = \\c => "kui" ++ (mkN "mitu" "mitme" "mitut" "TODO" "TODO" "TODO").s ! NCase Sg c ;
n = Sg ;
isNum = False
} ;
if_Subj = ss "kui" ;
in8front_Prep = postGenPrep "ees" ;
i_Pron = mkPronoun "mina" "minu" "mind" Sg P1 ;
in_Prep = casePrep inessive ;
it_Pron = {
s = \\c => pronSe.s ! npform2case Sg c ;
a = agrP3 Sg ;
isPron = False
} ;
less_CAdv = X.mkCAdv "vähem" "kui" ;
many_Det = mkDet Sg (mkN "mitu") ;
more_CAdv = X.mkCAdv "rohkem" "kui" ;
most_Predet = {s = \\n,c => (nForms2N (dSuurin "MOST")).s ! NCase n (npform2case n c)} ;
much_Det = mkDet Sg {s = \\_ => "palju"} ;
must_VV = mkVVf (mkV "pidama" "pidada" "peab" "peetakse" "pidage" "pidi" "pidanud" "peetud") infMa ;
no_Utt = ss "ei" ;
on_Prep = casePrep adessive ;
--- one_Quant = mkDet Sg DEPREC
only_Predet = {s = \\_,_ => "ainult"} ;
or_Conj = {s1 = [] ; s2 = "või" ; n = Pl} ;
otherwise_PConj = ss "muidu" ;
part_Prep = casePrep partitive ;
please_Voc = ss ["palun"] ; --- number
possess_Prep = casePrep genitive ;
quite_Adv = ss "üsna" ;
she_Pron = mkPronoun "tema" "tema" "teda" Sg P3 ;
so_AdA = ss "nii" ;
somebody_NP = {
s = \\c => jokuPron ! Sg ! npform2case Sg c ;
a = agrP3 Sg ;
isPron = False
} ;
someSg_Det = heavyDet {
s = jokuPron ! Sg ;
isNum = False ; isDef = True ; n = Sg
} ;
somePl_Det = heavyDet {
s = jokuPron ! Pl ;
isNum = False ; isDef = True ;
n = Pl
} ;
something_NP = {
s = \\c => mikaInt ! Sg ! npform2case Sg c ;
a = agrP3 Sg ;
isPron = False
} ;
somewhere_Adv = ss "kuskil" ;
-- Distal demonstrative: "too" in the singular, "nood" in the plural. Only the
-- case forms of the two pronouns are used, so their own agreement is irrelevant.
that_Quant =
  let
    too  = mkPronoun "too" "tolle" "toda" Sg P3 ;
    nood = mkPronoun "nood" "nonde" "noid" Sg P3
  in heavyQuant {
    isNum = False ; isDef = True ;
    s = table (MorphoEst.Number) {
      Sg => table (MorphoEst.Case) {
        cas => too.s ! NPCase cas
      } ;
      Pl => table (MorphoEst.Case) {
        cas => nood.s ! NPCase cas
      }
    }
  } ;
that_Subj = ss "et" ;
there_Adv = ss "seal" ;
there7to_Adv = ss "sinna" ;
there7from_Adv = ss "sealt" ;
therefore_PConj = ss "sellepärast" ;
they_Pron = mkPronoun "nemad" "nende" "neid" Pl P3 ;
-- Proximal demonstrative: "see" in the singular, "need" in the plural. Only
-- the case forms of the two pronouns are used, so their own agreement is irrelevant.
this_Quant =
  let
    see  = mkPronoun "see" "selle" "seda" Sg P3 ;
    need = mkPronoun "need" "nende" "neid" Sg P3
  in heavyQuant {
    isNum = False ; isDef = True ;
    s = table (MorphoEst.Number) {
      Sg => table (MorphoEst.Case) {
        cas => see.s ! NPCase cas
      } ;
      Pl => table (MorphoEst.Case) {
        cas => need.s ! NPCase cas
      }
    }
  } ;
through_Prep = postGenPrep "kaudu" ;
too_AdA = ss "liiga" ;
to_Prep = casePrep allative ;
under_Prep = postGenPrep "all" ;
very_AdA = ss "väga" ;
want_VV = mkVV (mkV "tahtma") ;
we_Pron = mkPronoun "meie" "meie" "meid" Pl P1 ;
whatPl_IP = {
s = table {NPAcc => "mida" ; c => mikaInt ! Pl ! npform2case Pl c} ;
n = Pl
} ;
whatSg_IP = {
s = \\c => mikaInt ! Sg ! npform2case Sg c ;
n = Sg
} ;
when_IAdv = ss "kui" ;
when_Subj = ss "kui" ;
where_IAdv = ss "kus" ;
which_IQuant = { s = mikaInt } ;
whoSg_IP = {
s = table {NPAcc => "keda" ; c => kukaInt ! Sg ! npform2case Sg c} ;
n = Sg
} ;
whoPl_IP = {
s = table {NPAcc => "keda" ; c => kukaInt ! Pl ! npform2case Pl c} ;
n = Pl
} ;
why_IAdv = ss "miks" ;
without_Prep = prePrep partitive "ilma" ;
with_Prep = prePrep comitative "koos" ;
yes_Utt = ss "jah" ;
youSg_Pron = mkPronoun "sina" "sinu" "sind" Sg P2 ;
youPl_Pron = mkPronoun "teie" "teie" "teid" Pl P2 ;
-- Polite "you": the string forms of the second person plural pronoun "teie",
-- combined with the dedicated polite agreement value AgPol.
youPol_Pron = {
  s = (mkPronoun "teie" "teie" "teid" Pl P2).s ;
  a = AgPol
} ;
oper
-- Indefinite pronoun "keegi", used by the "some" determiners.
-- The number argument is ignored: both Sg and Pl yield the singular forms of
-- the noun paradigm built below.
jokuPron : MorphoEst.Number => (MorphoEst.Case) => Str =
  let
    keegi = mkN "keegi" "kellegi" "kedagi" "kellegisse" "kellegi" "kedagi"
  in
    \\_,c => keegi.s ! NCase Sg c ;
-- TODO: maybe remove
-- Indefinite pronoun "miski". Nominative, inessive, elative, ablative and
-- translative are listed explicitly (the clitic is spelled "ki" there); all
-- other cases append "gi" to the corresponding mikaInt form.
-- The Pl row lists the same explicit forms as the Sg row.
-- Not referenced by any definition visible in this part of the file.
jokinPron : MorphoEst.Number => (MorphoEst.Case) => Str =
table {
Sg => table {
Nom => "miski" ;
Iness => "milleski" ;
Elat => "millestki" ;
Ablat => "milleltki" ;
Transl => "millekski" ;
c => mikaInt ! Sg ! c + "gi"
} ;
Pl => table { --TODO correct these plurals
Nom => "miski" ;
Iness => "milleski" ;
Elat => "millestki" ;
Ablat => "milleltki" ;
Transl => "millekski" ;
c => mikaInt ! Pl ! c + "gi"
}
} ;
-- Interrogative "what" (mis) by number and case.
-- Nominative, genitive and partitive are fixed strings shared by both
-- numbers; every other case is taken from the noun paradigm mkN "mille" in
-- the requested number.
-- TODO (from the original author): check that this actually works.
mikaInt : MorphoEst.Number => (MorphoEst.Case) => Str =
  let
    mi = mkN "mille"
  in
    \\num => table {
      Nom  => "mis" ;
      Gen  => "mille" ;
      Part => "mida" ;
      c    => mi.s ! NCase num c
    } ;
-- Interrogative "who" (kes) by number and case.
-- All forms come from the six-argument mkN paradigm below, except that the
-- plural nominative is overridden with "kes".
kukaInt : MorphoEst.Number => (MorphoEst.Case) => Str =
let
kuka = mkN "kes" "kelle" "keda" "kellesse"
"kellede" "keda" ;
in
table {
Sg => table {
c => kuka.s ! NCase Sg c
} ;
Pl => table {
Nom => "kes" ;
c => kuka.s ! NCase Pl c
}
} ;
-- Pronoun used (after "mitte") for "nothing" and for the quantifier "no".
-- Nominative, partitive and genitive are fixed forms of "ükski", identical in
-- both numbers; every other case appends "gi" to the mikaInt form.
-- NOTE(review): the original "----ki" marker suggests the clitic spelling
-- ("gi" vs "ki") of the fallback forms still needs checking.
mikaanPron : MorphoEst.Number => (MorphoEst.Case) => Str =
  \\num => table {
    Nom  => "ükski" ;
    Part => "ühtegi" ;
    Gen  => "ühegi" ;
    c    => mikaInt ! num ! c + "gi" ----ki
  } ;
-- Pronoun "keegi" as used (after "mitte") for "nobody".
-- Nominative and partitive are fixed; the remaining cases attach the clitic
-- to the corresponding kukaInt form. The clitic is spelled "ki" after a
-- voiceless final consonant, so the singular forms ending in s/t/ks are
-- listed explicitly, in the same way as jokinPron does for "miski".
-- The plural row previously contained the Finnish forms "ketkään"/"keitään";
-- they are replaced by the Estonian forms, which do not vary with number
-- (the plural nominative of kukaInt is likewise "kes").
kukaanPron : MorphoEst.Number => (MorphoEst.Case) => Str =
  table {
    Sg => table {
      Nom    => "keegi" ;
      Part   => "kedagi" ;
      Iness  => "kelleski" ;
      Elat   => "kellestki" ;
      Ablat  => "kelleltki" ;
      Transl => "kellekski" ;
      c      => kukaInt ! Sg ! c + "gi"
    } ;
    Pl => table {
      Nom  => "keegi" ;
      Part => "kedagi" ;
      -- NOTE(review): the remaining plural forms still glue "gi" onto the
      -- plural kukaInt forms; confirm spelling once the plurals are settled.
      c    => kukaInt ! Pl ! c + "gi"
    }
  } ;
oper
-- Turns a noun into a third person, non-pronominal NP of the given number.
-- Each NP form is mapped to a case with npform2case and looked up in the
-- noun's table; lock_NP marks the record as a CatEst.NP.
makeNP : N -> MorphoEst.Number -> CatEst.NP =
  \noun, num -> {
    s = \\npf => noun.s ! NCase num (npform2case num npf) ;
    a = agrP3 num ;
    isPron = False ;
    lock_NP = <>
  } ;
lin
-- Predeterminer "not": the same string "ei" for every pair of parameters.
not_Predet = {s = \\_,_ => "ei"} ;
-- Quantifier "no": the word "mitte" followed by the mikaanPron form for the
-- requested number and case, passed through heavyQuant.
no_Quant = heavyQuant {
s = \\n,c => "mitte" ++ mikaanPron ! n ! c ;
isNum = False ; isDef = True ;
} ;
-- Discontinuous conjunction "if ... then ..." with singular number.
if_then_Conj = {s1 = "kui" ; s2 = "siis" ; n = Sg} ;
-- "nobody" as a third person singular, non-pronominal NP: the word "mitte"
-- followed by the singular kukaanPron form for the requested case.
nobody_NP = {
  isPron = False ;
  a = agrP3 Sg ;
  s = table {
    form => "mitte" ++ kukaanPron ! Sg ! npform2case Sg form
  }
} ;
-- "nothing" as a third person singular, non-pronominal NP: the word "mitte"
-- followed by the singular mikaanPron form for the requested case.
nothing_NP = {
  isPron = False ;
  a = agrP3 Sg ;
  s = table {
    form => "mitte" ++ mikaanPron ! Sg ! npform2case Sg form
  }
} ;
-- Numeral-modifying adverbs "at least" and "at most".
at_least_AdN = ss "vähemalt" ;
at_most_AdN = ss "kuni" ;
-- Comparison adverb "as ... as", built with X.mkCAdv from two strings.
as_CAdv = X.mkCAdv "sama palju" "kui" ;
-- "except": built with postPrep, governing the partitive case.
except_Prep = postPrep partitive "väljaarvatud" ;
-- "have": the verb vOlema with its case set to adessive via caseV.
have_V2 = mkV2 (caseV adessive vOlema) ;
-- The name of the language in the language itself.
-- (Kaarel originally left a TODO asking what this entry is for; the answer
-- noted in reply was: take the Finnish entry and replace "suomi" by "eesti".)
lin language_title_Utt = ss "eesti" ;
}

View File

@@ -0,0 +1,42 @@
-- Estonian linearization of the Symbol module: symbols and numeric literals
-- used as names, noun phrases, sentences, numerals and ordinals.
concrete SymbolEst of Symbol = CatEst ** open Prelude, NounEst, ResEst in {

  lincat
    -- A symbol, and a list of symbols, is a plain string record.
    Symb, [Symb] = SS ;

  lin
    -- Lists of symbols: the last two items are joined by "ja", earlier
    -- items by a comma.
    MkSymb sy = sy ;
    BaseSymb = infixSS "ja" ;
    ConsSymb = infixSS "," ;

    -- Symbols and literals as proper names. The case parameter is ignored,
    -- so the string is never inflected.
    SymbPN  sy = {s = \\_ => sy.s} ;
    IntPN   k  = {s = \\_ => k.s} ;
    FloatPN x  = {s = \\_ => x.s} ;
    NumPN   nu = {s = \\_ => nu.s ! Sg ! Nom} ;

    -- Common noun followed by an integer; the noun is singular and takes the
    -- case, the integer is appended as is.
    CNIntNP cn k = {
      s = \\npf => cn.s ! NCase Sg (npform2case Sg npf) ++ k.s ;
      a = agrP3 Sg ;
      isPron = False
    } ;

    -- Determiner + common noun followed by a list of symbols; inflection and
    -- agreement come from NounEst.DetCN.
    CNSymbNP det cn xs =
      let
        np = NounEst.DetCN det cn
      in {
        s = \\npf => np.s ! npf ++ xs.s ;
        a = np.a ;
        isPron = False
      } ;

    -- Common noun followed by a numeral in its singular nominative form.
    CNNumNP cn nu = {
      s = \\npf => cn.s ! NCase Sg (npform2case Sg npf) ++ nu.s ! Sg ! Nom ;
      a = agrP3 Sg ;
      isPron = False
    } ;

    -- A symbol used as a sentence, a numeral, or an ordinal (with a
    -- trailing full stop token).
    SymbS   sy = sy ;
    SymbNum sy = {s = \\_,_ => sy.s ; isNum = True ; n = Pl} ;
    SymbOrd sy = {s = \\_ => sy.s ++ "."} ;
}

136
lib/src/estonian/VerbEst.gf Normal file
View File

@@ -0,0 +1,136 @@
--1 Verb Phrases in Estonian
-- Estonian linearization of the Verb module: building verb phrases from verbs
-- and their complements. All the actual work is delegated to operations that
-- are opened from ResEst (predV, insertObj, insertObjPre, insertExtrapos,
-- insertAdv, appCompl, infVP, ...), which are not defined in this module.
concrete VerbEst of Verb = CatEst ** open Prelude, ResEst in {
flags optimize=all_subs ; coding=utf8;
lin
-- A plain verb as a verb phrase.
UseV = predV ;
-- Two-place verb with its object still missing; c2 records how the object
-- is to be attached later.
SlashV2a v = predV v ** {c2 = v.c2} ;
-- Three-place verb with the v.c2 object filled in; the v.c3 object is left open.
Slash2V3 v np =
insertObj
(\\fin,b,_ => appCompl fin b v.c2 np) (predV v) ** {c2 = v.c3} ;
-- Three-place verb with the v.c3 object filled in; the v.c2 object is left open.
Slash3V3 v np =
insertObj
(\\fin,b,_ => appCompl fin b v.c3 np) (predV v) ** {c2 = v.c2} ;
-- Verb taking a verb-phrase complement: the infinitive of vp (built by infVP)
-- is inserted as an object. The subject case of the result is v.sc when the
-- embedded vp has a nominative subject, and vp.sc otherwise.
ComplVV v vp =
insertObj
(\\_,b,a => infVP v.sc b a vp v.vi)
(predV {s = v.s ;
p = v.p ;
sc = case vp.sc of {
NPCase Nom => v.sc ; -- minul tuleb kirjutada (VV 'tulema' determines the subject case)
c => c -- minul peab auto olema (VP 'olema' determines the subject case)
}
}
) ;
-- Sentence complement: etta_Conj followed by the sentence, extraposed.
ComplVS v s = insertExtrapos (etta_Conj ++ s.s) (predV v) ;
-- Question complement, extraposed.
ComplVQ v q = insertExtrapos ( q.s) (predV v) ;
-- Adjectival complement: the AP is inflected in the case given by v.c2.c and
-- in the number derived from the agreement by complNumAgr.
ComplVA v ap =
insertObj
(\\_,b,agr =>
let n = (complNumAgr agr) in
ap.s ! False ! (NCase n (npform2case n v.c2.c))) --- v.cs.s ignored
(predV v) ;
-- Object-taking variants of the sentence, question and verb-phrase
-- complements; the NP object (c2 = v.c2) is still missing.
SlashV2S v s =
insertExtrapos (etta_Conj ++ s.s) (predV v) ** {c2 = v.c2} ;
SlashV2Q v q =
insertExtrapos (q.s) (predV v) ** {c2 = v.c2} ;
SlashV2V v vp =
insertObj (\\_,b,a => infVP v.sc b a vp v.vi) (predV v) ** {c2 = v.c2} ;
---- different infinitives
-- Object + adjective complement: the AP is always singular here, in the case
-- given by v.c3.c (the original marker notes it should agree with the object).
SlashV2A v ap =
insertObj
(\\fin,b,_ =>
ap.s ! False ! (NCase Sg (npform2case Sg v.c3.c))) ----agr to obj
(predV v) ** {c2 = v.c2} ;
-- Fills the open object slot of a slash verb phrase, using vp.c2.
ComplSlash vp np = insertObjPre (\\fin,b,_ => appCompl fin b vp.c2 np) vp ;
-- Copula verb phrase: verbOlema with a nominative subject plus the complement.
UseComp comp =
insertObj (\\_,_ => comp.s) (predV (verbOlema ** {sc = NPCase Nom})) ;
-- Like ComplVV, but the embedded vp still lacks its object (c2 = vp.c2).
SlashVV v vp =
insertObj
(\\_,b,a => infVP v.sc b a vp v.vi)
(predV {s = v.s ;
p = v.p ;
sc = case vp.sc of {
NPCase Nom => v.sc ; -- the VV determines the subject case (Finnish example: "minun täytyy pestä auto")
c => c -- the embedded VP determines the subject case (Finnish example: "minulla täytyy olla auto")
}
}
) ** {c2 = vp.c2} ; ---- correct ??
-- Verb with an NP object and a slash verb phrase. appCompl is called with
-- True rather than the bound variable fin; the original marker records that
-- using fin led to a stack overflow.
SlashV2VNP v np vp =
insertObjPre
(\\fin,b,a => appCompl True b v.c2 np ++ ---- fin -> stack overflow
infVP v.sc b a vp v.vi)
(predV v) ** {c2 = vp.c2} ;
-- Adverbial modification; both variants insert the adverb in the same way.
AdvVP vp adv = insertAdv (\\_ => adv.s) vp ;
AdVVP adv vp = insertAdv (\\_ => adv.s) vp ;
-- Reflexive: the object slot is filled with the reflexive pronoun that
-- matches the agreement.
ReflVP v = insertObjPre (\\fin,b,agr => appCompl fin b v.c2 (reflPron agr)) v ;
-- Passive of a two-place verb. Only the VIPass form of the verb is kept and
-- the object and adverb slots are empty. The subject case is nominative when
-- the verb's object case is genitive, and the object case itself otherwise.
PassV2 v =
let
vp = predV v ;
subjCase = case v.c2.c of { --this is probably a reason to not get rid of NPAcc; TODO check
NPCase Gen => NPCase Nom ; --valisin koera -> koer valitakse
_ => v.c2.c --rääkisin koerale -> koerale räägitakse
}
in {
s = \\_ => vp.s ! VIPass ;
s2 = \\_,_,_ => [] ;
adv = \\_ => [] ;
p = vp.p ;
ext = vp.ext ;
sc = subjCase -- koer valitakse ; koerale räägitakse
} ;
----b UseVS, UseVQ = \v -> v ** {c2 = {s = [] ; c = NPAcc ; isPre = True}} ;
-- Copula complements. AP and CN complements are nominative and take their
-- number from the agreement via complNumAgr; NP complements are nominative;
-- adverbial complements are used as they are.
CompAP ap = {
s = \\agr =>
let
n = complNumAgr agr ;
in ap.s ! False ! (NCase n Nom)
} ;
CompCN cn = {
s = \\agr =>
let
n = complNumAgr agr ;
in cn.s ! (NCase n Nom)
} ;
CompNP np = {s = \\_ => np.s ! NPCase Nom} ;
CompAdv a = {s = \\_ => a.s} ;
}
--2 The object case
--
-- The rules involved are ComplV2 and ComplVV above.
-- The work is done jointly in ResEst.infVP and appCompl.
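-- Cases to test with "l -table" (to see the negated forms as well). Note that
-- the example sentences below are still Finnish, carried over from the
-- Finnish grammar that this module was adapted from.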
--```
-- minun täytyy ostaa auto
-- PredVP (UsePron i_Pron) (ComplVV must_VV
-- (ComplV2 buy_V2 (DetCN (DetSg (SgQuant DefArt) NoOrd) (UseN car_N))))
-- minä tahdon ostaa auton
-- PredVP (UsePron i_Pron) (ComplVV want_VV
-- (ComplV2 buy_V2 (DetCN (DetSg (SgQuant DefArt) NoOrd) (UseN car_N))))
-- minulla täytyy olla auto
-- PredVP (UsePron i_Pron) (ComplVV must_VV
-- (ComplV2 have_V2 (DetCN (DetSg (SgQuant DefArt) NoOrd) (UseN car_N))))
--```
-- Unfortunately, there is no nice way to say "I want to have a car".
-- (Other than the paraphrases "I want a car" or "I want to own a car".)