Fixes to Estonian

This commit is contained in:
inari
2013-10-22 12:23:52 +00:00
parent 098619ceee
commit 34d15b505d
3 changed files with 244 additions and 282 deletions

View File

@@ -23,8 +23,6 @@ resource HjkEst = open ResEst, Prelude, Predef in {
oper
NFS = {s : NForm => Str} ;
foreign : pattern Str = #("z" | "ž" | "š") ;
-- Foreign vowel endings
foreign_v : pattern Str = #("ko" | "po" | "to" | "fo" | "ka" | "pa" | "ta" | "fa" | "ku" | "pu" | "tu" | "fu") ;
@@ -50,13 +48,14 @@ resource HjkEst = open ResEst, Prelude, Predef in {
hjk_type_VI_meeskond,
hjk_type_VI_seminar,
hjk_type_VII_touge : Str -> NForms ;
-- hjk_type_VII_touge : Str -> NFS ;
-- IVa additionally needs the stem vowel.
hjk_type_IVb_audit,
hjk_type_IVb_audit1 : Str -> Str -> NForms ; --NFS
hjk_type_IVb_audit1 : Str -> Str -> NForms ;
hjk_type_VI_tukk : Str -> Str -> NForms ;
hjk_type_VI_link2 : Str -> Str -> NForms ;
hjk_type2 : Str -> Str -> NForms ;
-- Definition of the mapping rules.
@@ -147,24 +146,19 @@ resource HjkEst = open ResEst, Prelude, Predef in {
in
nForms6 x (x_n+"i") (x+"i") (x+"i") (x+"ide") (x+"e") ;
--like link but
-- gen form given (takes care of vowel and consonant gradation)
-- -sid for pl.part (todo: generate short forms depending on vowel?)
hjk_type_VI_tukk x x_gen =
-- same as hjk_type_VI_link but additionally takes the genitive ending
hjk_type_VI_link2 x i =
let
v_g : Str = last x_gen ;
{- pl_part : Str =
case v_g of {
"i" => "e" ;
_ => v_g + "sid" } ;
-}
x_n : Str = weaker_noun x ;
-- TODO: revisit how the pl.part ending is chosen from the stem vowel
e : Str = case i of {
"a" => "asid" ; -- pikk/pika -> pikkasid
_ => "e"
}
in
nForms6 x x_gen (x+v_g) (x+v_g) (x+v_g+"de") (x+v_g+"sid") ;
nForms6 x (x_n+i) (x+i) (x+i) (x+i+"de") (x+e) ;
hjk_type_VI_imelik x =
let
x_t : Str = stronger_noun x
@@ -312,103 +306,105 @@ resource HjkEst = open ResEst, Prelude, Predef in {
-- - comparative and superlative adjective forms (workaround: use mkA instead)
-- - type VII (t6uge -> t6uke), as one needs to detect derivation from verb
-- - last syllable superlong (rostbiif)
hjk_type x =
case <(syl_type x), x> of {
<S3, _ + "ke">
hjk_type x = hjk_type2 x "i" ;
hjk_type2 x i =
case <(syl_type x), x, i> of {
<S3, _ + "ke", _>
=> hjk_type_Vb_oluline x ;
<_, _ + "kond">
<_, _ + "kond", _>
=> hjk_type_VI_meeskond x ;
-- Some S2 -ik words (voolik), we only cover words with double vowel
<_, _ + #vv + ("lik"|"nik"|"stik")>
<_, _ + #vv + ("lik"|"nik"|"stik"), _>
=> hjk_type_IVb_audit x "u" ;
-- Other -ik words as in HJKEKS,
-- but added 'ndik' which fixes fractions ('kaheksandik')
-- and is wrong only for 'kandik'.
<_, _ + ("lik"|"nik"|"stik"|"ndik")>
<_, _ + ("lik"|"nik"|"stik"|"ndik"), _>
=> hjk_type_VI_imelik x ;
-- Remaining -k words (but need to be S2)
-- but not 'konjak'
<S2, _ + ("a"|"e"|"i") + ("ng"|"k")>
<S2, _ + ("a"|"e"|"i") + ("ng"|"k"), _>
=> hjk_type_IVb_audit x "u" ;
-- Other -ik words (not in HJKEKS)
-- including also: alevik, asemik, lobudik, hämarik, sarapik, põletik
<_, _ + ("vik"|"mik"|"dik"|"rik"|"pik"|"tik")>
<_, _ + ("vik"|"mik"|"dik"|"rik"|"pik"|"tik"), _>
=> hjk_type_VI_imelik x ;
-- kikas
<_, ? + #v + #c + #v + "s">
<_, ? + #v + #c + #v + "s", _>
=> hjk_type_Va_otsene x ;
<_, _ + ("ngas"|"kas"|"jas"|"nud"|"tud")>
<_, _ + ("ngas"|"kas"|"jas"|"nud"|"tud"), _>
=> hjk_type_IVb_maakas x ;
<S1, _ + #v + #v>
<S1, _ + #v + #v, _>
=> hjk_type_I_koi x ;
-- 'statiiv' (not like 'karjuv')
<S1, _ + #vv + #c>
=> hjk_type_VI_link x ;
<S1, _ + #vv + #c, i>
=> hjk_type_VI_link2 x i ;
<S3, _ + #c + #v + #lmnr>
<S3, _ + #c + #v + #lmnr, _>
=> hjk_type_VI_seminar x ;
<S1, _ + #v + #v + #c>
=> hjk_type_VI_link x ;
<S1, _ + #v + #v + #c, i>
=> hjk_type_VI_link2 x i ;
<_, _ + ("us"|"is")>
<_, _ + ("us"|"is"), _>
=> hjk_type_Vb_oluline x ;
<S3, _ + #v + #v + #c>
=> hjk_type_VI_link x ;
<S3, _ + #v + #v + #c, i>
=> hjk_type_VI_link2 x i ;
<(S1|S3), _ + #v + #c + #c>
=> hjk_type_VI_link x ;
<(S1|S3), _ + #v + #c + #c, i>
=> hjk_type_VI_link2 x i ;
<(S1|S3), _ + #v + #c + #c + #c>
=> hjk_type_VI_link x ;
<(S1|S3), _ + #v + #c + #c + #c, i>
=> hjk_type_VI_link2 x i ;
<_, _ + "nna">
<_, _ + "nna", _>
=> hjk_type_III_ratsu x ;
<-(S21|S22), _ + ("nu"|"tu")>
<-(S21|S22), _ + ("nu"|"tu"), _>
=> hjk_type_IVa_aasta x ;
-- TODO: improve foreign detection
<S2, _ + #foreign + _ + "in">
=> hjk_type_IVb_audit x "i" ;
<S2, _ + #foreign + _ + "in", i>
=> hjk_type_IVb_audit x i ;
-- TODO: this is not in HJKEKS
-- 'absurd' vs 'ebard'
<S2, _ + #v + #lmnr + "d">
=> hjk_type_IVb_audit x "i" ;
<S2, _ + #v + #lmnr + "d", i>
=> hjk_type_IVb_audit x i ;
-- sometimes 'a' (laurits) TODO: this is not in HJKEKS
<S2, _ + #v + #kpt + "s">
=> hjk_type_IVb_audit x "i" ;
<S2, _ + #v + #kpt + "s", i>
=> hjk_type_IVb_audit x i ;
-- TODO: next 3 rules: last syllable must be long
-- portfell, TODO: not 'karask'
<S2, _ + #v + #c + #c>
=> hjk_type_VI_link x ;
<S2, _ + #v + #c + #c, i>
=> hjk_type_VI_link2 x i ;
-- rostbiif, not viiul
<S2, _ + #c + #v + #v + #c>
=> hjk_type_VI_link x ;
<S2, _ + #c + #v + #v + #c, i>
=> hjk_type_VI_link2 x i ;
-- impulss
<S2, _ + #v + #c + #c + #c>
=> hjk_type_VI_link x ;
<S2, _ + #v + #c + #c + #c, i>
=> hjk_type_VI_link2 x i ;
-- TODO: sometimes masked by 'maakas'
<_, _ + #v + "s">
<_, _ + #v + "s", _>
=> hjk_type_Va_otsene x ;
<_, _ + ("v"|"tav")>
<_, _ + ("v"|"tav"), _>
=> hjk_type_IVb_audit x "a" ;
-- The choice between Va (pl part: -seid) and Vb (pl part: -si)
@@ -416,38 +412,38 @@ resource HjkEst = open ResEst, Prelude, Predef in {
-- We just check the ending of the word and require at least 2 letters
-- to precede the ending.
-- We added also -tine and -ldane (which occur with adjectives).
<_, _ + ? + ? + ("line"|"lane"|"mine"|"kene"|"tine"|"ldane")>
<_, _ + ? + ? + ("line"|"lane"|"mine"|"kene"|"tine"|"ldane"), _>
=> hjk_type_Vb_oluline x ;
-- k6ne
<S21, _ + "e">
<S21, _ + "e", _>
=> hjk_type_III_ratsu x ;
-- Many adjectives end with "ne" (40% in WordNet)
-- We require them to be at least 5 letters long (excluding 'öine'),
-- to give a chance to VII_touge (next rule).
<_, _ + ? + ? + ? + "ne">
<_, _ + ? + ? + ? + "ne", _>
=> hjk_type_Va_otsene x ;
-- Note: this rule does not actually check the derivation from verb.
-- verb + e, TODO: masked by S21/e
<(S2|S22), _ + "e">
<(S2|S22), _ + "e", _>
=> hjk_type_VII_touge x ;
-- ufo, pita, lito
<S21, _ + #foreign_v>
<S21, _ + #foreign_v, _>
=> hjk_type_III_ratsu x ;
<S21, _ + #v>
<S21, _ + #v, _>
=> hjk_type_II_ema x ;
<S22, _ + #v>
<S22, _ + #v, _>
=> hjk_type_III_ratsu x ;
<S23, _ + #v>
<S23, _ + #v, _>
=> hjk_type_IVa_aasta x ;
<S2, _ + "in">
<S2, _ + "in", _>
=> hjk_type_IVb_audit x "a" ;
-- 'e' deletion
@@ -456,54 +452,54 @@ resource HjkEst = open ResEst, Prelude, Predef in {
-- spikker -> spikri (TODO: not: pokker -> pokkeri)
-- Note: pintsel -> pintsli, but not pitser -> pitsri
-- Note: 'redel' and 'paber' do not lose the 'e'.
<S2, y + kk@("kk"|"pp"|"tt"|"hh") + "e" + l@("l"|"r")>
<S2, y + kk@("kk"|"pp"|"tt"|"hh") + "e" + l@("l"|"r"), _>
=> hjk_type_IVb_audit1 x (y + (init kk) + l) ;
-- aaker -> aakri, teater -> teatri
<S2, y + vvkpt@(#v + #v + #kpt) + "e" + l@("l"|"r")>
<S2, y + vvkpt@(#v + #v + #kpt) + "e" + l@("l"|"r"), _>
=> hjk_type_IVb_audit1 x (y+vvkpt+l) ;
<S2, y + vv@(#vv) + gbd@(#gbd) + "e" + l@("l"|"r")>
<S2, y + vv@(#vv) + gbd@(#gbd) + "e" + l@("l"|"r"), _>
=> hjk_type_IVb_audit1 x (y+vv+gbd+l) ;
-- Disabled, 50-50 correctness
--<S2, y + vv@(#vv) + lmnr@(#lmnr) + "e" + l@("l"|"r")>
--<S2, y + vv@(#vv) + lmnr@(#lmnr) + "e" + l@("l"|"r"), _>
-- => hjk_type_IVb_audit1 x (y+vv+lmnr+l) ; -- 50-50
<S2, y + vv@(#vv) + s@("s"|"v") + "e" + l@("l"|"r")>
<S2, y + vv@(#vv) + s@("s"|"v") + "e" + l@("l"|"r"), _>
=> hjk_type_IVb_audit1 x (y+vv+s+l) ;
<S2, y + n@("ht"|"hk"|"hv"|"nts"|"ld"|"lv"|"lb"|"ng"|"nd"|"mb"|"mp"|"nt"|"ps"|"ks"|"sk"|"st") + "e" + l@("l"|"r")>
<S2, y + n@("ht"|"hk"|"hv"|"nts"|"ld"|"lv"|"lb"|"ng"|"nd"|"mb"|"mp"|"nt"|"ps"|"ks"|"sk"|"st") + "e" + l@("l"|"r"), _>
=> hjk_type_IVb_audit1 x (y+n+l) ;
<S2, y + "e" + l@("l"|"r")>
=> hjk_type_IVb_audit x "i" ;
<S2, y + "e" + l@("l"|"r"), i>
=> hjk_type_IVb_audit x i ;
-- TODO: sometimes masked by 'link'
<S2, _ + #c>
=> hjk_type_IVb_audit x "i" ;
<S2, _ + #c, i>
=> hjk_type_IVb_audit x i ;
<S3, _ + #v>
<S3, _ + #v, _>
=> hjk_type_IVa_aasta x ;
-- verb + 'e'
<_, _ + "e">
<_, _ + "e", _>
=> hjk_type_VII_touge x ;
-- catch all that end with consonant
<_, _ + #c>
=> hjk_type_IVb_audit x "i" ;
<_, _ + #c, i>
=> hjk_type_IVb_audit x i ;
-- TODO: not in HJKEKS
<_, _ + ("ia"|"ja")> --kündja, not gerilja
<_, _ + ("ia"|"ja"), _> --kündja, not gerilja
=> hjk_type_IVa_aasta x ;
--added by Inari 07.10.
<S23, _ + #c + ("la")> --haigla, not gorilla
<S23, _ + #c + ("la"), _> --haigla, not gorilla
=> hjk_type_IVa_aasta x ;
-- catch all
<_, _>
<_, _, _>
=> hjk_type_III_ratsu x
} ;

View File

@@ -9,27 +9,27 @@ lin
alas_Interj = ss "paraku" ;
answer_V2S = mkV2 (mkV "vastama" "vastata") callative ;
apartment_N = mkN "korter" ;
apple_N = mkN "õun" ;
apple_N = mkN "õun" "õuna" "õuna";
art_N = mkN "kunst" ;
ask_V2Q = mkV2 (mkV "küsima") (casePrep ablative) ;
baby_N = mkN "beebi" ;
bad_A = mkA (mkN "halb" "halva" "halba" "halba" "halbade" "halbu") "halvem" "halvim" ;
bank_N = mkN "pank" ;
bank_N = mkN "pank" "panga" "panka";
beautiful_A = mkA (mkN "kaunis" "kauni" "kaunist" "kaunisse" "kaunite" "kauneid") ;
become_VA = mkVA (mkV "saama") ctranslative ; -- tema saab vanaks
beer_N = mkN "õlu" ;
beg_V2V = mkV2V (mkV "paluma") cpartitive ;
big_A = mkA (mkN "suur" "suure" "suurt" "suurde" "suurte" "suuri");
bike_N = mkN "ratas" ;
bird_N = mkN "lind" ;
bike_N = mkN "ratas" "ratta" "ratast" ;
bird_N = mkN "lind" "linnu" "lindu" ;
black_A = mkA (mkN "must" "musta" "musta") ;
blue_A = mkA "sinine" ;
boat_N = mkN "paat" ;
book_N = mkN "raamat" "raamatu" "raamatut" "raamatusse" "raamatute" "raamatuid" ;
boot_N = mkN "saabas" ;
boot_N = mkN "saabas" "saapa" "saabast" ;
boss_N = mkN "boss" ;
boy_N = mkN "poiss" ;
bread_N = mkN "leib" ;
bread_N = mkN "leib" "leiva" "leiba" ;
break_V2 = mkV2 (mkV "katki" (mkV "tegema" "teha")) ;
-- Superlative was the placeholder string "TODO"; "laiim" follows the i-superlative
-- pattern used by the other entries here (uus -> uusim, soe -> soojim).
broad_A = mkA (mkN "lai" "laia" "laia" "laia" "laiade" "laiu") "laiem" "laiim" ;
brother_N2 = mkN2 (mkN "vend" "venna" "venda" "venda" "vendade" "vendi") ;
@@ -39,24 +39,24 @@ lin
camera_N = mkN "kaamera" ;
cap_N = mkN "müts" ;
car_N = mkN "auto" ;
carpet_N = mkN "vaip" ;
carpet_N = mkN "vaip" "vaiba" "vaipa" ;
cat_N = mkN "kass" ;
ceiling_N = mkN "lagi" ;
ceiling_N = mkN "lagi" "lae" "lage" "lakke" "lagede" "lagesid" ;
chair_N = mkN "tool" ;
cheese_N = mkN "juust" ;
cheese_N = mkN "juust" "juustu" "juustu" ;
child_N = mkN "laps" "lapse" "last" "lapsesse" "laste" "lapsi" ;
church_N = mkN "kirik" ;
city_N = mkN "linn" "linna" "linna" "linna" "linnade" "linnu" ;
clean_A = mkA (mkN "puhas" "puhta" "puhast" "puhtasse" "puhaste" "puhtaid");
clever_A = mkA "tark" ;
clever_A = mkA (mkN "tark" "targa" "tarka") ;
close_V2 = mkV2 (mkV "sulgema") ;
coat_N = mkN "mantel" ;
cold_A = mkA "külm" ;
cold_A = mkA (mkN "külm" "külma" "külma") ;
come_V = mkV "tulema" ;
computer_N = mkN "arvuti" ;
country_N = mkN "maa" ;
cousin_N = mkN "nõbu" ;
cow_N = mkN "lehm" ;
cow_N = mkN "lehm" "lehma" "lehma" ;
die_V = mkV "surema" ;
dirty_A = mkA "räpane" ;
-- Using 'terminative' for the destination works better with nouns.
@@ -65,71 +65,70 @@ lin
-- Unfortunately, we cannot use a similar trick for the source (*Põltsamaast vs Põltsamaalt).
distance_N3 = mkN3 (mkN "kaugus") celative (casePrep terminative) ;
doctor_N = mkN "arst" ;
dog_N = mkN "koer" ;
door_N = mkN "uks" ;
dog_N = mkN "koer" "koera" "koera" ;
door_N = mkN "uks" "ukse" "ust" ;
drink_V2 = mkV2 (mkV "jooma") cpartitive ;
easy_A2V = mkA2 (mkA (mkN "lihtne")) callative ;
easy_A2V = mkA2 (mkA (mkN "lihtne" "lihtsa" "lihtsat")) callative ;
eat_V2 = mkV2 (mkV "sööma") cpartitive ;
empty_A = mkA "tühi" ;
empty_A = mkA (mkN "tühi" "tühja" "tühja" "tühja" "tühjade" "tühje") ;
enemy_N = mkN "vaenlane" ;
factory_N = mkN "tehas" ;
father_N2 = mkN2 (mkN "isa") ;
fear_VS = mkVS (mk2V "kartma" "karta") ;
find_V2 = mkV2 (mk2V "leidma" "leida") ;
fish_N = mkN "kala" ;
floor_N = mkN "põrand" ;
floor_N = mkN "põrand" "põranda" "põrandat" ;
forget_V2 = mkV2 (mkV "unustama") ;
fridge_N = mkN "külm" (mkN "kapp") ;
friend_N = mkN "sõber" ;
fruit_N = mkN "puu" (mkN "vili") ;
fun_AV = mkAV (mkA (mkN "lõbus")) ;
garden_N = mkN "aed" ;
girl_N = mkN "tüdruk" ;
glove_N = mkN "kinnas" ;
gold_N = mkN "kuld" ;
friend_N = mkN "sõber" "sõbra" "sõpra" "sõpra" "sõprade" "sõpru" ;
fruit_N = mkN "puu" (mkN "vili" "vilja" "vilja") ;
fun_AV = mkAV (mkA (mkN "lõbus" "lõbusa" "lõbusat")) ;
garden_N = mkN "aed" "aia" "aeda";
girl_N = mkN "tüdruk" "tüdruku" "tüdrukut" ;
glove_N = mkN "kinnas" "kinda" "kinnast" ;
gold_N = mkN "kuld" "kulla" "kulda" ;
good_A = mkA (mkN "hea") "parem" "parim" ;
go_V = mkV "minema" "minna" "läheb" "minnakse"
"minge" "läks" "läinud" "mindud" ;
go_V = vMinema ; -- irregular
green_A = mkA "roheline" ;
harbour_N = mkN "sadam" ;
hate_V2 = mkV2 (mkV "vihkama") cpartitive ;
harbour_N = mkN "sadam" "sadama" "sadamat" ;
hate_V2 = mkV2 (mkV "vihkama" "vihata") cpartitive ;
hat_N = mkN "müts" ;
hear_V2 = mkV2 (mkV "kuulma") ;
hill_N = mkN "küngas" "künka" "küngast" "künkasse" "küngaste" "künkaid" ;
hope_VS = mkVS (mkV "lootma") ;
horse_N = mkN "hobune" ;
hot_A = mkA "kuum" ;
hot_A = mkA (mkN "kuum" "kuuma" "kuuma") ;
house_N = mkN "maja" ;
important_A = mkA "tähtis" ;
important_A = mkA (mkN "tähtis" "tähtsa" "tähtsat") ;
industry_N = mkN "tööstus" ;
iron_N = mkN "raud" ;
iron_N = mkN "raud" "raua" "rauda" ;
king_N = mkN "kuningas" ;
know_VS = mkVS know_V ;
know_VQ = mkVQ know_V ;
know_V2 = mkV2 (mkV "tundma") ;
lake_N = mkN "järv" ;
lake_N = mkN "järv" "järve" "järve" ;
lamp_N = mkN "lamp" ;
learn_V2 = mkV2 (mkV "õppima") ;
leather_N = mkN "nahk" ;
leather_N = skin_N ;
leave_V2 = mkV2 (mkV "jätma") ;
like_V2 = mkV2 (mkV "lugu" (mkV "pidama" "pidada" "peab" "peetakse")) celative ;
listen_V2 = mkV2 (mkV "kuulama") partitive ;
listen_V2 = mkV2 (mkV "kuulama" "kuulata") partitive ;
live_V = mkV "elama" ;
long_A = mkA "pikk" ;
long_A = mkA (mkN "pikk" "pika" "pikka") ;
lose_V2 = mkV2 (mkV "kaotama") ;
love_N = mkN "armastus" ;
love_V2 = mkV2 (mkV "armastama") partitive ;
man_N = mkN "mees" "mehe" "meest" "mehesse" "meeste" "mehi" ;
married_A2 = mkA2 (mkA "abielus") (casePrep comitative) ;
meat_N = mkN "liha" ;
milk_N = mkN "piim" ;
milk_N = mkN "piim" "piima" "piima" ;
moon_N = mkN "kuu" ;
mother_N2 = mkN2 (mkN "ema") ;
mountain_N = mkN "mägi" "mäe" "mäge" "mäkke" "mägede" "mägesid" ;
music_N = mkN "muusika" ;
narrow_A = mkA "kitsas" ;
new_A = mkA (mkN "uus" "uue" "uut" "uude" "uute" "uusi") "uuem" "uusim" ;
newspaper_N = mkN "aja" (mkN "leht") ;
newspaper_N = mkN "aja" (mkN "leht" "lehe" "lehte") ;
oil_N = mkN "õli" ;
old_A = mkA (mkN "vana" "vana" "vana" "vanasse" "vanade" "vanasid") "vanem" "vanim" ;
open_V2 = mkV2 (mkV "avama") ;
@@ -149,9 +148,9 @@ lin
rain_V0 = mkV0 (mkV "sadama" "sadada" "sajab") ;
read_V2 = mkV2 (mkV "lugema" "lugeda" "loeb") ;
red_A = mkA "punane" ;
religion_N = mkN "usk" ;
religion_N = mkN "usk" "usu" "usku" ;
restaurant_N = mkN "restoran" ;
river_N = mkN "jõgi" ;
river_N = mkN "jõgi" "jõe" "jõge" ;
rock_N = mkN "kalju" ;
roof_N = mkN "katus" ;
rubber_N = mkN "kumm" ;
@@ -159,55 +158,55 @@ lin
say_VS = mkVS (mkV "ütlema") ;
school_N = mkN "kool" ;
science_N = mkN "teadus" ;
sea_N = mkN "meri" ;
sea_N = mkN "meri" "mere" "merd" ;
seek_V2 = mkV2 (mkV "otsima") cpartitive ;
see_V2 = mkV2 (mkV "nägema" "näha") ;
sell_V3 = mkV3 (mkV "müüma") accPrep callative ;
send_V3 = mkV3 (mkV "saatma") accPrep callative ;
sheep_N = mkN "lammas" ;
ship_N = mkN "laev" ;
sheep_N = mkN "lammas" "lamba" "lammast" ;
ship_N = mkN "laev" "laeva" "laeva" ;
shirt_N = mkN "särk" ;
shoe_N = mkN "king" ;
shoe_N = mkN "king" "kinga" "kinga" ;
shop_N = mkN "kauplus" ;
short_A = mkA "lühike" ;
silver_N = mkN "hõbe" ;
silver_N = mkN "hõbe" "hõbeda" "hõbedat" ;
sister_N = mkN "õde" "õe" "õde" "õesse" "õdede" "õdesid" ;
sleep_V = mkV "magama" ;
small_A = mkA (mkN "väike" "väikse" "väikest" "väiksesse" "väikeste" "väikseid") ;
snake_N = mkN "uss" ;
sock_N = mkN "sukk" ;
sock_N = mkN "sukk" "suka" "sukka" ;
speak_V2 = mkV2 talk_V cpartitive ;
star_N = mkN "täht" ;
star_N = mkN "täht" "tähe" "tähte" ;
steel_N = mkN "teras" ;
stone_N = mkN "kivi" ;
stove_N = mkN "ahi" ;
stove_N = mkN "ahi" "ahju" "ahju" ;
student_N = mkN "tudeng" ;
stupid_A = mkA "loll" ;
sun_N = mkN "päike" ;
sun_N = mkN "päike" "päikse" "päikest" ;
switch8off_V2 = mkV2 (mkV "välja" (mkV "lülitama")) ;
switch8on_V2 = mkV2 (mkV "sisse" (mkV "lülitama")) ;
table_N = mkN "laud" ;
table_N = mkN "laud" "laua" "lauda" ;
talk_V3 = mkV3 talk_V callative celative ;
teacher_N = mkN "õpetaja" ;
teach_V2 = mkV2 (mkV "õpetama") ;
television_N = mkN "televisioon" ;
thick_A = mkA "paks" ;
thick_A = mkA (mkN "paks" "paksu" "paksu") ;
thin_A = mkA "õhuke" ;
train_N = mkN "rong" ;
travel_V = mkV "reisima" ;
tree_N = mkN "puu" ;
ugly_A = mkA "kole" ;
ugly_A = mkA (mkN "kole" "koleda" "koledat") ;
understand_V2 = mkV2 (mkV "aru" (mkV "saama")) celative ;
university_N = mkN "ülikool" ;
village_N = mkN "küla" ;
wait_V2 = mkV2 (mkV "ootama") partitive ;
wait_V2 = mkV2 (mkV "ootama" "oodata") partitive ;
walk_V = mkV "kõndima" ;
warm_A = mkA (mkN "soe" "sooja" "sooja" "sooja" "soojade" "soojasid") "soojem" "soojim" ;
war_N = mkN "sõda" ;
watch_V2 = mkV2 (mkV "vaatama") cpartitive ;
water_N = mkN "vesi" ;
war_N = mkN "sõda" "sõja" "sõda" ;
watch_V2 = mkV2 (mkV "vaatama" "vaadata") cpartitive ;
water_N = mkN "vesi" "vee" "vett" ;
white_A = mkA "valge" ;
window_N = mkN "aken" ;
window_N = mkN "aken" "akna" "akent" ;
wine_N = mkN "vein" ;
win_V2 = mkV2 (mkV "võitma") ;
woman_N = mkN "naine" "naise" "naist" "naisesse" "naiste" "naisi" ;
@@ -215,95 +214,95 @@ lin
wood_N = mkN "puu" ;
write_V2 = mkV2 (mkV "kirjutama") ;
yellow_A = mkA "kollane" ;
young_A = mkA "noor" ;
young_A = mkA (mkN "noor" "noore" "noort") ;
do_V2 = mkV2 (mkV "tegema" "teha") ;
now_Adv = mkAdv "nüüd" ;
already_Adv = mkAdv "juba" ;
song_N = mkN "laul" ;
song_N = mkN "laul" "laulu" "laulu" ;
add_V3 = mkV3 (mkV "lisama") accPrep callative ;
number_N = mkN "number" ;
put_V2 = mkV2 (mkV "panema") ;
stop_V = mkV "peatuma" ;
jump_V = mkV "hüppama" ;
jump_V = mkV "hüppama" "hüpata" ;
left_Ord = mkOrd1 (mkN "vasak") ;
right_Ord = mkOrd1 (mkN "parem") ;
right_Ord = mkOrd1 (mkN "parem" "parema" "paremat") ;
far_Adv = mkAdv "kaugel" ;
correct_A = mkA "õige" ;
dry_A = mkA "kuiv" ;
correct_A = mkA (mkN "õige" "õige" "õiget" "õigesse" "õigete" "õigeid") ;
dry_A = mkA (mkN "kuiv" "kuiva" "kuiva") ;
dull_A = mkA "igav" ;
full_A = mkA (mkN "täis") "täiem" "täiim" Invariable ; -- 'täis' is one of the non-inflecting adjectives
full_A = mkA (mkN "täis" "täie" "täit") "täiem" "täiim" Invariable ; -- 'täis' is one of the non-inflecting adjectives
heavy_A = mkA "raske" ;
near_A = mkA "lähedane" ;
rotten_A = mkA "mäda" ;
round_A = mkA "ümmargune" ;
sharp_A = mkA "terav" ;
smooth_A = mkA "sile" ;
smooth_A = mkA (mkN "sile" "sileda" "siledat" "siledasse" "siledate" "siledaid") ;
straight_A = mkA "sirge" ;
wet_A = mkA "märg" ;
wide_A = mkA "lai" ;
animal_N = mkN "loom" ;
ashes_N = mkN "tuhk" ;
back_N = mkN "selg" ;
bark_N = mkN "koor" ;
belly_N = mkN "kõht" ;
blood_N = mkN "veri" ;
wet_A = mkA (mkN "märg" "märja" "märga") ;
wide_A = mkA (mkN "lai" "laia" "laia") ;
animal_N = mkN "loom" "looma" "looma" ;
ashes_N = mkN "tuhk" "tuha" "tuhka" ;
back_N = mkN "selg" "selja" "selga" ;
bark_N = mkN "koor" "koore" "koort" ;
belly_N = mkN "kõht" "kõhu" "kõhtu" ;
blood_N = mkN "veri" "vere" "verd" "verre" "verede" "veresid" ;
bone_N = mkN "luu" ;
breast_N = mkN "rind" ;
cloud_N = mkN "pilv" ;
day_N = mkN "päev" ;
dust_N = mkN "tolm" ;
ear_N = mkN "kõrv" ;
breast_N = mkN "rind" "rinna" "rinda" ;
cloud_N = mkN "pilv" "pilve" "pilve" ;
day_N = mkN "päev" "päeva" "päeva" ;
dust_N = mkN "tolm" "tolmu" "tolmu" ;
ear_N = mkN "kõrv" "kõrva" "kõrva" ;
earth_N = mkN "maa" ;
egg_N = mkN "muna" ;
eye_N = mkN "silm" ;
fat_N = mkN "rasv" ;
feather_N = mkN "sulg" ;
fingernail_N = mkN "küüs" ;
fire_N = mkN "tuli" ;
flower_N = mkN "lill" ;
eye_N = mkN "silm" "silma" "silma" ;
fat_N = mkN "rasv" "rasva" "rasva" ;
feather_N = mkN "sulg" "sule" "sulge" ;
fingernail_N = mkN "küüs" "küüne" "küünt" ;
fire_N = mkN "tuli" "tule" "tuld" "tulle" "tulede" "tulesid" ;
flower_N = mkN "lill" "lille" "lille" ;
fog_N = mkN "udu" ;
foot_N = mkN "jalg" ;
forest_N = mkN "mets" ;
grass_N = mkN "rohi" ;
foot_N = mkN "jalg" "jala" "jalga" ;
forest_N = mkN "mets" "metsa" "metsa" ;
grass_N = mkN "rohi" "rohu" "rohtu" ;
guts_N = mkN "soolestik" ;
hair_N = mkN "juuksed" ; -- TODO: plural
hand_N = mkN "käsi" ;
hair_N = mkN "juuksed" "juuste" "juukseid" "juustesse" "juuste" "juukseid" ; -- TODO: plural
hand_N = mkN "käsi" "käe" "kätt" ;
head_N = mkN "pea" ;
heart_N = mkN "süda" ;
horn_N = mkN "sarv" ;
heart_N = mkN "süda" "südame" "südant" ;
horn_N = mkN "sarv" "sarve" "sarve" ;
husband_N = man_N ;
ice_N = mkN "jää" ;
knee_N = mkN "põlv" ;
leaf_N = mkN "leht" ;
leg_N = mkN "jalg" ;
liver_N = mkN "maks" ;
knee_N = mkN "põlv" "põlve" "põlve" ;
leaf_N = mkN "leht" "lehe" "lehte" ;
leg_N = mkN "jalg" "jala" "jalga" ;
liver_N = mkN "maks" "maksa" "maksa" ;
louse_N = mkN "täi" ;
mouth_N = mkN "suu" ;
name_N = mkN "nimi" ;
neck_N = mkN "kael" ;
name_N = mkN "nimi" "nime" "nime" ;
neck_N = mkN "kael" "kaela" "kaela" ;
night_N = mkN "öö" ;
nose_N = mkN "nina" ;
person_N = mkN "inimene" ;
rain_N = mkN "vihm" ;
rain_N = mkN "vihm" "vihma" "vihma" ;
road_N = mkN "tee" ;
root_N = mkN "juur" ;
rope_N = mkN "köis" ;
salt_N = mkN "sool" ;
sand_N = mkN "liiv" ;
seed_N = mkN "seeme" ;
skin_N = mkN "nahk" ;
sky_N = mkN "taevas" ;
smoke_N = mkN "suits" ;
snow_N = mkN "lumi" ;
root_N = mkN "juur" "juure" "juurt" ;
rope_N = mkN "köis" "köie" "köit" ;
salt_N = mkN "sool" "soola" "soola" ;
sand_N = mkN "liiv" "liiva" "liiva" ;
seed_N = mkN "seeme" "seemne" "seemet" ;
skin_N = mkN "nahk" "naha" "nahka" ;
sky_N = mkN "taevas" "taeva" "taevast" ;
smoke_N = mkN "suits" "suitsu" "suitsu" ;
snow_N = mkN "lumi" "lume" "lund" ;
stick_N = mkN "kepp" ;
tail_N = mkN "saba" ;
tongue_N = mkN "keel" ;
tooth_N = mkN "hammas" ;
tongue_N = mkN "keel" "keele" "keelt" ;
tooth_N = mkN "hammas" "hamba" "hammast" ;
-- Same lemma as woman_N ("naine"): reuse its fully specified six-form paradigm
-- instead of relying on the one-argument heuristic (cf. husband_N = man_N).
wife_N = woman_N ;
wind_N = mkN "tuul" ;
wing_N = mkN "tiib" ;
wind_N = mkN "tuul" "tuule" "tuult" ;
wing_N = mkN "tiib" "tiiva" "tiiba" ;
worm_N = mkN "uss" ;
year_N = mkN "aasta" ;
bite_V2 = mkV2 (mkV "purema") ;
@@ -311,13 +310,13 @@ lin
burn_V = mkV "põlema" ;
count_V2 = mkV2 (mkV "loendama") ;
cut_V2 = mkV2 (mk2V "lõikama" "lõigata") ;
dig_V = mkV "kaevama" ;
dig_V = mkV "kaevama" "kaevata" ;
fall_V = mkV "kukkuma" ;
-- Give the da-infinitive ("karta") explicitly, exactly as fear_VS does,
-- so that both entries for "kartma" share one paradigm.
fear_V2 = mkV2 (mk2V "kartma" "karta") cpartitive ;
fight_V2 = mkV2 (mkV "võitlema") (postPrep partitive "vastu") ;
float_V = mkV "liuglema" ;
flow_V = mkV "voolama" ;
fly_V = mkV "lendama" ;
flow_V = mkV "voolama" "voolata" ;
fly_V = mkV "lendama" "lennata" ;
freeze_V = mkV "jäätuma" ;
give_V3 = mkV3 (mkV "andma") accPrep callative ;
hit_V2 = mkV2 (mkV "lööma") cpartitive ;
@@ -327,11 +326,11 @@ lin
laugh_V = mkV "naerma" ;
lie_V = mkV "lamama" ; -- TODO: maybe: valetama
play_V = mkV "mängima" ;
pull_V2 = mkV2 (mkV "tõmbama") ;
pull_V2 = mkV2 (mkV "tõmbama" "tõmmata") ;
push_V2 = mkV2 (mkV "suruma") ;
rub_V2 = mkV2 (mkV "hõõruma") cpartitive ;
scratch_V2 = mkV2 (mkV "kraapima") cpartitive ;
sew_V = mkV "külvama" ;
-- "õmblema" = to sew (with a needle); the previous "külvama" means "to sow" (seed).
-- The da-infinitive is given explicitly because the stem changes (õmble- / õmmel-).
sew_V = mkV "õmblema" "õmmelda" ;
sing_V = mkV "laulma" "laulda" "laulab" "lauldakse" ;
sit_V = mkV "istuma" ;
smell_V = mk2V "haistma" "haista";
@@ -344,9 +343,9 @@ lin
swell_V = mkV "paistetama" ; -- TODO: üles paistetama
swim_V = mkV "ujuma" ;
think_V = mkV "mõtlema" "mõtelda" "mõtleb" "mõeldakse";
throw_V2 = mkV2 (mkV "viskama") ;
throw_V2 = mkV2 (mkV "viskama" "visata") ;
tie_V2 = mkV2 (mkV "siduma" "siduda" "seob") ;
turn_V = mkV "pöörama" ;
turn_V = mkV "pöörama" "pöörata" ;
vomit_V = mkV "oksendama" ;
wash_V2 = mkV2 (mkV "pesema") ;
wipe_V2 = mkV2 (mkV "pühkima") ;
@@ -354,7 +353,7 @@ lin
breathe_V = mkV "hingama" ;
grammar_N = mkN "grammatika" ;
language_N = mkN "keel" ;
language_N = tongue_N ;
rule_N = mkN "reegel" ;
john_PN = mkPN "Juhan" ;
@@ -362,7 +361,7 @@ lin
ready_A = mkA (mkN "valmis" "valmi" "valmit" "valmisse" "valmite" "valmeid") "valmim" "valmeim" Invariable ; -- 'valmis' is one of the non-inflecting adjectives
reason_N = mkN "põhjus" ;
today_Adv = mkAdv "täna" ;
uncertain_A = mkA "ebakindel" ;
uncertain_A = mkA (mkN "ebakindel" "ebakindla" "ebakindlat") ;
oper

View File

@@ -16,8 +16,12 @@
--
-- The structure of functions for each word class $C$ is the following:
-- there is a polymorphic constructor $mkC$, which takes one or
-- a few arguments. In Estonian, one argument is enough in ??? % of
-- a few arguments. In Estonian, one argument is enough in 90% of
-- cases on average.
--
-- @author Inari Listenmaa
-- @author Kaarel Kaljurand
-- @version 2013-10-21
resource ParadigmsEst = open
(Predef=Predef),
@@ -59,7 +63,7 @@ oper
comitative : Case ; -- e.g. "karbiga"
infDa : InfForm ; -- e.g. "lugeda"
infDes : InfForm ;
infDes : InfForm ; -- e.g. "lugedes"
infMa : InfForm ; -- e.g. "lugema"
infMas : InfForm ; -- e.g. "lugemas"
infMaks : InfForm ; -- e.g. "lugemaks"
@@ -70,49 +74,30 @@ oper
-- of many-place verbs and adjectives. A complement can be defined by
-- just a case, or a pre/postposition and a case.
prePrep : Case -> Str -> Prep ; -- preposition, e.g. comitative "koos"
prePrep : Case -> Str -> Prep ; -- preposition, e.g. abessive "ilma"
postPrep : Case -> Str -> Prep ; -- postposition, e.g. genitive "taga"
postGenPrep : Str -> Prep ; -- genitive postposition, e.g. "taga"
casePrep : Case -> Prep ; -- just case, e.g. adessive
-- TODO build the dict
NW : Type ; -- Noun from DictEst (WordNet)
AW : Type ; -- Adjective from DictEst (WordNet)
VW : Type ; -- Verb from DictEst (WordNet)
AdvW : Type ; -- Adverb from DictEst (WordNet)
--2 Nouns
-- The worst case gives six forms.
-- In practice just a couple of forms are needed to define the different
-- stems, vowel alternation, and vowel harmony.
oper
-- The regular noun heuristic takes just one form (singular
-- nominative) and analyses it to pick the correct paradigm.
-- It does automatic grade alternation, and is hence not usable
-- for words like "auto" (whose genitive would become "audon").
--
-- If the one-argument paradigm does not give the correct result, one can try and give
-- two or three forms. Most notably, the two-argument variant is used
-- for nouns like "kivi - kiviä", which would otherwise become like
-- "rivi - rivejä". Three arguments are used e.g. for
-- "auto - auton - autoja", which would otherwise become
-- "auto - audon".
-- If the 1-argument paradigm does not give the correct result,
-- one can try and give 2, 3, 4, or 6 forms.
mkN : overload {
mkN : (kukko : Str) -> N ; -- predictable nouns, covers 82%
mkN : (savi,savia : Str) -> N ; -- different pl.part
mkN : (vesi,veden,vesia : Str) -> N ; -- also different sg.gen
mkN : (ema : Str) -> N ; -- predictable nouns, covers 90%
mkN : (tukk,tuku : Str) -> N ; -- sg nom,gen: unpredictable stem vowel
mkN : (tukk,tuku,tukku : Str) -> N ; -- sg nom,gen,part
mkN : (pank,panga,panka,panku : Str) -> N ; -- sg nom,gen,part, pl.part
-- mkN : (olo,n,a,na,oon,jen,ja,ina,issa,ihin : Str) -> N ; -- worst case, 10 forms
mkN : (oun,ouna,ouna,ounasse,ounte,ounu : Str) -> N ; -- worst case, 6 forms
mkN : (oun,ouna,ouna,ounasse,ounte,ounu,ountesse : Str) -> N ; -- worst case, 7 forms
mkN : (pika : Str) -> (juna : N) -> N ; -- compound with invariable prefix
mkN : (oma : N) -> (tunto : N) -> N ; -- compound with inflecting prefix
mkN : NW -> N ; -- noun from DictEst (WordNet)
-- mkN : (pika : Str) -> (juna : N) -> N ; -- compound with invariable prefix
-- mkN : (oma : N) -> (tunto : N) -> N ; -- compound with inflecting prefix
} ;
-- Nouns used as functions need a case, of which the default is
@@ -141,22 +126,20 @@ oper
-- The comparative and the superlative
-- are always inflected in the same way, so the nominative of them is actually
-- enough (TODO: confirm).
-- TODO: update these types to include the new boolean non-inflection marker
mkA : overload {
mkA : Str -> A ; -- regular noun made into adjective
mkA : N -> A ; -- any noun made into adjective
mkA : N -> (infl : Bool) -> A ; -- noun made into adjective, agreement type specified
mkA : N -> (infl : Infl) -> A ; -- noun made into adjective, agreement type specified
mkA : N -> (parem, parim : Str) -> A ; -- deviating comparison forms
mkA : AW -> A ; -- adjective from DictEst (WordNet)
} ;
-- Two-place adjectives need a case for the second argument.
mkA2 : A -> Prep -> A2 -- e.g. "jaollinen" casePrep adessive
mkA2 : A -> Prep -> A2 -- e.g. "vihane" (postGenPrep "peale")
= \a,p -> a ** {c2 = p ; lock_A2 = <>};
genAttrA : Str -> A ; -- genitive attributes ; no agreement to head, no comparison forms.
invA : Str -> A ; -- invariable adjectives, such as genitive attributes ; no agreement to head, no comparison forms.
--2 Verbs
--
@@ -166,19 +149,18 @@ oper
-- The worst case needs eight forms, as shown in the following.
mkV : overload {
mkV : (lugema : Str) -> V ; -- predictable verbs, covers n %
mkV : (lugema,lugeda : Str) -> V ; -- deviating past 3sg
mkV : (lugema,loeb,lugeda : Str) -> V ; -- also deviating pres. 1sg
mkV : (lugema,lugeda,loeb,loetakse : Str) -> V ;
mkV : (tegema,teha,teeb,tehakse,tehke,tegi,teinud,tehtud : Str) -> V ; -- worst-case verb
mkV : (saama : V) -> (aru : Str) -> V ; -- püsiühendid TODO
mkV : VW -> V ; -- verb from DictEst (WordNet)
mkV : (lugema : Str) -> V ; -- predictable verbs, covers 90 %
mkV : (lugema,lugeda : Str) -> V ; -- ma infinitive, da infinitive
mkV : (lugema,lugeda,loeb : Str) -> V ; -- ma, da, present sg 3
mkV : (lugema,lugeda,loeb,loetakse : Str) -> V ; --ma, da, pres sg 3, pres passive
mkV : (tegema,teha,teeb,tehakse,tehke,tegi,teinud,tehtud : Str) -> V ; -- worst-case verb, 8 forms
mkV : (saama : V) -> (aru : Str) -> V ; -- multi-word verbs
} ;
-- All the patterns above have $nominative$ as subject case.
-- If another case is wanted, use the following.
caseV : Case -> V -> V ; -- deviating subj. case, e.g. genitive "täytyä"
caseV : Case -> V -> V ; -- deviating subj. case, e.g. allative "meeldima"
-- The verbs "be" and "go" are special.
@@ -206,9 +188,9 @@ oper
-- Three-place (ditransitive) verbs need two prepositions, of which
-- the first one or both can be absent.
mkV3 : V -> Prep -> Prep -> V3 ; -- e.g. puhua, allative, elative
dirV3 : V -> Case -> V3 ; -- siirtää, (accusative), illative
dirdirV3 : V -> V3 ; -- antaa, (accusative), (allative)
mkV3 : V -> Prep -> Prep -> V3 ; -- e.g. rääkima, allative, elative
dirV3 : V -> Case -> V3 ; -- liigutama, (accusative), illative
dirdirV3 : V -> V3 ; -- andma, (accusative), (allative)
--3 Other complement patterns
@@ -218,15 +200,15 @@ oper
mkV0 : V -> V0 ; --%
mkVS : V -> VS ;
mkV2S : V -> Prep -> V2S ; -- e.g. "sanoa" allative
mkVV : V -> VV ; -- e.g. "alkaa"
mkV2S : V -> Prep -> V2S ; -- e.g. "ütlema" allative
mkVV : V -> VV ; -- e.g. "hakkama"
mkVVf : V -> InfForm -> VV ; -- e.g. "hakkama" infMa
mkV2V : V -> Prep -> V2V ; -- e.g. "käskeä" genitive
mkV2Vf : V -> Prep -> InfForm -> V2V ; -- e.g. "kieltää" partitive infMast
mkVA : V -> Prep -> VA ; -- e.g. "maistua" ablative
mkV2A : V -> Prep -> Prep -> V2A ; -- e.g. "maalata" accusative translative
mkV2V : V -> Prep -> V2V ; -- e.g. "käskima" adessive
mkV2Vf : V -> Prep -> InfForm -> V2V ; -- e.g. "keelama" partitive infMast
mkVA : V -> Prep -> VA ; -- e.g. "muutuma" translative
mkV2A : V -> Prep -> Prep -> V2A ; -- e.g. "värvima" genitive translative
mkVQ : V -> VQ ;
mkV2Q : V -> Prep -> V2Q ; -- e.g. "kysyä" ablative
mkV2Q : V -> Prep -> V2Q ; -- e.g. "küsima" ablative
mkAS : A -> AS ; --%
mkA2S : A -> Prep -> A2S ; --%
@@ -279,22 +261,16 @@ oper
\c -> {c = NPCase c ; s = [] ; isPre = True ; lock_Prep = <>} ;
-- Preposition record for the accusative: case NPAcc, empty string s, isPre = True.
accPrep = {c = NPAcc ; s = [] ; isPre = True ; lock_Prep = <>} ;
-- Wrapper record types, each tagged with its own lock field:
-- NW and AW hold an NForms value, VW holds a VForms value, AdvW holds a plain Str.
NW = {s : NForms ; lock_NW : {}} ;
AW = {s : NForms ; lock_AW : {}} ;
VW = {s : VForms ; lock_VW : {}} ;
AdvW = {s : Str ; lock_AdvW : {}} ;
-- Overloaded noun constructor. The variant is selected by the number and the
-- types of the arguments; the argument names are example words.
mkN = overload {
mkN : (nisu : Str) -> N = mk1N ; -- one string
mkN : (link,lingi : Str) -> N = mk2N ; -- two strings
mkN : (tukk,tuku,tukku : Str) -> N = mk3N ; -- three strings
-- NOTE(review): the next two signatures have the same type and the same worker
-- (mk4N); they differ only in the name of the last argument (paatide vs. paate).
-- This looks like the before/after pair of a diff -- confirm that only one of
-- them is present in the actual file.
mkN : (paat,paadi,paati,paatide : Str) -> N = mk4N ;
mkN : (paat,paadi,paati,paate : Str) -> N = mk4N ;
mkN : (oun,ouna,ouna,ounasse,ounte,ounu : Str) -> N = mk6N ; -- six strings
mkN : (sora : Str) -> (tie : N) -> N = mkStrN ; -- a string and a noun (presumably a compound prefix -- confirm)
mkN : (oma,tunto : N) -> N = mkNN ; -- two nouns (presumably a compound -- confirm)
mkN : (sana : NW) -> N = \w -> nForms2N w.s ; -- passes the NForms stored in an NW record to nForms2N
} ;
-- Adjective forms (incl. comp and sup) are derived from noun forms
@@ -309,6 +285,8 @@ oper
-- One-string noun constructor: hjk_type is applied to the single string,
-- its result is passed to nForms2N, and the record is extended with lock_N.
mk1N : (link : Str) -> N = \s -> nForms2N (hjk_type s) ** {lock_N = <> } ;
-- mk2N, mk3N, mk4N make sure that the user-specified forms end up in the paradigm,
-- even if the remaining, automatically generated forms are wrong
mk2N : (link,lingi : Str) -> N = \link,lingi ->
let nfs : NForms = (nForms2 link lingi) ;
nfs_fixed : NForms = table {
@@ -334,24 +312,16 @@ oper
} ;
in nForms2N nfs_fixed ** {lock_N = <> } ;
{- mk1N : (link : Str) -> N = \s -> nForms2N (hjk_type s) ** {lock_N = <> } ;
mk2N : (link,lingi : Str) -> N = \s,t -> nForms2N (nForms2 s t) ** {lock_N = <>} ;
mk3N : (tukk,tuku,tukku : Str) -> N = \s,t,u -> nForms2N (nForms3 s t u) ** {lock_N = <>} ;
--regular mk4N
mk4N : (paat,paadi,paati,paate : Str) -> N = \s,t,u,v -> nForms2N (nForms4 s t u v) ** {lock_N = <>} ;
-}
-- Experimental four-form noun constructor: makes sure that the user-specified
-- forms end up in the paradigm, even though the rest is wrong.
-- The fourth argument is the plural partitive.
-- nForms4 first generates a full paradigm (nfs); nfs_fixed then overrides
-- positions 0, 1, 2 and 5 with the strings given by the user and keeps
-- positions 3 and 4 from the generated paradigm.
-- NOTE(review): the signature line, the `let nfs` line and the `5 => paate` line
-- each appear twice. This looks like the before/after pairs of a diff --
-- confirm that only one of each is present in the actual file.
mk4N : (paat,paadi,paati,paatide : Str) -> N = \paat,paadi,paati,paate ->
let nfs : NForms = (nForms4 paat paadi paati paate) ;
mk4N : (paat,paadi,paati,paate : Str) -> N = \paat,paadi,paati,paate ->
let nfs : NForms = (nForms4 paat paadi paati paate) ;
nfs_fixed : NForms = table {
0 => paat ; -- user-supplied first form
1 => paadi ; -- user-supplied second form
2 => paati ; -- user-supplied third form
3 => nfs ! 3 ; -- taken from the generated paradigm
4 => nfs ! 4 ; -- taken from the generated paradigm
5 => paate
5 => paate
} ;
in nForms2N nfs_fixed ** {lock_N = <> } ;
@@ -403,7 +373,7 @@ oper
-- Heuristic meant to catch palk:palga but not maakas:maaka (for longer words, the same pattern with more ?s).
-- It did not work; do not try it again.
--<? + ? + #c, ? + ? + #c + #v> => hjk_type_IVb_audit link i ;
_ => hjk_type link
_ => hjk_type2 link i
} ;
nForms3 : (_,_,_ : Str) -> NForms = \tukk,tuku,tukku ->
@@ -422,13 +392,12 @@ oper
<_ + "ik", _ + "iku", _ + "ikku"> => hjk_type_VI_imelik tukk ; --imelik:_:imelikku caught here
<_ + #c, _ + #v, _ + #v> => hjk_type_VI_tukk tukk tuku ;
<_ + "ud", _ + "u", _ + "ut"> => nForms2 tukk tuku ; -- -nud/-tud participles are not like 'voolik'
<_ + #c, _ + #v, _ + #v + "t"> => hjk_type_IVb_audit tukk u ; --voolik:_:voolikut caught here
_ => nForms2 tukk tuku
} ;
nForms4 : (_,_,_,_ : Str) -> NForms = \paat,paadi,paati,paate ->
nForms4 : (_,_,_,_ : Str) -> NForms = \paat,paadi,paati,paate ->
case <paat,paadi,paati,paate> of {
-- distinguish between joonis and segadus
<_ +("ne"|"s"), _+"se", _+"st", _+"seid"> => hjk_type_Va_otsene paat ;
@@ -442,6 +411,7 @@ oper
_ => nForms3 paat paadi paati
} ;
{-
--Version that uses pl gen instead of pl part
nForms4 : (_,_,_,_ : Str) -> NForms = \paat,paadi,paati,paatide ->
@@ -468,7 +438,7 @@ oper
-- Builds an N2 from noun n and preposition c: c is stored as c2 and
-- isPre is computed from c by mkIsPre.
mmkN2 : N -> Prep -> N2 = \n,c -> n ** {c2 = c ; isPre = mkIsPre c ; lock_N2 = <>} ;
-- Builds an N3 from noun n and two prepositions: c is stored as c2 and e as c3;
-- isPre and isPre2 are computed by mkIsPre from c and e respectively.
-- NOTE(review): the isPre field is listed twice, differing only in the example
-- comment. This looks like the before/after pair of a diff -- confirm that
-- only one of them is present in the actual file.
mkN3 = \n,c,e -> n ** {c2 = c ; c3 = e ;
isPre = mkIsPre c ; -- Finnish example: "matka Lontoosta Pariisiin" (a trip from London to Paris)
isPre = mkIsPre c ; -- Estonian example: "matka Londonist Pariisi" (a trip from London to Paris)
isPre2 = mkIsPre e ; -- Finnish example: "Suomen voitto Ruotsista" (Finland's victory over Sweden)
lock_N3 = <>
} ;
@@ -495,10 +465,9 @@ oper
-- TODO: temporary usage of regAdjective1
mkA : N -> (valmim,valmeim : Str) -> (infl : Infl) -> A =
\n,c,s,infl -> (regAdjective1 n c s) ** {infl = infl ; lock_A = <>} ;
mkA : (sana : AW) -> A = \w -> noun2adjDeg (nForms2N w.s) ** {infl = Regular} ;
} ;
-- Invariable adjective: the s table returns the string balti for every
-- pair of parameters, and infl is set to Invariable.
genAttrA balti = {s = \\_,_ => balti ; infl = Invariable ; lock_A = <>} ;
-- Invariable adjective: the s table returns the string balti for every
-- pair of parameters, and infl is set to Invariable.
invA balti = {s = \\_,_ => balti ; infl = Invariable ; lock_A = <>} ;
-- One-string adjective constructor: builds a noun from x with mk1N, converts it
-- with noun2adjDeg, and marks the result as Regular.
mkA_1 : Str -> A = \x -> noun2adjDeg (mk1N x) ** {infl = Regular ; lock_A = <>} ;
@@ -550,7 +519,6 @@ oper
mkV : (lugema,lugeda,loeb,loetakse : Str) -> V = mk4V ;
mkV : (tegema,teha,teeb,tehakse,tehke,tegi,teinud,tehtud : Str) -> V = mk8V ;
mkV : (aru : Str) -> (saama : V) -> V = mkPV ; -- particle verbs
mkV : (sana : VW) -> V = \w -> vforms2V w.s ** {sc = NPCase Nom ; lock_V = <>} ;
} ;
mk1V : Str -> V = \s ->
@@ -742,7 +710,6 @@ oper
-- Overloaded adverb constructor: builds an Adv either from a plain string
-- or from an AdvW record, whose s field is used as the adverb string.
mkAdv = overload {
mkAdv : Str -> Adv = \s -> {s = s ; lock_Adv = <>} ; -- from a plain string
mkAdv : AdvW -> Adv = \s -> {s = s.s ; lock_Adv = <>} ; -- from an AdvW wrapper record
} ;
mkV2 = overload {