From 330a2ae094e6208bf13449f55ffe010844ae2a39 Mon Sep 17 00:00:00 2001 From: aarne Date: Thu, 14 Mar 2013 20:55:59 +0000 Subject: [PATCH] Stemmed Finnish for N and A. Cuts down the number of forms stored in the lexicon to 30%, requiring more often -bind in linearization. Stored as alternative files in finnish/stemmed; perhaps to be functorized later. --- lib/src/finnish/StemFin.gf | 95 ---- lib/src/finnish/stemmed/AdjectiveFin.gf | 53 ++ lib/src/finnish/stemmed/AdverbFin.gf | 23 + lib/src/finnish/stemmed/CatFin.gf | 101 ++++ lib/src/finnish/stemmed/LangFin.gf | 11 + lib/src/finnish/stemmed/LexiconFin.gf | 388 +++++++++++++ lib/src/finnish/stemmed/NounFin.gf | 259 +++++++++ lib/src/finnish/stemmed/NumeralFin.gf | 184 +++++++ lib/src/finnish/stemmed/ParadigmsFin.gf | 669 +++++++++++++++++++++++ lib/src/finnish/stemmed/StemFin.gf | 132 +++++ lib/src/finnish/stemmed/StructuralFin.gf | 315 +++++++++++ lib/src/finnish/stemmed/VerbFin.gf | 145 +++++ 12 files changed, 2280 insertions(+), 95 deletions(-) delete mode 100644 lib/src/finnish/StemFin.gf create mode 100644 lib/src/finnish/stemmed/AdjectiveFin.gf create mode 100644 lib/src/finnish/stemmed/AdverbFin.gf create mode 100644 lib/src/finnish/stemmed/CatFin.gf create mode 100644 lib/src/finnish/stemmed/LangFin.gf create mode 100644 lib/src/finnish/stemmed/LexiconFin.gf create mode 100644 lib/src/finnish/stemmed/NounFin.gf create mode 100644 lib/src/finnish/stemmed/NumeralFin.gf create mode 100644 lib/src/finnish/stemmed/ParadigmsFin.gf create mode 100644 lib/src/finnish/stemmed/StemFin.gf create mode 100644 lib/src/finnish/stemmed/StructuralFin.gf create mode 100644 lib/src/finnish/stemmed/VerbFin.gf diff --git a/lib/src/finnish/StemFin.gf b/lib/src/finnish/StemFin.gf deleted file mode 100644 index a2e5d89a7..000000000 --- a/lib/src/finnish/StemFin.gf +++ /dev/null @@ -1,95 +0,0 @@ --- stem-based morphology, with just the minimum number of concatenative stems - -resource StemFin = open MorphoFin, Prelude in { - -flags coding = utf8 ; - -oper - SNoun : Type = {s : Predef.Ints 9 => Str ; h : Harmony} ; --- SVerb : Type ; - - - nforms2snoun : NForms -> SNoun = \nf -> { - s = table { - 0 => nf ! 0 ; -- ukko - 1 => Predef.tk 1 (nf ! 1) ; -- uko(n) - 2 => nf ! 2 ; -- ukkoa - 3 => nf ! 3 ; -- ukkona - 4 => Predef.tk 1 (nf ! 4) ; -- ukkoo(n) - 5 => Predef.tk 1 (nf ! 5) ; -- ukkoje(n) - 6 => nf ! 6 ; -- ukkoja - 7 => Predef.tk 2 (nf ! 7) ; -- ukkoi(na) - 8 => Predef.tk 3 (nf ! 8) ; -- ukoi(ssa) - 9 => Predef.tk 1 (nf ! 9) -- ukkoihi(n) - } ; - h = aHarmony (last (nf ! 2)) ; - } ; - - snoun2noun : SNoun -> Noun = \sn -> - let - f = sn.s ; - - ukko = f ! 0 ; - uko = f ! 1 ; - ukkoa = f ! 2 ; - ukkona = f ! 3 ; - ukkoo = f ! 4 ; - ukkoje = f ! 5 ; - ukkoja = f ! 6 ; - ukkoi = f ! 7 ; - ukoi = f ! 8 ; - ukkoihi = f ! 9 ; - - a = harmonyA sn.h ; - in - - {s = table { - NCase Sg Nom => ukko ; - NCase Sg Gen => uko + "n" ; - NCase Sg Part => ukkoa ; - NCase Sg Transl => uko + "ksi" ; - NCase Sg Ess => ukkona ; - NCase Sg Iness => uko + ("ss" + a) ; - NCase Sg Elat => uko + ("st" + a) ; - NCase Sg Illat => ukkoo + "n" ; - NCase Sg Adess => uko + ("ll" + a) ; - NCase Sg Ablat => uko + ("lt" + a) ; - NCase Sg Allat => uko + "lle" ; - NCase Sg Abess => uko + ("tt" + a) ; - - NCase Pl Nom => uko + "t" ; - NCase Pl Gen => ukkoje + "n" ; - NCase Pl Part => ukkoja ; - NCase Pl Transl => ukoi + "ksi" ; - NCase Pl Ess => ukkoi + ("n" + a) ; - NCase Pl Iness => ukoi + ("ss" + a) ; - NCase Pl Elat => ukoi + ("st" + a) ; - NCase Pl Illat => ukkoihi + "n" ; - NCase Pl Adess => ukoi + ("ll" + a) ; - NCase Pl Ablat => ukoi + ("lt" + a) ; - NCase Pl Allat => ukoi + "lle" ; - NCase Pl Abess => ukoi + ("tt" + a) ; - - NComit => ukkoi + "ne" ; - NInstruct => ukoi + "n" ; - - NPossNom _ => ukko ; - NPossGen Sg => ukko ; - NPossGen Pl => ukkoje ; - NPossTransl Sg => uko + "kse" ; - NPossTransl Pl => ukoi + "kse" ; - NPossIllat Sg => ukkoo ; - NPossIllat Pl => ukkoihi - } ; - h = sn.h ; - lock_N = <> - } ; - - - aHarmony : Str -> Harmony = \a -> case a of - {"a" => Back ; _ => Front} ; - - harmonyA : Harmony -> Str = \h -> case h of - {Back => "a" ; Front => "ä"} ; - -} \ No newline at end of file diff --git a/lib/src/finnish/stemmed/AdjectiveFin.gf b/lib/src/finnish/stemmed/AdjectiveFin.gf new file mode 100644 index 000000000..556dc3521 --- /dev/null +++ b/lib/src/finnish/stemmed/AdjectiveFin.gf @@ -0,0 +1,53 @@ +concrete AdjectiveFin of Adjective = CatFin ** open ResFin, StemFin, Prelude in { + + flags optimize=all_subs ; -- gfc size from 2864336 to 6786 - i.e. factor 422 + + lin + + PositA a = { + s = \\_ => (snoun2nounSep {s = \\f => a.s ! Posit ! SAN f ; h = a.h}).s + } ; + ComparA a np = + let acomp = (snoun2nounSep {s = \\f => a.s ! Posit ! SAN f ; h = a.h}).s in { + s = \\isMod,af => case isMod of { + True => np.s ! NPCase Part ++ acomp ! af ; -- minua isompi + _ => acomp ! af ++ "kuin" ++ np.s ! NPCase Nom -- isompi kuin minä + } + } ; + CAdvAP ad ap np = { + s = \\m,af => ad.s ++ ap.s ! m ! af ++ ad.p ++ np.s ! NPCase Nom + } ; + UseComparA a = { + s = \\_ => (snoun2nounSep {s = \\f => a.s ! Compar ! SAN f ; h = a.h}).s + } ; + +-- $SuperlA$ belongs to determiner syntax in $Noun$. + AdjOrd ord = { + s = \\_ => ord.s + } ; + + + ComplA2 a np = { + s = \\isMod,af => + preOrPost isMod (appCompl True Pos a.c2 np) ((snoun2nounSep {s = \\f => a.s ! Posit ! SAN f ; h = a.h}).s ! af) + } ; + + ReflA2 a = { + s = \\isMod,af => + preOrPost isMod + (appCompl True Pos a.c2 (reflPron (agrP3 Sg))) ((snoun2nounSep {s = \\f => a.s ! Posit ! SAN f ; h = a.h}).s ! af) + } ; + + SentAP ap sc = { + s = \\b,a => ap.s ! b ! a ++ sc.s + } ; + + AdAP ada ap = { + s = \\b,af => ada.s ++ ap.s ! b ! af + } ; + + UseA2 a = { + s = \\_ => (snoun2nounSep {s = \\f => a.s ! Posit ! SAN f ; h = a.h}).s + } ; + +} diff --git a/lib/src/finnish/stemmed/AdverbFin.gf b/lib/src/finnish/stemmed/AdverbFin.gf new file mode 100644 index 000000000..3eb439b13 --- /dev/null +++ b/lib/src/finnish/stemmed/AdverbFin.gf @@ -0,0 +1,23 @@ +concrete AdverbFin of Adverb = CatFin ** open ResFin, Prelude, StemFin in { + + lin + PositAdvAdj a = {s = a.s ! Posit ! SAAdv} ; + ComparAdvAdj cadv a np = { + s = cadv.s ++ a.s ! Posit ! SAAdv ++ cadv.p ++ np.s ! NPCase Nom + } ; + ComparAdvAdjS cadv a s = { + s = cadv.s ++ a.s ! Posit ! SAAdv ++ cadv.p ++ s.s + } ; + + PrepNP prep np = {s = preOrPost prep.isPre prep.s (np.s ! prep.c)} ; + + AdAdv = cc2 ; + + PositAdAAdj a = {s = glue (a.s ! Posit ! SAN 1) "n"} ; -- älyttömän + + SubjS = cc2 ; +----b AdvSC s = s ; + + AdnCAdv cadv = {s = cadv.s ++ "kuin"} ; + +} diff --git a/lib/src/finnish/stemmed/CatFin.gf b/lib/src/finnish/stemmed/CatFin.gf new file mode 100644 index 000000000..6a84f5237 --- /dev/null +++ b/lib/src/finnish/stemmed/CatFin.gf @@ -0,0 +1,101 @@ +concrete CatFin of Cat = CommonX ** open ResFin, StemFin, Prelude in { + + flags optimize=all_subs ; + + lincat + +-- Tensed/Untensed + + S = {s : Str} ; + QS = {s : Str} ; + RS = {s : Agr => Str ; c : NPForm} ; + SSlash = {s : Str ; c2 : Compl} ; + +-- Sentence + + Cl = {s : ResFin.Tense => Anteriority => Polarity => SType => Str} ; + ClSlash = {s : ResFin.Tense => Anteriority => Polarity => Str ; c2 : Compl} ; + Imp = {s : Polarity => Agr => Str} ; + +-- Question + + QCl = {s : ResFin.Tense => Anteriority => Polarity => Str} ; + IP = {s : NPForm => Str ; n : Number} ; + IComp = {s : Agr => Str} ; + IDet = {s : Case => Str ; n : Number ; isNum : Bool} ; + IQuant = {s : Number => Case => Str} ; + +-- Relative + + RCl = {s : ResFin.Tense => Anteriority => Polarity => Agr => Str ; c : NPForm} ; + RP = {s : Number => NPForm => Str ; a : RAgr} ; + +-- Verb + + VP = ResFin.VP ; + VPSlash = ResFin.VP ** {c2 : Compl} ; + Comp = {s : Agr => Str} ; + +-- Adjective + +-- The $Bool$ tells whether usage is modifying (as opposed to +-- predicative), e.g. "x on suurempi kuin y" vs. "y:tä suurempi luku". + + AP = {s : Bool => NForm => Str} ; + +-- Noun + +-- The $Bool$ tells if a possessive suffix is attached, which affects the case. + + CN = {s : NForm => Str ; h : Harmony} ; + Pron = {s : NPForm => Str ; a : Agr} ; + NP = {s : NPForm => Str ; a : Agr ; isPron : Bool ; isNeg : Bool} ; + Det = { + s1 : Case => Str ; -- minun kolme + s2 : Harmony => Str ; -- -ni (Front for -nsä, Back for -nsa) + sp : Case => Str ; -- se (substantival form) + n : Number ; -- Pl (agreement feature for verb) + isNum : Bool ; -- True (a numeral is present) + isPoss : Bool ; -- True (a possessive suffix is present) + isDef : Bool ; -- True (verb agrees in Pl, Nom is not Part) + isNeg : Bool -- False (only True for "mikään", "kukaan") + } ; +---- QuantSg, QuantPl = {s1 : Case => Str ; s2 : Str ; isPoss, isDef : Bool} ; + Ord = {s : NForm => Str} ; + Predet = {s : Number => NPForm => Str} ; + Quant = {s1,sp : Number => Case => Str ; s2 : Harmony => Str ; isPoss : Bool ; isDef : Bool ; isNeg : Bool} ; + Card = {s : Number => Case => Str ; n : Number} ; + Num = {s : Number => Case => Str ; isNum : Bool ; n : Number} ; + +-- Numeral + + Numeral = {s : CardOrd => Str ; n : Number} ; + Digits = {s : CardOrd => Str ; n : Number} ; + +-- Structural + + Conj = {s1,s2 : Str ; n : Number} ; +----b DConj = {s1,s2 : Str ; n : Number} ; + Subj = {s : Str} ; + Prep = Compl ; + +-- Open lexical classes, e.g. Lexicon + + V, VS, VQ = Verb1 ; -- = {s : VForm => Str ; sc : Case} ; + V2, VA, V2Q, V2S = Verb1 ** {c2 : Compl} ; + V2A = Verb1 ** {c2, c3 : Compl} ; + VV = Verb1 ** {vi : InfForm} ; ---- infinitive form + V2V = Verb1 ** {c2 : Compl ; vi : InfForm} ; ---- infinitive form + V3 = Verb1 ** {c2, c3 : Compl} ; + + A = {s : Degree => SAForm => Str ; h : Harmony} ; + A2 = {s : Degree => SAForm => Str ; h : Harmony ; c2 : Compl} ; + + N = SNoun ; + N2 = SNoun ** {c2 : Compl ; isPre : Bool} ; + N3 = SNoun ** {c2,c3 : Compl ; isPre,isPre2 : Bool} ; + PN = SNoun ; + +oper Verb1 = {s : VForm => Str ; sc : NPForm ; qp : Bool ; p : Str} ; + +} diff --git a/lib/src/finnish/stemmed/LangFin.gf b/lib/src/finnish/stemmed/LangFin.gf new file mode 100644 index 000000000..5f1e7d1e9 --- /dev/null +++ b/lib/src/finnish/stemmed/LangFin.gf @@ -0,0 +1,11 @@ +--# -path=.:..:../../abstract:../../common + +concrete LangFin of Lang = +-- GrammarFin - [SlashV2VNP,SlashVV], ---- to speed up compilation + GrammarFin, + LexiconFin + ** { + +flags startcat = Phr ; unlexer = text ; lexer = finnish ; + +} ; diff --git a/lib/src/finnish/stemmed/LexiconFin.gf b/lib/src/finnish/stemmed/LexiconFin.gf new file mode 100644 index 000000000..aab8d8b6c --- /dev/null +++ b/lib/src/finnish/stemmed/LexiconFin.gf @@ -0,0 +1,388 @@ +--# -path=.:prelude + +concrete LexiconFin of Lexicon = CatFin ** open MorphoFin, StemFin, ParadigmsFin in { + +flags + optimize=values ; + + +lin + airplane_N = mkN "lentokone" ; + answer_V2S = mkV2 (mkV "vastata") (casePrep allative) ; + apartment_N = mkN "asunto" ; + apple_N = mkN "omena" ; --- omenia, not omenoita + art_N = mkN "taide" ; + ask_V2Q = mkV2 (mkV "kysyä") (casePrep ablative) ; + baby_N = mkN "vauva" ; + bad_A = mkA (mkN "paha") "pahempi" "pahin" ; + bank_N = mkN "pankki" ; + beautiful_A = mkA (mkN "kaunis") "kauniimpi" "kaunein" ; + become_VA = mkVA (mkV "tulla") (casePrep translative) ; + beer_N = mkN "olut" "oluita" ; + beg_V2V = mkV2V (mk2V "pyytää" "pyysi") (casePrep partitive) ; + big_A = mkA (mkN "suuri" "suuria") "suurempi" "suurin" ; + bike_N = mkN "polku" (mkN "pyörä") ; + bird_N = mkN "lintu" ; + black_A = mkA (mkN "musta") "mustempi" "mustin" ; + blue_A = mkA (mkN "sininen") "sinisempi" "sinisin" ; + boat_N = mkN "vene" ; + book_N = mkN "kirja" ; + boot_N = mkN "saapas" ; + boss_N = mkN "pomo" ; + boy_N = mkN "poika" "pojan" "poikia" ; + bread_N = mkN "leipä" ; + break_V2 = mkV2 (mkV "rikkoa") ; + broad_A = mkA (mkN "leveä") "leveämpi" "levein" ; + brother_N2 = mkN2 ( + mkN "veli" "veljen" "veljenä" "veljeä" "veljeen" + "veljinä" "veljissä" "veljien" "veljiä" "veljiin") ; + brown_A = mkA (mkN "ruskea") "ruskeampi" "ruskein" ; + butter_N = mk3N "voi" "voin" "voita" ; ---- errors in Part + buy_V2 = mkV2 (mkV "ostaa") ; + camera_N = mkN "kamera" ; + cap_N = mkN "lakki" ; + car_N = mkN "auto" "auton" "autoja" ; -- mkN: audon + carpet_N = mkN "matto" ; + cat_N = mkN "kissa" ; + ceiling_N = mkN "katto" ; + chair_N = mkN "tuoli" ; + cheese_N = mkN "juusto" ; + child_N = mkN "lapsi" "lapsen" "lasta" "lapsena" "lapseen" + "lasten" "lapsia" "lapsina" "lapsissa" "lapsiin" ; + church_N = mkN "kirkko" ; + city_N = mkN "kaupunki" ; + clean_A = mkA (mkN "puhdas") ; + clever_A = mkA (mkN "viisas") ; + close_V2 = mkV2 (mkV "sulkea") ; + coat_N = mkN "takki" ; + cold_A = mkA (mkN "kylmä") "kylmempi" "kylmin" ; + come_V = mkV "tulla" ; + computer_N = mkN "tietokone" ; + country_N = mkN "maa" ; + cousin_N = mkN "serkku" ; + cow_N = mkN "lehmä" ; + die_V = mkV "kuolla" ; + dirty_A = mkA (mkN "likainen") "likaisempi" "likaisin" ; + distance_N3 = mkN3 (mkN "etäisyys") (casePrep elative) (casePrep illative) ; + doctor_N = mk2N "tohtori" "tohtoreita" ; + dog_N = mkN "koira" ; + door_N = mkN "ovi" "ovia" ; + drink_V2 = mkV2 (mkV "juoda") (casePrep partitive) ; + easy_A2V = mkA2 (mkA (mkN "helppo") "helpompi" "helpoin") + (casePrep allative) ; + eat_V2 = mkV2 (mkV "syödä") (casePrep partitive) ; + empty_A = mkA (mkN "tyhjä") "tyhjempi" "tyhjin" ; + enemy_N = mkN "vihollinen" ; + factory_N = mkN "tehdas" ; + father_N2 = mkN2 (mkN "isä") ; + fear_VS = mkVS (mk2V "pelätä" "pelkäsi") ; + find_V2 = mkV2 (mk2V "löytää" "löysi") ; + fish_N = mkN "kala" ; + floor_N = mk2N "lattia" "lattioita" ; + forget_V2 = mkV2 (mkV "unohtaa") ; + fridge_N = mkN "jääkaappi" ; + friend_N = mkN "ystävä" ; + fruit_N = mkN "hedelmä" ; + fun_AV = mkAV (mkA (mkN "hauska") "hauskempi" "hauskin") ; + garden_N = mkN "puutarha" "puutarhan" "puutarhoja" ; + girl_N = mkN "tyttö" ; + glove_N = mkN "käsine" ; + gold_N = mkN "kulta" ; + good_A = mkA (mkN "hyvä") "parempi" "parhain" ; --- paras + go_V = mkV "mennä" ; + green_A = mkA (mkN "vihreä") "vihreämpi" "vihrein" ; + harbour_N = mkN "satama" "sataman" "satamia" ; + hate_V2 = mkV2 (mkV "vihata") cpartitive ; + hat_N = mkN "hattu" ; + hear_V2 = mkV2 (mkV "kuulla") ; + hill_N = mkN "kukkula" ; + hope_VS = mkVS (mkV "toivoa") ; + horse_N = mkN "hevonen" ; + hot_A = mkA (mkN "kuuma") "kuumempi" "kuumin" ; + house_N = mkN "talo" ; + important_A = mkA (mkN "tärkeä") "tärkeämpi" "tärkein" ; + industry_N = mkN "teollisuus" ; + iron_N = mkN "rauta" ; + king_N = mkN "kuningas" ; + know_VS = mkVS (mkV "tietää" "tiesi") ; + know_VQ = mkVQ (mkV "tietää" "tiesi") ; + know_V2 = mkV2 (mkV "tuntea" "tunsi") ; + lake_N = mkN "järvi" "järviä" ; + lamp_N = mkN "lamppu" ; + learn_V2 = + mkV2 (mk12V "oppia" "opin" "oppii" "oppivat" "oppikaa" "opitaan" + "opin" "oppi" "oppisi" "oppinut" "opittu" "opitun") ; + leather_N = mkN "nahka" ; --- nahan + leave_V2 = mkV2 (mkV "jättää") ; + like_V2 = mkV2 (mkV "pitää") elative ; + listen_V2 = mkV2 (mkV "kuunnella" "kuunteli") partitive ; + live_V = mkV "elää" ; + long_A = mkA (mkN "pitkä") "pitempi" "pisin" ; + lose_V2 = mkV2 (mkV "hävitä" "hävisi") ; --- hukata + love_N = mk3N "rakkaus" "rakkauden" "rakkauksia" ; + love_V2 = mkV2 (mkV "rakastaa") partitive ; + man_N = mkN "mies" "miehen" "miestä" "miehenä" "mieheen" + "miesten" "miehiä" "miehinä" "miehissä" "miehiin" ; + married_A2 = mkA2 (mkA "avioitunut") (postPrep genitive "kanssa") ; ---- infl + meat_N = mkN "liha" ; + milk_N = mkN "maito" ; + moon_N = mkN "kuu" ; + mother_N2 = mkN2 (mkN "äiti") ; + mountain_N = mkN "vuori" "vuoria" ; + music_N = mkN "musiikki" ; + narrow_A = mkA (mkN "kapea") "kapeampi" "kapein" ; + new_A = mkA (mk3N "uusi" "uuden" "uusia") "uudempi" "uusin" ; + newspaper_N = mkN "sanoma" (mkN "lehti" "lehtiä") ; --- for correct vowel harmony + oil_N = mkN "öljy" ; + old_A = mkA (mkN "vanha") "vanhempi" "vanhin" ; + open_V2 = mkV2 (mkV "avata" "avasi") ; + paint_V2A = mkV2A (mkV "maalata") accPrep (casePrep translative) ; + paper_N = mk2N "paperi" "papereita" ; + paris_PN = mkPN (mkN "Pariisi") ; + peace_N = mkN "rauha" ; + pen_N = mkN "kynä" ; + planet_N = mkN "planeetta" ; + plastic_N = mkN "muovi" ; + play_V2 = mkV2 (mkV "pelata") cpartitive ; --- leikkiä, soittaa + policeman_N = mkN "poliisi" ; + priest_N = mkN "pappi" ; + probable_AS = mkAS --- for vowel harmony + (mkA (mkN "todennäköinen") "tonennäköisempi" "todennäköisin") ; ---- sta + queen_N = mkN "kuningatar" ; + radio_N = mk2N "radio" "radioita" ; + rain_V0 = mkV0 (mk2V "sataa" "satoi") ; + read_V2 = mkV2 (mkV "lukea") ; + red_A = mkA "punainen" ; + religion_N = mkN "uskonto" ; + restaurant_N = mkN "ravintola" ; + river_N = mkN "joki" "jokia" ; + rock_N = mk2N "kallio" "kallioita" ; + roof_N = mkN "katto" ; + rubber_N = mkN "kumi" ; + run_V = mk2V "juosta" "juoksi" ; + say_VS = mkVS (mkV "sanoa") ; + school_N = mkN "koulu" ; + science_N = mkN "tiede" ; + sea_N = mkN "meri" "meren" "meriä" "merta" ; + seek_V2 = mkV2 (mkV "etsiä") cpartitive ; + see_V2 = mkV2 ( + mk12V "nähdä" "näen" "näkee" "näkevät" "nähkää" "nähdään" + "näin" "näki" "näkisi" "nähnyt" "nähty" "nähdyn") ; + sell_V3 = mkV3 (mkV "myydä") accPrep (casePrep allative) ; + send_V3 = mkV3 (mkV "lähettää") accPrep (casePrep allative) ; + sheep_N = mkN "lammas" ; + ship_N = mkN "laiva" ; + shirt_N = mkN "paita" ; + shoe_N = mkN "kenkä" ; + shop_N = mkN "kauppa" ; + short_A = mkA (mkN "lyhyt" "lyhyitä") ; + silver_N = mkN "hopea" ; + sister_N = mkN "sisko" ; + sleep_V = mkV "nukkua" ; + small_A = mkA (mk2N "pieni" "pieniä") "pienempi" "pienin" ; + snake_N = mkN "käärme" ; + sock_N = mkN "sukka" ; + speak_V2 = mkV2 (mkV "puhua") cpartitive ; + star_N = mkN "tähti" "tähtiä" ; + steel_N = mkN "teräs" ; + stone_N = mkN "kivi" "kiviä" ; + stove_N = mk3N "liesi" "lieden" "liesiä" ; + student_N = mk2N "opiskelija" "opiskelijoita" ; + stupid_A = mkA "tyhmä" ; + sun_N = mkN "aurinko" ; + switch8off_V2 = mkV2 (mkV "sammuttaa") ; --- + switch8on_V2 = mkV2 (mkV "sytyttää") ; --- + table_N = mkN "pöytä" ; + talk_V3 = mkV3 (mkV "puhua") (casePrep allative) (casePrep elative) ; + teacher_N = mkN "opettaja" ; + teach_V2 = mkV2 (mkV "opettaa") ; + television_N = mk2N "televisio" "televisioita" ; + thick_A = mkA "paksu" ; + thin_A = mkA (mkN "ohut" "ohuita") ; + train_N = mkN "juna" ; + travel_V = mkV "matkustaa" ; + tree_N = mkN "puu" ; + ---- trousers_N = mkN "trousers" ; + ugly_A = mkA (mkN "ruma") "rumempi" "rumin" ; + understand_V2 = mkV2 (mkV "ymmärtää" "ymmärrän" "ymmärsi") ; + university_N = mkN "yliopisto" ; + village_N = mkN "kylä" ; + wait_V2 = mkV2 (mkV "odottaa") partitive ; + walk_V = mkV "kävellä" "käveli" ; + warm_A = mkA + (mkN "lämmin" "lämpimän" "lämmintä" "lämpimänä" "lämpimään" + "lämpiminä" "lämpimiä" "lämpimien" "lämpimissä" "lämpimiin" + ) + "lämpimämpi" "lämpimin" ; + war_N = mkN "sota" ; + watch_V2 = mkV2 (mkV "katsella") cpartitive ; + water_N = mk3N "vesi" "veden" "vesiä" ; + white_A = mkA "valkoinen" ; + window_N = mk2N "ikkuna" "ikkunoita" ; + wine_N = mkN "viini" ; + win_V2 = mkV2 (mkV "voittaa") ; + woman_N = mkN "nainen" ; + wonder_VQ = mkVQ (mkV "ihmetellä") ; + wood_N = mkN "puu" ; + write_V2 = mkV2 (mkV "kirjoittaa") ; + yellow_A = mkA "keltainen" ; + young_A = mkA (mkN "nuori" "nuoria") "nuorempi" "nuorin" ; + + do_V2 = mkV2 ( + mkV "tehdä" "teen" "tekee" "tekevät" "tehkää" "tehdään" + "tein" "teki" "tekisi" "tehnyt" "tehty" "tehdyn") ; + + now_Adv = mkAdv "nyt" ; + already_Adv = mkAdv "jo" ; + song_N = mkN "laulu" ; + add_V3 = mkV3 (mkV "lisätä") accPrep (casePrep illative) ; + number_N = mk2N "numero" "numeroita" ; + put_V2 = mkV2 (mkV "panna") ; + stop_V = mkV "pysähtyä" ; + jump_V = mkV "hypätä" ; + left_Ord = mkOrd (snoun2nounBind (mkN "vasen")) ; + right_Ord = mkOrd (snoun2nounBind (mkN "oikea")) ; + far_Adv = mkAdv "kaukana" ; + correct_A = mkA "oikea" ; + dry_A = mkA (mkN "kuiva") "kuivempi" "kuivin" ; + dull_A = mkA (mkN "tylsä") "tylsempi" "tylsin" ; + full_A = mkA (mk3N "täysi" "täyden" "täysiä") "täydempi" "täysin" ; + heavy_A = mkA "raskas" ; + near_A = mkA (mkN "läheinen") "läheisempi" "lähin" ; + rotten_A = mkA "mätä" ; + round_A = mkA "pyöreä" ; + sharp_A = mkA "terävä" ; + smooth_A = mkA "sileä" ; + straight_A = mkA (mkN "suora") "suorempi" "suorin" ; + wet_A = mkA (mkN "märkä") "märempi" "märin" ; + wide_A = mkA "leveä" ; + animal_N = mk3N "eläin" "eläimen" "eläimiä" ; + ashes_N = mkN "tuhka" ; + back_N = mkN "selkä" ; + bark_N = mkN "kaarna" ; + belly_N = mkN "vatsa" ; + blood_N = mkN "veri" "veren" "veriä" "verta" ; + bone_N = mkN "luu" ; + breast_N = mkN "rinta" ; + cloud_N = mk2N "pilvi" "pilviä" ; + day_N = mkN "päivä" ; + dust_N = mkN "pöly" ; + ear_N = mkN "korva" ; + earth_N = mkN "maa" ; + egg_N = mkN "muna" ; + eye_N = mkN "silmä" ; + fat_N = mkN "rasva" ; + feather_N = mk3N "höyhen" "höyhenen" "höyheniä" ; + fingernail_N = mk3N "kynsi" "kynnen" "kynsiä" ; + fire_N = mk2N "tuli" "tulia" ; + flower_N = mkN "kukka" ; + fog_N = mkN "sumu" ; + foot_N = mkN "jalka" ; + forest_N = mkN "metsä" ; + grass_N = mkN "ruoho" ; + guts_N = mkN "sisälmys" ; --- suoli + hair_N = mkN "hius" ; + hand_N = mk3N "käsi" "käden" "käsiä" ; + head_N = mkN "pää" ; + heart_N = mkN "sydän" "sydämen" "sydäntä" "sydämenä" "sydämeen" + "sydänten" "sydämiä" "sydäminä" "sydämissä" "sydämiin" ; + horn_N = mk2N "sarvi" "sarvia" ; + husband_N = mkN "mies" "miehen" "miestä" "miehenä" "mieheen" + "miesten" "miehiä" "miehinä" "miehissä" "miehiin" ; + ice_N = mkN "jää" ; + knee_N = mk2N "polvi" "polvia" ; + leaf_N = mk2N "lehti" "lehtiä" ; + leg_N = mkN "jalka" ; --- sääri + liver_N = mkN "maksa" ; + louse_N = mkN "lude" ; + mouth_N = mkN "suu" ; + name_N = mk2N "nimi" "nimiä" ; + neck_N = mkN "niska" ; + night_N = mkN "yö" ; + nose_N = mkN "nenä" ; + person_N = mkN "henkilö" ; + rain_N = mkN "sade" ; + road_N = mkN "tie" ; + root_N = mk2N "juuri" "juuria" ; + rope_N = mk3N "köysi" "köyden" "köysiä" ; + salt_N = mkN "suola" ; + sand_N = mkN "hiekka" ; + seed_N = mkN "siemen" ; + skin_N = mkN "nahka" ; + sky_N = mk3N "taivas" "taivaan" "taivaita" ; + smoke_N = mkN "savu" ; + snow_N = mkN "lumi" "lumen" "lumia" "lunta" ; + stick_N = mkN "keppi" ; + tail_N = mkN "häntä" ; + tongue_N = mk2N "kieli" "kieliä" ; + tooth_N = mkN "hammas" ; + wife_N = mkN "vaimo" ; + wind_N = mk2N "tuuli" "tuulia" ; + wing_N = mk2N "siipi" "siipiä" ; + worm_N = mkN "mato" ; + year_N = mk3N "vuosi" "vuoden" "vuosia" ; + bite_V2 = mkV2 (mkV "purra") ; + blow_V = mkV "puhaltaa" ; + burn_V = mkV "palaa" ; + count_V2 = mkV2 (mkV "laskea") ; + cut_V2 = mkV2 (mk2V "leikata" "leikkasi") ; + dig_V = mkV "kaivaa" ; + fall_V = mkV "pudota" "putoan" "putosi" ; + fear_V2 = mkV2 (mkV "pelätä" "pelkään" "pelkäsi") cpartitive ; + fight_V2 = mkV2 (mkV "taistella") (postPrep partitive "vastaan") ; + float_V = mkV "kellua" ; + flow_V = mkV "virrata" "virtaan" "virtasi" ; + fly_V = mkV "lentää" ; + freeze_V = mkV "jäätyä" ; + give_V3 = mkV3 (mkV "antaa" "annan" "antoi") accPrep (casePrep allative) ; + hit_V2 = mkV2 (mkV "lyödä") cpartitive ; + hold_V2 = mkV2 (mkV "pitää") cpartitive ; + hunt_V2 = mkV2 (mkV "metsästää") cpartitive ; + kill_V2 = mkV2 (mkV "tappaa") ; + laugh_V = mkV "nauraa" "nauroi" ; + lie_V = mkV "maata" "makasi" ; + play_V = mkV "pelata" ; + pull_V2 = mkV2 (mkV "vetää") ; + push_V2 = mkV2 (mkV "työntää") ; + rub_V2 = mkV2 (mkV "hieroa") cpartitive ; + scratch_V2 = mkV2 (mkV "raapia") cpartitive ; + sew_V = mkV "ommella" ; + sing_V = mkV "laulaa" ; + sit_V = mkV "istua" ; + smell_V = mk2V "haista" "haisi" ; + spit_V = mkV "sylkeä" ; + split_V2 = mkV2 (mk2V "halkaista" "halkaisi") ; + squeeze_V2 = mkV2 (mkV "puristaa") cpartitive ; + stab_V2 = mkV2 (mkV "pistää") cpartitive ; + stand_V = mk12V "seistä" "seison" "seisoo" "seisovat" "seiskää" "seistään" + "seisoin" "seisoi" "seisoisi" "seissyt" "seisty" "seistyn" ; --- *seisoivät + suck_V2 = mkV2 (mkV "imeä") cpartitive ; + swell_V = mkV "turvota" "turposi" ; + swim_V = mkV "uida" "uin" "ui" ; + think_V = mkV "ajatella" "ajattelen" "ajatteli" ; + throw_V2 = mkV2 (mkV "heittää") ; + tie_V2 = mkV2 (mkV "sitoa") ; + turn_V = mkV "kääntyä" ; + vomit_V = mkV "oksentaa" ; + wash_V2 = mkV2 (mkV "pestä") ; + wipe_V2 = mkV2 (mkV "pyyhkiä") ; + + breathe_V = mkV "hengittää" ; + + grammar_N = mkN "kielioppi" ; + language_N = mk2N "kieli" "kieliä" ; + rule_N = mkN "sääntö" ; + + john_PN = mkPN "Jussi" ; + question_N = mkN "kysymys" ; + ready_A = mkA (mkN "valmis") ; + reason_N = mkN "syy" ; + today_Adv = mkAdv "tänään" ; + uncertain_A = mkA "epävarma" ; + + oper + mkOrd : Noun -> Ord ; + mkOrd x = {s = x.s ; lock_Ord = <> } ; + cpartitive = casePrep partitive ; + +} ; diff --git a/lib/src/finnish/stemmed/NounFin.gf b/lib/src/finnish/stemmed/NounFin.gf new file mode 100644 index 000000000..202222bf4 --- /dev/null +++ b/lib/src/finnish/stemmed/NounFin.gf @@ -0,0 +1,259 @@ +--# -path=.:..:../../abstract:../../common + +concrete NounFin of Noun = CatFin ** open ResFin, MorphoFin, StemFin, Prelude in { + + flags optimize=all_subs ; + + lin + +-- The $Number$ is subtle: "nuo autot", "nuo kolme autoa" are both plural +-- for verb agreement, but the noun form is singular in the latter. + + DetCN det cn = + let + n : Number = case det.isNum of { + True => Sg ; + _ => det.n + } ; + ncase : NPForm -> Case * NForm = \c -> + let k = npform2case n c + in + case of { + <_, NPAcc, True,_,_> => ; -- myin kolme kytkintä(ni) + <_, NPCase Nom, True,_,_> => ; -- kolme kytkintä(ni) on + <_, _, True,False,_> => ; -- kolmeksi kytkimeksi + => ; -- myin kytkimiä + <_, NPAcc, _,True,_> => ; -- myin kytkime+ni + <_, NPCase Nom,_,True,_> => ; -- kytkime+ni on/ovat... + <_, NPCase Gen,_,True,_> => ; -- kytkime+ni vika + <_, NPCase Transl,_,True,_> => ; -- kytkim(e|i)kse+ni + <_, NPCase Illat,_,True,_> => ; -- kytkim(ee|ii)+ni + + _ => -- kytkin, kytkimen,... + } + in { + s = \\c => let + k = ncase c ; + in + det.s1 ! k.p1 ++ cn.s ! k.p2 ++ det.s2 ! cn.h ; + a = agrP3 (case of { + => Sg ; -- kolme kytkintä on + _ => det.n + }) ; + isPron = False ; isNeg = det.isNeg + } ; + + DetNP det = + let + n : Number = case det.isNum of { + True => Sg ; + _ => det.n + } ; + in { + s = \\c => let k = npform2case n c in + det.sp ! k ; -- det.s2 is possessive suffix + a = agrP3 (case det.isDef of { + False => Sg ; -- autoja menee; kolme autoa menee + _ => det.n + }) ; + isPron = False ; isNeg = det.isNeg + } ; + + UsePN pn = { + s = snoun2np Sg pn ; ---\\c => (snoun2nounSep pn).s ! NCase Sg (npform2case Sg c) ; + a = agrP3 Sg ; + isPron = False ; isNeg = False + } ; + UsePron p = p ** {isPron = True ; isNeg = False} ; + + PredetNP pred np = { + s = \\c => pred.s ! complNumAgr np.a ! c ++ np.s ! c ; + a = np.a ; + isPron = np.isPron ; -- kaikki minun - ni + isNeg = np.isNeg + } ; + + PPartNP np v2 = { + s = \\c => np.s ! c ++ v2.s ! PastPartPass (AN (NCase (complNumAgr np.a) Ess)) ; + a = np.a ; + isPron = np.isPron ; -- minun täällä - ni + isNeg = np.isNeg + } ; + + AdvNP np adv = { + s = \\c => np.s ! c ++ adv.s ; + a = np.a ; + isPron = np.isPron ; -- minun täällä - ni + isNeg = np.isNeg + } ; + + DetQuantOrd quant num ord = { + s1 = \\c => quant.s1 ! num.n ! c ++ num.s ! Sg ! c ++ ord.s ! NCase num.n c ; + sp = \\c => quant.sp ! num.n ! c ++ num.s ! Sg ! c ++ ord.s ! NCase num.n c ; + s2 = quant.s2 ; + n = num.n ; + isNum = num.isNum ; + isPoss = quant.isPoss ; + isDef = quant.isDef ; + isNeg = quant.isNeg + } ; + + DetQuant quant num = { + s1 = \\c => quant.s1 ! num.n ! c ++ num.s ! Sg ! c ; + sp = \\c => quant.sp ! num.n ! c ++ num.s ! Sg ! c ; + s2 = quant.s2 ; + n = num.n ; + isNum = num.isNum ; -- case num.n of {Sg => False ; _ => True} ; + isPoss = quant.isPoss ; + isDef = quant.isDef ; isNeg = quant.isNeg + } ; + + PossPron p = { + s1,sp = \\_,_ => p.s ! NPCase Gen ; + s2 = table {Front => BIND ++ possSuffixFront p.a ; + Back => BIND ++ possSuffix p.a } ; + isNum = False ; + isPoss = True ; + isDef = True ; --- "minun kolme autoani ovat" ; thus "...on" is missing + isNeg = False + } ; + + NumSg = {s = \\_,_ => [] ; isNum = False ; n = Sg} ; + NumPl = {s = \\_,_ => [] ; isNum = False ; n = Pl} ; + + NumCard n = n ** {isNum = case n.n of {Sg => False ; _ => True}} ; -- yksi talo/kaksi taloa + + NumDigits numeral = { + s = \\n,c => numeral.s ! NCard (NCase n c) ; + n = numeral.n + } ; + OrdDigits numeral = {s = \\f => numeral.s ! NOrd f} ; + + NumNumeral numeral = { + s = \\n,c => numeral.s ! NCard (NCase n c) ; + n = numeral.n + } ; + OrdNumeral numeral = {s = \\f => numeral.s ! NOrd f} ; + + AdNum adn num = { + s = \\n,c => adn.s ++ num.s ! n ! c ; + n = num.n + } ; + + OrdSuperl a = snoun2nounSep {s = \\nc => a.s ! Superl ! SAN nc ; h = a.h} ; + + DefArt = { + s1 = \\_,_ => [] ; + sp = table {Sg => pronSe.s ; Pl => pronNe.s} ; + s2 = \\_ => [] ; + isNum,isPoss,isNeg = False ; + isDef = True -- autot ovat + } ; + + IndefArt = { + s1 = \\_,_ => [] ; -- Nom is Part in Pl: use isDef in DetCN + sp = \\n,c => + (nhn (mkSubst "ä" "yksi" "yhde" "yhte" "yhtä" "yhteen" "yksi" "yksi" + "yksien" "yksiä" "yksiin")).s ! NCase n c ; + s2 = \\_ => [] ; + isNum,isPoss,isDef,isNeg = False -- autoja on + } ; + + MassNP cn = + let + n : Number = Sg ; + ncase : Case -> NForm = \c -> NCase n c ; + in { + s = \\c => let k = npform2case n c in + cn.s ! ncase k ; + a = agrP3 Sg ; + isPron = False ; isNeg = False + } ; + + UseN n = snoun2nounSep n ; + + UseN2 n = snoun2nounSep n ; + + Use2N3 f = { + s = (snoun2nounSep f).s ; + c2 = f.c2 ; + h = f.h ; + isPre = f.isPre + } ; + Use3N3 f = { + s = (snoun2nounSep f).s ; + c2 = f.c3 ; + h = f.h ; + isPre = f.isPre2 + } ; + + +--- If a possessive suffix is added here it goes after the complements... + + ComplN2 f x = { + s = \\nf => preOrPost f.isPre ((snoun2nounSep f).s ! nf) (appCompl True Pos f.c2 x) ; + h = f.h } ; + ComplN3 f x = { + s = \\nf => preOrPost f.isPre (f.s ! nf) (appCompl True Pos f.c2 x) ; + c2 = f.c3 ; + h = f.h ; + isPre = f.isPre2 + } ; + + AdjCN ap cn = { + s = \\nf => ap.s ! True ! (n2nform nf) ++ cn.s ! nf ; + h = cn.h } ; + + RelCN cn rs = {s = \\nf => cn.s ! nf ++ rs.s ! agrP3 (numN nf) ; + h = cn.h } ; + + RelNP np rs = { + s = \\c => np.s ! c ++ "," ++ rs.s ! np.a ; + a = np.a ; + isPron = np.isPron ; ---- correct ? + isNeg = np.isNeg + } ; + + AdvCN cn ad = {s = \\nf => cn.s ! nf ++ ad.s ; + h = cn.h} ; + + SentCN cn sc = {s = \\nf=> cn.s ! nf ++ sc.s; + h = cn.h } ; + + ApposCN cn np = {s = \\nf=> cn.s ! nf ++ np.s ! NPCase Nom ; + h = cn.h } ; --- luvun x + + PossNP cn np = {s = \\nf => np.s ! NPCase Gen ++ cn.s ! nf ; + h = cn.h + } ; + + PartNP cn np = {s = \\nf => cn.s ! nf ++ np.s ! NPCase Part ; + h = cn.h ---- gives "lasin viiniänsa" ; should be "lasinsa viiniä" + } ; + + + CountNP det np = + let + n : Number = case det.isNum of { + True => Sg ; + _ => det.n + } ; + in { + s = \\c => let k = npform2case n c in + det.sp ! k ++ np.s ! NPCase Elat ; -- cf DetNP above + a = agrP3 (case det.isDef of { + False => Sg ; -- autoja menee; kolme autoa menee + _ => det.n + }) ; + isPron = False ; isNeg = det.isNeg + } ; + + + oper + numN : NForm -> Number = \nf -> case nf of { + NCase n _ => n ; + _ => Sg --- + } ; + + +} diff --git a/lib/src/finnish/stemmed/NumeralFin.gf b/lib/src/finnish/stemmed/NumeralFin.gf new file mode 100644 index 000000000..b083cbbe1 --- /dev/null +++ b/lib/src/finnish/stemmed/NumeralFin.gf @@ -0,0 +1,184 @@ +concrete NumeralFin of Numeral = CatFin [Numeral,Digits] ** open Prelude, ParadigmsFin, MorphoFin, StemFin in { + +-- Notice: possessive forms are not used. They get wrong, since every +-- part is made to agree in them. + +flags optimize = all_subs ; + +lincat + Sub1000000 = {s : CardOrd => Str ; n : MorphoFin.Number} ; + Digit = {s : CardOrd => Str} ; + Sub10, Sub100, Sub1000 = {s : NumPlace => CardOrd => Str ; n : MorphoFin.Number} ; + +lin + num x = x ; + n2 = co + (nhn (mkSubst "a" "kaksi" "kahde" "kahte" "kahta" "kahteen" "kaksi" "kaksi" + "kaksien" "kaksia" "kaksiin")) + (ordN "a" "kahdes") ; --- toinen + n3 = co + (nhn (mkSubst "a" "kolme" "kolme" "kolme" "kolmea" "kolmeen" "kolmi" "kolmi" + "kolmien" "kolmia" "kolmiin")) + (ordN "a" "kolmas") ; + n4 = co (snoun2nounBind (mkN "neljä")) (ordN "ä" "neljäs") ; + n5 = co (snoun2nounBind (mkN "viisi" "viiden" "viisiä")) (ordN "ä" "viides") ; + n6 = co (snoun2nounBind (mkN "kuusi" "kuuden" "kuusia")) (ordN "a" "kuudes") ; + n7 = co + (nhn (mkSubst "ä" "seitsemän" "seitsemä" "seitsemä" "seitsemää" + "seitsemään" "seitsemi" "seitsemi" "seitsemien" "seitsemiä" + "seitsemiin")) + (ordN "ä" "seitsemäs") ; + n8 = co + (nhn (mkSubst "a" "kahdeksan" "kahdeksa" "kahdeksa" "kahdeksaa" + "kahdeksaan" "kahdeksi" "kahdeksi" "kahdeksien" "kahdeksia" + "kahdeksiin")) + (ordN "a" "kahdeksas") ; + n9 = co + (nhn (mkSubst "ä" "yhdeksän" "yhdeksä" "yhdeksä" "yhdeksää" + "yhdeksään" "yhdeksi" "yhdeksi" "yhdeksien" "yhdeksiä" "yhdeksiin")) + (ordN "ä" "yhdeksäs") ; + + pot01 = + {s = table { + NumAttr => \\_ => [] ; + NumIndep => yksiN.s + } ; + n = Sg + } ; + pot0 d = {n = Pl ; s = \\_ => d.s} ; + pot110 = + {s = \\_ => kymmenenN.s ; + n = Pl + } ; + + pot111 = {n = Pl ; s = \\_,c => yksiN.s ! c ++ BIND ++ "toista"} ; ---- yhdes + pot1to19 d = {n = Pl ; s = \\_,c => d.s ! c ++ BIND ++ "toista"} ; + pot0as1 n = n ; + + pot1 d = {n = Pl ; s = \\_,c => d.s ! c ++ BIND ++ kymmentaN.s ! c} ; + pot1plus d e = { + n = Pl ; + s = \\_,c => d.s ! c ++ BIND ++ kymmentaN.s ! c ++ BIND ++ e.s ! NumIndep ! c + } ; + pot1as2 n = n ; + pot2 d = {n = Pl ; s = \\_,c => d.s ! NumAttr ! c ++ nBIND d.n ++ sataaN.s ! d.n ! c} ; ---- + pot2plus d e = { + n = Pl ; + s = \\_,c => d.s ! NumAttr ! c ++ nBIND d.n ++ sataaN.s ! d.n ! c ++ + BIND ++ e.s ! NumIndep ! c + } ; + pot2as3 n = {n = n.n ; s = n.s ! NumIndep} ; + pot3 d = {n = Pl ; s = \\c => d.s ! NumAttr ! c ++ nBIND d.n ++ tuhattaN.s ! d.n ! c} ; ---- + pot3plus d e = { + n = Pl ; + s = \\c => d.s ! NumAttr ! c ++ nBIND d.n ++ tuhattaN.s ! d.n ! c ++ e.s ! NumIndep ! c + } ; + +oper +-- co : (c,o : {s : NForm => Str}) -> {s : CardOrd => Str} = \c,o -> { + co : (c,o : CommonNoun) -> {s : CardOrd => Str} = \c,o -> { + s = table { + NCard nf => c.s ! nf ; + NOrd nf => o.s ! nf + } + } ; + + nBIND : MorphoFin.Number -> Str = \n -> case n of {Sg => [] ; _ => BIND} ; -- no BIND after silent 1 + +-- Too much trouble to infer vowel, cf. "kuudes" vs. "viides". + +-- ordN : Str -> Str -> {s : NForm => Str} = \a,sadas -> + ordN : Str -> Str -> CommonNoun = \a,sadas -> + let + sada = init sadas + in + snoun2nounBind (mkN + sadas (sada + "nnen") (sada + "tt" + a) (sada + "nten" + a) (sada + "nteen") + (sada + "nsien") (sada + "nsi" + a) (sada + "nsin" + a) + (sada + "nsiss" + a) (sada + "nsiin")) ; + +param + NumPlace = NumIndep | NumAttr ; + +oper + yksiN = co + (nhn (mkSubst "ä" "yksi" "yhde" "yhte" "yhtä" "yhteen" "yksi" "yksi" + "yksien" "yksiä" "yksiin")) + (ordN "ä" "yhdes") ; ---- ensimmäinen + kymmenenN = co + (nhn (mkSubst "ä" "kymmenen" "kymmene" "kymmene" "kymmentä" + "kymmeneen" "kymmeni" "kymmeni" "kymmenien" "kymmeniä" "kymmeniin")) + (ordN "ä" "kymmenes") ; + sataN = co + (snoun2nounBind (mkN "sata")) + (ordN "a" "sadas") ; + + tuhatN = co + (snoun2nounBind (mkN "tuhat" "tuhannen" "tuhatta" "ruhantena" "tuhanteen" + "tuhansien" "tuhansia" "tuhansina" "tuhansissa" "tuhansiin")) + (ordN "a" "tuhannes") ; + + kymmentaN = + {s = table { + NCard (NCase Sg Nom) => "kymmentä" ; + k => kymmenenN.s ! k + } + } ; + + sataaN : {s : MorphoFin.Number => CardOrd => Str} = {s = table { + Sg => sataN.s ; + Pl => table { + NCard (NCase Sg Nom) => "sataa" ; + k => sataN.s ! k + } + } + } ; + + tuhattaN = {s = table { + Sg => tuhatN.s ; + Pl => table { + NCard (NCase Sg Nom) => "tuhatta" ; + k => tuhatN.s ! k + } + } + } ; + + + lincat + Dig = TDigit ; + + lin + IDig d = d ; + + IIDig d i = { + s = \\o => d.s ! NCard (NCase Sg Nom) ++ i.s ! o ; + n = Pl + } ; + + D_0 = mkDig "0" ; + D_1 = mk3Dig "1" "1." MorphoFin.Sg ; + D_2 = mkDig "2" ; + D_3 = mkDig "3" ; + D_4 = mkDig "4" ; + D_5 = mkDig "5" ; + D_6 = mkDig "6" ; + D_7 = mkDig "7" ; + D_8 = mkDig "8" ; + D_9 = mkDig "9" ; + + oper + mk2Dig : Str -> Str -> TDigit = \c,o -> mk3Dig c o MorphoFin.Pl ; + mkDig : Str -> TDigit = \c -> mk2Dig c (c + ".") ; + + mk3Dig : Str -> Str -> MorphoFin.Number -> TDigit = \c,o,n -> { + s = table {NCard _ => c ; NOrd _ => o} ; + n = n + } ; + + TDigit = { + n : MorphoFin.Number ; + s : CardOrd => Str + } ; + +} + diff --git a/lib/src/finnish/stemmed/ParadigmsFin.gf b/lib/src/finnish/stemmed/ParadigmsFin.gf new file mode 100644 index 000000000..0d445bddf --- /dev/null +++ b/lib/src/finnish/stemmed/ParadigmsFin.gf @@ -0,0 +1,669 @@ +--# -path=.:../abstract:../common:../../prelude + +--1 Finnish Lexical Paradigms +-- +-- Aarne Ranta 2003--2008 +-- +-- This is an API to the user of the resource grammar +-- for adding lexical items. It gives functions for forming +-- expressions of open categories: nouns, adjectives, verbs. +-- +-- Closed categories (determiners, pronouns, conjunctions) are +-- accessed through the resource syntax API and $Structural.gf$. +-- +-- The main difference with $MorphoFin.gf$ is that the types +-- referred to are compiled resource grammar types. We have moreover +-- had the design principle of always having existing forms, rather +-- than stems, as string arguments of the paradigms. +-- +-- The structure of functions for each word class $C$ is the following: +-- there is a polymorphic constructor $mkC$, which takes one or +-- a few arguments. In Finnish, one argument is enough in 80-90% of +-- cases in average. + +resource ParadigmsFin = open + (Predef=Predef), + Prelude, + MorphoFin, + CatFin, StemFin + in { + + flags optimize=noexpand ; + +--2 Parameters +-- +-- To abstract over gender, number, and (some) case names, +-- we define the following identifiers. The application programmer +-- should always use these constants instead of the constructors +-- defined in $ResFin$. + +oper + Number : Type ; + + singular : Number ; + plural : Number ; + + Case : Type ; + nominative : Case ; -- e.g. "talo" + genitive : Case ; -- e.g. "talon" + partitive : Case ; -- e.g. "taloa" + essive : Case ; -- e.g. "talona" + translative : Case ; -- e.g. "taloksi" + inessive : Case ; -- e.g. "talossa" + elative : Case ; -- e.g. "talosta" + illative : Case ; -- e.g. "taloon" + adessive : Case ; -- e.g. "talolla" + ablative : Case ; -- e.g. "talolta" + allative : Case ; -- e.g. "talolle" + + infFirst : InfForm ; -- e.g. "tehdä" + infElat : InfForm ; -- e.g. "tekemästä" + infIllat : InfForm ; -- e.g. "tekemään" + +-- The following type is used for defining *rection*, i.e. complements +-- of many-place verbs and adjective. A complement can be defined by +-- just a case, or a pre/postposition and a case. + + prePrep : Case -> Str -> Prep ; -- preposition, e.g. partitive "ilman" + postPrep : Case -> Str -> Prep ; -- postposition, e.g. genitive "takana" + postGenPrep : Str -> Prep ; -- genitive postposition, e.g. "takana" + casePrep : Case -> Prep ; -- just case, e.g. adessive + + NK : Type ; -- Noun from DictFin (Kotus) + AK : Type ; -- Adjective from DictFin (Kotus) + VK : Type ; -- Verb from DictFin (Kotus) + AdvK : Type ; -- Adverb from DictFin (Kotus) + +--2 Nouns + +-- The worst case gives ten forms. +-- In practice just a couple of forms are needed to define the different +-- stems, vowel alternation, and vowel harmony. + +oper + +-- The regular noun heuristic takes just one form (singular +-- nominative) and analyses it to pick the correct paradigm. +-- It does automatic grade alternation, and is hence not usable +-- for words like "auto" (whose genitive would become "audon"). +-- +-- If the one-argument paradigm does not give the correct result, one can try and give +-- two or three forms. Most notably, the two-argument variant is used +-- for nouns like "kivi - kiviä", which would otherwise become like +-- "rivi - rivejä". Three arguments are used e.g. for +-- "auto - auton - autoja", which would otherwise become +-- "auto - audon". + + mkN : overload { + mkN : (kukko : Str) -> N ; -- predictable nouns, covers 82% + mkN : (savi,savia : Str) -> N ; -- different pl.part + mkN : (vesi,veden,vesiä : Str) -> N ; -- also different sg.gen + mkN : (vesi,veden,vesiä,vettä : Str) -> N ; -- also different sg.part + mkN : (olo,n,a,na,oon,jen,ja,ina,issa,ihin : Str) -> N ; -- worst case, 10 forms + mkN : (pika : Str) -> (juna : N) -> N ; -- compound with invariable prefix + mkN : (oma : N) -> (tunto : N) -> N ; -- compound with inflecting prefix + mkN : NK -> N ; -- noun from DictFin (Kotus) + } ; + +-- Some nouns are regular except for the singular nominative (e.g. "mies"). + + exceptNomN : N -> Str -> N ; + +-- Nouns used as functions need a case, of which the default is +-- the genitive. + + mkN2 : overload { + mkN2 : N -> N2 ; -- relational noun with genitive + mkN2 : N -> Prep -> N2 -- relational noun another prep. + } ; + + mkN3 : N -> Prep -> Prep -> N3 ; -- relation with two complements + +-- Proper names can be formed by using declensions for nouns. +-- The plural forms are filtered away by the compiler. + + mkPN : overload { + mkPN : Str -> PN ; -- predictable noun made into name + mkPN : N -> PN -- any noun made into name + } ; + +--2 Adjectives + +-- Non-comparison one-place adjectives are just like nouns. +-- The regular adjectives are based on $regN$ in the positive. +-- Comparison adjectives have three forms. +-- The comparative and the superlative +-- are always inflected in the same way, so the nominative of them is actually +-- enough (except for the superlative "paras" of "hyvä"). + + mkA : overload { + mkA : Str -> A ; -- regular noun made into adjective + mkA : N -> A ; -- any noun made into adjective + mkA : N -> (kivempi,kivin : Str) -> A ; -- deviating comparison forms + mkA : (hyva,prmpi,pras : N) -> (hyvin,pmmin,prhten : Str) -> A ; -- worst case adj + mkA : AK -> A ; -- adjective from DictFin (Kotus) + } ; + +-- Two-place adjectives need a case for the second argument. + + mkA2 : A -> Prep -> A2 -- e.g. "jaollinen" casePrep adessive + = \a,p -> a ** {c2 = p ; lock_A2 = <>}; + + + +--2 Verbs +-- +-- The grammar does not cover the potential mood and some nominal +-- forms. One way to see the coverage is to linearize a verb to +-- a table. +-- The worst case needs twelve forms, as shown in the following. + + mkV : overload { + mkV : (huutaa : Str) -> V ; -- predictable verbs, covers 90% + mkV : (huutaa,huusi : Str) -> V ; -- deviating past 3sg + mkV : (huutaa,huudan,huusi : Str) -> V ; -- also deviating pres. 1sg + mkV : (huutaa,dan,taa,tavat,takaa,detaan,sin,si,sisi,tanut,dettu,tanee : Str) -> V ; -- worst-case verb + mkV : VK -> V ; -- verb from DictFin (Kotus) + mkV : V -> Str -> V ; -- hakata päälle (particle verb) + } ; + +-- All the patterns above have $nominative$ as subject case. +-- If another case is wanted, use the following. + + caseV : Case -> V -> V ; -- deviating subj. case, e.g. genitive "täytyä" + +-- The verbs "be" is special. + + vOlla : V ; -- the verb "be" + + +--3 Two-place verbs +-- +-- Two-place verbs need an object case, and can have a pre- or postposition. +-- The default is direct (accusative) object. There is also a special case +-- with case only. The string-only argument case yields a regular verb with +-- accusative object. + + mkV2 : overload { + mkV2 : Str -> V2 ; -- predictable direct transitive + mkV2 : V -> V2 ; -- direct transitive + mkV2 : V -> Case -> V2 ; -- complement just case + mkV2 : V -> Prep -> V2 ; -- complement pre/postposition + mkV2 : VK -> V2 ; -- direct transitive of Kotus verb + } ; + + +--3 Three-place verbs +-- +-- Three-place (ditransitive) verbs need two prepositions, of which +-- the first one or both can be absent. + + mkV3 : V -> Prep -> Prep -> V3 ; -- e.g. puhua, allative, elative + dirV3 : V -> Case -> V3 ; -- siirtää, (accusative), illative + dirdirV3 : V -> V3 ; -- antaa, (accusative), (allative) + + +--3 Other complement patterns +-- +-- Verbs and adjectives can take complements such as sentences, +-- questions, verb phrases, and adjectives. + + mkV0 : V -> V0 ; --% + mkVS : V -> VS ; + mkV2S : V -> Prep -> V2S ; -- e.g. "sanoa" allative + mkVV : V -> VV ; -- e.g. "alkaa" + mkVVf : V -> InfForm -> VV ; -- e.g. "ruveta" infIllat + mkV2V : V -> Prep -> V2V ; -- e.g. "käskeä" genitive + mkV2Vf : V -> Prep -> InfForm -> V2V ; -- e.g. "kieltää" partitive infElat + mkVA : V -> Prep -> VA ; -- e.g. "maistua" ablative + mkV2A : V -> Prep -> Prep -> V2A ; -- e.g. "maalata" accusative translative + mkVQ : V -> VQ ; + mkV2Q : V -> Prep -> V2Q ; -- e.g. "kysyä" ablative + + mkAS : A -> AS ; --% + mkA2S : A -> Prep -> A2S ; --% + mkAV : A -> AV ; --% + mkA2V : A -> Prep -> A2V ; --% + +-- Notice: categories $AS, A2S, AV, A2V$ are just $A$, +-- and the second argument is given +-- as an adverb. Likewise +-- $V0$ is just $V$. + + V0 : Type ; --% + AS, A2S, AV, A2V : Type ; --% + +--. +-- The definitions should not bother the user of the API. So they are +-- hidden from the document. + + Case = MorphoFin.Case ; + Number = MorphoFin.Number ; + + singular = Sg ; + plural = Pl ; + + nominative = Nom ; + genitive = Gen ; + partitive = Part ; + translative = Transl ; + inessive = Iness ; + essive = Ess ; + elative = Elat ; + illative = Illat ; + adessive = Adess ; + ablative = Ablat ; + allative = Allat ; + + infFirst = Inf1 ; infElat = Inf3Elat ; infIllat = Inf3Illat ; + + prePrep : Case -> Str -> Prep = + \c,p -> {c = NPCase c ; s = p ; isPre = True ; lock_Prep = <>} ; + postPrep : Case -> Str -> Prep = + \c,p -> {c = NPCase c ; s = p ; isPre = False ; lock_Prep = <>} ; + postGenPrep p = { + c = NPCase genitive ; s = p ; isPre = False ; lock_Prep = <>} ; + casePrep : Case -> Prep = + \c -> {c = NPCase c ; s = [] ; isPre = True ; lock_Prep = <>} ; + accPrep = {c = NPAcc ; s = [] ; isPre = True ; lock_Prep = <>} ; + + NK = {s : NForms ; lock_NK : {}} ; + AK = {s : NForms ; lock_AK : {}} ; + VK = {s : VForms ; lock_VK : {}} ; + AdvK = {s : Str ; lock_AdvK : {}} ; + + + mkN = overload { + mkN : (talo : Str) -> N = mk1N ; + -- \s -> nforms2snoun (nForms1 s) ; + mkN : (talo,talon : Str) -> N = mk2N ; + -- \s,t -> nforms2snoun (nForms2 s t) ; + mkN : (talo,talon,taloja : Str) -> N = mk3N ; + -- \s,t,u -> nforms2snoun (nForms3 s t u) ; + mkN : (talo,talon,taloja,taloa : Str) -> N = mk4N ; + -- \s,t,u,v -> nforms2snoun (nForms4 s t u v) ; + mkN : + (talo,talon,taloa,talona,taloon,talojen,taloja,taloina,taloissa,taloihin + : Str) -> N = mk10N ; + mkN : (sora : Str) -> (tie : N) -> N = mkStrN ; + mkN : (oma,tunto : N) -> N = mkNN ; + mkN : (sana : NK) -> N = \w -> nforms2snoun w.s ; + } ; + + exceptNomN : N -> Str -> N = \noun,nom -> lin N { + s = table { + 0 => nom ; + f => noun.s ! f + } ; + h = noun.h + } ; + + +---- mk1A : Str -> A = \jalo -> aForms2A (nforms2aforms (nForms1 jalo)) ; +---- mkNA : N -> A = snoun2sadj ; + + mk1N : (talo : Str) -> N = \s -> nforms2snoun (nForms1 s) ; + mk2N : (talo,talon : Str) -> N = \s,t -> nforms2snoun (nForms2 s t) ; + mk3N : (talo,talon,taloja : Str) -> N = \s,t,u -> nforms2snoun (nForms3 s t u) ; + mk4N : (talo,talon,taloa,taloja : Str) -> N = \s,t,u,v -> + nforms2snoun (nForms4 s t u v) ; + mk10N : + (talo,talon,taloa,talona,taloon,talojen,taloja,taloina,taloissa,taloihin + : Str) -> N = \a,b,c,d,e,f,g,h,i,j -> + lin N (nforms2snoun (nForms10 a b c d e f g h i j)) ; + + mkStrN : Str -> N -> N = \sora,tie -> { + s = \\c => sora + tie.s ! c ; + h = tie.h ; + lock_N = <> + } ; + mkNN : N -> N -> N = \oma,tunto -> { + s = \\c => oma.s ! c + tunto.s ! c ; + h = tunto.h ; + lock_N = <> + } ; ---- TODO: oma in possessive suffix forms + + nForms1 : Str -> NForms = \ukko -> + let + ukk = init ukko ; + uko = weakGrade ukko ; + ukon = uko + "n" ; + o = case last ukko of {"ä" => "ö" ; "a" => "o"} ; -- only used then + renka = strongGrade (init ukko) ; + rake = strongGrade ukko ; + in + case ukko of { + _ + "nen" => dNainen ukko ; + _ + ("aa" | "ee" | "ii" | "oo" | "uu" | "yy" |"ää"|"öö") => dPuu ukko ; + _ + ("ai" | "ei" | "oi" | "ui" | "yi" | "äi" | "öi") => dPuu ukko ; + _ + ("ie" | "uo" | "yö") => dSuo ukko ; + _ + ("ea" | "eä") => dKorkea ukko ; + _ + "is" => dKaunis ukko ; + _ + ("i" | "u") + "n" => dLiitin ukko (renka + "men") ; + _ + ("ton" | "tön") => dOnneton ukko ; + _ + "e" => dRae ukko (rake + "en") ; + _ + ("ut" | "yt") => dOttanut ukko ; + _ + ("as" | "äs") => dRae ukko (renka + last renka + "n") ; + _ + ("uus" | "yys" | "eus" | "eys") => dLujuus ukko ; + _ + "s" => dJalas ukko ; + +-- {- heuristics for 3-syllable nouns ending a/ä + _ + ("a" | "e" | "i" | "o" | "u" | "y" | "ä" | "ö") + ? + + _ + "i" + ? + a@("a" | "ä") => + dSilakka ukko (ukko + "n") (ukk + o + "it" + a) ; -- pesijä + _ + ("a" | "e" | "i" | "o" | "u" | "y" | "ä" | "ö") + ? + _ + + ("a" | "e" | "o" | "u" | "y" | "ä" | "ö") + + ("l" | "r" | "n") + a@("a" | "ä") => + dSilakka ukko (ukko + "n") (ukk + o + "it" + a) ; -- sarana, omena + _ + ("a" | "e" | "i" | "o" | "u" | "y" | "ä" | "ö") + ? + _ + + ("a" | "e" | "i" | "o" | "u" | "y" | "ä" | "ö") + + ("n" | "k" | "s") + "k" + a@("a" | "ä") => + dSilakka ukko (uko + "n") (init uko + o + "it" + a) ; -- silakka + _ + ("a" | "e" | "i" | "o" | "u" | "y" | "ä" | "ö") + ? + _ + + ("a" | "e" | "i" | "o" | "u" | "y" | "ä" | "ö") + + ("n" | "t" | "s") + "t" + a@("a" | "ä") => + dSilakka ukko (uko + "n") (ukk + o + "j" + a) ; -- yhdyntä (but not isäntä) + _ + ("a" | "e" | "i" | "o" | "u") + ? + _ + + ("a" | "e" | "o" | "u") + ? + "a" => + dSilakka ukko (ukko + "n") (ukk + "ia") ; -- asema, johtaja +-- -} + _ + "i" +o@("o"|"ö") => dSilakka ukko (ukko+"n") (ukko+"it"+getHarmony o); + _ + "i" + "a" => dSilakka ukko (ukko + "n") (ukk + "oita") ; + _ + "i" + "ä" => dSilakka ukko (ukko + "n") (ukk + "öitä") ; + _ + ("a" | "o" | "u" | "y" | "ä" | "ö") => dUkko ukko ukon ; + _ + "i" => dPaatti ukko ukon ; + _ + ("ar" | "är") => dPiennar ukko (renka + "ren") ; + _ + "e" + ("l" | "n") => dPiennar ukko (ukko + "en") ; + _ => dUnix ukko + } ; + + + nForms2 : (_,_ : Str) -> NForms = \ukko,ukkoja -> + let + ukot = nForms1 ukko ; + ukon = weakGrade ukko + "n" ; + in + case of { + <_, _ + ":" + ? + ("a" | "ä")> => dSDP ukko ; + <_ + "ea", _ + "oita"> => + dSilakka ukko ukon ukkoja ; -- idea, but not korkea + <_ + ("aa" | "ee" | "ii" | "oo" | "uu" | "yy" | "ää" | "öö" | + "ie" | "uo" | "yö" | "ea" | "eä" | + "ia" | "iä" | "io" | "iö"), _ + ("a" | "ä")> => + nForms1 ukko ; --- to protect --- how to get "dioja"? + <_ + ("a" | "ä" | "o" | "ö"), _ + ("a" | "ä")> => + dSilakka ukko ukon ukkoja ; + => + dArpi ukko (init (weakGrade ukko) + "en") ; + <_ + "i", _ + ("eita" | "eitä")> => + dTohtori ukko ; + <_ + ("ut" | "yt"),_ + ("uita" | "yitä")> => dRae ukko (init ukko + "en") ; + <_ + "e", nuk + ("eja" | "ejä")> => + dNukke ukko ukon ; + <_ + "s", _ + "ksi" + ?> => dJalas ukko ; + <_ + ("l" | "n" | "r" | "s"), _ + ("eja" | "ejä")> => dUnix ukko ; + <_, _ + ("a" | "ä")> => ukot ; + _ => + Predef.error + (["last argument should end in a/ä, not"] ++ ukkoja) + } ; + + nForms3 : (_,_,_ : Str) -> NForms = \ukko,ukon,ukkoja -> + let + ukk = init ukko ; + ukot = nForms2 ukko ukkoja ; + in + case of { + <_, _ + ":n"> => dSDP ukko ; + <_ + ("aa" | "ee" | "ii" | "oo" | "uu" | "yy" | "ää" | "öö" | + "ie" | "uo" | "yö" | "ea" | "eä" | + "ia" | "iä" | "io" | "iö" | "ja" | "jä"), _ + "n"> => + ukot ; --- to protect + <_ + ("a" | "o" | "u" | "y" | "ä" | "ö"), _ + "n"> => + dSilakka ukko ukon ukkoja ; -- auto,auton + <_ + "mpi", _ + ("emman" | "emmän")> => dSuurempi ukko ; + <_ + "in", _ + ("imman" | "immän")> => dSuurin ukko ; + => + dRae ukko ukon ; + => + dRae ukko ukon ; + => dRae ukko ukon ; + => dArpi ukko ukon ; + <_ + ("us" | "ys"), _ + "den"> => dLujuus ukko ; + <_, _ + "n"> => ukot ; + _ => + Predef.error (["second argument should end in n, not"] ++ ukon) + } ; + + nForms4 : (_,_,_,_ : Str) -> NForms = \ukko,ukon,ukkoja,ukkoa -> + let + ukot = nForms3 ukko ukon ukkoja ; + in + case of { + <_,_ + "n", _ + ("a" | "ä"), _ + ("a" | "ä")> => + table { + 2 => ukkoa ; + n => ukot ! n + } ; + _ => + Predef.error + (["last arguments should end in n, a/ä, and a/ä, not"] ++ + ukon ++ ukkoja ++ ukkoa) + } ; + + mkN2 = overload { + mkN2 : N -> N2 = \n -> mmkN2 n (casePrep genitive) ; + mkN2 : N -> Prep -> N2 = mmkN2 + } ; + + mmkN2 : N -> Prep -> N2 = \n,c -> n ** {c2 = c ; isPre = mkIsPre c ; lock_N2 = <>} ; + mkN3 = \n,c,e -> n ** {c2 = c ; c3 = e ; + isPre = mkIsPre c ; -- matka Lontoosta Pariisiin + isPre2 = mkIsPre e ; -- Suomen voitto Ruotsista + lock_N3 = <> + } ; + + mkIsPre : Prep -> Bool = \p -> case p.c of { + NPCase Gen => notB p.isPre ; -- Jussin veli (prep is , isPre becomes False) + _ => True -- syyte Jussia vastaan, puhe Jussin puolesta + } ; + + mkPN = overload { + mkPN : Str -> PN = mkPN_1 ; + mkPN : N -> PN = \s -> lin PN s ; + } ; + + mkPN_1 : Str -> PN = \s -> lin PN (mk1N s) ; + +-- adjectives + + mkA = overload { + mkA : Str -> A = mkA_1 ; + mkA : N -> A = \n -> noun2adjDeg n ** {lock_A = <>} ; + mkA : N -> (kivempaa,kivinta : Str) -> A = \n -> regAdjective n ; + mkA : (sana : AK) -> A = \w -> noun2adjDeg (nForms2N w.s) ; + + mkA : (hyva,parempi,paras : N) -> (hyvin,paremmin,parhaiten : Str) -> A = \h,p,ps,hn,pn,ph -> lin A { + s = table { + Posit => table { + SAN nf => h.s ! nf ; + SAAdv => hn + } ; + Compar => table { + SAN nf => p.s ! nf ; + SAAdv => pn + } ; + Superl => table { + SAN nf => ps.s ! nf ; + SAAdv => ph + } + } ; + h = h.h + } ; + } ; + + mkA_1 : Str -> A = \x -> lin A (noun2adjDeg (mk1N x)) ; + +-- auxiliaries + mkAdjective : (_,_,_ : SAdj) -> A = \hyva,parempi,paras -> + {s = table { + Posit => hyva.s ; + Compar => parempi.s ; + Superl => paras.s + } ; + h = hyva.h ; ---- different for parempi, paras + lock_A = <> + } ; + regAdjective : SNoun -> Str -> Str -> A = \kiva, kivempi, kivin -> + mkAdjective + (snoun2sadj kiva) + (snoun2sadjComp False (nforms2snoun (dSuurempi kivempi))) + (snoun2sadjComp False (nforms2snoun (dSuurin kivin))) ; + noun2adjDeg : SNoun -> A = \suuri -> + regAdjective + suuri + (suuri.s ! 1 + "mpi") ---- to check + (suuri.s ! 8 + "n") ; ---- + + + + +-- verbs + + mkV = overload { + mkV : (huutaa : Str) -> V = mk1V ; + mkV : (huutaa,huusi : Str) -> V = mk2V ; + mkV : (huutaa,huudan,huusi : Str) -> V = mk3V ; + mkV : ( + huutaa,huudan,huutaa,huutavat,huutakaa,huudetaan, + huusin,huusi,huusisi,huutanut,huudettu,huutanee : Str) -> V = mk12V ; + mkV : (sana : VK) -> V = \w -> vforms2V w.s ** {sc = NPCase Nom ; lock_V = <> ; p = []} ; + mkV : V -> Str -> V = \w,p -> vforms2V w.s ** {sc = NPCase Nom ; lock_V = <> ; p = p} ; + } ; + + mk1V : Str -> V = \s -> + let vfs = vforms2V (vForms1 s) in + vfs ** {sc = NPCase Nom ; lock_V = <> ; p = []} ; + mk2V : (_,_ : Str) -> V = \x,y -> + let vfs = vforms2V (vForms2 x y) in vfs ** {sc = NPCase Nom ; lock_V = <> ; p = []} ; + mk3V : (huutaa,huudan,huusi : Str) -> V = \x,_,y -> mk2V x y ; ---- + mk12V : ( + huutaa,huudan,huutaa,huutavat,huutakaa,huudetaan, + huusin,huusi,huusisi,huutanut,huudettu,huutanee : Str) -> V = + \a,b,c,d,e,f,g,h,i,j,k,l -> + vforms2V (vForms12 a b c d e f g h i j k l) ** {sc = NPCase Nom ; lock_V = <> ; p = []} ; + + vForms1 : Str -> VForms = \ottaa -> + let + a = last ottaa ; + otta = init ottaa ; + ott = init otta ; + ots = init ott + "s" ; + ota = weakGrade otta ; + otin = init (strongGrade (init ott)) + "elin" ; + ot = init ota ; + in + case ottaa of { + _ + ("e" | "i" | "o" | "u" | "y" | "ö") + ("a" | "ä") => + cHukkua ottaa (ota + "n") ; + _ + ("l" | "n" | "r") + ("taa" | "tää") => + cOttaa ottaa (ota + "n") (ots + "in") (ots + "i") ; + ("" | ?) + ("a" | "e" | "i" | "o" | "u") + ? + _ + + ("a" | "e" | "i" | "o" | "u") + _ + "aa" => + cOttaa ottaa (ota + "n") (ot + "in") (ott + "i") ; + ("" | ?) + ("a" | "e" | "i") + _ + "aa" => + cOttaa ottaa (ota + "n") (ot + "oin") (ott + "oi") ; + _ + ("aa" | "ää") => + cOttaa ottaa (ota + "n") (ot + "in") (ott + "i") ; + _ + ("ella" | "ellä") => + cKuunnella ottaa otin ; + _ + ("osta" | "östä") => + cJuosta ottaa (init ott + "ksen") ; + _ + ("st" | "nn" | "ll" | "rr") + ("a" | "ä") => + cJuosta ottaa (ott + "en") ; + _ + ("ita" | "itä") => + cHarkita ottaa ; + _ + ("eta" | "etä" | "ota" | "ata" | "uta" | "ytä" | "ätä" | "ötä") => + cPudota ottaa (strongGrade ott + "si") ; + _ + ("da" | "dä") => + cJuoda ottaa ; + _ => Predef.error (["expected infinitive, found"] ++ ottaa) + } ; + + vForms2 : (_,_ : Str) -> VForms = \huutaa,huusi -> + let + huuda = weakGrade (init huutaa) ; + huusin = weakGrade huusi + "n" ; + autoin = weakGrade (init huusi) + "in" ; + in + case of { + <_ + ("taa" | "tää"), _ + ("oi" | "öi")> => + cOttaa huutaa (huuda + "n") autoin huusi ; + <_ + ("aa" | "ää"), _ + "i"> => + cOttaa huutaa (huuda + "n") huusin huusi ; + <_ + ("eta" | "etä"), _ + "eni"> => + cValjeta huutaa huusi ; + <_ + ("sta" | "stä"), _ + "si"> => + vForms1 huutaa ; -- pestä, halkaista + <_ + ("ta" | "tä"), _ + "si"> => + cPudota huutaa huusi ; + <_ + ("lla" | "llä"), _ + "li"> => + cKuunnella huutaa huusin ; + _ => vForms1 huutaa + } ; + + + + caseV c v = {s = v.s ; sc = NPCase c ; qp = v.qp ; lock_V = <> ; p = v.p} ; + + vOlla = verbOlla ** {sc = NPCase Nom ; qp = True ; lock_V = <> ; p = []} ; ---- lieneekö + + mk2V2 : V -> Prep -> V2 = \v,c -> v ** {c2 = c ; lock_V2 = <>} ; + caseV2 : V -> Case -> V2 = \v,c -> mk2V2 v (casePrep c) ; + dirV2 v = mk2V2 v accPrep ; + + mkAdv = overload { + mkAdv : Str -> Adv = \s -> {s = s ; lock_Adv = <>} ; + mkAdv : AdvK -> Adv = \s -> {s = s.s ; lock_Adv = <>} ; + } ; + + mkV2 = overload { + mkV2 : Str -> V2 = \s -> dirV2 (mk1V s) ; + mkV2 : V -> V2 = dirV2 ; + mkV2 : V -> Case -> V2 = caseV2 ; + mkV2 : V -> Prep -> V2 = mk2V2 ; + mkV2 : VK -> V2 = \w -> dirV2 (vforms2V w.s ** {sc = NPCase Nom ; lock_V = <> ; p = []}) ; + } ; + + mk2V2 : V -> Prep -> V2 ; + caseV2 : V -> Case -> V2 ; + dirV2 : V -> V2 ; + + mkV3 v p q = v ** {c2 = p ; c3 = q ; lock_V3 = <>} ; + dirV3 v p = mkV3 v accPrep (casePrep p) ; + dirdirV3 v = dirV3 v allative ; + + mkVS v = v ** {lock_VS = <>} ; + mkVV v = mkVVf v infFirst ; + mkVVf v f = v ** {vi = f ; lock_VV = <>} ; + mkVQ v = v ** {lock_VQ = <>} ; + + V0 : Type = V ; + AS, A2S, AV : Type = A ; + A2V : Type = A2 ; + + mkV0 v = v ** {lock_V = <>} ; + mkV2S v p = mk2V2 v p ** {lock_V2S = <>} ; + mkV2V v p = mkV2Vf v p infIllat ; + mkV2Vf v p f = mk2V2 v p ** {vi = f ; lock_V2V = <>} ; + + mkVA v p = v ** {c2 = p ; lock_VA = <>} ; + mkV2A v p q = v ** {c2 = p ; c3 = q ; lock_V2A = <>} ; + mkV2Q v p = mk2V2 v p ** {lock_V2Q = <>} ; + + mkAS v = v ** {lock_A = <>} ; + mkA2S v p = mkA2 v p ** {lock_A = <>} ; + mkAV v = v ** {lock_A = <>} ; + mkA2V v p = mkA2 v p ** {lock_A2 = <>} ; + +} ; diff --git a/lib/src/finnish/stemmed/StemFin.gf b/lib/src/finnish/stemmed/StemFin.gf new file mode 100644 index 000000000..768da6dbd --- /dev/null +++ b/lib/src/finnish/stemmed/StemFin.gf @@ -0,0 +1,132 @@ +-- stem-based morphology, with just the minimum number of concatenative stems + +resource StemFin = open MorphoFin, Prelude in { + +flags coding = utf8 ; + +oper + SNForm : Type = Predef.Ints 9 ; + SNoun : Type = {s : SNForm => Str ; h : Harmony} ; +-- SVerb : Type ; + + nforms2snoun : NForms -> SNoun = \nf -> { + s = table { + 0 => nf ! 0 ; -- ukko + 1 => Predef.tk 1 (nf ! 1) ; -- uko(n) + 2 => nf ! 2 ; -- ukkoa + 3 => nf ! 3 ; -- ukkona + 4 => Predef.tk 1 (nf ! 4) ; -- ukkoo(n) + 5 => Predef.tk 1 (nf ! 5) ; -- ukkoje(n) + 6 => nf ! 6 ; -- ukkoja + 7 => Predef.tk 2 (nf ! 7) ; -- ukkoi(na) + 8 => Predef.tk 3 (nf ! 8) ; -- ukoi(ssa) + 9 => Predef.tk 1 (nf ! 9) -- ukkoihi(n) + } ; + h = aHarmony (last (nf ! 2)) ; + } ; + + snoun2nounBind : SNoun -> Noun = snoun2noun True ; + snoun2nounSep : SNoun -> Noun = snoun2noun False ; + + snoun2noun : Bool -> SNoun -> Noun = \b,sn -> + let + plus : Str -> Str -> Str = \x,y -> case b of { + True => x + y ; + False => glue x y + } ; + + f = sn.s ; + + ukko = f ! 0 ; + uko = f ! 1 ; + ukkoa = f ! 2 ; + ukkona = f ! 3 ; + ukkoo = f ! 4 ; + ukkoje = f ! 5 ; + ukkoja = f ! 6 ; + ukkoi = f ! 7 ; + ukoi = f ! 8 ; + ukkoihi = f ! 9 ; + + a = harmonyA sn.h ; + in + + {s = table { + NCase Sg Nom => ukko ; + NCase Sg Gen => plus uko "n" ; + NCase Sg Part => ukkoa ; + NCase Sg Transl => plus uko "ksi" ; + NCase Sg Ess => ukkona ; + NCase Sg Iness => plus uko ("ss" + a) ; + NCase Sg Elat => plus uko ("st" + a) ; + NCase Sg Illat => plus ukkoo "n" ; + NCase Sg Adess => plus uko ("ll" + a) ; + NCase Sg Ablat => plus uko ("lt" + a) ; + NCase Sg Allat => plus uko "lle" ; + NCase Sg Abess => plus uko ("tt" + a) ; + + NCase Pl Nom => plus uko "t" ; + NCase Pl Gen => plus ukkoje "n" ; + NCase Pl Part => ukkoja ; + NCase Pl Transl => plus ukoi "ksi" ; + NCase Pl Ess => plus ukkoi ("n" + a) ; + NCase Pl Iness => plus ukoi ("ss" + a) ; + NCase Pl Elat => plus ukoi ("st" + a) ; + NCase Pl Illat => plus ukkoihi "n" ; + NCase Pl Adess => plus ukoi ("ll" + a) ; + NCase Pl Ablat => plus ukoi ("lt" + a) ; + NCase Pl Allat => plus ukoi "lle" ; + NCase Pl Abess => plus ukoi ("tt" + a) ; + + NComit => plus ukkoi "ne" ; + NInstruct => plus ukoi "n" ; + + NPossNom _ => ukko ; + NPossGen Sg => ukko ; + NPossGen Pl => ukkoje ; + NPossTransl Sg => plus uko "kse" ; + NPossTransl Pl => plus ukoi "kse" ; + NPossIllat Sg => ukkoo ; + NPossIllat Pl => ukkoihi + } ; + h = sn.h ; + lock_N = <> + } ; + + + snoun2np : Number -> SNoun -> NPForm => Str = \n,sn -> + \\c => (snoun2nounSep sn).s ! NCase n (npform2case n c) ; + + noun2snoun : Noun -> SNoun = \n -> nforms2snoun (n2nforms n) ; + + aHarmony : Str -> Harmony = \a -> case a of + {"a" => Back ; _ => Front} ; + + harmonyA : Harmony -> Str = \h -> case h of + {Back => "a" ; Front => "ä"} ; + + +-- Adjectives + +param + SAForm = SAN SNForm | SAAdv ; + +oper + SAdj = {s : SAForm => Str ; h : Harmony} ; + + snoun2sadj : SNoun -> SAdj = snoun2sadjComp True ; + + snoun2sadjComp : Bool -> SNoun -> SAdj = \isPos,tuore -> + let + tuoree = tuore.s ! 1 ; + tuoreesti = tuoree + "sti" ; + tuoreemmin = init tuoree ; + in {s = table { + SAN f => tuore.s ! f ; + SAAdv => if_then_Str isPos tuoreesti tuoreemmin + } ; + h = tuore.h + } ; + + +} \ No newline at end of file diff --git a/lib/src/finnish/stemmed/StructuralFin.gf b/lib/src/finnish/stemmed/StructuralFin.gf new file mode 100644 index 000000000..a07158bd8 --- /dev/null +++ b/lib/src/finnish/stemmed/StructuralFin.gf @@ -0,0 +1,315 @@ +concrete StructuralFin of Structural = CatFin ** + open MorphoFin, ParadigmsFin, (X = ConstructX), MakeStructuralFin, StemFin, Prelude in { + + flags optimize=all ; + + lin + above_Prep = postGenPrep "yläpuolella" ; + after_Prep = postGenPrep "jälkeen" ; + + all_Predet = {s = \\n,c => + let + kaiket = caseTable n (snoun2nounBind (mkN "kaikki" "kaiken" "kaikkena")) + in + case npform2case n c of { + Nom => "kaikki" ; + k => kaiket ! k + } + } ; + almost_AdA, almost_AdN = ss "melkein" ; + although_Subj = ss "vaikka" ; + always_AdV = ss "aina" ; + and_Conj = {s1 = [] ; s2 = "ja" ; n = Pl} ; + because_Subj = ss "koska" ; + before_Prep = prePrep partitive "ennen" ; + behind_Prep = postGenPrep "takana" ; + between_Prep = postGenPrep "välissä" ; + both7and_DConj = sd2 "sekä" "että" ** {n = Pl} ; + but_PConj = ss "mutta" ; + by8agent_Prep = postGenPrep "toimesta" ; + by8means_Prep = casePrep adessive ; + can8know_VV = mkVV (mkV "osata" "osasi") ; + can_VV = mkVV (mkV "voida" "voi") ; + during_Prep = postGenPrep "aikana" ; + either7or_DConj = sd2 "joko" "tai" ** {n = Sg} ; + everybody_NP = makeNP (snoun2nounBind (mkN "jokainen")) Sg ; + every_Det = mkDet Sg (snoun2nounBind (mkN "jokainen")) ; + everything_NP = makeNP (((snoun2nounBind (mkN "kaikki" "kaiken" "kaikkena"))) ** + {lock_N = <>}) Sg ; + everywhere_Adv = ss "kaikkialla" ; + few_Det = mkDet Sg (snoun2nounBind (mkN "harva")) ; +--- first_Ord = {s = \\n,c => (mkN "ensimmäinen").s ! NCase n c} ; + for_Prep = casePrep allative ; + from_Prep = casePrep elative ; + he_Pron = mkPronoun "hän" "hänen" "häntä" "hänenä" "häneen" Sg P3 ; + here_Adv = ss "täällä" ; + here7to_Adv = ss "tänne" ; + here7from_Adv = ss "täältä" ; + how_IAdv = ss "miten" ; + how8much_IAdv = ss "kuinka paljon" ; + how8many_IDet = + {s = \\c => "kuinka" ++ (snoun2nounBind (mkN "moni" "monia")).s ! NCase Sg c ; n = Sg ; isNum = False} ; + if_Subj = ss "jos" ; + in8front_Prep = postGenPrep "edessä" ; + i_Pron = mkPronoun "minä" "minun" "minua" "minuna" "minuun" Sg P1 ; + in_Prep = casePrep inessive ; + it_Pron = { + s = \\c => pronSe.s ! npform2case Sg c ; + a = agrP3 Sg ; + isPron = False + } ; + less_CAdv = X.mkCAdv "vähemmän" "kuin" ; + many_Det = mkDet Sg (snoun2nounBind (mkN "moni" "monia")) ; + more_CAdv = X.mkCAdv "enemmän" "kuin" ; + most_Predet = {s = \\n,c => (nForms2N (dSuurin "useinta")).s ! NCase n (npform2case n c)} ; + much_Det = mkDet Sg {s = \\_ => "paljon" ; h = Back} ; --Harmony not relevant, it's just a CommonNoun + must_VV = mkVV (caseV genitive (mkV "täytyä")) ; + no_Utt = ss "ei" ; + on_Prep = casePrep adessive ; +--- one_Quant = mkDet Sg DEPREC + only_Predet = {s = \\_,_ => "vain"} ; + or_Conj = {s1 = [] ; s2 = "tai" ; n = Sg} ; + otherwise_PConj = ss "muuten" ; + part_Prep = casePrep partitive ; + please_Voc = ss ["ole hyvä"] ; --- number + possess_Prep = casePrep genitive ; + quite_Adv = ss "melko" ; + she_Pron = mkPronoun "hän" "hänen" "häntä" "hänenä" "häneen" Sg P3 ; + so_AdA = ss "niin" ; + somebody_NP = { + s = \\c => jokuPron ! Sg ! npform2case Sg c ; + a = agrP3 Sg ; + isPron = False ; isNeg = False + } ; + someSg_Det = heavyDet { + s1 = jokuPron ! Sg ; + s2 = \\_ => [] ; + isNum,isPoss = False ; isDef = True ; isNeg = False ; n = Sg + } ; + somePl_Det = heavyDet { + s1 = jokuPron ! Pl ; + s2 = \\_ => [] ; isNum,isPoss = False ; isNeg = False ; isDef = True ; + n = Pl ; isNeg = False + } ; + something_NP = { + s = \\c => jokinPron ! Sg ! npform2case Sg c ; + a = agrP3 Sg ; + isPron = False ; isNeg = False ; isNeg = False + } ; + somewhere_Adv = ss "jossain" ; + that_Quant = heavyQuant { + s1 = table (MorphoFin.Number) { + Sg => table (MorphoFin.Case) { + c => (mkPronoun "tuo" "tuon" "tuota" "tuona" "tuohon" Sg P3).s ! NPCase c + } ; + Pl => table (MorphoFin.Case) { + c => (mkPronoun "nuo" "noiden" "noita" "noina" "noihin" Sg P3).s ! NPCase c + } + } ; + s2 = \\_ => [] ; isNum,isPoss = False ; isDef = True ; isNeg = False + } ; + that_Subj = ss "että" ; + there_Adv = ss "siellä" ; --- tuolla + there7to_Adv = ss "sinne" ; + there7from_Adv = ss "sieltä" ; + therefore_PConj = ss "siksi" ; + they_Pron = mkPronoun "he" "heidän" "heitä" "heinä" "heihin" Pl P3 ; --- ne + this_Quant = heavyQuant { + s1 = table (MorphoFin.Number) { + Sg => table (MorphoFin.Case) { + c => (mkPronoun "tämä" "tämän" "tätä" "tänä" "tähän" Sg P3).s ! NPCase c + } ; + Pl => table (MorphoFin.Case) { + c => (mkPronoun "nämä" "näiden" "näitä" "näinä" "näihin" Sg P3).s ! NPCase c + } + } ; + s2 = \\_ => [] ; isNum,isPoss = False ; isDef = True ; isNeg = False + } ; + through_Prep = postGenPrep "kautta" ; + too_AdA = ss "liian" ; + to_Prep = casePrep illative ; --- allative + under_Prep = postGenPrep "alla" ; + very_AdA = ss "erittäin" ; + want_VV = mkVV (mkV "tahtoa") ; + we_Pron = mkPronoun "me" "meidän" "meitä" "meinä" "meihin" Pl P1 ; + whatPl_IP = { + s = table {NPAcc => "mitkä" ; c => mikaInt ! Pl ! npform2case Pl c} ; + n = Pl + } ; + whatSg_IP = { + s = \\c => mikaInt ! Sg ! npform2case Sg c ; + n = Sg + } ; + when_IAdv = ss "milloin" ; + when_Subj = ss "kun" ; + where_IAdv = ss "missä" ; + which_IQuant = { + s = mikaInt + } ; + whoSg_IP = { + s = table {NPAcc => "kenet" ; c => kukaInt ! Sg ! npform2case Sg c} ; + n = Sg + } ; + whoPl_IP = { + s = table {NPAcc => "ketkä" ; c => kukaInt ! Pl ! npform2case Pl c} ; + n = Pl + } ; + why_IAdv = ss "miksi" ; + without_Prep = prePrep partitive "ilman" ; + with_Prep = postGenPrep "kanssa" ; + yes_Utt = ss "kyllä" ; + youSg_Pron = mkPronoun "sinä" "sinun" "sinua" "sinuna" "sinuun" Sg P2 ; + youPl_Pron = mkPronoun "te" "teidän" "teitä" "teinä" "teihin" Pl P2 ; + youPol_Pron = + let p = mkPronoun "te" "teidän" "teitä" "teinä" "teihin" Pl P2 in + {s = p.s ; a = AgPol} ; + +oper + jokuPron : MorphoFin.Number => (MorphoFin.Case) => Str = + let + kui = snoun2nounBind (mkN "kuu") + in + table { + Sg => table { + Nom => "joku" ; + Gen => "jonkun" ; + c => relPron ! Sg ! c + "ku" + Predef.drop 3 (kui.s ! NCase Sg c) + } ; + Pl => table { + Nom => "jotkut" ; + c => relPron ! Pl ! c + kui.s ! NCase Pl c + } + } ; + + jokinPron : MorphoFin.Number => (MorphoFin.Case) => Str = + table { + Sg => table { + Nom => "jokin" ; + Gen => "jonkin" ; + c => relPron ! Sg ! c + "kin" + } ; + Pl => table { + Nom => "jotkin" ; + c => relPron ! Pl ! c + "kin" + } + } ; + + mikaInt : MorphoFin.Number => (MorphoFin.Case) => Str = + let { + mi = snoun2nounBind (mkN "mi") + } in + table { + Sg => table { + Nom => "mikä" ; + Gen => "minkä" ; + Part => "mitä" ; + Illat => "mihin" ; + c => mi.s ! NCase Sg c + } ; + Pl => table { + Nom => "mitkä" ; + Gen => "minkä" ; + Part => "mitä" ; + Illat => "mihin" ; + c => mi.s ! NCase Sg c + } + } ; + + kukaInt : MorphoFin.Number => (MorphoFin.Case) => Str = + let + kuka = snoun2nounBind (mkN "kuka" "kenen" "ketä" "kenä" "keneen" + "keiden" "keitä" "keinä" "keissä" "keihin") ; + in + table { + Sg => table { + c => kuka.s ! NCase Sg c + } ; + Pl => table { + Nom => "ketkä" ; + c => kuka.s ! NCase Pl c + } + } ; + mikaanPron : MorphoFin.Number => (MorphoFin.Case) => Str = \\n,c => + case of { + => "mikään" ; + <_,Part> => "mitään" ; + => "minkään" ; + => "mitkään" ; + => "mittenkään" ; + <_,Ess> => "minään" ; + <_,Iness> => "missään" ; + <_,Elat> => "mistään" ; + <_,Adess> => "millään" ; + <_,Ablat> => "miltään" ; + _ => mikaInt ! n ! c + "kään" + } ; + + kukaanPron : MorphoFin.Number => (MorphoFin.Case) => Str = + table { + Sg => table { + Nom => "kukaan" ; + Part => "ketään" ; + Ess => "kenään" ; + Iness => "kessään" ; + Elat => "kestään" ; + Illat => "kehenkään" ; + Adess => "kellään" ; + Ablat => "keltään" ; + c => kukaInt ! Sg ! c + "kään" + } ; + Pl => table { + Nom => "ketkään" ; + Part => "keitään" ; + Ess => "keinään" ; + Iness => "keissään" ; + Elat => "keistään" ; + Adess => "keillään" ; + Ablat => "keiltään" ; + c => kukaInt ! Pl ! c + "kään" + } + } ; + + +oper + makeNP : N -> MorphoFin.Number -> CatFin.NP ; + makeNP noun num = { + s = \\c => noun.s ! NCase num (npform2case num c) ; + a = agrP3 num ; + isPron, isNeg = False ; + lock_NP = <> + } ; + +lin + not_Predet = {s = \\_,_ => "ei"} ; + + no_Quant = heavyQuant { + s1 = \\n,c => mikaanPron ! n ! c ; -- requires negative or question polarity + s2 = \\_ => [] ; isNum,isPoss = False ; isDef = True ; isNeg = True + } ; + + if_then_Conj = {s1 = "jos" ; s2 = "niin" ; n = Sg} ; + nobody_NP = { + s = \\c => kukaanPron ! Sg ! npform2case Sg c ; -- requires negative or question polarity + a = agrP3 Sg ; + isPron = False ; isNeg = True + } ; + + nothing_NP = { + s = \\c => mikaanPron ! Sg ! npform2case Sg c ; --- requires negative or question polarity + a = agrP3 Sg ; + isPron = False ; isNeg = True + } ; + + at_least_AdN = ss "vähintään" ; + at_most_AdN = ss "enintään" ; + + as_CAdv = X.mkCAdv "yhtä" "kuin" ; + + except_Prep = postPrep partitive "lukuunottamatta" ; + + have_V2 = mkV2 (caseV adessive vOlla) ; + + lin language_title_Utt = ss "suomi" ; + +} + diff --git a/lib/src/finnish/stemmed/VerbFin.gf b/lib/src/finnish/stemmed/VerbFin.gf new file mode 100644 index 000000000..014b72c1b --- /dev/null +++ b/lib/src/finnish/stemmed/VerbFin.gf @@ -0,0 +1,145 @@ +--1 Verb Phrases in Finnish + +concrete VerbFin of Verb = CatFin ** open Prelude, ResFin in { + + flags optimize=all_subs ; + + lin + UseV = predV ; + + SlashV2a v = predV v ** {c2 = v.c2} ; + + Slash2V3 v np = + insertObj + (\\fin,b,_ => appCompl fin b v.c2 np) (predV v) ** {c2 = v.c3} ; + Slash3V3 v np = + insertObj + (\\fin,b,_ => appCompl fin b v.c3 np) (predV v) ** {c2 = v.c2} ; + + ComplVV v vp = + insertObj + (\\_,b,a => infVP v.sc b a vp v.vi) + (predV {s = v.s ; + sc = case vp.sc of { + NPCase Nom => v.sc ; -- minun täytyy pestä auto + c => c -- minulla täytyy olla auto + } ; + qp = v.qp ; p = v.p + } + ) ; + + ComplVS v s = insertExtrapos (etta_Conj ++ s.s) (predV v) ; + ComplVQ v q = insertExtrapos ( q.s) (predV v) ; + ComplVA v ap = + insertObj + (\\_,b,agr => + let n = (complNumAgr agr) in + ap.s ! False ! (NCase n (npform2case n v.c2.c))) --- v.cs.s ignored + (predV v) ; + + SlashV2S v s = + insertExtrapos (etta_Conj ++ s.s) (predV v) ** {c2 = v.c2} ; + SlashV2Q v q = + insertExtrapos (q.s) (predV v) ** {c2 = v.c2} ; + SlashV2V v vp = + insertObj (\\_,b,a => infVP v.sc b a vp v.vi) (predV v) ** {c2 = v.c2} ; + ---- different infinitives + SlashV2A v ap = + insertObj + (\\fin,b,_ => + ap.s ! False ! (NCase Sg (npform2case Sg v.c3.c))) ----agr to obj + (predV v) ** {c2 = v.c2} ; + + ComplSlash vp np = insertObjPre np.isNeg (\\fin,b,_ => appCompl fin b vp.c2 np) vp ; + + UseComp comp = + insertObj (\\_,_ => comp.s) (predV (verbOlla ** {sc = NPCase Nom ; qp = True ; p = []})) ; + + UseCopula = predV (verbOlla ** {sc = NPCase Nom ; qp = True ; p = []}) ; + + SlashVV v vp = + insertObj + (\\_,b,a => infVP v.sc b a vp v.vi) + (predV {s = v.s ; + sc = case vp.sc of { + NPCase Nom => v.sc ; -- minun täytyy pestä auto + c => c -- minulla täytyy olla auto + } ; + qp = v.qp ; p = v.p + } + ) ** {c2 = vp.c2} ; ---- correct ?? +{---- 153543936 (210912,312) + SlashV2VNP v np vp = + insertObjPre np.isNeg + (\\fin,b,a => appCompl True b v.c2 np ++ ---- fin -> stack overflow + infVP v.sc b a vp v.vi) + (predV v) ** {c2 = vp.c2} ; +----} + + AdvVP vp adv = insertAdv (\\_ => adv.s) vp ; + + AdVVP adv vp = insertAdv (\\_ => adv.s) vp ; + + AdvVPSlash vps adv = insertAdv (\\_ => adv.s) vps ** {c2 = vps.c2} ; + + AdVVPSlash adv vps = insertAdv (\\_ => adv.s) vps ** {c2 = vps.c2} ; + + ReflVP v = insertObjPre False (\\fin,b,agr => appCompl fin b v.c2 (reflPron agr)) v ; + + PassV2 v = let vp = predV v in { + s = \\_ => vp.s ! VIPass ; + s2 = \\_,_,_ => [] ; + adv = \\_ => [] ; + ext = [] ; + qp = v.qp ; + isNeg = False ; + sc = v.c2.c -- minut valitaan ; minua rakastetaan ; minulle kuiskataan + } ; ---- talon valitaan: should be marked like inf. + +----b UseVS, UseVQ = \v -> v ** {c2 = {s = [] ; c = NPAcc ; isPre = True}} ; + + CompAP ap = { + s = \\agr => + let + n = complNumAgr agr ; + c = case n of { + Sg => Nom ; -- minä olen iso ; te olette iso + Pl => Part -- me olemme isoja ; te olette isoja + } --- definiteness of NP ? + in ap.s ! False ! (NCase n c) + } ; + CompCN cn = { + s = \\agr => + let + n = complNumAgr agr ; + c = case n of { + Sg => Nom ; -- minä olen iso ; te olette iso + Pl => Part -- me olemme isoja ; te olette isoja + } --- definiteness of NP ? + in cn.s ! (NCase n c) + } ; + CompNP np = {s = \\_ => np.s ! NPCase Nom} ; + CompAdv a = {s = \\_ => a.s} ; + + VPSlashPrep vp prep = vp ** {c2 = prep} ; +} + + +--2 The object case +-- +-- The rules involved are ComplV2 and ComplVV above. +-- The work is done jointly in ResFin.infVP and appCompl. +-- Cases to test: l -table (to see negated forms) +--``` +-- minun täytyy ostaa auto +-- PredVP (UsePron i_Pron) (ComplVV must_VV +-- (ComplV2 buy_V2 (DetCN (DetSg (SgQuant DefArt) NoOrd) (UseN car_N)))) +-- minä tahdon ostaa auton +-- PredVP (UsePron i_Pron) (ComplVV want_VV +-- (ComplV2 buy_V2 (DetCN (DetSg (SgQuant DefArt) NoOrd) (UseN car_N)))) +-- minulla täytyy olla auto +-- PredVP (UsePron i_Pron) (ComplVV must_VV +-- (ComplV2 have_V2 (DetCN (DetSg (SgQuant DefArt) NoOrd) (UseN car_N)))) +--``` +-- Unfortunately, there is no nice way to say "I want to have a car". +-- (Other than the paraphrases "I want a car" or "I want to own a car".)