From 8d4bde1787901352adb5d551d0e420c78d947568 Mon Sep 17 00:00:00 2001 From: aarne Date: Sat, 30 Mar 2013 21:52:25 +0000 Subject: [PATCH] a separate compound form for Fin nouns, needed e.g. for nainen->nais --- lib/src/finnish/MorphoFin.gf | 38 +++++++++++++++++++----- lib/src/finnish/ResFin.gf | 6 ++-- lib/src/finnish/stemmed/ParseFin.gf | 4 +-- lib/src/finnish/stemmed/StemFin.gf | 46 +++++++++++++++++------------ lib/src/finnish/stemmed/log.txt | 14 ++++++++- 5 files changed, 76 insertions(+), 32 deletions(-) diff --git a/lib/src/finnish/MorphoFin.gf b/lib/src/finnish/MorphoFin.gf index 02ea8f77f..b2dae68bf 100644 --- a/lib/src/finnish/MorphoFin.gf +++ b/lib/src/finnish/MorphoFin.gf @@ -29,10 +29,10 @@ resource MorphoFin = ResFin ** open Prelude in { let a = vowHarmony nainen ; nais = Predef.tk 3 nainen + "s" - in nForms10 + in nForms11 nainen (nais + "en") (nais + "t" + a) (nais + "en" + a) (nais + "een") (nais + "ten") (nais + "i" + a) - (nais + "in" + a) (nais + "iss" + a) (nais + "iin") ; + (nais + "in" + a) (nais + "iss" + a) (nais + "iin") nais ; dPaluu : Str -> NForms = \paluu -> let @@ -348,7 +348,7 @@ resource MorphoFin = ResFin ** open Prelude in { -- the maximal set of technical stems - NForms : Type = Predef.Ints 9 => Str ; + NForms : Type = Predef.Ints 10 => Str ; nForms10 : (x1,_,_,_,_,_,_,_,_,x10 : Str) -> NForms = \Ukko,ukon,ukkoa,ukkona,ukkoon, @@ -362,7 +362,24 @@ resource MorphoFin = ResFin ** open Prelude in { 6 => ukkoja ; 7 => ukkoina ; 8 => ukoissa ; - 9 => ukkoihin + 9 => ukkoihin ; + 10 => Ukko + } ; + + nForms11 : (x1,_,_,_,_,_,_,_,_,_,x11 : Str) -> NForms = + \Ukko,ukon,ukkoa,ukkona,ukkoon, + ukkojen,ukkoja,ukkoina,ukoissa,ukkoihin,ukko_ -> table { + 0 => Ukko ; + 1 => ukon ; + 2 => ukkoa ; + 3 => ukkona ; + 4 => ukkoon ; + 5 => ukkojen ; + 6 => ukkoja ; + 7 => ukkoina ; + 8 => ukoissa ; + 9 => ukkoihin ; + 10 => ukko_ -- the compound form, e.g. 
nais- } ; Noun = {s : NForm => Str; h : Harmony; lock_N : {}} ; @@ -373,12 +390,14 @@ resource MorphoFin = ResFin ** open Prelude in { ukon = f ! 1 ; ukkoa = f ! 2 ; ukkona = f ! 3 ; + ukko_ = Predef.tk 2 ukkona ; ukkoon = f ! 4 ; ukkojen = f ! 5 ; ukkoja = f ! 6 ; ukkoina = f ! 7 ; ukoissa = f ! 8 ; ukkoihin = f ! 9 ; + ukkos_ = f ! 10 ; a = last ukkoja ; uko = init ukon ; ukko = Predef.tk 2 ukkona ; @@ -417,13 +436,15 @@ resource MorphoFin = ResFin ** open Prelude in { NComit => ukkoi + "ne" ; NInstruct => ukoi + "n" ; - NPossNom _ => ukko ; - NPossGen Sg => ukko ; + NPossNom _ => ukko_ ; + NPossGen Sg => ukko_ ; NPossGen Pl => init ukkojen ; NPossTransl Sg => uko + "kse" ; NPossTransl Pl => ukoi + "kse" ; NPossIllat Sg => init ukkoon ; - NPossIllat Pl => init ukkoihin + NPossIllat Pl => init ukkoihin ; + + NCompound => ukkos_ } ; h = harmony ; lock_N = <> @@ -439,7 +460,8 @@ resource MorphoFin = ResFin ** open Prelude in { 6 => ukko.s ! NCase Pl Part ; 7 => ukko.s ! NCase Pl Ess ; 8 => ukko.s ! NCase Pl Iness ; - 9 => ukko.s ! NCase Pl Illat + 9 => ukko.s ! NCase Pl Illat ; + 10 => ukko.s ! NCompound } ; -- Adjective forms diff --git a/lib/src/finnish/ResFin.gf b/lib/src/finnish/ResFin.gf index 89a2a9cc9..a58ac7082 100644 --- a/lib/src/finnish/ResFin.gf +++ b/lib/src/finnish/ResFin.gf @@ -24,7 +24,8 @@ resource ResFin = ParamX ** open Prelude in { NForm = NCase Number Case | NComit | NInstruct -- no number dist | NPossNom Number | NPossGen Number --- number needed for syntax of AdjCN - | NPossTransl Number | NPossIllat Number ; + | NPossTransl Number | NPossIllat Number + | NCompound ; -- special compound form, e.g. "nais" -- Agreement of $NP$ has number*person and the polite second ("te olette valmis"). 
@@ -663,7 +664,8 @@ oper NPossTransl Sg => vede + "kse" ; NPossTransl Pl => vesii + "kse" ; NPossIllat Sg => Predef.tk 1 veteen ; - NPossIllat Pl => Predef.tk 1 vesiin + NPossIllat Pl => Predef.tk 1 vesiin ; + NCompound => vesi } ; h = harmony } ; diff --git a/lib/src/finnish/stemmed/ParseFin.gf b/lib/src/finnish/stemmed/ParseFin.gf index 8cbd49753..b9d823008 100644 --- a/lib/src/finnish/stemmed/ParseFin.gf +++ b/lib/src/finnish/stemmed/ParseFin.gf @@ -50,7 +50,7 @@ lin CompoundCN num noun cn = { - s = \\nf => num.s ! Sg ! Nom ++ noun.s ! 0 ++ BIND ++ cn.s ! nf ; + s = \\nf => num.s ! Sg ! Nom ++ noun.s ! 10 ++ BIND ++ cn.s ! nf ; h = cn.h } ; @@ -72,7 +72,7 @@ oper lin DashCN noun1 noun2 = { - s = \\nf => noun1.s ! 0 ++ BIND ++ noun2.s ! nf ; + s = \\nf => noun1.s ! 10 ++ BIND ++ noun2.s ! nf ; h = noun2.h } ; diff --git a/lib/src/finnish/stemmed/StemFin.gf b/lib/src/finnish/stemmed/StemFin.gf index 7d13862be..0ac72f86a 100644 --- a/lib/src/finnish/stemmed/StemFin.gf +++ b/lib/src/finnish/stemmed/StemFin.gf @@ -5,7 +5,7 @@ resource StemFin = open MorphoFin, Prelude in { flags coding = utf8 ; oper - SNForm : Type = Predef.Ints 9 ; + SNForm : Type = Predef.Ints 10 ; SNoun : Type = {s : SNForm => Str ; h : Harmony} ; nforms2snoun : NForms -> SNoun = \nf -> { @@ -13,13 +13,14 @@ oper 0 => nf ! 0 ; -- ukko 1 => Predef.tk 1 (nf ! 1) ; -- uko(n) 2 => nf ! 2 ; -- ukkoa - 3 => nf ! 3 ; -- ukkona + 3 => Predef.tk 2 (nf ! 3) ; -- ukkona 4 => Predef.tk 1 (nf ! 4) ; -- ukkoo(n) 5 => Predef.tk 1 (nf ! 5) ; -- ukkoje(n) 6 => nf ! 6 ; -- ukkoja 7 => Predef.tk 2 (nf ! 7) ; -- ukkoi(na) 8 => Predef.tk 3 (nf ! 8) ; -- ukoi(ssa) - 9 => Predef.tk 1 (nf ! 9) -- ukkoihi(n) + 9 => Predef.tk 1 (nf ! 9) ; -- ukkoihi(n) + 10 => nf ! 10 -- ukko(-) } ; h = aHarmony (last (nf ! 2)) ; } ; @@ -35,14 +36,14 @@ oper ukko = f ! 0 ; uko = f ! 1 ; ukkoa = f ! 2 ; - ukkona = f ! 3 ; + ukko_ = f ! 3 ; ukkoo = f ! 4 ; ukkoje = f ! 5 ; ukkoja = f ! 6 ; ukkoi = f ! 7 ; ukoi = f ! 8 ; ukkoihi = f ! 
9 ; - + ukkos_ = f ! 10 ; a = harmonyA sn.h ; in @@ -51,7 +52,7 @@ oper NCase Sg Gen => plus uko "n" ; NCase Sg Part => ukkoa ; NCase Sg Transl => plus uko "ksi" ; - NCase Sg Ess => ukkona ; + NCase Sg Ess => plus ukko_ ("n" + a) ; NCase Sg Iness => plus uko ("ss" + a) ; NCase Sg Elat => plus uko ("st" + a) ; NCase Sg Illat => plus ukkoo "n" ; @@ -76,13 +77,15 @@ oper NComit => plus ukkoi "ne" ; NInstruct => plus ukoi "n" ; - NPossNom _ => ukko ; - NPossGen Sg => ukko ; + NPossNom _ => ukko_ ; + NPossGen Sg => ukko_ ; NPossGen Pl => ukkoje ; NPossTransl Sg => plus uko "kse" ; NPossTransl Pl => plus ukoi "kse" ; NPossIllat Sg => ukkoo ; - NPossIllat Pl => ukkoihi + NPossIllat Pl => ukkoihi ; + + NCompound => ukkos_ } ; h = sn.h ; lock_N = <> @@ -200,7 +203,8 @@ oper plus tulle ("it" + a) ; plus tulle "i" ; plus tulle "i" ; - plus tulle "isii" + plus tulle "isii" ; + tullut ] ; h = sverb.h } ; @@ -216,7 +220,8 @@ oper plus tultu ("j" + a) ; plus tultu "i" ; plus tullu__ "i" ; - plus tultu "ihi" + plus tultu "ihi" ; + tultu ] ; h = sverb.h } ; @@ -284,13 +289,14 @@ oper 0 => partPlus teke "minen" ; 1 => partPlus teke "mise" ; 2 => partPlus teke "mista" ; ---- vh - 3 => partPlus teke "misena" ; ---- vh + 3 => partPlus teke "mise" ; 4 => partPlus teke "misee" ; 5 => partPlus teke "miste" ; 6 => partPlus teke "misia" ; ---- vh 7 => partPlus teke "misi" ; 8 => partPlus teke "misi" ; - 9 => partPlus teke "misii" + 9 => partPlus teke "misii" ; + 10 => partPlus teke "mis" } ; h = v.h } ; @@ -301,13 +307,14 @@ oper 0 => partPlus teke "va" ; 1 => partPlus teke "va" ; 2 => partPlus teke "vaa" ; ---- vh - 3 => partPlus teke "vana" ; ---- vh + 3 => partPlus teke "va" ; ---- vh 4 => partPlus teke "vaa" ; 5 => partPlus teke "vie" ; 6 => partPlus teke "via" ; ---- vh 7 => partPlus teke "vi" ; 8 => partPlus teke "vi" ; - 9 => partPlus teke "vii" + 9 => partPlus teke "vii" ; + 10 => partPlus teke "va" } ; h = v.h } ; @@ -334,20 +341,21 @@ oper 0 => "" ; 1 => "i" ; 2 => "ia" ; - 3 => 
"ina" ; + 3 => "i" ; 4 => "ii" ; 5 => "ie" ; 6 => "ia" ; 7 => "i" ; 8 => "i" ; - 9 => "ihi" + 9 => "ihi" ; + 10 => "" } ; bindIfS : SNForm -> Str = \c -> case c of { - 0 => [] ; + 0 | 10 => [] ; _ => BIND } ; bindColonIfS : SNForm -> Str = \c -> case c of { - 0 => [] ; + 0 | 10 => [] ; _ => BIND ++ ":" ++ BIND } ; diff --git a/lib/src/finnish/stemmed/log.txt b/lib/src/finnish/stemmed/log.txt index d93e5ec58..388b81016 100644 --- a/lib/src/finnish/stemmed/log.txt +++ b/lib/src/finnish/stemmed/log.txt @@ -99,7 +99,19 @@ really be needed. Fixed NounFin.IndefArt, which erroneously added "yksi" to the substantival form of numeral determiners. This changed 125 linearizations - but there are some mistaken parses of numbers in the treebank, in particular years. Also fixed the passive -VP in the infinitive form, to better results in 95 sentences - but this structure should be different in Finnish: +VP in the infinitive form, to better results in 95 sentences - but this structure should be different in Finnish. + +Fixing passive past tenses improved 250 sentences! Incredibly, they had been missing in the RGL. The compound tenses were +corrected as well: "minut ollaan nähty" -> "minut on nähty" ("I have been seen"). + +Fixed the form for NPossNom and NPossGen. It had mistakenly been the Nom form. This gave "rakkausnsa" ("his love"). +The proper form is the tk-2 prefix of the essive case: "rakkautensa"; the tk-1 genitive won't do ("rakkaudensa"). +This changed 81 sentences for the better. + +Added NCompound, or form nr 10, to nouns. This may differ from Nom Sg, e.g. käteinenvirtaus -> käteisvirtaus. 107 errors +were corrected by this. + +