From 01f12a135b88850c28738e667b89eb34a1084f9a Mon Sep 17 00:00:00 2001 From: aarne Date: Fri, 29 Mar 2013 12:45:24 +0000 Subject: [PATCH] preliminary implementation of GerundN and GerundAP in ParseFin. More coverage, but some strange translations. --- lib/src/finnish/stemmed/ParadigmsFin.gf | 2 ++ lib/src/finnish/stemmed/ParseFin.gf | 13 ++----- lib/src/finnish/stemmed/StemFin.gf | 45 +++++++++++++++++++++++++ lib/src/finnish/stemmed/log.txt | 5 +++ 4 files changed, 54 insertions(+), 11 deletions(-) diff --git a/lib/src/finnish/stemmed/ParadigmsFin.gf b/lib/src/finnish/stemmed/ParadigmsFin.gf index 924143b3a..56a08dc4d 100644 --- a/lib/src/finnish/stemmed/ParadigmsFin.gf +++ b/lib/src/finnish/stemmed/ParadigmsFin.gf @@ -117,6 +117,7 @@ oper mkN : (pika : Str) -> (juna : N) -> N ; -- compound with invariable prefix mkN : (oma : N) -> (tunto : N) -> N ; -- compound with inflecting prefix mkN : NK -> N ; -- noun from DictFin (Kotus) + mkN : V -> N ; -- verbal noun: "tekeminen" } ; -- Some nouns are regular except for the singular nominative (e.g. "mies"). @@ -344,6 +345,7 @@ mkVS = overload { mkN : (sora : Str) -> (tie : N) -> N = mkStrN ; mkN : (oma,tunto : N) -> N = mkNN ; mkN : (sana : NK) -> N = \w -> nforms2snoun w.s ; + mkN : V -> N = \w -> sverb2snoun w ; } ; exceptNomN : N -> Str -> N = \noun,nom -> lin N { diff --git a/lib/src/finnish/stemmed/ParseFin.gf b/lib/src/finnish/stemmed/ParseFin.gf index 7092c21eb..a33daf663 100644 --- a/lib/src/finnish/stemmed/ParseFin.gf +++ b/lib/src/finnish/stemmed/ParseFin.gf @@ -86,18 +86,9 @@ lin isPron = np1.isPron ; isNeg = np1.isNeg } ; - -{- - GerundN v = { -- parsing -- GerundN : V -> N ; peseminen - s = \\n,c => v.s ! VPresPart ; - g = Neutr - } ; + GerundN v = mkN (lin V v) ; - GerundAP v = { -- beckoning -- V -> AP houkutteleva - s = \\agr => v.s ! VPresPart ; - isPre = True - } ; --} + GerundAP v = {s = \\_ => (snoun2nounSep (sverb2nounPresPartAct v)).s} ; OrdCompar a = snoun2nounSep {s = \\nc => a.s ! Compar ! 
SAN nc ; h = a.h} ; diff --git a/lib/src/finnish/stemmed/StemFin.gf b/lib/src/finnish/stemmed/StemFin.gf index 46b26d29c..4c1075b15 100644 --- a/lib/src/finnish/stemmed/StemFin.gf +++ b/lib/src/finnish/stemmed/StemFin.gf @@ -271,6 +271,51 @@ oper -- (Verb ** {sc : NPForm ; qp : Bool ; p : Str}) -> VP = \verb -> { +-- word formation functions + + sverb2snoun : SVerb1 -> SNoun = \v -> -- syöminen + let teke = v.s ! 3 in { + s = table { + 0 => partPlus teke "minen" ; + 1 => partPlus teke "mise" ; + 2 => partPlus teke "mista" ; ---- vh + 3 => partPlus teke "misena" ; ---- vh + 4 => partPlus teke "misee" ; + 5 => partPlus teke "miste" ; + 6 => partPlus teke "misia" ; ---- vh + 7 => partPlus teke "misi" ; + 8 => partPlus teke "misi" ; + 9 => partPlus teke "misii" + } ; + h = v.h + } ; + + sverb2nounPresPartAct : SVerb1 -> SNoun = \v -> -- syövä + let teke = v.s ! 3 in { + s = table { + 0 => partPlus teke "va" ; + 1 => partPlus teke "va" ; + 2 => partPlus teke "vaa" ; ---- vh + 3 => partPlus teke "vana" ; ---- vh + 4 => partPlus teke "vaa" ; + 5 => partPlus teke "vie" ; + 6 => partPlus teke "via" ; ---- vh + 7 => partPlus teke "vi" ; + 8 => partPlus teke "vi" ; + 9 => partPlus teke "vii" + } ; + h = v.h + } ; + + sverb2nounPresPartPass : SVerb1 -> SNoun = \v -> -- syötävä + let a = harmonyA v.h in + nforms2snoun (dLava (partPlus (v.s ! 3) (partPlus "t" (partPlus a (partPlus "v" a))))) ; + + dLava : Str -> NForms = \s -> dUkko s (s + "n") ; + + --- to use these at run time in ParseFin + partPlus = glue ; + -- auxiliary plusIf : Bool -> Str -> Str -> Str = \b,x,y -> case b of { diff --git a/lib/src/finnish/stemmed/log.txt b/lib/src/finnish/stemmed/log.txt index 8e31bdb7f..cde0b5ed9 100644 --- a/lib/src/finnish/stemmed/log.txt +++ b/lib/src/finnish/stemmed/log.txt @@ -22,4 +22,9 @@ are completely translated (but mostly not so well...) 317 no lin 182 lin with unknowns +After implementing GerundN and GerundAP, only 40 lin with unknowns. 
But the implementations are bad: +- applying them to a run-time V prevents correct vowel harmony +- compound forms should use the stem "mis" instead of "minen", e.g. hinnoitteleminendetaljit should be hinnoittelemisdetaljit + +