From 73976ec803b141c77ae95be841062115012199de Mon Sep 17 00:00:00 2001 From: aarne Date: Tue, 17 Mar 2015 15:04:28 +0000 Subject: [PATCH] started finnish/tagged/ as a variant of the RGL that uses external morphological analysis --- lib/src/finnish/tagged/StemFin.gf | 416 ++++++++++++++++++++++++++++++ lib/src/finnish/tagged/TagFin.gf | 37 +++ 2 files changed, 453 insertions(+) create mode 100644 lib/src/finnish/tagged/StemFin.gf create mode 100644 lib/src/finnish/tagged/TagFin.gf diff --git a/lib/src/finnish/tagged/StemFin.gf b/lib/src/finnish/tagged/StemFin.gf new file mode 100644 index 000000000..566ab85f9 --- /dev/null +++ b/lib/src/finnish/tagged/StemFin.gf @@ -0,0 +1,416 @@ +-- opers implementing the API of stem-based morphology in finnish/stemmed for tagged Finnish + +resource StemFin = open TagFin, MorphoFin, Prelude in { + +flags coding = utf8 ; + +oper + SNForm : Type = {} ; + SNoun : Type = {s : Str} ; + + nforms2snoun : NForms -> SNoun = \nfs => {s = nfs ! 0} ; + + snoun2nounBind : SNoun -> Noun = snoun2noun True ; + snoun2nounSep : SNoun -> Noun = snoun2noun False ; + + snoun2noun : Bool -> SNoun -> Noun = \b,sn -> {s = \\nf => sn.s ++ mkTag "N" + tagNForm nf ; h = Back} ; + + + snoun2np : Number -> SPN -> NPForm => Str = \n,sn -> + \\c => sn.s ! (npform2case n c) ; + +-- noun2snoun : Noun -> SNoun = \n -> n ; not needed for anything + + aHarmony : Str -> Harmony = \a -> case a of + {"a" => Back ; _ => Front} ; + + harmonyA : Harmony -> Str = harmonyV "a" "ä" ; + + harmonyV : Str -> Str -> Harmony -> Str = \u,y,h -> case h of + {Back => u ; Front => y} ; + + + SPN : Type = {s : Case => Str} ; + + snoun2spn : SNoun -> SPN = \n -> {s = \\c => n.s ! NCase Sg c} ; + + exceptNomSNoun : SNoun -> Str -> SNoun = \noun,nom -> { + s = table { + NCase Sg Nom => nom ; + f => noun.s ! f + } ; + h = noun.h + } ; + + + +-- Adjectives --- could be made more compact by pressing comparison forms down to a few + +oper + SAForm : Type = AForm ; + +oper + SAdj = {s : SAForm => Str ; h : Harmony} ; + + snoun2sadj : SNoun -> SAdj = snoun2sadjComp True ; + + snoun2sadjComp : Bool -> SNoun -> SAdj = \isPos,tuore -> + let + tuoree = init (tuore.s ! NCase Sg Gen) ; + tuoreesti = tuoree + "sti" ; + tuoreemmin = init tuoree ; + in {s = table { + AN f => tuore.s ! f ; + AAdv => if_then_Str isPos tuoreesti tuoreemmin + } ; + h = tuore.h + } ; + + sAN : SNForm -> SAForm = \n -> AN n ; ---- without eta exp gives internal error 6/8/2013 + sAAdv : SAForm = AAdv ; + sANGen : (SAForm => Str) -> Str = \a -> a ! AN (NCase Sg Gen) ; + + mkAdj : (hyva,parempi,paras : SNoun) -> (hyvin,paremmin,parhaiten : Str) -> {s : Degree => SAForm => Str ; h : Harmony} = \h,p,ps,hn,pn,ph -> { + s = table { + Posit => table { + AN nf => h.s ! nf ; + AAdv => hn + } ; + Compar => table { + AN nf => p.s ! nf ; + AAdv => pn + } ; + Superl => table { + AN nf => ps.s ! nf ; + AAdv => ph + } + } ; + h = h.h + } ; + + snoun2compar : SNoun -> Str = \n -> init (n.s ! NCase Sg Gen) + "mpi" ; ---- kivempi + snoun2superl : SNoun -> Str = \n -> n.s ! NInstruct ; ---- kivin vs. kivoin + +-- verbs + +oper + SVForm : Type = VForm ; + SVerb : Type = {s : SVForm => Str ; h : Harmony} ; + + ollaSVerbForms : SVForm => Str = verbOlla.s ; + + -- used in Cat + SVerb1 = {s : SVForm => Str ; sc : SubjCase ; h : Harmony ; p : Str} ; + + sverb2verbBind : SVerb -> Verb = sverb2verb True ; + sverb2verbSep : SVerb -> Verb = sverb2verb False ; + + vforms2sverb : VForms -> SVerb = \v -> + {s = (vforms2V v).s ; h = case (last (v ! 0)) of {"a" => Back ; _ => Front}} ; + + sverb2verb : Bool -> SVerb -> Verb = \b,sverb -> {s = sverb.s} ; + + predSV : SVerb1 -> VP = \sv -> + predV sv ; + -- (sverb2verbSep sv ** {p = sv.p ; sc = sv.sc ; h = sv.h}) ; + +-- word formation functions + + sverb2snoun : SVerb1 -> SNoun = \v -> -- syöminen + let tekem = Predef.tk 4 (v.s ! Inf Inf3Iness) in + nforms2snoun (dNainen (tekem + "inen")) ; + +{- + sverb2nounPresPartAct : SVerb1 -> SNoun = \v -> -- syövä + let teke = Predef.tk 5 (v.s ! Inf Inf3Iness) in + nforms2snoun (dLava (teke + "v" + last (v.s ! Inf1))) ; + + sverb2nounPresPartPass : SVerb1 -> SNoun = \v -> -- syötävä + let a = harmonyA v.h in + nforms2snoun (dLava (partPlus (v.s ! 3) (partPlus "t" (partPlus a (partPlus "v" a))))) ; +-} + + dLava : Str -> NForms = \s -> dUkko s (s + "n") ; + + --- to use these at run time in ParseFin + partPlus = glue ; + +-- auxiliary + + plusIf : Bool -> Str -> Str -> Str = \b,x,y -> case b of { + True => x + y ; + False => glue x y + } ; + +-- for Symbol + + addStemEnding : Str -> SPN = \i -> + {s = \\c => i ++ bindColonIfS (NCase Sg c) ++ defaultCaseEnding c} ; + + bindIfS : SNForm -> Str = \c -> case c of { + NCase Sg Nom => [] ; + _ => BIND + } ; + + bindColonIfS : SNForm -> Str = \c -> case c of { + NCase Sg Nom => [] ; + _ => BIND ++ ":" ++ BIND + } ; + +----------------------------------------------------------------------- +---- a hack to make VerbFin compile accurately for library (here), +---- and in a simplified way for ParseFin (in stemmed/) + + slashV2VNP : (SVerb1 ** {c2 : Compl ; vi : VVType}) -> (NP ** {isNeg : Bool}) -> + (VP ** {c2 : Compl}) -> (VP ** {c2 : Compl}) + = \v, np, vp -> + insertObjPre np.isNeg + (\fin,b,a -> appCompl fin b v.c2 np ++ + infVP v.sc b a vp (vvtype2infform v.vi)) + (predSV v) ** {c2 = vp.c2} ; + + +---- VP now stemming-dependent. AR 7/12/2013 + + VP = { + s : SVerb1 ; + s2 : Bool => Polarity => Agr => Str ; -- talo/talon/taloa + adv : Polarity => Str ; -- ainakin/ainakaan + ext : Str ; + vptyp : {isNeg : Bool ; isPass : Bool} ;-- True if some complement is negative, and if the verb is rendered in the passive + } ; + + defaultVPTyp = {isNeg = False ; isPass = False} ; + + HVerb : Type = Verb ** {sc : SubjCase ; h : Harmony ; p : Str} ; + + predV : HVerb -> VP = \verb -> { + s = verb ; + s2 = \\_,_,_ => [] ; + adv = \\_ => verb.p ; -- the particle of the verb + ext = [] ; + vptyp = defaultVPTyp ; + } ; + + old_VP = { + s : VIForm => Anteriority => Polarity => Agr => {fin, inf : Str} ; + s2 : Bool => Polarity => Agr => Str ; -- talo/talon/taloa + adv : Polarity => Str ; -- ainakin/ainakaan + ext : Str ; + sc : SubjCase ; + isNeg : Bool ; -- True if some complement is negative + h : Harmony + } ; + + vp2old_vp : VP -> old_VP = \vp -> + let + verb = vp.s ; + sverb : VIForm => Anteriority => Polarity => Agr => {fin, inf : Str} = \\vi,ant,b,agr0 => + let + agr = verbAgr agr0 ; + verbs = verb.s ; + part : Str = case vi of { + VIPass _ => verbs ! PastPartPass (AN (NCase agr.n Nom)) ; + _ => verbs ! PastPartAct (AN (NCase agr.n Nom)) + } ; + + eiv : Str = case agr of { + {n = Sg ; p = P1} => "en" ; + {n = Sg ; p = P2} => "et" ; + {n = Sg ; p = P3} => "ei" ; + {n = Pl ; p = P1} => "emme" ; + {n = Pl ; p = P2} => "ette" ; + {n = Pl ; p = P3} => "eivät" + } ; + + einegole : Str * Str * Str = case of { + => ; + => ; --# notpresent + => ; --# notpresent + => ; --# notpresent + => ; --# notpresent + => <"älä", verbs ! Imper Sg, "ole"> ; + => <"älkää", verbs ! ImpNegPl, "olko"> ; + => <"ei", verbs ! PassPresn False, "ole"> ; + => <"ei", verbs ! PassPresn False, "ole"> ; --# notpresent + => <"ei", verbs ! PassCondit False, "olisi"> ; --# notpresent + => <"ei", verbs ! PassImpf False, "ollut"> ; --# notpresent + => <"ei", verbs ! Inf i, "olla"> ---- + } ; + + ei : Str = einegole.p1 ; + neg : Str = einegole.p2 ; + ole : Str = einegole.p3 ; + + olla : VForm => Str = table { + PassPresn True => verbOlla.s ! Presn Sg P3 ; + PassImpf True => verbOlla.s ! Impf Sg P3 ; --# notpresent + PassCondit True => verbOlla.s ! Condit Sg P3 ; --# notpresent + vf => verbOlla.s ! vf + } ; + + vf : Str -> Str -> {fin, inf : Str} = \x,y -> + {fin = x ; inf = y} ; + mkvf : VForm -> {fin, inf : Str} = \p -> case of { + => vf (verbs ! p) [] ; + => vf (olla ! p) part ; --# notpresent + => vf ei (ole ++ part) ; --# notpresent + => vf ei neg + } ; + passPol = case b of {Pos => True ; Neg => False} ; + in + case vi of { + VIFin Past => mkvf (Impf agr.n agr.p) ; --# notpresent + VIFin Cond => mkvf (Condit agr.n agr.p) ; --# notpresent + VIFin Fut => mkvf (Presn agr.n agr.p) ; --# notpresent + VIFin Pres => mkvf (Presn agr.n agr.p) ; + VIImper => mkvf (Imper agr.n) ; + VIPass Past => mkvf (PassImpf passPol) ; --# notpresent + VIPass Cond => mkvf (PassCondit passPol) ; --# notpresent + VIPass Fut => mkvf (PassPresn passPol) ; --# notpresent + VIPass Pres => mkvf (PassPresn passPol) ; + VIInf i => mkvf (Inf i) + } + in { + s = case vp.vptyp.isPass of { + True => \\vif,ant,pol,agr => case vif of { + VIFin t => sverb ! VIPass t ! ant ! pol ! agr ; + _ => sverb ! vif ! ant ! pol ! agr + } ; + _ => sverb + } ; + s2 = vp.s2 ; + adv = vp.adv ; -- the particle of the verb + ext = vp.ext ; + sc = verb.sc ; + h = verb.h ; + isNeg = vp.vptyp.isNeg + } ; + + insertObj : (Bool => Polarity => Agr => Str) -> VP -> VP = \obj,vp -> { + s = vp.s ; + s2 = \\fin,b,a => vp.s2 ! fin ! b ! a ++ obj ! fin ! b ! a ; + adv = vp.adv ; + ext = vp.ext ; + sc = vp.sc ; + h = vp.h ; + vptyp = vp.vptyp + } ; + + insertObjPre : Bool -> (Bool -> Polarity -> Agr -> Str) -> VP -> VP = \isNeg, obj,vp -> { + s = vp.s ; + s2 = \\fin,b,a => obj fin b a ++ vp.s2 ! fin ! b ! a ; + adv = vp.adv ; + ext = vp.ext ; + vptyp = {isNeg = orB vp.vptyp.isNeg isNeg ; isPass = vp.vptyp.isPass} ; + } ; + + insertAdv : (Polarity => Str) -> VP -> VP = \adv,vp -> { + s = vp.s ; + s2 = vp.s2 ; + ext = vp.ext ; + adv = \\b => vp.adv ! b ++ adv ! b ; + sc = vp.sc ; + h = vp.h ; + vptyp = vp.vptyp --- missään + } ; + + passVP : VP -> Compl -> VP = \vp,pr -> { + s = {s = vp.s.s ; h = vp.s.h ; p = vp.s.p ; sc = npform2subjcase pr.c} ; -- minusta pidetään ---- TODO minun katsotaan päälle + s2 = \\b,p,a => pr.s.p1 ++ vp.s2 ! b ! p ! a ++ pr.s.p2 ; ---- possessive suffix + ext = vp.ext ; + adv = vp.adv ; + vptyp = {isNeg = vp.vptyp.isNeg ; isPass = True} ; + } ; + + insertExtrapos : Str -> VP -> VP = \obj,vp -> { + s = vp.s ; + s2 = vp.s2 ; + ext = vp.ext ++ obj ; + adv = vp.adv ; + sc = vp.sc ; + h = vp.h ; + vptyp = vp.vptyp + } ; + + mkClausePol : Bool -> (Polarity -> Str) -> Agr -> VP -> Clause = + \isNeg,sub,agr,vp -> { + s = \\t,a,b => + let + pol = case isNeg of { + True => Neg ; + _ => b + } ; + c = (mkClausePlus sub agr vp).s ! t ! a ! pol + in + table { + SDecl => c.subj ++ c.fin ++ c.inf ++ c.compl ++ c.adv ++ c.ext ; + SQuest => c.fin ++ BIND ++ questPart c.h ++ c.subj ++ c.inf ++ c.compl ++ c.adv ++ c.ext + } + } ; + mkClause : (Polarity -> Str) -> Agr -> VP -> Clause = + \sub,agr,vp -> { + s = \\t,a,b => let c = (mkClausePlus sub agr vp).s ! t ! a ! b in + table { + SDecl => c.subj ++ c.fin ++ c.inf ++ c.compl ++ c.adv ++ c.ext ; + SQuest => c.fin ++ BIND ++ questPart c.h ++ c.subj ++ c.inf ++ c.compl ++ c.adv ++ c.ext + } + } ; + + mkClausePlus : (Polarity -> Str) -> Agr -> VP -> ClausePlus = + \sub,agr,vp0 -> let vp = vp2old_vp vp0 in { + s = \\t,a,b => + let + agrfin = case vp.sc of { + SCNom => ; + _ => -- minun täytyy, minulla on + } ; + verb = vp.s ! VIFin t ! a ! b ! agrfin.p1 ; + in {subj = sub b ; + fin = verb.fin ; + inf = verb.inf ; + compl = vp.s2 ! agrfin.p2 ! b ! agr ; + adv = vp.adv ! b ; + ext = vp.ext ; + h = selectPart vp0 a b + } + } ; + + selectPart : VP -> Anteriority -> Polarity -> Harmony = \vp,a,p -> + case p of { + Neg => Front ; -- eikö tule + _ => case a of { + Anter => Back ; -- onko mennyt --# notpresent + _ => vp.s.h -- tuleeko, meneekö + } + } ; + +-- the first Polarity is VP-internal, the second comes form the main verb: +-- ([main] tahdon | en tahdo) ([internal] nukkua | olla nukkumatta) + infVPGen : Polarity -> SubjCase -> Polarity -> Agr -> VP -> InfForm -> Str = + \ipol,sc,pol,agr,vp0,vi -> + let + vp = vp2old_vp vp0 ; + fin = case sc of { -- subject case + SCNom => True ; -- minä tahdon nähdä auton + _ => False -- minun täytyy nähdä auto + } ; + verb = case ipol of { + Pos => ; -- nähdä/näkemään + Neg => <(vp2old_vp (predV (verbOlla ** {sc = SCNom ; h = Back ; p = []}))).s ! VIInf vi ! Simul ! Pos ! agr, + (vp.s ! VIInf Inf3Abess ! Simul ! Pos ! agr).fin> -- olla/olemaan näkemättä + } ; + vph = vp.h ; + poss = case vi of { + InfPresPartAgr => possSuffixGen vph agr ; -- toivon nukkuva + ni + _ => [] + } ; + compl = vp.s2 ! fin ! pol ! agr ++ vp.adv ! pol ++ vp.ext -- compl. case propagated + in + verb.p1.fin ++ verb.p1.inf ++ poss ++ verb.p2 ++ compl ; + + infVP : SubjCase -> Polarity -> Agr -> VP -> InfForm -> Str = infVPGen Pos ; + + vpVerbOlla : HVerb = verbOlla ** {sc = SCNom ; h = Back ; p = []} ; + +} \ No newline at end of file diff --git a/lib/src/finnish/tagged/TagFin.gf b/lib/src/finnish/tagged/TagFin.gf new file mode 100644 index 000000000..02003caea --- /dev/null +++ b/lib/src/finnish/tagged/TagFin.gf @@ -0,0 +1,37 @@ +resource TagFin = open ResFin, Prelude in { + +oper + Tag : Type = Str ; + + mkTag : Str -> Tag = \t -> "+" + t ; + + tagNForm : NForm -> Str = \nf -> case nf of { + NCase n c => tagNumber n + tagCase c ; + NComit => tagNumber Pl + mkTag "Comit" ; + NInstruct => tagNumber Pl + mkTag "Instr" ; + NPossNom n => tagNumber n + tagCase Nom ; + NPossGen n => tagNumber n + tagCase Gen ; + NPossTransl n => tagNumber n + tagCase Transl ; + NPossIllat n => tagNumber n + tagCase Illat ; + NCompound => mkTag "Comp" + } ; + + tagCase : Case -> Str = \c -> case c of { + Nom => mkTag "Nom" ; + Gen => mkTag "Gen" ; + Part => mkTag "Part" ; + Transl => mkTag "Transl" ; + Ess => mkTag "Ess" ; + Iness => mkTag "Iness" ; + Elat => mkTag "Elat" ; + Illat => mkTag "Illat" ; + Adess => mkTag "Adess" ; + Ablat => mkTag "Ablat" ; + Allat => mkTag "Allat" ; + Abess => mkTag "Abess" + } ; + tagNumber : Number -> Str = \n -> case n of { + Sg => mkTag "Sg" ; + Pl => mkTag "Pl" + } ; +} \ No newline at end of file