mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
Omorfi tagging is mostly complete for the open classes in Finnish; some closed classes and syncategorematic words are still missing
This commit is contained in:
@@ -766,8 +766,8 @@ mkVS = overload {
|
||||
dirV2 v = mk2V2 v accPrep ;
|
||||
|
||||
mkAdv = overload {
|
||||
mkAdv : Str -> Adv = \s -> {s = s ; lock_Adv = <>} ;
|
||||
mkAdv : AdvK -> Adv = \s -> {s = s.s ; lock_Adv = <>} ;
|
||||
mkAdv : Str -> Adv = \s -> {s = tagPOS "ADV" s ; lock_Adv = <>} ;
|
||||
mkAdv : AdvK -> Adv = \s -> {s = tagPOS "ADV" s.s ; lock_Adv = <>} ;
|
||||
} ;
|
||||
|
||||
mkV2 = overload {
|
||||
|
||||
@@ -4,6 +4,11 @@ resource StemFin = open MorphoFin, Prelude in {
|
||||
|
||||
flags coding = utf8 ;
|
||||
|
||||
oper
|
||||
-- other classes not treated below are POS tagged when the grammar is used with Omorfi
|
||||
|
||||
tagPOS : Str -> Str -> Str = \_,s -> s ; -- untagged variant: the POS label (first argument) is ignored and the word is returned unchanged
|
||||
|
||||
oper
|
||||
SNForm : Type = NForm ;
|
||||
SNoun : Type = Noun ;
|
||||
|
||||
@@ -18,15 +18,15 @@ concrete StructuralFin of Structural = CatFin **
|
||||
}
|
||||
} ;
|
||||
almost_AdA, almost_AdN = ssp "ADV" "melkein" ; -- routed through ssp like very_AdA, too_AdA and at_least_AdN, so it is POS tagged when tagPOS is active
|
||||
although_Subj = ss "vaikka" ;
|
||||
always_AdV = ss "aina" ;
|
||||
and_Conj = {s1 = [] ; s2 = "ja" ; n = Pl} ;
|
||||
because_Subj = ss "koska" ;
|
||||
although_Subj = ssp "CONJ" "vaikka" ;
|
||||
always_AdV = ssp "ADV" "aina" ;
|
||||
and_Conj = {s1 = [] ; s2 = tagPOS "CONJ" "ja" ; n = Pl} ;
|
||||
because_Subj = ssp "CONJ" "koska" ;
|
||||
before_Prep = prePrep partitive "ennen" ;
|
||||
behind_Prep = postGenPrep "takana" ;
|
||||
between_Prep = postGenPrep "välissä" ;
|
||||
both7and_DConj = sd2 "sekä" "että" ** {n = Pl} ;
|
||||
but_PConj = ss "mutta" ;
|
||||
but_PConj = ssp "CONJ" "mutta" ;
|
||||
by8agent_Prep = postGenPrep "toimesta" ;
|
||||
by8means_Prep = casePrep adessive ;
|
||||
can8know_VV = mkVV (mkV "osata" "osasi") ;
|
||||
@@ -36,20 +36,20 @@ concrete StructuralFin of Structural = CatFin **
|
||||
everybody_NP = lin NP (makeNP (((mkN "jokainen"))) Sg) ;
|
||||
every_Det = MorphoFin.mkDet Sg (snoun2nounBind (mkN "jokainen")) ;
|
||||
everything_NP = makeNP ((((mkN "kaikki" "kaiken" "kaikkena")))) Sg ;
|
||||
everywhere_Adv = ss "kaikkialla" ;
|
||||
everywhere_Adv = mkAdv "kaikkialla" ;
|
||||
few_Det = MorphoFin.mkDet Sg (snoun2nounBind (mkN "harva")) ;
|
||||
--- first_Ord = {s = \\n,c => (mkN "ensimmäinen").s ! NCase n c} ;
|
||||
for_Prep = casePrep allative ;
|
||||
from_Prep = casePrep elative ;
|
||||
he_Pron = mkPronoun "hän" "hänen" "häntä" "hänenä" "häneen" Sg P3 ;
|
||||
here_Adv = ss "täällä" ;
|
||||
here7to_Adv = ss "tänne" ;
|
||||
here7from_Adv = ss "täältä" ;
|
||||
how_IAdv = ss "miten" ;
|
||||
how8much_IAdv = ss "kuinka paljon" ;
|
||||
here_Adv = mkAdv "täällä" ;
|
||||
here7to_Adv = mkAdv "tänne" ;
|
||||
here7from_Adv = mkAdv "täältä" ;
|
||||
how_IAdv = ssp "ADV" "miten" ;
|
||||
how8much_IAdv = ss (tagPOS "ADV" "kuinka" ++ tagPOS "ADV" "paljon") ; -- each word gets its own tag, so the result does not depend on which side tagPOS attaches the label
|
||||
how8many_IDet =
|
||||
{s = \\c => "kuinka" ++ (snoun2nounBind (mkN "moni" "monia")).s ! NCase Sg c ; n = Sg ; isNum = False} ;
|
||||
if_Subj = ss "jos" ;
|
||||
if_Subj = ssp "CONJ" "jos" ;
|
||||
in8front_Prep = postGenPrep "edessä" ;
|
||||
i_Pron = mkPronoun "minä" "minun" "minua" "minuna" "minuun" Sg P1 ;
|
||||
in_Prep = casePrep inessive ;
|
||||
@@ -65,18 +65,18 @@ concrete StructuralFin of Structural = CatFin **
|
||||
most_Predet = {s = \\n,c => (nForms2N (dSuurin "useinta")).s ! NCase n (npform2case n c)} ;
|
||||
much_Det = MorphoFin.mkDet Sg (snoun2nounBind (exceptNomN (mkN "paljo") "paljon")) ** {isNum = True} ; --Harmony not relevant, it's just a CommonNoun
|
||||
must_VV = mkVV (caseV genitive (mkV "täytyä")) ;
|
||||
no_Utt = ss "ei" ;
|
||||
no_Utt = ssp "INTERJ" "ei" ;
|
||||
on_Prep = casePrep adessive ;
|
||||
--- one_Quant = MorphoFin.mkDet Sg DEPREC
|
||||
only_Predet = {s = \\_,_ => "vain"} ;
|
||||
or_Conj = {s1 = [] ; s2 = "tai" ; n = Sg} ;
|
||||
otherwise_PConj = ss "muuten" ;
|
||||
or_Conj = {s1 = [] ; s2 = tagPOS "CONJ" "tai" ; n = Sg} ;
|
||||
otherwise_PConj = ssp "ADV" "muuten" ;
|
||||
part_Prep = casePrep partitive ;
|
||||
please_Voc = ss ["ole hyvä"] ; --- number
|
||||
possess_Prep = casePrep genitive ;
|
||||
quite_Adv = ss "melko" ;
|
||||
quite_Adv = ssp "ADV" "melko" ;
|
||||
she_Pron = mkPronoun "hän" "hänen" "häntä" "hänenä" "häneen" Sg P3 ;
|
||||
so_AdA = ss "niin" ;
|
||||
so_AdA = ssp "ADV" "niin" ;
|
||||
somebody_NP = {
|
||||
s = \\c => jokuPron ! Sg ! npform2case Sg c ;
|
||||
a = agrP3 Sg ;
|
||||
@@ -97,7 +97,7 @@ concrete StructuralFin of Structural = CatFin **
|
||||
a = agrP3 Sg ;
|
||||
isPron = False ; isNeg = False
|
||||
} ;
|
||||
somewhere_Adv = ss "jossain" ;
|
||||
somewhere_Adv = ssp "ADV" "jossain" ;
|
||||
that_Quant = heavyQuant {
|
||||
s1 = table (MorphoFin.Number) {
|
||||
Sg => table (MorphoFin.Case) {
|
||||
@@ -109,11 +109,11 @@ concrete StructuralFin of Structural = CatFin **
|
||||
} ;
|
||||
s2 = \\_ => [] ; isNum,isPoss = False ; isDef = True ; isNeg = False
|
||||
} ;
|
||||
that_Subj = ss "että" ;
|
||||
there_Adv = ss "siellä" ; --- tuolla
|
||||
there7to_Adv = ss "sinne" ;
|
||||
there7from_Adv = ss "sieltä" ;
|
||||
therefore_PConj = ss "siksi" ;
|
||||
that_Subj = ssp "CONJ" "että" ;
|
||||
there_Adv = ssp "ADV" "siellä" ; --- tuolla
|
||||
there7to_Adv = ssp "ADV" "sinne" ;
|
||||
there7from_Adv = ssp "ADV" "sieltä" ;
|
||||
therefore_PConj = ssp "ADV" "siksi" ;
|
||||
they_Pron = mkPronoun "he" "heidän" "heitä" "heinä" "heihin" Pl P3 ; --- ne
|
||||
this_Quant = heavyQuant {
|
||||
s1 = table (MorphoFin.Number) {
|
||||
@@ -127,10 +127,10 @@ concrete StructuralFin of Structural = CatFin **
|
||||
s2 = \\_ => [] ; isNum,isPoss = False ; isDef = True ; isNeg = False
|
||||
} ;
|
||||
through_Prep = postGenPrep "kautta" ;
|
||||
too_AdA = ss "liian" ;
|
||||
too_AdA = ssp "ADV" "liian" ;
|
||||
to_Prep = casePrep illative ; --- allative
|
||||
under_Prep = postGenPrep "alla" ;
|
||||
very_AdA = ss "erittäin" ;
|
||||
very_AdA = ssp "ADV" "erittäin" ;
|
||||
want_VV = mkVV (mkV "tahtoa") ;
|
||||
we_Pron = mkPronoun "me" "meidän" "meitä" "meinä" "meihin" Pl P1 ;
|
||||
whatPl_IP = {
|
||||
@@ -141,9 +141,9 @@ concrete StructuralFin of Structural = CatFin **
|
||||
s = \\c => mikaInt ! Sg ! npform2case Sg c ;
|
||||
n = Sg
|
||||
} ;
|
||||
when_IAdv = ss "milloin" ;
|
||||
when_Subj = ss "kun" ;
|
||||
where_IAdv = ss "missä" ;
|
||||
when_IAdv = ssp "ADV" "milloin" ;
|
||||
when_Subj = ssp "CONJ" "kun" ;
|
||||
where_IAdv = ssp "ADV" "missä" ;
|
||||
which_IQuant = {
|
||||
s = mikaInt
|
||||
} ;
|
||||
@@ -155,10 +155,10 @@ concrete StructuralFin of Structural = CatFin **
|
||||
s = table {NPAcc => "ketkä" ; c => kukaInt ! Pl ! npform2case Pl c} ;
|
||||
n = Pl
|
||||
} ;
|
||||
why_IAdv = ss "miksi" ;
|
||||
why_IAdv = ssp "ADV" "miksi" ;
|
||||
without_Prep = prePrep partitive "ilman" ;
|
||||
with_Prep = postGenPrep "kanssa" ;
|
||||
yes_Utt = ss "kyllä" ;
|
||||
yes_Utt = ssp "INTERJ" "kyllä" ;
|
||||
youSg_Pron = mkPronoun "sinä" "sinun" "sinua" "sinuna" "sinuun" Sg P2 ;
|
||||
youPl_Pron = mkPronoun "te" "teidän" "teitä" "teinä" "teihin" Pl P2 ;
|
||||
youPol_Pron =
|
||||
@@ -300,8 +300,8 @@ lin
|
||||
isPron = False ; isNeg = True
|
||||
} ;
|
||||
|
||||
at_least_AdN = ss "vähintään" ;
|
||||
at_most_AdN = ss "enintään" ;
|
||||
at_least_AdN = ssp "ADV" "vähintään" ;
|
||||
at_most_AdN = ssp "ADV" "enintään" ;
|
||||
|
||||
as_CAdv = X.mkCAdv "yhtä" "kuin" ;
|
||||
|
||||
@@ -311,5 +311,7 @@ lin
|
||||
|
||||
lin language_title_Utt = ss "suomi" ;
|
||||
|
||||
oper
|
||||
ssp : Str -> Str -> {s : Str} = \pos,word -> ss (tagPOS pos word) ; -- ss with the string passed through tagPOS first; tags for Omorfi in tagged/, otherwise the same as ss
|
||||
}
|
||||
|
||||
|
||||
@@ -4,6 +4,12 @@ resource StemFin = open MorphoFin, Prelude in {
|
||||
|
||||
flags coding = utf8 ;
|
||||
|
||||
oper
|
||||
-- other classes not treated below are POS tagged when the grammar is used with Omorfi
|
||||
|
||||
tagPOS : Str -> Str -> Str = \_,s -> s ; -- untagged variant: the POS label (first argument) is ignored and the word is returned unchanged
|
||||
|
||||
|
||||
oper
|
||||
SNForm : Type = Predef.Ints 10 ;
|
||||
SNoun : Type = {s : SNForm => Str ; h : Harmony} ;
|
||||
|
||||
@@ -4,6 +4,11 @@ resource StemFin = open TagFin, MorphoFin, Prelude in {
|
||||
|
||||
flags coding = utf8 ;
|
||||
|
||||
oper
|
||||
-- other classes not treated below are POS tagged when the grammar is used with Omorfi
|
||||
|
||||
tagPOS : Str -> Str -> Str = \p,s -> tagWord p s ; -- tagged variant: hands the POS label p and the word s to tagWord (presumably from the opened TagFin module -- confirm)
|
||||
|
||||
oper
|
||||
SNForm : Type = Predef.Ints 0 ; --- not really needed
|
||||
SNoun : Type = {s : SNForm => Str ; h : Harmony} ; --- Harmony needed only for API compatibility
|
||||
@@ -71,6 +76,7 @@ oper
|
||||
snoun2compar : SNoun -> Str = \n -> n.s ! 0 ++ "?Comp" ; ---- TODO -- placeholder: form 0 of the noun followed by the literal marker "?Comp"
|
||||
snoun2superl : SNoun -> Str = \n -> n.s ! 0 ++ "?Superl" ; ---- TODO -- placeholder: form 0 of the noun followed by the literal marker "?Superl"
|
||||
|
||||
|
||||
-- verbs
|
||||
|
||||
oper
|
||||
|
||||
@@ -18,20 +18,24 @@ oper
|
||||
consTag : (_,_,_,_,_,_ : Str) -> Tag = \t,u,v,x,y,z -> t + "|" + u + "|" + v + "|" + x + "|" + y + "|" + z ;
|
||||
} ;
|
||||
|
||||
tagNForm : NForm -> Str = \nf -> case nf of {
|
||||
NCase n c => consTag (tagCase c) (tagNumber n) ;
|
||||
NComit => consTag (mkTag "Case" "Com") (tagNumber Pl) ;
|
||||
NInstruct => consTag (mkTag "Case" "Ins") (tagNumber Pl) ;
|
||||
NPossNom n => consTag (tagCase Nom) (tagNumber n) ;
|
||||
NPossGen n => consTag (tagCase Gen) (tagNumber n) ;
|
||||
NPossTransl n => consTag (tagCase Transl) (tagNumber n) ;
|
||||
NPossIllat n => consTag (tagCase Illat) (tagNumber n) ;
|
||||
NCompound => mkTag "Comp" ----
|
||||
pairTag : Tag -> Tag -> Tag * Tag = \left,right -> <left,right> ; -- packs two tags into a tuple, first argument in p1, second in p2
|
||||
|
||||
tagNForm : NForm -> Tag = \nf -> let ts = tagNForms nf in consTag ts.p1 ts.p2 ; -- combines the two tags computed by tagNForms into one tag, first component first
|
||||
|
||||
-- Maps a noun form to a pair of tags: p1 is the case tag (a Form tag for NCompound), p2 the number tag.
-- NComit and NInstruct are always given the plural number tag, NCompound the singular one;
-- the NPoss* forms are tagged with the case named in the constructor (Nom, Gen, Transl, Illat).
tagNForms : NForm -> Tag * Tag = \nf -> case nf of { -- keep separate in order to squeeze in Degree of adjectives
|
||||
NCase n c => pairTag (tagCase c) (tagNumber n) ;
|
||||
NComit => pairTag (mkTag "Case" "Com") (tagNumber Pl) ;
|
||||
NInstruct => pairTag (mkTag "Case" "Ins") (tagNumber Pl) ;
|
||||
NPossNom n => pairTag (tagCase Nom) (tagNumber n) ;
|
||||
NPossGen n => pairTag (tagCase Gen) (tagNumber n) ;
|
||||
NPossTransl n => pairTag (tagCase Transl) (tagNumber n) ;
|
||||
NPossIllat n => pairTag (tagCase Illat) (tagNumber n) ;
|
||||
NCompound => pairTag (mkTag "Form" "Comp") (tagNumber Sg) ---- TODO: how is this in UD?
|
||||
} ;
|
||||
|
||||
tagAForm : AForm -> Str = \af -> case af of {
|
||||
AN nf => tagNForm nf ;
|
||||
AAdv => adverbTag
|
||||
-- Tags an adjective form together with its degree d.
tagDegreeAForm : Degree -> AForm -> Str = \d,af -> case af of {
|
||||
AN nf => let ts = tagNForms nf in consTag ts.p1 (tagDegree d) ts.p2 ; -- degree tag is placed between the two tags from tagNForms
|
||||
AAdv => consTag adverbTag (tagDegree d) ---- TODO: how is this in UD?
|
||||
} ;
|
||||
|
||||
tagVForm : VForm -> Str = \vf -> case vf of {
|
||||
@@ -55,33 +59,43 @@ oper
|
||||
PassPotent False => consTag connegativeTag potentialTag finiteTag passiveTag ;
|
||||
PassImper True => consTag imperativeTag finiteTag passiveTag ;
|
||||
PassImper False => consTag connegativeTag imperativeTag finiteTag passiveTag ;
|
||||
PastPartAct af => participleTag ++ activeTag ++ pastTag ++ tagAForm af ;
|
||||
PastPartPass af => participleTag ++ activeTag ++ pastTag ++ tagAForm af ;
|
||||
PresPartAct af => participleTag ++ activeTag ++ presentTag ++ tagAForm af ;
|
||||
PresPartPass af => participleTag ++ activeTag ++ presentTag ++ tagAForm af ;
|
||||
AgentPart af => participleTag ++ agentTag ++ tagAForm af
|
||||
PastPartAct af => consTag (tagDegreeAForm Posit af) (tagPartForm "Past") participleTag activeTag ;
|
||||
PastPartPass af => consTag (tagDegreeAForm Posit af) (tagPartForm "Past") participleTag passiveTag ;
|
||||
PresPartAct af => consTag (tagDegreeAForm Posit af) (tagPartForm "Pres") participleTag activeTag ;
|
||||
PresPartPass af => consTag (tagDegreeAForm Posit af) (tagPartForm "Pres") participleTag passiveTag ;
|
||||
AgentPart af => consTag (tagDegreeAForm Posit af) (tagPartForm "Agt") participleTag activeTag
|
||||
} ;
|
||||
|
||||
tagInfForm : InfForm -> Str = \vf -> case vf of {
|
||||
Inf1 => infinitiveTag ;
|
||||
Inf1Long => infinitiveTag ;
|
||||
Inf2Iness => infinitiveTag ;
|
||||
Inf2Instr => infinitiveTag ;
|
||||
Inf2InessPass => infinitiveTag ;
|
||||
Inf3Iness => infinitiveTag ;
|
||||
Inf3Elat => infinitiveTag ;
|
||||
Inf3Illat => infinitiveTag ;
|
||||
Inf3Adess => infinitiveTag ;
|
||||
Inf3Abess => infinitiveTag ;
|
||||
Inf3Instr => infinitiveTag ;
|
||||
Inf3InstrPass => infinitiveTag ;
|
||||
Inf4Nom => infinitiveTag ;
|
||||
Inf4Part => infinitiveTag ;
|
||||
Inf5 => infinitiveTag ;
|
||||
InfPresPart => infinitiveTag ;
|
||||
InfPresPartAgr => infinitiveTag
|
||||
Inf1 => infinitiveTag "1" ;
|
||||
Inf1Long => infinitiveTag "1" ; --- insert Person[psor]=3 when used with poss suff
|
||||
Inf2Iness => infinitiveTag "Ine" "2" ;
|
||||
Inf2Instr => infinitiveTag "Ins" "2" ;
|
||||
Inf2InessPass => infinitiveTag "Ine" "2" "Pass" ; -- inessive case value, matching the constructor name and Inf2Iness
|
||||
Inf3Iness => infinitiveTag "Ine" "3" ;
|
||||
Inf3Elat => infinitiveTag "Ela" "3" ;
|
||||
Inf3Illat => infinitiveTag "Ill" "3" ;
|
||||
Inf3Adess => infinitiveTag "Ade" "3" ;
|
||||
Inf3Abess => infinitiveTag "Abe" "3" ;
|
||||
Inf3Instr => infinitiveTag "Ins" "3" ;
|
||||
Inf3InstrPass => infinitiveTag "Ins" "3" "Pass" ;
|
||||
Inf4Nom => infinitiveTag "Nom" "4" ;
|
||||
Inf4Part => infinitiveTag "Par" "4" ;
|
||||
Inf5 => infinitiveTag "5" ; ---- not in UD
|
||||
InfPresPart => consTag (tagDegreeAForm Posit (AN (NCase Sg Nom))) (tagPartForm "Pres") participleTag activeTag ;
|
||||
InfPresPartAgr => consTag (tagDegreeAForm Posit (AN (NCase Sg Nom))) (tagPartForm "Pres") participleTag activeTag --- poss to add
|
||||
} ;
|
||||
|
||||
-- Builds the tag for an infinitive form; three variants, selected by the number of arguments.
-- Every variant includes the singular number tag and VerbForm "Inf".
infinitiveTag = overload {
|
||||
infinitiveTag : Str -> Tag = \i -> -- i: InfForm value; no case tag, voice is activeTag
|
||||
consTag (mkTag "InfForm" i) (tagNumber Sg) (mkTag "VerbForm" "Inf") activeTag ; --- UD wants voice and number
|
||||
infinitiveTag : Str -> Str -> Tag = \c,i -> -- c: Case value, i: InfForm value; voice is activeTag
|
||||
consTag (mkTag "Case" c) (mkTag "InfForm" i) (tagNumber Sg) (mkTag "VerbForm" "Inf") activeTag ;
|
||||
infinitiveTag : Str -> Str -> Str -> Tag = \c,i,v -> -- c: Case value, i: InfForm value, v: Voice value
|
||||
consTag (mkTag "Case" c) (mkTag "InfForm" i) (tagNumber Sg) (mkTag "VerbForm" "Inf") (mkTag "Voice" v) ;
|
||||
} ;
|
||||
|
||||
tagPartForm : Str -> Tag = \pf -> mkTag "PartForm" pf ; -- PartForm tag with value pf; called in this file with "Past", "Pres" and "Agt"
|
||||
|
||||
nounTag = mkTag "NOUN" ;
|
||||
adjectiveTag = mkTag "ADJ" ;
|
||||
@@ -93,9 +107,8 @@ oper
|
||||
|
||||
imperativeTag = mkTag "Mood" "Imp" ;
|
||||
indicativeTag = mkTag "Mood" "Ind" ;
|
||||
participleTag = mkTag "Part" ;
|
||||
participleTag = mkTag "VerbForm" "Part" ;
|
||||
agentTag = mkTag "Agent" ;
|
||||
infinitiveTag = mkTag "Inf" ;
|
||||
finiteTag = mkTag "VerbForm" "Fin" ;
|
||||
|
||||
connegativeTag = mkTag "Connegative" "Yes" ;
|
||||
|
||||
Reference in New Issue
Block a user