Omorfi tagging mostly complete for open classes in Fin, some closed classes and syncat words missing

This commit is contained in:
aarne
2016-09-22 08:23:04 +00:00
parent ddf24a0a4e
commit bd60279dc0
6 changed files with 102 additions and 70 deletions

View File

@@ -766,8 +766,8 @@ mkVS = overload {
dirV2 v = mk2V2 v accPrep ;
-- Adverb constructor, overloaded on a plain string or an AdvK record.
-- NOTE(review): this listing shows a change without +/- markers; the two variants
-- without tagPOS look like the replaced lines and the tagPOS ones their successors - confirm.
mkAdv = overload {
-- untagged variants: the string (or the s field of the AdvK) is used as is
mkAdv : Str -> Adv = \s -> {s = s ; lock_Adv = <>} ;
mkAdv : AdvK -> Adv = \s -> {s = s.s ; lock_Adv = <>} ;
-- tagged variants: the same strings are passed through tagPOS with the tag "ADV"
mkAdv : Str -> Adv = \s -> {s = tagPOS "ADV" s ; lock_Adv = <>} ;
mkAdv : AdvK -> Adv = \s -> {s = tagPOS "ADV" s.s ; lock_Adv = <>} ;
} ;
mkV2 = overload {

View File

@@ -4,6 +4,11 @@ resource StemFin = open MorphoFin, Prelude in {
flags coding = utf8 ;
oper
-- Classes not treated below are POS-tagged only when the grammar is used with Omorfi.
-- In this variant tagPOS ignores its first argument (the tag) and returns the word unchanged.
tagPOS : Str -> Str -> Str = \_,s -> s ;
oper
SNForm : Type = NForm ;
SNoun : Type = Noun ;

View File

@@ -18,15 +18,15 @@ concrete StructuralFin of Structural = CatFin **
}
} ;
almost_AdA, almost_AdN = ss "melkein" ;
although_Subj = ss "vaikka" ;
always_AdV = ss "aina" ;
and_Conj = {s1 = [] ; s2 = "ja" ; n = Pl} ;
because_Subj = ss "koska" ;
although_Subj = ssp "CONJ" "vaikka" ;
always_AdV = ssp "ADV" "aina" ;
and_Conj = {s1 = [] ; s2 = tagPOS "CONJ" "ja" ; n = Pl} ;
because_Subj = ssp "CONJ" "koska" ;
before_Prep = prePrep partitive "ennen" ;
behind_Prep = postGenPrep "takana" ;
between_Prep = postGenPrep "välissä" ;
both7and_DConj = sd2 "sekä" "että" ** {n = Pl} ;
but_PConj = ss "mutta" ;
but_PConj = ssp "CONJ" "mutta" ;
by8agent_Prep = postGenPrep "toimesta" ;
by8means_Prep = casePrep adessive ;
can8know_VV = mkVV (mkV "osata" "osasi") ;
@@ -36,20 +36,20 @@ concrete StructuralFin of Structural = CatFin **
everybody_NP = lin NP (makeNP (((mkN "jokainen"))) Sg) ;
every_Det = MorphoFin.mkDet Sg (snoun2nounBind (mkN "jokainen")) ;
everything_NP = makeNP ((((mkN "kaikki" "kaiken" "kaikkena")))) Sg ;
everywhere_Adv = ss "kaikkialla" ;
everywhere_Adv = mkAdv "kaikkialla" ;
few_Det = MorphoFin.mkDet Sg (snoun2nounBind (mkN "harva")) ;
--- first_Ord = {s = \\n,c => (mkN "ensimmäinen").s ! NCase n c} ;
for_Prep = casePrep allative ;
from_Prep = casePrep elative ;
he_Pron = mkPronoun "hän" "hänen" "häntä" "hänenä" "häneen" Sg P3 ;
-- NOTE(review): every rule below appears twice - first with plain ss, then in a tagged form.
-- The listing carries no +/- markers, so presumably the ss lines are the replaced ones - confirm.
here_Adv = ss "täällä" ;
here7to_Adv = ss "tänne" ;
here7from_Adv = ss "täältä" ;
how_IAdv = ss "miten" ;
how8much_IAdv = ss "kuinka paljon" ;
-- tagged forms: mkAdv for the Adv rules, ssp "ADV" for the IAdv rules
here_Adv = mkAdv "täällä" ;
here7to_Adv = mkAdv "tänne" ;
here7from_Adv = mkAdv "täältä" ;
how_IAdv = ssp "ADV" "miten" ;
-- NOTE(review): "paljon" goes through tagPOS inside a phrase that ssp sends through tagPOS
-- again, while "kuinka" gets no tag of its own - confirm that this nesting is intended.
how8much_IAdv = ssp "ADV" ("kuinka" ++ tagPOS "ADV" "paljon") ;
how8many_IDet =
{s = \\c => "kuinka" ++ (snoun2nounBind (mkN "moni" "monia")).s ! NCase Sg c ; n = Sg ; isNum = False} ;
if_Subj = ss "jos" ;
if_Subj = ssp "CONJ" "jos" ;
in8front_Prep = postGenPrep "edessä" ;
i_Pron = mkPronoun "minä" "minun" "minua" "minuna" "minuun" Sg P1 ;
in_Prep = casePrep inessive ;
@@ -65,18 +65,18 @@ concrete StructuralFin of Structural = CatFin **
most_Predet = {s = \\n,c => (nForms2N (dSuurin "useinta")).s ! NCase n (npform2case n c)} ;
much_Det = MorphoFin.mkDet Sg (snoun2nounBind (exceptNomN (mkN "paljo") "paljon")) ** {isNum = True} ; --Harmony not relevant, it's just a CommonNoun
must_VV = mkVV (caseV genitive (mkV "täytyä")) ;
no_Utt = ss "ei" ;
no_Utt = ssp "INTERJ" "ei" ;
on_Prep = casePrep adessive ;
--- one_Quant = MorphoFin.mkDet Sg DEPREC
only_Predet = {s = \\_,_ => "vain"} ;
or_Conj = {s1 = [] ; s2 = "tai" ; n = Sg} ;
otherwise_PConj = ss "muuten" ;
or_Conj = {s1 = [] ; s2 = tagPOS "CONJ" "tai" ; n = Sg} ;
otherwise_PConj = ssp "ADV" "muuten" ;
part_Prep = casePrep partitive ;
please_Voc = ss ["ole hyvä"] ; --- number
possess_Prep = casePrep genitive ;
quite_Adv = ss "melko" ;
quite_Adv = ssp "ADV" "melko" ;
she_Pron = mkPronoun "hän" "hänen" "häntä" "hänenä" "häneen" Sg P3 ;
so_AdA = ss "niin" ;
so_AdA = ssp "ADV" "niin" ;
somebody_NP = {
s = \\c => jokuPron ! Sg ! npform2case Sg c ;
a = agrP3 Sg ;
@@ -97,7 +97,7 @@ concrete StructuralFin of Structural = CatFin **
a = agrP3 Sg ;
isPron = False ; isNeg = False ; isNeg = False
} ;
somewhere_Adv = ss "jossain" ;
somewhere_Adv = ssp "ADV" "jossain" ;
that_Quant = heavyQuant {
s1 = table (MorphoFin.Number) {
Sg => table (MorphoFin.Case) {
@@ -109,11 +109,11 @@ concrete StructuralFin of Structural = CatFin **
} ;
s2 = \\_ => [] ; isNum,isPoss = False ; isDef = True ; isNeg = False
} ;
that_Subj = ss "että" ;
there_Adv = ss "siellä" ; --- tuolla
there7to_Adv = ss "sinne" ;
there7from_Adv = ss "sieltä" ;
therefore_PConj = ss "siksi" ;
that_Subj = ssp "CONJ" "että" ;
there_Adv = ssp "ADV" "siellä" ; --- tuolla
there7to_Adv = ssp "ADV" "sinne" ;
there7from_Adv = ssp "ADV" "sieltä" ;
therefore_PConj = ssp "ADV" "siksi" ;
they_Pron = mkPronoun "he" "heidän" "heitä" "heinä" "heihin" Pl P3 ; --- ne
this_Quant = heavyQuant {
s1 = table (MorphoFin.Number) {
@@ -127,10 +127,10 @@ concrete StructuralFin of Structural = CatFin **
s2 = \\_ => [] ; isNum,isPoss = False ; isDef = True ; isNeg = False
} ;
through_Prep = postGenPrep "kautta" ;
too_AdA = ss "liian" ;
too_AdA = ssp "ADV" "liian" ;
to_Prep = casePrep illative ; --- allative
under_Prep = postGenPrep "alla" ;
very_AdA = ss "erittäin" ;
very_AdA = ssp "ADV" "erittäin" ;
want_VV = mkVV (mkV "tahtoa") ;
we_Pron = mkPronoun "me" "meidän" "meitä" "meinä" "meihin" Pl P1 ;
whatPl_IP = {
@@ -141,9 +141,9 @@ concrete StructuralFin of Structural = CatFin **
s = \\c => mikaInt ! Sg ! npform2case Sg c ;
n = Sg
} ;
when_IAdv = ss "milloin" ;
when_Subj = ss "kun" ;
where_IAdv = ss "missä" ;
when_IAdv = ssp "ADV" "milloin" ;
when_Subj = ssp "CONJ" "kun" ;
where_IAdv = ssp "ADV" "missä" ;
which_IQuant = {
s = mikaInt
} ;
@@ -155,10 +155,10 @@ concrete StructuralFin of Structural = CatFin **
s = table {NPAcc => "ketkä" ; c => kukaInt ! Pl ! npform2case Pl c} ;
n = Pl
} ;
why_IAdv = ss "miksi" ;
why_IAdv = ssp "ADV" "miksi" ;
without_Prep = prePrep partitive "ilman" ;
with_Prep = postGenPrep "kanssa" ;
yes_Utt = ss "kyllä" ;
yes_Utt = ssp "INTERJ" "kyllä" ;
youSg_Pron = mkPronoun "sinä" "sinun" "sinua" "sinuna" "sinuun" Sg P2 ;
youPl_Pron = mkPronoun "te" "teidän" "teitä" "teinä" "teihin" Pl P2 ;
youPol_Pron =
@@ -300,8 +300,8 @@ lin
isPron = False ; isNeg = True
} ;
at_least_AdN = ss "vähintään" ;
at_most_AdN = ss "enintään" ;
at_least_AdN = ssp "ADV" "vähintään" ;
at_most_AdN = ssp "ADV" "enintään" ;
as_CAdv = X.mkCAdv "yhtä" "kuin" ;
@@ -311,5 +311,7 @@ lin
lin language_title_Utt = ss "suomi" ;
oper
-- ssp pos word: like ss, except that the word first goes through tagPOS with the tag pos.
-- Used in tagged/ for Omorfi; otherwise it equals ss.
ssp : Str -> Str -> {s : Str} = \pos,word -> ss (tagPOS pos word) ;
}

View File

@@ -4,6 +4,12 @@ resource StemFin = open MorphoFin, Prelude in {
flags coding = utf8 ;
oper
-- Classes not treated below are POS-tagged only when the grammar is used with Omorfi.
-- In this variant tagPOS ignores its first argument (the tag) and returns the word unchanged.
tagPOS : Str -> Str -> Str = \_,s -> s ;
oper
SNForm : Type = Predef.Ints 10 ;
SNoun : Type = {s : SNForm => Str ; h : Harmony} ;

View File

@@ -4,6 +4,11 @@ resource StemFin = open TagFin, MorphoFin, Prelude in {
flags coding = utf8 ;
oper
-- Classes not treated below are POS-tagged when the grammar is used with Omorfi.
-- In this variant tagPOS hands both the tag and the word on to tagWord.
-- NOTE(review): tagWord is not defined in this view; presumably it comes from TagFin, which this module opens.
tagPOS : Str -> Str -> Str = \p,s -> tagWord p s ;
oper
SNForm : Type = Predef.Ints 0 ; --- not really needed
SNoun : Type = {s : SNForm => Str ; h : Harmony} ; --- Harmony needed only for API compatibility
@@ -71,6 +76,7 @@ oper
snoun2compar : SNoun -> Str = \n -> n.s ! 0 ++ "?Comp" ; ---- TODO
snoun2superl : SNoun -> Str = \n -> n.s ! 0 ++ "?Superl" ; ---- TODO
-- verbs
oper

View File

@@ -18,20 +18,24 @@ oper
consTag : (_,_,_,_,_,_ : Str) -> Tag = \t,u,v,x,y,z -> t + "|" + u + "|" + v + "|" + x + "|" + y + "|" + z ;
} ;
tagNForm : NForm -> Str = \nf -> case nf of {
NCase n c => consTag (tagCase c) (tagNumber n) ;
NComit => consTag (mkTag "Case" "Com") (tagNumber Pl) ;
NInstruct => consTag (mkTag "Case" "Ins") (tagNumber Pl) ;
NPossNom n => consTag (tagCase Nom) (tagNumber n) ;
NPossGen n => consTag (tagCase Gen) (tagNumber n) ;
NPossTransl n => consTag (tagCase Transl) (tagNumber n) ;
NPossIllat n => consTag (tagCase Illat) (tagNumber n) ;
NCompound => mkTag "Comp" ----
-- Bundle two tags into a tuple, first argument in p1 and second in p2.
pairTag : Tag -> Tag -> Tag * Tag = \left,right -> <left,right> ;
-- Tag of a noun form: the two components computed by tagNForms, joined with consTag.
tagNForm : NForm -> Tag = \form ->
  let pair = tagNForms form
  in consTag pair.p1 pair.p2 ;
-- Map a noun form to a pair of tags: p1 is built from the case (or compound form), p2 from the number.
-- tagNForm joins the two with consTag; tagDegreeAForm puts a degree tag between them.
tagNForms : NForm -> Tag * Tag = \nf -> case nf of { -- keep separate in order to squeeze in Degree of adjectives
NCase n c => pairTag (tagCase c) (tagNumber n) ;
-- comitative and instructive carry no number parameter and are always tagged as Pl
NComit => pairTag (mkTag "Case" "Com") (tagNumber Pl) ;
NInstruct => pairTag (mkTag "Case" "Ins") (tagNumber Pl) ;
-- the NPoss* forms get the tag of the corresponding case plus their own number
NPossNom n => pairTag (tagCase Nom) (tagNumber n) ;
NPossGen n => pairTag (tagCase Gen) (tagNumber n) ;
NPossTransl n => pairTag (tagCase Transl) (tagNumber n) ;
NPossIllat n => pairTag (tagCase Illat) (tagNumber n) ;
-- the compound form has no case; it is given mkTag "Form" "Comp" with number Sg
NCompound => pairTag (mkTag "Form" "Comp") (tagNumber Sg) ---- TODO: how is this in UD?
} ;
tagAForm : AForm -> Str = \af -> case af of {
AN nf => tagNForm nf ;
AAdv => adverbTag
-- Tag an adjective form together with its degree d.
-- For the AN forms the degree tag is placed between the two tags returned by tagNForms;
-- for AAdv it follows adverbTag.
tagDegreeAForm : Degree -> AForm -> Str = \d,af -> case af of {
AN nf => let ts = tagNForms nf in consTag ts.p1 (tagDegree d) ts.p2 ;
AAdv => consTag adverbTag (tagDegree d) ---- TODO: how is this in UD?
} ;
tagVForm : VForm -> Str = \vf -> case vf of {
@@ -55,33 +59,43 @@ oper
PassPotent False => consTag connegativeTag potentialTag finiteTag passiveTag ;
PassImper True => consTag imperativeTag finiteTag passiveTag ;
PassImper False => consTag connegativeTag imperativeTag finiteTag passiveTag ;
PastPartAct af => participleTag ++ activeTag ++ pastTag ++ tagAForm af ;
PastPartPass af => participleTag ++ activeTag ++ pastTag ++ tagAForm af ;
PresPartAct af => participleTag ++ activeTag ++ presentTag ++ tagAForm af ;
PresPartPass af => participleTag ++ activeTag ++ presentTag ++ tagAForm af ;
AgentPart af => participleTag ++ agentTag ++ tagAForm af
PastPartAct af => consTag (tagDegreeAForm Posit af) (tagPartForm "Past") participleTag activeTag ;
PastPartPass af => consTag (tagDegreeAForm Posit af) (tagPartForm "Past") participleTag passiveTag ;
PresPartAct af => consTag (tagDegreeAForm Posit af) (tagPartForm "Pres") participleTag activeTag ;
PresPartPass af => consTag (tagDegreeAForm Posit af) (tagPartForm "Pres") participleTag passiveTag ;
AgentPart af => consTag (tagDegreeAForm Posit af) (tagPartForm "Agt") participleTag activeTag
} ;
-- Map an infinitive form to its tag string.
-- The branches that pass arguments to infinitiveTag give, in order, an optional case
-- value, the infinitive number ("1".."5") and an optional voice value; the two
-- InfPresPart forms are tagged as present active participles in Sg Nom.
-- NOTE(review): this listing shows a change without +/- markers; the first seventeen
-- branches (bare infinitiveTag) look like the replaced lines - confirm before compiling.
tagInfForm : InfForm -> Str = \vf -> case vf of {
Inf1 => infinitiveTag ;
Inf1Long => infinitiveTag ;
Inf2Iness => infinitiveTag ;
Inf2Instr => infinitiveTag ;
Inf2InessPass => infinitiveTag ;
Inf3Iness => infinitiveTag ;
Inf3Elat => infinitiveTag ;
Inf3Illat => infinitiveTag ;
Inf3Adess => infinitiveTag ;
Inf3Abess => infinitiveTag ;
Inf3Instr => infinitiveTag ;
Inf3InstrPass => infinitiveTag ;
Inf4Nom => infinitiveTag ;
Inf4Part => infinitiveTag ;
Inf5 => infinitiveTag ;
InfPresPart => infinitiveTag ;
InfPresPartAgr => infinitiveTag
Inf1 => infinitiveTag "1" ;
Inf1Long => infinitiveTag "1" ; --- insert Person[psor]=3 when used with poss suff
Inf2Iness => infinitiveTag "Ine" "2" ;
Inf2Instr => infinitiveTag "Ins" "2" ;
Inf2InessPass => infinitiveTag "Ine" "2" "Pass" ; -- inessive like Inf2Iness; "Ins" belongs to the instructive forms only
Inf3Iness => infinitiveTag "Ine" "3" ;
Inf3Elat => infinitiveTag "Ela" "3" ;
Inf3Illat => infinitiveTag "Ill" "3" ;
Inf3Adess => infinitiveTag "Ade" "3" ;
Inf3Abess => infinitiveTag "Abe" "3" ;
Inf3Instr => infinitiveTag "Ins" "3" ;
Inf3InstrPass => infinitiveTag "Ins" "3" "Pass" ;
Inf4Nom => infinitiveTag "Nom" "4" ;
Inf4Part => infinitiveTag "Par" "4" ;
Inf5 => infinitiveTag "5" ; ---- not in UD
InfPresPart => consTag (tagDegreeAForm Posit (AN (NCase Sg Nom))) (tagPartForm "Pres") participleTag activeTag ;
InfPresPartAgr => consTag (tagDegreeAForm Posit (AN (NCase Sg Nom))) (tagPartForm "Pres") participleTag activeTag --- poss to add
} ;
-- Tag builder for infinitives, overloaded on the number of string arguments:
--   infinitive number only; case + infinitive number; case + infinitive number + voice.
-- All variants add the Sg number tag and VerbForm Inf; the first two use activeTag,
-- the third builds its voice tag from the given value.
infinitiveTag = overload {
  infinitiveTag : Str -> Tag = \inf ->
    consTag
      (mkTag "InfForm" inf)
      (tagNumber Sg)
      (mkTag "VerbForm" "Inf")
      activeTag ; --- UD wants voice and number
  infinitiveTag : Str -> Str -> Tag = \cas,inf ->
    consTag
      (mkTag "Case" cas)
      (mkTag "InfForm" inf)
      (tagNumber Sg)
      (mkTag "VerbForm" "Inf")
      activeTag ;
  infinitiveTag : Str -> Str -> Str -> Tag = \cas,inf,voice ->
    consTag
      (mkTag "Case" cas)
      (mkTag "InfForm" inf)
      (tagNumber Sg)
      (mkTag "VerbForm" "Inf")
      (mkTag "Voice" voice) ;
} ;
-- Tag for a participle form value such as "Past", "Pres" or "Agt".
tagPartForm : Str -> Tag = \value ->
  mkTag "PartForm" value ;
nounTag = mkTag "NOUN" ;
adjectiveTag = mkTag "ADJ" ;
@@ -93,9 +107,8 @@ oper
-- Tag constants built with mkTag, either from a bare name or from a feature/value pair.
imperativeTag = mkTag "Mood" "Imp" ;
indicativeTag = mkTag "Mood" "Ind" ;
-- NOTE(review): participleTag is listed twice; the listing has no +/- markers, so the
-- one-argument form is presumably the replaced line and the "VerbForm" "Part" form its successor - confirm.
participleTag = mkTag "Part" ;
participleTag = mkTag "VerbForm" "Part" ;
-- The AgentPart branch that passes arguments to tagPartForm uses "Agt" instead of agentTag.
agentTag = mkTag "Agent" ;
-- NOTE(review): infinitiveTag also appears as an overload with string arguments in this
-- listing; this constant form is presumably the removed line - confirm.
infinitiveTag = mkTag "Inf" ;
finiteTag = mkTag "VerbForm" "Fin" ;
connegativeTag = mkTag "Connegative" "Yes" ;