Omorfi tagging mostly complete for open classes in Fin, some closed classes and syncat words missing

This commit is contained in:
aarne
2016-09-22 08:23:04 +00:00
parent ddf24a0a4e
commit bd60279dc0
6 changed files with 102 additions and 70 deletions

View File

@@ -766,8 +766,8 @@ mkVS = overload {
dirV2 v = mk2V2 v accPrep ; dirV2 v = mk2V2 v accPrep ;
mkAdv = overload { mkAdv = overload {
mkAdv : Str -> Adv = \s -> {s = s ; lock_Adv = <>} ; mkAdv : Str -> Adv = \s -> {s = tagPOS "ADV" s ; lock_Adv = <>} ;
mkAdv : AdvK -> Adv = \s -> {s = s.s ; lock_Adv = <>} ; mkAdv : AdvK -> Adv = \s -> {s = tagPOS "ADV" s.s ; lock_Adv = <>} ;
} ; } ;
mkV2 = overload { mkV2 = overload {

View File

@@ -4,6 +4,11 @@ resource StemFin = open MorphoFin, Prelude in {
flags coding = utf8 ; flags coding = utf8 ;
oper
-- POS tagging hook for word classes that are not handled individually below.
-- In this variant the tag (first argument) is discarded and the word form is
-- returned unchanged; per the original note, tagging takes effect only when
-- the grammar is used with Omorfi.
tagPOS : Str -> Str -> Str = \_,word -> word ;
oper oper
SNForm : Type = NForm ; SNForm : Type = NForm ;
SNoun : Type = Noun ; SNoun : Type = Noun ;

View File

@@ -18,15 +18,15 @@ concrete StructuralFin of Structural = CatFin **
} }
} ; } ;
almost_AdA, almost_AdN = ss "melkein" ; almost_AdA, almost_AdN = ss "melkein" ;
although_Subj = ss "vaikka" ; although_Subj = ssp "CONJ" "vaikka" ;
always_AdV = ss "aina" ; always_AdV = ssp "ADV" "aina" ;
and_Conj = {s1 = [] ; s2 = "ja" ; n = Pl} ; and_Conj = {s1 = [] ; s2 = tagPOS "CONJ" "ja" ; n = Pl} ;
because_Subj = ss "koska" ; because_Subj = ssp "CONJ" "koska" ;
before_Prep = prePrep partitive "ennen" ; before_Prep = prePrep partitive "ennen" ;
behind_Prep = postGenPrep "takana" ; behind_Prep = postGenPrep "takana" ;
between_Prep = postGenPrep "välissä" ; between_Prep = postGenPrep "välissä" ;
both7and_DConj = sd2 "sekä" "että" ** {n = Pl} ; both7and_DConj = sd2 "sekä" "että" ** {n = Pl} ;
but_PConj = ss "mutta" ; but_PConj = ssp "CONJ" "mutta" ;
by8agent_Prep = postGenPrep "toimesta" ; by8agent_Prep = postGenPrep "toimesta" ;
by8means_Prep = casePrep adessive ; by8means_Prep = casePrep adessive ;
can8know_VV = mkVV (mkV "osata" "osasi") ; can8know_VV = mkVV (mkV "osata" "osasi") ;
@@ -36,20 +36,20 @@ concrete StructuralFin of Structural = CatFin **
everybody_NP = lin NP (makeNP (((mkN "jokainen"))) Sg) ; everybody_NP = lin NP (makeNP (((mkN "jokainen"))) Sg) ;
every_Det = MorphoFin.mkDet Sg (snoun2nounBind (mkN "jokainen")) ; every_Det = MorphoFin.mkDet Sg (snoun2nounBind (mkN "jokainen")) ;
everything_NP = makeNP ((((mkN "kaikki" "kaiken" "kaikkena")))) Sg ; everything_NP = makeNP ((((mkN "kaikki" "kaiken" "kaikkena")))) Sg ;
everywhere_Adv = ss "kaikkialla" ; everywhere_Adv = mkAdv "kaikkialla" ;
few_Det = MorphoFin.mkDet Sg (snoun2nounBind (mkN "harva")) ; few_Det = MorphoFin.mkDet Sg (snoun2nounBind (mkN "harva")) ;
--- first_Ord = {s = \\n,c => (mkN "ensimmäinen").s ! NCase n c} ; --- first_Ord = {s = \\n,c => (mkN "ensimmäinen").s ! NCase n c} ;
for_Prep = casePrep allative ; for_Prep = casePrep allative ;
from_Prep = casePrep elative ; from_Prep = casePrep elative ;
he_Pron = mkPronoun "hän" "hänen" "häntä" "hänenä" "häneen" Sg P3 ; he_Pron = mkPronoun "hän" "hänen" "häntä" "hänenä" "häneen" Sg P3 ;
here_Adv = ss "täällä" ; here_Adv = mkAdv "täällä" ;
here7to_Adv = ss "tänne" ; here7to_Adv = mkAdv "tänne" ;
here7from_Adv = ss "täältä" ; here7from_Adv = mkAdv "täältä" ;
how_IAdv = ss "miten" ; how_IAdv = ssp "ADV" "miten" ;
how8much_IAdv = ss "kuinka paljon" ; how8much_IAdv = ssp "ADV" ("kuinka" ++ tagPOS "ADV" "paljon") ;
how8many_IDet = how8many_IDet =
{s = \\c => "kuinka" ++ (snoun2nounBind (mkN "moni" "monia")).s ! NCase Sg c ; n = Sg ; isNum = False} ; {s = \\c => "kuinka" ++ (snoun2nounBind (mkN "moni" "monia")).s ! NCase Sg c ; n = Sg ; isNum = False} ;
if_Subj = ss "jos" ; if_Subj = ssp "CONJ" "jos" ;
in8front_Prep = postGenPrep "edessä" ; in8front_Prep = postGenPrep "edessä" ;
i_Pron = mkPronoun "minä" "minun" "minua" "minuna" "minuun" Sg P1 ; i_Pron = mkPronoun "minä" "minun" "minua" "minuna" "minuun" Sg P1 ;
in_Prep = casePrep inessive ; in_Prep = casePrep inessive ;
@@ -65,18 +65,18 @@ concrete StructuralFin of Structural = CatFin **
most_Predet = {s = \\n,c => (nForms2N (dSuurin "useinta")).s ! NCase n (npform2case n c)} ; most_Predet = {s = \\n,c => (nForms2N (dSuurin "useinta")).s ! NCase n (npform2case n c)} ;
much_Det = MorphoFin.mkDet Sg (snoun2nounBind (exceptNomN (mkN "paljo") "paljon")) ** {isNum = True} ; --Harmony not relevant, it's just a CommonNoun much_Det = MorphoFin.mkDet Sg (snoun2nounBind (exceptNomN (mkN "paljo") "paljon")) ** {isNum = True} ; --Harmony not relevant, it's just a CommonNoun
must_VV = mkVV (caseV genitive (mkV "täytyä")) ; must_VV = mkVV (caseV genitive (mkV "täytyä")) ;
no_Utt = ss "ei" ; no_Utt = ssp "INTERJ" "ei" ;
on_Prep = casePrep adessive ; on_Prep = casePrep adessive ;
--- one_Quant = MorphoFin.mkDet Sg DEPREC --- one_Quant = MorphoFin.mkDet Sg DEPREC
only_Predet = {s = \\_,_ => "vain"} ; only_Predet = {s = \\_,_ => "vain"} ;
or_Conj = {s1 = [] ; s2 = "tai" ; n = Sg} ; or_Conj = {s1 = [] ; s2 = tagPOS "CONJ" "tai" ; n = Sg} ;
otherwise_PConj = ss "muuten" ; otherwise_PConj = ssp "ADV" "muuten" ;
part_Prep = casePrep partitive ; part_Prep = casePrep partitive ;
please_Voc = ss ["ole hyvä"] ; --- number please_Voc = ss ["ole hyvä"] ; --- number
possess_Prep = casePrep genitive ; possess_Prep = casePrep genitive ;
quite_Adv = ss "melko" ; quite_Adv = ssp "ADV" "melko" ;
she_Pron = mkPronoun "hän" "hänen" "häntä" "hänenä" "häneen" Sg P3 ; she_Pron = mkPronoun "hän" "hänen" "häntä" "hänenä" "häneen" Sg P3 ;
so_AdA = ss "niin" ; so_AdA = ssp "ADV" "niin" ;
somebody_NP = { somebody_NP = {
s = \\c => jokuPron ! Sg ! npform2case Sg c ; s = \\c => jokuPron ! Sg ! npform2case Sg c ;
a = agrP3 Sg ; a = agrP3 Sg ;
@@ -97,7 +97,7 @@ concrete StructuralFin of Structural = CatFin **
a = agrP3 Sg ; a = agrP3 Sg ;
isPron = False ; isNeg = False ; isNeg = False isPron = False ; isNeg = False ; isNeg = False
} ; } ;
somewhere_Adv = ss "jossain" ; somewhere_Adv = ssp "ADV" "jossain" ;
that_Quant = heavyQuant { that_Quant = heavyQuant {
s1 = table (MorphoFin.Number) { s1 = table (MorphoFin.Number) {
Sg => table (MorphoFin.Case) { Sg => table (MorphoFin.Case) {
@@ -109,11 +109,11 @@ concrete StructuralFin of Structural = CatFin **
} ; } ;
s2 = \\_ => [] ; isNum,isPoss = False ; isDef = True ; isNeg = False s2 = \\_ => [] ; isNum,isPoss = False ; isDef = True ; isNeg = False
} ; } ;
that_Subj = ss "että" ; that_Subj = ssp "CONJ" "että" ;
there_Adv = ss "siellä" ; --- tuolla there_Adv = ssp "ADV" "siellä" ; --- tuolla
there7to_Adv = ss "sinne" ; there7to_Adv = ssp "ADV" "sinne" ;
there7from_Adv = ss "sieltä" ; there7from_Adv = ssp "ADV" "sieltä" ;
therefore_PConj = ss "siksi" ; therefore_PConj = ssp "ADV" "siksi" ;
they_Pron = mkPronoun "he" "heidän" "heitä" "heinä" "heihin" Pl P3 ; --- ne they_Pron = mkPronoun "he" "heidän" "heitä" "heinä" "heihin" Pl P3 ; --- ne
this_Quant = heavyQuant { this_Quant = heavyQuant {
s1 = table (MorphoFin.Number) { s1 = table (MorphoFin.Number) {
@@ -127,10 +127,10 @@ concrete StructuralFin of Structural = CatFin **
s2 = \\_ => [] ; isNum,isPoss = False ; isDef = True ; isNeg = False s2 = \\_ => [] ; isNum,isPoss = False ; isDef = True ; isNeg = False
} ; } ;
through_Prep = postGenPrep "kautta" ; through_Prep = postGenPrep "kautta" ;
too_AdA = ss "liian" ; too_AdA = ssp "ADV" "liian" ;
to_Prep = casePrep illative ; --- allative to_Prep = casePrep illative ; --- allative
under_Prep = postGenPrep "alla" ; under_Prep = postGenPrep "alla" ;
very_AdA = ss "erittäin" ; very_AdA = ssp "ADV" "erittäin" ;
want_VV = mkVV (mkV "tahtoa") ; want_VV = mkVV (mkV "tahtoa") ;
we_Pron = mkPronoun "me" "meidän" "meitä" "meinä" "meihin" Pl P1 ; we_Pron = mkPronoun "me" "meidän" "meitä" "meinä" "meihin" Pl P1 ;
whatPl_IP = { whatPl_IP = {
@@ -141,9 +141,9 @@ concrete StructuralFin of Structural = CatFin **
s = \\c => mikaInt ! Sg ! npform2case Sg c ; s = \\c => mikaInt ! Sg ! npform2case Sg c ;
n = Sg n = Sg
} ; } ;
when_IAdv = ss "milloin" ; when_IAdv = ssp "ADV" "milloin" ;
when_Subj = ss "kun" ; when_Subj = ssp "CONJ" "kun" ;
where_IAdv = ss "missä" ; where_IAdv = ssp "ADV" "missä" ;
which_IQuant = { which_IQuant = {
s = mikaInt s = mikaInt
} ; } ;
@@ -155,10 +155,10 @@ concrete StructuralFin of Structural = CatFin **
s = table {NPAcc => "ketkä" ; c => kukaInt ! Pl ! npform2case Pl c} ; s = table {NPAcc => "ketkä" ; c => kukaInt ! Pl ! npform2case Pl c} ;
n = Pl n = Pl
} ; } ;
why_IAdv = ss "miksi" ; why_IAdv = ssp "ADV" "miksi" ;
without_Prep = prePrep partitive "ilman" ; without_Prep = prePrep partitive "ilman" ;
with_Prep = postGenPrep "kanssa" ; with_Prep = postGenPrep "kanssa" ;
yes_Utt = ss "kyllä" ; yes_Utt = ssp "INTERJ" "kyllä" ;
youSg_Pron = mkPronoun "sinä" "sinun" "sinua" "sinuna" "sinuun" Sg P2 ; youSg_Pron = mkPronoun "sinä" "sinun" "sinua" "sinuna" "sinuun" Sg P2 ;
youPl_Pron = mkPronoun "te" "teidän" "teitä" "teinä" "teihin" Pl P2 ; youPl_Pron = mkPronoun "te" "teidän" "teitä" "teinä" "teihin" Pl P2 ;
youPol_Pron = youPol_Pron =
@@ -300,8 +300,8 @@ lin
isPron = False ; isNeg = True isPron = False ; isNeg = True
} ; } ;
at_least_AdN = ss "vähintään" ; at_least_AdN = ssp "ADV" "vähintään" ;
at_most_AdN = ss "enintään" ; at_most_AdN = ssp "ADV" "enintään" ;
as_CAdv = X.mkCAdv "yhtä" "kuin" ; as_CAdv = X.mkCAdv "yhtä" "kuin" ;
@@ -311,5 +311,7 @@ lin
lin language_title_Utt = ss "suomi" ; lin language_title_Utt = ss "suomi" ;
oper
-- Builds an {s : Str} record from a POS tag and a word form by passing both
-- through tagPOS and wrapping the result with ss.
-- Original note: used in tagged/ for Omorfi, otherwise equal to ss.
ssp : Str -> Str -> {s : Str} = \pos,word -> ss (tagPOS pos word) ;
} }

View File

@@ -4,6 +4,12 @@ resource StemFin = open MorphoFin, Prelude in {
flags coding = utf8 ; flags coding = utf8 ;
oper
-- POS tagging hook for word classes that are not handled individually below.
-- In this variant the tag (first argument) is discarded and the word form is
-- returned unchanged; per the original note, tagging takes effect only when
-- the grammar is used with Omorfi.
tagPOS : Str -> Str -> Str = \_,word -> word ;
oper oper
SNForm : Type = Predef.Ints 10 ; SNForm : Type = Predef.Ints 10 ;
SNoun : Type = {s : SNForm => Str ; h : Harmony} ; SNoun : Type = {s : SNForm => Str ; h : Harmony} ;

View File

@@ -4,6 +4,11 @@ resource StemFin = open TagFin, MorphoFin, Prelude in {
flags coding = utf8 ; flags coding = utf8 ;
oper
-- POS tagging hook for word classes that are not handled individually below.
-- This variant forwards the tag and the word form, in that order, to tagWord
-- (presumably provided by the opened TagFin module -- confirm), so the
-- classes get POS tagged when the grammar is used with Omorfi.
tagPOS : Str -> Str -> Str = \pos,word -> tagWord pos word ;
oper oper
SNForm : Type = Predef.Ints 0 ; --- not really needed SNForm : Type = Predef.Ints 0 ; --- not really needed
SNoun : Type = {s : SNForm => Str ; h : Harmony} ; --- Harmony needed only for API compatibility SNoun : Type = {s : SNForm => Str ; h : Harmony} ; --- Harmony needed only for API compatibility
@@ -71,6 +76,7 @@ oper
snoun2compar : SNoun -> Str = \n -> n.s ! 0 ++ "?Comp" ; ---- TODO snoun2compar : SNoun -> Str = \n -> n.s ! 0 ++ "?Comp" ; ---- TODO
snoun2superl : SNoun -> Str = \n -> n.s ! 0 ++ "?Superl" ; ---- TODO snoun2superl : SNoun -> Str = \n -> n.s ! 0 ++ "?Superl" ; ---- TODO
-- verbs -- verbs
oper oper

View File

@@ -18,20 +18,24 @@ oper
consTag : (_,_,_,_,_,_ : Str) -> Tag = \t,u,v,x,y,z -> t + "|" + u + "|" + v + "|" + x + "|" + y + "|" + z ; consTag : (_,_,_,_,_,_ : Str) -> Tag = \t,u,v,x,y,z -> t + "|" + u + "|" + v + "|" + x + "|" + y + "|" + z ;
} ; } ;
tagNForm : NForm -> Str = \nf -> case nf of { pairTag : Tag -> Tag -> Tag * Tag = \t,u -> <t,u> ;
NCase n c => consTag (tagCase c) (tagNumber n) ;
NComit => consTag (mkTag "Case" "Com") (tagNumber Pl) ; tagNForm : NForm -> Tag = \nf -> let ts = tagNForms nf in consTag ts.p1 ts.p2 ;
NInstruct => consTag (mkTag "Case" "Ins") (tagNumber Pl) ;
NPossNom n => consTag (tagCase Nom) (tagNumber n) ; tagNForms : NForm -> Tag * Tag = \nf -> case nf of { -- keep separate in order to squeeze in Degree of adjectives
NPossGen n => consTag (tagCase Gen) (tagNumber n) ; NCase n c => pairTag (tagCase c) (tagNumber n) ;
NPossTransl n => consTag (tagCase Transl) (tagNumber n) ; NComit => pairTag (mkTag "Case" "Com") (tagNumber Pl) ;
NPossIllat n => consTag (tagCase Illat) (tagNumber n) ; NInstruct => pairTag (mkTag "Case" "Ins") (tagNumber Pl) ;
NCompound => mkTag "Comp" ---- NPossNom n => pairTag (tagCase Nom) (tagNumber n) ;
NPossGen n => pairTag (tagCase Gen) (tagNumber n) ;
NPossTransl n => pairTag (tagCase Transl) (tagNumber n) ;
NPossIllat n => pairTag (tagCase Illat) (tagNumber n) ;
NCompound => pairTag (mkTag "Form" "Comp") (tagNumber Sg) ---- TODO: how is this in UD?
} ; } ;
tagAForm : AForm -> Str = \af -> case af of { tagDegreeAForm : Degree -> AForm -> Str = \d,af -> case af of {
AN nf => tagNForm nf ; AN nf => let ts = tagNForms nf in consTag ts.p1 (tagDegree d) ts.p2 ;
AAdv => adverbTag AAdv => consTag adverbTag (tagDegree d) ---- TODO: how is this in UD?
} ; } ;
tagVForm : VForm -> Str = \vf -> case vf of { tagVForm : VForm -> Str = \vf -> case vf of {
@@ -55,33 +59,43 @@ oper
PassPotent False => consTag connegativeTag potentialTag finiteTag passiveTag ; PassPotent False => consTag connegativeTag potentialTag finiteTag passiveTag ;
PassImper True => consTag imperativeTag finiteTag passiveTag ; PassImper True => consTag imperativeTag finiteTag passiveTag ;
PassImper False => consTag connegativeTag imperativeTag finiteTag passiveTag ; PassImper False => consTag connegativeTag imperativeTag finiteTag passiveTag ;
PastPartAct af => participleTag ++ activeTag ++ pastTag ++ tagAForm af ; PastPartAct af => consTag (tagDegreeAForm Posit af) (tagPartForm "Past") participleTag activeTag ;
PastPartPass af => participleTag ++ activeTag ++ pastTag ++ tagAForm af ; PastPartPass af => consTag (tagDegreeAForm Posit af) (tagPartForm "Past") participleTag passiveTag ;
PresPartAct af => participleTag ++ activeTag ++ presentTag ++ tagAForm af ; PresPartAct af => consTag (tagDegreeAForm Posit af) (tagPartForm "Pres") participleTag activeTag ;
PresPartPass af => participleTag ++ activeTag ++ presentTag ++ tagAForm af ; PresPartPass af => consTag (tagDegreeAForm Posit af) (tagPartForm "Pres") participleTag passiveTag ;
AgentPart af => participleTag ++ agentTag ++ tagAForm af AgentPart af => consTag (tagDegreeAForm Posit af) (tagPartForm "Agt") participleTag activeTag
} ; } ;
-- Maps every InfForm to its morphological tag string.
-- Proper infinitives go through the overloaded infinitiveTag:
--   one argument    = infinitive number only,
--   two arguments   = case, infinitive number,
--   three arguments = case, infinitive number, voice.
-- The two participle-based forms reuse the participle tagging with a fixed
-- positive-degree, singular nominative adjective form.
tagInfForm : InfForm -> Str = \vf -> case vf of {
  Inf1 => infinitiveTag "1" ;
  Inf1Long => infinitiveTag "1" ; --- insert Person[psor]=3 when used with poss suff
  Inf2Iness => infinitiveTag "Ine" "2" ;
  Inf2Instr => infinitiveTag "Ins" "2" ;
  -- inessive passive: the case must be "Ine" as in Inf2Iness (was "Ins", copied from Inf2Instr)
  Inf2InessPass => infinitiveTag "Ine" "2" "Pass" ;
  Inf3Iness => infinitiveTag "Ine" "3" ;
  Inf3Elat => infinitiveTag "Ela" "3" ;
  Inf3Illat => infinitiveTag "Ill" "3" ;
  Inf3Adess => infinitiveTag "Ade" "3" ;
  Inf3Abess => infinitiveTag "Abe" "3" ;
  Inf3Instr => infinitiveTag "Ins" "3" ;
  Inf3InstrPass => infinitiveTag "Ins" "3" "Pass" ;
  Inf4Nom => infinitiveTag "Nom" "4" ;
  Inf4Part => infinitiveTag "Par" "4" ;
  Inf5 => infinitiveTag "5" ; ---- not in UD
  InfPresPart => consTag (tagDegreeAForm Posit (AN (NCase Sg Nom))) (tagPartForm "Pres") participleTag activeTag ;
  InfPresPartAgr => consTag (tagDegreeAForm Posit (AN (NCase Sg Nom))) (tagPartForm "Pres") participleTag activeTag --- poss to add
  } ;
-- Overloaded constructor of infinitive tags. Every variant emits
-- InfForm, Number=Sg and VerbForm=Inf; the variants differ in whether a Case
-- feature is prepended and in how the final (voice) slot is filled.
infinitiveTag = overload {
  -- infinitive number only; the voice slot is activeTag
  --- UD wants voice and number
  infinitiveTag : Str -> Tag = \inf ->
    consTag (mkTag "InfForm" inf) (tagNumber Sg) (mkTag "VerbForm" "Inf") activeTag ;
  -- case + infinitive number; the voice slot is activeTag
  infinitiveTag : Str -> Str -> Tag = \cas,inf ->
    consTag (mkTag "Case" cas) (mkTag "InfForm" inf) (tagNumber Sg) (mkTag "VerbForm" "Inf") activeTag ;
  -- case + infinitive number + explicit Voice value
  infinitiveTag : Str -> Str -> Str -> Tag = \cas,inf,voice ->
    consTag (mkTag "Case" cas) (mkTag "InfForm" inf) (tagNumber Sg) (mkTag "VerbForm" "Inf") (mkTag "Voice" voice) ;
  } ;
-- Builds the PartForm feature tag for the given participle form value
-- (callers in this file pass "Past", "Pres" and "Agt").
tagPartForm : Str -> Tag = \form -> mkTag "PartForm" form ;
nounTag = mkTag "NOUN" ; nounTag = mkTag "NOUN" ;
adjectiveTag = mkTag "ADJ" ; adjectiveTag = mkTag "ADJ" ;
@@ -93,9 +107,8 @@ oper
imperativeTag = mkTag "Mood" "Imp" ; imperativeTag = mkTag "Mood" "Imp" ;
indicativeTag = mkTag "Mood" "Ind" ; indicativeTag = mkTag "Mood" "Ind" ;
participleTag = mkTag "Part" ; participleTag = mkTag "VerbForm" "Part" ;
agentTag = mkTag "Agent" ; agentTag = mkTag "Agent" ;
infinitiveTag = mkTag "Inf" ;
finiteTag = mkTag "VerbForm" "Fin" ; finiteTag = mkTag "VerbForm" "Fin" ;
connegativeTag = mkTag "Connegative" "Yes" ; connegativeTag = mkTag "Connegative" "Yes" ;