1
0
forked from GitHub/gf-core

fixes to estonian

This commit is contained in:
inari
2013-10-22 12:23:52 +00:00
parent 098619ceee
commit 34d15b505d
3 changed files with 244 additions and 282 deletions

View File

@@ -23,8 +23,6 @@ resource HjkEst = open ResEst, Prelude, Predef in {
oper
NFS = {s : NForm => Str} ;
foreign : pattern Str = #("z" | "ž" | "š") ;
-- Foreign vowel endings
foreign_v : pattern Str = #("ko" | "po" | "to" | "fo" | "ka" | "pa" | "ta" | "fa" | "ku" | "pu" | "tu" | "fu") ;
@@ -50,13 +48,14 @@ resource HjkEst = open ResEst, Prelude, Predef in {
hjk_type_VI_meeskond,
hjk_type_VI_seminar,
hjk_type_VII_touge : Str -> NForms ;
-- hjk_type_VII_touge : Str -> NFS ;
-- IVb (audit) additionally needs the stem vowel.
hjk_type_IVb_audit,
hjk_type_IVb_audit1 : Str -> Str -> NForms ; --NFS
hjk_type_IVb_audit1 : Str -> Str -> NForms ;
hjk_type_VI_tukk : Str -> Str -> NForms ;
hjk_type_VI_link2 : Str -> Str -> NForms ;
hjk_type2 : Str -> Str -> NForms ;
-- Definition of the mapping rules.
@@ -147,24 +146,19 @@ resource HjkEst = open ResEst, Prelude, Predef in {
in
nForms6 x (x_n+"i") (x+"i") (x+"i") (x+"ide") (x+"e") ;
--like link but
-- gen form given (takes care of vowel and consonant gradation)
-- -sid for pl.part (todo: generate short forms depending on vowel?)
hjk_type_VI_tukk x x_gen =
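-- Same as hjk_type_VI_link, but additionally takes the stem vowel `i`,
-- which is appended to the weak stem to form the genitive and to the
-- strong stem for the remaining forms.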
hjk_type_VI_link2 x i =
let
v_g : Str = last x_gen ;
{- pl_part : Str =
case v_g of {
"i" => "e" ;
_ => v_g + "sid" } ;
-}
x_n : Str = weaker_noun x ;
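-- TODO: revisit how the plural partitive ending is chosen;
-- so far only the stem vowel "a" is special-cased (-> "asid"), everything else gets "e".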
e : Str = case i of {
"a" => "asid" ; -- pikk/pika -> pikkasid
_ => "e"
}
in
nForms6 x x_gen (x+v_g) (x+v_g) (x+v_g+"de") (x+v_g+"sid") ;
nForms6 x (x_n+i) (x+i) (x+i) (x+i+"de") (x+e) ;
hjk_type_VI_imelik x =
let
x_t : Str = stronger_noun x
@@ -312,103 +306,105 @@ resource HjkEst = open ResEst, Prelude, Predef in {
-- - comparative and superlative adjective forms (workaround: use mkA instead)
-- - type VII (t6uge -> t6uke), as one needs to detect derivation from verb
-- - last syllable superlong (rostbiif)
hjk_type x =
case <(syl_type x), x> of {
<S3, _ + "ke">
hjk_type x = hjk_type2 x "i" ;
hjk_type2 x i =
case <(syl_type x), x, i> of {
<S3, _ + "ke", _>
=> hjk_type_Vb_oluline x ;
<_, _ + "kond">
<_, _ + "kond", _>
=> hjk_type_VI_meeskond x ;
-- Some S2 -ik words (e.g. voolik); we only cover words with a double vowel before the suffix
<_, _ + #vv + ("lik"|"nik"|"stik")>
<_, _ + #vv + ("lik"|"nik"|"stik"), _>
=> hjk_type_IVb_audit x "u" ;
-- Other -ik words as in HJKEKS,
-- but added 'ndik' which fixes fractions ('kaheksandik')
-- and is wrong only for 'kandik'.
<_, _ + ("lik"|"nik"|"stik"|"ndik")>
<_, _ + ("lik"|"nik"|"stik"|"ndik"), _>
=> hjk_type_VI_imelik x ;
-- Remaining -k words (but need to be S2)
-- but not 'konjak'
<S2, _ + ("a"|"e"|"i") + ("ng"|"k")>
<S2, _ + ("a"|"e"|"i") + ("ng"|"k"), _>
=> hjk_type_IVb_audit x "u" ;
-- Other -ik words (not in HJKEKS)
-- including also: alevik, asemik, lobudik, hämarik, sarapik, põletik
<_, _ + ("vik"|"mik"|"dik"|"rik"|"pik"|"tik")>
<_, _ + ("vik"|"mik"|"dik"|"rik"|"pik"|"tik"), _>
=> hjk_type_VI_imelik x ;
-- kikas
<_, ? + #v + #c + #v + "s">
<_, ? + #v + #c + #v + "s", _>
=> hjk_type_Va_otsene x ;
<_, _ + ("ngas"|"kas"|"jas"|"nud"|"tud")>
<_, _ + ("ngas"|"kas"|"jas"|"nud"|"tud"), _>
=> hjk_type_IVb_maakas x ;
<S1, _ + #v + #v>
<S1, _ + #v + #v, _>
=> hjk_type_I_koi x ;
-- 'statiiv' (not like 'karjuv')
<S1, _ + #vv + #c>
=> hjk_type_VI_link x ;
<S1, _ + #vv + #c, i>
=> hjk_type_VI_link2 x i ;
<S3, _ + #c + #v + #lmnr>
<S3, _ + #c + #v + #lmnr, _>
=> hjk_type_VI_seminar x ;
<S1, _ + #v + #v + #c>
=> hjk_type_VI_link x ;
<S1, _ + #v + #v + #c, i>
=> hjk_type_VI_link2 x i ;
<_, _ + ("us"|"is")>
<_, _ + ("us"|"is"), _>
=> hjk_type_Vb_oluline x ;
<S3, _ + #v + #v + #c>
=> hjk_type_VI_link x ;
<S3, _ + #v + #v + #c, i>
=> hjk_type_VI_link2 x i ;
<(S1|S3), _ + #v + #c + #c>
=> hjk_type_VI_link x ;
<(S1|S3), _ + #v + #c + #c, i>
=> hjk_type_VI_link2 x i ;
<(S1|S3), _ + #v + #c + #c + #c>
=> hjk_type_VI_link x ;
<(S1|S3), _ + #v + #c + #c + #c, i>
=> hjk_type_VI_link2 x i ;
<_, _ + "nna">
<_, _ + "nna", _>
=> hjk_type_III_ratsu x ;
<-(S21|S22), _ + ("nu"|"tu")>
<-(S21|S22), _ + ("nu"|"tu"), _>
=> hjk_type_IVa_aasta x ;
-- TODO: improve foreign detection
<S2, _ + #foreign + _ + "in">
=> hjk_type_IVb_audit x "i" ;
<S2, _ + #foreign + _ + "in", i>
=> hjk_type_IVb_audit x i ;
-- TODO: this is not in HJKEKS
-- 'absurd' vs 'ebard'
<S2, _ + #v + #lmnr + "d">
=> hjk_type_IVb_audit x "i" ;
<S2, _ + #v + #lmnr + "d", i>
=> hjk_type_IVb_audit x i ;
-- sometimes 'a' (laurits) TODO: this is not in HJKEKS
<S2, _ + #v + #kpt + "s">
=> hjk_type_IVb_audit x "i" ;
<S2, _ + #v + #kpt + "s", i>
=> hjk_type_IVb_audit x i ;
-- TODO: next 3 rules: last syllable must be long
-- portfell, TODO: not 'karask'
<S2, _ + #v + #c + #c>
=> hjk_type_VI_link x ;
<S2, _ + #v + #c + #c, i>
=> hjk_type_VI_link2 x i ;
-- rostbiif, not viiul
<S2, _ + #c + #v + #v + #c>
=> hjk_type_VI_link x ;
<S2, _ + #c + #v + #v + #c, i>
=> hjk_type_VI_link2 x i ;
-- impulss
<S2, _ + #v + #c + #c + #c>
=> hjk_type_VI_link x ;
<S2, _ + #v + #c + #c + #c, i>
=> hjk_type_VI_link2 x i ;
-- TODO: sometimes masked by 'maakas'
<_, _ + #v + "s">
<_, _ + #v + "s", _>
=> hjk_type_Va_otsene x ;
<_, _ + ("v"|"tav")>
<_, _ + ("v"|"tav"), _>
=> hjk_type_IVb_audit x "a" ;
-- The choice between Va (pl part: -seid) and Vb (pl part: -si)
@@ -416,38 +412,38 @@ resource HjkEst = open ResEst, Prelude, Predef in {
-- We just check the ending of the word and require at least 2 letters
-- to precede the ending.
-- We added also -tine and -ldane (which occur with adjectives).
<_, _ + ? + ? + ("line"|"lane"|"mine"|"kene"|"tine"|"ldane")>
<_, _ + ? + ? + ("line"|"lane"|"mine"|"kene"|"tine"|"ldane"), _>
=> hjk_type_Vb_oluline x ;
-- k6ne
<S21, _ + "e">
<S21, _ + "e", _>
=> hjk_type_III_ratsu x ;
-- Many adjectives end with "ne" (40% in WordNet)
-- We require them to be at least 5 letters long (excluding 'öine'),
-- to give a chance to VII_touge (next rule).
<_, _ + ? + ? + ? + "ne">
<_, _ + ? + ? + ? + "ne", _>
=> hjk_type_Va_otsene x ;
-- Note: this rule does not actually check the derivation from verb.
-- verb + e, TODO: masked by S21/e
<(S2|S22), _ + "e">
<(S2|S22), _ + "e", _>
=> hjk_type_VII_touge x ;
-- ufo, pita, lito
<S21, _ + #foreign_v>
<S21, _ + #foreign_v, _>
=> hjk_type_III_ratsu x ;
<S21, _ + #v>
<S21, _ + #v, _>
=> hjk_type_II_ema x ;
<S22, _ + #v>
<S22, _ + #v, _>
=> hjk_type_III_ratsu x ;
<S23, _ + #v>
<S23, _ + #v, _>
=> hjk_type_IVa_aasta x ;
<S2, _ + "in">
<S2, _ + "in", _>
=> hjk_type_IVb_audit x "a" ;
-- 'e' deletion
@@ -456,54 +452,54 @@ resource HjkEst = open ResEst, Prelude, Predef in {
-- spikker -> spikri (TODO: not: pokker -> pokkeri)
-- Note: pintsel -> pintsli, but not pitser -> pitsri
-- Note: 'redel' and 'paber' do not lose the 'e'.
<S2, y + kk@("kk"|"pp"|"tt"|"hh") + "e" + l@("l"|"r")>
<S2, y + kk@("kk"|"pp"|"tt"|"hh") + "e" + l@("l"|"r"), _>
=> hjk_type_IVb_audit1 x (y + (init kk) + l) ;
-- aaker -> aakri, teater -> teatri
<S2, y + vvkpt@(#v + #v + #kpt) + "e" + l@("l"|"r")>
<S2, y + vvkpt@(#v + #v + #kpt) + "e" + l@("l"|"r"), _>
=> hjk_type_IVb_audit1 x (y+vvkpt+l) ;
<S2, y + vv@(#vv) + gbd@(#gbd) + "e" + l@("l"|"r")>
<S2, y + vv@(#vv) + gbd@(#gbd) + "e" + l@("l"|"r"), _>
=> hjk_type_IVb_audit1 x (y+vv+gbd+l) ;
-- Disabled, 50-50 correctness
--<S2, y + vv@(#vv) + lmnr@(#lmnr) + "e" + l@("l"|"r")>
--<S2, y + vv@(#vv) + lmnr@(#lmnr) + "e" + l@("l"|"r"), _>
-- => hjk_type_IVb_audit1 x (y+vv+lmnr+l) ; -- 50-50
<S2, y + vv@(#vv) + s@("s"|"v") + "e" + l@("l"|"r")>
<S2, y + vv@(#vv) + s@("s"|"v") + "e" + l@("l"|"r"), _>
=> hjk_type_IVb_audit1 x (y+vv+s+l) ;
<S2, y + n@("ht"|"hk"|"hv"|"nts"|"ld"|"lv"|"lb"|"ng"|"nd"|"mb"|"mp"|"nt"|"ps"|"ks"|"sk"|"st") + "e" + l@("l"|"r")>
<S2, y + n@("ht"|"hk"|"hv"|"nts"|"ld"|"lv"|"lb"|"ng"|"nd"|"mb"|"mp"|"nt"|"ps"|"ks"|"sk"|"st") + "e" + l@("l"|"r"), _>
=> hjk_type_IVb_audit1 x (y+n+l) ;
<S2, y + "e" + l@("l"|"r")>
=> hjk_type_IVb_audit x "i" ;
<S2, y + "e" + l@("l"|"r"), i>
=> hjk_type_IVb_audit x i ;
-- TODO: sometimes masked by 'link'
<S2, _ + #c>
=> hjk_type_IVb_audit x "i" ;
<S2, _ + #c, i>
=> hjk_type_IVb_audit x i ;
<S3, _ + #v>
<S3, _ + #v, _>
=> hjk_type_IVa_aasta x ;
-- verb + 'e'
<_, _ + "e">
<_, _ + "e", _>
=> hjk_type_VII_touge x ;
-- catch-all for words that end with a consonant
<_, _ + #c>
=> hjk_type_IVb_audit x "i" ;
<_, _ + #c, i>
=> hjk_type_IVb_audit x i ;
-- TODO: not in HJKEKS
<_, _ + ("ia"|"ja")> --kündja, not gerilja
<_, _ + ("ia"|"ja"), _> --kündja, not gerilja
=> hjk_type_IVa_aasta x ;
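--added by Inari 07.10.
-- NOTE(review): presumably shadowed by the earlier <S23, _ + #v, _> rule,
-- which also maps to hjk_type_IVa_aasta (assuming "a" matches #v) -- TODO confirm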
<S23, _ + #c + ("la")> --haigla, not gorilla
<S23, _ + #c + ("la"), _> --haigla, not gorilla
=> hjk_type_IVa_aasta x ;
-- catch all
<_, _>
<_, _, _>
=> hjk_type_III_ratsu x
} ;