1
0
forked from GitHub/gf-core

Estonian - the 29th complete RGL language. Copied from Kaarel Kaljurand's and Inari Listenmaa's repository in https://github.com/GF-Estonian/GF-Estonian, where later developments will continue to take place.

This commit is contained in:
aarne
2013-10-18 11:45:06 +00:00
parent 02cbf49516
commit 90ee7c7288
35 changed files with 111190 additions and 1 deletions

618
lib/src/estonian/HjkEst.gf Normal file
View File

@@ -0,0 +1,618 @@
resource HjkEst = open ResEst, Prelude, Predef in {
-- Implementation of the noun inflection rules from
-- Heiki-Jaan Kaalep. "Eesti käänamissüsteemi seaduspärasused" (2012)
--
-- @author Kaarel Kaljurand
-- @version 2013-09-09
flags
coding = utf8 ;
-- TODO: change the name of this file and the names of the opers in this file
param
-- S1: stress on the last syllable
-- S2: stress on the penultimate syllable
-- S3: stress not on the last 2 syllables
-- If the S2 word ends with a vowel then we distinguish between:
-- S21: 1st quantity: blo.gi, ta.la
-- S22: 2nd quantity: rat.su, vol.le
-- S23: 3rd quantity: aas.ta
SylType = S1 | S2 | S21 | S22 | S23 | S3 ;
oper
NFS = {s : NForm => Str} ;
foreign : pattern Str = #("z" | "ž" | "š") ;
-- Foreign vowel endings
foreign_v : pattern Str = #("ko" | "po" | "to" | "fo" | "ka" | "pa" | "ta" | "fa" | "ku" | "pu" | "tu" | "fu") ;
v : pattern Str = #("a" | "e" | "i" | "o" | "u" | "õ" | "ä" | "ö" | "ü" | "w") ;
vv : pattern Str = #("aa" | "ee" | "ii" | "oo" | "uu" | "õõ" | "ää" | "öö" | "üü") ;
c : pattern Str = #("m" | "n" | "p" | "b" | "t" | "d" | "k" | "g" | "f" | "v" | "s" | "h" | "l" | "j" | "r" | "z" | "ž" | "š" | "c" | "q") ;
lmnr : pattern Str = #("l" | "m" | "n" | "r") ;
kpt : pattern Str = #("k" | "p" | "t" | "f" | "š") ;
gbd : pattern Str = #("g" | "b" | "d") ;
-- Types that map singular nominative to the full paradigm.
-- VI and VII include gradation which is described separately.
hjk_type,
hjk_type_I_koi,
hjk_type_II_ema,
hjk_type_III_ratsu,
hjk_type_IVa_aasta,
hjk_type_IVb_maakas,
hjk_type_Va_otsene,
hjk_type_Vb_oluline,
hjk_type_VI_link,
hjk_type_VI_imelik,
hjk_type_VI_meeskond,
hjk_type_VI_seminar,
hjk_type_VII_touge : Str -> NForms ;
-- hjk_type_VII_touge : Str -> NFS ;
-- IVa additionally needs the stem vowel.
hjk_type_IVb_audit,
hjk_type_IVb_audit1 : Str -> Str -> NForms ; --NFS
hjk_type_VI_tukk : Str -> Str -> NForms ;
-- Definition of the mapping rules.
-- Verbatim from HJKEKS.
hjk_type_I_koi x =
nForms6 x x (x+"d") (x+"sse") (x+"de") (x+"sid") ;
hjk_type_II_ema x =
nForms6 x x x (x+"sse") (x+"de") (x+"sid") ;
hjk_type_III_ratsu x =
nForms6 x x (x+"t") (x+"sse") (x+"de") (x+"sid") ;
-- if ends with 'i' ('arvuti') then last form is 'arvut' + 'e' + 'id'
-- There are ~50 such words in the WordNet.
hjk_type_IVa_aasta x =
let
x1 : Str = case x of { _ + "i" => (init x) + "e" ; _ => x }
in
nForms6 x x (x+"t") (x+"sse") (x+"te") (x1+"id") ;
-- (audit "a") can be used with comparative and superlative adjectives.
hjk_type_IVb_audit x v_g =
let
v_pl = case v_g of { "i" => "e" ; _ => v_g }
in
nForms6 x (x+v_g) (x+v_g+"t") (x+v_g+"sse") (x+v_g+"te") (x+v_pl+"id") ;
-- TODO: clean this up
hjk_type_IVb_audit1 x y =
nForms6 x (y + "i") (y+"it") (y+"isse") (y+"ite") (y+"eid") ;
hjk_type_IVb_maakas x =
let
gen = init x
in
nForms6 x gen (gen+"t") (gen+"sse") (gen+"te") (gen+"id") ;
--Maakas is for maakas:maaka:maakat, this is for hammas:hamba:hammast
--Not sure if this is already covered by some hjk_type,
--anyway the grades are explicit with two args, more reliable
dHammas : (_,_ : Str) -> NForms ;
dHammas hammas hamba =
nForms6 hammas hamba (hammas+"t") (hamba+"sse") (hammas+"te") (hamba+"id") ;
dMeri : (_,_ : Str) -> NForms ;
dMeri meri mere =
let
mer = init mere ;
in
nForms6 meri mere (mer+"d") (mere+"sse") (mere+"de") (mere+"sid") ;
-- This rule handles the removal of -ne and -s endings, and the addition of 'e'
-- in the case of Cne-nouns (e.g. 'raudne').
-- vastus - vastuse - vastust
-- otsene - otsese - otsest
-- raudne - raudse - raudsEt - raudsesse - raudsEte - raudseid (additional 'e')
-- TODO: variant: vastusesse | vastusse
hjk_type_Va_otsene x =
let
f : Str = case x of {
y + c@(#c) + "ne" => y + c + "se" ;
y + "ne" => y + "s" ;
_ => x
} ;
f1 : Str = case x of {
y + "ne" => y + "s" ;
_ => x
}
in
nForms6 x (f1+"e") (f+"t") (f1+"esse") (f+"te") (f1+"eid") ;
-- TODO: variant: olulisesse | olulisse
hjk_type_Vb_oluline x =
let
f : Str = case x of {
y + "ne" => y + "s" ;
y + "ke" => y + "kes" ;
_ => x
}
in
nForms6 x (f+"e") (f+"t") (f+"esse") (f+"te") (f+"i") ;
hjk_type_VI_link x =
let
x_n : Str = weaker_noun x
in
nForms6 x (x_n+"i") (x+"i") (x+"i") (x+"ide") (x+"e") ;
--like link but
-- gen form given (takes care of vowel and consonant gradation)
-- -sid for pl.part (todo: generate short forms depending on vowel?)
hjk_type_VI_tukk x x_gen =
let
v_g : Str = last x_gen ;
{- pl_part : Str =
case v_g of {
"i" => "e" ;
_ => v_g + "sid" } ;
-}
in
nForms6 x x_gen (x+v_g) (x+v_g) (x+v_g+"de") (x+v_g+"sid") ;
hjk_type_VI_imelik x =
let
x_t : Str = stronger_noun x
in
nForms6 x (x+"u") (x_t+"u") (x_t+"u") (x+"e") (x_t+"e") ;
hjk_type_VI_meeskond x =
let
x_n : Str = weaker_noun x
in
nForms6 x (x_n+"a") (x+"a") (x+"a") (x+"ade") (x+"i") ;
hjk_type_VI_seminar x =
nForms6 x (x+"i") (x+"i") (x+"i") (x+"ide") (x+"e") ;
hjk_type_VII_touge x =
let
x_t : Str = (stronger_noun (init x)) + "e"
in
nForms6 x x_t (x+"t") (x_t+"sse") (x+"te") (x_t+"id") ;
--Identical to the above, just taking 2 arguments (nom + gen)
--There are 67 nouns in test cases where stronger_noun gets it wrong
--handles liige:liikme as well
hjk_type_VII_touge2 : (_,_ : Str) -> NForms ;
hjk_type_VII_touge2 touge touke =
let
liikme : Str = case touke of {
_ + "me" => touke ;
_ + "mne" => touke ;
_ => touge }
in
nForms6 touge touke (touge+"t") (touke+"sse") (liikme+"te") (touke+"id") ;
-- Use this only to weaken the verbs
weaker : Str -> Str ;
weaker link =
let
li = Predef.tk 2 link ;
nk = Predef.dp 2 link
in
case nk of {
"kk" => li + "k" ;
"pp" => li + "p" ;
"tt" => li + "t" ;
"ff" => li + "f" ;
("üt"|"üs") => li + "ö" ; --süsi,söe ; ütlema,öelda
--"ad" => li + "aj" ; --sada,saja; maybe remove
V@(#v) + "k" => li + V + "g" ;
V@(#v) + "p" => li + V + "b" ;
V@(#v) + "t" => li + V + "d" ;
V@(#v) + "g" => li + V ; --liuglema,liuelda
V@(#v) + "b" => li + V + "v" ; --leib,leiva
V@(#v) + "d" => li + V ; --hoidma,hoiab
N@(#lmnr) + "k" => li + N + "g" ;
N@(#lmnr) + "p" => li + N + "b" ;
N@(#lmnr) + "t" => li + N + "d" ;
N@(#lmnr) + "d" => li + N + N ;
N@(#lmnr) + "b" => li + N + N ;
N@("l"|"r") + "g" => li + N ; --algama,alata
"sk" => li + "s" ;
"h" + #kpt => li + "h" ;
_ => link
} ;
-- Weakening of nouns.
-- Only the very stable weakening that happens to nouns.
-- TODO: verify correctness/completeness based on some other implementation.
weaker_noun : Str -> Str ;
weaker_noun link =
case link of {
li + "kk" => li + "k" ;
li + "pp" => li + "p" ;
li + "tt" => li + "t" ;
li + "ff" => li + "f" ;
li + "šš" => li + "š" ;
li + N@(#lmnr) + "ss" => li + N + "s" ;
li + V@(#v) + "k" => li + V + "g" ;
li + V@(#v) + "p" => li + V + "b" ;
li + V@(#v) + "t" => li + V + "d" ;
li + N@(#lmnr) + "k" => li + N + "g" ;
li + N@(#lmnr) + "p" => li + N + "b" ;
li + N@(#lmnr) + "t" => li + N + "d" ;
li + "h" + #kpt => li + "h" ;
li + "kond" => li + "konn" ;
_ => link
} ;
-- Strengthening of nouns.
-- Input must not have the last vowel.
stronger_noun : Str -> Str ;
stronger_noun x =
case x of {
y + "lg" => y + "lg" ;
y + "hk" => y + "hk" ; -- tahke
y + "tk" => y + "tk" ; -- katke
y + "rs" => y + "rs" ; -- morse
y + "rr" => y + "rd" ; -- murre
y + "ks" => y + "ks" ; -- makse
y + "us" => y + "us" ; -- lause
y + "sk" => y + "sk" ; -- raske (?)
y + "ts" => y + "ts" ; -- katse
y + "ps" => y + "psm" ; -- ripse -> ripsme
y + "nt" => y + "nt" ; -- tante
y + "st" => y + "st" ; -- TODO: sometimes stm: iste, kaste
y + k@("k"|"p"|"t"|"s") => y + k + k ;
y + "g" => y + "k" ;
y + "d" => y + "t" ;
y + "b" => y + "p" ;
y + v@(#v) + "v" => y + v + "b" ; -- works for 'iive' but not 'irve'
y + "mm" => y + "mb" ; -- komme -> kombe
y + "nn" => y + "nd" ;
_ => x
} ;
-- Strengthening of verbs.
stronger : Str -> Str ;
stronger x =
let
beginning = tk 2 x ;
ending = dp 2 x
in
beginning + case ending of {
y + k@("k"|"p"|"t"|"s") + e => y + k + k + e ;
y + "g" + e => y + "k" + e ;
y + "d" + e => y + "t" + e ;
y + "b" + e => y + "p" + e ;
_ => ending
} ;
-- Mapping of singular nominative to HJKEKS types.
-- This implements the patterns from HJKEKS section 8 but
-- makes the rule ordering explicit, handles things like dropping 'e'
-- in 'reegel' -> 'reegli', etc.
-- Works ~90% correctly, ~100% correctly with input longer than 10 letters.
-- If this rule delivers an incorrect form, then use the 6-arg oper.
-- This is also needed if another legal form is desired,
-- e.g. palk -> palga (the default is palk -> palgi).
--
-- This rule does not cover:
-- - exceptional words (workaround: take these from the lexicon)
-- - compound words (workaround: mark the compound border manually)
-- - comparative and superlative adjective forms (workaround: use mkA instead)
-- - type VII (t6uge -> t6uke), as one needs to detect derivation from verb
-- - last syllable superlong (rostbiif)
hjk_type x =
case <(syl_type x), x> of {
<S3, _ + "ke">
=> hjk_type_Vb_oluline x ;
<_, _ + "kond">
=> hjk_type_VI_meeskond x ;
-- Some S2 -ik words (voolik), we only cover words with double vowel
<_, _ + #vv + ("lik"|"nik"|"stik")>
=> hjk_type_IVb_audit x "u" ;
-- Other -ik words as in HJKEKS,
-- but added 'ndik' which fixes fractions ('kaheksandik')
-- and is wrong only for 'kandik'.
<_, _ + ("lik"|"nik"|"stik"|"ndik")>
=> hjk_type_VI_imelik x ;
-- Remaining -k words (but need to be S2)
-- but not 'konjak'
<S2, _ + ("a"|"e"|"i") + ("ng"|"k")>
=> hjk_type_IVb_audit x "u" ;
-- Other -ik words (not in HJKEKS)
-- including also: alevik, asemik, lobudik, hämarik, sarapik, põletik
<_, _ + ("vik"|"mik"|"dik"|"rik"|"pik"|"tik")>
=> hjk_type_VI_imelik x ;
-- kikas
<_, ? + #v + #c + #v + "s">
=> hjk_type_Va_otsene x ;
<_, _ + ("ngas"|"kas"|"jas"|"nud"|"tud")>
=> hjk_type_IVb_maakas x ;
<S1, _ + #v + #v>
=> hjk_type_I_koi x ;
-- 'statiiv' (not like 'karjuv')
<S1, _ + #vv + #c>
=> hjk_type_VI_link x ;
<S3, _ + #c + #v + #lmnr>
=> hjk_type_VI_seminar x ;
<S1, _ + #v + #v + #c>
=> hjk_type_VI_link x ;
<_, _ + ("us"|"is")>
=> hjk_type_Vb_oluline x ;
<S3, _ + #v + #v + #c>
=> hjk_type_VI_link x ;
<(S1|S3), _ + #v + #c + #c>
=> hjk_type_VI_link x ;
<(S1|S3), _ + #v + #c + #c + #c>
=> hjk_type_VI_link x ;
<_, _ + "nna">
=> hjk_type_III_ratsu x ;
<-(S21|S22), _ + ("nu"|"tu")>
=> hjk_type_IVa_aasta x ;
-- TODO: improve foreign detection
<S2, _ + #foreign + _ + "in">
=> hjk_type_IVb_audit x "i" ;
-- TODO: this is not in HJKEKS
-- 'absurd' vs 'ebard'
<S2, _ + #v + #lmnr + "d">
=> hjk_type_IVb_audit x "i" ;
-- sometimes 'a' (laurits) TODO: this is not in HJKEKS
<S2, _ + #v + #kpt + "s">
=> hjk_type_IVb_audit x "i" ;
-- TODO: next 3 rules: last syllable must be long
-- portfell, TODO: not 'karask'
<S2, _ + #v + #c + #c>
=> hjk_type_VI_link x ;
-- rostbiif, not viiul
<S2, _ + #c + #v + #v + #c>
=> hjk_type_VI_link x ;
-- impulss
<S2, _ + #v + #c + #c + #c>
=> hjk_type_VI_link x ;
-- TODO: sometimes masked by 'maakas'
<_, _ + #v + "s">
=> hjk_type_Va_otsene x ;
<_, _ + ("v"|"tav")>
=> hjk_type_IVb_audit x "a" ;
-- The choice between Va (pl part: -seid) and Vb (pl part: -si)
-- is based on checking the derivational ending.
-- We just check the ending of the word and require at least 2 letters
-- to precede the ending.
-- We added also -tine and -ldane (which occur with adjectives).
<_, _ + ? + ? + ("line"|"lane"|"mine"|"kene"|"tine"|"ldane")>
=> hjk_type_Vb_oluline x ;
-- k6ne
<S21, _ + "e">
=> hjk_type_III_ratsu x ;
-- Many adjectives end with "ne" (40% in WordNet)
-- We require them to be at least 5 letters long (excluding 'öine'),
-- to give a chance to VII_touge (next rule).
<_, _ + ? + ? + ? + "ne">
=> hjk_type_Va_otsene x ;
-- Note: this rule does not actually check the derivation from verb.
-- verb + e, TODO: masked by S21/e
<(S2|S22), _ + "e">
=> hjk_type_VII_touge x ;
-- ufo, pita, lito
<S21, _ + #foreign_v>
=> hjk_type_III_ratsu x ;
<S21, _ + #v>
=> hjk_type_II_ema x ;
<S22, _ + #v>
=> hjk_type_III_ratsu x ;
<S23, _ + #v>
=> hjk_type_IVa_aasta x ;
<S2, _ + "in">
=> hjk_type_IVb_audit x "a" ;
-- 'e' deletion
-- kringel -> kringli, amper -> ampri, meeter -> meetri, reegel -> reegli
-- kaabel-> kaabli (TODO: not: juubel -> juubli)
-- spikker -> spikri (TODO: not: pokker -> pokkeri)
-- Note: pintsel -> pintsli, but not pitser -> pitsri
-- Note: 'redel' and 'paber' do not lose the 'e'.
<S2, y + kk@("kk"|"pp"|"tt"|"hh") + "e" + l@("l"|"r")>
=> hjk_type_IVb_audit1 x (y + (init kk) + l) ;
-- aaker -> aakri, teater -> teatri
<S2, y + vvkpt@(#v + #v + #kpt) + "e" + l@("l"|"r")>
=> hjk_type_IVb_audit1 x (y+vvkpt+l) ;
<S2, y + vv@(#vv) + gbd@(#gbd) + "e" + l@("l"|"r")>
=> hjk_type_IVb_audit1 x (y+vv+gbd+l) ;
-- Disabled, 50-50 correctness
--<S2, y + vv@(#vv) + lmnr@(#lmnr) + "e" + l@("l"|"r")>
-- => hjk_type_IVb_audit1 x (y+vv+lmnr+l) ; -- 50-50
<S2, y + vv@(#vv) + s@("s"|"v") + "e" + l@("l"|"r")>
=> hjk_type_IVb_audit1 x (y+vv+s+l) ;
<S2, y + n@("ht"|"hk"|"hv"|"nts"|"ld"|"lv"|"lb"|"ng"|"nd"|"mb"|"mp"|"nt"|"ps"|"ks"|"sk"|"st") + "e" + l@("l"|"r")>
=> hjk_type_IVb_audit1 x (y+n+l) ;
<S2, y + "e" + l@("l"|"r")>
=> hjk_type_IVb_audit x "i" ;
-- TODO: sometimes masked by 'link'
<S2, _ + #c>
=> hjk_type_IVb_audit x "i" ;
<S3, _ + #v>
=> hjk_type_IVa_aasta x ;
-- verb + 'e'
<_, _ + "e">
=> hjk_type_VII_touge x ;
-- catch all that end with consonant
<_, _ + #c>
=> hjk_type_IVb_audit x "i" ;
-- TODO: not in HJKEKS
<_, _ + ("ia"|"ja")> --kündja, not gerilja
=> hjk_type_IVa_aasta x ;
--added by Inari 07.10.
<S23, _ + #c + ("la")> --haigla, not gorilla
=> hjk_type_IVa_aasta x ;
-- catch all
<_, _>
=> hjk_type_III_ratsu x
} ;
-- Assigns stress/quantity indicator (SylType) to the word based on
-- its character composition.
-- Note: you cannot use recursion (circular definitions) in these rules
-- Note: patterns must be linear (GF book C.4.13), i.e. you cannot write
-- oi@(#v + #v) + oi => S2 ; -- oi-oi, ai-ai, oo-oo
syl_type : Str -> SylType ;
syl_type x =
case x of {
-- all 1-letters
? => S1 ;
-- all 2-letters
? + ? => S1 ;
-- all 3-letters
#v + #c + #v => S21 ;
#v + #v + #v => S22 ;
? + ? + ? => S1 ; -- koi, kae
-- all 4-letters
#c + #v + #v + #c => S1 ; -- siid
#c + #v + #c + #c => S1 ; -- link
#v + #c + #v + #c => S2 ;
#v + #vv + #c => S1 ; -- auul, ioon, oaas
#v + #v + #v + #c => S2 ; -- aiak (?)
#v + #v + #c + #v => S22 ; -- aine, aade; not: 6ige
#v + #c + #v + #v => S1 ; -- epee, oboe
#v + #c + #c + #v => S22 ; -- iste, iglu; not: 6htu
#c + #v + #c + #v => S21 ;
#c + #v + #v + #v => S22 ; -- muie, neiu, riie
? + ? + ? + ? => S1 ;
-- all 5-letters
_ + #c + "ia" => S2 ; -- aaria, minia, orgia, kirurgia, nostalgia
#v + #c + #c + #v + #v => S1 ; -- armee
#c + #v + #c + #v + #v => S1 ; -- depoo
#c + #c + #v + #c + #c => S1 ; -- tramm
#c + #v + #c + #c + #c => S1 ;
#c + #v + #vv + #c => S1 ; -- poeem
#c + #v + #v + #v + #c => S2 ; -- hoius, laius, maius
#c + #v + #c + #v + #c => S2 ; -- redel
#c + #v + #c + #gbd + "e" => S23 ; -- valge, k6rge; p6rge, hange
#c + #v + #v + #gbd + "e" => S22 ; -- haige, kauge; t6uge
#c + #v + #v + #c + #v => S22 ; -- lause; TODO: leitu, rootu (S23)
#c + #v + #c + #c + #v => S22 ; -- ratsu; not: surnu
#v + #c + #c + #c + #v => S23 ;
#v + #c + #c + #v + #c => S2 ; -- amper
#v + #c + #v + #c + #c => S2 ; -- avang
_ + #c + #vv + #c + #c => S1 ; -- loots (double vowel, otherwise the same as below)
#c + #v + #v + #c + #c => S2 ; -- laeng, loend
#c + #c + #v + #v + #c => S1 ; -- bluus, kruus, kreem
#v + #c + #v + #v + #c => S1 ; -- ukaas, TODO: not 'avaus'
#v + #v + #c + #v + #c => S2 ; -- aatom
#v + #v + #c + #c + #v => S23 ; -- aasta
#v + #v + #c + #v + #v => S1 ; -- aaloe (?)
#c + #c + #v + #c + #v => S21 ; -- blogi
_ + ? + #v + #vv + #c => S1 ; -- -ioos, kruiis
#c + #c + #v + #v + #v + #c => S2 ; -- flaier
_ + ? + #c + #v + #c + #v => S3 ; -- oluline
-- all 6-letters
#v + #c + #c + #v + #v + #c => S1 ; -- aplaus
#v + #c + #c + #v + #c + #c => S2 ; -- astang, ellips
#c + #vv + #c + #v + #v => S23 ; -- muumia, raadio, TODO: exclude 'vaarao'
#c + #v + #v + #c + #v + #v => S1 ; -- peoleo
#c + #v + #v + #c + #c + #v => S23 ; -- haigla --added by Inari, not sure if always correct
#c + #v + #c + #c + #c + #v => S23 ; -- vangla --added by Inari, not sure if always correct
#c + #v + #c + #vv + #c => S1 ; -- deviis (double vowel in the last syllable)
#v + #c + #v + #c + #v + #v => S1 ; -- agoraa
#c + #v + #c + #v + #c + #c => S2 ;
#c + #v + #c + #v + #c + #v => S3 ;
#v + #c + #v + #c + #c + #v => S3 ; -- yheksa
#c + #v + #c + #c + #v + #c => S2 ; -- rektor
#c + #v + #c + #v + #v + #c => S2 ; -- paleus
#c + #v + #v + #c + #v + #c => S2 ; -- meeter, reegel
#v + #v + #c + #c + #v + #c => S2 ; -- aastak
#v + #c + #c + #c + #v + #c => S2 ; -- andmik
#v + #c + #c + #v + #c + #v => S3 ;
_ + #v + #c + #v + #c + #v + #c => S3 ; -- alevik, elanik
-- all 7-letters
_ + ? + ? + #c + #vv + #c => S1 ; -- double vowel in the last syllable: bensiin, benseen, bensool
#c + #v + #v + #c + #c + #v + #c => S2 ; -- jooksik
#c + #v + #c + #c + #c + #v + #c => S2 ; -- hurtsik
#c + #v + #c + #c + #v + #c + #c => S2 ; -- kitsend
#c + #v + #c + #c + #v + #v + #c => S2 ; -- pension
#c + #v + #c + #v + #c + #v + #c => S3 ; -- seminar
#c + #c + #v + #c + #c + #v + #c => S2 ; -- kringel, plastik
_ + #v + #c + #v + #kpt + #kpt + #v + #c => S2 ; -- elekter, adapter
_ + #c + #v + #lmnr + #gbd + #v + #c => S2 ; -- (k)alender, (dets)ember
_ + #c + #v + #lmnr + #kpt + #v + #c => S2 ; -- (re)porter
_ + #c + #v + "stik" => S3 ; -- kuristik (TODO: not logistik)
_ + #c + #v + "s" + #kpt + #v + #c => S2 ; -- (k)anister
#v + #c + #v + #c + #c + #v + #c => S3 ; -- apelsin
#v + #c + #c + #v + #c + #v + #c => S3 ; -- admiral
#c + #v + #c + #v + #c + #c + #v => S3 ; -- kaheksa
#c + #c + #v + #c + #v + #c + #c => S2 ; -- klopits
#c + #v + #v + #c + #v + #c + #c => S2 ; -- haarang
#c + #v + #v + #c + #v + #v + #c => S2 ; -- raadius, kauneim
_ + #c + #v + #v + #c + #v + #c => S2 ; -- araabik
_ + #lmnr + #gbd + #v + #c + #c + #v + #c => S3 ; -- (pa)lderjan, (ko)rgitser
-- other
_ + #c + #v + #c + #c + #v + #c + #v + #c => S3 ; -- karneval
#c + #v + #c + #v + #c + #c + #v + #c => S3 ; -- ragastik (kalender is handled above)
_ + #v + #v + #c + #v + #c + #c + #v + #c => S3 ; -- ainestik
_ + #c + #c + #v + #c + #c + #v + #c + #c => S3 ; -- ampersand
_ + #c + #v + #c + #v + #c + #c => S1 ; -- dividend
_ + #v + #c + #c + #c + #v + #v => S1 ; -- displei
_ + #c + #v + #c + #c + #v + #v => S1 ; -- politsei
_ + #c + #v + #c + #v + #v => S1 ; -- defilee, kompanii
_ => S2 -- the default is S2, but the above rules should catch most of the words
} ;
}