forked from GitHub/gf-core
Estonian - the 29th complete RGL language. Copied from Kaarel Kaljurand's and Inari Listenmaa's repository in https://github.com/GF-Estonian/GF-Estonian, where later developments will continue to take place.
This commit is contained in:
618
lib/src/estonian/HjkEst.gf
Normal file
618
lib/src/estonian/HjkEst.gf
Normal file
@@ -0,0 +1,618 @@
|
||||
resource HjkEst = open ResEst, Prelude, Predef in {
|
||||
|
||||
-- Implementation of the noun inflection rules from
|
||||
-- Heiki-Jaan Kaalep. "Eesti käänamissüsteemi seaduspärasused" (2012)
|
||||
--
|
||||
-- @author Kaarel Kaljurand
|
||||
-- @version 2013-09-09
|
||||
|
||||
flags
|
||||
coding = utf8 ;
|
||||
|
||||
-- TODO: change the name of this file and the names of the opers in this file
|
||||
|
||||
param
|
||||
-- S1: stress on the last syllable
|
||||
-- S2: stress on the penultimate syllable
|
||||
-- S3: stress not on the last 2 syllables
|
||||
-- If the S2 word ends with a vowel then we distinguish between:
|
||||
-- S21: 1st quantity: blo.gi, ta.la
|
||||
-- S22: 2nd quantity: rat.su, vol.le
|
||||
-- S23: 3rd quantity: aas.ta
|
||||
SylType = S1 | S2 | S21 | S22 | S23 | S3 ;
|
||||
|
||||
oper
|
||||
|
||||
NFS = {s : NForm => Str} ;
|
||||
|
||||
-- Letter-class pattern macros; the rules below refer to them as #name.
-- Consonant letters that hjk_type uses as a (crude) marker of foreign words.
foreign : pattern Str = #("z" | "ž" | "š") ;
|
||||
-- Foreign vowel endings: one of k/p/t/f followed by o, a or u.
|
||||
foreign_v : pattern Str = #("ko" | "po" | "to" | "fo" | "ka" | "pa" | "ta" | "fa" | "ku" | "pu" | "tu" | "fu") ;
|
||||
-- Single vowel letters (note that "w" is listed here as well).
v : pattern Str = #("a" | "e" | "i" | "o" | "u" | "õ" | "ä" | "ö" | "ü" | "w") ;
|
||||
-- Doubled vowels: the same vowel letter written twice ("w" has no doubled form here).
vv : pattern Str = #("aa" | "ee" | "ii" | "oo" | "uu" | "õõ" | "ää" | "öö" | "üü") ;
|
||||
-- Single consonant letters (includes the letters of `foreign`; "w", "x" and "y" are not listed).
c : pattern Str = #("m" | "n" | "p" | "b" | "t" | "d" | "k" | "g" | "f" | "v" | "s" | "h" | "l" | "j" | "r" | "z" | "ž" | "š" | "c" | "q") ;
|
||||
-- The consonants l, m, n, r.
lmnr : pattern Str = #("l" | "m" | "n" | "r") ;
|
||||
-- k, p, t together with f and š.
kpt : pattern Str = #("k" | "p" | "t" | "f" | "š") ;
|
||||
-- The consonants g, b, d.
gbd : pattern Str = #("g" | "b" | "d") ;
|
||||
|
||||
-- Types that map singular nominative to the full paradigm.
|
||||
-- VI and VII include gradation which is described separately.
|
||||
hjk_type,
|
||||
hjk_type_I_koi,
|
||||
hjk_type_II_ema,
|
||||
hjk_type_III_ratsu,
|
||||
hjk_type_IVa_aasta,
|
||||
hjk_type_IVb_maakas,
|
||||
hjk_type_Va_otsene,
|
||||
hjk_type_Vb_oluline,
|
||||
hjk_type_VI_link,
|
||||
hjk_type_VI_imelik,
|
||||
hjk_type_VI_meeskond,
|
||||
hjk_type_VI_seminar,
|
||||
hjk_type_VII_touge : Str -> NForms ;
|
||||
-- hjk_type_VII_touge : Str -> NFS ;
|
||||
|
||||
-- IVb (audit) additionally needs the stem vowel (audit) or the vowel-less stem (audit1).
|
||||
hjk_type_IVb_audit,
|
||||
hjk_type_IVb_audit1 : Str -> Str -> NForms ; --NFS
|
||||
|
||||
hjk_type_VI_tukk : Str -> Str -> NForms ;
|
||||
|
||||
|
||||
-- Definition of the mapping rules.
|
||||
-- Verbatim from HJKEKS.
|
||||
-- Type I ('koi'): the word itself fills the first two slots; the other four
-- slots are the word plus the endings -d, -sse, -de and -sid.
-- (Slot order of nForms6, judging by the example paradigms in this file:
-- sg nom, sg gen, sg part, sg illative, pl gen, pl part -- defined in ResEst.)
hjk_type_I_koi x =
  let
    withD   : Str = x + "d" ;
    withSse : Str = x + "sse" ;
    withDe  : Str = x + "de" ;
    withSid : Str = x + "sid"
  in
    nForms6 x x withD withSse withDe withSid ;
|
||||
|
||||
-- Type II ('ema'): the first three slots are the unchanged word; the
-- remaining slots add -sse, -de and -sid.
hjk_type_II_ema x =
  let
    withSse : Str = x + "sse" ;
    withDe  : Str = x + "de" ;
    withSid : Str = x + "sid"
  in
    nForms6 x x x withSse withDe withSid ;
|
||||
|
||||
-- Type III ('ratsu'): like type II, except that the third slot takes -t.
hjk_type_III_ratsu x =
  let
    withT   : Str = x + "t" ;
    withSse : Str = x + "sse" ;
    withDe  : Str = x + "de" ;
    withSid : Str = x + "sid"
  in
    nForms6 x x withT withSse withDe withSid ;
|
||||
|
||||
-- if ends with 'i' ('arvuti') then last form is 'arvut' + 'e' + 'id'
|
||||
-- There are ~50 such words in the WordNet.
|
||||
-- Type IVa ('aasta').  The last slot is built on a separate stem in which a
-- word-final "i" is turned into "e" (arvuti -> arvute + id); every other
-- slot uses the word as given.
hjk_type_IVa_aasta x =
  let
    lastSlotStem : Str = case x of {
      front + "i" => front + "e" ;
      _           => x
    }
  in
    nForms6 x x (x + "t") (x + "sse") (x + "te") (lastSlotStem + "id") ;
|
||||
|
||||
-- (audit "a") can be used with comparative and superlative adjectives.
|
||||
-- Type IVb ('audit'): x is the first-slot form, v_g the vowel that is added
-- to it in the other slots.  In the last slot an "i" is replaced by "e".
hjk_type_IVb_audit x v_g =
  let
    stem : Str = x + v_g ;
    lastSlotVowel : Str = case v_g of {
      "i"   => "e" ;
      other => other
    }
  in
    nForms6 x stem (stem + "t") (stem + "sse") (stem + "te") (x + lastSlotVowel + "id") ;
|
||||
|
||||
-- TODO: clean this up
|
||||
-- Variant of 'audit' for words whose stem differs from the first-slot form:
-- x is used as is, y is the stem that receives "i" (or "e" in the last slot).
hjk_type_IVb_audit1 x y =
  let
    stemI : Str = y + "i"
  in
    nForms6 x stemI (stemI + "t") (stemI + "sse") (stemI + "te") (y + "eid") ;
|
||||
|
||||
-- Type IVb ('maakas'): all slots but the first are built on the word with
-- its last character removed.
hjk_type_IVb_maakas x =
  let
    short : Str = init x
  in
    nForms6 x short (short + "t") (short + "sse") (short + "te") (short + "id") ;
|
||||
|
||||
|
||||
--Maakas is for maakas:maaka:maakat, this is for hammas:hamba:hammast
|
||||
--Not sure if this is already covered by some hjk_type,
|
||||
--anyway the grades are explicit with two args, more reliable
|
||||
-- Two-argument paradigm (hammas, hamba): both grades are supplied by the
-- caller.  -t and -te attach to the first argument, -sse and -id to the second.
dHammas : (_,_ : Str) -> NForms ;
dHammas hammas hamba =
  let
    firstT   : Str = hammas + "t" ;
    firstTe  : Str = hammas + "te" ;
    secondSse : Str = hamba + "sse" ;
    secondId  : Str = hamba + "id"
  in
    nForms6 hammas hamba firstT secondSse firstTe secondId ;
|
||||
|
||||
-- Two-argument paradigm (meri, mere).  The third slot is the second argument
-- without its last character plus "d"; -sse, -de and -sid attach to the
-- second argument itself.
dMeri : (_,_ : Str) -> NForms ;
dMeri meri mere =
  let
    thirdSlot : Str = init mere + "d"
  in
    nForms6 meri mere thirdSlot (mere + "sse") (mere + "de") (mere + "sid") ;
|
||||
|
||||
-- This rule handles the removal of -ne and -s endings, and the addition of 'e'
|
||||
-- in the case of Cne-nouns (e.g. 'raudne').
|
||||
-- vastus - vastuse - vastust
|
||||
-- otsene - otsese - otsest
|
||||
-- raudne - raudse - raudsEt - raudsesse - raudsEte - raudseid (additional 'e')
|
||||
-- TODO: variant: vastusesse | vastusse
|
||||
-- Type Va ('otsene', 'vastus', 'raudne').
-- A final "ne" is replaced by "s"; any other word is used unchanged.  When
-- the "ne" follows a consonant, the endings -t and -te get a linking "e"
-- (raudne -> raudset, raudsete).
hjk_type_Va_otsene x =
  let
    base : Str = case x of {
      front + "ne" => front + "s" ;
      _            => x
    } ;
    -- stem for the endings -t and -te
    tStem : Str = case x of {
      _ + #c + "ne" => base + "e" ;
      _             => base
    } ;
    -- stem for the second, fourth and sixth slots
    eStem : Str = base + "e"
  in
    nForms6 x eStem (tStem + "t") (eStem + "sse") (tStem + "te") (eStem + "id") ;
|
||||
|
||||
-- TODO: variant: olulisesse | olulisse
|
||||
-- Type Vb ('oluline'): final "ne" becomes "s", final "ke" becomes "kes",
-- anything else is kept.  The last slot ends in -i (cf. -eid in type Va).
hjk_type_Vb_oluline x =
  let
    stem : Str = case x of {
      front + "ne" => front + "s" ;
      front + "ke" => front + "kes" ;
      _            => x
    } ;
    eStem : Str = stem + "e"
  in
    nForms6 x eStem (stem + "t") (eStem + "sse") (stem + "te") (stem + "i") ;
|
||||
|
||||
-- Type VI ('link'): the second slot uses the weakened word (weaker_noun) + "i";
-- the third and fourth slots are the unweakened word + "i".
hjk_type_VI_link x =
  let
    weak   : Str = weaker_noun x ;
    strong : Str = x + "i"
  in
    nForms6 x (weak + "i") strong strong (strong + "de") (x + "e") ;
|
||||
|
||||
|
||||
--like link but
|
||||
-- gen form given (takes care of vowel and consonant gradation)
|
||||
-- -sid for pl.part (todo: generate short forms depending on vowel?)
|
||||
-- Like 'link', but the second-slot form (x_gen) is given by the caller.
-- Its last character is taken as the stem vowel and appended to x for the
-- remaining slots.  The last slot always ends in -sid; no vowel-dependent
-- short form is generated.
hjk_type_VI_tukk x x_gen =
  let
    strong : Str = x + last x_gen
  in
    nForms6 x x_gen strong strong (strong + "de") (strong + "sid") ;
|
||||
|
||||
|
||||
-- Type VI ('imelik'): stem vowel "u".  The third, fourth and sixth slots are
-- built on the strengthened word (stronger_noun), the others on the word itself.
hjk_type_VI_imelik x =
  let
    strong  : Str = stronger_noun x ;
    strongU : Str = strong + "u"
  in
    nForms6 x (x + "u") strongU strongU (x + "e") (strong + "e") ;
|
||||
|
||||
-- Type VI ('meeskond'): stem vowel "a".  Only the second slot uses the
-- weakened word (weaker_noun); the last slot is the word + "i".
hjk_type_VI_meeskond x =
  let
    weak   : Str = weaker_noun x ;
    strong : Str = x + "a"
  in
    nForms6 x (weak + "a") strong strong (strong + "de") (x + "i") ;
|
||||
|
||||
-- Type VI ('seminar'): same endings as 'link' but without any weakening,
-- so slots two to four all coincide (word + "i").
hjk_type_VI_seminar x =
  let
    withI : Str = x + "i"
  in
    nForms6 x withI withI withI (withI + "de") (x + "e") ;
|
||||
|
||||
-- Type VII ('touge'): the word minus its last character is strengthened
-- (stronger_noun) and "e" is put back; that form fills the second slot and
-- carries -sse and -id.  -t and -te attach to the word as given.
hjk_type_VII_touge x =
  let
    bare   : Str = init x ;
    strong : Str = (stronger_noun bare) + "e"
  in
    nForms6 x strong (x + "t") (strong + "sse") (x + "te") (strong + "id") ;
|
||||
|
||||
--Identical to the above, just taking 2 arguments (nom + gen)
|
||||
--There are 67 nouns in test cases where stronger_noun gets it wrong
|
||||
--handles liige:liikme as well
|
||||
-- Two-argument version of type VII: both forms are supplied by the caller.
-- The -te ending normally attaches to the first argument, but to the second
-- one when that ends in "me" or "mne" (liige : liikme).
hjk_type_VII_touge2 : (_,_ : Str) -> NForms ;
hjk_type_VII_touge2 touge touke =
  let
    teStem : Str = case touke of {
      _ + ("me" | "mne") => touke ;
      _                  => touge
    }
  in
    nForms6 touge touke (touge + "t") (touke + "sse") (teStem + "te") (touke + "id") ;
|
||||
|
||||
-- Use this only to weaken the verbs
|
||||
-- Weakening for verbs.  Only the last two characters of the input are
-- inspected; the rest is passed through.  Branches are tried in the order
-- written, and unmatched input is returned unchanged.
weaker : Str -> Str ;
weaker link =
  let
    front = Predef.tk 2 link ;   -- input without its last two characters
    tail  = Predef.dp 2 link     -- the last two characters
  in
    case tail of {
      -- doubled kk/pp/tt/ff -> single
      "kk" => front + "k" ;
      "pp" => front + "p" ;
      "tt" => front + "t" ;
      "ff" => front + "f" ;
      -- süsi,söe ; ütlema,öelda
      ("üt"|"üs") => front + "ö" ;
      -- (a rule "ad" => front + "aj" for sada,saja is disabled)
      -- after a vowel: k/p/t -> g/b/d
      vow@(#v) + "k" => front + vow + "g" ;
      vow@(#v) + "p" => front + vow + "b" ;
      vow@(#v) + "t" => front + vow + "d" ;
      -- after a vowel: g and d are dropped, b -> v
      vow@(#v) + "g" => front + vow ;          -- liuglema,liuelda
      vow@(#v) + "b" => front + vow + "v" ;    -- leib,leiva
      vow@(#v) + "d" => front + vow ;          -- hoidma,hoiab
      -- after l/m/n/r: k/p/t -> g/b/d
      son@(#lmnr) + "k" => front + son + "g" ;
      son@(#lmnr) + "p" => front + son + "b" ;
      son@(#lmnr) + "t" => front + son + "d" ;
      -- after l/m/n/r: d and b are replaced by a copy of the preceding letter
      son@(#lmnr) + "d" => front + son + son ;
      son@(#lmnr) + "b" => front + son + son ;
      -- after l/r: g is dropped
      son@("l"|"r") + "g" => front + son ;     -- algama,alata
      "sk" => front + "s" ;
      "h" + #kpt => front + "h" ;
      _ => link
    } ;
|
||||
|
||||
|
||||
-- Weakening of nouns.
|
||||
-- Only the very stable weakening that happens to nouns.
|
||||
-- TODO: verify correctness/completeness based on some other implementation.
|
||||
-- Weakening for nouns, decided from the end of the word.  Branches are tried
-- in the order written; a word that matches none of them is returned as is.
weaker_noun : Str -> Str ;
weaker_noun link =
  case link of {
    -- doubled final consonant -> single
    front + "kk" => front + "k" ;
    front + "pp" => front + "p" ;
    front + "tt" => front + "t" ;
    front + "ff" => front + "f" ;
    front + "šš" => front + "š" ;
    -- l/m/n/r + "ss" -> l/m/n/r + "s"
    front + son@(#lmnr) + "ss" => front + son + "s" ;
    -- vowel + k/p/t -> vowel + g/b/d
    front + vow@(#v) + "k" => front + vow + "g" ;
    front + vow@(#v) + "p" => front + vow + "b" ;
    front + vow@(#v) + "t" => front + vow + "d" ;
    -- l/m/n/r + k/p/t -> l/m/n/r + g/b/d
    front + son@(#lmnr) + "k" => front + son + "g" ;
    front + son@(#lmnr) + "p" => front + son + "b" ;
    front + son@(#lmnr) + "t" => front + son + "d" ;
    -- "h" + one of k/p/t/f/š -> "h"
    front + "h" + #kpt => front + "h" ;
    -- the ending -kond -> -konn
    front + "kond" => front + "konn" ;
    _ => link
  } ;
|
||||
|
||||
-- Strengthening of nouns.
|
||||
-- Input must not have the last vowel.
|
||||
-- Strengthening for nouns, decided from the end of the input (which must not
-- include the final vowel).  Branches are tried in the order written.
-- Endings that must stay unchanged are listed before the general rules so
-- that the latter do not apply to them.
stronger_noun : Str -> Str ;
stronger_noun x =
  case x of {
    -- unchanged: -lg, tahke, katke, morse
    _ + ("lg" | "hk" | "tk" | "rs") => x ;
    front + "rr" => front + "rd" ;              -- murre
    -- unchanged: makse, lause, raske (?), katse
    _ + ("ks" | "us" | "sk" | "ts") => x ;
    front + "ps" => front + "psm" ;             -- ripse -> ripsme
    -- unchanged: tante; -st (TODO: sometimes stm: iste, kaste)
    _ + ("nt" | "st") => x ;
    -- a final k/p/t/s is doubled
    _ + fin@("k"|"p"|"t"|"s") => x + fin ;
    -- final g/d/b -> k/t/p
    front + "g" => front + "k" ;
    front + "d" => front + "t" ;
    front + "b" => front + "p" ;
    -- vowel + v -> vowel + b; works for 'iive' but not 'irve'
    front + vow@(#v) + "v" => front + vow + "b" ;
    front + "mm" => front + "mb" ;              -- komme -> kombe
    front + "nn" => front + "nd" ;
    _ => x
  } ;
|
||||
|
||||
-- Strengthening of verbs.
|
||||
-- Strengthening for verbs.  Only the last two characters are rewritten; the
-- part before them is copied through.  Within those two characters a
-- k/p/t/s is doubled and g/d/b become k/t/p; anything else stays as it is.
stronger : Str -> Str ;
stronger x =
  let
    front = tk 2 x ;   -- input without its last two characters
    tail  = dp 2 x     -- the last two characters
  in
    front + case tail of {
      pre + fin@("k"|"p"|"t"|"s") + post => pre + fin + fin + post ;
      pre + "g" + post => pre + "k" + post ;
      pre + "d" + post => pre + "t" + post ;
      pre + "b" + post => pre + "p" + post ;
      _ => tail
    } ;
|
||||
|
||||
|
||||
-- Mapping of singular nominative to HJKEKS types.
|
||||
-- This implements the patterns from HJKEKS section 8 but
|
||||
-- makes the rule ordering explicit, handles things like dropping 'e'
|
||||
-- in 'reegel' -> 'reegli', etc.
|
||||
-- Works ~90% correctly, ~100% correctly with input longer than 10 letters.
|
||||
-- If this rule delivers an incorrect form, then use the 6-arg oper.
|
||||
-- This is also needed if another legal form is desired,
|
||||
-- e.g. palk -> palga (the default is palk -> palgi).
|
||||
--
|
||||
-- This rule does not cover:
|
||||
-- - exceptional words (workaround: take these from the lexicon)
|
||||
-- - compound words (workaround: mark the compound border manually)
|
||||
-- - comparative and superlative adjective forms (workaround: use mkA instead)
|
||||
-- - type VII (t6uge -> t6uke), as one needs to detect derivation from verb
|
||||
-- - last syllable superlong (rostbiif)
|
||||
-- Chooses a paradigm for the word x by matching the pair <syl_type x, x>
-- against the branches below.  The branches are tried from top to bottom and
-- the first one that matches decides, so their order is significant (several
-- TODOs below mention rules being "masked" by earlier ones).
-- Pattern notation: `_` matches any string, `?` one character, `#name` a
-- pattern macro of this module, `p | q` alternatives, `-p` anything but p.
-- In the example words, '6' presumably stands for 'õ' (k6ne, t6uge) -- confirm.
hjk_type x =
|
||||
case <(syl_type x), x> of {
|
||||
<S3, _ + "ke">
|
||||
=> hjk_type_Vb_oluline x ;
|
||||
|
||||
<_, _ + "kond">
|
||||
=> hjk_type_VI_meeskond x ;
|
||||
|
||||
-- Some S2 -ik words (voolik), we only cover words with double vowel
|
||||
<_, _ + #vv + ("lik"|"nik"|"stik")>
|
||||
=> hjk_type_IVb_audit x "u" ;
|
||||
|
||||
-- Other -ik words as in HJKEKS,
|
||||
-- but added 'ndik' which fixes fractions ('kaheksandik')
|
||||
-- and is wrong only for 'kandik'.
|
||||
<_, _ + ("lik"|"nik"|"stik"|"ndik")>
|
||||
=> hjk_type_VI_imelik x ;
|
||||
|
||||
-- Remaining -k words (but need to be S2)
|
||||
-- but not 'konjak'
|
||||
<S2, _ + ("a"|"e"|"i") + ("ng"|"k")>
|
||||
=> hjk_type_IVb_audit x "u" ;
|
||||
|
||||
-- Other -ik words (not in HJKEKS)
|
||||
-- including also: alevik, asemik, lobudik, hämarik, sarapik, põletik
|
||||
<_, _ + ("vik"|"mik"|"dik"|"rik"|"pik"|"tik")>
|
||||
=> hjk_type_VI_imelik x ;
|
||||
|
||||
-- kikas
|
||||
<_, ? + #v + #c + #v + "s">
|
||||
=> hjk_type_Va_otsene x ;
|
||||
|
||||
<_, _ + ("ngas"|"kas"|"jas"|"nud"|"tud")>
|
||||
=> hjk_type_IVb_maakas x ;
|
||||
|
||||
<S1, _ + #v + #v>
|
||||
=> hjk_type_I_koi x ;
|
||||
|
||||
-- 'statiiv' (not like 'karjuv')
|
||||
<S1, _ + #vv + #c>
|
||||
=> hjk_type_VI_link x ;
|
||||
|
||||
<S3, _ + #c + #v + #lmnr>
|
||||
=> hjk_type_VI_seminar x ;
|
||||
|
||||
<S1, _ + #v + #v + #c>
|
||||
=> hjk_type_VI_link x ;
|
||||
|
||||
<_, _ + ("us"|"is")>
|
||||
=> hjk_type_Vb_oluline x ;
|
||||
|
||||
<S3, _ + #v + #v + #c>
|
||||
=> hjk_type_VI_link x ;
|
||||
|
||||
<(S1|S3), _ + #v + #c + #c>
|
||||
=> hjk_type_VI_link x ;
|
||||
|
||||
<(S1|S3), _ + #v + #c + #c + #c>
|
||||
=> hjk_type_VI_link x ;
|
||||
|
||||
<_, _ + "nna">
|
||||
=> hjk_type_III_ratsu x ;
|
||||
|
||||
<-(S21|S22), _ + ("nu"|"tu")>
|
||||
=> hjk_type_IVa_aasta x ;
|
||||
|
||||
-- TODO: improve foreign detection
|
||||
<S2, _ + #foreign + _ + "in">
|
||||
=> hjk_type_IVb_audit x "i" ;
|
||||
|
||||
-- TODO: this is not in HJKEKS
|
||||
-- 'absurd' vs 'ebard'
|
||||
<S2, _ + #v + #lmnr + "d">
|
||||
=> hjk_type_IVb_audit x "i" ;
|
||||
|
||||
-- sometimes 'a' (laurits) TODO: this is not in HJKEKS
|
||||
<S2, _ + #v + #kpt + "s">
|
||||
=> hjk_type_IVb_audit x "i" ;
|
||||
|
||||
-- TODO: next 3 rules: last syllable must be long
|
||||
-- portfell, TODO: not 'karask'
|
||||
<S2, _ + #v + #c + #c>
|
||||
=> hjk_type_VI_link x ;
|
||||
|
||||
-- rostbiif, not viiul
|
||||
<S2, _ + #c + #v + #v + #c>
|
||||
=> hjk_type_VI_link x ;
|
||||
|
||||
-- impulss
|
||||
<S2, _ + #v + #c + #c + #c>
|
||||
=> hjk_type_VI_link x ;
|
||||
|
||||
-- TODO: sometimes masked by 'maakas'
|
||||
<_, _ + #v + "s">
|
||||
=> hjk_type_Va_otsene x ;
|
||||
|
||||
-- NOTE(review): every word ending in "tav" also ends in "v", so the second
-- alternative looks redundant.
<_, _ + ("v"|"tav")>
|
||||
=> hjk_type_IVb_audit x "a" ;
|
||||
|
||||
-- The choice between Va (pl part: -seid) and Vb (pl part: -si)
|
||||
-- is based on checking the derivational ending.
|
||||
-- We just check the ending of the word and require at least 2 letters
|
||||
-- to precede the ending.
|
||||
-- We added also -tine and -ldane (which occur with adjectives).
|
||||
<_, _ + ? + ? + ("line"|"lane"|"mine"|"kene"|"tine"|"ldane")>
|
||||
=> hjk_type_Vb_oluline x ;
|
||||
|
||||
-- k6ne
|
||||
<S21, _ + "e">
|
||||
=> hjk_type_III_ratsu x ;
|
||||
|
||||
-- Many adjectives end with "ne" (40% in WordNet)
|
||||
-- We require them to be at least 5 letters long (excluding 'öine'),
|
||||
-- to give a chance to VII_touge (next rule).
|
||||
<_, _ + ? + ? + ? + "ne">
|
||||
=> hjk_type_Va_otsene x ;
|
||||
|
||||
-- Note: this rule does not actually check the derivation from verb.
|
||||
-- verb + e, TODO: masked by S21/e
|
||||
<(S2|S22), _ + "e">
|
||||
=> hjk_type_VII_touge x ;
|
||||
|
||||
-- ufo, pita, lito
|
||||
<S21, _ + #foreign_v>
|
||||
=> hjk_type_III_ratsu x ;
|
||||
|
||||
<S21, _ + #v>
|
||||
=> hjk_type_II_ema x ;
|
||||
|
||||
<S22, _ + #v>
|
||||
=> hjk_type_III_ratsu x ;
|
||||
|
||||
<S23, _ + #v>
|
||||
=> hjk_type_IVa_aasta x ;
|
||||
|
||||
<S2, _ + "in">
|
||||
=> hjk_type_IVb_audit x "a" ;
|
||||
|
||||
-- 'e' deletion
|
||||
-- kringel -> kringli, amper -> ampri, meeter -> meetri, reegel -> reegli
|
||||
-- kaabel-> kaabli (TODO: not: juubel -> juubli)
|
||||
-- spikker -> spikri (TODO: not: pokker -> pokkeri)
|
||||
-- Note: pintsel -> pintsli, but not pitser -> pitsri
|
||||
-- Note: 'redel' and 'paber' do not lose the 'e'.
|
||||
<S2, y + kk@("kk"|"pp"|"tt"|"hh") + "e" + l@("l"|"r")>
|
||||
=> hjk_type_IVb_audit1 x (y + (init kk) + l) ;
|
||||
|
||||
-- aaker -> aakri, teater -> teatri
|
||||
<S2, y + vvkpt@(#v + #v + #kpt) + "e" + l@("l"|"r")>
|
||||
=> hjk_type_IVb_audit1 x (y+vvkpt+l) ;
|
||||
|
||||
<S2, y + vv@(#vv) + gbd@(#gbd) + "e" + l@("l"|"r")>
|
||||
=> hjk_type_IVb_audit1 x (y+vv+gbd+l) ;
|
||||
|
||||
-- Disabled, 50-50 correctness
|
||||
--<S2, y + vv@(#vv) + lmnr@(#lmnr) + "e" + l@("l"|"r")>
|
||||
-- => hjk_type_IVb_audit1 x (y+vv+lmnr+l) ; -- 50-50
|
||||
|
||||
<S2, y + vv@(#vv) + s@("s"|"v") + "e" + l@("l"|"r")>
|
||||
=> hjk_type_IVb_audit1 x (y+vv+s+l) ;
|
||||
|
||||
<S2, y + n@("ht"|"hk"|"hv"|"nts"|"ld"|"lv"|"lb"|"ng"|"nd"|"mb"|"mp"|"nt"|"ps"|"ks"|"sk"|"st") + "e" + l@("l"|"r")>
|
||||
=> hjk_type_IVb_audit1 x (y+n+l) ;
|
||||
|
||||
<S2, y + "e" + l@("l"|"r")>
|
||||
=> hjk_type_IVb_audit x "i" ;
|
||||
|
||||
-- TODO: sometimes masked by 'link'
|
||||
<S2, _ + #c>
|
||||
=> hjk_type_IVb_audit x "i" ;
|
||||
|
||||
<S3, _ + #v>
|
||||
=> hjk_type_IVa_aasta x ;
|
||||
|
||||
-- verb + 'e'
|
||||
<_, _ + "e">
|
||||
=> hjk_type_VII_touge x ;
|
||||
|
||||
-- catch all that end with consonant
|
||||
<_, _ + #c>
|
||||
=> hjk_type_IVb_audit x "i" ;
|
||||
|
||||
-- TODO: not in HJKEKS
|
||||
<_, _ + ("ia"|"ja")> --kündja, not gerilja
|
||||
=> hjk_type_IVa_aasta x ;
|
||||
|
||||
--added by Inari 07.10.
|
||||
-- NOTE(review): an S23 word ending in a vowel is already taken by the
-- <S23, _ + #v> branch further up (with the same result), so this branch
-- looks unreachable -- confirm.
<S23, _ + #c + ("la")> --haigla, not gorilla
|
||||
=> hjk_type_IVa_aasta x ;
|
||||
|
||||
-- catch all
|
||||
<_, _>
|
||||
=> hjk_type_III_ratsu x
|
||||
} ;
|
||||
|
||||
|
||||
-- Assigns stress/quantity indicator (SylType) to the word based on
|
||||
-- its character composition.
|
||||
-- Note: you cannot use recursion (circular definitions) in these rules
|
||||
-- Note: patterns must be linear (GF book C.4.13), i.e. you cannot write
|
||||
-- oi@(#v + #v) + oi => S2 ; -- oi-oi, ai-ai, oo-oo
|
||||
-- Maps a word to a SylType purely from its sequence of vowel (#v, #vv) and
-- consonant (#c, #lmnr, #kpt, #gbd) letters.  Rules are tried from top to
-- bottom and the first match decides; words matching nothing get S2.
-- `?` matches one character and `_` any string, so a rule that starts with
-- `_` also applies to words longer than the section header suggests.
syl_type : Str -> SylType ;
|
||||
syl_type x =
|
||||
case x of {
|
||||
-- all 1-letters
|
||||
? => S1 ;
|
||||
-- all 2-letters
|
||||
? + ? => S1 ;
|
||||
-- all 3-letters
|
||||
#v + #c + #v => S21 ;
|
||||
#v + #v + #v => S22 ;
|
||||
? + ? + ? => S1 ; -- koi, kae
|
||||
-- all 4-letters
|
||||
#c + #v + #v + #c => S1 ; -- siid
|
||||
#c + #v + #c + #c => S1 ; -- link
|
||||
#v + #c + #v + #c => S2 ;
|
||||
#v + #vv + #c => S1 ; -- auul, ioon, oaas
|
||||
#v + #v + #v + #c => S2 ; -- aiak (?)
|
||||
#v + #v + #c + #v => S22 ; -- aine, aade; not: 6ige
|
||||
#v + #c + #v + #v => S1 ; -- epee, oboe
|
||||
#v + #c + #c + #v => S22 ; -- iste, iglu; not: 6htu
|
||||
#c + #v + #c + #v => S21 ;
|
||||
#c + #v + #v + #v => S22 ; -- muie, neiu, riie
|
||||
? + ? + ? + ? => S1 ;
|
||||
-- all 5-letters
|
||||
_ + #c + "ia" => S2 ; -- aaria, minia, orgia, kirurgia, nostalgia
|
||||
#v + #c + #c + #v + #v => S1 ; -- armee
|
||||
#c + #v + #c + #v + #v => S1 ; -- depoo
|
||||
#c + #c + #v + #c + #c => S1 ; -- tramm
|
||||
#c + #v + #c + #c + #c => S1 ;
|
||||
#c + #v + #vv + #c => S1 ; -- poeem
|
||||
#c + #v + #v + #v + #c => S2 ; -- hoius, laius, maius
|
||||
#c + #v + #c + #v + #c => S2 ; -- redel
|
||||
#c + #v + #c + #gbd + "e" => S23 ; -- valge, k6rge; p6rge, hange
|
||||
#c + #v + #v + #gbd + "e" => S22 ; -- haige, kauge; t6uge
|
||||
#c + #v + #v + #c + #v => S22 ; -- lause; TODO: leitu, rootu (S23)
|
||||
#c + #v + #c + #c + #v => S22 ; -- ratsu; not: surnu
|
||||
#v + #c + #c + #c + #v => S23 ;
|
||||
#v + #c + #c + #v + #c => S2 ; -- amper
|
||||
#v + #c + #v + #c + #c => S2 ; -- avang
|
||||
_ + #c + #vv + #c + #c => S1 ; -- loots (double vowel, otherwise the same as below)
|
||||
#c + #v + #v + #c + #c => S2 ; -- laeng, loend
|
||||
#c + #c + #v + #v + #c => S1 ; -- bluus, kruus, kreem
|
||||
#v + #c + #v + #v + #c => S1 ; -- ukaas, TODO: not 'avaus'
|
||||
#v + #v + #c + #v + #c => S2 ; -- aatom
|
||||
#v + #v + #c + #c + #v => S23 ; -- aasta
|
||||
#v + #v + #c + #v + #v => S1 ; -- aaloe (?)
|
||||
#c + #c + #v + #c + #v => S21 ; -- blogi
|
||||
_ + ? + #v + #vv + #c => S1 ; -- -ioos, kruiis
|
||||
-- NOTE: the next rule matches six letters although it sits in the 5-letter section.
#c + #c + #v + #v + #v + #c => S2 ; -- flaier
|
||||
-- Matches any word of five or more letters that ends in consonant-vowel-consonant-vowel.
_ + ? + #c + #v + #c + #v => S3 ; -- oluline
|
||||
-- all 6-letters
|
||||
#v + #c + #c + #v + #v + #c => S1 ; -- aplaus
|
||||
#v + #c + #c + #v + #c + #c => S2 ; -- astang, ellips
|
||||
-- NOTE(review): 'muumia' ends in consonant + "ia" and is therefore already
-- classified as S2 by the `_ + #c + "ia"` rule above; only words like
-- 'raadio' reach this rule.
#c + #vv + #c + #v + #v => S23 ; -- muumia, raadio, TODO: exclude 'vaarao'
|
||||
#c + #v + #v + #c + #v + #v => S1 ; -- peoleo
|
||||
#c + #v + #v + #c + #c + #v => S23 ; -- haigla --added by Inari, not sure if always correct
|
||||
#c + #v + #c + #c + #c + #v => S23 ; -- vangla --added by Inari, not sure if always correct
|
||||
#c + #v + #c + #vv + #c => S1 ; -- deviis (double vowel in the last syllable)
|
||||
#v + #c + #v + #c + #v + #v => S1 ; -- agoraa
|
||||
#c + #v + #c + #v + #c + #c => S2 ;
|
||||
-- NOTE(review): already covered by the `_ + ? + #c + #v + #c + #v` rule above (same result S3).
#c + #v + #c + #v + #c + #v => S3 ;
|
||||
#v + #c + #v + #c + #c + #v => S3 ; -- yheksa
|
||||
#c + #v + #c + #c + #v + #c => S2 ; -- rektor
|
||||
#c + #v + #c + #v + #v + #c => S2 ; -- paleus
|
||||
#c + #v + #v + #c + #v + #c => S2 ; -- meeter, reegel
|
||||
#v + #v + #c + #c + #v + #c => S2 ; -- aastak
|
||||
#v + #c + #c + #c + #v + #c => S2 ; -- andmik
|
||||
-- NOTE(review): already covered by the `_ + ? + #c + #v + #c + #v` rule above (same result S3).
#v + #c + #c + #v + #c + #v => S3 ;
|
||||
_ + #v + #c + #v + #c + #v + #c => S3 ; -- alevik, elanik
|
||||
-- all 7-letters
|
||||
_ + ? + ? + #c + #vv + #c => S1 ; -- double vowel in the last syllable: bensiin, benseen, bensool
|
||||
#c + #v + #v + #c + #c + #v + #c => S2 ; -- jooksik
|
||||
#c + #v + #c + #c + #c + #v + #c => S2 ; -- hurtsik
|
||||
#c + #v + #c + #c + #v + #c + #c => S2 ; -- kitsend
|
||||
#c + #v + #c + #c + #v + #v + #c => S2 ; -- pension
|
||||
#c + #v + #c + #v + #c + #v + #c => S3 ; -- seminar
|
||||
#c + #c + #v + #c + #c + #v + #c => S2 ; -- kringel, plastik
|
||||
_ + #v + #c + #v + #kpt + #kpt + #v + #c => S2 ; -- elekter, adapter
|
||||
_ + #c + #v + #lmnr + #gbd + #v + #c => S2 ; -- (k)alender, (dets)ember
|
||||
_ + #c + #v + #lmnr + #kpt + #v + #c => S2 ; -- (re)porter
|
||||
_ + #c + #v + "stik" => S3 ; -- kuristik (TODO: not logistik)
|
||||
_ + #c + #v + "s" + #kpt + #v + #c => S2 ; -- (k)anister
|
||||
#v + #c + #v + #c + #c + #v + #c => S3 ; -- apelsin
|
||||
#v + #c + #c + #v + #c + #v + #c => S3 ; -- admiral
|
||||
#c + #v + #c + #v + #c + #c + #v => S3 ; -- kaheksa
|
||||
#c + #c + #v + #c + #v + #c + #c => S2 ; -- klopits
|
||||
#c + #v + #v + #c + #v + #c + #c => S2 ; -- haarang
|
||||
#c + #v + #v + #c + #v + #v + #c => S2 ; -- raadius, kauneim
|
||||
_ + #c + #v + #v + #c + #v + #c => S2 ; -- araabik
|
||||
_ + #lmnr + #gbd + #v + #c + #c + #v + #c => S3 ; -- (pa)lderjan, (ko)rgitser
|
||||
-- other
|
||||
_ + #c + #v + #c + #c + #v + #c + #v + #c => S3 ; -- karneval
|
||||
#c + #v + #c + #v + #c + #c + #v + #c => S3 ; -- ragastik (kalender is handled above)
|
||||
_ + #v + #v + #c + #v + #c + #c + #v + #c => S3 ; -- ainestik
|
||||
_ + #c + #c + #v + #c + #c + #v + #c + #c => S3 ; -- ampersand
|
||||
_ + #c + #v + #c + #v + #c + #c => S1 ; -- dividend
|
||||
_ + #v + #c + #c + #c + #v + #v => S1 ; -- displei
|
||||
_ + #c + #v + #c + #c + #v + #v => S1 ; -- politsei
|
||||
_ + #c + #v + #c + #v + #v => S1 ; -- defilee, kompanii
|
||||
_ => S2 -- the default is S2, but the above rules should catch most of the words
|
||||
} ;
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user