resource TableExtResHrv = open Prelude in {

-- AR September 2022
-- sources:
-- Wiki = https://en.wikipedia.org/wiki/Serbo-Croatian_grammar
-- BCMS = Bosnian, Croatian, Montenegrin and Serbian:
--   An Essential Grammar (Routledge Essential Grammars) 1st Edition, by Željko Vrabec
--
-- Low-level morphology resource: parameter types, phonological helpers,
-- noun / adjective / verb / pronoun form tables and numeral agreement.

-- parameters

param
  Number  = Sg | Pl ;
  Animacy = Anim | Inanim ;
  Gender  = Masc Animacy | Fem | Neutr ;
  Case    = Nom | Gen | Dat | Acc | Voc | Loc | Ins ; -- traditional order
  Person  = P1 | P2 | P3 ;

  VForm =
      VInf
    | VPres Number Person
    | VPastPart Gender Number
    ; ---- TODO aorist, imperfect

  Agr = Ag Gender Number Person ;

  CTense = CTPres | CTPast ; ----- TODO complete the tense system to match BCS verb morphology

-- phonology

oper
  -- word-final consonants that trigger the "soft" ending variants
  softConsonant : pattern Str = #("c"|"č"|"ć"|"đ"|"j"|"lj"|"nj"|"š"|"ž"|"št") ;
    --- wiki "and sometimes r"; BCMS slightly different

  -- choose between a soft and a hard variant depending on the end of word
  ifSoft : Str -> (soft,hard : Str) -> Str = \word, ssoft, shard ->
    case word of {
      _ + #softConsonant => ssoft ;
      _ => shard
    } ;

  -- gender shorthands
  animate   = Masc Anim ;
  inanimate = Masc Inanim ;
  feminine  = Fem ;
  neuter    = Neutr ;

  -- palatalization of a stem-final k/g/h/c before an already attached i/e;
  -- any other string is returned unchanged
  palatalize : Str -> Str = \s -> case s of {
    x + "ki" => x + "ci" ;
    x + "ke" => x + "če" ;
    x + "gi" => x + "zi" ;
    x + "ge" => x + "že" ;
    x + "hi" => x + "si" ;
    x + "he" => x + "še" ;
    x + "ce" => x + "če" ;
    _ => s
    } ;

  -- replace a final voiced consonant by its voiceless counterpart
  -- ("dž" must be tried before "ž", which the order below guarantees)
  voicing : Str -> Str = \s -> case s of {
    x + "b"  => x + "p" ;
    x + "d"  => x + "t" ;
    x + "đ"  => x + "ć" ;
    x + "z"  => x + "s" ;
    x + "dž" => x + "č" ;
    x + "ž"  => x + "š" ;
    _ => s
    } ;

---------------
-- Nouns
---------------

-- novel idea (for RGL): lexical items stored as records rather than tables
-- advantages:
-- - easier to make exceptions to paradigms (by ** {})
-- - easier to keep the number of forms minimal
-- - easier to see what is happening than with lots of anonymous arguments to mkN, mkA, mkV
-- Starting from GF 2022-10-04: this can now be done with table extension, which gives
-- type safety and more powerful pattern matching

param
  NForm = snom | sgen | sdat | sacc | svoc | sins | pnom | pgen | pdat | pacc ;

oper
  NounForms : Type = NForm => Str ;

-- But traditional tables make agreement easier to handle in syntax
-- so this is the lincat of CN

  Noun : Type = {s : Number => Case => Str ; g : Gender} ;

-- for lexical nouns N, we also need the gender but keep the minimal set of forms

  LexNoun : Type = {s : NounForms ; g : Gender} ;

-- this is used in UseN
-- Expands the 10 stored forms to the full Number x Case table:
-- Sg Acc is the stored sacc only for animate masculines and feminines, otherwise = Sg Nom;
-- Sg Loc = sdat; Pl Voc = pnom; Pl Loc = Pl Ins = pdat.

  nounFormsNoun : NounForms -> Gender -> Noun = \forms, g -> {
    s = table {
      Sg => table {
        Nom => forms ! snom ;
        Gen => forms ! sgen ;
        Dat => forms ! sdat ;
        Acc => case g of {
          Masc Anim | Fem => forms ! sacc ;
          _ => forms ! snom
          } ;
        Voc => forms ! svoc ;
        Loc => forms ! sdat ;
        Ins => forms ! sins
        } ;
      Pl => table {
        Nom => forms ! pnom ;
        Gen => forms ! pgen ;
        Dat => forms ! pdat ;
        Acc => forms ! pacc ;
        Voc => forms ! pnom ;
        Loc => forms ! pdat ;
        Ins => forms ! pdat
        }
      } ;
    g = g
    } ;

-- a declension type produces these forms from a string

  DeclensionType : Type = Str -> NounForms ;

-- smart paradigms
-- The declension is guessed from the ending; branch order matters, longer
-- endings must precede their suffixes (e.g. "lac" before "ac" before "c").
-- Consonant-final nouns default to masculine inanimate; use mkgLexNoun to
-- give another gender/animacy.

  smartLexNoun : Str -> LexNoun = \s -> case s of {
    _ + "a"   => {s = zenaN s ; g = feminine} ;
    _ + "i"   => {s = ziriN s ; g = inanimate} ; ---- TODO feminine i
    _ + "e"   => {s = poljeN s ; g = neuter} ; ---- TODO sunce, uze, zvonce, rame
    _ + "ao"  => {s = ugaoN s ; g = inanimate} ;
    _ + "eo"  => {s = pepeoN s ; g = inanimate} ;
    _ + "o"   => {s = koljenoN s ; g = neuter} ; ---- TODO jedro
    _ + "lac" => {s = posjetilacN s ; g = inanimate} ;
    _ + "anj" => {s = suzanjN s ; g = inanimate} ;
    _ + "nj"  => {s = panjN s ; g = inanimate} ;
    _ + "št"  => {s = pristN s ; g = inanimate} ;
    _ + "ac"  => {s = klinacN s ; g = inanimate} ;   -- was neuter: consonant-final nouns are masculine
    _ + "c"   => {s = stricN s ; g = inanimate} ;
    _ + "in"  => {s = gradaninN s ; g = inanimate} ; -- was neuter: consonant-final nouns are masculine
    _ + "ak"  => {s = cvorakN s ; g = inanimate} ;
    _ + "a" + ? => {s = nokatN s ; g = inanimate} ;  -- fleeting "a" before the final consonant
    _ + "g"   => {s = bubregN s ; g = inanimate} ;
    _ + "h"   => {s = trbuhN s ; g = inanimate} ;
    _ + "k"   => {s = vojnikN s ; g = inanimate} ;
    _ => {s = izvorN s ; g = inanimate}
    } ;

  -- like smartLexNoun, but with the gender given explicitly;
  -- vowel-final masculines get their own declensions
  mkgLexNoun : Str -> Gender -> LexNoun = \s,g -> case <s,g> of {
    <_ + "i", Masc _> => {s = ziriN s ; g = g} ;
    <_ + "e", Masc _> => {s = bifeN s ; g = g} ;
    <_ + "o", Masc _> => {s = bifeN s ; g = g} ;
    <_, g> => smartLexNoun s ** {g = g}
    } ;

-- the traditional declensions, following Wiki
-- they are also exported in ParadigmsHrv with names izvorN etc

  -- basic masculine consonant-stem declension; sacc is the animate
  -- (genitive-like) form, nounFormsNoun substitutes snom for inanimates
  izvorN : DeclensionType = \izvor -> table {
    snom => izvor ;
    sgen => izvor + "a" ;
    sdat => izvor + "u" ;
    sacc => izvor + "a" ;
    svoc => ifSoft izvor (izvor + "u") (palatalize (izvor + "e")) ;
    sins => ifSoft izvor (izvor + "em") (izvor + "om") ;
    pnom => palatalize (izvor + "i") ;
    pgen => izvor + "a" ;
    pdat => ifSoft izvor
              (palatalize (izvor + "e") + "vima")
              (palatalize (izvor + "i") + "ma") ;
    pacc => izvor + "e"
    } ;

  -- fleeting "a": the oblique stem drops the penultimate letter (nokat - nokt-)
  nokatN : DeclensionType = \nokat ->
    let nokt = Predef.tk 2 nokat + last nokat
    in izvorN nokt ** {
      snom => nokat ;
      sacc => nokt + "a" ;
      pgen => nokat + "a"
      } ;

  -- singular on the full stem, plural on the stem without the final "in"
  gradaninN : DeclensionType = \gradanin ->
    let
      gradan = Predef.tk 2 gradanin ;
      gradanN = izvorN gradan
    in numbersNounForms (izvorN gradanin) gradanN ;

  -- combine the singular forms of one table with the plural forms of another
  numbersNounForms : (sg, pl : NounForms) -> NounForms = \sg, pl ->
    sg ** {
      pnom => pl ! pnom ;
      pgen => pl ! pgen ;
      pdat => pl ! pdat ;
      pacc => pl ! pacc
      } ;

  vojnikN : DeclensionType = izvorN ;
  bubregN : DeclensionType = izvorN ;
  trbuhN  : DeclensionType = izvorN ;
  cvorakN : DeclensionType = nokatN ;

  -- plural built on the extended stem in -ev/-ov (panj - panjev-)
  panjN : DeclensionType = \panj ->
    numbersNounForms
      (izvorN panj)
      (izvorN (palatalize (panj + "e") + "v")) ;

  -- plural stem drops the "a" before the final "nj" (sužanj - sužnj-);
  -- pgen and pdat are computed from the argument (they used to be the
  -- literal forms of "sužanj", which was wrong for any other noun)
  suzanjN : DeclensionType = \suzanj ->
    let suznj = Predef.tk 3 suzanj + Predef.dp 2 suzanj
    in numbersNounForms
         (izvorN suzanj)
         (izvorN suznj ** {
            pgen => suzanj + "a" ;
            pdat => suznj + "ima"
            }) ;

  pristN : DeclensionType = panjN ;

  stricN : DeclensionType = \stric ->
    panjN stric ** {
      svoc => palatalize (stric + "e")
      } ;

  klinacN : DeclensionType = \klinac ->
    let klinc = Predef.tk 2 klinac + last klinac
    in nokatN klinac ** {
      svoc => palatalize (klinc + "e") ;
      pdat => klinc + "ima"
      } ;

  -- -lac nouns: oblique stem in -oc (posjetilac - posjetioc-)
  posjetilacN : DeclensionType = \posjetilac ->
    let posjetioc = Predef.tk 3 posjetilac + "oc"
    in izvorN posjetioc ** {
      snom => posjetilac ;
      svoc => palatalize (posjetioc + "e") ;
      sins => palatalize (posjetioc + "e") + "m" ;
      pgen => posjetilac + "a" ;
      pdat => palatalize (posjetioc + "i") + "ma"
      } ;

  -- final "o" alternating with "l" in the oblique stem
  pepeoN : DeclensionType = \pepeo ->
    let pepel = init pepeo + "l"
    in izvorN pepel ** {snom => pepeo} ;

  ugaoN : DeclensionType = \ugao ->
    let
      ugal = init ugao + "l" ;
      ugl = Predef.tk 2 ugal + last ugal
    in numbersNounForms
         (nokatN ugal ** {snom => ugao})
         (izvorN (ugl + "ov")) ;

  bifeN : DeclensionType = \bife ->
    izvorN bife ** {svoc => bife + "u"} ;

  ziriN : DeclensionType = \ziri ->
    bifeN (ziri + "j") ** {
      snom => ziri ;
      pdat => ziri + "jima"
      } ;

  taksiN : DeclensionType = ziriN ;

---- Danilo, Hrvoje, raščupànko skipped

  koljenoN : DeclensionType = \koljeno ->
    let koljen = init koljeno
    in izvorN koljen ** {
      snom | sacc | svoc => koljeno ;
      pnom | pacc => koljen + "a"
      } ;

  -- like koljenoN, with "a" inserted into the stem-final cluster in Pl Gen
  jedroN : DeclensionType = \jedro ->
    let
      jed = Predef.tk 2 jedro ;
      r = last (init jedro) ;
    in koljenoN jedro ** {
      pgen => jed + "a" + r + "a"
      } ;

---- drvo - drveta skipped, can also decline as koljeno
---- oči, čudo skipped

  poljeN : DeclensionType = \polje ->
    koljenoN polje ** {
      pdat => init polje + "ima"
      } ;

---- sunce, uže, zvonce, rame, podne, doba

---- no tables given in the sources for feminine nouns, so guessing from ending tables

  zenaN : DeclensionType = \zena ->
    let zen = init zena
    in table {
      snom => zena ;
      sgen => zen + "e" ;
      sdat => zen + "i" ;
      sacc => zen + "u" ;
      svoc => zen + "o" ; ---- o/a in Wiki ; o in https://sh.wiktionary.org/wiki/%C5%BEena
      sins => zen + "om" ;
      pnom | pacc => zen + "e" ;
      pdat => zen + "ama" ;
      pgen => zen + "a"
      } ;

---------------------------
-- Adjectives

-- to be used for AP: 56 forms for each degree

  Adjective : Type = {s : Gender => Number => Case => Str} ;

-- to be used for A, in three degrees: 12 forms in each
---- TODO other degrees than positive

param
  AForm =
      msnom | fsnom | nsnom
    | msgen | fsgen   -- nsgen = msgen
    | msdat           -- msloc,nsdat,nsloc = msdat
    | fsdat           -- fsloc = fsdat
    | fsacc
    | msins           -- nsins, pdat, ploc, pins = msins
    | fsins           -- no o/e variation like in msdat
    | mpnom           -- mpvoc = mpnom
    | mpgen           -- fpgen, npgen = mpgen
    ;

oper
  AdjForms : Type = AForm => Str ;

  invarAdjForms : Str -> AdjForms = \s -> \\a => s ;

-- used in PositA but will also work in Compar and Superl by calling their record fields
-- Maps every Gender/Number/Case combination to one of the 12 stored forms,
-- following the syncretisms listed with AForm.
-- NOTE(review): the tuple patterns were missing in the reviewed text and are
-- reconstructed from those syncretisms - confirm against the version history.

  adjFormsAdjective : AdjForms -> Adjective = \afs -> {
    s = \\g,n,c => case <g,n,c> of {
      <Masc _, Sg, Nom | Voc> | <Masc Inanim, Sg, Acc>  => afs ! msnom ;
      <Fem, Sg, Nom | Voc> | <Neutr, Pl, Nom | Acc | Voc> => afs ! fsnom ;
      <Neutr, Sg, Nom | Acc | Voc>                      => afs ! nsnom ;
      <Masc _ | Neutr, Sg, Gen> | <Masc Anim, Sg, Acc>  => afs ! msgen ;
      <Fem, Sg, Gen> | <Fem, Pl, Nom | Acc | Voc> | <Masc _, Pl, Acc> => afs ! fsgen ;
      <Masc _ | Neutr, Sg, Dat | Loc>                   => afs ! msdat ;
      <Fem, Sg, Dat | Loc>                              => afs ! fsdat ;
      <Fem, Sg, Acc>                                    => afs ! fsacc ;
      <Masc _ | Neutr, Sg, Ins> | <_, Pl, Dat | Loc | Ins> => afs ! msins ;
      <Fem, Sg, Ins>                                    => afs ! fsins ;
      <Masc _, Pl, Nom | Voc>                           => afs ! mpnom ;
      <_, Pl, Gen>                                      => afs ! mpgen
      }
    } ;

  -- regular adjective paradigm from the masculine singular nominative;
  -- velk is the stem used before endings, oe the soft/hard theme vowel
  velikA : Str -> AdjForms = \velik ->
    let
      velk : Str = case velik of {
        vel + "stan"    => vel + "sn" ;
        vel + "ao"      => vel + "l" ;
        vel + "ak"      => voicing vel + "k" ;
        vel + "a" + k@? => vel + k ;
        vel + "i"       => vel ;
        _ => velik
        } ;
      oe : Str = ifSoft velik "e" "o"
    in table {
      msnom => velik ;
      fsnom => velk + "a" ;
      nsnom => velk + oe ;
      msgen => velk + oe + "g" ;
      fsgen => velk + "e" ;
      msdat => velk + oe + "m" ;
      fsdat => velk + "oj" ;
      fsacc => velk + "u" ;
      msins => velk + "im" ;
      fsins => velk + "om" ;
      mpnom => velk + "i" ;
      mpgen => velk + "ih"
      } ;

  -- regular comparative: "-lji" after a labial or "h", "-iji" otherwise.
  -- The alternatives are parenthesized: "+" and "|" have the same precedence
  -- in patterns, so the unparenthesized form only matched a final "b".
  regComparAForms : AdjForms -> AdjForms = \aposit ->
    case init (aposit ! fsnom) of {
      grub@(_ + ("b"|"p"|"v"|"h")) => velikA (grub + "lji") ;
      star => velikA (star + "iji")
      } ;

  superlAForms : AdjForms -> AdjForms = \acompar ->
    velikA ("naj" + acompar ! msnom) ;

  od_Str = "od" ;

---------------------
-- Verbs -- Wiki

  VerbForms : Type = VForm => Str ;

  ComplementCase : Type = {s : Str ; c : Case ; hasPrep : Bool} ;

  -- select the finite form (present) or the participle (past) for an agreement
  verbAgr : VerbForms -> Agr -> CTense -> Str ---- TODO tenses
    = \vf,a,b -> case <a,b> of {
        <Ag g n p, CTPres> => vf ! VPres n p ;
        <Ag g n p, CTPast> => vf ! VPastPart g n
        } ;

  -- guess Sg P1 present and Masc Sg participle from the infinitive
  -- NOTE(review): the "-ti" branch yields e.g. "radem" for "raditi" (it fits
  -- "-nuti" verbs such as krenuti - krenem); confirm the intended class.
  smartVerbForms : Str -> VerbForms = \s -> case s of {
    cit + "ati"  => aeiVerbForms s (cit + "am") (cit + "ao") ;
    vid + "jeti" => aeiVerbForms s (vid + "im") (vid + "io") ;
    radi + "ti"  => aeiVerbForms s (init radi + "em") (radi + "o") ;
    _ => Predef.error ("expect infinitive form \"-ti\", found" ++ s)
    } ;

-- a traditional paradigm type, with a slight abstraction
---- TODO other traditional paradigms
-- arguments: infinitive, present Sg P1, past participle Masc Sg;
-- the theme vowel (a/e/i) of the present stem decides the Pl P3 ending

  aeiVerbForms : Str -> Str -> Str -> VerbForms = \citati, citam, citao ->
    let
      cita = init citam ;
      u = case last cita of {
        "a" => "aju" ;
        "e" => "u" ;
        "i" => "e"
        } ;
      cital = init citao + "l" ;
    in table {
      VInf => citati ;
      VPres Sg P1 => cita + "m" ;
      VPres Sg P2 => cita + "š" ;
      VPres Sg P3 => cita ;
      VPres Pl P1 => cita + "mo" ;
      VPres Pl P2 => cita + "te" ;
      VPres Pl P3 => init cita + u ;  -- constructor Pl (was the variable pattern "pl")
      VPastPart (Masc _) Sg => citao ;
      VPastPart Fem Sg      => cital + "a" ;
      VPastPart Neutr Sg    => cital + "o" ;
      VPastPart (Masc _) Pl => cital + "i" ;
      VPastPart Fem Pl      => cital + "e" ;
      VPastPart Neutr Pl    => cital + "a"
      } ;

-- copula
-- short = clitic forms, long = stressed forms ("je" + short, except Sg P3),
-- negative = "ni" + short

  jesam_Copula : {short, long, negative : Number => Person => Str} =
    let
      sam : Number => Person => Str = table {
        Sg => table {
          P1 => "sam" ;
          P2 => "si" ;
          P3 => "je"
          } ;
        Pl => table {
          P1 => "smo" ;
          P2 => "ste" ;
          P3 => "su"
          }
        }
    in {
      short = sam ;
      long = \\n,p => case <n,p> of {
        <Sg,P3> => "jeste" ; --- wiki: jest(e)
        _ => "je" + sam ! n ! p   -- jesam, jesi, jesmo, jeste, jesu
        } ;
      negative = \\n,p => "ni" + sam ! n ! p
      } ;

-- Wiki: some grammars (chiefly Serbian ones) treat jesam as a defective verb
-- having only present tense. Others treat these forms as two realizations
-- of the same irregular verb biti, jesam being imperfective and budem perfective.

  copula_VerbForms : VerbForms = table {
    VPres n p => jesam_Copula.short ! n ! p ;
    v => biti_VerbForms ! v
    } ;

  biti_VerbForms : VerbForms = aeiVerbForms "biti" "budem" "bio" ;

  imati_VerbForms : VerbForms = aeiVerbForms "imati" "imam" "imao" ;

---------------------------
-- Pronouns

  PronForms : Type = {
    nom,
    gen, cgen, -- bare, clitic (also as Acc)
    dat, cdat, -- also as Loc
    ins : Str ;
    a : Agr
    } ;

  -- personal pronouns by agreement; neuter Sg P3 shares the masculine oblique forms
  personalPron : Agr -> PronForms = \a ->
    {a = a ; cnom = []} ** case a of {
      Ag _ Sg P1 => {
        nom = "ja" ;
        gen = "mene" ;
        cgen = "me" ;   -- was "ma", which is not a BCMS clitic form
        dat = "meni" ;
        cdat = "mi" ;
        ins = "mnom"
        } ;
      Ag _ Sg P2 => {
        nom = "ti" ;
        gen = "tebe" ;
        cgen = "te" ;
        dat = "tebi" ;
        cdat = "ti" ;
        ins = "tobom"
        } ;
      Ag Fem Sg P3 => {
        nom = "ona" ;
        gen = "nje" ;
        cgen = "je" ;
        dat = "njoj" ;
        cdat = "joj" ;
        ins = "njom"
        } ;
      Ag g Sg P3 => {
        nom = case g of {
          Masc _ => "on" ;
          _ => "ono"
          } ;
        gen = "njega" ;
        cgen = "ga" ;
        dat = "njemu" ;
        cdat = "mu" ;
        ins = "njim"
        } ;
      Ag _ Pl P1 => {
        nom = "mi" ;
        gen, cgen = "nas" ;
        dat, ins = "nama" ;
        cdat = "nam"
        } ;
      Ag _ Pl P2 => {
        nom = "vi" ;
        gen, cgen = "vas" ;
        dat, ins = "vama" ;
        cdat = "vam"
        } ;
      Ag g Pl P3 => {
        nom = case g of {
          Masc _ => "oni" ;
          Fem => "one" ;
          Neutr => "ona"
          } ;
        gen = "njih" ;
        cgen = "ih" ;
        dat, ins = "njima" ;
        cdat = "im"
        }
      } ;

  -- possessive adjectives by possessor agreement; a neuter Sg P3 possessor
  -- takes "njegov" like the masculine (it used to get the feminine "njezin")
  possessivePron : Agr -> AdjForms = \a -> case a of {
    Ag _ Sg P1   => velikA "moj" ;
    Ag _ Sg P2   => velikA "tvoj" ;
    Ag Fem Sg P3 => velikA "njezin" ; -- BCMS: Hrv: otherwise njen
    Ag _ Sg P3   => velikA "njegov" ;
    Ag _ Pl P1   => velikA "naš" ;
    Ag _ Pl P2   => velikA "vaš" ;
    Ag _ Pl P3   => velikA "njihov"
    } ;

{-
  reflPossessivePron : DemPronForms = otcovA "svoj" ** {
    msnom = "svoj" ;
    msgen = "svojho" ;
    msdat = "svojmu" ;
    msins = "svojím" ;
    ampnom = "svoji" ;
    nsnom, fpnom = "svoje" ;
    pgen = "svojich" ;
    pdat = "svojim" ;
    pins = "svojimi" ;
    } ;
-}

  mkPron : Agr -> PronForms ** {poss : AdjForms} = \a ->
    personalPron a ** {poss = possessivePron a} ;

param
  NumSize = NS_1 | NS_2_4 | NS_5_20 | NS_20_ ;

oper
  Determiner : Type = {
    s : Gender => Case => Str ;
    size : NumSize
    } ;

{-
-- interrogatives TODO

  ktoForms : Case => Str = table {
    Nom => "kto" ;
    Gen | Acc => "koho" ;
    Dat => "komu" ;
    Loc => "kom" ;
    Ins => "kým"
    } ;

  coForms : Case => Str = table {
    Nom|Acc => "čo" ;
    Gen => "čoho" ;
    Dat => "čomu" ;
    Loc => "čom" ;
    Ins => "čím"
    } ;
-}

--------------------------------
-- combining nouns with numerals

oper
  -- noun form after a numeral: singular with 1, plural with 2-4,
  -- and from 5 up the genitive plural in Nom/Acc positions
  numSizeForm : (Number => Case => Str) -> NumSize -> Case -> Str = \cns,n,c ->
    case n of {
      NS_1 => cns ! Sg ! c ;
      NS_2_4 => cns ! Pl ! c ;
      _ => case c of {
        Nom | Acc => cns ! Pl ! Gen ;
        _ => cns ! Pl ! c
        }
      } ;

  -- agreement features of a noun phrase containing a numeral
  numSizeAgr : Gender -> NumSize -> Person -> Agr = \g,ns,p ->
    case ns of {
      NS_1 => Ag g Sg p ;
      NS_2_4 => Ag g Pl p ;
      _ => Ag Neutr Sg p ---- TODO verify
      } ;

  numSizeNumber : NumSize -> Number = \ns ->
    case ns of {
      NS_1 => Sg ;
      _ => Pl ---- TO CHECK
      } ;

}