From 54713a2987d419d2c8e755a4b4150113e6a6c930 Mon Sep 17 00:00:00 2001 From: Aarne Ranta Date: Thu, 29 Sep 2022 09:49:29 +0200 Subject: [PATCH] Hrv: extended smart paradigms --- src/croatian/ParadigmsHrv.gf | 126 ++++++++++++++++++++++++++-------- src/croatian/ResHrv.gf | 116 ++++++++++++------------------- src/croatian/StructuralHrv.gf | 2 +- 3 files changed, 141 insertions(+), 103 deletions(-) diff --git a/src/croatian/ParadigmsHrv.gf b/src/croatian/ParadigmsHrv.gf index 0824b14c..7c83473c 100644 --- a/src/croatian/ParadigmsHrv.gf +++ b/src/croatian/ParadigmsHrv.gf @@ -27,7 +27,7 @@ oper accusative : Case = Acc ; vocative : Case - = Voc ; + = R.Voc ; locative : Case = Loc ; instrumental : Case @@ -39,26 +39,64 @@ oper oper mkN = overload { - mkN : (nom : Str) -> N - = \nom -> lin N (guessNounForms nom) ; - mkN : (nom,gen : Str) -> Gender -> N ---- TODO - = \nom,gen,g -> lin N (guessNounForms nom) ; + mkN : (sgnom : Str) -> N -- guessing gender + = \sgnom -> lin N (smartLexNoun sgnom) ; + mkN : (sgnom : Str) -> Gender -> N -- gender given explicitly + = \sgnom, g -> lin N (mkgLexNoun sgnom g) ; + mkN : NForms -> Gender -> N -- the worst case + = \nfs,g -> lin N (nfs ** {g = g}) ; } ; -- The following standard declensions can be used with good accuracy. -- However, they have some defaults that may have to be overwritten. -- This can be done easily by overriding those formes with record extension (**). --- The default extensions are shown in comments; if the default is correct, no extension is needed. --- Notice that some paradigms take two arguments, some take one. 
----- TODO - --- The full definition of the noun record is --- { --- snom,sgen,sdat,sacc,svoc,sloc,sins, pnom,pgen,pdat,pacc,ploc,pins : Str ; --- g : Gender --- } + NForms = {snom,sgen,sdat,sacc,svoc,sins,pnom,pgen,pdat,pacc : Str} ; -- ten forms; the gender is given separately to mkN + izvorNForms : Str -> NForms + = izvorN ; + nokatNForms : Str -> NForms + = nokatN ; + gradaninNForms : Str -> NForms + = gradaninN ; + vojnikNForms : Str -> NForms + = vojnikN ; + bubregNForms : Str -> NForms + = bubregN ; + trbuhNForms : Str -> NForms + = trbuhN ; + cvorakNForms : Str -> NForms + = cvorakN ; + panjNForms : Str -> NForms + = panjN ; + suzanjNForms : Str -> NForms + = suzanjN ; + pristNForms : Str -> NForms + = pristN ; + stricNForms : Str -> NForms + = stricN ; + klinacNForms : Str -> NForms + = klinacN ; + posjetilacNForms : Str -> NForms + = posjetilacN ; + pepeoNForms : Str -> NForms + = pepeoN ; + ugaoNForms : Str -> NForms + = ugaoN ; + bifeNForms : Str -> NForms + = bifeN ; + ziriNForms : Str -> NForms + = ziriN ; + taksiNForms : Str -> NForms + = taksiN ; + koljenoNForms : Str -> NForms + = koljenoN ; + jedroNForms : Str -> NForms + = jedroN ; + poljeNForms : Str -> NForms + = poljeN ; + zenaNForms : Str -> NForms + = zenaN ; --------------------- -- Adjectives @@ -67,27 +105,51 @@ oper mkA = overload { mkA : Str -> A - = \s -> lin A (velikA s) + = \s -> lin A (velikA s) ; + mkA : AForms -> A -- the worst case: all forms given + = \s -> lin A s ; } ; + invarA : Str -> A + = \s -> lin A (invarAForms s) ; + + AForms : Type + = R.AdjForms ; + +-- the complete definition of AForms is +-- {msnom, fsnom, nsnom, msgen, fsgen, msdat, +-- fsdat, fsacc, msloc, msins, fsins, mpnom, pgen : Str} ; + + velikAForms : Str -> AForms + = velikA ; + + invarAForms : Str -> AForms + = \s -> invarAdjForms s ; --- the full definition of the adjective record is --- { --- msnom, fsnom, nsnom, msgen, fsgen, msdat, fsacc, msloc, msins, fsins, --- ampnom, pgen, pins : Str --- } --- ------------------------- -- Verbs + mkV = overload { + mkV : (raditi : Str) -> V -- smart paradigm from the infinitive + = \s 
-> lin V {s = smartVerbForms s} ; + mkV : (raditi, radem, radio : Str) -> V + = \raditi, radem, radio -> + lin V {s = aeiVerbForms raditi radem radio} ; + mkV : VerbForms -> V -- the worst case: all forms given + = \vf -> lin V {s = vf} ; + } ; + + mkV2 = overload { - mkV2 : VerbForms -> V2 - = \vf -> lin V2 {s = vf ; c = {s = [] ; c = Acc ; hasPrep = False}} ; - mkV2 : VerbForms -> Case -> V2 - = \vf,c -> lin V2 {s = vf ; c = {s = [] ; c = c ; hasPrep = False}} ; - mkV2 : VerbForms -> ComplementCase -> V2 - = \vf,c -> lin V2 {s = vf ; c = c} ; + mkV2 : V -> V2 -- accusative object, no preposition + = \v -> lin V2 {s = v.s ; + c = {s = [] ; c = accusative ; hasPrep = False}} ; + mkV2 : V -> Case -> V2 -- object in the given case, no preposition + = \v,c -> lin V2 {s = v.s ; + c = {s = [] ; c = c ; hasPrep = False}} ; + mkV2 : V -> Prep -> V2 -- object governed by the given preposition + = \v,c -> lin V2 {s = v.s ; c = c} ; } ; ------------------------ @@ -96,8 +158,14 @@ oper mkAdv : Str -> Adv = \s -> lin Adv {s = s} ; - mkPrep : Str -> Case -> Prep - = \s,c -> lin Prep {s = s ; c = c ; hasPrep = True} ; ---- True if s /= "" + mkPrep = overload { + mkPrep : Str -> Prep -- genitive prepositions + = \s -> lin Prep {s = s ; c = genitive ; hasPrep = True} ; + mkPrep : Case -> Prep -- oblique cases, empty string + = \c -> lin Prep {s = [] ; c = c ; hasPrep = False} ; + mkPrep : Str -> Case -> Prep -- preposition string with the case it governs + = \s,c -> lin Prep {s = s ; c = c ; hasPrep = True} ; + } ; mkConj : Str -> Conj = \s -> lin Conj {s1 = [] ; s2 = s} ; diff --git a/src/croatian/ResHrv.gf b/src/croatian/ResHrv.gf index 0436322e..e1b96ad4 100644 --- a/src/croatian/ResHrv.gf +++ b/src/croatian/ResHrv.gf @@ -3,7 +3,8 @@ resource ResHrv = open Prelude in { -- AR September 2022 -- sources: -- Wiki = https://en.wikipedia.org/wiki/Serbo-Croatian_grammar --- BCMS = Bosnian, Croatian, Montenegrin and Serbian: An Essential Grammar (Routledge Essential Grammars) 1st Edition, by Željko Vrabec +-- BCMS = Bosnian, Croatian, Montenegrin and Serbian: +-- An Essential Grammar (Routledge Essential Grammars) 1st Edition, by Željko Vrabec -- parameters @@ -65,6 +66,7 @@ voicing : 
Str -> Str = \s -> case s of { x + "ž" => x + "š" ; _ => s } ; + --------------- -- Nouns --------------- @@ -84,6 +86,10 @@ voicing : Str -> Str = \s -> case s of { Noun : Type = {s : Number => Case => Str ; g : Gender} ; +-- for lexical nouns N, we also need the gender but keep the minimal set of forms + + LexNoun : Type = NounForms ** {g : Gender} ; + -- this is used in UseN nounFormsNoun : NounForms -> Gender -> Noun @@ -114,44 +120,41 @@ voicing : Str -> Str = \s -> case s of { g = g } ; +-- a declension type produces these forms from a string --- terminology of CEG DeclensionType : Type = Str -> NounForms ; -{- - declensionNounForms : (snom,pgen : Str) -> Gender -> NounForms - = \snom,pgen,g -> case of { - => hrdinaN snom ; - => ponyN snom ; ---- - => chlapN snom ; - - => strojN snom ; - => dubN snom ; - => dubN snom ; - => zenaN snom ; - => zenaN snom ; - => ulicaN snom ; - => ulicaN snom ; - => kostN snom pgen ; - => dlanN snom pgen ; +-- smart paradigms: declension and gender are guessed from the ending, the first matching pattern wins - => mestoN snom ; - => vysvedcenieN snom ; - => srdceN snom ; - => dievceniecN snom ; - => dievcaN snom ; + smartLexNoun : Str -> LexNoun = \s -> case s of { + _ + "a" => zenaN s ** {g = feminine} ; + _ + "i" => ziriN s ** {g = inanimate} ; ---- TODO feminine i + _ + "e" => poljeN s ** {g = neuter} ; ---- TODO sunce, uze, zvonce, rame + _ + "ao" => ugaoN s ** {g = inanimate} ; + _ + "eo" => pepeoN s ** {g = inanimate} ; + _ + "o" => koljenoN s ** {g = neuter} ; ---- TODO jedro + _ + "lac" => posjetilacN s ** {g = inanimate} ; + _ + "anj" => suzanjN s ** {g = inanimate} ; + _ + "nj" => panjN s ** {g = inanimate} ; + _ + "št" => pristN s ** {g = inanimate} ; + _ + "ac" => klinacN s ** {g = inanimate} ; ---- masculine paradigm, was neuter; TODO animate + _ + "c" => stricN s ** {g = inanimate} ; + _ + "in" => gradaninN s ** {g = inanimate} ; ---- masculine paradigm, was neuter; TODO animate + _ + "ak" => cvorakN s ** {g = inanimate} ; + _ + "a" + ? 
=> nokatN s ** {g = inanimate} ; + _ + "g" => bubregN s ** {g = inanimate} ; + _ + "h" => trbuhN s ** {g = inanimate} ; + _ + "k" => vojnikN s ** {g = inanimate} ; + _ => izvorN s ** {g = inanimate} + } ; - _ => dubN (""+snom) ** {pgen = pgen} ---- Predef.error ("cannot infer declension type for" ++ snom ++ pgen) - } ** {pgen = pgen ; g = g} ; --} --- the "smartest" one-argument mkN + mkgLexNoun : Str -> Gender -> LexNoun = \s,g -> case <s,g> of { -- masculine nouns in -i, -e, -o get their own paradigms; otherwise the given gender overrides the guessed one + <_ + "i", Masc _> => ziriN s ** {g = g} ; + <_ + "e", Masc _> => bifeN s ** {g = g} ; + <_ + "o", Masc _> => bifeN s ** {g = g} ; + <_, g> => smartLexNoun s ** {g = g} + } ; - guessNounForms : Str -> NounForms ** {g : Gender} - = \snom -> case snom of { - ----- TODO - _ => izvorN snom ** {g = inanimate} - } ; -- the traditional declensions, following Wiki -- they are also exported in ParadigmsHrv with names izvorN etc @@ -323,7 +326,7 @@ voicing : Str -> Str = \s -> case s of { -- to be used for AP: 56 forms for each degree Adjective : Type = {s : Gender => Number => Case => Str} ; --- to be used for A, in three degrees: 15 forms in each +-- to be used for A, in three degrees: 12 forms in each ---- TODO other degrees than positive AdjForms : Type = { @@ -370,18 +373,6 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> { } } ; -{- - guessAdjForms : Str -> AdjForms - = \s -> case s of { - _ + "ý" => peknyA s ; - _ + "y" => krasnyA s ; - _ + "í" => cudziA s ; - _ + "i" => rydziA s ; - _ + ("ov"|"in") => otcovA s ; - _ => otcovA (""+s) ---- Predef.error ("no mkA for" ++ s) - } ; --} - velikA : Str -> AdjForms = \velik -> let velk : Str = case velik of { @@ -421,35 +412,14 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> { => vf ! VPres n p ; => vf ! 
VPastPart g n } ; -{- - copulaVerbForms : VerbForms = { - inf = "byť" ; - pressg1 = "som" ; - pressg2 = "si" ; - pressg3 = "je" ; - prespl1 = "sme" ; - prespl2 = "ste" ; - prespl3 = "sú" ; - pastpmasc = "bol" ; - pastpfem = "bola" ; - pastpneutr = "bolo" ; + + smartVerbForms : Str -> VerbForms = \s -> case s of { -- builds the aeiVerbForms arguments from the infinitive alone + cit + "ati" => aeiVerbForms s (cit + "am") (cit + "ao") ; + radi + "ti" => aeiVerbForms s (init radi + "em") (radi + "o") ; ---- NOTE(review): yields "radem" for "raditi", standard Croatian has "radim"; TODO confirm + _ => Predef.error ("expect infinitive form \"-ti\", found" ++ s) } ; - haveVerbForms : VerbForms = { - inf = "mať" ; - pressg1 = "mám" ; - pressg2 = "máš" ; - pressg3 = "má" ; - prespl1 = "máme" ; - prespl2 = "máte" ; - prespl3 = "majú" ; - pastpmasc = "mal" ; - pastpfem = "mala" ; - pastpneutr = "malo" ; - } ; --} - --- just an example of a traditional paradigm +-- a traditional paradigm type, with a slight abstraction ---- TODO other traditional paradigms aeiVerbForms : Str -> Str -> Str -> VerbForms = \citati, citam, citao -> diff --git a/src/croatian/StructuralHrv.gf b/src/croatian/StructuralHrv.gf index afe9c72d..b9e3ff35 100644 --- a/src/croatian/StructuralHrv.gf +++ b/src/croatian/StructuralHrv.gf @@ -7,7 +7,7 @@ lin ---- few_Det = invarNumeral "málo" ; -- see notes for_Prep = mkPrep "pre" accusative ; from_Prep = mkPrep "iz" Gen ; - have_V2 = mkV2 imati_VerbForms ; + have_V2 = mkV2 (mkV imati_VerbForms) ; in_Prep = mkPrep "u" Loc ; ---- many_Det = regNumeral "mnoho" "mnohých" "mnohým" "mnohými" ; ---- alternative: invarNumeral "veľa" ; or_Conj = mkConj "alebo" ;