Hrv: extended smart paradigms

2026-05-27 17:08:54 -06:00 · 2022-09-29 09:49:29 +02:00
parent d8e313c75c
commit 54713a2987
3 changed files with 141 additions and 103 deletions
--- a/src/croatian/ParadigmsHrv.gf
+++ b/src/croatian/ParadigmsHrv.gf
@@ -27,7 +27,7 @@ oper
  accusative : Case
    = Acc ;
  vocative : Case
-    = Voc ;
+    = R.Voc ;
  locative : Case
    = Loc ;
  instrumental : Case
@@ -39,26 +39,64 @@ oper
 oper

  mkN = overload {
-    mkN : (nom : Str) -> N
-      = \nom -> lin N (guessNounForms nom) ;
-    mkN : (nom,gen : Str) -> Gender -> N ---- TODO
-      = \nom,gen,g -> lin N (guessNounForms nom) ;
+    mkN : (sgnom : Str) -> N        -- guessing gender
+      = \sgnom -> lin N (smartLexNoun sgnom) ;
+    mkN : (sgnom : Str) -> Gender -> N 
+      = \sgnom, g -> lin N (mkgLexNoun sgnom g) ;
+    mkN : NForms -> Gender -> N     -- the worst case
+      = \nfs,g -> lin N (nfs ** {g = g}) ;
    } ;

 -- The following standard declensions can be used with good accuracy.
 -- However, they have some defaults that may have to be overwritten.
 -- This can be done easily by overriding those formes with record extension (**).
-- The default extensions are shown in comments; if the default is correct, no extension is needed.
-- Notice that some paradigms take two arguments, some take one.

---- TODO
-
-- The full definition of the noun record is
-- {
--  snom,sgen,sdat,sacc,svoc,sloc,sins, pnom,pgen,pdat,pacc,ploc,pins : Str ;
--  g : Gender
-- }
+  NForms = {snom,sgen,sdat,sacc,svoc,sins,pnom,pgen,pdat,pacc : Str} ;

+  izvorNForms : Str -> NForms
+    = izvorN ;
+  nokatNForms : Str -> NForms
+    = nokatN ;
+  gradaninNForms : Str -> NForms
+    = gradaninN ;
+  vojnikNForms : Str -> NForms
+    = vojnikN ;
+  bubregNForms : Str -> NForms
+    = bubregN ;
+  trbuhNForms : Str -> NForms
+    = trbuhN ;
+  cvorakNForms : Str -> NForms
+    = cvorakN ;
+  panjNForms : Str -> NForms
+    = panjN ;
+  suzanjNForms : Str -> NForms
+    = suzanjN ;
+  pristNForms : Str -> NForms
+    = pristN ;
+  stricNForms : Str -> NForms
+    = stricN ;
+  klinacNForms : Str -> NForms
+    = klinacN ;
+  posjetilacNForms : Str -> NForms
+    = posjetilacN ;
+  pepeoNForms : Str -> NForms
+    = pepeoN ;
+  ugaoNForms : Str -> NForms
+    = ugaoN ;
+  bifeNForms : Str -> NForms
+    = bifeN ;
+  ziriNForms : Str -> NForms
+    = ziriN ;
+  taksiNForms : Str -> NForms
+    = taksiN ;
+  koljenoNForms : Str -> NForms
+    = koljenoN ;
+  jedroNForms : Str -> NForms
+    = jedroN ;
+  poljeNForms : Str -> NForms
+    = poljeN ;
+  zenaNForms : Str -> NForms
+    = zenaN ;

 ---------------------
 -- Adjectives
@@ -67,27 +105,51 @@ oper

  mkA = overload {
    mkA : Str -> A
-      = \s -> lin A (velikA s)
+      = \s -> lin A (velikA s) ;
+    mkA : AForms -> A
+      = \s -> lin A s ;
    } ;

+  invarA : Str -> A
+    = \s -> lin A (invarAForms s) ;
+
+  AForms : Type
+    = R.AdjForms ;
+
+-- the complete definition of AForms is
+--   {msnom, fsnom, nsnom, msgen, fsgen, msdat,
+--    fsdat, fsacc, msloc, msins, fsins, mpnom, pgen : Str} ;
+
+  velikAForms : Str -> AForms
+    = velikA ;
+    
+  invarAForms : Str -> AForms
+    = \s -> invarAdjForms s ;

-- the full definition of the adjective record is
-- {
--    msnom, fsnom, nsnom, msgen, fsgen, msdat, fsacc, msloc, msins, fsins,
--    ampnom, pgen, pins : Str
-- }
--

 -------------------------
 -- Verbs

+  mkV = overload {
+    mkV : (raditi : Str) -> V
+      = \s -> lin V {s = smartVerbForms s} ;
+    mkV : (raditi, radem, radio : Str) -> V
+      = \raditi, radem, radio ->
+           lin V {s = aeiVerbForms raditi radem radio} ;
+    mkV : VerbForms -> V
+      = \vf -> lin V {s = vf} ;
+    } ;
+
+
  mkV2 = overload {
-    mkV2 : VerbForms -> V2
-      = \vf -> lin V2 {s = vf ; c = {s = [] ; c = Acc ; hasPrep = False}} ;
-    mkV2 : VerbForms -> Case -> V2
-      = \vf,c -> lin V2 {s = vf ; c = {s = [] ; c = c ; hasPrep = False}} ;
-    mkV2 : VerbForms -> ComplementCase -> V2 
-      = \vf,c -> lin V2 {s = vf ; c = c} ;
+    mkV2 : V -> V2
+      = \v -> lin V2 {s = v.s ;
+           c = {s = [] ; c = accusative ; hasPrep = False}} ;
+    mkV2 : V -> Case -> V2
+      = \v,c -> lin V2 {s = v.s ;
+           c = {s = [] ; c = c ; hasPrep = False}} ;
+    mkV2 : V -> Prep -> V2 
+      = \v,c -> lin V2 {s = v.s ; c = c} ;
    } ;

 ------------------------
@@ -96,8 +158,14 @@ oper
  mkAdv : Str -> Adv
    = \s -> lin Adv {s = s} ;

+  mkPrep = overload {
+    mkPrep : Str -> Prep    -- genitive prepositions
+      = \s -> lin Prep {s = s ; c = genitive ; hasPrep = True} ;
+    mkPrep : Case -> Prep   -- oblique cases, empty string
+      = \c -> lin Prep {s = [] ; c = c ; hasPrep = False} ;
    mkPrep : Str -> Case -> Prep
-    = \s,c -> lin Prep {s = s ; c = c ; hasPrep = True} ; ---- True if s /= ""
+      = \s,c -> lin Prep {s = s ; c = c ; hasPrep = True} ;
+    } ;
    
  mkConj : Str -> Conj
    = \s -> lin Conj {s1 = [] ; s2 = s} ;
--- a/src/croatian/ResHrv.gf
+++ b/src/croatian/ResHrv.gf
@@ -3,7 +3,8 @@ resource ResHrv = open Prelude in {
 -- AR September 2022
 -- sources:
 -- Wiki = https://en.wikipedia.org/wiki/Serbo-Croatian_grammar
-- BCMS = Bosnian, Croatian, Montenegrin and Serbian: An Essential Grammar (Routledge Essential Grammars) 1st Edition, by Željko Vrabec
+-- BCMS = Bosnian, Croatian, Montenegrin and Serbian:
+--   An Essential Grammar (Routledge Essential Grammars) 1st Edition, by Željko Vrabec

 -- parameters

@@ -65,6 +66,7 @@ voicing : Str -> Str = \s -> case s of {
  x + "ž"  => x + "š" ;
  _ => s
  } ;
+  
 ---------------
 -- Nouns
 ---------------
@@ -84,6 +86,10 @@ voicing : Str -> Str = \s -> case s of {

  Noun : Type = {s : Number => Case => Str ; g : Gender} ;

+-- for lexical nouns N, we also need the gender but keep the minimal set of forms
+
+  LexNoun : Type = NounForms ** {g : Gender} ;
+
 -- this is used in UseN

  nounFormsNoun : NounForms -> Gender -> Noun
@@ -114,45 +120,42 @@ voicing : Str -> Str = \s -> case s of {
      g = g
      } ;

+-- a declension type produces these forms from a string

-- terminology of CEG
  DeclensionType : Type = Str -> NounForms ;
-{-
-  declensionNounForms : (snom,pgen : Str) -> Gender -> NounForms
-    = \snom,pgen,g -> case <g, snom, pgen> of {
-      <Masc Anim,   _ + "a"           , _ + "ov">   => hrdinaN snom ;
-      <Masc _,      _ + ("i"|"y"|"e") , _ + "ov">   => ponyN snom ; ----
-      <Masc Anim,   _                 , _ + "ov">   => chlapN snom ;

-      <Masc Inanim, _ + #softConsonant,   _ + "ov"> => strojN snom ;
-      <Masc Ianim,  _ + #hardConsonant,   _ + "ov"> => dubN snom ;
-      <Masc Ianim,  _ + #neutralConsonant,_ + "ov"> => dubN snom ;
+-- smart paradigms

-      <Fem,         _ + #hardConsonant + "a",    _ + #consonant>  => zenaN snom ;
-      <Fem,         _ + #neutralConsonant + "a", _ + #consonant>  => zenaN snom ;
-      <Fem,         _ + #softConsonant + "a",    _ + #consonant>  => ulicaN snom ;
-      <Fem,         _ + ("ia"|"ya"),             _>               => ulicaN snom ;
-      <Fem,         _ + ("c"|"s"|"p"|"v"|"sť"),  _ + "í">         => kostN snom pgen ;
-      <Fem,         _ + #consonant            ,  _ + "í">         => dlanN snom pgen ;
-
-      <Neutr,       _ + "o"                   ,  _      >         => mestoN snom ;
-      <Neutr,       _ + "ie"                  ,  _ + "í">         => vysvedcenieN snom ;
-      <Neutr,       _ + "e"                   ,  _      >         => srdceN snom ;
-      <Neutr,       _ + ("a"|"ä")             ,  _ + "iec">       => dievceniecN snom ;
-      <Neutr,       _ + ("a"|"ä")             ,  _        >       => dievcaN snom ;
-
-      _ => dubN (""+snom) ** {pgen = pgen} ---- Predef.error ("cannot infer declension type for" ++ snom ++ pgen)
-      } ** {pgen = pgen ; g = g} ;
-}
-- the "smartest" one-argument mkN
-
-  guessNounForms : Str -> NounForms ** {g : Gender}
-    = \snom -> case snom of {
-
---- TODO
-        _ => izvorN snom ** {g = inanimate} 
+  smartLexNoun : Str -> LexNoun = \s -> case s of { -- guess paradigm and gender from the ending; first match wins
+    _ + "a"       => zenaN s ** {g = feminine} ;
+    _ + "i"       => ziriN s ** {g = inanimate} ; ---- TODO feminine i
+    _ + "e"       => poljeN s ** {g = neuter} ; ---- TODO sunce, uze, zvonce, rame
+    _ + "ao"      => ugaoN s ** {g = inanimate} ; -- must precede the general "o" branch
+    _ + "eo"      => pepeoN s ** {g = inanimate} ; -- must precede the general "o" branch
+    _ + "o"       => koljenoN s ** {g = neuter} ; ---- TODO jedro
+    _ + "lac"     => posjetilacN s ** {g = inanimate} ; -- must precede the "ac" branch
+    _ + "anj"     => suzanjN s ** {g = inanimate} ; -- must precede the "nj" branch
+    _ + "nj"      => panjN s ** {g = inanimate} ;
+    _ + "št"      => pristN s ** {g = inanimate} ;
+    _ + "ac"      => klinacN s ** {g = inanimate} ; -- was neuter; consonant-final guesses default to masculine ---- TODO animate
+    _ + "c"       => stricN s ** {g = inanimate} ;
+    _ + "in"      => gradaninN s ** {g = inanimate} ; -- was neuter; consonant-final guesses default to masculine ---- TODO animate
+    _ + "ak"      => cvorakN s ** {g = inanimate} ;
+    _ + "a" + ?   => nokatN s ** {g = inanimate} ; -- "a" followed by exactly one more character
+    _ + "g"       => bubregN s ** {g = inanimate} ;
+    _ + "h"       => trbuhN s ** {g = inanimate} ;
+    _ + "k"       => vojnikN s ** {g = inanimate} ;
+    _             => izvorN s ** {g = inanimate} -- fallback for all remaining endings
+    } ;

+  mkgLexNoun : Str -> Gender -> LexNoun = \s,g -> case <s,g> of { -- paradigm choice when the gender is supplied by the caller
+    <_ + "i", Masc _> => ziriN s ** {g = g} ;
+    <_ + "e", Masc _> => bifeN s ** {g = g} ;
+    <_ + "o", Masc _> => bifeN s ** {g = g} ; -- NOTE(review): also catches -ao/-eo, which smartLexNoun sends to ugaoN/pepeoN; confirm
+    <_, g> => smartLexNoun s ** {g = g} -- otherwise guess the forms from the ending but keep the given gender
+    } ;
+
+
 -- the traditional declensions, following Wiki
 -- they are also exported in ParadigmsHrv with names izvorN etc

@@ -323,7 +326,7 @@ voicing : Str -> Str = \s -> case s of {
 -- to be used for AP: 56 forms for each degree
  Adjective : Type = {s : Gender => Number => Case => Str} ;

-- to be used for A, in three degrees: 15 forms in each
+-- to be used for A, in three degrees: 12 forms in each
 ---- TODO other degrees than positive

  AdjForms : Type = {
@@ -370,18 +373,6 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> {
    }
    } ;

-{-
-  guessAdjForms : Str -> AdjForms
-    = \s -> case s of {
-        _ + "ý"  => peknyA s ;
-        _ + "y"  => krasnyA s ;
-        _ + "í"  => cudziA s ;
-        _ + "i"  => rydziA s ;
-        _ + ("ov"|"in") => otcovA s ;
-        _ => otcovA (""+s)  ---- Predef.error ("no mkA for" ++ s)
-        } ;
-}
-
  velikA : Str -> AdjForms = \velik ->
    let
      velk : Str = case velik of {
@@ -421,35 +412,14 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> {
      <Ag _ n p, CTPres> => vf ! VPres n p ;
      <Ag g n _, CTPast> => vf ! VPastPart g n
      } ;
-{-
-  copulaVerbForms : VerbForms = {
-    inf = "byť" ;
-    pressg1 = "som" ;
-    pressg2 = "si" ;
-    pressg3 = "je" ;
-    prespl1 = "sme" ;
-    prespl2 = "ste" ;
-    prespl3 = "sú" ;
-    pastpmasc = "bol" ;
-    pastpfem = "bola" ;
-    pastpneutr = "bolo" ;
+
+  smartVerbForms : Str -> VerbForms = \s -> case s of { -- derive the three aeiVerbForms arguments from the infinitive alone
+    cit + "ati" => aeiVerbForms s (cit + "am") (cit + "ao") ; -- -ati: second form in -am, third in -ao
+    radi + "ti" => aeiVerbForms s (init radi + "em") (radi + "o") ; -- other -ti: last stem letter replaced by -em; -o added to the stem
+    _ => Predef.error ("expect infinitive form \"-ti\", found" ++ s) -- anything not ending in -ti is rejected
+    } ;

-  haveVerbForms : VerbForms = {
-    inf = "mať" ;
-    pressg1 = "mám" ;
-    pressg2 = "máš" ;
-    pressg3 = "má" ;
-    prespl1 = "máme" ;
-    prespl2 = "máte" ;
-    prespl3 = "majú" ;
-    pastpmasc = "mal" ;
-    pastpfem = "mala" ;
-    pastpneutr = "malo" ;
-    } ;
-}
-
-- just an example of a traditional paradigm
+-- a traditional paradigm type, with a slight abstraction
 ---- TODO other traditional paradigms

  aeiVerbForms : Str -> Str -> Str -> VerbForms = \citati, citam, citao ->
--- a/src/croatian/StructuralHrv.gf
+++ b/src/croatian/StructuralHrv.gf
@@ -7,7 +7,7 @@ lin
 ----    few_Det = invarNumeral "málo" ; -- see notes
    for_Prep = mkPrep "pre" accusative ;
    from_Prep = mkPrep "iz" Gen ;
-    have_V2 = mkV2 imati_VerbForms ;
+    have_V2 = mkV2 (mkV imati_VerbForms) ;
    in_Prep = mkPrep "u" Loc ; 
 ----    many_Det = regNumeral "mnoho" "mnohých" "mnohým" "mnohými" ; ---- alternative: invarNumeral "veľa" ;
    or_Conj = mkConj "alebo" ;