resource TableExtResHrv = open Prelude in {

-- AR September 2022
-- sources:
-- Wiki = https://en.wikipedia.org/wiki/Serbo-Croatian_grammar
-- BCMS = Bosnian, Croatian, Montenegrin and Serbian:
--   An Essential Grammar (Routledge Essential Grammars) 1st Edition, by Željko Vrabec

-- parameters

param
  Number = Sg | Pl ;

  Animacy = Anim | Inanim ;
  Gender = Masc Animacy | Fem | Neutr ;

  Case = Nom | Gen | Dat | Acc | Voc | Loc | Ins ; -- traditional order

  Person = P1 | P2 | P3 ;

  VForm =
      VInf
    | VPres Number Person
    | VPastPart Gender Number
    ;
    ---- TODO aorist, imperfect

  Agr = Ag Gender Number Person ;

  CTense = CTPres | CTPast ; ----- TODO complete the tense system to match BCS verb morphology

-- phonology

oper
  softConsonant : pattern Str = #("c"|"č"|"ć"|"đ"|"j"|"lj"|"nj"|"š"|"ž"|"št") ;
  --- wiki "and sometimes r"; BCMS slightly different

  ifSoft : Str -> (soft,hard : Str) -> Str = \word, ssoft, shard ->
    case word of {
      _ + #softConsonant => ssoft ;
      _ => shard
      } ;

   animate = Masc Anim ;
   inanimate = Masc Inanim ;
   feminine = Fem ;
   neuter = Neutr ;

palatalize : Str -> Str = \s -> case s of {
  x + "ki" => x + "ci" ;
  x + "ke" => x + "če" ;
  x + "gi" => x + "zi" ;
  x + "ge" => x + "že" ;
  x + "hi" => x + "si" ;
  x + "he" => x + "še" ;
  x + "ce" => x + "če" ;
  _ => s
  } ;

voicing : Str -> Str = \s -> case s of {
  x + "b"  => x + "p" ;
  x + "d"  => x + "t" ;
  x + "đ"  => x + "ć" ;
  x + "z"  => x + "s" ;
  x + "dž" => x + "č" ;
  x + "ž"  => x + "š" ;
  _ => s
  } ;
  
---------------
-- Nouns
---------------

-- novel idea (for RGL): lexical items stored as records rather than tables
-- advantages:
-- - easier to make exceptions to paradigms (by ** {})
-- - easier to keep the number of forms minimal
-- - easier to see what is happening than with lots of anonymous arguments to mkN, mkA, mkV
-- Starting from GF 2022-10-04: this can now be done with table extension, which gives 
-- type safety and more powerful pattern matching

  param
  NForm = snom | sgen | sdat | sacc | svoc | sins | pnom | pgen | pdat | pacc ;
    
  oper
  NounForms : Type = NForm => Str ;

-- But traditional tables make agreement easier to handle in syntax
-- so this is the lincat of CN

  Noun : Type = {s : Number => Case => Str ; g : Gender} ;

-- for lexical nouns N, we also need the gender but keep the minimal set of forms

  LexNoun : Type = {s : NounForms ; g : Gender} ;

-- this is used in UseN

  nounFormsNoun : NounForms -> Gender -> Noun
    = \forms, g -> {
      s = table {
        Sg => table {
	  Nom => forms ! snom ;
	  Gen => forms ! sgen ;
	  Dat => forms ! sdat ;
	  Acc => case g of {
	    Masc Anim | Fem => forms ! sacc ;
	    _ => forms ! snom
	    } ;
	  Voc => forms ! svoc ;
	  Loc => forms ! sdat ;
	  Ins => forms ! sins
	  } ;
        Pl => table {
	  Nom => forms ! pnom ;
	  Gen => forms ! pgen ;
	  Dat => forms ! pdat ;
	  Acc => forms ! pacc ;
	  Voc => forms ! pnom ;
	  Loc => forms ! pdat ;
	  Ins => forms ! pdat
	  }
	} ;
      g = g
      } ;

-- a declension type produces these forms from a string

  DeclensionType : Type = Str -> NounForms ;

-- smart paradigms

  smartLexNoun : Str -> LexNoun = \s -> case s of {
    _ + "a"       => {s = zenaN s ; g = feminine} ;
    _ + "i"       => {s = ziriN s ; g = inanimate} ; ---- TODO feminine i
    _ + "e"       => {s = poljeN s ; g = neuter} ; ---- TODO sunce, uze, zvonce, rame
    _ + "ao"      => {s = ugaoN s ; g = inanimate} ;
    _ + "eo"      => {s = pepeoN s ; g = inanimate} ;
    _ + "o"       => {s = koljenoN s ; g = neuter} ; ---- TODO jedro
    _ + "lac"     => {s = posjetilacN s ; g = inanimate} ;
    _ + "anj"     => {s = suzanjN s ; g = inanimate} ;
    _ + "nj"      => {s = panjN s ; g = inanimate} ;
    _ + "št"      => {s = pristN s ; g = inanimate} ;
    _ + "ac"      => {s = klinacN s ; g = neuter} ;
    _ + "c"       => {s = stricN s ; g = inanimate} ;
    _ + "in"      => {s = gradaninN s ; g = neuter} ;
    _ + "ak"      => {s = cvorakN s ; g = inanimate} ;
    _ + "a" + ?   => {s = nokatN s ; g = inanimate} ;
    _ + "g"       => {s = bubregN s ; g = inanimate} ;
    _ + "h"       => {s = trbuhN s ; g = inanimate} ;
    _ + "k"       => {s = vojnikN s ; g = inanimate} ;
    _             => {s = izvorN s ; g = inanimate}
    } ;

  mkgLexNoun : Str -> Gender -> LexNoun = \s,g -> case <s,g> of {
    <_ + "i", Masc _> => {s = ziriN s ; g = g} ;
    <_ + "e", Masc _> => {s = bifeN s ; g = g} ;
    <_ + "o", Masc _> => {s = bifeN s ; g = g} ;
    <_, g> => smartLexNoun s ** {g = g}
    } ;


-- the traditional declensions, following Wiki
-- they are also exported in ParadigmsHrv with names izvorN etc

  izvorN : DeclensionType = \izvor ->
    table {
      snom => izvor ;
      sgen => izvor + "a" ;
      sdat => izvor + "u" ;
      sacc => izvor + "a" ;
      svoc => ifSoft izvor
                    (izvor + "u")
		    (palatalize (izvor + "e")) ;
      sins => ifSoft izvor
                    (izvor + "em") 
                    (izvor + "om") ;

      pnom => palatalize (izvor + "i") ;
      pgen => izvor + "a" ;
      pdat => ifSoft izvor
	         (palatalize (izvor + "e") + "vima")
	         (palatalize (izvor + "i") + "ma") ;
      pacc => izvor + "e"
      } ;

   nokatN : DeclensionType = \nokat ->
      let
        nokt = Predef.tk 2 nokat + last nokat
      in izvorN nokt ** {
        snom => nokat ;
	sacc => nokt + "a" ;
	pgen => nokat + "a"
        } ;
	
   gradaninN : DeclensionType = \gradanin ->
      let
        gradan = Predef.tk 2 gradanin ;
        gradanN = izvorN gradan 
      in numbersNounForms (izvorN gradanin) gradanN ;

   numbersNounForms : (sg, pl : NounForms) -> NounForms =
     \sg, pl -> sg ** {
        pnom => pl ! pnom ;
        pgen => pl ! pgen ;
        pdat => pl ! pdat ;
        pacc => pl ! pacc
        } ;
  
   vojnikN : DeclensionType = izvorN ;
   bubregN : DeclensionType = izvorN ;
   trbuhN : DeclensionType = izvorN ;
   cvorakN : DeclensionType = nokatN ;

   panjN : DeclensionType = \panj ->
     numbersNounForms (izvorN panj) (izvorN (palatalize (panj + "e") + "v")) ;

   suzanjN : DeclensionType = \suzanj ->
     let
       suznj = Predef.tk 3 suzanj + Predef.dp 2 suzanj
     in
     numbersNounForms
       (izvorN suzanj)
       (izvorN suznj ** {
         pgen => "sužanja" ;
         pdat => "sužnjima"
         }) ;

    pristN : DeclensionType = panjN ;
    
    stricN : DeclensionType = \stric ->
      panjN stric ** {
        svoc => palatalize (stric + "e")
	} ;

    klinacN : DeclensionType = \klinac ->
      let
        klinc = Predef.tk 2 klinac + last klinac
      in nokatN klinac ** {
        svoc => palatalize (klinc + "e") ;
	pdat => klinc + "ima"
	} ;

    posjetilacN : DeclensionType = \posjetilac ->
      let
        posjetioc = Predef.tk 3 posjetilac + "oc"
      in izvorN posjetioc ** {
        snom => posjetilac ;
	svoc => palatalize (posjetioc + "e") ;
	sins => palatalize (posjetioc + "e") + "m" ;
	pgen => posjetilac + "a" ;
	pdat => palatalize (posjetioc + "i") + "ma"
        } ;

    pepeoN : DeclensionType = \pepeo ->
      let
        pepel = init pepeo + "l"
      in izvorN pepel ** {snom => pepeo} ;

    ugaoN : DeclensionType = \ugao ->
      let
        ugal = init ugao + "l" ;
	ugl = Predef.tk 2 ugal + last ugal
      in numbersNounForms
           (nokatN ugal ** {snom => ugao})
	   (izvorN (ugl + "ov")) ;
	   
    bifeN : DeclensionType = \bife ->
      izvorN bife ** {svoc => bife + "u"} ;
      
    ziriN : DeclensionType = \ziri ->
      bifeN (ziri + "j") ** {
        snom => ziri ;
	pdat => ziri + "jima"
	} ;

    taksiN : DeclensionType = ziriN ;

---- Danilo, Hrvoje, raščupànko skipped

    koljenoN : DeclensionType = \koljeno ->
      let
        koljen = init koljeno
      in izvorN koljen ** {
        snom | sacc | svoc => koljeno ;
	pnom | pacc => koljen + "a"
        } ;

    jedroN : DeclensionType = \jedro ->
      let
        jed = Predef.tk 2 jedro ;
	r = last (init jedro) ; 
      in koljenoN jedro ** {
        pgen => jed + "a" + r + "a"
        } ;

---- drvo - drveta skipped, can also decline as koljeno
---- oči, čudo skipped

    poljeN : DeclensionType = \polje ->
      koljenoN polje ** {
        pdat => init polje + "ima"
        } ;
    
---- sunce, uže, zvonce, rame, podne, doba

---- no tables given in the sources for feminine nouns, so guessing from ending tables

    zenaN : DeclensionType = \zena -> 
      let
        zen = init zena
      in table {
        snom => zena ;
	sgen => zen + "e" ;
	sdat => zen + "i" ;
	sacc => zen + "u" ;
	svoc => zen + "o" ; ---- o/a in Wiki ; o in https://sh.wiktionary.org/wiki/%C5%BEena
	sins => zen + "om" ;
	pnom | pacc => zen + "e" ;
	pdat => zen + "ama" ;
	pgen => zen + "a"
        } ;


---------------------------
-- Adjectives

-- to be used for AP: 56 forms for each degree
  Adjective : Type = {s : Gender => Number => Case => Str} ;

-- to be used for A, in three degrees: 12 forms in each
---- TODO other degrees than positive

  param AForm = 
    msnom | fsnom | nsnom |
    msgen | fsgen |       -- nsgen = msgen
    msdat |               -- msloc,nsdat,nsloc = msdat
    fsdat |               -- fsloc = fsdat
    fsacc |               -- 
    msins |               -- nsins, pdat, ploc, pins = msins
    fsins |               -- no o/e variation like in msdat
    mpnom |               -- mpvoc = mpnom
    mpgen                 -- fpgen, npgen = mpgen
    ;

  oper
  AdjForms : Type = AForm => Str ;

  invarAdjForms : Str -> AdjForms = \s -> \\a => s ;

-- used in PositA but will also work in Compar and Superl by calling their record fields

 adjFormsAdjective : AdjForms -> Adjective = \afs -> {
   s = \\g,n,c => case <n,c,g> of {

    <Sg, Nom|Voc, Masc _>
      | <Sg, Acc, Masc Inanim>   => afs ! msnom ;
    <Sg, Nom|Voc, Fem>
      | <Pl, Nom|Acc|Voc, Neutr> => afs ! fsnom ;
    <Sg, Nom|Acc|Voc, Neutr>     => afs ! nsnom ;
    <Sg, Gen, Masc _ | Neutr>
      | <Sg, Acc, Masc Anim>     => afs ! msgen ;
    <Sg, Gen, Fem>
      | <Pl, Nom|Acc|Voc, Fem>
      | <Pl, Acc, Masc _>        => afs ! fsgen ;
    <Sg, Dat|Loc, Masc _|Neutr>  => afs ! msdat ;
    <Sg, Dat|Loc, Fem>           => afs ! fsdat ;
    <Sg, Acc, Fem>               => afs ! fsacc ;
    <Sg, Ins, Masc _|Neutr>
      | <Pl,Dat|Loc|Ins, _>      => afs ! msins ;
    <Sg, Ins, Fem>               => afs ! fsins ;
    <Pl, Nom|Voc, Masc _>        => afs ! mpnom ;
    <Pl, Gen,_>                  => afs ! mpgen
    }
    } ;

  velikA : Str -> AdjForms = \velik ->
    let
      velk : Str = case velik of {
        vel + "stan" => vel + "sn" ;
        vel + "ao" => vel + "l" ;
        vel + "ak" => voicing vel + "k" ;
        vel + "a" + k@? => vel + k ;
	vel + "i" => vel ;
	_ => velik
        } ;
      oe : Str = ifSoft velik "e" "o"
    in table {
      msnom => velik ;
      fsnom => velk + "a" ;
      nsnom => velk + oe ;
      msgen => velk + oe + "g" ;
      fsgen => velk + "e" ;
      msdat => velk + oe + "m" ;
      fsdat => velk + "oj" ;
      fsacc => velk + "u" ;
      msins => velk + "im" ;
      fsins => velk + "om" ;
      mpnom => velk + "i" ;
      mpgen => velk + "ih"
      } ;

  regComparAForms : AdjForms -> AdjForms
    = \aposit -> case init (aposit ! fsnom) of {
        grub@(_ + "b"|"p"|"v"|"h") => velikA (grub + "lji") ;
        star => velikA (star + "iji")
	} ;

  superlAForms : AdjForms -> AdjForms
    = \acompar -> velikA ("naj" + acompar ! msnom) ;

  od_Str = "od" ;

---------------------
-- Verbs
-- Wiki

  VerbForms : Type = VForm => Str ;

  ComplementCase : Type = {s : Str ; c : Case ; hasPrep : Bool} ;

  verbAgr : VerbForms -> Agr -> CTense -> Str   ---- TODO tenses
    = \vf,a,b -> case <a,b> of {
      <Ag _ n p, CTPres> => vf ! VPres n p ;
      <Ag g n _, CTPast> => vf ! VPastPart g n
      } ;

  smartVerbForms : Str -> VerbForms = \s -> case s of {
    cit + "ati" => aeiVerbForms s (cit + "am") (cit + "ao") ;
    vid + "jeti" => aeiVerbForms s (vid + "im") (vid + "io") ;
    radi + "ti" => aeiVerbForms s (init radi + "em") (radi + "o") ;
    _ => Predef.error ("expect infinitive form \"-ti\", found" ++ s)
    } ;

-- an traditional paradigm type, with a slight abstraction
---- TODO other traditional paradigms

  aeiVerbForms : Str -> Str -> Str -> VerbForms = \citati, citam, citao ->
   let
     cita = init citam ;
     u = case last cita of {
        "a" => "aju" ;
	"e" => "u" ;
	"i" => "e"
        } ;
     cital = init citao + "l" ;
   in table {
      VInf => citati ;
      VPres Sg P1 => cita + "m" ;
      VPres Sg P2 => cita + "š" ;
      VPres Sg P3 => cita ;
      VPres Pl P1 => cita + "mo" ;
      VPres Pl P2 => cita + "te" ;
      VPres pl P3 => init cita + u ;
      VPastPart (Masc _) Sg => citao ;
      VPastPart Fem Sg => cital + "a" ;
      VPastPart Neutr Sg => cital + "o" ;
      VPastPart (Masc _) Pl => cital + "i" ;
      VPastPart Fem Pl => cital + "e" ;
      VPastPart Neutr Pl => cital + "a"
    } ;


-- copula

  jesam_Copula : {short, long, negative : Number => Person => Str} =
    let
      sam : Number => Person => Str = table {
        Sg => table {
	  P1 => "sam" ;
	  P2 => "si" ;
	  P3 => "je"
	  } ;
        Pl => table {
	  P1 => "smo" ;
	  P2 => "ste" ;
	  P3 => "su"
	  }
	}
     in {
       short = sam ;
       long = \\n,p => case <n,p> of {
         <Sg,P3> => "jeste" ; --- wiki: jest(e)
	 _ => sam ! n ! p
	 } ;
       negative = \\n,p => "ni" + sam ! n ! p
       } ;
       
-- Wiki: some grammars (chiefly Serbian ones) treat jesam as a defective verb
-- having only present tense. Others treat these forms as two realizations
-- of the same irregular verb biti, jesam being imperfective and budem perfective.

    copula_VerbForms : VerbForms = table {
      VPres n p => jesam_Copula.short ! n ! p ;
      v => biti_VerbForms ! v
      } ;

    biti_VerbForms : VerbForms = aeiVerbForms "biti" "budem" "bio" ;

    imati_VerbForms : VerbForms = aeiVerbForms "imati" "imam" "imao" ;


---------------------------
-- Pronouns

  PronForms : Type = {
    nom, 
    gen, cgen,  -- bare, clitic (also as Acc)
    dat, cdat,  -- also as Loc
    ins        : Str ;
    a : Agr
    } ;

  personalPron : Agr -> PronForms = \a ->
    {a = a ; cnom = []} **
    case a of {
      Ag _ Sg P1 => {
        nom = "ja" ;
        gen = "mene" ;
        cgen = "ma" ;
        dat = "meni" ;
        cdat = "mi" ;
        ins = "mnom"
        } ;
      Ag _ Sg P2 => {
        nom = "ti" ;
        gen = "tebe" ;
        cgen = "te" ;
        dat = "tebi" ;
        cdat = "ti" ;
        ins = "tobom"
        } ;
      Ag Fem Sg P3 => {
        nom = "ona" ;
        gen = "nje" ;
        cgen = "je" ;
        dat = "njoj" ;
        cdat = "joj" ;
        ins = "njom"
        } ;
      Ag g Sg P3 => {
        nom = case g of {
	  Masc _ => "on" ;
	  _ => "ono"
	  } ;
        gen = "njega" ;
        cgen = "ga" ;
        dat = "njemu" ;
        cdat = "mu" ;
        ins = "njim"
        } ;

      Ag _ Pl P1 => {
        nom = "mi" ;
        gen, cgen = "nas" ;
        dat, ins = "nama" ;
        cdat = "nam"
        } ;
      Ag _ Pl P2 => {
        nom = "vi" ;
        gen, cgen = "vas" ;
        dat, ins = "vama" ;
        cdat = "vam"
        } ;
      Ag g Pl P3 => {
        nom = case g of {
	  Masc _ => "oni" ;
	  Fem => "one" ;
	  Neutr => "ona"
	  } ;
        gen = "njih" ;
        cgen = "ih" ;
        dat, ins = "njima" ;
        cdat = "im" 
        }
      } ;

  possessivePron : Agr -> AdjForms = \a -> case a of {
      Ag _ Sg P1 => velikA "moj" ;
      Ag _ Sg P2 => velikA "tvoj" ;
      Ag (Masc _) Sg P3 => velikA "njegov" ;
      Ag (Fem|Neutr) Sg P3 => velikA "njezin" ; -- BCMS: Hrv: otherwise njen
      Ag _ Pl P1 => velikA "naš" ;
      Ag _ Pl P2 => velikA "vaš" ;
      Ag _ Pl P3 => velikA "njihov"
    } ;

{-
  reflPossessivePron : DemPronForms = otcovA "svoj" ** {
        msnom = "svoj" ; msgen = "svojho" ; msdat = "svojmu" ;
	msins = "svojím" ;
	ampnom = "svoji" ;
	nsnom, fpnom = "svoje" ;
	pgen = "svojich" ;
	pdat = "svojim" ;
	pins = "svojimi" ;
	} ;
-}

  mkPron : Agr -> PronForms ** {poss : AdjForms} = \a ->
    personalPron a ** {poss = possessivePron a} ;
    

param NumSize = NS_1 | NS_2_4 | NS_5_20 | NS_20_ ;

oper
  Determiner : Type = {
    s : Gender => Case => Str ;
    size : NumSize
    } ;

{-
-- interrogatives TODO

 ktoForms : Case => Str = table {
   Nom => "kto" ;
   Gen | Acc => "koho" ;
   Dat => "komu" ;
   Loc => "kom" ;
   Ins => "kým"
   } ;

 coForms : Case => Str = table {
   Nom|Acc => "čo" ;
   Gen => "čoho" ;
   Dat => "čomu" ;
   Loc => "čom" ;
   Ins => "čím"
   } ;
-}


--------------------------------
-- combining nouns with numerals

oper
  numSizeForm : (Number => Case => Str) -> NumSize -> Case -> Str
    = \cns,n,c -> case n of {
        NS_1   => cns ! Sg ! c ;
	NS_2_4 => cns ! Pl ! c ;
	_ => case c of {
	  Nom | Acc => cns ! Pl ! Gen ;
	  _ => cns ! Pl ! c
	  }
	} ;

  numSizeAgr : Gender -> NumSize -> Person -> Agr
    = \g,ns,p -> case ns of {
    	NS_1   => Ag g Sg p ;
	NS_2_4 => Ag g Pl p ;
	_   => Ag Neutr Sg p ---- TODO verify
	} ;

  numSizeNumber : NumSize -> Number = \ns -> case ns of {
    NS_1 => Sg ;
    _ => Pl      ---- TO CHECK
    } ;

}