version of ResHrv heavily using table extension ; requires GF from 4 Oct therefore not standard

2022-10-05 11:41:19 +02:00
parent 8811da5e51
commit c656f91f93
1 changed files with 660 additions and 0 deletions
--- a/src/croatian/TableExtResHrv.gf
+++ b/src/croatian/TableExtResHrv.gf
@@ -0,0 +1,660 @@
+resource TableExtResHrv = open Prelude in {
+
+-- AR September 2022
+-- sources:
+-- Wiki = https://en.wikipedia.org/wiki/Serbo-Croatian_grammar
+-- BCMS = Bosnian, Croatian, Montenegrin and Serbian:
+--   An Essential Grammar (Routledge Essential Grammars) 1st Edition, by Željko Vrabec
+
+-- parameters
+
+param
+  Number = Sg | Pl ;
+
+  Animacy = Anim | Inanim ;
+  Gender = Masc Animacy | Fem | Neutr ;
+
+  Case = Nom | Gen | Dat | Acc | Voc | Loc | Ins ; -- traditional order
+
+  Person = P1 | P2 | P3 ;
+
+  VForm =
+      VInf
+    | VPres Number Person
+    | VPastPart Gender Number
+    ;
+    ---- TODO aorist, imperfect
+
+  Agr = Ag Gender Number Person ;
+
+  CTense = CTPres | CTPast ; ----- TODO complete the tense system to match BCS verb morphology
+
+-- phonology
+
+oper
+  softConsonant : pattern Str = #("c"|"č"|"ć"|"đ"|"j"|"lj"|"nj"|"š"|"ž"|"št") ;
+  --- wiki "and sometimes r"; BCMS slightly different
+
+  ifSoft : Str -> (soft,hard : Str) -> Str = \word, ssoft, shard ->
+    case word of {
+      _ + #softConsonant => ssoft ;
+      _ => shard
+      } ;
+
+   animate = Masc Anim ;
+   inanimate = Masc Inanim ;
+   feminine = Fem ;
+   neuter = Neutr ;
+
+palatalize : Str -> Str = \s -> case s of {
+  x + "ki" => x + "ci" ;
+  x + "ke" => x + "če" ;
+  x + "gi" => x + "zi" ;
+  x + "ge" => x + "že" ;
+  x + "hi" => x + "si" ;
+  x + "he" => x + "še" ;
+  x + "ce" => x + "če" ;
+  _ => s
+  } ;
+
+voicing : Str -> Str = \s -> case s of {
+  x + "b"  => x + "p" ;
+  x + "d"  => x + "t" ;
+  x + "đ"  => x + "ć" ;
+  x + "z"  => x + "s" ;
+  x + "dž" => x + "č" ;
+  x + "ž"  => x + "š" ;
+  _ => s
+  } ;
+  
+---------------
+-- Nouns
+---------------
+
+-- novel idea (for RGL): lexical items stored as records rather than tables
+-- advantages:
+-- - easier to make exceptions to paradigms (by ** {})
+-- - easier to keep the number of forms minimal
+-- - easier to see what is happening than with lots of anonymous arguments to mkN, mkA, mkV
+-- Starting from GF 2022-10-04: this can now be done with table extension, which gives 
+-- type safety and more powerful pattern matching
+
+  param
+  NForm = snom | sgen | sdat | sacc | svoc | sins | pnom | pgen | pdat | pacc ;
+    
+  oper
+  NounForms : Type = NForm => Str ;
+
+-- But traditional tables make agreement easier to handle in syntax
+-- so this is the lincat of CN
+
+  Noun : Type = {s : Number => Case => Str ; g : Gender} ;
+
+-- for lexical nouns N, we also need the gender but keep the minimal set of forms
+
+  LexNoun : Type = {s : NounForms ; g : Gender} ;
+
+-- this is used in UseN
+
+  nounFormsNoun : NounForms -> Gender -> Noun
+    = \forms, g -> {
+      s = table {
+        Sg => table {
+	  Nom => forms ! snom ;
+	  Gen => forms ! sgen ;
+	  Dat => forms ! sdat ;
+	  Acc => case g of {
+	    Masc Anim | Fem => forms ! sacc ;
+	    _ => forms ! snom
+	    } ;
+	  Voc => forms ! svoc ;
+	  Loc => forms ! sdat ;
+	  Ins => forms ! sins
+	  } ;
+        Pl => table {
+	  Nom => forms ! pnom ;
+	  Gen => forms ! pgen ;
+	  Dat => forms ! pdat ;
+	  Acc => forms ! pacc ;
+	  Voc => forms ! pnom ;
+	  Loc => forms ! pdat ;
+	  Ins => forms ! pdat
+	  }
+	} ;
+      g = g
+      } ;
+
+-- a declension type produces these forms from a string
+
+  DeclensionType : Type = Str -> NounForms ;
+
+-- smart paradigms
+
+  smartLexNoun : Str -> LexNoun = \s -> case s of {
+    _ + "a"       => {s = zenaN s ; g = feminine} ;
+    _ + "i"       => {s = ziriN s ; g = inanimate} ; ---- TODO feminine i
+    _ + "e"       => {s = poljeN s ; g = neuter} ; ---- TODO sunce, uze, zvonce, rame
+    _ + "ao"      => {s = ugaoN s ; g = inanimate} ;
+    _ + "eo"      => {s = pepeoN s ; g = inanimate} ;
+    _ + "o"       => {s = koljenoN s ; g = neuter} ; ---- TODO jedro
+    _ + "lac"     => {s = posjetilacN s ; g = inanimate} ;
+    _ + "anj"     => {s = suzanjN s ; g = inanimate} ;
+    _ + "nj"      => {s = panjN s ; g = inanimate} ;
+    _ + "št"      => {s = pristN s ; g = inanimate} ;
+    _ + "ac"      => {s = klinacN s ; g = neuter} ;
+    _ + "c"       => {s = stricN s ; g = inanimate} ;
+    _ + "in"      => {s = gradaninN s ; g = neuter} ;
+    _ + "ak"      => {s = cvorakN s ; g = inanimate} ;
+    _ + "a" + ?   => {s = nokatN s ; g = inanimate} ;
+    _ + "g"       => {s = bubregN s ; g = inanimate} ;
+    _ + "h"       => {s = trbuhN s ; g = inanimate} ;
+    _ + "k"       => {s = vojnikN s ; g = inanimate} ;
+    _             => {s = izvorN s ; g = inanimate}
+    } ;
+
+  mkgLexNoun : Str -> Gender -> LexNoun = \s,g -> case <s,g> of {
+    <_ + "i", Masc _> => {s = ziriN s ; g = g} ;
+    <_ + "e", Masc _> => {s = bifeN s ; g = g} ;
+    <_ + "o", Masc _> => {s = bifeN s ; g = g} ;
+    <_, g> => smartLexNoun s ** {g = g}
+    } ;
+
+
+-- the traditional declensions, following Wiki
+-- they are also exported in ParadigmsHrv with names izvorN etc
+
+  izvorN : DeclensionType = \izvor ->
+    table {
+      snom => izvor ;
+      sgen => izvor + "a" ;
+      sdat => izvor + "u" ;
+      sacc => izvor + "a" ;
+      svoc => ifSoft izvor
+                    (izvor + "u")
+		    (palatalize (izvor + "e")) ;
+      sins => ifSoft izvor
+                    (izvor + "em") 
+                    (izvor + "om") ;
+
+      pnom => palatalize (izvor + "i") ;
+      pgen => izvor + "a" ;
+      pdat => ifSoft izvor
+	         (palatalize (izvor + "e") + "vima")
+	         (palatalize (izvor + "i") + "ma") ;
+      pacc => izvor + "e"
+      } ;
+
+   nokatN : DeclensionType = \nokat ->
+      let
+        nokt = Predef.tk 2 nokat + last nokat
+      in izvorN nokt ** {
+        snom => nokat ;
+	sacc => nokt + "a" ;
+	pgen => nokat + "a"
+        } ;
+	
+   gradaninN : DeclensionType = \gradanin ->
+      let
+        gradan = Predef.tk 2 gradanin ;
+        gradanN = izvorN gradan 
+      in numbersNounForms (izvorN gradanin) gradanN ;
+
+   numbersNounForms : (sg, pl : NounForms) -> NounForms =
+     \sg, pl -> sg ** {
+        pnom => pl ! pnom ;
+        pgen => pl ! pgen ;
+        pdat => pl ! pdat ;
+        pacc => pl ! pacc
+        } ;
+  
+   vojnikN : DeclensionType = izvorN ;
+   bubregN : DeclensionType = izvorN ;
+   trbuhN : DeclensionType = izvorN ;
+   cvorakN : DeclensionType = nokatN ;
+
+   panjN : DeclensionType = \panj ->
+     numbersNounForms (izvorN panj) (izvorN (palatalize (panj + "e") + "v")) ;
+
+   suzanjN : DeclensionType = \suzanj ->
+     let
+       suznj = Predef.tk 3 suzanj + Predef.dp 2 suzanj
+     in
+     numbersNounForms
+       (izvorN suzanj)
+       (izvorN suznj ** {
+         pgen => "sužanja" ;
+         pdat => "sužnjima"
+         }) ;
+
+    pristN : DeclensionType = panjN ;
+    
+    stricN : DeclensionType = \stric ->
+      panjN stric ** {
+        svoc => palatalize (stric + "e")
+	} ;
+
+    klinacN : DeclensionType = \klinac ->
+      let
+        klinc = Predef.tk 2 klinac + last klinac
+      in nokatN klinac ** {
+        svoc => palatalize (klinc + "e") ;
+	pdat => klinc + "ima"
+	} ;
+
+    posjetilacN : DeclensionType = \posjetilac ->
+      let
+        posjetioc = Predef.tk 3 posjetilac + "oc"
+      in izvorN posjetioc ** {
+        snom => posjetilac ;
+	svoc => palatalize (posjetioc + "e") ;
+	sins => palatalize (posjetioc + "e") + "m" ;
+	pgen => posjetilac + "a" ;
+	pdat => palatalize (posjetioc + "i") + "ma"
+        } ;
+
+    pepeoN : DeclensionType = \pepeo ->
+      let
+        pepel = init pepeo + "l"
+      in izvorN pepel ** {snom => pepeo} ;
+
+    ugaoN : DeclensionType = \ugao ->
+      let
+        ugal = init ugao + "l" ;
+	ugl = Predef.tk 2 ugal + last ugal
+      in numbersNounForms
+           (nokatN ugal ** {snom => ugao})
+	   (izvorN (ugl + "ov")) ;
+	   
+    bifeN : DeclensionType = \bife ->
+      izvorN bife ** {svoc => bife + "u"} ;
+      
+    ziriN : DeclensionType = \ziri ->
+      bifeN (ziri + "j") ** {
+        snom => ziri ;
+	pdat => ziri + "jima"
+	} ;
+
+    taksiN : DeclensionType = ziriN ;
+
+---- Danilo, Hrvoje, raščupànko skipped
+
+    koljenoN : DeclensionType = \koljeno ->
+      let
+        koljen = init koljeno
+      in izvorN koljen ** {
+        snom | sacc | svoc => koljeno ;
+	pnom | pacc => koljen + "a"
+        } ;
+
+    jedroN : DeclensionType = \jedro ->
+      let
+        jed = Predef.tk 2 jedro ;
+	r = last (init jedro) ; 
+      in koljenoN jedro ** {
+        pgen => jed + "a" + r + "a"
+        } ;
+
+---- drvo - drveta skipped, can also decline as koljeno
+---- oči, čudo skipped
+
+    poljeN : DeclensionType = \polje ->
+      koljenoN polje ** {
+        pdat => init polje + "ima"
+        } ;
+    
+---- sunce, uže, zvonce, rame, podne, doba
+
+---- no tables given in the sources for feminine nouns, so guessing from ending tables
+
+    zenaN : DeclensionType = \zena -> 
+      let
+        zen = init zena
+      in table {
+        snom => zena ;
+	sgen => zen + "e" ;
+	sdat => zen + "i" ;
+	sacc => zen + "u" ;
+	svoc => zen + "o" ; ---- o/a in Wiki ; o in https://sh.wiktionary.org/wiki/%C5%BEena
+	sins => zen + "om" ;
+	pnom | pacc => zen + "e" ;
+	pdat => zen + "ama" ;
+	pgen => zen + "a"
+        } ;
+
+
+
+---------------------------
+-- Adjectives
+
+-- to be used for AP: 56 forms for each degree
+  Adjective : Type = {s : Gender => Number => Case => Str} ;
+
+-- to be used for A, in three degrees: 12 forms in each
+---- TODO other degrees than positive
+
+  param AForm = 
+    msnom | fsnom | nsnom |
+    msgen | fsgen |       -- nsgen = msgen
+    msdat |               -- msloc,nsdat,nsloc = msdat
+    fsdat |               -- fsloc = fsdat
+    fsacc |               -- 
+    msins |               -- nsins, pdat, ploc, pins = msins
+    fsins |               -- no o/e variation like in msdat
+    mpnom |               -- mpvoc = mpnom
+    mpgen                 -- fpgen, npgen = mpgen
+    ;
+
+  oper
+  AdjForms : Type = AForm => Str ;
+
+  invarAdjForms : Str -> AdjForms = \s -> \\a => s ;
+
+-- used in PositA but will also work in Compar and Superl by calling their record fields
+
+ adjFormsAdjective : AdjForms -> Adjective = \afs -> {
+   s = \\g,n,c => case <n,c,g> of {
+
+    <Sg, Nom|Voc, Masc _>
+      | <Sg, Acc, Masc Inanim>   => afs ! msnom ;
+    <Sg, Nom|Voc, Fem>
+      | <Pl, Nom|Acc|Voc, Neutr> => afs ! fsnom ;
+    <Sg, Nom|Acc|Voc, Neutr>     => afs ! nsnom ;
+    <Sg, Gen, Masc _ | Neutr>
+      | <Sg, Acc, Masc Anim>     => afs ! msgen ;
+    <Sg, Gen, Fem>
+      | <Pl, Nom|Acc|Voc, Fem>
+      | <Pl, Acc, Masc _>        => afs ! fsgen ;
+    <Sg, Dat|Loc, Masc _|Neutr>  => afs ! msdat ;
+    <Sg, Dat|Loc, Fem>           => afs ! fsdat ;
+    <Sg, Acc, Fem>               => afs ! fsacc ;
+    <Sg, Ins, Masc _|Neutr>
+      | <Pl,Dat|Loc|Ins, _>      => afs ! msins ;
+    <Sg, Ins, Fem>               => afs ! fsins ;
+    <Pl, Nom|Voc, Masc _>        => afs ! mpnom ;
+    <Pl, Gen,_>                  => afs ! mpgen
+    }
+    } ;
+
+  velikA : Str -> AdjForms = \velik ->
+    let
+      velk : Str = case velik of {
+        vel + "stan" => vel + "sn" ;
+        vel + "ao" => vel + "l" ;
+        vel + "ak" => voicing vel + "k" ;
+        vel + "a" + k@? => vel + k ;
+	vel + "i" => vel ;
+	_ => velik
+        } ;
+      oe : Str = ifSoft velik "e" "o"
+    in table {
+      msnom => velik ;
+      fsnom => velk + "a" ;
+      nsnom => velk + oe ;
+      msgen => velk + oe + "g" ;
+      fsgen => velk + "e" ;
+      msdat => velk + oe + "m" ;
+      fsdat => velk + "oj" ;
+      fsacc => velk + "u" ;
+      msins => velk + "im" ;
+      fsins => velk + "om" ;
+      mpnom => velk + "i" ;
+      mpgen => velk + "ih"
+      } ;
+
+  regComparAForms : AdjForms -> AdjForms
+    = \aposit -> case init (aposit ! fsnom) of {
+        grub@(_ + "b"|"p"|"v"|"h") => velikA (grub + "lji") ;
+        star => velikA (star + "iji")
+	} ;
+
+  superlAForms : AdjForms -> AdjForms
+    = \acompar -> velikA ("naj" + acompar ! msnom) ;
+
+  od_Str = "od" ;
+
+---------------------
+-- Verbs
+-- Wiki
+
+  VerbForms : Type = VForm => Str ;
+
+  ComplementCase : Type = {s : Str ; c : Case ; hasPrep : Bool} ;
+
+  verbAgr : VerbForms -> Agr -> CTense -> Str   ---- TODO tenses
+    = \vf,a,b -> case <a,b> of {
+      <Ag _ n p, CTPres> => vf ! VPres n p ;
+      <Ag g n _, CTPast> => vf ! VPastPart g n
+      } ;
+
+  smartVerbForms : Str -> VerbForms = \s -> case s of {
+    cit + "ati" => aeiVerbForms s (cit + "am") (cit + "ao") ;
+    vid + "jeti" => aeiVerbForms s (vid + "im") (vid + "io") ;
+    radi + "ti" => aeiVerbForms s (init radi + "em") (radi + "o") ;
+    _ => Predef.error ("expect infinitive form \"-ti\", found" ++ s)
+    } ;
+
+-- an traditional paradigm type, with a slight abstraction
+---- TODO other traditional paradigms
+
+  aeiVerbForms : Str -> Str -> Str -> VerbForms = \citati, citam, citao ->
+   let
+     cita = init citam ;
+     u = case last cita of {
+        "a" => "aju" ;
+	"e" => "u" ;
+	"i" => "e"
+        } ;
+     cital = init citao + "l" ;
+   in table {
+      VInf => citati ;
+      VPres Sg P1 => cita + "m" ;
+      VPres Sg P2 => cita + "š" ;
+      VPres Sg P3 => cita ;
+      VPres Pl P1 => cita + "mo" ;
+      VPres Pl P2 => cita + "te" ;
+      VPres pl P3 => init cita + u ;
+      VPastPart (Masc _) Sg => citao ;
+      VPastPart Fem Sg => cital + "a" ;
+      VPastPart Neutr Sg => cital + "o" ;
+      VPastPart (Masc _) Pl => cital + "i" ;
+      VPastPart Fem Pl => cital + "e" ;
+      VPastPart Neutr Pl => cital + "a"
+    } ;
+
+
+-- copula
+
+  jesam_Copula : {short, long, negative : Number => Person => Str} =
+    let
+      sam : Number => Person => Str = table {
+        Sg => table {
+	  P1 => "sam" ;
+	  P2 => "si" ;
+	  P3 => "je"
+	  } ;
+        Pl => table {
+	  P1 => "smo" ;
+	  P2 => "ste" ;
+	  P3 => "su"
+	  }
+	}
+     in {
+       short = sam ;
+       long = \\n,p => case <n,p> of {
+         <Sg,P3> => "jeste" ; --- wiki: jest(e)
+	 _ => sam ! n ! p
+	 } ;
+       negative = \\n,p => "ni" + sam ! n ! p
+       } ;
+       
+-- Wiki: some grammars (chiefly Serbian ones) treat jesam as a defective verb
+-- having only present tense. Others treat these forms as two realizations
+-- of the same irregular verb biti, jesam being imperfective and budem perfective.
+
+    copula_VerbForms : VerbForms = table {
+      VPres n p => jesam_Copula.short ! n ! p ;
+      v => biti_VerbForms ! v
+      } ;
+
+    biti_VerbForms : VerbForms = aeiVerbForms "biti" "budem" "bio" ;
+
+    imati_VerbForms : VerbForms = aeiVerbForms "imati" "imam" "imao" ;
+
+
+---------------------------
+-- Pronouns
+
+  PronForms : Type = {
+    nom, 
+    gen, cgen,  -- bare, clitic (also as Acc)
+    dat, cdat,  -- also as Loc
+    ins        : Str ;
+    a : Agr
+    } ;
+
+  personalPron : Agr -> PronForms = \a ->
+    {a = a ; cnom = []} **
+    case a of {
+      Ag _ Sg P1 => {
+        nom = "ja" ;
+        gen = "mene" ;
+        cgen = "ma" ;
+        dat = "meni" ;
+        cdat = "mi" ;
+        ins = "mnom"
+        } ;
+      Ag _ Sg P2 => {
+        nom = "ti" ;
+        gen = "tebe" ;
+        cgen = "te" ;
+        dat = "tebi" ;
+        cdat = "ti" ;
+        ins = "tobom"
+        } ;
+      Ag Fem Sg P3 => {
+        nom = "ona" ;
+        gen = "nje" ;
+        cgen = "je" ;
+        dat = "njoj" ;
+        cdat = "joj" ;
+        ins = "njom"
+        } ;
+      Ag g Sg P3 => {
+        nom = case g of {
+	  Masc _ => "on" ;
+	  _ => "ono"
+	  } ;
+        gen = "njega" ;
+        cgen = "ga" ;
+        dat = "njemu" ;
+        cdat = "mu" ;
+        ins = "njim"
+        } ;
+
+      Ag _ Pl P1 => {
+        nom = "mi" ;
+        gen, cgen = "nas" ;
+        dat, ins = "nama" ;
+        cdat = "nam"
+        } ;
+      Ag _ Pl P2 => {
+        nom = "vi" ;
+        gen, cgen = "vas" ;
+        dat, ins = "vama" ;
+        cdat = "vam"
+        } ;
+      Ag g Pl P3 => {
+        nom = case g of {
+	  Masc _ => "oni" ;
+	  Fem => "one" ;
+	  Neutr => "ona"
+	  } ;
+        gen = "njih" ;
+        cgen = "ih" ;
+        dat, ins = "njima" ;
+        cdat = "im" 
+        }
+      } ;
+
+  possessivePron : Agr -> AdjForms = \a -> case a of {
+      Ag _ Sg P1 => velikA "moj" ;
+      Ag _ Sg P2 => velikA "tvoj" ;
+      Ag (Masc _) Sg P3 => velikA "njegov" ;
+      Ag (Fem|Neutr) Sg P3 => velikA "njezin" ; -- BCMS: Hrv: otherwise njen
+      Ag _ Pl P1 => velikA "naš" ;
+      Ag _ Pl P2 => velikA "vaš" ;
+      Ag _ Pl P3 => velikA "njihov"
+    } ;
+
+{-
+  reflPossessivePron : DemPronForms = otcovA "svoj" ** {
+        msnom = "svoj" ; msgen = "svojho" ; msdat = "svojmu" ;
+	msins = "svojím" ;
+	ampnom = "svoji" ;
+	nsnom, fpnom = "svoje" ;
+	pgen = "svojich" ;
+	pdat = "svojim" ;
+	pins = "svojimi" ;
+	} ;
+-}
+
+  mkPron : Agr -> PronForms ** {poss : AdjForms} = \a ->
+    personalPron a ** {poss = possessivePron a} ;
+    
+
+param NumSize = NS_1 | NS_2_4 | NS_5_20 | NS_20_ ;
+
+oper
+  Determiner : Type = {
+    s : Gender => Case => Str ;
+    size : NumSize
+    } ;
+
+{-
+-- interrogatives TODO
+
+ ktoForms : Case => Str = table {
+   Nom => "kto" ;
+   Gen | Acc => "koho" ;
+   Dat => "komu" ;
+   Loc => "kom" ;
+   Ins => "kým"
+   } ;
+
+ coForms : Case => Str = table {
+   Nom|Acc => "čo" ;
+   Gen => "čoho" ;
+   Dat => "čomu" ;
+   Loc => "čom" ;
+   Ins => "čím"
+   } ;
+-}
+
+
+--------------------------------
+-- combining nouns with numerals
+
+oper
+  numSizeForm : (Number => Case => Str) -> NumSize -> Case -> Str
+    = \cns,n,c -> case n of {
+        NS_1   => cns ! Sg ! c ;
+	NS_2_4 => cns ! Pl ! c ;
+	_ => case c of {
+	  Nom | Acc => cns ! Pl ! Gen ;
+	  _ => cns ! Pl ! c
+	  }
+	} ;
+
+  numSizeAgr : Gender -> NumSize -> Person -> Agr
+    = \g,ns,p -> case ns of {
+    	NS_1   => Ag g Sg p ;
+	NS_2_4 => Ag g Pl p ;
+	_   => Ag Neutr Sg p ---- TODO verify
+	} ;
+
+  numSizeNumber : NumSize -> Number = \ns -> case ns of {
+    NS_1 => Sg ;
+    _ => Pl      ---- TO CHECK
+    } ;
+
+}