Hrv: extended smart paradigms

This commit is contained in:
Aarne Ranta
2022-09-29 09:49:29 +02:00
parent d8e313c75c
commit 54713a2987
3 changed files with 141 additions and 103 deletions

View File

@@ -27,7 +27,7 @@ oper
accusative : Case
= Acc ;
vocative : Case
= Voc ;
= R.Voc ;
locative : Case
= Loc ;
instrumental : Case
@@ -39,26 +39,64 @@ oper
oper
-- Overloaded constructor for lexical nouns (category N).
-- Every variant builds a noun record and wraps it with `lin N`.
mkN = overload {
-- from the nominative alone, via guessNounForms
mkN : (nom : Str) -> N
= \nom -> lin N (guessNounForms nom) ;
-- NOTE: gen and g are accepted but not used; only nom is passed to guessNounForms
mkN : (nom,gen : Str) -> Gender -> N ---- TODO
= \nom,gen,g -> lin N (guessNounForms nom) ;
-- from the singular nominative alone, via smartLexNoun
mkN : (sgnom : Str) -> N -- guessing gender
= \sgnom -> lin N (smartLexNoun sgnom) ;
-- from the singular nominative and an explicit gender, via mkgLexNoun
mkN : (sgnom : Str) -> Gender -> N
= \sgnom, g -> lin N (mkgLexNoun sgnom g) ;
-- from a ready-made NForms record; the gender is added by record extension
mkN : NForms -> Gender -> N -- the worst case
= \nfs,g -> lin N (nfs ** {g = g}) ;
} ;
-- The following standard declensions can be used with good accuracy.
-- However, they have some defaults that may have to be overwritten.
-- This can be done easily by overriding those forms with record extension (**).
-- The default extensions are shown in comments; if the default is correct, no extension is needed.
-- Notice that some paradigms take two arguments, some take one.
---- TODO
-- The full definition of the noun record is
-- {
-- snom,sgen,sdat,sacc,svoc,sloc,sins, pnom,pgen,pdat,pacc,ploc,pins : Str ;
-- g : Gender
-- }
NForms = {snom,sgen,sdat,sacc,svoc,sins,pnom,pgen,pdat,pacc : Str} ;
-- Noun paradigms typed as Str -> NForms.
-- Each one is the same-named declension function (izvorN, nokatN, ...) under an NForms-typed name.
izvorNForms      : Str -> NForms = izvorN ;
nokatNForms      : Str -> NForms = nokatN ;
gradaninNForms   : Str -> NForms = gradaninN ;
vojnikNForms     : Str -> NForms = vojnikN ;
bubregNForms     : Str -> NForms = bubregN ;
trbuhNForms      : Str -> NForms = trbuhN ;
cvorakNForms     : Str -> NForms = cvorakN ;
panjNForms       : Str -> NForms = panjN ;
suzanjNForms     : Str -> NForms = suzanjN ;
pristNForms      : Str -> NForms = pristN ;
stricNForms      : Str -> NForms = stricN ;
klinacNForms     : Str -> NForms = klinacN ;
posjetilacNForms : Str -> NForms = posjetilacN ;
pepeoNForms      : Str -> NForms = pepeoN ;
ugaoNForms       : Str -> NForms = ugaoN ;
bifeNForms       : Str -> NForms = bifeN ;
ziriNForms       : Str -> NForms = ziriN ;
taksiNForms      : Str -> NForms = taksiN ;
koljenoNForms    : Str -> NForms = koljenoN ;
jedroNForms      : Str -> NForms = jedroN ;
poljeNForms      : Str -> NForms = poljeN ;
zenaNForms       : Str -> NForms = zenaN ;
---------------------
-- Adjectives
@@ -67,27 +105,51 @@ oper
-- Overloaded constructor for adjectives (category A).
mkA = overload {
-- from a single string, inflected with the velikA paradigm
mkA : Str -> A
= \s -> lin A (velikA s)
= \s -> lin A (velikA s) ;
-- from a ready-made AForms record, used unchanged
mkA : AForms -> A
= \s -> lin A s ;
} ;
-- adjective whose forms are produced from one string by invarAForms
invarA : Str -> A = \form -> lin A (invarAForms form) ;
-- adjective form record; the same type as AdjForms in the module opened as R
AForms : Type = R.AdjForms ;
-- the complete definition of AForms is
-- {msnom, fsnom, nsnom, msgen, fsgen, msdat,
-- fsdat, fsacc, msloc, msins, fsins, mpnom, pgen : Str} ;
-- AForms-typed names for the two adjective paradigms
velikAForms : Str -> AForms = \velik -> velikA velik ;
invarAForms : Str -> AForms = invarAdjForms ;
-- the full definition of the adjective record is
-- {
-- msnom, fsnom, nsnom, msgen, fsgen, msdat, fsacc, msloc, msins, fsins,
-- ampnom, pgen, pins : Str
-- }
--
-------------------------
-- Verbs
-- Overloaded constructor for verbs (category V).
-- In every variant the verb forms end up in the field s of the V record.
mkV = overload {
  -- infinitive only: the forms are computed by smartVerbForms
  mkV : (raditi : Str) -> V
    = \inf -> lin V {s = smartVerbForms inf} ;
  -- three given forms, handed to aeiVerbForms in the same order
  mkV : (raditi, radem, radio : Str) -> V
    = \inf, pres, past -> lin V {s = aeiVerbForms inf pres past} ;
  -- a ready-made VerbForms value, used unchanged
  mkV : VerbForms -> V
    = \forms -> lin V {s = forms} ;
} ;
-- Overloaded constructor for two-place verbs (category V2).
-- A V2 record holds the verb forms in s and the complement description in c,
-- where c itself has a string s, a case c and a flag hasPrep.
mkV2 = overload {
-- verb forms only: empty complement string, case Acc, hasPrep = False
mkV2 : VerbForms -> V2
= \vf -> lin V2 {s = vf ; c = {s = [] ; c = Acc ; hasPrep = False}} ;
-- verb forms and a case: empty complement string, hasPrep = False
mkV2 : VerbForms -> Case -> V2
= \vf,c -> lin V2 {s = vf ; c = {s = [] ; c = c ; hasPrep = False}} ;
-- verb forms and a complete complement description, stored as given
mkV2 : VerbForms -> ComplementCase -> V2
= \vf,c -> lin V2 {s = vf ; c = c} ;
-- from a V: its forms (v.s) with an accusative complement, hasPrep = False
mkV2 : V -> V2
= \v -> lin V2 {s = v.s ;
c = {s = [] ; c = accusative ; hasPrep = False}} ;
-- from a V and a case: empty complement string, hasPrep = False
mkV2 : V -> Case -> V2
= \v,c -> lin V2 {s = v.s ;
c = {s = [] ; c = c ; hasPrep = False}} ;
-- from a V and a Prep: the Prep is stored as the complement description
mkV2 : V -> Prep -> V2
= \v,c -> lin V2 {s = v.s ; c = c} ;
} ;
------------------------
@@ -96,8 +158,14 @@ oper
-- adverb consisting of the given string
mkAdv : Str -> Adv = \adv -> lin Adv {s = adv} ;
-- Overloaded constructor for prepositions (category Prep).
-- A Prep record holds the string s, the governed case c and the flag hasPrep.
mkPrep = overload {
-- string only: the case defaults to genitive, hasPrep = True
mkPrep : Str -> Prep -- genitive prepositions
= \s -> lin Prep {s = s ; c = genitive ; hasPrep = True} ;
-- case only: empty string, hasPrep = False
mkPrep : Case -> Prep -- oblique cases, empty string
= \c -> lin Prep {s = [] ; c = c ; hasPrep = False} ;
-- string and case; NOTE: hasPrep is set to True whatever s is, also for an empty string
mkPrep : Str -> Case -> Prep
= \s,c -> lin Prep {s = s ; c = c ; hasPrep = True} ; ---- True if s /= ""
= \s,c -> lin Prep {s = s ; c = c ; hasPrep = True} ;
} ;
-- conjunction with an empty first part (s1) and the given string as second part (s2)
mkConj : Str -> Conj = \conj -> lin Conj {s1 = [] ; s2 = conj} ;

View File

@@ -3,7 +3,8 @@ resource ResHrv = open Prelude in {
-- AR September 2022
-- sources:
-- Wiki = https://en.wikipedia.org/wiki/Serbo-Croatian_grammar
-- BCMS = Bosnian, Croatian, Montenegrin and Serbian: An Essential Grammar (Routledge Essential Grammars) 1st Edition, by Željko Vrabec
-- BCMS = Bosnian, Croatian, Montenegrin and Serbian:
-- An Essential Grammar (Routledge Essential Grammars) 1st Edition, by Željko Vrabec
-- parameters
@@ -65,6 +66,7 @@ voicing : Str -> Str = \s -> case s of {
x + "ž" => x + "š" ;
_ => s
} ;
---------------
-- Nouns
---------------
@@ -84,6 +86,10 @@ voicing : Str -> Str = \s -> case s of {
-- full noun: a table giving a string for every Number and Case, plus the gender g
Noun : Type = {s : Number => Case => Str ; g : Gender} ;
-- for lexical nouns N, we also need the gender but keep the minimal set of forms
LexNoun : Type = NounForms ** {g : Gender} ;
-- this is used in UseN
nounFormsNoun : NounForms -> Gender -> Noun
@@ -114,45 +120,42 @@ voicing : Str -> Str = \s -> case s of {
g = g
} ;
-- a declension type produces these forms from a string
-- terminology of CEG
DeclensionType : Type = Str -> NounForms ;
{-
declensionNounForms : (snom,pgen : Str) -> Gender -> NounForms
= \snom,pgen,g -> case <g, snom, pgen> of {
<Masc Anim, _ + "a" , _ + "ov"> => hrdinaN snom ;
<Masc _, _ + ("i"|"y"|"e") , _ + "ov"> => ponyN snom ; ----
<Masc Anim, _ , _ + "ov"> => chlapN snom ;
<Masc Inanim, _ + #softConsonant, _ + "ov"> => strojN snom ;
<Masc Ianim, _ + #hardConsonant, _ + "ov"> => dubN snom ;
<Masc Ianim, _ + #neutralConsonant,_ + "ov"> => dubN snom ;
-- smart paradigms
<Fem, _ + #hardConsonant + "a", _ + #consonant> => zenaN snom ;
<Fem, _ + #neutralConsonant + "a", _ + #consonant> => zenaN snom ;
<Fem, _ + #softConsonant + "a", _ + #consonant> => ulicaN snom ;
<Fem, _ + ("ia"|"ya"), _> => ulicaN snom ;
<Fem, _ + ("c"|"s"|"p"|"v"|"sť"), _ + "í"> => kostN snom pgen ;
<Fem, _ + #consonant , _ + "í"> => dlanN snom pgen ;
<Neutr, _ + "o" , _ > => mestoN snom ;
<Neutr, _ + "ie" , _ + "í"> => vysvedcenieN snom ;
<Neutr, _ + "e" , _ > => srdceN snom ;
<Neutr, _ + ("a"|"ä") , _ + "iec"> => dievceniecN snom ;
<Neutr, _ + ("a"|"ä") , _ > => dievcaN snom ;
_ => dubN (""+snom) ** {pgen = pgen} ---- Predef.error ("cannot infer declension type for" ++ snom ++ pgen)
} ** {pgen = pgen ; g = g} ;
-}
-- the "smartest" one-argument mkN
guessNounForms : Str -> NounForms ** {g : Gender}
= \snom -> case snom of {
---- TODO
_ => izvorN snom ** {g = inanimate}
-- One-argument smart paradigm: chooses a declension and a default gender
-- from the ending of the singular nominative.
-- Branches are tried top-down, so a longer ending must stay above any shorter
-- ending it contains ("ao"/"eo" before "o", "lac"/"ac" before "c",
-- "anj" before "nj", "ak" before "a"+? and "k").
-- The gender is only a guess; mkgLexNoun lets the caller state it explicitly.
smartLexNoun : Str -> LexNoun = \s -> case s of {
_ + "a" => zenaN s ** {g = feminine} ;
_ + "i" => ziriN s ** {g = inanimate} ; ---- TODO feminine i
_ + "e" => poljeN s ** {g = neuter} ; ---- TODO sunce, uze, zvonce, rame
_ + "ao" => ugaoN s ** {g = inanimate} ;
_ + "eo" => pepeoN s ** {g = inanimate} ;
_ + "o" => koljenoN s ** {g = neuter} ; ---- TODO jedro
_ + "lac" => posjetilacN s ** {g = inanimate} ;
_ + "anj" => suzanjN s ** {g = inanimate} ;
_ + "nj" => panjN s ** {g = inanimate} ;
_ + "št" => pristN s ** {g = inanimate} ;
-- klinacN is a masculine declension: same gender as the other consonant-final branches, not neuter
_ + "ac" => klinacN s ** {g = inanimate} ;
_ + "c" => stricN s ** {g = inanimate} ;
-- gradaninN is a masculine declension as well, not neuter
_ + "in" => gradaninN s ** {g = inanimate} ;
_ + "ak" => cvorakN s ** {g = inanimate} ;
-- "a" followed by exactly one more character
_ + "a" + ? => nokatN s ** {g = inanimate} ;
_ + "g" => bubregN s ** {g = inanimate} ;
_ + "h" => trbuhN s ** {g = inanimate} ;
_ + "k" => vojnikN s ** {g = inanimate} ;
-- default for every other ending
_ => izvorN s ** {g = inanimate}
} ;
-- Smart paradigm with the gender supplied by the caller.
-- A masculine noun ending in "i" uses ziriN, one ending in "e" or "o" uses bifeN;
-- in every other case the declension chosen by smartLexNoun is kept and only
-- its gender field is replaced by g.
mkgLexNoun : Str -> Gender -> LexNoun = \s,g -> case <s,g> of {
  <_ + "i",       Masc _> => ziriN s ** {g = g} ;
  <_ + ("e"|"o"), Masc _> => bifeN s ** {g = g} ;
  _                       => smartLexNoun s ** {g = g}
} ;
-- the traditional declensions, following Wiki
-- they are also exported in ParadigmsHrv with names izvorN etc
@@ -323,7 +326,7 @@ voicing : Str -> Str = \s -> case s of {
-- to be used for AP: 56 forms for each degree
Adjective : Type = {s : Gender => Number => Case => Str} ;
-- to be used for A, in three degrees: 15 forms in each
-- to be used for A, in three degrees: 12 forms in each
---- TODO other degrees than positive
AdjForms : Type = {
@@ -370,18 +373,6 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> {
}
} ;
{-
guessAdjForms : Str -> AdjForms
= \s -> case s of {
_ + "ý" => peknyA s ;
_ + "y" => krasnyA s ;
_ + "í" => cudziA s ;
_ + "i" => rydziA s ;
_ + ("ov"|"in") => otcovA s ;
_ => otcovA (""+s) ---- Predef.error ("no mkA for" ++ s)
} ;
-}
velikA : Str -> AdjForms = \velik ->
let
velk : Str = case velik of {
@@ -421,35 +412,14 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> {
<Ag _ n p, CTPres> => vf ! VPres n p ;
<Ag g n _, CTPast> => vf ! VPastPart g n
} ;
{-
copulaVerbForms : VerbForms = {
inf = "byť" ;
pressg1 = "som" ;
pressg2 = "si" ;
pressg3 = "je" ;
prespl1 = "sme" ;
prespl2 = "ste" ;
prespl3 = "sú" ;
pastpmasc = "bol" ;
pastpfem = "bola" ;
pastpneutr = "bolo" ;
-- One-argument verb paradigm: derives from the infinitive the two further
-- forms that aeiVerbForms takes as its second and third arguments.
--   ... + "ati" : stem + "am" and stem + "ao"
--   ... + "ti"  : (base without its last character) + "em" and base + "o"
-- Any other string is rejected with Predef.error.
-- NOTE(review): for "raditi" the second rule produces "radem" - confirm that
-- this is the intended form for verbs in -iti.
smartVerbForms : Str -> VerbForms = \inf -> case inf of {
  stem + "ati" => aeiVerbForms inf (stem + "am") (stem + "ao") ;
  base + "ti"  => aeiVerbForms inf (init base + "em") (base + "o") ;
  _ => Predef.error ("expect infinitive form \"-ti\", found" ++ inf)
} ;
haveVerbForms : VerbForms = {
inf = "mať" ;
pressg1 = "mám" ;
pressg2 = "máš" ;
pressg3 = "má" ;
prespl1 = "máme" ;
prespl2 = "máte" ;
prespl3 = "majú" ;
pastpmasc = "mal" ;
pastpfem = "mala" ;
pastpneutr = "malo" ;
} ;
-}
-- just an example of a traditional paradigm
-- a traditional paradigm type, with a slight abstraction
---- TODO other traditional paradigms
aeiVerbForms : Str -> Str -> Str -> VerbForms = \citati, citam, citao ->

View File

@@ -7,7 +7,7 @@ lin
---- few_Det = invarNumeral "málo" ; -- see notes
for_Prep = mkPrep "za" accusative ; -- Croatian "za" + accusative; "pre" is the Slovak word
from_Prep = mkPrep "iz" Gen ;
have_V2 = mkV2 imati_VerbForms ;
have_V2 = mkV2 (mkV imati_VerbForms) ;
in_Prep = mkPrep "u" Loc ;
---- many_Det = regNumeral "mnoho" "mnohých" "mnohým" "mnohými" ; ---- alternative: invarNumeral "veľa" ;
or_Conj = mkConj "ili" ; -- Croatian "ili"; "alebo" is the Slovak word