started Hrv verbs and their Wiktionary extraction

This commit is contained in:
Aarne Ranta
2022-09-25 10:06:36 +02:00
parent 3eac1b9d0c
commit 7a0b1eed34
4 changed files with 118 additions and 44 deletions

View File

@@ -17,6 +17,13 @@ param
Person = P1 | P2 | P3 ; Person = P1 | P2 | P3 ;
VForm =
VInf
| VPres Number Person
| VPastPart Gender Number
;
---- TODO aorist, imperfect
Agr = Ag Gender Number Person ; Agr = Ag Gender Number Person ;
CTense = CTPres | CTPast ; ----- TODO complete the tense system to match BCS verb morphology CTense = CTPres | CTPast ; ----- TODO complete the tense system to match BCS verb morphology
@@ -414,30 +421,20 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> {
pgen = velk + "ih" ; pgen = velk + "ih" ;
} ; } ;
{-
--------------------- ---------------------
-- Verbs -- Verbs
-- https://en.wikipedia.org/wiki/Slovak_language#Verbs -- Wiki
VerbForms : Type = { ---- TODO more forms to add ? VerbForms : Type = VForm => Str ;
inf,
pressg1, pressg2, pressg3,
prespl1, prespl2, prespl3,
pastpmasc, pastpfem, pastpneutr : Str
} ;
ComplementCase : Type = {s : Str ; c : Case ; hasPrep : Bool} ; ComplementCase : Type = {s : Str ; c : Case ; hasPrep : Bool} ;
verbAgr : VerbForms -> Agr -> Bool -> Str ---- TODO tenses verbAgr : VerbForms -> Agr -> CTense -> Str ---- TODO tenses
= \vf,a,b -> case a of { = \vf,a,b -> case <a,b> of {
Ag _ Sg P1 => vf.pressg1 ; <Ag _ n p, CTPres> => vf ! VPres n p ;
Ag _ Sg P2 => vf.pressg2 ; <Ag g n _, CTPast> => vf ! VPastPart g n
Ag _ Sg P3 => vf.pressg3 ;
Ag _ Pl P1 => vf.prespl1 ;
Ag _ Pl P2 => vf.prespl2 ;
Ag _ Pl P3 => vf.prespl3
} ; } ;
{-
copulaVerbForms : VerbForms = { copulaVerbForms : VerbForms = {
inf = "byť" ; inf = "byť" ;
pressg1 = "som" ; pressg1 = "som" ;
@@ -463,29 +460,36 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> {
pastpfem = "mala" ; pastpfem = "mala" ;
pastpneutr = "malo" ; pastpneutr = "malo" ;
} ; } ;
-}
-- just an example of a traditional paradigm -- just an example of a traditional paradigm
---- TODO other traditional paradigms ---- TODO other traditional paradigms
iii_kupovatVerbForms : Str -> VerbForms = \kupovat -> aeiVerbForms : Str -> VerbForms = \citati ->
let let
kupo = Predef.tk 3 kupovat ; cita = Predef.tk 2 citati ;
kupu = Predef.tk 1 kupo + "u" u = case last cita of {
in "a" => "aju" ;
{ "e" => "u" ;
inf = kupovat ; "i" => "e"
pressg1 = kupu + "jem" ; } ;
pressg2 = kupu + "ješ" ; in table {
pressg3 = kupu + "je" ; VInf => citati ;
prespl1 = kupu + "jeme" ; VPres Sg P1 => cita + "m" ;
prespl2 = kupu + "jete" ; VPres Sg P2 => cita + "š" ;
prespl3 = kupu + "jú" ; VPres Sg P3 => cita ;
pastpmasc = "kupoval" ; VPres Pl P1 => cita + "mo" ;
pastpfem = "kupovala" ; VPres Pl P2 => cita + "te" ;
pastpneutr = "kupovalo" ; VPres pl P3 => init cita + u ;
VPastPart (Masc _) Sg => cita + "o" ;
VPastPart Fem Sg => cita + "la" ;
VPastPart Neutr Sg => cita + "lo" ;
VPastPart (Masc _) Pl => cita + "li" ;
VPastPart Fem Pl => cita + "le" ;
VPastPart Neutr Pl => cita + "la"
} ; } ;
{-
--------------------------- ---------------------------
-- Pronouns -- Pronouns

View File

@@ -552,3 +552,34 @@ s . Neutr => Pl => Acc => niska
s . Neutr => Pl => Voc => niska s . Neutr => Pl => Voc => niska
s . Neutr => Pl => Loc => niskim s . Neutr => Pl => Loc => niskim
s . Neutr => Pl => Ins => niskim s . Neutr => Pl => Ins => niskim
VInf => čitati
VPres Sg P1 => čitam
VPres Sg P2 => čitaš
VPres Sg P3 => čita
VPres Pl P1 => čitamo
VPres Pl P2 => čitate
VPres Pl P3 => čitaju
VPastPart (Masc Anim) Sg => čitao
VPastPart (Masc Anim) Pl => čitali
VPastPart (Masc Inanim) Sg => čitao
VPastPart (Masc Inanim) Pl => čitali
VPastPart Fem Sg => čitala
VPastPart Fem Pl => čitale
VPastPart Neutr Sg => čitalo
VPastPart Neutr Pl => čitala
VInf => raditi
VPres Sg P1 => radim
VPres Sg P2 => radiš
VPres Sg P3 => radi
VPres Pl P1 => radimo
VPres Pl P2 => radite
VPres Pl P3 => rade
VPastPart (Masc Anim) Sg => radio
VPastPart (Masc Anim) Pl => radili
VPastPart (Masc Inanim) Sg => radio
VPastPart (Masc Inanim) Pl => radili
VPastPart Fem Sg => radila
VPastPart Fem Pl => radile
VPastPart Neutr Sg => radilo
VPastPart Neutr Pl => radila
aarnes-mbp-2:croatian aarne$

View File

@@ -28,3 +28,6 @@ cc -table -unqual adjFormsAdjective (velikA "mastan")
cc -table -unqual adjFormsAdjective (velikA "gladan") cc -table -unqual adjFormsAdjective (velikA "gladan")
cc -table -unqual adjFormsAdjective (velikA "nizak") cc -table -unqual adjFormsAdjective (velikA "nizak")
cc -table -unqual aeiVerbForms ("čitati")
cc -table -unqual aeiVerbForms ("raditi")

View File

@@ -8,7 +8,7 @@ MYLANG = 'Serbo-Croatian'
GENDERS = ['masculine', 'feminine', 'neuter'] GENDERS = ['masculine', 'feminine', 'neuter']
NOUN_CASES = { NOUN_FORMS = {
'singular': { 'singular': {
'nominative': 'snom', 'nominative': 'snom',
'genitive': 'sgen', 'genitive': 'sgen',
@@ -25,7 +25,7 @@ NOUN_CASES = {
} }
} }
ADJ_CASES = { ADJ_FORMS = {
'masculine': { 'masculine': {
'singular': { 'singular': {
'nominative': 'msnom', 'nominative': 'msnom',
@@ -54,6 +54,33 @@ ADJ_CASES = {
} }
} }
# Tag hierarchy for verb entries -> flat key under which the matching form
# is stored.  Levels are: 'present' -> number -> person, and
# 'participle' -> number -> gender.  get_forms() descends these levels in
# order, testing each key against a form's tags, so the key order below is
# kept the same as in an explicit literal.
VERB_FORMS = {
    'present': {
        number: {
            person: 'pres_%s_%d' % (num_abbr, rank)
            for rank, person in enumerate(
                ('first-person', 'second-person', 'third-person'), 1)
        }
        for number, num_abbr in (('singular', 'sg'), ('plural', 'pl'))
    },
    'participle': {
        number: {
            gender: 'ppart_%s_%s' % (gen_abbr, num_abbr)
            for gender, gen_abbr in (
                ('masculine', 'masc'), ('feminine', 'fem'), ('neuter', 'neutr'))
        }
        for number, num_abbr in (('singular', 'sg'), ('plural', 'pl'))
    },
}
def get_forms(pos, forms): def get_forms(pos, forms):
@@ -64,23 +91,32 @@ def get_forms(pos, forms):
if g in f.get('tags', []): if g in f.get('tags', []):
dict['gender'] = g dict['gender'] = g
tags = f.get('tags', []) tags = f.get('tags', [])
for num in NOUN_CASES: for num in NOUN_FORMS:
if num in tags: if num in tags:
for case in NOUN_CASES[num]: for case in NOUN_FORMS[num]:
if case in tags: if case in tags:
dict[NOUN_CASES[num][case]] = f['form'] dict[NOUN_FORMS[num][case]] = f['form']
elif pos == 'adj': elif pos == 'adj':
print(forms)
for f in forms: for f in forms:
tags = f.get('tags', []) tags = f.get('tags', [])
if 'positive' in tags and 'indefinite' in tags: if 'positive' in tags and 'indefinite' in tags:
for g in ADJ_CASES: for g in ADJ_FORMS:
if g in tags: if g in tags:
for n in ADJ_CASES[g]: for n in ADJ_FORMS[g]:
if n in tags: if n in tags:
for c in ADJ_CASES[g][n]: for c in ADJ_FORMS[g][n]:
if c in tags: if c in tags:
dict[ADJ_CASES[g][n][c]] = f['form'] dict[ADJ_FORMS[g][n][c]] = f['form']
elif pos == 'verb':
for f in forms:
tags = f.get('tags', [])
for t in VERB_FORMS:
if t in tags:
for n in VERB_FORMS[t]:
if n in tags:
for g in VERB_FORMS[t][n]:
if g in tags:
dict[VERB_FORMS[t][n][g]] = f['form']
else: else:
dict['forms'] = forms[:10] #### dict['forms'] = forms[:10] ####