forked from GitHub/gf-rgl
Started Croatian (Hrv) verbs and their Wiktionary extraction
This commit is contained in:
@@ -17,6 +17,13 @@ param
|
||||
|
||||
Person = P1 | P2 | P3 ;
|
||||
|
||||
VForm =
|
||||
VInf
|
||||
| VPres Number Person
|
||||
| VPastPart Gender Number
|
||||
;
|
||||
---- TODO aorist, imperfect
|
||||
|
||||
Agr = Ag Gender Number Person ;
|
||||
|
||||
CTense = CTPres | CTPast ; ----- TODO complete the tense system to match BCS verb morphology
|
||||
@@ -414,30 +421,20 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> {
|
||||
pgen = velk + "ih" ;
|
||||
} ;
|
||||
|
||||
{-
|
||||
---------------------
|
||||
-- Verbs
|
||||
-- https://en.wikipedia.org/wiki/Slovak_language#Verbs
|
||||
-- Wiki
|
||||
|
||||
VerbForms : Type = { ---- TODO more forms to add ?
|
||||
inf,
|
||||
pressg1, pressg2, pressg3,
|
||||
prespl1, prespl2, prespl3,
|
||||
pastpmasc, pastpfem, pastpneutr : Str
|
||||
} ;
|
||||
VerbForms : Type = VForm => Str ;
|
||||
|
||||
ComplementCase : Type = {s : Str ; c : Case ; hasPrep : Bool} ;
|
||||
|
||||
verbAgr : VerbForms -> Agr -> Bool -> Str ---- TODO tenses
|
||||
= \vf,a,b -> case a of {
|
||||
Ag _ Sg P1 => vf.pressg1 ;
|
||||
Ag _ Sg P2 => vf.pressg2 ;
|
||||
Ag _ Sg P3 => vf.pressg3 ;
|
||||
Ag _ Pl P1 => vf.prespl1 ;
|
||||
Ag _ Pl P2 => vf.prespl2 ;
|
||||
Ag _ Pl P3 => vf.prespl3
|
||||
verbAgr : VerbForms -> Agr -> CTense -> Str ---- TODO tenses
|
||||
= \vf,a,b -> case <a,b> of {
|
||||
<Ag _ n p, CTPres> => vf ! VPres n p ;
|
||||
<Ag g n _, CTPast> => vf ! VPastPart g n
|
||||
} ;
|
||||
|
||||
{-
|
||||
copulaVerbForms : VerbForms = {
|
||||
inf = "byť" ;
|
||||
pressg1 = "som" ;
|
||||
@@ -463,29 +460,36 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> {
|
||||
pastpfem = "mala" ;
|
||||
pastpneutr = "malo" ;
|
||||
} ;
|
||||
-}
|
||||
|
||||
-- just an example of a traditional paradigm
|
||||
---- TODO other traditional paradigms
|
||||
|
||||
iii_kupovatVerbForms : Str -> VerbForms = \kupovat ->
|
||||
aeiVerbForms : Str -> VerbForms = \citati ->
|
||||
let
|
||||
kupo = Predef.tk 3 kupovat ;
|
||||
kupu = Predef.tk 1 kupo + "u"
|
||||
in
|
||||
{
|
||||
inf = kupovat ;
|
||||
pressg1 = kupu + "jem" ;
|
||||
pressg2 = kupu + "ješ" ;
|
||||
pressg3 = kupu + "je" ;
|
||||
prespl1 = kupu + "jeme" ;
|
||||
prespl2 = kupu + "jete" ;
|
||||
prespl3 = kupu + "jú" ;
|
||||
pastpmasc = "kupoval" ;
|
||||
pastpfem = "kupovala" ;
|
||||
pastpneutr = "kupovalo" ;
|
||||
cita = Predef.tk 2 citati ;
|
||||
u = case last cita of {
|
||||
"a" => "aju" ;
|
||||
"e" => "u" ;
|
||||
"i" => "e"
|
||||
} ;
|
||||
in table {
|
||||
VInf => citati ;
|
||||
VPres Sg P1 => cita + "m" ;
|
||||
VPres Sg P2 => cita + "š" ;
|
||||
VPres Sg P3 => cita ;
|
||||
VPres Pl P1 => cita + "mo" ;
|
||||
VPres Pl P2 => cita + "te" ;
|
||||
VPres pl P3 => init cita + u ;
|
||||
VPastPart (Masc _) Sg => cita + "o" ;
|
||||
VPastPart Fem Sg => cita + "la" ;
|
||||
VPastPart Neutr Sg => cita + "lo" ;
|
||||
VPastPart (Masc _) Pl => cita + "li" ;
|
||||
VPastPart Fem Pl => cita + "le" ;
|
||||
VPastPart Neutr Pl => cita + "la"
|
||||
} ;
|
||||
|
||||
|
||||
{-
|
||||
---------------------------
|
||||
-- Pronouns
|
||||
|
||||
|
||||
@@ -552,3 +552,34 @@ s . Neutr => Pl => Acc => niska
|
||||
s . Neutr => Pl => Voc => niska
|
||||
s . Neutr => Pl => Loc => niskim
|
||||
s . Neutr => Pl => Ins => niskim
|
||||
VInf => čitati
|
||||
VPres Sg P1 => čitam
|
||||
VPres Sg P2 => čitaš
|
||||
VPres Sg P3 => čita
|
||||
VPres Pl P1 => čitamo
|
||||
VPres Pl P2 => čitate
|
||||
VPres Pl P3 => čitaju
|
||||
VPastPart (Masc Anim) Sg => čitao
|
||||
VPastPart (Masc Anim) Pl => čitali
|
||||
VPastPart (Masc Inanim) Sg => čitao
|
||||
VPastPart (Masc Inanim) Pl => čitali
|
||||
VPastPart Fem Sg => čitala
|
||||
VPastPart Fem Pl => čitale
|
||||
VPastPart Neutr Sg => čitalo
|
||||
VPastPart Neutr Pl => čitala
|
||||
VInf => raditi
|
||||
VPres Sg P1 => radim
|
||||
VPres Sg P2 => radiš
|
||||
VPres Sg P3 => radi
|
||||
VPres Pl P1 => radimo
|
||||
VPres Pl P2 => radite
|
||||
VPres Pl P3 => rade
|
||||
VPastPart (Masc Anim) Sg => radio
|
||||
VPastPart (Masc Anim) Pl => radili
|
||||
VPastPart (Masc Inanim) Sg => radio
|
||||
VPastPart (Masc Inanim) Pl => radili
|
||||
VPastPart Fem Sg => radila
|
||||
VPastPart Fem Pl => radile
|
||||
VPastPart Neutr Sg => radilo
|
||||
VPastPart Neutr Pl => radila
|
||||
aarnes-mbp-2:croatian aarne$
|
||||
|
||||
@@ -28,3 +28,6 @@ cc -table -unqual adjFormsAdjective (velikA "mastan")
|
||||
cc -table -unqual adjFormsAdjective (velikA "gladan")
|
||||
cc -table -unqual adjFormsAdjective (velikA "nizak")
|
||||
|
||||
cc -table -unqual aeiVerbForms ("čitati")
|
||||
cc -table -unqual aeiVerbForms ("raditi")
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ MYLANG = 'Serbo-Croatian'
|
||||
|
||||
GENDERS = ['masculine', 'feminine', 'neuter']
|
||||
|
||||
NOUN_CASES = {
|
||||
NOUN_FORMS = {
|
||||
'singular': {
|
||||
'nominative': 'snom',
|
||||
'genitive': 'sgen',
|
||||
@@ -25,7 +25,7 @@ NOUN_CASES = {
|
||||
}
|
||||
}
|
||||
|
||||
ADJ_CASES = {
|
||||
ADJ_FORMS = {
|
||||
'masculine': {
|
||||
'singular': {
|
||||
'nominative': 'msnom',
|
||||
@@ -54,6 +54,33 @@ ADJ_CASES = {
|
||||
}
|
||||
}
|
||||
|
||||
VERB_FORMS = {
|
||||
'present': {
|
||||
'singular': {
|
||||
'first-person': 'pres_sg_1',
|
||||
'second-person': 'pres_sg_2',
|
||||
'third-person': 'pres_sg_3'
|
||||
},
|
||||
'plural': {
|
||||
'first-person': 'pres_pl_1',
|
||||
'second-person': 'pres_pl_2',
|
||||
'third-person': 'pres_pl_3'
|
||||
}
|
||||
},
|
||||
'participle': {
|
||||
'singular': {
|
||||
'masculine': 'ppart_masc_sg',
|
||||
'feminine': 'ppart_fem_sg',
|
||||
'neuter': 'ppart_neutr_sg'
|
||||
},
|
||||
'plural': {
|
||||
'masculine': 'ppart_masc_pl',
|
||||
'feminine': 'ppart_fem_pl',
|
||||
'neuter': 'ppart_neutr_pl'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
def get_forms(pos, forms):
|
||||
@@ -64,24 +91,33 @@ def get_forms(pos, forms):
|
||||
if g in f.get('tags', []):
|
||||
dict['gender'] = g
|
||||
tags = f.get('tags', [])
|
||||
for num in NOUN_CASES:
|
||||
for num in NOUN_FORMS:
|
||||
if num in tags:
|
||||
for case in NOUN_CASES[num]:
|
||||
for case in NOUN_FORMS[num]:
|
||||
if case in tags:
|
||||
dict[NOUN_CASES[num][case]] = f['form']
|
||||
dict[NOUN_FORMS[num][case]] = f['form']
|
||||
elif pos == 'adj':
|
||||
print(forms)
|
||||
for f in forms:
|
||||
tags = f.get('tags', [])
|
||||
if 'positive' in tags and 'indefinite' in tags:
|
||||
for g in ADJ_CASES:
|
||||
for g in ADJ_FORMS:
|
||||
if g in tags:
|
||||
for n in ADJ_CASES[g]:
|
||||
for n in ADJ_FORMS[g]:
|
||||
if n in tags:
|
||||
for c in ADJ_CASES[g][n]:
|
||||
for c in ADJ_FORMS[g][n]:
|
||||
if c in tags:
|
||||
dict[ADJ_CASES[g][n][c]] = f['form']
|
||||
|
||||
dict[ADJ_FORMS[g][n][c]] = f['form']
|
||||
elif pos == 'verb':
|
||||
for f in forms:
|
||||
tags = f.get('tags', [])
|
||||
for t in VERB_FORMS:
|
||||
if t in tags:
|
||||
for n in VERB_FORMS[t]:
|
||||
if n in tags:
|
||||
for g in VERB_FORMS[t][n]:
|
||||
if g in tags:
|
||||
dict[VERB_FORMS[t][n][g]] = f['form']
|
||||
|
||||
else:
|
||||
dict['forms'] = forms[:10] ####
|
||||
return dict
|
||||
|
||||
Reference in New Issue
Block a user