diff --git a/src/croatian/ResHrv.gf b/src/croatian/ResHrv.gf index 132350c2c..f5bf316c6 100644 --- a/src/croatian/ResHrv.gf +++ b/src/croatian/ResHrv.gf @@ -17,6 +17,13 @@ param Person = P1 | P2 | P3 ; + VForm = + VInf + | VPres Number Person + | VPastPart Gender Number + ; + ---- TODO aorist, imperfect + Agr = Ag Gender Number Person ; CTense = CTPres | CTPast ; ----- TODO complete the tense system to match BCS verb morphology @@ -414,30 +421,20 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> { pgen = velk + "ih" ; } ; -{- --------------------- -- Verbs --- https://en.wikipedia.org/wiki/Slovak_language#Verbs +-- Wiki - VerbForms : Type = { ---- TODO more forms to add ? - inf, - pressg1, pressg2, pressg3, - prespl1, prespl2, prespl3, - pastpmasc, pastpfem, pastpneutr : Str - } ; + VerbForms : Type = VForm => Str ; ComplementCase : Type = {s : Str ; c : Case ; hasPrep : Bool} ; - verbAgr : VerbForms -> Agr -> Bool -> Str ---- TODO tenses - = \vf,a,b -> case a of { - Ag _ Sg P1 => vf.pressg1 ; - Ag _ Sg P2 => vf.pressg2 ; - Ag _ Sg P3 => vf.pressg3 ; - Ag _ Pl P1 => vf.prespl1 ; - Ag _ Pl P2 => vf.prespl2 ; - Ag _ Pl P3 => vf.prespl3 + verbAgr : VerbForms -> Agr -> CTense -> Str ---- TODO tenses + = \vf,a,b -> case <a,b> of { + <Ag _ n p, CTPres> => vf ! VPres n p ; + <Ag g n _, CTPast> => vf ! 
VPastPart g n } ; - +{- copulaVerbForms : VerbForms = { inf = "byť" ; pressg1 = "som" ; @@ -463,29 +460,36 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> { pastpfem = "mala" ; pastpneutr = "malo" ; } ; +-} -- just an example of a traditional paradigm ---- TODO other traditional paradigms - iii_kupovatVerbForms : Str -> VerbForms = \kupovat -> + aeiVerbForms : Str -> VerbForms = \citati -> let - kupo = Predef.tk 3 kupovat ; - kupu = Predef.tk 1 kupo + "u" - in - { - inf = kupovat ; - pressg1 = kupu + "jem" ; - pressg2 = kupu + "ješ" ; - pressg3 = kupu + "je" ; - prespl1 = kupu + "jeme" ; - prespl2 = kupu + "jete" ; - prespl3 = kupu + "jú" ; - pastpmasc = "kupoval" ; - pastpfem = "kupovala" ; - pastpneutr = "kupovalo" ; + cita = Predef.tk 2 citati ; + u = case last cita of { + "a" => "aju" ; + "e" => "u" ; + "i" => "e" + } ; + in table { + VInf => citati ; + VPres Sg P1 => cita + "m" ; + VPres Sg P2 => cita + "š" ; + VPres Sg P3 => cita ; + VPres Pl P1 => cita + "mo" ; + VPres Pl P2 => cita + "te" ; + VPres pl P3 => init cita + u ; + VPastPart (Masc _) Sg => cita + "o" ; + VPastPart Fem Sg => cita + "la" ; + VPastPart Neutr Sg => cita + "lo" ; + VPastPart (Masc _) Pl => cita + "li" ; + VPastPart Fem Pl => cita + "le" ; + VPastPart Neutr Pl => cita + "la" } ; - +{- --------------------------- -- Pronouns diff --git a/src/croatian/gold-test.txt b/src/croatian/gold-test.txt index e3293e861..d5ff47eed 100644 --- a/src/croatian/gold-test.txt +++ b/src/croatian/gold-test.txt @@ -552,3 +552,34 @@ s . Neutr => Pl => Acc => niska s . Neutr => Pl => Voc => niska s . Neutr => Pl => Loc => niskim s . 
Neutr => Pl => Ins => niskim +VInf => čitati +VPres Sg P1 => čitam +VPres Sg P2 => čitaš +VPres Sg P3 => čita +VPres Pl P1 => čitamo +VPres Pl P2 => čitate +VPres Pl P3 => čitaju +VPastPart (Masc Anim) Sg => čitao +VPastPart (Masc Anim) Pl => čitali +VPastPart (Masc Inanim) Sg => čitao +VPastPart (Masc Inanim) Pl => čitali +VPastPart Fem Sg => čitala +VPastPart Fem Pl => čitale +VPastPart Neutr Sg => čitalo +VPastPart Neutr Pl => čitala +VInf => raditi +VPres Sg P1 => radim +VPres Sg P2 => radiš +VPres Sg P3 => radi +VPres Pl P1 => radimo +VPres Pl P2 => radite +VPres Pl P3 => rade +VPastPart (Masc Anim) Sg => radio +VPastPart (Masc Anim) Pl => radili +VPastPart (Masc Inanim) Sg => radio +VPastPart (Masc Inanim) Pl => radili +VPastPart Fem Sg => radila +VPastPart Fem Pl => radile +VPastPart Neutr Sg => radilo +VPastPart Neutr Pl => radila +aarnes-mbp-2:croatian aarne$ diff --git a/src/croatian/testHrv.gfs b/src/croatian/testHrv.gfs index 50ed82723..a80ee37c0 100644 --- a/src/croatian/testHrv.gfs +++ b/src/croatian/testHrv.gfs @@ -28,3 +28,6 @@ cc -table -unqual adjFormsAdjective (velikA "mastan") cc -table -unqual adjFormsAdjective (velikA "gladan") cc -table -unqual adjFormsAdjective (velikA "nizak") +cc -table -unqual aeiVerbForms ("čitati") +cc -table -unqual aeiVerbForms ("raditi") + diff --git a/src/croatian/wiktionary/extract.py b/src/croatian/wiktionary/extract.py index 0abe4cecc..92e5f795f 100644 --- a/src/croatian/wiktionary/extract.py +++ b/src/croatian/wiktionary/extract.py @@ -8,7 +8,7 @@ MYLANG = 'Serbo-Croatian' GENDERS = ['masculine', 'feminine', 'neuter'] -NOUN_CASES = { +NOUN_FORMS = { 'singular': { 'nominative': 'snom', 'genitive': 'sgen', @@ -25,7 +25,7 @@ NOUN_CASES = { } } -ADJ_CASES = { +ADJ_FORMS = { 'masculine': { 'singular': { 'nominative': 'msnom', @@ -54,6 +54,33 @@ ADJ_CASES = { } } +VERB_FORMS = { + 'present': { + 'singular': { + 'first-person': 'pres_sg_1', + 'second-person': 'pres_sg_2', + 'third-person': 'pres_sg_3' + }, + 'plural': 
{ + 'first-person': 'pres_pl_1', + 'second-person': 'pres_pl_2', + 'third-person': 'pres_pl_3' + } + }, + 'participle': { + 'singular': { + 'masculine': 'ppart_masc_sg', + 'feminine': 'ppart_fem_sg', + 'neuter': 'ppart_neutr_sg' + }, + 'plural': { + 'masculine': 'ppart_masc_pl', + 'feminine': 'ppart_fem_pl', + 'neuter': 'ppart_neutr_pl' + } + } + } + def get_forms(pos, forms): @@ -64,24 +91,33 @@ def get_forms(pos, forms): if g in f.get('tags', []): dict['gender'] = g tags = f.get('tags', []) - for num in NOUN_CASES: + for num in NOUN_FORMS: if num in tags: - for case in NOUN_CASES[num]: + for case in NOUN_FORMS[num]: if case in tags: - dict[NOUN_CASES[num][case]] = f['form'] + dict[NOUN_FORMS[num][case]] = f['form'] elif pos == 'adj': - print(forms) for f in forms: tags = f.get('tags', []) if 'positive' in tags and 'indefinite' in tags: - for g in ADJ_CASES: + for g in ADJ_FORMS: if g in tags: - for n in ADJ_CASES[g]: + for n in ADJ_FORMS[g]: if n in tags: - for c in ADJ_CASES[g][n]: + for c in ADJ_FORMS[g][n]: if c in tags: - dict[ADJ_CASES[g][n][c]] = f['form'] - + dict[ADJ_FORMS[g][n][c]] = f['form'] + elif pos == 'verb': + for f in forms: + tags = f.get('tags', []) + for t in VERB_FORMS: + if t in tags: + for n in VERB_FORMS[t]: + if n in tags: + for g in VERB_FORMS[t][n]: + if g in tags: + dict[VERB_FORMS[t][n][g]] = f['form'] + else: dict['forms'] = forms[:10] #### return dict