adjective sound changes and extraction from wiktionary

This commit is contained in:
aarneranta
2022-09-23 15:54:43 +02:00
parent ae2b3bec29
commit 3eac1b9d0c
4 changed files with 248 additions and 109 deletions

View File

@@ -49,6 +49,15 @@ palatalize : Str -> Str = \s -> case s of {
_ => s
} ;
-- Regressive voicing assimilation on the last consonant of a stem:
-- a stem-final voiced obstruent is replaced by its voiceless counterpart,
-- as needed when a voiceless consonant follows (e.g. niz- + k => nisk-).
-- Despite its name, the mapping only goes from voiced to voiceless.
-- Strings that do not end in one of the listed consonants are returned unchanged.
voicing : Str -> Str = \s -> case s of {
  x + "b"  => x + "p" ;
  x + "d"  => x + "t" ;
  x + "g"  => x + "k" ;  -- g/k completes the set of voiced/voiceless pairs
  x + "đ"  => x + "ć" ;
  x + "z"  => x + "s" ;
  x + "dž" => x + "č" ;  -- listed before "ž": a stem in "dž" also ends in "ž"
  x + "ž"  => x + "š" ;
  _ => s
  } ;
---------------
-- Nouns
---------------
@@ -380,117 +389,32 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> {
-}
-- Adjective paradigm built from the masculine singular nominative.
-- Only msnom keeps the citation form; every other form is built on the
-- oblique stem `velk`, derived from the citation form by these rules
-- (the first matching one applies):
--   -stan        => -sn          mastan => masn-
--   -ao          => -l           (final o goes back to l before an ending)
--   -ak          => devoice + k  nizak  => nisk-  (the a is dropped and the
--                                preceding consonant assimilates to k)
--   -a + one character => the a is dropped   gladan => gladn-
--   otherwise    => unchanged    velik  => velik-
-- NOTE(review): the last two dropping rules also fire on stems whose `a` is
-- not fleeting (e.g. "star" => "str-", "bogat" => "bogt-") -- confirm whether
-- such adjectives are meant to go through a different constructor.
velikA : Str -> AdjForms = \velik ->
  let
    velk : Str = case velik of {
      vel + "stan"    => vel + "sn" ;
      vel + "ao"      => vel + "l" ;
      vel + "ak"      => voicing vel + "k" ;
      vel + "a" + k@? => vel + k ;
      _               => velik
      }
  in {
    msnom = velik ;
    fsnom = velk + "a" ;
    -- ifSoft is defined outside this block and is applied to the citation form,
    -- not to the oblique stem; presumably it selects "e" after a soft stem
    -- and "o" otherwise -- TODO confirm
    nsnom = ifSoft velik
              (velk + "e")
              (velk + "o") ;
    msgen = velk + "og" ;
    fsgen = velk + "e" ;
    msdat = velk + "omu" ;
    fsdat = velk + "oj" ;
    fsacc = velk + "u" ;
    msloc = velk + "om" ;
    msins = velk + "im" ;
    mpnom = velk + "i" ;
    pgen  = velk + "ih" ;
    } ;
{-
-- if the penultimate has accent, e.g. krásny, the last accent disappears
krasnyA : Str -> AdjForms = \krasny ->
let
krasn = init krasny ;
in peknyA krasny ** {
msnom = krasn + "y" ;
fsnom = krasn + "a" ;
nsnom = krasn + "e" ;
msgen = krasn + "eho" ;
msdat = krasn + "emu" ;
fsacc = krasn + "u" ;
msins = krasn + "ym" ;
ampnom = krasn + "i" ;
pgen = krasn + "ych" ;
pins = krasn + "ymi" ;
} ;
-- soft consonant + i
cudziA : Str -> AdjForms = \cudzi ->
let
cudz = init cudzi ;
pcudz = palatal cudz ;
in {
msnom = pcudz + "í" ;
fsnom = pcudz + "ia" ;
nsnom = pcudz + "ie" ;
msgen = pcudz + "ieho" ;
fsgen = pcudz + "ej" ;
msdat = pcudz + "iemu" ;
fsacc = pcudz + "iu" ;
msloc = cudz + "om" ;
msins = pcudz + "ím" ;
fsins = cudz + "ou" ;
ampnom = pcudz + "í" ;
pgen = pcudz + "ích" ;
pins = pcudz + "ími" ;
} ;
-- accented vowel + soft consonant + i
rydziA : Str -> AdjForms = \rydzi ->
let
rydz = init rydzi ;
prydz = palatal rydz ;
in peknyA rydzi ** {
msnom = prydz + "i" ;
fsnom = rydz + "a" ;
nsnom = prydz + "e" ;
msgen = prydz + "eho" ;
msdat = prydz + "emu" ;
fsacc = rydz + "u" ;
msins = prydz + "im" ;
ampnom = prydz + "i" ;
pgen = prydz + "ich" ;
pins = prydz + "imi" ;
} ;
-- masculine possession: the same endings as in feminine
otcovA : Str -> AdjForms = \otcov ->
{
msnom = otcov ;
fsnom = otcov + "a" ;
nsnom = otcov + "o" ;
msgen = otcov + "ho" ;
fsgen = otcov + "ej" ;
msdat = otcov + "mu" ;
fsacc = otcov + "u" ;
msloc = otcov + "om" ;
msins = otcov + "ým" ;
fsins = otcov + "ou" ;
ampnom = otcov + "i" ;
pgen = otcov + "ých" ;
pins = otcov + "ými" ;
} ;
paviA : Str -> AdjForms = \pavi ->
let
pav = init pavi ;
in {
msnom = pav + "í" ;
fsnom = pav + "ia" ;
nsnom = pav + "ie" ;
msgen = pav + "ieho" ;
fsgen = pav + "ej" ;
msdat = pav + "iemu" ;
fsacc = pav + "iu" ;
msloc = pav + "om" ;
msins = pav + "ím" ;
fsins = pav + "ou" ;
ampnom = pav + "í" ; ----
pgen = pav + "ich" ; ----
pins = pav + "imi" ; ----
} ;
{-
---------------------
-- Verbs
-- https://en.wikipedia.org/wiki/Slovak_language#Verbs

View File

@@ -384,3 +384,171 @@ s . Neutr => Pl => Acc => velika
s . Neutr => Pl => Voc => velika
s . Neutr => Pl => Loc => velikim
s . Neutr => Pl => Ins => velikim
s . Masc Anim => Sg => Nom => mastan
s . Masc Anim => Sg => Gen => masnog
s . Masc Anim => Sg => Dat => masnomu
s . Masc Anim => Sg => Acc => masnog
s . Masc Anim => Sg => Voc => mastan
s . Masc Anim => Sg => Loc => masnom
s . Masc Anim => Sg => Ins => masnim
s . Masc Anim => Pl => Nom => masni
s . Masc Anim => Pl => Gen => masnih
s . Masc Anim => Pl => Dat => masnim
s . Masc Anim => Pl => Acc => masne
s . Masc Anim => Pl => Voc => masni
s . Masc Anim => Pl => Loc => masnim
s . Masc Anim => Pl => Ins => masnim
s . Masc Inanim => Sg => Nom => mastan
s . Masc Inanim => Sg => Gen => masnog
s . Masc Inanim => Sg => Dat => masnomu
s . Masc Inanim => Sg => Acc => mastan
s . Masc Inanim => Sg => Voc => mastan
s . Masc Inanim => Sg => Loc => masnom
s . Masc Inanim => Sg => Ins => masnim
s . Masc Inanim => Pl => Nom => masni
s . Masc Inanim => Pl => Gen => masnih
s . Masc Inanim => Pl => Dat => masnim
s . Masc Inanim => Pl => Acc => masne
s . Masc Inanim => Pl => Voc => masni
s . Masc Inanim => Pl => Loc => masnim
s . Masc Inanim => Pl => Ins => masnim
s . Fem => Sg => Nom => masna
s . Fem => Sg => Gen => masne
s . Fem => Sg => Dat => masnoj
s . Fem => Sg => Acc => masnu
s . Fem => Sg => Voc => masna
s . Fem => Sg => Loc => masnoj
s . Fem => Sg => Ins => masnom
s . Fem => Pl => Nom => masne
s . Fem => Pl => Gen => masnih
s . Fem => Pl => Dat => masnim
s . Fem => Pl => Acc => masne
s . Fem => Pl => Voc => masne
s . Fem => Pl => Loc => masnim
s . Fem => Pl => Ins => masnim
s . Neutr => Sg => Nom => masno
s . Neutr => Sg => Gen => masnog
s . Neutr => Sg => Dat => masnomu
s . Neutr => Sg => Acc => masno
s . Neutr => Sg => Voc => masno
s . Neutr => Sg => Loc => masnom
s . Neutr => Sg => Ins => masnim
s . Neutr => Pl => Nom => masna
s . Neutr => Pl => Gen => masnih
s . Neutr => Pl => Dat => masnim
s . Neutr => Pl => Acc => masna
s . Neutr => Pl => Voc => masna
s . Neutr => Pl => Loc => masnim
s . Neutr => Pl => Ins => masnim
s . Masc Anim => Sg => Nom => gladan
s . Masc Anim => Sg => Gen => gladnog
s . Masc Anim => Sg => Dat => gladnomu
s . Masc Anim => Sg => Acc => gladnog
s . Masc Anim => Sg => Voc => gladan
s . Masc Anim => Sg => Loc => gladnom
s . Masc Anim => Sg => Ins => gladnim
s . Masc Anim => Pl => Nom => gladni
s . Masc Anim => Pl => Gen => gladnih
s . Masc Anim => Pl => Dat => gladnim
s . Masc Anim => Pl => Acc => gladne
s . Masc Anim => Pl => Voc => gladni
s . Masc Anim => Pl => Loc => gladnim
s . Masc Anim => Pl => Ins => gladnim
s . Masc Inanim => Sg => Nom => gladan
s . Masc Inanim => Sg => Gen => gladnog
s . Masc Inanim => Sg => Dat => gladnomu
s . Masc Inanim => Sg => Acc => gladan
s . Masc Inanim => Sg => Voc => gladan
s . Masc Inanim => Sg => Loc => gladnom
s . Masc Inanim => Sg => Ins => gladnim
s . Masc Inanim => Pl => Nom => gladni
s . Masc Inanim => Pl => Gen => gladnih
s . Masc Inanim => Pl => Dat => gladnim
s . Masc Inanim => Pl => Acc => gladne
s . Masc Inanim => Pl => Voc => gladni
s . Masc Inanim => Pl => Loc => gladnim
s . Masc Inanim => Pl => Ins => gladnim
s . Fem => Sg => Nom => gladna
s . Fem => Sg => Gen => gladne
s . Fem => Sg => Dat => gladnoj
s . Fem => Sg => Acc => gladnu
s . Fem => Sg => Voc => gladna
s . Fem => Sg => Loc => gladnoj
s . Fem => Sg => Ins => gladnom
s . Fem => Pl => Nom => gladne
s . Fem => Pl => Gen => gladnih
s . Fem => Pl => Dat => gladnim
s . Fem => Pl => Acc => gladne
s . Fem => Pl => Voc => gladne
s . Fem => Pl => Loc => gladnim
s . Fem => Pl => Ins => gladnim
s . Neutr => Sg => Nom => gladno
s . Neutr => Sg => Gen => gladnog
s . Neutr => Sg => Dat => gladnomu
s . Neutr => Sg => Acc => gladno
s . Neutr => Sg => Voc => gladno
s . Neutr => Sg => Loc => gladnom
s . Neutr => Sg => Ins => gladnim
s . Neutr => Pl => Nom => gladna
s . Neutr => Pl => Gen => gladnih
s . Neutr => Pl => Dat => gladnim
s . Neutr => Pl => Acc => gladna
s . Neutr => Pl => Voc => gladna
s . Neutr => Pl => Loc => gladnim
s . Neutr => Pl => Ins => gladnim
s . Masc Anim => Sg => Nom => nizak
s . Masc Anim => Sg => Gen => niskog
s . Masc Anim => Sg => Dat => niskomu
s . Masc Anim => Sg => Acc => niskog
s . Masc Anim => Sg => Voc => nizak
s . Masc Anim => Sg => Loc => niskom
s . Masc Anim => Sg => Ins => niskim
s . Masc Anim => Pl => Nom => niski
s . Masc Anim => Pl => Gen => niskih
s . Masc Anim => Pl => Dat => niskim
s . Masc Anim => Pl => Acc => niske
s . Masc Anim => Pl => Voc => niski
s . Masc Anim => Pl => Loc => niskim
s . Masc Anim => Pl => Ins => niskim
s . Masc Inanim => Sg => Nom => nizak
s . Masc Inanim => Sg => Gen => niskog
s . Masc Inanim => Sg => Dat => niskomu
s . Masc Inanim => Sg => Acc => nizak
s . Masc Inanim => Sg => Voc => nizak
s . Masc Inanim => Sg => Loc => niskom
s . Masc Inanim => Sg => Ins => niskim
s . Masc Inanim => Pl => Nom => niski
s . Masc Inanim => Pl => Gen => niskih
s . Masc Inanim => Pl => Dat => niskim
s . Masc Inanim => Pl => Acc => niske
s . Masc Inanim => Pl => Voc => niski
s . Masc Inanim => Pl => Loc => niskim
s . Masc Inanim => Pl => Ins => niskim
s . Fem => Sg => Nom => niska
s . Fem => Sg => Gen => niske
s . Fem => Sg => Dat => niskoj
s . Fem => Sg => Acc => nisku
s . Fem => Sg => Voc => niska
s . Fem => Sg => Loc => niskoj
s . Fem => Sg => Ins => niskom
s . Fem => Pl => Nom => niske
s . Fem => Pl => Gen => niskih
s . Fem => Pl => Dat => niskim
s . Fem => Pl => Acc => niske
s . Fem => Pl => Voc => niske
s . Fem => Pl => Loc => niskim
s . Fem => Pl => Ins => niskim
s . Neutr => Sg => Nom => nisko
s . Neutr => Sg => Gen => niskog
s . Neutr => Sg => Dat => niskomu
s . Neutr => Sg => Acc => nisko
s . Neutr => Sg => Voc => nisko
s . Neutr => Sg => Loc => niskom
s . Neutr => Sg => Ins => niskim
s . Neutr => Pl => Nom => niska
s . Neutr => Pl => Gen => niskih
s . Neutr => Pl => Dat => niskim
s . Neutr => Pl => Acc => niska
s . Neutr => Pl => Voc => niska
s . Neutr => Pl => Loc => niskim
s . Neutr => Pl => Ins => niskim

View File

@@ -24,4 +24,7 @@ cc -table -unqual nounFormsNoun (poljeN "polje") neuter
cc -table -unqual nounFormsNoun (zenaN "žena") feminine
cc -table -unqual adjFormsAdjective (velikA "velik")
cc -table -unqual adjFormsAdjective (velikA "mastan")
cc -table -unqual adjFormsAdjective (velikA "gladan")
cc -table -unqual adjFormsAdjective (velikA "nizak")

View File

@@ -2,7 +2,7 @@ import json
# https://kaikki.org/dictionary/rawdata.html
FILE = 'raw-wiktextract-data.json'
FILE = 'data/raw-wiktextract-data.json'
MYLANG = 'Serbo-Croatian'
@@ -25,6 +25,35 @@ NOUN_CASES = {
}
}
# Lookup table for adjective forms: gender tag -> number tag -> case tag ->
# name of the form slot that the matching inflected form is stored under.
# Only the slots listed here are collected; any other gender/number/case
# combination has no entry in this table.
ADJ_CASES = {
    'masculine': {
        'singular': {
            'nominative': 'msnom',
            'genitive': 'msgen',
            'dative': 'msdat',
            'locative': 'msloc',
            'instrumental': 'msins',
        },
        # the plural genitive slot carries no gender prefix
        'plural': {
            'nominative': 'mpnom',
            'genitive': 'pgen',
        },
    },
    'feminine': {
        'singular': {
            'nominative': 'fsnom',
            'genitive': 'fsgen',
            'dative': 'fsdat',
            'accusative': 'fsacc',
        },
    },
    'neuter': {
        'singular': {
            'nominative': 'nsnom',
        },
    },
}
def get_forms(pos, forms):
@@ -40,6 +69,21 @@ def get_forms(pos, forms):
for case in NOUN_CASES[num]:
if case in tags:
dict[NOUN_CASES[num][case]] = f['form']
elif pos == 'adj':
print(forms)
for f in forms:
tags = f.get('tags', [])
if 'positive' in tags and 'indefinite' in tags:
for g in ADJ_CASES:
if g in tags:
for n in ADJ_CASES[g]:
if n in tags:
for c in ADJ_CASES[g][n]:
if c in tags:
dict[ADJ_CASES[g][n][c]] = f['form']
else:
dict['forms'] = forms[:10] ####
return dict