mirror of
https://github.com/GrammaticalFramework/gf-rgl.git
synced 2026-05-27 08:58:55 -06:00
Adjective sound changes and extraction from Wiktionary
This commit is contained in:
@@ -49,6 +49,15 @@ palatalize : Str -> Str = \s -> case s of {
|
||||
_ => s
|
||||
} ;
|
||||
|
||||
-- Regressive voicing assimilation of a stem-final consonant.
-- Despite its name, this oper DEVOICES: if the string ends in one of the
-- voiced obstruents listed below, that ending is replaced by its voiceless
-- counterpart; any other string is returned unchanged.
-- It is meant to be applied to a stem that will be followed by a voiceless
-- consonant, e.g. (voicing "niz") + "k" gives "nisk".
-- NOTE(review): the pair g/k is not covered here - confirm whether that
-- omission is intentional.
voicing : Str -> Str = \s -> case s of {
  stem + "b"  => stem + "p" ;
  stem + "d"  => stem + "t" ;
  stem + "đ"  => stem + "ć" ;
  stem + "z"  => stem + "s" ;
  -- the digraph "dž" must be tried before the plain "ž" branch
  stem + "dž" => stem + "č" ;
  stem + "ž"  => stem + "š" ;
  _           => s
  } ;
|
||||
---------------
|
||||
-- Nouns
|
||||
---------------
|
||||
@@ -380,117 +389,32 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> {
|
||||
-}
|
||||
|
||||
-- Adjective paradigm built from the masculine singular nominative form,
-- e.g. "velik", "mastan", "gladan", "nizak".
-- msnom keeps the given form; every other form is built on the stem velk,
-- which is the given form with the sound changes below applied.
velikA : Str -> AdjForms = \velik ->
  let
    velk : Str = case velik of {
      vel + "stan"    => vel + "sn" ;          -- mastan -> masn-
      vel + "ao"      => vel + "l" ;           -- final "ao" is replaced by "l"
      vel + "ak"      => voicing vel + "k" ;   -- nizak -> nisk- (a dropped, stem devoiced)
      -- NOTE(review): this matches ANY penultimate "a", so an input such as
      -- "mlad" would give the stem "mld" - confirm callers only pass words
      -- whose "a" really is fleeting.
      vel + "a" + k@? => vel + k ;             -- gladan -> gladn-
      _               => velik
      }
  in {
    msnom = velik ;
    fsnom = velk + "a" ;
    -- ifSoft is defined outside this block; presumably it picks the first
    -- alternative when the word ends in a soft consonant - TODO confirm.
    nsnom = ifSoft velik
              (velk + "e")
              (velk + "o") ;
    msgen = velk + "og" ;
    fsgen = velk + "e" ;
    msdat = velk + "omu" ;
    fsdat = velk + "oj" ;
    fsacc = velk + "u" ;
    msloc = velk + "om" ;
    msins = velk + "im" ;
    mpnom = velk + "i" ;
    pgen  = velk + "ih" ;
    } ;
|
||||
|
||||
{-
|
||||
-- if the penultimate has accent, e.g. krásny, the last accent disappears
|
||||
krasnyA : Str -> AdjForms = \krasny ->
|
||||
let
|
||||
krasn = init krasny ;
|
||||
in peknyA krasny ** {
|
||||
msnom = krasn + "y" ;
|
||||
fsnom = krasn + "a" ;
|
||||
nsnom = krasn + "e" ;
|
||||
msgen = krasn + "eho" ;
|
||||
msdat = krasn + "emu" ;
|
||||
fsacc = krasn + "u" ;
|
||||
msins = krasn + "ym" ;
|
||||
ampnom = krasn + "i" ;
|
||||
pgen = krasn + "ych" ;
|
||||
pins = krasn + "ymi" ;
|
||||
} ;
|
||||
|
||||
-- soft consonant + i
|
||||
|
||||
cudziA : Str -> AdjForms = \cudzi ->
|
||||
let
|
||||
cudz = init cudzi ;
|
||||
pcudz = palatal cudz ;
|
||||
in {
|
||||
msnom = pcudz + "í" ;
|
||||
fsnom = pcudz + "ia" ;
|
||||
nsnom = pcudz + "ie" ;
|
||||
msgen = pcudz + "ieho" ;
|
||||
fsgen = pcudz + "ej" ;
|
||||
msdat = pcudz + "iemu" ;
|
||||
fsacc = pcudz + "iu" ;
|
||||
msloc = cudz + "om" ;
|
||||
msins = pcudz + "ím" ;
|
||||
fsins = cudz + "ou" ;
|
||||
ampnom = pcudz + "í" ;
|
||||
pgen = pcudz + "ích" ;
|
||||
pins = pcudz + "ími" ;
|
||||
} ;
|
||||
|
||||
-- accented vowel + soft consonant + i
|
||||
rydziA : Str -> AdjForms = \rydzi ->
|
||||
let
|
||||
rydz = init rydzi ;
|
||||
prydz = palatal rydz ;
|
||||
in peknyA rydzi ** {
|
||||
msnom = prydz + "i" ;
|
||||
fsnom = rydz + "a" ;
|
||||
nsnom = prydz + "e" ;
|
||||
msgen = prydz + "eho" ;
|
||||
msdat = prydz + "emu" ;
|
||||
fsacc = rydz + "u" ;
|
||||
msins = prydz + "im" ;
|
||||
ampnom = prydz + "i" ;
|
||||
pgen = prydz + "ich" ;
|
||||
pins = prydz + "imi" ;
|
||||
} ;
|
||||
|
||||
-- masculine possession: the same endings as in feminine
|
||||
|
||||
otcovA : Str -> AdjForms = \otcov ->
|
||||
{
|
||||
msnom = otcov ;
|
||||
fsnom = otcov + "a" ;
|
||||
nsnom = otcov + "o" ;
|
||||
msgen = otcov + "ho" ;
|
||||
fsgen = otcov + "ej" ;
|
||||
msdat = otcov + "mu" ;
|
||||
fsacc = otcov + "u" ;
|
||||
msloc = otcov + "om" ;
|
||||
msins = otcov + "ým" ;
|
||||
fsins = otcov + "ou" ;
|
||||
ampnom = otcov + "i" ;
|
||||
pgen = otcov + "ých" ;
|
||||
pins = otcov + "ými" ;
|
||||
} ;
|
||||
|
||||
paviA : Str -> AdjForms = \pavi ->
|
||||
let
|
||||
pav = init pavi ;
|
||||
in {
|
||||
msnom = pav + "í" ;
|
||||
fsnom = pav + "ia" ;
|
||||
nsnom = pav + "ie" ;
|
||||
msgen = pav + "ieho" ;
|
||||
fsgen = pav + "ej" ;
|
||||
msdat = pav + "iemu" ;
|
||||
fsacc = pav + "iu" ;
|
||||
msloc = pav + "om" ;
|
||||
msins = pav + "ím" ;
|
||||
fsins = pav + "ou" ;
|
||||
ampnom = pav + "í" ; ----
|
||||
pgen = pav + "ich" ; ----
|
||||
pins = pav + "imi" ; ----
|
||||
} ;
|
||||
|
||||
{-
|
||||
---------------------
|
||||
-- Verbs
|
||||
-- https://en.wikipedia.org/wiki/Slovak_language#Verbs
|
||||
|
||||
@@ -384,3 +384,171 @@ s . Neutr => Pl => Acc => velika
|
||||
s . Neutr => Pl => Voc => velika
|
||||
s . Neutr => Pl => Loc => velikim
|
||||
s . Neutr => Pl => Ins => velikim
|
||||
s . Masc Anim => Sg => Nom => mastan
|
||||
s . Masc Anim => Sg => Gen => masnog
|
||||
s . Masc Anim => Sg => Dat => masnomu
|
||||
s . Masc Anim => Sg => Acc => masnog
|
||||
s . Masc Anim => Sg => Voc => mastan
|
||||
s . Masc Anim => Sg => Loc => masnom
|
||||
s . Masc Anim => Sg => Ins => masnim
|
||||
s . Masc Anim => Pl => Nom => masni
|
||||
s . Masc Anim => Pl => Gen => masnih
|
||||
s . Masc Anim => Pl => Dat => masnim
|
||||
s . Masc Anim => Pl => Acc => masne
|
||||
s . Masc Anim => Pl => Voc => masni
|
||||
s . Masc Anim => Pl => Loc => masnim
|
||||
s . Masc Anim => Pl => Ins => masnim
|
||||
s . Masc Inanim => Sg => Nom => mastan
|
||||
s . Masc Inanim => Sg => Gen => masnog
|
||||
s . Masc Inanim => Sg => Dat => masnomu
|
||||
s . Masc Inanim => Sg => Acc => mastan
|
||||
s . Masc Inanim => Sg => Voc => mastan
|
||||
s . Masc Inanim => Sg => Loc => masnom
|
||||
s . Masc Inanim => Sg => Ins => masnim
|
||||
s . Masc Inanim => Pl => Nom => masni
|
||||
s . Masc Inanim => Pl => Gen => masnih
|
||||
s . Masc Inanim => Pl => Dat => masnim
|
||||
s . Masc Inanim => Pl => Acc => masne
|
||||
s . Masc Inanim => Pl => Voc => masni
|
||||
s . Masc Inanim => Pl => Loc => masnim
|
||||
s . Masc Inanim => Pl => Ins => masnim
|
||||
s . Fem => Sg => Nom => masna
|
||||
s . Fem => Sg => Gen => masne
|
||||
s . Fem => Sg => Dat => masnoj
|
||||
s . Fem => Sg => Acc => masnu
|
||||
s . Fem => Sg => Voc => masna
|
||||
s . Fem => Sg => Loc => masnoj
|
||||
s . Fem => Sg => Ins => masnom
|
||||
s . Fem => Pl => Nom => masne
|
||||
s . Fem => Pl => Gen => masnih
|
||||
s . Fem => Pl => Dat => masnim
|
||||
s . Fem => Pl => Acc => masne
|
||||
s . Fem => Pl => Voc => masne
|
||||
s . Fem => Pl => Loc => masnim
|
||||
s . Fem => Pl => Ins => masnim
|
||||
s . Neutr => Sg => Nom => masno
|
||||
s . Neutr => Sg => Gen => masnog
|
||||
s . Neutr => Sg => Dat => masnomu
|
||||
s . Neutr => Sg => Acc => masno
|
||||
s . Neutr => Sg => Voc => masno
|
||||
s . Neutr => Sg => Loc => masnom
|
||||
s . Neutr => Sg => Ins => masnim
|
||||
s . Neutr => Pl => Nom => masna
|
||||
s . Neutr => Pl => Gen => masnih
|
||||
s . Neutr => Pl => Dat => masnim
|
||||
s . Neutr => Pl => Acc => masna
|
||||
s . Neutr => Pl => Voc => masna
|
||||
s . Neutr => Pl => Loc => masnim
|
||||
s . Neutr => Pl => Ins => masnim
|
||||
s . Masc Anim => Sg => Nom => gladan
|
||||
s . Masc Anim => Sg => Gen => gladnog
|
||||
s . Masc Anim => Sg => Dat => gladnomu
|
||||
s . Masc Anim => Sg => Acc => gladnog
|
||||
s . Masc Anim => Sg => Voc => gladan
|
||||
s . Masc Anim => Sg => Loc => gladnom
|
||||
s . Masc Anim => Sg => Ins => gladnim
|
||||
s . Masc Anim => Pl => Nom => gladni
|
||||
s . Masc Anim => Pl => Gen => gladnih
|
||||
s . Masc Anim => Pl => Dat => gladnim
|
||||
s . Masc Anim => Pl => Acc => gladne
|
||||
s . Masc Anim => Pl => Voc => gladni
|
||||
s . Masc Anim => Pl => Loc => gladnim
|
||||
s . Masc Anim => Pl => Ins => gladnim
|
||||
s . Masc Inanim => Sg => Nom => gladan
|
||||
s . Masc Inanim => Sg => Gen => gladnog
|
||||
s . Masc Inanim => Sg => Dat => gladnomu
|
||||
s . Masc Inanim => Sg => Acc => gladan
|
||||
s . Masc Inanim => Sg => Voc => gladan
|
||||
s . Masc Inanim => Sg => Loc => gladnom
|
||||
s . Masc Inanim => Sg => Ins => gladnim
|
||||
s . Masc Inanim => Pl => Nom => gladni
|
||||
s . Masc Inanim => Pl => Gen => gladnih
|
||||
s . Masc Inanim => Pl => Dat => gladnim
|
||||
s . Masc Inanim => Pl => Acc => gladne
|
||||
s . Masc Inanim => Pl => Voc => gladni
|
||||
s . Masc Inanim => Pl => Loc => gladnim
|
||||
s . Masc Inanim => Pl => Ins => gladnim
|
||||
s . Fem => Sg => Nom => gladna
|
||||
s . Fem => Sg => Gen => gladne
|
||||
s . Fem => Sg => Dat => gladnoj
|
||||
s . Fem => Sg => Acc => gladnu
|
||||
s . Fem => Sg => Voc => gladna
|
||||
s . Fem => Sg => Loc => gladnoj
|
||||
s . Fem => Sg => Ins => gladnom
|
||||
s . Fem => Pl => Nom => gladne
|
||||
s . Fem => Pl => Gen => gladnih
|
||||
s . Fem => Pl => Dat => gladnim
|
||||
s . Fem => Pl => Acc => gladne
|
||||
s . Fem => Pl => Voc => gladne
|
||||
s . Fem => Pl => Loc => gladnim
|
||||
s . Fem => Pl => Ins => gladnim
|
||||
s . Neutr => Sg => Nom => gladno
|
||||
s . Neutr => Sg => Gen => gladnog
|
||||
s . Neutr => Sg => Dat => gladnomu
|
||||
s . Neutr => Sg => Acc => gladno
|
||||
s . Neutr => Sg => Voc => gladno
|
||||
s . Neutr => Sg => Loc => gladnom
|
||||
s . Neutr => Sg => Ins => gladnim
|
||||
s . Neutr => Pl => Nom => gladna
|
||||
s . Neutr => Pl => Gen => gladnih
|
||||
s . Neutr => Pl => Dat => gladnim
|
||||
s . Neutr => Pl => Acc => gladna
|
||||
s . Neutr => Pl => Voc => gladna
|
||||
s . Neutr => Pl => Loc => gladnim
|
||||
s . Neutr => Pl => Ins => gladnim
|
||||
s . Masc Anim => Sg => Nom => nizak
|
||||
s . Masc Anim => Sg => Gen => niskog
|
||||
s . Masc Anim => Sg => Dat => niskomu
|
||||
s . Masc Anim => Sg => Acc => niskog
|
||||
s . Masc Anim => Sg => Voc => nizak
|
||||
s . Masc Anim => Sg => Loc => niskom
|
||||
s . Masc Anim => Sg => Ins => niskim
|
||||
s . Masc Anim => Pl => Nom => niski
|
||||
s . Masc Anim => Pl => Gen => niskih
|
||||
s . Masc Anim => Pl => Dat => niskim
|
||||
s . Masc Anim => Pl => Acc => niske
|
||||
s . Masc Anim => Pl => Voc => niski
|
||||
s . Masc Anim => Pl => Loc => niskim
|
||||
s . Masc Anim => Pl => Ins => niskim
|
||||
s . Masc Inanim => Sg => Nom => nizak
|
||||
s . Masc Inanim => Sg => Gen => niskog
|
||||
s . Masc Inanim => Sg => Dat => niskomu
|
||||
s . Masc Inanim => Sg => Acc => nizak
|
||||
s . Masc Inanim => Sg => Voc => nizak
|
||||
s . Masc Inanim => Sg => Loc => niskom
|
||||
s . Masc Inanim => Sg => Ins => niskim
|
||||
s . Masc Inanim => Pl => Nom => niski
|
||||
s . Masc Inanim => Pl => Gen => niskih
|
||||
s . Masc Inanim => Pl => Dat => niskim
|
||||
s . Masc Inanim => Pl => Acc => niske
|
||||
s . Masc Inanim => Pl => Voc => niski
|
||||
s . Masc Inanim => Pl => Loc => niskim
|
||||
s . Masc Inanim => Pl => Ins => niskim
|
||||
s . Fem => Sg => Nom => niska
|
||||
s . Fem => Sg => Gen => niske
|
||||
s . Fem => Sg => Dat => niskoj
|
||||
s . Fem => Sg => Acc => nisku
|
||||
s . Fem => Sg => Voc => niska
|
||||
s . Fem => Sg => Loc => niskoj
|
||||
s . Fem => Sg => Ins => niskom
|
||||
s . Fem => Pl => Nom => niske
|
||||
s . Fem => Pl => Gen => niskih
|
||||
s . Fem => Pl => Dat => niskim
|
||||
s . Fem => Pl => Acc => niske
|
||||
s . Fem => Pl => Voc => niske
|
||||
s . Fem => Pl => Loc => niskim
|
||||
s . Fem => Pl => Ins => niskim
|
||||
s . Neutr => Sg => Nom => nisko
|
||||
s . Neutr => Sg => Gen => niskog
|
||||
s . Neutr => Sg => Dat => niskomu
|
||||
s . Neutr => Sg => Acc => nisko
|
||||
s . Neutr => Sg => Voc => nisko
|
||||
s . Neutr => Sg => Loc => niskom
|
||||
s . Neutr => Sg => Ins => niskim
|
||||
s . Neutr => Pl => Nom => niska
|
||||
s . Neutr => Pl => Gen => niskih
|
||||
s . Neutr => Pl => Dat => niskim
|
||||
s . Neutr => Pl => Acc => niska
|
||||
s . Neutr => Pl => Voc => niska
|
||||
s . Neutr => Pl => Loc => niskim
|
||||
s . Neutr => Pl => Ins => niskim
|
||||
|
||||
@@ -24,4 +24,7 @@ cc -table -unqual nounFormsNoun (poljeN "polje") neuter
|
||||
cc -table -unqual nounFormsNoun (zenaN "žena") feminine
|
||||
|
||||
cc -table -unqual adjFormsAdjective (velikA "velik")
|
||||
cc -table -unqual adjFormsAdjective (velikA "mastan")
|
||||
cc -table -unqual adjFormsAdjective (velikA "gladan")
|
||||
cc -table -unqual adjFormsAdjective (velikA "nizak")
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@ import json
|
||||
|
||||
# https://kaikki.org/dictionary/rawdata.html
|
||||
|
||||
FILE = 'raw-wiktextract-data.json'
|
||||
FILE = 'data/raw-wiktextract-data.json'
|
||||
|
||||
MYLANG = 'Serbo-Croatian'
|
||||
|
||||
@@ -25,6 +25,35 @@ NOUN_CASES = {
|
||||
}
|
||||
}
|
||||
|
||||
# Lookup table for adjective forms: gender tag -> number tag -> case tag ->
# name of the adjective-form field that the matching word form is stored under.
# Only the gender/number/case combinations listed here are extracted; any
# other combination is ignored by the lookup.
ADJ_CASES = {
    'masculine': {
        'singular': {
            'nominative': 'msnom',
            'genitive': 'msgen',
            'dative': 'msdat',
            'locative': 'msloc',
            'instrumental': 'msins',
        },
        'plural': {
            'nominative': 'mpnom',
            # the field name carries no gender prefix
            'genitive': 'pgen',
        },
    },
    'feminine': {
        'singular': {
            'nominative': 'fsnom',
            'genitive': 'fsgen',
            'dative': 'fsdat',
            'accusative': 'fsacc',
        },
    },
    'neuter': {
        'singular': {
            'nominative': 'nsnom',
        },
    },
}
|
||||
|
||||
|
||||
|
||||
def get_forms(pos, forms):
|
||||
@@ -40,6 +69,21 @@ def get_forms(pos, forms):
|
||||
for case in NOUN_CASES[num]:
|
||||
if case in tags:
|
||||
dict[NOUN_CASES[num][case]] = f['form']
|
||||
elif pos == 'adj':
|
||||
print(forms)
|
||||
for f in forms:
|
||||
tags = f.get('tags', [])
|
||||
if 'positive' in tags and 'indefinite' in tags:
|
||||
for g in ADJ_CASES:
|
||||
if g in tags:
|
||||
for n in ADJ_CASES[g]:
|
||||
if n in tags:
|
||||
for c in ADJ_CASES[g][n]:
|
||||
if c in tags:
|
||||
dict[ADJ_CASES[g][n][c]] = f['form']
|
||||
|
||||
else:
|
||||
dict['forms'] = forms[:10] ####
|
||||
return dict
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user