1
0
forked from GitHub/gf-rgl

First version of MorphoDictHrv, extracted from Wiktionary; TODO: make better use of the PN and V forms

This commit is contained in:
Aarne Ranta
2022-10-12 10:11:12 +02:00
parent 8755f9da65
commit c20e9b6383
6 changed files with 21937 additions and 14 deletions

View File

@@ -30,7 +30,7 @@ lin
DefArt = {s = \\_,_,_ => []} ; DefArt = {s = \\_,_,_ => []} ;
IndefArt = {s = \\_,_,_ => []} ; IndefArt = {s = \\_,_,_ => []} ;
NumPl = {s = \\_,_ => [] ; size = NS_20_} ; ---- size NumPl = {s = \\_,_ => [] ; size = NS_2_4} ; ---- size
NumSg = {s = \\_,_ => [] ; size = NS_1} ; NumSg = {s = \\_,_ => [] ; size = NS_1} ;
UsePron pron = { UsePron pron = {

View File

@@ -13,6 +13,8 @@ oper
= Masc Anim ; = Masc Anim ;
mascInanimate : Gender mascInanimate : Gender
= Masc Inanim ; = Masc Inanim ;
masculine : Gender
= Masc Inanim ;
feminine : Gender feminine : Gender
= Fem ; = Fem ;
neuter : Gender neuter : Gender
@@ -132,12 +134,27 @@ oper
compar = velikA comp ; compar = velikA comp ;
superl = superlAForms (velikA comp) superl = superlAForms (velikA comp)
} ; } ;
mkA : (posit : AForms) -> (compar : Str) -> A
= \posit,compar -> lin A {
posit = posit ;
compar = velikA compar ;
superl = superlAForms (velikA compar)
} ;
mkA : (posit, compar : AForms) -> A mkA : (posit, compar : AForms) -> A
= \posit,compar -> lin A { = \posit,compar -> lin A {
posit = posit ; posit = posit ;
compar = compar ; compar = compar ;
superl = superlAForms compar superl = superlAForms compar
} ; } ;
mkA : (posit : AForms) -> A
= \posit ->
let
compar = regComparAForms posit
in lin A {
posit = posit ;
compar = compar ;
superl = superlAForms compar
} ;
} ; } ;
invarA : Str -> A invarA : Str -> A

View File

@@ -338,12 +338,12 @@ voicing : Str -> Str = \s -> case s of {
msins : Str ; -- nsins, pdat, ploc, pins = msins msins : Str ; -- nsins, pdat, ploc, pins = msins
fsins : Str ; -- no o/e variation like in msdat fsins : Str ; -- no o/e variation like in msdat
mpnom : Str ; -- mpvoc = mpnom mpnom : Str ; -- mpvoc = mpnom
pgen : Str ; -- mpgen : Str ; --
} ; } ;
invarAdjForms : Str -> AdjForms = \s -> { invarAdjForms : Str -> AdjForms = \s -> {
msnom, fsnom, nsnom, msgen, fsgen, msdat, msnom, fsnom, nsnom, msgen, fsgen, msdat,
fsdat, fsacc, msins, fsins, mpnom, pgen = s ; fsdat, fsacc, msins, fsins, mpnom, mpgen = s ;
} ; } ;
-- used in PositA but will also work in Compar and Superl by calling their record fields -- used in PositA but will also work in Compar and Superl by calling their record fields
@@ -368,7 +368,7 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> {
| <Pl,Dat|Loc|Ins, _> => afs.msins ; | <Pl,Dat|Loc|Ins, _> => afs.msins ;
<Sg, Ins, Fem> => afs.fsins ; <Sg, Ins, Fem> => afs.fsins ;
<Pl, Nom|Voc, Masc _> => afs.mpnom ; <Pl, Nom|Voc, Masc _> => afs.mpnom ;
<Pl, Gen,_> => afs.pgen <Pl, Gen,_> => afs.mpgen
} }
} ; } ;
@@ -395,7 +395,7 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> {
msins = velk + "im" ; msins = velk + "im" ;
fsins = velk + "om" ; fsins = velk + "om" ;
mpnom = velk + "i" ; mpnom = velk + "i" ;
pgen = velk + "ih" ; mpgen = velk + "ih" ;
} ; } ;
regComparAForms : AdjForms -> AdjForms regComparAForms : AdjForms -> AdjForms

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -40,13 +40,11 @@ ADJ_FORMS = {
'singular': { 'singular': {
'nominative': 'msnom', 'nominative': 'msnom',
'genitive': 'msgen', 'genitive': 'msgen',
'dative': 'msdat', 'dative': 'msdat'
'locative': 'msloc',
'instrumental': 'msins'
}, },
'plural': { 'plural': {
'nominative': 'mpnom', 'nominative': 'mpnom',
'genitive': 'pgen' 'genitive': 'mpgen'
} }
}, },
'feminine': { 'feminine': {
@@ -54,7 +52,8 @@ ADJ_FORMS = {
'nominative': 'fsnom', 'nominative': 'fsnom',
'genitive': 'fsgen', 'genitive': 'fsgen',
'dative': 'fsdat', 'dative': 'fsdat',
'accusative': 'fsacc' 'accusative': 'fsacc',
'instrumental': 'fsins'
} }
}, },
'neuter': { 'neuter': {
@@ -122,7 +121,7 @@ def unaccent(word):
cyrillic = 'ЀЈЉЊЋЍЏАБВГДЕЖЗИКЛМНОПРСТУФХЦЧШабвгдежзиклмнопрстуфхцчшыѐђјљњћѝџӣӯ' cyrillic = 'ЀЈЉЊЋЍЏАБВГДЕЖЗИКЛМНОПРСТУФХЦЧШабвгдежзиклмнопрстуфхцчшыѐђјљњћѝџӣӯ'
def get_forms(pos, forms): def get_forms(pos, forms, word):
dict = {} dict = {}
if pos == 'noun': if pos == 'noun':
for f in forms: for f in forms:
@@ -157,6 +156,9 @@ def get_forms(pos, forms):
for c in ADJ_FORMS[g][n]: for c in ADJ_FORMS[g][n]:
if c in tags: if c in tags:
dict[ADJ_FORMS[g][n][c]] = unaccent(f['form']) dict[ADJ_FORMS[g][n][c]] = unaccent(f['form'])
elif all([t in tags for t in [
'comparative', 'masculine', 'singular', 'nominative']]):
dict['cmsnom'] = unaccent(f['form'])
elif pos == 'verb': elif pos == 'verb':
for f in forms: for f in forms:
tags = f.get('tags', []) tags = f.get('tags', [])
@@ -167,6 +169,8 @@ def get_forms(pos, forms):
for g in VERB_FORMS[t][n]: for g in VERB_FORMS[t][n]:
if g in tags: if g in tags:
dict[VERB_FORMS[t][n][g]] = unaccent(f['form']) dict[VERB_FORMS[t][n][g]] = unaccent(f['form'])
if dict:
dict['infin'] = unaccent(word)
else: else:
dict['forms'] = forms[:10] #### dict['forms'] = forms[:10] ####
@@ -178,7 +182,7 @@ def get_forms(pos, forms):
def lexinfo(data): def lexinfo(data):
return data['word'], { return data['word'], {
'pos': data['pos'], 'forms': get_forms(data['pos'], data['forms'])} 'pos': data['pos'], 'forms': get_forms(data['pos'], data['forms'], data['word'])}
# write morphology of mylang in m.json # write morphology of mylang in m.json
@@ -219,7 +223,7 @@ def print_gf_code(data, i):
cats = { cats = {
'name': ('PN', 7), 'name': ('PN', 7),
'noun': ('N', 11), 'noun': ('N', 11),
'adj': ('A', 12), 'adj': ('A', 13),
'verb': ('V', 12) 'verb': ('V', 12)
} }
pos = data[lemma]['pos'] pos = data[lemma]['pos']
@@ -230,7 +234,10 @@ def print_gf_code(data, i):
else: else:
s = '{' s = '{'
for f in fs: for f in fs:
s += f + ' = ' + '"' + str(fs[f]) + '"' + ' ; ' if f == 'gender':
s += f + ' = P.' + str(fs[f]) + ' ; '
else:
s += f + ' = ' + '"' + str(fs[f]) + '"' + ' ; '
return s[:-3] + '}' # removing last ; return s[:-3] + '}' # removing last ;
if pos in cats: if pos in cats: