forked from GitHub/gf-rgl
First version of MorphoDictHrv extracted from Wiktionary; TODO: make better use of PN and V forms
This commit is contained in:
@@ -30,7 +30,7 @@ lin
|
||||
|
||||
DefArt = {s = \\_,_,_ => []} ;
|
||||
IndefArt = {s = \\_,_,_ => []} ;
|
||||
NumPl = {s = \\_,_ => [] ; size = NS_20_} ; ---- size
|
||||
NumPl = {s = \\_,_ => [] ; size = NS_2_4} ; ---- size
|
||||
NumSg = {s = \\_,_ => [] ; size = NS_1} ;
|
||||
|
||||
UsePron pron = {
|
||||
|
||||
@@ -13,6 +13,8 @@ oper
|
||||
= Masc Anim ;
|
||||
mascInanimate : Gender
|
||||
= Masc Inanim ;
|
||||
masculine : Gender
|
||||
= Masc Inanim ;
|
||||
feminine : Gender
|
||||
= Fem ;
|
||||
neuter : Gender
|
||||
@@ -132,12 +134,27 @@ oper
|
||||
compar = velikA comp ;
|
||||
superl = superlAForms (velikA comp)
|
||||
} ;
|
||||
mkA : (posit : AForms) -> (compar : Str) -> A
|
||||
= \posit,compar -> lin A {
|
||||
posit = posit ;
|
||||
compar = velikA compar ;
|
||||
superl = superlAForms (velikA compar)
|
||||
} ;
|
||||
mkA : (posit, compar : AForms) -> A
|
||||
= \posit,compar -> lin A {
|
||||
posit = posit ;
|
||||
compar = compar ;
|
||||
superl = superlAForms compar
|
||||
} ;
|
||||
mkA : (posit : AForms) -> A
|
||||
= \posit ->
|
||||
let
|
||||
compar = regComparAForms posit
|
||||
in lin A {
|
||||
posit = posit ;
|
||||
compar = compar ;
|
||||
superl = superlAForms compar
|
||||
} ;
|
||||
} ;
|
||||
|
||||
invarA : Str -> A
|
||||
|
||||
@@ -338,12 +338,12 @@ voicing : Str -> Str = \s -> case s of {
|
||||
msins : Str ; -- nsins, pdat, ploc, pins = msins
|
||||
fsins : Str ; -- no o/e variation like in msdat
|
||||
mpnom : Str ; -- mpvoc = mpnom
|
||||
pgen : Str ; --
|
||||
mpgen : Str ; --
|
||||
} ;
|
||||
|
||||
invarAdjForms : Str -> AdjForms = \s -> {
|
||||
msnom, fsnom, nsnom, msgen, fsgen, msdat,
|
||||
fsdat, fsacc, msins, fsins, mpnom, pgen = s ;
|
||||
fsdat, fsacc, msins, fsins, mpnom, mpgen = s ;
|
||||
} ;
|
||||
|
||||
-- used in PositA but will also work in Compar and Superl by calling their record fields
|
||||
@@ -368,7 +368,7 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> {
|
||||
| <Pl,Dat|Loc|Ins, _> => afs.msins ;
|
||||
<Sg, Ins, Fem> => afs.fsins ;
|
||||
<Pl, Nom|Voc, Masc _> => afs.mpnom ;
|
||||
<Pl, Gen,_> => afs.pgen
|
||||
<Pl, Gen,_> => afs.mpgen
|
||||
}
|
||||
} ;
|
||||
|
||||
@@ -395,7 +395,7 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> {
|
||||
msins = velk + "im" ;
|
||||
fsins = velk + "om" ;
|
||||
mpnom = velk + "i" ;
|
||||
pgen = velk + "ih" ;
|
||||
mpgen = velk + "ih" ;
|
||||
} ;
|
||||
|
||||
regComparAForms : AdjForms -> AdjForms
|
||||
|
||||
10965
src/croatian/wiktionary/MorphoDictHrv.gf
Normal file
10965
src/croatian/wiktionary/MorphoDictHrv.gf
Normal file
File diff suppressed because it is too large
Load Diff
10934
src/croatian/wiktionary/MorphoDictHrvAbs.gf
Normal file
10934
src/croatian/wiktionary/MorphoDictHrvAbs.gf
Normal file
File diff suppressed because it is too large
Load Diff
@@ -40,13 +40,11 @@ ADJ_FORMS = {
|
||||
'singular': {
|
||||
'nominative': 'msnom',
|
||||
'genitive': 'msgen',
|
||||
'dative': 'msdat',
|
||||
'locative': 'msloc',
|
||||
'instrumental': 'msins'
|
||||
'dative': 'msdat'
|
||||
},
|
||||
'plural': {
|
||||
'nominative': 'mpnom',
|
||||
'genitive': 'pgen'
|
||||
'genitive': 'mpgen'
|
||||
}
|
||||
},
|
||||
'feminine': {
|
||||
@@ -54,7 +52,8 @@ ADJ_FORMS = {
|
||||
'nominative': 'fsnom',
|
||||
'genitive': 'fsgen',
|
||||
'dative': 'fsdat',
|
||||
'accusative': 'fsacc'
|
||||
'accusative': 'fsacc',
|
||||
'instrumental': 'fsins'
|
||||
}
|
||||
},
|
||||
'neuter': {
|
||||
@@ -122,7 +121,7 @@ def unaccent(word):
|
||||
|
||||
cyrillic = 'ЀЈЉЊЋЍЏАБВГДЕЖЗИКЛМНОПРСТУФХЦЧШабвгдежзиклмнопрстуфхцчшыѐђјљњћѝџӣӯ'
|
||||
|
||||
def get_forms(pos, forms):
|
||||
def get_forms(pos, forms, word):
|
||||
dict = {}
|
||||
if pos == 'noun':
|
||||
for f in forms:
|
||||
@@ -157,6 +156,9 @@ def get_forms(pos, forms):
|
||||
for c in ADJ_FORMS[g][n]:
|
||||
if c in tags:
|
||||
dict[ADJ_FORMS[g][n][c]] = unaccent(f['form'])
|
||||
elif all([t in tags for t in [
|
||||
'comparative', 'masculine', 'singular', 'nominative']]):
|
||||
dict['cmsnom'] = unaccent(f['form'])
|
||||
elif pos == 'verb':
|
||||
for f in forms:
|
||||
tags = f.get('tags', [])
|
||||
@@ -167,6 +169,8 @@ def get_forms(pos, forms):
|
||||
for g in VERB_FORMS[t][n]:
|
||||
if g in tags:
|
||||
dict[VERB_FORMS[t][n][g]] = unaccent(f['form'])
|
||||
if dict:
|
||||
dict['infin'] = unaccent(word)
|
||||
|
||||
else:
|
||||
dict['forms'] = forms[:10] ####
|
||||
@@ -178,7 +182,7 @@ def get_forms(pos, forms):
|
||||
|
||||
def lexinfo(data):
|
||||
return data['word'], {
|
||||
'pos': data['pos'], 'forms': get_forms(data['pos'], data['forms'])}
|
||||
'pos': data['pos'], 'forms': get_forms(data['pos'], data['forms'], data['word'])}
|
||||
|
||||
|
||||
# write morphology of mylang in m.json
|
||||
@@ -219,7 +223,7 @@ def print_gf_code(data, i):
|
||||
cats = {
|
||||
'name': ('PN', 7),
|
||||
'noun': ('N', 11),
|
||||
'adj': ('A', 12),
|
||||
'adj': ('A', 13),
|
||||
'verb': ('V', 12)
|
||||
}
|
||||
pos = data[lemma]['pos']
|
||||
@@ -230,7 +234,10 @@ def print_gf_code(data, i):
|
||||
else:
|
||||
s = '{'
|
||||
for f in fs:
|
||||
s += f + ' = ' + '"' + str(fs[f]) + '"' + ' ; '
|
||||
if f == 'gender':
|
||||
s += f + ' = P.' + str(fs[f]) + ' ; '
|
||||
else:
|
||||
s += f + ' = ' + '"' + str(fs[f]) + '"' + ' ; '
|
||||
return s[:-3] + '}' # removing last ;
|
||||
|
||||
if pos in cats:
|
||||
|
||||
Reference in New Issue
Block a user