forked from GitHub/gf-rgl
first version of MorphoDictHrv extracted from wiktionary; TODO better use of PN and V forms
This commit is contained in:
@@ -30,7 +30,7 @@ lin
|
|||||||
|
|
||||||
DefArt = {s = \\_,_,_ => []} ;
|
DefArt = {s = \\_,_,_ => []} ;
|
||||||
IndefArt = {s = \\_,_,_ => []} ;
|
IndefArt = {s = \\_,_,_ => []} ;
|
||||||
NumPl = {s = \\_,_ => [] ; size = NS_20_} ; ---- size
|
NumPl = {s = \\_,_ => [] ; size = NS_2_4} ; ---- size
|
||||||
NumSg = {s = \\_,_ => [] ; size = NS_1} ;
|
NumSg = {s = \\_,_ => [] ; size = NS_1} ;
|
||||||
|
|
||||||
UsePron pron = {
|
UsePron pron = {
|
||||||
|
|||||||
@@ -13,6 +13,8 @@ oper
|
|||||||
= Masc Anim ;
|
= Masc Anim ;
|
||||||
mascInanimate : Gender
|
mascInanimate : Gender
|
||||||
= Masc Inanim ;
|
= Masc Inanim ;
|
||||||
|
masculine : Gender
|
||||||
|
= Masc Inanim ;
|
||||||
feminine : Gender
|
feminine : Gender
|
||||||
= Fem ;
|
= Fem ;
|
||||||
neuter : Gender
|
neuter : Gender
|
||||||
@@ -132,12 +134,27 @@ oper
|
|||||||
compar = velikA comp ;
|
compar = velikA comp ;
|
||||||
superl = superlAForms (velikA comp)
|
superl = superlAForms (velikA comp)
|
||||||
} ;
|
} ;
|
||||||
|
mkA : (posit : AForms) -> (compar : Str) -> A
|
||||||
|
= \posit,compar -> lin A {
|
||||||
|
posit = posit ;
|
||||||
|
compar = velikA compar ;
|
||||||
|
superl = superlAForms (velikA compar)
|
||||||
|
} ;
|
||||||
mkA : (posit, compar : AForms) -> A
|
mkA : (posit, compar : AForms) -> A
|
||||||
= \posit,compar -> lin A {
|
= \posit,compar -> lin A {
|
||||||
posit = posit ;
|
posit = posit ;
|
||||||
compar = compar ;
|
compar = compar ;
|
||||||
superl = superlAForms compar
|
superl = superlAForms compar
|
||||||
} ;
|
} ;
|
||||||
|
mkA : (posit : AForms) -> A
|
||||||
|
= \posit ->
|
||||||
|
let
|
||||||
|
compar = regComparAForms posit
|
||||||
|
in lin A {
|
||||||
|
posit = posit ;
|
||||||
|
compar = compar ;
|
||||||
|
superl = superlAForms compar
|
||||||
|
} ;
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
invarA : Str -> A
|
invarA : Str -> A
|
||||||
|
|||||||
@@ -338,12 +338,12 @@ voicing : Str -> Str = \s -> case s of {
|
|||||||
msins : Str ; -- nsins, pdat, ploc, pins = msins
|
msins : Str ; -- nsins, pdat, ploc, pins = msins
|
||||||
fsins : Str ; -- no o/e variation like in msdat
|
fsins : Str ; -- no o/e variation like in msdat
|
||||||
mpnom : Str ; -- mpvoc = mpnom
|
mpnom : Str ; -- mpvoc = mpnom
|
||||||
pgen : Str ; --
|
mpgen : Str ; --
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
invarAdjForms : Str -> AdjForms = \s -> {
|
invarAdjForms : Str -> AdjForms = \s -> {
|
||||||
msnom, fsnom, nsnom, msgen, fsgen, msdat,
|
msnom, fsnom, nsnom, msgen, fsgen, msdat,
|
||||||
fsdat, fsacc, msins, fsins, mpnom, pgen = s ;
|
fsdat, fsacc, msins, fsins, mpnom, mpgen = s ;
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
-- used in PositA but will also work in Compar and Superl by calling their record fields
|
-- used in PositA but will also work in Compar and Superl by calling their record fields
|
||||||
@@ -368,7 +368,7 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> {
|
|||||||
| <Pl,Dat|Loc|Ins, _> => afs.msins ;
|
| <Pl,Dat|Loc|Ins, _> => afs.msins ;
|
||||||
<Sg, Ins, Fem> => afs.fsins ;
|
<Sg, Ins, Fem> => afs.fsins ;
|
||||||
<Pl, Nom|Voc, Masc _> => afs.mpnom ;
|
<Pl, Nom|Voc, Masc _> => afs.mpnom ;
|
||||||
<Pl, Gen,_> => afs.pgen
|
<Pl, Gen,_> => afs.mpgen
|
||||||
}
|
}
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
@@ -395,7 +395,7 @@ adjFormsAdjective : AdjForms -> Adjective = \afs -> {
|
|||||||
msins = velk + "im" ;
|
msins = velk + "im" ;
|
||||||
fsins = velk + "om" ;
|
fsins = velk + "om" ;
|
||||||
mpnom = velk + "i" ;
|
mpnom = velk + "i" ;
|
||||||
pgen = velk + "ih" ;
|
mpgen = velk + "ih" ;
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
regComparAForms : AdjForms -> AdjForms
|
regComparAForms : AdjForms -> AdjForms
|
||||||
|
|||||||
10965
src/croatian/wiktionary/MorphoDictHrv.gf
Normal file
10965
src/croatian/wiktionary/MorphoDictHrv.gf
Normal file
File diff suppressed because it is too large
Load Diff
10934
src/croatian/wiktionary/MorphoDictHrvAbs.gf
Normal file
10934
src/croatian/wiktionary/MorphoDictHrvAbs.gf
Normal file
File diff suppressed because it is too large
Load Diff
@@ -40,13 +40,11 @@ ADJ_FORMS = {
|
|||||||
'singular': {
|
'singular': {
|
||||||
'nominative': 'msnom',
|
'nominative': 'msnom',
|
||||||
'genitive': 'msgen',
|
'genitive': 'msgen',
|
||||||
'dative': 'msdat',
|
'dative': 'msdat'
|
||||||
'locative': 'msloc',
|
|
||||||
'instrumental': 'msins'
|
|
||||||
},
|
},
|
||||||
'plural': {
|
'plural': {
|
||||||
'nominative': 'mpnom',
|
'nominative': 'mpnom',
|
||||||
'genitive': 'pgen'
|
'genitive': 'mpgen'
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
'feminine': {
|
'feminine': {
|
||||||
@@ -54,7 +52,8 @@ ADJ_FORMS = {
|
|||||||
'nominative': 'fsnom',
|
'nominative': 'fsnom',
|
||||||
'genitive': 'fsgen',
|
'genitive': 'fsgen',
|
||||||
'dative': 'fsdat',
|
'dative': 'fsdat',
|
||||||
'accusative': 'fsacc'
|
'accusative': 'fsacc',
|
||||||
|
'instrumental': 'fsins'
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
'neuter': {
|
'neuter': {
|
||||||
@@ -122,7 +121,7 @@ def unaccent(word):
|
|||||||
|
|
||||||
cyrillic = 'ЀЈЉЊЋЍЏАБВГДЕЖЗИКЛМНОПРСТУФХЦЧШабвгдежзиклмнопрстуфхцчшыѐђјљњћѝџӣӯ'
|
cyrillic = 'ЀЈЉЊЋЍЏАБВГДЕЖЗИКЛМНОПРСТУФХЦЧШабвгдежзиклмнопрстуфхцчшыѐђјљњћѝџӣӯ'
|
||||||
|
|
||||||
def get_forms(pos, forms):
|
def get_forms(pos, forms, word):
|
||||||
dict = {}
|
dict = {}
|
||||||
if pos == 'noun':
|
if pos == 'noun':
|
||||||
for f in forms:
|
for f in forms:
|
||||||
@@ -157,6 +156,9 @@ def get_forms(pos, forms):
|
|||||||
for c in ADJ_FORMS[g][n]:
|
for c in ADJ_FORMS[g][n]:
|
||||||
if c in tags:
|
if c in tags:
|
||||||
dict[ADJ_FORMS[g][n][c]] = unaccent(f['form'])
|
dict[ADJ_FORMS[g][n][c]] = unaccent(f['form'])
|
||||||
|
elif all([t in tags for t in [
|
||||||
|
'comparative', 'masculine', 'singular', 'nominative']]):
|
||||||
|
dict['cmsnom'] = unaccent(f['form'])
|
||||||
elif pos == 'verb':
|
elif pos == 'verb':
|
||||||
for f in forms:
|
for f in forms:
|
||||||
tags = f.get('tags', [])
|
tags = f.get('tags', [])
|
||||||
@@ -167,6 +169,8 @@ def get_forms(pos, forms):
|
|||||||
for g in VERB_FORMS[t][n]:
|
for g in VERB_FORMS[t][n]:
|
||||||
if g in tags:
|
if g in tags:
|
||||||
dict[VERB_FORMS[t][n][g]] = unaccent(f['form'])
|
dict[VERB_FORMS[t][n][g]] = unaccent(f['form'])
|
||||||
|
if dict:
|
||||||
|
dict['infin'] = unaccent(word)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
dict['forms'] = forms[:10] ####
|
dict['forms'] = forms[:10] ####
|
||||||
@@ -178,7 +182,7 @@ def get_forms(pos, forms):
|
|||||||
|
|
||||||
def lexinfo(data):
|
def lexinfo(data):
|
||||||
return data['word'], {
|
return data['word'], {
|
||||||
'pos': data['pos'], 'forms': get_forms(data['pos'], data['forms'])}
|
'pos': data['pos'], 'forms': get_forms(data['pos'], data['forms'], data['word'])}
|
||||||
|
|
||||||
|
|
||||||
# write morphology of mylang in m.json
|
# write morphology of mylang in m.json
|
||||||
@@ -219,7 +223,7 @@ def print_gf_code(data, i):
|
|||||||
cats = {
|
cats = {
|
||||||
'name': ('PN', 7),
|
'name': ('PN', 7),
|
||||||
'noun': ('N', 11),
|
'noun': ('N', 11),
|
||||||
'adj': ('A', 12),
|
'adj': ('A', 13),
|
||||||
'verb': ('V', 12)
|
'verb': ('V', 12)
|
||||||
}
|
}
|
||||||
pos = data[lemma]['pos']
|
pos = data[lemma]['pos']
|
||||||
@@ -230,7 +234,10 @@ def print_gf_code(data, i):
|
|||||||
else:
|
else:
|
||||||
s = '{'
|
s = '{'
|
||||||
for f in fs:
|
for f in fs:
|
||||||
s += f + ' = ' + '"' + str(fs[f]) + '"' + ' ; '
|
if f == 'gender':
|
||||||
|
s += f + ' = P.' + str(fs[f]) + ' ; '
|
||||||
|
else:
|
||||||
|
s += f + ' = ' + '"' + str(fs[f]) + '"' + ' ; '
|
||||||
return s[:-3] + '}' # removing last ;
|
return s[:-3] + '}' # removing last ;
|
||||||
|
|
||||||
if pos in cats:
|
if pos in cats:
|
||||||
|
|||||||
Reference in New Issue
Block a user