mirror of
https://github.com/GrammaticalFramework/gf-rgl.git
synced 2026-05-27 17:08:54 -06:00
Hrv: generating morpholex from wiktionary, in progress
This commit is contained in:
@@ -210,10 +210,22 @@ def translations(mylang, reflang, lines):
|
|||||||
})+'\n')
|
})+'\n')
|
||||||
|
|
||||||
# write GF lexical entry
|
# write GF lexical entry
|
||||||
def print_gf_code(data):
|
def print_gf_code(data, i):
|
||||||
|
|
||||||
def prrec(fs, lemma):
|
lemma = list(data.keys())[0]
|
||||||
if fs.get('status') == 'NOFORMS':
|
if any([c in cyrillic for c in lemma]):
|
||||||
|
return
|
||||||
|
|
||||||
|
cats = {
|
||||||
|
'name': ('PN', 7),
|
||||||
|
'noun': ('N', 11),
|
||||||
|
'adj': ('A', 12),
|
||||||
|
'verb': ('V', 12)
|
||||||
|
}
|
||||||
|
pos = data[lemma]['pos']
|
||||||
|
|
||||||
|
def prrec(fs, lemma, expected):
|
||||||
|
if fs.get('status') == 'NOFORMS' or len(fs) != expected:
|
||||||
return '"' + lemma + '"'
|
return '"' + lemma + '"'
|
||||||
else:
|
else:
|
||||||
s = '{'
|
s = '{'
|
||||||
@@ -221,18 +233,13 @@ def print_gf_code(data):
|
|||||||
s += f + ' = ' + '"' + str(fs[f]) + '"' + ' ; '
|
s += f + ' = ' + '"' + str(fs[f]) + '"' + ' ; '
|
||||||
return s[:-3] + '}' # removing last ;
|
return s[:-3] + '}' # removing last ;
|
||||||
|
|
||||||
cats = {'noun': 'N', 'adv': 'Adv', 'adj': 'A', 'verb': 'V'}
|
if pos in cats:
|
||||||
|
cat, expected = cats[pos]
|
||||||
lemma = list(data.keys())[0]
|
fun = "'" + lemma + '_' + str(i) + '_' + cat + "'"
|
||||||
|
if len(data[lemma]['forms']) == expected:
|
||||||
if any([c in cyrillic for c in lemma]):
|
print(' '.join(['fun', fun, ':', cat, ';']))
|
||||||
return
|
print(' '.join(['lin', fun, '=',
|
||||||
|
'mk'+cat, prrec(data[lemma]['forms'], lemma, expected),';']))
|
||||||
if data[lemma]['pos'] in cats:
|
|
||||||
cat = cats[data[lemma]['pos']]
|
|
||||||
fun = lemma + '_' + cat
|
|
||||||
print(' '.join(['fun', fun, ':', cat, ';']))
|
|
||||||
print(' '.join(['lin', fun, '=', 'mk'+cat, prrec(data[lemma]['forms'], lemma),';']))
|
|
||||||
else:
|
else:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -248,9 +255,9 @@ def main():
|
|||||||
|
|
||||||
if mode == 'gf':
|
if mode == 'gf':
|
||||||
with open(MORPHO_FINAL_FILE, "r", encoding="utf-8") as lines:
|
with open(MORPHO_FINAL_FILE, "r", encoding="utf-8") as lines:
|
||||||
for line in lines:
|
for line, i in zip(lines, range(100000)):
|
||||||
data = json.loads(line)
|
data = json.loads(line)
|
||||||
print_gf_code(data)
|
print_gf_code(data, i)
|
||||||
|
|
||||||
with open(WIKTIONARY_FILE, "r", encoding="utf-8") as lines:
|
with open(WIKTIONARY_FILE, "r", encoding="utf-8") as lines:
|
||||||
if mode == 'trans':
|
if mode == 'trans':
|
||||||
|
|||||||
Reference in New Issue
Block a user