mirror of
https://github.com/GrammaticalFramework/gf-rgl.git
synced 2026-05-27 17:08:54 -06:00
GF abstract dict generation
This commit is contained in:
@@ -99,6 +99,7 @@ def forms_for_pos(obj):
|
|||||||
if all([w in descr for w in ['construct', 'nominative', 'singular']])][:1]
|
if all([w in descr for w in ['construct', 'nominative', 'singular']])][:1]
|
||||||
return {
|
return {
|
||||||
'gf_fun': gf_fun(lemma[0], 'N') if lemma else None,
|
'gf_fun': gf_fun(lemma[0], 'N') if lemma else None,
|
||||||
|
'gf_cat': 'N',
|
||||||
'singular': lemma,
|
'singular': lemma,
|
||||||
'plural': [form[:-1] for form, descr in forms
|
'plural': [form[:-1] for form, descr in forms
|
||||||
if all([w in descr for w in ['construct', 'nominative', 'plural']])][:1],
|
if all([w in descr for w in ['construct', 'nominative', 'plural']])][:1],
|
||||||
@@ -112,6 +113,7 @@ def forms_for_pos(obj):
|
|||||||
w in ["active", "indicative", "masculine", "past", "perfective", "singular", "third-person"]])][:1]
|
w in ["active", "indicative", "masculine", "past", "perfective", "singular", "third-person"]])][:1]
|
||||||
return {
|
return {
|
||||||
'gf_fun': gf_fun(lemma[0], 'V') if lemma else None,
|
'gf_fun': gf_fun(lemma[0], 'V') if lemma else None,
|
||||||
|
'gf_cat': 'V',
|
||||||
'perfect': lemma,
|
'perfect': lemma,
|
||||||
'imperfect': [form for form, descr in forms
|
'imperfect': [form for form, descr in forms
|
||||||
if all([w in descr for
|
if all([w in descr for
|
||||||
@@ -125,6 +127,7 @@ def forms_for_pos(obj):
|
|||||||
if all([w in descr for w in ['indefinite', 'masculine', 'singular', 'informal']])][:1]
|
if all([w in descr for w in ['indefinite', 'masculine', 'singular', 'informal']])][:1]
|
||||||
return {
|
return {
|
||||||
'gf_fun': gf_fun(lemma[0], 'A') if lemma else None,
|
'gf_fun': gf_fun(lemma[0], 'A') if lemma else None,
|
||||||
|
'gf_cat': 'A',
|
||||||
'masc_singular': lemma,
|
'masc_singular': lemma,
|
||||||
'masc_plural': [form for form, descr in forms
|
'masc_plural': [form for form, descr in forms
|
||||||
if all([w in descr for w in ['indefinite', 'masculine', 'plural', 'informal']])][:1],
|
if all([w in descr for w in ['indefinite', 'masculine', 'plural', 'informal']])][:1],
|
||||||
@@ -142,9 +145,14 @@ def forms_for_pos(obj):
|
|||||||
def find_root(s):
|
def find_root(s):
|
||||||
return ''.join([c for c in s if is_arabic(c)])
|
return ''.join([c for c in s if is_arabic(c)])
|
||||||
|
|
||||||
|
import sys
|
||||||
|
MODE = sys.argv[1]
|
||||||
|
|
||||||
|
if MODE == 'gf':
|
||||||
|
print('abstract MorphoDictAraAbs = Cat ** {')
|
||||||
|
|
||||||
with open(FILTERED_WIKT) as file:
|
with open(FILTERED_WIKT) as file:
|
||||||
|
seen_gf_funs = set()
|
||||||
for line in file:
|
for line in file:
|
||||||
obj = json.loads(line)
|
obj = json.loads(line)
|
||||||
if 'Arabic lemmas' in obj.get('categories', []):
|
if 'Arabic lemmas' in obj.get('categories', []):
|
||||||
@@ -157,23 +165,17 @@ with open(FILTERED_WIKT) as file:
|
|||||||
}
|
}
|
||||||
# entry['n_forms'] = len(entry['forms'])
|
# entry['n_forms'] = len(entry['forms'])
|
||||||
# print(entry['pos'], entry['n_forms'])
|
# print(entry['pos'], entry['n_forms'])
|
||||||
|
if MODE == 'json':
|
||||||
print(json.dumps(entry, ensure_ascii=False))
|
print(json.dumps(entry, ensure_ascii=False))
|
||||||
|
|
||||||
|
if MODE == 'gf':
|
||||||
|
|
||||||
"""
|
if 'gf_fun' in entry['forms'] and entry['forms']['gf_fun']:
|
||||||
"senses": [
|
if entry['forms']['gf_fun'] not in seen_gf_funs:
|
||||||
{"examples": [
|
print('fun', entry['forms']['gf_fun'], ':', entry['forms']['gf_cat'], ';', '--', entry['senses'])
|
||||||
{"text": "10th century, Al-Mutanabbi\nذُو الْعَقْلِ يَشْقَى فِي النَّعِيمِ بِعَقْلِهِ / وَأَخُو الْجَهَالَةِ فِي الشَّقَاوَةِ يَنْعَمُ\nḏū l-ʕaqli yašqā fī an-naʕīmi biʕaqlihi / waʔaḵū l-jahālati fī š-šaqāwati yanʕamu", "english": "(please add an English translation of this quotation)", "type": "quotation"}],
|
seen_gf_funs.add(entry['forms']['gf_fun'])
|
||||||
"links": [
|
|
||||||
["bliss", "bliss#English"], ["delight", "delight#English"]],
|
# to do: rename duplicate function names: of 13762 names, 12946 are unique
|
||||||
"categories": ["Arabic terms with quotations", "Requests for translations of Arabic quotations"],
|
|
||||||
"glosses": ["bliss, delight"]
|
if MODE == 'gf':
|
||||||
},
|
print('}')
|
||||||
{"links": [
|
|
||||||
["heaven", "heaven"], ["Heaven", "Heaven"], ["paradise", "paradise"], ["Paradise", "Paradise"]],
|
|
||||||
"synonyms": [{"word": "فِرْدَوس"}, {"word": "جَنَّة"}],
|
|
||||||
"antonyms": [{"word": "سَعِير"}, {"word": "لَظَىٰ"}, {"word": "النَّار"}, {"word": "جَهَنَّم"}, {"word": "جَحِيم"}, {"word": "حُطَمَة"}, {"word": "سَقَر"}, {"word": "هَاوِيَة"}],
|
|
||||||
"raw_glosses": ["(figurative) heaven, the Heaven, paradise, the Paradise"],
|
|
||||||
"glosses": ["heaven, the Heaven, paradise, the Paradise"],
|
|
||||||
"tags": ["figuratively"]}]
|
|
||||||
"""
|
|
||||||
|
|||||||
Reference in New Issue
Block a user