diff --git a/src/arabic/wiktionary/Makefile b/src/arabic/wiktionary/Makefile index a14e23e5..90dcf30a 100644 --- a/src/arabic/wiktionary/Makefile +++ b/src/arabic/wiktionary/Makefile @@ -4,5 +4,5 @@ all: python3 read_wiktionary.py gf-map >source_of_MorphoDictAra.jsonl gf -make MorphoDictAra.gf python3 read_wiktionary.py eval-funs >eval.jsonl - python3 to_wordnet.py >wordnet-arabic.jsonl + python3 to_wordnet.py >WordNetAra.gf python3 read_wiktionary.py error-analysis diff --git a/src/arabic/wiktionary/to_wordnet.py b/src/arabic/wiktionary/to_wordnet.py index 2aae047d..df82b20d 100644 --- a/src/arabic/wiktionary/to_wordnet.py +++ b/src/arabic/wiktionary/to_wordnet.py @@ -1,3 +1,4 @@ +import sys import csv import json @@ -6,7 +7,6 @@ from arabic_utilities import * # to run: python3 to_wordnet.py >arabic-wn-morpho.jsonl # the following are assumed - # from https://www.grammaticalframework.org/~krasimir/arabic.tsv.gz # WN_TSV = 'arabic.tsv' # Krasimir WN_TSV = 'ar2en_words_gf.csv' # Zarzoura @@ -31,6 +31,9 @@ with open(MORPHO_GF) as gffile: # abandon_1_V2 ParseAra ترك (1,1,1,3,322,3) with open(WN_TSV) as wnfile: + print('--# -path=.:../gf-wordnet') + print('concrete WordNetAra of WordNet = CatAra ** open MorphoDictAra, MoreAra, ParadigmsAra in {') + ## wnreader = csv.reader(wnfile, delimiter='\t') for row in wnfile: ## word = row[-1].strip() # does not show tha arabic, but the second-last word @@ -38,7 +41,19 @@ with open(WN_TSV) as wnfile: wnfun = row.split()[-1] # 0 in Krasimir cat = [c for c in wnfun if c.isalpha()][-1] # the last letter; the dict only contains N, A, V funs = funmap.get((word, cat), []) - result = {'wnfun': wnfun, 'sought': word, 'found': funs} - print(json.dumps(result, ensure_ascii=False)) + mk = 'mkV2 ' if wnfun.endswith('V2') else '' + results = [' '.join(['lin', wnfun, '=', mk + fs['fun'], ';', '--', str(fs['sense'])]) + for fs in funs] + if results: + print(results[0]) + for r in results[1:]: + print('--', r) + else: + if (cat := wnfun[-2:]) in ['_A', '_N', '_V']: + lin = 'mk' + cat[-1] + ' "' + word + '"' + else: + lin = 'variants {}' + print(' '.join(['lin', wnfun, '=', lin, ';', '---', 'guess from', word])) + print('}')