producing a compilable WordNetAra.gf, with a lot of junk

2026-05-28 01:18:57 -06:00 · 2023-09-28 16:18:23 +02:00
parent 5f4bb014b8
commit 67d1e24761
2 changed files with 19 additions and 4 deletions
--- a/src/arabic/wiktionary/Makefile
+++ b/src/arabic/wiktionary/Makefile
@@ -4,5 +4,5 @@ all:
 	python3 read_wiktionary.py gf-map >source_of_MorphoDictAra.jsonl 
 	gf -make MorphoDictAra.gf
 	python3 read_wiktionary.py eval-funs >eval.jsonl 
-	python3 to_wordnet.py >wordnet-arabic.jsonl
+	python3 to_wordnet.py >WordNetAra.gf
 	python3 read_wiktionary.py error-analysis
--- a/src/arabic/wiktionary/to_wordnet.py
+++ b/src/arabic/wiktionary/to_wordnet.py
@@ -1,3 +1,4 @@
+import sys
 import csv
 import json

@@ -6,7 +7,6 @@ from arabic_utilities import *
 # to run: python3 to_wordnet.py >WordNetAra.gf
 # the following are assumed

-
 # from https://www.grammaticalframework.org/~krasimir/arabic.tsv.gz
 # WN_TSV = 'arabic.tsv'  # Krasimir
 WN_TSV = 'ar2en_words_gf.csv'  # Zarzoura
@@ -31,6 +31,9 @@ with open(MORPHO_GF) as gffile:

 # abandon_1_V2    ParseAra        ترك     (1,1,1,3,322,3)
 with open(WN_TSV) as wnfile:
+    print('--# -path=.:../gf-wordnet')
+    print('concrete WordNetAra of WordNet = CatAra ** open MorphoDictAra, MoreAra, ParadigmsAra in {') 
+
 ##    wnreader = csv.reader(wnfile, delimiter='\t')
    for row in wnfile:
 ##        word = row[-1].strip()   # does not show the Arabic, but the second-last word
@@ -38,7 +41,19 @@ with open(WN_TSV) as wnfile:
        wnfun = row.split()[-1]  # 0 in Krasimir
        cat = [c for c in wnfun if c.isalpha()][-1]  # the last letter; the dict only contains N, A, V
        funs = funmap.get((word, cat), [])
-        result = {'wnfun': wnfun, 'sought': word, 'found': funs}
-        print(json.dumps(result, ensure_ascii=False))
+        mk = 'mkV2 ' if wnfun.endswith('V2') else ''
+        results = [' '.join(['lin', wnfun, '=', mk + fs['fun'], ';', '--', str(fs['sense'])])
+                         for fs in funs]
+        if results:
+            print(results[0])
+            for r in results[1:]:
+                print('--', r)
+        else:
+            if (cat := wnfun[-2:]) in ['_A', '_N', '_V']:
+                lin = 'mk' + cat[-1] + ' "' + word + '"'
+            else:
+                lin = 'variants {}'
+            print(' '.join(['lin', wnfun, '=', lin, ';', '---', 'guess from', word]))
+    print('}')