forked from GitHub/gf-rgl
producing a compilable WordNetAra.gf, with a lot of junk
This commit is contained in:
@@ -4,5 +4,5 @@ all:
|
|||||||
python3 read_wiktionary.py gf-map >source_of_MorphoDictAra.jsonl
|
python3 read_wiktionary.py gf-map >source_of_MorphoDictAra.jsonl
|
||||||
gf -make MorphoDictAra.gf
|
gf -make MorphoDictAra.gf
|
||||||
python3 read_wiktionary.py eval-funs >eval.jsonl
|
python3 read_wiktionary.py eval-funs >eval.jsonl
|
||||||
python3 to_wordnet.py >wordnet-arabic.jsonl
|
python3 to_wordnet.py >WordNetAra.gf
|
||||||
python3 read_wiktionary.py error-analysis
|
python3 read_wiktionary.py error-analysis
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import sys
|
||||||
import csv
|
import csv
|
||||||
import json
|
import json
|
||||||
|
|
||||||
@@ -6,7 +7,6 @@ from arabic_utilities import *
|
|||||||
# to run: python3 to_wordnet.py >arabic-wn-morpho.jsonl
|
# to run: python3 to_wordnet.py >arabic-wn-morpho.jsonl
|
||||||
# the following are assumed
|
# the following are assumed
|
||||||
|
|
||||||
|
|
||||||
# from https://www.grammaticalframework.org/~krasimir/arabic.tsv.gz
|
# from https://www.grammaticalframework.org/~krasimir/arabic.tsv.gz
|
||||||
# WN_TSV = 'arabic.tsv' # Krasimir
|
# WN_TSV = 'arabic.tsv' # Krasimir
|
||||||
WN_TSV = 'ar2en_words_gf.csv' # Zarzoura
|
WN_TSV = 'ar2en_words_gf.csv' # Zarzoura
|
||||||
@@ -31,6 +31,9 @@ with open(MORPHO_GF) as gffile:
|
|||||||
|
|
||||||
# abandon_1_V2 ParseAra ترك (1,1,1,3,322,3)
|
# abandon_1_V2 ParseAra ترك (1,1,1,3,322,3)
|
||||||
with open(WN_TSV) as wnfile:
|
with open(WN_TSV) as wnfile:
|
||||||
|
print('--# -path=.:../gf-wordnet')
|
||||||
|
print('concrete WordNetAra of WordNet = CatAra ** open MorphoDictAra, MoreAra, ParadigmsAra in {')
|
||||||
|
|
||||||
## wnreader = csv.reader(wnfile, delimiter='\t')
|
## wnreader = csv.reader(wnfile, delimiter='\t')
|
||||||
for row in wnfile:
|
for row in wnfile:
|
||||||
## word = row[-1].strip() # does not show the Arabic, but the second-last word
|
## word = row[-1].strip() # does not show the Arabic, but the second-last word
|
||||||
@@ -38,7 +41,19 @@ with open(WN_TSV) as wnfile:
|
|||||||
wnfun = row.split()[-1] # 0 in Krasimir
|
wnfun = row.split()[-1] # 0 in Krasimir
|
||||||
cat = [c for c in wnfun if c.isalpha()][-1] # the last letter; the dict only contains N, A, V
|
cat = [c for c in wnfun if c.isalpha()][-1] # the last letter; the dict only contains N, A, V
|
||||||
funs = funmap.get((word, cat), [])
|
funs = funmap.get((word, cat), [])
|
||||||
result = {'wnfun': wnfun, 'sought': word, 'found': funs}
|
mk = 'mkV2 ' if wnfun.endswith('V2') else ''
|
||||||
print(json.dumps(result, ensure_ascii=False))
|
results = [' '.join(['lin', wnfun, '=', mk + fs['fun'], ';', '--', str(fs['sense'])])
|
||||||
|
for fs in funs]
|
||||||
|
if results:
|
||||||
|
print(results[0])
|
||||||
|
for r in results[1:]:
|
||||||
|
print('--', r)
|
||||||
|
else:
|
||||||
|
if (cat := wnfun[-2:]) in ['_A', '_N', '_V']:
|
||||||
|
lin = 'mk' + cat[-1] + ' "' + word + '"'
|
||||||
|
else:
|
||||||
|
lin = 'variants {}'
|
||||||
|
print(' '.join(['lin', wnfun, '=', lin, ';', '---', 'guess from', word]))
|
||||||
|
print('}')
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user