diff --git a/src/arabic/wiktionary/Makefile b/src/arabic/wiktionary/Makefile
index 58fcf2b6..a14e23e5 100644
--- a/src/arabic/wiktionary/Makefile
+++ b/src/arabic/wiktionary/Makefile
@@ -3,5 +3,6 @@ all:
 	python3 read_wiktionary.py gf-cnc >MorphoDictAra.gf
 	python3 read_wiktionary.py gf-map >source_of_MorphoDictAra.jsonl
 	gf -make MorphoDictAra.gf
-	python3 read_wiktionary.py eval-funs >1-eval.txt
+	python3 read_wiktionary.py eval-funs >eval.jsonl
 	python3 to_wordnet.py >wordnet-arabic.jsonl
+	python3 read_wiktionary.py error-analysis
diff --git a/src/arabic/wiktionary/MoreAra.gf b/src/arabic/wiktionary/MoreAra.gf
new file mode 100644
index 00000000..e45b49b5
--- /dev/null
+++ b/src/arabic/wiktionary/MoreAra.gf
@@ -0,0 +1,108 @@
+resource MoreAra = CatAra ** open ParadigmsAra in {
+
+
+-- temporarily moved from ParadigmsAra
+-- paradigms for Wiktionary extraction
+---- TODO: better usage of information in Wiktionary
+
+oper
+  -- wmkN: build a noun (N) from a record of Wiktionary fields, one overload
+  -- per field combination. Overloads with a plural but no g field use masc;
+  -- the root field is accepted but not used by any overload.
+  wmkN = overload {
+    wmkN : {sg, pl : Str ; g : Gender} -> N
+      = \r -> mkN r.sg r.pl r.g nohum ; --- hum/nohum not in Wikt
+    wmkN : {sg : Str} -> N
+      = \r -> smartN r.sg ;
+    wmkN : {sg : Str ; g : Gender ; root : Str} -> N
+      = \r -> smartN r.sg ** {g = r.g} ; ----
+    wmkN : {sg : Str; g : Gender} -> N
+      = \r -> smartN r.sg ** {g = r.g} ;
+    wmkN : {sg : Str; pl : Str; g : Gender; root : Str} -> N
+      = \r -> mkN r.sg r.pl r.g nohum ; --- hum/nohum not in Wikt
+    wmkN : {sg : Str; pl : Str} -> N
+      = \r -> mkN r.sg r.pl masc nohum ; ---- ** {g = (smartN r.sg).g} ;
+    wmkN : {sg, pl : Str ; root : Str} -> N
+      = \r -> mkN r.sg r.pl masc nohum ; ----
+    wmkN : {sg : Str; root : Str} -> N
+      = \r -> smartN r.sg ;
+    } ;
+
+  -- wmkA: build an adjective (A) from a record of Wiktionary fields. When a
+  -- root is present it is passed to mkA together with whichever patterns the
+  -- overload uses; records without a root fall back to mkA r.masc_sg.
+  wmkA = overload {
+    wmkA : {root : Str} -> A
+      = \r -> mkA r.root ;
+    wmkA : {masc_sg : Str; fem_pl : Str; root : Str} -> A
+      = \r -> mkA r.root ;
+    wmkA : {masc_sg : Str; fem_sg : Str; fem_pl : Str; root : Str} -> A
+      = \r -> mkA r.root ;
+    wmkA : {masc_sg, fem_sg, masc_pl, fem_pl, root, sg_patt, pl_patt : Str} -> A
+      = \r -> mkA r.root r.sg_patt r.pl_patt ;
+    wmkA : {masc_sg, fem_sg, masc_pl, root, sg_patt, pl_patt : Str} -> A
+      = \r -> mkA r.root r.sg_patt r.pl_patt ;
+    wmkA : {fem_pl : Str; fem_sg : Str; masc_sg : Str; root : Str; sg_patt : Str} -> A
+      = \r -> mkA r.root r.sg_patt ;
+    wmkA : {fem_pl : Str; fem_sg : Str; masc_sg, masc_pl, root, sg_patt : Str} -> A
+      = \r -> mkA r.root r.sg_patt ;
+    wmkA : {masc_sg, root, sg_patt : Str} -> A
+      = \r -> mkA r.root r.sg_patt ;
+    wmkA : {masc_sg, masc_pl, root, sg_patt : Str} -> A
+      = \r -> mkA r.root r.sg_patt ;
+    wmkA : {masc_sg, fem_sg, masc_pl, fem_pl, root, pl_patt : Str} -> A
+      = \r -> mkA r.root ; ----
+    wmkA : {masc_sg, fem_sg, masc_pl, fem_pl, root : Str} -> A
+      = \r -> mkA r.root ; ----
+    wmkA : {masc_sg, fem_sg, root : Str} -> A
+      = \r -> mkA r.root ; ----
+    wmkA : {masc_sg, fem_sg, masc_pl, fem_pl, pl_patt : Str} -> A
+      = \r -> mkA r.masc_sg ; ----
+    wmkA : {masc_sg : Str; fem_sg : Str; fem_pl : Str} -> A
+      = \r -> mkA r.masc_sg ; ----
+    wmkA : {masc_sg : Str; fem_sg : Str; root : Str ; sg_patt : Str} -> A
+      = \r -> mkA r.root r.sg_patt ;
+    wmkA : {masc_sg : Str; fem_sg : Str} -> A
+      = \r -> mkA r.masc_sg ; ----
+    wmkA : {masc_sg : Str; masc_pl : Str; fem_sg : Str; fem_pl : Str} -> A
+      = \r -> mkA r.masc_sg ; ----
+    wmkA : {masc_sg : Str; masc_pl : Str; fem_sg : Str; root : Str} -> A
+      = \r -> mkA r.root ;
+    wmkA : {masc_sg : Str; masc_pl : Str; fem_sg : Str} -> A
+      = \r -> mkA r.masc_sg ; ----
+    wmkA : {masc_sg : Str; masc_pl : Str; root : Str} -> A
+      = \r -> mkA r.root ;
+    wmkA : {masc_sg : Str; masc_pl, pl_patt : Str; root : Str} -> A
+      = \r -> mkA r.root ;
+    -- root must come first, as in the other root/sg_patt/pl_patt overloads
+    wmkA : {masc_sg : Str; masc_pl, pl_patt, sg_patt : Str; root : Str} -> A
+      = \r -> mkA r.root r.sg_patt r.pl_patt ;
+    wmkA : {masc_sg : Str; masc_pl : Str} -> A
+      = \r -> mkA r.masc_sg ; ----
+    wmkA : {masc_sg : Str; masc_pl, pl_patt : Str} -> A
+      = \r -> mkA r.masc_sg ; ----
+    wmkA : {masc_sg : Str; root : Str} -> A
+      = \r -> mkA r.root ;
+    wmkA : {masc_sg : Str} -> A
+      = \r -> mkA r.masc_sg ; ----
+    } ;
+
+  -- wmkV: build a verb (V) from a record of Wiktionary fields. The root is
+  -- used instead of the perfect form when both are present; a record with
+  -- only an imperfect form currently yields the empty variants {}.
+  wmkV = overload {
+    wmkV : {perfect : Str; cls : VerbForm; root : Str} -> V
+      = \r -> mkV r.root r.cls ; ----
+    wmkV : {perfect : Str; cls : VerbForm} -> V
+      = \r -> mkV r.perfect r.cls ; ----
+    wmkV : {perfect : Str; imperfect : Str; cls : VerbForm; root : Str} -> V
+      = \r -> mkV r.root r.cls ; ----
+    wmkV : {perfect : Str; imperfect : Str; cls : VerbForm} -> V
+      = \r -> mkV r.perfect r.cls ; ----
+    wmkV : {root : Str ; cls : VerbForm} -> V
+      = \r -> mkV r.root r.cls ;
+    wmkV : {imperfect : Str} -> V
+      = \r -> variants {} ; ---- mkV r.imperfect ;
+    } ;
+
+}
\ No newline at end of file
diff --git a/src/arabic/wiktionary/read_wiktionary.py b/src/arabic/wiktionary/read_wiktionary.py
index 69099294..140852c7 100644
--- a/src/arabic/wiktionary/read_wiktionary.py
+++ b/src/arabic/wiktionary/read_wiktionary.py
@@ -104,8 +104,9 @@ if MODE == 'error-analysis':
         for line in file:
             row = json.loads(line)
             if labels := row.get('labels', None):
+                cat = row['fun'][-1]
                 verdict = row['verdict']
-                evals[(labels, verdict)] = evals.get((labels, verdict), 0) + 1
+                evals[(cat, labels, verdict)] = evals.get((cat, labels, verdict), 0) + 1
 
     for labverdict, n in sorted(list(evals.items())):
         print(labverdict, n)