forked from GitHub/gf-rgl
moved wikt-specific paradigms to a separate file (for the moment)
This commit is contained in:
@@ -868,6 +868,8 @@ formV : (root : Str) -> VerbForm -> V = \s,f -> case f of {
|
|||||||
param VerbForm =
|
param VerbForm =
|
||||||
FormI | FormII | FormIII | FormIV | FormV | FormVI | FormVII | FormVIII | FormX | FormXI ;
|
FormI | FormII | FormIII | FormIV | FormV | FormVI | FormVII | FormVIII | FormX | FormXI ;
|
||||||
|
|
||||||
|
|
||||||
|
{- temporarily moved to wiktionary/MoreAra.gf
|
||||||
-- paradigms for Wiktionary extraction
|
-- paradigms for Wiktionary extraction
|
||||||
---- TODO: better usage of information in Wiktionary
|
---- TODO: better usage of information in Wiktionary
|
||||||
|
|
||||||
@@ -894,55 +896,55 @@ oper
|
|||||||
wmkA = overload {
|
wmkA = overload {
|
||||||
wmkA : {root : Str} -> A
|
wmkA : {root : Str} -> A
|
||||||
= \r -> mkA r.root ;
|
= \r -> mkA r.root ;
|
||||||
mkA : {masc_sg : Str; fem_pl : Str; root : Str} -> A
|
wmkA : {masc_sg : Str; fem_pl : Str; root : Str} -> A
|
||||||
= \r -> mkA r.root ;
|
= \r -> mkA r.root ;
|
||||||
mkA : {masc_sg : Str; fem_sg : Str; fem_pl : Str; root : Str} -> A
|
wmkA : {masc_sg : Str; fem_sg : Str; fem_pl : Str; root : Str} -> A
|
||||||
= \r -> mkA r.root ;
|
= \r -> mkA r.root ;
|
||||||
mkA : {masc_sg, fem_sg, masc_pl, fem_pl, root, sg_patt, pl_patt : Str} -> A
|
wmkA : {masc_sg, fem_sg, masc_pl, fem_pl, root, sg_patt, pl_patt : Str} -> A
|
||||||
= \r -> mkA r.root r.sg_patt r.pl_patt ;
|
= \r -> mkA r.root r.sg_patt r.pl_patt ;
|
||||||
mkA : {masc_sg, fem_sg, masc_pl, root, sg_patt, pl_patt : Str} -> A
|
wmkA : {masc_sg, fem_sg, masc_pl, root, sg_patt, pl_patt : Str} -> A
|
||||||
= \r -> mkA r.root r.sg_patt r.pl_patt ;
|
= \r -> mkA r.root r.sg_patt r.pl_patt ;
|
||||||
mkA : {fem_pl : Str; fem_sg : Str; masc_sg : Str; root : Str; sg_patt : Str} -> A
|
wmkA : {fem_pl : Str; fem_sg : Str; masc_sg : Str; root : Str; sg_patt : Str} -> A
|
||||||
= \r -> mkA r.root r.sg_patt ;
|
= \r -> mkA r.root r.sg_patt ;
|
||||||
mkA : {fem_pl : Str; fem_sg : Str; masc_sg, masc_pl, root, sg_patt : Str} -> A
|
wmkA : {fem_pl : Str; fem_sg : Str; masc_sg, masc_pl, root, sg_patt : Str} -> A
|
||||||
= \r -> mkA r.root r.sg_patt ;
|
= \r -> mkA r.root r.sg_patt ;
|
||||||
mkA : {masc_sg, root, sg_patt : Str} -> A
|
wmkA : {masc_sg, root, sg_patt : Str} -> A
|
||||||
= \r -> mkA r.root r.sg_patt ;
|
= \r -> mkA r.root r.sg_patt ;
|
||||||
mkA : {masc_sg, masc_pl, root, sg_patt : Str} -> A
|
wmkA : {masc_sg, masc_pl, root, sg_patt : Str} -> A
|
||||||
= \r -> mkA r.root r.sg_patt ;
|
= \r -> mkA r.root r.sg_patt ;
|
||||||
mkA : {masc_sg, fem_sg, masc_pl, fem_pl, root, pl_patt : Str} -> A
|
wmkA : {masc_sg, fem_sg, masc_pl, fem_pl, root, pl_patt : Str} -> A
|
||||||
= \r -> mkA r.root ; ----
|
= \r -> mkA r.root ; ----
|
||||||
mkA : {masc_sg, fem_sg, masc_pl, fem_pl, root : Str} -> A
|
wmkA : {masc_sg, fem_sg, masc_pl, fem_pl, root : Str} -> A
|
||||||
= \r -> mkA r.root ; ----
|
= \r -> mkA r.root ; ----
|
||||||
mkA : {masc_sg, fem_sg, root : Str} -> A
|
wmkA : {masc_sg, fem_sg, root : Str} -> A
|
||||||
= \r -> mkA r.root ; ----
|
= \r -> mkA r.root ; ----
|
||||||
mkA : {masc_sg, fem_sg, masc_pl, fem_pl, pl_patt : Str} -> A
|
wmkA : {masc_sg, fem_sg, masc_pl, fem_pl, pl_patt : Str} -> A
|
||||||
= \r -> mkA r.masc_sg ; ----
|
= \r -> mkA r.masc_sg ; ----
|
||||||
mkA : {masc_sg : Str; fem_sg : Str; fem_pl : Str} -> A
|
wmkA : {masc_sg : Str; fem_sg : Str; fem_pl : Str} -> A
|
||||||
= \r -> mkA r.masc_sg ; ----
|
= \r -> mkA r.masc_sg ; ----
|
||||||
mkA : {masc_sg : Str; fem_sg : Str; root : Str ; sg_patt : Str} -> A
|
wmkA : {masc_sg : Str; fem_sg : Str; root : Str ; sg_patt : Str} -> A
|
||||||
= \r -> mkA r.root r.sg_patt ;
|
= \r -> mkA r.root r.sg_patt ;
|
||||||
mkA : {masc_sg : Str; fem_sg : Str} -> A
|
wmkA : {masc_sg : Str; fem_sg : Str} -> A
|
||||||
= \r -> mkA r.masc_sg ; ----
|
= \r -> mkA r.masc_sg ; ----
|
||||||
mkA : {masc_sg : Str; masc_pl : Str; fem_sg : Str; fem_pl : Str} -> A
|
wmkA : {masc_sg : Str; masc_pl : Str; fem_sg : Str; fem_pl : Str} -> A
|
||||||
= \r -> mkA r.masc_sg ; ----
|
= \r -> mkA r.masc_sg ; ----
|
||||||
mkA : {masc_sg : Str; masc_pl : Str; fem_sg : Str; root : Str} -> A
|
wmkA : {masc_sg : Str; masc_pl : Str; fem_sg : Str; root : Str} -> A
|
||||||
= \r -> mkA r.root ;
|
= \r -> mkA r.root ;
|
||||||
mkA : {masc_sg : Str; masc_pl : Str; fem_sg : Str} -> A
|
wmkA : {masc_sg : Str; masc_pl : Str; fem_sg : Str} -> A
|
||||||
= \r -> mkA r.masc_sg ; ----
|
= \r -> mkA r.masc_sg ; ----
|
||||||
mkA : {masc_sg : Str; masc_pl : Str; root : Str} -> A
|
wmkA : {masc_sg : Str; masc_pl : Str; root : Str} -> A
|
||||||
= \r -> mkA r.root ;
|
= \r -> mkA r.root ;
|
||||||
mkA : {masc_sg : Str; masc_pl, pl_patt : Str; root : Str} -> A
|
wmkA : {masc_sg : Str; masc_pl, pl_patt : Str; root : Str} -> A
|
||||||
= \r -> mkA r.root ;
|
= \r -> mkA r.root ;
|
||||||
mkA : {masc_sg : Str; masc_pl, pl_patt, sg_patt : Str; root : Str} -> A
|
wmkA : {masc_sg : Str; masc_pl, pl_patt, sg_patt : Str; root : Str} -> A
|
||||||
= \r -> mkA r.sg_patt r.pl_patt ;
|
= \r -> mkA r.sg_patt r.pl_patt ;
|
||||||
mkA : {masc_sg : Str; masc_pl : Str} -> A
|
wmkA : {masc_sg : Str; masc_pl : Str} -> A
|
||||||
= \r -> mkA r.masc_sg ; ----
|
= \r -> mkA r.masc_sg ; ----
|
||||||
mkA : {masc_sg : Str; masc_pl, pl_patt : Str} -> A
|
wmkA : {masc_sg : Str; masc_pl, pl_patt : Str} -> A
|
||||||
= \r -> mkA r.masc_sg ; ----
|
= \r -> mkA r.masc_sg ; ----
|
||||||
mkA : {masc_sg : Str; root : Str} -> A
|
wmkA : {masc_sg : Str; root : Str} -> A
|
||||||
= \r -> mkA r.root ;
|
= \r -> mkA r.root ;
|
||||||
mkA : {masc_sg : Str} -> A
|
wmkA : {masc_sg : Str} -> A
|
||||||
= \r -> mkA r.masc_sg ; ----
|
= \r -> mkA r.masc_sg ; ----
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
@@ -960,5 +962,5 @@ oper
|
|||||||
wmkV : {imperfect : Str} -> V
|
wmkV : {imperfect : Str} -> V
|
||||||
= \r -> variants {} ; ---- mkV r.imperfect ;
|
= \r -> variants {} ; ---- mkV r.imperfect ;
|
||||||
} ;
|
} ;
|
||||||
|
-}
|
||||||
} ;
|
} ;
|
||||||
|
|||||||
@@ -4,4 +4,4 @@ all:
|
|||||||
python3 read_wiktionary.py gf-map >source_of_MorphoDictAra.jsonl
|
python3 read_wiktionary.py gf-map >source_of_MorphoDictAra.jsonl
|
||||||
gf -make MorphoDictAra.gf
|
gf -make MorphoDictAra.gf
|
||||||
python3 read_wiktionary.py eval-funs >1-eval.txt
|
python3 read_wiktionary.py eval-funs >1-eval.txt
|
||||||
python3 to_wordnet.py >wornet-arabic.jsonl
|
python3 to_wordnet.py >wordnet-arabic.jsonl
|
||||||
|
|||||||
@@ -71,6 +71,10 @@ CONCRETE_MODULE = 'MorphoDictAra'
|
|||||||
|
|
||||||
# concrete syntax file, to debug sources of linearizations
|
# concrete syntax file, to debug sources of linearizations
|
||||||
CONCRETE_FILE = CONCRETE_MODULE + '.gf'
|
CONCRETE_FILE = CONCRETE_MODULE + '.gf'
|
||||||
|
|
||||||
|
# evaluation result file, created with mode eval-funs
|
||||||
|
EVAL_FILE = 'eval.jsonl'
|
||||||
|
|
||||||
|
|
||||||
# read a gzipped jsonl file (one object per line),
|
# read a gzipped jsonl file (one object per line),
|
||||||
# showing lines where one of a list of languages is present
|
# showing lines where one of a list of languages is present
|
||||||
@@ -93,6 +97,17 @@ if MODE == 'raw':
|
|||||||
get_gzip_json(WIKTIONARY_DUMP, 1, [EXTRACTED_LANGUAGE])
|
get_gzip_json(WIKTIONARY_DUMP, 1, [EXTRACTED_LANGUAGE])
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
|
|
||||||
|
if MODE == 'error-analysis':
    # Tally how often each (labels, verdict) combination occurs in the
    # evaluation report, reading one JSON object per line from EVAL_FILE,
    # and print the combinations with their counts in sorted order.
    # NOTE(review): unlike the 'raw' mode, this branch does not call exit();
    # confirm whether the rest of the script is meant to run afterwards.
    # NOTE(review): EVAL_FILE is 'eval.jsonl', while the Makefile redirects
    # eval-funs output to 1-eval.txt -- verify which file is intended.
    evals = {}
    # The evaluation report is printed with ensure_ascii=False elsewhere in
    # this script, so decode it as UTF-8 explicitly instead of depending on
    # the locale's default encoding.
    with open(EVAL_FILE, encoding='utf-8') as file:
        for line in file:
            row = json.loads(line)
            # rows without labels (or with empty labels) are not counted
            if labels := row.get('labels', None):
                # A JSON array decodes to a list, which is unhashable and
                # cannot be part of a dict key; freeze it into a tuple.
                if isinstance(labels, list):
                    labels = tuple(labels)
                verdict = row['verdict']
                evals[(labels, verdict)] = evals.get((labels, verdict), 0) + 1
    # sorted() accepts any iterable, so no intermediate list is needed
    for labverdict, n in sorted(evals.items()):
        print(labverdict, n)
|
||||||
|
|
||||||
# https://en.wikipedia.org/wiki/Buckwalter_transliteration
|
# https://en.wikipedia.org/wiki/Buckwalter_transliteration
|
||||||
buckwalter_dict = {
|
buckwalter_dict = {
|
||||||
@@ -378,7 +393,7 @@ def find_root(s):
|
|||||||
if MODE == 'gf-abs':
|
if MODE == 'gf-abs':
|
||||||
print('abstract MorphoDictAraAbs = Cat ** {')
|
print('abstract MorphoDictAraAbs = Cat ** {')
|
||||||
if MODE == 'gf-cnc':
|
if MODE == 'gf-cnc':
|
||||||
print('concrete MorphoDictAra of MorphoDictAraAbs = CatAra ** open ParadigmsAra in {')
|
print('concrete MorphoDictAra of MorphoDictAraAbs = CatAra ** open ParadigmsAra, MoreAra in {')
|
||||||
|
|
||||||
# go through the Arabic Wiktionary entries
|
# go through the Arabic Wiktionary entries
|
||||||
# generate functions with unique names
|
# generate functions with unique names
|
||||||
@@ -552,9 +567,9 @@ def eval_grammar(pgffile, concretename, mapfile, show=True, verbose=False):
|
|||||||
totals[cat][rep] = totals[cat].get(rep, 0) + 1
|
totals[cat][rep] = totals[cat].get(rep, 0) + 1
|
||||||
|
|
||||||
if show:
|
if show:
|
||||||
print(report)
|
print(json.dumps(report, ensure_ascii=False))
|
||||||
|
|
||||||
print(totals)
|
print(json.dumps(totals, ensure_ascii=False))
|
||||||
|
|
||||||
|
|
||||||
if MODE.startswith('eval'):
|
if MODE.startswith('eval'):
|
||||||
|
|||||||
Reference in New Issue
Block a user