mirror of
https://github.com/GrammaticalFramework/gf-rgl.git
synced 2026-05-27 08:58:55 -06:00
restructuring and commenting get_dict.py
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
ita:
|
||||
cp -p MorphoDictItaAbs.header MorphoDictItaAbs.gf
|
||||
cp -p MorphoDictIta.header MorphoDictIta.gf
|
||||
python3 get_dict.py | grep -v "lin " >>MorphoDictItaAbs.gf
|
||||
python3 get_dict.py | grep -v "fun " >>MorphoDictIta.gf
|
||||
python3 get_dict.py Ita tmp/IrregItaAbs.gf tmp/entries | grep -v "lin " >>MorphoDictItaAbs.gf
|
||||
python3 get_dict.py Ita tmp/IrregItaAbs.gf tmp/entries | grep -v "fun " >>MorphoDictIta.gf
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,8 +1,113 @@
|
||||
import sys
|
||||
|
||||
# Dictionary path passed to get_dict() by main().
# NOTE(review): main() also reads a dictionary path from sys.argv[3]; confirm
# whether this module-level default is still needed.
source = "tmp/sentries"
|
||||
# Presumably the GF abstract file whose irregular-verb names get_irregs() collects.
# NOTE(review): main() assigns a local irregsfile from sys.argv[2], which shadows
# this value; confirm whether this module-level default is still needed.
irregsfile = "tmp/IrregItaAbs.gf"
|
||||
"""
|
||||
Converting a form-lemma-description lexicon to a GF lexicon.
|
||||
|
||||
Example: Romance resources from http://nlp.lsi.upc.edu/freeling/index.php/node/12
|
||||
|
||||
$ python3 get_dict.py Ita tmp/IrregItaAbs.gf tmp/entries
|
||||
|
||||
where entries is obtained by
|
||||
|
||||
$ cat adjs adv noun verb other | sort -u >entries
|
||||
|
||||
in FreeLing/data/it/dictionary/entries/
|
||||
In this file, each line has three words, e.g.
|
||||
|
||||
abbondanze abbondanza NCFP000
|
||||
|
||||
from which the script produces two lines,
|
||||
|
||||
fun abbondanza_N : N ;
|
||||
lin abbondanza_N = mkN "abbondanza" "abbondanze" feminine ;
|
||||
|
||||
These can be directed to an abstract and a concrete module by using grep, as shown in the Makefile.
|
||||
|
||||
Adaptation to a new language requires extending lang_args() with a new language code and the functions for that language.
|
||||
|
||||
"""
|
||||
# an auxiliary function: wraps a word form in double quotes so it can be used as a string argument in a lin rule
|
||||
|
||||
def quoted(s):
    """Return the string s enclosed in double-quote characters."""
    quote_mark = '"'
    return quote_mark + s + quote_mark
|
||||
|
||||
######################
|
||||
## language-specific code
|
||||
######################
|
||||
|
||||
## Ita = Italian
|
||||
|
||||
# how each argument is obtained from the set of descriptions
|
||||
def ita_noun_args(irregs,entry):
    """Build the rule parts for an Italian common noun.

    entry maps morphological tags to word forms, e.g.
    {'NCFS000': 'abbondanza', 'NCFP000': 'abbondanze'}.  In these tags the
    third character is the gender (F/M) and the fourth the number
    (S = singular, P = plural, N = invariable), following the FreeLing tagset.
    irregs is not used; it is accepted so that every *_args function has the
    same signature.

    Returns [category, lemma, constructor, argument, ...]:
      - ['NSg', sg, 'mkNSg', '"sg"', gender]        when no plural form exists,
      - ['NPl', pl, 'mkNPl', '"pl"', gender]        when no singular form exists,
      - ['N', sg, 'mkN', '"sg"', '"pl"', gender]    otherwise.
    A missing form is represented by the string 'NONE'.
    """
    # first tag present in the entry, in order of preference
    sg_tag = next((t for t in ('NCFS000', 'NCMS000', 'NCFN000', 'NCMN000') if t in entry), None)
    pl_tag = next((t for t in ('NCFP000', 'NCMP000') if t in entry), None)
    arg1 = entry[sg_tag] if sg_tag is not None else 'NONE'
    arg2 = entry[pl_tag] if pl_tag is not None else 'NONE'

    # The gender is read from the tag that supplies the lemma, so that
    # feminine nouns that are invariable (NCFN000) or plural-only (NCFP000)
    # are no longer reported as masculine.
    lemma_tag = sg_tag if sg_tag is not None else pl_tag
    if lemma_tag is not None and lemma_tag[2] == 'F':
        arg3 = 'feminine'
    else:
        arg3 = 'masculine'

    if arg2 == 'NONE':
        return ['NSg', arg1, 'mkNSg', quoted(arg1), arg3]  # nouns that occur in singular only
    elif arg1 == 'NONE':
        return ['NPl', arg2, 'mkNPl', quoted(arg2), arg3]  # nouns that occur in plural only
    else:
        return ['N', arg1, 'mkN', quoted(arg1), quoted(arg2), arg3]
|
||||
|
||||
|
||||
def ita_adj_args(irregs,entry):
    """Build the rule parts for an Italian adjective.

    entry maps morphological tags to word forms.  The four arguments of mkA
    are looked up as masculine singular, feminine singular, masculine plural
    and feminine plural (tags AQ0MS00, AQ0FS00, AQ0MP00, AQ0FP00); each falls
    back to the common-gender form of the same number (AQ0CS00 / AQ0CP00) and
    then to the invariable form (AQ0CN00).
    irregs is not used; it is accepted so that every *_args function has the
    same signature.

    Returns ['A', lemma, 'mkA', '"m.sg"', '"f.sg"', '"m.pl"', '"f.pl"'].
    A missing form is represented by a string starting with 'NONE'.
    """
    def form(*tags, missing='NONE'):
        # first form found among tags, in order of preference
        for tag in tags:
            if tag in entry:
                return entry[tag]
        return missing

    arg1 = form('AQ0MS00', 'AQ0CS00', 'AQ0CN00')
    arg2 = form('AQ0FS00', 'AQ0CS00', 'AQ0CN00')
    arg3 = form('AQ0MP00', 'AQ0CP00', 'AQ0CN00')
    # The feminine plural falls back to the common *plural* AQ0CP00, like the
    # masculine plural; the common singular AQ0CS00 was used here before.
    arg4 = form('AQ0FP00', 'AQ0CP00', 'AQ0CN00', missing='NONE_' + str(entry))
    return ['A', arg1, 'mkA', quoted(arg1), quoted(arg2), quoted(arg3), quoted(arg4)]
|
||||
|
||||
|
||||
def ita_verb_args(irregs,entry):
    """Build the rule parts for an Italian verb.

    The lemma is the form stored under tag VMN0000 (presumably the
    infinitive).  If lemma + '_V' occurs in irregs, the rule refers to the
    existing IrregIta.<lemma>_V and has no constructor; otherwise the verb is
    built with mkV.  A missing form is represented by 'NONE_' followed by the
    printed entry.
    """
    infinitive = entry.get('VMN0000', 'NONE_' + str(entry))
    irregular_name = infinitive + '_V'
    if irregular_name not in irregs:
        return ['V', infinitive, 'mkV', quoted(infinitive)]
    return ['V', infinitive, '', 'IrregIta.' + irregular_name]
|
||||
|
||||
|
||||
def ita_adv_args(irregs,entry):
    """Build the rule parts for an adverb: ['Adv', form, 'mkAdv', '"form"'].

    The form is the one stored under tag RG; when it is missing, 'NONE_'
    followed by the printed entry is used instead.  irregs is not used.
    """
    fallback = 'NONE_' + str(entry)
    adverb = entry.get('RG', fallback)
    return ['Adv', adverb, 'mkAdv', quoted(adverb)]
|
||||
|
||||
|
||||
def ita_prep_args(irregs,entry):
    """Build the rule parts for a preposition: ['Prep', form, 'mkPrep', '"form"'].

    The form is the one stored under tag SPS00; when it is missing, 'NONE_'
    followed by the printed entry is used instead.  irregs is not used.
    """
    fallback = 'NONE_' + str(entry)
    ## SPS00 includes di,in,... but not their contractions
    preposition = entry.get('SPS00', fallback)
    return ['Prep', preposition, 'mkPrep', quoted(preposition)]
|
||||
|
||||
def ignore_args(irregs,entry):
    """Fallback mapping for entries that no other *_args function handles.

    Returns ['IGNORED', first form of the entry, '', quoted marker], where the
    marker is 'NONE_' followed by the printed entry.  Because the result
    contains 'NONE', print_entry emits the rule as a comment.
    """
    marker = 'NONE_' + str(entry)
    forms = list(entry.values())
    return ['IGNORED', forms[0], '', quoted(marker)]
|
||||
|
||||
|
||||
def ita_args(key):
    """Select the Italian *_args function for a dictionary key.

    The choice depends only on key[0]: 'A' adjective, 'N' noun, 'V' verb,
    'R' adverb, 'S' preposition; anything else is mapped to ignore_args.
    """
    handlers = {
        'A': ita_adj_args,
        'N': ita_noun_args,
        'V': ita_verb_args,
        'R': ita_adv_args,
        'S': ita_prep_args,
    }
    return handlers.get(key[0], ignore_args)
|
||||
|
||||
def lang_args(lang,key):
    """Return the *_args function to use for key in language lang.

    lang is a language code; only 'Ita' is supported here.  Supporting a new
    language means adding a branch that calls that language's selector.

    Raises ValueError for an unknown language.  Previously a message was
    printed to standard output (which the Makefile pipes into the generated
    grammar files) and None was returned, so the caller failed later when it
    tried to call the result.
    """
    if lang == 'Ita':
        return ita_args(key)
    raise ValueError("unknown language " + str(lang))
|
||||
|
||||
##########################################################
|
||||
## from this point, the code is generic for all languages
|
||||
##########################################################
|
||||
|
||||
|
||||
def get_irregs(ifile):
|
||||
file = open(ifile)
|
||||
irregs = []
|
||||
@@ -26,9 +131,6 @@ def get_dict(filename):
|
||||
dict[key][words[2]] = words[0]
|
||||
return dict
|
||||
|
||||
def quoted(s):
    """Return the string s enclosed in double-quote characters."""
    quote_mark = '"'
    return quote_mark + s + quote_mark
|
||||
|
||||
def clean_fun(lemma,cat):
|
||||
if lemma.isalpha():
|
||||
return lemma + '_' + cat
|
||||
@@ -41,86 +143,32 @@ def clean_fun(lemma,cat):
|
||||
fun += c
|
||||
return "'" + fun + '_' + cat + "'"
|
||||
|
||||
# how each argument is obtained from the set of descriptions
|
||||
def ita_noun_args(irregs,entry):
    """Build the rule parts for an Italian common noun.

    entry maps morphological tags to word forms.  In these tags the third
    character is the gender (F/M) and the fourth the number (S = singular,
    P = plural, N = invariable), following the FreeLing tagset.
    irregs is not used; it is accepted so that every *_args function has the
    same signature.

    Returns [category, function name, constructor, argument, ...], with
    category 'NSg' when no plural form exists, 'NPl' when no singular form
    exists, and 'N' otherwise.  The function name is produced by clean_fun.
    A missing form is represented by the string 'NONE'.
    """
    # first tag present in the entry, in order of preference
    sg_tag = next((t for t in ('NCFS000', 'NCMS000', 'NCFN000', 'NCMN000') if t in entry), None)
    pl_tag = next((t for t in ('NCFP000', 'NCMP000') if t in entry), None)
    arg1 = entry[sg_tag] if sg_tag is not None else 'NONE'
    arg2 = entry[pl_tag] if pl_tag is not None else 'NONE'

    # The gender is read from the tag that supplies the lemma, so that
    # feminine nouns that are invariable (NCFN000) or plural-only (NCFP000)
    # are no longer reported as masculine.
    lemma_tag = sg_tag if sg_tag is not None else pl_tag
    if lemma_tag is not None and lemma_tag[2] == 'F':
        arg3 = 'feminine'
    else:
        arg3 = 'masculine'

    if arg2 == 'NONE':
        return ['NSg', clean_fun(arg1,'NSg'), 'mkNSg', quoted(arg1), arg3]
    elif arg1 == 'NONE':
        return ['NPl', clean_fun(arg2,'NPl'), 'mkNPl', quoted(arg2), arg3]
    else:
        return ['N', clean_fun(arg1,'N'), 'mkN', quoted(arg1), quoted(arg2), arg3]
|
||||
|
||||
|
||||
def ita_adj_args(irregs,entry):
    """Build the rule parts for an Italian adjective.

    entry maps morphological tags to word forms.  The four arguments of mkA
    are looked up as masculine singular, feminine singular, masculine plural
    and feminine plural (tags AQ0MS00, AQ0FS00, AQ0MP00, AQ0FP00); each falls
    back to the common-gender form of the same number (AQ0CS00 / AQ0CP00) and
    then to the invariable form (AQ0CN00).
    irregs is not used; it is accepted so that every *_args function has the
    same signature.

    Returns ['A', function name, 'mkA', four quoted forms]; the function name
    is produced by clean_fun.  A missing form is represented by a string
    starting with 'NONE'.
    """
    def form(*tags, missing='NONE'):
        # first form found among tags, in order of preference
        for tag in tags:
            if tag in entry:
                return entry[tag]
        return missing

    arg1 = form('AQ0MS00', 'AQ0CS00', 'AQ0CN00')
    arg2 = form('AQ0FS00', 'AQ0CS00', 'AQ0CN00')
    arg3 = form('AQ0MP00', 'AQ0CP00', 'AQ0CN00')
    # The feminine plural falls back to the common *plural* AQ0CP00, like the
    # masculine plural; the common singular AQ0CS00 was used here before.
    arg4 = form('AQ0FP00', 'AQ0CP00', 'AQ0CN00', missing='NONE_' + str(entry))
    return ['A', clean_fun(arg1,'A'), 'mkA', quoted(arg1), quoted(arg2), quoted(arg3), quoted(arg4)]
|
||||
|
||||
|
||||
def ita_verb_args(irregs,entry):
    """Build the rule parts for an Italian verb.

    The lemma is the form stored under tag VMN0000 (presumably the
    infinitive).  If lemma + '_V' occurs in irregs, the rule refers to the
    existing IrregIta.<lemma>_V and has no constructor; otherwise the verb is
    built with mkV.  The function name is produced by clean_fun.
    """
    infinitive = entry.get('VMN0000', 'NONE_' + str(entry))
    irregular_name = infinitive + '_V'
    if irregular_name not in irregs:
        return ['V', clean_fun(infinitive,'V'), 'mkV', quoted(infinitive)]
    return ['V', clean_fun(infinitive,'V'), '', 'IrregIta.' + irregular_name]
|
||||
|
||||
|
||||
def ita_adv_args(irregs,entry):
    """Build the rule parts for an adverb: ['Adv', function name, 'mkAdv', '"form"'].

    The form is the one stored under tag RG; when it is missing, 'NONE_'
    followed by the printed entry is used instead.  The function name is
    produced by clean_fun.  irregs is not used.
    """
    fallback = 'NONE_' + str(entry)
    adverb = entry.get('RG', fallback)
    return ['Adv', clean_fun(adverb,'Adv'), 'mkAdv', quoted(adverb)]
|
||||
|
||||
|
||||
def ita_prep_args(irregs,entry):
    """Build the rule parts for a preposition: ['Prep', function name, 'mkPrep', '"form"'].

    The form is the one stored under tag SPS00; when it is missing, 'NONE_'
    followed by the printed entry is used instead.  The function name is
    produced by clean_fun.  irregs is not used.
    """
    fallback = 'NONE_' + str(entry)
    ## SPS00 includes di,in,... but not their contractions
    preposition = entry.get('SPS00', fallback)
    return ['Prep', clean_fun(preposition,'Prep'), 'mkPrep', quoted(preposition)]
|
||||
|
||||
def ignore_args(irregs,entry):
    """Fallback mapping for entries that no other *_args function handles.

    Returns ['IGNORED', first form of the entry, '', quoted marker], where the
    marker is 'NONE_' followed by the printed entry.
    """
    marker = 'NONE_' + str(entry)
    forms = list(entry.values())
    return ['IGNORED', forms[0], '', quoted(marker)]
|
||||
|
||||
|
||||
def ita_args(key):
    """Select the Italian *_args function for a dictionary key.

    The choice depends only on key[0]: 'A' adjective, 'N' noun, 'V' verb,
    'R' adverb, 'S' preposition; anything else is mapped to ignore_args.
    """
    handlers = {
        'A': ita_adj_args,
        'N': ita_noun_args,
        'V': ita_verb_args,
        'R': ita_adv_args,
        'S': ita_prep_args,
    }
    return handlers.get(key[0], ignore_args)
|
||||
|
||||
|
||||
def print_entry(irregs,mapping,entry):
    """Print the 'fun' and 'lin' rules for one lexicon entry.

    mapping is called as mapping(irregs, entry) and must return a list
    [category, lemma, constructor, argument, ...].  The function name used in
    both rules is clean_fun(lemma, category).  When the string 'NONE' occurs
    anywhere in the printed list (i.e. some form was missing), both rules are
    prefixed with '-- ' so that they are emitted as comments.

    Output, one line each:
        fun <name> : <category> ;
        lin <name> = <constructor> <argument> ... ;
    """
    args = mapping(irregs,entry)
    cat, lemma, constructor = args[0], args[1], args[2]
    fun = clean_fun(lemma,cat)
    # comment the rules out if any part of the mapping result is missing
    comment = '-- ' if 'NONE' in str(args) else ''
    print(' '.join([comment + "fun", fun, ":", cat, ";"]))
    print(' '.join([comment + "lin", fun, "=", constructor] + list(args[3:]) + [';']))
|
||||
|
||||
|
||||
def main():
    """Convert a form-lemma-description lexicon into GF rules on standard output.

    Command line: <lang> <irregsfile> <dictfile>.  The irregular names are
    read with get_irregs(irregsfile) and the lexicon with get_dict(dictfile);
    for every entry the 'fun' and 'lin' rules are printed with the mapping
    chosen by lang_args(lang, key).  A final "}" is printed after the rules
    (presumably closing the module opened by the header file that the
    Makefile copies first).

    Returns 0 on success and 1 when the number of arguments is wrong.
    """
    if len(sys.argv) != 4:
        # standard output is piped into the generated grammar files, so the
        # usage text goes to standard error
        print("usage: python3 get_dict <lang> <irregsfile> <dictfile>", file=sys.stderr)
        return 1
    lang, irregsfile, dictfile = sys.argv[1:]
    irregs = get_irregs(irregsfile)
    # load the lexicon once, from the path given on the command line
    entries = get_dict(dictfile)
    for key,entry in entries.items():
        # one pair of rules per entry, using the language-specific mapping
        print_entry(irregs,lang_args(lang,key),entry)
    print("}")
    return 0
|
||||
|
||||
|
||||
# Run only when executed as a script, and pass main()'s return value on as
# the process exit status (it was discarded before).
if __name__ == "__main__":
    sys.exit(main())
|
||||
|
||||
Reference in New Issue
Block a user