forked from GitHub/gf-rgl
analysing what Grammar functions are missing in each RGL language
This commit is contained in:
93
doc/show_missing.py
Normal file
93
doc/show_missing.py
Normal file
@@ -0,0 +1,93 @@
|
||||
# show missing functions in the RGL languages
|
||||
# basic usage: python3 show_missing.py >missing-in-rgl-Grammar.tsv
|
||||
|
||||
import subprocess
|
||||
|
||||
# this is the functions you want to find: default is all funs in Grammar
|
||||
GF_LIB_PATH = '/Users/aarne/GF/dist/build/rgl/'
|
||||
ALLTENSES_PATH = GF_LIB_PATH + 'alltenses/'
|
||||
RGL_SOURCE_PATH = '/Users/aarne/GF/gf-rgl/src/'
|
||||
GRAMMAR = 'Grammar'
|
||||
|
||||
# these are the languages you investigate
|
||||
LANGS = ['Cze', 'Urd']
|
||||
|
||||
# get all functions in GRAMMAR, together with their types
|
||||
def get_funs(module=GRAMMAR):
|
||||
cmd = 'echo "pg -funs" | gf -run ' + ALLTENSES_PATH + module + '.gfo'
|
||||
missing = subprocess.Popen(cmd, text=True, shell=True,
|
||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||
output = missing.communicate()[0].split(';')
|
||||
return ([(fun.split()[0].strip(), ' '.join(fun.split()[2:])) for fun in output if fun.split()])
|
||||
|
||||
# get all missing functions in GRAMMARLng; this can take a long time
|
||||
def get_missing_from_compiled(Lng, module=GRAMMAR):
|
||||
print('investigating', Lng)
|
||||
cmd = 'echo "pg -missing" | gf -run ' + ALLTENSES_PATH + module + Lng + '.gfo'
|
||||
missing = subprocess.Popen(cmd, text=True, shell=True,
|
||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||
output = missing.communicate()[0].split()
|
||||
return set(output[2:])
|
||||
|
||||
# to get all languages in the compiled RGL, use all_langs from here
|
||||
def all_rgl_compiled_langs():
|
||||
modules = subprocess.run(['ls', ALLTENSES_PATH], capture_output=True, text=True)
|
||||
files = str(modules.stdout)
|
||||
return [file[-7:-4] for file in files.split() if file[:-7].endswith(GRAMMAR)]
|
||||
|
||||
# LANGS = all_rgl_compiled_langs() # uncomment this line if you want all languages
|
||||
|
||||
|
||||
# it is much faster to use source files (sending gfos to /tmp)
|
||||
def str_until(c, s):
|
||||
i = s.find(c)
|
||||
if i >= 0:
|
||||
return s[:i]
|
||||
else:
|
||||
return s
|
||||
|
||||
def get_missing_from_source(lang, gf_file):
|
||||
cmd = 'gf -batch -retain -no-pmcfg -src -gfo-dir=/tmp ' + gf_file
|
||||
missing = subprocess.Popen(cmd, text=True, shell=True,
|
||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
output = missing.communicate()[1].split('\n')
|
||||
missing = (lang, {str_until('\x1b[39;49m', line.split()[-1])
|
||||
for line in output
|
||||
if line.strip().startswith('Warning: no linearization of')})
|
||||
return missing
|
||||
|
||||
|
||||
def all_rgl_source_modules(module=GRAMMAR):
|
||||
cmd = 'ls ' + RGL_SOURCE_PATH + '*/' + module + '?*.gf'
|
||||
files = subprocess.Popen(cmd, text=True, shell=True,
|
||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||
output = files.communicate()[0].split()
|
||||
return {file[-6:-3]: file for file in output}
|
||||
|
||||
LANGS = all_rgl_source_modules() # Lng: file dict
|
||||
|
||||
# make a text with tab-separated strings withcolumns fun, type, langs and rows for each fun
|
||||
def make_table(funs=get_funs(), module=GRAMMAR, langs=LANGS):
|
||||
header = ['fun', 'type'] + list(langs.keys()) # if dict, otherwise langs ###
|
||||
rows = [header]
|
||||
print('\t'.join(header))
|
||||
|
||||
# missings = {lang: get_missing_from_compiled(lang, module) for lang in langs}
|
||||
missings = {lang: get_missing_from_source(lang, file)[1] for lang,file in langs.items()}
|
||||
for fun in funs:
|
||||
row = [fun[0], fun[1]]
|
||||
for lang in langs.keys():
|
||||
if fun[0] in missings[lang]:
|
||||
row.append('-')
|
||||
else:
|
||||
row.append('+')
|
||||
rows.append(row)
|
||||
print('\t'.join(row))
|
||||
|
||||
|
||||
# the output is send to stdout
|
||||
make_table()
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user