mirror of
https://github.com/GrammaticalFramework/gf-rgl.git
synced 2026-05-27 17:08:54 -06:00
94 lines
3.5 KiB
Python
94 lines
3.5 KiB
Python
# show missing functions in the RGL languages
|
|
# basic usage: python3 show_missing.py >missing-in-rgl-Grammar.tsv
|
|
|
|
import subprocess
|
|
|
|
# this is the functions you want to find: default is all funs in Grammar
|
|
GF_LIB_PATH = '/Users/aarne/GF/dist/build/rgl/'
|
|
ALLTENSES_PATH = GF_LIB_PATH + 'alltenses/'
|
|
RGL_SOURCE_PATH = '/Users/aarne/GF/gf-rgl/src/'
|
|
GRAMMAR = 'Grammar'
|
|
|
|
# these are the languages you investigate
|
|
LANGS = ['Cze', 'Urd']
|
|
|
|
# get all functions in GRAMMAR, together with their types
|
|
def get_funs(module=GRAMMAR):
|
|
cmd = 'echo "pg -funs" | gf -run ' + ALLTENSES_PATH + module + '.gfo'
|
|
missing = subprocess.Popen(cmd, text=True, shell=True,
|
|
stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
|
output = missing.communicate()[0].split(';')
|
|
return ([(fun.split()[0].strip(), ' '.join(fun.split()[2:])) for fun in output if fun.split()])
|
|
|
|
# get all missing functions in GRAMMARLng; this can take a long time
|
|
def get_missing_from_compiled(Lng, module=GRAMMAR):
|
|
print('investigating', Lng)
|
|
cmd = 'echo "pg -missing" | gf -run ' + ALLTENSES_PATH + module + Lng + '.gfo'
|
|
missing = subprocess.Popen(cmd, text=True, shell=True,
|
|
stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
|
output = missing.communicate()[0].split()
|
|
return set(output[2:])
|
|
|
|
# to get all languages in the compiled RGL, use all_langs from here
|
|
def all_rgl_compiled_langs():
|
|
modules = subprocess.run(['ls', ALLTENSES_PATH], capture_output=True, text=True)
|
|
files = str(modules.stdout)
|
|
return [file[-7:-4] for file in files.split() if file[:-7].endswith(GRAMMAR)]
|
|
|
|
# LANGS = all_rgl_compiled_langs() # uncomment this line if you want all languages
|
|
|
|
|
|
# it is much faster to use source files (sending gfos to /tmp)
|
|
def str_until(c, s):
|
|
i = s.find(c)
|
|
if i >= 0:
|
|
return s[:i]
|
|
else:
|
|
return s
|
|
|
|
def get_missing_from_source(lang, gf_file):
|
|
cmd = 'gf -batch -retain -no-pmcfg -src -gfo-dir=/tmp ' + gf_file
|
|
missing = subprocess.Popen(cmd, text=True, shell=True,
|
|
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
|
output = missing.communicate()[1].split('\n')
|
|
missing = (lang, {str_until('\x1b[39;49m', line.split()[-1])
|
|
for line in output
|
|
if line.strip().startswith('Warning: no linearization of')})
|
|
return missing
|
|
|
|
|
|
def all_rgl_source_modules(module=GRAMMAR):
|
|
cmd = 'ls ' + RGL_SOURCE_PATH + '*/' + module + '?*.gf'
|
|
files = subprocess.Popen(cmd, text=True, shell=True,
|
|
stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
|
output = files.communicate()[0].split()
|
|
return {file[-6:-3]: file for file in output}
|
|
|
|
LANGS = all_rgl_source_modules() # Lng: file dict
|
|
|
|
# make a text with tab-separated strings withcolumns fun, type, langs and rows for each fun
|
|
def make_table(funs=get_funs(), module=GRAMMAR, langs=LANGS):
|
|
header = ['fun', 'type'] + list(langs.keys()) # if dict, otherwise langs ###
|
|
rows = [header]
|
|
print('\t'.join(header))
|
|
|
|
# missings = {lang: get_missing_from_compiled(lang, module) for lang in langs}
|
|
missings = {lang: get_missing_from_source(lang, file)[1] for lang,file in langs.items()}
|
|
for fun in funs:
|
|
row = [fun[0], fun[1]]
|
|
for lang in langs.keys():
|
|
if fun[0] in missings[lang]:
|
|
row.append('-')
|
|
else:
|
|
row.append('+')
|
|
rows.append(row)
|
|
print('\t'.join(row))
|
|
|
|
|
|
# the output is send to stdout
|
|
make_table()
|
|
|
|
|
|
|
|
|