diff --git a/unittest/unittest.py b/unittest/unittest.py index 8f2d8b04..fa01d6b1 100644 --- a/unittest/unittest.py +++ b/unittest/unittest.py @@ -1,11 +1,10 @@ """ Python 2+3 script for unit testing RGL grammars. -Usage: python path-to-script.py path/to/testfile.gftest (...) -The script must be located in a sibling directory +This script must be located in a sibling directory to the RGL 'src' directory to work properly. -For for information see README.md +For more information see README.md, or run with argument '-h' """ from __future__ import print_function @@ -13,6 +12,7 @@ from __future__ import print_function import sys import io import os.path +import argparse from subprocess import Popen, PIPE from glob import glob @@ -20,25 +20,40 @@ GRAMMARDIR = '../src' ENCODING = 'utf-8' -def usage(): - print("Usage: python %s path/to/testfile.gftest (...)" % (sys.argv[0],)) - print() - print("(Note: to work properly this script must be located in") - print("the RGL 'unittest' directory, and gf must be in the system path)") - print() +def create_argparser(): + """Creates a command-line argument parser""" + parser = argparse.ArgumentParser( + description="Unit-test one (or more) RGL language(s).", + epilog=""" +This script must be located in a sibling directory +to the RGL 'src' directory to work properly. +For for information see README.md. 
+""") + parser.add_argument('testfile', nargs='+', + help="one (or more) .gfscript file(s) containing unittests") + parser.add_argument('-v', '--verbose', action='store_true', + help="be more verbose") + parser.add_argument('--no-pmcfg', action='store_true', + help="don't calculate the PMCFG (faster for complex grammars); " + "for this to work, every test case needs a parse tree") + return parser def error(linenr, *args): + """Prints an error to the terminal""" print("[Error at line %s]" % (linenr,), *args) def gferror(reply): + """Determines if a GF reply is an error""" return (reply.startswith('The parser failed') or reply.startswith('The sentence is not complete') + or reply.startswith('Warning:') or reply.startswith('Function') and reply.endswith('is not in scope')) def importfile(linenr, lang): + """Calculate the path to the GF file to import""" scriptdir = os.path.dirname(sys.argv[0]) or '.' langfiles = glob('%s/%s/*/%s.gf' % (scriptdir, GRAMMARDIR, lang)) if not langfiles: @@ -51,83 +66,98 @@ def importfile(linenr, lang): def stripstrings(strings): + """Strip leading/trailing blanks of every string in the given list""" return [s for s0 in strings for s in [s0.strip()] if s] -def create_gf_input_cc_only(testlines): - # building the input to the GF process out of the lines of test file - gfinput = '' - testing = False + +def numbered_np(num, noun, plural=None): + """Crude way of inflecting nouns for number""" + return "%d %s" % (num, noun if num == 1 else (plural or noun+'s')) + + +def collect_testcases(testlines): + """Parse the test file and return a list of test cases""" + tests = [[]] for linenr, line in enumerate(testlines, 1): if line.startswith('#') or line.startswith('--'): # a comment line: do nothing pass - elif ':' in line: - if not testing: - gfinput += 'ps "### %d" \n' % (linenr,) - testing = True - lang, sent = stripstrings(line.split(':', 1)) - langfile = importfile(linenr, lang) - if '/abstract/' not in langfile: - gfinput += 'ps "+++ %d %s" 
\n' % (linenr, lang) - gfinput += 'i -retain -no-pmcfg %s \n' % (langfile,) - gfinput += 'ps "%s" \n' % (sent,) # Gold standard to compare against - else: - gfinput += 'cc -unqual -one %s \n' % (sent,) elif not line.strip(): # an empty line: start a new test - testing = False + if tests[-1]: + tests.append([]) + elif ':' in line: + lang, sentence = stripstrings(line.split(':', 1)) + langfile = importfile(linenr, lang) + is_tree = '/abstract/' in langfile + tests[-1].append((is_tree, linenr, lang, langfile, sentence)) else: error(linenr, "Ill-formatted line in test file:", line) exit(1) + return tests - # if cc only, gf input is this long and complicated thing - command = [ - u'gf', - u'-run', - u'-retain', - u'-no-pmcfg', - u'-gfo-dir=/tmp'] - return (command,gfinput) - -def create_gf_input(testlines): - # building the input to the GF process out of the lines of test file - gfinput = '' - testing = False - for linenr, line in enumerate(testlines, 1): - if line.startswith('#') or line.startswith('--'): - # a comment line: do nothing - pass - elif ':' in line: - if not testing: - gfinput += 'ps "### %d" \n' % (linenr,) - testing = True - lang, sent = stripstrings(line.split(':', 1)) - gfinput += 'ps "+++ %d %s" \n' % (linenr, lang) - langfile = importfile(linenr, lang) - gfinput += 'i %s \n' % (langfile,) - if '/abstract/' in langfile: - gfinput += 'pt %s \n' % (sent,) - else: - gfinput += 'p -lang=%s "%s" \n' % (lang, sent) - elif not line.strip(): - # an empty line: start a new test - testing = False - else: - error(linenr, "Ill-formatted line in test file:", line) +def create_gf_input(testcases, args): + """Create a GF test script from the collected test cases""" + gfscript = [] + for test in testcases: + test_linenr = test[0][1] + # check if the test contains an abstract tree: + abs_linenr = abs_tree = None + test.sort(key=lambda x:x[0], reverse=True) + if test[0][0]: + _, abs_linenr, abs_lang, _, abs_tree = test.pop(0) + # the test should not consist of only a 
tree: + if not test: + error(test_linenr, "Empty test case") exit(1) + # there should not be more than one abstree in the test: + if test[0][0]: + error(test[0][1], "Multiple abstract trees in test case") + exit(1) + # if there is an abstree, we use it for linearisation: + if abs_tree: + for _, linenr, lang, langfile, sentence in test: + gfscript += ['ps "### %d"' % (test_linenr,), + 'ps "+++ %d %s"' % (abs_linenr, abs_lang)] + if not args.no_pmcfg: + gfscript += ['i %s' % (langfile,), + 'l -lang=%s %s' % (lang, abs_tree)] + else: + gfscript += ['i -retain -no-pmcfg %s' % (langfile,), + 'cc -unqual -one %s' % (abs_tree,)] + gfscript += ['ps "+++ %d %s"' % (linenr, lang), + 'ps "%s"' % (sentence,)] + # if there is no abstree, we have to use parsing; + # in this case, the flag 'no_pmcfg' is of no use: + elif args.no_pmcfg: + error(test_linenr, "The flag '--no-pmcfg' requires that all test cases contain an abstract tree") exit(1) + else: + gfscript += ['ps "### %d"' % (test_linenr,)] + for _, linenr, lang, langfile, sentence in test: + gfscript += ['ps "+++ %d %s"' % (linenr, lang), + 'i %s' % (langfile,), + 'p -lang=%s "%s"' % (lang, sentence)] + return gfscript - # If we're parsing, then command is just `gf -run' - return ('gf -run'.split(), gfinput) -def runtest(testlines,is_cc_only): +def runtest(testlines, args): + """Read the test cases, run GF and report the results""" + # first we build the input to the GF process: - if is_cc_only: - command,gfinput = create_gf_input_cc_only(testlines) - else: - command,gfinput = create_gf_input(testlines) + testcases = collect_testcases(testlines) + gfscript = create_gf_input(testcases, args) + + if args.verbose: + print("---+ GF testing script:") + for line in gfscript: + print(' |', line) + print() # calling GF from a subprocess: + command = 'gf -run'.split() + gfinput = '\n'.join(gfscript) + '\n' gf = Popen(command, stdin=PIPE, stdout=PIPE) stdout, _stderr = gf.communicate(gfinput.encode(ENCODING)) stdout = 
stdout.decode(ENCODING) @@ -139,35 +169,33 @@ def runtest(testlines,is_cc_only): for testnr, test in enumerate(alltests, 1): sents = stripstrings(test.split('+++')) startline = int(sents.pop(0)) - print("Test %d (line %d..): %d examples" % (testnr, startline, len(sents))) + print("Test %d (line %d..): %s" % (testnr, startline, numbered_np(len(sents), "example"))) testerrors = 0 oldresults = [] for sresults in sents: alltrees = stripstrings(sresults.splitlines()) linenr, lang = alltrees.pop(0).split() - if len(alltrees) == 0 or len(alltrees) == 1 and gferror(alltrees[0]): - theerror = alltrees[0] if alltrees else "No parse trees found" + if args.verbose: + print('---+ line %s (%s), result from GF:' % (linenr, lang)) + for tree in alltrees: + print(' |', tree) + if len(alltrees) == 0 or gferror("\n".join(alltrees)): + theerror = "\n".join(alltrees) if alltrees else "No parse trees found" error(linenr, theerror) testerrors += 1 else: - if is_cc_only: - # If is_cc_only, gfinput (and thus stdout) include gold standard - gold = alltrees.pop(0) - lin = alltrees.pop(0) - if gold != lin: - testerrors += 1 - error(linenr,"\nExpected linearisation\n\t%s \n\nActual linearisation\n\t%s" % (gold, lin)) - else: - allerrors = [(sum(tree not in oldtrees for _, _, oldtrees in oldresults), tree) - for tree in alltrees] - besterrors, besttree = min(allerrors) - if besterrors > 0: - for oldlinenr, oldlang, oldtrees in oldresults: - if besttree not in oldtrees: - error(linenr, "Line %s (%s) is not a translation of line %s (%s)" - % (linenr, lang, oldlinenr, oldlang)) - testerrors += 1 - oldresults.append((linenr, lang, alltrees)) + allerrors = [(sum(tree not in oldtrees for _, _, oldtrees in oldresults), tree) + for tree in alltrees] + besterrors, besttree = min(allerrors) + if besterrors > 0: + for oldlinenr, oldlang, oldtrees in oldresults: + if besttree not in oldtrees: + error(linenr, + "The result of line %s (%s):\n %s\n" + "is not among the results of line %s (%s):\n %s" + % 
(linenr, lang, besttree, oldlinenr, oldlang, "\n ".join(oldtrees))) + testerrors += 1 + oldresults.append((linenr, lang, alltrees)) if not testerrors: print("OK!") print() @@ -175,29 +203,23 @@ def runtest(testlines,is_cc_only): # finally we report a summary: if not totalerrors: - print("All %d tests passed!" % (len(alltests),)) + print("All %s passed!" % (numbered_np(len(alltests), "test"),)) else: - print("There were %d errors in %d tests!" % (totalerrors, len(alltests))) + print("Found %s in %s!" % (numbered_np(totalerrors, "error"), numbered_np(len(alltests), "test"))) print() if __name__ == '__main__': - if len(sys.argv) <= 1: - usage() - exit(1) - if "-only-cc" in sys.argv: - is_cc_only = True - else: - is_cc_only = False - for filename in sys.argv[1:]: - if filename != "-only-cc": - try: - print("# Testing file:", filename) - with io.open(filename, encoding=ENCODING) as F: - print() - runtest(F,is_cc_only) - except IOError as err: - print(err) + parser = create_argparser() + args = parser.parse_args() + for filename in args.testfile: + try: + print("# Testing file:", filename) + with io.open(filename, encoding=ENCODING) as F: print() - usage() - exit(1) + runtest(F, args) + except IOError as err: + print(err) + print() + parser.print_usage() + exit(1)