diff --git a/unittest/unittest.py b/unittest/unittest.py
index 8f2d8b04..fa01d6b1 100644
--- a/unittest/unittest.py
+++ b/unittest/unittest.py
@@ -1,11 +1,10 @@
 """
 Python 2+3 script for unit testing RGL grammars.
 
-Usage: python path-to-script.py path/to/testfile.gftest (...)
-The script must be located in a sibling directory
+This script must be located in a sibling directory
 to the RGL 'src' directory to work properly.
 
-For for information see README.md
+For more information see README.md, or run with argument '-h'
 """
 
 from __future__ import print_function
@@ -13,6 +12,7 @@ from __future__ import print_function
 import sys
 import io
 import os.path
+import argparse
 from subprocess import Popen, PIPE
 from glob import glob
 
@@ -20,25 +20,40 @@ GRAMMARDIR = '../src'
 ENCODING = 'utf-8'
 
 
-def usage():
-    print("Usage: python %s path/to/testfile.gftest (...)" % (sys.argv[0],))
-    print()
-    print("(Note: to work properly this script must be located in")
-    print("the RGL 'unittest' directory, and gf must be in the system path)")
-    print()
+def create_argparser():
+    """Create the command-line argument parser (test files, -v/--verbose, --no-pmcfg)"""
+    parser = argparse.ArgumentParser(
+                description="Unit-test one (or more) RGL language(s).",
+                epilog="""
+This script must be located in a sibling directory
+to the RGL 'src' directory to work properly.
+For more information see README.md.
+""")
+    parser.add_argument('testfile', nargs='+',
+                        help="one (or more) .gfscript file(s) containing unittests")
+    parser.add_argument('-v', '--verbose', action='store_true',
+                        help="be more verbose")
+    parser.add_argument('--no-pmcfg', action='store_true',
+                        help="don't calculate the PMCFG (faster for complex grammars); "
+                        "for this to work, every test case needs a parse tree")
+    return parser
 
 
 def error(linenr, *args):
+    """Print an error message, prefixed with the offending line number, to stdout"""
     print("[Error at line %s]" % (linenr,), *args)
 
 
 def gferror(reply):
+    """Return True if a GF reply is a known error message or a warning"""
     return (reply.startswith('The parser failed')
             or reply.startswith('The sentence is not complete')
+            or reply.startswith('Warning:')  # warnings are treated as failures too
             or reply.startswith('Function') and reply.endswith('is not in scope'))
 
 
 def importfile(linenr, lang):
+    """Calculate the path to the GF file to import"""
     scriptdir = os.path.dirname(sys.argv[0]) or '.'
     langfiles = glob('%s/%s/*/%s.gf' % (scriptdir, GRAMMARDIR, lang))
     if not langfiles:
@@ -51,83 +66,98 @@ def importfile(linenr, lang):
 
 
 def stripstrings(strings):
+    """Strip leading/trailing whitespace from every string in the list, dropping empty results"""
     return [s for s0 in strings for s in [s0.strip()] if s]
 
-def create_gf_input_cc_only(testlines):
-    # building the input to the GF process out of the lines of test file
-    gfinput = ''
-    testing = False
+
+def numbered_np(num, noun, plural=None):
+    """Return '<num> <noun>', using the plural form (default: noun+'s') unless num is 1"""
+    return "%d %s" % (num, noun if num == 1 else (plural or noun+'s'))
+
+
+def collect_testcases(testlines):
+    """Parse the test file into a list of non-empty test cases (lists of line tuples)"""
+    tests = [[]]
     for linenr, line in enumerate(testlines, 1):
         if line.startswith('#') or line.startswith('--'):
             # a comment line: do nothing
             pass
-        elif ':' in line:
-            if not testing:
-                gfinput += 'ps "### %d" \n' % (linenr,)
-                testing = True
-            lang, sent = stripstrings(line.split(':', 1))
-            langfile = importfile(linenr, lang)
-            if '/abstract/' not in langfile:
-                gfinput += 'ps "+++ %d %s" \n' % (linenr, lang)
-                gfinput += 'i -retain -no-pmcfg %s \n' % (langfile,)
-                gfinput += 'ps "%s" \n' % (sent,) # Gold standard to compare against
-            else:
-                gfinput += 'cc -unqual -one %s \n' % (sent,)
         elif not line.strip():
             # an empty line: start a new test
-            testing = False
+            if tests[-1]:
+                tests.append([])
+        elif ':' in line:
+            lang, sentence = stripstrings(line.split(':', 1))
+            langfile = importfile(linenr, lang)
+            is_tree = '/abstract/' in langfile
+            tests[-1].append((is_tree, linenr, lang, langfile, sentence))
         else:
             error(linenr, "Ill-formatted line in test file:", line)
             exit(1)
+    return [test for test in tests if test]  # drop the empty case left by a trailing blank line or an empty file
 
-    # if cc only, gf input is this long and complicated thing
-    command = [
-        u'gf',
-        u'-run',
-        u'-retain',
-        u'-no-pmcfg',
-        u'-gfo-dir=/tmp']
 
-    return (command,gfinput)
-
-def create_gf_input(testlines):
-    # building the input to the GF process out of the lines of test file
-    gfinput = ''
-    testing = False
-    for linenr, line in enumerate(testlines, 1):
-        if line.startswith('#') or line.startswith('--'):
-            # a comment line: do nothing
-            pass
-        elif ':' in line:
-            if not testing:
-                gfinput += 'ps "### %d" \n' % (linenr,)
-                testing = True
-            lang, sent = stripstrings(line.split(':', 1))
-            gfinput += 'ps "+++ %d %s" \n' % (linenr, lang)
-            langfile = importfile(linenr, lang)
-            gfinput += 'i %s \n' % (langfile,)
-            if '/abstract/' in langfile:
-                gfinput += 'pt %s \n' % (sent,)
-            else:
-                gfinput += 'p -lang=%s "%s" \n' % (lang, sent)
-        elif not line.strip():
-            # an empty line: start a new test
-            testing = False
-        else:
-            error(linenr, "Ill-formatted line in test file:", line)
+def create_gf_input(testcases, args):
+    """Create a GF test script (list of GF commands) from the test cases, which are modified in place"""
+    gfscript = []
+    for test in testcases:
+        test_linenr = test[0][1]  # line number of the first line of this test case
+        # check if the test contains an abstract tree:
+        abs_linenr = abs_tree = None
+        test.sort(key=lambda x:x[0], reverse=True)  # stable sort on is_tree: trees (True) move to the front
+        if test[0][0]:
+            _, abs_linenr, abs_lang, _, abs_tree = test.pop(0)  # only concrete-language lines remain in test
+        # the test should not consist of only a tree:
+        if not test:
+            error(test_linenr, "Empty test case")
             exit(1)
+        # there should not be more than one abstree in the test:
+        if test[0][0]:
+            error(test[0][1], "Multiple abstract trees in test case")
+            exit(1)
+        # if there is an abstree, we use it for linearisation:
+        if abs_tree:
+            for _, linenr, lang, langfile, sentence in test:
+                gfscript += ['ps "### %d"' % (test_linenr,),
+                            'ps "+++ %d %s"' % (abs_linenr, abs_lang)]
+                if not args.no_pmcfg:
+                    gfscript += ['i %s' % (langfile,),
+                                'l -lang=%s %s' % (lang, abs_tree)]
+                else:
+                    gfscript += ['i -retain -no-pmcfg %s' % (langfile,),
+                                'cc -unqual -one %s' % (abs_tree,)]
+                gfscript += ['ps "+++ %d %s"' % (linenr, lang),
+                            'ps "%s"' % (sentence,)]
+        # if there is no abstree, we have to use parsing;
+        # in this case, the flag '--no-pmcfg' cannot be honoured:
+        elif args.no_pmcfg:
+            error(test_linenr, "The flag '--no-pmcfg' requires that all test cases contain an abstract tree")
+            exit(1)
+        else:
+            gfscript += ['ps "### %d"' % (test_linenr,)]
+            for _, linenr, lang, langfile, sentence in test:
+                gfscript += ['ps "+++ %d %s"' % (linenr, lang),
+                            'i %s' % (langfile,),
+                            'p -lang=%s "%s"' % (lang, sentence)]
+    return gfscript
 
-    # If we're parsing, then command is just `gf -run'
-    return ('gf -run'.split(), gfinput)
 
-def runtest(testlines,is_cc_only):
+def runtest(testlines, args):
+    """Read the test cases, run GF and report the results"""
+
     # first we build the input to the GF process:
-    if is_cc_only:
-        command,gfinput = create_gf_input_cc_only(testlines)
-    else:
-        command,gfinput = create_gf_input(testlines)
+    testcases = collect_testcases(testlines)
+    gfscript = create_gf_input(testcases, args)
+
+    if args.verbose:
+        print("---+ GF testing script:")
+        for line in gfscript:
+            print('   |', line)
+        print()
 
     # calling GF from a subprocess:
+    command = 'gf -run'.split()
+    gfinput = '\n'.join(gfscript) + '\n'
     gf = Popen(command, stdin=PIPE, stdout=PIPE)
     stdout, _stderr = gf.communicate(gfinput.encode(ENCODING))
     stdout = stdout.decode(ENCODING)
@@ -139,35 +169,33 @@ def runtest(testlines,is_cc_only):
     for testnr, test in enumerate(alltests, 1):
         sents = stripstrings(test.split('+++'))
         startline = int(sents.pop(0))
-        print("Test %d (line %d..): %d examples" % (testnr, startline, len(sents)))
+        print("Test %d (line %d..): %s" % (testnr, startline, numbered_np(len(sents), "example")))
         testerrors = 0
         oldresults = []
         for sresults in sents:
             alltrees = stripstrings(sresults.splitlines())
             linenr, lang = alltrees.pop(0).split()
-            if len(alltrees) == 0 or len(alltrees) == 1 and gferror(alltrees[0]):
-                theerror = alltrees[0] if alltrees else "No parse trees found"
+            if args.verbose:
+                print('---+ line %s (%s), result from GF:' % (linenr, lang))
+                for tree in alltrees: 
+                    print('   |', tree)
+            if len(alltrees) == 0 or gferror("\n".join(alltrees)):
+                theerror = "\n".join(alltrees) if alltrees else "No parse trees found"
                 error(linenr, theerror)
                 testerrors += 1
             else:
-                if is_cc_only:
-                    # If is_cc_only, gfinput (and thus stdout) include gold standard
-                    gold = alltrees.pop(0)
-                    lin = alltrees.pop(0)
-                    if gold != lin:
-                        testerrors += 1
-                        error(linenr,"\nExpected linearisation\n\t%s \n\nActual linearisation\n\t%s" % (gold, lin))
-                else:
-                    allerrors = [(sum(tree not in oldtrees for _, _, oldtrees in oldresults), tree)
-                                 for tree in alltrees]
-                    besterrors, besttree = min(allerrors)
-                    if besterrors > 0:
-                        for oldlinenr, oldlang, oldtrees in oldresults:
-                            if besttree not in oldtrees:
-                                error(linenr, "Line %s (%s) is not a translation of line %s (%s)"
-                                        % (linenr, lang, oldlinenr, oldlang))
-                                testerrors += 1
-                    oldresults.append((linenr, lang, alltrees))
+                allerrors = [(sum(tree not in oldtrees for _, _, oldtrees in oldresults), tree)
+                                for tree in alltrees]
+                besterrors, besttree = min(allerrors)
+                if besterrors > 0:
+                    for oldlinenr, oldlang, oldtrees in oldresults:
+                        if besttree not in oldtrees:
+                            error(linenr, 
+                                    "The result of line %s (%s):\n    %s\n"
+                                    "is not among the results of line %s (%s):\n    %s"
+                                    % (linenr, lang, besttree, oldlinenr, oldlang, "\n    ".join(oldtrees)))
+                            testerrors += 1
+                oldresults.append((linenr, lang, alltrees))
         if not testerrors:
             print("OK!")
         print()
@@ -175,29 +203,23 @@ def runtest(testlines,is_cc_only):
 
     # finally we report a summary:
     if not totalerrors:
-        print("All %d tests passed!" % (len(alltests),))
+        print("All %s passed!" % (numbered_np(len(alltests), "test"),))
     else:
-        print("There were %d errors in %d tests!" % (totalerrors, len(alltests)))
+        print("Found %s in %s!" % (numbered_np(totalerrors, "error"), numbered_np(len(alltests), "test")))
     print()
 
 
 if __name__ == '__main__':
-    if len(sys.argv) <= 1:
-        usage()
-        exit(1)
-    if "-only-cc" in sys.argv:
-        is_cc_only = True
-    else:
-        is_cc_only = False
-    for filename in sys.argv[1:]:
-        if filename != "-only-cc":
-            try:
-                print("# Testing file:", filename)
-                with io.open(filename, encoding=ENCODING) as F:
-                    print()
-                    runtest(F,is_cc_only)
-            except IOError as err:
-                print(err)
+    parser = create_argparser()
+    args = parser.parse_args()  # argparse itself exits with a usage message on bad arguments
+    for filename in args.testfile:
+        try:
+            print("# Testing file:", filename)
+            with io.open(filename, encoding=ENCODING) as F:
                 print()
-                usage()
-                exit(1)
+                runtest(F, args)
+        except IOError as err:  # e.g. the test file is missing or unreadable
+            print(err)
+            print()
+            parser.print_usage()
+            exit(1)  # stop at the first I/O failure; remaining files are not tested