From 35b6d8be5549eb7f7dd8ef0426f5dcc187c45734 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Peter=20Ljunglo=CC=88f?= <peter.ljunglof@heatherleaf.se>
Date: Mon, 8 Jul 2019 23:02:53 +0200
Subject: [PATCH 01/10] Unit testing for RGL languages, written in Python 2+3

---
 src/unittest.py | 147 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 147 insertions(+)
 create mode 100644 src/unittest.py

diff --git a/src/unittest.py b/src/unittest.py
new file mode 100644
index 000000000..fb4d7d79d
--- /dev/null
+++ b/src/unittest.py
@@ -0,0 +1,147 @@
+"""
+Python 2+3 script for unit testing RGL grammars.
+
+Usage: python path-to-script.py path/to/testfile.gftest ...
+The script muste be located in the RGL 'src' directory to work properly.
+
+The test file should look something like this:
+
+    LangSwe: jag sover i huset
+    LangEng: I sleep in the house
+
+    LangSwe: huset verkar stort
+    Lang: PhrUtt NoPConj (UttS (UseCl (TTAnt TPres ASimul) PPos (PredVP ...
+
+This contains two tests: Every test should be separated by empty lines.
+Every line starts with a language, followed by ":" and the sentence
+(or the abstract syntax tree if the abstract grammar is specified).
+"""
+
+from __future__ import print_function
+
+import sys
+import io
+import os.path
+from subprocess import Popen, PIPE
+from glob import glob
+
+ENCODING = 'utf-8'
+
+
+def usage():
+    print("Usage: python %s path/to/testfile.gftest ..." % (sys.argv[0],))
+    print()
+    print("(Note: to work properly this script must be located in")
+    print("the RGL 'src' directory, and gf must be in the system path)")
+    print()
+
+
+def error(linenr, *args):
+    print("[Error at line %s]" % (linenr,), *args)
+
+
+def gferror(reply):
+    return (reply.startswith('The parser failed')
+            or reply.startswith('The sentence is not complete')
+            or reply.startswith('Function') and reply.endswith('is not in scope'))
+
+
+def importfile(linenr, lang):
+    scriptdir = os.path.dirname(sys.argv[0]) or '.'
+    langfiles = glob('%s/*/%s.gf' % (scriptdir, lang))
+    if not langfiles:
+        error(linenr, "Cannot find language:", lang)
+        exit(1)
+    elif len(langfiles) > 1:
+        error(linenr, "Found multiple language files for %s:" % (lang,), *langfiles)
+        exit(1)
+    return langfiles[0]
+
+
+def stripstrings(strings):
+    return [s for s0 in strings for s in [s0.strip()] if s]
+
+
+def runtest(testlines):
+    # first we build the input to the GF process:
+    gfinput = ''
+    testing = False
+    for linenr, line in enumerate(testlines, 1):
+        if ':' in line:
+            if not testing:
+                gfinput += 'ps "### %d" \n' % (linenr,)
+                testing = True
+            lang, sent = stripstrings(line.split(':', 1))
+            gfinput += 'ps "+++ %d %s" \n' % (linenr, lang)
+            langfile = importfile(linenr, lang)
+            gfinput += 'i %s \n' % (langfile,)
+            if '/abstract/' in langfile:
+                gfinput += 'pt %s \n' % (sent,)
+            else:
+                gfinput += 'p -lang=%s "%s" \n' % (lang, sent)
+        elif not line.strip():
+            testing = False
+        else:
+            error(linenr, "Ill-formatted line in test file:", line)
+            exit(1)
+
+    # then we call GF with the script, catching stdout:
+    gf = Popen('gf -run'.split(), stdin=PIPE, stdout=PIPE)
+    stdout, _stderr = gf.communicate(gfinput.encode(ENCODING))
+    stdout = stdout.decode(ENCODING)
+
+    # then we analyse the result from the GF process:
+    totalerrors = 0
+    alltests = stripstrings(stdout.split('###'))
+    for testnr, test in enumerate(alltests, 1):
+        sents = stripstrings(test.split('+++'))
+        startline = int(sents.pop(0))
+        print("Test %d (line %d..): %d examples" % (testnr, startline, len(sents)))
+        testerrors = 0
+        oldresults = []
+        for sresults in sents:
+            alltrees = stripstrings(sresults.splitlines())
+            linenr, lang = alltrees.pop(0).split()
+            if len(alltrees) == 0 or len(alltrees) == 1 and gferror(alltrees[0]):
+                theerror = alltrees[0] if alltrees else "No parse trees found"
+                error(linenr, theerror)
+                testerrors += 1
+            else:
+                allerrors = [(sum(tree not in oldtrees for _, _, oldtrees in oldresults), tree)
+                             for tree in alltrees]
+                besterrors, besttree = min(allerrors)
+                if besterrors > 0:
+                    for oldlinenr, oldlang, oldtrees in oldresults:
+                        if besttree not in oldtrees:
+                            error(linenr, "Line %s (%s) is not a translation of line %s (%s)"
+                                    % (linenr, lang, oldlinenr, oldlang))
+                            testerrors += 1
+                oldresults.append((linenr, lang, alltrees))
+        if not testerrors:
+            print("OK!")
+        print()
+        totalerrors += testerrors
+
+    # finally we report a summary:
+    if not totalerrors:
+        print("All %d tests passed!" % (len(alltests),))
+    else:
+        print("There were %d errors in %d tests!" % (totalerrors, len(alltests)))
+    print()
+
+
+if __name__ == '__main__':
+    if len(sys.argv) <= 1:
+        usage()
+        exit(1)
+    for filename in sys.argv[1:]:
+        try:
+            print("# Testing file:", filename)
+            with io.open(filename, encoding=ENCODING) as F:
+                print()
+                runtest(F)
+        except IOError as err:
+            print(err)
+            print()
+            usage()
+            exit(1)

From 9646629fb3d20a8a1c380bf055f42e24c41b94ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Peter=20Ljunglo=CC=88f?= <peter.ljunglof@heatherleaf.se>
Date: Fri, 12 Jul 2019 10:19:06 +0200
Subject: [PATCH 02/10] unittest: Move script to new directory

---
 {src => unittest}/unittest.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename {src => unittest}/unittest.py (100%)

diff --git a/src/unittest.py b/unittest/unittest.py
similarity index 100%
rename from src/unittest.py
rename to unittest/unittest.py

From 29ee6d0d70a9b93e3e2aaa3c2ee7a4ad7e4e8411 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Peter=20Ljunglo=CC=88f?= <peter.ljunglof@heatherleaf.se>
Date: Fri, 12 Jul 2019 10:25:11 +0200
Subject: [PATCH 03/10] unittest: updated the script to be able to work from
 the unittest directory

---
 unittest/unittest.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/unittest/unittest.py b/unittest/unittest.py
index fb4d7d79d..4b2c0acc3 100644
--- a/unittest/unittest.py
+++ b/unittest/unittest.py
@@ -1,8 +1,9 @@
 """
 Python 2+3 script for unit testing RGL grammars.
 
-Usage: python path-to-script.py path/to/testfile.gftest ...
-The script muste be located in the RGL 'src' directory to work properly.
+Usage: python path-to-script.py path/to/testfile.gftest (...)
+The script must be located in a sibling directory
+to the RGL 'src' directory to work properly.
 
 The test file should look something like this:
 
@@ -25,14 +26,15 @@ import os.path
 from subprocess import Popen, PIPE
 from glob import glob
 
+GRAMMARDIR = '../src'
 ENCODING = 'utf-8'
 
 
 def usage():
-    print("Usage: python %s path/to/testfile.gftest ..." % (sys.argv[0],))
+    print("Usage: python %s path/to/testfile.gftest (...)" % (sys.argv[0],))
     print()
     print("(Note: to work properly this script must be located in")
-    print("the RGL 'src' directory, and gf must be in the system path)")
+    print("the RGL 'unittest' directory, and gf must be in the system path)")
     print()
 
 
@@ -48,7 +50,7 @@ def gferror(reply):
 
 def importfile(linenr, lang):
     scriptdir = os.path.dirname(sys.argv[0]) or '.'
-    langfiles = glob('%s/*/%s.gf' % (scriptdir, lang))
+    langfiles = glob('%s/%s/*/%s.gf' % (scriptdir, GRAMMARDIR, lang))
     if not langfiles:
         error(linenr, "Cannot find language:", lang)
         exit(1)

From 7cbe4e78106398f854452524e3cea8422675fbcc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Peter=20Ljunglo=CC=88f?= <peter.ljunglof@heatherleaf.se>
Date: Fri, 12 Jul 2019 10:38:01 +0200
Subject: [PATCH 04/10] unittest: adding support for Python- or GF-style
 comments

---
 unittest/unittest.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/unittest/unittest.py b/unittest/unittest.py
index 4b2c0acc3..199e4ca2c 100644
--- a/unittest/unittest.py
+++ b/unittest/unittest.py
@@ -69,7 +69,10 @@ def runtest(testlines):
     gfinput = ''
     testing = False
     for linenr, line in enumerate(testlines, 1):
-        if ':' in line:
+        if line.startswith('#') or line.startswith('--'):
+            # a comment line: do nothing
+            pass
+        elif ':' in line:
             if not testing:
                 gfinput += 'ps "### %d" \n' % (linenr,)
                 testing = True
@@ -82,6 +85,7 @@ def runtest(testlines):
             else:
                 gfinput += 'p -lang=%s "%s" \n' % (lang, sent)
         elif not line.strip():
+            # an empty line: start a new test
             testing = False
         else:
             error(linenr, "Ill-formatted line in test file:", line)

From 26442cdbd03884072333b06a649f3efe1258fbf9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Peter=20Ljunglo=CC=88f?= <peter.ljunglof@heatherleaf.se>
Date: Fri, 12 Jul 2019 11:05:40 +0200
Subject: [PATCH 05/10] unittest: example test file

---
 unittest/unittest-example.gftest | 41 ++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 unittest/unittest-example.gftest

diff --git a/unittest/unittest-example.gftest b/unittest/unittest-example.gftest
new file mode 100644
index 000000000..7897a79e6
--- /dev/null
+++ b/unittest/unittest-example.gftest
@@ -0,0 +1,41 @@
+# This file consists of some example unittests
+# Tests are separated by blank lines
+# Comment lines start with "#" or "--"
+
+# The recommendation is to put unittest files 
+# in the directory 'src/(language)/unittest'
+# and use the file suffix '.gftest'
+
+# Basic usage: a sentence and its translation
+LangEng: I sleep in the house
+LangSwe: jag sover i huset
+
+# Comparing a sentence and a parse tree
+LangEng: the house is big
+Lang: PhrUtt NoPConj (UttS (UseCl (TTAnt TPres ASimul) PPos (PredVP (DetCN (DetQuant DefArt NumSg) (UseN house_N)) (UseComp (CompAP (PositA big_A)))))) NoVoc
+
+-- Several translations of a sentence
+LangEng: I sleep in the house
+LangSwe: jag sover i huset
+LangGer: ich schlafe im Haus
+
+-- Ambiguous sentences (the fish can be singular or plural)
+-- The test is correct if some translations match
+-- (i.e., if the set of parse trees overlap)
+LangEng: the cat eats the fish
+LangSwe: katten äter fisken
+
+# If we only specify one sentence, the script only checks if it's possible to parse
+LangEng: the cat in Paris sleeps
+
+# This test should fail, because they are not translations of each other
+LangEng: the house is big
+LangSwe: jag sover i huset
+
+# Here are some GF parsing errors
+# This sentence cannot be parsed:
+LangEng: this is not parseable
+# This sentence is not complete:
+LangEng: I sleep in
+# This is not an abstract syntax tree:
+Lang: THIS IS NOT A TREE

From 2d3d382a417509b06164c66fcd7e57a124edb6df Mon Sep 17 00:00:00 2001
From: "John J. Camilleri" <john@digitalgrammars.com>
Date: Mon, 5 Aug 2019 10:57:26 +0200
Subject: [PATCH 06/10] unittest: create README.md as main documentation

---
 unittest/README.md   | 28 ++++++++++++++++++++++++++++
 unittest/unittest.py | 12 +-----------
 2 files changed, 29 insertions(+), 11 deletions(-)
 create mode 100644 unittest/README.md

diff --git a/unittest/README.md b/unittest/README.md
new file mode 100644
index 000000000..fdfa5e80e
--- /dev/null
+++ b/unittest/README.md
@@ -0,0 +1,28 @@
+# Python script for unit testing RGL grammars
+
+## Usage
+
+```
+python path-to-script.py path/to/testfile.gftest (...)
+```
+
+The script must be located in a sibling directory
+to the RGL `src` directory to work properly.
+
+## Test format
+
+The test file should look something like this:
+
+```
+LangSwe: jag sover i huset
+LangEng: I sleep in the house
+
+LangSwe: huset verkar stort
+Lang: PhrUtt NoPConj (UttS (UseCl (TTAnt TPres ASimul) PPos (PredVP ...
+```
+
+This contains two tests: Every test should be separated by empty lines.
+Every line starts with a language, followed by ":" and the sentence
+(or the abstract syntax tree if the abstract grammar is specified).
+
+You can also see an example in the file [`unittest-example.gftest`](unittest-example.gftest).
diff --git a/unittest/unittest.py b/unittest/unittest.py
index 199e4ca2c..f16ef5895 100644
--- a/unittest/unittest.py
+++ b/unittest/unittest.py
@@ -5,17 +5,7 @@ Usage: python path-to-script.py path/to/testfile.gftest (...)
 The script must be located in a sibling directory
 to the RGL 'src' directory to work properly.
 
-The test file should look something like this:
-
-    LangSwe: jag sover i huset
-    LangEng: I sleep in the house
-
-    LangSwe: huset verkar stort
-    Lang: PhrUtt NoPConj (UttS (UseCl (TTAnt TPres ASimul) PPos (PredVP ...
-
-This contains two tests: Every test should be separated by empty lines.
-Every line starts with a language, followed by ":" and the sentence
-(or the abstract syntax tree if the abstract grammar is specified).
+For for information see README.md
 """
 
 from __future__ import print_function

From 4c02a6c6d12d91505c014eb865fbd3922fec91a0 Mon Sep 17 00:00:00 2001
From: Inari Listenmaa <inari.listenmaa@gmail.com>
Date: Sun, 11 Aug 2019 14:34:55 +0200
Subject: [PATCH 07/10] (unittest) Add option to only use cc, never parse

Usage like before, but add -only-cc as one of the arguments. For example:

`python3 unittest/unittest.py src/somali/unittest/vp.gftest -only-cc`

In order for it to work, the test file has to only contain test cases like this:

```
LangSom: isku BIND ma barto
Lang: PhrUtt NoPConj (UttS (UseCl (TTAnt TPres ASimul) PNeg (PredVP (UsePron youSg_Pron) (ReflVP (SlashV2a teach_V2))))) NoVoc
```

&+ needs to be written as BIND.
---
 unittest/unittest.py | 106 +++++++++++++++++++++++++++++++++----------
 1 file changed, 83 insertions(+), 23 deletions(-)

diff --git a/unittest/unittest.py b/unittest/unittest.py
index f16ef5895..8f2d8b048 100644
--- a/unittest/unittest.py
+++ b/unittest/unittest.py
@@ -53,9 +53,45 @@ def importfile(linenr, lang):
 def stripstrings(strings):
     return [s for s0 in strings for s in [s0.strip()] if s]
 
+def create_gf_input_cc_only(testlines):
+    # building the input to the GF process out of the lines of test file
+    gfinput = ''
+    testing = False
+    for linenr, line in enumerate(testlines, 1):
+        if line.startswith('#') or line.startswith('--'):
+            # a comment line: do nothing
+            pass
+        elif ':' in line:
+            if not testing:
+                gfinput += 'ps "### %d" \n' % (linenr,)
+                testing = True
+            lang, sent = stripstrings(line.split(':', 1))
+            langfile = importfile(linenr, lang)
+            if '/abstract/' not in langfile:
+                gfinput += 'ps "+++ %d %s" \n' % (linenr, lang)
+                gfinput += 'i -retain -no-pmcfg %s \n' % (langfile,)
+                gfinput += 'ps "%s" \n' % (sent,) # Gold standard to compare against
+            else:
+                gfinput += 'cc -unqual -one %s \n' % (sent,)
+        elif not line.strip():
+            # an empty line: start a new test
+            testing = False
+        else:
+            error(linenr, "Ill-formatted line in test file:", line)
+            exit(1)
 
-def runtest(testlines):
-    # first we build the input to the GF process:
+    # if cc only, gf input is this long and complicated thing
+    command = [
+        u'gf',
+        u'-run',
+        u'-retain',
+        u'-no-pmcfg',
+        u'-gfo-dir=/tmp']
+
+    return (command,gfinput)
+
+def create_gf_input(testlines):
+    # building the input to the GF process out of the lines of test file
     gfinput = ''
     testing = False
     for linenr, line in enumerate(testlines, 1):
@@ -81,14 +117,25 @@ def runtest(testlines):
             error(linenr, "Ill-formatted line in test file:", line)
             exit(1)
 
-    # then we call GF with the script, catching stdout:
-    gf = Popen('gf -run'.split(), stdin=PIPE, stdout=PIPE)
+    # If we're parsing, then command is just `gf -run'
+    return ('gf -run'.split(), gfinput)
+
+def runtest(testlines,is_cc_only):
+    # first we build the input to the GF process:
+    if is_cc_only:
+        command,gfinput = create_gf_input_cc_only(testlines)
+    else:
+        command,gfinput = create_gf_input(testlines)
+
+    # calling GF from a subprocess:
+    gf = Popen(command, stdin=PIPE, stdout=PIPE)
     stdout, _stderr = gf.communicate(gfinput.encode(ENCODING))
     stdout = stdout.decode(ENCODING)
 
     # then we analyse the result from the GF process:
     totalerrors = 0
     alltests = stripstrings(stdout.split('###'))
+
     for testnr, test in enumerate(alltests, 1):
         sents = stripstrings(test.split('+++'))
         startline = int(sents.pop(0))
@@ -103,16 +150,24 @@ def runtest(testlines):
                 error(linenr, theerror)
                 testerrors += 1
             else:
-                allerrors = [(sum(tree not in oldtrees for _, _, oldtrees in oldresults), tree)
-                             for tree in alltrees]
-                besterrors, besttree = min(allerrors)
-                if besterrors > 0:
-                    for oldlinenr, oldlang, oldtrees in oldresults:
-                        if besttree not in oldtrees:
-                            error(linenr, "Line %s (%s) is not a translation of line %s (%s)"
-                                    % (linenr, lang, oldlinenr, oldlang))
-                            testerrors += 1
-                oldresults.append((linenr, lang, alltrees))
+                if is_cc_only:
+                    # If is_cc_only, gfinput (and thus stdout) include gold standard
+                    gold = alltrees.pop(0)
+                    lin = alltrees.pop(0)
+                    if gold != lin:
+                        testerrors += 1
+                        error(linenr,"\nExpected linearisation\n\t%s \n\nActual linearisation\n\t%s" % (gold, lin))
+                else:
+                    allerrors = [(sum(tree not in oldtrees for _, _, oldtrees in oldresults), tree)
+                                 for tree in alltrees]
+                    besterrors, besttree = min(allerrors)
+                    if besterrors > 0:
+                        for oldlinenr, oldlang, oldtrees in oldresults:
+                            if besttree not in oldtrees:
+                                error(linenr, "Line %s (%s) is not a translation of line %s (%s)"
+                                        % (linenr, lang, oldlinenr, oldlang))
+                                testerrors += 1
+                    oldresults.append((linenr, lang, alltrees))
         if not testerrors:
             print("OK!")
         print()
@@ -130,14 +185,19 @@ if __name__ == '__main__':
     if len(sys.argv) <= 1:
         usage()
         exit(1)
+    if "-only-cc" in sys.argv:
+        is_cc_only = True
+    else:
+        is_cc_only = False
     for filename in sys.argv[1:]:
-        try:
-            print("# Testing file:", filename)
-            with io.open(filename, encoding=ENCODING) as F:
+        if filename != "-only-cc":
+            try:
+                print("# Testing file:", filename)
+                with io.open(filename, encoding=ENCODING) as F:
+                    print()
+                    runtest(F,is_cc_only)
+            except IOError as err:
+                print(err)
                 print()
-                runtest(F)
-        except IOError as err:
-            print(err)
-            print()
-            usage()
-            exit(1)
+                usage()
+                exit(1)

From 091e53619d281b793021d77903f6bf7d3732c429 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Peter=20Ljunglo=CC=88f?= <peter.ljunglof@heatherleaf.se>
Date: Thu, 22 Aug 2019 11:31:51 +0200
Subject: [PATCH 08/10] Updated script: better handling of arguments,
 simplified code, better reporting, etc.

Note that the flag `-only-cc` has been renamed to `--no-pmcfg`
---
 unittest/unittest.py | 238 +++++++++++++++++++++++--------------------
 1 file changed, 130 insertions(+), 108 deletions(-)

diff --git a/unittest/unittest.py b/unittest/unittest.py
index 8f2d8b048..fa01d6b10 100644
--- a/unittest/unittest.py
+++ b/unittest/unittest.py
@@ -1,11 +1,10 @@
 """
 Python 2+3 script for unit testing RGL grammars.
 
-Usage: python path-to-script.py path/to/testfile.gftest (...)
-The script must be located in a sibling directory
+This script must be located in a sibling directory
 to the RGL 'src' directory to work properly.
 
-For for information see README.md
+For more information see README.md, or run with argument '-h'
 """
 
 from __future__ import print_function
@@ -13,6 +12,7 @@ from __future__ import print_function
 import sys
 import io
 import os.path
+import argparse
 from subprocess import Popen, PIPE
 from glob import glob
 
@@ -20,25 +20,40 @@ GRAMMARDIR = '../src'
 ENCODING = 'utf-8'
 
 
-def usage():
-    print("Usage: python %s path/to/testfile.gftest (...)" % (sys.argv[0],))
-    print()
-    print("(Note: to work properly this script must be located in")
-    print("the RGL 'unittest' directory, and gf must be in the system path)")
-    print()
+def create_argparser():
+    """Creates a command line argument parser"""
+    parser = argparse.ArgumentParser(
+                description="Unit-test one (or more) RGL language(s).",
+                epilog="""
+This script must be located in a sibling directory
+to the RGL 'src' directory to work properly.
+For for information see README.md.
+""")
+    parser.add_argument('testfile', nargs='+',
+                        help="one (or more) .gfscript file(s) containing unittests")
+    parser.add_argument('-v', '--verbose', action='store_true',
+                        help="be more verbose")
+    parser.add_argument('--no-pmcfg', action='store_true',
+                        help="don't calculate the PMCFG (faster for complex grammars); "
+                        "for this to work, every test case needs a parse tree")
+    return parser
 
 
 def error(linenr, *args):
+    """Prints an error to the terminal"""
     print("[Error at line %s]" % (linenr,), *args)
 
 
 def gferror(reply):
+    """Determines if a GF reply is an error"""
     return (reply.startswith('The parser failed')
             or reply.startswith('The sentence is not complete')
+            or reply.startswith('Warning:')
             or reply.startswith('Function') and reply.endswith('is not in scope'))
 
 
 def importfile(linenr, lang):
+    """Calculate the path to the GF file to import"""
     scriptdir = os.path.dirname(sys.argv[0]) or '.'
     langfiles = glob('%s/%s/*/%s.gf' % (scriptdir, GRAMMARDIR, lang))
     if not langfiles:
@@ -51,83 +66,98 @@ def importfile(linenr, lang):
 
 
 def stripstrings(strings):
+    """Strip leading/trailing blanks of every string in the given list"""
     return [s for s0 in strings for s in [s0.strip()] if s]
 
-def create_gf_input_cc_only(testlines):
-    # building the input to the GF process out of the lines of test file
-    gfinput = ''
-    testing = False
+
+def numbered_np(num, noun, plural=None):
+    """Crude way of inflecting nouns for number"""
+    return "%d %s" % (num, noun if num == 1 else (plural or noun+'s'))
+
+
+def collect_testcases(testlines):
+    """Parse the test file and return a list of test cases"""
+    tests = [[]]
     for linenr, line in enumerate(testlines, 1):
         if line.startswith('#') or line.startswith('--'):
             # a comment line: do nothing
             pass
-        elif ':' in line:
-            if not testing:
-                gfinput += 'ps "### %d" \n' % (linenr,)
-                testing = True
-            lang, sent = stripstrings(line.split(':', 1))
-            langfile = importfile(linenr, lang)
-            if '/abstract/' not in langfile:
-                gfinput += 'ps "+++ %d %s" \n' % (linenr, lang)
-                gfinput += 'i -retain -no-pmcfg %s \n' % (langfile,)
-                gfinput += 'ps "%s" \n' % (sent,) # Gold standard to compare against
-            else:
-                gfinput += 'cc -unqual -one %s \n' % (sent,)
         elif not line.strip():
             # an empty line: start a new test
-            testing = False
+            if tests[-1]:
+                tests.append([])
+        elif ':' in line:
+            lang, sentence = stripstrings(line.split(':', 1))
+            langfile = importfile(linenr, lang)
+            is_tree = '/abstract/' in langfile
+            tests[-1].append((is_tree, linenr, lang, langfile, sentence))
         else:
             error(linenr, "Ill-formatted line in test file:", line)
             exit(1)
+    return tests
 
-    # if cc only, gf input is this long and complicated thing
-    command = [
-        u'gf',
-        u'-run',
-        u'-retain',
-        u'-no-pmcfg',
-        u'-gfo-dir=/tmp']
 
-    return (command,gfinput)
-
-def create_gf_input(testlines):
-    # building the input to the GF process out of the lines of test file
-    gfinput = ''
-    testing = False
-    for linenr, line in enumerate(testlines, 1):
-        if line.startswith('#') or line.startswith('--'):
-            # a comment line: do nothing
-            pass
-        elif ':' in line:
-            if not testing:
-                gfinput += 'ps "### %d" \n' % (linenr,)
-                testing = True
-            lang, sent = stripstrings(line.split(':', 1))
-            gfinput += 'ps "+++ %d %s" \n' % (linenr, lang)
-            langfile = importfile(linenr, lang)
-            gfinput += 'i %s \n' % (langfile,)
-            if '/abstract/' in langfile:
-                gfinput += 'pt %s \n' % (sent,)
-            else:
-                gfinput += 'p -lang=%s "%s" \n' % (lang, sent)
-        elif not line.strip():
-            # an empty line: start a new test
-            testing = False
-        else:
-            error(linenr, "Ill-formatted line in test file:", line)
+def create_gf_input(testcases, args):
+    """Create a GF test script from the collected test cases"""
+    gfscript = []
+    for test in testcases:
+        test_linenr = test[0][1]
+        # check if the test contains an abstract tree:
+        abs_linenr = abs_tree = None
+        test.sort(key=lambda x:x[0], reverse=True)
+        if test[0][0]:
+            _, abs_linenr, abs_lang, _, abs_tree = test.pop(0)
+        # the test should not consist of only a tree:
+        if not test:
+            error(test_linenr, "Empty test case")
             exit(1)
+        # there should not be more than one abstree in the test:
+        if test[0][0]:
+            error(test[0][1], "Multiple abstract trees in test case")
+            exit(1)
+        # if there is an abstree, we use it for linearisation:
+        if abs_tree:
+            for _, linenr, lang, langfile, sentence in test:
+                gfscript += ['ps "### %d"' % (test_linenr,),
+                            'ps "+++ %d %s"' % (abs_linenr, abs_lang)]
+                if not args.no_pmcfg:
+                    gfscript += ['i %s' % (langfile,),
+                                'l -lang=%s %s' % (lang, abs_tree)]
+                else:
+                    gfscript += ['i -retain -no-pmcfg %s' % (langfile,),
+                                'cc -unqual -one %s' % (abs_tree,)]
+                gfscript += ['ps "+++ %d %s"' % (linenr, lang),
+                            'ps "%s"' % (sentence,)]
+        # if there is no abstree, we have to use parsing;
+        # in this case, the flag 'no_pmcfg' is of no use:
+        elif args.no_pmcfg:
+            error(test_linenr, "The flag '--no-pmcfg' requires that all test cases contain an abstract tree")
+            exit(1)
+        else:
+            gfscript += ['ps "### %d"' % (test_linenr,)]
+            for _, linenr, lang, langfile, sentence in test:
+                gfscript += ['ps "+++ %d %s"' % (linenr, lang),
+                            'i %s' % (langfile,),
+                            'p -lang=%s "%s"' % (lang, sentence)]
+    return gfscript
 
-    # If we're parsing, then command is just `gf -run'
-    return ('gf -run'.split(), gfinput)
 
-def runtest(testlines,is_cc_only):
+def runtest(testlines, args):
+    """Read the test cases, run GF and report the results"""
+
     # first we build the input to the GF process:
-    if is_cc_only:
-        command,gfinput = create_gf_input_cc_only(testlines)
-    else:
-        command,gfinput = create_gf_input(testlines)
+    testcases = collect_testcases(testlines)
+    gfscript = create_gf_input(testcases, args)
+
+    if args.verbose:
+        print("---+ GF testing script:")
+        for line in gfscript:
+            print('   |', line)
+        print()
 
     # calling GF from a subprocess:
+    command = 'gf -run'.split()
+    gfinput = '\n'.join(gfscript) + '\n'
     gf = Popen(command, stdin=PIPE, stdout=PIPE)
     stdout, _stderr = gf.communicate(gfinput.encode(ENCODING))
     stdout = stdout.decode(ENCODING)
@@ -139,35 +169,33 @@ def runtest(testlines,is_cc_only):
     for testnr, test in enumerate(alltests, 1):
         sents = stripstrings(test.split('+++'))
         startline = int(sents.pop(0))
-        print("Test %d (line %d..): %d examples" % (testnr, startline, len(sents)))
+        print("Test %d (line %d..): %s" % (testnr, startline, numbered_np(len(sents), "example")))
         testerrors = 0
         oldresults = []
         for sresults in sents:
             alltrees = stripstrings(sresults.splitlines())
             linenr, lang = alltrees.pop(0).split()
-            if len(alltrees) == 0 or len(alltrees) == 1 and gferror(alltrees[0]):
-                theerror = alltrees[0] if alltrees else "No parse trees found"
+            if args.verbose:
+                print('---+ line %s (%s), result from GF:' % (linenr, lang))
+                for tree in alltrees: 
+                    print('   |', tree)
+            if len(alltrees) == 0 or gferror("\n".join(alltrees)):
+                theerror = "\n".join(alltrees) if alltrees else "No parse trees found"
                 error(linenr, theerror)
                 testerrors += 1
             else:
-                if is_cc_only:
-                    # If is_cc_only, gfinput (and thus stdout) include gold standard
-                    gold = alltrees.pop(0)
-                    lin = alltrees.pop(0)
-                    if gold != lin:
-                        testerrors += 1
-                        error(linenr,"\nExpected linearisation\n\t%s \n\nActual linearisation\n\t%s" % (gold, lin))
-                else:
-                    allerrors = [(sum(tree not in oldtrees for _, _, oldtrees in oldresults), tree)
-                                 for tree in alltrees]
-                    besterrors, besttree = min(allerrors)
-                    if besterrors > 0:
-                        for oldlinenr, oldlang, oldtrees in oldresults:
-                            if besttree not in oldtrees:
-                                error(linenr, "Line %s (%s) is not a translation of line %s (%s)"
-                                        % (linenr, lang, oldlinenr, oldlang))
-                                testerrors += 1
-                    oldresults.append((linenr, lang, alltrees))
+                allerrors = [(sum(tree not in oldtrees for _, _, oldtrees in oldresults), tree)
+                                for tree in alltrees]
+                besterrors, besttree = min(allerrors)
+                if besterrors > 0:
+                    for oldlinenr, oldlang, oldtrees in oldresults:
+                        if besttree not in oldtrees:
+                            error(linenr, 
+                                    "The result of line %s (%s):\n    %s\n"
+                                    "is not among the results of line %s (%s):\n    %s"
+                                    % (linenr, lang, besttree, oldlinenr, oldlang, "\n    ".join(oldtrees)))
+                            testerrors += 1
+                oldresults.append((linenr, lang, alltrees))
         if not testerrors:
             print("OK!")
         print()
@@ -175,29 +203,23 @@ def runtest(testlines,is_cc_only):
 
     # finally we report a summary:
     if not totalerrors:
-        print("All %d tests passed!" % (len(alltests),))
+        print("All %s passed!" % (numbered_np(len(alltests), "test"),))
     else:
-        print("There were %d errors in %d tests!" % (totalerrors, len(alltests)))
+        print("Found %s in %s!" % (numbered_np(totalerrors, "error"), numbered_np(len(alltests), "test")))
     print()
 
 
 if __name__ == '__main__':
-    if len(sys.argv) <= 1:
-        usage()
-        exit(1)
-    if "-only-cc" in sys.argv:
-        is_cc_only = True
-    else:
-        is_cc_only = False
-    for filename in sys.argv[1:]:
-        if filename != "-only-cc":
-            try:
-                print("# Testing file:", filename)
-                with io.open(filename, encoding=ENCODING) as F:
-                    print()
-                    runtest(F,is_cc_only)
-            except IOError as err:
-                print(err)
+    parser = create_argparser()
+    args = parser.parse_args()
+    for filename in args.testfile:
+        try:
+            print("# Testing file:", filename)
+            with io.open(filename, encoding=ENCODING) as F:
                 print()
-                usage()
-                exit(1)
+                runtest(F, args)
+        except IOError as err:
+            print(err)
+            print()
+            parser.print_usage()
+            exit(1)

From 7e1d0d87ea2112f66f63f560f750e8b566d13f82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Peter=20Ljunglo=CC=88f?= <peter.ljunglof@heatherleaf.se>
Date: Thu, 22 Aug 2019 11:32:35 +0200
Subject: [PATCH 09/10] Updated README

---
 unittest/README.md | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/unittest/README.md b/unittest/README.md
index fdfa5e80e..430694489 100644
--- a/unittest/README.md
+++ b/unittest/README.md
@@ -3,7 +3,7 @@
 ## Usage
 
 ```
-python path-to-script.py path/to/testfile.gftest (...)
+python path/to/unittest.py [-h] [-v] [--no-pmcfg] path/to/testfile.gftest (...)
 ```
 
 The script must be located in a sibling directory
@@ -26,3 +26,10 @@ Every line starts with a language, followed by ":" and the sentence
 (or the abstract syntax tree if the abstract grammar is specified).
 
 You can also see an example in the file [`unittest-example.gftest`](unittest-example.gftest).
+
+## No PMCFG
+
+If your grammar is complex and takes a long time to compile, you can try
+the option `--no-pmcfg`, which tells GF not to build the parsing grammar.
+
+Note, however, that in this case every test case needs to contain a parse tree.

From 01f6957bad7cbc96e37a8f784b7487ad8ab8cf31 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Peter=20Ljunglo=CC=88f?= <peter.ljunglof@heatherleaf.se>
Date: Thu, 22 Aug 2019 11:33:00 +0200
Subject: [PATCH 10/10] Updated example test cases

---
 unittest/unittest-example.gftest | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/unittest/unittest-example.gftest b/unittest/unittest-example.gftest
index 7897a79e6..8a8553510 100644
--- a/unittest/unittest-example.gftest
+++ b/unittest/unittest-example.gftest
@@ -28,14 +28,25 @@ LangSwe: katten äter fisken
 # If we only specify one sentence, the script only checks if it's possible to parse
 LangEng: the cat in Paris sleeps
 
-# This test should fail, becuase they are not translations of each other
+# This test should fail, because they are not translations of each other
 LangEng: the house is big
 LangSwe: jag sover i huset
 
-# Here are some GF parsing errors
+# This test should fail, because the tree does not have this linearisation
+LangEng: the house is small
+Lang: PhrUtt NoPConj (UttS (UseCl (TTAnt TPres ASimul) PPos (PredVP (DetCN (DetQuant DefArt NumSg) (UseN house_N)) (UseComp (CompAP (PositA big_A)))))) NoVoc
+
+
+# And finally some GF parsing errors
+
 # This sentence cannot be parsed:
 LangEng: this is not parseable
+LangSwe: jag sover i huset
+
 # This sentence is not complete:
 LangEng: I sleep in
+LangSwe: jag sover i huset
+
 # This is not an abstract syntax tree:
 Lang: THIS IS NOT A TREE
+LangSwe: jag sover i huset