mirror of
https://github.com/GrammaticalFramework/gf-rgl.git
synced 2026-05-27 08:58:55 -06:00
(unittest) Add option to only use cc, never parse
Usage is the same as before, but add `-only-cc` as one of the arguments. For example:

`python3 unittest/unittest.py src/somali/unittest/vp.gftest -only-cc`

For this to work, the test file must contain only test cases like this:

```
LangSom: isku BIND ma barto
Lang: PhrUtt NoPConj (UttS (UseCl (TTAnt TPres ASimul) PNeg (PredVP (UsePron youSg_Pron) (ReflVP (SlashV2a teach_V2))))) NoVoc
```

`&+` needs to be written as `BIND`.
This commit is contained in:
@@ -53,9 +53,45 @@ def importfile(linenr, lang):
|
|||||||
def stripstrings(strings):
|
def stripstrings(strings):
|
||||||
return [s for s0 in strings for s in [s0.strip()] if s]
|
return [s for s0 in strings for s in [s0.strip()] if s]
|
||||||
|
|
||||||
|
def create_gf_input_cc_only(testlines):
|
||||||
|
# building the input to the GF process out of the lines of test file
|
||||||
|
gfinput = ''
|
||||||
|
testing = False
|
||||||
|
for linenr, line in enumerate(testlines, 1):
|
||||||
|
if line.startswith('#') or line.startswith('--'):
|
||||||
|
# a comment line: do nothing
|
||||||
|
pass
|
||||||
|
elif ':' in line:
|
||||||
|
if not testing:
|
||||||
|
gfinput += 'ps "### %d" \n' % (linenr,)
|
||||||
|
testing = True
|
||||||
|
lang, sent = stripstrings(line.split(':', 1))
|
||||||
|
langfile = importfile(linenr, lang)
|
||||||
|
if '/abstract/' not in langfile:
|
||||||
|
gfinput += 'ps "+++ %d %s" \n' % (linenr, lang)
|
||||||
|
gfinput += 'i -retain -no-pmcfg %s \n' % (langfile,)
|
||||||
|
gfinput += 'ps "%s" \n' % (sent,) # Gold standard to compare against
|
||||||
|
else:
|
||||||
|
gfinput += 'cc -unqual -one %s \n' % (sent,)
|
||||||
|
elif not line.strip():
|
||||||
|
# an empty line: start a new test
|
||||||
|
testing = False
|
||||||
|
else:
|
||||||
|
error(linenr, "Ill-formatted line in test file:", line)
|
||||||
|
exit(1)
|
||||||
|
|
||||||
def runtest(testlines):
|
# if cc only, gf input is this long and complicated thing
|
||||||
# first we build the input to the GF process:
|
command = [
|
||||||
|
u'gf',
|
||||||
|
u'-run',
|
||||||
|
u'-retain',
|
||||||
|
u'-no-pmcfg',
|
||||||
|
u'-gfo-dir=/tmp']
|
||||||
|
|
||||||
|
return (command,gfinput)
|
||||||
|
|
||||||
|
def create_gf_input(testlines):
|
||||||
|
# building the input to the GF process out of the lines of test file
|
||||||
gfinput = ''
|
gfinput = ''
|
||||||
testing = False
|
testing = False
|
||||||
for linenr, line in enumerate(testlines, 1):
|
for linenr, line in enumerate(testlines, 1):
|
||||||
@@ -81,14 +117,25 @@ def runtest(testlines):
|
|||||||
error(linenr, "Ill-formatted line in test file:", line)
|
error(linenr, "Ill-formatted line in test file:", line)
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
# then we call GF with the script, catching stdout:
|
# If we're parsing, then command is just `gf -run'
|
||||||
gf = Popen('gf -run'.split(), stdin=PIPE, stdout=PIPE)
|
return ('gf -run'.split(), gfinput)
|
||||||
|
|
||||||
|
def runtest(testlines,is_cc_only):
|
||||||
|
# first we build the input to the GF process:
|
||||||
|
if is_cc_only:
|
||||||
|
command,gfinput = create_gf_input_cc_only(testlines)
|
||||||
|
else:
|
||||||
|
command,gfinput = create_gf_input(testlines)
|
||||||
|
|
||||||
|
# calling GF from a subprocess:
|
||||||
|
gf = Popen(command, stdin=PIPE, stdout=PIPE)
|
||||||
stdout, _stderr = gf.communicate(gfinput.encode(ENCODING))
|
stdout, _stderr = gf.communicate(gfinput.encode(ENCODING))
|
||||||
stdout = stdout.decode(ENCODING)
|
stdout = stdout.decode(ENCODING)
|
||||||
|
|
||||||
# then we analyse the result from the GF process:
|
# then we analyse the result from the GF process:
|
||||||
totalerrors = 0
|
totalerrors = 0
|
||||||
alltests = stripstrings(stdout.split('###'))
|
alltests = stripstrings(stdout.split('###'))
|
||||||
|
|
||||||
for testnr, test in enumerate(alltests, 1):
|
for testnr, test in enumerate(alltests, 1):
|
||||||
sents = stripstrings(test.split('+++'))
|
sents = stripstrings(test.split('+++'))
|
||||||
startline = int(sents.pop(0))
|
startline = int(sents.pop(0))
|
||||||
@@ -102,6 +149,14 @@ def runtest(testlines):
|
|||||||
theerror = alltrees[0] if alltrees else "No parse trees found"
|
theerror = alltrees[0] if alltrees else "No parse trees found"
|
||||||
error(linenr, theerror)
|
error(linenr, theerror)
|
||||||
testerrors += 1
|
testerrors += 1
|
||||||
|
else:
|
||||||
|
if is_cc_only:
|
||||||
|
# If is_cc_only, gfinput (and thus stdout) include gold standard
|
||||||
|
gold = alltrees.pop(0)
|
||||||
|
lin = alltrees.pop(0)
|
||||||
|
if gold != lin:
|
||||||
|
testerrors += 1
|
||||||
|
error(linenr,"\nExpected linearisation\n\t%s \n\nActual linearisation\n\t%s" % (gold, lin))
|
||||||
else:
|
else:
|
||||||
allerrors = [(sum(tree not in oldtrees for _, _, oldtrees in oldresults), tree)
|
allerrors = [(sum(tree not in oldtrees for _, _, oldtrees in oldresults), tree)
|
||||||
for tree in alltrees]
|
for tree in alltrees]
|
||||||
@@ -130,12 +185,17 @@ if __name__ == '__main__':
|
|||||||
if len(sys.argv) <= 1:
|
if len(sys.argv) <= 1:
|
||||||
usage()
|
usage()
|
||||||
exit(1)
|
exit(1)
|
||||||
|
if "-only-cc" in sys.argv:
|
||||||
|
is_cc_only = True
|
||||||
|
else:
|
||||||
|
is_cc_only = False
|
||||||
for filename in sys.argv[1:]:
|
for filename in sys.argv[1:]:
|
||||||
|
if filename != "-only-cc":
|
||||||
try:
|
try:
|
||||||
print("# Testing file:", filename)
|
print("# Testing file:", filename)
|
||||||
with io.open(filename, encoding=ENCODING) as F:
|
with io.open(filename, encoding=ENCODING) as F:
|
||||||
print()
|
print()
|
||||||
runtest(F)
|
runtest(F,is_cc_only)
|
||||||
except IOError as err:
|
except IOError as err:
|
||||||
print(err)
|
print(err)
|
||||||
print()
|
print()
|
||||||
|
|||||||
Reference in New Issue
Block a user