diff --git a/src/runtime/java/jpgf.c b/src/runtime/java/jpgf.c index c64854eb8..627727c15 100644 --- a/src/runtime/java/jpgf.c +++ b/src/runtime/java/jpgf.c @@ -422,9 +422,8 @@ jpgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr, JNIEnv *env; (*cachedJVM)->AttachCurrentThread(cachedJVM, (void**)&env, NULL); - jstring jsentence = gu2j_string(env, sentence); size_t joffset = gu2j_string_offset(sentence, *poffset); - jobject result = (*env)->CallObjectMethod(env, callback->jcallback, callback->match_methodId, lin_idx, jsentence, joffset); + jobject result = (*env)->CallObjectMethod(env, callback->jcallback, callback->match_methodId, lin_idx, (jint) joffset); if (result == NULL) return NULL; @@ -539,7 +538,7 @@ JNIEXPORT void JNICALL Java_org_grammaticalframework_pgf_Parser_addLiteralCallba callback->fin.fn = jpgf_literal_callback_fin; jclass callback_class = (*env)->GetObjectClass(env, jcallback); - callback->match_methodId = (*env)->GetMethodID(env, callback_class, "match", "(ILjava/lang/String;I)Lorg/grammaticalframework/pgf/LiteralCallback$CallbackResult;"); + callback->match_methodId = (*env)->GetMethodID(env, callback_class, "match", "(II)Lorg/grammaticalframework/pgf/LiteralCallback$CallbackResult;"); callback->predict_methodId = (*env)->GetMethodID(env, callback_class, "predict", "(ILjava/lang/String;)Ljava/util/Iterator;"); gu_pool_finally(pool, &callback->fin); diff --git a/src/runtime/java/org/grammaticalframework/pgf/LiteralCallback.java b/src/runtime/java/org/grammaticalframework/pgf/LiteralCallback.java index 106b528e6..6c6b657e5 100644 --- a/src/runtime/java/org/grammaticalframework/pgf/LiteralCallback.java +++ b/src/runtime/java/org/grammaticalframework/pgf/LiteralCallback.java @@ -3,7 +3,7 @@ package org.grammaticalframework.pgf; import java.util.Iterator; public interface LiteralCallback { - public CallbackResult match(int lin_idx, String sentence, int start_offset); + public CallbackResult match(int lin_idx, int start_offset); public 
Iterator predict(int lin_idx, String prefix); diff --git a/src/runtime/java/org/grammaticalframework/pgf/NercLiteralCallback.java b/src/runtime/java/org/grammaticalframework/pgf/NercLiteralCallback.java index 1d5491f64..f5375a70a 100644 --- a/src/runtime/java/org/grammaticalframework/pgf/NercLiteralCallback.java +++ b/src/runtime/java/org/grammaticalframework/pgf/NercLiteralCallback.java @@ -11,13 +11,15 @@ import java.util.Iterator; public class NercLiteralCallback implements LiteralCallback { private PGF pgf; private Concr concr; + private String sentence; - public NercLiteralCallback(PGF pgf, Concr concr) { + public NercLiteralCallback(PGF pgf, Concr concr, String sentence) { this.pgf = pgf; this.concr = concr; + this.sentence = sentence; } - public CallbackResult match(int lin_idx, String sentence, int offset) { + public CallbackResult match(int lin_idx, int offset) { StringBuilder sbuilder = new StringBuilder(); int i = 0; diff --git a/src/runtime/java/org/grammaticalframework/pgf/UnknownLiteralCallback.java b/src/runtime/java/org/grammaticalframework/pgf/UnknownLiteralCallback.java index 7d4209aad..d8e865db7 100644 --- a/src/runtime/java/org/grammaticalframework/pgf/UnknownLiteralCallback.java +++ b/src/runtime/java/org/grammaticalframework/pgf/UnknownLiteralCallback.java @@ -8,12 +8,14 @@ import java.util.Iterator; */ public class UnknownLiteralCallback implements LiteralCallback { private Concr concr; + private String sentence; - public UnknownLiteralCallback(Concr concr) { + public UnknownLiteralCallback(Concr concr, String sentence) { this.concr = concr; + this.sentence = sentence; } - public CallbackResult match(int lin_idx, String sentence, int offset) { + public CallbackResult match(int lin_idx, int offset) { if (offset < sentence.length() && !Character.isUpperCase(sentence.charAt(offset))) { int start_offset = offset; diff --git a/src/runtime/python/examples/gf_utils.py b/src/runtime/python/examples/gf_utils.py index 934235ef4..bb637cf04 100644 --- 
a/src/runtime/python/examples/gf_utils.py +++ b/src/runtime/python/examples/gf_utils.py @@ -122,8 +122,9 @@ def getKLinearizations(grammar, tgtlanguage, abstractParsesList, K=10): kBestTrans.append( ((parseprob,), postprocessor(linstring)) ); yield kBestTrans; -def getKBestParses(grammar, language, K, callbacks=[], serializable=False, sentid=count(1), max_length=50): +def getKBestParses(grammar, language, K, serializable=False, sentid=count(1), max_length=50): parser = grammar.languages[language].parse; + import translation_pipeline def worker(sentence): sentence = sentence.strip(); curid = sentid.next(); @@ -135,6 +136,7 @@ def getKBestParses(grammar, language, K, callbacks=[], serializable=False, senti print >>sys.stderr, '%d\t%.4f\t%s' %(curid, tend-tstart, err); return tend-tstart, kBestParses; # temporary hack to make sure parser does not get killed for very long sentences; try: + callbacks = [('PN', translation_pipeline.parseNames(grammar, language, sentence)), ('Symb', translation_pipeline.parseUnknown(grammar, language, sentence))] for parseidx, parse in enumerate( parser(sentence, heuristics=0, callbacks=callbacks) ): parseScores[parse[0]] = True; kBestParses.append( (parse[0], str(parse[1]) if serializable else parse[1]) ); @@ -160,8 +162,7 @@ def pgf_parse(args): preprocessor = lexer(); inputSet = translation_pipeline.web_lexer(grammar, args.srclang, imap(preprocessor, args.inputstream) ); outputPrinter = lambda X: "%f\t%s" %(X[0], str(X[1])); #operator.itemgetter(1); - callbacks = [('PN', translation_pipeline.parseNames(grammar, args.srclang)), ('Symb', translation_pipeline.parseUnknown(grammar, args.srclang))]; - parser = getKBestParses(grammar, args.srclang, 1, callbacks); + parser = getKBestParses(grammar, args.srclang, 1); sentidx = 0; for time, parsesBlock in imap(parser, inputSet): @@ -176,8 +177,7 @@ def pgf_kparse(args): preprocessor = lexer(); inputSet = translation_pipeline.web_lexer(grammar, args.srclang, imap(preprocessor, 
args.inputstream) ); outputPrinter = printJohnsonRerankerFormat; - callbacks = [('PN', translation_pipeline.parseNames(grammar, args.srclang)), ('Symb', translation_pipeline.parseUnknown(grammar, args.srclang))]; - parser = getKBestParses(grammar, args.srclang, args.K, callbacks=callbacks); + parser = getKBestParses(grammar, args.srclang, args.K); sentidx = 0; for time, parsesBlock in imap(parser, inputSet): diff --git a/src/runtime/python/examples/translation_pipeline.py b/src/runtime/python/examples/translation_pipeline.py index b081c68f0..bfd8b5c94 100644 --- a/src/runtime/python/examples/translation_pipeline.py +++ b/src/runtime/python/examples/translation_pipeline.py @@ -129,8 +129,8 @@ def clean_gfstrings(sentence): sentence = sentence.replace(entry, ' '.join(entry[1:-1].split('_')[:-1]) if entry.find('_') != -1 else ''); return ' '.join( sentence.split() ); -def parseNames(grammar, language): - def callback(lin_idx, sentence, start): +def parseNames(grammar, language, sentence): + def callback(lin_idx, start): moving_start, end, eot = start, len(sentence), True; if moving_start < end and (not sentence[moving_start].isupper()): return None; @@ -175,8 +175,8 @@ def parseNames(grammar, language): return None; return callback; -def parseUnknown(grammar, language): - def callback(lin_idx, sentence, start): +def parseUnknown(grammar, language, sentence): + def callback(lin_idx, start): moving_start, end, eot = start, len(sentence), True; isNewToken = (moving_start == 0) or (moving_start > 1 and sentence[moving_start-1].isspace()) # -- added to deal with segmentation errors like may => ma_N + Symb y if moving_start < end and (not sentence[moving_start].isupper()): @@ -271,7 +271,7 @@ def pipelineParsing(grammar, language, sentences, K=20): #buf = [sent for sent in sentences]; buf, sentences = itertools.tee(sentences, 2); sentences = itertools.imap(gf_utils.lexer(lang=language), sentences); - parser = gf_utils.getKBestParses(grammar, language, K, callbacks=[("PN", 
parseNames(grammar, language)), ("Symb", parseUnknown(grammar, language))]); + parser = gf_utils.getKBestParses(grammar, language, K); for sent, (time, parsesBlock) in itertools.izip(buf, itertools.imap(parser, sentences)): yield (sent, parsesBlock); diff --git a/src/runtime/python/pypgf.c b/src/runtime/python/pypgf.c index 9f88a771b..c3eef6afc 100644 --- a/src/runtime/python/pypgf.c +++ b/src/runtime/python/pypgf.c @@ -1280,11 +1280,11 @@ pypgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr, gu_container(self, PyPgfLiteralCallback, callback); PyObject* result = - PyObject_CallFunction(callback->pycallback, "isi", - lin_idx, sentence, *poffset); + PyObject_CallFunction(callback->pycallback, "ii", + lin_idx, *poffset); if (result == NULL) return NULL; - + if (result == Py_None) { Py_DECREF(result); return NULL; diff --git a/src/ui/android/src/org/grammaticalframework/ui/android/Translator.java b/src/ui/android/src/org/grammaticalframework/ui/android/Translator.java index 0781e9397..e91e4f1b4 100644 --- a/src/ui/android/src/org/grammaticalframework/ui/android/Translator.java +++ b/src/ui/android/src/org/grammaticalframework/ui/android/Translator.java @@ -307,8 +307,8 @@ public class Translator { Concr targetLang = getTargetConcr(); Map callbacks = new HashMap(); - callbacks.put("PN", new NercLiteralCallback(mGrammarLoader.getGrammar(), sourceLang)); - callbacks.put("Symb", new UnknownLiteralCallback(sourceLang)); + callbacks.put("PN", new NercLiteralCallback(mGrammarLoader.getGrammar(), sourceLang, input)); + callbacks.put("Symb", new UnknownLiteralCallback(sourceLang, input)); int count = NUM_ALT_TRANSLATIONS; for (ExprProb ep : sourceLang.parseWithHeuristics(getGrammar().getStartCat(), input, -1, callbacks)) {