forked from GitHub/gf-core
Change the API for literal callbacks in Java and Python. The input sentence is no longer a parameter to the match callbacks; it is supplied when the callback is created instead.
This commit is contained in:
@@ -422,9 +422,8 @@ jpgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr,
|
|||||||
JNIEnv *env;
|
JNIEnv *env;
|
||||||
(*cachedJVM)->AttachCurrentThread(cachedJVM, (void**)&env, NULL);
|
(*cachedJVM)->AttachCurrentThread(cachedJVM, (void**)&env, NULL);
|
||||||
|
|
||||||
jstring jsentence = gu2j_string(env, sentence);
|
|
||||||
size_t joffset = gu2j_string_offset(sentence, *poffset);
|
size_t joffset = gu2j_string_offset(sentence, *poffset);
|
||||||
jobject result = (*env)->CallObjectMethod(env, callback->jcallback, callback->match_methodId, lin_idx, jsentence, joffset);
|
jobject result = (*env)->CallObjectMethod(env, callback->jcallback, callback->match_methodId, lin_idx, joffset);
|
||||||
if (result == NULL)
|
if (result == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
@@ -539,7 +538,7 @@ JNIEXPORT void JNICALL Java_org_grammaticalframework_pgf_Parser_addLiteralCallba
|
|||||||
callback->fin.fn = jpgf_literal_callback_fin;
|
callback->fin.fn = jpgf_literal_callback_fin;
|
||||||
|
|
||||||
jclass callback_class = (*env)->GetObjectClass(env, jcallback);
|
jclass callback_class = (*env)->GetObjectClass(env, jcallback);
|
||||||
callback->match_methodId = (*env)->GetMethodID(env, callback_class, "match", "(ILjava/lang/String;I)Lorg/grammaticalframework/pgf/LiteralCallback$CallbackResult;");
|
callback->match_methodId = (*env)->GetMethodID(env, callback_class, "match", "(II)Lorg/grammaticalframework/pgf/LiteralCallback$CallbackResult;");
|
||||||
callback->predict_methodId = (*env)->GetMethodID(env, callback_class, "predict", "(ILjava/lang/String;)Ljava/util/Iterator;");
|
callback->predict_methodId = (*env)->GetMethodID(env, callback_class, "predict", "(ILjava/lang/String;)Ljava/util/Iterator;");
|
||||||
|
|
||||||
gu_pool_finally(pool, &callback->fin);
|
gu_pool_finally(pool, &callback->fin);
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ package org.grammaticalframework.pgf;
|
|||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
public interface LiteralCallback {
|
public interface LiteralCallback {
|
||||||
public CallbackResult match(int lin_idx, String sentence, int start_offset);
|
public CallbackResult match(int lin_idx, int start_offset);
|
||||||
|
|
||||||
public Iterator<TokenProb> predict(int lin_idx, String prefix);
|
public Iterator<TokenProb> predict(int lin_idx, String prefix);
|
||||||
|
|
||||||
|
|||||||
@@ -11,13 +11,15 @@ import java.util.Iterator;
|
|||||||
public class NercLiteralCallback implements LiteralCallback {
|
public class NercLiteralCallback implements LiteralCallback {
|
||||||
private PGF pgf;
|
private PGF pgf;
|
||||||
private Concr concr;
|
private Concr concr;
|
||||||
|
private String sentence;
|
||||||
|
|
||||||
public NercLiteralCallback(PGF pgf, Concr concr) {
|
public NercLiteralCallback(PGF pgf, Concr concr, String sentence) {
|
||||||
this.pgf = pgf;
|
this.pgf = pgf;
|
||||||
this.concr = concr;
|
this.concr = concr;
|
||||||
|
this.sentence = sentence;
|
||||||
}
|
}
|
||||||
|
|
||||||
public CallbackResult match(int lin_idx, String sentence, int offset) {
|
public CallbackResult match(int lin_idx, int offset) {
|
||||||
StringBuilder sbuilder = new StringBuilder();
|
StringBuilder sbuilder = new StringBuilder();
|
||||||
|
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
|||||||
@@ -8,12 +8,14 @@ import java.util.Iterator;
|
|||||||
*/
|
*/
|
||||||
public class UnknownLiteralCallback implements LiteralCallback {
|
public class UnknownLiteralCallback implements LiteralCallback {
|
||||||
private Concr concr;
|
private Concr concr;
|
||||||
|
private String sentence;
|
||||||
|
|
||||||
public UnknownLiteralCallback(Concr concr) {
|
public UnknownLiteralCallback(Concr concr, String sentence) {
|
||||||
this.concr = concr;
|
this.concr = concr;
|
||||||
|
this.sentence = sentence;
|
||||||
}
|
}
|
||||||
|
|
||||||
public CallbackResult match(int lin_idx, String sentence, int offset) {
|
public CallbackResult match(int lin_idx, int offset) {
|
||||||
if (offset < sentence.length() &&
|
if (offset < sentence.length() &&
|
||||||
!Character.isUpperCase(sentence.charAt(offset))) {
|
!Character.isUpperCase(sentence.charAt(offset))) {
|
||||||
int start_offset = offset;
|
int start_offset = offset;
|
||||||
|
|||||||
@@ -122,8 +122,9 @@ def getKLinearizations(grammar, tgtlanguage, abstractParsesList, K=10):
|
|||||||
kBestTrans.append( ((parseprob,), postprocessor(linstring)) );
|
kBestTrans.append( ((parseprob,), postprocessor(linstring)) );
|
||||||
yield kBestTrans;
|
yield kBestTrans;
|
||||||
|
|
||||||
def getKBestParses(grammar, language, K, callbacks=[], serializable=False, sentid=count(1), max_length=50):
|
def getKBestParses(grammar, language, K, serializable=False, sentid=count(1), max_length=50):
|
||||||
parser = grammar.languages[language].parse;
|
parser = grammar.languages[language].parse;
|
||||||
|
import translation_pipeline
|
||||||
def worker(sentence):
|
def worker(sentence):
|
||||||
sentence = sentence.strip();
|
sentence = sentence.strip();
|
||||||
curid = sentid.next();
|
curid = sentid.next();
|
||||||
@@ -135,6 +136,7 @@ def getKBestParses(grammar, language, K, callbacks=[], serializable=False, senti
|
|||||||
print >>sys.stderr, '%d\t%.4f\t%s' %(curid, tend-tstart, err);
|
print >>sys.stderr, '%d\t%.4f\t%s' %(curid, tend-tstart, err);
|
||||||
return tend-tstart, kBestParses; # temporary hack to make sure parser does not get killed for very long sentences;
|
return tend-tstart, kBestParses; # temporary hack to make sure parser does not get killed for very long sentences;
|
||||||
try:
|
try:
|
||||||
|
callbacks = [('PN', translation_pipeline.parseNames(grammar, args.srclang, sentence)), ('Symb', translation_pipeline.parseUnknown(grammar, args.srclang, sentence))]
|
||||||
for parseidx, parse in enumerate( parser(sentence, heuristics=0, callbacks=callbacks) ):
|
for parseidx, parse in enumerate( parser(sentence, heuristics=0, callbacks=callbacks) ):
|
||||||
parseScores[parse[0]] = True;
|
parseScores[parse[0]] = True;
|
||||||
kBestParses.append( (parse[0], str(parse[1]) if serializable else parse[1]) );
|
kBestParses.append( (parse[0], str(parse[1]) if serializable else parse[1]) );
|
||||||
@@ -160,8 +162,7 @@ def pgf_parse(args):
|
|||||||
preprocessor = lexer();
|
preprocessor = lexer();
|
||||||
inputSet = translation_pipeline.web_lexer(grammar, args.srclang, imap(preprocessor, args.inputstream) );
|
inputSet = translation_pipeline.web_lexer(grammar, args.srclang, imap(preprocessor, args.inputstream) );
|
||||||
outputPrinter = lambda X: "%f\t%s" %(X[0], str(X[1])); #operator.itemgetter(1);
|
outputPrinter = lambda X: "%f\t%s" %(X[0], str(X[1])); #operator.itemgetter(1);
|
||||||
callbacks = [('PN', translation_pipeline.parseNames(grammar, args.srclang)), ('Symb', translation_pipeline.parseUnknown(grammar, args.srclang))];
|
parser = getKBestParses(grammar, args.srclang, 1);
|
||||||
parser = getKBestParses(grammar, args.srclang, 1, callbacks);
|
|
||||||
|
|
||||||
sentidx = 0;
|
sentidx = 0;
|
||||||
for time, parsesBlock in imap(parser, inputSet):
|
for time, parsesBlock in imap(parser, inputSet):
|
||||||
@@ -176,8 +177,7 @@ def pgf_kparse(args):
|
|||||||
preprocessor = lexer();
|
preprocessor = lexer();
|
||||||
inputSet = translation_pipeline.web_lexer(grammar, args.srclang, imap(preprocessor, args.inputstream) );
|
inputSet = translation_pipeline.web_lexer(grammar, args.srclang, imap(preprocessor, args.inputstream) );
|
||||||
outputPrinter = printJohnsonRerankerFormat;
|
outputPrinter = printJohnsonRerankerFormat;
|
||||||
callbacks = [('PN', translation_pipeline.parseNames(grammar, args.srclang)), ('Symb', translation_pipeline.parseUnknown(grammar, args.srclang))];
|
parser = getKBestParses(grammar, args.srclang, args.K);
|
||||||
parser = getKBestParses(grammar, args.srclang, args.K, callbacks=callbacks);
|
|
||||||
|
|
||||||
sentidx = 0;
|
sentidx = 0;
|
||||||
for time, parsesBlock in imap(parser, inputSet):
|
for time, parsesBlock in imap(parser, inputSet):
|
||||||
|
|||||||
@@ -129,8 +129,8 @@ def clean_gfstrings(sentence):
|
|||||||
sentence = sentence.replace(entry, ' '.join(entry[1:-1].split('_')[:-1]) if entry.find('_') != -1 else '');
|
sentence = sentence.replace(entry, ' '.join(entry[1:-1].split('_')[:-1]) if entry.find('_') != -1 else '');
|
||||||
return ' '.join( sentence.split() );
|
return ' '.join( sentence.split() );
|
||||||
|
|
||||||
def parseNames(grammar, language):
|
def parseNames(grammar, language, sentence):
|
||||||
def callback(lin_idx, sentence, start):
|
def callback(lin_idx, start):
|
||||||
moving_start, end, eot = start, len(sentence), True;
|
moving_start, end, eot = start, len(sentence), True;
|
||||||
if moving_start < end and (not sentence[moving_start].isupper()):
|
if moving_start < end and (not sentence[moving_start].isupper()):
|
||||||
return None;
|
return None;
|
||||||
@@ -175,8 +175,8 @@ def parseNames(grammar, language):
|
|||||||
return None;
|
return None;
|
||||||
return callback;
|
return callback;
|
||||||
|
|
||||||
def parseUnknown(grammar, language):
|
def parseUnknown(grammar, language, sentence):
|
||||||
def callback(lin_idx, sentence, start):
|
def callback(lin_idx, start):
|
||||||
moving_start, end, eot = start, len(sentence), True;
|
moving_start, end, eot = start, len(sentence), True;
|
||||||
isNewToken = (moving_start == 0) or (moving_start > 1 and sentence[moving_start-1].isspace()) # -- added to deal with segmentation errors like may => ma_N + Symb y
|
isNewToken = (moving_start == 0) or (moving_start > 1 and sentence[moving_start-1].isspace()) # -- added to deal with segmentation errors like may => ma_N + Symb y
|
||||||
if moving_start < end and (not sentence[moving_start].isupper()):
|
if moving_start < end and (not sentence[moving_start].isupper()):
|
||||||
@@ -271,7 +271,7 @@ def pipelineParsing(grammar, language, sentences, K=20):
|
|||||||
#buf = [sent for sent in sentences];
|
#buf = [sent for sent in sentences];
|
||||||
buf, sentences = itertools.tee(sentences, 2);
|
buf, sentences = itertools.tee(sentences, 2);
|
||||||
sentences = itertools.imap(gf_utils.lexer(lang=language), sentences);
|
sentences = itertools.imap(gf_utils.lexer(lang=language), sentences);
|
||||||
parser = gf_utils.getKBestParses(grammar, language, K, callbacks=[("PN", parseNames(grammar, language)), ("Symb", parseUnknown(grammar, language))]);
|
parser = gf_utils.getKBestParses(grammar, language, K);
|
||||||
for sent, (time, parsesBlock) in itertools.izip(buf, itertools.imap(parser, sentences)):
|
for sent, (time, parsesBlock) in itertools.izip(buf, itertools.imap(parser, sentences)):
|
||||||
yield (sent, parsesBlock);
|
yield (sent, parsesBlock);
|
||||||
|
|
||||||
|
|||||||
@@ -1280,11 +1280,11 @@ pypgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr,
|
|||||||
gu_container(self, PyPgfLiteralCallback, callback);
|
gu_container(self, PyPgfLiteralCallback, callback);
|
||||||
|
|
||||||
PyObject* result =
|
PyObject* result =
|
||||||
PyObject_CallFunction(callback->pycallback, "isi",
|
PyObject_CallFunction(callback->pycallback, "ii",
|
||||||
lin_idx, sentence, *poffset);
|
lin_idx, *poffset);
|
||||||
if (result == NULL)
|
if (result == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
if (result == Py_None) {
|
if (result == Py_None) {
|
||||||
Py_DECREF(result);
|
Py_DECREF(result);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|||||||
@@ -307,8 +307,8 @@ public class Translator {
|
|||||||
Concr targetLang = getTargetConcr();
|
Concr targetLang = getTargetConcr();
|
||||||
|
|
||||||
Map<String,LiteralCallback> callbacks = new HashMap<String,LiteralCallback>();
|
Map<String,LiteralCallback> callbacks = new HashMap<String,LiteralCallback>();
|
||||||
callbacks.put("PN", new NercLiteralCallback(mGrammarLoader.getGrammar(), sourceLang));
|
callbacks.put("PN", new NercLiteralCallback(mGrammarLoader.getGrammar(), sourceLang, input));
|
||||||
callbacks.put("Symb", new UnknownLiteralCallback(sourceLang));
|
callbacks.put("Symb", new UnknownLiteralCallback(sourceLang, input));
|
||||||
|
|
||||||
int count = NUM_ALT_TRANSLATIONS;
|
int count = NUM_ALT_TRANSLATIONS;
|
||||||
for (ExprProb ep : sourceLang.parseWithHeuristics(getGrammar().getStartCat(), input, -1, callbacks)) {
|
for (ExprProb ep : sourceLang.parseWithHeuristics(getGrammar().getStartCat(), input, -1, callbacks)) {
|
||||||
|
|||||||
Reference in New Issue
Block a user