mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-24 03:52:50 -06:00
Cleaned up Python Translation pipeline
This commit is contained in:
@@ -205,26 +205,6 @@ def parseTester(grammar, language):
|
|||||||
return None;
|
return None;
|
||||||
return callback;
|
return callback;
|
||||||
|
|
||||||
def translateWord(grammar, language, tgtlanguage, word):
|
|
||||||
lowerword = word.lower();
|
|
||||||
try:
|
|
||||||
partialExprList = grammar.languages[language].parse(word, cat='Chunk');
|
|
||||||
for expr in partialExprList:
|
|
||||||
trans = grammar.languages[tgtlanguage].linearize(expr[1]);
|
|
||||||
if not trans:
|
|
||||||
print expr[1], tgtlanguage;
|
|
||||||
return gf_utils.gf_postprocessor( trans if trans else ' ' );
|
|
||||||
except pgf.ParseError:
|
|
||||||
morphAnalysis = grammar.languages[language].lookupMorpho(word) + grammar.languages[language].lookupMorpho(lowerword);
|
|
||||||
for morph in morphAnalysis:
|
|
||||||
if grammar.languages[tgtlanguage].hasLinearization(morph[0]):
|
|
||||||
return gf_utils.gf_postprocessor( grammar.languages[tgtlanguage].linearize( pgf.readExpr(morph[0]) ) );
|
|
||||||
return word;
|
|
||||||
|
|
||||||
def translationByLookup(grammar, language, tgtlanguages, sentence):
|
|
||||||
return [(lang, gf_utils.gf_postprocessor("% " + " ".join([translateWord(grammar, language, lang, word) for word in sentence.split()]))) \
|
|
||||||
for lang in tgtlanguages];
|
|
||||||
|
|
||||||
def translateWordsAsChunks(grammar, language, tgtlanguages, word):
|
def translateWordsAsChunks(grammar, language, tgtlanguages, word):
|
||||||
parser = grammar.languages[language].parse;
|
parser = grammar.languages[language].parse;
|
||||||
linearizersList = dict((lang, grammar.languages[lang].linearize) for lang in tgtlanguages);
|
linearizersList = dict((lang, grammar.languages[lang].linearize) for lang in tgtlanguages);
|
||||||
@@ -239,7 +219,7 @@ def translateWordsAsChunks(grammar, language, tgtlanguages, word):
|
|||||||
return [];
|
return [];
|
||||||
return translations;
|
return translations;
|
||||||
|
|
||||||
def translateWord_(grammar, language, tgtlanguages, word):
|
def translateWord(grammar, language, tgtlanguages, word):
|
||||||
possible_translations = translateWordsAsChunks(grammar, language, tgtlanguages, word);
|
possible_translations = translateWordsAsChunks(grammar, language, tgtlanguages, word);
|
||||||
if len(possible_translations):
|
if len(possible_translations):
|
||||||
return possible_translations;
|
return possible_translations;
|
||||||
@@ -257,7 +237,7 @@ def translateWord_(grammar, language, tgtlanguages, word):
|
|||||||
return [(lang, gf_utils.gf_postprocessor( grammar.languages[lang].linearize( pgf.readExpr(morph[0]) ) )) for lang in tgtlanguages];
|
return [(lang, gf_utils.gf_postprocessor( grammar.languages[lang].linearize( pgf.readExpr(morph[0]) ) )) for lang in tgtlanguages];
|
||||||
return [(lang, word) for lang in tgtlanguages];
|
return [(lang, word) for lang in tgtlanguages];
|
||||||
|
|
||||||
def translationByLookup_(grammar, language, tgtlanguages, sentence):
|
def translationByLookup(grammar, language, tgtlanguages, sentence):
|
||||||
parser = grammar.languages[language].parse;
|
parser = grammar.languages[language].parse;
|
||||||
linearizersList = dict([(lang, grammar.languages[lang].linearize) for lang in tgtlanguages]);
|
linearizersList = dict([(lang, grammar.languages[lang].linearize) for lang in tgtlanguages]);
|
||||||
queue = [sentence.strip().split()];
|
queue = [sentence.strip().split()];
|
||||||
@@ -267,7 +247,7 @@ def translationByLookup_(grammar, language, tgtlanguages, sentence):
|
|||||||
if not len(head):
|
if not len(head):
|
||||||
pass;
|
pass;
|
||||||
elif len(head) == 1 and head[0].strip():
|
elif len(head) == 1 and head[0].strip():
|
||||||
for lang, wordchoice in translateWord_(grammar, language, tgtlanguages, head[0]):
|
for lang, wordchoice in translateWord(grammar, language, tgtlanguages, head[0]):
|
||||||
transChunks.setdefault(lang, []).append( gf_utils.postprocessor(wordchoice) );
|
transChunks.setdefault(lang, []).append( gf_utils.postprocessor(wordchoice) );
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
@@ -370,7 +350,7 @@ def translation_pipeline(props):
|
|||||||
if not len(parsesBlock):
|
if not len(parsesBlock):
|
||||||
# failed to parse;
|
# failed to parse;
|
||||||
# translate using lookup
|
# translate using lookup
|
||||||
for tgtlang, translation in translationByLookup_(grammar, sourceLanguage, targetLanguages, absParses[idx][0]):
|
for tgtlang, translation in translationByLookup(grammar, sourceLanguage, targetLanguages, absParses[idx][0]):
|
||||||
if bestK == 1:
|
if bestK == 1:
|
||||||
addItem(translationBlocks[tgtlang], postprocessor(translation));
|
addItem(translationBlocks[tgtlang], postprocessor(translation));
|
||||||
else:
|
else:
|
||||||
|
|||||||
Reference in New Issue
Block a user