Wide Coverage Translation Demo: length limit test uses # of source chars now

The length limit test previously used the URL-encoded UTF-8 representation
of the source sentence. This was needed because of a fixed-size buffer in C.
Now that the server is in Haskell, the only reason the length is limited
is to avoid excessive time and space use in the parser, so it is better to
count source characters. This also avoids being too restrictive with
non-European languages.
This commit is contained in:
hallgren
2015-04-16 12:21:32 +00:00
parent 2ceeea17fd
commit 1420e35cbb

View File

@@ -50,8 +50,8 @@ function length_limit(lang) {
} }
} }
function check_limit(lang,encsrc) { function check_limit(lang,source) {
var len=encsrc.length, limit=length_limit(lang) var len=source.length, limit=length_limit(lang)
return len<=limit ? null : "sentense too long, "+len+">"+limit return len<=limit ? null : "sentense too long, "+len+">"+limit
} }
@@ -60,15 +60,15 @@ gftranslate.translate=function(source,from,to,start,limit,cont) {
var g=gftranslate.grammar var g=gftranslate.grammar
var lexer="&lexer=text" var lexer="&lexer=text"
if(from=="Chi") lexer="",source=source.split("").join(" ") if(from=="Chi") lexer="",source=source.split("").join(" ")
var encsrc=encodeURIComponent(source)
function errcont(text,code) { cont([{error:code+" "+text}]) } function errcont(text,code) { cont([{error:code+" "+text}]) }
function extract(result) { function extract(result) {
cont(unspace_translations(g,result[0].translations)) cont(unspace_translations(g,result[0].translations))
} }
var too_long=check_limit(from,encsrc) var too_long=check_limit(from,source)
if(too_long) cont([{error:too_long}]) if(too_long) cont([{error:too_long}])
else else
gftranslate.call("?command=c-translate&jsontree=true&input="+encsrc gftranslate.call("?command=c-translate&jsontree=true&input="
+encodeURIComponent(source)
+lexer+"&unlexer=text&from="+g+from+"&to="+enc_langs(g,to) +lexer+"&unlexer=text&from="+g+from+"&to="+enc_langs(g,to)
+"&start="+start+"&limit="+limit,extract,errcont) +"&start="+start+"&limit="+limit,extract,errcont)
} }
@@ -78,16 +78,16 @@ gftranslate.wordforword=function(source,from,to,cont) {
var g=gftranslate.grammar var g=gftranslate.grammar
var lexer="&lexer=text" var lexer="&lexer=text"
if(from=="Chi") lexer="",source=source.split("").join(" ") if(from=="Chi") lexer="",source=source.split("").join(" ")
var encsrc=encodeURIComponent(source)
function errcont(text,code) { cont([{error:code+" "+text}]) } function errcont(text,code) { cont([{error:code+" "+text}]) }
function extract(result) { function extract(result) {
cont(unspace_translations(g,result[0].translations)) cont(unspace_translations(g,result[0].translations))
} }
var enc_to = enc_langs(g,to) var enc_to = enc_langs(g,to)
var too_long=check_limit(from,encsrc) var too_long=check_limit(from,source)
if(too_long) cont([{error:too_long}]) if(too_long) cont([{error:too_long}])
else else
gftranslate.call("?command=c-wordforword&input="+encsrc gftranslate.call("?command=c-wordforword&input="
+encodeURIComponent(source)
+lexer+"&unlexer=text&from="+g+from+"&to="+enc_to +lexer+"&unlexer=text&from="+g+from+"&to="+enc_to
,extract,errcont) ,extract,errcont)
} }