forked from GitHub/gf-core
translator: segment imported text based on punctuation
This commit is contained in:
@@ -16,9 +16,13 @@
|
||||
|
||||
<p>
|
||||
This is a simple bilingual document editor. Documents consist of a sequence
|
||||
of segments that are translated independently. The user can add segments
|
||||
in the source language and obtain automatically translated segments in
|
||||
the target language. If an unsatisfactory automatic translation is
|
||||
of segments that are translated independently. The user can import text
|
||||
in the source language and obtain automatically translated text in
|
||||
the target language. Imported text can be segmented based on punctuation.
|
||||
Optionally, one can also use line breaks or blank lines to indicate segmentation
|
||||
in imported text.
|
||||
|
||||
<p>If an unsatisfactory automatic translation is
|
||||
obtained, the user can click on it and replace it with a manual translation.
|
||||
If multiple translations are obtained, one of them is shown by default and
|
||||
the other ones are available in a popup menu.
|
||||
@@ -35,8 +39,6 @@ closed and reopened later.
|
||||
<ul>
|
||||
<li>Text can be imported/exported by copying and pasting, but other ways
|
||||
could be added.
|
||||
<li>Segmentation of imported text based on punctuation. (Currently, segments
|
||||
must be separated by line breaks or blank lines.)
|
||||
<li>GF's lexer/unlexer is used to allow for more natural looking text, but
|
||||
the unlexer does the wrong thing if the first word of a sentence is supposed
|
||||
to be capitalized, e.g. "I am ready." and "Spanish wine is good."
|
||||
@@ -52,7 +54,7 @@ closed and reopened later.
|
||||
|
||||
<hr>
|
||||
<div class=modtime><small>
|
||||
<!-- hhmts start --> Last modified: Mon May 28 18:36:10 CEST 2012 <!-- hhmts end -->
|
||||
<!-- hhmts start --> Last modified: Tue May 29 16:30:58 CEST 2012 <!-- hhmts end -->
|
||||
</small></div>
|
||||
<address>
|
||||
<a href="http://www.cse.chalmers.se/~hallgren/">TH</a>
|
||||
|
||||
@@ -3,8 +3,12 @@ h1 { float: right; margin: 0; font-size: 150%; }
|
||||
h2 { font-size: 120%; }
|
||||
h3 { font-size: 100%; }
|
||||
|
||||
div.pagehead { font-family: sans-serif;
|
||||
background-color: #ccc;
|
||||
div.pagehead {
|
||||
font-family: sans-serif;
|
||||
/*position: fixed; top: 5px; left: 5px; right: 5px; z-index: 2;*/
|
||||
background-color: #d0d0d0;
|
||||
padding: 1px 5px;
|
||||
border-radius: 5px;
|
||||
}
|
||||
table.menubar td { padding: 5px; }
|
||||
table.menubar dl, td.options > div > dl, dl.popupmenu {
|
||||
@@ -24,6 +28,7 @@ table.menubar td:hover, table.menubar dt:hover, dl.popupmenu > dt:hover {
|
||||
table table dl { left: 6em; }
|
||||
table.menubar dt { white-space: nowrap; }
|
||||
div.document {
|
||||
/*margin-top: 7ex;*/
|
||||
clear: both;
|
||||
background: white;
|
||||
border: 2px solid #009;
|
||||
@@ -50,10 +55,12 @@ td.options > div > dl {
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
td.source input[name=it], td.target input[name=it], textarea {
|
||||
width: 100%; font-family: inherit; font-size: inherit;
|
||||
td.source input[name=it], td.target input[name=it], textarea, input[name=punctchars] {
|
||||
font-family: inherit; font-size: inherit;
|
||||
}
|
||||
|
||||
textarea { width: 100% }
|
||||
|
||||
table.paralleltexts td {
|
||||
vertical-align: baseline;
|
||||
line-height: 130%;
|
||||
|
||||
@@ -274,28 +274,36 @@ Translator.prototype.import=function(el) {
|
||||
function restore() {
|
||||
t.redraw()
|
||||
}
|
||||
function done2() {
|
||||
function done() {
|
||||
var text=inp.value
|
||||
var ls=text.split("\n")
|
||||
var segs= paras.firstChild.checked ? join_paragraphs(ls) : ls
|
||||
var segs= punct.firstChild.checked
|
||||
? split_punct(text,punctchars.value)
|
||||
: paras.firstChild.checked
|
||||
? join_paragraphs(ls)
|
||||
: ls
|
||||
for(var i in segs)
|
||||
t.document.segments.push(new_segment(segs[i]))
|
||||
restore()
|
||||
return false
|
||||
}
|
||||
var inp=node("textarea",{name:"it",value:"",rows:"10"})
|
||||
var punct=radiobutton("separator","punct",
|
||||
"Punctuation indicates where segments end: ",null,true)
|
||||
var lines=radiobutton("separator","lines",
|
||||
"Segments are separated by line breaks",null,true)
|
||||
"Segments are separated by line breaks",null,false)
|
||||
var paras=radiobutton("separator","paras",
|
||||
"Segments are separated by blank lines",null,false)
|
||||
var e=node("form",{onsubmit:done2},
|
||||
[wrap("h3",text("Import text")),
|
||||
var punctchars=node("input",{name:"punctchars",value:".?!",size:"5"})
|
||||
var lang=concname(t.document.options.from)
|
||||
var e=node("form",{class:"import"},
|
||||
[wrap("h3",text("Import text ("+lang+")")),
|
||||
inp,
|
||||
wrap("dl",map(dt,[lines,paras])),
|
||||
wrap("dl",[dt([punct,punctchars]),dt(lines),dt(paras)]),
|
||||
submit(), button("Cancel",restore)])
|
||||
|
||||
t.view.appendChild(e)
|
||||
e.onsubmit=done2
|
||||
e.onsubmit=done
|
||||
inp.focus();
|
||||
}
|
||||
setTimeout(imp,100)
|
||||
@@ -556,10 +564,17 @@ function join_paragraphs(lines) {
|
||||
return paras
|
||||
}
|
||||
|
||||
/*
  Split text into segments, ending a segment after each punctuation character.

  text:  the text to segment.
  punct: a string whose individual characters are treated as segment-ending
         punctuation, e.g. ".?!". The characters are taken literally: the
         ones that are special inside a regular expression character class
         (\ ] ^ -) are escaped, so input like ".-!" or "]" neither throws nor
         silently matches the wrong set of characters.

  Returns an array of trimmed segments. Each segment keeps the punctuation
  character that terminated it. A trailing empty segment (text ending in
  punctuation and/or white space, or empty text) is dropped.
*/
function split_punct(text,punct) {
    // Escape the characters that have a special meaning inside [...]
    var cls=punct.replace(/[\\\]\^\-]/g,"\\$&")
    // The capturing group makes split() keep the separators, so the result
    // alternates: text, punctuation, text, punctuation, ..., text
    var ss=text.split(new RegExp("(["+cls+"])"))
    var segs=[];
    // Glue every piece of text together with the punctuation that follows it
    for(var i=0;i<ss.length;i+=2) segs.push((ss[i]+(ss[i+1]||"")).trim())
    if(segs.length>0 && segs[segs.length-1]=="") segs.pop();
    return segs
}
|
||||
|
||||
/* --- DOM Support ---------------------------------------------------------- */
|
||||
|
||||
// Build an "a" node whose href attribute is url, passing linked on to node()
function a(url,linked) {
    var attrs={href:url};
    return node("a",attrs,linked);
}
|
||||
// Shorthand for wrap("li",xs)
function li(xs) {
    var tag="li";
    return wrap(tag,xs);
}
|
||||
// Prefix a piece of JavaScript source text with the "javascript:" scheme
function jsurl(js) {
    var scheme="javascript:";
    return scheme+js;
}
|
||||
|
||||
// Put node in the place of ref, by asking ref's parent to replace the child
function replaceNode(node,ref) {
    var parent=ref.parentNode;
    parent.replaceChild(node,ref);
}
|
||||
|
||||
Reference in New Issue
Block a user