translator: segment imported text based on punctuation

This commit is contained in:
hallgren
2012-05-29 14:47:59 +00:00
parent 8e650eafd3
commit 07e6ff8963
3 changed files with 42 additions and 18 deletions

View File

@@ -16,9 +16,13 @@
<p>
This is a simple bilingual document editor. Documents consist of a sequence
of segments that are translated independently. The user can add segments
in the source language and obtain automatically translated segments in
the target language. If an unsatisfactory automatic translation is
of segments that are translated independently. The user can import text
in the source language and obtain automatically translated text in
the target language. Imported text can be segmented based on punctuation.
Optionally, one can also use line breaks or blank lines to indicate segmentation
in imported text.
<p>If an unsatisfactory automatic translation is
obtained, the user can click on it and replace it with a manual translation.
If multiple translations are obtained, one of them is shown by default and
the other ones are available in a popup menu.
@@ -35,8 +39,6 @@ closed and reopened later.
<ul>
<li>Text can be imported/exported by copying and pasting, but other ways
could be added.
<li>Segmentation of imported text based on punctuation. (Currently, segments
must be separated by line breaks or blank lines.)
<li>GF's lexer/unlexer is used to allow for more natural looking text, but
the unlexer does the wrong thing if the first word of a sentence is supposed
to be capitalized, e.g. "I am ready." and "Spanish wine is good."
@@ -52,7 +54,7 @@ closed and reopened later.
<hr>
<div class=modtime><small>
<!-- hhmts start --> Last modified: Mon May 28 18:36:10 CEST 2012 <!-- hhmts end -->
<!-- hhmts start --> Last modified: Tue May 29 16:30:58 CEST 2012 <!-- hhmts end -->
</small></div>
<address>
<a href="http://www.cse.chalmers.se/~hallgren/">TH</a>

View File

@@ -3,8 +3,12 @@ h1 { float: right; margin: 0; font-size: 150%; }
h2 { font-size: 120%; }
h3 { font-size: 100%; }
div.pagehead { font-family: sans-serif;
background-color: #ccc;
div.pagehead {
font-family: sans-serif;
/*position: fixed; top: 5px; left: 5px; right: 5px; z-index: 2;*/
background-color: #d0d0d0;
padding: 1px 5px;
border-radius: 5px;
}
table.menubar td { padding: 5px; }
table.menubar dl, td.options > div > dl, dl.popupmenu {
@@ -24,6 +28,7 @@ table.menubar td:hover, table.menubar dt:hover, dl.popupmenu > dt:hover {
table table dl { left: 6em; }
table.menubar dt { white-space: nowrap; }
div.document {
/*margin-top: 7ex;*/
clear: both;
background: white;
border: 2px solid #009;
@@ -50,10 +55,12 @@ td.options > div > dl {
white-space: nowrap;
}
td.source input[name=it], td.target input[name=it], textarea {
width: 100%; font-family: inherit; font-size: inherit;
td.source input[name=it], td.target input[name=it], textarea, input[name=punctchars] {
font-family: inherit; font-size: inherit;
}
textarea { width: 100% }
table.paralleltexts td {
vertical-align: baseline;
line-height: 130%;

View File

@@ -274,28 +274,36 @@ Translator.prototype.import=function(el) {
function restore() {
t.redraw()
}
function done2() {
function done() {
var text=inp.value
var ls=text.split("\n")
var segs= paras.firstChild.checked ? join_paragraphs(ls) : ls
var segs= punct.firstChild.checked
? split_punct(text,punctchars.value)
: paras.firstChild.checked
? join_paragraphs(ls)
: ls
for(var i in segs)
t.document.segments.push(new_segment(segs[i]))
restore()
return false
}
var inp=node("textarea",{name:"it",value:"",rows:"10"})
var punct=radiobutton("separator","punct",
"Punctuation indicates where segments end: ",null,true)
var lines=radiobutton("separator","lines",
"Segments are separated by line breaks",null,true)
"Segments are separated by line breaks",null,false)
var paras=radiobutton("separator","paras",
"Segments are separated by blank lines",null,false)
var e=node("form",{onsubmit:done2},
[wrap("h3",text("Import text")),
var punctchars=node("input",{name:"punctchars",value:".?!",size:"5"})
var lang=concname(t.document.options.from)
var e=node("form",{class:"import"},
[wrap("h3",text("Import text ("+lang+")")),
inp,
wrap("dl",map(dt,[lines,paras])),
wrap("dl",[dt([punct,punctchars]),dt(lines),dt(paras)]),
submit(), button("Cancel",restore)])
t.view.appendChild(e)
e.onsubmit=done2
e.onsubmit=done
inp.focus();
}
setTimeout(imp,100)
@@ -556,10 +564,17 @@ function join_paragraphs(lines) {
return paras
}
/*
 * Split text into segments, ending a segment after each run of punctuation.
 *
 *   text  - the text to segment (string)
 *   punct - the characters that terminate a segment, e.g. ".?!" (string)
 *
 * Returns an array of trimmed, non-empty segments. Each segment keeps its
 * terminating punctuation; trailing text without punctuation becomes the
 * last segment.
 */
function split_punct(text,punct) {
    // punct comes straight from a user-editable input field, so characters
    // that are special inside a regex character class (\ ] ^ -) must be
    // escaped; otherwise they break or silently change the pattern.
    var cls=punct.replace(/[\\\]\^\-]/g,"\\$&")
    if(cls=="") {
        // No terminators given: the whole text is a single segment.
        var whole=text.trim()
        return whole=="" ? [] : [whole]
    }
    // The capturing group makes split() keep the delimiters: even indices
    // hold text, odd indices hold the punctuation that followed it.
    // The "+" keeps runs like "..." or "?!" attached to one segment instead
    // of producing segments that consist of a single punctuation character.
    var ss=text.split(new RegExp("(["+cls+"]+)"))
    var segs=[]
    for(var i=0;i<ss.length;i+=2) {
        var seg=(ss[i]+(ss[i+1]||"")).trim()
        if(seg!="") segs.push(seg)
    }
    return segs
}
/* --- DOM Support ---------------------------------------------------------- */
// Build an <a> element pointing at url, with linked as its content.
function a(url,linked) {
    var attrs={href:url};
    return node("a",attrs,linked);
}
// Wrap xs in an <li> element.
function li(xs) {
    var item=wrap("li",xs);
    return item;
}
// Turn a snippet of JavaScript source into a javascript: URL.
function jsurl(js) {
    var scheme="javascript:";
    return scheme+js;
}
// Put node in the place of ref, under ref's parent. Returns nothing.
function replaceNode(node,ref) {
    var parent=ref.parentNode
    parent.replaceChild(node,ref)
}