translator: segment imported text based on punctuation

2026-06-27 20:06:28 -06:00 · 2012-05-29 14:47:59 +00:00
parent 8e650eafd3
commit 07e6ff8963
3 changed files with 42 additions and 18 deletions
@@ -16,9 +16,13 @@
 <p>
 This is a simple bilingual document editor. Documents consist of a sequence
-of segments that are translated independently. The user can add segments
+of segments that are translated independently. The user can import text
-in the source language and obtain automatically translated segments in
+in the source language and obtain automatically translated text in
-the target language. If an unsatisfactory automatic translation is
+the target language. Imported text can be segmented based on punctuation.
 Optionally, one can also use line breaks or blank lines to indicate segmentation
 in imported text.
 <p>If an unsatisfactory automatic translation is
 obtained, the user can click on it and replace it with a manual translation.
 If multiple translations are obtained, one of them is shown by default and
 the other ones are available in a popup menu.
@@ -35,8 +39,6 @@ closed and reopened later.
 <ul>
  <li>Text can be imported/exported by copying and pasting, but other ways
  could be added.
  <li>Segmentation of imported text based on punctuation. (Currently, segments
  must be separated by line breaks or blank lines.)
  <li>GF's lexer/unlexer is used to allow for more natural looking text, but
  the unlexer does the wrong thing if the first word of a sentence is supposed
  to be capitalized, e.g. "I am ready." and "Spanish wine is good."
@@ -52,7 +54,7 @@ closed and reopened later.
 <hr>
 <div class=modtime><small>
-<!-- hhmts start --> Last modified: Mon May 28 18:36:10 CEST 2012 <!-- hhmts end -->
+<!-- hhmts start --> Last modified: Tue May 29 16:30:58 CEST 2012 <!-- hhmts end -->
  </small></div>
 <address>
 <a href="http://www.cse.chalmers.se/~hallgren/">TH</a>
@@ -3,8 +3,12 @@ h1 { float: right; margin: 0; font-size: 150%; }
 h2 { font-size: 120%; }
 h3 { font-size: 100%; }
-div.pagehead { font-family: sans-serif;
+div.pagehead {
-               background-color: #ccc;
+    font-family: sans-serif;
    /*position: fixed; top: 5px; left: 5px; right: 5px; z-index: 2;*/
    background-color: #d0d0d0;
    padding: 1px 5px;
    border-radius: 5px;
 }
 table.menubar td { padding: 5px; }
 table.menubar dl, td.options > div > dl, dl.popupmenu {
@@ -24,6 +28,7 @@ table.menubar td:hover, table.menubar dt:hover, dl.popupmenu > dt:hover {
 table table dl { left: 6em; }
 table.menubar dt { white-space: nowrap; }
 div.document {
    /*margin-top: 7ex;*/
    clear: both;
    background: white;
    border: 2px solid #009;
@@ -50,10 +55,12 @@ td.options > div > dl {
    white-space: nowrap;
 }
-td.source input[name=it], td.target input[name=it], textarea {
+td.source input[name=it], td.target input[name=it], textarea, input[name=punctchars] {
-    width: 100%; font-family: inherit; font-size: inherit;
+    font-family: inherit; font-size: inherit;
 }
 textarea { width: 100% }
 table.paralleltexts td {
    vertical-align: baseline;
    line-height: 130%;
@@ -274,28 +274,36 @@ Translator.prototype.import=function(el) {
 	function restore() {
 	    t.redraw()
 	}
-	function done2() {
+	function done() {
 	    var text=inp.value
 	    var ls=text.split("\n")
-	    var segs= paras.firstChild.checked ? join_paragraphs(ls) : ls
+	    var segs= punct.firstChild.checked 
 	              ? split_punct(text,punctchars.value)
 		      : paras.firstChild.checked 
 		        ? join_paragraphs(ls)
 		        : ls
 	    for(var i in segs)
 		t.document.segments.push(new_segment(segs[i]))
 	    restore()
 	    return false
 	}
 	var inp=node("textarea",{name:"it",value:"",rows:"10"})
 	var punct=radiobutton("separator","punct",
 			      "Punctuation indicates where segments end: ",null,true)
 	var lines=radiobutton("separator","lines",
-			      "Segments are separated by line breaks",null,true)
+			      "Segments are separated by line breaks",null,false)
 	var paras=radiobutton("separator","paras",
 			      "Segments are separated by blank lines",null,false)
-	var e=node("form",{onsubmit:done2},
+	var punctchars=node("input",{name:"punctchars",value:".?!",size:"5"})
-		   [wrap("h3",text("Import text")),
+	var lang=concname(t.document.options.from)
 	var e=node("form",{class:"import"},
 		   [wrap("h3",text("Import text ("+lang+")")),
 		    inp,
-		    wrap("dl",map(dt,[lines,paras])),
+		    wrap("dl",[dt([punct,punctchars]),dt(lines),dt(paras)]),
 		    submit(), button("Cancel",restore)])
 	t.view.appendChild(e)
-	e.onsubmit=done2
+	e.onsubmit=done
 	inp.focus();
    }
    setTimeout(imp,100)
@@ -556,10 +564,17 @@ function join_paragraphs(lines) {
    return paras
 }
 /* Split text into segments, ending a segment after every punctuation character.
    text:  the text to segment
    punct: a string listing the characters that end a segment, e.g. ".?!"
    Returns an array of trimmed segments; each segment keeps the punctuation
    character that ended it. A trailing empty segment (text that ends with
    punctuation, or empty text) is dropped. */
 function split_punct(text,punct) {
    // punct is typed in by the user, so escape the characters that have a
    // special meaning inside a regexp character class: \ ] ^ -
    var cls=punct.replace(/[\\\]^-]/g,"\\$&")
    // The capturing group makes split() keep the separators, so ss alternates
    // between text (even indices) and punctuation characters (odd indices)
    var ss=text.split(new RegExp("(["+cls+"])"))
    var segs=[];
    // Glue each piece of text to the punctuation character that follows it
    for(var i=0;i<ss.length;i+=2) segs.push((ss[i]+(ss[i+1]||"")).trim())
    if(segs.length>0 && segs[segs.length-1]==="") segs.pop();
    return segs
 }
 /* --- DOM Support ---------------------------------------------------------- */
 // Build an "a" element whose href is url, passing linked on to node()
 function a(url,linked) {
    var attrs={href:url}
    return node("a",attrs,linked)
 }
 // Wrap xs in an "li" element
 function li(xs) {
    var item=wrap("li",xs)
    return item
 }
 // Turn a piece of JavaScript source text into a "javascript:" URL
 function jsurl(js) {
    var scheme="javascript:"
    return scheme+js
 }
 // Put node in the place currently occupied by ref in ref's parent
 function replaceNode(node,ref) {
    var parent=ref.parentNode
    parent.replaceChild(node,ref)
 }