mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-23 19:42:50 -06:00
translator: segment imported text based on punctuation
This commit is contained in:
@@ -16,9 +16,13 @@
|
|||||||
|
|
||||||
<p>
|
<p>
|
||||||
This is a simple bilingual document editor. Documents consist of a sequence
|
This is a simple bilingual document editor. Documents consist of a sequence
|
||||||
of segments that are translated independently. The user can add segments
|
of segments that are translated independently. The user can import text
|
||||||
in the source language and obtain automatically translated segments in
|
in the source language and obtain automatically translated text in
|
||||||
the target language. If an unsatisfactory automatic translation is
|
the target language. Imported text can be segmented based on punctuation.
|
||||||
|
Optionally, one can also use line breaks or blank lines to indicate segmentation
|
||||||
|
in imported text.
|
||||||
|
|
||||||
|
<p>If an unsatisfactory automatic translation is
|
||||||
obtained, the user can click on it and replace it with a manual translation.
|
obtained, the user can click on it and replace it with a manual translation.
|
||||||
If multiple translations are obtained, one of them is shown by default and
|
If multiple translations are obtained, one of them is shown by default and
|
||||||
the other ones are available in a popup menu.
|
the other ones are available in a popup menu.
|
||||||
@@ -35,8 +39,6 @@ closed and reopened later.
|
|||||||
<ul>
|
<ul>
|
||||||
<li>Text can be imported/exported by copying and pasting, but other ways
|
<li>Text can be imported/exported by copying and pasting, but other ways
|
||||||
could be added.
|
could be added.
|
||||||
<li>Segmentation of imported text based on punctuation. (Currently, segments
|
|
||||||
must be separated by line breaks or blank lines.)
|
|
||||||
<li>GF's lexer/unlexer is used to allow for more natural looking text, but
|
<li>GF's lexer/unlexer is used to allow for more natural looking text, but
|
||||||
the unlexer does the wrong thing if the first word of a sentence is supposed
|
the unlexer does the wrong thing if the first word of a sentence is supposed
|
||||||
to be capitalized, e.g. "I am ready." and "Spanish wine is good."
|
to be capitalized, e.g. "I am ready." and "Spanish wine is good."
|
||||||
@@ -52,7 +54,7 @@ closed and reopened later.
|
|||||||
|
|
||||||
<hr>
|
<hr>
|
||||||
<div class=modtime><small>
|
<div class=modtime><small>
|
||||||
<!-- hhmts start --> Last modified: Mon May 28 18:36:10 CEST 2012 <!-- hhmts end -->
|
<!-- hhmts start --> Last modified: Tue May 29 16:30:58 CEST 2012 <!-- hhmts end -->
|
||||||
</small></div>
|
</small></div>
|
||||||
<address>
|
<address>
|
||||||
<a href="http://www.cse.chalmers.se/~hallgren/">TH</a>
|
<a href="http://www.cse.chalmers.se/~hallgren/">TH</a>
|
||||||
|
|||||||
@@ -3,8 +3,12 @@ h1 { float: right; margin: 0; font-size: 150%; }
|
|||||||
h2 { font-size: 120%; }
|
h2 { font-size: 120%; }
|
||||||
h3 { font-size: 100%; }
|
h3 { font-size: 100%; }
|
||||||
|
|
||||||
div.pagehead { font-family: sans-serif;
|
div.pagehead {
|
||||||
background-color: #ccc;
|
font-family: sans-serif;
|
||||||
|
/*position: fixed; top: 5px; left: 5px; right: 5px; z-index: 2;*/
|
||||||
|
background-color: #d0d0d0;
|
||||||
|
padding: 1px 5px;
|
||||||
|
border-radius: 5px;
|
||||||
}
|
}
|
||||||
table.menubar td { padding: 5px; }
|
table.menubar td { padding: 5px; }
|
||||||
table.menubar dl, td.options > div > dl, dl.popupmenu {
|
table.menubar dl, td.options > div > dl, dl.popupmenu {
|
||||||
@@ -24,6 +28,7 @@ table.menubar td:hover, table.menubar dt:hover, dl.popupmenu > dt:hover {
|
|||||||
table table dl { left: 6em; }
|
table table dl { left: 6em; }
|
||||||
table.menubar dt { white-space: nowrap; }
|
table.menubar dt { white-space: nowrap; }
|
||||||
div.document {
|
div.document {
|
||||||
|
/*margin-top: 7ex;*/
|
||||||
clear: both;
|
clear: both;
|
||||||
background: white;
|
background: white;
|
||||||
border: 2px solid #009;
|
border: 2px solid #009;
|
||||||
@@ -50,10 +55,12 @@ td.options > div > dl {
|
|||||||
white-space: nowrap;
|
white-space: nowrap;
|
||||||
}
|
}
|
||||||
|
|
||||||
td.source input[name=it], td.target input[name=it], textarea {
|
td.source input[name=it], td.target input[name=it], textarea, input[name=punctchars] {
|
||||||
width: 100%; font-family: inherit; font-size: inherit;
|
font-family: inherit; font-size: inherit;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
textarea { width: 100% }
|
||||||
|
|
||||||
table.paralleltexts td {
|
table.paralleltexts td {
|
||||||
vertical-align: baseline;
|
vertical-align: baseline;
|
||||||
line-height: 130%;
|
line-height: 130%;
|
||||||
|
|||||||
@@ -274,28 +274,36 @@ Translator.prototype.import=function(el) {
|
|||||||
function restore() {
|
function restore() {
|
||||||
t.redraw()
|
t.redraw()
|
||||||
}
|
}
|
||||||
function done2() {
|
function done() {
|
||||||
var text=inp.value
|
var text=inp.value
|
||||||
var ls=text.split("\n")
|
var ls=text.split("\n")
|
||||||
var segs= paras.firstChild.checked ? join_paragraphs(ls) : ls
|
var segs= punct.firstChild.checked
|
||||||
|
? split_punct(text,punctchars.value)
|
||||||
|
: paras.firstChild.checked
|
||||||
|
? join_paragraphs(ls)
|
||||||
|
: ls
|
||||||
for(var i in segs)
|
for(var i in segs)
|
||||||
t.document.segments.push(new_segment(segs[i]))
|
t.document.segments.push(new_segment(segs[i]))
|
||||||
restore()
|
restore()
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
var inp=node("textarea",{name:"it",value:"",rows:"10"})
|
var inp=node("textarea",{name:"it",value:"",rows:"10"})
|
||||||
|
var punct=radiobutton("separator","punct",
|
||||||
|
"Punctuation indicates where segments end: ",null,true)
|
||||||
var lines=radiobutton("separator","lines",
|
var lines=radiobutton("separator","lines",
|
||||||
"Segments are separated by line breaks",null,true)
|
"Segments are separated by line breaks",null,false)
|
||||||
var paras=radiobutton("separator","paras",
|
var paras=radiobutton("separator","paras",
|
||||||
"Segments are separated by blank lines",null,false)
|
"Segments are separated by blank lines",null,false)
|
||||||
var e=node("form",{onsubmit:done2},
|
var punctchars=node("input",{name:"punctchars",value:".?!",size:"5"})
|
||||||
[wrap("h3",text("Import text")),
|
var lang=concname(t.document.options.from)
|
||||||
|
var e=node("form",{class:"import"},
|
||||||
|
[wrap("h3",text("Import text ("+lang+")")),
|
||||||
inp,
|
inp,
|
||||||
wrap("dl",map(dt,[lines,paras])),
|
wrap("dl",[dt([punct,punctchars]),dt(lines),dt(paras)]),
|
||||||
submit(), button("Cancel",restore)])
|
submit(), button("Cancel",restore)])
|
||||||
|
|
||||||
t.view.appendChild(e)
|
t.view.appendChild(e)
|
||||||
e.onsubmit=done2
|
e.onsubmit=done
|
||||||
inp.focus();
|
inp.focus();
|
||||||
}
|
}
|
||||||
setTimeout(imp,100)
|
setTimeout(imp,100)
|
||||||
@@ -556,10 +564,17 @@ function join_paragraphs(lines) {
|
|||||||
return paras
|
return paras
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
  Split text into segments that end with punctuation.

  text:  the text to segment.
  punct: a string containing the characters that terminate a segment,
         e.g. ".?!". Every character is taken literally.

  Returns an array of trimmed, non-empty segments. Each segment keeps its
  terminating punctuation. A run of consecutive punctuation characters
  (e.g. "..." or "?!") stays attached to the segment it terminates instead
  of producing segments that consist of punctuation only. Text after the
  last punctuation character becomes a final segment of its own.
*/
function split_punct(text,punct) {
    // Without any punctuation characters the whole text is one segment.
    if(!punct) {
	var whole=text.trim()
	return whole!=="" ? [whole] : []
    }
    // The characters come from a user-editable input field, so escape the
    // ones that have a special meaning inside a regexp character class.
    var cls=punct.replace(/[\\\]^-]/g,"\\$&")
    // The capture group makes split() keep the separators: even indices hold
    // the text between separators, odd indices hold the punctuation runs.
    var ss=text.split(new RegExp("(["+cls+"]+)"))
    var segs=[]
    for(var i=0;i<ss.length;i+=2) {
	var seg=(ss[i]+(ss[i+1]||"")).trim()
	// Skip empty pieces, e.g. the one after the final punctuation mark.
	if(seg!=="") segs.push(seg)
    }
    return segs
}
|
||||||
|
|
||||||
/* --- DOM Support ---------------------------------------------------------- */
|
/* --- DOM Support ---------------------------------------------------------- */
|
||||||
|
|
||||||
function a(url,linked) { return node("a",{href:url},linked); }
|
function a(url,linked) { return node("a",{href:url},linked); }
|
||||||
function li(xs) { return wrap("li",xs); }
|
|
||||||
function jsurl(js) { return "javascript:"+js; }
|
function jsurl(js) { return "javascript:"+js; }
|
||||||
|
|
||||||
function replaceNode(node,ref) { ref.parentNode.replaceChild(node,ref) }
|
function replaceNode(node,ref) { ref.parentNode.replaceChild(node,ref) }
|
||||||
|
|||||||
Reference in New Issue
Block a user