diff --git a/examples/phrasebook/GreetingsSpa.gf b/examples/phrasebook/GreetingsSpa.gf
index 6008688f6..673bac85e 100644
--- a/examples/phrasebook/GreetingsSpa.gf
+++ b/examples/phrasebook/GreetingsSpa.gf
@@ -11,6 +11,9 @@ lin
GDamn = ss "joder" ;
GExcuse = ss "perdón" ;
GExcusePol = ss "perdone" ;
+ GCongratulations = ss "felicitaciones" ;
+ GGoodLuck = ss "buena suerte" ;
+ GHappyBirthday = ss "feliz cumpleaños" ;
GGoodMorning, GGoodDay = ss "buenos días" ;
GGoodEvening = ss "buenas tardes" ;
GGoodNight = ss "buenas noches" ;
diff --git a/examples/phrasebook/Implementation.html b/examples/phrasebook/Implementation.html
index 41bab9f70..ff2275979 100644
--- a/examples/phrasebook/Implementation.html
+++ b/examples/phrasebook/Implementation.html
@@ -106,8 +106,10 @@ gfdoc - a rudimentary GF document generator.
Too property = mkAP too_AdA (mkAP property) ;
PropQuality property = mkAP property ;
- ThePlace kind = placeNP the_Det kind ;
- APlace kind = placeNP a_Det kind ;
+ ThePlace kind = let dd = if_then_else Det kind.isPl thePl_Det theSg_Det
+ in placeNP dd kind ;
+ APlace kind = let dd = if_then_else Det kind.isPl aPl_Det aSg_Det -- indefinite article; number follows kind.isPl
+ in placeNP dd kind ;
IMale, IFemale = mkPerson i_Pron ;
YouFamMale, YouFamFemale = mkPerson youSg_Pron ;
@@ -130,7 +132,11 @@ gfdoc - a rudimentary GF document generator.
NNumeral n = mkCard <lin Numeral n : Numeral> ;
- AHave p obj = mkCl p.name have_V2 obj ;
+ SHave p obj = mkS (mkCl p.name have_V2 obj) ;
+ SHaveNo p k = mkS negativePol (mkCl p.name have_V2 (mkNP aPl_Det k)) ;
+ SHaveNoMass p m = mkS negativePol (mkCl p.name have_V2 (mkNP m)) ;
+ QDoHave p obj = mkQS (mkQCl (mkCl p.name have_V2 obj)) ;
+
AHaveCurr p curr = mkCl p.name have_V2 (mkNP aPl_Det curr) ;
ACitizen p n = mkCl p.name n ;
ABePlace p place = mkCl p.name place.at ;
@@ -166,12 +172,20 @@ These are used in Words for each language.
} ;
NPPlace : Type = {name : NP ; at : Adv ; to : Adv} ;
- CNPlace : Type = {name : CN ; at : Prep ; to : Prep} ;
+ CNPlace : Type = {name : CN ; at : Prep ; to : Prep; isPl : Bool} ;
mkCNPlace : CN -> Prep -> Prep -> CNPlace = \p,i,t -> {
name = p ;
at = i ;
- to = t
+ to = t ;
+ isPl = False
+ } ;
+
+ mkCNPlacePl : CN -> Prep -> Prep -> CNPlace = \p,i,t -> {
+ name = p ;
+ at = i ;
+ to = t ;
+ isPl = True
} ;
placeNP : Det -> CNPlace -> NPPlace = \det,kind ->
@@ -344,7 +358,7 @@ Means of transportation
Actions: the predication patterns are very often language-dependent.
- AHasAge p num = mkCl p.name (mkNP (mkNP num L.year_N) (mkAdv "old"));
+ AHasAge p num = mkCl p.name (mkNP (mkNP num L.year_N) (ParadigmsEng.mkAdv "old"));
AHasChildren p num = mkCl p.name have_V2 (mkNP num L.child_N) ;
AHasRoom p num = mkCl p.name have_V2
(mkNP (mkNP a_Det (mkN "room")) (SyntaxEng.mkAdv for_Prep (mkNP num (mkN "person")))) ;
@@ -456,10 +470,10 @@ auxiliaries
mkNPDay day (SyntaxEng.mkAdv on_Prep day)
(SyntaxEng.mkAdv on_Prep (mkNP a_Quant plNum (mkCN (mkN d)))) ;
- mkCompoundPlace : Str -> Str -> Str -> {name : CN ; at : Prep ; to : Prep} = \comp, p, i ->
+ mkCompoundPlace : Str -> Str -> Str -> {name : CN ; at : Prep ; to : Prep; isPl : Bool} = \comp, p, i ->
mkCNPlace (mkCN (P.mkN comp (mkN p))) (P.mkPrep i) to_Prep ;
- mkPlace : Str -> Str -> {name : CN ; at : Prep ; to : Prep} = \p,i ->
+ mkPlace : Str -> Str -> {name : CN ; at : Prep ; to : Prep; isPl : Bool} = \p,i ->
mkCNPlace (mkCN (mkN p)) (P.mkPrep i) to_Prep ;
open_Adv = P.mkAdv "open" ;
diff --git a/examples/phrasebook/Makefile b/examples/phrasebook/Makefile
index f0dc1826d..4e36e2988 100644
--- a/examples/phrasebook/Makefile
+++ b/examples/phrasebook/Makefile
@@ -29,7 +29,7 @@ doc:
rm -f Ontology.gf
cat SentencesI.gf WordsEng.gf >Implementation.gf
gfdoc Implementation.gf
- txt2tags -thtml phrasebook.txt
+ txt2tags -thtml --toc phrasebook.txt
rm -f Ontology.gf Implementation.gf
upload:: Phrasebook.pgf
diff --git a/examples/phrasebook/Ontology.html b/examples/phrasebook/Ontology.html
index 0765ac4e0..48059049a 100644
--- a/examples/phrasebook/Ontology.html
+++ b/examples/phrasebook/Ontology.html
@@ -147,12 +147,16 @@ Determiners.
Actions are typically language-dependent, not only lexically but also
structurally. However, these ones are mostly functorial.
- AHave : Person -> Object -> Action ; -- you have pizzas
+ SHave : Person -> Object -> Sentence ; -- you have beer
+ SHaveNo : Person -> Kind -> Sentence ; -- you have no apples
+ SHaveNoMass : Person -> MassKind -> Sentence ; -- you have no beer
+ QDoHave : Person -> Object -> Question ; -- do you have beer
+
AHaveCurr : Person -> Currency -> Action ; -- you have dollars
ACitizen : Person -> Citizenship -> Action ; -- you are Swedish
ABePlace : Person -> Place -> Action ; -- you are in the bar
- ByTransp : Transport -> ByTransport ; -- by bus
+ ByTransp : Transport -> ByTransport ; -- by bus
}
diff --git a/examples/phrasebook/WordsFin.gf b/examples/phrasebook/WordsFin.gf
index 29494ccb2..0e4e7d14c 100644
--- a/examples/phrasebook/WordsFin.gf
+++ b/examples/phrasebook/WordsFin.gf
@@ -208,7 +208,9 @@ concrete WordsFin of Words = SentencesFin **
mkQS (mkQCl (mkIP which_IDet trans.name) (mkVP (mkVP L.go_V) place.to)) ;
IsTranspPlace trans place =
- mkQS (mkQCl (E.AdvPredNP place.to L.go_V (E.PartCN (trans.name)))) ;
+ mkQS (mkQCl (mkCl (mkVP (mkVP (mkVP (mkV "päästä")) trans.by) place.to))) ;
+ -- pääseekö keskustaan bussilla
+ -- mkQS (mkQCl (E.AdvPredNP place.to L.go_V (E.PartCN (trans.name)))) ;
-- meneekö keskustaan bussia
-- modifiers of places
diff --git a/examples/phrasebook/missing.txt b/examples/phrasebook/missing.txt
index e05b4c3c2..88a998dfb 100644
--- a/examples/phrasebook/missing.txt
+++ b/examples/phrasebook/missing.txt
@@ -11,5 +11,5 @@ PhrasebookIta :
PhrasebookNor :
PhrasebookPol :
PhrasebookRon :
-PhrasebookSpa : GCongratulations GGoodLuck GHappyBirthday
+PhrasebookSpa :
PhrasebookSwe :
diff --git a/examples/phrasebook/phrasebook.html b/examples/phrasebook/phrasebook.html
index fae61468a..2d36e5fc0 100644
--- a/examples/phrasebook/phrasebook.html
+++ b/examples/phrasebook/phrasebook.html
@@ -2,6 +2,7 @@
+
MOLTO Multilingual Phrasebook
MOLTO Multilingual Phrasebook
@@ -10,6 +11,25 @@
Showcase for project FP7-ICT-247914, Deliverable D10.2.
+
+
+
+
+
+
+
+
@@ -18,6 +38,8 @@ Showcase for project FP7-ICT-247914, Deliverable D10.2.
History
+- 2 June. Version 1.0 released!
+
- 29 May. Link to Google translate with the current language pair and phrase.
- 27 May. Polish added.
- 26 May. Version 0.9:
Catalan added, mass/count noun distinction to reduce overgeneration,
@@ -49,33 +71,34 @@ History
+
Purpose
This phrasebook is a program for translating touristic phrases
-between the 15 European languages included in the
+between 14 European languages included in the
MOLTO project
(Multilingual On-Line Translation):
- Bulgarian, Catalan, Danish, Dutch, English,
Finnish, French, German, Italian, Norwegian,
- Polish, Romanian, Russian, Spanish, Swedish
+ Polish, Romanian, Spanish, Swedish
It is implemented by using the GF programming language
(Grammatical Framework).
-It is the first demo for the MOLTO project, released in the third month (by June 2010)
-but to be updated in the course of the project.
+It is the first demo for the MOLTO project, released in the third month (by June 2010).
+The first version is a very small system, but it will be extended in the course of the project.
-The phrasebook has the following requirements:
+The phrasebook has the following requirement specification:
- high quality: reliable translations to express yourself in any language
- translation between all pairs of languages
- runnable in web browsers
-
- runnable on mobile phones (also off-line: forthcoming for Android phones)
+
- runnable on mobile phones (forthcoming: Android phones)
- easily extensible by new words (forthcoming: semi-automatic extensions by users)
@@ -84,39 +107,91 @@ The phrasebook is available as open-source software, licensed under GNU LGPL.
The source code resides in
code.haskell.org/gf/examples/phrasebook/
-
-Current status (27 May 2010):
-
-
-- small but useful coverage in abstract syntax
-
- reasonable implementations for all MOLTO languages except Russian
-
- works on web browsers calling a server
-
- web service not yet released, but preliminarily available in
- http://www.grammaticalframework.org/demos/phrasebook/
-
-
+
Points illustrated
-Interlingua-based translation.
+Interlingua-based translation
+
+- we translate meanings, rather than words
+
+
-Incremental parsing.
+Incremental parsing
+
+- the user is at every point guided by the list of possible next words
+
+
-The use of resource grammars and functors.
+The use of resource grammars and functors
+
+
-Example-based grammar writing and grammar induction from statistical models (Google).
+Example-based grammar writing and grammar induction from statistical models
+(Google translate)
+
+- many of the grammars were created semi-automatically by generalization from
+ examples
+
+
-Compile-time transfer: especially, in Action in Words.
+Compile-time transfer: especially, in Action in Words
+
+- the structural differences between languages are treated at compile time,
+ for maximal run-time efficiency
+
+
-Quasi-incremental translation: many basic types are also used as phrases.
+Quasi-incremental translation: many basic types are also used as phrases
+
+- one can translate both words and complete sentences, and get intermediate results
+
+
-Disambiguation, esp. of politeness distinctions.
+Disambiguation, esp. of politeness distinctions
+
+- if a phrase has many translations, each of them is shown and given an explanation
+ (currently just in English, later in any source language)
+
+
+
+Fall-back to statistical translation
+
+
+- currently just a link to Google translate (forthcoming: tailor-made statistical models)
+
+
+
+Feed-back from users
+
+
+- you are welcome to send comments, bug reports, and better translation suggestions!
+
+
+
+The level of skills involved in grammar development
+
+
+- testing different configurations (see table below)
+
+
+
+Grammar testing
+
+
+- use of treebanks with guided random generation for initial evaluation and regression testing
+
+
+
Ontology
The abstract syntax defines the ontology behind the phrasebook.
@@ -128,6 +203,7 @@ and
Words.gf
by make doc.
+
Files
Sentences: general syntactic structures implementable in a uniform way.
@@ -164,18 +240,9 @@ Here is the module structure as produced in GF by
+
To Do
-Improved translation interface
-
-
-- a nicer way to show disambiguation (maybe hidden by default)
-
-
-
-Complete the missing words and phrases
-
-
Disambiguation grammars for other languages than English
@@ -183,20 +250,15 @@ Extend the abstract lexicon in Words by hand or (semi)automatically
- food stuff
-
- languages
- places
+
- actions
-Link to Google translate, for fall-back and for comparison
-
-
-Feedback facility in the UI
-
-
-Customizable distribution: make your own selection of the 2^15 language subsets
+Customizable phone distribution: make your own selection of the 2^14 language subsets
when downloading the phrasebook to a phone
+
How to contribute
The basic things "everyone" can do is
@@ -253,15 +315,337 @@ Here are the steps to follow for contributors:
Don't compromise quality to gain coverage: non multa sed multum!
-Acknowledgements
+
+Effort and cost
+
+
+| Language |
+Grammarian's language skills |
+Grammarian's GF skills |
+Informant used for development |
+Informant used for testing |
+Use of external tools |
+Impact of external tools |
+Changes on the resource grammar |
+Development time |
+
+
+| Bulgarian |
+### |
+### |
+- |
+- |
+- |
+? |
+# |
+## |
+
+
+| Catalan |
+### |
+### |
+- |
+- |
+- |
+? |
+# |
+# |
+
+
+| Danish |
+- |
+### |
++ |
++ |
++ |
+## |
+## |
+## |
+
+
+| Dutch |
+- |
+### |
++ |
++ |
++ |
+## |
+# |
+## |
+
+
+| English |
+## |
+### |
+- |
++ |
+- |
+- |
+- |
+# |
+
+
+| Finnish |
+### |
+### |
+- |
+- |
+- |
+? |
+# |
+## |
+
+
+| French |
+## |
+### |
+- |
++ |
+- |
+? |
+# |
+# |
+
+
+| German |
+# |
+### |
++ |
++ |
++ |
+## |
+## |
+### |
+
+
+| Italian |
+### |
+# |
+- |
+- |
+- |
+? |
+## |
+## |
+
+
+| Norwegian |
+# |
+### |
++ |
+- |
++ |
+## |
+# |
+## |
+
+
+| Polish |
+### |
+### |
++ |
++ |
++ |
+# |
+# |
+## |
+
+
+| Romanian |
+### |
+### |
+- |
+- |
++ |
+# |
+### |
+### |
+
+
+| Spanish |
+## |
+# |
+- |
+- |
+- |
+? |
+- |
+## |
+
+
+| Swedish |
+## |
+### |
+- |
++ |
+- |
+? |
+- |
+## |
+
+
+
+
+Explanation on scores
+
+
+- Grammarian's language skills
+
+ - - : no skills
+
- # : passive knowledge
+
- ## : fluent non-native
+
- ### : native speaker
+
+
+
+
+- Grammarian's GF skills
+
+ - - : no skills
+
- # : basic skills (2-day GF tutorial)
+
- ## : medium skills (previous experience of similar task)
+
- ### : advanced skills (resource grammar writer/substantial contributor)
+
+
+
+
+- Informant used for development/Informant used for testing/Use of external tools
+
+
+
+
+- Impact of external tools
+
+ - ? : not investigated
+
- - : no effect on the Phrasebook
+
- # : small impact (literal translation, simple idioms)
+
- ## : medium effect (translation of more forms of words, contextual preposition)
+
- ### : great effect (no extra work needed, translations are correct)
+
+
+
+
+- Changes on the resource grammars
+
+ - - : no changes
+
- # : 1-3 minor changes
+
- ## : 4-10 minor changes, 1-3 medium changes
+
- ### : >10 changes of any kind
+
+
+
+
+- Overall effort (including extra work on resource grammars)
+
+ - # : less than 8 person hours
+
- ## : 8-24 person hours
+
- ### : >24 person hours
+
+
+
+
+Example-based grammar writing prototype
+
+The figure presents the process of creating a Phrasebook using an example-based
+approach for the language X, where X = {Danish, Dutch, German, Norwegian}.
+
+
+
+
+
+- the first step assumes an analysis of the resource grammar and extracts the necessary
+ information that functions that build new lexical entries would need.
+ A model is built so that the proper forms of the word can be rendered,
+ and additional information, such as gender, can be inferred. The script applies
+ these rules to each entry that we want to translate into the target language, and
+ one obtains a set of constructions.
+
- they are furthermore given to an external translator tool (Google translate)
+ or a native speaker for translation. One needs the configuration file even if the
+ translator is human, because formal knowledge of grammar is not assumed.
+
+- the translations into the target language are then further processed in order to
+ build the linearizations of the categories first, decoding the information received.
+ Furthermore, having the words in the lexicon, one can parse the translations of
+ functions with the GF parser and generalize from that.
+
- the resulting grammar is tested with the aid of a script that generates
+ constructions covering all the functions and categories from the grammar, along
+ with some other constructions that proved to be problematic in some language.
+ The result of the script contains for each construction in the target language
+ its English correspondent and the abstract syntax tree. A native speaker
+ evaluates the results and if corrections are needed, the algorithm runs again
+ with the new examples. Depending on the language skills of the grammar writer,
+ the changes can be made directly into the GF files, and the correct examples
+ given by the native informant are just kept for validating the results.
+ The algorithm is repeated as long as corrections are needed.
+
+
+
+The time needed for preparing the configuration files for a grammar will not be needed
+in the future, since the files are reusable for other applications.
+The time for the second step can be saved if automatic tools, like Google translate
+are used. This is only possible in languages with a simpler morphology and syntax
+and large corpora available.
+Good results were obtained for German and Dutch with Google translate, but for
+languages like Romanian or Polish, which are both complex and lack enough resources,
+the results are discouraging.
+
+
+If the statistical oracle works well, the only step where the presence of a human
+translator is needed is the evaluation and feedback step. On average, 2 rounds of
+about 4 hours each were needed for the languages for which we performed
+the experiment. It is possible that more effort is needed for more complex languages.
+
+
+Conclusions (tentative)
+
+The grammarian need not be a native speaker of the language.
+
+
+For many languages, the grammarian need not even know the language - native informants are
+enough.
+
+
+However, evaluation by native speakers is necessary.
+
+
+Correct and idiomatic translations are possible.
+
+
+A typical development time was 2-3 person working days per language.
+
+
+Google translate helps in bootstrapping grammars, but must be checked.
+
+
+- in particular, unreliable for morphologically rich languages
+
+
+
+Resource grammars should give some more support
+
+
+- higher-level access to constructions like negative expressions
+
- large-scale morphological lexica
+
+
+
+Acknowledgements
The Phrasebook has been built in the MOLTO project funded by the European Commission.
The authors are grateful to their native speaker informants helping to bootstrap and evaluate
-the grammars: Richard Bubel, Grégoire Détrez, Michal Palka, Willard Rafnsson,...
+the grammars:
+Richard Bubel,
+Grégoire Détrez,
+Karin Keijzer,
+Michał Pałka,
+Willard Rafnsson,
+Nick Smallbone.
-
+
diff --git a/examples/phrasebook/phrasebook.txt b/examples/phrasebook/phrasebook.txt
index 7226ae1b1..d7bfa162d 100644
--- a/examples/phrasebook/phrasebook.txt
+++ b/examples/phrasebook/phrasebook.txt
@@ -3,6 +3,8 @@ Krasimir Angelov, Olga Caprotti, Ramona Enache, Thomas Hallgren, Inari Listenmaa
Showcase for project FP7-ICT-247914, Deliverable D10.2.
+%!Encoding:utf-8
+
%!postproc(html): #HR
%!postproc(html): #BSMALL
%!postproc(html): #ESMALL
@@ -14,6 +16,8 @@ Showcase for project FP7-ICT-247914, Deliverable D10.2.
#BSMALL
History
+- 2 June. Version 1.0 released!
+- 29 May. Link to Google translate with the current language pair and phrase.
- 27 May. Polish added.
- 26 May. Version 0.9:
Catalan added, mass/count noun distinction to reduce overgeneration,
@@ -46,24 +50,24 @@ History
=Purpose=
This phrasebook is a program for translating touristic phrases
-between the 15 European languages included in the
+between 14 European languages included in the
[MOLTO http://www.molto-project.eu] project
(Multilingual On-Line Translation):
- Bulgarian, Catalan, Danish, Dutch, English,
Finnish, French, German, Italian, Norwegian,
- Polish, Romanian, Russian, Spanish, Swedish
+ Polish, Romanian, Spanish, Swedish
It is implemented by using the GF programming language
([Grammatical Framework http://grammaticalframework.org]).
-It is the first demo for the MOLTO project, released in the third month (by June 2010)
-but to be updated in the course of the project.
+It is the first demo for the MOLTO project, released in the third month (by June 2010).
+The first version is a very small system, but it will be extended in the course of the project.
-The phrasebook has the following requirements:
+The phrasebook has the following requirement specification:
- high quality: reliable translations to express yourself in any language
- translation between all pairs of languages
- runnable in web browsers
-- runnable on mobile phones (also off-line: forthcoming for Android phones)
+- runnable on mobile phones (forthcoming: Android phones)
- easily extensible by new words (forthcoming: semi-automatic extensions by users)
@@ -72,30 +76,57 @@ The source code resides in
[``code.haskell.org/gf/examples/phrasebook/`` http://code.haskell.org/gf/examples/phrasebook/]
-Current status (27 May 2010):
-- small but useful coverage in abstract syntax
-- reasonable implementations for all MOLTO languages except Russian
-- works on web browsers calling a server
-- web service not yet released, but preliminarily available in
- http://www.grammaticalframework.org/demos/phrasebook/
-
-
=Points illustrated=
-Interlingua-based translation.
+Interlingua-based translation
+- we translate meanings, rather than words
-Incremental parsing.
-The use of resource grammars and functors.
+Incremental parsing
+- the user is at every point guided by the list of possible next words
-Example-based grammar writing and grammar induction from statistical models (Google).
-Compile-time transfer: especially, in Action in Words.
+The use of resource grammars and functors
+- the translator was implemented on top of an earlier linguistic knowledge base,
+ the [GF Resource Grammar Library http://grammaticalframework.org/lib]
-Quasi-incremental translation: many basic types are also used as phrases.
-Disambiguation, esp. of politeness distinctions.
+Example-based grammar writing and grammar induction from statistical models
+([Google translate http://translate.google.com])
+- many of the grammars were created semi-automatically by generalization from
+ examples
+
+
+Compile-time transfer: especially, in Action in Words
+- the structural differences between languages are treated at compile time,
+ for maximal run-time efficiency
+
+
+Quasi-incremental translation: many basic types are also used as phrases
+- one can translate both words and complete sentences, and get intermediate results
+
+
+Disambiguation, esp. of politeness distinctions
+- if a phrase has many translations, each of them is shown and given an explanation
+ (currently just in English, later in any source language)
+
+
+Fall-back to statistical translation
+- currently just a link to Google translate (forthcoming: tailor-made statistical models)
+
+
+Feed-back from users
+- you are welcome to send comments, bug reports, and better translation suggestions!
+
+
+The level of skills involved in grammar development
+- testing different configurations (see table below)
+
+
+Grammar testing
+- use of treebanks with guided random generation for initial evaluation and regression testing
+
@@ -146,25 +177,15 @@ Here is the module structure as produced in GF by
=To Do=
-Improved translation interface
-- a nicer way to show disambiguation (maybe hidden by default)
-
-
-Complete the missing words and phrases
-
Disambiguation grammars for other languages than English
Extend the abstract lexicon in ``Words`` by hand or (semi)automatically for
- food stuff
-- languages
- places
+- actions
-Link to Google translate, for fall-back and for comparison
-
-Feedback facility in the UI
-
-Customizable distribution: make your own selection of the 2^15 language subsets
+Customizable phone distribution: make your own selection of the 2^14 language subsets
when downloading the phrasebook to a phone
@@ -214,10 +235,151 @@ Here are the steps to follow for contributors:
- Don't compromise quality to gain coverage: //non multa sed multum!//
-==Acknowledgements==
+
+=Effort and cost=
+
+|| Language | Grammarian's language skills | Grammarian's GF skills | Informant used for development | Informant used for testing | Use of external tools | Impact of external tools | Changes on the resource grammar | Development time ||
+| Bulgarian | ### | ### | - | - | - | ? | # | ## |
+| Catalan | ### | ### | - | - | - | ? | # | # |
+| Danish | - | ### | + | + | + | ## | ## | ## |
+| Dutch | - | ### | + | + | + | ## | # | ## |
+| English | ## | ### | - | + | - | - | - | # |
+| Finnish | ### | ### | - | - | - | ? | # | ## |
+| French | ## | ### | - | + | - | ? | # | # |
+| German | # | ### | + | + | + | ## | ## | ### |
+| Italian | ### | # | - | - | - | ? | ## | ## |
+| Norwegian | # | ### | + | - | + | ## | # | ## |
+| Polish | ### | ### | + | + | + | # | # | ## |
+| Romanian | ### | ### | - | - | + | # | ### | ### |
+| Spanish | ## | # | - | - | - | ? | - | ## |
+| Swedish | ## | ### | - | + | - | ? | - | ## |
+
+
+Explanation on scores
+
+- Grammarian's language skills
+ - - : no skills
+ - # : passive knowledge
+ - ## : fluent non-native
+ - ### : native speaker
+
+
+- Grammarian's GF skills
+ - - : no skills
+ - # : basic skills (2-day GF tutorial)
+ - ## : medium skills (previous experience of similar task)
+ - ### : advanced skills (resource grammar writer/substantial contributor)
+
+
+- Informant used for development/Informant used for testing/Use of external tools
+ - - : no
+ - + : yes
+
+
+- Impact of external tools
+ - ? : not investigated
+ - - : no effect on the Phrasebook
+ - # : small impact (literal translation, simple idioms)
+ - ## : medium effect (translation of more forms of words, contextual preposition)
+ - ### : great effect (no extra work needed, translations are correct)
+
+
+- Changes on the resource grammars
+ - - : no changes
+ - # : 1-3 minor changes
+ - ## : 4-10 minor changes, 1-3 medium changes
+ - ### : >10 changes of any kind
+
+
+- Overall effort (including extra work on resource grammars)
+ - # : less than 8 person hours
+ - ## : 8-24 person hours
+ - ### : >24 person hours
+
+
+=Example-based grammar writing prototype=
+
+The figure presents the process of creating a Phrasebook using an example-based
+approach for the language X, where X = {Danish, Dutch, German, Norwegian}.
+
+[picpic.jpg]
+
+- the first step assumes an analysis of the resource grammar and extracts the necessary
+ information that functions that build new lexical entries would need.
+ A model is built so that the proper forms of the word can be rendered,
+ and additional information, such as gender, can be inferred. The script applies
+ these rules to each entry that we want to translate into the target language, and
+ one obtains a set of constructions.
+- they are furthermore given to an external translator tool (Google translate)
+ or a native speaker for translation. One needs the configuration file even if the
+ translator is human, because formal knowledge of grammar is not assumed.
+- the translations into the target language are then further processed in order to
+ build the linearizations of the categories first, decoding the information received.
+ Furthermore, having the words in the lexicon, one can parse the translations of
+ functions with the GF parser and generalize from that.
+- the resulting grammar is tested with the aid of a script that generates
+ constructions covering all the functions and categories from the grammar, along
+ with some other constructions that proved to be problematic in some language.
+ The result of the script contains for each construction in the target language
+ its English correspondent and the abstract syntax tree. A native speaker
+ evaluates the results and if corrections are needed, the algorithm runs again
+ with the new examples. Depending on the language skills of the grammar writer,
+ the changes can be made directly into the GF files, and the correct examples
+ given by the native informant are just kept for validating the results.
+ The algorithm is repeated as long as corrections are needed.
+
+
+The time needed for preparing the configuration files for a grammar will not be needed
+in the future, since the files are reusable for other applications.
+The time for the second step can be saved if automatic tools, like Google translate
+are used. This is only possible in languages with a simpler morphology and syntax
+and large corpora available.
+Good results were obtained for German and Dutch with Google translate, but for
+languages like Romanian or Polish, which are both complex and lack enough resources,
+the results are discouraging.
+
+If the statistical oracle works well, the only step where the presence of a human
+translator is needed is the evaluation and feedback step. On average, 2 rounds of
+about 4 hours each were needed for the languages for which we performed
+the experiment. It is possible that more effort is needed for more complex languages.
+
+
+=Conclusions (tentative)=
+
+The grammarian need not be a native speaker of the language.
+
+For many languages, the grammarian need not even know the language - native informants are
+enough.
+
+However, evaluation by native speakers is necessary.
+
+Correct and idiomatic translations are possible.
+
+A typical development time was 2-3 person working days per language.
+
+Google translate helps in bootstrapping grammars, but must be checked.
+- in particular, unreliable for morphologically rich languages
+
+
+Resource grammars should give some more support
+- higher-level access to constructions like negative expressions
+- large-scale morphological lexica
+
+
+
+
+
+
+=Acknowledgements=
The Phrasebook has been built in the MOLTO project funded by the European Commission.
The authors are grateful to their native speaker informants helping to bootstrap and evaluate
-the grammars: Richard Bubel, Grégoire Détrez, Michal Palka, Willard Rafnsson,...
+the grammars:
+Richard Bubel,
+Grégoire Détrez,
+Karin Keijzer,
+Michał Pałka,
+Willard Rafnsson,
+Nick Smallbone.
diff --git a/examples/phrasebook/picpic.jpg b/examples/phrasebook/picpic.jpg
new file mode 100644
index 000000000..aac20b611
Binary files /dev/null and b/examples/phrasebook/picpic.jpg differ