restored the summer school and Resource-HOWTO documents

2026-06-27 11:56:28 -06:00 · 2008-09-16 08:01:47 +00:00
parent e112fd188f
commit c09783e604
14 changed files with 2589 additions and 0 deletions
@@ -0,0 +1,75 @@
 // Module dependency graph of the abstract syntax of the GF resource grammar.
 // An edge "A -> B" is drawn from a module to a module it depends on.
 // Each node's URL attribute links to the source file of that module.
 digraph {
 size = "12,8" ;
 // Top level: Lang combines the grammar with the test lexicon.
 Lang [style = "solid", shape = "ellipse", URL = "Lang.gf"];
 Lang -> Grammar [style = "solid"];
 Lang -> Lexicon [style = "solid"];
 // Grammar collects the phrase category modules and Structural.
 Grammar [style = "solid", shape = "ellipse", URL = "Grammar.gf"];
 Grammar -> Noun [style = "solid"];
 Grammar -> Verb [style = "solid"];
 Grammar -> Adjective [style = "solid"];
 Grammar -> Adverb [style = "solid"];
 Grammar -> Numeral [style = "solid"];
 Grammar -> Sentence [style = "solid"];
 Grammar -> Question [style = "solid"];
 Grammar -> Relative [style = "solid"];
 Grammar -> Conjunction [style = "solid"];
 Grammar -> Phrase [style = "solid"];
 Grammar -> Text [style = "solid"];
 Grammar -> Idiom [style = "solid"];
 Grammar -> Structural [style = "solid"];
 // Every module below depends directly on Cat.
 Noun [style = "solid", shape = "ellipse", URL = "Noun.gf"];
 Noun -> Cat [style = "solid"];
 Verb [style = "solid", shape = "ellipse", URL = "Verb.gf"];
 Verb -> Cat [style = "solid"];
 Adjective [style = "solid", shape = "ellipse", URL = "Adjective.gf"];
 Adjective -> Cat [style = "solid"];
 Adverb [style = "solid", shape = "ellipse", URL = "Adverb.gf"];
 Adverb -> Cat [style = "solid"];
 Numeral [style = "solid", shape = "ellipse", URL = "Numeral.gf"];
 Numeral -> Cat [style = "solid"];
 Sentence [style = "solid", shape = "ellipse", URL = "Sentence.gf"];
 Sentence -> Cat [style = "solid"];
 Question [style = "solid", shape = "ellipse", URL = "Question.gf"];
 Question -> Cat [style = "solid"];
 Relative [style = "solid", shape = "ellipse", URL = "Relative.gf"];
 Relative -> Cat [style = "solid"];
 Conjunction [style = "solid", shape = "ellipse", URL = "Conjunction.gf"];
 Conjunction -> Cat [style = "solid"];
 Phrase [style = "solid", shape = "ellipse", URL = "Phrase.gf"];
 Phrase -> Cat [style = "solid"];
 Text [style = "solid", shape = "ellipse", URL = "Text.gf"];
 Text -> Cat [style = "solid"];
 Idiom [style = "solid", shape = "ellipse", URL = "Idiom.gf"];
 Idiom -> Cat [style = "solid"];
 Structural [style = "solid", shape = "ellipse", URL = "Structural.gf"];
 Structural -> Cat [style = "solid"];
 Lexicon [style = "solid", shape = "ellipse", URL = "Lexicon.gf"];
 Lexicon -> Cat [style = "solid"];
 // Cat in turn depends on Common.
 Cat [style = "solid", shape = "ellipse", URL = "Cat.gf"];
 Cat -> Common [style = "solid"];
 Common [style = "solid", shape = "ellipse", URL = "Common.gf"];
 }
@@ -0,0 +1,827 @@
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
 <HTML>
 <HEAD>
 <META NAME="generator" CONTENT="http://txt2tags.sf.net">
 <TITLE>Resource grammar writing HOWTO</TITLE>
 </HEAD><BODY BGCOLOR="white" TEXT="black">
 <P ALIGN="center"><CENTER><H1>Resource grammar writing HOWTO</H1>
 <FONT SIZE="4">
 <I>Author: Aarne Ranta &lt;aarne (at) cs.chalmers.se&gt;</I><BR>
 Last update: Tue Sep 16 09:58:01 2008
 </FONT></CENTER>
 <P>
 <B>History</B>
 </P>
 <P>
 September 2008: partly outdated - to be updated for API 1.5.
 </P>
 <P>
 October 2007: updated for API 1.2.
 </P>
 <P>
 January 2006: first version.
 </P>
 <P>
 The purpose of this document is to tell how to implement the GF
 resource grammar API for a new language. We will <I>not</I> cover how
 to use the resource grammar, nor how to change the API. But we
 will give some hints how to extend the API.
 </P>
 <P>
 A manual for using the resource grammar is found in
 </P>
 <P>
 <A HREF="http://www.cs.chalmers.se/~aarne/GF/lib/resource-1.0/doc/synopsis.html"><CODE>http://www.cs.chalmers.se/~aarne/GF/lib/resource-1.0/doc/synopsis.html</CODE></A>.
 </P>
 <P>
 A tutorial on GF, also introducing the idea of resource grammars, is found in
 </P>
 <P>
 <A HREF="../../../doc/tutorial/gf-tutorial2.html"><CODE>http://www.cs.chalmers.se/~aarne/GF/doc/tutorial/gf-tutorial2.html</CODE></A>.
 </P>
 <P>
 This document concerns the API v. 1.0. You can find the current code in 
 </P>
 <P>
 <A HREF=".."><CODE>http://www.cs.chalmers.se/~aarne/GF/lib/resource-1.0/</CODE></A>
 </P>
 <H2>The resource grammar structure</H2>
 <P>
 The library is divided into a bunch of modules, whose dependencies
 are given in the following figure.
 </P>
 <P>
 <IMG ALIGN="left" SRC="Syntax.png" BORDER="0" ALT=""> 
 </P>
 <UL>
 <LI>solid contours: module used by end users
 <LI>dashed contours: internal module
 <LI>ellipse: abstract/concrete pair of modules
 <LI>rectangle: resource or instance
 <LI>diamond: interface
 </UL>
 <P>
 The solid ellipses show the API as visible to the user of the library. The
 dashed ellipses form the main part of the implementation, which the resource
 grammar programmer has to work with. With the exception of the <CODE>Paradigms</CODE>
 module, the visible API modules can be produced mechanically.
 </P>
 <P>
 <IMG ALIGN="left" SRC="Grammar.png" BORDER="0" ALT=""> 
 </P>
 <P>
 Thus the API consists of a grammar and a lexicon, which is
 provided for test purposes.
 </P>
 <P>
 The module structure is rather flat: most modules are direct
 parents of <CODE>Grammar</CODE>. The idea
 is that you can concentrate on one linguistic aspect at a time, or
 also distribute the work among several authors. The module <CODE>Cat</CODE>
 defines the "glue" that ties the aspects together - a type system
 to which all the other modules conform, so that e.g. <CODE>NP</CODE> means
 the same thing in those modules that use <CODE>NP</CODE>s and those that
 construct them.
 </P>
 <H3>Phrase category modules</H3>
 <P>
 The direct parents of the top will be called <B>phrase category modules</B>,
 since each of them concentrates on a particular phrase category (nouns, verbs,
 adjectives, sentences,...). A phrase category module tells 
 <I>how to construct phrases in that category</I>. You will find out that
 all functions in any of these modules have the same value type (or maybe
 one of a small number of different types). Thus we have
 </P>
 <UL>
 <LI><CODE>Noun</CODE>: construction of nouns and noun phrases
 <LI><CODE>Adjective</CODE>: construction of adjectival phrases
 <LI><CODE>Verb</CODE>: construction of verb phrases
 <LI><CODE>Adverb</CODE>: construction of adverbial phrases
 <LI><CODE>Numeral</CODE>: construction of cardinal and ordinal numerals
 <LI><CODE>Sentence</CODE>: construction of sentences and imperatives
 <LI><CODE>Question</CODE>: construction of questions
 <LI><CODE>Relative</CODE>: construction of relative clauses
 <LI><CODE>Conjunction</CODE>: coordination of phrases
 <LI><CODE>Phrase</CODE>: construction of the major units of text and speech
 <LI><CODE>Text</CODE>: construction of texts as sequences of phrases
 <LI><CODE>Idiom</CODE>: idiomatic phrases such as existentials
 </UL>
 <H3>Infrastructure modules</H3>
 <P>
 Expressions of each phrase category are constructed in the corresponding
 phrase category module. But their <I>use</I> mostly takes place in other modules.
 For instance, noun phrases, which are constructed in <CODE>Noun</CODE>, are
 used as arguments of functions of almost all other phrase category modules. 
 How can we build all these modules independently of each other?
 </P>
 <P>
 As usual in typeful programming, the <I>only</I> thing you need to know
 about an object you use is its type. When writing a linearization rule
 for a GF abstract syntax function, the only thing you need to know is
 the linearization types of its value and argument categories. To achieve
 the division of the resource grammar to several parallel phrase category modules,
 what we need is an underlying definition of the linearization types. This
 definition is given as the implementation of
 </P>
 <UL>
 <LI><CODE>Cat</CODE>: syntactic categories of the resource grammar
 </UL>
 <P>
 Any resource grammar implementation has first to agree on how to implement
 <CODE>Cat</CODE>. Luckily enough, even this can be done incrementally: you
 can skip the <CODE>lincat</CODE> definition of a category and use the default
 <CODE>{s : Str}</CODE> until you need to change it to something else. In
 English, for instance, many categories do have this linearization type.
 </P>
 <H3>Lexical modules</H3>
 <P>
 What is lexical and what is syntactic is not as clearcut in GF as in
 some other grammar formalisms. Logically, lexical means atom, i.e. a
 <CODE>fun</CODE> with no arguments. Linguistically, one may add to this
 that the <CODE>lin</CODE> consists of only one token (or of a table whose values
 are single tokens). Even in the restricted lexicon included in the resource
 API, the latter rule is sometimes violated in some languages. For instance,
 <CODE>Structural.both7and_DConj</CODE> is an atom, but its linearization is
 two words e.g. <I>both - and</I>.
 </P>
 <P>
 Another characterization of lexical is that lexical units can be added
 almost <I>ad libitum</I>, and they cannot be defined in terms of already
 given rules. The lexical modules of the resource API are thus more like
 samples than complete lists. There are two such modules:
 </P>
 <UL>
 <LI><CODE>Structural</CODE>: structural words (determiners, conjunctions,...)
 <LI><CODE>Lexicon</CODE>: basic everyday content words (nouns, verbs,...)
 </UL>
 <P>
 The module <CODE>Structural</CODE> aims for completeness, and is likely to
 be extended in future releases of the resource. The module <CODE>Lexicon</CODE>
 gives a "random" list of words, which enable interesting testing of syntax,
 and also a check list for morphology, since those words are likely to include
 most morphological patterns of the language.
 </P>
 <P>
 In the case of <CODE>Lexicon</CODE> it may come out clearer than anywhere else
 in the API that it is impossible to give exact translation equivalents in
 different languages on the level of a resource grammar. In other words,
 application grammars are likely to use the resource in different ways for
 different languages.
 </P>
 <H2>Language-dependent syntax modules</H2>
 <P>
 In addition to the common API, there is room for language-dependent extensions
 of the resource. The top level of each language looks as follows (with English as example):
 </P>
 <PRE>
    abstract English = Grammar, ExtraEngAbs, DictEngAbs
 </PRE>
 <P>
 where <CODE>ExtraEngAbs</CODE> is a collection of syntactic structures specific to English,
 and <CODE>DictEngAbs</CODE> is an English dictionary 
 (at the moment, it consists of <CODE>IrregEngAbs</CODE>,
 the irregular verbs of English). Each of these language-specific grammars has 
 the potential to grow into a full-scale grammar of the language. These grammars
 can also be used as libraries, but the possibility of using functors is lost.
 </P>
 <P>
 To give a better overview of language-specific structures, 
 modules like <CODE>ExtraEngAbs</CODE>
 are built from a language-independent module <CODE>ExtraAbs</CODE> 
 by restricted inheritance:
 </P>
 <PRE>
    abstract ExtraEngAbs = Extra [f,g,...]
 </PRE>
 <P>
 Thus any category and function in <CODE>Extra</CODE> may be shared by a subset of all
 languages. One can see this set-up as a matrix, which tells 
 what <CODE>Extra</CODE> structures
 are implemented in what languages. For the common API in <CODE>Grammar</CODE>, the matrix
 is filled with 1's (everything is implemented in every language).
 </P>
 <P>
 In a minimal resource grammar implementation, the language-dependent
 extensions are just empty modules, but it is good to provide them for
 the sake of uniformity.
 </P>
 <H2>The core of the syntax</H2>
 <P>
 Among all categories and functions, a handful are the
 most important and distinct ones, of which the others can be 
 seen as variations. The categories are
 </P>
 <PRE>
    Cl ; VP ; V2 ; NP ; CN ; Det ; AP ;
 </PRE>
 <P>
 The functions are
 </P>
 <PRE>
    PredVP  : NP  -&gt; VP -&gt; Cl ;  -- predication
    ComplV2 : V2  -&gt; NP -&gt; VP ;  -- complementization
    DetCN   : Det -&gt; CN -&gt; NP ;  -- determination
    ModCN   : AP  -&gt; CN -&gt; CN ;  -- modification
 </PRE>
 <P>
 This <A HREF="latin.gf">toy Latin grammar</A> shows in a nutshell how these
 rules relate the categories to each other. It is intended to be a
 first approximation when designing the parameter system of a new
 language. 
 </P>
 <H3>Another reduced API</H3>
 <P>
 If you want to experiment with a small subset of the resource API first, 
 try out the module 
 <A HREF="http://www.cs.chalmers.se/~aarne/GF/doc/tutorial/resource/Syntax.gf">Syntax</A>
 explained in the
 <A HREF="http://www.cs.chalmers.se/~aarne/GF/doc/tutorial/gf-tutorial2.html">GF Tutorial</A>.
 </P>
 <H3>The present-tense fragment</H3>
 <P>
 Some lines in the resource library are suffixed with the comment
 <CODE>--# notpresent</CODE>,
 which is used by a preprocessor to exclude those lines from 
 a reduced version of the full resource. This present-tense-only
 version is useful for applications in most technical text, since
 it reduces the grammar size and compilation time. It can also
 be useful to exclude those lines in a first version of resource
 implementation. To compile a grammar with present-tense-only, use
 </P>
 <PRE>
    i -preproc=GF/lib/resource-1.0/mkPresent LangGer.gf
 </PRE>
 <P></P>
 <H2>Phases of the work</H2>
 <H3>Putting up a directory</H3>
 <P>
 Unless you are writing an instance of a parametrized implementation
 (Romance or Scandinavian), which will be covered later, the
 simplest way is to follow roughly the following procedure. Assume you
 are building a grammar for the German language. Here are the first steps,
 which we actually followed ourselves when building the German implementation
 of resource v. 1.0.
 </P>
 <OL>
 <LI>Create a sister directory for <CODE>GF/lib/resource/english</CODE>, named
     <CODE>german</CODE>.
 <PRE>
         cd GF/lib/resource/
         mkdir german
         cd german
 </PRE>
 <P></P>
 <LI>Check out the
   <A HREF="http://www.w3.org/WAI/ER/IG/ert/iso639.htm">ISO 639 3-letter language code</A>
   for German: both <CODE>Ger</CODE> and <CODE>Deu</CODE> are given, and we pick <CODE>Ger</CODE>.
 <P></P>
 <LI>Copy the <CODE>*Eng.gf</CODE> files from <CODE>english</CODE> to <CODE>german</CODE>,
     and rename them:
 <PRE>
         cp ../english/*Eng.gf .
         rename 's/Eng/Ger/' *Eng.gf
 </PRE>
 <P></P>
 <LI>Change the <CODE>Eng</CODE> module references to <CODE>Ger</CODE> references
     in all files:
 <PRE>
         sed -i 's/English/German/g' *Ger.gf
         sed -i 's/Eng/Ger/g' *Ger.gf
 </PRE>
  The first line prevents changing the word <CODE>English</CODE>, which appears
  here and there in comments, to <CODE>Gerlish</CODE>.
 <P></P>
 <LI>This may of course change unwanted occurrences of the 
     string <CODE>Eng</CODE> - verify this by
 <PRE>
         grep Ger *.gf
 </PRE>
     But you will have to make lots of manual changes in all files anyway!
 <P></P>
 <LI>Comment out the contents of these files:
 <PRE>
         sed -i 's/^/--/' *Ger.gf
 </PRE>
     This will give you a set of templates out of which the grammar
     will grow as you uncomment and modify the files rule by rule.
 <P></P>
 <LI>In all <CODE>.gf</CODE> files, uncomment the module headers and brackets,
  leaving the module bodies commented. Unfortunately, there is no
  simple way to do this automatically (or to avoid commenting these
  lines in the previous step) - but uncommenting the first
  and the last lines will actually do the job for many of the files.
 <P></P>
 <LI>Uncomment the contents of the main grammar file:
 <PRE>
         sed -i 's/^--//' LangGer.gf
 </PRE>
 <P></P>
 <LI>Now you can open the grammar <CODE>LangGer</CODE> in GF:
 <PRE>
         gf LangGer.gf
 </PRE>
  You will get lots of warnings on missing rules, but the grammar will compile.
 <P></P>
 <LI>At all following steps you will now have a valid, but incomplete
     GF grammar. The GF command
 <PRE>
         pg -printer=missing
 </PRE>
     tells you what exactly is missing.
 </OL>
 <P>
 Here is the module structure of <CODE>LangGer</CODE>. It has been simplified by leaving out
 the majority of the phrase category modules. Each of them has the same dependencies
 as e.g. <CODE>VerbGer</CODE>.
 </P>
 <P>
 <IMG ALIGN="middle" SRC="German.png" BORDER="0" ALT="">
 </P>
 <H3>Direction of work</H3>
 <P>
 The real work starts now. There are many ways to proceed, the main ones being
 </P>
 <UL>
 <LI>Top-down: start from the module <CODE>Phrase</CODE> and go down to <CODE>Sentence</CODE>, then
  <CODE>Verb</CODE>, <CODE>Noun</CODE>, and in the end <CODE>Lexicon</CODE>. In this way, you are all the time
  building complete phrases, and adding more content to them as you proceed.
  <B>This approach is not recommended</B>. It is impossible to test the rules if
  you have no words to apply the constructions to.
 <P></P>
 <LI>Bottom-up: set as your first goal to implement <CODE>Lexicon</CODE>. To this end, you
  need to write <CODE>ParadigmsGer</CODE>, which in turn needs parts of 
  <CODE>MorphoGer</CODE> and <CODE>ResGer</CODE>.
  <B>This approach is not recommended</B>. You can get stuck in details of
  morphology such as irregular words, and you don't have enough grasp of
  the type system to decide what forms to cover in morphology.
 </UL>
 <P>
 The practical working direction is thus a saw-like motion between the morphological
 and top-level modules. Here is a possible course of the work that gives enough
 test data and enough general view at any point:
 </P>
 <OL>
 <LI>Define <CODE>Cat.N</CODE> and the required parameter types in <CODE>ResGer</CODE>. As we define
 <PRE>
    lincat N  = {s : Number =&gt; Case =&gt; Str ; g : Gender} ;
 </PRE>
 we need the parameter types <CODE>Number</CODE>, <CODE>Case</CODE>, and <CODE>Gender</CODE>. The definition
 of <CODE>Number</CODE> in <A HREF="../common/ParamX.gf"><CODE>common/ParamX</CODE></A> works for German, so we
 use it and just define <CODE>Case</CODE> and <CODE>Gender</CODE> in <CODE>ResGer</CODE>.
 <P></P>
 <LI>Define <CODE>regN</CODE> in <CODE>ParadigmsGer</CODE>. In this way you can 
 already implement a huge amount of nouns correctly in <CODE>LexiconGer</CODE>. Actually
 just adding <CODE>mkN</CODE> should suffice for every noun - but, 
 since it is tedious to use, you
 might proceed to the next step before returning to morphology and defining the
 real work horse <CODE>reg2N</CODE>.
 <P></P>
 <LI>While doing this, you may want to test the resource independently. Do this by
 <PRE>
         i -retain ParadigmsGer
         cc regN "Kirche"
 </PRE>
 <P></P>
 <LI>Proceed to determiners and pronouns in 
 <CODE>NounGer</CODE> (<CODE>DetCN UsePron DetSg SgQuant NoNum NoOrd DefArt IndefArt UseN</CODE>) and 
 <CODE>StructuralGer</CODE> (<CODE>i_Pron every_Det</CODE>). You also need some categories and
 parameter types. At this point, it is maybe not possible to find out the final
 linearization types of <CODE>CN</CODE>, <CODE>NP</CODE>, and <CODE>Det</CODE>, but at least you should
 be able to correctly inflect noun phrases such as <I>every airplane</I>:
 <PRE>
    i LangGer.gf
    l -table DetCN every_Det (UseN airplane_N)
    Nom: jedes Flugzeug
    Acc: jedes Flugzeug
    Dat: jedem Flugzeug
    Gen: jedes Flugzeugs
 </PRE>
 <P></P>
 <LI>Proceed to verbs: define <CODE>CatGer.V</CODE>,  <CODE>ResGer.VForm</CODE>, and
 <CODE>ParadigmsGer.regV</CODE>. You may choose to exclude <CODE>notpresent</CODE>
 cases at this point. But anyway, you will be able to inflect a good
 number of verbs in <CODE>Lexicon</CODE>, such as
 <CODE>live_V</CODE> (<CODE>regV "leben"</CODE>).
 <P></P>
 <LI>Now you can soon form your first sentences: define <CODE>VP</CODE> and
 <CODE>Cl</CODE> in <CODE>CatGer</CODE>, <CODE>VerbGer.UseV</CODE>, and <CODE>SentenceGer.PredVP</CODE>.
 Even if you have excluded the tenses, you will be able to produce
 <PRE>
    i -preproc=mkPresent LangGer.gf
    &gt; l -table PredVP (UsePron i_Pron) (UseV live_V)
    Pres Simul Pos Main: ich lebe
    Pres Simul Pos Inv:  lebe ich
    Pres Simul Pos Sub:  ich lebe
    Pres Simul Neg Main: ich lebe nicht
    Pres Simul Neg Inv:  lebe ich nicht
    Pres Simul Neg Sub:  ich nicht lebe
 </PRE>
 <P></P>
 <LI>Transitive verbs (<CODE>CatGer.V2 ParadigmsGer.dirV2 VerbGer.ComplV2</CODE>) 
 are a natural next step, so that you can
 produce <CODE>ich liebe dich</CODE>.
 <P></P>
 <LI>Adjectives (<CODE>CatGer.A ParadigmsGer.regA NounGer.AdjCN AdjectiveGer.PositA</CODE>) 
 will force you to think about strong and weak declensions, so that you can
 correctly inflect <I>my new car, this new car</I>. 
 <P></P>
 <LI>Once you have implemented the set
 (<CODE>Noun.DetCN Noun.AdjCN Verb.UseV Verb.ComplV2 Sentence.PredVP</CODE>),
 you have overcome most of the difficulties. You know roughly what parameters
 and dependencies there are in your language, and you can now proceed very
 much in the order you please. 
 </OL>
 <H3>The develop-test cycle</H3>
 <P>
 The following develop-test cycle will
 be applied most of the time, both in the first steps described above
 and in later steps where you are more on your own.
 </P>
 <OL>
 <LI>Select a phrase category module, e.g. <CODE>NounGer</CODE>, and uncomment some
  linearization rules (for instance, <CODE>DefSg</CODE>, which is
  not too complicated).
 <P></P>
 <LI>Write down some German examples of this rule, for instance translations
     of "the dog", "the house", "the big house", etc. Write these in all their
     different forms (two numbers and four cases).
 <P></P>
 <LI>Think about the categories involved (<CODE>CN, NP, N</CODE>) and the
     variations they have. Encode this in the lincats of <CODE>CatGer</CODE>.
     You may have to define some new parameter types in <CODE>ResGer</CODE>.
 <P></P>
 <LI>To be able to test the construction, 
     define some words you need to instantiate it
     in <CODE>LexiconGer</CODE>. You will also need some regular inflection patterns
     in <CODE>ParadigmsGer</CODE>.
 <P></P>
 <LI>Test by parsing, linearization,
     and random generation. In particular, linearization to a table should
     be used so that you see all forms produced:
 <PRE>
         gr -cat=NP -number=20 -tr | l -table
 </PRE>
 <P></P>
 <LI>Spare some tree-linearization pairs for later regression testing. Use the
  <CODE>tree_bank</CODE> command,
 <PRE>
         gr -cat=NP -number=20 | tb -xml | wf NP.tb
 </PRE>
  You can later compare your modified grammar to this treebank by
 <PRE>
         rf NP.tb | tb -c
 </PRE>
 </OL>
 <P>
 You are likely to run this cycle a few times for each linearization rule
 you implement, and some hundreds of times altogether. (There are 66 <CODE>cat</CODE>s and
 458 <CODE>funs</CODE> in <CODE>Lang</CODE> at the moment; 149 of the <CODE>funs</CODE> are outside the two
 lexicon modules.)
 </P>
 <P>
 Here is a <A HREF="../german/log.txt">live log</A> of the actual process of
 building the German implementation of resource API v. 1.0.
 It is the basis of the more detailed explanations, which will
 follow soon. (You will find out that these explanations involve
 a rational reconstruction of the live process! Among other things, the
 API was changed during the actual process to make it more intuitive.)
 </P>
 <H3>Resource modules used</H3>
 <P>
 These modules will be written by you.
 </P>
 <UL>
 <LI><CODE>ResGer</CODE>: parameter types and auxiliary operations 
 (a resource for the resource grammar!)
 <LI><CODE>ParadigmsGer</CODE>: complete inflection engine and most important regular paradigms
 <LI><CODE>MorphoGer</CODE>: auxiliaries for <CODE>ParadigmsGer</CODE> and <CODE>StructuralGer</CODE>. This need
 not be separate from <CODE>ResGer</CODE>.
 </UL>
 <P>
 These modules are language-independent and provided by the existing resource
 package.
 </P>
 <UL>
 <LI><CODE>ParamX</CODE>: parameter types used in many languages
 <LI><CODE>CommonX</CODE>: implementation of language-uniform categories 
    such as <CODE>Text</CODE> and <CODE>Phr</CODE>, as well as of
    the logical tense, anteriority, and polarity parameters
 <LI><CODE>Coordination</CODE>: operations to deal with lists and coordination
 <LI><CODE>Prelude</CODE>: general-purpose operations on strings, records,
      truth values, etc.
 <LI><CODE>Predefined</CODE>: general-purpose operations with hard-coded definitions
 </UL>
 <P>
 An important decision is what rules to implement in terms of operations in
 <CODE>ResGer</CODE>. A golden rule of functional programming says that, whenever
 you find yourself programming by copy and paste, you should write a function
 instead. This indicates that an operation should be created if it is to be
 used at least twice. At the same time, a sound principle of vicinity says that
 it should not require too much browsing to understand what a rule does.
 From these two principles, we have derived the following practice:
 </P>
 <UL>
 <LI>If an operation is needed <I>in two different modules</I>, 
 it should be created in <CODE>ResGer</CODE>. An example is <CODE>mkClause</CODE>, 
 used in <CODE>Sentence</CODE>, <CODE>Question</CODE>, and <CODE>Relative</CODE>.
 <LI>If an operation is needed <I>twice in the same module</I>, but never
 outside, it should be created in the same module. Many examples are
 found in <CODE>Numeral</CODE>.
 <LI>If an operation is only needed once, it should not be created (but rather
 inlined). Most functions in phrase category modules are implemented in this
 way.
 </UL>
 <P>
 This discipline is very different from the one followed in earlier
 versions of the library (up to 0.9). We then valued the principle of
 abstraction more than vicinity, creating layers of abstraction for
 almost everything. This led in practice to the duplication of almost
 all code on the <CODE>lin</CODE> and <CODE>oper</CODE> levels, and made the code
 hard to understand and maintain.
 </P>
 <H3>Morphology and lexicon</H3>
 <P>
 The paradigms needed to implement
 <CODE>LexiconGer</CODE> are defined in
 <CODE>ParadigmsGer</CODE>.
 This module provides high-level ways to define the linearization of
 lexical items, of categories <CODE>N, A, V</CODE> and their complement-taking
 variants.
 </P>
 <P>
 For ease of use, the <CODE>Paradigms</CODE> modules follow a certain
 naming convention. Thus they provide, for each lexical category, such as <CODE>N</CODE>,
 the functions
 </P>
 <UL>
 <LI><CODE>mkN</CODE>, for worst-case construction of <CODE>N</CODE>. Its type signature
     has the form
 <PRE>
         mkN : Str -&gt; ... -&gt; Str -&gt; P -&gt; ... -&gt; Q -&gt; N
 </PRE>
     with as many string and parameter arguments as can ever be needed to
     construct an <CODE>N</CODE>.
 <LI><CODE>regN</CODE>, for the most common cases, with just one string argument:
 <PRE>
         regN : Str -&gt; N
 </PRE>
 <LI>A language-dependent (small) set of functions to handle mild irregularities
     and common exceptions.
 <P></P>
 For the complement-taking variants, such as <CODE>V2</CODE>, we provide
 <P></P>
 <LI><CODE>mkV2</CODE>, which takes a <CODE>V</CODE> and all necessary arguments, such
     as case and preposition:
 <PRE>
         mkV2 : V -&gt; Case -&gt; Str -&gt; V2 ;
 </PRE>
 <LI>A language-dependent (small) set of functions to handle common special cases,
     such as direct transitive verbs:
 <PRE>
         dirV2 : V -&gt; V2 ;
         -- dirV2 v = mkV2 v accusative [] 
 </PRE>
 </UL>
 <P>
 The golden rule for the design of paradigms is that
 </P>
 <UL>
 <LI>The user will only need function applications with constants and strings,
     never any records or tables.
 </UL>
 <P>
 The discipline of data abstraction moreover requires that the user of the resource
 is not given access to parameter constructors, but only to constants that denote 
 them. This gives the resource grammarian the freedom to change the underlying
 data representation if needed. It means that the <CODE>ParadigmsGer</CODE> module has
 to define constants for those parameter types and constructors that 
 the application grammarian may need to use, e.g.
 </P>
 <PRE>
    oper 
      Case : Type ;
      nominative, accusative, genitive, dative : Case ;
 </PRE>
 <P>
 These constants are defined in terms of parameter types and constructors
 in <CODE>ResGer</CODE> and <CODE>MorphoGer</CODE>, which modules are not
 visible to the application grammarian.
 </P>
 <H3>Lock fields</H3>
 <P>
 An important difference between <CODE>MorphoGer</CODE> and
 <CODE>ParadigmsGer</CODE> is that the former uses "raw" record types
 for word classes, whereas the latter uses category symbols defined in
 <CODE>CatGer</CODE>. When these category symbols are used to denote
 record types in a resource module, such as <CODE>ParadigmsGer</CODE>,
 a <B>lock field</B> is added to the record, so that categories
 with the same implementation are not confused with each other.
 (This is inspired by the <CODE>newtype</CODE> discipline in Haskell.)
 For instance, the lincats of adverbs and conjunctions are the same
 in <CODE>CommonX</CODE> (and therefore in <CODE>CatGer</CODE>, which inherits it):
 </P>
 <PRE>
    lincat Adv  = {s : Str} ;
    lincat Conj = {s : Str} ;
 </PRE>
 <P>
 But when these category symbols are used to denote their linearization 
 types in a resource module, these definitions are translated to
 </P>
 <PRE>
    oper Adv  : Type = {s : Str  ; lock_Adv  : {}} ;
    oper Conj : Type = {s : Str  ; lock_Conj : {}} ;
 </PRE>
 <P>
 In this way, the user of a resource grammar cannot confuse adverbs with
 conjunctions. In other words, the lock fields force the type checker
 to function as a grammaticality checker.
 </P>
 <P>
 When the resource grammar is <CODE>open</CODE>ed in an application grammar, the
 lock fields are never seen (except possibly in type error messages),
 and the application grammarian should never write them herself. If she
 has to do this, it is a sign that the resource grammar is incomplete, and
 the proper way to proceed is to fix the resource grammar.
 </P>
 <P>
 The resource grammarian has to provide the dummy lock field values
 in her hidden definitions of constants in <CODE>Paradigms</CODE>. For instance,
 </P>
 <PRE>
    mkAdv : Str -&gt; Adv ;
    -- mkAdv s = {s = s ; lock_Adv = &lt;&gt;} ;
 </PRE>
 <P></P>
 <H3>Lexicon construction</H3>
 <P>
 The lexicon belonging to <CODE>LangGer</CODE> consists of two modules:
 </P>
 <UL>
 <LI><CODE>StructuralGer</CODE>, structural words, built by directly using
     <CODE>MorphoGer</CODE>.
 <LI><CODE>LexiconGer</CODE>, content words, built by using <CODE>ParadigmsGer</CODE>.
 </UL>
 <P>
 The reason why <CODE>MorphoGer</CODE> has to be used in <CODE>StructuralGer</CODE>
 is that <CODE>ParadigmsGer</CODE> does not contain constructors for closed
 word classes such as pronouns and determiners. The reason why we
 recommend <CODE>ParadigmsGer</CODE> for building <CODE>LexiconGer</CODE> is that
 the coverage of the paradigms gets thereby tested and that the
 use of the paradigms in <CODE>LexiconGer</CODE> gives a good set of examples for
 those who want to build new lexica.
 </P>
 <H2>Inside grammar modules</H2>
 <P>
 Detailed implementation tricks
 are found in the comments of each module.
 </P>
 <H3>The category system</H3>
 <UL>
 <LI><A HREF="gfdoc/Common.html">Common</A>, <A HREF="../common/CommonX.gf">CommonX</A>
 <LI><A HREF="gfdoc/Cat.html">Cat</A>, <A HREF="../german/CatGer.gf">CatGer</A>
 </UL>
 <H3>Phrase category modules</H3>
 <UL>
 <LI><A HREF="gfdoc/Noun.html">Noun</A>, <A HREF="../german/NounGer.gf">NounGer</A>
 <LI><A HREF="gfdoc/Adjective.html">Adjective</A>, <A HREF="../german/AdjectiveGer.gf">AdjectiveGer</A>
 <LI><A HREF="gfdoc/Verb.html">Verb</A>, <A HREF="../german/VerbGer.gf">VerbGer</A>
 <LI><A HREF="gfdoc/Adverb.html">Adverb</A>, <A HREF="../german/AdverbGer.gf">AdverbGer</A>
 <LI><A HREF="gfdoc/Numeral.html">Numeral</A>, <A HREF="../german/NumeralGer.gf">NumeralGer</A>
 <LI><A HREF="gfdoc/Sentence.html">Sentence</A>, <A HREF="../german/SentenceGer.gf">SentenceGer</A>
 <LI><A HREF="gfdoc/Question.html">Question</A>, <A HREF="../german/QuestionGer.gf">QuestionGer</A>
 <LI><A HREF="gfdoc/Relative.html">Relative</A>, <A HREF="../german/RelativeGer.gf">RelativeGer</A>
 <LI><A HREF="gfdoc/Conjunction.html">Conjunction</A>, <A HREF="../german/ConjunctionGer.gf">ConjunctionGer</A>
 <LI><A HREF="gfdoc/Phrase.html">Phrase</A>, <A HREF="../german/PhraseGer.gf">PhraseGer</A>
 <LI><A HREF="gfdoc/Text.html">Text</A>, <A HREF="../common/TextX.gf">TextX</A>
 <LI><A HREF="gfdoc/Idiom.html">Idiom</A>, <A HREF="../german/IdiomGer.gf">IdiomGer</A>
 <LI><A HREF="gfdoc/Lang.html">Lang</A>, <A HREF="../german/LangGer.gf">LangGer</A>
 </UL>
 <H3>Resource modules</H3>
 <UL>
 <LI><A HREF="../german/ResGer.gf">ResGer</A>
 <LI><A HREF="../german/MorphoGer.gf">MorphoGer</A>
 <LI><A HREF="gfdoc/ParadigmsGer.html">ParadigmsGer</A>, <A HREF="../german/ParadigmsGer.gf">ParadigmsGer.gf</A>
 </UL>
 <H3>Lexicon</H3>
 <UL>
 <LI><A HREF="gfdoc/Structural.html">Structural</A>, <A HREF="../german/StructuralGer.gf">StructuralGer</A>
 <LI><A HREF="gfdoc/Lexicon.html">Lexicon</A>, <A HREF="../german/LexiconGer.gf">LexiconGer</A>
 </UL>
 <H2>Lexicon extension</H2>
 <H3>The irregularity lexicon</H3>
 <P>
 It may be handy to provide a separate module of irregular
 verbs and other words which are difficult for a lexicographer
 to handle. There are usually a limited number of such words - a
 few hundred perhaps. Building such a lexicon separately also
 makes it less important to cover <I>everything</I> by the
 worst-case paradigms (<CODE>mkV</CODE> etc).
 </P>
 <H3>Lexicon extraction from a word list</H3>
 <P>
 You can often find resources such as lists of 
 irregular verbs on the internet. For instance, the
 <A HREF="http://www.iee.et.tu-dresden.de/~wernerr/grammar/verben_dt.html">Irregular German Verbs</A> 
 page gives a list of verbs in the
 traditional tabular format, which begins as follows:
 </P>
 <PRE>
    backen (du bäckst, er bäckt)	                 backte [buk]              gebacken
    befehlen (du befiehlst, er befiehlt; befiehl!) befahl (beföhle; befähle) befohlen
    beginnen                                       begann (begönne; begänne) begonnen
    beißen                                         biß                       gebissen
 </PRE>
 <P>
 All you have to do is to write a suitable verb paradigm
 </P>
 <PRE>
    irregV : (x1,_,_,_,_,x6 : Str) -&gt; V ;
 </PRE>
 <P>
 and a Perl or Python or Haskell script that transforms
 the table to
 </P>
 <PRE>
    backen_V   = irregV "backen" "bäckt" "back" "backte" "backte" "gebacken" ;
    befehlen_V = irregV "befehlen" "befiehlt" "befiehl" "befahl" "beföhle" "befohlen" ;
 </PRE>
 <P></P>
 <P>
 When using ready-made word lists, you should think about
 copyright issues. Ideally, all resource grammar material should
 be provided under GNU General Public License.
 </P>
 <H3>Lexicon extraction from raw text data</H3>
 <P>
 This is a cheap technique to build a lexicon of thousands
 of words, if text data is available in digital format.
 See the <A HREF="http://www.cs.chalmers.se/~markus/FM/">Functional Morphology</A> 
 homepage for details.
 </P>
 <H3>Extending the resource grammar API</H3>
 <P>
 Sooner or later it will happen that the resource grammar API
 does not suffice for all applications. A common reason is
 that it does not include idiomatic expressions in a given language.
 The solution then is in the first place to build language-specific
 extension modules. This chapter will deal with this issue (to be completed).
 </P>
 <H2>Writing an instance of parametrized resource grammar implementation</H2>
 <P>
 Above we have looked at how a resource implementation is built by
 the copy and paste method (from English to German), that is, formally
 speaking, from scratch. A more elegant solution available for 
 families of languages such as Romance and Scandinavian is to
 use parametrized modules. The advantages are
 </P>
 <UL>
 <LI>theoretical: linguistic generalizations and insights
 <LI>practical: maintainability improves with fewer components
 </UL>
 <P>
 In this chapter, we will look at an example: adding Italian to
 the Romance family (to be completed). Here is a set of
 <A HREF="http://www.cs.chalmers.se/~aarne/geocal2006.pdf">slides</A>
 on the topic.
 </P>
 <H2>Parametrizing a resource grammar implementation</H2>
 <P>
 This is the most demanding form of resource grammar writing.
 We do <I>not</I> recommend the method of parametrizing from the
 beginning: it is easier to have one language first implemented 
 in the conventional way and then add another language of the
 same family by parametrization. This means that the copy and
 paste method is still used, but at this time the differences
 are put into an <CODE>interface</CODE> module. 
 </P>
 <P>
 This chapter will work out an example of how an Estonian grammar
 is constructed from the Finnish grammar through parametrization.
 </P>
 <!-- html code generated by txt2tags 2.4 (http://txt2tags.sf.net) -->
 <!-- cmdline: txt2tags Resource-HOWTO.txt -->
 </BODY></HTML>
@@ -0,0 +1,789 @@
 Resource grammar writing HOWTO
 Author: Aarne Ranta <aarne (at) cs.chalmers.se>
 Last update: %%date(%c)
 % NOTE: this is a txt2tags file.
 % Create an html file from this file using:
 % txt2tags --toc -thtml Resource-HOWTO.txt
 %!target:html
 **History**
 September 2008: partly outdated - to be updated for API 1.5.
 October 2007: updated for API 1.2.
 January 2006: first version.
 The purpose of this document is to tell how to implement the GF
 resource grammar API for a new language. We will //not// cover how
 to use the resource grammar, nor how to change the API. But we
 will give some hints how to extend the API.
 A manual for using the resource grammar is found in
 [``http://www.cs.chalmers.se/~aarne/GF/lib/resource-1.0/doc/synopsis.html`` http://www.cs.chalmers.se/~aarne/GF/lib/resource-1.0/doc/synopsis.html].
 A tutorial on GF, also introducing the idea of resource grammars, is found in
 [``http://www.cs.chalmers.se/~aarne/GF/doc/tutorial/gf-tutorial2.html`` ../../../doc/tutorial/gf-tutorial2.html].
 This document concerns the API v. 1.0. You can find the current code in 
 [``http://www.cs.chalmers.se/~aarne/GF/lib/resource-1.0/`` ..]
 ==The resource grammar structure==
 The library is divided into a bunch of modules, whose dependencies
 are given in the following figure.
 [Syntax.png] 
 - solid contours: module used by end users
 - dashed contours: internal module
 - ellipse: abstract/concrete pair of modules
 - rectangle: resource or instance
 - diamond: interface
 The solid ellipses show the API as visible to the user of the library. The
 dashed ellipses form the main part of the implementation, which the resource
 grammar programmer has to work on. With the exception of the ``Paradigms``
 module, the visible API modules can be produced mechanically.
 [Grammar.png] 
 Thus the API consists of a grammar and a lexicon, which is
 provided for test purposes.
 The module structure is rather flat: most modules are direct
 parents of ``Grammar``. The idea
 is that you can concentrate on one linguistic aspect at a time, or
 also distribute the work among several authors. The module ``Cat``
 defines the "glue" that ties the aspects together - a type system
 to which all the other modules conform, so that e.g. ``NP`` means
 the same thing in those modules that use ``NP``s and those that
 construct them.
 ===Phrase category modules===
 The direct parents of the top will be called **phrase category modules**,
 since each of them concentrates on a particular phrase category (nouns, verbs,
 adjectives, sentences,...). A phrase category module tells 
 //how to construct phrases in that category//. You will find out that
 all functions in any of these modules have the same value type (or maybe
 one of a small number of different types). Thus we have
 - ``Noun``: construction of nouns and noun phrases
 - ``Adjective``: construction of adjectival phrases
 - ``Verb``: construction of verb phrases
 - ``Adverb``: construction of adverbial phrases
 - ``Numeral``: construction of cardinal and ordinal numerals
 - ``Sentence``: construction of sentences and imperatives
 - ``Question``: construction of questions
 - ``Relative``: construction of relative clauses
 - ``Conjunction``: coordination of phrases
 - ``Phrase``: construction of the major units of text and speech
 - ``Text``: construction of texts as sequences of phrases
 - ``Idiom``: idiomatic phrases such as existentials
 ===Infrastructure modules===
 Expressions of each phrase category are constructed in the corresponding
 phrase category module. But their //use// mostly takes place in other modules.
 For instance, noun phrases, which are constructed in ``Noun``, are
 used as arguments of functions of almost all other phrase category modules. 
 How can we build all these modules independently of each other?
 As usual in typeful programming, the //only// thing you need to know
 about an object you use is its type. When writing a linearization rule
 for a GF abstract syntax function, the only thing you need to know is
 the linearization types of its value and argument categories. To achieve
 the division of the resource grammar to several parallel phrase category modules,
 what we need is an underlying definition of the linearization types. This
 definition is given as the implementation of
 - ``Cat``: syntactic categories of the resource grammar
 Any resource grammar implementation has first to agree on how to implement
 ``Cat``. Luckily enough, even this can be done incrementally: you
 can skip the ``lincat`` definition of a category and use the default
 ``{s : Str}`` until you need to change it to something else. In
 English, for instance, many categories do have this linearization type.
 ===Lexical modules===
 What is lexical and what is syntactic is not as clearcut in GF as in
 some other grammar formalisms. Logically, lexical means atom, i.e. a
 ``fun`` with no arguments. Linguistically, one may add to this
 that the ``lin`` consists of only one token (or of a table whose values
 are single tokens). Even in the restricted lexicon included in the resource
 API, the latter rule is sometimes violated in some languages. For instance,
 ``Structural.both7and_DConj`` is an atom, but its linearization is
 two words e.g. //both - and//.
 Another characterization of lexical is that lexical units can be added
 almost //ad libitum//, and they cannot be defined in terms of already
 given rules. The lexical modules of the resource API are thus more like
 samples than complete lists. There are two such modules:
 - ``Structural``: structural words (determiners, conjunctions,...)
 - ``Lexicon``: basic everyday content words (nouns, verbs,...)
 The module ``Structural`` aims for completeness, and is likely to
 be extended in future releases of the resource. The module ``Lexicon``
 gives a "random" list of words, which enable interesting testing of syntax,
 and also a check list for morphology, since those words are likely to include
 most morphological patterns of the language.
 In the case of ``Lexicon`` it may come out clearer than anywhere else
 in the API that it is impossible to give exact translation equivalents in
 different languages on the level of a resource grammar. In other words,
 application grammars are likely to use the resource in different ways for
 different languages.
 ==Language-dependent syntax modules==
 In addition to the common API, there is room for language-dependent extensions
 of the resource. The top level of each language looks as follows (with English as example):
 ```
  abstract English = Grammar, ExtraEngAbs, DictEngAbs
 ```
 where ``ExtraEngAbs`` is a collection of syntactic structures specific to English,
 and ``DictEngAbs`` is an English dictionary 
 (at the moment, it consists of ``IrregEngAbs``,
 the irregular verbs of English). Each of these language-specific grammars has 
 the potential to grow into a full-scale grammar of the language. These grammars
 can also be used as libraries, but the possibility of using functors is lost.
 To give a better overview of language-specific structures, 
 modules like ``ExtraEngAbs``
 are built from a language-independent module ``ExtraAbs`` 
 by restricted inheritance:
 ```
  abstract ExtraEngAbs = Extra [f,g,...]
 ```
 Thus any category and function in ``Extra`` may be shared by a subset of all
 languages. One can see this set-up as a matrix, which tells 
 what ``Extra`` structures
 are implemented in what languages. For the common API in ``Grammar``, the matrix
 is filled with 1's (everything is implemented in every language).
 In a minimal resource grammar implementation, the language-dependent
 extensions are just empty modules, but it is good to provide them for
 the sake of uniformity.
 ==The core of the syntax==
 Among all categories and functions, a handful are the
 most important and distinct ones, of which the others can be 
 seen as variations. The categories are
 ```
  Cl ; VP ; V2 ; NP ; CN ; Det ; AP ;
 ```
 The functions are
 ```
  PredVP  : NP  -> VP -> Cl ;  -- predication
  ComplV2 : V2  -> NP -> VP ;  -- complementization
  DetCN   : Det -> CN -> NP ;  -- determination
  ModCN   : AP  -> CN -> CN ;  -- modification
 ```
 This [toy Latin grammar  latin.gf] shows in a nutshell how these
 rules relate the categories to each other. It is intended to be a
 first approximation when designing the parameter system of a new
 language. 
 ===Another reduced API===
 If you want to experiment with a small subset of the resource API first, 
 try out the module 
 [Syntax http://www.cs.chalmers.se/~aarne/GF/doc/tutorial/resource/Syntax.gf]
 explained in the
 [GF Tutorial http://www.cs.chalmers.se/~aarne/GF/doc/tutorial/gf-tutorial2.html].
 ===The present-tense fragment===
 Some lines in the resource library are suffixed with the comment
 ``` --# notpresent
 which is used by a preprocessor to exclude those lines from 
 a reduced version of the full resource. This present-tense-only
 version is useful for applications in most technical text, since
 it reduces the grammar size and compilation time. It can also
 be useful to exclude those lines in a first version of resource
 implementation. To compile a grammar with present-tense-only, use
 ```
  i -preproc=GF/lib/resource-1.0/mkPresent LangGer.gf
 ```
 ==Phases of the work==
 ===Putting up a directory===
 Unless you are writing an instance of a parametrized implementation
 (Romance or Scandinavian), which will be covered later, the
 simplest way is to follow roughly the following procedure. Assume you
 are building a grammar for the German language. Here are the first steps,
 which we actually followed ourselves when building the German implementation
 of resource v. 1.0.
 + Create a sister directory for ``GF/lib/resource/english``, named
     ``german``.
 ```
       cd GF/lib/resource/
       mkdir german
       cd german
 ```
 + Check out the [ISO 639 3-letter language code 
   http://www.w3.org/WAI/ER/IG/ert/iso639.htm] 
   for German: both ``Ger`` and ``Deu`` are given, and we pick ``Ger``.
 + Copy the ``*Eng.gf`` files from ``english`` to ``german``,
     and rename them:
 ```
       cp ../english/*Eng.gf .
       rename 's/Eng/Ger/' *Eng.gf
 ```
 + Change the ``Eng`` module references to ``Ger`` references
     in all files:
 ```
       sed -i 's/English/German/g' *Ger.gf
       sed -i 's/Eng/Ger/g' *Ger.gf
 ```
  The first line prevents changing the word ``English``, which appears
  here and there in comments, to ``Gerlish``.
 + This may of course change unwanted occurrences of the 
     string ``Eng`` - verify this by
 ```
       grep Ger *.gf
 ```
     But you will have to make lots of manual changes in all files anyway!
 + Comment out the contents of these files:
 ``` 
       sed -i 's/^/--/' *Ger.gf
 ```
     This will give you a set of templates out of which the grammar
     will grow as you uncomment and modify the files rule by rule.
 + In all ``.gf`` files, uncomment the module headers and brackets,
  leaving the module bodies commented. Unfortunately, there is no
  simple way to do this automatically (or to avoid commenting these
  lines in the previous step) - but uncommenting the first
  and the last lines will actually do the job for many of the files.
 + Uncomment the contents of the main grammar file:
 ``` 
       sed -i 's/^--//' LangGer.gf
 ```
 + Now you can open the grammar ``LangGer`` in GF:
 ``` 
       gf LangGer.gf
 ```
  You will get lots of warnings on missing rules, but the grammar will compile.
 + At all following steps you will now have a valid, but incomplete
     GF grammar. The GF command
 ``` 
       pg -printer=missing
 ```
     tells you what exactly is missing.
 Here is the module structure of ``LangGer``. It has been simplified by leaving out
 the majority of the phrase category modules. Each of them has the same dependencies
 as e.g. ``VerbGer``.
 [German.png]
 ===Direction of work===
 The real work starts now. There are many ways to proceed, the main ones being
 - Top-down: start from the module ``Phrase`` and go down to ``Sentence``, then
  ``Verb``, ``Noun``, and in the end ``Lexicon``. In this way, you are all the time
  building complete phrases, and fill them with more content as you proceed.
  **This approach is not recommended**. It is impossible to test the rules if
  you have no words to apply the constructions to.
 - Bottom-up: set as your first goal to implement ``Lexicon``. To this end, you
  need to write ``ParadigmsGer``, which in turn needs parts of 
  ``MorphoGer`` and ``ResGer``.
  **This approach is not recommended**. You can get stuck in details of
  morphology such as irregular words, and you don't have enough grasp about
  the type system to decide what forms to cover in morphology.
 The practical working direction is thus a saw-like motion between the morphological
 and top-level modules. Here is a possible course of the work that gives enough
 test data and enough general view at any point:
 + Define ``Cat.N`` and the required parameter types in ``ResGer``. As we define
 ```
  lincat N  = {s : Number => Case => Str ; g : Gender} ;
 ```
 we need the parameter types ``Number``, ``Case``, and ``Gender``. The definition
 of ``Number`` in [``common/ParamX``  ../common/ParamX.gf] works for German, so we
 use it and just define ``Case`` and ``Gender`` in ``ResGer``.
 + Define ``regN`` in ``ParadigmsGer``. In this way you can 
 already implement a huge amount of nouns correctly in ``LexiconGer``. Actually
 just adding ``mkN`` should suffice for every noun - but, 
 since it is tedious to use, you
 might proceed to the next step before returning to morphology and defining the
 real work horse ``reg2N``.
 + While doing this, you may want to test the resource independently. Do this by
 ```
       i -retain ParadigmsGer
       cc regN "Kirche"
 ```
 + Proceed to determiners and pronouns in 
 ``NounGer`` (``DetCN UsePron DetSg SgQuant NoNum NoOrd DefArt IndefArt UseN``) and 
 ``StructuralGer`` (``i_Pron every_Det``). You also need some categories and
 parameter types. At this point, it is maybe not possible to find out the final
 linearization types of ``CN``, ``NP``, and ``Det``, but at least you should
 be able to correctly inflect noun phrases such as //every airplane//:
 ```
  i LangGer.gf
  l -table DetCN every_Det (UseN airplane_N)
  Nom: jedes Flugzeug
  Acc: jedes Flugzeug
  Dat: jedem Flugzeug
  Gen: jedes Flugzeugs
 ```
 + Proceed to verbs: define ``CatGer.V``,  ``ResGer.VForm``, and
 ``ParadigmsGer.regV``. You may choose to exclude ``notpresent``
 cases at this point. But anyway, you will be able to inflect a good
 number of verbs in ``Lexicon``, such as
 ``live_V`` (``regV "leben"``).
 + Now you can soon form your first sentences: define ``VP`` and
 ``Cl`` in ``CatGer``, ``VerbGer.UseV``, and ``SentenceGer.PredVP``.
 Even if you have excluded the tenses, you will be able to produce
 ```
  i -preproc=mkPresent LangGer.gf
  > l -table PredVP (UsePron i_Pron) (UseV live_V)
  Pres Simul Pos Main: ich lebe
  Pres Simul Pos Inv:  lebe ich
  Pres Simul Pos Sub:  ich lebe
  Pres Simul Neg Main: ich lebe nicht
  Pres Simul Neg Inv:  lebe ich nicht
  Pres Simul Neg Sub:  ich nicht lebe
 ```
 + Transitive verbs (``CatGer.V2 ParadigmsGer.dirV2 VerbGer.ComplV2``) 
 are a natural next step, so that you can
 produce ``ich liebe dich``.
 + Adjectives (``CatGer.A ParadigmsGer.regA NounGer.AdjCN AdjectiveGer.PositA``) 
 will force you to think about strong and weak declensions, so that you can
 correctly inflect //my new car, this new car//. 
 + Once you have implemented the set
 (``Noun.DetCN Noun.AdjCN Verb.UseV Verb.ComplV2 Sentence.PredVP``),
 you have overcome most of the difficulties. You know roughly what parameters
 and dependences there are in your language, and you can now proceed very
 much in the order you please. 
 ===The develop-test cycle===
 The following develop-test cycle will
 be applied most of the time, both in the first steps described above
 and in later steps where you are more on your own.
 + Select a phrase category module, e.g. ``NounGer``, and uncomment some
  linearization rules (for instance, ``DefSg``, which is
  not too complicated).
 + Write down some German examples of this rule, for instance translations
     of "the dog", "the house", "the big house", etc. Write these in all their
     different forms (two numbers and four cases).
 + Think about the categories involved (``CN, NP, N``) and the
     variations they have. Encode this in the lincats of ``CatGer``.
     You may have to define some new parameter types in ``ResGer``.
 + To be able to test the construction, 
     define some words you need to instantiate it
     in ``LexiconGer``. You will also need some regular inflection patterns
     in ``ParadigmsGer``.
 + Test by parsing, linearization,
     and random generation. In particular, linearization to a table should
     be used so that you see all forms produced:
 ```
       gr -cat=NP -number=20 -tr | l -table
 ```
 + Spare some tree-linearization pairs for later regression testing. Use the
  ``tree_bank`` command,
 ```
       gr -cat=NP -number=20 | tb -xml | wf NP.tb
 ```
  You can later compare your modified grammar to this treebank by
 ```
       rf NP.tb | tb -c
 ```
 You are likely to run this cycle a few times for each linearization rule
 you implement, and some hundreds of times altogether. (There are 66 ``cat``s and
 458 ``fun``s in ``Lang`` at the moment; 149 of the ``fun``s are outside the two
 lexicon modules.)
 Here is a [live log ../german/log.txt] of the actual process of
 building the German implementation of resource API v. 1.0.
 It is the basis of the more detailed explanations, which will
 follow soon. (You will find out that these explanations involve
 a rational reconstruction of the live process! Among other things, the
 API was changed during the actual process to make it more intuitive.)
 ===Resource modules used===
 These modules will be written by you.
 - ``ResGer``: parameter types and auxiliary operations 
 (a resource for the resource grammar!)
 - ``ParadigmsGer``: complete inflection engine and most important regular paradigms
 - ``MorphoGer``: auxiliaries for ``ParadigmsGer`` and ``StructuralGer``. This need
 not be separate from ``ResGer``.
 These modules are language-independent and provided by the existing resource
 package.
 - ``ParamX``: parameter types used in many languages
 - ``CommonX``: implementation of language-uniform categories 
    such as ``Text`` and ``Phr``, as well as of
    the logical tense, anteriority, and polarity parameters
 - ``Coordination``: operations to deal with lists and coordination
 - ``Prelude``: general-purpose operations on strings, records,
      truth values, etc.
 - ``Predefined``: general-purpose operations with hard-coded definitions
 An important decision is what rules to implement in terms of operations in
 ``ResGer``. A golden rule of functional programming says that, whenever
 you find yourself programming by copy and paste, you should write a function
 instead. This indicates that an operation should be created if it is to be
 used at least twice. At the same time, a sound principle of vicinity says that
 it should not require too much browsing to understand what a rule does.
 From these two principles, we have derived the following practice:
 - If an operation is needed //in two different modules//, 
 it should be created in ``ResGer``. An example is ``mkClause``, 
 used in ``Sentence``, ``Question``, and ``Relative``.
 - If an operation is needed //twice in the same module//, but never
 outside, it should be created in the same module. Many examples are
 found in ``Numeral``.
 - If an operation is only needed once, it should not be created (but rather
 inlined). Most functions in phrase category modules are implemented in this
 way.
 This discipline is very different from the one followed in earlier
 versions of the library (up to 0.9). We then valued the principle of
 abstraction more than vicinity, creating layers of abstraction for
 almost everything. This led in practice to the duplication of almost
 all code on the ``lin`` and ``oper`` levels, and made the code
 hard to understand and maintain.
 ===Morphology and lexicon===
 The paradigms needed to implement
 ``LexiconGer`` are defined in
 ``ParadigmsGer``.
 This module provides high-level ways to define the linearization of
 lexical items, of categories ``N, A, V`` and their complement-taking
 variants.
 For ease of use, the ``Paradigms`` modules follow a certain
 naming convention. Thus they provide, for each lexical category, such as ``N``,
 the functions
 - ``mkN``, for worst-case construction of ``N``. Its type signature
     has the form
 ```
       mkN : Str -> ... -> Str -> P -> ... -> Q -> N
 ```
     with as many string and parameter arguments as can ever be needed to
     construct an ``N``.
 - ``regN``, for the most common cases, with just one string argument:
 ```
       regN : Str -> N
 ```
 - A language-dependent (small) set of functions to handle mild irregularities
     and common exceptions.
 For the complement-taking variants, such as ``V2``, we provide
 - ``mkV2``, which takes a ``V`` and all necessary arguments, such
     as case and preposition:
 ```
       mkV2 : V -> Case -> Str -> V2 ;
 ```
 - A language-dependent (small) set of functions to handle common special cases,
     such as direct transitive verbs:
 ```
       dirV2 : V -> V2 ;
       -- dirV2 v = mkV2 v accusative [] 
 ```
 The golden rule for the design of paradigms is that
 - The user will only need function applications with constants and strings,
     never any records or tables.
 The discipline of data abstraction moreover requires that the user of the resource
 is not given access to parameter constructors, but only to constants that denote 
 them. This gives the resource grammarian the freedom to change the underlying
 data representation if needed. It means that the ``ParadigmsGer`` module has
 to define constants for those parameter types and constructors that 
 the application grammarian may need to use, e.g.
 ```
  oper 
    Case : Type ;
    nominative, accusative, genitive, dative : Case ;
 ```
 These constants are defined in terms of parameter types and constructors
 in ``ResGer`` and ``MorphoGer``, which modules are not
 visible to the application grammarian.
 ===Lock fields===
 An important difference between ``MorphoGer`` and
 ``ParadigmsGer`` is that the former uses "raw" record types
 for word classes, whereas the latter uses category symbols defined in
 ``CatGer``. When these category symbols are used to denote
 record types in a resource module, such as ``ParadigmsGer``,
 a **lock field** is added to the record, so that categories
 with the same implementation are not confused with each other.
 (This is inspired by the ``newtype`` discipline in Haskell.)
 For instance, the lincats of adverbs and conjunctions are the same
 in ``CommonX`` (and therefore in ``CatGer``, which inherits it):
 ```
  lincat Adv  = {s : Str} ;
  lincat Conj = {s : Str} ;
 ```
 But when these category symbols are used to denote their linearization 
 types in a resource module, these definitions are translated to
 ```
  oper Adv  : Type = {s : Str  ; lock_Adv  : {}} ;
  oper Conj : Type = {s : Str  ; lock_Conj : {}} ;
 ```
 In this way, the user of a resource grammar cannot confuse adverbs with
 conjunctions. In other words, the lock fields force the type checker
 to function as grammaticality checker.
 When the resource grammar is ``open``ed in an application grammar, the
 lock fields are never seen (except possibly in type error messages),
 and the application grammarian should never write them herself. If she
 has to do this, it is a sign that the resource grammar is incomplete, and
 the proper way to proceed is to fix the resource grammar.
 The resource grammarian has to provide the dummy lock field values
 in her hidden definitions of constants in ``Paradigms``. For instance,
 ```
  mkAdv : Str -> Adv ;
  -- mkAdv s = {s = s ; lock_Adv = <>} ;
 ```
 ===Lexicon construction===
 The lexicon belonging to ``LangGer`` consists of two modules:
 - ``StructuralGer``, structural words, built by directly using
     ``MorphoGer``.
 - ``LexiconGer``, content words, built by using ``ParadigmsGer``.
 The reason why ``MorphoGer`` has to be used in ``StructuralGer``
 is that ``ParadigmsGer`` does not contain constructors for closed
 word classes such as pronouns and determiners. The reason why we
 recommend ``ParadigmsGer`` for building ``LexiconGer`` is that
 the coverage of the paradigms gets thereby tested and that the
 use of the paradigms in ``LexiconGer`` gives a good set of examples for
 those who want to build new lexica.
 ==Inside grammar modules==
 Detailed implementation tricks
 are found in the comments of each module.
 ===The category system===
 - [Common gfdoc/Common.html], [CommonX ../common/CommonX.gf]
 - [Cat gfdoc/Cat.html], [CatGer ../german/CatGer.gf]
 ===Phrase category modules===
 - [Noun gfdoc/Noun.html], [NounGer ../german/NounGer.gf]
 - [Adjective gfdoc/Adjective.html], [AdjectiveGer ../german/AdjectiveGer.gf]
 - [Verb gfdoc/Verb.html], [VerbGer ../german/VerbGer.gf]
 - [Adverb gfdoc/Adverb.html], [AdverbGer ../german/AdverbGer.gf]
 - [Numeral gfdoc/Numeral.html], [NumeralGer ../german/NumeralGer.gf]
 - [Sentence gfdoc/Sentence.html], [SentenceGer ../german/SentenceGer.gf]
 - [Question gfdoc/Question.html], [QuestionGer ../german/QuestionGer.gf]
 - [Relative gfdoc/Relative.html], [RelativeGer ../german/RelativeGer.gf]
 - [Conjunction gfdoc/Conjunction.html], [ConjunctionGer ../german/ConjunctionGer.gf]
 - [Phrase gfdoc/Phrase.html], [PhraseGer ../german/PhraseGer.gf]
 - [Text gfdoc/Text.html], [TextX ../common/TextX.gf]
 - [Idiom gfdoc/Idiom.html], [IdiomGer ../german/IdiomGer.gf]
 - [Lang gfdoc/Lang.html], [LangGer ../german/LangGer.gf]
 ===Resource modules===
 - [ResGer ../german/ResGer.gf]
 - [MorphoGer ../german/MorphoGer.gf]
 - [ParadigmsGer gfdoc/ParadigmsGer.html], [ParadigmsGer.gf ../german/ParadigmsGer.gf]
 ===Lexicon===
 - [Structural gfdoc/Structural.html], [StructuralGer ../german/StructuralGer.gf]
 - [Lexicon gfdoc/Lexicon.html], [LexiconGer ../german/LexiconGer.gf]
 ==Lexicon extension==
 ===The irregularity lexicon===
 It may be handy to provide a separate module of irregular
 verbs and other words which are difficult for a lexicographer
 to handle. There are usually a limited number of such words - a
 few hundred perhaps. Building such a lexicon separately also
 makes it less important to cover //everything// by the
 worst-case paradigms (``mkV`` etc).
 ===Lexicon extraction from a word list===
 You can often find resources such as lists of 
 irregular verbs on the internet. For instance, the
 [Irregular German Verbs http://www.iee.et.tu-dresden.de/~wernerr/grammar/verben_dt.html] 
 page gives a list of verbs in the
 traditional tabular format, which begins as follows:
 ```
  backen (du bäckst, er bäckt)	                 backte [buk]              gebacken
  befehlen (du befiehlst, er befiehlt; befiehl!) befahl (beföhle; befähle) befohlen
  beginnen                                       begann (begönne; begänne) begonnen
  beißen                                         biß                       gebissen
 ```
 All you have to do is to write a suitable verb paradigm
 ```
  irregV : (x1,_,_,_,_,x6 : Str) -> V ;
 ```
 and a Perl or Python or Haskell script that transforms
 the table to
 ```
  backen_V   = irregV "backen" "bäckt" "back" "backte" "backte" "gebacken" ;
  befehlen_V = irregV "befehlen" "befiehlt" "befiehl" "befahl" "beföhle" "befohlen" ;
 ```
 When using ready-made word lists, you should think about
 copyright issues. Ideally, all resource grammar material should
 be provided under GNU General Public License.
 ===Lexicon extraction from raw text data===
 This is a cheap technique to build a lexicon of thousands
 of words, if text data is available in digital format.
 See the [Functional Morphology http://www.cs.chalmers.se/~markus/FM/] 
 homepage for details.
 ===Extending the resource grammar API===
 Sooner or later it will happen that the resource grammar API
 does not suffice for all applications. A common reason is
 that it does not include idiomatic expressions in a given language.
 The solution then is in the first place to build language-specific
 extension modules. This chapter will deal with this issue (to be completed).
 ==Writing an instance of parametrized resource grammar implementation==
 Above we have looked at how a resource implementation is built by
 the copy and paste method (from English to German), that is, formally
 speaking, from scratch. A more elegant solution available for 
 families of languages such as Romance and Scandinavian is to
 use parametrized modules. The advantages are
 - theoretical: linguistic generalizations and insights
 - practical: maintainability improves with fewer components
 In this chapter, we will look at an example: adding Italian to
 the Romance family (to be completed). Here is a set of
 [slides http://www.cs.chalmers.se/~aarne/geocal2006.pdf]
 on the topic.
 ==Parametrizing a resource grammar implementation==
 This is the most demanding form of resource grammar writing.
 We do //not// recommend the method of parametrizing from the
 beginning: it is easier to have one language first implemented 
 in the conventional way and then add another language of the
 same family by parametrization. This means that the copy and
 paste method is still used, but at this time the differences
 are put into an ``interface`` module. 
 This chapter will work out an example of how an Estonian grammar
 is constructed from the Finnish grammar through parametrization.
@@ -0,0 +1,79 @@
 // Overview diagram: one hub node for the shared abstract syntax, connected
 // by a single undirected edge to one node per language (23 language nodes).
 // Node colours, as explained in the accompanying summer-school text:
 //   green = language already covered by the GF resource grammar library,
 //   red   = language on which the summer-school effort is concentrated.
 // NOTE(review): presumably the source of eu-langs.png -- confirm against the build.
 graph{
 size = "7,7" ;
 // Overlap handling for the layout engine; see the Graphviz "overlap" attribute.
 overlap = scale ;
 // Hub: the language-independent abstract syntax (drawn as a rectangle).
 "Abs" [label = "Abstract Syntax", style = "solid", shape = "rectangle"] ;
 // Language nodes "1".."23"; each declaration is followed by its edge to the hub.
 "1"   [label = "Bulgarian", style = "solid", shape = "ellipse", color = "green"] ;
 "1" -- "Abs" [style = "solid"];
 "2"   [label = "Czech", style = "solid", shape = "ellipse", color = "red"] ;
 "2" -- "Abs" [style = "solid"];
 "3"   [label = "Danish", style = "solid", shape = "ellipse", color = "green"] ;
 "3" -- "Abs" [style = "solid"];
 "4"   [label = "German", style = "solid", shape = "ellipse", color = "green"] ;
 "4" -- "Abs" [style = "solid"];
 "5"   [label = "Estonian", style = "solid", shape = "ellipse", color = "red"] ;
 "5" -- "Abs" [style = "solid"];
 "6"   [label = "Greek", style = "solid", shape = "ellipse", color = "red"] ;
 "6" -- "Abs" [style = "solid"];
 "7"   [label = "English", style = "solid", shape = "ellipse", color = "green"] ;
 "7" -- "Abs" [style = "solid"];
 "8"   [label = "Spanish", style = "solid", shape = "ellipse", color = "green"] ;
 "8" -- "Abs" [style = "solid"];
 "9"   [label = "French", style = "solid", shape = "ellipse", color = "green"] ;
 "9" -- "Abs" [style = "solid"];
 "10"   [label = "Italian", style = "solid", shape = "ellipse", color = "green"] ;
 "10" -- "Abs" [style = "solid"];
 "11"   [label = "Latvian", style = "solid", shape = "ellipse", color = "red"] ;
 "11" -- "Abs" [style = "solid"];
 // From node "12" on, the edges are written hub-first ("Abs" -- "n"); the graph
 // is undirected, so they connect the same pairs of nodes as the edges above.
 "12"   [label = "Lithuanian", style = "solid", shape = "ellipse", color = "red"] ;
 "Abs" -- "12" [style = "solid"];
 "13"   [label = "Irish", style = "solid", shape = "ellipse", color = "red"] ;
 "Abs" -- "13" [style = "solid"];
 "14"   [label = "Hungarian", style = "solid", shape = "ellipse", color = "red"] ;
 "Abs" -- "14" [style = "solid"];
 "15"   [label = "Maltese", style = "solid", shape = "ellipse", color = "red"] ;
 "Abs" -- "15" [style = "solid"];
 "16"   [label = "Dutch", style = "solid", shape = "ellipse", color = "red"] ;
 "Abs" -- "16" [style = "solid"];
 "17"   [label = "Polish", style = "solid", shape = "ellipse", color = "red"] ;
 "Abs" -- "17" [style = "solid"];
 "18"   [label = "Portuguese", style = "solid", shape = "ellipse", color = "red"] ;
 "Abs" -- "18" [style = "solid"];
 "19"   [label = "Slovak", style = "solid", shape = "ellipse", color = "red"] ;
 "Abs" -- "19" [style = "solid"];
 "20"   [label = "Slovene", style = "solid", shape = "ellipse", color = "red"] ;
 "Abs" -- "20" [style = "solid"];
 "21"   [label = "Romanian", style = "solid", shape = "ellipse", color = "red"] ;
 "Abs" -- "21" [style = "solid"];
 "22"   [label = "Finnish", style = "solid", shape = "ellipse", color = "green"] ;
 "Abs" -- "22" [style = "solid"];
 "23"   [label = "Swedish", style = "solid", shape = "ellipse", color = "green"] ;
 "Abs" -- "23" [style = "solid"];
 }
@@ -0,0 +1,368 @@
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
 <HTML>
 <HEAD>
 <META NAME="generator" CONTENT="http://txt2tags.sf.net">
 <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
 <TITLE>European Resource Grammar Summer School</TITLE>
 </HEAD><BODY BGCOLOR="white" TEXT="black">
 <P ALIGN="center"><CENTER><H1>European Resource Grammar Summer School</H1>
 <FONT SIZE="4">
 <I>Gothenburg, August 2009</I><BR>
 Aarne Ranta (aarne at chalmers.se)
 </FONT></CENTER>
 <P>
 <IMG ALIGN="middle" SRC="eu-langs.png" BORDER="0" ALT="">
 </P>
 <H3>Executive summary</H3>
 <P>
 We plan to organize a summer school with the goal of implementing the GF
 resource grammar library for 15 new languages, so that the library will
 cover all the 23 official EU languages of year 2009. 
 As a test application of the grammars, also an extension of
 the WebALT mathematical exercise translator will be built for each
 language.
 </P>
 <P>
 2 students per language are selected to the summer school, after a phase of 
 self-studies and on the basis of assignments that consist of parts of the resource
 grammars. Travel and accommodation are paid to these participants.
 If funding gets arranged, the call of participation for the summer school will
 be announced in February 2009, and the summer school itself will take place
 in August 2009, in Gothenburg.
 </P>
 <H2>Introduction</H2>
 <P>
 Since 2007, EU-27 has 23 official languages, listed in the diagram on top of this
 document.
 There is a growing need of translation between
 these languages. The traditional language-to-language method requires 23*22 = 506
 translators (humans or computer programs) to cover all possible translation needs.
 </P>
 <P>
 An alternative to language-to-language translation is the use of an <B>interlingua</B>:
 a language-independent representation such that all translation problems can
 be reduced to translating to and from the interlingua. With 23 languages,
 only 2*23 = 46 translators are needed.
 </P>
 <P>
 Interlingua sounds too good to be true. In a sense, it is. All attempts to
 create an interlingua that would solve all translation problems have failed.
 However, interlinguas for restricted applications have shown more
 success. For instance, mathematical texts and weather reports can be translated
 by using interlinguas tailor-made for the domains of mathematics and weather reports,
 respectively.
 </P>
 <P>
 What is required of an interlingua is
 </P>
 <UL>
 <LI>semantic accuracy: correspondence to what you want to say in the application
 <LI>language-independence: abstraction from individual languages
 </UL>
 <P>
 Thus, for instance, an interlingua for mathematical texts may be based on
 mathematical logic, which at the same time gives semantic accuracy and
 language independence. In other domains, something else than mathematical
 logic may be needed; the <B>ontologies</B> defined within the semantic
 web technology are often good starting points for interlinguas.
 </P>
 <H2>GF: a framework for multilingual grammars</H2>
 <P>
 The interlingua is just one part of a translation system. We also need
 the mappings between the interlingua and the involved languages. As the
 number of languages increases, this part grows while the interlingua remains
 constant.
 </P>
 <P>
 GF (Grammatical Framework, 
 <A HREF="http://gf.digitalgrammars.com"><CODE>gf.digitalgrammars.com</CODE></A>)
 is a programming language designed to support interlingua-based translation.
 A "program" in GF is a <B>multilingual grammar</B>, which consists of an 
 <B>abstract syntax</B> and a set of <B>concrete syntaxes</B>. A concrete
 syntax is a mapping from the abstract syntax to a particular language.
 These mappings are <B>reversible</B>, which means that they can be used for
 translating in both directions. This means that creating an interlingua-based
 translator for 23 languages just requires 1 + 23 = 24 grammar modules (the abstract
 syntax and the concrete syntaxes).
 </P>
 <P>
 The diagram first in this document shows a system covering the 23 EU languages. 
 Languages marked in
 red are of particular interest for the summer school, since they are those 
 on which the effort will be concentrated.
 </P>
 <H2>The GF resource grammar library</H2>
 <P>
 The GF resource grammar library is a set of grammars used as libraries when
 building interlingua-based translation systems. The library currently covers
 the 9 languages coloured in green in the diagram above; in addition, 
 Catalan, Norwegian, and Russian are covered, and there is ongoing work on
 Arabic, Hindi/Urdu, and Thai.
 </P>
 <P>
 The purpose of the resource grammar library is to define the "low-level" structure
 of a language: inflection, word order, agreement. This structure belongs to what
 linguists call morphology and syntax. It can be very complex and requires
 a lot of knowledge. Yet, when translating from one language to another, knowing 
 morphology and syntax is but a part of what is needed. The translator (whether human
 or machine) must understand the meaning of what is translated, and must also know
 the idiomatic way to express the meaning in the target language. This knowledge
 can be very domain-dependent and requires in general an expert in the field to
 reach high quality: a mathematician in the field of mathematics, a meteorologist
 in the field of weather reports, etc.
 </P>
 <P>
 The problem is to find a person who is an expert in both the domain of translation
 and in the low-level linguistic details. It is the rareness of this combination
 that has made it difficult to build interlingua-based translation systems.
 The GF resource grammar library has the mission of helping in this task. It encapsulates
 the low-level linguistics in program modules accessed through easy-to-use interfaces.
 Experts on different domains can build translation systems by using the library, 
 without knowing low-level linguistics. The idea is much the same as when a 
 programmer builds a graphical user interface (GUI) from high-level elements such as
 buttons and menus, without having to care about pixels or geometrical forms.
 </P>
 <H3>Applications of the library</H3>
 <P>
 In addition to translation, the library is also useful in <B>localization</B>,
 that is, porting a piece of software to new languages. 
 The GF resource grammar library has been used in three major projects that need
 interlingua-based translation or localization of systems to new languages:
 </P>
 <UL>
 <LI>in KeY, 
  <A HREF="http://www.key-project.org/"><CODE>http://www.key-project.org/</CODE></A>,
  for writing formal and informal software specifications (3 languages)
 <LI>in WebALT,
  <A HREF="http://webalt.math.helsinki.fi/content/index_eng.html"><CODE>http://webalt.math.helsinki.fi/content/index_eng.html</CODE></A>,
  for translating mathematical exercises to 7 languages
 <LI>in TALK <A HREF="http://www.talk-project.org"><CODE>http://www.talk-project.org</CODE></A>,
  where the library was used for localizing spoken dialogue systems to six languages
 </UL>
 <P>
 The library is also a generic linguistic resource, which can be used for tasks
 such as language teaching and information retrieval. The liberal license (GPL)
 makes it usable for anyone and for any task. GF also has tools supporting the
 use of grammars in programs written in other programming languages: C, C++, Haskell,
 Java, JavaScript, and Prolog. In connection with the TALK project, support has also been
 developed for translating GF grammars to language models used in speech
 recognition.
 </P>
 <H3>The structure of the library</H3>
 <P>
 The library has the following main parts:
 </P>
 <UL>
 <LI><B>Inflection paradigms</B>, covering the inflection of each language.
 <LI><B>Common Syntax API</B>, covering a large set of syntax rules that
  can be implemented for all languages involved.
 <LI><B>Common Test Lexicon</B>, giving ca. 500 common words that can be used for
  testing the library.
 <LI><B>Language-Specific Syntax Extensions</B>, covering syntax rules that are
  not implementable for all languages.
 <LI><B>Language-Specific Lexica</B>, word lists for each language, with
  accurate morphological and syntactic information.
 </UL>
 <P>
 The goal of the summer school is to implement, for each language, at least
 the first three components. The latter two are more open-ended in character.
 </P>
 <H2>The summer school</H2>
 <P>
 The goal of the summer school is to extend the GF resource grammar library
 to covering all 23 EU languages, which means we need 15 new languages.
 </P>
 <P>
 The amount of work and skill is between a Master's thesis and a PhD thesis.
 The Russian implementation was made by Janna Khegai as a part of her
 PhD thesis; the thesis contains other material, too.
 The Arabic implementation was started by Ali El Dada in his Master's thesis,
 but the thesis does not cover the whole API. The realistic amount of work is
 somewhere around 8 person months, but this is very much language-dependent.
 Dutch, for instance, can profit from previous implementations of German and
 Scandinavian languages, and will probably require less work.
 Latvian and Lithuanian are the first languages of the Baltic family and
 will probably require much more work.
 </P>
 <P>
 In any case, the proposed allocation of work power is 2 participants per
 language. They will have 6 months to work at home, followed
 by 2 weeks of summer school. Who are these participants?
 </P>
 <H3>Selecting participants</H3>
 <P>
 After the call has been published, persons interested to participate in
 the project are expected to learn GF by self-study from the 
 <A HREF="http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-tutorial.html">tutorial</A>. 
 This should take a couple of weeks.
 </P>
 <P>
 After and perhaps in parallel with
 working out the tutorial, the participants should continue to 
 implement selected parts of the resource grammar, following the advice from
 the 
 <A HREF="http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/Resource-HOWTO.html">Resource-HOWTO document</A>.
 What parts exactly are selected will be announced later. 
 This work will take another couple of weeks.
 </P>
 <P>
 This sample resource grammar fragment 
 will be submitted to the Summer School Committee in the beginning of May.
 The Committee then decides who is invited to represent which language
 in the summer school.
 </P>
 <P>
 After the Committee decision, the  participants have around three months
 to work on their languages. The work is completed in the summer school itself. It is also
 thoroughly tested by using it to add a new language to the WebALT mathematical
 exercise translator.
 </P>
 <P>
 Depending on the quality of submitted work, and on the demands of different
 languages, the Committee may decide to select another number than 2 participants
 for a language. We will also consider accepting participants who want to
 pay their own expenses.
 </P>
 <P>
 Also good proposals from non-EU languages will be considered. Proponents of
 such languages should contact the summer school organizers as early as possible.
 </P>
 <P>
 To keep track of who is working on which language, we will establish a web page
 (Wiki or similar) soon after the call is published. The participants are encouraged
 to contact each other and even work in groups.
 </P>
 <H3>Who is qualified</H3>
 <P>
 Writing a resource grammar implementation requires good general programming
 skills, and a good explicit knowledge of the grammar of the target language. 
 A typical participant could be 
 </P>
 <UL>
 <LI>native or fluent speaker of the target language
 <LI>interested in languages on the theoretical level, and preferably familiar
  with many languages (to be able to think about them on an abstract level)
 <LI>familiar with functional programming languages such as ML or Haskell
  (GF itself is a language similar to these)
 <LI>on Master's or PhD level in linguistics, computer science, or mathematics
 </UL>
 <P>
 But it is the quality of the assignment that is assessed, not any formal
 requirements. The "typical participant" was described to give an idea of
 who is likely to succeed in this.
 </P>
 <H3>Costs</H3>
 <P>
 Our aim is to make the summer school free of charge for the participants
 who are selected on the basis of their assignments. And not only that:
 we plan to cover their travel and accommodation costs, up to 1000 EUR
 per person.
 </P>
 <P>
 We want to get the funding question settled by mid-February 2009, and make
 the final decision on the summer school then.
 </P>
 <H3>Teachers</H3>
 <P>
 Krasimir Angelov
 </P>
 <P>
 ?Olga Caprotti
 </P>
 <P>
 ?Lauri Carlson
 </P>
 <P>
 ?Robin Cooper
 </P>
 <P>
 ?Björn Bringert
 </P>
 <P>
 Håkan Burden
 </P>
 <P>
 ?Elisabet Engdahl
 </P>
 <P>
 ?Markus Forsberg
 </P>
 <P>
 ?Janna Khegai
 </P>
 <P>
 ?Peter Ljunglöf
 </P>
 <P>
 ?Wanjiku Ng'ang'a
 </P>
 <P>
 Aarne Ranta
 </P>
 <P>
 ?Jordi Saludes
 </P>
 <P>
 In addition, we will look for consultants who can help to assess the results
 for each language.
 </P>
 <H3>The Summer School Committee</H3>
 <P>
 This committee consists of a number of teachers and consultants, 
 who will select the participants.
 </P>
 <H3>Time and Place</H3>
 <P>
 The summer school will
 be organized in Gothenburg in the latter half of August 2009.
 </P>
 <P>
 Time schedule (2009):
 </P>
 <UL>
 <LI>February: announcement of summer school and the grammar
  writing contest to get participants
 <LI>March-April: work on the contest assignment (ca 1 month)
 <LI>May: submission deadline and notification of acceptance
 <LI>June-July: more work on the grammars
 <LI>August: summer school
 </UL>
 <H3>Dissemination and intellectual property</H3>
 <P>
 The new resource grammars will be released under the GPL just like 
 the current resource grammars,
 with the copyright held by respective authors.
 </P>
 <P>
 The grammars will be distributed via the GF web site.
 </P>
 <P>
 The WebALT-specific grammars will have special licenses agreed between the
 authors and WebALT Inc.
 </P>
 <H2>Why I should participate</H2>
 <P>
 Seven reasons:
 </P>
 <OL>
 <LI>free trip and stay in Gothenburg (to be confirmed)
 <LI>participation in a pioneering language technology work in an enthusiastic atmosphere
 <LI>work and fun with people from all over Europe
 <LI>job opportunities and business ideas
 <LI>credits: the school project will be established as a course worth
  15 ECTS points per person, but extensions to Master's thesis will
  also be considered
 <LI>merits: the resulting grammar can easily lead to a published paper
 <LI>contribution to the multilingual and multicultural development of Europe
 </OL>
 <!-- html code generated by txt2tags 2.4 (http://txt2tags.sf.net) -->
 <!-- cmdline: txt2tags gf-summerschool.txt -->
 </BODY></HTML>
@@ -0,0 +1,332 @@
 European Resource Grammar Summer School
 Gothenburg, August 2009
 Aarne Ranta (aarne at chalmers.se)
 %!Encoding : iso-8859-1
 %!target:html
 [eu-langs.png]
 ===Executive summary===
 We plan to organize a summer school with the goal of implementing the GF
 resource grammar library for 15 new languages, so that the library will
 cover all the 23 official EU languages of year 2009. 
 As a test application of the grammars, also an extension of
 the WebALT mathematical exercise translator will be built for each
 language.
 2 students per language are selected to the summer school, after a phase of 
 self-studies and on the basis of assignments that consist of parts of the resource
 grammars. Travel and accommodation are paid to these participants.
 If funding gets arranged, the call of participation for the summer school will
 be announced in February 2009, and the summer school itself will take place
 in August 2009, in Gothenburg.
 ==Introduction==
 Since 2007, EU-27 has 23 official languages, listed in the diagram on top of this
 document.
 %[``http://ec.europa.eu/education/policies/lang/languages/index_en.html`` 
 %http://ec.europa.eu/education/policies/lang/languages/index_en.html]. 
 There is a growing need of translation between
 these languages. The traditional language-to-language method requires 23*22 = 506
 translators (humans or computer programs) to cover all possible translation needs.
 An alternative to language-to-language translation is the use of an **interlingua**:
 a language-independent representation such that all translation problems can
 be reduced to translating to and from the interlingua. With 23 languages,
 only 2*23 = 46 translators are needed.
 Interlingua sounds too good to be true. In a sense, it is. All attempts to
 create an interlingua that would solve all translation problems have failed.
 However, interlinguas for restricted applications have shown more
 success. For instance, mathematical texts and weather reports can be translated
 by using interlinguas tailor-made for the domains of mathematics and weather reports,
 respectively.
 What is required of an interlingua is
 - semantic accuracy: correspondence to what you want to say in the application
 - language-independence: abstraction from individual languages
 Thus, for instance, an interlingua for mathematical texts may be based on
 mathematical logic, which at the same time gives semantic accuracy and
 language independence. In other domains, something else than mathematical
 logic may be needed; the **ontologies** defined within the semantic
 web technology are often good starting points for interlinguas.
 ==GF: a framework for multilingual grammars==
 The interlingua is just one part of a translation system. We also need
 the mappings between the interlingua and the involved languages. As the
 number of languages increases, this part grows while the interlingua remains
 constant.
 GF (Grammatical Framework, 
 [``gf.digitalgrammars.com`` http://gf.digitalgrammars.com])
 is a programming language designed to support interlingua-based translation.
 A "program" in GF is a **multilingual grammar**, which consists of an 
 **abstract syntax** and a set of **concrete syntaxes**. A concrete
 syntax is a mapping from the abstract syntax to a particular language.
 These mappings are **reversible**, which means that they can be used for
 translating in both directions. This means that creating an interlingua-based
 translator for 23 languages just requires 1 + 23 = 24 grammar modules (the abstract
 syntax and the concrete syntaxes).
 The diagram first in this document shows a system covering the 23 EU languages. 
 Languages marked in
 red are of particular interest for the summer school, since they are those 
 on which the effort will be concentrated.
 ==The GF resource grammar library==
 The GF resource grammar library is a set of grammars used as libraries when
 building interlingua-based translation systems. The library currently covers
 the 9 languages coloured in green in the diagram above; in addition, 
 Catalan, Norwegian, and Russian are covered, and there is ongoing work on
 Arabic, Hindi/Urdu, and Thai.
 The purpose of the resource grammar library is to define the "low-level" structure
 of a language: inflection, word order, agreement. This structure belongs to what
 linguists call morphology and syntax. It can be very complex and requires
 a lot of knowledge. Yet, when translating from one language to another, knowing 
 morphology and syntax is but a part of what is needed. The translator (whether human
 or machine) must understand the meaning of what is translated, and must also know
 the idiomatic way to express the meaning in the target language. This knowledge
 can be very domain-dependent and requires in general an expert in the field to
 reach high quality: a mathematician in the field of mathematics, a meteorologist
 in the field of weather reports, etc.
 The problem is to find a person who is an expert in both the domain of translation
 and in the low-level linguistic details. It is the rareness of this combination
 that has made it difficult to build interlingua-based translation systems.
 The GF resource grammar library has the mission of helping in this task. It encapsulates
 the low-level linguistics in program modules accessed through easy-to-use interfaces.
 Experts on different domains can build translation systems by using the library, 
 without knowing low-level linguistics. The idea is much the same as when a 
 programmer builds a graphical user interface (GUI) from high-level elements such as
 buttons and menus, without having to care about pixels or geometrical forms.
 ===Applications of the library===
 In addition to translation, the library is also useful in **localization**,
 that is, porting a piece of software to new languages. 
 The GF resource grammar library has been used in three major projects that need
 interlingua-based translation or localization of systems to new languages:
 - in KeY, 
  [``http://www.key-project.org/`` http://www.key-project.org/],
  for writing formal and informal software specifications (3 languages)
 - in WebALT,
  [``http://webalt.math.helsinki.fi/content/index_eng.html`` http://webalt.math.helsinki.fi/content/index_eng.html],
  for translating mathematical exercises to 7 languages
 - in TALK [``http://www.talk-project.org`` http://www.talk-project.org],
  where the library was used for localizing spoken dialogue systems to six languages
 The library is also a generic linguistic resource, which can be used for tasks
 such as language teaching and information retrieval. The liberal license (GPL)
 makes it usable for anyone and for any task. GF also has tools supporting the
 use of grammars in programs written in other programming languages: C, C++, Haskell,
 Java, JavaScript, and Prolog. In connection with the TALK project, support has also been
 developed for translating GF grammars to language models used in speech
 recognition.
 ===The structure of the library===
 The library has the following main parts:
 - **Inflection paradigms**, covering the inflection of each language.
 - **Common Syntax API**, covering a large set of syntax rules that
  can be implemented for all languages involved.
 - **Common Test Lexicon**, giving ca. 500 common words that can be used for
  testing the library.
 - **Language-Specific Syntax Extensions**, covering syntax rules that are
  not implementable for all languages.
 - **Language-Specific Lexica**, word lists for each language, with
  accurate morphological and syntactic information.
 The goal of the summer school is to implement, for each language, at least
 the first three components. The latter two are more open-ended in character.
 ==The summer school==
 The goal of the summer school is to extend the GF resource grammar library
 to covering all 23 EU languages, which means we need 15 new languages.
 The amount of work and skill is between a Master's thesis and a PhD thesis.
 The Russian implementation was made by Janna Khegai as a part of her
 PhD thesis; the thesis contains other material, too.
 The Arabic implementation was started by Ali El Dada in his Master's thesis,
 but the thesis does not cover the whole API. The realistic amount of work is
 somewhere around 8 person months, but this is very much language-dependent.
 Dutch, for instance, can profit from previous implementations of German and
 Scandinavian languages, and will probably require less work.
 Latvian and Lithuanian are the first languages of the Baltic family and
 will probably require much more work.
 In any case, the proposed allocation of work power is 2 participants per
 language. They will have 6 months to work at home, followed
 by 2 weeks of summer school. Who are these participants?
 ===Selecting participants===
 After the call has been published, persons interested to participate in
 the project are expected to learn GF by self-study from the 
 [tutorial http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/gf-tutorial.html]. 
 This should take a couple of weeks.
 After and perhaps in parallel with
 working out the tutorial, the participants should continue to 
 implement selected parts of the resource grammar, following the advice from
 the 
 [Resource-HOWTO document http://www.cs.chalmers.se/Cs/Research/Language-technology/GF/doc/Resource-HOWTO.html].
 %[``http://www.cs.chalmers.se/~aarne/GF/lib/resource-1.0/doc/Resource-HOWTO.html`` http://www.cs.chalmers.se/~aarne/GF/lib/resource-1.0/doc/Resource-HOWTO.html].
 What parts exactly are selected will be announced later. 
 This work will take another couple of weeks.
 This sample resource grammar fragment 
 will be submitted to the Summer School Committee in the beginning of May.
 The Committee then decides who is invited to represent which language
 in the summer school.
 After the Committee decision, the  participants have around three months
 to work on their languages. The work is completed in the summer school itself. It is also
 thoroughly tested by using it to add a new language to the WebALT mathematical
 exercise translator.
 Depending on the quality of submitted work, and on the demands of different
 languages, the Committee may decide to select another number than 2 participants
 for a language. We will also consider accepting participants who want to
 pay their own expenses.
 Also good proposals from non-EU languages will be considered. Proponents of
 such languages should contact the summer school organizers as early as possible.
 To keep track of who is working on which language, we will establish a web page
 (Wiki or similar) soon after the call is published. The participants are encouraged
 to contact each other and even work in groups.
 ===Who is qualified===
 Writing a resource grammar implementation requires good general programming
 skills, and a good explicit knowledge of the grammar of the target language. 
 A typical participant could be 
 - native or fluent speaker of the target language
 - interested in languages on the theoretical level, and preferably familiar
  with many languages (to be able to think about them on an abstract level)
 - familiar with functional programming languages such as ML or Haskell
  (GF itself is a language similar to these)
 - on Master's or PhD level in linguistics, computer science, or mathematics
 But it is the quality of the assignment that is assessed, not any formal
 requirements. The "typical participant" was described to give an idea of
 who is likely to succeed in this.
 ===Costs===
 Our aim is to make the summer school free of charge for the participants
 who are selected on the basis of their assignments. And not only that:
 we plan to cover their travel and accommodation costs, up to 1000 EUR
 per person.
 We want to get the funding question settled by mid-February 2009, and make
 the final decision on the summer school then.
 ===Teachers===
 Krasimir Angelov
 ?Olga Caprotti
 ?Lauri Carlson
 ?Robin Cooper
 ?Björn Bringert
 Håkan Burden
 ?Elisabet Engdahl
 ?Markus Forsberg
 ?Janna Khegai
 ?Peter Ljunglöf
 ?Wanjiku Ng'ang'a
 Aarne Ranta
 ?Jordi Saludes
 In addition, we will look for consultants who can help to assess the results
 for each language.
 ===The Summer School Committee===
 This committee consists of a number of teachers and consultants, 
 who will select the participants.
 ===Time and Place===
 The summer school will
 be organized in Gothenburg in the latter half of August 2009.
 Time schedule (2009):
 - February: announcement of summer school and the grammar
  writing contest to get participants
 - March-April: work on the contest assignment (ca 1 month)
 - May: submission deadline and notification of acceptance
 - June-July: more work on the grammars
 - August: summer school
 ===Dissemination and intellectual property===
 The new resource grammars will be released under the GPL just like 
 the current resource grammars,
 with the copyright held by respective authors.
 The grammars will be distributed via the GF web site.
 The WebALT-specific grammars will have special licenses agreed between the
 authors and WebALT Inc.
 ==Why I should participate==
 Seven reasons:
 + free trip and stay in Gothenburg (to be confirmed)
 + participation in a pioneering language technology work in an enthusiastic atmosphere
 + work and fun with people from all over Europe
 + job opportunities and business ideas
 + credits: the school project will be established as a course worth
  15 ECTS points per person, but extensions to Master's thesis will
  also be considered
 + merits: the resulting grammar can easily lead to a published paper
 + contribution to the multilingual and multicultural development of Europe
@@ -0,0 +1,16 @@
 -- Domain lexicon interface for the arithmetic example.
 -- It only declares the words; each language supplies the definitions in an
 -- instance module (LexMathFre and LexMathSwe in this directory).
 -- The categories PN, N, N2, A and A2 are taken from the opened Syntax module.
 interface LexMath = open Syntax in {

 oper
   -- proper name
   zero_PN : PN ;

   -- plain noun
   number_N : N ;

   -- two-place nouns (N2)
   successor_N2 : N2 ;
   sum_N2       : N2 ;
   product_N2   : N2 ;

   -- one-place adjectives (A)
   even_A  : A ;
   odd_A   : A ;
   prime_A : A ;
   small_A : A ;
   great_A : A ;

   -- two-place adjectives (A2)
   equal_A2     : A2 ;
   divisible_A2 : A2 ;
 }
@@ -0,0 +1,18 @@
 -- French instance of the LexMath interface.
 -- Words are built with the ParadigmsFre constructors; small_A and great_A
 -- are taken from LexiconFre (opened qualified as L).
 instance LexMathFre of LexMath =
   open SyntaxFre, ParadigmsFre, (L = LexiconFre) in {
 oper
   -- proper name and plain noun
   zero_PN = mkPN "zéro" ;
   number_N = mkN "entier" ;

   -- nouns with a complement; `genitive` is the complement preposition
   -- supplied by ParadigmsFre (presumably "de" - confirm)
   successor_N2 = mkN2 (mkN "successeur") genitive ;
   sum_N2 = mkN2 (mkN "somme") genitive ;
   product_N2 = mkN2 (mkN "produit") genitive ;

   -- adjectives without a complement
   even_A = mkA "pair" ;
   odd_A = mkA "impair" ;
   prime_A = mkA "premier" ;
   small_A = L.small_A ;
   great_A = L.big_A ;   -- "great" is rendered with the lexicon entry for "big"

   -- adjectives with a complement
   equal_A2 = mkA2 (mkA "égal") dative ;
   divisible_A2 = mkA2 (mkA "divisible") (mkPrep "par") ;
 }
@@ -0,0 +1,18 @@
 -- Swedish instance of the LexMath interface.
 -- Words are built with the ParadigmsSwe constructors; small_A and great_A
 -- are taken from LexiconSwe (opened qualified as L).
 instance LexMathSwe of LexMath =
   open SyntaxSwe, ParadigmsSwe, (L = LexiconSwe) in {
 oper
   -- proper name and plain noun
   zero_PN = mkPN "noll" neutrum ;
   number_N = mkN "tal" "tal" ;

   -- nouns with a prepositional complement
   successor_N2 = mkN2 (mkN "efterföljare" "efterföljare") (mkPrep "till") ;
   sum_N2 = mkN2 (mkN "summa") (mkPrep "av") ;
   product_N2 = mkN2 (mkN "produkt" "produkter") (mkPrep "av") ;

   -- adjectives without a complement
   even_A = mkA "jämn" ;
   odd_A = mkA "udda" "udda" ;
   prime_A = mkA "prim" ;
   small_A = L.small_A ;
   great_A = L.big_A ;   -- "great" is rendered with the lexicon entry for "big"

   -- adjectives with a prepositional complement
   equal_A2 = mkA2 (mkA "lika" "lika") (mkPrep "med") ;
   divisible_A2 = mkA2 (mkA "delbar") (mkPrep "med") ;
 }
@@ -0,0 +1,8 @@
 --# -path=.:present
 -- French concrete syntax of Math: the MathI functor instantiated with the
 -- French Syntax, Mathematical and LexMath modules.
 concrete MathFre of Math =
   MathI with
     (Syntax       = SyntaxFre),
     (Mathematical = MathematicalFre),
     (LexMath      = LexMathFre) ;
@@ -0,0 +1,51 @@
 -- Language-independent concrete syntax (functor) for the abstract syntax Math.
 -- Everything is expressed through the Syntax and Mathematical interfaces plus
 -- the LexMath lexicon, so a language is obtained by instantiating those three.
 incomplete concrete MathI of Math =
   open Syntax, Mathematical, LexMath, Prelude in {

 lincat
   Prop = S ;      -- propositions are linearized as sentences
   Exp = NP ;      -- expressions are linearized as noun phrases
   Var = Symb ;    -- variables are linearized as symbols

 oper
   -- "the <noun> <arg>": noun with one argument
   funct1 : N2 -> NP -> NP =
     \noun, arg -> mkNP the_Art (mkCN noun arg) ;

   -- "the <noun> <left> and <right>": noun applied to a conjoined argument
   funct2 : N2 -> NP -> NP -> NP =
     \noun, left, right -> mkNP the_Art (mkCN noun (mkNP and_Conj left right)) ;

   -- one-place predicate: subject + adjective
   pred1 : A -> NP -> S =
     \adj, subj -> mkS (mkCl subj adj) ;

   -- two-place predicate: subject + A2 + complement
   pred2 : A2 -> NP -> NP -> S =
     \adj, subj, obj -> mkS (mkCl subj adj obj) ;

   -- predicate built from a plain adjective and two noun phrases
   -- (used below for Less and Greater)
   predC : A -> NP -> NP -> S =
     \adj, subj, obj -> mkS (mkCl subj adj obj) ;

   -- common noun shared by ANumberVar and TheNumberVar:
   -- number_N modified by the symbol as a noun phrase
   numberVarCN : Symb -> CN =
     \v -> mkCN (mkCN number_N) (mkNP (SymbPN v)) ;

 lin
   -- logical connectives
   And = mkS and_Conj ;
   Or  = mkS or_Conj ;
   If a = mkS (mkAdv if_Subj a) ;

   -- arithmetic expressions
   Zero = mkNP zero_PN ;
   Successor = funct1 successor_N2 ;
   Sum = funct2 sum_N2 ;
   Product = funct2 product_N2 ;

   -- predicates on numbers
   Even = pred1 even_A ;
   Odd = pred1 odd_A ;
   Prime = pred1 prime_A ;
   Equal = pred2 equal_A2 ;
   Less = predC small_A ;
   Greater = predC great_A ;
   Divisible = pred2 divisible_A2 ;

   -- variables, integer literals and "a/the number x"
   X = MkSymb (ss "x") ;
   Y = MkSymb (ss "y") ;
   EVar x = mkNP (SymbPN x) ;
   EInt i = mkNP (IntPN i) ;
   ANumberVar x = mkNP a_Art (numberVarCN x) ;
   TheNumberVar x = mkNP the_Art (numberVarCN x) ;
 }
@@ -0,0 +1,8 @@
 --# -path=.:present
 -- Swedish concrete syntax of Math: the MathI functor instantiated with the
 -- Swedish Syntax, Mathematical and LexMath modules.
 concrete MathSwe of Math =
   MathI with
     (Syntax       = SyntaxSwe),
     (Mathematical = MathematicalSwe),
     (LexMath      = LexMathSwe) ;