diff --git a/lib/resource-1.0/abstract/Cat.gf b/lib/resource-1.0/abstract/Cat.gf index a3f15c300..97745746a 100644 --- a/lib/resource-1.0/abstract/Cat.gf +++ b/lib/resource-1.0/abstract/Cat.gf @@ -1,6 +1,6 @@ --1 The category system -abstract Cat = { +abstract Cat = Tense ** { cat @@ -13,19 +13,13 @@ abstract Cat = { Utt ; -- sentence, question, word... e.g. "be quiet" Voc ; -- vocative or "please" e.g. "my darling" ---2 Tensed sentences +--2 Sentences and clauses --- Constructed in [Tensed Tensed.html]. --- A simplified variant, with just present forms, is [Untensed Untensed.html]. +-- Constructed in [Sentence Sentence.html]. S ; -- declarative sentence e.g. "she lived here" QS ; -- question e.g. "where did she live" RS ; -- relative e.g. "in which she lived" - ---2 Clauses - --- Constructed in [Sentence Sentence.html]. - Cl ; -- declarative clause, with all tenses e.g. "she looks at this" Slash ; -- clause missing NP (S/NP in GPSG) e.g. "she looks at" Imp ; -- imperative e.g. "look at this" @@ -108,7 +102,7 @@ abstract Cat = { --2 Words of open classes --- These are constructed in [Basic Basic.html] and in additional lexicon modules. +-- These are constructed in [Lexicon Lexicon.html] and in additional lexicon modules. V ; -- one-place verb e.g. "sleep" V2 ; -- two-place verb e.g. 
"love" diff --git a/lib/resource-1.0/abstract/Lang.gf b/lib/resource-1.0/abstract/Lang.gf index 5395ade2e..5663ac5cc 100644 --- a/lib/resource-1.0/abstract/Lang.gf +++ b/lib/resource-1.0/abstract/Lang.gf @@ -15,7 +15,6 @@ abstract Lang = Relative, Conjunction, Phrase, - Tensed, Structural, - Basic + Lexicon ** {} ; diff --git a/lib/resource-1.0/abstract/Basic.gf b/lib/resource-1.0/abstract/Lexicon.gf similarity index 98% rename from lib/resource-1.0/abstract/Basic.gf rename to lib/resource-1.0/abstract/Lexicon.gf index dfffd549c..837fac26f 100644 --- a/lib/resource-1.0/abstract/Basic.gf +++ b/lib/resource-1.0/abstract/Lexicon.gf @@ -1,4 +1,4 @@ ---1 A Basic Lexicon +--1 A Lexicon Lexicon -- This files gives a list of words whose purpose is to test the GF -- resource grammar. It covers all lexical categories of [Cat Cat.html]. @@ -10,7 +10,7 @@ -- of the words in different languages are not guaranteed to be translation -- equivalents. -abstract Basic = Cat ** { +abstract Lexicon = Cat ** { fun add_V3 : V3 ; airplane_N : N ; diff --git a/lib/resource-1.0/abstract/Phrase.gf b/lib/resource-1.0/abstract/Phrase.gf index 52a77a387..4b571f107 100644 --- a/lib/resource-1.0/abstract/Phrase.gf +++ b/lib/resource-1.0/abstract/Phrase.gf @@ -1,6 +1,6 @@ --1 Phrases and utterances -abstract Phrase = Cat, Tense ** { +abstract Phrase = Cat ** { -- When a phrase is built from an utterance it can be prefixed -- with a phrasal conjunction (such as "but", "therefore") diff --git a/lib/resource-1.0/abstract/Sentence.gf b/lib/resource-1.0/abstract/Sentence.gf index 475429bcb..927eda09b 100644 --- a/lib/resource-1.0/abstract/Sentence.gf +++ b/lib/resource-1.0/abstract/Sentence.gf @@ -1,4 +1,4 @@ ---1 Clauses, imperatives, and sentential complements +--1 Sentences, clauses, imperatives, and sentential complements abstract Sentence = Cat ** { @@ -49,5 +49,35 @@ abstract Sentence = Cat ** { EmbedQS : QS -> SC ; -- whether you go EmbedVP : VP -> SC ; -- to go +--2 Sentences + +-- These are 
the 4 x 2 x 2 = 16 forms generated by different +-- combinations of tense, polarity, and +-- anteriority, which are defined in [Tense Tense.html]. + + fun + UseCl : Tense -> Ant -> Pol -> Cl -> S ; + UseQCl : Tense -> Ant -> Pol -> QCl -> QS ; + UseRCl : Tense -> Ant -> Pol -> RCl -> RS ; } + +-- Examples for English $S$/$Cl$: +{- + Pres Simul Pos ODir : he sleeps + Pres Simul Neg ODir : he doesn't sleep + Pres Anter Pos ODir : he has slept + Pres Anter Neg ODir : he hasn't slept + Past Simul Pos ODir : he slept + Past Simul Neg ODir : he didn't sleep + Past Anter Pos ODir : he had slept + Past Anter Neg ODir : he hadn't slept + Fut Simul Pos ODir : he will sleep + Fut Simul Neg ODir : he won't sleep + Fut Anter Pos ODir : he will have slept + Fut Anter Neg ODir : he won't have slept + Cond Simul Pos ODir : he would sleep + Cond Simul Neg ODir : he wouldn't sleep + Cond Anter Pos ODir : he would have slept + Cond Anter Neg ODir : he wouldn't have slept +-} diff --git a/lib/resource-1.0/doc/German.png b/lib/resource-1.0/doc/German.png index ab16ef27e..df3c71c86 100644 Binary files a/lib/resource-1.0/doc/German.png and b/lib/resource-1.0/doc/German.png differ diff --git a/lib/resource-1.0/doc/Lang.dot b/lib/resource-1.0/doc/Lang.dot new file mode 100644 index 000000000..ec1fa508a --- /dev/null +++ b/lib/resource-1.0/doc/Lang.dot @@ -0,0 +1,60 @@ +digraph { + +size = "12,8" ; + +Lang [style = "solid", shape = "ellipse", URL = "Lang.gf"]; +Lang -> Noun [style = "solid"]; +Lang -> Verb [style = "solid"]; +Lang -> Adjective [style = "solid"]; +Lang -> Adverb [style = "solid"]; +Lang -> Numeral [style = "solid"]; +Lang -> Sentence [style = "solid"]; +Lang -> Question [style = "solid"]; +Lang -> Relative [style = "solid"]; +Lang -> Conjunction [style = "solid"]; +Lang -> Phrase [style = "solid"]; +Lang -> Structural [style = "solid"]; +Lang -> Lexicon [style = "solid"]; + +Noun [style = "solid", shape = "ellipse", URL = "Noun.gf"]; +Noun -> Cat [style = "solid"]; + +Verb 
[style = "solid", shape = "ellipse", URL = "Verb.gf"]; +Verb -> Cat [style = "solid"]; + +Adjective [style = "solid", shape = "ellipse", URL = "Adjective.gf"]; +Adjective -> Cat [style = "solid"]; + +Adverb [style = "solid", shape = "ellipse", URL = "Adverb.gf"]; +Adverb -> Cat [style = "solid"]; + +Numeral [style = "solid", shape = "ellipse", URL = "Numeral.gf"]; +Numeral -> Cat [style = "solid"]; + +Sentence [style = "solid", shape = "ellipse", URL = "Sentence.gf"]; +Sentence -> Cat [style = "solid"]; + +Question [style = "solid", shape = "ellipse", URL = "Question.gf"]; +Question -> Cat [style = "solid"]; + +Relative [style = "solid", shape = "ellipse", URL = "Relative.gf"]; +Relative -> Cat [style = "solid"]; + +Conjunction [style = "solid", shape = "ellipse", URL = "Conjunction.gf"]; +Conjunction -> Cat [style = "solid"]; + +Phrase [style = "solid", shape = "ellipse", URL = "Phrase.gf"]; +Phrase -> Cat [style = "solid"]; + +Structural [style = "solid", shape = "ellipse", URL = "Structural.gf"]; +Structural -> Cat [style = "solid"]; + +Lexicon [style = "solid", shape = "ellipse", URL = "Lexicon.gf"]; +Lexicon -> Cat [style = "solid"]; + +Cat [style = "solid", shape = "ellipse", URL = "Cat.gf"]; +Cat -> Tense [style = "solid"]; + +Tense [style = "solid", shape = "ellipse", URL = "Tense.gf"]; + +} diff --git a/lib/resource-1.0/doc/Lang.png b/lib/resource-1.0/doc/Lang.png index db077e713..4010ed9c9 100644 Binary files a/lib/resource-1.0/doc/Lang.png and b/lib/resource-1.0/doc/Lang.png differ diff --git a/lib/resource-1.0/doc/Resource-HOWTO.html b/lib/resource-1.0/doc/Resource-HOWTO.html index f515c5d41..2dc7e144f 100644 --- a/lib/resource-1.0/doc/Resource-HOWTO.html +++ b/lib/resource-1.0/doc/Resource-HOWTO.html @@ -7,7 +7,7 @@
-It is advisable to start with a simpler subset of the API, which -leaves out certain complicated but not always necessary things: -tenses and most part of the lexicon. -
-
-
-
The module structure is rather flat: almost every module is a direct
-parent of the top module (Lang or Test). The idea
+parent of the top module Lang. The idea
is that you can concentrate on one linguistic aspect at a time, or
also distribute the work among several authors.
lincat definition of a category and use the default
{s : Str} until you need to change it to something else. In
English, for instance, most categories do have this linearization type!
--As a slight asymmetry in the module diagrams, you find the following -modules: -
-Tense: defines the parameters of polarity, anteriority, and tense
-Tensed: defines how sentences use those parameters
-Untensed: makes sentences use the polarity parameter only
-
-The full resource API (Lang) uses Tensed, whereas the
-restricted Test API uses Untensed.
-
@@ -165,29 +144,22 @@ API, the latter rule is sometimes violated in some languages. Another characterization of lexical is that lexical units can be added almost ad libitum, and they cannot be defined in terms of already given rules. The lexical modules of the resource API are thus more like -samples than complete lists. There are three such modules: +samples than complete lists. There are two such modules:
Structural: structural words (determiners, conjunctions,...)
-Basic: basic everyday content words (nouns, verbs,...)
-Lex: a very small sample of both structural and content words
+Lexicon: basic everyday content words (nouns, verbs,...)
The module Structural aims for completeness, and is likely to
-be extended in future releases of the resource. The module Basic
+be extended in future releases of the resource. The module Lexicon
gives a "random" list of words, which enable interesting testing of syntax,
and also a check list for morphology, since those words are likely to include
most morphological patterns of the language.
-The module Lex is used in Test instead of the two
-larger modules. Its purpose is to provide a quick way to test the
-syntactic structures of the phrase category modules without having to implement
-the larger lexica.
-
-In the case of Basic it may come out clearer than anywhere else
+In the case of Lexicon it may come out clearer than anywhere else
in the API that it is impossible to give exact translation equivalents in
different languages on the level of a resource grammar. In other words,
application grammars are likely to use the resource in different ways for
@@ -254,9 +226,9 @@ of resource v. 1.0.
lines in the previous step) - but you uncommenting the first
and the last lines will actually do the job for many of the files.
TestGer in GF:
+LangGer in GF:
- gf TestGer.gf + gf LangGer.gfYou will get lots of warnings on missing rules, but the grammar will compile. @@ -267,7 +239,7 @@ of resource v. 1.0. tells you what exactly is missing. -Here is the module structure of
TestGer. It has been simplified by leaving out
+Here is the module structure of LangGer. It has been simplified by leaving out
the majority of the phrase category modules. Each of them has the same dependencies
as e.g. VerbGer.
@@ -296,7 +268,7 @@ only one. So you will find yourself iterating the following steps:
LexGer. Again, it can be helpful to define some simple-minded
+ in LexiconGer. Again, it can be helpful to define some simple-minded
morphological paradigms in ResGer, in particular worst-case
constructors corresponding to e.g.
ResEng.mkNoun.
@@ -307,8 +279,8 @@ only one. So you will find yourself iterating the following steps:
cc mkNoun "Brief" "Briefe" Masc
-NounGer and LexGer in TestGer,
- and compile TestGer in GF. Then test by parsing, linearization,
+NounGer and LexiconGer in LangGer,
+ and compile LangGer in GF. Then test by parsing, linearization,
and random generation. In particular, linearization to a table should
be used so that you see all forms produced:
@@ -321,8 +293,9 @@ only one. So you will find yourself iterating the following steps:You are likely to run this cycle a few times for each linearization rule -you implement, and some hundreds of times altogether. There are 159 -
funsinTest(at the moment). +you implement, and some hundreds of times altogether. There are 66cats and +458funsinLangat the moment; 149 of thefunsare outside the two +lexicon modules).Of course, you don't need to complete one phrase category module before starting @@ -335,7 +308,8 @@ Here is a live log of the actual process of building the German implementation of resource API v. 1.0. It is the basis of the more detailed explanations, which will follow soon. (You will found out that these explanations involve -a rational reconstruction of the live process!) +a rational reconstruction of the live process! Among other things, the +API was changed during the actual process to make it more intuitive.)
Resource modules used
@@ -343,8 +317,9 @@ a rational reconstruction of the live process!) These modules will be written by you.
ResGer: parameter types and auxiliary operations
-MorphoGer: complete inflection engine; not needed for Test.
+ParamGer: parameter types
+ResGer: auxiliary operations (a resource for the resource grammar!)
+MorphoGer: complete inflection engine
@@ -439,7 +414,7 @@ the application grammarian may need to use, e.g.
These constants are defined in terms of parameter types and constructors
in ResGer and MorphoGer, which modules are are not
-accessible to the application grammarian.
+visible to the application grammarian.
BasicGer gives a good set of examples for
those who want to build new lexica.
-+So far we just give links to the implementations of each API. +More explanation is to follow - but many detailed implementation tricks +are only found in the comments of the modules. +
-
It may be handy to provide a separate module of irregular
@@ -528,7 +541,7 @@ few hundred perhaps. Building such a lexicon separately also
makes it less important to cover everything by the
worst-case paradigms (mkV etc).
You can often find resources such as lists of @@ -538,10 +551,10 @@ page gives a list of verbs in the traditional tabular format, which begins as follows:
- backen (du bäckst, er bäckt) backte [buk] gebacken
+ backen (du bäckst, er bäckt) backte [buk] gebacken
befehlen (du befiehlst, er befiehlt; befiehl!) befahl (beföhle; befähle) befohlen
- beginnen begann (begönne; begänne) begonnen
- beißen biß gebissen
+ beginnen begann (begönne; begänne) begonnen
+ beißen biß gebissen
All you have to do is to write a suitable verb paradigm @@ -563,7 +576,7 @@ When using ready-made word lists, you should think about coyright issues. Ideally, all resource grammar material should be provided under GNU General Public License.
- +This is a cheap technique to build a lexicon of thousands @@ -571,7 +584,7 @@ of words, if text data is available in digital format. See the Functional Morphology homepage for details.
- +Sooner or later it will happen that the resource grammar API @@ -580,7 +593,7 @@ that it does not include idiomatic expressions in a given language. The solution then is in the first place to build language-specific extension modules. This chapter will deal with this issue.
- +Above we have looked at how a resource implementation is built by @@ -595,10 +608,10 @@ use parametrized modules. The advantages are
-In this chapter, we will look at an example: adding Portuguese to +In this chapter, we will look at an example: adding Italian to the Romance family.
- +This is the most demanding form of resource grammar writing. @@ -614,6 +627,6 @@ This chapter will work out an example of how an Estonian grammar is constructed from the Finnish grammar through parametrization.
- +