Latin example in HOWTO

This commit is contained in:
aarne
2006-02-01 23:04:05 +00:00
parent 390efcd43c
commit a28eeb58bd
5 changed files with 271 additions and 31 deletions

View File

@@ -7,7 +7,7 @@
<P ALIGN="center"><CENTER><H1>Resource grammar writing HOWTO</H1>
<FONT SIZE="4">
<I>Author: Aarne Ranta &lt;aarne (at) cs.chalmers.se&gt;</I><BR>
Last update: Wed Jan 25 15:04:36 2006
Last update: Thu Feb 2 00:04:00 2006
</FONT></CENTER>
<P></P>
@@ -30,22 +30,23 @@ Last update: Wed Jan 25 15:04:36 2006
<LI><A HREF="#toc11">Lock fields</A>
<LI><A HREF="#toc12">Lexicon construction</A>
</UL>
<LI><A HREF="#toc13">Inside grammar modules</A>
<LI><A HREF="#toc13">The core of the syntax</A>
<LI><A HREF="#toc14">Inside grammar modules</A>
<UL>
<LI><A HREF="#toc14">The category system</A>
<LI><A HREF="#toc15">Phrase category modules</A>
<LI><A HREF="#toc16">Resource modules</A>
<LI><A HREF="#toc17">Lexicon</A>
<LI><A HREF="#toc15">The category system</A>
<LI><A HREF="#toc16">Phrase category modules</A>
<LI><A HREF="#toc17">Resource modules</A>
<LI><A HREF="#toc18">Lexicon</A>
</UL>
<LI><A HREF="#toc18">Lexicon extension</A>
<LI><A HREF="#toc19">Lexicon extension</A>
<UL>
<LI><A HREF="#toc19">The irregularity lexicon</A>
<LI><A HREF="#toc20">Lexicon extraction from a word list</A>
<LI><A HREF="#toc21">Lexicon extraction from raw text data</A>
<LI><A HREF="#toc22">Extending the resource grammar API</A>
<LI><A HREF="#toc20">The irregularity lexicon</A>
<LI><A HREF="#toc21">Lexicon extraction from a word list</A>
<LI><A HREF="#toc22">Lexicon extraction from raw text data</A>
<LI><A HREF="#toc23">Extending the resource grammar API</A>
</UL>
<LI><A HREF="#toc23">Writing an instance of parametrized resource grammar implementation</A>
<LI><A HREF="#toc24">Parametrizing a resource grammar implementation</A>
<LI><A HREF="#toc24">Writing an instance of parametrized resource grammar implementation</A>
<LI><A HREF="#toc25">Parametrizing a resource grammar implementation</A>
</UL>
<P></P>
@@ -175,6 +176,14 @@ try out the module
explained in the
<A HREF="http://www.cs.chalmers.se/~aarne/GF/doc/tutorial/gf-tutorial2.html">GF Tutorial</A>.
</P>
<P>
Another reduced API is the
<A HREF="latin.gf">toy Latin grammar</A>
which will be used as a reference when discussing the details.
It is not so usable in practice as the Tutorial API, but it goes
deeper in explaining what parameters and dependencies the principal categories
and rules have.
</P>
<A NAME="toc6"></A>
<H2>Phases of the work</H2>
<A NAME="toc7"></A>
@@ -494,19 +503,45 @@ use of the paradigms in <CODE>BasicGer</CODE> gives a good set of examples for
those who want to build new lexica.
</P>
<A NAME="toc13"></A>
<H2>The core of the syntax</H2>
<P>
Among all categories and functions, there is a handful of the
most important and distinct ones, of which the others can be
seen as variations. The categories are
</P>
<PRE>
Cl ; VP ; V2 ; NP ; CN ; Det ; AP ;
</PRE>
<P>
The functions are
</P>
<PRE>
PredVP : NP -&gt; VP -&gt; Cl ; -- predication
ComplV2 : V2 -&gt; NP -&gt; VP ; -- complementization
DetCN : Det -&gt; CN -&gt; NP ; -- determination
ModCN : AP -&gt; CN -&gt; CN ; -- modification
</PRE>
<P>
This <A HREF="latin.gf">toy Latin grammar</A> shows in a nutshell how these
rules relate the categories to each other. It is intended to be a
first approximation when designing the parameter system of a new
language. We will refer to the implementations contained in it
when discussing the modules in more detail.
</P>
<A NAME="toc14"></A>
<H2>Inside grammar modules</H2>
<P>
So far we just give links to the implementations of each API.
More explanation iś to follow - but many detail implementation tricks
are only found in the cooments of the modules.
More explanations follow - but many detailed implementation tricks
are only found in the comments of the modules.
</P>
<A NAME="toc14"></A>
<A NAME="toc15"></A>
<H3>The category system</H3>
<UL>
<LI><A HREF="gfdoc/Cat.html">Cat</A>, <A HREF="gfdoc/CatGer.gf">CatGer</A>
</UL>
<A NAME="toc15"></A>
<A NAME="toc16"></A>
<H3>Phrase category modules</H3>
<UL>
<LI><A HREF="gfdoc/Tense.html">Tense</A>, <A HREF="../german/TenseGer.gf">TenseGer</A>
@@ -523,7 +558,7 @@ are only found in the cooments of the modules.
<LI><A HREF="gfdoc/Lang.html">Lang</A>, <A HREF="../german/LangGer.gf">LangGer</A>
</UL>
<A NAME="toc16"></A>
<A NAME="toc17"></A>
<H3>Resource modules</H3>
<UL>
<LI><A HREF="../german/ParamGer.gf">ParamGer</A>
@@ -532,16 +567,16 @@ are only found in the cooments of the modules.
<LI><A HREF="gfdoc/ParadigmsGer.html">ParadigmsGer</A>, <A HREF="../german/ParadigmsGer.gf">ParadigmsGer.gf</A>
</UL>
<A NAME="toc17"></A>
<A NAME="toc18"></A>
<H3>Lexicon</H3>
<UL>
<LI><A HREF="gfdoc/Structural.html">Structural</A>, <A HREF="../german/StructuralGer.gf">StructuralGer</A>
<LI><A HREF="gfdoc/Lexicon.html">Lexicon</A>, <A HREF="../german/LexiconGer.gf">LexiconGer</A>
</UL>
<A NAME="toc18"></A>
<H2>Lexicon extension</H2>
<A NAME="toc19"></A>
<H2>Lexicon extension</H2>
<A NAME="toc20"></A>
<H3>The irregularity lexicon</H3>
<P>
It may be handy to provide a separate module of irregular
@@ -551,7 +586,7 @@ few hundred perhaps. Building such a lexicon separately also
makes it less important to cover <I>everything</I> by the
worst-case paradigms (<CODE>mkV</CODE> etc).
</P>
<A NAME="toc20"></A>
<A NAME="toc21"></A>
<H3>Lexicon extraction from a word list</H3>
<P>
You can often find resources such as lists of
@@ -586,7 +621,7 @@ When using ready-made word lists, you should think about
copyright issues. Ideally, all resource grammar material should
be provided under GNU General Public License.
</P>
<A NAME="toc21"></A>
<A NAME="toc22"></A>
<H3>Lexicon extraction from raw text data</H3>
<P>
This is a cheap technique to build a lexicon of thousands
@@ -594,7 +629,7 @@ of words, if text data is available in digital format.
See the <A HREF="http://www.cs.chalmers.se/~markus/FM/">Functional Morphology</A>
homepage for details.
</P>
<A NAME="toc22"></A>
<A NAME="toc23"></A>
<H3>Extending the resource grammar API</H3>
<P>
Sooner or later it will happen that the resource grammar API
@@ -603,7 +638,7 @@ that it does not include idiomatic expressions in a given language.
The solution then is in the first place to build language-specific
extension modules. This chapter will deal with this issue.
</P>
<A NAME="toc23"></A>
<A NAME="toc24"></A>
<H2>Writing an instance of parametrized resource grammar implementation</H2>
<P>
Above we have looked at how a resource implementation is built by
@@ -621,7 +656,7 @@ use parametrized modules. The advantages are
In this chapter, we will look at an example: adding Italian to
the Romance family.
</P>
<A NAME="toc24"></A>
<A NAME="toc25"></A>
<H2>Parametrizing a resource grammar implementation</H2>
<P>
This is the most demanding form of resource grammar writing.
@@ -637,6 +672,6 @@ This chapter will work out an example of how an Estonian grammar
is constructed from the Finnish grammar through parametrization.
</P>
<!-- html code generated by txt2tags 2.0 (http://txt2tags.sf.net) -->
<!-- html code generated by txt2tags 2.3 (http://txt2tags.sf.net) -->
<!-- cmdline: txt2tags -\-toc -thtml Resource-HOWTO.txt -->
</BODY></HTML>

View File

@@ -127,6 +127,13 @@ try out the module
explained in the
[GF Tutorial http://www.cs.chalmers.se/~aarne/GF/doc/tutorial/gf-tutorial2.html].
Another reduced API is the
[toy Latin grammar latin.gf]
which will be used as a reference when discussing the details.
It is not so usable in practice as the Tutorial API, but it goes
deeper in explaining what parameters and dependencies the principal categories
and rules have.
@@ -426,11 +433,34 @@ those who want to build new lexica.
==The core of the syntax==
Among all categories and functions, there is a handful of the
most important and distinct ones, of which the others can be
seen as variations. The categories are
```
Cl ; VP ; V2 ; NP ; CN ; Det ; AP ;
```
The functions are
```
PredVP : NP -> VP -> Cl ; -- predication
ComplV2 : V2 -> NP -> VP ; -- complementization
DetCN : Det -> CN -> NP ; -- determination
ModCN : AP -> CN -> CN ; -- modification
```
This [toy Latin grammar latin.gf] shows in a nutshell how these
rules relate the categories to each other. It is intended to be a
first approximation when designing the parameter system of a new
language. We will refer to the implementations contained in it
when discussing the modules in more detail.
==Inside grammar modules==
So far we just give links to the implementations of each API.
More explanation iś to follow - but many detail implementation tricks
are only found in the cooments of the modules.
More explanations follow - but many detailed implementation tricks
are only found in the comments of the modules.
===The category system===

View File

@@ -0,0 +1,175 @@
--1 A Latin grammar to illustrate some main dependencies
--
-- All implementations of the resource API have so far been variations
-- and extensions of this. The most notable regularities are
-- - which types of features each category has
-- - how inherent features are inherited and passed to parameters
--
--
-- NOTE(review): this file has no abstract/concrete module header;
-- it appears to be an illustrative fragment rather than a compilable
-- module on its own - confirm how it is meant to be loaded.
--
-- The seven core categories of the syntax (see the HOWTO text).
cat
Cl ; -- clause
VP ; -- verb phrase
V2 ; -- two-place verb
NP ; -- noun phrase
CN ; -- common noun
Det ; -- determiner
AP ; -- adjectival phrase
-- The four core combination rules relating the categories to each other.
fun
PredVP : NP -> VP -> Cl ; -- predication
ComplV2 : V2 -> NP -> VP ; -- complementization
DetCN : Det -> CN -> NP ; -- determination
ModCN : AP -> CN -> CN ; -- modification
-- Parameter types: a reduced system for the toy grammar.
-- Case has only Nom | Acc | Dat (no genitive, ablative, or vocative);
-- Tense has only present and past (realized below as present/imperfect forms).
param
Number = Sg | Pl ;
Person = P1 | P2 | P3 ;
Tense = Pres | Past ;
Polarity = Pos | Neg ;
Case = Nom | Acc | Dat ;
Gender = Masc | Fem | Neutr ;
oper
Agr = {g : Gender ; n : Number ; p : Person} ; -- agreement features
-- Linearization types: each record lists the variable features (tables)
-- and inherent features (plain fields) of the category.
lincat
-- A clause varies in tense and polarity; everything else is fixed inside it.
Cl = {
s : Tense => Polarity => Str
} ;
-- A VP keeps verb, negation particle, and complement as separate fields
-- until PredVP supplies the subject's agreement.
VP = {
verb : Tense => Polarity => Agr => Str ;
neg : Polarity => Str ; -- negation
compl : Agr => Str -- complement
} ;
-- A V2 is a verb paradigm plus the case it governs on its object.
V2 = {
s : Tense => Number => Person => Str ;
c : Case -- complement case
} ;
-- An NP inflects in case and carries inherent agreement features.
NP = {
s : Case => Str ;
a : Agr -- agreement features
} ;
-- A CN inflects in number and case; gender is inherent.
CN = {
s : Number => Case => Str ;
g : Gender
} ;
-- A Det inflects in gender and case; number is inherent (it determines
-- the number of the resulting NP).
Det = {
s : Gender => Case => Str ;
n : Number
} ;
-- An AP agrees in gender, number, and case with the noun it modifies.
AP = {
s : Gender => Number => Case => Str
} ;
lin
-- Predication: the subject's agreement features select the verb form
-- and the complement form. Word order here is subject ++ object ++ verb.
PredVP np vp = {
s = \\t,p =>
let
agr = np.a ;
subject = np.s ! Nom ;
object = vp.compl ! agr ;
verb = vp.neg ! p ++ vp.verb ! t ! p ! agr
in
subject ++ object ++ verb
} ;
-- Complementization: the polarity argument of the verb field is ignored
-- here, since negation is produced separately by the neg field ("non").
-- The object is put into the case governed by the verb (v.c).
ComplV2 v np = {
verb = \\t,p,a => v.s ! t ! a.n ! a.p ;
compl = \\_ => np.s ! v.c ;
neg = table {Pos => [] ; Neg => "non"}
} ;
-- Determination: gender is inherited from the noun, number from the
-- determiner; the resulting NP is third person.
DetCN det cn =
let
g = cn.g ;
n = det.n
in {
s = \\c => det.s ! g ! c ++ cn.s ! n ! c ;
a = {g = g ; n = n ; p = P3}
} ;
-- Modification: the adjective follows the noun and agrees with it in
-- gender (inherent in the noun), number, and case.
ModCN ap cn =
let
g = cn.g
in {
s = \\n,c => cn.s ! n ! c ++ ap.s ! g ! n ! c ;
g = g
} ;
-- lexicon to test
fun
ego_NP : NP ;
omnis_Det : Det ;
defPl_Det : Det ;
amare_V2 : V2 ;
licere_V2 : V2 ;
puella_CN : CN ;
servus_CN : CN ;
habilis_AP : AP ;
lin
-- Positional tables below list forms in declaration order of the
-- parameter type, e.g. table Case [...] = Nom ; Acc ; Dat.
-- NOTE(review): gender Fem for "ego" looks arbitrary - a first-person
-- pronoun has no fixed gender; confirm whether this is intended.
ego_NP = {
s = table Case ["ego" ; "me" ; "mihi"] ;
a = {g = Fem ; n = Sg ; p = P1}
} ;
-- Masc and Fem share one form set ("omnis"); Neutr gets its own ("omne").
omnis_Det = {
s = table {
Masc | Fem => table Case ["omnis" ; "omnem" ; "omni"] ;
_ => table Case ["omne" ; "omne" ; "omni"]
} ;
n = Sg
} ;
-- A determiner with empty linearization: bare plural ("definite plural").
defPl_Det = {
s = \\_,_ => [] ;
n = Pl
} ;
-- Verb paradigms: 12 forms laid out in (Tense * Number * Person) order,
-- i.e. present Sg P1..P3, present Pl P1..P3, then the past (imperfect) row.
amare_V2 = {
s = \\t,n,p => table (Tense * Number * Person) [
"amo" ; "amas" ; "amat" ; "amamus" ; "amatis" ; "amant" ;
"amabam" ; "amabas" ; "amabat" ; "amabamus" ; "amabatis" ; "amabant"
] ! <t,n,p> ;
c = Acc
} ;
-- "licere" governs the dative on its complement (c = Dat).
licere_V2 = {
s = \\t,n,p => table (Tense * Number * Person) [
"liceo" ; "lices" ; "licet" ; "licemus" ; "licetis" ; "licent" ;
"licebam" ; "licebas" ; "licebat" ; "licebamus" ; "licebatis" ; "licebant"
] ! <t,n,p> ;
c = Dat
} ;
-- Noun paradigms: 6 forms in (Number * Case) order:
-- Sg Nom/Acc/Dat, then Pl Nom/Acc/Dat.
puella_CN = {
s = \\n,c => table (Number * Case) [
"puella" ; "puellam" ; "puellae" ;
"puellae" ; "puellas" ; "puellis"
] ! <n,c> ;
g = Fem
} ;
servus_CN = {
s = \\n,c => table (Number * Case) [
"servus" ; "servum" ; "servo" ;
"servi" ; "servos" ; "servis"
] ! <n,c> ;
g = Masc
} ;
-- Adjective of the third declension: Masc/Fem share forms, Neutr differs.
habilis_AP = {
s = table {
Masc | Fem => \\n,c => table (Number * Case) [
"habilis" ; "habilem" ; "habili" ; "habiles" ; "habiles" ; "habilibus"
] ! <n,c> ;
_ => \\n,c => table (Number * Case) [
"habile" ; "habile" ; "habili" ; "habilia" ; "habilia" ; "habilibus"
] ! <n,c>
}
} ;

View File

@@ -14,7 +14,7 @@ concrete MathematicalFre of Mathematical =
StructuralFre,
SymbolFre,
PredicationFre,
PredicationFre - [predV3], ---- gf bug
LexiconFre
** {

View File

@@ -14,7 +14,7 @@ concrete MathematicalIta of Mathematical =
StructuralIta,
SymbolIta,
PredicationIta,
PredicationIta, -- - [predV3,predV2], --- gf bug
LexiconIta
** {