mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
Latin example in HOWTO
This commit is contained in:
@@ -7,7 +7,7 @@
|
||||
<P ALIGN="center"><CENTER><H1>Resource grammar writing HOWTO</H1>
|
||||
<FONT SIZE="4">
|
||||
<I>Author: Aarne Ranta <aarne (at) cs.chalmers.se></I><BR>
|
||||
Last update: Wed Jan 25 15:04:36 2006
|
||||
Last update: Thu Feb 2 00:04:00 2006
|
||||
</FONT></CENTER>
|
||||
|
||||
<P></P>
|
||||
@@ -30,22 +30,23 @@ Last update: Wed Jan 25 15:04:36 2006
|
||||
<LI><A HREF="#toc11">Lock fields</A>
|
||||
<LI><A HREF="#toc12">Lexicon construction</A>
|
||||
</UL>
|
||||
<LI><A HREF="#toc13">Inside grammar modules</A>
|
||||
<LI><A HREF="#toc13">The core of the syntax</A>
|
||||
<LI><A HREF="#toc14">Inside grammar modules</A>
|
||||
<UL>
|
||||
<LI><A HREF="#toc14">The category system</A>
|
||||
<LI><A HREF="#toc15">Phrase category modules</A>
|
||||
<LI><A HREF="#toc16">Resource modules</A>
|
||||
<LI><A HREF="#toc17">Lexicon</A>
|
||||
<LI><A HREF="#toc15">The category system</A>
|
||||
<LI><A HREF="#toc16">Phrase category modules</A>
|
||||
<LI><A HREF="#toc17">Resource modules</A>
|
||||
<LI><A HREF="#toc18">Lexicon</A>
|
||||
</UL>
|
||||
<LI><A HREF="#toc18">Lexicon extension</A>
|
||||
<LI><A HREF="#toc19">Lexicon extension</A>
|
||||
<UL>
|
||||
<LI><A HREF="#toc19">The irregularity lexicon</A>
|
||||
<LI><A HREF="#toc20">Lexicon extraction from a word list</A>
|
||||
<LI><A HREF="#toc21">Lexicon extraction from raw text data</A>
|
||||
<LI><A HREF="#toc22">Extending the resource grammar API</A>
|
||||
<LI><A HREF="#toc20">The irregularity lexicon</A>
|
||||
<LI><A HREF="#toc21">Lexicon extraction from a word list</A>
|
||||
<LI><A HREF="#toc22">Lexicon extraction from raw text data</A>
|
||||
<LI><A HREF="#toc23">Extending the resource grammar API</A>
|
||||
</UL>
|
||||
<LI><A HREF="#toc23">Writing an instance of parametrized resource grammar implementation</A>
|
||||
<LI><A HREF="#toc24">Parametrizing a resource grammar implementation</A>
|
||||
<LI><A HREF="#toc24">Writing an instance of parametrized resource grammar implementation</A>
|
||||
<LI><A HREF="#toc25">Parametrizing a resource grammar implementation</A>
|
||||
</UL>
|
||||
|
||||
<P></P>
|
||||
@@ -175,6 +176,14 @@ try out the module
|
||||
explained in the
|
||||
<A HREF="http://www.cs.chalmers.se/~aarne/GF/doc/tutorial/gf-tutorial2.html">GF Tutorial</A>.
|
||||
</P>
|
||||
<P>
|
||||
Another reduced API is the
|
||||
<A HREF="latin.gf">toy Latin grammar</A>
|
||||
which will be used as a reference when discussing the details.
|
||||
It is not as usable in practice as the Tutorial API, but it goes
|
||||
deeper in explaining what parameters and dependencies the principal categories
|
||||
and rules have.
|
||||
</P>
|
||||
<A NAME="toc6"></A>
|
||||
<H2>Phases of the work</H2>
|
||||
<A NAME="toc7"></A>
|
||||
@@ -494,19 +503,45 @@ use of the paradigms in <CODE>BasicGer</CODE> gives a good set of examples for
|
||||
those who want to build new lexica.
|
||||
</P>
|
||||
<A NAME="toc13"></A>
|
||||
<H2>The core of the syntax</H2>
|
||||
<P>
|
||||
Among all categories and functions, there is a handful of the
|
||||
most important and distinct ones, of which the others can be
|
||||
seen as variations. The categories are
|
||||
</P>
|
||||
<PRE>
|
||||
Cl ; VP ; V2 ; NP ; CN ; Det ; AP ;
|
||||
</PRE>
|
||||
<P>
|
||||
The functions are
|
||||
</P>
|
||||
<PRE>
|
||||
PredVP : NP -> VP -> Cl ; -- predication
|
||||
ComplV2 : V2 -> NP -> VP ; -- complementization
|
||||
DetCN : Det -> CN -> NP ; -- determination
|
||||
ModCN : AP -> CN -> CN ; -- modification
|
||||
</PRE>
|
||||
<P>
|
||||
This <A HREF="latin.gf">toy Latin grammar</A> shows in a nutshell how these
|
||||
rules relate the categories to each other. It is intended to be a
|
||||
first approximation when designing the parameter system of a new
|
||||
language. We will refer to the implementations contained in it
|
||||
when discussing the modules in more detail.
|
||||
</P>
|
||||
<A NAME="toc14"></A>
|
||||
<H2>Inside grammar modules</H2>
|
||||
<P>
|
||||
So far we just give links to the implementations of each API.
|
||||
More explanation iś to follow - but many detail implementation tricks
|
||||
are only found in the cooments of the modules.
|
||||
More explanations follow - but many detailed implementation tricks
|
||||
are only found in the comments of the modules.
|
||||
</P>
|
||||
<A NAME="toc14"></A>
|
||||
<A NAME="toc15"></A>
|
||||
<H3>The category system</H3>
|
||||
<UL>
|
||||
<LI><A HREF="gfdoc/Cat.html">Cat</A>, <A HREF="gfdoc/CatGer.gf">CatGer</A>
|
||||
</UL>
|
||||
|
||||
<A NAME="toc15"></A>
|
||||
<A NAME="toc16"></A>
|
||||
<H3>Phrase category modules</H3>
|
||||
<UL>
|
||||
<LI><A HREF="gfdoc/Tense.html">Tense</A>, <A HREF="../german/TenseGer.gf">TenseGer</A>
|
||||
@@ -523,7 +558,7 @@ are only found in the cooments of the modules.
|
||||
<LI><A HREF="gfdoc/Lang.html">Lang</A>, <A HREF="../german/LangGer.gf">LangGer</A>
|
||||
</UL>
|
||||
|
||||
<A NAME="toc16"></A>
|
||||
<A NAME="toc17"></A>
|
||||
<H3>Resource modules</H3>
|
||||
<UL>
|
||||
<LI><A HREF="../german/ParamGer.gf">ParamGer</A>
|
||||
@@ -532,16 +567,16 @@ are only found in the cooments of the modules.
|
||||
<LI><A HREF="gfdoc/ParadigmsGer.html">ParadigmsGer</A>, <A HREF="../german/ParadigmsGer.gf">ParadigmsGer.gf</A>
|
||||
</UL>
|
||||
|
||||
<A NAME="toc17"></A>
|
||||
<A NAME="toc18"></A>
|
||||
<H3>Lexicon</H3>
|
||||
<UL>
|
||||
<LI><A HREF="gfdoc/Structural.html">Structural</A>, <A HREF="../german/StructuralGer.gf">StructuralGer</A>
|
||||
<LI><A HREF="gfdoc/Lexicon.html">Lexicon</A>, <A HREF="../german/LexiconGer.gf">LexiconGer</A>
|
||||
</UL>
|
||||
|
||||
<A NAME="toc18"></A>
|
||||
<H2>Lexicon extension</H2>
|
||||
<A NAME="toc19"></A>
|
||||
<H2>Lexicon extension</H2>
|
||||
<A NAME="toc20"></A>
|
||||
<H3>The irregularity lexicon</H3>
|
||||
<P>
|
||||
It may be handy to provide a separate module of irregular
|
||||
@@ -551,7 +586,7 @@ few hundred perhaps. Building such a lexicon separately also
|
||||
makes it less important to cover <I>everything</I> by the
|
||||
worst-case paradigms (<CODE>mkV</CODE> etc).
|
||||
</P>
|
||||
<A NAME="toc20"></A>
|
||||
<A NAME="toc21"></A>
|
||||
<H3>Lexicon extraction from a word list</H3>
|
||||
<P>
|
||||
You can often find resources such as lists of
|
||||
@@ -586,7 +621,7 @@ When using ready-made word lists, you should think about
|
||||
copyright issues. Ideally, all resource grammar material should
|
||||
be provided under GNU General Public License.
|
||||
</P>
|
||||
<A NAME="toc21"></A>
|
||||
<A NAME="toc22"></A>
|
||||
<H3>Lexicon extraction from raw text data</H3>
|
||||
<P>
|
||||
This is a cheap technique to build a lexicon of thousands
|
||||
@@ -594,7 +629,7 @@ of words, if text data is available in digital format.
|
||||
See the <A HREF="http://www.cs.chalmers.se/~markus/FM/">Functional Morphology</A>
|
||||
homepage for details.
|
||||
</P>
|
||||
<A NAME="toc22"></A>
|
||||
<A NAME="toc23"></A>
|
||||
<H3>Extending the resource grammar API</H3>
|
||||
<P>
|
||||
Sooner or later it will happen that the resource grammar API
|
||||
@@ -603,7 +638,7 @@ that it does not include idiomatic expressions in a given language.
|
||||
The solution then is in the first place to build language-specific
|
||||
extension modules. This chapter will deal with this issue.
|
||||
</P>
|
||||
<A NAME="toc23"></A>
|
||||
<A NAME="toc24"></A>
|
||||
<H2>Writing an instance of parametrized resource grammar implementation</H2>
|
||||
<P>
|
||||
Above we have looked at how a resource implementation is built by
|
||||
@@ -621,7 +656,7 @@ use parametrized modules. The advantages are
|
||||
In this chapter, we will look at an example: adding Italian to
|
||||
the Romance family.
|
||||
</P>
|
||||
<A NAME="toc24"></A>
|
||||
<A NAME="toc25"></A>
|
||||
<H2>Parametrizing a resource grammar implementation</H2>
|
||||
<P>
|
||||
This is the most demanding form of resource grammar writing.
|
||||
@@ -637,6 +672,6 @@ This chapter will work out an example of how an Estonian grammar
|
||||
is constructed from the Finnish grammar through parametrization.
|
||||
</P>
|
||||
|
||||
<!-- html code generated by txt2tags 2.0 (http://txt2tags.sf.net) -->
|
||||
<!-- html code generated by txt2tags 2.3 (http://txt2tags.sf.net) -->
|
||||
<!-- cmdline: txt2tags -\-toc -thtml Resource-HOWTO.txt -->
|
||||
</BODY></HTML>
|
||||
|
||||
@@ -127,6 +127,13 @@ try out the module
|
||||
explained in the
|
||||
[GF Tutorial http://www.cs.chalmers.se/~aarne/GF/doc/tutorial/gf-tutorial2.html].
|
||||
|
||||
Another reduced API is the
|
||||
[toy Latin grammar latin.gf]
|
||||
which will be used as a reference when discussing the details.
|
||||
It is not as usable in practice as the Tutorial API, but it goes
|
||||
deeper in explaining what parameters and dependencies the principal categories
|
||||
and rules have.
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -426,11 +433,34 @@ those who want to build new lexica.
|
||||
|
||||
|
||||
|
||||
==The core of the syntax==
|
||||
|
||||
Among all categories and functions, there is a handful of the
|
||||
most important and distinct ones, of which the others can be
|
||||
seen as variations. The categories are
|
||||
```
|
||||
Cl ; VP ; V2 ; NP ; CN ; Det ; AP ;
|
||||
```
|
||||
The functions are
|
||||
```
|
||||
PredVP : NP -> VP -> Cl ; -- predication
|
||||
ComplV2 : V2 -> NP -> VP ; -- complementization
|
||||
DetCN : Det -> CN -> NP ; -- determination
|
||||
ModCN : AP -> CN -> CN ; -- modification
|
||||
```
|
||||
This [toy Latin grammar latin.gf] shows in a nutshell how these
|
||||
rules relate the categories to each other. It is intended to be a
|
||||
first approximation when designing the parameter system of a new
|
||||
language. We will refer to the implementations contained in it
|
||||
when discussing the modules in more detail.
|
||||
|
||||
|
||||
|
||||
==Inside grammar modules==
|
||||
|
||||
So far we just give links to the implementations of each API.
|
||||
More explanation iś to follow - but many detail implementation tricks
|
||||
are only found in the cooments of the modules.
|
||||
More explanations follow - but many detailed implementation tricks
|
||||
are only found in the comments of the modules.
|
||||
|
||||
|
||||
===The category system===
|
||||
|
||||
175
lib/resource-1.0/doc/latin.gf
Normal file
175
lib/resource-1.0/doc/latin.gf
Normal file
@@ -0,0 +1,175 @@
|
||||
--1 A Latin grammar to illustrate some main dependencies
|
||||
--
|
||||
-- All implementations of the resource API have so far been variations
|
||||
-- and extensions of this. The most notable regularities are
|
||||
-- - which types of features each category has
|
||||
-- - how inherent features are inherited and passed to parameters
|
||||
--
|
||||
--
|
||||
|
||||
|
||||
-- Abstract syntax categories: the seven phrase types that the four
-- core syntactic functions declared in this file take and return.
cat
|
||||
Cl ; -- clause
|
||||
VP ; -- verb phrase
|
||||
V2 ; -- two-place verb
|
||||
NP ; -- noun phrase
|
||||
CN ; -- common noun
|
||||
Det ; -- determiner
|
||||
AP ; -- adjectival phrase
|
||||
|
||||
-- Abstract syntax functions: the four core combination rules.
-- Each one builds a larger phrase from two smaller ones.
fun
|
||||
PredVP : NP -> VP -> Cl ; -- predication
|
||||
ComplV2 : V2 -> NP -> VP ; -- complementization
|
||||
DetCN : Det -> CN -> NP ; -- determination
|
||||
ModCN : AP -> CN -> CN ; -- modification
|
||||
|
||||
-- Parameter types used by the linearization types below.
-- The order of constructors matters: course-of-values tables
-- ("table P [...]") in the lexicon list their values in this order.
param
|
||||
Number = Sg | Pl ;
|
||||
Person = P1 | P2 | P3 ;
|
||||
Tense = Pres | Past ;
|
||||
Polarity = Pos | Neg ;
|
||||
-- Only three cases are modelled in this toy grammar.
Case = Nom | Acc | Dat ;
|
||||
Gender = Masc | Fem | Neutr ;
|
||||
|
||||
-- Agr bundles gender, number and person into one record type, so that
-- they can be carried by an NP and passed to a VP as a single argument.
oper
|
||||
Agr = {g : Gender ; n : Number ; p : Person} ; -- agreement features
|
||||
|
||||
-- Linearization types: for each category, a record saying which
-- features are variable (table arguments, left of "=>") and which
-- are inherent (plain record fields).
lincat
|
||||
-- Clause: varies in tense and polarity; nothing is inherent.
Cl = {
|
||||
s : Tense => Polarity => Str
|
||||
} ;
|
||||
-- Verb phrase: kept as three separate parts (verb, negation, complement)
-- so that PredVP can decide their order and supply the subject agreement.
VP = {
|
||||
verb : Tense => Polarity => Agr => Str ;
|
||||
neg : Polarity => Str ; -- negation
|
||||
compl : Agr => Str -- complement
|
||||
} ;
|
||||
-- Two-place verb: inflects for tense, number and person; the case it
-- requires of its complement is inherent.
V2 = {
|
||||
s : Tense => Number => Person => Str ;
|
||||
c : Case -- complement case
|
||||
} ;
|
||||
-- Noun phrase: inflects for case only; agreement features are inherent.
NP = {
|
||||
s : Case => Str ;
|
||||
a : Agr -- agreement features
|
||||
} ;
|
||||
-- Common noun: inflects for number and case; gender is inherent.
CN = {
|
||||
s : Number => Case => Str ;
|
||||
g : Gender
|
||||
} ;
|
||||
-- Determiner: inflects for gender and case; number is inherent.
Det = {
|
||||
s : Gender => Case => Str ;
|
||||
n : Number
|
||||
} ;
|
||||
-- Adjectival phrase: inflects for gender, number and case; nothing inherent.
AP = {
|
||||
s : Gender => Number => Case => Str
|
||||
} ;
|
||||
|
||||
lin
|
||||
-- Predication: builds a clause from a subject NP and a VP.
-- The subject is taken in the nominative, and its inherent agreement
-- features are passed both to the VP's complement and to its verb.
-- The resulting word order is subject - complement - (negation) verb.
PredVP np vp = {
|
||||
s = \\t,p =>
|
||||
let
|
||||
agr = np.a ;
|
||||
subject = np.s ! Nom ;
|
||||
object = vp.compl ! agr ;
|
||||
-- the negation word (selected by polarity) is placed directly before the verb
verb = vp.neg ! p ++ vp.verb ! t ! p ! agr
|
||||
in
|
||||
subject ++ object ++ verb
|
||||
} ;
|
||||
|
||||
-- Complementization: builds a VP from a two-place verb and its object NP.
ComplV2 v np = {
|
||||
-- the verb form uses only number and person of the agreement record;
-- the polarity argument p is not used here (negation lives in "neg")
verb = \\t,p,a => v.s ! t ! a.n ! a.p ;
|
||||
-- the object is put in the case required by the verb and does not
-- depend on the agreement features passed in
compl = \\_ => np.s ! v.c ;
|
||||
-- positive polarity adds nothing; negative polarity adds "non"
neg = table {Pos => [] ; Neg => "non"}
|
||||
} ;
|
||||
|
||||
-- Determination: builds an NP from a determiner and a common noun.
-- Gender comes from the noun and number from the determiner; each is
-- used to select the form of the other constituent.
DetCN det cn =
|
||||
let
|
||||
g = cn.g ;
|
||||
n = det.n
|
||||
in {
|
||||
-- determiner first, then the noun, both in the case requested of the NP
s = \\c => det.s ! g ! c ++ cn.s ! n ! c ;
|
||||
-- the resulting NP is always third person
a = {g = g ; n = n ; p = P3}
|
||||
} ;
|
||||
|
||||
-- Modification: builds a common noun from an adjectival phrase and a
-- common noun. The adjective is placed after the noun and agrees with
-- it in gender (inherent in the noun), number and case.
ModCN ap cn =
|
||||
let
|
||||
g = cn.g
|
||||
in {
|
||||
s = \\n,c => cn.s ! n ! c ++ ap.s ! g ! n ! c ;
|
||||
-- the modified noun keeps the gender of the head noun
g = g
|
||||
} ;
|
||||
|
||||
-- lexicon to test
|
||||
|
||||
-- Abstract syntax of the test lexicon: one noun phrase, two determiners,
-- two two-place verbs, two common nouns and one adjectival phrase.
-- Their linearizations follow in the "lin" section below.
fun
|
||||
ego_NP : NP ;
|
||||
omnis_Det : Det ;
|
||||
defPl_Det : Det ;
|
||||
|
||||
amare_V2 : V2 ;
|
||||
licere_V2 : V2 ;
|
||||
puella_CN : CN ;
|
||||
servus_CN : CN ;
|
||||
habilis_AP : AP ;
|
||||
|
||||
lin
|
||||
-- First person singular pronoun.
ego_NP = {
|
||||
-- forms listed in the declaration order of Case: Nom, Acc, Dat
s = table Case ["ego" ; "me" ; "mihi"] ;
|
||||
-- gender is fixed to Fem in this toy lexicon entry
a = {g = Fem ; n = Sg ; p = P1}
|
||||
} ;
|
||||
|
||||
-- Singular determiner "omnis". Masculine and feminine share one set of
-- forms; the remaining gender (Neutr) has its own set.
omnis_Det = {
|
||||
s = table {
|
||||
-- each inner table lists forms in the order Nom, Acc, Dat
Masc | Fem => table Case ["omnis" ; "omnem" ; "omni"] ;
|
||||
_ => table Case ["omne" ; "omne" ; "omni"]
|
||||
} ;
|
||||
n = Sg
|
||||
} ;
|
||||
|
||||
-- Plural determiner with no surface form: it contributes the empty
-- string for every gender and case, and only sets the number to Pl.
defPl_Det = {
|
||||
s = \\_,_ => [] ;
|
||||
n = Pl
|
||||
} ;
|
||||
|
||||
-- Two-place verb "amare", taking its complement in the accusative.
amare_V2 = {
|
||||
-- the twelve forms are given as one table over the product type and
-- then selected with the tuple <t,n,p>; the first row holds the Pres
-- forms and the second the Past forms, each Sg P1-P3 then Pl P1-P3
s = \\t,n,p => table (Tense * Number * Person) [
|
||||
"amo" ; "amas" ; "amat" ; "amamus" ; "amatis" ; "amant" ;
|
||||
"amabam" ; "amabas" ; "amabat" ; "amabamus" ; "amabatis" ; "amabant"
|
||||
] ! <t,n,p> ;
|
||||
c = Acc
|
||||
} ;
|
||||
|
||||
-- Two-place verb "licere", taking its complement in the dative.
licere_V2 = {
|
||||
-- same layout as amare_V2: Pres forms in the first row, Past forms in
-- the second, each Sg P1-P3 then Pl P1-P3, selected with <t,n,p>
s = \\t,n,p => table (Tense * Number * Person) [
|
||||
"liceo" ; "lices" ; "licet" ; "licemus" ; "licetis" ; "licent" ;
|
||||
"licebam" ; "licebas" ; "licebat" ; "licebamus" ; "licebatis" ; "licebant"
|
||||
] ! <t,n,p> ;
|
||||
c = Dat
|
||||
} ;
|
||||
|
||||
-- Feminine common noun "puella".
puella_CN = {
|
||||
-- first row: singular Nom, Acc, Dat; second row: plural Nom, Acc, Dat
s = \\n,c => table (Number * Case) [
|
||||
"puella" ; "puellam" ; "puellae" ;
|
||||
"puellae" ; "puellas" ; "puellis"
|
||||
] ! <n,c> ;
|
||||
g = Fem
|
||||
} ;
|
||||
|
||||
-- Masculine common noun "servus".
servus_CN = {
|
||||
-- first row: singular Nom, Acc, Dat; second row: plural Nom, Acc, Dat
s = \\n,c => table (Number * Case) [
|
||||
"servus" ; "servum" ; "servo" ;
|
||||
"servi" ; "servos" ; "servis"
|
||||
] ! <n,c> ;
|
||||
g = Masc
|
||||
} ;
|
||||
|
||||
-- Adjectival phrase "habilis". Masculine and feminine share one
-- paradigm; the remaining gender (Neutr) has its own.
habilis_AP = {
|
||||
s = table {
|
||||
-- each paradigm lists singular Nom, Acc, Dat and then plural Nom, Acc, Dat
Masc | Fem => \\n,c => table (Number * Case) [
|
||||
"habilis" ; "habilem" ; "habili" ; "habiles" ; "habiles" ; "habilibus"
|
||||
] ! <n,c> ;
|
||||
_ => \\n,c => table (Number * Case) [
|
||||
"habile" ; "habile" ; "habili" ; "habilia" ; "habilia" ; "habilibus"
|
||||
] ! <n,c>
|
||||
}
|
||||
} ;
|
||||
|
||||
@@ -14,7 +14,7 @@ concrete MathematicalFre of Mathematical =
|
||||
StructuralFre,
|
||||
|
||||
SymbolFre,
|
||||
PredicationFre,
|
||||
PredicationFre - [predV3], ---- gf bug
|
||||
|
||||
LexiconFre
|
||||
** {
|
||||
|
||||
@@ -14,7 +14,7 @@ concrete MathematicalIta of Mathematical =
|
||||
StructuralIta,
|
||||
|
||||
SymbolIta,
|
||||
PredicationIta,
|
||||
PredicationIta, -- - [predV3,predV2], --- gf bug
|
||||
|
||||
LexiconIta
|
||||
** {
|
||||
|
||||
Reference in New Issue
Block a user