forked from GitHub/gf-core
tutorial; mkMorpho bug fix
This commit is contained in:
@@ -7,7 +7,7 @@
|
|||||||
<P ALIGN="center"><CENTER><H1>Grammatical Framework Tutorial</H1>
|
<P ALIGN="center"><CENTER><H1>Grammatical Framework Tutorial</H1>
|
||||||
<FONT SIZE="4">
|
<FONT SIZE="4">
|
||||||
<I>Author: Aarne Ranta <aarne (at) cs.chalmers.se></I><BR>
|
<I>Author: Aarne Ranta <aarne (at) cs.chalmers.se></I><BR>
|
||||||
Last update: Sat Dec 17 13:32:10 2005
|
Last update: Sat Dec 17 21:42:39 2005
|
||||||
</FONT></CENTER>
|
</FONT></CENTER>
|
||||||
|
|
||||||
<P></P>
|
<P></P>
|
||||||
@@ -80,20 +80,35 @@ Last update: Sat Dec 17 13:32:10 2005
|
|||||||
<LI><A HREF="#toc49">Morphological analysis and morphology quiz</A>
|
<LI><A HREF="#toc49">Morphological analysis and morphology quiz</A>
|
||||||
<LI><A HREF="#toc50">Discontinuous constituents</A>
|
<LI><A HREF="#toc50">Discontinuous constituents</A>
|
||||||
</UL>
|
</UL>
|
||||||
<LI><A HREF="#toc51">Topics still to be written</A>
|
<LI><A HREF="#toc51">More constructs for concrete syntax</A>
|
||||||
<UL>
|
<UL>
|
||||||
<LI><A HREF="#toc52">Free variation</A>
|
<LI><A HREF="#toc52">Free variation</A>
|
||||||
<LI><A HREF="#toc53">Record extension, tuples</A>
|
<LI><A HREF="#toc53">Record extension and subtyping</A>
|
||||||
<LI><A HREF="#toc54">Predefined types and operations</A>
|
<LI><A HREF="#toc54">Tuples and product types</A>
|
||||||
<LI><A HREF="#toc55">Lexers and unlexers</A>
|
<LI><A HREF="#toc55">Predefined types and operations</A>
|
||||||
<LI><A HREF="#toc56">Grammars of formal languages</A>
|
</UL>
|
||||||
|
<LI><A HREF="#toc56">More features of the module system</A>
|
||||||
|
<UL>
|
||||||
<LI><A HREF="#toc57">Resource grammars and their reuse</A>
|
<LI><A HREF="#toc57">Resource grammars and their reuse</A>
|
||||||
<LI><A HREF="#toc58">Interfaces, instances, and functors</A>
|
<LI><A HREF="#toc58">Interfaces, instances, and functors</A>
|
||||||
<LI><A HREF="#toc59">Speech input and output</A>
|
<LI><A HREF="#toc59">Restricted inheritance and qualified opening</A>
|
||||||
<LI><A HREF="#toc60">Embedded grammars in Haskell, Java, and Prolog</A>
|
</UL>
|
||||||
<LI><A HREF="#toc61">Dependent types, variable bindings, semantic definitions</A>
|
<LI><A HREF="#toc60">More concepts of abstract syntax</A>
|
||||||
<LI><A HREF="#toc62">Transfer modules</A>
|
<UL>
|
||||||
<LI><A HREF="#toc63">Alternative input and output grammar formats</A>
|
<LI><A HREF="#toc61">Dependent types</A>
|
||||||
|
<LI><A HREF="#toc62">Higher-order abstract syntax</A>
|
||||||
|
<LI><A HREF="#toc63">Semantic definitions</A>
|
||||||
|
<LI><A HREF="#toc64">Case study: grammars of formal languages</A>
|
||||||
|
</UL>
|
||||||
|
<LI><A HREF="#toc65">Transfer modules</A>
|
||||||
|
<LI><A HREF="#toc66">Practical issues</A>
|
||||||
|
<UL>
|
||||||
|
<LI><A HREF="#toc67">Lexers and unlexers</A>
|
||||||
|
<LI><A HREF="#toc68">Efficiency of grammars</A>
|
||||||
|
<LI><A HREF="#toc69">Speech input and output</A>
|
||||||
|
<LI><A HREF="#toc70">Communicating with GF</A>
|
||||||
|
<LI><A HREF="#toc71">Embedded grammars in Haskell, Java, and Prolog</A>
|
||||||
|
<LI><A HREF="#toc72">Alternative input and output grammar formats</A>
|
||||||
</UL>
|
</UL>
|
||||||
</UL>
|
</UL>
|
||||||
|
|
||||||
@@ -619,11 +634,7 @@ Examples of records of this type are
|
|||||||
{s = "foo"}
|
{s = "foo"}
|
||||||
{s = "hello" ++ "world"}
|
{s = "hello" ++ "world"}
|
||||||
</PRE>
|
</PRE>
|
||||||
<P>
|
<P></P>
|
||||||
The type <CODE>Str</CODE> is really the type of <B>token lists</B>, but
|
|
||||||
most of the time one can conveniently think of it as the type of strings,
|
|
||||||
denoted by string literals in double quotes.
|
|
||||||
</P>
|
|
||||||
<P>
|
<P>
|
||||||
Whenever a record <CODE>r</CODE> of type <CODE>{s : Str}</CODE> is given,
|
Whenever a record <CODE>r</CODE> of type <CODE>{s : Str}</CODE> is given,
|
||||||
<CODE>r.s</CODE> is an object of type <CODE>Str</CODE>. This is
|
<CODE>r.s</CODE> is an object of type <CODE>Str</CODE>. This is
|
||||||
@@ -634,6 +645,35 @@ of fields from a record:
|
|||||||
<LI>if <I>r</I> : <CODE>{</CODE> ... <I>p</I> : <I>T</I> ... <CODE>}</CODE> then <I>r.p</I> : <I>T</I>
|
<LI>if <I>r</I> : <CODE>{</CODE> ... <I>p</I> : <I>T</I> ... <CODE>}</CODE> then <I>r.p</I> : <I>T</I>
|
||||||
</UL>
|
</UL>
|
||||||
|
|
||||||
|
<P>
|
||||||
|
The type <CODE>Str</CODE> is really the type of <B>token lists</B>, but
|
||||||
|
most of the time one can conveniently think of it as the type of strings,
|
||||||
|
denoted by string literals in double quotes.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Notice that
|
||||||
|
</P>
|
||||||
|
<PRE>
|
||||||
|
"hello world"
|
||||||
|
</PRE>
|
||||||
|
<P>
|
||||||
|
is not recommended as an expression of type <CODE>Str</CODE>. It denotes
|
||||||
|
a token with a space in it, and will usually
|
||||||
|
not work with the lexical analysis that precedes parsing. A shorthand
|
||||||
|
exemplified by
|
||||||
|
</P>
|
||||||
|
<PRE>
|
||||||
|
["hello world and people"] === "hello" ++ "world" ++ "and" ++ "people"
|
||||||
|
</PRE>
|
||||||
|
<P>
|
||||||
|
can be used for lists of tokens. The expression
|
||||||
|
</P>
|
||||||
|
<PRE>
|
||||||
|
[]
|
||||||
|
</PRE>
|
||||||
|
<P>
|
||||||
|
denotes the empty token list.
|
||||||
|
</P>
|
||||||
<A NAME="toc18"></A>
|
<A NAME="toc18"></A>
|
||||||
<H3>An abstract syntax example</H3>
|
<H3>An abstract syntax example</H3>
|
||||||
<P>
|
<P>
|
||||||
@@ -1498,28 +1538,33 @@ the formation of noun phrases and verb phrases.
|
|||||||
<A NAME="toc47"></A>
|
<A NAME="toc47"></A>
|
||||||
<H3>English concrete syntax with parameters</H3>
|
<H3>English concrete syntax with parameters</H3>
|
||||||
<PRE>
|
<PRE>
|
||||||
concrete PaleolithicEng of Paleolithic = open MorphoEng in {
|
concrete PaleolithicEng of Paleolithic = open Prelude, MorphoEng in {
|
||||||
lincat
|
lincat
|
||||||
S, A = {s : Str} ;
|
S, A = SS ;
|
||||||
VP, CN, V, TV = {s : Number => Str} ;
|
VP, CN, V, TV = {s : Number => Str} ;
|
||||||
NP = {s : Str ; n : Number} ;
|
NP = {s : Str ; n : Number} ;
|
||||||
lin
|
lin
|
||||||
PredVP np vp = {s = np.s ++ vp.s ! np.n} ;
|
PredVP np vp = ss (np.s ++ vp.s ! np.n) ;
|
||||||
UseV v = v ;
|
UseV v = v ;
|
||||||
ComplTV tv np = {s = \\n => tv.s ! n ++ np.s} ;
|
ComplTV tv np = {s = \\n => tv.s ! n ++ np.s} ;
|
||||||
UseA a = {s = \\n => case n of {Sg => "is" ; Pl => "are"} ++ a.s} ;
|
UseA a = {s = \\n => case n of {Sg => "is" ; Pl => "are"} ++ a.s} ;
|
||||||
This cn = {s = "this" ++ cn.s ! Sg } ;
|
This = det Sg "this" ;
|
||||||
Indef cn = {s = "a" ++ cn.s ! Sg} ;
|
Indef = det Sg "a" ;
|
||||||
All cn = {s = "all" ++ cn.s ! Pl} ;
|
All = det Pl "all" ;
|
||||||
Two cn = {s = "two" ++ cn.s ! Pl} ;
|
Two = det Pl "two" ;
|
||||||
ModA a cn = {s = \\n => a.s ++ cn.s ! n} ;
|
ModA a cn = {s = \\n => a.s ++ cn.s ! n} ;
|
||||||
Louse = mkNoun "louse" "lice" ;
|
Louse = mkNoun "louse" "lice" ;
|
||||||
Snake = regNoun "snake" ;
|
Snake = regNoun "snake" ;
|
||||||
Green = {s = "green"} ;
|
Green = ss "green" ;
|
||||||
Warm = {s = "warm"} ;
|
Warm = ss "warm" ;
|
||||||
Laugh = regVerb "laugh" ;
|
Laugh = regVerb "laugh" ;
|
||||||
Sleep = regVerb "sleep" ;
|
Sleep = regVerb "sleep" ;
|
||||||
Kill = regVerb "kill" ;
|
Kill = regVerb "kill" ;
|
||||||
|
oper
|
||||||
|
det : Number -> Str -> Noun -> {s : Str ; n : Number} = \n,d,cn -> {
|
||||||
|
s = d ++ cn.s ! n ;
|
||||||
|
n = n
|
||||||
|
} ;
|
||||||
}
|
}
|
||||||
</PRE>
|
</PRE>
|
||||||
<P></P>
|
<P></P>
|
||||||
@@ -1527,19 +1572,19 @@ the formation of noun phrases and verb phrases.
|
|||||||
<H3>Hierarchic parameter types</H3>
|
<H3>Hierarchic parameter types</H3>
|
||||||
<P>
|
<P>
|
||||||
The reader familiar with a functional programming language such as
|
The reader familiar with a functional programming language such as
|
||||||
<a href="<A HREF="http://www.haskell.org">http://www.haskell.org</A>">Haskell<a> must have noticed the similarity
|
<A HREF="http://www.haskell.org">Haskell</A> must have noticed the similarity
|
||||||
between parameter types in GF and algebraic datatypes (<CODE>data</CODE> definitions
|
between parameter types in GF and <B>algebraic datatypes</B> (<CODE>data</CODE> definitions
|
||||||
in Haskell). The GF parameter types are actually a special case of algebraic
|
in Haskell). The GF parameter types are actually a special case of algebraic
|
||||||
datatypes: the main restriction is that in GF, these types must be finite.
|
datatypes: the main restriction is that in GF, these types must be finite.
|
||||||
(This restriction makes it possible to invert linearization rules into
|
(It is this restriction that makes it possible to invert linearization rules into
|
||||||
parsing methods.)
|
parsing methods.)
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
However, finite is not the same thing as enumerated. Even in GF, parameter
|
However, finite is not the same thing as enumerated. Even in GF, parameter
|
||||||
constructors can take arguments, provided these arguments are from other
|
constructors can take arguments, provided these arguments are from other
|
||||||
parameter types (recursion is forbidden). Such parameter types impose a
|
parameter types - only recursion is forbidden. Such parameter types impose a
|
||||||
hierarchic order among parameters. They are often useful to define
|
hierarchic order among parameters. They are often needed to define
|
||||||
linguistically accurate parameter systems.
|
the linguistically most accurate parameter systems.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
To give an example, Swedish adjectives
|
To give an example, Swedish adjectives
|
||||||
@@ -1603,7 +1648,7 @@ file for later use, by the command <CODE>morpho_list = ml</CODE>
|
|||||||
> morpho_list -number=25 -cat=V
|
> morpho_list -number=25 -cat=V
|
||||||
</PRE>
|
</PRE>
|
||||||
<P>
|
<P>
|
||||||
The number flag gives the number of exercises generated.
|
The <CODE>number</CODE> flag gives the number of exercises generated.
|
||||||
</P>
|
</P>
|
||||||
<A NAME="toc50"></A>
|
<A NAME="toc50"></A>
|
||||||
<H3>Discontinuous constituents</H3>
|
<H3>Discontinuous constituents</H3>
|
||||||
@@ -1615,7 +1660,7 @@ a sentence may place the object between the verb and the particle:
|
|||||||
<I>he switched it off</I>.
|
<I>he switched it off</I>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The first of the following judgements defines transitive verbs as a
|
The first of the following judgements defines transitive verbs as
|
||||||
<B>discontinuous constituents</B>, i.e. as having a linearization
|
<B>discontinuous constituents</B>, i.e. as having a linearization
|
||||||
type with two strings and not just one. The second judgement
|
type with two strings and not just one. The second judgement
|
||||||
shows how the constituents are separated by the object in complementization.
|
shows how the constituents are separated by the object in complementization.
|
||||||
@@ -1624,37 +1669,145 @@ shows how the constituents are separated by the object in complementization.
|
|||||||
lincat TV = {s : Number => Str ; s2 : Str} ;
|
lincat TV = {s : Number => Str ; s2 : Str} ;
|
||||||
lin ComplTV tv obj = {s = \\n => tv.s ! n ++ obj.s ++ tv.s2} ;
|
lin ComplTV tv obj = {s = \\n => tv.s ! n ++ obj.s ++ tv.s2} ;
|
||||||
</PRE>
|
</PRE>
|
||||||
<P></P>
|
|
||||||
<P>
|
<P>
|
||||||
GF currently requires that all fields in linearization records that
|
There is no restriction on the number of discontinuous constituents
|
||||||
have a table with value type <CODE>Str</CODE> have as labels
|
(or other fields) a <CODE>lincat</CODE> may contain. The only condition is that
|
||||||
either <CODE>s</CODE> or <CODE>s</CODE> with an integer index.
|
the fields must be of finite types, i.e. built from records, tables,
|
||||||
|
parameters, and <CODE>Str</CODE>, and not functions. A mathematical result
|
||||||
|
about parsing in GF says that the worst-case complexity of parsing
|
||||||
|
increases with the number of discontinuous constituents. Moreover,
|
||||||
|
the parsing and linearization commands only give reliable results
|
||||||
|
for categories whose linearization type has a unique <CODE>Str</CODE> valued
|
||||||
|
field labelled <CODE>s</CODE>.
|
||||||
</P>
|
</P>
|
||||||
<A NAME="toc51"></A>
|
<A NAME="toc51"></A>
|
||||||
<H2>Topics still to be written</H2>
|
<H2>More constructs for concrete syntax</H2>
|
||||||
<A NAME="toc52"></A>
|
<A NAME="toc52"></A>
|
||||||
<H3>Free variation</H3>
|
<H3>Free variation</H3>
|
||||||
|
<P>
|
||||||
|
Sometimes there are many alternative ways to define a concrete syntax.
|
||||||
|
For instance, the verb negation in English can be expressed both by
|
||||||
|
<I>does not</I> and <I>doesn't</I>. In linguistic terms, these expressions
|
||||||
|
are in <B>free variation</B>. The <CODE>variants</CODE> construct of GF can
|
||||||
|
be used to give a list of strings in free variation. For example,
|
||||||
|
</P>
|
||||||
|
<PRE>
|
||||||
|
NegVerb verb = {s = variants {["does not"] ; "doesn't"} ++ verb.s} ;
|
||||||
|
</PRE>
|
||||||
|
<P>
|
||||||
|
An empty variant list
|
||||||
|
</P>
|
||||||
|
<PRE>
|
||||||
|
variants {}
|
||||||
|
</PRE>
|
||||||
|
<P>
|
||||||
|
can be used e.g. if a word lacks a certain form.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
In general, <CODE>variants</CODE> should be used cautiously. It is not
|
||||||
|
recommended for modules aimed to be libraries, because the
|
||||||
|
user of the library has no way to choose among the variants.
|
||||||
|
Moreover, even though <CODE>variants</CODE> admits lists of any type,
|
||||||
|
its semantics for complex types can cause surprises.
|
||||||
|
</P>
|
||||||
<A NAME="toc53"></A>
|
<A NAME="toc53"></A>
|
||||||
<H3>Record extension, tuples</H3>
|
<H3>Record extension and subtyping</H3>
|
||||||
|
<P>
|
||||||
|
Record types and records can be <B>extended</B> with new fields. For instance,
|
||||||
|
in German it is natural to see transitive verbs as verbs with a case.
|
||||||
|
The symbol <CODE>**</CODE> is used for both constructs.
|
||||||
|
</P>
|
||||||
|
<PRE>
|
||||||
|
lincat TV = Verb ** {c : Case} ;
|
||||||
|
|
||||||
|
lin Follow = regVerb "folgen" ** {c = Dative} ;
|
||||||
|
</PRE>
|
||||||
|
<P>
|
||||||
|
To extend a record type or a record with a field whose label it
|
||||||
|
already has is a type error.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
A record type <I>T</I> is a <B>subtype</B> of another one <I>R</I>, if <I>T</I> has
|
||||||
|
all the fields of <I>R</I> and possibly other fields. For instance,
|
||||||
|
an extension of a record type is always a subtype of it.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If <I>T</I> is a subtype of <I>R</I>, an object of <I>T</I> can be used whenever
|
||||||
|
an object of <I>R</I> is required. For instance, a transitive verb can
|
||||||
|
be used whenever a verb is required.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<B>Contravariance</B> means that a function taking an <I>R</I> as argument
|
||||||
|
can also be applied to any object of a subtype <I>T</I>.
|
||||||
|
</P>
|
||||||
<A NAME="toc54"></A>
|
<A NAME="toc54"></A>
|
||||||
<H3>Predefined types and operations</H3>
|
<H3>Tuples and product types</H3>
|
||||||
|
<P>
|
||||||
|
Product types and tuples are syntactic sugar for record types and records:
|
||||||
|
</P>
|
||||||
|
<PRE>
|
||||||
|
T1 * ... * Tn === {p1 : T1 ; ... ; pn : Tn}
|
||||||
|
<t1, ..., tn> === {p1 = t1 ; ... ; pn = tn}
|
||||||
|
</PRE>
|
||||||
|
<P>
|
||||||
|
Thus the labels <CODE>p1, p2,...</CODE> are hard-coded.
|
||||||
|
</P>
|
||||||
<A NAME="toc55"></A>
|
<A NAME="toc55"></A>
|
||||||
<H3>Lexers and unlexers</H3>
|
<H3>Predefined types and operations</H3>
|
||||||
|
<P>
|
||||||
|
GF has the following predefined categories in abstract syntax:
|
||||||
|
</P>
|
||||||
|
<PRE>
|
||||||
|
cat Int ; -- integers, e.g. 0, 5, 743145151019
|
||||||
|
cat Float ; -- floats, e.g. 0.0, 3.1415926
|
||||||
|
cat String ; -- strings, e.g. "", "foo", "123"
|
||||||
|
</PRE>
|
||||||
|
<P>
|
||||||
|
The objects of each of these categories are <B>literals</B>
|
||||||
|
as indicated in the comments above. No <CODE>fun</CODE> definition
|
||||||
|
can have a predefined category as its value type, but
|
||||||
|
they can be used as arguments. For example:
|
||||||
|
</P>
|
||||||
|
<PRE>
|
||||||
|
fun StreetAddress : Int -> String -> Address ;
|
||||||
|
lin StreetAddress number street = {s = number.s ++ street.s} ;
|
||||||
|
|
||||||
|
-- e.g. (StreetAddress 10 "Downing Street") : Address
|
||||||
|
</PRE>
|
||||||
|
<P></P>
|
||||||
<A NAME="toc56"></A>
|
<A NAME="toc56"></A>
|
||||||
<H3>Grammars of formal languages</H3>
|
<H2>More features of the module system</H2>
|
||||||
<A NAME="toc57"></A>
|
<A NAME="toc57"></A>
|
||||||
<H3>Resource grammars and their reuse</H3>
|
<H3>Resource grammars and their reuse</H3>
|
||||||
<A NAME="toc58"></A>
|
<A NAME="toc58"></A>
|
||||||
<H3>Interfaces, instances, and functors</H3>
|
<H3>Interfaces, instances, and functors</H3>
|
||||||
<A NAME="toc59"></A>
|
<A NAME="toc59"></A>
|
||||||
<H3>Speech input and output</H3>
|
<H3>Restricted inheritance and qualified opening</H3>
|
||||||
<A NAME="toc60"></A>
|
<A NAME="toc60"></A>
|
||||||
<H3>Embedded grammars in Haskell, Java, and Prolog</H3>
|
<H2>More concepts of abstract syntax</H2>
|
||||||
<A NAME="toc61"></A>
|
<A NAME="toc61"></A>
|
||||||
<H3>Dependent types, variable bindings, semantic definitions</H3>
|
<H3>Dependent types</H3>
|
||||||
<A NAME="toc62"></A>
|
<A NAME="toc62"></A>
|
||||||
<H3>Transfer modules</H3>
|
<H3>Higher-order abstract syntax</H3>
|
||||||
<A NAME="toc63"></A>
|
<A NAME="toc63"></A>
|
||||||
|
<H3>Semantic definitions</H3>
|
||||||
|
<A NAME="toc64"></A>
|
||||||
|
<H3>Case study: grammars of formal languages</H3>
|
||||||
|
<A NAME="toc65"></A>
|
||||||
|
<H2>Transfer modules</H2>
|
||||||
|
<A NAME="toc66"></A>
|
||||||
|
<H2>Practical issues</H2>
|
||||||
|
<A NAME="toc67"></A>
|
||||||
|
<H3>Lexers and unlexers</H3>
|
||||||
|
<A NAME="toc68"></A>
|
||||||
|
<H3>Efficiency of grammars</H3>
|
||||||
|
<A NAME="toc69"></A>
|
||||||
|
<H3>Speech input and output</H3>
|
||||||
|
<A NAME="toc70"></A>
|
||||||
|
<H3>Communicating with GF</H3>
|
||||||
|
<A NAME="toc71"></A>
|
||||||
|
<H3>Embedded grammars in Haskell, Java, and Prolog</H3>
|
||||||
|
<A NAME="toc72"></A>
|
||||||
<H3>Alternative input and output grammar formats</H3>
|
<H3>Alternative input and output grammar formats</H3>
|
||||||
|
|
||||||
<!-- html code generated by txt2tags 2.3 (http://txt2tags.sf.net) -->
|
<!-- html code generated by txt2tags 2.3 (http://txt2tags.sf.net) -->
|
||||||
|
|||||||
@@ -464,18 +464,11 @@ type used for linearization in GF is
|
|||||||
```
|
```
|
||||||
which has one field, with **label** ``s`` and type ``Str``.
|
which has one field, with **label** ``s`` and type ``Str``.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Examples of records of this type are
|
Examples of records of this type are
|
||||||
```
|
```
|
||||||
{s = "foo"}
|
{s = "foo"}
|
||||||
{s = "hello" ++ "world"}
|
{s = "hello" ++ "world"}
|
||||||
```
|
```
|
||||||
The type ``Str`` is really the type of **token lists**, but
|
|
||||||
most of the time one can conveniently think of it as the type of strings,
|
|
||||||
denoted by string literals in double quotes.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Whenever a record ``r`` of type ``{s : Str}`` is given,
|
Whenever a record ``r`` of type ``{s : Str}`` is given,
|
||||||
``r.s`` is an object of type ``Str``. This is
|
``r.s`` is an object of type ``Str``. This is
|
||||||
@@ -485,6 +478,23 @@ of fields from a record:
|
|||||||
- if //r// : ``{`` ... //p// : //T// ... ``}`` then //r.p// : //T//
|
- if //r// : ``{`` ... //p// : //T// ... ``}`` then //r.p// : //T//
|
||||||
|
|
||||||
|
|
||||||
|
The type ``Str`` is really the type of **token lists**, but
|
||||||
|
most of the time one can conveniently think of it as the type of strings,
|
||||||
|
denoted by string literals in double quotes.
|
||||||
|
|
||||||
|
Notice that
|
||||||
|
``` "hello world"
|
||||||
|
is not recommended as an expression of type ``Str``. It denotes
|
||||||
|
a token with a space in it, and will usually
|
||||||
|
not work with the lexical analysis that precedes parsing. A shorthand
|
||||||
|
exemplified by
|
||||||
|
``` ["hello world and people"] === "hello" ++ "world" ++ "and" ++ "people"
|
||||||
|
can be used for lists of tokens. The expression
|
||||||
|
``` []
|
||||||
|
denotes the empty token list.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
%--!
|
%--!
|
||||||
===An abstract syntax example===
|
===An abstract syntax example===
|
||||||
|
|
||||||
@@ -1274,8 +1284,6 @@ different linearization types of noun phrases and verb phrases:
|
|||||||
We say that the number of ``NP`` is an **inherent feature**,
|
We say that the number of ``NP`` is an **inherent feature**,
|
||||||
whereas the number of ``VP`` is **parametric**.
|
whereas the number of ``VP`` is **parametric**.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
The agreement rule itself is expressed in the linearization rule of
|
The agreement rule itself is expressed in the linearization rule of
|
||||||
the predication structure:
|
the predication structure:
|
||||||
```
|
```
|
||||||
@@ -1295,28 +1303,33 @@ the formation of noun phrases and verb phrases.
|
|||||||
===English concrete syntax with parameters===
|
===English concrete syntax with parameters===
|
||||||
|
|
||||||
```
|
```
|
||||||
concrete PaleolithicEng of Paleolithic = open MorphoEng in {
|
concrete PaleolithicEng of Paleolithic = open Prelude, MorphoEng in {
|
||||||
lincat
|
lincat
|
||||||
S, A = {s : Str} ;
|
S, A = SS ;
|
||||||
VP, CN, V, TV = {s : Number => Str} ;
|
VP, CN, V, TV = {s : Number => Str} ;
|
||||||
NP = {s : Str ; n : Number} ;
|
NP = {s : Str ; n : Number} ;
|
||||||
lin
|
lin
|
||||||
PredVP np vp = {s = np.s ++ vp.s ! np.n} ;
|
PredVP np vp = ss (np.s ++ vp.s ! np.n) ;
|
||||||
UseV v = v ;
|
UseV v = v ;
|
||||||
ComplTV tv np = {s = \\n => tv.s ! n ++ np.s} ;
|
ComplTV tv np = {s = \\n => tv.s ! n ++ np.s} ;
|
||||||
UseA a = {s = \\n => case n of {Sg => "is" ; Pl => "are"} ++ a.s} ;
|
UseA a = {s = \\n => case n of {Sg => "is" ; Pl => "are"} ++ a.s} ;
|
||||||
This cn = {s = "this" ++ cn.s ! Sg } ;
|
This = det Sg "this" ;
|
||||||
Indef cn = {s = "a" ++ cn.s ! Sg} ;
|
Indef = det Sg "a" ;
|
||||||
All cn = {s = "all" ++ cn.s ! Pl} ;
|
All = det Pl "all" ;
|
||||||
Two cn = {s = "two" ++ cn.s ! Pl} ;
|
Two = det Pl "two" ;
|
||||||
ModA a cn = {s = \\n => a.s ++ cn.s ! n} ;
|
ModA a cn = {s = \\n => a.s ++ cn.s ! n} ;
|
||||||
Louse = mkNoun "louse" "lice" ;
|
Louse = mkNoun "louse" "lice" ;
|
||||||
Snake = regNoun "snake" ;
|
Snake = regNoun "snake" ;
|
||||||
Green = {s = "green"} ;
|
Green = ss "green" ;
|
||||||
Warm = {s = "warm"} ;
|
Warm = ss "warm" ;
|
||||||
Laugh = regVerb "laugh" ;
|
Laugh = regVerb "laugh" ;
|
||||||
Sleep = regVerb "sleep" ;
|
Sleep = regVerb "sleep" ;
|
||||||
Kill = regVerb "kill" ;
|
Kill = regVerb "kill" ;
|
||||||
|
oper
|
||||||
|
det : Number -> Str -> Noun -> {s : Str ; n : Number} = \n,d,cn -> {
|
||||||
|
s = d ++ cn.s ! n ;
|
||||||
|
n = n
|
||||||
|
} ;
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -1326,22 +1339,18 @@ lin
|
|||||||
===Hierarchic parameter types===
|
===Hierarchic parameter types===
|
||||||
|
|
||||||
The reader familiar with a functional programming language such as
|
The reader familiar with a functional programming language such as
|
||||||
<a href="http://www.haskell.org">Haskell<a> must have noticed the similarity
|
[Haskell http://www.haskell.org] must have noticed the similarity
|
||||||
between parameter types in GF and algebraic datatypes (``data`` definitions
|
between parameter types in GF and **algebraic datatypes** (``data`` definitions
|
||||||
in Haskell). The GF parameter types are actually a special case of algebraic
|
in Haskell). The GF parameter types are actually a special case of algebraic
|
||||||
datatypes: the main restriction is that in GF, these types must be finite.
|
datatypes: the main restriction is that in GF, these types must be finite.
|
||||||
(This restriction makes it possible to invert linearization rules into
|
(It is this restriction that makes it possible to invert linearization rules into
|
||||||
parsing methods.)
|
parsing methods.)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
However, finite is not the same thing as enumerated. Even in GF, parameter
|
However, finite is not the same thing as enumerated. Even in GF, parameter
|
||||||
constructors can take arguments, provided these arguments are from other
|
constructors can take arguments, provided these arguments are from other
|
||||||
parameter types (recursion is forbidden). Such parameter types impose a
|
parameter types - only recursion is forbidden. Such parameter types impose a
|
||||||
hierarchic order among parameters. They are often useful to define
|
hierarchic order among parameters. They are often needed to define
|
||||||
linguistically accurate parameter systems.
|
the linguistically most accurate parameter systems.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
To give an example, Swedish adjectives
|
To give an example, Swedish adjectives
|
||||||
are inflected in number (singular or plural) and
|
are inflected in number (singular or plural) and
|
||||||
@@ -1396,7 +1405,7 @@ file for later use, by the command ``morpho_list = ml``
|
|||||||
```
|
```
|
||||||
> morpho_list -number=25 -cat=V
|
> morpho_list -number=25 -cat=V
|
||||||
```
|
```
|
||||||
The number flag gives the number of exercises generated.
|
The ``number`` flag gives the number of exercises generated.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1409,9 +1418,7 @@ verbs, such as //switch off//. The linearization of
|
|||||||
a sentence may place the object between the verb and the particle:
|
a sentence may place the object between the verb and the particle:
|
||||||
//he switched it off//.
|
//he switched it off//.
|
||||||
|
|
||||||
|
The first of the following judgements defines transitive verbs as
|
||||||
|
|
||||||
The first of the following judgements defines transitive verbs as a
|
|
||||||
**discontinuous constituents**, i.e. as having a linearization
|
**discontinuous constituents**, i.e. as having a linearization
|
||||||
type with two strings and not just one. The second judgement
|
type with two strings and not just one. The second judgement
|
||||||
shows how the constituents are separated by the object in complementization.
|
shows how the constituents are separated by the object in complementization.
|
||||||
@@ -1419,38 +1426,106 @@ shows how the constituents are separated by the object in complementization.
|
|||||||
lincat TV = {s : Number => Str ; s2 : Str} ;
|
lincat TV = {s : Number => Str ; s2 : Str} ;
|
||||||
lin ComplTV tv obj = {s = \\n => tv.s ! n ++ obj.s ++ tv.s2} ;
|
lin ComplTV tv obj = {s = \\n => tv.s ! n ++ obj.s ++ tv.s2} ;
|
||||||
```
|
```
|
||||||
|
There is no restriction on the number of discontinuous constituents
|
||||||
|
(or other fields) a ``lincat`` may contain. The only condition is that
|
||||||
|
the fields must be of finite types, i.e. built from records, tables,
|
||||||
GF currently requires that all fields in linearization records that
|
parameters, and ``Str``, and not functions. A mathematical result
|
||||||
have a table with value type ``Str`` have as labels
|
about parsing in GF says that the worst-case complexity of parsing
|
||||||
either ``s`` or ``s`` with an integer index.
|
increases with the number of discontinuous constituents. Moreover,
|
||||||
|
the parsing and linearization commands only give reliable results
|
||||||
|
for categories whose linearization type has a unique ``Str`` valued
|
||||||
|
field labelled ``s``.
|
||||||
|
|
||||||
|
|
||||||
%--!
|
%--!
|
||||||
==Topics still to be written==
|
==More constructs for concrete syntax==
|
||||||
|
|
||||||
|
|
||||||
|
%--!
|
||||||
===Free variation===
|
===Free variation===
|
||||||
|
|
||||||
|
Sometimes there are many alternative ways to define a concrete syntax.
|
||||||
|
For instance, the verb negation in English can be expressed both by
|
||||||
|
//does not// and //doesn't//. In linguistic terms, these expressions
|
||||||
|
are in **free variation**. The ``variants`` construct of GF can
|
||||||
|
be used to give a list of strings in free variation. For example,
|
||||||
|
```
|
||||||
|
NegVerb verb = {s = variants {["does not"] ; "doesn't"} ++ verb.s} ;
|
||||||
|
```
|
||||||
|
An empty variant list
|
||||||
|
```
|
||||||
|
variants {}
|
||||||
|
```
|
||||||
|
can be used e.g. if a word lacks a certain form.
|
||||||
|
|
||||||
|
In general, ``variants`` should be used cautiously. It is not
|
||||||
|
recommended for modules aimed to be libraries, because the
|
||||||
|
user of the library has no way to choose among the variants.
|
||||||
|
Moreover, even though ``variants`` admits lists of any type,
|
||||||
|
its semantics for complex types can cause surprises.
|
||||||
|
|
||||||
|
|
||||||
===Record extension, tuples===
|
|
||||||
|
|
||||||
|
===Record extension and subtyping===
|
||||||
|
|
||||||
|
Record types and records can be **extended** with new fields. For instance,
|
||||||
|
in German it is natural to see transitive verbs as verbs with a case.
|
||||||
|
The symbol ``**`` is used for both constructs.
|
||||||
|
```
|
||||||
|
lincat TV = Verb ** {c : Case} ;
|
||||||
|
|
||||||
|
lin Follow = regVerb "folgen" ** {c = Dative} ;
|
||||||
|
```
|
||||||
|
To extend a record type or a record with a field whose label it
|
||||||
|
already has is a type error.
|
||||||
|
|
||||||
|
A record type //T// is a **subtype** of another one //R//, if //T// has
|
||||||
|
all the fields of //R// and possibly other fields. For instance,
|
||||||
|
an extension of a record type is always a subtype of it.
|
||||||
|
|
||||||
|
If //T// is a subtype of //R//, an object of //T// can be used whenever
|
||||||
|
an object of //R// is required. For instance, a transitive verb can
|
||||||
|
be used whenever a verb is required.
|
||||||
|
|
||||||
|
**Contravariance** means that a function taking an //R// as argument
|
||||||
|
can also be applied to any object of a subtype //T//.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
===Tuples and product types===
|
||||||
|
|
||||||
|
Product types and tuples are syntactic sugar for record types and records:
|
||||||
|
```
|
||||||
|
T1 * ... * Tn === {p1 : T1 ; ... ; pn : Tn}
|
||||||
|
<t1, ..., tn> === {p1 = t1 ; ... ; pn = tn}
|
||||||
|
```
|
||||||
|
Thus the labels ``p1, p2,...`` are hard-coded.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
===Predefined types and operations===
|
===Predefined types and operations===
|
||||||
|
|
||||||
|
GF has the following predefined categories in abstract syntax:
|
||||||
|
```
|
||||||
|
cat Int ; -- integers, e.g. 0, 5, 743145151019
|
||||||
|
cat Float ; -- floats, e.g. 0.0, 3.1415926
|
||||||
|
cat String ; -- strings, e.g. "", "foo", "123"
|
||||||
|
```
|
||||||
|
The objects of each of these categories are **literals**
|
||||||
|
as indicated in the comments above. No ``fun`` definition
|
||||||
|
can have a predefined category as its value type, but
|
||||||
|
they can be used as arguments. For example:
|
||||||
|
```
|
||||||
|
fun StreetAddress : Int -> String -> Address ;
|
||||||
|
lin StreetAddress number street = {s = number.s ++ street.s} ;
|
||||||
|
|
||||||
|
-- e.g. (StreetAddress 10 "Downing Street") : Address
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
===Lexers and unlexers===
|
%--!
|
||||||
|
==More features of the module system==
|
||||||
|
|
||||||
|
|
||||||
===Grammars of formal languages===
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
===Resource grammars and their reuse===
|
===Resource grammars and their reuse===
|
||||||
@@ -1459,20 +1534,45 @@ either ``s`` or ``s`` with an integer index.
|
|||||||
===Interfaces, instances, and functors===
|
===Interfaces, instances, and functors===
|
||||||
|
|
||||||
|
|
||||||
|
===Restricted inheritance and qualified opening===
|
||||||
|
|
||||||
|
|
||||||
|
==More concepts of abstract syntax==
|
||||||
|
|
||||||
|
|
||||||
|
===Dependent types===
|
||||||
|
|
||||||
|
===Higher-order abstract syntax===
|
||||||
|
|
||||||
|
===Semantic definitions===
|
||||||
|
|
||||||
|
===Case study: grammars of formal languages===
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
==Transfer modules==
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
==Practical issues==
|
||||||
|
|
||||||
|
|
||||||
|
===Lexers and unlexers===
|
||||||
|
|
||||||
|
|
||||||
|
===Efficiency of grammars===
|
||||||
|
|
||||||
|
|
||||||
===Speech input and output===
|
===Speech input and output===
|
||||||
|
|
||||||
|
|
||||||
|
===Communicating with GF===
|
||||||
|
|
||||||
|
|
||||||
===Embedded grammars in Haskell, Java, and Prolog===
|
===Embedded grammars in Haskell, Java, and Prolog===
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
===Dependent types, variable bindings, semantic definitions===
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
===Transfer modules===
|
|
||||||
|
|
||||||
|
|
||||||
===Alternative input and output grammar formats===
|
===Alternative input and output grammar formats===
|
||||||
|
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ import GF.Canon.AbsGFC
|
|||||||
import GF.Canon.GFC
|
import GF.Canon.GFC
|
||||||
import GF.Grammar.PrGrammar
|
import GF.Grammar.PrGrammar
|
||||||
import GF.Canon.CMacros
|
import GF.Canon.CMacros
|
||||||
|
import GF.Canon.Look
|
||||||
import GF.Grammar.LookAbs
|
import GF.Grammar.LookAbs
|
||||||
import GF.Infra.Ident
|
import GF.Infra.Ident
|
||||||
import qualified GF.Grammar.Macros as M
|
import qualified GF.Grammar.Macros as M
|
||||||
@@ -63,13 +64,15 @@ isKnownWord mo = not . null . snd . appMorphoOnly mo
|
|||||||
mkMorpho :: CanonGrammar -> Ident -> Morpho
|
mkMorpho :: CanonGrammar -> Ident -> Morpho
|
||||||
mkMorpho gr a = tcompile $ concatMap mkOne $ allItems where
|
mkMorpho gr a = tcompile $ concatMap mkOne $ allItems where
|
||||||
|
|
||||||
|
comp = ccompute gr [] -- to undo 'values' optimization
|
||||||
|
|
||||||
mkOne (Left (fun,c)) = map (prOne fun c) $ allLins fun
|
mkOne (Left (fun,c)) = map (prOne fun c) $ allLins fun
|
||||||
mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun
|
mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun
|
||||||
|
|
||||||
-- gather forms of lexical items
|
-- gather forms of lexical items
|
||||||
allLins fun@(m,f) = errVal [] $ do
|
allLins fun@(m,f) = errVal [] $ do
|
||||||
ts <- allLinsOfFun gr (CIQ a f)
|
ts <- allLinsOfFun gr (CIQ a f)
|
||||||
ss <- mapM (mapPairsM (mapPairsM (return . wordsInTerm))) ts
|
ss <- mapM (mapPairsM (mapPairsM (liftM wordsInTerm . comp))) ts
|
||||||
return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs]
|
return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs]
|
||||||
prOne (_,f) c (ps,s) = (s, [prt f +++ tagPrt c +++ unwords (map prt_ ps)])
|
prOne (_,f) c (ps,s) = (s, [prt f +++ tagPrt c +++ unwords (map prt_ ps)])
|
||||||
|
|
||||||
|
|||||||
@@ -532,7 +532,6 @@ q, quit: q
|
|||||||
Each of the flags can have the suffix _subs, which performs
|
Each of the flags can have the suffix _subs, which performs
|
||||||
common subexpression elimination after the main optimization.
|
common subexpression elimination after the main optimization.
|
||||||
Thus, -optimize=all_subs is the most aggressive one.
|
Thus, -optimize=all_subs is the most aggressive one.
|
||||||
|
|
||||||
-optimize=share share common branches in tables
|
-optimize=share share common branches in tables
|
||||||
-optimize=parametrize first try parametrize then do share with the rest
|
-optimize=parametrize first try parametrize then do share with the rest
|
||||||
-optimize=values represent tables as courses-of-values
|
-optimize=values represent tables as courses-of-values
|
||||||
|
|||||||
Reference in New Issue
Block a user