lecture 3 material and README

This commit is contained in:
Aarne Ranta
2025-04-07 17:12:58 +02:00
parent d3544950d3
commit 3b11763e34
3 changed files with 102 additions and 1 deletions

View File

@@ -98,3 +98,33 @@ You can work here for a while.
The next step will be to install the RGL, but this can wait a bit.
The instructions in https://www.grammaticalframework.org/download/index-3.11.html should work even for the ARM Mac.
## Lecture 3
Course notes: Chapter 2, Chapter 5
Analysing UD data with shell commands:
```
cat treebanks/UD_Swedish-Talbanken/sv_talbanken-ud-train.conllu | cut -f4 | grep -v "#" | sort
cat treebanks/UD_Swedish-Talbanken/sv_talbanken-ud-train.conllu | cut -f4 | grep -v "#" | sort -u
cat treebanks/UD_Swedish-Talbanken/sv_talbanken-ud-train.conllu | cut -f4 | grep -v "#" | sort -u | wc
```
Again, make sure to learn to use these shell commands!
Adding deptreepy to the pipeline:
```
cat treebanks/UD_English-EWT/en_ewt-ud-train.conllu | ./deptreepy.py "statistics POS"
cat treebanks/UD_English-EWT/en_ewt-ud-train.conllu | ./deptreepy.py "match_wordlines (POS X)"
cat treebanks/UD_English-EWT/en_ewt-ud-train.conllu | ./deptreepy.py "statistics FEATS"
cat treebanks/UD_English-EWT/en_ewt-ud-train.conllu | ./deptreepy.py "match_wordlines (POS NOUN) | statistics FEATS"
```
Download deptreepy and the UD treebanks, and do the same for other treebanks of other languages!
Confirmed the Swedish inflection table by looking up a word at https://svenska.se/, and learned which features are inherent and which are variable.
Started MorphologyEng.gf and MorphologySwe.gf in lecture-03/.

View File

@@ -1 +1,34 @@
-- English noun morphology: a two-form (singular/plural) noun type with
-- worst-case, regular and "smart" paradigms for building it.
resource MorphologyEng = {

  param
    -- Grammatical number, the only feature English nouns inflect for here.
    Number = Sg | Pl ;

  oper
    -- A noun is an inflection table from Number to the word form.
    Noun : Type = {s : Number => Str} ;

    -- Worst-case paradigm: both forms are given explicitly.
    mkNoun : Str -> Str -> Noun = \sg, pl ->
      {s = table {Sg => sg ; Pl => pl}} ;

    -- Regular paradigm: the plural is the singular plus "s".
    regNoun : Str -> Noun = \sg -> mkNoun sg (sg + "s") ;

    -- Smart paradigm: picks the plural by matching the ending of the singular.
    -- The branches are tried in order, so the vowel + "y" case must precede
    -- the general "y" case.
    smartNoun : Str -> Noun = \sg -> case sg of {
      -- sibilant endings take "es": bus-buses, box-boxes, waltz-waltzes
      _ + ("s" | "ch" | "sh" | "x" | "z") => mkNoun sg (sg + "es") ;
      -- vowel + "y" is regular: boy-boys, day-days
      _ + ("ay" | "ey" | "oy" | "uy") => regNoun sg ;
      -- any other "y" becomes "ies": baby-babies, fly-flies
      x + "y" => mkNoun sg (x + "ies") ;
      _ => regNoun sg
    } ;

    -- to test: one entry per way of building a noun
    teacher_N : Noun = {s = table {Sg => "teacher" ; Pl => "teachers"}} ;
    cat_N : Noun = mkNoun "cat" "cats" ;
    dog_N : Noun = regNoun "dog" ;
    bus_N : Noun = smartNoun "bus" ;
    box_N : Noun = smartNoun "box" ;
    baby_N : Noun = smartNoun "baby" ;
    fly_N : Noun = smartNoun "fly" ;
}

View File

@@ -0,0 +1,38 @@
-- Swedish noun morphology: nouns inflect for number, definiteness and case,
-- and carry an inherent gender.
resource MorphologySwe = {

  param
    Case = Nom | Gen ;
    Definite = Ind | Def ;
    Gender = Com | Neut ;
    Number = Sg | Pl ;
    -- NF is a constructor packing the three variable features into one value.
    NForm = NF Number Definite Case ;

  oper
    -- Equivalent curried formulation, kept for comparison:
    -- Noun = {s : Number => Definite => Case => Str ; g : Gender} ;
    Noun = {s : NForm => Str ; g : Gender} ;

    -- Worst-case paradigm: all eight forms plus the gender are given.
    -- The argument names in the signature abbreviate the form:
    -- s/p = Sg/Pl, i/d = Ind/Def, n/g = Nom/Gen.
    mkNoun : (sin, sig, sdn, sdg, pin, pig, pdn, pdg : Str) -> Gender -> Noun =
      \sgIndNom, sgIndGen, sgDefNom, sgDefGen,
       plIndNom, plIndGen, plDefNom, plDefGen, gender -> {
        s = table {
          NF Sg Ind Nom => sgIndNom ;
          NF Sg Ind Gen => sgIndGen ;
          NF Sg Def Nom => sgDefNom ;
          NF Sg Def Gen => sgDefGen ;
          NF Pl Ind Nom => plIndNom ;
          NF Pl Ind Gen => plIndGen ;
          NF Pl Def Nom => plDefNom ;
          NF Pl Def Gen => plDefGen
        } ;
        g = gender
      } ;

    -- Smart paradigm from the dictionary form. Words ending in "a" follow the
    -- mamma-mammor pattern; every other word follows the bil-bilar pattern.
    -- Both branches produce common-gender nouns.
    smartNoun : Str -> Noun = \lemma ->
      let
        -- Takes the four nominative forms; each genitive is the matching
        -- nominative with "s" appended.
        withGen : (sgInd, sgDef, plInd, plDef : Str) -> Noun =
          \sgInd, sgDef, plInd, plDef ->
            mkNoun sgInd (sgInd + "s") sgDef (sgDef + "s")
                   plInd (plInd + "s") plDef (plDef + "s") Com
      in case lemma of {
        stem + "a" => withGen lemma (lemma + "n") (stem + "or") (stem + "orna") ;
        _ => withGen lemma (lemma + "en") (lemma + "ar") (lemma + "arna")
      } ;
}