From 3b11763e340428578b676e8c8eb428289d319ec8 Mon Sep 17 00:00:00 2001 From: Aarne Ranta Date: Mon, 7 Apr 2025 17:12:58 +0200 Subject: [PATCH] lecture 3 material and README --- lectures/README.md | 30 ++++++++++++++++++++++ lectures/lecture-03/MorphologyEng.gf | 35 ++++++++++++++++++++++++- lectures/lecture-03/MorphologySwe.gf | 38 ++++++++++++++++++++++++++++ 3 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 lectures/lecture-03/MorphologySwe.gf diff --git a/lectures/README.md b/lectures/README.md index 5e1d1b9..d75b48f 100644 --- a/lectures/README.md +++ b/lectures/README.md @@ -98,3 +98,33 @@ You can work here for a while. The next step will be to install the RGL, but this can wait a bit. The instructions in https://www.grammaticalframework.org/download/index-3.11.html should work even for the ARM Mac. + +## Lecture 3 + +Course notes: Chapter 2, Chapter 5 + +Analysing UD data with shell commands: +``` +cat treebanks/UD_Swedish-Talbanken/sv_talbanken-ud-train.conllu | cut -f4 | grep -v "#" | sort +cat treebanks/UD_Swedish-Talbanken/sv_talbanken-ud-train.conllu | cut -f4 | grep -v "#" | sort -u +cat treebanks/UD_Swedish-Talbanken/sv_talbanken-ud-train.conllu | cut -f4 | grep -v "#" | sort -u | wc +``` +Again, make sure to learn to use these shell commands! + +Adding deptreepy to the pipeline: +``` +cat treebanks/UD_English-EWT/en_ewt-ud-train.conllu | ./deptreepy.py "statistics POS" +cat treebanks/UD_English-EWT/en_ewt-ud-train.conllu | ./deptreepy.py "match_wordlines (POS X)" +cat treebanks/UD_English-EWT/en_ewt-ud-train.conllu | ./deptreepy.py "statistics FEATS" +cat treebanks/UD_English-EWT/en_ewt-ud-train.conllu | ./deptreepy.py "match_wordlines (POS NOUN) | statistics FEATS" +``` +Download deptreepy and the UD treebanks, and do the same for other treebanks of other languages! + +Confirmed the Swedish inflection table by looking up a word at https://svenska.se/, and also learned what is inherent and what is variable.
+
+Started MorphologyEng.gf and MorphologySwe.gf in lecture-03/.
+
+
+
+
+
diff --git a/lectures/lecture-03/MorphologyEng.gf b/lectures/lecture-03/MorphologyEng.gf
index ddfd094..7f9f7d8 100644
--- a/lectures/lecture-03/MorphologyEng.gf
+++ b/lectures/lecture-03/MorphologyEng.gf
@@ -1 +1,34 @@
-resource MorphologyEng = {}
+resource MorphologyEng = {
+-- English noun morphology: a Number parameter, a Noun type, and paradigms from explicit to smart.
+param
+  Number = Sg | Pl ;
+
+oper
+  Noun : Type = {s : Number => Str} ; -- inflection table indexed by Number
+  -- worst-case paradigm: both forms are given explicitly
+  mkNoun : Str -> Str -> Noun = \sg, pl ->
+    {s = table {Sg => sg ; Pl => pl}} ;
+  -- regular paradigm: plural = singular + "s"
+  regNoun : Str -> Noun = \sg -> mkNoun sg (sg + "s") ;
+  -- smart paradigm: picks the plural from the ending of the singular; branches are tried in order
+  smartNoun : Str -> Noun = \sg -> case sg of {
+    _ + ("s" | "x" | "z" | "ch" | "sh") => mkNoun sg (sg + "es") ; -- sibilant ending: bus-es, box-es
+    _ + ("ay" | "ey" | "oy" | "uy") => regNoun sg ; -- vowel + y takes plain "s"; must precede the "y" branch
+    x + "y" => mkNoun sg (x + "ies") ; -- any other final y: bab-ies
+    _ => regNoun sg
+    } ;
+
+-- test lexicon: one entry per paradigm, and one per -es / -ies branch of smartNoun
+  teacher_N : Noun = {s = table {Sg => "teacher" ; Pl => "teachers"}} ;
+
+  cat_N : Noun = mkNoun "cat" "cats" ;
+
+  dog_N : Noun = regNoun "dog" ;
+
+  bus_N : Noun = smartNoun "bus" ;
+  box_N : Noun = smartNoun "box" ;
+  baby_N : Noun = smartNoun "baby" ;
+  fly_N : Noun = smartNoun "fly" ;
+
+}
+
diff --git a/lectures/lecture-03/MorphologySwe.gf b/lectures/lecture-03/MorphologySwe.gf
new file mode 100644
index 0000000..d3d92cd
--- /dev/null
+++ b/lectures/lecture-03/MorphologySwe.gf
@@ -0,0 +1,38 @@
+resource MorphologySwe = {
+-- Swedish noun morphology: forms vary in number, definiteness and case; gender is a fixed field of the noun.
+param
+  Case = Nom | Gen ;
+  Definite = Ind | Def ;
+  Gender = Com | Neut ;
+  Number = Sg | Pl ;
+
+  NForm = NF Number Definite Case ; -- NF is a constructor
+
+oper
+  -- alternative with nested tables: Noun = {s : Number => Definite => Case => Str ; g : Gender} ;
+  Noun : Type = {s : NForm => Str ; g : Gender} ;
+  -- worst-case paradigm: eight forms (Sg before Pl, Ind before Def, Nom before Gen) and the gender
+  mkNoun : (sin, sig, sdn, sdg, pin, pig, pdn, pdg : Str) -> Gender -> Noun =
+    \sin, sig, sdn, sdg, pin, pig, pdn, pdg, g -> {
+    s = table {
+      NF Sg Ind Nom => sin ;
+      NF Sg Ind Gen => sig ;
+      NF Sg Def Nom => sdn ;
+      NF Sg Def Gen => sdg ;
+      NF Pl Ind Nom => pin ;
+      NF Pl Ind Gen => pig ;
+      NF Pl Def Nom => pdn ;
+      NF Pl Def Gen => pdg
+      } ;
+    g = g
+    } ;
+  -- smart paradigm: nouns in -a get plural -or (mamma), all others plural -ar (bil); both branches are Com
+  smartNoun : Str -> Noun = \mamma -> case mamma of {
+    mamm + "a" => mkNoun mamma (mamma + "s") (mamma + "n") (mamma + "ns")
+                         (mamm + "or") (mamm + "ors") (mamm + "orna") (mamm + "ornas")
+                         Com ;
+    bil => mkNoun bil (bil + "s") (bil + "en") (bil + "ens")
+                  (bil + "ar") (bil + "ars") (bil + "arna") (bil + "arnas") Com
+    } ;
+
+}