4 Commits
main ... main

Author SHA1 Message Date
Aarne Ranta
14ece60235 lecture 2 examples 2026-04-01 12:04:05 +02:00
aarneranta
c0bc9c85f2 lecture 1 examples 2026-03-30 17:28:42 +02:00
aarneranta
9d0f650881 last year's lecture material moved to directory 2025 2026-03-30 07:43:08 +02:00
Arianna Masciolini
088f52a0f6 2026 modifications to lab 3 2026-03-29 21:15:06 +02:00
60 changed files with 125 additions and 628 deletions

View File

@@ -1,6 +0,0 @@
((nil
. ((eval
. (progn (add-to-list 'load-path (project-root (project-current)))
(require 'helpers)
(add-hook 'before-save-hook
#'hangul-convert-buffer-to-jamo t))))))

1
.envrc
View File

@@ -1 +0,0 @@
use flake

4
.gitignore vendored
View File

@@ -1,3 +1 @@
*.gfo
.gf-lsp
.direnv
*.gfo

133
flake.lock generated
View File

@@ -1,133 +0,0 @@
{
"nodes": {
"bnfc": {
"flake": false,
"locked": {
"lastModified": 1694438405,
"narHash": "sha256-UmrJlHrMlB4tOxQEnBA1blh3aUV28dJHFZs5LNUbNLU=",
"owner": "BNFC",
"repo": "bnfc",
"rev": "1ead871febe45b8adecad286a90650414e24d8a4",
"type": "github"
},
"original": {
"owner": "BNFC",
"ref": "master",
"repo": "bnfc",
"type": "github"
}
},
"gf": {
"inputs": {
"bnfc": "bnfc",
"gf-core": "gf-core",
"gf-rgl": "gf-rgl",
"gf-wordnet": "gf-wordnet",
"nixpkgs": "nixpkgs"
},
"locked": {
"lastModified": 1695852170,
"narHash": "sha256-xgA9ltioXjh5gYdgmzWACMFeFJu3w4ytMqQlb649oH8=",
"owner": "anka-213",
"repo": "cclaw-nix-stuff",
"rev": "bb591a7d0b6e81f5ae053d2e99a0f8dd9fb5d2a9",
"type": "github"
},
"original": {
"owner": "anka-213",
"ref": "nix-flakes",
"repo": "cclaw-nix-stuff",
"type": "github"
}
},
"gf-core": {
"flake": false,
"locked": {
"lastModified": 1695655790,
"narHash": "sha256-de5Fk5TK5aUL1YQphoYNBrpJj8GRuPJis7komT95+q8=",
"owner": "GrammaticalFramework",
"repo": "gf-core",
"rev": "7d9015e2e159b376cf2ba8332093c9623375557e",
"type": "github"
},
"original": {
"owner": "GrammaticalFramework",
"ref": "master",
"repo": "gf-core",
"type": "github"
}
},
"gf-rgl": {
"flake": false,
"locked": {
"lastModified": 1695810223,
"narHash": "sha256-deTXlcYreUl/pHnFZbjSrZIq8L/XunLTODm7aE9LKSA=",
"owner": "GrammaticalFramework",
"repo": "gf-rgl",
"rev": "f19dcc01f99252feb79823830863389e6cf0fc7f",
"type": "github"
},
"original": {
"owner": "GrammaticalFramework",
"ref": "master",
"repo": "gf-rgl",
"type": "github"
}
},
"gf-wordnet": {
"flake": false,
"locked": {
"lastModified": 1695803720,
"narHash": "sha256-LG5NVsB81Any5P/2WgEpELJKZQmySloHk1F42E7wD1k=",
"owner": "GrammaticalFramework",
"repo": "gf-wordnet",
"rev": "39efb2f91ccb9575c8d96bc272bd2d9f90c1eb23",
"type": "github"
},
"original": {
"owner": "GrammaticalFramework",
"ref": "master",
"repo": "gf-wordnet",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1665056165,
"narHash": "sha256-2C7VfNphJa0FxPoT+suMOmUDVrQ5RIE+NKfDWqElvE4=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "27a89ba43b0fb735ce867e8ab3d2442f8cc61dad",
"type": "github"
},
"original": {
"id": "nixpkgs",
"type": "indirect"
}
},
"nixpkgs_2": {
"locked": {
"lastModified": 1769170682,
"narHash": "sha256-oMmN1lVQU0F0W2k6OI3bgdzp2YOHWYUAw79qzDSjenU=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "c5296fdd05cfa2c187990dd909864da9658df755",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"gf": "gf",
"nixpkgs": "nixpkgs_2"
}
}
},
"root": "root",
"version": 7
}

View File

@@ -1,41 +0,0 @@
{
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
gf.url = "github:anka-213/cclaw-nix-stuff/nix-flakes";
};
outputs = { self, nixpkgs, ... }@inputs:
let
supportedSystems = [
"aarch64-darwin"
"aarch64-linux"
"x86_64-darwin"
"x86_64-linux"
];
each-system = f: nixpkgs.lib.genAttrs supportedSystems (system: f rec {
pkgs = import nixpkgs { inherit system; };
inherit (pkgs) lib;
inherit system;
});
in {
devShells =
each-system ({ pkgs, system, ... }:
let
gf-lsp = import
(pkgs.fetchzip {
url = "https://github.com/anka-213/gf-lsp/archive/refs/tags/1.0.6.0.tar.gz";
hash = "sha256-UAI2qUslzLOWYjTirZJ0y4DZbkPZnVXTY0XtFO8+Rks=";
}) {inherit system;};
in {
default = pkgs.mkShell {
packages = [
inputs.gf.packages.${system}.gf-with-rgl
pkgs.graphviz
gf-lsp.gf-lsp
pkgs.babashka
];
};
});
};
}

View File

@@ -1,56 +0,0 @@
(defun hangul-syllables-to-jamo (str)
"Convert HANGUL SYLLABLES characters in STR to their HANGUL JAMO
equivalents."
(let ((result "")
(i 0))
(while (< i (length str))
(let ((char (aref str i)))
(if (and (>= char #xAC00) (<= char #xD7A3))
;; Hangul syllable
(let* ((code (- char #xAC00))
(lead (/ code (* 21 28)))
(medial (/ (% code (* 21 28)) 28))
(final (% code 28))
(lead-jamo (+ #x1100 lead))
(medial-jamo (+ #x1161 medial))
(final-jamo (if (> final 0) (+ #x11A7 final) nil)))
(setq result
(concat result (char-to-string lead-jamo)
(char-to-string medial-jamo)
(if final-jamo (char-to-string final-jamo) ""))))
;; Not a Hangul syllable
(setq result (concat result (char-to-string char)))))
(setq i (1+ i)))
result))
(defun hangul-convert-region-to-jamo (beg end)
(interactive "r")
(replace-region-contents
beg end (lambda ()
(hangul-syllables-to-jamo (buffer-substring (point-min)
(point-max))))))
(defun hangul-convert-buffer-to-jamo ()
(interactive)
(hangul-convert-region-to-jamo (point-min) (point-max))
(message "Converted Hangul Syllables in buffer to Jamo."))
(require 'dash)
(defconst gf-hangul/choseong
(cl-loop for i from #x1100 to #x1112 collect i))
(defconst gf-hangul/jungseong
(cl-loop for i from #x1161 to #x1175 collect i))
(defconst gf-hangul/batchim
(cl-loop for i from #x11a8 to #x11c2 collect i))
(defun gf-hangul/make-pattern (name seq)
(format "'%s' : pattern Str = #(%s) ;"
name (->> seq
(--map (concat "\"" it "\""))
(-interpose " | ")
(apply #'concat))))
(provide 'helpers)

View File

@@ -49,11 +49,6 @@ abstract MicroLang = {
aPl_Det : Det ; -- indefinite plural ---s
the_Det : Det ; -- definite singular ---s
thePl_Det : Det ; -- definite plural ---s
this_Det : Det ;
thisPl_Det: Det ;
that_Det : Det ;
thatPl_Det: Det ;
UseN : N -> CN ; -- house
AdjCN : AP -> CN -> CN ; -- big house
@@ -167,4 +162,4 @@ fun
yellow_A : A ;
young_A : A ;
}
}

View File

@@ -30,7 +30,7 @@ concrete MicroLangEng of MicroLang = open MicroResEng, Prelude in {
PredVPS np vp = {
s = np.s ! Nom ++ vp.verb.s ! agr2vform np.a ++ vp.compl
} ;
UseV v = {
verb = v ;
compl = [] ;
@@ -62,10 +62,6 @@ concrete MicroLangEng of MicroLang = open MicroResEng, Prelude in {
aPl_Det = {s = "" ; n = Pl} ;
the_Det = {s = "the" ; n = Sg} ;
thePl_Det = {s = "the" ; n = Pl} ;
this_Det = {s = "this"; n = Sg} ;
thisPl_Det = {s = "these"; n = Pl} ;
that_Det = {s = "that"; n = Sg} ;
thatPl_Det = {s = "those"; n = Pl} ;
UseN n = n ;

View File

@@ -1,72 +0,0 @@
resource HangulJamo = open Prelude in {
flags coding=utf8 ;
oper
lemmaToStem : (lemma : Str) -> Str
= \lemma -> case lemma of {
stem + "다" => stem ;
_ => Predef.error ("lemmaToStem was applied to a non-lemma," ++ lemma)
} ;
infinitive : (stem : Str) -> Str
= \stem -> case stem of {
-- 하다 ⇒ 해
init + #ha => init + "해" ;
-- 찾다 ⇒ 찾아, 좁다 ⇒ 좁아
_ + #a_o + #batchim => stem + "아" ;
-- 가다 ⇒ 가,
_ + (#a | #eo) => stem ;
-- 오다 ⇒ 와
init + #o => init + "ᅪ" ;
-- 따르다 ⇒ 따러
init + #eu => init + "ᅥ" ;
-- 기다리다 ⇒ 기다려
init + #i => init + "ᅧ" ;
--
init + #u => init + "ᅯ" ;
-- 맛있다 ⇒ 맛있어
_ => stem + "어"
} ;
present_haeyo : (stem : Str) -> Str
= \stem -> infinitive stem + "요" ;
past_haeyo : (stem : Str) -> Str
= \stem -> infinitive stem + "ᆻ어요" ;
vc_allomorph : (s,vowel,consonant : Str) -> Str
= \s,v,c -> case s of {
_ + #vowel => v ;
_ => c
} ;
oper
a_o : pattern Str = #("ᅡ" | "ᅩ") ;
a : pattern Str = #"ᅡ" ;
o : pattern Str = #"ᅩ" ;
eo : pattern Str = #"ᅥ" ;
eu : pattern Str = #"ᅳ" ;
i : pattern Str = #"ᅵ" ;
u : pattern Str = #"ᅮ" ;
ha : pattern Str = #"하" ;
oper
consonant : pattern Str =
#("ᄀ" | "ᄁ" | "ᄂ" | "ᄃ" | "ᄄ" | "ᄅ" | "ᄆ" | "ᄇ"
| "ᄈ" | "ᄉ" | "ᄊ" | "ᄋ" | "ᄌ" | "ᄍ" | "ᄎ" | "ᄏ"
| "ᄐ" | "ᄑ" | "ᄒ" | "ᆨ" | "ᆩ" | "ᆪ" | "ᆫ" | "ᆬ"
| "ᆭ" | "ᆮ" | "ᆯ" | "ᆰ" | "ᆱ" | "ᆲ" | "ᆳ" | "ᆴ"
| "ᆵ" | "ᆶ" | "ᆷ" | "ᆸ" | "ᆹ" | "ᆺ" | "ᆻ" | "ᆼ"
| "ᆽ" | "ᆾ" | "ᆿ" | "ᇀ" | "ᇁ" | "ᇂ") ;
batchim : pattern Str =
#("ᆨ" | "ᆩ" | "ᆪ" | "ᆫ" | "ᆬ" | "ᆭ" | "ᆮ" | "ᆯ" | "ᆰ"
| "ᆱ" | "ᆲ" | "ᆳ" | "ᆴ" | "ᆵ" | "ᆶ" | "ᆷ" | "ᆸ" | "ᆹ"
| "ᆺ" | "ᆻ" | "ᆼ" | "ᆽ" | "ᆾ" | "ᆿ" | "ᇀ" | "ᇁ" | "ᇂ") ;
choseong : pattern Str =
#("ᄀ" | "ᄁ" | "ᄂ" | "ᄃ" | "ᄄ" | "ᄅ" | "ᄆ" | "ᄇ" | "ᄈ"
| "ᄉ" | "ᄊ" | "ᄋ" | "ᄌ" | "ᄍ" | "ᄎ" | "ᄏ" | "ᄐ" | "ᄑ"
| "ᄒ") ;
vowel : pattern Str =
#("ᅡ" | "ᅢ" | "ᅣ" | "ᅤ" | "ᅥ" | "ᅦ" | "ᅧ" | "ᅨ" | "ᅩ"
| "ᅪ" | "ᅫ" | "ᅬ" | "ᅭ" | "ᅮ" | "ᅯ" | "ᅰ" | "ᅱ" | "ᅲ"
| "ᅳ" | "ᅴ" | "ᅵ") ;
}

View File

@@ -1,165 +0,0 @@
--# -path=.:../abstract
concrete MicroLangKor of MicroLang = open MicroResKor, Prelude in {
flags coding=utf8 ;
-----------------------------------------------------
---------------- Grammar part -----------------------
-----------------------------------------------------
lincat
Utt = {s : Str} ;
S = {s : Str} ;
VP = {verb : Verb ; compl : Str} ; ---s special case of Mini
Comp = Noun ;
AP = Adjective ;
CN = Noun ;
NP = {s : Str} ;
Pron = {s : Str} ;
Det = {s : Str} ;
Prep = {s : Str} ;
V = Verb ;
V2 = Verb2 ;
A = Adjective ;
N = Noun ;
Adv = {s : Str} ;
lin
UttS s = {s = s.s} ;
UttNP np = {s = np.s} ;
PredVPS np vp = {s = np.s ++ vp.compl ++ vp.verb.s ! VPresent} ;
UseV v = {verb = v ; compl = []} ;
ComplV2 v2 np = {verb = v2 ; compl = np.s};
UseN n = n ;
DetCN det cn = {s = det.s ++ cn.s} ;
a_Det = {s = []} ;
aPl_Det = {s = []} ;
the_Det = {s = []} ;
thePl_Det = {s = []} ;
this_Det = {s = "이"} ;
thisPl_Det = {s = "이"} ;
that_Det = {s = "그"} ;
thatPl_Det = {s = "그"} ;
PositA a = a ;
AdjCN ap cn = {s = ap.s ! VAdnomial ++ cn.s} ;
-----------------------------------------------------
---------------- Lexicon part -----------------------
-----------------------------------------------------
-- lin already_Adv = mkAdv "벌써" ;
lin animal_N = mkN "동물" ;
lin apple_N = mkN "사과" ;
lin baby_N = mkN "아기" ;
lin bad_A = mkA "나쁘다" ;
-- lin beer_N = mkN "beer" ;
lin big_A = mkA "크다" ;
-- lin bike_N = mkN "bike" ;
-- lin bird_N = mkN "bird" ;
-- lin black_A = mkA "black" ;
lin blood_N = mkN "피" ;
-- lin blue_A = mkA "blue" ;
-- lin boat_N = mkN "boat" ;
lin book_N = mkN "책" ;
lin boy_N = mkN "소년" ;
lin bread_N = mkN "빵" ;
-- lin break_V2 = mkV2 (mkV "break" "broke" "broken") ;
lin buy_V2 = mkV2 "사다" ;
lin car_N = mkN "자동차" ;
lin cat_N = mkN "고양이" ;
lin child_N = mkN "어린이" ;
-- lin city_N = mkN "city" ;
lin clean_A = mkA "정소하다" ;
lin clever_A = mkA "똑똑하다" ;
-- lin cloud_N = mkN "cloud" ;
-- lin cold_A = mkA "차가운" ;
lin come_V = mkV "오다" ;
lin computer_N = mkN "컴퓨터" ;
-- lin cow_N = mkN "cow" ;
-- lin dirty_A = mkA "더러운" ;
lin dog_N = mkN "개" ;
lin drink_V2 = mkV2 "마시다" ;
lin eat_V2 = mkV2 "먹다" ;
lin find_V2 = mkV2 "찾다" ;
-- lin fire_N = mkN "fire" ;
lin fish_N = mkN "생선" ;
-- lin flower_N = mkN "flower" ;
lin friend_N = mkN "친구" ;
lin girl_N = mkN "소녀" ;
-- lin good_A = mkA "좋은" ;
lin go_V = mkV "가다" ;
-- lin grammar_N = mkN "grammar" ;
-- lin green_A = mkA "green" ;
-- lin heavy_A = mkA "heavy" ;
-- lin horse_N = mkN "horse" ;
-- lin hot_A = mkA "hot" ;
lin house_N = mkN "집" ;
-- lin john_PN = mkPN "John" ;
-- lin jump_V = mkV "jump" ;
lin kill_V2 = mkV2 "죽다" ;
-- lin know_V2 = mkV "알다" ;
lin language_N = mkN "언어" ;
-- lin live_V = mkV "live" ;
-- lin love_V2 = mkV2 (mkV "love") ;
lin man_N = mkN "남자" ;
lin milk_N = mkN "우유" ;
lin music_N = mkN "음악" ;
-- lin new_A = mkA "new" ;
lin now_Adv = mkAdv "지금" ;
-- lin old_A = mkA "낡안" ;
-- lin paris_PN = mkPN "Paris" ;
lin play_V = mkV "놀다" ;
lin read_V2 = mkV2 "읽다" ;
-- lin ready_A = mkA "ready" ;
-- lin red_A = mkA "red" ;
lin river_N = mkN "강" ;
-- lin run_V = mkV "run" "ran" "run" ;
lin sea_N = mkN "바다" ;
lin see_V2 = mkV2 "보다" ;
-- lin ship_N = mkN "ship" ;
lin sleep_V = mkV "자다" ;
-- lin small_A = mkA "작은" ;
lin star_N = mkN "별" ;
lin swim_V = mkV "수영하다" ;
lin teach_V2 = mkV2 "가르치다" ;
-- lin train_N = mkN "train" ;
-- lin travel_V = mkV "travel" ;
-- lin tree_N = mkN "tree" ;
-- lin understand_V2 = mkV2 (mkV "understand" "understood" "understood") ;
lin wait_V2 = mkV2 "기다리다" ;
lin walk_V = mkV "걷다" ;
-- lin warm_A = mkA "따뜻한" ;
lin water_N = mkN "물" ;
-- lin white_A = mkA "하얗은" ;
-- lin wine_N = mkN "wine" ;
lin woman_N = mkN "여자" ;
-- lin yellow_A = mkA "yellow" ;
-- lin young_A = mkA "young" ;
---------------------------
-- Paradigms part ---------
---------------------------
oper
mkN : Str -> Noun
= \s -> lin N {s = s} ;
mkV : Str -> V = \lemma -> lin V (regVerb lemma) ;
mkV2 = overload {
mkV2 : Str -> V2
= \lemma -> lin V2 (mkV lemma) ;
mkV2 : V -> V2
= \v -> lin V2 v ;
} ;
mkAdv : Str -> Adv
= \s -> lin Adv {s = s} ;
mkPrep : Str -> Prep
= \s -> lin Prep {s = s} ;
mkA : Str -> A
= \lemma -> lin A (regAdjective lemma) ;
}

View File

@@ -1,34 +0,0 @@
resource MicroResKor = open Prelude, HangulJamo in {
param
VForm = VLemma | VPresent | VPast | VAdnomial ;
oper
Noun : Type = {s : Str} ;
Verb : Type = {s : VForm => Str} ;
Verb2 : Type = Verb ;
Adjective : Type = Verb ;
lemmaToStem : (lemma : Str) -> Str
= \lemma -> case lemma of {
stem + "다" => stem ;
_ => Predef.error ("lemmaToStem was applied to a non-lemma," ++ lemma)
} ;
reg : (descriptive : Bool) -> (lemma : Str) -> Verb
= \descriptive,lemma ->
let stem = lemmaToStem lemma ;
in {
s = table {
VLemma => lemma ;
VPresent => present_haeyo stem ;
VPast => past_haeyo stem ;
VAdnomial =>
if_then_else Str descriptive
(stem + vc_allomorph stem "ᆫ" "은")
(stem + "는")
}
} ;
regVerb : (lemma : Str) -> Verb = reg False ;
regAdjective : (lemma : Str) -> Adjective = reg True ;
}

View File

@@ -12,28 +12,27 @@ Go to [universaldependencies.org](https://universaldependencies.org/) and downlo
Choose a short (5-10 tokens) and a long (>25 words) sentence and convert it from CoNLL-U to graphical trees by hand.
### Step 2: choose a corpus
Choose one of the two corpora provided in this folder:
Choose a corpus of 25+ sentences.
- [`comp-syntax-corpus-english.txt`](comp-syntax-corpus-english.txt) is a combination of __English__ sentences from different sources, including [the Parallel UD treebank (PUD)](https://github.com/UniversalDependencies/UD_English-PUD/tree/master). If you want to cheat - or just check your answers - you can look for them in the official treebank. You can also compare your analyses with those of an automatic parser, such as [UDPipe](https://lindat.mff.cuni.cz/services/udpipe/), which you can try directly in your browser. These automatic analyses must of course be taken with a grain of salt
- [`comp-syntax-corpus-swedish.txt`](comp-syntax-corpus-swedish.txt) consists of teacher-corrected sentences from the [__Swedish__ Learner Language (SweLL) corpus](https://spraakbanken.gu.se/en/resources/swell-gold), which is currently being annotated in UD for the first time.
In this case, there is no "gold standard" to check your answers against, but you can still compare your solutions with [UDPipe](https://lindat.mff.cuni.cz/services/udpipe/)'s automatic analyses.
If you want to start with __English__, you can use [`comp-syntax-corpus-english.txt`](comp-syntax-corpus-english.txt), a combination of sentences from different sources, including [the Parallel UD treebank (PUD)](https://github.com/UniversalDependencies/UD_English-PUD/tree/master). If you want to cheat - or just check your answers - you can look for them in the official treebank. You can also compare your analyses with those of an automatic parser, such as [UDPipe](https://lindat.mff.cuni.cz/services/udpipe/), which you can try directly in your browser. These automatic analyses must of course be taken with a grain of salt. Note that the first few sentences of this corpus are pre-tokenized and POS-tagged. Each token is in the form `word:<UPOS>`.
In both corpora, the first few sentences are pre-tokenized and POS-tagged. Each token is in the form
`word:<UPOS>`.
If you want to work with __Swedish__ and might be interested in contributing to an [official UD treebank](https://github.com/universaldependencies/UD_Swedish-SweLL), ask Arianna for [a sample of the Swedish Learner Language corpus](https://spraakbanken.gu.se/en/resources/swell).
If you have other data in mind that you think would be interesting to annotate in UD (not necessarily in English or Swedish), don't hesitate to bring it up during a lab session!
### Step 3: annotate
For each sentence in the corpus, the annotation task consists of:
1. analyzing the sentence in UD
2. translating it to a language of your choice
2. translating it to a language of your choice (as long as one of the two versions is in English or Swedish)
3. analyzing your translation
The only required fields are `ID`, `FORM`, `UPOS`, `HEAD` and `DEPREL`.
In the end, you will submit two parallel CoNLL-U files, one containing the analyses of the source sentences and one for the analyses of the translations.
To produce the CoNLL-U files, you may work in your text editor (if you use Visual Studio Code, you can use the [vscode-conllu](https://marketplace.visualstudio.com/items?itemName=lgrobol.vscode-conllu) to get syntax highlighting), use a spreadsheet program and then export to TSV, or use a dedicated graphical annotation tool such as [Arborator](https://arborator.grew.fr/#/).
To produce the CoNLL-U files, you may work in your text editor (you can usually get syntax highlighting by changing the extension to `.tsv`), use a spreadsheet program and then export to TSV, or use a dedicated graphical annotation tool such as [Arborator](https://arborator.grew.fr/#/) (helpful but unstable!).
If you work in your text editor, it might be easier to first write a simplified CoNLL-U, with just the fields `ID`, `FORM`, `UPOS`, `HEAD` and `DEPREL`, separated by tabs, and then expand it to full CoNLL-U with [this script](https://gist.github.com/harisont/612a87d20f729aa3411041f873367fa2) (or similar).
@@ -54,7 +53,7 @@ To fully comply with the CoNLL-U standard, comment lines should consist of key-v
# comment = your comment here
```
but for this assigment lines like
but for this assignment lines like
```
# your comment here
@@ -63,24 +62,14 @@ but for this assigment lines like
are perfectly acceptable too.
### Step 4: make sure your files match the CoNLL-U specification
Once you have full CoNLL-U, you can use [deptreepy](https://github.com/aarneranta/deptreepy/), [STUnD](https://harisont.github.io/STUnD/) or [the official online CoNLL-U viewer](https://universaldependencies.org/conllu_viewer.html) to visualize it.
With deptreepy, you will need to issue the command
`cat my-file.conllu | python deptreepy.py visualize_conllu > my-file.html`
which creates an HTML file you can open in your web browser.
If you can visualize your trees with any of these tools, that's a very good sign that your file _more or less_ matches the CoNLL-U format!
As a last step, validate your treebank with the official UD validator.
Check your treebank with the official UD validator.
To do that, clone or download the [UD tools repository](https://github.com/UniversalDependencies/tools), move inside the corresponding folder and run
```
python validate.py PATH-TO-YOUR-TREEBANK.conllu --lang=2-LETTER-LANGCODE-FOR-YOUR-LANGUAGE --level=1
python validate.py PATH-TO-YOUR-TREEBANK.conllu --lang=2-LETTER-LANGCODE-FOR-YOUR-LANGUAGE --level=2
```
If you want to check for more subtle errors, you can [go up a few levels](https://harisont.github.io/gfaqs.html#ud-validator).
Level 2 should be enough for part 2, but you can [go up a few levels](https://harisont.github.io/gfaqs.html#ud-validator) to check for more subtle errors.
Submit the two CoNLL-U files on Canvas.
@@ -91,7 +80,7 @@ If you want to install MaChAmp on your own computer, keep in mind that very old
For more information, see [here](https://github.com/machamp-nlp/machamp/issues/42).
### Step 1: setting up MaChAmp
1. optional, but recommended: create a Python virtual environment with the command
1. create a Python virtual environment with the command
```
python -m venv ENVNAME
```
@@ -124,7 +113,7 @@ python scripts/misc/cleanconl.py PATH-TO-A-DATASET-SPLIT
This replaces the contents of your input file with a "cleaned up" version of the same treebank.
### Step 3: training
Copy `compsyn.json` to `machamp/configs` and replace the traning and development data paths with the paths to the files you selected/created in step 2.
Copy `compsyn.json` to `machamp/configs` and replace the training and development data paths with the paths to the files you selected/created in step 2.
You can now train your model by running
@@ -152,4 +141,4 @@ Then, use the `machamp/scripts/misc/conll18_ud_eval.py` script to evaluate the s
python scripts/misc/conll18_ud_eval.py PATH-TO-YOUR-PART1-TREEBANK predictions/OUTPUT-FILE-NAME.conllu
```
On Canvas, submit the training logs, the predictions and the output of `conll18_ud_eval.py`, along with a short text summarizing your considerations on the performance of the parser, based on the predictions themselves and on the output of the results of the evaluation.
On Canvas, submit the training logs, the predictions and the output of `conll18_ud_eval.py`, along with a short text summarizing your considerations on the performance of the parser, based on the predictions themselves and on the automatic evaluation.

View File

@@ -6,7 +6,7 @@ The:<DET> study:<NOUN> of:<ADP> volcanoes:<NOUN> is:<AUX> called:<VERB> volcanol
It:<PRON> was:<AUX> conducted:<VERB> just:<ADV> off:<ADP> the:<DET> Mexican:<ADJ> coast:<NOUN> from:<ADP> April:<PROPN> to:<ADP> June:<PROPN> .:<PUNCT>
":<PUNCT> Her:<PRON> voice:<NOUN> literally:<ADV> went:<VERB> around:<ADP> the:<DET> world:<NOUN> ,:<PUNCT> ":<PUNCT> Leive:<PROPN> said:<VERB> .:<PUNCT>
A:<DET> witness:<NOUN> told:<VERB> police:<NOUN> that:<SCONJ> the:<DET> victim:<NOUN> had:<AUX> attacked:<VERB> the:<DET> suspect:<NOUN> in:<ADP> April:<PROPN> .:<PUNCT>
It:<PRON> 's:<AUX> most:<ADV> obvious:<ADJ> when:<SSUBJ> a:<DET> celebrity:<NOUN> 's:<PART> name:<NOUN> is:<AUX> initially:<ADV> quite:<ADV> rare:<ADJ> .:<PUNCT>
It:<PRON> 's:<AUX> most:<ADV> obvious:<ADJ> when:<SCONJ> a:<DET> celebrity:<NOUN> 's:<PART> name:<NOUN> is:<AUX> initially:<ADV> quite:<ADV> rare:<ADJ> .:<PUNCT>
This:<PRON> has:<AUX> not:<PART> stopped:<VERB> investors:<NOUN> flocking:<VERB> to:<PART> put:<VERB> their:<PRON> money:<NOUN> in:<ADP> the:<DET> funds:<NOUN> .:<PUNCT>
This:<DET> discordance:<NOUN> between:<ADP> economic:<ADJ> data:<NOUN> and:<CCONJ> political:<ADJ> rhetoric:<NOUN> is:<AUX> familiar:<ADJ> ,:<PUNCT> or:<CCONJ> should:<AUX> be:<AUX> .:<PUNCT>
The:<DET> feasibility:<NOUN> study:<NOUN> estimates:<VERB> that:<SCONJ> it:<PRON> would:<AUX> take:<VERB> passengers:<NOUN> about:<ADV> four:<NUM> minutes:<NOUN> to:<PART> cross:<VERB> the:<DET> Potomac:<PROPN> River:<PROPN> on:<ADP> the:<DET> gondola:<NOUN> .:<PUNCT>

View File

@@ -1,24 +0,0 @@
Jag:<PRON> tycker:<VERB> att:<SCONJ> du:<PRON> ska:<AUX> börja:<VERB> med:<ADP> en:<DET> språkkurs:<NOUN>.:<PUNCT>
Flerspråkighet:<NOUN> gynnar:<VERB> oss:<PRON> även:<ADV> på:<ADP> arbetsmarknaden:<NOUN>.:<PUNCT>
Språket:<NOUN> är:<AUX> lätt:<ADJ> och:<CCONJ> jag:<PRON> kan:<AUX> läsa:<VERB> utan:<ADP> något:<DET> problem:<PRON>.:<PUNCT>
Man:<PRON> känner:<VERB> sig:<PRON> ensam:<ADJ> när:<SCONJ> man:<PRON> inte:<PART> kan:<AUX> prata:<VERB> språket:<NOUN> bra:<ADV>.:<PUNCT>
Det:<PRON> kan:<AUX> vara:<AUX> kroppsspråk:<NOUN> men:<CCONJ> främst:<ADV> sker:<VERB> det:<PRON> genom:<ADP> talet:<NOUN>.
Språket:<NOUN> är:<AUX> nyckeln:<NOUN> till:<ADP> alla:<DET> låsta:<ADJ> dörrar:<NOUN>,:<PUNCT> har:<AUX> vi:<PRON> hört:<VERB> flera:<ADJ> gånger:<NOUN>.:<PUNCT>
Att:<PART> kunna:<VERB> ett:<DET> språk:<NOUN> är:<AUX> en:<DET> av:<ADP> de:<DET> viktigaste:<ADJ> och:<CCONJ> värdefullaste:<ADJ> egenskaper:<NOUN> en:<DET> människa:<NOUN> kan:<AUX> ha:<VERB> så:<SCONJ> det:<PRON> är:<AUX> värt:<ADJ> mer:<ADV> än:<ADP> vad:<PRON> man:<PRON> tror:<VERB>.:<PUNCT>
Med:<ADP> andra:<ADJ> ord:<NOUN>,:<PUNCT> språket:<NOUN> är:<AUX> nyckeln:<NOUN> till:<ADP> alla:<DET> låsta:<ADJ> dörrar:<NOUN>,:<PUNCT> men:<CCONJ> det:<PRON> finns:<VERB> viktigare:<ADJ> saker:<NOUN> att:<PART> satsa:<VERB> på:<ADP> som:<PRON> jag:<PRON> kommer:<AUX> att:<PART> nämna:<VERB> längre:<ADV> ner:<ADV>.:<PUNCT>
Han:<PRON> kom:<VERB> till:<ADP> Sverige:<PROPN> för:<ADP> 4:<NUM> år:<NOUN> sedan:<ADV>,:<PUNCT> han:<PRON> kunde:<AUX> inte:<PART> tala:<VERB> svenska:<ADJ> språket:<NOUN>,<PUNCT> ingen:<DET> engelska:<NOUN>,:<PUNCT> han:<PRON> kunde:<AUX> i:<ADP> princip:<NOUN> inte:<PART> kommunicera:<VERB> med:<ADP> någon:<PRON> här<ADV>.:<PUNCT>
För:<ADP> det:<DET> första:<ADJ> hänger:<VERB> språket:<NOUN> ihop:<ADV> med:<ADP> tillhörighet:<NOUN>,:<PUNCT> särskilt:<ADV> för:<ADP> de:<DET> nya:<ADJ> invandrare:<NOUN> som:<PRON> har:<AUX> bestämt:<VERB> sig:<PRON> för:<ADP> att:<PART> flytta:<VERB> och:<CCONJ> bosätta:<VERB> sig:<PRON> i:<ADP> Sverige:<PROPN>.:<PUNCT>
Om:<SCONJ> alla:<PRON> hade:<AUX> talat:<VERB> samma:<DET> språk:<NOUN> hade:<AUX> det:<PRON> förmodligen:<ADV> inte:<PART> funnits:<VERB> något:<DET> utanförskap:<NOUN>,:<PUNCT> utan:<CCONJ> man:<PRON> hade:<AUX> fått:<VERB> en:<DET> typ:<NOUN> av:<ADP> gemenskap:<NOUN> där:<ADV> man:<PRON> delar:<VERB> samma:<DET> kultur:<NOUN>.:<PUNCT>
Att:<PART> lära:<VERB> sig:<PRON> ett:<DET> språk:<NOUN> är:<AUX> väldigt:<ADV> svårt:<ADJ>,:<PUNCT> speciellt:<ADV> för:<ADP> vuxna:<ADJ> människor:<NOUN>,:<PUNCT> och:<CCONJ> eftersom:<SCONJ> majoritetsspråket:<NOUN> blir:<VERB> en:<DET> viktig:<ADJ> del:<NOUN> i:<ADP> en:<DET> persons:<NOUN> liv:<NOUN> räcker:<VERB> det:<PRON> inte:<PART> att:<PART> tala:<VERB> det:<PRON> på:<ADP> söndagar:<NOUN> utan:<CCONJ> det:<PRON> måste:<AUX> läras:<VERB> in:<PART> som:<SCONJ> ett:<DET> modersmål:<NOUN>,:<PUNCT> vilket:<PRON> finansieras:<VERB> av:<ADP> oss:<PRON> skattebetalare:<NOUN>.:<PUNCT>
Avslutningsvis så vill jag förmedla att vi bör rädda världen innan språken.
Språket är ganska enkelt, och det är lätt att förstå vad romanen handlar om.
Det är även kostsamt för staten att se till att dessa minoritetsspråk lever kvar.
Låt mig säga att det är inte för sent att rädda de små språken, vi måste ta steget nu.
Att hålla dessa minoritetsspråk vid liv är både slöseri med tid och mycket ekonomiskt krävande.
Jag tackar alla lärare på Sfi som hjälper oss för att vi ska kunna bli bättre på svenska språket.
Språk skapades för flera tusen år sedan och vissa språk har tynat bort medan några nya har skapats.
Samhället behöver flerspråkiga och vägen till kommunikation och till att begripa andras kulturer är ett språk.
Om man kan fler språk har man fler möjligheter att använda sig av dem vilket leder till utveckling.
Därför tycker jag att vi bör införa ett förbud mot främmande språk i statliga myndigheter och föreningar.
Men jag anser först och främst att språket är som själen, det som ger oss livskraft, säregenhet och karaktär.
På Sveriges riksdags hemsida kan man läsa om hur Sverige bidrar med att skydda dessa språk med hjälp av statligt bidrag.

View File

Before

Width:  |  Height:  |  Size: 81 KiB

After

Width:  |  Height:  |  Size: 81 KiB

View File

Before

Width:  |  Height:  |  Size: 57 KiB

After

Width:  |  Height:  |  Size: 57 KiB

View File

Before

Width:  |  Height:  |  Size: 64 KiB

After

Width:  |  Height:  |  Size: 64 KiB

View File

Before

Width:  |  Height:  |  Size: 337 KiB

After

Width:  |  Height:  |  Size: 337 KiB

View File

Before

Width:  |  Height:  |  Size: 160 KiB

After

Width:  |  Height:  |  Size: 160 KiB

View File

Before

Width:  |  Height:  |  Size: 51 KiB

After

Width:  |  Height:  |  Size: 51 KiB

View File

Before

Width:  |  Height:  |  Size: 2.1 KiB

After

Width:  |  Height:  |  Size: 2.1 KiB

View File

Before

Width:  |  Height:  |  Size: 81 KiB

After

Width:  |  Height:  |  Size: 81 KiB

View File

Before

Width:  |  Height:  |  Size: 258 KiB

After

Width:  |  Height:  |  Size: 258 KiB

View File

Before

Width:  |  Height:  |  Size: 60 KiB

After

Width:  |  Height:  |  Size: 60 KiB

View File

@@ -0,0 +1,13 @@
S ::= NP VP ;
VP ::= V NP ;
NP ::= Det CN ;
CN ::= A CN ;
CN ::= N ;
NP ::= Pron ;
Det ::= "the" ;
A ::= "black" ;
N ::= "cat" ;
V ::= "sees" ;
Pron ::= "us" ;

View File

@@ -0,0 +1,19 @@
S ::= NP_nom_sg VP_sg ;
S ::= NP_nom_pl VP_pl ;
VP_sg ::= V_sg NP_acc ;
VP_pl ::= V_pl NP_acc ;
NP_nom_sg ::= Det CN ;
NP_acc ::= Det CN ;
CN ::= A CN ;
CN ::= N ;
NP_nom_pl ::= Pron_nom_pl ;
NP_acc ::= Pron_acc ;
Det ::= "the" ;
A ::= "black" ;
N ::= "cat" ;
V_sg ::= "sees" ;
V_pl ::= "see" ;
Pron_nom_pl ::= "we" ;
Pron_acc ::= "us" ;

View File

@@ -0,0 +1,21 @@
abstract Nano = {
cat
S ; NP ; VP ; CN ;
Det ; Pron ; A ; N ; V2 ;
fun
PredVPS : NP -> VP -> S ;
ComplV2 : V2 -> NP -> VP ;
DetCN : Det -> CN -> NP ;
AdjCN : A -> CN -> CN ;
UseCN : N -> CN ;
UsePron : Pron -> NP ;
the_Det : Det ;
black_A : A ;
cat_N : N ;
see_V2 : V2 ;
we_Pron : Pron ;
}

View File

@@ -0,0 +1,33 @@
concrete NanoEng of Nano = {
lincat
S = Str ;
NP, Pron = {s : Case => Str ; n : Number} ;
VP, V2 = Number => Str ;
CN, Det = Str ;
A, N = Str ;
lin
PredVPS np vp = np.s ! Nom ++ vp ! np.n ;
ComplV2 v2 np = table {n => v2 ! n ++ np.s ! Acc} ;
DetCN det cn =
{s = table {c => det ++ cn} ; n = Sg} ;
AdjCN a cn = a ++ cn ;
UseCN n = n ;
UsePron pron = pron ;
the_Det = "the" ;
black_A = "black" ;
cat_N = "cat" ;
see_V2 =
table {Sg => "sees" ; Pl => "see"} ;
we_Pron = {
s = table {Nom => "we" ; Acc => "us"} ;
n = Pl
} ;
param
Number = Sg | Pl ;
Case = Nom | Acc ;
}

View File

@@ -0,0 +1,21 @@
concrete NanoIta of Nano = {
lincat
S, NP, VP, CN,
Det, Pron, A, N, V2 = Str ;
lin
PredVPS np vp = np ++ vp ;
ComplV2 v2 np = np ++ v2 ;
DetCN det cn = det ++ cn ;
AdjCN a cn = cn ++ a ;
UseCN n = n ;
UsePron pron = pron ;
the_Det = "il" ;
black_A = "nero" ;
cat_N = "gatto" ;
see_V2 = "vede" ;
we_Pron = "ci" ;
}

View File

@@ -1,12 +0,0 @@
#!/usr/bin/env bb
(require '[babashka.fs :as fs]
'[clojure.string :as str])
(doseq [f (fs/glob "lab1/grammar/korean" "**.gf")
[line-number line] (map-indexed (fn [i x] [i x])
(-> f str slurp str/split-lines))]
(when (re-find #"\p{block=HangulJamo}" line)
(printf "JAMO: %s:%d: %s\n" (str f) line-number line))
(when (re-find #"\p{block=HangulSyllables}" line)
(printf "SYLLABLE: %s:%d: %s\n" (str f) line-number line)))

View File

@@ -1,44 +0,0 @@
#!/usr/bin/env bb
(require '[babashka.fs :as fs]
'[clojure.string :as str])
(defn syllables->jamo [s]
"Convert Hangul syllables in string S to their jamo components."
(->> s
(map int)
(mapcat
(fn [c]
(if (<= 0xAC00 c 0xD7A3)
;; Hangul syllable - decompose
(let [code (- c 0xAC00)
lead (quot code (* 21 28))
medial (quot (mod code (* 21 28)) 28)
final (mod code 28)
lead-jamo (+ 0x1100 lead)
medial-jamo (+ 0x1161 medial)
final-jamo (if (> final 0) (+ 0x11A7 final) nil)]
(remove nil? [lead-jamo medial-jamo final-jamo]))
;; Not a Hangul syllable
[c])))
(map char)
(apply str)))
(defn -main [& args]
(if (seq args)
(doseq [f args]
(let [x (-> (slurp f)
(str/replace #"\p{block=HangulSyllables}+"
syllables->jamo))]
(spit f x)))
(loop [line (read-line)]
(when line
(-> line
(str/replace #"\p{block=HangulSyllables}+"
syllables->jamo)
println)
(recur (read-line))))))
(when (= *file* (System/getProperty "babashka.file"))
(apply -main *command-line-args*))