diff --git a/lectures/lecture-n/arianna/.gitignore b/lectures/lecture-n/arianna/.gitignore new file mode 100644 index 0000000..4494f4b --- /dev/null +++ b/lectures/lecture-n/arianna/.gitignore @@ -0,0 +1,94 @@ +## Core latex/pdflatex auxiliary files: +*.aux +*.lof +*.log +*.lot +*.fls +*.out +*.toc + +## Intermediate documents: +*.dvi +# these rules might exclude image files for figures etc. +# *.ps +# *.eps +# *.pdf + +## Bibliography auxiliary files (bibtex/biblatex/biber): +*.bbl +*.bcf +*.blg +*-blx.aux +*-blx.bib +*.run.xml + +## Build tool auxiliary files: +*.fdb_latexmk +*.synctex.gz +*.synctex.gz(busy) +*.pdfsync + +## Auxiliary and intermediate files from other packages: + +# algorithms +*.alg +*.loa + +# amsthm +*.thm + +# beamer +*.nav +*.snm +*.vrb + +# glossaries +*.acn +*.acr +*.glg +*.glo +*.gls + +# hyperref +*.brf + +# listings +*.lol + +# makeidx +*.idx +*.ilg +*.ind +*.ist + +# minitoc +*.maf +*.mtc +*.mtc0 + +# minted +*.pyg + +# nomencl +*.nlo + +# sagetex +*.sagetex.sage +*.sagetex.py +*.sagetex.scmd + +# sympy +*.sout +*.sympy +sympy-plots-for-*.tex/ + +# todonotes +*.tdo + +# xindy +*.xdy + +# useless files +color_scheme.png +identicon.png +._wordcount_selection.tex \ No newline at end of file diff --git a/lectures/lecture-n/arianna/beamerthemelucid.sty b/lectures/lecture-n/arianna/beamerthemelucid.sty new file mode 100644 index 0000000..3c53260 --- /dev/null +++ b/lectures/lecture-n/arianna/beamerthemelucid.sty @@ -0,0 +1,187 @@ +\usepackage{tikz} +\usetikzlibrary{calc} + +% -------- COLOR SCHEME -------- +\definecolor{PrimaryColor}{RGB}{7,79,140} % primary color (blue) +\definecolor{SecondaryColor}{RGB}{242,88,26} % bulleted lists +\definecolor{BackgroundColor}{RGB}{255,255,255} % background & titles (white) +\definecolor{TextColor}{RGB}{0,0,0} % text (black) +\definecolor{ProgBarBGColor}{RGB}{175,175,175} % progress bar background (grey) + + +% set colours +\setbeamercolor{normal text}{fg=TextColor}\usebeamercolor*{normal text} 
+\setbeamercolor{alerted text}{fg=PrimaryColor} +\setbeamercolor{section in toc}{fg=PrimaryColor} +\setbeamercolor{structure}{fg=SecondaryColor} +\hypersetup{colorlinks,linkcolor=,urlcolor=SecondaryColor} + +% set fonts: list text gets one size step smaller per nesting level +\setbeamerfont{itemize/enumerate body}{size=\large} +\setbeamerfont{itemize/enumerate subbody}{size=\normalsize} +\setbeamerfont{itemize/enumerate subsubbody}{size=\small} + +% make pixelated bullets: small filled tikz squares in SecondaryColor (three in a zig-zag for items and subitems, a single square for subsubitems) +\setbeamertemplate{itemize item}{ + \tikz{ + \draw[fill=SecondaryColor,draw=none] (0, 0) rectangle(0.1, 0.1); + \draw[fill=SecondaryColor,draw=none] (0.1, 0.1) rectangle(0.2, 0.2); + \draw[fill=SecondaryColor,draw=none] (0, 0.2) rectangle(0.1, 0.3); + } +} +\setbeamertemplate{itemize subitem}{ + \tikz{ + \draw[fill=SecondaryColor,draw=none] (0, 0) rectangle(0.075, 0.075); + \draw[fill=SecondaryColor,draw=none] (0.075, 0.075) rectangle(0.15, 0.15); + \draw[fill=SecondaryColor,draw=none] (0, 0.15) rectangle(0.075, 0.225); + } +} +\setbeamertemplate{itemize subsubitem}{ + \tikz{ + \draw[fill=SecondaryColor,draw=none] (0.050, 0.050) rectangle(0.15, 0.15); + } +} + +% disable the navigation symbols +\setbeamertemplate{navigation symbols}{} + +% disable the default logo: empty the right sidebar (the logo is placed manually in the background template) 
+\setbeamertemplate{sidebar right}{} + +% the title page is drawn manually in the background template below +\setbeamertemplate{title page}{} + +% likewise, the frame title is drawn manually in the background template below +\setbeamertemplate{frametitle}{} + +% disable "Figure:" in the captions +% TODO: somehow this doesn't work for md-generated slides +%\setbeamertemplate{caption}{\tiny\insertcaption} +%\setbeamertemplate{caption label separator}{} + +% add some space below the footnotes so they don't end up on the progress bar +\setbeamertemplate{footnote}{ + \parindent 0em + \noindent + \raggedright + \hbox to 0.8em{\hfil\insertfootnotemark} + \insertfootnotetext + \par + \vspace{2em} +} + +% add the same vspace both before and after quotes +\setbeamertemplate{quote begin}{\vspace{0.5em}} +\setbeamertemplate{quote end}{\vspace{0.5em}} + +% display toggles (a value > 0 means on): progress bar, slide numbers, slide total +\newcounter{showProgressBar} +\setcounter{showProgressBar}{1} +\newcounter{showSlideNumbers} +\setcounter{showSlideNumbers}{1} +\newcounter{showSlideTotal} +\setcounter{showSlideTotal}{1} + +% use \makeatletter for our progress bar definitions +% progress bar idea from http://tex.stackexchange.com/a/59749/44221 +% slightly adapted for visual purposes here +\makeatletter +\newcount\progressbar@tmpcounta% auxiliary counter: current frame number +\newcount\progressbar@tmpcountb% auxiliary counter: total number of frames +\newdimen\progressbar@pbwidth %progressbar width +\newdimen\progressbar@tmpdim % auxiliary dimension: width of the filled part of the bar + +\newdimen\slidewidth % slide width (set to \paperwidth below) +\newdimen\slideheight % slide height (set to \paperheight below) + +% make the progress bar go across the screen +\progressbar@pbwidth=\the\paperwidth +\slidewidth=\the\paperwidth +\slideheight=\the\paperheight + +% draw everything with tikz +\setbeamertemplate{background}{ % all slides + % progress bar: tmpdim = pbwidth * frame / total, computed as ((pbwidth / 100) * frame / total) * 100 (presumably dividing first to avoid dimension overflow -- TODO confirm) + \progressbar@tmpcounta=\insertframenumber + \progressbar@tmpcountb=\inserttotalframenumber + \progressbar@tmpdim=\progressbar@pbwidth + \divide\progressbar@tmpdim by 100 + \multiply\progressbar@tmpdim by \progressbar@tmpcounta + \divide\progressbar@tmpdim by 
\progressbar@tmpcountb + \multiply\progressbar@tmpdim by 100 + + \begin{tikzpicture} + % set up the entire slide as the canvas + \useasboundingbox (0,0) rectangle(\the\paperwidth,\the\paperheight); + + % background + \fill[color=BackgroundColor] (0,0) rectangle(\the\paperwidth,\the\paperheight); + + \ifnum\thepage=1\relax % first page only: drawn as the title slide + % primary color rectangle covering everything above 4cm + \fill[color=PrimaryColor] (0, 4cm) rectangle(\slidewidth,\slideheight); + + % text (title, subtitle, author, institute, date) + \node[anchor=south,text width=\slidewidth-1cm,inner xsep=0.5cm] at (0.5\slidewidth,4cm) {\color{BackgroundColor}\Huge\textbf{\inserttitle}}; + \node[anchor=north east,text width=\slidewidth-1cm,align=right] at (\slidewidth-0.4cm,4cm) {\color{PrimaryColor}\large\textbf{\insertsubtitle}}; + \node at (0.5\slidewidth,2cm) {\color{PrimaryColor}\LARGE\insertauthor}; + \node at (0.5\slidewidth,1.25cm) {\color{PrimaryColor}\Large\insertinstitute}; + \node[anchor=south east] at(\slidewidth,0cm) {\color{PrimaryColor}\tiny\insertdate}; + \else % other slides + % title bar (1cm high, along the top edge) + \fill[color=PrimaryColor] (0, \slideheight-1cm) rectangle(\slidewidth,\slideheight); + + % slide title + \node[anchor=north,text width=\slidewidth-0.75cm,inner xsep=0.5cm,inner ysep=0.25cm] at (0.5\slidewidth,\slideheight) {\color{BackgroundColor}\huge\textbf{\insertframetitle}}; + + % logo in the top right corner (TODO: autoscale; now it expects 350x350) + \node[anchor=north east] at (\slidewidth-0.25cm,\slideheight+0.06cm){\insertlogo}; + + % show progress bar + \ifnum \value{showProgressBar}>0\relax% + % progress bar along the bottom edge: grey track first, filled part drawn on top of it + \draw[fill=ProgBarBGColor,draw=none] (0cm,0cm) rectangle(\slidewidth,0.25cm); + \draw[fill=PrimaryColor,draw=none] (0cm,0cm) rectangle(\progressbar@tmpdim,0.25cm); + + % bottom info (current section name), placed just above the bar + \node[anchor=south west] at(0cm,0.25cm) {\color{PrimaryColor}\tiny\vphantom{lp}\insertsection}; + % if slide numbers are active + \ifnum \value{showSlideNumbers}>0\relax% + % if slide totals are active + \ifnum 
\value{showSlideTotal}>0\relax% + % draw both slide number and slide total + \node[anchor=south east] at(\slidewidth,0.25cm) {\color{PrimaryColor}\tiny\insertframenumber/\inserttotalframenumber}; + \else % slide number only + \node[anchor=south east] at(\slidewidth,0.25cm) {\color{PrimaryColor}\tiny\insertframenumber}; + \fi + \fi + \else % progress bar hidden: same info, placed at the very bottom + % section title in the bottom left + \node[anchor=south west] at(0cm,0cm) {\color{PrimaryColor}\tiny\vphantom{lp}\insertsection}; + % if we're showing slide numbers + \ifnum \value{showSlideNumbers}>0\relax% + % if slide totals are active + \ifnum \value{showSlideTotal}>0\relax% + % slide number and slide total + \node[anchor=south east] at(\slidewidth,0cm) {\color{PrimaryColor}\tiny\insertframenumber/\inserttotalframenumber}; + \else % slide number only + \node[anchor=south east] at(\slidewidth,0cm) {\color{PrimaryColor}\tiny\insertframenumber}; + \fi + \fi + \fi + \fi + \end{tikzpicture} +} +\makeatother + +\AtBeginSection{\frame{\sectionpage}} % insert a section page frame at the start of every section +\setbeamertemplate{section page} +{ + \begin{tikzpicture} + % set up the entire slide as the canvas + \useasboundingbox (0,0) rectangle(\slidewidth,\slideheight); + \fill[color=BackgroundColor] (-1cm, 2cm) rectangle (\slidewidth, \slideheight+0.1cm); % background-coloured backdrop + \fill[color=PrimaryColor] (-1cm, 0.5\slideheight-1cm) rectangle(\slidewidth, 0.5\slideheight+1cm); % 2cm-high primary-colour band centred vertically + \node[text width=\the\paperwidth-1cm,align=center] at (0.4\slidewidth, 0.5\slideheight) {\color{BackgroundColor}\Huge\textbf{\insertsection}}; % section name inside the band + \end{tikzpicture} +} diff --git a/lectures/lecture-n/arianna/gu.png b/lectures/lecture-n/arianna/gu.png new file mode 100644 index 0000000..ea2f1e5 Binary files /dev/null and b/lectures/lecture-n/arianna/gu.png differ diff --git a/lectures/lecture-n/arianna/img/cda.png b/lectures/lecture-n/arianna/img/cda.png new file mode 100644 index 0000000..8dd042e Binary files /dev/null and b/lectures/lecture-n/arianna/img/cda.png differ diff --git a/lectures/lecture-n/arianna/img/ex.conllu 
b/lectures/lecture-n/arianna/img/ex.conllu new file mode 100644 index 0000000..8eb4222 --- /dev/null +++ b/lectures/lecture-n/arianna/img/ex.conllu @@ -0,0 +1,24 @@ +# generator = UDPipe 2, https://lindat.mff.cuni.cz/services/udpipe +# udpipe_model = swedish-talbanken-ud-2.15-241121 +# udpipe_model_licence = CC BY-NC-SA +# newdoc +# newpar +# sent_id = 1 +# text = den är smog salt och det bra för all kropen +1 den den PRON PN|UTR|SIN|DEF|SUB/OBJ Definite=Def|Gender=Com|Number=Sing|PronType=Prs 4 nsubj _ TokenRange=0:3 +2 är vara AUX VB|PRS|AKT Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act 4 cop _ TokenRange=4:6 +3 smog smog ADV AB _ 4 advmod _ TokenRange=7:11 +4 salt salt ADJ JJ|POS|UTR|SIN|IND|NOM Case=Nom|Definite=Ind|Degree=Pos|Number=Sing 0 root _ TokenRange=12:16 +5 och och CCONJ KN _ 7 cc _ TokenRange=17:20 +6 det den PRON PN|NEU|SIN|DEF|SUB/OBJ Definite=Def|Gender=Neut|Number=Sing|PronType=Prs 7 nsubj _ TokenRange=21:24 +7 bra bra ADJ JJ|POS|UTR/NEU|SIN/PLU|IND/DEF|NOM Case=Nom|Degree=Pos 4 conj _ TokenRange=25:28 +8 för för ADP PP _ 10 case _ TokenRange=29:32 +9 all all DET DT|UTR|SIN|IND/DEF Gender=Com|Number=Sing|PronType=Tot 10 det _ TokenRange=33:36 +10 kropen krop NOUN NN|UTR|SIN|DEF|NOM Case=Nom|Definite=Def|Gender=Com|Number=Sing 7 obl _ SpaceAfter=No|TokenRange=37:43 + +1 Självklart självklar ADV JJ|POS|NEU|SIN|IND|NOM Degree=Pos 0 root _ ORIG_LABEL=root +2 att att SCONJ SN _ 5 mark _ CorrectionLabels=S-Clause +3 det den PRON PN|NEU|SIN|DEF|SUB/OBJ Definite=Def|Gender=Neut|Number=Sing|PronType=Prs 5 nsubj _ _ +4 är vara AUX VB|PRS|AKT Mood=Ind|Tense=Pres|VerbForm=Fin|Voice=Act 5 cop _ CorrectionLabels=S-Clause +5 viktigt viktig ADJ JJ|POS|NEU|SIN|IND|NOM Case=Nom|Definite=Ind|Degree=Pos|Gender=Neut|Number=Sing 1 csubj _ _ +6 . . 
PUNCT MAD _ 1 punct _ _ \ No newline at end of file diff --git a/lectures/lecture-n/arianna/img/ex.pdf b/lectures/lecture-n/arianna/img/ex.pdf new file mode 100644 index 0000000..2618064 Binary files /dev/null and b/lectures/lecture-n/arianna/img/ex.pdf differ diff --git a/lectures/lecture-n/arianna/img/ex.tex b/lectures/lecture-n/arianna/img/ex.tex new file mode 100644 index 0000000..9dc0aa3 --- /dev/null +++ b/lectures/lecture-n/arianna/img/ex.tex @@ -0,0 +1,111 @@ +\documentclass{article} +\usepackage[a4paper,margin=0.5in,landscape]{geometry} +\usepackage[utf8]{inputenc} +\begin{document} +%% den är smog salt och det bra för all kropen +\setlength{\unitlength}{0.2mm} +\begin{picture}(531.0,110.0) + \put(0.0,0.0){den} + \put(46.0,0.0){är} + \put(83.0,0.0){smog} + \put(129.0,0.0){salt} + \put(175.0,0.0){och} + \put(230.0,0.0){det} + \put(276.0,0.0){bra} + \put(313.0,0.0){för} + \put(350.0,0.0){all} + \put(387.0,0.0){kropen} + \put(0.0,15.0){{\tiny PRON}} + \put(46.0,15.0){{\tiny AUX}} + \put(83.0,15.0){{\tiny ADV}} + \put(129.0,15.0){{\tiny ADJ}} + \put(175.0,15.0){{\tiny CCONJ}} + \put(230.0,15.0){{\tiny PRON}} + \put(276.0,15.0){{\tiny ADJ}} + \put(313.0,15.0){{\tiny ADP}} + \put(350.0,15.0){{\tiny DET}} + \put(387.0,15.0){{\tiny NOUN}} + \put(0.0,-11.0){{\scriptsize {\slshape den}}} + \put(46.0,-11.0){{\scriptsize {\slshape vara}}} + \put(83.0,-11.0){{\scriptsize {\slshape smog}}} + \put(129.0,-11.0){{\scriptsize {\slshape salt}}} + \put(175.0,-11.0){{\scriptsize {\slshape och}}} + \put(230.0,-11.0){{\scriptsize {\slshape den}}} + \put(276.0,-11.0){{\scriptsize {\slshape bra}}} + \put(313.0,-11.0){{\scriptsize {\slshape för}}} + \put(350.0,-11.0){{\scriptsize {\slshape all}}} + \put(387.0,-11.0){{\scriptsize {\slshape krop}}} + \put(74.5,30.0){\oval(126.67441860465117,100.0)[t]} + \put(11.162790697674417,35.0){\vector(0,-1){5.0}} + \put(63.25,83.0){{\tiny nsubj}} + \put(97.5,30.0){\oval(79.3855421686747,66.66666666666667)[t]} + 
\put(57.80722891566265,35.0){\vector(0,-1){5.0}} + \put(90.75,66.33333333333334){{\tiny cop}} + \put(116.0,30.0){\oval(39.47826086956522,33.333333333333336)[t]} + \put(96.26086956521739,35.0){\vector(0,-1){5.0}} + \put(102.5,49.66666666666667){{\tiny advmod}} + \put(144.0,110.0){\vector(0,-1){80.0}} + \put(149.0,100.0){{\tiny root}} + \put(235.5,30.0){\oval(98.02970297029702,66.66666666666667)[t]} + \put(186.4851485148515,35.0){\vector(0,-1){5.0}} + \put(231.0,66.33333333333334){{\tiny cc}} + \put(263.0,30.0){\oval(39.47826086956522,33.333333333333336)[t]} + \put(243.26086956521738,35.0){\vector(0,-1){5.0}} + \put(251.75,49.66666666666667){{\tiny nsubj}} + \put(222.5,30.0){\oval(144.9591836734694,100.0)[t]} + \put(294.9795918367347,35.0){\vector(0,-1){5.0}} + \put(213.5,83.0){{\tiny conj}} + \put(360.0,30.0){\oval(69.94594594594595,66.66666666666667)[t]} + \put(325.02702702702703,35.0){\vector(0,-1){5.0}} + \put(351.0,66.33333333333334){{\tiny case}} + \put(378.5,30.0){\oval(28.89189189189189,33.333333333333336)[t]} + \put(364.05405405405406,35.0){\vector(0,-1){5.0}} + \put(371.75,49.66666666666667){{\tiny det}} + \put(351.5,30.0){\oval(108.29729729729729,100.0)[t]} + \put(405.64864864864865,35.0){\vector(0,-1){5.0}} + \put(344.75,83.0){{\tiny obl}} +\end{picture} + + +\vspace{4mm} +%% Självklart att det är viktigt . 
+\setlength{\unitlength}{0.2mm} +\begin{picture}(406.0,150.0) + \put(0.0,0.0){Självklart} + \put(100.0,0.0){att} + \put(155.0,0.0){det} + \put(201.0,0.0){är} + \put(238.0,0.0){viktigt} + \put(311.0,0.0){.} + \put(0.0,15.0){{\tiny ADV}} + \put(100.0,15.0){{\tiny SCONJ}} + \put(155.0,15.0){{\tiny PRON}} + \put(201.0,15.0){{\tiny AUX}} + \put(238.0,15.0){{\tiny ADJ}} + \put(311.0,15.0){{\tiny PUNCT}} + \put(0.0,-11.0){{\scriptsize {\slshape självklar}}} + \put(100.0,-11.0){{\scriptsize {\slshape att}}} + \put(155.0,-11.0){{\scriptsize {\slshape den}}} + \put(201.0,-11.0){{\scriptsize {\slshape vara}}} + \put(238.0,-11.0){{\scriptsize {\slshape viktig}}} + \put(311.0,-11.0){{\scriptsize {\slshape .}}} + \put(15.0,150.0){\vector(0,-1){120.0}} + \put(20.0,140.0){{\tiny root}} + \put(179.0,30.0){\oval(135.82608695652175,100.0)[t]} + \put(111.08695652173913,35.0){\vector(0,-1){5.0}} + \put(170.0,83.0){{\tiny mark}} + \put(206.5,30.0){\oval(79.3855421686747,66.66666666666667)[t]} + \put(166.80722891566265,35.0){\vector(0,-1){5.0}} + \put(195.25,66.33333333333334){{\tiny nsubj}} + \put(229.5,30.0){\oval(28.89189189189189,33.333333333333336)[t]} + \put(215.05405405405406,35.0){\vector(0,-1){5.0}} + \put(222.75,49.66666666666667){{\tiny cop}} + \put(139.0,30.0){\oval(236.73949579831933,133.33333333333334)[t]} + \put(257.3697478991597,35.0){\vector(0,-1){5.0}} + \put(127.75,99.66666666666667){{\tiny csubj}} + \put(175.5,30.0){\oval(310.0353697749196,166.66666666666666)[t]} + \put(330.51768488745984,35.0){\vector(0,-1){5.0}} + \put(164.25,116.33333333333333){{\tiny punct}} +\end{picture} + +\end{document} \ No newline at end of file diff --git a/lectures/lecture-n/arianna/img/l1l2.png b/lectures/lecture-n/arianna/img/l1l2.png new file mode 100644 index 0000000..a79f6fe Binary files /dev/null and b/lectures/lecture-n/arianna/img/l1l2.png differ diff --git a/lectures/lecture-n/arianna/slides.md b/lectures/lecture-n/arianna/slides.md new file mode 100644 index 0000000..c9d06d4 --- 
/dev/null +++ b/lectures/lecture-n/arianna/slides.md @@ -0,0 +1,638 @@ +--- +title: "UD as an annotation standard \\newline for learner language" +subtitle: "a case study on L2 Swedish" +author: "Arianna Masciolini" +theme: "lucid" +logo: "gu.png" +date: "VT25" +institute: "LT2214 Computational Syntax" +--- + +## Learner data + + + +\bigskip \bigskip + +### English (FCE) +\small +```xml +I also suggest that more plays and films should + be takentake + place. +``` + +### Italian (VALICO) +\small +```xml +Finse aveva paurache aveva paura + di un rapitorapimento. +``` + +### Swedish (SweLL) +\small +```xml + " Den är +en tredjedel +av din dag +! +``` + +## The problems +- coarse-grained error labels +- exclusive focus on errors +- lots of manual annotation needed +- lack of interoperability between corpora + +## The solution: UD +- fine-grained morphosyntactic annotation +- parsers +- cross-linguistic consistency $\to$ possibility to compare: + - L2 vs. standard + - L1 vs. L2 + - different L2s + +## L1-L2 treebanks + + + +![](img/l1l2.png) + +\bigskip + +- L2 sentences $\parallel$ correction hypotheses +- no explicit error tagging + + + +## UD treebanks of learner language +\bigskip + +| **language** | **name** | **size** | **status** | **parallel** | +| ----------: | --------- | -------: | :-----------: | :--------: | +| Chinese | CFL | 451 | released | **yes**\*\* | +| English | ESL | 5124 | retired\* | **yes** | +| English | ESLSpok | 2320 | released | no | +| Italian | Valico | 398 | released | **yes** | +| | | | | | +| Korean | KSL | 12977 | released | no | +| Russian | ? 
| 500 | WIP | **yes** | +| \color{SecondaryColor}Swedish | \color{SecondaryColor}SweLL | \color{SecondaryColor}\~5000 | \color{SecondaryColor}WIP | \color{SecondaryColor}**yes** | + +\footnotesize \*available for download but not part of the latest UD release +\newline\footnotesize \**only L2 half available + +## Challenges +| **expectations** | **reality** | +| -----: | :----- | +| fine-grained annotation | when the validator allows that | +| parsers | don't work terribly well | +| cross-linguistic consistency | is limited to error-free spans | + +## The `root` of the problem +The UD guidelines are designed with standard language in mind + +- should we annotate the intended meaning (correction) and/or the observed language use? +- how to handle mismatches between the characteristics of individual tokens and their use in context? + +# Treebanking SweLL + +## Source corpus +__SweLL-gold__, aka the Swedish Learner Language corpus: + +- __genre__: essays (misc topics) +- __learners__: adult L2 Swedish learners with various language backgrounds and proficiency levels +- __annotation__: error tagging, pseudonymization and normalization (minimal edits) +- __license__: CLARIN-ID -PRIV \underline{-NORED} -BY + +## Example 0 +\setlength{\unitlength}{0.20mm} +\begin{picture}(406.0,150.0) + \put(0.0,0.0){Självklart} + \put(100.0,0.0){\bfseries att} + \put(155.0,0.0){\bfseries det} + \put(201.0,0.0){\bfseries är} + \put(238.0,0.0){viktigt} + \put(311.0,0.0){.} + \put(0.0,-11.0){{\scriptsize {\slshape of.course}}} + \put(100.0,-11.0){{\scriptsize {\slshape that}}} + \put(155.0,-11.0){{\scriptsize {\slshape it}}} + \put(201.0,-11.0){{\scriptsize {\slshape is}}} + \put(238.0,-11.0){{\scriptsize {\slshape important}}} + \put(311.0,-11.0){{\scriptsize {\slshape .}}} +\end{picture} + +\bigskip + +- \small correction: "Självklart __är det__ viktigt." +- \small translation: "Of course it is important." 
+ +## Example 0 +\setlength{\unitlength}{0.20mm} +\begin{picture}(406.0,150.0) + \put(0.0,0.0){Självklart} + \put(100.0,0.0){\bfseries att} + \put(155.0,0.0){\bfseries det} + \put(201.0,0.0){\bfseries är} + \put(238.0,0.0){viktigt} + \put(311.0,0.0){.} + \put(0.0,15.0){{\tiny ADV}} + \put(100.0,15.0){{\tiny SCONJ}} + \put(155.0,15.0){{\tiny PRON}} + \put(201.0,15.0){{\tiny AUX}} + \put(238.0,15.0){{\tiny ADJ}} + \put(311.0,15.0){{\tiny PUNCT}} + \put(0.0,-11.0){{\scriptsize {\slshape of.course}}} + \put(100.0,-11.0){{\scriptsize {\slshape that}}} + \put(155.0,-11.0){{\scriptsize {\slshape it}}} + \put(201.0,-11.0){{\scriptsize {\slshape is}}} + \put(238.0,-11.0){{\scriptsize {\slshape important}}} + \put(311.0,-11.0){{\scriptsize {\slshape .}}} +\end{picture} + +\bigskip + +- \small correction: "Självklart __är det__ viktigt." +- \small translation: "Of course it is important." + +## Example 0 +\setlength{\unitlength}{0.20mm} +\begin{picture}(406.0,150.0) + \put(0.0,0.0){Självklart} + \put(100.0,0.0){\bfseries att} + \put(155.0,0.0){\bfseries det} + \put(201.0,0.0){\bfseries är} + \put(238.0,0.0){viktigt} + \put(311.0,0.0){.} + \put(0.0,15.0){{\tiny ADV}} + \put(100.0,15.0){{\tiny SCONJ}} + \put(155.0,15.0){{\tiny PRON}} + \put(201.0,15.0){{\tiny AUX}} + \put(238.0,15.0){{\tiny ADJ}} + \put(311.0,15.0){{\tiny PUNCT}} + \put(0.0,-11.0){{\scriptsize {\slshape of.course}}} + \put(100.0,-11.0){{\scriptsize {\slshape that}}} + \put(155.0,-11.0){{\scriptsize {\slshape it}}} + \put(201.0,-11.0){{\scriptsize {\slshape is}}} + \put(238.0,-11.0){{\scriptsize {\slshape important}}} + \put(311.0,-11.0){{\scriptsize {\slshape .}}} + \put(15.0,150.0){\vector(0,-1){120.0}} + \put(20.0,140.0){{\tiny root}} + \put(179.0,30.0){\oval(135.82608695652175,100.0)[t]} + \put(111.08695652173913,35.0){\vector(0,-1){5.0}} + \put(170.0,83.0){{\tiny mark}} + \put(206.5,30.0){\oval(79.3855421686747,66.66666666666667)[t]} + \put(166.80722891566265,35.0){\vector(0,-1){5.0}} + 
\put(195.25,66.33333333333334){{\tiny nsubj}} + \put(229.5,30.0){\oval(28.89189189189189,33.333333333333336)[t]} + \put(215.05405405405406,35.0){\vector(0,-1){5.0}} + \put(222.75,49.66666666666667){{\tiny cop}} + \put(139.0,30.0){\oval(236.73949579831933,133.33333333333334)[t]} + \put(257.3697478991597,35.0){\vector(0,-1){5.0}} + \put(127.75,99.66666666666667){{\tiny csubj}} + \put(175.5,30.0){\oval(310.0353697749196,166.66666666666666)[t]} + \put(330.51768488745984,35.0){\vector(0,-1){5.0}} + \put(164.25,116.33333333333333){{\tiny punct}} +\end{picture} + +\bigskip + +- \small correction: "Självklart __är det__ viktigt." +- \small translation: "Of course it is important." + +## Example 1 +\setlength{\unitlength}{0.23mm} +\begin{picture}(409.0,130.0) + \put(0.0,0.0){Jag} + \put(46.0,0.0){hade} + \put(92.0,0.0){\bfseries emotskänslor} + \put(200.0,0.0){fast} + \put(270.0,0.0){jag} + \put(311.0,0.0){\bfseries var} + \put(348.0,0.0){\bfseries vänta} + \put(403.0,0.0){det} + \put(0.0,-11.0){{\scriptsize {\slshape I}}} + \put(46.0,-11.0){{\scriptsize {\slshape had}}} + \put(92.0,-11.0){{\scriptsize {\slshape againstfeelings}}} + \put(200.0,-11.0){{\scriptsize {\slshape although}}} + \put(270.0,-11.0){{\scriptsize {\slshape I}}} + \put(311.0,-11.0){{\scriptsize {\slshape was}}} + \put(348.0,-11.0){{\scriptsize {\slshape wait}}} + \put(403.0,-11.0){{\scriptsize {\slshape that}}} +\end{picture} + +\bigskip + +- \small correction: "Jag hade __motstridiga känslor__ fast jag __hade väntat mig__ det" +- \small translation: "I had mixed feelings although I was expecting that" + +## Example 1 +\setlength{\unitlength}{0.23mm} +\begin{picture}(409.0,130.0) + \put(0.0,0.0){Jag} + \put(46.0,0.0){hade} + \put(92.0,0.0){\bfseries emotskänslor} + \put(200.0,0.0){fast} + \put(270.0,0.0){jag} + \put(311.0,0.0){\bfseries var} + \put(348.0,0.0){\bfseries vänta} + \put(403.0,0.0){det} + \put(0.0,15.0){{\tiny PRON}} + \put(46.0,15.0){{\tiny VERB}} + \put(92.0,15.0){{\tiny NOUN}} + 
\put(200.0,15.0){{\tiny SCONJ}} + \put(270.0,15.0){{\tiny PRON}} + \put(311.0,15.0){{\tiny AUX}} + \put(348.0,15.0){{\tiny VERB}} + \put(403.0,15.0){{\tiny PRON}} + \put(0.0,-11.0){{\scriptsize {\slshape I}}} + \put(46.0,-11.0){{\scriptsize {\slshape had}}} + \put(92.0,-11.0){{\scriptsize {\slshape againstfeelings}}} + \put(200.0,-11.0){{\scriptsize {\slshape although}}} + \put(270.0,-11.0){{\scriptsize {\slshape I}}} + \put(311.0,-11.0){{\scriptsize {\slshape was}}} + \put(348.0,-11.0){{\scriptsize {\slshape wait}}} + \put(403.0,-11.0){{\scriptsize {\slshape that}}} +\end{picture} + +\bigskip + +- \small correction: "Jag hade __motstridiga känslor__ fast jag __hade väntat mig__ det" +- \small translation: "I had mixed feelings although I was expecting that" + +## Example 1 +\setlength{\unitlength}{0.23mm} +\begin{picture}(409.0,130.0) + \put(0.0,0.0){Jag} + \put(46.0,0.0){hade} + \put(92.0,0.0){\bfseries emotskänslor} + \put(200.0,0.0){fast} + \put(270.0,0.0){jag} + \put(311.0,0.0){\bfseries var} + \put(348.0,0.0){\bfseries vänta} + \put(403.0,0.0){det} + \put(0.0,15.0){{\tiny PRON}} + \put(46.0,15.0){{\tiny VERB}} + \put(92.0,15.0){{\tiny NOUN}} + \put(200.0,15.0){{\tiny SCONJ}} + \put(270.0,15.0){{\tiny PRON}} + \put(311.0,15.0){{\tiny AUX}} + \put(348.0,15.0){{\tiny VERB}} + \put(403.0,15.0){{\tiny PRON}} + \put(0.0,-11.0){{\scriptsize {\slshape I}}} + \put(46.0,-11.0){{\scriptsize {\slshape had}}} + \put(92.0,-11.0){{\scriptsize {\slshape againstfeelings}}} + \put(200.0,-11.0){{\scriptsize {\slshape although}}} + \put(270.0,-11.0){{\scriptsize {\slshape I}}} + \put(311.0,-11.0){{\scriptsize {\slshape was}}} + \put(348.0,-11.0){{\scriptsize {\slshape wait}}} + \put(403.0,-11.0){{\scriptsize {\slshape that}}} + \put(33.0,30.0){\oval(39.47826086956522,33.333333333333336)[t]} + \put(13.26086956521739,35.0){\vector(0,-1){5.0}} + \put(21.75,49.66666666666667){{\tiny nsubj}} + \put(61.0,130.0){\vector(0,-1){100.0}} + \put(66.0,120.0){{\tiny root}} + 
\put(89.0,30.0){\oval(39.47826086956522,33.333333333333336)[t]} + \put(108.73913043478261,35.0){\vector(0,-1){5.0}} + \put(82.25,49.66666666666667){{\tiny obj}} + \put(289.0,30.0){\oval(135.82608695652175,100.0)[t]} + \put(221.08695652173913,35.0){\vector(0,-1){5.0}} + \put(280.0,83.0){{\tiny mark}} + \put(316.5,30.0){\oval(79.3855421686747,66.66666666666667)[t]} + \put(276.8072289156627,35.0){\vector(0,-1){5.0}} + \put(305.25,66.33333333333334){{\tiny nsubj}} + \put(339.5,30.0){\oval(28.89189189189189,33.333333333333336)[t]} + \put(325.05405405405406,35.0){\vector(0,-1){5.0}} + \put(332.75,49.66666666666667){{\tiny \bfseries ?}} + \put(217.0,30.0){\oval(301.0066225165563,133.33333333333334)[t]} + \put(367.50331125827813,35.0){\vector(0,-1){5.0}} + \put(205.75,99.66666666666667){{\tiny advcl}} + \put(395.5,30.0){\oval(49.54545454545455,33.333333333333336)[t]} + \put(420.27272727272725,35.0){\vector(0,-1){5.0}} + \put(388.75,49.66666666666667){{\tiny obj}} +\end{picture} + +\bigskip + +- \small correction: "Jag hade __motstridiga känslor__ fast jag __hade väntat mig__ det" +- \small translation: "I had mixed feelings although I was expecting that" + +## Example 1 +\setlength{\unitlength}{0.23mm} +\begin{picture}(409.0,130.0) + \put(0.0,0.0){Jag} + \put(46.0,0.0){hade} + \put(92.0,0.0){\bfseries emotskänslor} + \put(200.0,0.0){fast} + \put(270.0,0.0){jag} + \put(311.0,0.0){\bfseries var} + \put(348.0,0.0){\bfseries vänta} + \put(403.0,0.0){det} + \put(0.0,15.0){{\tiny PRON}} + \put(46.0,15.0){{\tiny VERB}} + \put(92.0,15.0){{\tiny NOUN}} + \put(200.0,15.0){{\tiny SCONJ}} + \put(270.0,15.0){{\tiny PRON}} + \put(311.0,15.0){{\tiny AUX}} + \put(348.0,15.0){{\tiny VERB}} + \put(403.0,15.0){{\tiny PRON}} + \put(0.0,-11.0){{\scriptsize {\slshape I}}} + \put(46.0,-11.0){{\scriptsize {\slshape had}}} + \put(92.0,-11.0){{\scriptsize {\slshape againstfeelings}}} + \put(200.0,-11.0){{\scriptsize {\slshape although}}} + \put(270.0,-11.0){{\scriptsize {\slshape I}}} + 
\put(311.0,-11.0){{\scriptsize {\slshape was}}} + \put(348.0,-11.0){{\scriptsize {\slshape wait}}} + \put(403.0,-11.0){{\scriptsize {\slshape that}}} + \put(33.0,30.0){\oval(39.47826086956522,33.333333333333336)[t]} + \put(13.26086956521739,35.0){\vector(0,-1){5.0}} + \put(21.75,49.66666666666667){{\tiny nsubj}} + \put(61.0,130.0){\vector(0,-1){100.0}} + \put(66.0,120.0){{\tiny root}} + \put(89.0,30.0){\oval(39.47826086956522,33.333333333333336)[t]} + \put(108.73913043478261,35.0){\vector(0,-1){5.0}} + \put(82.25,49.66666666666667){{\tiny obj}} + \put(289.0,30.0){\oval(135.82608695652175,100.0)[t]} + \put(221.08695652173913,35.0){\vector(0,-1){5.0}} + \put(280.0,83.0){{\tiny mark}} + \put(316.5,30.0){\oval(79.3855421686747,66.66666666666667)[t]} + \put(276.8072289156627,35.0){\vector(0,-1){5.0}} + \put(305.25,66.33333333333334){{\tiny nsubj}} + \put(339.5,30.0){\oval(28.89189189189189,33.333333333333336)[t]} + \put(325.05405405405406,35.0){\vector(0,-1){5.0}} + \put(332.75,49.66666666666667){{\tiny \bfseries aux:*}} + \put(217.0,30.0){\oval(301.0066225165563,133.33333333333334)[t]} + \put(367.50331125827813,35.0){\vector(0,-1){5.0}} + \put(205.75,99.66666666666667){{\tiny advcl}} + \put(395.5,30.0){\oval(49.54545454545455,33.333333333333336)[t]} + \put(420.27272727272725,35.0){\vector(0,-1){5.0}} + \put(388.75,49.66666666666667){{\tiny obj}} +\end{picture} + +\bigskip + +- \small correction: "Jag hade __motstridiga känslor__ fast jag __hade väntat mig__ det" +- \small translation: "I had mixed feelings although I was expecting that" + +## Example 2 +\setlength{\unitlength}{0.23mm} +\begin{picture}(195.0,110.0) + \put(0.0,0.0){en} + \put(37.0,0.0){lång} + \put(83.0,0.0){\bfseries bus} + \put(129.0,0.0){\bfseries resa} + \put(0.0,-13.0){{\scriptsize {\slshape a}}} + \put(37.0,-13.0){{\scriptsize {\slshape long}}} + \put(83.0,-13.0){{\scriptsize {\slshape bus}}} + \put(129.0,-13.0){{\scriptsize {\slshape trip}}} +\end{picture} + +\bigskip + +- \small correction: "en 
lång __bussresa__" +- \small translation: "a long bus trip" + +## Example 2 +\setlength{\unitlength}{0.23mm} +\begin{picture}(195.0,110.0) + \put(0.0,0.0){en} + \put(37.0,0.0){lång} + \put(83.0,0.0){\bfseries bus} + \put(129.0,0.0){\bfseries resa} + \put(0.0,15.0){{\tiny DET}} + \put(37.0,15.0){{\tiny ADJ}} + \put(83.0,15.0){{\tiny NOUN}} + \put(129.0,15.0){{\tiny NOUN}} + \put(0.0,-13.0){{\scriptsize {\slshape a}}} + \put(37.0,-13.0){{\scriptsize {\slshape long}}} + \put(83.0,-13.0){{\scriptsize {\slshape bus}}} + \put(129.0,-13.0){{\scriptsize {\slshape trip}}} +\end{picture} + +\bigskip + +- \small correction: "en lång __bussresa__" +- \small translation: "a long bus trip" + +## Example 2 +\setlength{\unitlength}{0.23mm} +\begin{picture}(195.0,110.0) + \put(0.0,0.0){en} + \put(37.0,0.0){lång} + \put(83.0,0.0){\bfseries bus} + \put(129.0,0.0){\bfseries resa} + \put(0.0,15.0){{\tiny DET}} + \put(37.0,15.0){{\tiny ADJ}} + \put(83.0,15.0){{\tiny NOUN}} + \put(129.0,15.0){{\tiny NOUN}} + \put(0.0,-13.0){{\scriptsize {\slshape a}}} + \put(37.0,-13.0){{\scriptsize {\slshape long}}} + \put(83.0,-13.0){{\scriptsize {\slshape bus}}} + \put(129.0,-13.0){{\scriptsize {\slshape trip}}} + \put(74.5,30.0){\oval(126.67441860465117,100.0)[t]} + \put(11.162790697674417,35.0){\vector(0,-1){5.0}} + \put(67.75,83.0){{\tiny det}} + \put(93.0,30.0){\oval(88.73913043478261,66.66666666666667)[t]} + \put(48.630434782608695,35.0){\vector(0,-1){5.0}} + \put(84.0,66.33333333333334){{\tiny amod}} + \put(116.0,30.0){\oval(39.47826086956522,33.333333333333336)[t]} + \put(96.26086956521739,35.0){\vector(0,-1){5.0}} + \put(75.5,49.66666666666667){{\tiny compound:*}} + \put(144.0,110.0){\vector(0,-1){80.0}} + \put(149.0,100.0){{\tiny root}} +\end{picture} + +\bigskip + +- \small correction: "en lång __bussresa__" +- \small translation: "a long bus trip" + +## Example 3 +\setlength{\unitlength}{0.23mm} +\begin{picture}(531.0,110.0) + \small + \put(0.0,0.0){\bfseries den} + \put(46.0,0.0){\bfseries 
är} + \put(83.0,0.0){\bfseries smog} + \put(129.0,0.0){salt} + \put(175.0,0.0){och} + \put(230.0,0.0){det} + \put(276.0,0.0){bra} + \put(313.0,0.0){för} + \put(350.0,0.0){\bfseries all} + \put(387.0,0.0){\bfseries kropen} + \put(0.0,15.0){{\tiny PRON}} + \put(46.0,15.0){{\tiny AUX}} + \put(83.0,15.0){{\tiny NOUN}} + \put(129.0,15.0){{\tiny NOUN}} + \put(175.0,15.0){{\tiny CCONJ}} + \put(230.0,15.0){{\tiny PRON}} + \put(276.0,15.0){{\tiny ADJ}} + \put(313.0,15.0){{\tiny ADP}} + \put(350.0,15.0){{\tiny DET}} + \put(387.0,15.0){{\tiny NOUN}} + \put(0.0,-13.0){{\scriptsize {\slshape it}}} + \put(46.0,-13.0){{\scriptsize {\slshape is}}} + \put(83.0,-13.0){{\scriptsize {\slshape taste?}}} + \put(129.0,-13.0){{\scriptsize {\slshape salt}}} + \put(175.0,-13.0){{\scriptsize {\slshape and}}} + \put(230.0,-13.0){{\scriptsize {\slshape it}}} + \put(276.0,-13.0){{\scriptsize {\slshape good}}} + \put(313.0,-13.0){{\scriptsize {\slshape for}}} + \put(350.0,-13.0){{\scriptsize {\slshape all}}} + \put(387.0,-13.0){{\scriptsize {\slshape the.body}}} + \put(51.5,30.0){\oval(79.3855421686747,66.66666666666667)[t]} + \put(11.807228915662648,35.0){\vector(0,-1){5.0}} + \put(40.25,66.33333333333334){{\tiny nsubj}} + \put(74.5,30.0){\oval(28.89189189189189,33.333333333333336)[t]} + \put(60.054054054054056,35.0){\vector(0,-1){5.0}} + \put(67.75,49.66666666666667){{\tiny cop}} + \put(98.0,110.0){\vector(0,-1){80.0}} + \put(103.0,100.0){{\tiny root}} + \put(126.0,30.0){\oval(39.47826086956522,33.333333333333336)[t]} + \put(145.73913043478262,35.0){\vector(0,-1){5.0}} + \put(117.0,49.66666666666667){{\tiny nmod}} + \put(235.5,30.0){\oval(98.02970297029702,66.66666666666667)[t]} + \put(186.4851485148515,35.0){\vector(0,-1){5.0}} + \put(231.0,66.33333333333334){{\tiny cc}} + \put(263.0,30.0){\oval(39.47826086956522,33.333333333333336)[t]} + \put(243.26086956521738,35.0){\vector(0,-1){5.0}} + \put(251.75,49.66666666666667){{\tiny nsubj}} + \put(199.5,30.0){\oval(191.4455958549223,100.0)[t]} + 
\put(295.22279792746116,35.0){\vector(0,-1){5.0}} + \put(190.5,83.0){{\tiny conj}} + \put(360.0,30.0){\oval(69.94594594594595,66.66666666666667)[t]} + \put(325.02702702702703,35.0){\vector(0,-1){5.0}} + \put(351.0,66.33333333333334){{\tiny case}} + \put(378.5,30.0){\oval(28.89189189189189,33.333333333333336)[t]} + \put(364.05405405405406,35.0){\vector(0,-1){5.0}} + \put(371.75,49.66666666666667){{\tiny det}} + \put(351.5,30.0){\oval(108.29729729729729,100.0)[t]} + \put(405.64864864864865,35.0){\vector(0,-1){5.0}} + \put(344.75,83.0){{\tiny obl}} +\end{picture} + +\bigskip + +- \small correction: "__Det smakar__ salt och det __är__ bra för __hela kroppen__" +- \small translation: "it tastes salt and it's good for the whole body" + +## Example 3: parser output +\setlength{\unitlength}{0.23mm} +\begin{picture}(531.0,110.0) + \put(0.0,0.0){\bfseries den} + \put(46.0,0.0){\bfseries är} + \put(83.0,0.0){\bfseries smog} + \put(129.0,0.0){salt} + \put(175.0,0.0){och} + \put(230.0,0.0){det} + \put(276.0,0.0){bra} + \put(313.0,0.0){för} + \put(350.0,0.0){\bfseries all} + \put(387.0,0.0){\bfseries kropen} + \put(0.0,15.0){{\tiny PRON}} + \put(46.0,15.0){{\tiny AUX}} + \put(83.0,15.0){{\tiny \color{SecondaryColor} ADV}} + \put(129.0,15.0){{\tiny \color{SecondaryColor} ADJ}} + \put(175.0,15.0){{\tiny CCONJ}} + \put(230.0,15.0){{\tiny PRON}} + \put(276.0,15.0){{\tiny ADJ}} + \put(313.0,15.0){{\tiny ADP}} + \put(350.0,15.0){{\tiny DET}} + \put(387.0,15.0){{\tiny NOUN}} + \put(74.5,30.0){\oval(126.67441860465117,100.0)[t]} + \put(11.162790697674417,35.0){\vector(0,-1){5.0}} + \put(63.25,83.0){{\tiny nsubj}} + \put(97.5,30.0){\oval(79.3855421686747,66.66666666666667)[t]} + \put(57.80722891566265,35.0){\vector(0,-1){5.0}} + \put(90.75,66.33333333333334){{\tiny cop}} + \put(116.0,30.0){\color{SecondaryColor} \oval(39.47826086956522,33.333333333333336)[t]} + \put(96.26086956521739,35.0){\color{SecondaryColor} \vector(0,-1){5.0}} + \put(102.5,49.66666666666667){{\tiny 
\color{SecondaryColor} advmod}} + \put(144.0,110.0){\color{SecondaryColor} \vector(0,-1){80.0}} + \put(149.0,100.0){{\tiny \color{SecondaryColor} root}} + \put(235.5,30.0){\oval(98.02970297029702,66.66666666666667)[t]} + \put(186.4851485148515,35.0){\vector(0,-1){5.0}} + \put(231.0,66.33333333333334){{\tiny cc}} + \put(263.0,30.0){\oval(39.47826086956522,33.333333333333336)[t]} + \put(243.26086956521738,35.0){\vector(0,-1){5.0}} + \put(251.75,49.66666666666667){{\tiny nsubj}} + \put(222.5,30.0){\oval(144.9591836734694,100.0)[t]} + \put(294.9795918367347,35.0){\vector(0,-1){5.0}} + \put(213.5,83.0){{\tiny conj}} + \put(360.0,30.0){\oval(69.94594594594595,66.66666666666667)[t]} + \put(325.02702702702703,35.0){\vector(0,-1){5.0}} + \put(351.0,66.33333333333334){{\tiny case}} + \put(378.5,30.0){\oval(28.89189189189189,33.333333333333336)[t]} + \put(364.05405405405406,35.0){\vector(0,-1){5.0}} + \put(371.75,49.66666666666667){{\tiny det}} + \put(351.5,30.0){\oval(108.29729729729729,100.0)[t]} + \put(405.64864864864865,35.0){\vector(0,-1){5.0}} + \put(344.75,83.0){{\tiny obl}} +\end{picture} + +\bigskip \bigskip + +\footnotesize (obtained with the UDPipe 2 Talbanken 2.15 model) + + + +## Our principles +- the validator is a tool, not a goal: + - __*literal* criteria at the token level__ + - __*distributional* criteria at the syntax level__ + - __borrow from L1__ guidelines when necessary +- __correction-aware annotation__: the annotation of learner sentences should be consistent with the semantics of the correction hypothesis + +## Status +- guidelines and test set (200/500 sentences) WIP +- remaining 5000 + 500 sentences TODO \pause + - you are welcome to __participate__! 
+ - you do _not_ have to be a native speaker + (in fact, none of the current annotators is) + - you _might_ be able to do this as a course project + +# Exploring parallel learner treebanks with STUnD + +## STUnD +- _Sökverktyg för Tvåspråkiga Universal Dependencies-trädbanker_, or +- Search Tool for (parallel) Universal Dependencies Treebanks +- available at `demo.spraakbanken.gu.se/stund` (hopefully) + +## Under the hood +1. identify subtree alignments +2. run the query on the LHS treebanks, looking for matching subtrees +3. find the corresponding RHS subtree (and check if it matches the RHS-specific patterns) + +## Use cases +- error retrieval: patterns (queries) $\to$ trees +- pattern extraction: trees $\to$ patterns +- feedback comment generation: patterns $\to$ natural language comments + +# Sources + +## In order of appearance +- \small John Lee, Keying Li, and Herman Leung. _L1-L2 parallel dependency treebank as learner corpus_. In Proceedings of the 15th International Conference on Parsing Technologies, pages 44-49, Pisa, Italy, September 2017. Association for Computational Linguistics +- \small John Lee, Herman Leung, and Keying Li. _Towards Universal Dependencies for learner Chinese_. In Marie-Catherine de Marneffe, Joakim Nivre, and Sebastian Schuster, editors, Proceedings of the NoDaLiDa 2017 Workshop on Universal Dependencies (UDW 2017), pages 67-71, Gothenburg, Sweden, May 2017. Association for Computational Linguistics + +## In order of appearance +- \small Yevgeni Berzak, Jessica Kenney, Carolyn Spadine, Jing Xian Wang, Lucia Lam, Keiko Sophie Mori, Sebastian Garza, and Boris Katz. _Universal Dependencies for learner English_. In Katrin Erk and Noah A. Smith, editors, Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 737-746, Berlin, Germany, August 2016. Association for Computational Linguistics. 
+- \small Elisa Di Nuovo, Manuela Sanguinetti, Alessandro Mazzei, Elisa Corino, and Cristina Bosco. _VALICO-UD: Treebanking an Italian learner corpus in Universal Dependencies_. IJCoL. Italian Journal of Computational Linguistics, 8(8-1), 2022 + +## In order of appearance +- \small Hakyung Sung and Gyu-Ho Shin. _Constructing a dependency treebank for second language learners of Korean_. In Nicoletta Calzolari, Min-Yen Kan, Veronique Hoste, Alessandro Lenci, Sakriani Sakti, and Nianwen Xue, editors, Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 3747-3758, Torino, Italia, May 2024. ELRA and ICCL +- \small Hakyung Sung and Gyu-Ho Shin. _Second language Korean Universal Dependency treebank v1.2: Focus on data augmentation and annotation scheme refinement_. In Špela Arhar Holdt, Nikolai Ilinykh, Barbara Scalvini, Micaella Bruton, Iben Nyholm Debess, and Crina Madalina Tudor, editors, Proceedings of the Third Workshop on Resources and Representations for Under-Resourced Languages and Domains (RESOURCEFUL-2025), pages 13-19, Tallinn, Estonia, March 2025. University of Tartu Library, Estonia + +## In order of appearance +- \small Alla Rozovskaya. _Universal Dependencies for learner Russian_. In Nicoletta Calzolari, Min-Yen Kan, Veronique Hoste, Alessandro Lenci, Sakriani Sakti, and Nianwen Xue, editors, Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 17112-17119, Torino, Italia, May 2024. ELRA and ICCL +- \small Elena Volodina, Lena Granstedt, Arild Matsson, Beáta Megyesi, Ildikó Pilán, Julia Prentice, Dan Rosén, Lisa Rudebeck, Carl-Johan Schenström, Gunlög Sundberg, et al. _The SweLL language learner corpus: From design to annotation_. Northern European Journal of Language Technology, 6:67-104, 2019 +- \small Arianna Masciolini. 
_A query engine for L1-L2 parallel dependency treebanks_. In Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa), pages 574--587, Tórshavn, Faroe Islands, May 2023. University of Tartu Library + +## In order of appearance +- \small Arianna Masciolini, Elena Volodina, and Dana Dannélls. _Towards automatically extracting morphosyntactical error patterns from L1-L2 parallel dependency treebanks_. In Proceedings of the 18th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2023), pages 585-597, Toronto, Canada, July 2023. Association for Computational Linguistics +- \small Arianna Masciolini and Márton A Tóth. _STUnD: ett Sökverktyg för Tvåspråkiga Universal Dependencies-trädbanker_. In Proceedings of the Huminfra Conference, pages 95-109, Gothenburg, Sweden, 2024 + +## To appear +- \small Arianna Masciolini, Herbert Lange and Márton A Tóth. _Exploring parallel corpora with STUnD: a Search Tool for Universal Dependencies_. In the upcoming Huminfra Handbook, Gothenburg, Sweden, __most likely__ 2025 +- \small a paper about harmonization of UD guidelines for L2 treebanks \ No newline at end of file diff --git a/lectures/lecture-n/arianna/slides.pdf b/lectures/lecture-n/arianna/slides.pdf new file mode 100644 index 0000000..b533c1a Binary files /dev/null and b/lectures/lecture-n/arianna/slides.pdf differ