mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
integrating russian in document
This commit is contained in:
@@ -3,9 +3,12 @@ resource:
|
||||
gfdoc -txt2 ../lib/resource-1.0/*/Paradigms*.gf
|
||||
txt2tags --toc resource.txt
|
||||
# cat resource-preamble resource.tex >final-resource.tex
|
||||
sed -i 's/\\docum/%\\docum/g' resource.tex
|
||||
sed -i 's/ion\*{/ion{/g' resource.tex
|
||||
sed -i 's/\\paragraph{}//g' resource.tex
|
||||
sed -i 's/}\\\\/}/g' resource.tex
|
||||
cat resource-preamble resource.tex >resource.tmp
|
||||
mv resource.tmp resource.tex
|
||||
latex resource.tex
|
||||
latex resource.tex
|
||||
dvipdf resource.dvi
|
||||
|
||||
424
doc/ParadigmsRus.tex
Normal file
424
doc/ParadigmsRus.tex
Normal file
@@ -0,0 +1,424 @@
|
||||
This is an API for the user of the resource grammar for adding
|
||||
lexical items. It gives functions for forming expressions of open
|
||||
categories: nouns, adjectives, verbs.
|
||||
|
||||
Closed categories (determiners, pronouns, conjunctions) are
|
||||
accessed through the resource syntax API, {\tt Structural.gf}.
|
||||
|
||||
The main difference with {\tt MorphoRus.gf} is that the types
|
||||
referred to are compiled resource grammar types. We have moreover
|
||||
had the design principle of always having existing forms, rather
|
||||
than stems, as string arguments of the paradigms.
|
||||
|
||||
The structure of functions for each word class {\tt C} is the following:
|
||||
first we give a handful of patterns that aim to cover all
|
||||
regular cases. Then we give a worst-case function {\tt mkC}, which serves as an
|
||||
escape to construct the most irregular words of type {\tt C}.
|
||||
%However, this function should only seldom be needed: we have a
|
||||
%separate module {\tt IrregularEng}, which covers all irregularly inflected
|
||||
%words.
|
||||
|
||||
The following modules are presupposed:
|
||||
\begin{verbatim}
|
||||
resource ParadigmsRus = open
|
||||
(Predef=Predef),
|
||||
Prelude,
|
||||
MorphoRus,
|
||||
CatRus,
|
||||
NounRus
|
||||
in {
|
||||
|
||||
flags coding=utf8 ;
|
||||
\end{verbatim}
|
||||
|
||||
\textbf{Parameters}
|
||||
|
||||
To abstract over gender names, we define the following identifiers.
|
||||
\begin{verbatim}
|
||||
oper
|
||||
Gender : Type ;
|
||||
|
||||
masculine : Gender ;
|
||||
feminine : Gender ;
|
||||
neuter : Gender ;
|
||||
\end{verbatim}
|
||||
|
||||
To abstract over case names, we define the following.
|
||||
\begin{verbatim}
|
||||
Case : Type ;
|
||||
|
||||
nominative : Case ;
|
||||
genitive : Case ;
|
||||
dative : Case ;
|
||||
accusative : Case ;
|
||||
instructive : Case ;
|
||||
prepositional : Case ;
|
||||
\end{verbatim}
|
||||
|
||||
In some textbooks (written in English) the accusative case
|
||||
is put in second place. However, we follow the case order
|
||||
standard for Russian textbooks.
|
||||
To abstract over number names, we define the following.
|
||||
\begin{verbatim}
|
||||
Number : Type ;
|
||||
|
||||
singular : Number ;
|
||||
plural : Number ;
|
||||
|
||||
Animacy: Type ;
|
||||
|
||||
animate: Animacy;
|
||||
inanimate: Animacy;
|
||||
\end{verbatim}
|
||||
|
||||
\textbf{Nouns}
|
||||
Best case: indeclinable nouns: {\cyr kofe}, {\cyr
|
||||
pal\cyrsftsn{}to}, {\cyr VUZ}.
|
||||
\begin{verbatim}
|
||||
mkIndeclinableNoun: Str -> Gender -> Animacy -> N ;
|
||||
\end{verbatim}
|
||||
|
||||
Worst case - give six singular forms:
|
||||
Nominative, Genitive, Dative, Accusative, Instructive and Prepositional;
|
||||
corresponding six plural forms and the gender.
|
||||
Maybe the number of forms needed can be reduced,
|
||||
but this requires a separate investigation.
|
||||
Animacy parameter (determining whether the Accusative form is equal
|
||||
to the Nominative or the Genitive one) is actually of no help,
|
||||
since there are a lot of exceptions and the gain is just one form less.
|
||||
\begin{verbatim}
|
||||
mkN : (nomSg, genSg, datSg, accSg, instSg, preposSg,
|
||||
nomPl, genPl, datPl, accPl, instPl, preposPl: Str)
|
||||
-> Gender -> Animacy -> N ;
|
||||
\end{verbatim}
|
||||
({\cyr \em muzhchina, muzhchinu, muzhchine, muzhchinu,
|
||||
muzhchino\cyrishrt{}, muzhchine}
|
||||
|
||||
\noindent {\cyr \em muzhchin\cyrery{}, muzhchin, muzhchinam,
|
||||
muzhchin, muzhchinami, muzhchinah})
|
||||
|
||||
\vspace{5mm}
|
||||
|
||||
The regular function captures the variants for some popular noun
|
||||
endings from the list below:
|
||||
\begin{verbatim}
|
||||
regN : Str -> N ;
|
||||
\end{verbatim}
|
||||
|
||||
Here are some common patterns. The list is far from complete.
|
||||
|
||||
\subsubsection{Feminine patterns}
|
||||
|
||||
\noindent feminine, inanimate, ending with "-a", Inst -"{\cyr
|
||||
mashin-o\cyrishrt{}}":
|
||||
\begin{verbatim}
|
||||
nMashina : Str -> N ;
|
||||
\end{verbatim}
|
||||
feminine, inanimate, ending with "-a", Inst -"{\cyr
|
||||
edinic-e\cyrishrt{}}":
|
||||
\begin{verbatim}
|
||||
nEdinica : Str -> N ;
|
||||
\end{verbatim}
|
||||
feminine, animate, ending with "-a":
|
||||
\begin{verbatim}
|
||||
nZhenchina : Str -> N ;
|
||||
\end{verbatim}
|
||||
feminine, inanimate, ending with "{\cyr g\_k\_h-a}":
|
||||
\begin{verbatim}
|
||||
nNoga : Str -> N ;
|
||||
\end{verbatim}
|
||||
feminine, inanimate, ending with "{\cyr -iya}":
|
||||
\begin{verbatim}
|
||||
nMalyariya : Str -> N ;
|
||||
\end{verbatim}
|
||||
feminine, animate, ending with "{\cyr -ya}":
|
||||
\begin{verbatim}
|
||||
nTetya : Str -> N ;
|
||||
\end{verbatim}
|
||||
feminine, inanimate, ending with "-{\cyr \cyrsftsn{}}"(soft sign):
|
||||
\begin{verbatim}
|
||||
nBol : Str -> N ;
|
||||
\end{verbatim}
|
||||
|
||||
\subsubsection{Neuter patterns}
|
||||
|
||||
\noindent neuter, inanimate, ending with "-ee":
|
||||
\begin{verbatim}
|
||||
nObezbolivauchee : Str -> N ;
|
||||
\end{verbatim}
|
||||
neuter, inanimate, ending with "-e":
|
||||
\begin{verbatim}
|
||||
nProizvedenie : Str -> N ;
|
||||
\end{verbatim}
|
||||
neuter, inanimate, ending with "-o":
|
||||
\begin{verbatim}
|
||||
nChislo : Str -> N ;
|
||||
\end{verbatim}
|
||||
neuter, inanimate, ending with "-{\cyr oe}":
|
||||
\begin{verbatim}
|
||||
nZhivotnoe : Str -> N ;
|
||||
\end{verbatim}
|
||||
|
||||
\subsubsection{Masculine patterns}
|
||||
|
||||
\noindent Ending with consonant:
|
||||
|
||||
\noindent masculine, inanimate, ending with "-{\cyr el}"- "{\cyr
|
||||
pep-la}":
|
||||
\begin{verbatim}
|
||||
nPepel : Str -> N ;
|
||||
\end{verbatim}
|
||||
animate, "{\cyr brat-\cyrsftsn{}ya}":
|
||||
\begin{verbatim}
|
||||
nBrat : Str -> N ;
|
||||
\end{verbatim}
|
||||
same as above, but inanimate:
|
||||
\begin{verbatim}
|
||||
nStul : Str -> N ;
|
||||
\end{verbatim}
|
||||
"{\cyr malyshe\cyrishrt{}}":
|
||||
\begin{verbatim}
|
||||
nMalush : Str -> N ;
|
||||
\end{verbatim}
|
||||
"{\cyr potol-ok, potol-ka}"
|
||||
\begin{verbatim}
|
||||
nPotolok : Str -> N ;
|
||||
\end{verbatim}
|
||||
the next four differ in plural nominative and/or accusative
|
||||
form(s):
|
||||
|
||||
\noindent {\cyr bank-i}(Nom=Acc):
|
||||
\begin{verbatim}
|
||||
nBank : Str -> N ;
|
||||
\end{verbatim}
|
||||
same as above, but animate:
|
||||
\begin{verbatim}
|
||||
nStomatolog : Str -> N ;
|
||||
\end{verbatim}
|
||||
"{\cyr adres-a}" (Nom=Acc):
|
||||
\begin{verbatim}
|
||||
nAdres : Str -> N ;
|
||||
\end{verbatim}
|
||||
"{\cyr telefony}" (Nom=Acc):
|
||||
\begin{verbatim}
|
||||
nTelefon : Str -> N ;
|
||||
\end{verbatim}
|
||||
masculine, inanimate, ending with "{\cyr \cyrsftsn{}}" (soft
|
||||
sign):
|
||||
\begin{verbatim}
|
||||
nNol : Str -> N ;
|
||||
\end{verbatim}
|
||||
masculine, inanimate, ending with "{\cyr -en\cyrsftsn{}}":
|
||||
\begin{verbatim}
|
||||
nUroven : Str -> N ;
|
||||
\end{verbatim}
|
||||
|
||||
Nouns used as functions need a preposition. The most common is with Genitive.
|
||||
\begin{verbatim}
|
||||
mkFun : N -> Prep -> N2 ;
|
||||
mkN2 : N -> N2 ;
|
||||
mkN3 : N -> Prep -> Prep -> N3 ;
|
||||
\end{verbatim}
|
||||
|
||||
\subsubsection{Proper names}
|
||||
|
||||
{\cyr Ivan, Masha}:
|
||||
\begin{verbatim}
|
||||
mkPN : Str -> Gender -> Animacy -> PN ;
|
||||
\end{verbatim}
|
||||
\begin{verbatim}
|
||||
nounPN : N -> PN ;
|
||||
\end{verbatim}
|
||||
|
||||
On the top level, it is maybe {\tt CN} that is used rather than {\tt N}, and
|
||||
{\tt NP} rather than {\tt PN}.
|
||||
\begin{verbatim}
|
||||
mkCN : N -> CN ;
|
||||
mkNP : Str -> Gender -> Animacy -> NP ;
|
||||
\end{verbatim}
|
||||
|
||||
\textbf{Adjectives}
|
||||
Non-comparison (only positive degree) one-place adjectives need 28
|
||||
(4 by 7) forms in the worst case: (Masculine | Feminine | Neuter
|
||||
| Plural) * (Nominative | Genitive | Dative | Accusative Inanimate
|
||||
| Accusative Animate | Instructive | Prepositional). Notice that 4
|
||||
short forms, which exist for some adjectives, are not included in
|
||||
the current description, otherwise there would be 32 forms for
|
||||
positive degree.
|
||||
|
||||
The regular function captures the variants for some popular
|
||||
adjective endings below. The first string argument is the
|
||||
masculine singular form, the second is comparative:
|
||||
\begin{verbatim}
|
||||
regA : Str -> Str -> A ;
|
||||
\end{verbatim}
|
||||
|
||||
\noindent Invariable adjective is a special case: {\cyr haki,
|
||||
mini, hindi, netto}:
|
||||
\begin{verbatim}
|
||||
adjInvar : Str -> A ;
|
||||
\end{verbatim}
|
||||
|
||||
Some regular patterns depending on the ending.
|
||||
|
||||
\noindent ending with "{\cyr y\cyrishrt{}}":
|
||||
\begin{verbatim}
|
||||
AStaruyj : Str -> Str -> A ;
|
||||
\end{verbatim}
|
||||
ending with "{\cyr i\cyrishrt{}}", Gen - "{\cyr
|
||||
malen\cyrsftsn{}k-ogo}":
|
||||
\begin{verbatim}
|
||||
AMalenkij : Str -> Str -> A ;
|
||||
\end{verbatim}
|
||||
ending with "{\cyr i\cyrishrt{}}", Gen - "{\cyr horosh-ego}":
|
||||
\begin{verbatim}
|
||||
AKhoroshij : Str -> Str -> A ;
|
||||
\end{verbatim}
|
||||
ending with "{\cyr o\cyrishrt{}}", plural - "{\cyr molod-ye}":
|
||||
\begin{verbatim}
|
||||
AMolodoj : Str -> Str -> A ;
|
||||
\end{verbatim}
|
||||
ending with "{\cyr o\cyrishrt{}}", plural - "{\cyr kak-ie}":
|
||||
\begin{verbatim}
|
||||
AKakoj_Nibud : Str -> Str -> Str -> A ;
|
||||
\end{verbatim}
|
||||
|
||||
Two-place adjectives need a preposition and a case as extra arguments.
|
||||
|
||||
"{\cyr delim na}":
|
||||
\begin{verbatim}
|
||||
mkA2 : A -> Str -> Case -> A2 ;
|
||||
\end{verbatim}
|
||||
|
||||
Comparison adjectives need a positive adjective (28 forms without
|
||||
short forms). Taking only one comparative form (non-syntactic) and
|
||||
only one superlative form (syntactic) we can produce the
|
||||
comparison adjective with only one extra argument - non-syntactic
|
||||
comparative form. Syntactic forms are based on the positive forms.
|
||||
|
||||
\begin{verbatim}
|
||||
mkADeg : A -> Str -> ADeg ;
|
||||
\end{verbatim}
|
||||
On top level, there are adjectival phrases. The most common case
|
||||
is just to use a one-place adjective.
|
||||
\begin{verbatim}
|
||||
ap : A -> IsPostfixAdj -> AP ;
|
||||
\end{verbatim}
|
||||
|
||||
\textbf{Adverbs}
|
||||
Adverbs are not inflected. %Most lexical ones have position after the verb. Some can be preverbal (e.g. {\it always}).
|
||||
\begin{verbatim}
|
||||
mkAdv : Str -> Adv ;
|
||||
\end{verbatim}
|
||||
|
||||
\textbf{Verbs}
|
||||
|
||||
In our lexicon description ({\it Verbum}) there are 62 forms: 2
|
||||
(Voice) by { 1 (infinitive) + [2(number) by 3
|
||||
(person)](imperative) + [ [2(Number) by 3(Person)](present) +
|
||||
[2(Number) by 3(Person)](future) + 4(GenNum)(past) ](indicative)+
|
||||
4 (GenNum) (subjunctive) } Participles (Present and Past) and
|
||||
Gerund forms are not included, since they function more like
|
||||
Adjectives and Adverbs correspondingly rather than verbs. Aspect
|
||||
is regarded as an inherent parameter of a verb. Notice that some
|
||||
forms are never used for some verbs. %Actually, the majority of verbs do not have many of the forms.
|
||||
\begin{verbatim}
|
||||
Voice: Type;
|
||||
Aspect: Type;
|
||||
Tense : Type;
|
||||
Bool: Type;
|
||||
Conjugation: Type ;
|
||||
\end{verbatim}
|
||||
"{\cyr gulya-Esh\cyrsftsn{}, gulya-Em}":
|
||||
\begin{verbatim}
|
||||
first: Conjugation;
|
||||
\end{verbatim}
|
||||
|
||||
\noindent Verbs with vowel "{\cyr \cyryo}": "{\cyr
|
||||
da\cyryo{}sh\cyrsftsn{}}" (give), "{\cyr
|
||||
p\cyrsftsn{}\cyryo{}sh\cyrsftsn{}}" (drink):
|
||||
\begin{verbatim}
|
||||
firstE: Conjugation;
|
||||
\end{verbatim}
|
||||
|
||||
\noindent "{\cyr vid-Ish\cyrsftsn{}, vid-Im}":
|
||||
\begin{verbatim}
|
||||
second: Conjugation;
|
||||
\end{verbatim}
|
||||
"{\cyr hoch-Esh\cyrsftsn{}, hot-Im}":
|
||||
\begin{verbatim}
|
||||
mixed: Conjugation;
|
||||
\end{verbatim}
|
||||
irregular:
|
||||
\begin{verbatim}
|
||||
dolzhen: Conjugation;
|
||||
|
||||
true: Bool;
|
||||
false: Bool;
|
||||
|
||||
active: Voice ;
|
||||
passive: Voice ;
|
||||
imperfective: Aspect;
|
||||
perfective: Aspect ;
|
||||
\end{verbatim}
|
||||
|
||||
The worst case needs 6 forms of the present tense in indicative
|
||||
mood ({\cyr ya begu}, {\cyr ty bezhish\cyrsftsn{}}, {\cyr on
|
||||
bezhit}, {\cyr my bezhim}, {\cyr vy bezhite}, {\cyr oni begut}), a
|
||||
past form (singular, masculine: {\cyr ya bezhal}), an imperative
|
||||
form (singular, second person: {\cyr begi}), an infinitive ({\cyr
|
||||
bezhat\cyrsftsn{}}). Inherent aspect should also be specified.
|
||||
\begin{verbatim}
|
||||
mkVerbum : Aspect -> (presentSgP1,presentSgP2,presentSgP3,
|
||||
presentPlP1,presentPlP2,presentPlP3,
|
||||
pastSgMasculine,imperative,infinitive: Str) -> V ;
|
||||
\end{verbatim}
|
||||
|
||||
Common conjugation patterns are two conjugations: first - verbs
|
||||
ending with {\cyr -at\cyrsftsn{}/-yat\cyrsftsn{}} and second -
|
||||
{\cyr -it\cyrsftsn{}/-et\cyrsftsn{}}. Instead of 6 present forms
|
||||
of the worst case, we only need a present stem and one ending
|
||||
(singular, first person): {\cyr ya l\cyryu{}bl\cyryu{}}, {\cyr ya
|
||||
zhdu}, etc. To determine where the border between stem and ending
|
||||
lies it is sufficient to compare first person form with second
|
||||
person form: {\cyr ya l\cyryu{}bl\cyryu{}}, {\cyr ty
|
||||
l\cyryu{}bish\cyrsftsn{}}. Stems should be the same. So the
|
||||
definition for verb {\cyr l\cyryu{}bit\cyrsftsn{}} looks like:
|
||||
\texttt{regV imperfective second }"{\cyr l\cyryu{}b}" "{\cyr
|
||||
l\cyryu{}}" "{\cyr l\cyryu{}bil}" "{\cyr l\cyryu{}bi}" "{\cyr
|
||||
l\cyryu{}bit\cyrsftsn{}}";
|
||||
\begin{verbatim}
|
||||
regV : Aspect -> Conjugation -> (stemPresentSgP1,
|
||||
endingPresentSgP1,pastSgP1,imperative,infinitive: Str) -> V ;
|
||||
\end{verbatim}
|
||||
|
||||
For writing an application grammar one usually doesn't need the
|
||||
whole inflection table, since each verb is used in a particular
|
||||
context that determines some of the parameters (Tense and Voice
|
||||
while Aspect is fixed from the beginning) for certain usage. The
|
||||
{\it V} type has these parameters fixed. We can extract the
|
||||
{\it V} from the lexicon.
|
||||
\begin{verbatim}
|
||||
mkV : Verbum -> Voice -> V ;
|
||||
mkPresentV : Verbum -> Voice -> V ;
|
||||
\end{verbatim}
|
||||
|
||||
Two-place verbs, and the special case with direct object. Notice
|
||||
that a particle can be included in a {\tt V}.
|
||||
|
||||
\noindent "{\cyr vo\cyrishrt{}ti v dom}", "{\cyr v}", accusative:
|
||||
\begin{verbatim}
|
||||
mkV2 : V -> Str -> Case -> V2 ;
|
||||
\end{verbatim}
|
||||
{\cyr slozhit\cyrsftsn{} pic\cyrsftsn{}mo v konvert}:
|
||||
\begin{verbatim}
|
||||
mkV3 : V -> Str -> Str -> Case -> Case -> V3 ;
|
||||
\end{verbatim}
|
||||
"{\cyr videt\cyrsftsn{}}", "{\cyr l\cyryu{}bit\cyrsftsn{}}":
|
||||
\begin{verbatim}
|
||||
dirV2 : V -> V2 ;
|
||||
tvDirDir : V -> V3 ;
|
||||
\end{verbatim}
|
||||
|
||||
The definitions should not bother the user of the API. So they are
|
||||
hidden from the document.
|
||||
7
doc/resource-preamble
Normal file
7
doc/resource-preamble
Normal file
@@ -0,0 +1,7 @@
|
||||
\documentclass[11pt,a4paper]{article}
|
||||
|
||||
\usepackage[T2A,OT1]{fontenc}
|
||||
\usepackage[ot2enc]{inputenc}
|
||||
\usepackage[russian,german,french,english]{babel}
|
||||
\usepackage{isolatin1} % user defined package
|
||||
|
||||
BIN
doc/resource.pdf
BIN
doc/resource.pdf
Binary file not shown.
@@ -16,7 +16,7 @@ Last update: %%date(%c)
|
||||
%!postproc(tex): "#CAPTION" "caption{"
|
||||
%!postproc(tex): "#RBRACE" "end{figure}"
|
||||
%!postproc(tex): "#CLEARPAGE" "clearpage"
|
||||
|
||||
%!postproc(tex): "#PARADIGMSRUS" "input{ParadigmsRus.tex}"
|
||||
%!target:tex
|
||||
|
||||
#CLEARPAGE
|
||||
@@ -956,6 +956,8 @@ has only been exploited in a very small scale so far.
|
||||
|
||||
% %!include: ""./ParadigmsRus.tex""
|
||||
|
||||
#PARADIGMSRUS
|
||||
|
||||
===Spanish===
|
||||
|
||||
%!include: ../lib/resource-1.0/spanish/ParadigmsSpa.txt
|
||||
|
||||
Reference in New Issue
Block a user