integrating russian in document

2026-05-26 03:08:55 -06:00 · 2006-06-22 13:57:36 +00:00
parent 56aa14bf60
commit 5382e222d3
5 changed files with 437 additions and 1 deletions
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -3,9 +3,12 @@ resource:
 	gfdoc -txt2 ../lib/resource-1.0/*/Paradigms*.gf
 	txt2tags --toc resource.txt
 #	cat resource-preamble resource.tex >final-resource.tex
+	sed -i 's/\\docum/%\\docum/g' resource.tex
 	sed -i 's/ion\*{/ion{/g' resource.tex
 	sed -i 's/\\paragraph{}//g' resource.tex
 	sed -i 's/}\\\\/}/g' resource.tex
+	cat resource-preamble resource.tex >resource.tmp
+	mv resource.tmp resource.tex
 	latex resource.tex
 	latex resource.tex
 	dvipdf resource.dvi
--- a/doc/ParadigmsRus.tex
+++ b/doc/ParadigmsRus.tex
@@ -0,0 +1,424 @@
+This is an API for the user of the resource grammar for adding
+lexical items. It gives functions for forming expressions of open
+categories: nouns, adjectives, verbs.
+
+Closed categories (determiners, pronouns, conjunctions) are
+accessed through the resource syntax API, {\tt Structural.gf}.
+
+The main difference with {\tt MorphoRus.gf} is that the types
+referred to are compiled resource grammar types. We have moreover
+had the design principle of always having existing forms, rather
+than stems, as string arguments of the paradigms.
+
+The structure of functions for each word class {\tt C} is the following:
+first we give a handful of patterns that aim to cover all
+regular cases. Then we give a worst-case function {\tt mkC}, which serves as an
+escape to construct the most irregular words of type {\tt C}.
+%However, this function should only seldom be needed: we have a
+%separate module {\tt IrregularEng}, which covers all irregularly inflected
+%words.
+
+The following modules are presupposed:
+\begin{verbatim}
+  resource ParadigmsRus = open
+    (Predef=Predef),
+    Prelude,
+    MorphoRus,
+    CatRus,
+    NounRus
+    in {
+
+  flags  coding=utf8 ;
+\end{verbatim}
+
+\textbf{Parameters}
+
+To abstract over gender names, we define the following identifiers.
+\begin{verbatim}
+  oper
+    Gender : Type ;
+
+    masculine : Gender ;
+    feminine  : Gender ;
+    neuter    : Gender ;
+\end{verbatim}
+
+To abstract over case names, we define the following.
+\begin{verbatim}
+    Case : Type ;
+
+    nominative    : Case ;
+    genitive      : Case ;
+    dative        : Case ;
+    accusative    : Case ;
+    instructive   : Case ;
+    prepositional : Case ;
+\end{verbatim}
+
+In some English-language textbooks the accusative case
+is put in second place. However, we follow the case order
+that is standard in Russian textbooks.
+To abstract over number names, we define the following.
+\begin{verbatim}
+    Number : Type ;
+
+    singular : Number ;
+    plural   : Number ;
+
+    Animacy: Type ;
+
+    animate: Animacy;
+    inanimate: Animacy;
+\end{verbatim}
+
+\textbf{Nouns}
+Best case: indeclinable nouns: {\cyr kofe}, {\cyr
+pal\cyrsftsn{}to}, {\cyr VUZ}.
+\begin{verbatim}
+     mkIndeclinableNoun: Str -> Gender -> Animacy -> N ;
+\end{verbatim}
+
+Worst case - give six singular forms:
+Nominative, Genitive, Dative, Accusative, Instructive and Prepositional;
+the corresponding six plural forms and the gender.
+Maybe the number of forms needed can be reduced,
+but this requires a separate investigation.
+The Animacy parameter (determining whether the Accusative form is equal
+to the Nominative or the Genitive one) is actually of no help,
+since there are a lot of exceptions and the gain is just one form less.
+\begin{verbatim}
+     mkN  : (nomSg, genSg, datSg, accSg, instSg, preposSg,
+             nomPl, genPl, datPl, accPl, instPl, preposPl: Str)
+             -> Gender -> Animacy -> N ;
+\end{verbatim}
+({\cyr \em muzhchina, muzhchin\cyrery{}, muzhchine, muzhchinu,
+muzhchino\cyrishrt{}, muzhchine}
+
+\noindent {\cyr \em muzhchin\cyrery{}, muzhchin, muzhchinam,
+muzhchin, muzhchinami, muzhchinah})
+
+\vspace{5mm}
+
+The regular function captures the variants for some popular noun
+endings from the list below:
+\begin{verbatim}
+     regN             : Str -> N ;
+\end{verbatim}
+
+Here are some common patterns. The list is far from complete.
+
+\subsubsection{Feminine patterns}
+
+\noindent feminine, inanimate, ending with "-a", Inst -"{\cyr
+mashin-o\cyrishrt{}}":
+\begin{verbatim}
+     nMashina         : Str -> N ;
+\end{verbatim}
+feminine, inanimate, ending with "-a", Inst -"{\cyr
+edinic-e\cyrishrt{}}":
+\begin{verbatim}
+     nEdinica         : Str -> N ;
+\end{verbatim}
+feminine, animate, ending with "-a":
+\begin{verbatim}
+     nZhenchina       : Str -> N ;
+\end{verbatim}
+feminine, inanimate, ending with "{\cyr g\_k\_h-a}":
+\begin{verbatim}
+     nNoga            : Str -> N ;
+\end{verbatim}
+feminine, inanimate, ending with "{\cyr -iya}":
+\begin{verbatim}
+     nMalyariya       : Str -> N ;
+\end{verbatim}
+feminine, animate, ending with "{\cyr -ya}":
+\begin{verbatim}
+     nTetya           : Str -> N ;
+\end{verbatim}
+feminine, inanimate, ending with "-{\cyr \cyrsftsn{}}"(soft sign):
+\begin{verbatim}
+     nBol             : Str -> N ;
+\end{verbatim}
+
+\subsubsection{Neuter patterns}
+
+\noindent neuter, inanimate, ending with "-ee":
+\begin{verbatim}
+     nObezbolivauchee : Str -> N ;
+\end{verbatim}
+neuter, inanimate, ending with "-e":
+\begin{verbatim}
+     nProizvedenie    : Str -> N ;
+\end{verbatim}
+neuter, inanimate, ending with "-o":
+\begin{verbatim}
+     nChislo          : Str -> N ;
+\end{verbatim}
+neuter, inanimate, ending with "-{\cyr oe}":
+\begin{verbatim}
+     nZhivotnoe       : Str -> N ;
+\end{verbatim}
+
+\subsubsection{Masculine patterns}
+
+\noindent Ending with consonant:
+
+\noindent masculine, inanimate, ending with "-{\cyr el}"- "{\cyr
+pep-la}":
+\begin{verbatim}
+     nPepel           : Str -> N ;
+\end{verbatim}
+animate, "{\cyr brat-\cyrsftsn{}ya}":
+\begin{verbatim}
+     nBrat            : Str -> N ;
+\end{verbatim}
+same as above, but inanimate:
+\begin{verbatim}
+     nStul            : Str -> N ;
+\end{verbatim}
+"{\cyr malyshe\cyrishrt{}}":
+\begin{verbatim}
+     nMalush          : Str -> N ;
+\end{verbatim}
+"{\cyr potol-ok, potol-ka}"
+\begin{verbatim}
+     nPotolok         : Str -> N ;
+\end{verbatim}
+the next four differ in plural nominative and/or accusative
+form(s):
+
+\noindent {\cyr bank-i}(Nom=Acc):
+\begin{verbatim}
+     nBank            : Str -> N ;
+\end{verbatim}
+same as above, but animate:
+\begin{verbatim}
+     nStomatolog      : Str -> N ;
+\end{verbatim}
+"{\cyr adres-a}" (Nom=Acc):
+\begin{verbatim}
+     nAdres           : Str -> N ;
+\end{verbatim}
+"{\cyr telefony}" (Nom=Acc):
+\begin{verbatim}
+     nTelefon         : Str -> N ;
+\end{verbatim}
+masculine, inanimate, ending with "{\cyr \cyrsftsn{}}" (soft
+sign):
+\begin{verbatim}
+     nNol             : Str -> N ;
+\end{verbatim}
+masculine, inanimate, ending with "{\cyr -en\cyrsftsn{}}":
+\begin{verbatim}
+     nUroven          : Str -> N ;
+\end{verbatim}
+
+Nouns used as functions need a preposition. The most common is with Genitive.
+\begin{verbatim}
+     mkFun            : N -> Prep -> N2 ;
+     mkN2             : N -> N2 ;
+     mkN3             : N -> Prep -> Prep -> N3 ;
+\end{verbatim}
+
+\subsubsection{Proper names}
+
+{\cyr Ivan, Masha}:
+\begin{verbatim}
+     mkPN             : Str -> Gender -> Animacy -> PN ;
+\end{verbatim}
+\begin{verbatim}
+     nounPN           : N -> PN ;
+\end{verbatim}
+
+On the top level, it is maybe {\tt CN} that is used rather than {\tt N}, and
+{\tt NP} rather than {\tt PN}.
+\begin{verbatim}
+     mkCN             : N -> CN ;
+     mkNP             : Str -> Gender -> Animacy -> NP ;
+\end{verbatim}
+
+\textbf{Adjectives}
+Non-comparison (only positive degree) one-place adjectives need 28
+(4 by 7) forms in the worst case: (Masculine  | Feminine | Neuter
+| Plural) * (Nominative | Genitive | Dative | Accusative Inanimate
+| Accusative Animate | Instructive | Prepositional). Notice that 4
+short forms, which exist for some adjectives, are not included in
+the current description; otherwise there would be 32 forms for
+the positive degree.
+
+The regular function captures the variants for some popular
+adjective endings below. The first string argument is the
+masculine singular form, the second is the comparative:
+\begin{verbatim}
+     regA             : Str -> Str -> A ;
+\end{verbatim}
+
+\noindent Invariable adjective is a special case: {\cyr haki,
+mini, hindi, netto}:
+\begin{verbatim}
+     adjInvar         : Str -> A ;
+\end{verbatim}
+
+Some regular patterns depending on the ending.
+
+\noindent ending with "{\cyr y\cyrishrt{}}":
+\begin{verbatim}
+     AStaruyj         : Str -> Str -> A ;
+\end{verbatim}
+ending with "{\cyr i\cyrishrt{}}", Gen - "{\cyr
+malen\cyrsftsn{}k-ogo}":
+\begin{verbatim}
+     AMalenkij        : Str -> Str -> A ;
+\end{verbatim}
+ending with "{\cyr i\cyrishrt{}}", Gen - "{\cyr horosh-ego}":
+\begin{verbatim}
+     AKhoroshij       : Str -> Str -> A ;
+\end{verbatim}
+ending with "{\cyr o\cyrishrt{}}", plural - "{\cyr molod-ye}":
+\begin{verbatim}
+     AMolodoj         : Str -> Str -> A ;
+\end{verbatim}
+ending with "{\cyr o\cyrishrt{}}", plural - "{\cyr kak-ie}":
+\begin{verbatim}
+     AKakoj_Nibud     : Str -> Str -> Str -> A ;
+\end{verbatim}
+
+Two-place adjectives need a preposition and a case as extra arguments.
+
+"{\cyr delim na}":
+\begin{verbatim}
+     mkA2             : A -> Str -> Case -> A2 ;
+\end{verbatim}
+
+Comparison adjectives need a positive adjective (28 forms without
+short forms). Taking only one comparative form (non-syntactic) and
+only one superlative form (syntactic) we can produce the
+comparison adjective with only one extra argument - non-syntactic
+comparative form. Syntactic forms are based on the positive forms.
+
+\begin{verbatim}
+     mkADeg           : A -> Str -> ADeg ;
+\end{verbatim}
+On top level, there are adjectival phrases. The most common case
+is just to use a one-place adjective.
+\begin{verbatim}
+     ap               : A  -> IsPostfixAdj -> AP ;
+\end{verbatim}
+
+\textbf{Adverbs}
+Adverbs are not inflected. %Most lexical ones have position after the verb. Some can be preverbal (e.g. {\it always}).
+\begin{verbatim}
+     mkAdv            : Str -> Adv ;
+\end{verbatim}
+
+\textbf{Verbs}
+
+In our lexicon description ({\it Verbum}) there are 62 forms: 2
+(Voice) by \{ 1 (infinitive) + [2(number) by 3
+(person)](imperative) + [ [2(Number) by 3(Person)](present) +
+[2(Number) by 3(Person)](future) + 4(GenNum)(past) ](indicative)+
+4 (GenNum) (subjunctive) \}. Participles (Present and Past) and
+Gerund forms are not included, since they function more like
+Adjectives and Adverbs, respectively, rather than verbs. Aspect
+is regarded as an inherent parameter of a verb. Notice that some
+forms are never used for some verbs. %Actually, the majority of verbs do not have many of the forms.
+\begin{verbatim}
+  Voice: Type;
+  Aspect: Type;
+  Tense : Type;
+  Bool: Type;
+  Conjugation: Type ;
+\end{verbatim}
+"{\cyr gulya-Esh\cyrsftsn{}, gulya-Em}":
+\begin{verbatim}
+  first: Conjugation;
+\end{verbatim}
+
+\noindent Verbs with vowel "{\cyr \cyryo}": "{\cyr
+da\cyryo{}sh\cyrsftsn{}}" (give), "{\cyr
+p\cyrsftsn{}\cyryo{}sh\cyrsftsn{}}" (drink):
+\begin{verbatim}
+  firstE: Conjugation;
+\end{verbatim}
+
+\noindent "{\cyr vid-Ish\cyrsftsn{}, vid-Im}":
+\begin{verbatim}
+  second: Conjugation;
+\end{verbatim}
+"{\cyr hoch-Esh\cyrsftsn{}, hot-Im}":
+\begin{verbatim}
+  mixed: Conjugation;
+\end{verbatim}
+irregular:
+\begin{verbatim}
+  dolzhen: Conjugation;
+
+  true: Bool;
+  false: Bool;
+
+  active: Voice ;
+  passive: Voice ;
+  imperfective: Aspect;
+  perfective: Aspect ;
+\end{verbatim}
+
+The worst case needs 6 forms of the present tense in indicative
+mood ({\cyr ya begu}, {\cyr ty bezhish\cyrsftsn{}}, {\cyr on
+bezhit}, {\cyr my bezhim}, {\cyr vy bezhite}, {\cyr oni begut}), a
+past form (singular, masculine: {\cyr ya bezhal}), an imperative
+form (singular, second person: {\cyr begi}), an infinitive ({\cyr
+bezhat\cyrsftsn{}}). Inherent aspect should also be specified.
+\begin{verbatim}
+     mkVerbum : Aspect -> (presentSgP1,presentSgP2,presentSgP3,
+                           presentPlP1,presentPlP2,presentPlP3,
+           pastSgMasculine,imperative,infinitive: Str) -> V ;
+\end{verbatim}
+
+Common conjugation patterns are two conjugations: first - verbs
+ending with {\cyr -at\cyrsftsn{}/-yat\cyrsftsn{}} and second -
+{\cyr -it\cyrsftsn{}/-et\cyrsftsn{}}. Instead of 6 present forms
+of the worst case, we only need a present stem and one ending
+(singular, first person): {\cyr ya l\cyryu{}bl\cyryu{}}, {\cyr ya
+zhdu}, etc. To determine where the border between stem and ending
+lies, it is sufficient to compare the first person form with the second
+person form: {\cyr ya l\cyryu{}bl\cyryu{}}, {\cyr ty
+l\cyryu{}bish\cyrsftsn{}}. Stems should be the same. So the
+definition for verb {\cyr l\cyryu{}bit\cyrsftsn{}} looks like:
+\texttt{regV Imperfective Second }"{\cyr l\cyryu{}b}" "{\cyr
+l\cyryu{}}" "{\cyr l\cyryu{}bil}" "{\cyr l\cyryu{}bi}" "{\cyr
+l\cyryu{}bit\cyrsftsn{}}";
+\begin{verbatim}
+     regV : Aspect -> Conjugation -> (stemPresentSgP1,
+       endingPresentSgP1,pastSgP1,imperative,infinitive: Str) -> V ;
+\end{verbatim}
+
+For writing an application grammar one usually doesn't need the
+whole inflection table, since each verb is used in a particular
+context that determines some of the parameters (Tense and Voice,
+while Aspect is fixed from the beginning) for a certain usage. For
+this purpose there is the {\it V} type, which has these parameters
+fixed. We can extract the {\it V} from the lexicon.
+\begin{verbatim}
+     mkV              : Verbum -> Voice ->  V ;
+     mkPresentV       : Verbum -> Voice -> V ;
+\end{verbatim}
+
+Two-place verbs, and the special case with direct object. Notice
+that a particle can be included in a {\tt V}.
+
+\noindent "{\cyr vo\cyrishrt{}ti v dom}", "{\cyr v}", accusative:
+\begin{verbatim}
+     mkV2             : V   -> Str -> Case -> V2 ;
+\end{verbatim}
+{\cyr slozhit\cyrsftsn{} pis\cyrsftsn{}mo v konvert}:
+\begin{verbatim}
+     mkV3             : V -> Str -> Str -> Case -> Case -> V3 ;
+\end{verbatim}
+"{\cyr videt\cyrsftsn{}}", "{\cyr l\cyryu{}bit\cyrsftsn{}}":
+\begin{verbatim}
+     dirV2            : V -> V2 ;
+     tvDirDir         : V -> V3 ;
+\end{verbatim}
+
+The definitions should not bother the user of the API. So they are
+hidden from the document.
--- a/doc/resource-preamble
+++ b/doc/resource-preamble
@@ -0,0 +1,7 @@
+\documentclass[11pt,a4paper]{article}
+
+\usepackage[T2A,OT1]{fontenc}
+\usepackage[ot2enc]{inputenc}
+\usepackage[russian,german,french,english]{babel}
+\usepackage{isolatin1}  % user defined package
+
--- a/doc/resource.pdf
+++ b/doc/resource.pdf
--- a/doc/resource.txt
+++ b/doc/resource.txt
@@ -16,7 +16,7 @@ Last update: %%date(%c)
 %!postproc(tex): "#CAPTION" "caption{"
 %!postproc(tex): "#RBRACE" "end{figure}"
 %!postproc(tex): "#CLEARPAGE" "clearpage"
-
+%!postproc(tex): "#PARADIGMSRUS" "input{ParadigmsRus.tex}"
 %!target:tex

 #CLEARPAGE
@@ -956,6 +956,8 @@ has only been exploited in a very small scale so far.

 % %!include: ""./ParadigmsRus.tex""

+#PARADIGMSRUS
+
 ===Spanish===

 %!include: ../lib/resource-1.0/spanish/ParadigmsSpa.txt