diff --git a/doc/Makefile b/doc/Makefile index 5c3a92168..9c0c735be 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -3,9 +3,12 @@ resource: gfdoc -txt2 ../lib/resource-1.0/*/Paradigms*.gf txt2tags --toc resource.txt # cat resource-preamble resource.tex >final-resource.tex + sed -i 's/\\docum/%\\docum/g' resource.tex sed -i 's/ion\*{/ion{/g' resource.tex sed -i 's/\\paragraph{}//g' resource.tex sed -i 's/}\\\\/}/g' resource.tex + cat resource-preamble resource.tex >resource.tmp + mv resource.tmp resource.tex latex resource.tex latex resource.tex dvipdf resource.dvi diff --git a/doc/ParadigmsRus.tex b/doc/ParadigmsRus.tex new file mode 100644 index 000000000..950c88965 --- /dev/null +++ b/doc/ParadigmsRus.tex @@ -0,0 +1,424 @@ +This is an API for the user of the resource grammar for adding +lexical items. It gives functions for forming expressions of open +categories: nouns, adjectives, verbs. + +Closed categories (determiners, pronouns, conjunctions) are +accessed through the resource syntax API, {\tt Structural.gf}. + +The main difference with {\tt MorphoRus.gf} is that the types +referred to are compiled resource grammar types. We have moreover +had the design principle of always having existing forms, rather +than stems, as string arguments of the paradigms. + +The structure of functions for each word class {\tt C} is the following: +first we give a handful of patterns that aim to cover all +regular cases. Then we give a worst-case function {\tt mkC}, which serves as an +escape to construct the most irregular words of type {\tt C}. +%However, this function should only seldom be needed: we have a +%separate module {\tt IrregularEng}, which covers all irregularly inflected +%words. 
+ +The following modules are presupposed: +\begin{verbatim} + resource ParadigmsRus = open + (Predef=Predef), + Prelude, + MorphoRus, + CatRus, + NounRus + in { + + flags coding=utf8 ; +\end{verbatim} + +\textbf{Parameters} + +To abstract over gender names, we define the following identifiers. +\begin{verbatim} + oper + Gender : Type ; + + masculine : Gender ; + feminine : Gender ; + neuter : Gender ; +\end{verbatim} + +To abstract over case names, we define the following. +\begin{verbatim} + Case : Type ; + + nominative : Case ; + genitive : Case ; + dative : Case ; + accusative : Case ; + instructive : Case ; + prepositional : Case ; +\end{verbatim} + +In some textbooks written in English, the accusative case +is put in second place. However, we follow the case order +standard for Russian textbooks. +To abstract over number names, we define the following. +\begin{verbatim} + Number : Type ; + + singular : Number ; + plural : Number ; + + Animacy: Type ; + + animate: Animacy; + inanimate: Animacy; +\end{verbatim} + +\textbf{Nouns} +Best case: indeclinable nouns: {\cyr kofe}, {\cyr +pal\cyrsftsn{}to}, {\cyr VUZ}. +\begin{verbatim} + mkIndeclinableNoun: Str -> Gender -> Animacy -> N ; +\end{verbatim} + +Worst case - give six singular forms: +Nominative, Genitive, Dative, Accusative, Instructive and Prepositional; +corresponding six plural forms and the gender. +Maybe the number of forms needed can be reduced, +but this requires a separate investigation. +Animacy parameter (determining whether the Accusative form is equal +to the Nominative or the Genitive one) is actually of no help, +since there are a lot of exceptions and the gain is just one form less. 
+\begin{verbatim} + mkN : (nomSg, genSg, datSg, accSg, instSg, preposSg, + nomPl, genPl, datPl, accPl, instPl, preposPl: Str) + -> Gender -> Animacy -> N ; +\end{verbatim} +({\cyr \em muzhchina, muzhchin\cyrery{}, muzhchine, muzhchinu, +muzhchino\cyrishrt{}, muzhchine} + +\noindent {\cyr \em muzhchin\cyrery{}, muzhchin, muzhchinam, +muzhchin, muzhchinami, muzhchinah}) + +\vspace{5mm} + +The regular function captures the variants for some popular noun +endings from the list below: +\begin{verbatim} + regN : Str -> N ; +\end{verbatim} + +Here are some common patterns. The list is far from complete. + +\subsubsection{Feminine patterns} + +\noindent feminine, inanimate, ending with "-a", Inst -"{\cyr +mashin-o\cyrishrt{}}": +\begin{verbatim} + nMashina : Str -> N ; +\end{verbatim} +feminine, inanimate, ending with "-a", Inst -"{\cyr +edinic-e\cyrishrt{}}": +\begin{verbatim} + nEdinica : Str -> N ; +\end{verbatim} +feminine, animate, ending with "-a": +\begin{verbatim} + nZhenchina : Str -> N ; +\end{verbatim} +feminine, inanimate, ending with "{\cyr g\_k\_h-a}": +\begin{verbatim} + nNoga : Str -> N ; +\end{verbatim} +feminine, inanimate, ending with "{\cyr -iya}": +\begin{verbatim} + nMalyariya : Str -> N ; +\end{verbatim} +feminine, animate, ending with "{\cyr -ya}": +\begin{verbatim} + nTetya : Str -> N ; +\end{verbatim} +feminine, inanimate, ending with "-{\cyr \cyrsftsn{}}"(soft sign): +\begin{verbatim} + nBol : Str -> N ; +\end{verbatim} + +\subsubsection{Neuter patterns} + +\noindent neuter, inanimate, ending with "-ee": +\begin{verbatim} + nObezbolivauchee : Str -> N ; +\end{verbatim} +neuter, inanimate, ending with "-e": +\begin{verbatim} + nProizvedenie : Str -> N ; +\end{verbatim} +neuter, inanimate, ending with "-o": +\begin{verbatim} + nChislo : Str -> N ; +\end{verbatim} +neuter, inanimate, ending with "-{\cyr oe}": +\begin{verbatim} + nZhivotnoe : Str -> N ; +\end{verbatim} + +\subsubsection{Masculine patterns} + +\noindent Ending with consonant: + 
+\noindent masculine, inanimate, ending with "-{\cyr el}"- "{\cyr +pep-la}": +\begin{verbatim} + nPepel : Str -> N ; +\end{verbatim} +animate, "{\cyr brat-\cyrsftsn{}ya}": +\begin{verbatim} + nBrat : Str -> N ; +\end{verbatim} +same as above, but inanimate: +\begin{verbatim} + nStul : Str -> N ; +\end{verbatim} +"{\cyr malyshe\cyrishrt{}}": +\begin{verbatim} + nMalush : Str -> N ; +\end{verbatim} +"{\cyr potol-ok, potol-ka}" +\begin{verbatim} + nPotolok : Str -> N ; +\end{verbatim} +the next four differ in plural nominative and/or accusative +form(s): + +\noindent {\cyr bank-i}(Nom=Acc): +\begin{verbatim} + nBank : Str -> N ; +\end{verbatim} +same as above, but animate: +\begin{verbatim} + nStomatolog : Str -> N ; +\end{verbatim} +"{\cyr adres-a}" (Nom=Acc): +\begin{verbatim} + nAdres : Str -> N ; +\end{verbatim} +"{\cyr telefony}" (Nom=Acc): +\begin{verbatim} + nTelefon : Str -> N ; +\end{verbatim} +masculine, inanimate, ending with "{\cyr \cyrsftsn{}}" (soft +sign): +\begin{verbatim} + nNol : Str -> N ; +\end{verbatim} +masculine, inanimate, ending with "{\cyr -en\cyrsftsn{}}": +\begin{verbatim} + nUroven : Str -> N ; +\end{verbatim} + +Nouns used as functions need a preposition. The most common is with Genitive. +\begin{verbatim} + mkFun : N -> Prep -> N2 ; + mkN2 : N -> N2 ; + mkN3 : N -> Prep -> Prep -> N3 ; +\end{verbatim} + +\subsubsection{Proper names} + +{\cyr Ivan, Masha}: +\begin{verbatim} + mkPN : Str -> Gender -> Animacy -> PN ; +\end{verbatim} +\begin{verbatim} + nounPN : N -> PN ; +\end{verbatim} + +On the top level, it is maybe {\tt CN} that is used rather than {\tt N}, and +{\tt NP} rather than {\tt PN}. 
+\begin{verbatim} + mkCN : N -> CN ; + mkNP : Str -> Gender -> Animacy -> NP ; +\end{verbatim} + +\textbf{Adjectives} +Non-comparison (only positive degree) one-place adjectives need 28 +(4 by 7) forms in the worst case: (Masculine | Feminine | Neuter +| Plural) * (Nominative | Genitive | Dative | Accusative Inanimate +| Accusative Animate | Instructive | Prepositional). Notice that 4 +short forms, which exist for some adjectives, are not included in +the current description, otherwise there would be 32 forms for +positive degree. + +The regular function captures the variants for some popular +adjective endings below. The first string argument is the +masculine singular form, the second is comparative: +\begin{verbatim} + regA : Str -> Str -> A ; +\end{verbatim} + +\noindent Invariable adjective is a special case: {\cyr haki, +mini, hindi, netto}: +\begin{verbatim} + adjInvar : Str -> A ; +\end{verbatim} + +Some regular patterns depending on the ending. + +\noindent ending with "{\cyr y\cyrishrt{}}": +\begin{verbatim} + AStaruyj : Str -> Str -> A ; +\end{verbatim} +ending with "{\cyr i\cyrishrt{}}", Gen - "{\cyr +malen\cyrsftsn{}k-ogo}": +\begin{verbatim} + AMalenkij : Str -> Str -> A ; +\end{verbatim} +ending with "{\cyr i\cyrishrt{}}", Gen - "{\cyr horosh-ego}": +\begin{verbatim} + AKhoroshij : Str -> Str -> A ; +\end{verbatim} +ending with "{\cyr o\cyrishrt{}}", plural - "{\cyr molod-ye}": +\begin{verbatim} + AMolodoj : Str -> Str -> A ; +\end{verbatim} +ending with "{\cyr o\cyrishrt{}}", plural - "{\cyr kak-ie}": +\begin{verbatim} + AKakoj_Nibud : Str -> Str -> Str -> A ; +\end{verbatim} + +Two-place adjectives need a preposition and a case as extra arguments. + +"{\cyr delim na}": +\begin{verbatim} + mkA2 : A -> Str -> Case -> A2 ; +\end{verbatim} + +Comparison adjectives need a positive adjective (28 forms without +short forms). 
Taking only one comparative form (non-syntactic) and +only one superlative form (syntactic) we can produce the +comparison adjective with only one extra argument - non-syntactic +comparative form. Syntactic forms are based on the positive forms. + +\begin{verbatim} + mkADeg : A -> Str -> ADeg ; +\end{verbatim} +On top level, there are adjectival phrases. The most common case +is just to use a one-place adjective. +\begin{verbatim} + ap : A -> IsPostfixAdj -> AP ; +\end{verbatim} + +\textbf{Adverbs} +Adverbs are not inflected. %Most lexical ones have position after the verb. Some can be preverbal (e.g. {\it always}). +\begin{verbatim} + mkAdv : Str -> Adv ; +\end{verbatim} + +\textbf{Verbs} + +In our lexicon description ({\it Verbum}) there are 62 forms: 2 +(Voice) by \{ 1 (infinitive) + [2(number) by 3 +(person)](imperative) + [ [2(Number) by 3(Person)](present) + +[2(Number) by 3(Person)](future) + 4(GenNum)(past) ](indicative)+ +4 (GenNum) (subjunctive) \}. Participles (Present and Past) and +Gerund forms are not included, since they function more like +Adjectives and Adverbs respectively rather than verbs. Aspect +is regarded as an inherent parameter of a verb. Notice that some +forms are never used for some verbs. %Actually, the majority of verbs do not have many of the forms. 
+\begin{verbatim} + Voice: Type; + Aspect: Type; + Tense : Type; + Bool: Type; + Conjugation: Type ; +\end{verbatim} +"{\cyr gulya-Esh\cyrsftsn{}, gulya-Em}": +\begin{verbatim} + first: Conjugation; +\end{verbatim} + +\noindent Verbs with vowel "{\cyr \cyryo}": "{\cyr +da\cyryo{}sh\cyrsftsn{}}" (give), "{\cyr +p\cyrsftsn{}\cyryo{}sh\cyrsftsn{}}" (drink): +\begin{verbatim} + firstE: Conjugation; +\end{verbatim} + +\noindent "{\cyr vid-Ish\cyrsftsn{}, vid-Im}": +\begin{verbatim} + second: Conjugation; +\end{verbatim} +"{\cyr hoch-Esh\cyrsftsn{}, hot-Im}": +\begin{verbatim} + mixed: Conjugation; +\end{verbatim} +irregular: +\begin{verbatim} + dolzhen: Conjugation; + + true: Bool; + false: Bool; + + active: Voice ; + passive: Voice ; + imperfective: Aspect; + perfective: Aspect ; +\end{verbatim} + +The worst case needs 6 forms of the present tense in indicative +mood ({\cyr ya begu}, {\cyr ty bezhish\cyrsftsn{}}, {\cyr on +bezhit}, {\cyr my bezhim}, {\cyr vy bezhite}, {\cyr oni begut}), a +past form (singular, masculine: {\cyr ya bezhal}), an imperative +form (singular, second person: {\cyr begi}), and an infinitive ({\cyr +bezhat\cyrsftsn{}}). Inherent aspect should also be specified. +\begin{verbatim} + mkVerbum : Aspect -> (presentSgP1,presentSgP2,presentSgP3, + presentPlP1,presentPlP2,presentPlP3, + pastSgMasculine,imperative,infinitive: Str) -> V ; +\end{verbatim} + +Common conjugation patterns are two conjugations: first - verbs +ending with {\cyr -at\cyrsftsn{}/-yat\cyrsftsn{}} and second - +{\cyr -it\cyrsftsn{}/-et\cyrsftsn{}}. Instead of 6 present forms +of the worst case, we only need a present stem and one ending +(singular, first person): {\cyr ya l\cyryu{}bl\cyryu{}}, {\cyr ya +zhdu}, etc. To determine where the border between stem and ending +lies it is sufficient to compare the first person form with the second +person form: {\cyr ya l\cyryu{}bl\cyryu{}}, {\cyr ty +l\cyryu{}bish\cyrsftsn{}}. Stems should be the same. 
So the +definition for verb {\cyr l\cyryu{}bit\cyrsftsn{}} looks like: +\texttt{regV imperfective second }"{\cyr l\cyryu{}b}" "{\cyr +l\cyryu{}}" "{\cyr l\cyryu{}bil}" "{\cyr l\cyryu{}bi}" "{\cyr +l\cyryu{}bit\cyrsftsn{}}"; +\begin{verbatim} + regV : Aspect -> Conjugation -> (stemPresentSgP1, + endingPresentSgP1,pastSgP1,imperative,infinitive: Str) -> V ; +\end{verbatim} + +For writing an application grammar one usually doesn't need the +whole inflection table, since each verb is used in a particular +context that determines some of the parameters (Tense and Voice +while Aspect is fixed from the beginning) for certain usage. The +{\it V} type has these parameters fixed. We can extract the +{\it V} from the lexicon. +\begin{verbatim} + mkV : Verbum -> Voice -> V ; + mkPresentV : Verbum -> Voice -> V ; +\end{verbatim} + +Two-place verbs, and the special case with direct object. Notice +that a particle can be included in a {\tt V}. + +\noindent "{\cyr vo\cyrishrt{}ti v dom}", "{\cyr v}", accusative: +\begin{verbatim} + mkV2 : V -> Str -> Case -> V2 ; +\end{verbatim} +{\cyr slozhit\cyrsftsn{} pis\cyrsftsn{}mo v konvert}: +\begin{verbatim} + mkV3 : V -> Str -> Str -> Case -> Case -> V3 ; +\end{verbatim} +"{\cyr videt\cyrsftsn{}}", "{\cyr l\cyryu{}bit\cyrsftsn{}}": +\begin{verbatim} + dirV2 : V -> V2 ; + tvDirDir : V -> V3 ; +\end{verbatim} + +The definitions should not bother the user of the API. So they are +hidden from the document. 
diff --git a/doc/resource-preamble b/doc/resource-preamble new file mode 100644 index 000000000..ac3185f42 --- /dev/null +++ b/doc/resource-preamble @@ -0,0 +1,7 @@ +\documentclass[11pt,a4paper]{article} + +\usepackage[T2A,OT1]{fontenc} +\usepackage[ot2enc]{inputenc} +\usepackage[russian,german,french,english]{babel} +\usepackage{isolatin1} % user defined package + diff --git a/doc/resource.pdf b/doc/resource.pdf index 8ba05f891..6c96b8f1a 100644 Binary files a/doc/resource.pdf and b/doc/resource.pdf differ diff --git a/doc/resource.txt b/doc/resource.txt index e601c0254..04427c8f6 100644 --- a/doc/resource.txt +++ b/doc/resource.txt @@ -16,7 +16,7 @@ Last update: %%date(%c) %!postproc(tex): "#CAPTION" "caption{" %!postproc(tex): "#RBRACE" "end{figure}" %!postproc(tex): "#CLEARPAGE" "clearpage" - +%!postproc(tex): "#PARADIGMSRUS" "input{ParadigmsRus.tex}" %!target:tex #CLEARPAGE @@ -956,6 +956,8 @@ has only been exploited in a very small scale so far. % %!include: ""./ParadigmsRus.tex"" +#PARADIGMSRUS + ===Spanish=== %!include: ../lib/resource-1.0/spanish/ParadigmsSpa.txt