1
0
forked from GitHub/gf-core

Significant advances in Maltese verb morphology

This commit is contained in:
john.j.camilleri
2012-07-19 16:19:56 +00:00
parent 4884087c2f
commit 01c58b4e23
10 changed files with 1584 additions and 798 deletions

View File

@@ -6,32 +6,31 @@
--# -path=.:../abstract:../common:../prelude
{-
Verb types summary:
===================
- Strong verb: none of radicals are semi-vowels eg ĦAREĠ (Ħ-R-Ġ)
- Defective verb: third radical is semivowel GĦ eg QATA' (Q-T-GĦ)
- Weak verb: third radical is semivowel J eg MEXA (M-X-J)
- Hollow verb: long A or IE btw radicals 1 & 3 eg QAL (Q-W-L) or SAB (S-J-B)
- Double/Geminated verb: radicals 2 & 3 identical eg ĦABB (Ħ-B-B)
- Quadriliteral verb: 4 radicals eg QARMEĊ (Q-R-M-Ċ)
-}
resource ResMlt = PatternsMlt ** open Prelude in {
resource ResMlt = ParamX - [Tense] ** open Prelude, Predef in {
flags coding=utf8 ;
param
-- Used in the NumeralMlt module
Gender = Masc | Fem ;
NPCase = Nom | Gen ;
{- Numerals -}
CardOrd = NCard | NOrd ;
DTail = T1 | T2 | T3 ; -- This is already defined in ParamX...
Num_Number =
NumSg
| NumDual
| NumPl
Num_Sg
| Num_Dl
| Num_Pl
;
-- oper
-- Num_Number : Type = { n : Number ; isDual : Bool } ;
param
DForm =
Unit -- 0..10
| Teen -- 11-19
@@ -41,32 +40,11 @@ resource ResMlt = PatternsMlt ** open Prelude in {
--| Thou -- 1000+
;
Num_Case =
NumNominative
| NumAdjectival ;
NumNominative -- TNEJN, ĦAMSA, TNAX, MIJA
| NumAdjectival ; -- ŻEWĠ, ĦAMES, TNAX-IL, MITT
{- Nouns -}
{-
Note: NNQ = Non-numerically quantifiable
Nouns can have the following forms:
o Singular
- Singulative (1, >10)
- Collective (NNQ)
o Dual (2)
o Plural
- Determinate (2-10)
- Indeterminate (NNQ)
- Sound
- Broken
- Plural of Plural
Typical combinations thereof (* marks base form):
- Singulative, no plural!
- Singulative*, Plural
- Singulative* (1), Dual (2), Plural (>2)
- Singulative (1, >10), Collective* (NNQ), Determinate Plural (2-10)
- Singulative, Collective*, Determinate Plural, Indeterminate Plural -> very few nouns have these 4 forms
-}
Noun_Sg_Type =
Singulative -- eg ĦUTA
| Collective -- eg ĦUT
@@ -81,17 +59,13 @@ resource ResMlt = PatternsMlt ** open Prelude in {
| Plural Noun_Pl_Type -- eg ĦUTIET / ĦWIET
;
{-
Noun_PluralType =
Sound -- External (affix), eg FERGĦA -> FERGĦAT
| Broken -- Internal, eg FERGĦA -> FRIEGĦI
| Irregular -- eg MARA -> NISA
| PluralOfPlural -- eg TARF -> TRUF -> TRUFIJIET
| Foreign -- eg KARTI, PRATTIĊI, TELEVIXINS
;
-}
NForm =
NRegular -- WIĊĊ
| NPronSuffix Agr ; -- WIĊĊU
Gender = Masc | Fem ;
{- Other... -}
GenNum = GSg Gender | GPl ; -- masc/fem/plural, e.g. adjective inflection
Animacy =
Animate
@@ -101,167 +75,194 @@ resource ResMlt = PatternsMlt ** open Prelude in {
Definiteness =
Definite -- eg IL-KARTA. In this context same as Determinate
| Indefinite -- eg KARTA
;
;
{-
-- CASE AS DEFINED BY GRAMMATIKA MALTIJA, p132
-- Noun cases
Case =
Nominative -- referent as subject, eg IT-TARBIJA ...
| Genitive -- referent as possessor, eg ... TAT-TARBIJA
| Accusative -- referent as direct object
| Dative -- referent as indirect object, eg ... LIT-TARBIJA
| Ablative -- referent as instrument, cause, location, source or time, eg ... MINN TARBIJA
| Vocative -- referent being addressed, eg AA TARBIJA (lol)
;
-}
{-
-- CASE AS DEFINED BY ME
-- Noun cases (note my examples include DEFINITE ARTICLE)
-- Commented lines mean that noun inflection is unchanged, not that the case does not occur in Maltese!
Case =
-- Absessive -- lack or absence of referent (MINGĦAJR)
-- | Ablative -- referent as instrument, cause, location, source or time
-- | Absolutive -- subject of intransitive or object of transitive verb (in ergative-absolutive languages)
-- | Accusative -- referent as direct object (in nominative-accusative languages)
-- | Allative -- motion towards referent (LEJN)
-- | Additive -- synonym of Allative (above)
Benefactive -- referent as recipient, eg GĦAT-TARBIJA. cf Dative.
-- | Causative -- referent as the cause of a situation (MINĦABBA)
| Comitative -- with, eg MAT-TARBIJA
| Dative -- referent as indirect object, eg LIT-TARBIJA. cf Benefactive.
-- | Delative -- motion downward from referent
| Elative -- motion away from referent, eg MIT-TARBIJA
| Equative -- likeness or identity, eg BĦAT-TARBIJA
-- | Ergative -- subject of transitive verb (in ergative-absolutive languages)
-- | Essive -- temporary state / while / in capacity of (BĦALA)
| Genitive -- referent as possessor, eg TAT-TARBIJA
-- | Illative -- motion into / towards referent, eg SAT-TARBIJA. cf Allative.
| Inessive -- within referent, eg ĠOT-TARBIJA, FIT-TARBIJA
| Instrumental -- referent as instrument, eg BIT-TARBIJA. cf Ablative.
| Lative -- motion up to referent, eg SAT-TARBIJA
-- | Locative -- location at referent
| Nominative -- referent as subject, eg IT-TARBIJA
-- | Partitive -- partial nature of referent
-- | Prolative -- motion along / beside referent
-- | Superessive -- on / upon (FUQ)
-- | Translative -- referent noun or adjective as result of process of change
-- | Vocative -- referent being addressed, eg AA TARBIJA (lol)
;
-}
-- Person = P1 | P2 | P3 ;
-- State = Def | Indef | Const ;
-- Mood = Ind | Cnj | Jus ;
-- Voice = Act | Pas ;
Origin =
Semitic
| Romance
| English
;
-- Order = Verbal | Nominal ;
-- Just for my own use
-- Mamma = Per3 Sg Masc ;
-- Shortcut type
-- GenNum = gn Gender Number2 ;
Person_Number = Sg | Pl ;
-- Agreement features
Agr =
Per1 Person_Number -- Jiena, Aħna
| Per2 Person_Number -- Inti, Intom
| Per3Sg Gender -- Huwa, Hija
| Per3Pl -- Huma
AgP1 Number -- Jiena, Aħna
| AgP2 Number -- Inti, Intom
| AgP3Sg Gender -- Huwa, Hija
| AgP3Pl -- Huma
;
-- Agr : Type = {g : Gender ; n : Number ; p : Person} ;
-- Ag : Gender -> Number -> Person -> Agr = \g,n,p -> {g = g ; n = n ; p = p} ;
-- agrP1 : Number -> Agr = \n -> Ag {} n P1 ;
-- agrP3 : Gender -> Number -> Agr = \g,n -> Ag g n P3 ;
-- Possible tenses
Tense =
Perf -- Perfect tense, eg SERAQ
| Impf -- Imperfect tense, eg JISRAQ
| Imp -- Imperative, eg ISRAQ
-- Tense =
-- Perf -- Perfect tense, eg SERAQ
-- | Impf -- Imperfect tense, eg JISRAQ
-- | Imp -- Imperative, eg ISRAQ
-- | PresPart -- Present Participle. Intransitive and 'motion' verbs only, eg NIEŻEL
-- | PastPart -- Past Participle. Both verbal & adjectival function, eg MISRUQ
-- | VerbalNoun -- Verbal Noun, eg SERQ
;
-- ;
-- Possible verb forms (tense + person)
VForm =
VPerf Agr -- Perfect tense in all pronoun cases
| VImpf Agr -- Imperfect tense in all pronoun cases
| VImp Person_Number -- Imperative is always Per2, Sg & Pl
| VImp Number -- Imperative is always P2, Sg & Pl
-- | VPresPart GenNum -- Present Participle for Gender/Number
-- | VPastPart GenNum -- Past Participle for Gender/Number
-- | VVerbalNoun -- Verbal Noun
;
-- Possible verb types
VType =
Strong -- Strong verb: none of radicals are semi-vowels eg ĦAREĠ (Ħ-R-Ġ)
| Defective -- Defective verb: third radical is semivowel GĦ eg QATA' (Q-T-GĦ)
| Weak -- Weak verb: third radicl is semivowel J eg MEXA (M-X-J)
| Hollow -- Hollow verb: long A or IE btw radicals 1 & 3 eg QAL (Q-W-L) or SAB (S-J-B)
| Double -- Double/Geminated verb: radicals 2 & 3 identical eg ĦABB (Ħ-B-B)
| Quad -- Quadliteral verb eg KARKAR (K-R-K-R), MAQDAR (M-Q-D-R), LEMBEB (L-M-B-B)
-- Derived verb forms, one constructor per Roman-numeral form.
VDerivedForm =
FormI
| FormII
| FormIII
| FormIV
| FormV
| FormVI
| FormVII
| FormVIII
| FormXI -- NOTE(review): sits between FormVIII and FormX, so this may be a typo for FormIX — check all users before renaming
| FormX
;
-- Verb classification: the top-level class of a verb.
-- Strong and Weak carry their sub-class as an argument; Loan has none.
VClass =
Strong VStrongClass -- strong verb, refined by VStrongClass
| Weak VWeakClass -- weak verb, refined by VWeakClass
| Loan --- temporary
-- | Romance
-- | English
;
-- Sub-classes of strong verbs (the argument of VClass's Strong constructor).
VStrongClass =
Regular
| LiquidMedial -- presumably: the middle radical is a liquid consonant — TODO confirm
| Reduplicative -- presumably: radicals 2 & 3 identical, eg ĦABB (Ħ-B-B) — TODO confirm
| Quad -- presumably: quadriliteral (4 radicals), eg QARMEĊ (Q-R-M-Ċ) — TODO confirm
;
-- Sub-classes of weak verbs (the argument of VClass's Weak constructor).
-- The glosses below are matched by name against the verb-type summary at the top of the file — TODO confirm.
VWeakClass =
Assimilative
| Hollow -- presumably: long A or IE between radicals 1 & 3, eg QAL (Q-W-L)
| WeakFinal -- presumably: third radical is J, eg MEXA (M-X-J)
| Defective -- presumably: third radical is GĦ, eg QATA' (Q-T-GĦ)
| QuadWeakFinal
;
-- VQuadClass =
-- BiradicalBase
-- | RepeatedC3
-- | RepeatedC1
-- | AdditionalC4
-- ;
-- Presumably the sub-classes for verbs of Romance origin — TODO confirm.
-- NOTE(review): no constructor of VClass takes this type in the code shown here (its Romance alternative is commented out).
VRomanceClass =
Integrated
| NonIntegrated
;
-- Inflection of verbs for pronominal suffixes.
-- Each Agr argument gives the agreement features of one attached pronoun.
-- NOTE(review): in the code shown here this type is only mentioned in a commented-out field of Verb.
VSuffixForm =
VNone -- no pronominal suffix, eg FTAĦT
| VDir Agr -- one suffix (presumably the direct object), eg FTAĦTU
| VInd Agr -- one suffix (presumably the indirect object), eg FTAĦTLU
| VDirInd Agr Agr -- two suffixes (presumably direct, then indirect), eg FTAĦTHULU
;
-- Adjective forms: the positive degree varies by gender/number (GenNum);
-- the comparative and the superlative are single forms each.
AForm =
-- AF Degree GenNum
APosit GenNum
| ACompar
| ASuperl
;
oper
-- Roots & Patterns
Pattern : Type = {v1, v2 : Str} ; -- vowel1, vowel2
Pattern : Type = {V1, V2 : Str} ;
-- Root3 : Type = {K, T, B : Str} ;
-- Root4 : Type = Root3 ** {L : Str} ;
Root : Type = {K, T, B, L : Str} ;
Root : Type = {C1, C2, C3, C4 : Str} ;
-- Some classes. I need to include "c" because currently "ċ" gets downgraded to "c" in input :/
Consonant : pattern Str = #( "b" | "c" | "ċ" | "d" | "f" | "ġ" | "g" | "għ" | "ħ" | "h" | "j" | "k" | "l" | "m" | "n" | "p" | "q" | "r" | "s" | "t" | "v" | "w" | "x" | "ż" | "z" );
CoronalConsonant : pattern Str = #( "c" | "ċ" | "d" | "n" | "r" | "s" | "t" | "x" | "ż" | "z" ); -- "konsonanti xemxin"
-- Build a consonantal Root (radicals C1..C4). Overloads:
--   no argument        : the empty root
--   one string         : radicals read off a root string written as "k-t-b" or "ktb"
--   three/four strings : radicals given explicitly (C4 is empty in the three-argument case)
mkRoot : Root = overload {
mkRoot : Root =
{ C1=[] ; C2=[] ; C3=[] ; C4=[] } ;
mkRoot : Str -> Root = \root ->
let root = toLower root in -- work on the lower-cased root string from here on
-- A "-" in second position selects the dash-separated layout (radicals at 0, 2, 4, 6);
-- any other second character means the radicals sit at positions 0..3.
-- NOTE(review): each radical is read as a single character, so a digraph radical such as "għ" would be split — confirm.
-- NOTE(review): for a three-radical string, C4 relies on charAt yielding the empty string past the end — confirm.
case (charAt 1 root) of {
"-" => { C1=(charAt 0 root) ; C2=(charAt 2 root) ; C3=(charAt 4 root) ; C4=(charAt 6 root) } ; -- "k-t-b"
_ => { C1=(charAt 0 root) ; C2=(charAt 1 root) ; C3=(charAt 2 root) ; C4=(charAt 3 root) } -- "ktb"
} ;
mkRoot : Str -> Str -> Str -> Root = \c1,c2,c3 ->
{ C1=c1 ; C2=c2 ; C3=c3 ; C4=[] } ;
mkRoot : Str -> Str -> Str -> Str -> Root = \c1,c2,c3,c4 ->
{ C1=c1 ; C2=c2 ; C3=c3 ; C4=c4 } ;
} ;
-- Build a vowel Pattern from zero, one or two vowels.
-- Any vowel that is not supplied is left empty.
mkPattern : Pattern = overload {
  -- no vowels
  mkPattern : Pattern =
    { V1 = [] ; V2 = [] } ;
  -- first vowel only
  mkPattern : Str -> Pattern = \vowel1 ->
    { V1 = vowel1 ; V2 = [] } ;
  -- both vowels
  mkPattern : Str -> Str -> Pattern = \vowel1,vowel2 ->
    { V1 = vowel1 ; V2 = vowel2 } ;
  } ;
-- Character classes, as string patterns for use with #Name in pattern matching.
-- All consonants, including the digraph "għ"
Consonant : pattern Str = #( "b" | "ċ" | "d" | "f" | "ġ" | "g" | "għ" | "ħ" | "h" | "j" | "k" | "l" | "m" | "n" | "p" | "q" | "r" | "s" | "t" | "v" | "w" | "x" | "ż" | "z" );
CoronalCons : pattern Str = #( "ċ" | "d" | "n" | "r" | "s" | "t" | "x" | "ż" | "z" ); -- coronal ("sun") consonants, Maltese "konsonanti xemxin"
ImpfDoublingCons : pattern Str = #( "d" | "ġ" | "s" | "t" | "ż" ); -- require doubling in imperfect, eg (inti) IDDUM, IĠĠOR, ISSIB, ITTIR, IŻŻID. --- only used in hollow paradigm (?)
LiquidCons : pattern Str = #( "l" | "m" | "n" | "r" | "għ" ); -- liquid consonants; note that "għ" is included here
WeakCons : pattern Str = #( "j" | "w" ); -- weak consonants
Vowel : pattern Str = #( "a" | "e" | "i" | "o" | "u" ); -- the five plain vowels
Digraph : pattern Str = #( "ie" ); -- the vowel digraph "ie"
SemiVowel : pattern Str = #( "għ" | "j" ); -- semivowels (differs from WeakCons: has "għ", lacks "w")
{- ===== Type declarations ===== -}
-- VP = {
-- v : Verb ;
-- clit : Str ;
-- clitAgr : ClitAgr ;
-- obj : Agr => Str
-- } ;
-- NP = {
-- s : Case => {clit,obj : Str ; isClit : Bool} ;
-- a : Agr
-- } ;
{-
Noun : Type = {
s : Number5 => Str ;
g : Gender ;
} ;
-}
Noun : Type = {
s : Noun_Number => Str ;
s : Noun_Number => NForm => Str ;
g : Gender ;
-- anim : Animacy ; -- is the noun animate? e.g. TABIB
} ;
Adj : Type = {
s : Gender => Person_Number => Str ;
-- isPre : Bool ;
ProperNoun : Type = {
s : Str ;
g : Gender ;
} ;
Verb : Type = {
s : VForm => Str ; -- Give me the form (tense, person etc) and I'll give you the string
t : VType ; -- Inherent - Strong/Hollow etc
o : Origin ; -- Inherent - a verb of Semitic or Romance origins?
s : VForm => Str ;
-- s : VForm => VSuffixForm => Str ;
c : VClass ;
} ;
-- Adjective: one string for every AForm.
Adjective : Type = {
s : AForm => Str ;
} ;
{- ===== Conversions ===== -}
-- Map a numeral's number onto the noun number it selects:
-- Num_Sg picks the singulative; every other value picks the determinate plural.
numnum2nounnum : Num_Number -> Noun_Number = \num ->
  case num of {
    Num_Sg => Singular Singulative ;
    _      => Plural Determinate
    } ;
{- ===== Useful helper functions ===== -}
-- Character of s at 0-based position i: skip the first i characters, then keep one.
-- A negative i behaves like 0 (first character); an i past the end gives the empty string.
charAt : Int -> Str -> Str = \i,s -> take 1 (drop i s) ;
-- Put the definite form of preposition prep (as computed by getDefinitePreposition for noun n) in front of n.
-- The two parts are joined with ++, i.e. as separate tokens.
addDefinitePreposition : Str -> Str -> Str = \prep,n -> (getDefinitePreposition prep n) ++ n ;
-- Shortcuts for the bare definite article: the same two operations with prep fixed to "il".
addDefiniteArticle = addDefinitePreposition "il" ;
getDefiniteArticle = getDefinitePreposition "il" ;
@@ -288,12 +289,14 @@ resource ResMlt = PatternsMlt ** open Prelude in {
"il" => "l" + "-" ;
_ => prep + "-"
};
K@#CoronalConsonant + _ => prepStem + K + "-" ; -- IĊ-ĊISK
K@#CoronalCons + _ => prepStem + K + "-" ; -- IĊ-ĊISK
#Consonant + _ => prep + "-" ; -- IL-QADDIS
_ => [] -- ?
} ;
definiteArticle : Str =
artIndef = [] ;
artDef : Str =
pre {
"il-" ;
"l-" / strs { "a" ; "e" ; "i" ; "o" ; "u" ; "h" ; "għ" } ;
@@ -308,4 +311,113 @@ resource ResMlt = PatternsMlt ** open Prelude in {
"iz-" / strs { "z" }
} ;
{- ===== Worst-case functions ===== -}
-- Noun: Takes all forms and a gender
-- Params:
-- Singulative, eg KOXXA
-- Collective, eg KOXXOX
-- Dual, eg KOXXTEJN
-- Determinate Plural, eg KOXXIET
-- Indeterminate Plural
-- Gender
-- mkNoun : (_,_,_,_,_ : NForm => Str) -> Gender -> Noun = \sing,coll,dual,det,ind,gen -> {
-- s = table {
-- Singular Singulative => sing ;
-- Singular Collective => coll ;
-- Dual => dual ;
-- Plural Determinate => det ;
-- Plural Indeterminate => ind
-- } ;
-- g = gen ;
-- -- anim = Inanimate ;
-- } ;
-- Mark a noun as animate by extending its record with anim = Animate.
-- NOTE(review): the anim field is commented out in the Noun type shown in this file — confirm it is still wanted.
animateNoun : Noun -> Noun = \noun -> noun ** {anim = Animate} ;
-- Suffix table for a noun that takes no pronominal suffixes:
-- the plain form is the given string, every suffixed form is empty.
nullSuffixTable : Str -> (NForm => Str) = \bare ->
  table {
    NRegular      => bare ;
    NPronSuffix _ => []
    } ;
-- Build the pronominal suffix table of a noun (NForm => Str).
mkSuffixTable : (NForm => Str) = overload {

  -- Regular case: one stem, with the standard suffix glued on for each person.
  mkSuffixTable : (_ : Str) -> (NForm => Str) = \stem ->
    table {
      NRegular        => stem ;
      NPronSuffix agr => case agr of {
        AgP1 Sg     => stem + "i" ;
        AgP2 Sg     => stem + "ek" ;
        AgP3Sg Masc => stem + "u" ;
        AgP3Sg Fem  => stem + "ha" ;
        AgP1 Pl     => stem + "na" ;
        AgP2 Pl     => stem + "kom" ;
        AgP3Pl      => stem + "hom"
        }
      } ;

  -- Worst case: all eight forms given explicitly,
  -- eg ISEM, ISMI, ISMEK, ISMU, ISIMHA, ISIMNA, ISIMKOM, ISIMHOM.
  mkSuffixTable : (_,_,_,_,_,_,_,_ : Str) -> (NForm => Str) =
    \bare,p1sg,p2sg,p3sgMasc,p3sgFem,p1pl,p2pl,p3pl ->
      table {
        NRegular                  => bare ;
        NPronSuffix (AgP1 Sg)     => p1sg ;
        NPronSuffix (AgP2 Sg)     => p2sg ;
        NPronSuffix (AgP3Sg Masc) => p3sgMasc ;
        NPronSuffix (AgP3Sg Fem)  => p3sgFem ;
        NPronSuffix (AgP1 Pl)     => p1pl ;
        NPronSuffix (AgP2 Pl)     => p2pl ;
        NPronSuffix (AgP3Pl)      => p3pl
        } ;

  } ;
-- mkNoun = overload {
-- mkNoun : (_,_,_,_,_ : Str) -> Gender -> Noun = \sing,coll,dual,det,ind,gen -> {
-- s = table {
-- Singular Singulative => (nullSuffixTable sing) ;
-- Singular Collective => (nullSuffixTable coll) ;
-- Dual => (nullSuffixTable dual) ;
-- Plural Determinate => (nullSuffixTable det) ;
-- Plural Indeterminate => (nullSuffixTable ind)
-- } ;
-- g = gen ;
-- -- anim = Inanimate ;
-- } ;
-- Worst-case noun constructor: one suffix table per noun number, plus the gender.
-- Argument order: singulative, collective, dual, determinate plural, indeterminate plural, gender.
mkNoun : (_,_,_,_,_ : NForm => Str) -> Gender -> Noun =
  \singulative,collective,dualForm,detPl,indetPl,gender -> {
    s = table {
      Singular Singulative => singulative ;
      Singular Collective  => collective ;
      Dual                 => dualForm ;
      Plural Determinate   => detPl ;
      Plural Indeterminate => indetPl
      } ;
    g = gender ;
    -- anim = Inanimate ;
    } ;
-- } ;
-- Worst-case adjective constructor. Arguments, in order:
--   masculine singular, eg SABIĦ
--   feminine singular,  eg SABIĦA
--   plural,             eg SBIEĦ
--   comparative,        eg ISBAĦ
-- The superlative is not an argument: it is the comparative passed through addDefiniteArticle.
mkAdjective : (_,_,_,_ : Str) -> Adjective = \mascSg,femSg,pl,comp -> {
  s = table {
    APosit (GSg Masc) => mascSg ;
    APosit (GSg Fem)  => femSg ;
    APosit GPl        => pl ;
    ACompar           => comp ;
    ASuperl           => addDefiniteArticle comp
    } ;
  } ;
}