Maltese: finished morphology for derived verbs

Of course it's never really finished. There are more cases I should write treebanks
for and test, but I think the coverage is good enough and the implementation
for the verbs is solid enough that only minor fixes should arise.

There's also the issue of verb participles, but I'll come back to those
when I have looked more closely at the syntax.
This commit is contained in:
john.j.camilleri
2012-10-23 09:41:00 +00:00
parent 6fb15ecc2f
commit c084e1b040
6 changed files with 2777 additions and 799 deletions

View File

@@ -5,6 +5,7 @@
-- Licensed under LGPL
concrete AdjectiveMlt of Adjective = CatMlt ** open ResMlt, Prelude in {
flags coding=utf8 ;
lin

View File

@@ -8,6 +8,21 @@
concrete IrregMlt of IrregMltAbs = CatMlt ** open ParadigmsMlt in {
{-
Known irregular verbs in Maltese:
- ĦA
- TA
- RA
- MAR
- ĠIE
- QAL
- KIEL
- KELLU
- IDDA
- EMMEN
- IŻŻA
- JAF
- KIEN
-}
}

View File

@@ -317,7 +317,7 @@ concrete LexiconMlt of Lexicon = CatMlt **
teacher_N = mkN "għalliem" "għalliema" ; -- għalliema ?
television_N = mkN "televixin" "televixins" ;
thick_A = mkA "oħxon" "ħoxna" "ħoxnin" "eħxen" ;
thin_A = brokenA "irqiq" "irqaq" "irqaq" ;
thin_A = brokenA "rqiq" "rqaq" "rqaq" ;
-- think_V
-- throw_V2
-- tie_V2
@@ -341,7 +341,7 @@ concrete LexiconMlt of Lexicon = CatMlt **
-- wash_V2
-- watch_V2
water_N = mkN "ilma" "ilmijiet" masculine ;
wet_A = mkA "imxarrab" "imxarrba" "imxarrbin" ;
wet_A = mkA "mxarrab" "mxarrba" "mxarrbin" ;
white_A = mkA "abjad" "bajda" "bojod" ;
wide_A = broad_A ;
wife_N = mkN "mara" "nisa" ; -- pronSuffix MARTI

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -12,25 +12,38 @@ resource ResMlt = ParamX - [Tense] ** open Prelude, Predef in {
param
{- General -}
Gender = Masc | Fem ;
GenNum =
GSg Gender -- dak, dik
| GPl ; -- dawk
Agr =
AgP1 Number -- jiena, aħna
| AgP2 Number -- inti, intom
| AgP3Sg Gender -- huwa, hija
| AgP3Pl -- huma
;
NPCase = Nom | Gen ;
Animacy =
Animate
| Inanimate
;
-- Definiteness =
-- Definite -- eg IL-KARTA. In this context same as Determinate
-- | Indefinite -- eg KARTA
-- ;
{- Numerals -}
CardOrd = NCard | NOrd ;
Num_Number =
Num_Sg
| Num_Dl
| Num_Pl
;
-- oper
-- Num_Number : Type = { n : Number ; isDual : Bool } ;
param
DForm =
Unit -- 0..10
| Teen -- 11-19
@@ -39,10 +52,18 @@ resource ResMlt = ParamX - [Tense] ** open Prelude, Predef in {
| Hund -- 100..999
--| Thou -- 1000+
;
Num_Number =
Num_Sg
| Num_Dl
| Num_Pl
;
Num_Case =
NumNominative -- TNEJN, ĦAMSA, TNAX, MIJA
| NumAdjectival ; -- ŻEWĠ, ĦAMES, TNAX-IL, MITT
{- Nouns -}
Noun_Sg_Type =
@@ -63,49 +84,8 @@ resource ResMlt = ParamX - [Tense] ** open Prelude, Predef in {
NRegular -- WIĊĊ
| NPronSuffix Agr ; -- WIĊĊU
{- Other... -}
GenNum = GSg Gender | GPl ; -- masc/fem/plural, e.g. adjective inflection
Animacy =
Animate
| Inanimate
;
Definiteness =
Definite -- eg IL-KARTA. In this context same as Determinate
| Indefinite -- eg KARTA
;
-- Person = P1 | P2 | P3 ;
-- State = Def | Indef | Const ;
-- Mood = Ind | Cnj | Jus ;
-- Voice = Act | Pas ;
-- Order = Verbal | Nominal ;
-- Agreement features
Agr =
AgP1 Number -- Jiena, Aħna
| AgP2 Number -- Inti, Intom
| AgP3Sg Gender -- Huwa, Hija
| AgP3Pl -- Huma
;
-- Agr : Type = {g : Gender ; n : Number ; p : Person} ;
-- Ag : Gender -> Number -> Person -> Agr = \g,n,p -> {g = g ; n = n ; p = p} ;
-- agrP1 : Number -> Agr = \n -> Ag {} n P1 ;
-- agrP3 : Gender -> Number -> Agr = \g,n -> Ag g n P3 ;
-- Possible tenses
-- Tense =
-- Perf -- Perfect tense, eg SERAQ
-- | Impf -- Imperfect tense, eg JISRAQ
-- | Imp -- Imperative, eg ISRAQ
-- | PresPart -- Present Particible. Intransitive and 'motion' verbs only, eg NIEŻEL
-- | PastPart -- Past Particible. Both verbal & adjectival function, eg MISRUQ
-- | VerbalNoun -- Verbal Noun, eg SERQ
-- ;
{- Verb -}
-- Possible verb forms (tense + person)
VForm =
@@ -117,6 +97,14 @@ resource ResMlt = ParamX - [Tense] ** open Prelude, Predef in {
-- | VVerbalNoun -- Verbal Noun
;
-- Inflection of verbs for pronominal suffixes.
-- Each constructor selects which enclitic pronoun(s), if any, are attached to the verb form.
VSuffixForm =
VSuffixNone -- no pronominal suffix, eg FTAĦT
| VSuffixDir Agr -- one suffix agreeing with Agr (presumably the direct object), eg FTAĦTU
| VSuffixInd Agr -- one suffix agreeing with Agr (presumably the indirect object), eg FTAĦTLU
| VSuffixDirInd GenNum Agr -- eg FTAĦTHULU. D.O. is necessarily 3rd person.
-- (so GenNum presumably describes the direct object and Agr the indirect one)
;
VDerivedForm =
FormI
| FormII
@@ -126,34 +114,38 @@ resource ResMlt = ParamX - [Tense] ** open Prelude, Predef in {
| FormVI
| FormVII
| FormVIII
| FormXI
| FormIX
| FormX
| FormUnknown
;
-- Verb classification
VClass =
Strong VStrongClass
| Weak VWeakClass
| Loan --- temporary
-- | Romance
-- | English
| Quad VQuadClass
| Loan
-- | Irregular
;
VStrongClass =
Regular
| LiquidMedial
| Reduplicative
| Quad
| Geminated
;
VWeakClass =
Assimilative
| Hollow
| WeakFinal
| Lacking
| Defective
| QuadWeakFinal
;
VQuadClass =
QStrong
| QWeak
;
-- VRomanceEnding =
-- _ARE -- kanta
-- | _ERE | _IRE -- vinċa, serva --- we don't need this distinction, just always use IRE
-- ;
-- VQuadClass =
-- BiradicalBase
-- | RepeatedC3
@@ -161,22 +153,10 @@ resource ResMlt = ParamX - [Tense] ** open Prelude, Predef in {
-- | AdditionalC4
-- ;
VRomanceClass =
Integrated
| NonIntegrated
;
-- Inflection of verbs for pronominal suffixes
VSuffixForm =
VNone -- eg FTAĦT
| VDir Agr -- eg FTAĦTU
| VInd Agr -- eg FTAĦTLU
| VDirInd Agr Agr -- eg FTAĦTHULU
;
{- Adjective -}
-- For Adjectives
AForm =
-- AF Degree GenNum
APosit GenNum
| ACompar
| ASuperl
@@ -184,68 +164,143 @@ resource ResMlt = ParamX - [Tense] ** open Prelude, Predef in {
oper
-- Roots & Patterns
{- ===== Type declarations ===== -}
Noun : Type = {
s : Noun_Number => NForm => Str ;
g : Gender ;
-- anim : Animacy ; -- is the noun animate? e.g. TABIB
} ;
ProperNoun : Type = {
s : Str ;
g : Gender ;
} ;
Verb : Type = {
s : VForm => VSuffixForm => Polarity => Str ;
i : VerbInfo ;
} ;
-- Lexical information stored alongside a verb's inflection table (the `i` field of Verb).
VerbInfo : Type = {
class : VClass ; -- verb classification (see VClass)
form : VDerivedForm ; -- derived form (see VDerivedForm)
root : Root ; -- radicals
patt : Pattern ; -- vowels extracted from mamma (presumably the verb's base/citation form)
patt2: Pattern ; -- vowel changes; default to patt (experimental)
-- in particular, patt2 is used to indicate whether an IE should be shortened
-- to an I or an E (same for entire verb)
imp : Str ; -- Imperative Sg. Gives so much information jaħasra!
} ;
Adjective : Type = {
s : AForm => Str ;
} ;
{- ===== Some character classes ===== -}
Letter : pattern Str = #( "a" | "b" | "ċ" | "d" | "e" | "f" | "ġ" | "g" | "għ" | "h" | "ħ" | "i" | "ie" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "ż" | "z" );
Consonant : pattern Str = #( "b" | "ċ" | "d" | "f" | "ġ" | "g" | "għ" | "h" | "ħ" | "j" | "k" | "l" | "m" | "n" | "p" | "q" | "r" | "s" | "t" | "v" | "w" | "x" | "ż" | "z" );
CoronalCons : pattern Str = #( "ċ" | "d" | "n" | "r" | "s" | "t" | "x" | "ż" | "z" ); -- "konsonanti xemxin"
LiquidCons : pattern Str = #( "l" | "m" | "n" | "r" | "għ" );
SonorantCons : pattern Str = #( "l" | "m" | "n" | "r" ); -- See {SA pg13}. Currently unused, but see DoublingConsN below
-- Note: "t" is not listed in this set even though TTIR appears among the examples;
-- for a t-initial stem the ordinary fallback of pfx_T ("t" + stem) already yields the doubled form.
DoublingConsT : pattern Str = #( "ċ" | "d" | "ġ" | "s" | "x" | "ż" | "z" ); -- require doubling when prefixed with 't', eg DDUM, ĠĠORR, SSIB, TTIR, ŻŻID {GM pg68,2b} {OM pg90}
DoublingConsN : pattern Str = #( "l" | "m" | "r" ); -- require doubling when prefixed with 'n', eg LLAĦĦAQ, MMUR, RRID {OM pg90}
WeakCons : pattern Str = #( "j" | "w" );
Vowel : pattern Str = #( "a" | "e" | "i" | "o" | "u" );
VowelIE : pattern Str = #( "a" | "e" | "i" | "ie" | "o" | "u" );
Digraph : pattern Str = #( "ie" );
SemiVowel : pattern Str = #( "għ" | "j" );
V = Vowel ;
C = Consonant ;
LC = LiquidCons ;
EorI : Str = "e" | "i" ;
IorE : Str = "i" | "e" ;
{- ===== Roots & Patterns ===== -}
Pattern : Type = {V1, V2 : Str} ;
-- Root3 : Type = {K, T, B : Str} ;
-- Root4 : Type = Root3 ** {L : Str} ;
Root : Type = {C1, C2, C3, C4 : Str} ;
-- Make a root object. Accepts following overloads:
-- mkRoot
-- mkRoot "k-t-b"
-- mkRoot "k-t-b-l"
-- mkRoot "k" "t" "b"
-- mkRoot "k" "t" "b" "l"
mkRoot : Root = overload {
mkRoot : Root =
{ C1=[] ; C2=[] ; C3=[] ; C4=[] } ;
mkRoot : Str -> Root = \root ->
let root = toLower root in
case (charAt 1 root) of {
"-" => { C1=(charAt 0 root) ; C2=(charAt 2 root) ; C3=(charAt 4 root) ; C4=(charAt 6 root) } ; -- "k-t-b"
_ => { C1=(charAt 0 root) ; C2=(charAt 1 root) ; C3=(charAt 2 root) ; C4=(charAt 3 root) } -- "ktb"
case toLower root of {
c1@#Consonant + "-" + c2@#Consonant + "-" + c3@#Consonant =>
{ C1=c1 ; C2=c2 ; C3=c3 ; C4=[] } ; -- "k-t-b"
c1@#Consonant + "-" + c2@#Consonant + "-" + c3@#Consonant + "-" + c4@#Consonant =>
{ C1=c1 ; C2=c2 ; C3=c3 ; C4=c4 } ; -- "k-t-b-l"
_ => { C1=(charAt 0 root) ; C2=(charAt 1 root) ; C3=(charAt 2 root) ; C4=(charAt 3 root) } -- "ktb" (not recommended)
} ;
mkRoot : Str -> Str -> Str -> Root = \c1,c2,c3 ->
{ C1=c1 ; C2=c2 ; C3=c3 ; C4=[] } ;
{ C1=toLower c1 ; C2=toLower c2 ; C3=toLower c3 ; C4=[] } ;
mkRoot : Str -> Str -> Str -> Str -> Root = \c1,c2,c3,c4 ->
{ C1=c1 ; C2=c2 ; C3=c3 ; C4=c4 } ;
{ C1=toLower c1 ; C2=toLower c2 ; C3=toLower c3 ; C4=toLower c4 } ;
} ;
mkPattern : Pattern = overload {
mkPattern : Pattern =
{ V1=[] ; V2=[] } ;
mkPattern : Str -> Pattern = \v1 ->
{ V1=v1 ; V2=[] } ;
{ V1=toLower v1 ; V2=[] } ;
mkPattern : Str -> Str -> Pattern = \v1,v2 ->
{ V1=v1 ; V2=v2 } ;
{ V1=toLower v1 ; V2=case v2 of {"" => [] ; _ => toLower v2}} ;
} ;
-- Some character classes
Consonant : pattern Str = #( "b" | "ċ" | "d" | "f" | "ġ" | "g" | "għ" | "ħ" | "h" | "j" | "k" | "l" | "m" | "n" | "p" | "q" | "r" | "s" | "t" | "v" | "w" | "x" | "ż" | "z" );
CoronalCons : pattern Str = #( "ċ" | "d" | "n" | "r" | "s" | "t" | "x" | "ż" | "z" ); -- "konsonanti xemxin"
ImpfDoublingCons : pattern Str = #( "d" | "ġ" | "s" | "t" | "ż" ); -- require doubling in imperfect, eg (inti) IDDUM, IĠĠOR, ISSIB, ITTIR, IŻŻID. --- only used in hollow paradigm (?)
LiquidCons : pattern Str = #( "l" | "m" | "n" | "r" | "għ" );
WeakCons : pattern Str = #( "j" | "w" );
Vowel : pattern Str = #( "a" | "e" | "i" | "o" | "u" );
Digraph : pattern Str = #( "ie" );
SemiVowel : pattern Str = #( "għ" | "j" );
-- Extract first two vowels from a token (designed for semitic verb forms)
-- The result is a Pattern: two vowels found => V1 and V2 set; one vowel found => only V1 set
-- (one-argument mkPattern); no vowel found => the empty pattern (zero-argument mkPattern).
-- The first vowel may be the digraph "ie"; the second vowel is always matched as a single letter.
-- The order of the cases is significant:
--   * the "ie" cases are tried before the corresponding single-vowel cases, so that "ie" is
--     not split into "i" + "e";
--   * the two vowel-initial cases come first; the remaining cases allow any prefix before the vowel.
-- NOTE(review): in the prefixed cases an "ie" anywhere in the token is preferred over an earlier
-- plain vowel, and a token that starts with "ie" but has no later vowel falls to the second case
-- (V1="i", V2="e") — confirm that this is intended.
--- potentially slow
extractPattern : Str -> Pattern = \s ->
case s of {
v1@"ie" + _ + v2@#Vowel + _ => mkPattern v1 v2 ; -- IEQAF
v1@#Vowel + _ + v2@#Vowel + _ => mkPattern v1 v2 ; -- IKTEB
_ + v1@"ie" + _ + v2@#Vowel + _ => mkPattern v1 v2 ; -- RIEQED
_ + v1@"ie" + _ => mkPattern v1 ; -- ŻIED
_ + v1@#Vowel + _ + v2@#Vowel + _ => mkPattern v1 v2 ; -- ĦARBAT
_ + v1@#Vowel + _ => mkPattern v1 ; -- ĦOBB
_ => mkPattern
} ;
{- ===== Type declarations ===== -}
-- Build a VerbInfo record. The overloads allow fields to be left out:
-- a missing root/pattern becomes the empty root/pattern, a missing imperative becomes
-- the empty string, and a missing second pattern defaults to the first pattern.
mkVerbInfo : VerbInfo = overload {
  -- class + derived form
  mkVerbInfo : VClass -> VDerivedForm -> VerbInfo = \cls,frm ->
    { class=cls ; form=frm ; root=mkRoot ; patt=mkPattern ; patt2=mkPattern ; imp=[] } ;
  -- class + derived form + imperative
  mkVerbInfo : VClass -> VDerivedForm -> Str -> VerbInfo = \cls,frm,imper ->
    { class=cls ; form=frm ; root=mkRoot ; patt=mkPattern ; patt2=mkPattern ; imp=imper } ;
  -- class + derived form + root + pattern
  mkVerbInfo : VClass -> VDerivedForm -> Root -> Pattern -> VerbInfo = \cls,frm,rt,vs ->
    { class=cls ; form=frm ; root=rt ; patt=vs ; patt2=vs ; imp=[] } ;
  -- class + derived form + root + pattern + imperative
  mkVerbInfo : VClass -> VDerivedForm -> Root -> Pattern -> Str -> VerbInfo = \cls,frm,rt,vs,imper ->
    { class=cls ; form=frm ; root=rt ; patt=vs ; patt2=vs ; imp=imper } ;
  -- every field given explicitly
  mkVerbInfo : VClass -> VDerivedForm -> Root -> Pattern -> Pattern -> Str -> VerbInfo = \cls,frm,rt,vs,vs2,imper ->
    { class=cls ; form=frm ; root=rt ; patt=vs ; patt2=vs2 ; imp=imper } ;
} ;
Noun : Type = {
s : Noun_Number => NForm => Str ;
g : Gender ;
-- anim : Animacy ; -- is the noun animate? e.g. TABIB
} ;
-- Change certain fields of a VerbInfo record
updateVerbInfo : VerbInfo = overload {
ProperNoun : Type = {
s : Str ;
g : Gender ;
} ;
-- Root
updateVerbInfo : VerbInfo -> Root -> VerbInfo = \i,r ->
{ class=i.class ; form=i.form ; root=r ; patt=i.patt ; patt2=i.patt2 ; imp=i.imp } ;
Verb : Type = {
s : VForm => Str ;
-- s : VForm => VSuffixForm => Str ;
c : VClass ;
} ;
-- DerivedForm
updateVerbInfo : VerbInfo -> VDerivedForm -> VerbInfo = \i,f ->
{ class=i.class ; form=f ; root=i.root ; patt=i.patt ; patt2=i.patt2 ; imp=i.imp } ;
-- DerivedForm, Imperative
updateVerbInfo : VerbInfo -> VDerivedForm -> Str -> VerbInfo = \i,f,imp ->
{ class=i.class ; form=f ; root=i.root ; patt=i.patt ; patt2=i.patt2 ; imp=imp } ;
} ;
Adjective : Type = {
s : AForm => Str ;
} ;
{- ===== Conversions ===== -}
@@ -258,11 +313,98 @@ resource ResMlt = ParamX - [Tense] ** open Prelude, Predef in {
{- ===== Useful helper functions ===== -}
-- Get the character at the specific index (0-based).
-- Negative indexes behave as 0 (first character). Out of range indexes return the empty string.
charAt : Int -> Str -> Str ;
charAt i s = take 1 (drop i s) ;
-- New names for the drop/take operations
-- (aliases for the Predef string primitives, named by which end of the string they act on)
takePfx = Predef.take ; -- keep the first n characters
dropPfx = Predef.drop ; -- remove the first n characters
takeSfx = Predef.dp ; -- keep the last n characters
dropSfx = Predef.tk ; -- remove the last n characters
-- Return the single character found at a 0-based position in a string.
-- A negative position is treated like 0 (first character); a position past the end gives the empty string.
charAt : Int -> Str -> Str ;
charAt idx str =
  let
    rest : Str = dropPfx idx str
  in takePfx 1 rest ;
-- Remove the character at a 0-based position from a string.
-- Positions outside the string are simply ignored.
delCharAt : Int -> Str -> Str ;
delCharAt idx str =
  let
    front : Str = takePfx idx str ;
    rest : Str = dropPfx (plus idx 1) str
  in front + rest ;
-- -- Replace first substring
-- replace : Str -> Str -> Str -> Str ;
-- replace needle haystack replacement =
-- case haystack of {
-- x + needle + y => x + replacement + y ;
-- _ => haystack
-- } ;
-- Prefix with a 'n'/'t' or double initial consonant, as necessary. See {OM pg 90}
-- pfx_N: an empty stem stays empty; a stem starting with one of DoublingConsN gets that
-- consonant doubled; any other stem gets a plain "n" in front.
pfx_N : Str -> Str = \stem -> case stem of {
  "" => [] ;
  c@#DoublingConsN + _ => c + stem ;
  _ => "n" + stem
} ;
-- pfx_T: an empty stem stays empty; a stem starting with one of DoublingConsT gets that
-- consonant doubled; any other stem gets a plain "t" in front.
pfx_T : Str -> Str = \stem -> case stem of {
  "" => [] ;
  c@#DoublingConsT + _ => c + stem ;
  _ => "t" + stem
} ;
-- Prefix with 'j'. No consonant doubling is involved; this exists only to standardise,
-- i.e. so that a 'j' prefix can be applied in the same way as pfx_N and pfx_T.
-- It delegates to the generic pfx, so an empty stem stays empty.
-- Earlier stand-alone version:
-- pfx_J : Str -> Str = \s -> case takePfx 1 s of {
-- "" => [] ;
-- _ => "j" + s
-- } ;
pfx_J : Str -> Str = \s -> pfx "j" s ;
-- Attach an arbitrary prefix to a string without ever producing something from nothing:
-- an empty string stays empty (whatever the prefix), and an empty prefix leaves the string as it is.
pfx : Str -> Str -> Str = \p,s -> case s of {
  "" => [] ;
  _ => case p of {
    "" => s ;
    _ => p + s
  }
} ;
-- Attach a suffix to a stem, avoiding triple letters {GO pg96-7}:
-- when the stem ends in a doubled nn/kk/ll/tt/xx and the suffix starts with that same letter,
-- one of the stem's two letters is dropped before joining.
-- An empty stem gives the empty string, whatever the suffix.
--- add more cases?
--- potentially slow
sfx : Str -> Str -> Str = \stem,ending ->
  case <stem, takePfx 1 ending> of {
    <"",_> => [] ;
    <base+"nn","n"> => base+"n"+ending ;
    <base+"kk","k"> => base+"k"+ending ;
    <base+"ll","l"> => base+"l"+ending ;
    <base+"tt","t"> => base+"t"+ending ;
    <base+"xx","x"> => base+"x"+ending ;
    _ => stem + ending
  } ;
-- Replace an IE in the word with an I or E --- potentially slow
-- ie2i / ie2e fix the replacement vowel; ie2_ takes the replacement first and the word second.
-- Only ONE occurrence of "ie" is rewritten per call (the result is not matched again):
-- a word ending in "ie" has that final "ie" replaced; otherwise an "ie" inside the word is
-- replaced (presumably the first one — TODO confirm against GF's string-pattern matching order).
-- A word without "ie" is returned unchanged.
ie2i : Str -> Str = ie2_ "i" ;
ie2e : Str -> Str = ie2_ "e" ;
ie2_ : Str -> Str -> Str = \iore,serviet ->
case serviet of {
x + "ie" => x + iore ;
x + "ie" + y => x + iore + y ;
x => x
} ;
-- Is a word mono-syllabic?
-- Returns True only for the shapes listed below: exactly one vowel nucleus (a single vowel or
-- the digraph "ie") with up to two consonants before it and up to two after it.
-- Anything else gives False — including a bare vowel with no consonant at all, and clusters of
-- three or more consonants, since no case covers them.
-- Note that #Consonant includes the digraph "għ", which therefore counts as one consonant.
--- potentially slow
isMonoSyl : Str -> Bool = \s ->
case s of {
#Consonant + ("ie" | #Vowel) => True ; -- ra
#Consonant + #Consonant + ("ie" | #Vowel) => True ; -- bla
("ie" | #Vowel) + #Consonant => True ; -- af
("ie" | #Vowel) + #Consonant + #Consonant => True ; -- elf
#Consonant + ("ie" | #Vowel) + #Consonant => True ; -- miet
#Consonant + ("ie" | #Vowel) + #Consonant + #Consonant => True ; -- mort
#Consonant + #Consonant + ("ie" | #Vowel) + #Consonant => True ; -- ħliet
#Consonant + #Consonant + ("ie" | #Vowel) + #Consonant + #Consonant => True ; -- ħriġt
_ => False
} ;
-- Add a definite preposition in front of your token
-- The form of the preposition is computed by getDefinitePreposition from the preposition and the
-- token (defined elsewhere; presumably it adapts the article to the token's initial sound), and
-- is then placed before the token as a separate token (++).
addDefinitePreposition : Str -> Str -> Str = \prep,n -> (getDefinitePreposition prep n) ++ n ;
-- The plain definite article is the special case where the preposition is "il".
addDefiniteArticle = addDefinitePreposition "il" ;
getDefiniteArticle = getDefinitePreposition "il" ;