--# -path=.:../../prelude -- ----1 A Simple Persian Resource Morphology ---- ---- Shafqat Virk, Aarne Ranta,2010 ---- ---- This resource morphology contains definitions needed in the resource ---- syntax. To build a lexicon, it is better to use $ParadigmsPes$, which ---- gives a higher-level access to this module. -- resource MorphoPes = ParamX ** open Prelude,Predef in { flags optimize=all ; coding = utf8; ---- Orthography oper -- Zero-width non-joiner, used for certain morphemes -- See https://en.wikipedia.org/wiki/Persian_alphabet#Word_boundaries ZWNJ : Str = "‌" ; zwnj : Str -> Str -> Str = \s1,s2 -> s1 + ZWNJ + s2 ; -- kasre : Str = "ِ" ; -- To enable vowels for TTS input -- fatha : Str = "َ" ; kasre,fatha : Str = [] ; -- for appComp -- runtimeKasre : Str -> Str = \s -> glue s kasre ; runtimeKasre : Str -> Str = \s -> s ; ---- Nouns param Animacy = Animate | Inanimate ; Mod = Bare | Ezafe | Clitic | Poss ; Agr = Ag Number Person ; CmpdStatus = IsCmpd | NotCmpd ; Degr = Positive | Comparative ; ------------------------------------------ -- Agreement transformations ----------------------------------------- oper toAgr : Number -> Person -> Agr = \n,p -> Ag n p ; fromAgr : Agr -> {n : Number ; p : Person } = \agr -> case agr of { Ag n p => {n = n ; p = p} } ; conjAgr : Agr -> Agr -> Agr = \a0,b0 -> let a = fromAgr a0 ; b = fromAgr b0 in toAgr (conjNumber a.n b.n) b.p ; giveNumber : Agr -> Number = \a -> case a of { Ag n _ => n } ; defaultAgr : Agr = agrP3 Sg ; agrP3 : Number -> Agr = \n -> Ag n P3 ; agrP1 : Number -> Agr = \n -> Ag n P1 ; ------------------------- -- Ezafe construction ------------------------ oper mkPossStem : Str -> Str = \str -> case str of { _ + ("اه"|"او"|"وه") => str + fatha ; _ + ("ا"|"و") => str + fatha + "ی" ; _ + "ه" => zwnj str "ا" ; _ => str + fatha } ; mkEzafe : Str -> Str = \str -> case str of { st + "اه" => str + kasre ; st + "وه" => str + kasre ; st + "ه" => zwnj str "ی" ;-- alt. st + "ۀ" ; st + "او" => str + kasre ; st + "وو" => str + kasre ; st + "و" => str + "ی" ; st + "ا" => str + "ی" ; _ => str + kasre }; mkEnclic : Str -> Str ; mkEnclic str = case str of { st + ("ا"|"و") => zwnj str "یی" ; -- ی after a long vowel to help pronunciation st + "اه" => str + "ی" ; -- here ه is a consonant, so single ی st + ("ی"|"ه") => zwnj str "ای" ; -- after ی or ه as a vowel, add an alif to help pronunciation _ => str + "ی" -- any other case: just a single ی } ; modTable : Str -> Mod => Str = \str -> table {Bare => str ; Ezafe => mkEzafe str ; Clitic => mkEnclic str ; Poss => mkPossStem str } ; -- Can happen that a complement (of N2, or e.g. PossNP) wants one form -- and determiner wants another form. Heuristic: whichever wants Bare loses. -- Will have to see if this works 100%, the grammar books I've seen -- aren't very clear about this, just basing on some data. /IL replaceBare : Mod -> (Mod=>Str) -> (Mod=>Str) = \m,tbl -> table { Bare => tbl ! m ; mod => tbl ! mod } ; Noun = { s : Number => Mod => Str ; animacy : Animacy ; isCmpd : CmpdStatus -- Affects possession: awkward to use poss. suff. with compound nouns } ; mkN : (x1,x2 : Str) -> Animacy -> Noun = \sg,pl,ani -> indeclN sg ** { s = table {Sg => modTable sg ; Pl => modTable pl} } ; indeclN : Str -> Noun = \s -> { s = \\_,_ => s ; animacy = Inanimate ; isCmpd = NotCmpd } ; -- masculine nouns end with alif, choTi_hay, ain Translitration: (a, h, e) -- Arabic nouns ends with h. also taken as Masc --------------------- --Determiners -------------------- BaseQuant : Type = { mod : Mod ; isNeg : Bool ; isDef : Bool } ; Determiner : Type = BaseQuant ** { s : Str ; sp : Str ; -- stand-alone form for DetNP and possessive pronouns with compound nouns n : Number ; isNum : Bool } ; Quant : Type = BaseQuant ** { s : Number => CmpdStatus => Str} ; makeDet : Str -> Number -> (isNum, isNeg : Bool) -> Determiner = \str,n,isNum,isNeg -> { s,sp = str; isNum = isNum ; isNeg = isNeg ; isDef = True ; mod = Bare ; n = n }; makeQuant : Str -> Str -> Mod -> (isNeg : Bool) -> Quant = \sg,pl,mod,isNeg -> { s = table {Sg => \\_ => sg ; Pl => \\_ => pl} ; mod = mod ; isNeg = isNeg ; isDef = True }; --------------------------- -- Adjectives -------------------------- BaseAdjective : Type = { adv : Str ; isPre : Bool ; -- as attributive afterPrefix : Bool ; -- as predicative, does it go between the prefix and the light verb } ; Adjective : Type = BaseAdjective ** { s : Degr => Mod => Str } ; AP : Type = BaseAdjective ** { s : Mod => Str } ; mkAdj = overload { mkAdj : Str -> Str -> Adjective = \adj,adv -> { s = table { Positive => modTable adj ; Comparative => modTable (adj + "تر") -- Regular comparative. } ; adv = adv ; isPre = False ; afterPrefix = True } ; mkAdj : Str -> Str -> Str -> Adjective = \pos,cmp,adv -> { s = table { Positive => modTable pos ; Comparative => modTable cmp -- Irregular comparative, e.g. xub-behtar } ; adv = adv ; isPre = False ; afterPrefix = True } } ; ------------------------------------------------------------------ -- Verbs ------------------------------------------------------------------ param VerbForm = Inf -- kardan | PastStem -- kard -- Also used for future stem | PresStem -- kon -- Also imperative stem | PerfStem -- kardeh -- Perfect, pluperfect | PastPart -- konandeh | ImpPrefix Polarity -- mi/nmi, except for be and have | VAor Polarity Agr -- konam | VPerf Polarity Agr -- kardeh am/nkardeh am | VPast Polarity Agr -- kardam/nkardam | VSubj Polarity Agr -- bekonam/nakonam | VImp Polarity Number -- bekon,bekonid/nakon,nakonid ; -- Affects clitic placement and passive LightVerb = NotLight | Light -- ateš zadan -> ateš zade šodan | Kardan ; -- gom kardan -> gom ∅ šodan oper impRoot : Str -> Str = \root -> case root of { st + "ی" => st ; _ => root }; modifyFiniteForms : (Str -> Str) -> Verb -> Verb = \f,v -> v ** {s = table { vf@(VAor _ _ | VPerf _ _ | VPast _ _ | VSubj _ _ | VImp _ _) => f (v.s ! vf) ; vf => v.s ! vf } } ; addClitic : LightVerb -> Str -> Verb -> Verb = \light,cl,v -> v ** {s = let f : Str -> Str = case light of { NotLight => \s -> glue s cl ; _ => \s -> BIND ++ cl ++ s } -- hack: put clitic before the verb, so it attaches to the prefix in table { Inf => glue (v.s ! Inf) cl ; vf => (modifyFiniteForms f v).s ! vf } } ; mkVerb : (inf,pres : Str) -> Verb = \kardan,kon -> { s = table { Inf => kardan ; PastStem => kard ; PresStem => kon ; PerfStem => kardeh ; PastPart => kon + "نده" ; ImpPrefix Pos => "می" + ZWNJ ++ BIND ; ImpPrefix Neg => "نمی" + ZWNJ ++ BIND ; VAor _ agr => imperfectSuffixD agr kon ; -- for reg verbs, negation comes from prefix VPerf pol agr => perfectSuffix agr (addN pol kardeh) ; VPast pol agr => imperfectSuffix agr (addN pol kard) ; VSubj Pos agr => addBh (imperfectSuffixD agr kon) ; VSubj Neg agr => addN (imperfectSuffixD agr kon) ; VImp Pos Sg => addBh imp ; VImp Pos Pl => addBh kon + "ید" ; VImp Neg Sg => addN imp ; VImp Neg Pl => addN kon + "ید" } ; prefix = [] ;-- For compound verbs lightverb = NotLight ; } where { kard = tk 1 kardan ; kardeh = kard + "ه" ; imp = impRoot kon ; } ; invarV : (inv : Str) -> Verb = \s -> -- truly invariable let invReg = defectiveVerb s s s in invReg ** {s = table {ImpPrefix p => invReg.s ! ImpPrefix p ; _ => s}} ; defectiveVerb : (inf,pres,past : Str) -> Verb = \bayestan,bayad,bayest -> let invReg = mkVerb bayestan bayad in invReg ** {s = \\vf => case vf of { ImpPrefix _ => [] ; VAor pol _ => addN pol bayad ; VImp pol _ => addN pol bayad ; VSubj pol _ => addN pol bayad ; VPast pol _ => addN pol bayest ; VPerf pol _ => addN pol bayest ; _ => invReg.s ! vf } } ; -- oper Verb = {s : VerbForm => Str ; prefix : Str ; lightverb : LightVerb} ; -- Verbs that end in یدن, ادن or ودن -- Also some verbs that don't: دانستن with stem دان mkVerb1 : (_: Str) -> Verb = \inf -> mkVerb inf (tk 3 inf) ; -- Most verbs that end in C+تن or C+دن mkVerb2 : (_: Str) -> Verb = \inf -> mkVerb inf (tk 2 inf) ; ------------------- -- making negatives ------------------- addN = overload { addN : Str -> Str = addN' ; addN : Polarity -> Str -> Str = \p,s -> case p of {Pos => s ; Neg => addN' s} } ; addN' : Str -> Str = \str -> case str of { "ا" + st => "نی" + str ; "آ" + st => "نیا" + st ; _ => "ن" + str } ; addBh : Str -> Str ; addBh str = case str of { "ا" + st => "بی" + str ; "آ" + st => "بیا" + st ; _ => "ب" + str }; -- TODO: is this needed anywhere? what does it do? /IL addBh2 : Str -> Str ; -- should use drop instead but it gives linking error addBh2 str1 = case str1 of { "می" + str => case str of { "ا" + st => Prelude.glue "بی" str ; "آ" + st => Prelude.glue "بیا" st ; _ => Prelude.glue "ب" str }; _ => "" -- ???? }; ------------------- -- Common suffixes ------------------- imperfectSuffix : Agr -> Str -> Str = \ag,s -> s + case ag of { Ag Sg P1 => "م" ; Ag Sg P2 => "ی" ; Ag Sg P3 => [] ; Ag Pl P1 => "یم" ; Ag Pl P2 => "ید" ; Ag Pl P3 => "ند" } ; imperfectSuffixD : Agr -> Str -> Str = \ag,s -> case ag of { Ag Sg P3 => s + "د" ; _ => imperfectSuffix ag s } ; perfectSuffix : Agr -> Str -> Str = \ag,s -> case ag of { Ag Sg P1 => zwnj s "ام" ; Ag Sg P2 => zwnj s "ای" ; Ag Sg P3 => s ++ "است" ; -- no ZWNJ Ag Pl P1 => zwnj s "ایم" ; Ag Pl P2 => zwnj s "اید" ; Ag Pl P3 => zwnj s "اند" } ; pluperfAux : Polarity -> Agr -> Str = \pol,agr -> addN pol (imperfectSuffix agr "بود") ; futAux : Polarity -> Agr -> Str = \pol,agr -> addN pol (imperfectSuffixD agr "خواه") ; subjAux : Polarity -> Agr -> Str = \pol,agr -> addN pol (imperfectSuffixD agr "باش") ; ---------------------------------- -- Irregular verbs ---------------------------------- haveVerb : Verb = haveRegV ** {s = table { ImpPrefix _ => [] ; VAor Neg agr => imperfectSuffixD agr (addN "دار") ; VSubj pol agr => haveRegV.s ! VPerf pol agr ; vf => haveRegV.s ! vf } } where { haveRegV = mkVerb "داشتن" "دار" } ; beVerb : Verb = beRegV ** {s = table { ImpPrefix _ => [] ; VAor Pos (Ag Sg P3) => "است" ; VAor Pos agr => imperfectSuffix agr "هست" ; VAor Neg agr => imperfectSuffix agr "نیست" ; VSubj pol agr => addN pol (imperfectSuffixD agr "باش") ; VImp Pos Sg => "باش" ; VImp Pos Pl => "باشید" ; VImp Neg Sg => "نباش" ; VImp Neg Pl => "نباشید" ; vf => beRegV.s ! vf } } where { beRegV = mkVerb "بودن" "باش" } ; doVerb : Verb = doRegV ** {s = table { VSubj pol agr => addN pol (imperfectSuffixD agr "کن") ; VImp Pos Sg => "کن" ; VImp Pos Pl => "کنید" ; VImp Neg Sg => "نکن" ; VImp Neg Pl => "نکنید" ; vf => doRegV.s ! vf } ; lightverb = Kardan } where { doRegV = mkVerb "کردن" "کن" } ; becomeVerb : Verb = mkVerb "شدن" "شو" ; }