--# -path=.:../../prelude
--
----1 A Simple Persian Resource Morphology
----
----  Shafqat Virk, Aarne Ranta,2010
----
---- This resource morphology contains definitions needed in the resource
---- syntax. To build a lexicon, it is better to use $ParadigmsPes$, which
---- gives a higher-level access to this module.
--
resource MorphoPes = ParamX ** open Prelude,Predef in {

  flags optimize=all ;
  coding = utf8;

---- Orthography

oper
  -- Zero-width non-joiner, used for certain morphemes
  -- See https://en.wikipedia.org/wiki/Persian_alphabet#Word_boundaries
  ZWNJ : Str = "‌" ;

  -- Glue two strings into one token with a ZWNJ between them.
  zwnj : Str -> Str -> Str = \s1,s2 -> s1 + ZWNJ + s2 ;

---- Nouns

param
  Animacy = Animate | Inanimate ;
  -- The four shapes a nominal/adjectival stem is inflected into
  -- (see mkN / mkAdj, which fill them with mkEzafe, mkEnclic, mkPossStem).
  Mod = Bare | Ezafe | Clitic | Poss ;
  Agr = Ag Number Person ;

------------------------------------------
-- Agreement transformations
-----------------------------------------

oper
  -- Pack a number and a person into an agreement value.
  toAgr : Number -> Person -> Agr = \n,p -> Ag n p ;

  -- Unpack an agreement value into a record.
  fromAgr : Agr -> {n : Number ; p : Person} = \agr ->
    case agr of {
      Ag n p => {n = n ; p = p}
    } ;

  -- Agreement of a coordination: the numbers are combined with conjNumber,
  -- the person is taken from the second conjunct only.
  conjAgr : Agr -> Agr -> Agr = \a0,b0 ->
    let a = fromAgr a0 ;
        b = fromAgr b0
     in toAgr (conjNumber a.n b.n) b.p ;

  -- Project the number out of an agreement value.
  giveNumber : Agr -> Number = \a ->
    case a of {
      Ag n _ => n
    } ;

  defaultAgr : Agr = agrP3 Sg ;
  agrP3 : Number -> Agr = \n -> Ag n P3 ;
  agrP1 : Number -> Agr = \n -> Ag n P1 ;

-------------------------
-- Ezafe construction
------------------------

oper
  -- Stem used for the Poss form.
  -- Words in اه, او, وه are left unchanged; other words in ا or و get ی
  -- appended; other words in ه get ZWNJ + ا; everything else is unchanged.
  -- The order of the branches matters: the two-letter endings must be
  -- tried before the one-letter ones.
  mkPossStem : Str -> Str = \str ->
    case str of {
      _+ "اه"       => str ;
      _+ "او"       => str ;
      _+ "وه"       => str ;
      _+ ("ا"|"و")  => str + "ی" ;
      _+ "ه"        => zwnj str "ا" ;
      _             => str
    } ;

  -- Ezafe form of a word.
  -- The kasre is currently the empty string, so only words ending in a
  -- single ه (ZWNJ + ی), و or ا (plain ی) visibly change.
  mkEzafe : Str -> Str = \str ->
    --let kasre = "ِ" in -- TODO: Eventually use this
    let kasre = "" in
    case str of {
      st + "اه" => str + kasre ;
      st + "وه" => str + kasre ;
      st + "ه"  => zwnj str "ی" ; -- alt. st + "ۀ" ;
      st + "او" => str + kasre ;
      st + "وو" => str + kasre ;
      st + "و"  => str + "ی" ;
      st + "ا"  => str + "ی" ;
      _         => str + kasre
    } ;

  -- Form with the enclitic ی (used for the Clitic value of Mod).
  mkEnclic : Str -> Str ;
  mkEnclic str = case str of {
    st + ("ا"|"و") => zwnj str "یی" ; -- ی after a long vowel to help pronunciation
    st + "اه"      => str + "ی" ;      -- here ه is a consonant, so single ی
    st + ("ی"|"ه") => zwnj str "ای" ; -- after ی or ه as a vowel, add an alif to help pronunciation
    _              => str + "ی"        -- any other case: just a single ی
  } ;

  Noun = {s : Number => Mod => Str ; animacy : Animacy} ;

  -- Worst-case noun paradigm: singular, plural, animacy.
  -- All Mod forms are derived mechanically from the two given strings.
  mkN : (x1,x2 : Str) -> Animacy -> Noun = \sg,pl,ani -> {
    s = table {
      Sg => table {
        Bare   => sg ;
        Ezafe  => mkEzafe sg ;
        Clitic => mkEnclic sg ;
        Poss   => mkPossStem sg
      } ;
      Pl => table {
        Bare   => pl ;
        Ezafe  => mkEzafe pl ;
        Clitic => mkEnclic pl ;
        Poss   => mkPossStem pl
      }
    } ;
    animacy = ani
  } ;

  -- masculine nouns end with alif, choTi_hay, ain Transliteration: (a, h, e)
  -- Arabic nouns end with h. also taken as Masc
  -- NOTE(review): Noun above has no gender field, so this remark looks like
  -- a leftover from another language's morphology module -- confirm and remove.

---------------------
--Determiners
--------------------

  Determiner : Type = {s : Str ; n : Number ; isNum : Bool ; mod : Mod} ;

  -- Determiner from its string, its number and the isNum flag; mod is always Bare.
  makeDet : Str -> Number -> Bool -> Determiner = \str,n,b -> {
    s = str ;
    isNum = b ;
    mod = Bare ;
    n = n
  } ;

  -- Quantifier from singular and plural strings.
  -- The agreement is fixed to third person singular and mod to Bare.
  makeQuant : Str -> Str -> {s : Number => Str ; a : Agr ; mod : Mod} = \sg,pl -> {
    s = table {Sg => sg ; Pl => pl} ;
    mod = Bare ;
    a = agrP3 Sg
  } ;

---------------------------
-- Adjectives
--------------------------

  Adjective : Type = {s : Mod => Str ; adv : Str} ;

  -- Adjective from its bare form and the corresponding adverb string;
  -- the Mod forms are derived with the same functions as for nouns.
  mkAdj : Str -> Str -> Adjective = \adj,adv -> {
    s = table {
      Bare   => adj ;
      Ezafe  => mkEzafe adj ;
      Clitic => mkEnclic adj ;
      Poss   => mkPossStem adj
    } ;
    adv = adv
  } ;

------------------------------------------------------------------
-- Verbs
------------------------------------------------------------------

param
  VerbForm1 = VF Polarity VTense2 Agr
            | Vvform Agr
            | Imp Polarity Number
            | Inf
            | Root1
            | Root2 ;
  VTense2 = PPresent2 PrAspect
          | PPast2 PstAspect
          | PFut2 FtAspect
          | Infr_Past2 InfrAspect ;
  PrAspect = PrPerf | PrImperf ;
  PstAspect = PstPerf | PstImperf | PstAorist ;
  FtAspect = FtAorist ; -- just keep FtAorist
  InfrAspect = InfrPerf | InfrImperf ;

oper
  Verb = {s : VerbForm1 => Str} ;

  -- Worst-case verb paradigm.
  --   inf   : the infinitive
  --   root2 : the stem used for the present, imperative and Vvform forms
  -- Root1 is the infinitive minus its last character.
  mkVerb : (x1,x2 : Str) -> Verb = \inf,root2 ->
    let root1 = tk 1 inf ;
        -- Named imp (not impRoot) so that the top-level oper impRoot is not shadowed.
        imp = impRoot root2
     in {
      s = table {
        Root1      => root1 ;
        Root2      => root2 ;
        Inf        => inf ;
        Imp Pos Sg => addBh imp ;
        Imp Pos Pl => addBh imp + "ید" ;
        Imp Neg Sg => "ن" + imp ;
        Imp Neg Pl => "ن" + imp + "ید" ;
        Vvform ag  => mkvVform root2 ag ;
        VF p t ag  => mkCmnVF root1 root2 p t ag
      }
    } ;

  -- Verbs that end in یدن, ادن or ودن
  -- Also some verbs that don't: دانستن with stem دان
  -- (root2 = infinitive minus its last three characters)
  mkVerb1 : (_ : Str) -> Verb = \inf -> mkVerb inf (tk 3 inf) ;

  -- Most verbs that end in C+تن or C+دن
  -- (root2 = infinitive minus its last two characters)
  mkVerb2 : (_ : Str) -> Verb = \inf -> mkVerb inf (tk 2 inf) ;

  -- Finite forms shared by all verbs, built from the two stems.
  -- The local names are transliterations of the pieces they hold
  -- (presumably after the example verb khordan): an "n" prefix marks the
  -- negated variant, a "me" prefix the variant carrying the prefix می.
  mkCmnVF : Str -> Str -> Polarity -> VTense2 -> Agr -> Str = \root1,root2,pol,t,ag ->
    let khordh    = root1 + "ه" ;
        nkhordh   = addN khordh ;
        mekhor    = zwnj "می" root2 ;
        nmekhor   = zwnj "نمی" root2 ;
        mekhord   = zwnj "می" root1 ;
        nmekhord  = zwnj "نمی" root1 ;
        mekhordh  = zwnj "می" khordh ;
        nmekhordh = zwnj "نمی" khordh ;
        khah      = "خواه" ;
        nkhah     = "نخواه" ;
        -- mekhah = zwnj "می" khah ;
        -- nmekhah = zwnj "نمی" khah ;
        bvdh      = "بوده" ;
        impfSuff    : Str -> Str = imperfectSuffix ag ;
        impfSuffD   : Str -> Str = imperfectSuffixD ag ;
        perfSuff    : Str -> Str = perfectSuffix ag ;
        pluperfSuff : Str -> Str = pluperfectSuffix ag
     in case <pol,t> of {
      <Pos,PPresent2 PrImperf>    => impfSuffD mekhor ;
      <Pos,PPresent2 PrPerf>      => perfSuff khordh ;
      <Pos,PPast2 PstPerf>        => pluperfSuff khordh ;
      <Pos,PPast2 PstImperf>      => impfSuff mekhord ;
      <Pos,PPast2 PstAorist>      => impfSuff root1 ;
      <Pos,PFut2 FtAorist>        => impfSuffD khah ++ root1 ;
      <Pos,Infr_Past2 InfrPerf>   => khordh ++ perfSuff bvdh ;
      -- Uses the می-prefixed participle, mirroring the negative branch below;
      -- with plain khordh this form was identical to the present perfect.
      <Pos,Infr_Past2 InfrImperf> => perfSuff mekhordh ;

      -- negatives
      <Neg,PPresent2 PrImperf>    => impfSuffD nmekhor ;
      <Neg,PPresent2 PrPerf>      => perfSuff nkhordh ;
      <Neg,PPast2 PstPerf>        => pluperfSuff nkhordh ;
      <Neg,PPast2 PstImperf>      => impfSuff nmekhord ;
      <Neg,PPast2 PstAorist>      => impfSuff (addN root1) ;
      <Neg,PFut2 FtAorist>        => impfSuffD nkhah ++ root1 ;
      <Neg,Infr_Past2 InfrPerf>   => nkhordh ++ perfSuff bvdh ;
      <Neg,Infr_Past2 InfrImperf> => perfSuff nmekhordh
      -- Disabled branches (their patterns are not recoverable from this file):
      --   => perfSuffD mekhah ++ addBh (perfSuffD root2) ;
      --   => perfSuffD nmekhah ++ addBh (perfSuffD root2) ;
    } ;

  -- Vvform: root2 with the personal endings of imperfectSuffixD,
  -- prefixed by addBh.
  mkvVform : Str -> Agr -> Str = \root2,ag ->
    addBh (imperfectSuffixD ag root2) ;

  -- Stem used for imperatives: a final ی is dropped, anything else is kept.
  impRoot : Str -> Str = \root ->
    case root of {
      st + "ی" => st ;
      _        => root
    } ;

-------------------
-- making negatives
-------------------

  -- Negative prefix ن; a ی is inserted before initial ا, and initial آ
  -- is replaced by یا.
  addN : Str -> Str ;
  addN str =
    case str of {
      "ا" + st => "نی" + str ;
      "آ" + st => "نیا" + st ;
      _        => "ن" + str
    } ;

  -- Prefix ب, with the same treatment of initial ا and آ as in addN.
  addBh : Str -> Str ;
  addBh str =
    case str of {
      "ا" + st => "بی" + str ;
      "آ" + st => "بیا" + st ;
      _        => "ب" + str
    } ;

  -- TODO: is this needed anywhere? what does it do? /IL
  -- As written: strips a leading می and applies the addBh rules to the rest
  -- using Prelude.glue; any input that does not start with می yields the
  -- empty string.
  addBh2 : Str -> Str ; -- should use drop instead but it gives linking error
  addBh2 str1 =
    case str1 of {
      "می" + str =>
        case str of {
          "ا" + st => Prelude.glue "بی" str ;
          "آ" + st => Prelude.glue "بیا" st ;
          _        => Prelude.glue "ب" str
        } ;
      _ => "" -- ????
    } ;

-------------------
-- Common suffixes
-------------------

  -- Personal endings; third person singular gets no ending.
  imperfectSuffix : Agr -> Str -> Str = \ag,s -> s +
    case ag of {
      Ag Sg P1 => "م" ;
      Ag Sg P2 => "ی" ;
      Ag Sg P3 => [] ;
      Ag Pl P1 => "یم" ;
      Ag Pl P2 => "ید" ;
      Ag Pl P3 => "ند"
    } ;

  -- Same as imperfectSuffix, except that third person singular ends in د.
  imperfectSuffixD : Agr -> Str -> Str = \ag,s ->
    case ag of {
      Ag Sg P3 => s + "د" ;
      _        => imperfectSuffix ag s
    } ;

  -- Perfect endings, attached with a ZWNJ; third person singular is the
  -- separate token است.
  perfectSuffix : Agr -> Str -> Str = \ag,s ->
    case ag of {
      Ag Sg P1 => zwnj s "ام" ;
      Ag Sg P2 => zwnj s "ای" ;
      Ag Sg P3 => s ++ "است" ; -- no ZWNJ
      Ag Pl P1 => zwnj s "ایم" ;
      Ag Pl P2 => zwnj s "اید" ;
      Ag Pl P3 => zwnj s "اند"
    } ;

  -- Adds the matching form of بود as a separate token.
  pluperfectSuffix : Agr -> Str -> Str = \ag,s -> s ++
    case ag of { -- not suffix, just using consistent naming scheme :-P /IL
      Ag Sg P1 => "بودم" ;
      Ag Sg P2 => "بودی" ;
      Ag Sg P3 => "بود" ;
      Ag Pl P1 => "بودیم" ;
      Ag Pl P2 => "بودید" ;
      Ag Pl P3 => "بودند"
    } ;

----------------------------------
-- Irregular verbs
----------------------------------

  -- The verb داشتن; its finite forms come from toHave.
  haveVerb : Verb = {
    s = table {
      Root1      => "داشت" ;
      Root2      => "دار" ;
      Inf        => "داشتن" ;
      Imp Pos Sg => "بدار" ;
      Imp Pos Pl => "بدارید" ;
      Imp Neg Sg => "ندار" ;
      Imp Neg Pl => "ندارید" ;
      Vvform agr => mkvVform "دار" agr ;
      VF pol tense agr => toHave pol tense agr
    }
  } ;

  -- Finite forms of داشتن: the present imperfect is built on دار without
  -- the می prefix; every other form is delegated to mkCmnVF.
  toHave : Polarity -> VTense2 -> Agr -> Str = \pol,t,ag ->
    let dar   = "دار" ;
        ndar  = addN dar ;
        dasht = "داشت"
     in case <pol,t> of {
      <Pos,PPresent2 PrImperf> => imperfectSuffixD ag dar ;
      <Neg,PPresent2 PrImperf> => imperfectSuffixD ag ndar ;
      _                        => mkCmnVF dasht dar pol t ag
    } ;

  -- TODO: merge with auxBe in ResPes
  -- The verb بودن. Only the forms listed explicitly are irregular here;
  -- the remaining finite forms fall through to mkCmnVF.
  beVerb : Verb = {
    s = table {
      Vvform agr => imperfectSuffixD agr "باش" ;
      Imp Pos Sg => "باش" ;
      Imp Pos Pl => "باشید" ;
      Imp Neg Sg => "نباش" ;
      Imp Neg Pl => "نباشید" ;
      Inf        => "بودن" ;
      Root1      => "بود" ;
      Root2      => "باش" ;
      VF pol tense agr =>
        let impfSuff = imperfectSuffix agr ;
            perfSuff = perfectSuffix agr
         in case <pol,tense,agr> of {
          <Pos,PPresent2 PrImperf,Ag Sg P3> => "است" ;
          <Pos,PPresent2 PrImperf,_>        => impfSuff "هست" ;
          -- NOTE(review): the tense pattern of this branch and of its negative
          -- twin was lost from the file and is reconstructed as the present
          -- perfect -- confirm against version control (Infr_Past2 is the
          -- other plausible reading).
          <Pos,PPresent2 PrPerf,_>          => perfSuff "بوده" ;
          <Neg,PPresent2 PrImperf,Ag Sg P3> => "نیست" ;
          <Neg,PPresent2 PrImperf,_>        => impfSuff "نیست" ;
          <Neg,PPresent2 PrPerf,_>          => perfSuff "نبوده" ;
          _ => mkCmnVF "بود" "باش" pol tense agr
        }
    }
  } ;

}