gf-rgl/src/persian/MorphoPes.gf

--# -path=.:../../prelude
--
----1 A Simple Persian Resource Morphology
----
----  Shafqat Virk, Aarne Ranta,2010
----
---- This resource morphology contains definitions needed in the resource
---- syntax. To build a lexicon, it is better to use $ParadigmsPes$, which
---- gives a higher-level access to this module.
--
resource MorphoPes = ParamX ** open Prelude,Predef in {

  flags optimize=all ;
   coding = utf8;

---- Orthography

oper
  -- Zero-width non-joiner, used for certain morphemes
  -- See https://en.wikipedia.org/wiki/Persian_alphabet#Word_boundaries
  ZWNJ : Str = "‌" ;
  zwnj : Str -> Str -> Str = \s1,s2 -> s1 + ZWNJ + s2 ;

---- Nouns
param
  Animacy = Animate | Inanimate ;
  Mod = Bare | Ezafe | Clitic | Poss ;
  Agr = Ag Number Person ;

------------------------------------------
-- Agreement transformations
-----------------------------------------
oper
  toAgr : Number -> Person -> Agr = \n,p -> Ag n p ;

  fromAgr : Agr -> {n : Number ; p : Person } = \agr -> case agr of {
    Ag n p => {n = n ; p = p}
  } ;

  conjAgr : Agr -> Agr -> Agr = \a0,b0 ->
    let a = fromAgr a0 ; b = fromAgr b0
     in toAgr (conjNumber a.n b.n) b.p ;

  giveNumber : Agr -> Number = \a -> case a of {
     Ag n _ => n
  } ;

  defaultAgr : Agr = agrP3 Sg ;
  agrP3 : Number -> Agr = \n -> Ag n P3 ;
  agrP1 : Number -> Agr = \n -> Ag n P1 ;

-------------------------
-- Ezafe construction
------------------------
oper


mkPossStem : Str -> Str = \str ->

  case str of {
_+ "اه" => str ;
_+ "او" => str ;
_+ "وه" => str ;
_+ ("ا"|"و") => str + "ی" ;
_+ "ه" => zwnj str "ا" ;
_ => str } ;


  mkEzafe : Str -> Str = \str ->
  --let kasre = "ِ" in -- TODO: Eventually use this
    let kasre = "" in
    case str of {
      st + "اه" => str + kasre ;
      st + "وه" => str + kasre ;
      st + "ه"  => zwnj str "ی" ;-- alt. st + "ۀ" ;
      st + "او" => str + kasre ;
      st + "وو" => str + kasre ;
      st + "و"  => str + "ی" ;
      st + "ا"  => str + "ی" ;
      _ => str + kasre
    };

  mkEnclic : Str -> Str ;
  mkEnclic str = case str of {
      st + ("ا"|"و") => zwnj str "یی" ; -- ی after a long vowel to help pronunciation
      st + "اه"      => str + "ی" ; -- here ه is a consonant, so single ی
      st + ("ی"|"ه") => zwnj str "ای" ; -- after ی or ه as a vowel, add an alif to help pronunciation
      _               =>      str + "ی" -- any other case: just a single ی
    } ;

    Noun = {s : Number => Mod => Str ; animacy : Animacy} ;
    mkN : (x1,x2 : Str) -> Animacy -> Noun = \sg,pl,ani -> {
      s = table {
           Sg => table {Bare => sg ;
                        Ezafe => mkEzafe sg ;
                        Clitic => mkEnclic sg ;
                        Poss => mkPossStem sg } ;
           Pl => table {Bare => pl ;
                        Ezafe => mkEzafe pl ;
                        Clitic => mkEnclic pl ;
                        Poss => mkPossStem pl }
      } ;
     animacy = ani
    } ;

-- masculine nouns end with alif, choTi_hay, ain Translitration: (a, h, e)
-- Arabic nouns ends with h. also taken as Masc

---------------------
--Determiners
--------------------
Determiner : Type = {s : Str ; n :Number ; isNum : Bool ; mod : Mod} ;

 makeDet : Str -> Number -> Bool -> Determiner = \str,n,b -> {
   s = str;
   isNum = b;
   mod = Bare ;
   n = n
 };

 makeQuant : Str -> Str  -> {s : Number => Str ; a : Agr; mod : Mod } = \sg,pl -> {
   s = table {Sg => sg ; Pl => pl} ;
   mod = Bare ;
   a = agrP3 Sg
 };
---------------------------
-- Adjectives
--------------------------
 Adjective : Type = {s : Mod => Str ; adv : Str} ;

 mkAdj : Str -> Str -> Adjective = \adj,adv -> {
   s = table { Bare => adj;
               Ezafe => mkEzafe adj ;
               Clitic => mkEnclic adj ;
               Poss => mkPossStem adj
             } ;
   adv = adv
   };

------------------------------------------------------------------
-- Verbs
------------------------------------------------------------------
param
  VerbForm1 = VF Polarity VTense2 Agr
            | Vvform Agr
            | Imp Polarity Number
            | Inf | Root1 | Root2 ;
  VTense2   = PPresent2 PrAspect
            | PPast2 PstAspect
            | PFut2 FtAspect
            | Infr_Past2 InfrAspect;
  PrAspect  = PrPerf | PrImperf ;
  PstAspect = PstPerf | PstImperf | PstAorist ;
  FtAspect  = FtAorist ; -- just keep FtAorist
  InfrAspect = InfrPerf | InfrImperf ;

oper
  Verb = {s : VerbForm1 => Str} ;

  mkVerb : (x1,x2 : Str) -> Verb = \inf,root2 ->
    let root1 = tk 1 inf ;
        impRoot = impRoot root2
     in { s = table {
       Root1    => root1 ;
       Root2    => root2 ;
       Inf      => inf ;
       Imp Pos Sg => addBh impRoot ;
       Imp Pos Pl => addBh impRoot + "ید" ;
       Imp Neg Sg => "ن" + impRoot ;
       Imp Neg Pl => "ن" +  impRoot + "ید" ;
       Vvform ag  => mkvVform root2 ag ;
       VF p t ag  => mkCmnVF root1 root2 p t ag }
    } ;

  -- Verbs that end in یدن, ادن or ودن
  -- Also some verbs that don't: دانستن with stem دان
  mkVerb1 : (_: Str) -> Verb = \inf -> mkVerb inf (tk 3 inf) ;

  -- Most verbs that end in C+تن or C+دن
  mkVerb2 : (_: Str) -> Verb = \inf -> mkVerb inf (tk 2 inf) ;

  mkCmnVF : Str -> Str -> Polarity -> VTense2 -> Agr -> Str = \root1,root2,pol,t,ag ->
    let khordh = root1 + "ه";
        nkhordh = addN khordh ;
        mekhor = zwnj "می" root2 ;
        nmekhor =  zwnj "نمی" root2 ;
        mekhord = zwnj "می" root1 ;
        nmekhord = zwnj "نمی" root1 ;
        mekhordh = zwnj "می" khordh ;
        nmekhordh = zwnj "نمی" khordh ;
        khah = "خواه" ;
        nkhah = "نخواه" ;
        -- mekhah = zwnj "می" khah ;
        -- nmekhah = zwnj "نمی" khah ;
        bvdh = "بوده" ;
        impfSuff : Str -> Str = imperfectSuffix ag ;
        impfSuffD : Str -> Str = imperfectSuffixD ag ;
        perfSuff : Str -> Str = perfectSuffix ag ;
        pluperfSuff : Str -> Str = pluperfectSuffix ag
     in case <pol,t> of {
        <Pos,PPresent2 PrImperf> => impfSuffD mekhor ;
        <Pos,PPresent2 PrPerf>   => perfSuff khordh ;

        <Pos,PPast2 PstPerf>   => pluperfSuff khordh ;
        <Pos,PPast2 PstImperf> => impfSuff mekhord ;
        <Pos,PPast2 PstAorist> => impfSuff root1 ;

        <Pos,PFut2 FtAorist> => impfSuffD khah ++ root1;

        <Pos,Infr_Past2 InfrPerf>   => khordh ++ perfSuff bvdh ;
        <Pos,Infr_Past2 InfrImperf> => perfSuff khordh ;

     -- negatives
        <Meg,PPresent2 PrImperf> => impfSuffD nmekhor ;
        <Neg,PPresent2 PrPerf> => perfSuff nkhordh ;

        <Neg,PPast2 PstPerf> => pluperfSuff nkhordh ;
        <Neg,PPast2 PstImperf> => impfSuff nmekhord ;
        <Neg,PPast2 PstAorist> => impfSuff (addN root1) ;

        <Neg,PFut2 FtAorist> => impfSuffD nkhah ++ root1 ;

        <Neg,Infr_Past2 InfrPerf>   => nkhordh ++ perfSuff bvdh ;
        <Neg,Infr_Past2 InfrImperf> => perfSuff nmekhordh

        -- <Pos,PFut2 FtImperf> => perfSuffD mekhah ++ addBh (perfSuffD root2) ;
        -- <Neg,PFut2 FtImperf> => perfSuffD nmekhah ++ addBh (perfSuffD root2) ;
      } ;

  mkvVform : Str -> Agr -> Str = \root2,ag ->
    addBh (imperfectSuffixD ag root2) ;

  impRoot : Str -> Str = \root -> case root of {
    st + "ی" => st ;
    _        => root
    };

-------------------
-- making negatives
-------------------
  addN : Str -> Str ;
  addN str =
    case str of {
      "ا" + st => "نی" + str ;
      "آ" + st => "نیا" + st ;
       _        => "ن" + str } ;

  addBh : Str -> Str ;
  addBh str =
    case str of {
      "ا" + st => "بی" + str ;
      "آ" + st => "بیا" + st ;
      _        => "ب" + str
         };

  -- TODO: is this needed anywhere? what does it do? /IL
  addBh2 : Str -> Str ; -- should use drop instead but it gives linking error
  addBh2 str1 =
    case str1 of {
      "می" + str =>
         case str of {
           "ا" + st => Prelude.glue "بی" str ;
           "آ" + st => Prelude.glue "بیا" st ;
            _       => Prelude.glue "ب" str
                      };
      _          => "" -- ????
    };

-------------------
-- Common suffixes
-------------------
  imperfectSuffix : Agr -> Str -> Str = \ag,s -> s +
    case ag of {
      Ag Sg P1 => "م" ;
      Ag Sg P2 => "ی" ;
      Ag Sg P3 => [] ;
      Ag Pl P1 => "یم" ;
      Ag Pl P2 => "ید" ;
      Ag Pl P3 => "ند" } ;

  imperfectSuffixD : Agr -> Str -> Str = \ag,s ->
    case ag of {
      Ag Sg P3 => s + "د" ;
      _ => imperfectSuffix ag s } ;

  perfectSuffix : Agr -> Str -> Str = \ag,s ->
    case ag of {
      Ag Sg P1 => zwnj s "ام" ;
      Ag Sg P2 => zwnj s "ای" ;
      Ag Sg P3 => s ++ "است" ; -- no ZWNJ
      Ag Pl P1 => zwnj s "ایم" ;
      Ag Pl P2 => zwnj s "اید" ;
      Ag Pl P3 => zwnj s "اند" } ;

  pluperfectSuffix : Agr -> Str -> Str = \ag,s -> s ++
    case ag of { -- not suffix, just using consistent naming scheme :-P /IL
      Ag Sg P1 => "بودم" ;
      Ag Sg P2 => "بودی" ;
      Ag Sg P3 => "بود" ;
      Ag Pl P1 => "بودیم" ;
      Ag Pl P2 => "بودید" ;
      Ag Pl P3 => "بودند" } ;

----------------------------------
-- Irregular verbs
----------------------------------

  haveVerb : Verb = {s = table {
    Root1 => "داشت" ;
    Root2 => "دار" ;
    Inf   => "داشتن" ;
    Imp Pos Sg => "بدار" ;
    Imp Pos Pl => "بدارید" ;
    Imp Neg Sg => "ندار" ;
    Imp Neg Pl => "ندارید" ;
    Vvform agr => mkvVform "دار" agr ;
    VF pol tense agr => toHave pol tense agr
    }
  } ;

  toHave : Polarity -> VTense2 -> Agr -> Str = \pol,t,ag ->
    let dar =  "دار" ;
        ndar = addN dar ;
        dasht = "داشت"
     in case <pol,t> of {
          <Pos,PPresent2 PrImperf> => imperfectSuffixD ag dar ;
          <Neg,PPresent2 PrImperf> => imperfectSuffixD ag ndar ;
          _ => mkCmnVF dasht dar pol t ag
        } ;

  -- TODO: merge with auxBe in ResPes
  beVerb : Verb = { s = table {
    Vvform agr => imperfectSuffixD agr "باش" ;
    Imp Pos Sg => "باش" ;
    Imp Pos Pl => "باشید" ;
    Imp Neg Sg => "نباش" ;
    Imp Neg Pl => "نباشید" ;
    Inf => "بودن" ;
    Root1 => "بود" ;
    Root2 => "باش" ;
    VF pol tense agr =>
      let impfSuff = imperfectSuffix agr ;
          perfSuff = perfectSuffix agr
      in case <pol,tense,agr> of {
       <Pos,PPresent2 PrImperf,Ag Sg P3> => "است" ;
       <Pos,PPresent2 PrImperf>          => impfSuff "هست" ;
       <Pos,PPresent2 PrPerf>            => perfSuff "بوده" ;
       <Neg,PPresent2 PrImperf,Ag Sg P3> => "نیست" ;
       <Neg,PPresent2 PrImperf>          => impfSuff "نیست" ;
       <Neg,PPresent2 PrPerf>            => perfSuff "نبوده" ;
       _                                 => mkCmnVF "بود" "باش" pol tense agr
       }
    }
  } ;
}