Files
gf-rgl/src/persian/MorphoPes.gf
2019-02-22 17:50:15 +01:00

357 lines
11 KiB
Plaintext
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
--# -path=.:../../prelude
--
----1 A Simple Persian Resource Morphology
----
---- Shafqat Virk, Aarne Ranta,2010
----
---- This resource morphology contains definitions needed in the resource
---- syntax. To build a lexicon, it is better to use $ParadigmsPes$, which
---- gives a higher-level access to this module.
--
resource MorphoPes = ParamX ** open Prelude,Predef in {
flags optimize=all ;
coding = utf8;
---- Orthography
oper
-- Zero-width non-joiner, used for certain morphemes
-- See https://en.wikipedia.org/wiki/Persian_alphabet#Word_boundaries
ZWNJ : Str = "" ;
zwnj : Str -> Str -> Str = \s1,s2 -> s1 + ZWNJ + s2 ;
---- Nouns
param
Animacy = Animate | Inanimate ;
Mod = Bare | Ezafe | Clitic | Poss ;
Agr = Ag Number Person ;
------------------------------------------
-- Agreement transformations
-----------------------------------------
oper
toAgr : Number -> Person -> Agr = \n,p -> Ag n p ;
fromAgr : Agr -> {n : Number ; p : Person } = \agr -> case agr of {
Ag n p => {n = n ; p = p}
} ;
conjAgr : Agr -> Agr -> Agr = \a0,b0 ->
let a = fromAgr a0 ; b = fromAgr b0
in toAgr (conjNumber a.n b.n) b.p ;
giveNumber : Agr -> Number = \a -> case a of {
Ag n _ => n
} ;
defaultAgr : Agr = agrP3 Sg ;
agrP3 : Number -> Agr = \n -> Ag n P3 ;
agrP1 : Number -> Agr = \n -> Ag n P1 ;
-------------------------
-- Ezafe construction
------------------------
oper
mkPossStem : Str -> Str = \str ->
case str of {
_+ "اه" => str ;
_+ "او" => str ;
_+ "وه" => str ;
_+ ("ا"|"و") => str + "ی" ;
_+ "ه" => zwnj str "ا" ;
_ => str } ;
mkEzafe : Str -> Str = \str ->
--let kasre = "ِ" in -- TODO: Eventually use this
let kasre = "" in
case str of {
st + "اه" => str + kasre ;
st + "وه" => str + kasre ;
st + "ه" => zwnj str "ی" ;-- alt. st + "ۀ" ;
st + "او" => str + kasre ;
st + "وو" => str + kasre ;
st + "و" => str + "ی" ;
st + "ا" => str + "ی" ;
_ => str + kasre
};
mkEnclic : Str -> Str ;
mkEnclic str = case str of {
st + ("ا"|"و") => zwnj str "یی" ; -- ی after a long vowel to help pronunciation
st + "اه" => str + "ی" ; -- here ه is a consonant, so single ی
st + ("ی"|"ه") => zwnj str "ای" ; -- after ی or ه as a vowel, add an alif to help pronunciation
_ => str + "ی" -- any other case: just a single ی
} ;
Noun = {s : Number => Mod => Str ; animacy : Animacy} ;
mkN : (x1,x2 : Str) -> Animacy -> Noun = \sg,pl,ani -> {
s = table {
Sg => table {Bare => sg ;
Ezafe => mkEzafe sg ;
Clitic => mkEnclic sg ;
Poss => mkPossStem sg } ;
Pl => table {Bare => pl ;
Ezafe => mkEzafe pl ;
Clitic => mkEnclic pl ;
Poss => mkPossStem pl }
} ;
animacy = ani
} ;
-- masculine nouns end with alif, choTi_hay, ain Translitration: (a, h, e)
-- Arabic nouns ends with h. also taken as Masc
---------------------
--Determiners
--------------------
Determiner : Type = {s : Str ; n :Number ; isNum : Bool ; mod : Mod} ;
makeDet : Str -> Number -> Bool -> Determiner = \str,n,b -> {
s = str;
isNum = b;
mod = Bare ;
n = n
};
makeQuant : Str -> Str -> {s : Number => Str ; a : Agr; mod : Mod } = \sg,pl -> {
s = table {Sg => sg ; Pl => pl} ;
mod = Bare ;
a = agrP3 Sg
};
---------------------------
-- Adjectives
--------------------------
Adjective : Type = {s : Mod => Str ; adv : Str} ;
mkAdj : Str -> Str -> Adjective = \adj,adv -> {
s = table { Bare => adj;
Ezafe => mkEzafe adj ;
Clitic => mkEnclic adj ;
Poss => mkPossStem adj
} ;
adv = adv
};
------------------------------------------------------------------
-- Verbs
------------------------------------------------------------------
param
VerbForm1 = VF Polarity VTense2 Agr
| Vvform Agr
| Imp Polarity Number
| Inf | Root1 | Root2 ;
VTense2 = PPresent2 PrAspect
| PPast2 PstAspect
| PFut2 FtAspect
| Infr_Past2 InfrAspect;
PrAspect = PrPerf | PrImperf ;
PstAspect = PstPerf | PstImperf | PstAorist ;
FtAspect = FtAorist ; -- just keep FtAorist
InfrAspect = InfrPerf | InfrImperf ;
oper
Verb = {s : VerbForm1 => Str} ;
mkVerb : (x1,x2 : Str) -> Verb = \inf,root2 ->
let root1 = tk 1 inf ;
impRoot = impRoot root2
in { s = table {
Root1 => root1 ;
Root2 => root2 ;
Inf => inf ;
Imp Pos Sg => addBh impRoot ;
Imp Pos Pl => addBh impRoot + "ید" ;
Imp Neg Sg => "ن" + impRoot ;
Imp Neg Pl => "ن" + impRoot + "ید" ;
Vvform ag => mkvVform root2 ag ;
VF p t ag => mkCmnVF root1 root2 p t ag }
} ;
-- Verbs that end in یدن, ادن or ودن
-- Also some verbs that don't: دانستن with stem دان
mkVerb1 : (_: Str) -> Verb = \inf -> mkVerb inf (tk 3 inf) ;
-- Most verbs that end in C+تن or C+دن
mkVerb2 : (_: Str) -> Verb = \inf -> mkVerb inf (tk 2 inf) ;
mkCmnVF : Str -> Str -> Polarity -> VTense2 -> Agr -> Str = \root1,root2,pol,t,ag ->
let khordh = root1 + "ه";
nkhordh = addN khordh ;
mekhor = zwnj "می" root2 ;
nmekhor = zwnj "نمی" root2 ;
mekhord = zwnj "می" root1 ;
nmekhord = zwnj "نمی" root1 ;
mekhordh = zwnj "می" khordh ;
nmekhordh = zwnj "نمی" khordh ;
khah = "خواه" ;
nkhah = "نخواه" ;
-- mekhah = zwnj "می" khah ;
-- nmekhah = zwnj "نمی" khah ;
bvdh = "بوده" ;
impfSuff : Str -> Str = imperfectSuffix ag ;
impfSuffD : Str -> Str = imperfectSuffixD ag ;
perfSuff : Str -> Str = perfectSuffix ag ;
pluperfSuff : Str -> Str = pluperfectSuffix ag
in case <pol,t> of {
<Pos,PPresent2 PrImperf> => impfSuffD mekhor ;
<Pos,PPresent2 PrPerf> => perfSuff khordh ;
<Pos,PPast2 PstPerf> => pluperfSuff khordh ;
<Pos,PPast2 PstImperf> => impfSuff mekhord ;
<Pos,PPast2 PstAorist> => impfSuff root1 ;
<Pos,PFut2 FtAorist> => impfSuffD khah ++ root1;
<Pos,Infr_Past2 InfrPerf> => khordh ++ perfSuff bvdh ;
<Pos,Infr_Past2 InfrImperf> => perfSuff khordh ;
-- negatives
<Meg,PPresent2 PrImperf> => impfSuffD nmekhor ;
<Neg,PPresent2 PrPerf> => perfSuff nkhordh ;
<Neg,PPast2 PstPerf> => pluperfSuff nkhordh ;
<Neg,PPast2 PstImperf> => impfSuff nmekhord ;
<Neg,PPast2 PstAorist> => impfSuff (addN root1) ;
<Neg,PFut2 FtAorist> => impfSuffD nkhah ++ root1 ;
<Neg,Infr_Past2 InfrPerf> => nkhordh ++ perfSuff bvdh ;
<Neg,Infr_Past2 InfrImperf> => perfSuff nmekhordh
-- <Pos,PFut2 FtImperf> => perfSuffD mekhah ++ addBh (perfSuffD root2) ;
-- <Neg,PFut2 FtImperf> => perfSuffD nmekhah ++ addBh (perfSuffD root2) ;
} ;
mkvVform : Str -> Agr -> Str = \root2,ag ->
addBh (imperfectSuffixD ag root2) ;
impRoot : Str -> Str = \root -> case root of {
st + "ی" => st ;
_ => root
};
-------------------
-- making negatives
-------------------
addN : Str -> Str ;
addN str =
case str of {
"ا" + st => "نی" + str ;
"آ" + st => "نیا" + st ;
_ => "ن" + str } ;
addBh : Str -> Str ;
addBh str =
case str of {
"ا" + st => "بی" + str ;
"آ" + st => "بیا" + st ;
_ => "ب" + str
};
-- TODO: is this needed anywhere? what does it do? /IL
addBh2 : Str -> Str ; -- should use drop instead but it gives linking error
addBh2 str1 =
case str1 of {
"می" + str =>
case str of {
"ا" + st => Prelude.glue "بی" str ;
"آ" + st => Prelude.glue "بیا" st ;
_ => Prelude.glue "ب" str
};
_ => "" -- ????
};
-------------------
-- Common suffixes
-------------------
imperfectSuffix : Agr -> Str -> Str = \ag,s -> s +
case ag of {
Ag Sg P1 => "م" ;
Ag Sg P2 => "ی" ;
Ag Sg P3 => [] ;
Ag Pl P1 => "یم" ;
Ag Pl P2 => "ید" ;
Ag Pl P3 => "ند" } ;
imperfectSuffixD : Agr -> Str -> Str = \ag,s ->
case ag of {
Ag Sg P3 => s + "د" ;
_ => imperfectSuffix ag s } ;
perfectSuffix : Agr -> Str -> Str = \ag,s ->
case ag of {
Ag Sg P1 => zwnj s "ام" ;
Ag Sg P2 => zwnj s "ای" ;
Ag Sg P3 => s ++ "است" ; -- no ZWNJ
Ag Pl P1 => zwnj s "ایم" ;
Ag Pl P2 => zwnj s "اید" ;
Ag Pl P3 => zwnj s "اند" } ;
pluperfectSuffix : Agr -> Str -> Str = \ag,s -> s ++
case ag of { -- not suffix, just using consistent naming scheme :-P /IL
Ag Sg P1 => "بودم" ;
Ag Sg P2 => "بودی" ;
Ag Sg P3 => "بود" ;
Ag Pl P1 => "بودیم" ;
Ag Pl P2 => "بودید" ;
Ag Pl P3 => "بودند" } ;
----------------------------------
-- Irregular verbs
----------------------------------
haveVerb : Verb = {s = table {
Root1 => "داشت" ;
Root2 => "دار" ;
Inf => "داشتن" ;
Imp Pos Sg => "بدار" ;
Imp Pos Pl => "بدارید" ;
Imp Neg Sg => "ندار" ;
Imp Neg Pl => "ندارید" ;
Vvform agr => mkvVform "دار" agr ;
VF pol tense agr => toHave pol tense agr
}
} ;
toHave : Polarity -> VTense2 -> Agr -> Str = \pol,t,ag ->
let dar = "دار" ;
ndar = addN dar ;
dasht = "داشت"
in case <pol,t> of {
<Pos,PPresent2 PrImperf> => imperfectSuffixD ag dar ;
<Neg,PPresent2 PrImperf> => imperfectSuffixD ag ndar ;
_ => mkCmnVF dasht dar pol t ag
} ;
-- TODO: merge with auxBe in ResPes
beVerb : Verb = { s = table {
Vvform agr => imperfectSuffixD agr "باش" ;
Imp Pos Sg => "باش" ;
Imp Pos Pl => "باشید" ;
Imp Neg Sg => "نباش" ;
Imp Neg Pl => "نباشید" ;
Inf => "بودن" ;
Root1 => "بود" ;
Root2 => "باش" ;
VF pol tense agr =>
let impfSuff = imperfectSuffix agr ;
perfSuff = perfectSuffix agr
in case <pol,tense,agr> of {
<Pos,PPresent2 PrImperf,Ag Sg P3> => "است" ;
<Pos,PPresent2 PrImperf> => impfSuff "هست" ;
<Pos,PPresent2 PrPerf> => perfSuff "بوده" ;
<Neg,PPresent2 PrImperf,Ag Sg P3> => "نیست" ;
<Neg,PPresent2 PrImperf> => impfSuff "نیست" ;
<Neg,PPresent2 PrPerf> => perfSuff "نبوده" ;
_ => mkCmnVF "بود" "باش" pol tense agr
}
}
} ;
}