diff --git a/src/persian/ExtendPes.gf b/src/persian/ExtendPes.gf index 797a67aed..c5e333503 100644 --- a/src/persian/ExtendPes.gf +++ b/src/persian/ExtendPes.gf @@ -2,7 +2,7 @@ concrete ExtendPes of Extend = CatPes ** ExtendFunctor - [ - GenNP, ApposNP, ICompAP, AdvIsNP, InOrderToVP, ByVP + GenNP, ApposNP, ICompAP, AdvIsNP, InOrderToVP, ByVP, AdjAsNP, ComplBareVS ,GerundNP,GerundCN,GerundAdv,EmbedPresPart,EmbedSSlash ] with (Grammar=GrammarPes) @@ -20,18 +20,24 @@ lin s = \\m => np1.s ! m ++ np2.s ! m } ; + -- : AP -> NP + AdjAsNP ap = emptyNP ** ap ; {- emptyNP extended with, and overridden by, the fields of the AP record -} + + -- : VS -> S -> VP + ComplBareVS vs s = embComp (s.s ! vs.compl) (predV vs) ; {- same as ComplVS in VerbPes except that conjThat is not prepended to the sentence -} + ICompAP ap = {s = "چقدر" ++ ap.s ! Bare} ; -- : VP -> CN ; -- publishing of the document (can get a determiner) - GerundCN vp = useN (indeclN (showVPH Inf defaultAgr vp)) ; + GerundCN vp = useN (indeclN (infVP vp)) ; {- infVP is defined in ResPes as showVPH' VO False VVPres Inf defaultAgr, so the infinitive now comes out in VO order -} -- : VP -> NP ; -- publishing the document (by nature definite) - GerundNP vp = indeclNP (showVPH Inf defaultAgr vp) ; + GerundNP vp = indeclNP (infVP vp) ; -- : VP -> Adv ; -- publishing the document (prepositionless adverb) - GerundAdv vp = lin Adv {s = showVPH Inf defaultAgr vp} ; + GerundAdv vp = lin Adv {s = infVP vp} ; -- : VP -> SC ; - EmbedPresPart vp = lin SC {s = showVPH Inf defaultAgr vp} ; + EmbedPresPart vp = lin SC {s = infVP vp} ; -- : SSlash -> SC -- Not optimal: complement with آن should go after subject, but SSlash is already fixed. 
@@ -47,8 +53,8 @@ lin -- : VP -> Adv ; -- (in order) to publish the document InOrderToVP vp = lin Adv {s = for_Prep.s - ++ case vp.passive of { - Replace => showVPH PerfStem defaultAgr []} : VP> ; -- only show prefix + ++ case vp.lightverb of { + Kardan => showVPH PerfStem defaultAgr []} : VP> ; -- only show prefix _ => showVPH PerfStem defaultAgr vp} } ; } diff --git a/src/persian/MorphoPes.gf b/src/persian/MorphoPes.gf index 8cb40def7..1aef024de 100644 --- a/src/persian/MorphoPes.gf +++ b/src/persian/MorphoPes.gf @@ -184,14 +184,36 @@ param | VImp Polarity Number -- bekon,bekonid/nakon,nakonid ; - Passive = Add -- ateš zadan -> ateš zade šodan - | Replace ; -- gom kardan -> gom ∅ šodan + -- Affects clitic placement and passive + LightVerb = NotLight | Light -- ateš zadan -> ateš zade šodan + | Kardan ; -- gom kardan -> gom ∅ šodan oper impRoot : Str -> Str = \root -> case root of { st + "ی" => st ; _ => root }; + modifyFiniteForms : (Str -> Str) -> Verb -> Verb = \f,v -> v ** {s = {- f is applied to the VAor, VPerf, VPast, VSubj and VImp forms; every other form is passed through unchanged -} + table { + vf@(VAor _ _ + | VPerf _ _ + | VPast _ _ + | VSubj _ _ + | VImp _ _) + => f (v.s ! vf) ; + vf => v.s ! vf } + } ; + + addClitic : LightVerb -> Str -> Verb -> Verb = \light,cl,v -> v ** {s = {- attaches the clitic cl to the verb forms; where it goes depends on the LightVerb value -} + let f : Str -> Str = case light of { + NotLight => \s -> glue s cl ; + _ => \s -> BIND ++ cl ++ s } -- hack: put clitic before the verb, so it attaches to the prefix + in table { + Inf => glue (v.s ! Inf) cl ; {- the infinitive always gets the clitic glued after it, whatever the LightVerb value -} + vf => (modifyFiniteForms f v).s ! 
vf } + } ; + + mkVerb : (inf,pres : Str) -> Verb = \kardan,kon -> { s = table { Inf => kardan ; @@ -211,7 +233,7 @@ oper VImp Neg Sg => addN imp ; VImp Neg Pl => addN kon + "ید" } ; prefix = [] ;-- For compound verbs - passive = Add ; + lightverb = NotLight ; } where { kard = tk 1 kardan ; kardeh = kard + "ه" ; @@ -235,7 +257,7 @@ oper } ; -- oper - Verb = {s : VerbForm => Str ; prefix : Str ; passive : Passive} ; + Verb = {s : VerbForm => Str ; prefix : Str ; lightverb : LightVerb} ; -- Verbs that end in یدن, ادن or ودن -- Also some verbs that don't: دانستن with stem دان @@ -345,7 +367,7 @@ oper VImp Neg Sg => "نکن" ; VImp Neg Pl => "نکنید" ; vf => doRegV.s ! vf } ; - passive = Replace + lightverb = Kardan } where { doRegV = mkVerb "کردن" "کن" } ; becomeVerb : Verb = mkVerb "شدن" "شو" ; diff --git a/src/persian/NounPes.gf b/src/persian/NounPes.gf index 9a222dcfa..5201e6cf7 100644 --- a/src/persian/NounPes.gf +++ b/src/persian/NounPes.gf @@ -21,7 +21,13 @@ concrete NounPes of Noun = CatPes ** open ResPes, Prelude in { } ; UsePN pn = emptyNP ** pn ** {s = \\_ => pn.s} ; - UsePron p = emptyNP ** p ** {s = \\_ => p.s ; animacy = Animate} ; + UsePron p = emptyNP ** p ** { + s = \\_ => p.s ; + clitic = p.ps ; {- the clitic string is taken from the ps field of the pronoun -} + isClitic = case p.a of { + Ag Sg P3 => True ; + _ => False } ; {- only a pronoun with agreement Ag Sg P3 is flagged as clitic -} + animacy = Animate} ; PredetNP pred np = np ** { s = \\ez => pred.s ++ np.s ! ez @@ -72,6 +78,7 @@ concrete NounPes of Noun = CatPes ** open ResPes, Prelude in { a = agrP3 det.n ; hasAdj = False ; animacy = Inanimate ; + --TODO: isClitic, clitic relpron = Ance -- TODO check if this works for all Dets } ; @@ -132,7 +139,7 @@ concrete NounPes of Noun = CatPes ** open ResPes, Prelude in { AdjCN ap cn = cn ** { s = \\n,m => case ap.isPre of { - True => ap.s ! Bare ++ cn.s ! n ! m ; -- TODO check mod of ap + True => ap.s ! Bare ++ cn.s ! n ! m ; False => cn.s ! n ! Ezafe ++ ap.s ! 
m } ; hasAdj = True } ; diff --git a/src/persian/ParadigmsPes.gf b/src/persian/ParadigmsPes.gf index 96eb4a154..82ef53add 100644 --- a/src/persian/ParadigmsPes.gf +++ b/src/persian/ParadigmsPes.gf @@ -205,7 +205,7 @@ oper mkPrep : Str -> Prep -- Takes a string, returns a preposition. = \str -> lin Prep (prepOrRa str) ; mkPrep : Str -> Mod -> Prep -- Takes a string and Mod (so far only option is ezafe), returns a preposition. - = \str,m -> lin Prep {s = str ; ra = [] ; mod=m} + = \str,m -> lin Prep ((prepOrRa str) ** {mod=m}) } ; {- @@ -354,7 +354,10 @@ oper -- hidden from public API compoundV = overload { compoundV : Str -> V -> V - = \s,v -> v ** {prefix = s} ; + = \s,v -> v ** { + prefix = s ; + lightverb = case v.lightverb of {Kardan => Kardan ; _ => Light} {- Kardan is kept; NotLight and Light both become Light once a prefix is attached -} + } ; {- NOTE(review): the V2 overload below sets only prefix and leaves lightverb untouched; confirm that this is intended -} compoundV : Str -> V2 -> V -- hidden from public API = \s,v -> lin V (v ** {prefix = s}) ; }; @@ -377,23 +380,26 @@ oper mkV2 : V -> Str -> V2 = \v,ra -> lin V2 (v ** {c2 = prepOrRa ra}) ; mkV2 : V -> Str -> Bool -> V2 - = \v,p,b -> lin V2 (v ** {c2 = {ra = [] ; s = p ; mod=Bare}}) ; + = \v,p,b -> lin V2 (v ** {c2 = prepOrRa p}) ; } ; prepOrRa : Str -> Compl = \s -> case s of { - "را" => {s = [] ; ra = "را" ; mod=Bare} ; - prep => {s = prep ; ra = []; mod=Bare} + ra@("را"|"") + => {s = [] ; ra = ra ; mod=Bare ; isPrep = False} ; {- the matched string (the ra literal or the empty string) is stored in the ra field and isPrep is False -} + prep => {s = prep ; ra = [] ; mod=Bare ; isPrep = True} {- any other string is stored in the s field with isPrep True -} } ; noPrep = prepOrRa [] ; - ezafePrep = {s = [] ; ra = [] ; mod=Ezafe} ; - mkPost : Str -> Prep = \s -> lin Prep {s=[] ; ra=s ; mod=Bare} ; + -- NB. The 'mod' field has different meaning for verbs and N2s. 
+ ezafeForN2 = {s = [] ; ra = [] ; mod=Ezafe ; isPrep = False} ; + + mkPost : Str -> Prep = \s -> lin Prep {s=[] ; ra=s ; mod=Bare ; isPrep = False} ; mkN2 = overload { mkN2 : Str -> N2 -- Predictable N2 without complement - = \s -> lin N2 (mkN01 s inanimate ** {c2 = ezafePrep ; compl = []}) ; + = \s -> lin N2 (mkN01 s inanimate ** {c2 = ezafeForN2 ; compl = []}) ; mkN2 : N -> N2 -- N2 from without complement - = \n -> lin N2 (n ** {c2 = ezafePrep ; compl = []}) ; + = \n -> lin N2 (n ** {c2 = ezafeForN2 ; compl = []}) ; mkN2 : N -> Str -> N2 = \n,c -> lin N2 (n ** {c2 = prepOrRa c ; compl = []}) ; mkN2 : N -> Prep -> Str -> N2 -- hidden from puclic API diff --git a/src/persian/PhrasePes.gf b/src/persian/PhrasePes.gf index 89a3549aa..c72a9b189 100644 --- a/src/persian/PhrasePes.gf +++ b/src/persian/PhrasePes.gf @@ -16,7 +16,7 @@ concrete PhrasePes of Phrase = CatPes ** open Prelude, ResPes in { UttNP np = {s = np2str np} ; UttCN cn = {s = cn2str cn}; UttAP ap = {s = ap.s ! Bare} ; - UttVP vp = {s = showVPH Inf defaultAgr vp} ; + UttVP vp = {s = infVP vp} ; PConjConj conj = {s = conj.s2} ; diff --git a/src/persian/ResPes.gf b/src/persian/ResPes.gf index adec61dae..b70ac01c2 100644 --- a/src/persian/ResPes.gf +++ b/src/persian/ResPes.gf @@ -32,6 +32,8 @@ resource ResPes = MorphoPes ** open Prelude,Predef in { a : Agr ; hasAdj : Bool ; -- to get the right form when NP is a predicate animacy : Animacy ; -- to get the right pronoun in FunRP + isClitic : Bool ; -- if isPron, becomes clitic as a direct object + clitic : Str ; {- clitic string of the NP; complSlash reads it as np.clitic, and emptyNP leaves it empty -} relpron : RelPron ; -- contraction for "that which" empty : Str -- to prevent metavariables in case of rel.pron. 
contraction } ; @@ -42,6 +44,8 @@ resource ResPes = MorphoPes ** open Prelude,Predef in { a = defaultAgr ; hasAdj = False ; animacy = Inanimate ; + isClitic = False ; + clitic = [] ; relpron = Ke ; empty = [] } ; @@ -94,7 +98,7 @@ oper } ; VPH : Type = Verb ** { - comp : Agr => Str; -- complements of a verb, agr for ReflVP "I/you see myself/yourself" and CompCN "I am human/we are humans" + comp : Agr => WordOrder => Str; -- complements of a verb, agr for ReflVP "I/you see myself/yourself" and CompCN "I am human/we are humans" vComp : Agr => VVTense => Str; -- when a verb is used as a complement of an auxiliary verb. Unlike ‘comp’ or ‘obj’, this type of complement follows the auxiliary verb. obj : Str ; -- object of a verb; so far only used for A ("paint it black") ad : Str ; @@ -107,20 +111,23 @@ oper showVPH : VVTense -> VerbForm -> Agr -> VPH -> Str = showVPH' OV False } ; + showVPHwithImpPrefix = showVPH' OV True VVPres ; {- OV order with the ImpPrefix Pos form shown before the verb form; VVTense fixed to VVPres -} + infVP : VPH -> Str = showVPH' VO False VVPres Inf defaultAgr ; {- infinitive of the VP in VO order, with defaultAgr and without the imperative prefix -} + showVPH' : WordOrder -> Bool -> VVTense -> VerbForm -> Agr -> VPH -> Str = \wo,showImpPref,ant,vf,agr,vp -> let impPref = case showImpPref of { True => vp.s ! ImpPrefix Pos ; False => [] } in case wo of { - OV => vp.ad ++ vp.comp ! agr ++ vp.obj + OV => vp.ad ++ vp.comp ! agr ! wo ++ vp.obj ++ vp.prefix ++ impPref ++ vp.s ! vf ++ vp.vComp ! agr ! ant ++ vp.embComp ; VO => vp.prefix ++ vp.s ! vf ++ vp.ad - ++ vp.comp ! agr ++ vp.obj ++ impPref + ++ vp.comp ! agr ! wo ++ vp.obj ++ impPref ++ vp.vComp ! agr ! 
ant ++ vp.embComp } ; - Compl : Type = {s : Str ; ra : Str ; mod : Mod} ; + Compl : Type = {s : Str ; ra : Str ; mod : Mod ; isPrep : Bool} ; {- isPrep is set to True by prepOrRa in ParadigmsPes only when its argument is stored in the s field -} VPHSlash : Type = VPH ** { c2 : Compl ; -- prep or ra for the complement @@ -135,7 +142,7 @@ oper obj, embComp = []; vvtype = NoVV ; - comp = \\_ => [] ; + comp = \\_,_ => [] ; vComp = \\_,_ => [] } ; predVc : (Verb ** {c2 : Compl}) -> VPHSlash = \verb -> @@ -145,29 +152,34 @@ oper passVP : VPH -> VPH = \vp -> vp ** { s = becomeVerb.s ; - prefix = case vp.passive of { - Add => vp.s ! PerfStem ++ vp.prefix ; - Replace => vp.prefix + prefix = case vp.lightverb of { + Kardan => vp.prefix ; + _ => vp.s ! PerfStem ++ vp.prefix {- NotLight and Light put the PerfStem of the verb in front of the prefix; Kardan keeps only the prefix -} + } ; } ; -- --------------------- -- VP complementation --------------------- - appComp : Compl -> (Mod=>Str) -> Str = \c2,obj -> - case c2.mod of { - Ezafe => runtimeKasre c2.s ++ obj ! Bare ++ c2.ra ; - _ => c2.s ++ obj ! c2.mod ++ c2.ra } ; + appCompVP : Compl -> (Mod=>Str) -> (WordOrder=>Str) = \c2,obj -> + \\wo => let ra = case wo of {VO => [] ; OV => c2.ra} in {- c2.ra is emitted only in OV order; in VO order it is left out -} + case c2.mod of { + Ezafe => runtimeKasre c2.s ++ obj ! Bare ++ ra ; + _ => c2.s ++ obj ! c2.mod ++ ra } ; + + -- for use outside VP, word order is redundant, ra should be retained. + appComp : Compl -> (Mod=>Str) -> Str = \c2,obj -> appCompVP c2 obj ! OV ; insertComp : (Agr => Str) -> VPH -> VPH = \obj,vp -> vp ** { - comp = \\a => vp.comp ! a ++ obj ! a + comp = \\a,wo => vp.comp ! a ! wo ++ obj ! a } ; insertCompPre : (Agr=>Mod=>Str) -> VPHSlash -> VPH = \obj,vp -> vp ** { - comp = \\a => appComp vp.c2 (obj ! a) ++ vp.comp ! a + comp = \\a,wo => appCompVP vp.c2 (obj ! a) ! wo ++ vp.comp ! a ! 
wo } ; insertCompPost : (Agr=>Mod=>Str) -> VPHSlash -> VPH = \obj,vp -> vp ** { - comp = \\a => vp.comp ! a ++ appComp vp.c2 (obj ! a) + comp = \\a,wo => vp.comp ! a ! wo ++ appCompVP vp.c2 (obj ! a) ! 
wo } ; insertVV : VV -> VPH -> VPH = \vv,vp -> predV vv ** { @@ -184,7 +196,18 @@ oper } ; complSlash : VPHSlash -> NP -> VPH = \vp,np -> vp ** { - comp = \\a => appComp vp.c2 np.s ++ vp.comp ! a ; + comp = \\a,wo => + case of { + => [] ; -- clitic is attached to the verb or prefix + => appCompVP vp.c2 (\\_ => (BIND ++ np.clitic)) ! wo ++ vp.comp ! a ! wo ; + _ => appCompVP vp.c2 np.s ! wo ++ vp.comp ! a ! wo {- default: the full NP, with its complementizer, is placed before the existing complements -} + + } ; + s = case of { + -- if it has no prep, the clitic is attached to the verb (or prefix, if it's a light verb). + => (addClitic vp.lightverb np.clitic vp).s ; + _ => vp.s + } ; obj = vp.obj ++ vp.agrObj ! np.a -- "beg her to buy", buy agrees with her } ; @@ -194,7 +217,7 @@ oper \\agr,ant => if_then_Str vv.isAux conjThat [] ++ case of { -- Auxiliaries with defective inflection: complement inflects in tense - => showVPH' OV True VVPres (VPast Pos agr) agr vp ; + => showVPHwithImpPrefix (VPast Pos agr) agr vp ; => showVPH (VPast Pos agr) agr vp ; => showVPH PerfStem agr vp ++ subjAux Pos agr ; @@ -257,9 +280,9 @@ oper vvt = ta2vvt ta vp.vvtype ; in case vp.vvtype of { DefVV - => vps ++ vp.ad ++ vp.comp ! np.a ++ vp.obj + => vps ++ vp.ad ++ vp.comp ! np.a ! OV ++ vp.obj ++ vp.vComp ! np.a ! vvt ++ vp.embComp ; - _ => vp.ad ++ vp.comp ! np.a ++ vp.obj ++ vps + _ => vp.ad ++ vp.comp ! np.a ! OV ++ vp.obj ++ vps ++ vp.vComp ! np.a ! vvt ++ vp.embComp } }; @@ -269,7 +292,7 @@ oper let vps = clTable vp ! agr ! ta ! p ; quest = case ord of { ODir => [] ; OQuest => "آیا" } ; vvt = ta2vvt ta vp.vvtype ; - in quest ++ subj ++ vp.ad ++ vp.comp ! agr ++ vp.obj + in quest ++ subj ++ vp.ad ++ vp.comp ! agr ! OV ++ vp.obj ++ vps ++ vp.vComp ! agr ! 
vvt ++ vp.embComp }; diff --git a/src/persian/SentencePes.gf b/src/persian/SentencePes.gf index ca73f6397..66752d500 100644 --- a/src/persian/SentencePes.gf +++ b/src/persian/SentencePes.gf @@ -14,8 +14,8 @@ concrete SentencePes of Sentence = CatPes ** open Prelude, ResPes,Predef in { let agr = Ag n P2 ; vps = vp.prefix ++ vp.s ! VImp pol n in case vp.vvtype of { - NoVV => vp.ad ++ vp.comp ! agr ++ vp.obj ++ vp.vComp ! agr ! VVPres ++ vps ++ vp.embComp ; - _ => vps ++ vp.ad ++ vp.comp ! agr ++ vp.obj ++ vp.vComp ! agr ! VVPres ++ vp.embComp } + NoVV => vp.ad ++ vp.comp ! agr ! OV ++ vp.obj ++ vp.vComp ! agr ! VVPres ++ vps ++ vp.embComp ; + _ => vps ++ vp.ad ++ vp.comp ! agr ! OV {- TODO: check whether selecting OV is legitimate in this branch, where the verb string comes first -} ++ vp.obj ++ vp.vComp ! agr ! VVPres ++ vp.embComp } } ; SlashVP np vp = @@ -38,7 +38,7 @@ concrete SentencePes of Sentence = CatPes ** open Prelude, ResPes,Predef in { EmbedS s = {s = conjThat ++ s.s ! Indic} ; EmbedQS qs = qs ; - EmbedVP vp = {s = showVPH Inf defaultAgr vp} ; --- agr + EmbedVP vp = {s = infVP vp} ; --- agr UseCl temp p cl = { diff --git a/src/persian/StructuralPes.gf b/src/persian/StructuralPes.gf index 01ae07906..dff9008d8 100644 --- a/src/persian/StructuralPes.gf +++ b/src/persian/StructuralPes.gf @@ -101,12 +101,12 @@ concrete StructuralPes of Structural = CatPes ** youSg_Pron = R.agr2pron ! Ag Sg P2 ; youPl_Pron = R.agr2pron ! Ag Pl P2 ; youPol_Pron = R.agr2pron ! 
Ag Pl P2 ; - no_Quant = mkQuant "هیچ" "هیچ" ; -- TODO: takes object in clitic form + is always singular + no_Quant = mkQuant "هیچ" "هیچ" ; -- TODO: takes object in clitic form + is always singular + VP is negated not_Predet = {s="نه"} ; if_then_Conj = sd2 "اگر" "آنگاه" ** {n = Sg} ; at_least_AdN = ss "حداقل" ; at_most_AdN = ss "حداکثر"; - nothing_NP = R.indeclNP "هیچ" ; + nothing_NP = R.indeclNP "هیچ چیز" ; except_Prep = mkPrep ["به جز"] ; nobody_NP = R.indeclNP "هیچ کس"; @@ -114,7 +114,7 @@ concrete StructuralPes of Structural = CatPes ** ---- have_V2 = mkV2 (mkV "داشتن" "دار") "را" ; - language_title_Utt = ss "پeرسن" ; + language_title_Utt = ss "فارسی" ; ---- AR from Nasrin diff --git a/src/persian/VerbPes.gf b/src/persian/VerbPes.gf index 4c62a6c71..42fb770ec 100644 --- a/src/persian/VerbPes.gf +++ b/src/persian/VerbPes.gf @@ -15,7 +15,7 @@ concrete VerbPes of Verb = CatPes ** open ResPes,Prelude in { ComplVV = insertVV ; ComplVS v s = embComp (conjThat ++ s.s ! v.compl) (predV v) ; ComplVQ v q = embComp (conjThat ++ q.s) (predV v) ; - ComplVA v ap = let adjStr = appComp v.c2 ap.s in + ComplVA v ap = let adjStr = appComp v.c2 ap.s in case ap.afterPrefix of { True => predV (v ** {prefix = v.prefix ++ adjStr}) ; False => insertObj adjStr (predV v) -- check form of adjective @@ -39,10 +39,10 @@ concrete VerbPes of Verb = CatPes ** open ResPes,Prelude in { -- : V2V -> NP -> VPSlash -> VPSlash ; -- beg me to buy SlashV2VNP v2v np vps = predVc v2v ** { - comp = \\a => if_then_Str v2v.isAux conjThat [] -- that - ++ appComp v2v.c2 np.s ; -- I - -- ∅ is placed in comp - vComp = \\_,_ => showVPH (case v2v.compl of { -- buy + comp = \\a,wo => if_then_Str v2v.isAux conjThat [] -- that + ++ appCompVP v2v.c2 np.s ! wo ; -- I + -- ∅ is placed in comp + vComp = \\_,_ => showVPH (case v2v.compl of { -- buy Subj => VSubj Pos np.a ; Indic => VAor Pos np.a }) np.a -- agreement fixed to np.a