From 414137597623ad1a536a45395cae835fbb794252 Mon Sep 17 00:00:00 2001 From: Inari Listenmaa Date: Fri, 4 Jan 2019 13:01:03 +0200 Subject: [PATCH] (Ara) Add a distinction in CN and NP whether it's heavy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes a difference in word order; default is VSO, but if the subject NP is complex, e.g. coming from RelNP or SentCN, the more natural word order is SVO. Example that triggered the change comes from an application grammar: Gold standard by informant: مَا تَوَصْلّنَا إلَيهِ يَبْدُو مَنْطِقِيَّاً Grammar before change: يَشْعُرُ مَاً نَتَوَصَّلُ إِلَىهُ مَنْطِقِيّاً Grammar after change: مَاً نَتَوَصَّلُ إِلَىهُ يَشْعُرُ مَنْطِقِيّاً --- src/arabic/ConjunctionAra.gf | 11 ++++++--- src/arabic/NounAra.gf | 41 +++++++++++++++++++------------ src/arabic/ResAra.gf | 47 +++++++++++++++++++++++------------- src/arabic/SymbolAra.gf | 10 +++----- 4 files changed, 67 insertions(+), 42 deletions(-) diff --git a/src/arabic/ConjunctionAra.gf b/src/arabic/ConjunctionAra.gf index b7aca53a7..025a68d43 100644 --- a/src/arabic/ConjunctionAra.gf +++ b/src/arabic/ConjunctionAra.gf @@ -5,7 +5,7 @@ lincat [S] = {s1,s2 : Order => Str} ; [Adv] = {s1,s2 : Str} ; - [NP] = {s1,s2 : Case => Str ; a : Agr ; empty : Str} ; + [NP] = {s1,s2 : Case => Str ; a : Agr ; empty : Str; isHeavy : Bool} ; [AP] = {s1,s2 : Species => Gender => Number => State => Case => Str} ; lin @@ -21,16 +21,19 @@ lin BaseNP x y = twoTable Case x y ** { a = conjAgr x.a y.a ; - empty = [] + empty = [] ; + isHeavy = True ; } ; ConsNP xs x = consrTable Case comma xs x ** { a = conjAgr xs.a x.a ; - empty = [] + empty = [] ; + isHeavy = True ; } ; ConjNP conj ss = conjunctDistrTable Case conj ss ** { a = let gn = pgn2gn ss.a.pgn in {pgn = Per3 gn.g (conjNumber conj.n gn.n) ; isPron = False} ; - empty = [] + empty = [] ; + isHeavy = True ; } ; BaseAP = twoTable5 Species Gender Number State Case ; diff --git a/src/arabic/NounAra.gf b/src/arabic/NounAra.gf index 0f3c8b134..f3e828292 100644 --- a/src/arabic/NounAra.gf +++ b/src/arabic/NounAra.gf @@ -22,7 +22,7 @@ lin cn.s2 ! number ! (definite ! det.d) -- Indef remains Indef, rest become Def ! c - } in { + } in emptyNP ** { s = \\c => -- Dat is just a hack for liPrep let c' = case c of {Dat => Gen ; x => x} in case cnB4det det of { @@ -37,13 +37,12 @@ lin }; a = { pgn = agrP3 cn.h cn.g number; isPron = False } ; - empty = [] - }; + isHeavy = cn.isHeavy + } ; - UsePN pn = { - s = pn.s; - a = {pgn = Per3 pn.g Sg ; isPron = False} ; - empty = [] + UsePN pn = emptyNP ** { + s = pn.s; + a = {pgn = Per3 pn.g Sg ; isPron = False} }; UsePron p = p ; @@ -70,7 +69,8 @@ lin -} AdvNP np adv = np ** { - s = \\c => np.s ! c ++ adv.s + s = \\c => np.s ! c ++ adv.s ; + isHeavy = True ; }; DetQuantOrd quant num ord = quant ** { @@ -169,10 +169,11 @@ lin isEmpty = True } ; - MassNP cn = - {s = \\c => cn2str cn Sg Indef c ; - a = {pgn = Per3 cn.g Sg ; isPron = False} ; - empty = []} ; + MassNP cn = emptyNP ** { + s = \\c => cn2str cn Sg Indef c ; + a = {pgn = Per3 cn.g Sg ; isPron = False} ; + isHeavy = cn.isHeavy ; + } ; UseN, UseN2 = useN ; @@ -190,12 +191,22 @@ lin }; RelCN cn rs = cn ** { - s2 = \\n,s,c => cn.s2 ! n ! s ! c ++ rs.s ! {pgn=Per3 cn.g n ; isPron=False} ! c}; + s2 = \\n,s,c => cn.s2 ! n ! s ! c + ++ rs.s ! {pgn=Per3 cn.g n ; isPron=False} ! c ; + isHeavy = True + } ; - RelNP np rs = np ** {s = \\c => np.s ! c ++ rs.s ! np.a ! c} ; + + RelNP np rs = np ** { + s = \\c => np.s ! c ++ rs.s ! np.a ! c ; + isHeavy = True + } ; AdvCN, - SentCN = \cn,ss -> cn ** {s2 = \\n,d,c => cn.s2 ! n ! d ! c ++ ss.s} ; + SentCN = \cn,ss -> cn ** { + s2 = \\n,d,c => cn.s2 ! n ! d ! c ++ ss.s ; + isHeavy = True + } ; ApposCN cn np = cn ** { np = \\c => cn.np ! c ++ np.s ! c diff --git a/src/arabic/ResAra.gf b/src/arabic/ResAra.gf index 70feac263..63bffa5dd 100644 --- a/src/arabic/ResAra.gf +++ b/src/arabic/ResAra.gf @@ -173,7 +173,7 @@ resource ResAra = PatternsAra ** open Prelude, Predef, OrthoAra, ParamX in { uttAP : AP -> (Gender => Str) ; uttAP ap = \\g => ap.s ! NoHum ! g ! Sg ! Const ! Bare ; ----IL - CN : Type = Noun ** {np : Case => Str}; + CN : Type = Noun ** {np : Case => Str ; isHeavy : Bool}; -- All fields of NP cn2str : CN -> Number -> State -> Case -> Str = \cn,n,s,c -> @@ -181,7 +181,9 @@ resource ResAra = PatternsAra ** open Prelude, Predef, OrthoAra, ParamX in { cn.s2 ! n ! s ! c ++ cn.np ! c ; - useN : Noun -> CN = \n -> n ** {np = \\_ => []} ; + useN : Noun -> CN = \n -> n ** { + np = \\_ => [] ; + isHeavy = False } ; uttCN : CN -> (Gender => Str) ; uttCN cn = \\_ => cn2str cn Sg Indef Bare ; @@ -555,7 +557,7 @@ v1geminateForms : Str -> Vowel -> Vowel -> DefForms = umdad = "ُ" + mkStrong fcal mdd ; Umdud = (prefixImp ! vowImpf) + mdud; mamdUd = mkStrong mafcUl mdd - } in toDefForms + } in toDefForms madd madad mudd mudid -- VPerf amudd amdud umadd umdad -- VImpf Umdud mudd' mamdUd ; @@ -615,7 +617,7 @@ v1defForms_perfA : Root3 -> Vowel -> DefForms = \rmy,vowImpf -> eirmi = prefixImp ! vowImpf + _rmi; eirmu = prefixImp ! vowImpf + _rmu; marmiy = mkStrong mafcil rmy - } in toDefForms + } in toDefForms rama ramay rumi rumu rumiy -- VPerf armi armu urma -- VImpf eirmi eirmu marmiy ; @@ -683,7 +685,7 @@ v2defective : Root3 -> Verb = \gny -> ugannu = "ُ" + gannu; uganna = "ُ" + ganna; mugannaY = "مُ" + ganna + "ى" - } in verbDef (toDefForms + } in verbDef (toDefForms ganna gannay gunni gunnu gunniy -- VPerf uganni ugannu uganna -- VImpf ganni gannu mugannaY) i ; @@ -818,7 +820,7 @@ v7geminate : Str -> Verb = \fcl -> n => "ُنْ" + vforms ! n -- doesn't exist for form 7 }) ; -v8geminate : Str -> Verb = +v8geminate : Str -> Verb = \rootStr -> let { mdd = mkRoot3 rootStr ; --fcc @@ -838,7 +840,7 @@ v8geminate : Str -> Verb = umtadad = "ُ" + _mtadad ; imtadid = "اِ" + _mtadid ; mumtadd = "مُ" + _mtadd ; - } in verbGeminate (toDefForms + } in verbGeminate (toDefForms imtadd imtadad umtudd umtudid -- VPerf amtadd amtadid umtadd umtadad -- VPres imtadd imtadid mumtadd) ; @@ -1507,6 +1509,7 @@ patGeminateImp : (_,_ :Str) -> Gender => Number => Str = \facc,facic -> NP : Type = { s : Case => Str ; a : Agr ; + isHeavy : Bool ; -- overrides verbal word order, if the subject is very complicated; e.g. built out of RelNP or similar empty : Str -- to prevent ambiguities with prodrop } ; @@ -1516,15 +1519,14 @@ patGeminateImp : (_,_ :Str) -> Gender => Number => Str = \facc,facic -> } ; mkPron : (_,_,_ : Str) -> PerGenNum -> NP = \ana,nI,I,pgn -> - { s = + emptyNP ** {s = table { (Nom|Bare) => ana; Acc => nI ; -- object suffix Gen => I ; -- possessive suffix Dat => I -- will only be used with preposition لِ }; - a = {pgn = pgn; isPron = True }; - empty = [] + a = {pgn = pgn; isPron = True} }; proDrop : NP -> NP = \np -> @@ -1536,6 +1538,7 @@ patGeminateImp : (_,_ :Str) -> Gender => Number => Str = \facc,facic -> emptyNP : NP = { s = \\_ => [] ; a = {pgn = Per3 Masc Sg ; isPron = False} ; + isHeavy = False ; empty = [] } ; agrNP : Agr -> NP = \agr -> emptyNP ** {a = agr} ; @@ -1653,7 +1656,7 @@ patGeminateImp : (_,_ :Str) -> Gender => Number => Str = \facc,facic -> } } ; - ip2np : IP -> Bool -> NP = \ip,isPred -> ip ** { s = ip.s ! isPred ! Masc ! Def ; empty = [] } ; + ip2np : IP -> Bool -> NP = \ip,isPred -> emptyNP ** ip ** {s = ip.s ! isPred ! Masc ! Def} ; np2ip : NP -> IP = \np -> np ** {s = \\_,_,_ => np.s} ; IDet : Type = { @@ -1748,7 +1751,7 @@ patGeminateImp : (_,_ :Str) -> Gender => Number => Str = \facc,facic -> True => np.s ! sc.c } ; } in wordOrder o - vp.obj.a.isPron np.a.isPron + vp.obj.a.isPron np.a.isPron np.isHeavy (vStr vp pgn t p o) vp.obj.s (pred vp pgn t p) @@ -1757,14 +1760,24 @@ patGeminateImp : (_,_ :Str) -> Gender => Number => Str = \facc,facic -> } ; -- seems complicated, but this is to share code with VPS and other similar structures - wordOrder : Order -> (objIsPron,subjIsPron : Bool) -> (verb,obj,pred,adv,subj : Str) -> Str = - \o,objIsPron,subjIsPron,verb,obj,pred,adv,subj -> + wordOrder : Order -> (objIsPron,subjIsPron,subjIsHeavy : Bool) -> (verb,obj,pred,adv,subj : Str) -> Str = + \o,objIsPron,subjIsPron,subjIsHeavy,verb,obj,pred,adv,subj -> let cl = wordOrderNoSubj o objIsPron verb obj pred adv in case o of { + -- If subject is pronoun, affix it in Subord word order. Subord => - let bind = if_then_Str subjIsPron BIND [] -- in subord. clause, subj. pronoun binds to the main verb + let bind = if_then_Str subjIsPron BIND [] in cl.before ++ bind ++ subj ++ cl.after ; - _ => cl.before ++ subj ++ cl.after + + -- If subject is "heavy" (e.g. contains a relative clause), + Verbal => -- then override Verbal word order. + case subjIsHeavy of { + True => subj ++ cl.before ++ cl.after ; + False => cl.before ++ subj ++ cl.after + } ; + + -- Any other word order, no special checks. + _ => cl.before ++ subj ++ cl.after } ; wordOrderNoSubj : Order -> (objIsPron : Bool) -> (verb,obj,pred,adv : Str) -> {before,after : Str} = @@ -1833,7 +1846,7 @@ patGeminateImp : (_,_ :Str) -> Gender => Number => Str = \facc,facic -> Subj : Type = {s : Case => Str ; isPron : Bool} ; np2subj : NP -> Subj = \np -> np ** {isPron = np.a.isPron} ; - subj2np : Subj -> NP = \su -> su ** {a = {pgn = emptyNP.a.pgn ; isPron = su.isPron} ; empty=[]} ; + subj2np : Subj -> NP = \su -> emptyNP ** su ** {a = {pgn = emptyNP.a.pgn ; isPron = su.isPron}} ; emptyObj : Obj = {a = {gn = {g=Masc ; n=Sg} ; isPron = False}; s = []} ; insertObj : NP -> VPSlash -> VP = \np,vp -> vp ** { diff --git a/src/arabic/SymbolAra.gf b/src/arabic/SymbolAra.gf index 10e2b7bb5..0f43ad1d2 100644 --- a/src/arabic/SymbolAra.gf +++ b/src/arabic/SymbolAra.gf @@ -14,15 +14,13 @@ lin -- } ; --IL TODO: check out some opers regarding state in ResAra. These are just dummy values. CNSymbNP det cn xs = - let g = cn.g ; n = sizeToNumber det.n in { + let g = cn.g ; n = sizeToNumber det.n in emptyNP ** { s = \\c => det.s ! NoHum ! g ! c ++ cn2str cn n Def c ++ xs.s; ----IL word order?? Seems to be nontrivial according to ResAra comments. - a = dummyAgrP3 n ; - empty = [] + a = dummyAgrP3 n } ; - CNNumNP cn i = { + CNNumNP cn i = emptyNP ** { s = \\c => cn2str cn Sg Def c ++ uttNum i ! cn.g ; - a = dummyAgrP3 Sg ; - empty = [] + a = dummyAgrP3 Sg } ; SymbS sy = {s = \\_ => sy.s} ;