From 19dca60b6e0f3823807b3c6f64d4bee6466b7363 Mon Sep 17 00:00:00 2001 From: Inari Listenmaa Date: Fri, 4 Jan 2019 12:56:46 +0200 Subject: [PATCH 1/4] (Ara) Add EmptyRelSlash to ExtendAra --- src/arabic/ExtendAra.gf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/arabic/ExtendAra.gf b/src/arabic/ExtendAra.gf index be05864a5..2a0877ed6 100644 --- a/src/arabic/ExtendAra.gf +++ b/src/arabic/ExtendAra.gf @@ -15,6 +15,7 @@ concrete ExtendAra of Extend = ParamX, ResAra, Prelude, + RelativeAra, Coordination in { @@ -28,6 +29,9 @@ concrete ExtendAra of Extend = -- : AP -> IComp ; -- "how old" ICompAP ap = {s = \\gn => "كَمْ" ++ ap.s ! NoHum ! gn.g ! gn.n ! Indef ! Acc} ; + -- : ClSlash -> RCl -- he lives in + EmptyRelSlash = RelSlash (IdRP ** {s = \\_ => []}) ; + lincat VPS = {s : PerGenNum => Str} ; -- finite VP's with tense and polarity [VPS] = {s1,s2 : PerGenNum => Str} ; From 414137597623ad1a536a45395cae835fbb794252 Mon Sep 17 00:00:00 2001 From: Inari Listenmaa Date: Fri, 4 Jan 2019 13:01:03 +0200 Subject: [PATCH 2/4] (Ara) Add a distinction in CN and NP whether it's heavy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes a difference in word order; default is VSO, but if the subject NP is complex, e.g. coming from RelNP or SentCN, the more natural word order is SVO. Example that triggered the change comes from an application grammar: Gold standard by informant: مَا تَوَصْلّنَا إلَيهِ يَبْدُو مَنْطِقِيَّاً Grammar before change: يَشْعُرُ مَاً نَتَوَصَّلُ إِلَىهُ مَنْطِقِيّاً Grammar after change: مَاً نَتَوَصَّلُ إِلَىهُ يَشْعُرُ مَنْطِقِيّاً --- src/arabic/ConjunctionAra.gf | 11 ++++++--- src/arabic/NounAra.gf | 41 +++++++++++++++++++------------ src/arabic/ResAra.gf | 47 +++++++++++++++++++++++------------- src/arabic/SymbolAra.gf | 10 +++----- 4 files changed, 67 insertions(+), 42 deletions(-) diff --git a/src/arabic/ConjunctionAra.gf b/src/arabic/ConjunctionAra.gf index b7aca53a7..025a68d43 100644 --- a/src/arabic/ConjunctionAra.gf +++ b/src/arabic/ConjunctionAra.gf @@ -5,7 +5,7 @@ lincat [S] = {s1,s2 : Order => Str} ; [Adv] = {s1,s2 : Str} ; - [NP] = {s1,s2 : Case => Str ; a : Agr ; empty : Str} ; + [NP] = {s1,s2 : Case => Str ; a : Agr ; empty : Str; isHeavy : Bool} ; [AP] = {s1,s2 : Species => Gender => Number => State => Case => Str} ; lin @@ -21,16 +21,19 @@ lin BaseNP x y = twoTable Case x y ** { a = conjAgr x.a y.a ; - empty = [] + empty = [] ; + isHeavy = True ; } ; ConsNP xs x = consrTable Case comma xs x ** { a = conjAgr xs.a x.a ; - empty = [] + empty = [] ; + isHeavy = True ; } ; ConjNP conj ss = conjunctDistrTable Case conj ss ** { a = let gn = pgn2gn ss.a.pgn in {pgn = Per3 gn.g (conjNumber conj.n gn.n) ; isPron = False} ; - empty = [] + empty = [] ; + isHeavy = True ; } ; BaseAP = twoTable5 Species Gender Number State Case ; diff --git a/src/arabic/NounAra.gf b/src/arabic/NounAra.gf index 0f3c8b134..f3e828292 100644 --- a/src/arabic/NounAra.gf +++ b/src/arabic/NounAra.gf @@ -22,7 +22,7 @@ lin cn.s2 ! number ! (definite ! det.d) -- Indef remains Indef, rest become Def ! c - } in { + } in emptyNP ** { s = \\c => -- Dat is just a hack for liPrep let c' = case c of {Dat => Gen ; x => x} in case cnB4det det of { @@ -37,13 +37,12 @@ lin }; a = { pgn = agrP3 cn.h cn.g number; isPron = False } ; - empty = [] - }; + isHeavy = cn.isHeavy + } ; - UsePN pn = { - s = pn.s; - a = {pgn = Per3 pn.g Sg ; isPron = False} ; - empty = [] + UsePN pn = emptyNP ** { + s = pn.s; + a = {pgn = Per3 pn.g Sg ; isPron = False} }; UsePron p = p ; @@ -70,7 +69,8 @@ lin -} AdvNP np adv = np ** { - s = \\c => np.s ! c ++ adv.s + s = \\c => np.s ! c ++ adv.s ; + isHeavy = True ; }; DetQuantOrd quant num ord = quant ** { @@ -169,10 +169,11 @@ lin isEmpty = True } ; - MassNP cn = - {s = \\c => cn2str cn Sg Indef c ; - a = {pgn = Per3 cn.g Sg ; isPron = False} ; - empty = []} ; + MassNP cn = emptyNP ** { + s = \\c => cn2str cn Sg Indef c ; + a = {pgn = Per3 cn.g Sg ; isPron = False} ; + isHeavy = cn.isHeavy ; + } ; UseN, UseN2 = useN ; @@ -190,12 +191,22 @@ lin }; RelCN cn rs = cn ** { - s2 = \\n,s,c => cn.s2 ! n ! s ! c ++ rs.s ! {pgn=Per3 cn.g n ; isPron=False} ! c}; + s2 = \\n,s,c => cn.s2 ! n ! s ! c + ++ rs.s ! {pgn=Per3 cn.g n ; isPron=False} ! c ; + isHeavy = True + } ; - RelNP np rs = np ** {s = \\c => np.s ! c ++ rs.s ! np.a ! c} ; + + RelNP np rs = np ** { + s = \\c => np.s ! c ++ rs.s ! np.a ! c ; + isHeavy = True + } ; AdvCN, - SentCN = \cn,ss -> cn ** {s2 = \\n,d,c => cn.s2 ! n ! d ! c ++ ss.s} ; + SentCN = \cn,ss -> cn ** { + s2 = \\n,d,c => cn.s2 ! n ! d ! c ++ ss.s ; + isHeavy = True + } ; ApposCN cn np = cn ** { np = \\c => cn.np ! c ++ np.s ! c diff --git a/src/arabic/ResAra.gf b/src/arabic/ResAra.gf index 70feac263..63bffa5dd 100644 --- a/src/arabic/ResAra.gf +++ b/src/arabic/ResAra.gf @@ -173,7 +173,7 @@ resource ResAra = PatternsAra ** open Prelude, Predef, OrthoAra, ParamX in { uttAP : AP -> (Gender => Str) ; uttAP ap = \\g => ap.s ! NoHum ! g ! Sg ! Const ! Bare ; ----IL - CN : Type = Noun ** {np : Case => Str}; + CN : Type = Noun ** {np : Case => Str ; isHeavy : Bool}; -- All fields of NP cn2str : CN -> Number -> State -> Case -> Str = \cn,n,s,c -> @@ -181,7 +181,9 @@ resource ResAra = PatternsAra ** open Prelude, Predef, OrthoAra, ParamX in { cn.s2 ! n ! s ! c ++ cn.np ! c ; - useN : Noun -> CN = \n -> n ** {np = \\_ => []} ; + useN : Noun -> CN = \n -> n ** { + np = \\_ => [] ; + isHeavy = False } ; uttCN : CN -> (Gender => Str) ; uttCN cn = \\_ => cn2str cn Sg Indef Bare ; @@ -555,7 +557,7 @@ v1geminateForms : Str -> Vowel -> Vowel -> DefForms = umdad = "ُ" + mkStrong fcal mdd ; Umdud = (prefixImp ! vowImpf) + mdud; mamdUd = mkStrong mafcUl mdd - } in toDefForms + } in toDefForms madd madad mudd mudid -- VPerf amudd amdud umadd umdad -- VImpf Umdud mudd' mamdUd ; @@ -615,7 +617,7 @@ v1defForms_perfA : Root3 -> Vowel -> DefForms = \rmy,vowImpf -> eirmi = prefixImp ! vowImpf + _rmi; eirmu = prefixImp ! vowImpf + _rmu; marmiy = mkStrong mafcil rmy - } in toDefForms + } in toDefForms rama ramay rumi rumu rumiy -- VPerf armi armu urma -- VImpf eirmi eirmu marmiy ; @@ -683,7 +685,7 @@ v2defective : Root3 -> Verb = \gny -> ugannu = "ُ" + gannu; uganna = "ُ" + ganna; mugannaY = "مُ" + ganna + "ى" - } in verbDef (toDefForms + } in verbDef (toDefForms ganna gannay gunni gunnu gunniy -- VPerf uganni ugannu uganna -- VImpf ganni gannu mugannaY) i ; @@ -818,7 +820,7 @@ v7geminate : Str -> Verb = \fcl -> n => "ُنْ" + vforms ! n -- doesn't exist for form 7 }) ; -v8geminate : Str -> Verb = +v8geminate : Str -> Verb = \rootStr -> let { mdd = mkRoot3 rootStr ; --fcc @@ -838,7 +840,7 @@ v8geminate : Str -> Verb = umtadad = "ُ" + _mtadad ; imtadid = "اِ" + _mtadid ; mumtadd = "مُ" + _mtadd ; - } in verbGeminate (toDefForms + } in verbGeminate (toDefForms imtadd imtadad umtudd umtudid -- VPerf amtadd amtadid umtadd umtadad -- VPres imtadd imtadid mumtadd) ; @@ -1507,6 +1509,7 @@ patGeminateImp : (_,_ :Str) -> Gender => Number => Str = \facc,facic -> NP : Type = { s : Case => Str ; a : Agr ; + isHeavy : Bool ; -- overrides verbal word order, if the subject is very complicated; e.g. built out of RelNP or similar empty : Str -- to prevent ambiguities with prodrop } ; @@ -1516,15 +1519,14 @@ patGeminateImp : (_,_ :Str) -> Gender => Number => Str = \facc,facic -> } ; mkPron : (_,_,_ : Str) -> PerGenNum -> NP = \ana,nI,I,pgn -> - { s = + emptyNP ** {s = table { (Nom|Bare) => ana; Acc => nI ; -- object suffix Gen => I ; -- possessive suffix Dat => I -- will only be used with preposition لِ }; - a = {pgn = pgn; isPron = True }; - empty = [] + a = {pgn = pgn; isPron = True} }; proDrop : NP -> NP = \np -> @@ -1536,6 +1538,7 @@ patGeminateImp : (_,_ :Str) -> Gender => Number => Str = \facc,facic -> emptyNP : NP = { s = \\_ => [] ; a = {pgn = Per3 Masc Sg ; isPron = False} ; + isHeavy = False ; empty = [] } ; agrNP : Agr -> NP = \agr -> emptyNP ** {a = agr} ; @@ -1653,7 +1656,7 @@ patGeminateImp : (_,_ :Str) -> Gender => Number => Str = \facc,facic -> } } ; - ip2np : IP -> Bool -> NP = \ip,isPred -> ip ** { s = ip.s ! isPred ! Masc ! Def ; empty = [] } ; + ip2np : IP -> Bool -> NP = \ip,isPred -> emptyNP ** ip ** {s = ip.s ! isPred ! Masc ! Def} ; np2ip : NP -> IP = \np -> np ** {s = \\_,_,_ => np.s} ; IDet : Type = { @@ -1748,7 +1751,7 @@ patGeminateImp : (_,_ :Str) -> Gender => Number => Str = \facc,facic -> True => np.s ! sc.c } ; } in wordOrder o - vp.obj.a.isPron np.a.isPron + vp.obj.a.isPron np.a.isPron np.isHeavy (vStr vp pgn t p o) vp.obj.s (pred vp pgn t p) @@ -1757,14 +1760,24 @@ patGeminateImp : (_,_ :Str) -> Gender => Number => Str = \facc,facic -> } ; -- seems complicated, but this is to share code with VPS and other similar structures - wordOrder : Order -> (objIsPron,subjIsPron : Bool) -> (verb,obj,pred,adv,subj : Str) -> Str = - \o,objIsPron,subjIsPron,verb,obj,pred,adv,subj -> + wordOrder : Order -> (objIsPron,subjIsPron,subjIsHeavy : Bool) -> (verb,obj,pred,adv,subj : Str) -> Str = + \o,objIsPron,subjIsPron,subjIsHeavy,verb,obj,pred,adv,subj -> let cl = wordOrderNoSubj o objIsPron verb obj pred adv in case o of { + -- If subject is pronoun, affix it in Subord word order. Subord => - let bind = if_then_Str subjIsPron BIND [] -- in subord. clause, subj. pronoun binds to the main verb + let bind = if_then_Str subjIsPron BIND [] in cl.before ++ bind ++ subj ++ cl.after ; - _ => cl.before ++ subj ++ cl.after + + -- If subject is "heavy" (e.g. contains a relative clause), + Verbal => -- then override Verbal word order. + case subjIsHeavy of { + True => subj ++ cl.before ++ cl.after ; + False => cl.before ++ subj ++ cl.after + } ; + + -- Any other word order, no special checks. + _ => cl.before ++ subj ++ cl.after } ; wordOrderNoSubj : Order -> (objIsPron : Bool) -> (verb,obj,pred,adv : Str) -> {before,after : Str} = @@ -1833,7 +1846,7 @@ patGeminateImp : (_,_ :Str) -> Gender => Number => Str = \facc,facic -> Subj : Type = {s : Case => Str ; isPron : Bool} ; np2subj : NP -> Subj = \np -> np ** {isPron = np.a.isPron} ; - subj2np : Subj -> NP = \su -> su ** {a = {pgn = emptyNP.a.pgn ; isPron = su.isPron} ; empty=[]} ; + subj2np : Subj -> NP = \su -> emptyNP ** su ** {a = {pgn = emptyNP.a.pgn ; isPron = su.isPron}} ; emptyObj : Obj = {a = {gn = {g=Masc ; n=Sg} ; isPron = False}; s = []} ; insertObj : NP -> VPSlash -> VP = \np,vp -> vp ** { diff --git a/src/arabic/SymbolAra.gf b/src/arabic/SymbolAra.gf index 10e2b7bb5..0f43ad1d2 100644 --- a/src/arabic/SymbolAra.gf +++ b/src/arabic/SymbolAra.gf @@ -14,15 +14,13 @@ lin -- } ; --IL TODO: check out some opers regarding state in ResAra. These are just dummy values. CNSymbNP det cn xs = - let g = cn.g ; n = sizeToNumber det.n in { + let g = cn.g ; n = sizeToNumber det.n in emptyNP ** { s = \\c => det.s ! NoHum ! g ! c ++ cn2str cn n Def c ++ xs.s; ----IL word order?? Seems to be nontrivial according to ResAra comments. - a = dummyAgrP3 n ; - empty = [] + a = dummyAgrP3 n } ; - CNNumNP cn i = { + CNNumNP cn i = emptyNP ** { s = \\c => cn2str cn Sg Def c ++ uttNum i ! cn.g ; - a = dummyAgrP3 Sg ; - empty = [] + a = dummyAgrP3 Sg } ; SymbS sy = {s = \\_ => sy.s} ; From f32e49ca92959a5223c96470df5cbf7cb18ea3ac Mon Sep 17 00:00:00 2001 From: Inari Listenmaa Date: Fri, 4 Jan 2019 13:39:58 +0200 Subject: [PATCH 3/4] (Ara) Fix bug in ImpersCl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ImpersCl (VP -> Cl) used to check whether the VP is predicative, and in the positive case, force the dummy subject to not be a pronoun. This has been wrong since the time I changed prodrop rules in PredVP. The error that followed manifested when the Cl was put in subordinate position: the subject pronoun did not attach to the conjunction. An example follows. Lang: SubjS that_Subj (UseCl (TTAnt TPres ASimul) PPos (ImpersCl (UseComp (CompNP (DetCN (DetQuant IndefArt NumSg) (UseN girl_N)))))) LangEng: that it is a girl Old LangAra: أنَّ ها بِنتٌ (wrong) New LangAra: أنَّ &+ ها بِنتٌ (correct) --- src/arabic/IdiomAra.gf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/arabic/IdiomAra.gf b/src/arabic/IdiomAra.gf index a7a058b9a..ebd04cd04 100644 --- a/src/arabic/IdiomAra.gf +++ b/src/arabic/IdiomAra.gf @@ -9,10 +9,10 @@ concrete IdiomAra of Idiom = CatAra ** open lin -- : VP -> Cl ; -- it is hot - ImpersCl vp = - let it : ResAra.NP = case vp.isPred of { - True => pron2np (gn2pron vp.obj.a.gn) ; - False => gn2pron vp.obj.a.gn } ; -- if no obj, Per3 Masc Sg chosen by default + -- "it is a girl" becomes "she is a girl"; + -- "it is twins" becomes "they are twins". + ImpersCl vp = -- if no obj, default Per3 Masc Sg + let it : ResAra.NP = gn2pron vp.obj.a.gn ; in predVP it vp ; -- : VP -> Cl ; -- one sleeps From 53c719c70e6160691759399f7d05acc0d345e00c Mon Sep 17 00:00:00 2001 From: Inari Listenmaa Date: Fri, 4 Jan 2019 14:34:49 +0200 Subject: [PATCH 4/4] (Ara) typofix --- src/arabic/StructuralAra.gf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arabic/StructuralAra.gf b/src/arabic/StructuralAra.gf index 5db242869..e86d59397 100644 --- a/src/arabic/StructuralAra.gf +++ b/src/arabic/StructuralAra.gf @@ -78,7 +78,7 @@ concrete StructuralAra of Structural = CatAra ** something_NP = regNP "شَيْء" Sg Indef ; -- somewhere_Adv = ss "سْموهري" ; that_Quant = mkQuant3 "ذَلِكَ" "تِلكَ" "أُلٱِكَ" Def; - that_Subj = mkSubj "أنَّ" ; + that_Subj = mkSubj "أَنَّ" ; ----b that_NP = indeclNP "ذَلِكَ" Sg ; there_Adv = ss "هُناك" ; -- there7to_Adv = ss "تهري" ;