From e6d57f3b032b8637eb8a655e26564b45ecb6fc9d Mon Sep 17 00:00:00 2001 From: Inari Listenmaa Date: Thu, 8 Nov 2018 15:17:09 +0100 Subject: [PATCH 1/2] (Ara) More fixes to hamza rules --- src/arabic/OrthoAra.gf | 105 +++++++++++++++++++++-------------------- 1 file changed, 54 insertions(+), 51 deletions(-) diff --git a/src/arabic/OrthoAra.gf b/src/arabic/OrthoAra.gf index c5968270..edf4f0f8 100644 --- a/src/arabic/OrthoAra.gf +++ b/src/arabic/OrthoAra.gf @@ -2,67 +2,70 @@ resource OrthoAra = open Prelude, Predef in { flags coding=utf8 ; - oper +oper - vow : pattern Str = #("َ" | "ِ" | "ُ" | "ً" | "ٍ" | "ٌ") ; + vow : pattern Str = #("َ" | "ِ" | "ُ" | "ً" | "ٍ" | "ٌ") ; - weak : pattern Str = #("و"|"ي") ; + weak : pattern Str = #("و"|"ي") ; - -- "Sun letters": assimilate with def. article - sun : pattern Str = #("ت"|"ث"|"د"|"ذ"|"ر"|"ز"|"س"|"ش"|"ص"|"ض"|"ط"|"ظ"|"ل"|"ن") ; + -- "Sun letters": assimilate with def. article + sun : pattern Str = #("ت"|"ث"|"د"|"ذ"|"ر"|"ز"|"س"|"ش"|"ص"|"ض"|"ط"|"ظ"|"ل"|"ن") ; - -- Shadda: https://www.unicode.org/L2/L2017/17253-arabic-ordering.pdf - fixShd : Str -> Str -> Str = \word,suffix -> - case of { - -- => x + v + "ّ" + y ; - => x + "ّ" + v + y ; - _ => word + suffix - } ; - - -- IL: using this to reuse patterns for weak verbs, might be strange/wrong - rmSukun : Str -> Str = \s -> case s of { - x + "ْ" + y => x + y ; - _ => s +-- Shadda: https://www.unicode.org/L2/L2017/17253-arabic-ordering.pdf + fixShd : Str -> Str -> Str = \word,suffix -> + case of { + -- => x + v + "ّ" + y ; + => x + "ّ" + v + y ; + _ => word + suffix } ; - -- Hamza - hamza : pattern Str = #("ء"|"؟") ; +-- IL: using this to reuse patterns for weak verbs, might be strange/wrong + rmSukun : Str -> Str = \s -> case s of { + x + "ْ" + y => x + y ; + _ => s + } ; - rectifyHmz: Str -> Str = \word -> - case word of { - l@(""|"ال") + ("أ"|"أَ") + #hamza + "ْ" + tail => l + "آ" + tail; - l@(""|"ال") + ("أ"|"أَ") + #hamza + tail => l + "آ" + tail; - l@(""|"ال") + #hamza + v@("َ"|"ُ") + tail => l + "أ" + v + tail; - l@(""|"ال") + #hamza + v@("ِ") + tail => l + "إ" + v + tail; +-- Hamza + hamza : pattern Str = #("ء"|"؟") ; - head + v1@("ِ"|"ُ"|"َ"|"ْ"|"ا"|"ي"|"و") + #hamza + v2@(""|"ُ"|"َ"|"ْ"|"ِ") => head + v1 + (tHmz v1) + v2; - head + #hamza + tail => head + (bHmz (dp 2 head) (take 2 tail)) + tail; --last head , take 1 tail - _ => word - }; + rectifyHmz : Str -> Str = \word -> + case word of { + l@(""|"ال") + ("أ"|"أَ") + #hamza + "ْ" + tail => l + "آ" + tail; + l@(""|"ال") + ("أ"|"أَ") + #hamza + tail => l + "آ" + tail; + l@(""|"ال") + #hamza + v@("َ"|"ُ") + tail => l + "أ" + v + tail; + l@(""|"ال") + #hamza + v@("ِ") + tail => l + "إ" + v + tail; + head + v1@("ِ"|"ُ"|"َ"|"ْ"|"ا"|"ي"|"و") + + #hamza + v2@(#vow|"ْ") + tail => + case v2 of { "ْ" => head + v1 + tHmz v1 + tail ; -- unsure about this /IL + _ => head + v1 + tHmz v1 + v2 + tail } ; - --hamza at beginning of word (head) - hHmz : Str -> Str = \d -> - case d of { - "ِ" => "إ"; - _ => "أ" - }; + head + #hamza + tail => head + (bHmz (dp 2 head) (take 2 tail)) + tail; --last head , take 1 tail + _ => word + }; - --hamza in middle of word (body) - bHmz : Str -> Str -> Str = \d1,d2 -> - case of { - <"ِ",_> | <_,"ِ"> => "ئ"; - <"ُ",_> | <_,"ُ"> => "ؤ"; - <"َ",_> | <_,"َ"> => "أ"; - _ => "ء" - }; + --hamza at beginning of word (head) + hHmz : Str -> Str = \d -> + case d of { + "ِ" => "إ"; + _ => "أ" + }; - --hamza carrier sequence - tHmz : Str -> Str = \d -> - case d of { - "ِ" => "ئ"; - "ُ" => "ؤ"; - "َ" => "أ"; - "ْ"|"ا"|"و"|"ي" => "ء" - }; + --hamza in middle of word (body) + bHmz : Str -> Str -> Str = \d1,d2 -> + case of { + <"ِ",_> | <_,"ِ"> => "ئ"; + <"ُ",_> | <_,"ُ"> => "ؤ"; + <"َ",_> | <_,"َ"> => "أ"; + _ => "ء" + }; + + --hamza carrier sequence + tHmz : Str -> Str = \d -> + case d of { + "ِ" => "ئ"; + "ُ" => "ؤ"; + "َ" => "أ"; + "ْ"|"ا"|"و"|"ي" => "ء" + }; } From 7713518ad594ce410d03ec0c673451230077811e Mon Sep 17 00:00:00 2001 From: Inari Listenmaa Date: Thu, 8 Nov 2018 15:52:06 +0100 Subject: [PATCH 2/2] (Ara) More relative clauses + fix questions + update MissingAra --- src/arabic/AdjectiveAra.gf | 10 +++++++--- src/arabic/CatAra.gf | 1 + src/arabic/LexiconAra.gf | 2 +- src/arabic/MissingAra.gf | 10 ---------- src/arabic/NounAra.gf | 3 ++- src/arabic/ParadigmsAra.gf | 8 +------- src/arabic/QuestionAra.gf | 21 +++++++++++---------- src/arabic/RelativeAra.gf | 21 +++++++++++++-------- src/arabic/ResAra.gf | 17 +++++------------ src/arabic/SentenceAra.gf | 4 ++-- 10 files changed, 43 insertions(+), 54 deletions(-) diff --git a/src/arabic/AdjectiveAra.gf b/src/arabic/AdjectiveAra.gf index ec15d535..20988aea 100644 --- a/src/arabic/AdjectiveAra.gf +++ b/src/arabic/AdjectiveAra.gf @@ -37,7 +37,11 @@ concrete AdjectiveAra of Adjective = CatAra ** open ResAra, Prelude in { AdAP ada ap = { s = \\sp,g,n,st,c => ada.s ++ ap.s ! sp ! g ! n ! st ! c } ; --- --- UseA2 a = a ; --- + + UseA2 = PositA ; + + UseComparA a = { + s = \\h,g,n,d,c => a.s ! AComp d c + }; + } diff --git a/src/arabic/CatAra.gf b/src/arabic/CatAra.gf index 95e4aed6..7bc37f08 100644 --- a/src/arabic/CatAra.gf +++ b/src/arabic/CatAra.gf @@ -10,6 +10,7 @@ concrete CatAra of Cat = CommonX - [Utt] ** open ResAra, Prelude, ParamX in { -- Tensed/Untensed + SSlash, S = {s : Str} ; QS = {s : QForm => Str} ; RS = {s : Agr => Case => Str} ; diff --git a/src/arabic/LexiconAra.gf b/src/arabic/LexiconAra.gf index 4f3cff58..6be7abdf 100644 --- a/src/arabic/LexiconAra.gf +++ b/src/arabic/LexiconAra.gf @@ -295,7 +295,7 @@ flags hair_N = sdfN "شعر" "فَعلة" Fem NoHum ; hand_N = brkN "يد" "فَع" "أَفَاعِي" Fem NoHum ; head_N = brkN "رءس" "فَعل" "فُعُول" Masc NoHum; - heart_N = brkN "قلب" "فَعل" "فُعُول" Masc NoHum; + heart_N = brkN "قلب" "فَعْل" "فُعُول" Masc NoHum; horn_N = brkN "قرن" "فَعل" "فُعُول" Masc NoHum; husband_N = brkN "زوج" "فَعل" "أَفعَال" Masc NoHum; ice_N = brkN "ثلج" "فَعل" "فُعُول" Masc NoHum; diff --git a/src/arabic/MissingAra.gf b/src/arabic/MissingAra.gf index 7d82c4eb..68d182d4 100644 --- a/src/arabic/MissingAra.gf +++ b/src/arabic/MissingAra.gf @@ -39,7 +39,6 @@ oper ExistIP : IP -> QCl = notYet "ExistIP" ; oper ExistNP : NP -> Cl = notYet "ExistNP" ; oper FunRP : Prep -> NP -> RP -> RP = notYet "FunRP" ; oper GenericCl : VP -> Cl = notYet "GenericCl" ; -oper IdRP : RP = notYet "IdRP" ; oper ImpPl1 : VP -> Utt = notYet "ImpPl1" ; oper ImpersCl : VP -> Cl = notYet "ImpersCl" ; oper PConjConj : Conj -> PConj = notYet "PConjConj" ; @@ -48,11 +47,6 @@ oper PredSCVP : SC -> VP -> Cl = notYet "PredSCVP" ; oper ProgrVP : VP -> VP = notYet "ProgrVP" ; oper ReflA2 : A2 -> AP = notYet "ReflA2" ; oper ReflVP : VPSlash -> VP = notYet "ReflVP" ; -oper RelCN : CN -> RS -> CN = notYet "RelCN" ; -oper RelCl : Cl -> RCl = notYet "RelCl" ; -oper RelNP : NP -> RS -> NP = notYet "RelNP" ; -oper RelSlash : RP -> ClSlash -> RCl = notYet "RelSlash" ; -oper RelVP : RP -> VP -> RCl = notYet "RelVP" ; oper SentAP : AP -> SC -> AP = notYet "SentAP" ; oper SentCN : CN -> SC -> CN = notYet "SentCN" ; oper Slash2V3 : V3 -> NP -> VPSlash = notYet "Slash2V3" ; @@ -63,10 +57,6 @@ oper SlashV2V : V2V -> VP -> VPSlash = notYet "SlashV2V" ; oper SlashV2VNP : V2V -> NP -> VPSlash -> VPSlash = notYet "SlashV2VNP" ; oper SlashVS : NP -> VS -> SSlash -> ClSlash = notYet "SlashVS" ; oper SubjS : Subj -> S -> Adv = notYet "SubjS" ; -oper UseA2 : A2 -> AP = notYet "UseA2" ; -oper UseComparA : A -> AP = notYet "UseComparA" ; -oper UseRCl : Temp -> Pol -> RCl -> RS = notYet "UseRCl" ; -oper UseSlash : Temp -> Pol -> ClSlash -> SSlash = notYet "UseSlash" ; oper VocNP : NP -> Voc = notYet "VocNP" ; oper pot3plus : Sub1000 -> Sub1000 -> Sub1000000 = notYet "pot3plus" ; diff --git a/src/arabic/NounAra.gf b/src/arabic/NounAra.gf index 834b9ad9..7f7e7092 100644 --- a/src/arabic/NounAra.gf +++ b/src/arabic/NounAra.gf @@ -190,6 +190,8 @@ lin }; RelCN cn rs = cn ** {s = \\n,s,c => cn.s ! n ! s ! c ++ rs.s ! {pgn=Per3 cn.g n ; isPron=False} ! c}; + + RelNP np rs = np ** {s = \\c => np.s ! c ++ rs.s ! np.a ! c} ; -- AdvCN cn ad = {s = \\n,c => cn.s ! n ! c ++ ad.s} ; -- -- SentCN cn sc = {s = \\n,c => cn.s ! n ! c ++ sc.s} ; @@ -201,7 +203,6 @@ lin np = \\c => cn.np ! c ++ np.s ! Gen }; - -- : CN -> NP -> CN ; -- glass of wine --PartNP } diff --git a/src/arabic/ParadigmsAra.gf b/src/arabic/ParadigmsAra.gf index 962c0158..c5a765a4 100644 --- a/src/arabic/ParadigmsAra.gf +++ b/src/arabic/ParadigmsAra.gf @@ -370,13 +370,7 @@ resource ParadigmsAra = open v1 = \rootStr,vPerf,vImpf -> let { raw = v1' rootStr vPerf vImpf } in - { s = \\vf => - case rootStr of { - _ + #hamza + _ => rectifyHmz(raw.s ! vf); - _ => raw.s ! vf - }; - lock_V = <> - } ; + lin V { s = \\vf =>rectifyHmz (raw.s ! vf) } ; v1' : Str -> Vowel -> Vowel -> Verb = \rootStr,vPerf,vImpf -> diff --git a/src/arabic/QuestionAra.gf b/src/arabic/QuestionAra.gf index af35450c..bb14074e 100644 --- a/src/arabic/QuestionAra.gf +++ b/src/arabic/QuestionAra.gf @@ -16,10 +16,7 @@ concrete QuestionAra of Question = CatAra ** open ResAra, ParamX, Prelude, VerbA --IL guessed QuestVP qp vp = - let np = { s = qp.s ! vp.isPred ! Def ; - a = { pgn = Per3 Masc qp.n ; - isPron = False } - } ; + let np = ip2np qp vp.isPred ; cl = PredVP np vp ; in { s = \\t,p,_qf => cl.s ! t ! p ! Nominal } ; @@ -32,17 +29,17 @@ concrete QuestionAra of Question = CatAra ** open ResAra, ParamX, Prelude, VerbA -- : IComp -> NP -> QCl QuestIComp ic np = let vp = kaan (CompNP np) ; - ip = ic ** { s : Bool => State => Case => Str = \\_,_,_ => ic.s ! pgn2gn np.a.pgn } ; + ip : ResAra.IP = np ** { + s = \\_,_,_ => ic.s ! pgn2gn np.a.pgn } ; in QuestVP ip vp ; -- : IP -> IComp ; - CompIP ip = { + CompIP ip = ip ** { s = \\_ => ip.s ! True -- True=IP will be a subject of predicative sentence ! Def ! Nom ; -- IP will be a subject - n = ip.n } ; - CompIAdv iadv = { s = \\_ => iadv.s ; n = ResAra.Sg } ; + CompIAdv iadv = { s = \\_ => iadv.s ; a = ResAra.Sg } ; -- QCl = {s : Tense => Polarity => QForm => Str} ; QuestSlash ip cl = { ----IL just guessing @@ -62,13 +59,17 @@ concrete QuestionAra of Question = CatAra ** open ResAra, ParamX, Prelude, VerbA } ; -- : IDet -> IP - IdetIP idet = idet ** { s = \\isPred => idet.s ! Masc } ; + IdetIP idet = idet ** { + s = \\isPred => idet.s ! Masc ; + a = { pgn = agrP3 NoHum Masc idet.n ; isPron = False } + } ; -- : IDet -> CN -> IP - IdetCN idet cn = idet ** { + IdetCN idet cn = { s = \\isPred,s,c => idet.s ! cn.g ! s ! c ++ cn.s ! idet.n ! Indef ! Gen ; --idaafa + a = { pgn = agrP3 NoHum cn.g idet.n ; isPron = False } } ; -- : IQuant -> Num -> IDet diff --git a/src/arabic/RelativeAra.gf b/src/arabic/RelativeAra.gf index f3a0645b..8f54c37a 100644 --- a/src/arabic/RelativeAra.gf +++ b/src/arabic/RelativeAra.gf @@ -1,11 +1,12 @@ -concrete RelativeAra of Relative = CatAra ** open ResAra, SentenceAra in { +concrete RelativeAra of Relative = CatAra ** + open ResAra, (Se=SentenceAra), (St=StructuralAra) in { flags coding=utf8; lin - -- RelCl cl = { - -- s = \\t,p,agr,c => IdRP.s ! agr2ragr agr c ++ cl.s ! t ! p ! Nominal - -- } ; + RelCl cl = { + s = \\t,p,agr,c => IdRP.s ! agr2ragr agr c ++ cl.s ! t ! p ! Nominal + } ; -- : RP -> VP -> RCl ; -- who loves John RelVP rp vp = { @@ -13,15 +14,19 @@ concrete RelativeAra of Relative = CatAra ** open ResAra, SentenceAra in { let npS : Case => Str = \\_ => rp.s ! agr2ragr agr c ; np = {s = npS ; a = agr} ; - cl = PredVP np vp ; + cl = Se.PredVP np vp ; in cl.s ! t ! p ! Nominal } ; -- : RP -> ClSlash -> RCl ; -- whom John loves - -- TODO: add resumptive pronouns --- RelSlash rp slash = { --- } ; + RelSlash rp cl = cl ** { + s = \\t,p,agr,c => + let obj = case (pgn2gn agr.pgn).g of { + Fem => St.she_Pron ; + Masc => St.he_Pron } ; + in rp.s ! agr2ragr agr c ++ cl.s ! t ! p ! Nominal ++ cl.c2.s ++ obj.s ! cl.c2.c + } ; -- -- FunRP p np rp = { -- s = \\c => np.s ! c ++ p.s ++ rp.s ! Acc ; diff --git a/src/arabic/ResAra.gf b/src/arabic/ResAra.gf index 7841118e..93f046c7 100644 --- a/src/arabic/ResAra.gf +++ b/src/arabic/ResAra.gf @@ -1130,12 +1130,12 @@ patHollowImp : (_,_ :Str) -> Gender => Number => Str =\xaf,xAf -> mkIP = overload { mkIP : Str -> Number -> IP = \maa,n -> { s = \\_p,_s,_c => maa ; - n = n + a = { pgn = agrP3 NoHum Masc n ; isPron = False } } ; mkIP : (_,_ : Str) -> Number -> IP = \maa,maadhaa,n -> { s = table { True => \\_s,_c => maa ; False => \\_s,_c => maadhaa } ; - n = n + a = { pgn = agrP3 NoHum Masc n ; isPron = False } } } ; @@ -1189,7 +1189,6 @@ patHollowImp : (_,_ :Str) -> Gender => Number => Str =\xaf,xAf -> s : AAgr -- "how old": masc or fem for adjective -- no need for Case, IComp is only used by QuestIComp, as grammatical subject => Str ; - n : Number } ; Obj : Type = { @@ -1206,9 +1205,11 @@ patHollowImp : (_,_ :Str) -> Gender => Number => Str =\xaf,xAf -> s : Bool -- different forms for "what is this" and "what do you do" => State => Case -- because of PrepIP: e.g. "in which" chooses definite accusative => Str ; - n : Number + a : Agr -- can be both subject and object of a QCl, needs full agr. info } ; + ip2np : IP -> Bool -> NP = \ip,isPred -> ip ** { s = ip.s ! isPred ! Def } ; + IDet : Type = { s : Gender -- IdetCN needs to choose the gender of the CN => State => Case => Str ; @@ -1313,17 +1314,9 @@ patHollowImp : (_,_ :Str) -> Gender => Number => Str =\xaf,xAf -> Pl => RPl g } } ; - -- ragr2agr : Number -> Case -> Gender -> RAgr = \ra -> - -- case ra of { - -- RSg x => Per3 Sg x ; - -- RPl x => Per3 Pl x ; - -- RDl x => Per3 Dl x } ; - RCl : Type = {s : Tense => Polarity => Agr => Case => Str} ; RP : Type = {s : RAgr => Str } ; ---TODO: slashRCl : ClSlash -> RP -> RCl ; - param Size = One | Two | ThreeTen | Teen | NonTeen | Hundreds | None ; diff --git a/src/arabic/SentenceAra.gf b/src/arabic/SentenceAra.gf index 159e1cea..b13a8022 100644 --- a/src/arabic/SentenceAra.gf +++ b/src/arabic/SentenceAra.gf @@ -111,8 +111,8 @@ concrete SentenceAra of Sentence = CatAra ** open -- EmbedQS qs = {s = qs.s ! QIndir} ; -- EmbedVP vp = {s = infVP False vp (agrP3 Sg)} ; --- agr -- - - UseCl t p cl = + UseSlash, + UseCl = \t,p,cl -> {s = t.s ++ p.s ++ case of { --- IL guessed tenses <(Pres|Cond),Simul> => cl.s ! Pres ! p.p ! Nominal ;