1
0
forked from GitHub/gf-rgl

Merge pull request #68 from inariksit/arabic

Arabic
This commit is contained in:
Inari Listenmaa
2018-11-08 15:57:24 +01:00
committed by GitHub
11 changed files with 97 additions and 105 deletions

View File

@@ -37,7 +37,11 @@ concrete AdjectiveAra of Adjective = CatAra ** open ResAra, Prelude in {
AdAP ada ap = {
s = \\sp,g,n,st,c => ada.s ++ ap.s ! sp ! g ! n ! st ! c
} ;
--
-- UseA2 a = a ;
--
UseA2 = PositA ;
UseComparA a = {
s = \\h,g,n,d,c => a.s ! AComp d c
};
}

View File

@@ -10,6 +10,7 @@ concrete CatAra of Cat = CommonX - [Utt] ** open ResAra, Prelude, ParamX in {
-- Tensed/Untensed
SSlash,
S = {s : Str} ;
QS = {s : QForm => Str} ;
RS = {s : Agr => Case => Str} ;

View File

@@ -295,7 +295,7 @@ flags
hair_N = sdfN "شعر" "فَعلة" Fem NoHum ;
hand_N = brkN "يد" "فَع" "أَفَاعِي" Fem NoHum ;
head_N = brkN "رءس" "فَعل" "فُعُول" Masc NoHum;
heart_N = brkN "قلب" "فَعل" "فُعُول" Masc NoHum;
heart_N = brkN "قلب" "فَعْل" "فُعُول" Masc NoHum;
horn_N = brkN "قرن" "فَعل" "فُعُول" Masc NoHum;
husband_N = brkN "زوج" "فَعل" "أَفعَال" Masc NoHum;
ice_N = brkN "ثلج" "فَعل" "فُعُول" Masc NoHum;

View File

@@ -39,7 +39,6 @@ oper ExistIP : IP -> QCl = notYet "ExistIP" ;
oper ExistNP : NP -> Cl = notYet "ExistNP" ;
oper FunRP : Prep -> NP -> RP -> RP = notYet "FunRP" ;
oper GenericCl : VP -> Cl = notYet "GenericCl" ;
oper IdRP : RP = notYet "IdRP" ;
oper ImpPl1 : VP -> Utt = notYet "ImpPl1" ;
oper ImpersCl : VP -> Cl = notYet "ImpersCl" ;
oper PConjConj : Conj -> PConj = notYet "PConjConj" ;
@@ -48,11 +47,6 @@ oper PredSCVP : SC -> VP -> Cl = notYet "PredSCVP" ;
oper ProgrVP : VP -> VP = notYet "ProgrVP" ;
oper ReflA2 : A2 -> AP = notYet "ReflA2" ;
oper ReflVP : VPSlash -> VP = notYet "ReflVP" ;
oper RelCN : CN -> RS -> CN = notYet "RelCN" ;
oper RelCl : Cl -> RCl = notYet "RelCl" ;
oper RelNP : NP -> RS -> NP = notYet "RelNP" ;
oper RelSlash : RP -> ClSlash -> RCl = notYet "RelSlash" ;
oper RelVP : RP -> VP -> RCl = notYet "RelVP" ;
oper SentAP : AP -> SC -> AP = notYet "SentAP" ;
oper SentCN : CN -> SC -> CN = notYet "SentCN" ;
oper Slash2V3 : V3 -> NP -> VPSlash = notYet "Slash2V3" ;
@@ -63,10 +57,6 @@ oper SlashV2V : V2V -> VP -> VPSlash = notYet "SlashV2V" ;
oper SlashV2VNP : V2V -> NP -> VPSlash -> VPSlash = notYet "SlashV2VNP" ;
oper SlashVS : NP -> VS -> SSlash -> ClSlash = notYet "SlashVS" ;
oper SubjS : Subj -> S -> Adv = notYet "SubjS" ;
oper UseA2 : A2 -> AP = notYet "UseA2" ;
oper UseComparA : A -> AP = notYet "UseComparA" ;
oper UseRCl : Temp -> Pol -> RCl -> RS = notYet "UseRCl" ;
oper UseSlash : Temp -> Pol -> ClSlash -> SSlash = notYet "UseSlash" ;
oper VocNP : NP -> Voc = notYet "VocNP" ;
oper pot3plus : Sub1000 -> Sub1000 -> Sub1000000 = notYet "pot3plus" ;

View File

@@ -190,6 +190,8 @@ lin
};
RelCN cn rs = cn ** {s = \\n,s,c => cn.s ! n ! s ! c ++ rs.s ! {pgn=Per3 cn.g n ; isPron=False} ! c};
RelNP np rs = np ** {s = \\c => np.s ! c ++ rs.s ! np.a ! c} ;
-- AdvCN cn ad = {s = \\n,c => cn.s ! n ! c ++ ad.s} ;
--
-- SentCN cn sc = {s = \\n,c => cn.s ! n ! c ++ sc.s} ;
@@ -201,7 +203,6 @@ lin
np = \\c => cn.np ! c ++ np.s ! Gen
};
-- : CN -> NP -> CN ; -- glass of wine
--PartNP
}

View File

@@ -2,67 +2,70 @@ resource OrthoAra = open Prelude, Predef in {
flags coding=utf8 ;
oper
oper
vow : pattern Str = #("َ" | "ِ" | "ُ" | "ً" | "ٍ" | "ٌ") ;
vow : pattern Str = #("َ" | "ِ" | "ُ" | "ً" | "ٍ" | "ٌ") ;
weak : pattern Str = #("و"|"ي") ;
weak : pattern Str = #("و"|"ي") ;
-- "Sun letters": assimilate with def. article
sun : pattern Str = #("ت"|"ث"|"د"|"ذ"|"ر"|"ز"|"س"|"ش"|"ص"|"ض"|"ط"|"ظ"|"ل"|"ن") ;
-- "Sun letters": assimilate with def. article
sun : pattern Str = #("ت"|"ث"|"د"|"ذ"|"ر"|"ز"|"س"|"ش"|"ص"|"ض"|"ط"|"ظ"|"ل"|"ن") ;
-- Shadda: https://www.unicode.org/L2/L2017/17253-arabic-ordering.pdf
fixShd : Str -> Str -> Str = \word,suffix ->
case <word,suffix> of {
-- <x + "ّ", v@#vow + y> => x + v + "ّ" + y ;
<x + v@#vow, "ّ" + y> => x + "ّ" + v + y ;
_ => word + suffix
} ;
-- IL: using this to reuse patterns for weak verbs, might be strange/wrong
rmSukun : Str -> Str = \s -> case s of {
x + "ْ" + y => x + y ;
_ => s
-- Shadda: https://www.unicode.org/L2/L2017/17253-arabic-ordering.pdf
fixShd : Str -> Str -> Str = \word,suffix ->
case <word,suffix> of {
-- <x + "ّ", v@#vow + y> => x + v + "ّ" + y ;
<x + v@#vow, "ّ" + y> => x + "ّ" + v + y ;
_ => word + suffix
} ;
-- Hamza
hamza : pattern Str = #("ء"|"؟") ;
-- IL: using this to reuse patterns for weak verbs, might be strange/wrong
rmSukun : Str -> Str = \s -> case s of {
x + "ْ" + y => x + y ;
_ => s
} ;
rectifyHmz: Str -> Str = \word ->
case word of {
l@(""|"ال") + ("أ"|"أَ") + #hamza + "ْ" + tail => l + "آ" + tail;
l@(""|"ال") + ("أ"|"أَ") + #hamza + tail => l + "آ" + tail;
l@(""|"ال") + #hamza + v@("َ"|"ُ") + tail => l + "أ" + v + tail;
l@(""|"ال") + #hamza + v@("ِ") + tail => l + "إ" + v + tail;
-- Hamza
hamza : pattern Str = #("ء"|"؟") ;
head + v1@("ِ"|"ُ"|"َ"|"ْ"|"ا"|"ي"|"و") + #hamza + v2@(""|"ُ"|"َ"|"ْ"|"ِ") => head + v1 + (tHmz v1) + v2;
head + #hamza + tail => head + (bHmz (dp 2 head) (take 2 tail)) + tail; --last head , take 1 tail
_ => word
};
rectifyHmz : Str -> Str = \word ->
case word of {
l@(""|"ال") + ("أ"|"أَ") + #hamza + "ْ" + tail => l + "آ" + tail;
l@(""|"ال") + ("أ"|"أَ") + #hamza + tail => l + "آ" + tail;
l@(""|"ال") + #hamza + v@("َ"|"ُ") + tail => l + "أ" + v + tail;
l@(""|"ال") + #hamza + v@("ِ") + tail => l + "إ" + v + tail;
head + v1@("ِ"|"ُ"|"َ"|"ْ"|"ا"|"ي"|"و")
+ #hamza + v2@(#vow|"ْ") + tail =>
case v2 of { "ْ" => head + v1 + tHmz v1 + tail ; -- unsure about this /IL
_ => head + v1 + tHmz v1 + v2 + tail } ;
--hamza at beginning of word (head)
hHmz : Str -> Str = \d ->
case d of {
"ِ" => "إ";
_ => "أ"
};
head + #hamza + tail => head + (bHmz (dp 2 head) (take 2 tail)) + tail; --last head , take 1 tail
_ => word
};
--hamza in middle of word (body)
bHmz : Str -> Str -> Str = \d1,d2 ->
case <d1,d2> of {
<"ِ",_> | <_,"ِ"> => "ئ";
<"ُ",_> | <_,"ُ"> => "ؤ";
<"َ",_> | <_,"َ"> => "أ";
_ => "ء"
};
--hamza at beginning of word (head)
hHmz : Str -> Str = \d ->
case d of {
"ِ" => "إ";
_ => "أ"
};
--hamza carrier sequence
tHmz : Str -> Str = \d ->
case d of {
"ِ" => "ئ";
"ُ" => "ؤ";
"َ" => "أ";
"ْ"|"ا"|"و"|"ي" => "ء"
};
--hamza in middle of word (body)
bHmz : Str -> Str -> Str = \d1,d2 ->
case <d1,d2> of {
<"ِ",_> | <_,"ِ"> => "ئ";
<"ُ",_> | <_,"ُ"> => "ؤ";
<"َ",_> | <_,"َ"> => "أ";
_ => "ء"
};
--hamza carrier sequence
tHmz : Str -> Str = \d ->
case d of {
"ِ" => "ئ";
"ُ" => "ؤ";
"َ" => "أ";
"ْ"|"ا"|"و"|"ي" => "ء"
};
}

View File

@@ -370,13 +370,7 @@ resource ParadigmsAra = open
v1 = \rootStr,vPerf,vImpf ->
let { raw = v1' rootStr vPerf vImpf } in
{ s = \\vf =>
case rootStr of {
_ + #hamza + _ => rectifyHmz(raw.s ! vf);
_ => raw.s ! vf
};
lock_V = <>
} ;
lin V { s = \\vf =>rectifyHmz (raw.s ! vf) } ;
v1' : Str -> Vowel -> Vowel -> Verb =
\rootStr,vPerf,vImpf ->

View File

@@ -16,10 +16,7 @@ concrete QuestionAra of Question = CatAra ** open ResAra, ParamX, Prelude, VerbA
--IL guessed
QuestVP qp vp =
let np = { s = qp.s ! vp.isPred ! Def ;
a = { pgn = Per3 Masc qp.n ;
isPron = False }
} ;
let np = ip2np qp vp.isPred ;
cl = PredVP np vp ;
in { s = \\t,p,_qf => cl.s ! t ! p ! Nominal } ;
@@ -32,17 +29,17 @@ concrete QuestionAra of Question = CatAra ** open ResAra, ParamX, Prelude, VerbA
-- : IComp -> NP -> QCl
QuestIComp ic np =
let vp = kaan (CompNP np) ;
ip = ic ** { s : Bool => State => Case => Str = \\_,_,_ => ic.s ! pgn2gn np.a.pgn } ;
ip : ResAra.IP = np ** {
s = \\_,_,_ => ic.s ! pgn2gn np.a.pgn } ;
in QuestVP ip vp ;
-- : IP -> IComp ;
CompIP ip = {
CompIP ip = ip ** {
s = \\_ => ip.s ! True -- True=IP will be a subject of predicative sentence
! Def ! Nom ; -- IP will be a subject
n = ip.n
} ;
CompIAdv iadv = { s = \\_ => iadv.s ; n = ResAra.Sg } ;
CompIAdv iadv = { s = \\_ => iadv.s ; a = ResAra.Sg } ;
-- QCl = {s : Tense => Polarity => QForm => Str} ;
QuestSlash ip cl = { ----IL just guessing
@@ -62,13 +59,17 @@ concrete QuestionAra of Question = CatAra ** open ResAra, ParamX, Prelude, VerbA
} ;
-- : IDet -> IP
IdetIP idet = idet ** { s = \\isPred => idet.s ! Masc } ;
IdetIP idet = idet ** {
s = \\isPred => idet.s ! Masc ;
a = { pgn = agrP3 NoHum Masc idet.n ; isPron = False }
} ;
-- : IDet -> CN -> IP
IdetCN idet cn = idet ** {
IdetCN idet cn = {
s = \\isPred,s,c
=> idet.s ! cn.g ! s ! c ++
cn.s ! idet.n ! Indef ! Gen ; --idaafa
a = { pgn = agrP3 NoHum cn.g idet.n ; isPron = False }
} ;
-- : IQuant -> Num -> IDet

View File

@@ -1,11 +1,12 @@
concrete RelativeAra of Relative = CatAra ** open ResAra, SentenceAra in {
concrete RelativeAra of Relative = CatAra **
open ResAra, (Se=SentenceAra), (St=StructuralAra) in {
flags coding=utf8;
lin
-- RelCl cl = {
-- s = \\t,p,agr,c => IdRP.s ! agr2ragr agr c ++ cl.s ! t ! p ! Nominal
-- } ;
RelCl cl = {
s = \\t,p,agr,c => IdRP.s ! agr2ragr agr c ++ cl.s ! t ! p ! Nominal
} ;
-- : RP -> VP -> RCl ; -- who loves John
RelVP rp vp = {
@@ -13,15 +14,19 @@ concrete RelativeAra of Relative = CatAra ** open ResAra, SentenceAra in {
let
npS : Case => Str = \\_ => rp.s ! agr2ragr agr c ;
np = {s = npS ; a = agr} ;
cl = PredVP np vp ;
cl = Se.PredVP np vp ;
in
cl.s ! t ! p ! Nominal
} ;
-- : RP -> ClSlash -> RCl ; -- whom John loves
-- TODO: add resumptive pronouns
-- RelSlash rp slash = {
-- } ;
RelSlash rp cl = cl ** {
s = \\t,p,agr,c =>
let obj = case (pgn2gn agr.pgn).g of {
Fem => St.she_Pron ;
Masc => St.he_Pron } ;
in rp.s ! agr2ragr agr c ++ cl.s ! t ! p ! Nominal ++ cl.c2.s ++ obj.s ! cl.c2.c
} ;
--
-- FunRP p np rp = {
-- s = \\c => np.s ! c ++ p.s ++ rp.s ! Acc ;

View File

@@ -1130,12 +1130,12 @@ patHollowImp : (_,_ :Str) -> Gender => Number => Str =\xaf,xAf ->
mkIP = overload {
mkIP : Str -> Number -> IP = \maa,n -> {
s = \\_p,_s,_c => maa ;
n = n
a = { pgn = agrP3 NoHum Masc n ; isPron = False }
} ;
mkIP : (_,_ : Str) -> Number -> IP = \maa,maadhaa,n -> {
s = table { True => \\_s,_c => maa ;
False => \\_s,_c => maadhaa } ;
n = n
a = { pgn = agrP3 NoHum Masc n ; isPron = False }
}
} ;
@@ -1189,7 +1189,6 @@ patHollowImp : (_,_ :Str) -> Gender => Number => Str =\xaf,xAf ->
s : AAgr -- "how old": masc or fem for adjective
-- no need for Case, IComp is only used by QuestIComp, as grammatical subject
=> Str ;
n : Number
} ;
Obj : Type = {
@@ -1206,9 +1205,11 @@ patHollowImp : (_,_ :Str) -> Gender => Number => Str =\xaf,xAf ->
s : Bool -- different forms for "what is this" and "what do you do"
=> State => Case -- because of PrepIP: e.g. "in which" chooses definite accusative
=> Str ;
n : Number
a : Agr -- can be both subject and object of a QCl, needs full agr. info
} ;
ip2np : IP -> Bool -> NP = \ip,isPred -> ip ** { s = ip.s ! isPred ! Def } ;
IDet : Type = {
s : Gender -- IdetCN needs to choose the gender of the CN
=> State => Case => Str ;
@@ -1313,17 +1314,9 @@ patHollowImp : (_,_ :Str) -> Gender => Number => Str =\xaf,xAf ->
Pl => RPl g }
} ;
-- ragr2agr : Number -> Case -> Gender -> RAgr = \ra ->
-- case ra of {
-- RSg x => Per3 Sg x ;
-- RPl x => Per3 Pl x ;
-- RDl x => Per3 Dl x } ;
RCl : Type = {s : Tense => Polarity => Agr => Case => Str} ;
RP : Type = {s : RAgr => Str } ;
--TODO: slashRCl : ClSlash -> RP -> RCl ;
param
Size = One | Two | ThreeTen | Teen | NonTeen | Hundreds | None ;

View File

@@ -111,8 +111,8 @@ concrete SentenceAra of Sentence = CatAra ** open
-- EmbedQS qs = {s = qs.s ! QIndir} ;
-- EmbedVP vp = {s = infVP False vp (agrP3 Sg)} ; --- agr
--
UseCl t p cl =
UseSlash,
UseCl = \t,p,cl ->
{s = t.s ++ p.s ++
case <t.t,t.a> of { --- IL guessed tenses
<(Pres|Cond),Simul> => cl.s ! Pres ! p.p ! Nominal ;