(Ara) updates in hamza rules + sound fem. plural noun constructor

This commit is contained in:
Inari Listenmaa
2018-12-06 12:20:35 +02:00
parent dadcd386c9
commit db2560efe4
3 changed files with 75 additions and 27 deletions
+21 -12
View File
@@ -6,8 +6,16 @@ oper
-- Pattern macros over single letters / diacritics; referenced elsewhere as #name.
-- vow: the short-vowel marks (fatha, kasra, damma) and their nunated (tanwin) forms.
vow : pattern Str = #("َ" | "ِ" | "ُ" | "ً" | "ٍ" | "ٌ") ;
-- vstar: everything in vow, plus sukun and the letters alif, ya and waw.
vstar : pattern Str = #("َ"|"ِ"|"ُ"|"ً"|"ٍ"|"ٌ"|"ْ"|"ا"|"ي"|"و") ; -- long or short vowels
astar : pattern Str = #("َ"|"ً"|"ا") ; -- a: short, nunated or long
istar : pattern Str = #("ِ"|"ي"|"يْ") ; -- i: short, long or long with sukun
ustar : pattern Str = #("ُ"|"و"|"وْ") ; -- u: short, long or long with sukun
-- weak: the letters waw and ya.
weak : pattern Str = #("و"|"ي") ;
-- hamzaseat: hamza written on one of its three carriers (alif, waw, ya).
hamzaseat : pattern Str = #("أ"|"ؤ"|"ئ") ;
-- "Sun letters": assimilate with def. article
sun : pattern Str = #("ت"|"ث"|"د"|"ذ"|"ر"|"ز"|"س"|"ش"|"ص"|"ض"|"ط"|"ظ"|"ل"|"ن") ;
@@ -30,19 +38,19 @@ oper
rectifyHmz : Str -> Str = \word ->
case word of {
l@(""|"ال") + ("أ"|"أَ") + #hamza + "ْ" + tail => l + "آ" + tail;
l@(""|"ال") + ("أ"|"أَ") + #hamza + tail => l + "آ" + tail;
l@(""|"ل"|"ال") + ("أ"|"أَ") + #hamza + "ْ" + tail => l + "آ" + tail;
l@(""|"ل"|"ال") + ("أ"|"أَ") + #hamza + tail => l + "آ" + tail;
l@(""|"ال") + #hamza + v@("َ"|"ُ") + tail => l + "أ" + v + tail;
l@(""|"ال") + #hamza + v@("ِ") + tail => l + "إ" + v + tail;
head + v1@(#vow|"ْ"|"ا"|"ي"|"و")
head + v1@#vstar
+ #hamza + v2@(#vow|"ْ") + tail =>
case v2 of { "ْ" => head + v1 + bHmz v1 v2 + tail ; -- unsure about this /IL
_ => head + v1 + bHmz v1 v2 + v2 + tail } ;
head + v1@(#vow|"ْ"|"ا"|"ي"|"و") -- the same but it ends in vowel
head + v1@#vstar -- the same but it ends in vowel
+ #hamza + v2@(#vow|"ْ") =>
case v2 of { "ْ" => head + v1 + tHmz v1 ;
_ => head + v1 + tHmz v1 + v2 } ;
head + v1@(#vow|"ْ"|"ا"|"ي"|"و") -- the same but it ends without vowel
head + v1@#vstar -- the same but it ends without vowel
+ #hamza => head + v1 + tHmz v1 ;
head + #hamza + tail => head + (bHmz (dp 2 head) (take 2 tail)) + tail; --last head , take 1 tail
@@ -57,21 +65,22 @@ oper
};
--hamza in middle of word (body)
-- relaxing the pattern matching, so that we can call it from ResAra.sing /IL
--
-- bHmz d1 d2 returns the written form of a word-internal hamza.
--   d1 : the text immediately before the hamza (rectifyHmz passes the
--        preceding vowel, or the last two characters of the head)
--   d2 : the text immediately after the hamza (the following vowel/sukun,
--        or the first two characters of the tail)
-- Branches are tried in order: an i-type neighbour gives hamza on ya,
-- then a u-type neighbour gives hamza on waw, then fatha gives hamza on
-- alif; anything else gives the bare hamza.
--
-- NOTE(review): two sets of branches appear back to back below. The first
-- four match d1/d2 only as exactly one diacritic; the next four match d1 by
-- suffix and use #istar/#ustar. They look like the pre- and post-change
-- versions of the same case expression -- confirm, and keep only one set.
bHmz : Str -> Str -> Str = \d1,d2 ->
case <d1,d2> of {
-- exact-match variant: d1 or d2 must be precisely the one diacritic
<"ِ",_> | <_,"ِ"> => "ئ";
<"ُ",_> | <_,"ُ"> => "ؤ";
<"َ",_> | <_,"َ"> => "أ";
_ => "ء"
-- relaxed variant: d1 only has to END in the vowel; d2 must equal it
<_+#istar,_> | <_,#istar> => "ئ";
<_+#ustar,_> | <_,#ustar> => "ؤ";
<_+"َ" ,_> | <_,"َ"> => "أ"; -- #astar would allow double alif
_ => "ء"
};
--hamza carrier sequence
--
-- tHmz d returns the written form of a hamza given only what precedes it.
-- rectifyHmz calls it for a hamza at the end of the word (optionally
-- followed by a final vowel), with d = the vowel/sukun before the hamza.
--   kasra -> hamza on ya, damma -> hamza on waw, fatha -> hamza on alif,
--   anything else -> bare hamza.
--
-- NOTE(review): the damma and fatha branches occur twice, and there are two
-- fallback branches (an explicit list of sukun/alif/waw/ya, then a
-- wildcard). This looks like the pre- and post-change versions of the same
-- branches side by side -- confirm, and keep only one set.
tHmz : Str -> Str = \d ->
case d of {
"ِ" => "ئ";
-- variant with an explicit list for the bare-hamza case
"ُ" => "ؤ";
"َ" => "أ";
"ْ"|"ا"|"و"|"ي" => "ء"
-- variant with a wildcard for the bare-hamza case
"ُ" => "ؤ";
"َ" => "أ";
_ => "ء" -- long vowels and sukun
};
}