(Ara) More fixes to hamza rules

This commit is contained in:
Inari Listenmaa
2018-11-08 15:17:09 +01:00
parent ec0d36bd1d
commit e6d57f3b03

View File

@@ -2,67 +2,70 @@ resource OrthoAra = open Prelude, Predef in {
-- NOTE(review): this text is a side-by-side diff rendering; each line below
-- carries the pre-commit text followed by the post-commit text. In this
-- stretch both columns are identical.
flags coding=utf8 ; flags coding=utf8 ;
oper oper
-- vow: string pattern matching exactly one of six vowel diacritics
-- (the three short-vowel marks and their tanwin forms).
vow : pattern Str = #("َ" | "ِ" | "ُ" | "ً" | "ٍ" | "ٌ") ; vow : pattern Str = #("َ" | "ِ" | "ُ" | "ً" | "ٍ" | "ٌ") ;
-- weak: string pattern matching one of the two letters waw or ya.
weak : pattern Str = #("و"|"ي") ; weak : pattern Str = #("و"|"ي") ;
-- "Sun letters": assimilate with def. article -- "Sun letters": assimilate with def. article
-- sun: string pattern matching any one of the fourteen letters listed.
sun : pattern Str = #("ت"|"ث"|"د"|"ذ"|"ر"|"ز"|"س"|"ش"|"ص"|"ض"|"ط"|"ظ"|"ل"|"ن") ; sun : pattern Str = #("ت"|"ث"|"د"|"ذ"|"ر"|"ز"|"س"|"ش"|"ص"|"ض"|"ط"|"ظ"|"ل"|"ن") ;
-- NOTE(review): side-by-side diff rendering; each line carries the pre-commit
-- text followed by the post-commit text, so the two versions of rmSukun and
-- of the hamza pattern appear a few lines apart in this stretch.
--
-- fixShd word suffix: joins word and suffix. When word ends in a vowel mark
-- (#vow) and suffix begins with shadda, the shadda is emitted before the
-- vowel (x + shadda + vowel + y); in every other case the result is plain
-- word + suffix. The opposite rewrite (vowel moved before shadda) is present
-- only as a commented-out branch.
--
-- rmSukun s: if s contains a sukun, returns s with one sukun removed
-- (x + sukun + y => x + y); otherwise returns s unchanged.
--
-- hamza: string pattern with two alternatives.
-- NOTE(review): the second alternative looks like the Arabic question mark,
-- presumably used as a placeholder for hamza in the lexicon - confirm.
-- Shadda: https://www.unicode.org/L2/L2017/17253-arabic-ordering.pdf -- Shadda: https://www.unicode.org/L2/L2017/17253-arabic-ordering.pdf
fixShd : Str -> Str -> Str = \word,suffix -> fixShd : Str -> Str -> Str = \word,suffix ->
case <word,suffix> of { case <word,suffix> of {
-- <x + "ّ", v@#vow + y> => x + v + "ّ" + y ; -- <x + "ّ", v@#vow + y> => x + v + "ّ" + y ;
<x + v@#vow, "ّ" + y> => x + "ّ" + v + y ; <x + v@#vow, "ّ" + y> => x + "ّ" + v + y ;
_ => word + suffix _ => word + suffix
} ;
-- IL: using this to reuse patterns for weak verbs, might be strange/wrong
rmSukun : Str -> Str = \s -> case s of {
x + "ْ" + y => x + y ;
_ => s
} ; } ;
-- Hamza -- IL: using this to reuse patterns for weak verbs, might be strange/wrong
hamza : pattern Str = #("ء"|"؟") ; rmSukun : Str -> Str = \s -> case s of {
x + "ْ" + y => x + y ;
_ => s
} ;
-- NOTE(review): side-by-side diff rendering; each line carries the pre-commit
-- text followed by the post-commit text. The post-commit column is shifted
-- down by several lines, so old and new versions of rectifyHmz, hHmz, bHmz
-- and tHmz interleave on shared lines below.
--
-- rectifyHmz word: rewrites a placeholder hamza (#hamza) in word to a written
-- form. Branches, in the order they are listed:
--   * optional "ال" + ("أ"|"أَ") + hamza (+ sukun) + tail => prefix + "آ" + tail
--   * optional "ال" + hamza + fatha/damma + tail          => prefix + "أ" + vowel + tail
--   * optional "ال" + hamza + kasra + tail                => prefix + "إ" + kasra + tail
--   * head + v1 + hamza + v2: the written form is tHmz v1.
--       old column: v2 may be empty or a vowel/sukun and must end the word;
--                   v2 is always kept.
--       new column: v2 must be a vowel (#vow) or sukun and may be followed by
--                   tail; a sukun v2 is dropped, any other v2 is kept.
--   * head + hamza + tail: the written form is bHmz (dp 2 head) (take 2 tail).
--   * anything else is returned unchanged.
-- NOTE(review): the trailing comment on the bHmz branch says "last head ,
-- take 1 tail" but the code passes dp 2 / take 2. bHmz compares its arguments
-- against one-character strings, so presumably two-character arguments fall
-- through to its default branch - confirm the intended lengths.
-- NOTE(review): in the new column an empty v2 is no longer accepted, so a
-- word-final hamza with no following mark is handled by the bHmz branch.
--
-- hHmz d: returns "إ" when d is kasra, otherwise "أ".
--
-- bHmz d1 d2: written form chosen from the pair <d1,d2>, first match wins:
-- either is kasra => "ئ"; either is damma => "ؤ"; either is fatha => "أ";
-- otherwise => "ء".
--
-- tHmz d: kasra => "ئ"; damma => "ؤ"; fatha => "أ"; sukun, alif, waw or
-- ya => "ء".
-- NOTE(review): tHmz has no catch-all branch; it relies on callers passing
-- only the seven listed values (rectifyHmz restricts v1 to exactly these).
rectifyHmz: Str -> Str = \word -> -- Hamza
case word of { hamza : pattern Str = #("ء"|"؟") ;
l@(""|"ال") + ("أ"|"أَ") + #hamza + "ْ" + tail => l + "آ" + tail;
l@(""|"ال") + ("أ"|"أَ") + #hamza + tail => l + "آ" + tail;
l@(""|"ال") + #hamza + v@("َ"|"ُ") + tail => l + "أ" + v + tail;
l@(""|"ال") + #hamza + v@("ِ") + tail => l + "إ" + v + tail;
head + v1@("ِ"|"ُ"|"َ"|"ْ"|"ا"|"ي"|"و") + #hamza + v2@(""|"ُ"|"َ"|"ْ"|"ِ") => head + v1 + (tHmz v1) + v2; rectifyHmz : Str -> Str = \word ->
head + #hamza + tail => head + (bHmz (dp 2 head) (take 2 tail)) + tail; --last head , take 1 tail case word of {
_ => word l@(""|"ال") + ("أ"|"أَ") + #hamza + "ْ" + tail => l + "آ" + tail;
}; l@(""|"ال") + ("أ"|"أَ") + #hamza + tail => l + "آ" + tail;
l@(""|"ال") + #hamza + v@("َ"|"ُ") + tail => l + "أ" + v + tail;
l@(""|"ال") + #hamza + v@("ِ") + tail => l + "إ" + v + tail;
head + v1@("ِ"|"ُ"|"َ"|"ْ"|"ا"|"ي"|"و")
+ #hamza + v2@(#vow|"ْ") + tail =>
case v2 of { "ْ" => head + v1 + tHmz v1 + tail ; -- unsure about this /IL
_ => head + v1 + tHmz v1 + v2 + tail } ;
--hamza at beginning of word (head) head + #hamza + tail => head + (bHmz (dp 2 head) (take 2 tail)) + tail; --last head , take 1 tail
hHmz : Str -> Str = \d -> _ => word
case d of { };
"ِ" => "إ";
_ => "أ"
};
--hamza in middle of word (body) --hamza at beginning of word (head)
bHmz : Str -> Str -> Str = \d1,d2 -> hHmz : Str -> Str = \d ->
case <d1,d2> of { case d of {
<"ِ",_> | <_,"ِ"> => "ئ"; "ِ" => "إ";
<"ُ",_> | <_,"ُ"> => "ؤ"; _ => "أ"
<"َ",_> | <_,"َ"> => "أ"; };
_ => "ء"
};
--hamza carrier sequence --hamza in middle of word (body)
tHmz : Str -> Str = \d -> bHmz : Str -> Str -> Str = \d1,d2 ->
case d of { case <d1,d2> of {
"ِ" => "ئ"; <"ِ",_> | <_,"ِ"> => "ئ";
"ُ" => "ؤ"; <"ُ",_> | <_,"ُ"> => "ؤ";
"َ" => "أ"; <"َ",_> | <_,"َ"> => "أ";
"ْ"|"ا"|"و"|"ي" => "ء" _ => "ء"
}; };
--hamza carrier sequence
tHmz : Str -> Str = \d ->
case d of {
"ِ" => "ئ";
"ُ" => "ؤ";
"َ" => "أ";
"ْ"|"ا"|"و"|"ي" => "ء"
};
} }