-- Orthographic helpers for Arabic: character-class patterns plus string
-- fix-ups for shadda, sukun and hamza.
--
-- NOTE(review): the tuple scrutinees in fixShd and bHmz, and the tuple
-- patterns in fixShd, were missing from the text this module was recovered
-- from; they have been reconstructed from the surrounding code. Confirm
-- against the upstream file.
resource OrthoAra = open Prelude, Predef in {

flags coding=utf8 ;

oper

  -- Vowel diacritics; used below wherever a vowel mark has to be recognised.
  vow : pattern Str = #("َ" | "ِ" | "ُ" | "ً" | "ٍ" | "ٌ") ;

  -- Weak letters.
  weak : pattern Str = #("و"|"ي") ;

  -- "Sun letters": assimilate with def. article
  sun : pattern Str = #("ت"|"ث"|"د"|"ذ"|"ر"|"ز"|"س"|"ش"|"ص"|"ض"|"ط"|"ظ"|"ل"|"ن") ;

-- Shadda: https://www.unicode.org/L2/L2017/17253-arabic-ordering.pdf
  -- fixShd word suffix: concatenates word and suffix. When word ends in a
  -- vowel diacritic and suffix begins with shadda, the shadda is emitted
  -- before that vowel; in every other case the result is word + suffix.
  -- The commented-out branch is the alternative ordering (vowel first).
  fixShd : Str -> Str -> Str = \word,suffix ->
    case <word,suffix> of {
--    <x + v@#vow, "ّ" + y> => x + v + "ّ" + y ;
      <x + v@#vow, "ّ" + y> => x + "ّ" + v + y ;
      _                     => word + suffix
    } ;

-- IL: using this to reuse patterns for weak verbs, might be strange/wrong
  -- rmSukun s: removes one sukun from s (the one the pattern x + sukun + y
  -- selects); a string without sukun is returned unchanged.
  rmSukun : Str -> Str = \s -> case s of {
    x + "ْ" + y => x + y ;
    _           => s
    } ;

-- Hamza
  -- Placeholder hamza as it appears in the input to rectifyHmz.
  -- NOTE(review): the second alternative is the Arabic question mark,
  -- presumably a stand-in for hamza in some lexicon entries; confirm.
  hamza : pattern Str = #("ء"|"؟") ;

  -- rectifyHmz word: rewrites a placeholder hamza in word to a concrete
  -- written form. Branches are tried top to bottom:
  --   1-2. optional "ال", then alif-hamza (optionally with fatha), then
  --        hamza (optionally with sukun): all of it becomes "آ";
  --   3-4. optional "ال", then hamza and a vowel at the start of the rest:
  --        "أ" before fatha/damma, "إ" before kasra, the vowel is kept;
  --   5.   hamza preceded by one of the listed marks/letters and followed
  --        by a vowel or sukun: the form is chosen by tHmz from the
  --        preceding character; a following sukun is dropped;
  --   6.   any other hamza: the form is chosen by bHmz from the last two
  --        characters of head and the first two of tail;
  --   7.   no hamza: word is returned unchanged.
  rectifyHmz : Str -> Str = \word ->
    case word of {
      l@(""|"ال") + ("أ"|"أَ") + #hamza + "ْ" + tail => l + "آ" + tail;
      l@(""|"ال") + ("أ"|"أَ") + #hamza + tail       => l + "آ" + tail;
      l@(""|"ال") + #hamza + v@("َ"|"ُ") + tail      => l + "أ" + v + tail;
      l@(""|"ال") + #hamza + v@("ِ") + tail          => l + "إ" + v + tail;

      head + v1@("ِ"|"ُ"|"َ"|"ْ"|"ا"|"ي"|"و")
           + #hamza + v2@(#vow|"ْ") + tail =>
        case v2 of { "ْ" => head + v1 + tHmz v1 + tail ; -- unsure about this /IL
                     _   => head + v1 + tHmz v1 + v2 + tail } ;

      -- NOTE(review): dp 2 / take 2 hand bHmz strings of up to two
      -- characters, while bHmz matches single-character literals, and the
      -- trailing comment below mentions "take 1". Confirm which is intended.
      head + #hamza + tail => head + (bHmz (dp 2 head) (take 2 tail)) + tail; --last head , take 1 tail
      _                    => word
    };

  --hamza at beginning of word (head)
  -- hHmz d: "إ" when d is kasra, "أ" for anything else.
  hHmz : Str -> Str = \d ->
    case d of {
      "ِ" => "إ";
      _   => "أ"
    };

  --hamza in middle of word (body)
  -- bHmz d1 d2: picks the written form from the two neighbours. Kasra on
  -- either side wins, then damma, then fatha; otherwise the bare "ء".
  bHmz : Str -> Str -> Str = \d1,d2 ->
    case <d1,d2> of {
      <"ِ",_> | <_,"ِ"> => "ئ";
      <"ُ",_> | <_,"ُ"> => "ؤ";
      <"َ",_> | <_,"َ"> => "أ";
      _                 => "ء"
    };

  --hamza carrier sequence
  -- tHmz d: picks the written form from the preceding character d. Only the
  -- seven values that rectifyHmz can pass are covered; there is no default
  -- branch.
  tHmz : Str -> Str = \d ->
    case d of {
      "ِ" => "ئ";
      "ُ" => "ؤ";
      "َ" => "أ";
      "ْ"|"ا"|"و"|"ي" => "ء"
    };

}