(Ara) More fixes to hamza rules

This commit is contained in:
Inari Listenmaa
2018-11-08 15:17:09 +01:00
parent ec0d36bd1d
commit e6d57f3b03

View File

@@ -2,7 +2,7 @@ resource OrthoAra = open Prelude, Predef in {
flags coding=utf8 ;
oper
oper
-- Vowel diacritics: the three short vowels (fatha, kasra, damma)
-- and the three tanwin marks (fathatan, kasratan, dammatan).
-- Referenced in case patterns as #vow.
vow : pattern Str = #("َ" | "ِ" | "ُ" | "ً" | "ٍ" | "ٌ") ;
@@ -11,7 +11,7 @@ flags coding=utf8 ;
-- "Sun letters": assimilate with def. article
-- Matches any one of the 14 letters listed; can be referenced in case patterns as #sun.
sun : pattern Str = #("ت"|"ث"|"د"|"ذ"|"ر"|"ز"|"س"|"ش"|"ص"|"ض"|"ط"|"ظ"|"ل"|"ن") ;
-- Shadda: https://www.unicode.org/L2/L2017/17253-arabic-ordering.pdf
-- Shadda: https://www.unicode.org/L2/L2017/17253-arabic-ordering.pdf
fixShd : Str -> Str -> Str = \word,suffix ->
case <word,suffix> of {
-- <x + "ّ", v@#vow + y> => x + v + "ّ" + y ;
@@ -19,23 +19,26 @@ flags coding=utf8 ;
_ => word + suffix
} ;
-- IL: using this to reuse patterns for weak verbs, might be strange/wrong
-- IL: using this to reuse patterns for weak verbs, might be strange/wrong
-- Drop a single sukun mark from the given string.
-- A string that contains no sukun is returned unchanged.
rmSukun : Str -> Str = \str ->
case str of {
front + "ْ" + rest => front + rest ;
_ => str
} ;
-- Hamza
-- Hamza
-- Matches the characters that rectifyHmz treats as a hamza still to be
-- given its final written form: the bare hamza "ء" and "؟".
-- NOTE(review): "؟" is the Arabic question mark; presumably it is used as a
-- stand-in for hamza elsewhere in the grammar -- confirm.
hamza : pattern Str = #("ء"|"؟") ;
-- rectifyHmz rewrites a hamza placeholder (anything matching #hamza) in a
-- word into a concrete written form, chosen from its neighbouring characters.
-- The branches of the case are tried in order, so the more specific
-- word-initial rules come before the general ones.
-- tHmz and bHmz are defined outside this view; presumably they pick the
-- hamza form from the neighbouring characters -- confirm against their definitions.
-- NOTE(review): the next two lines are the same signature, differing only in
-- the space before ':'. This looks like the before/after of a whitespace
-- change; only one of them should remain in the actual file.
rectifyHmz: Str -> Str = \word ->
rectifyHmz : Str -> Str = \word ->
case word of {
-- Optional article "ال", then "أ" (with or without fatha), then hamza + sukun:
-- the sequence collapses into alef with madda "آ".
l@(""|"ال") + ("أ"|"أَ") + #hamza + "ْ" + tail => l + "آ" + tail;
-- Same as above when the hamza carries no sukun.
l@(""|"ال") + ("أ"|"أَ") + #hamza + tail => l + "آ" + tail;
-- Hamza at the start (after the optional article) followed by fatha or damma:
-- written as alef with hamza above, vowel kept.
l@(""|"ال") + #hamza + v@("َ"|"ُ") + tail => l + "أ" + v + tail;
-- Hamza at the start followed by kasra: written as alef with hamza below, kasra kept.
l@(""|"ال") + #hamza + v@("ِ") + tail => l + "إ" + v + tail;
-- Hamza preceded by v1 (short vowel, sukun, or one of ا ي و) and followed by
-- v2 (any #vow diacritic or sukun): the hamza is replaced by tHmz v1.
head + v1@("ِ"|"ُ"|"َ"|"ْ"|"ا"|"ي"|"و")
+ #hamza + v2@(#vow|"ْ") + tail =>
-- When v2 is a sukun it is dropped from the output; otherwise v2 is kept.
case v2 of { "ْ" => head + v1 + tHmz v1 + tail ; -- unsure about this /IL
_ => head + v1 + tHmz v1 + v2 + tail } ;
-- Hamza at the very end of the word, optionally followed by one short vowel
-- or sukun: replaced by tHmz v1, with v2 kept as is.
head + v1@("ِ"|"ُ"|"َ"|"ْ"|"ا"|"ي"|"و") + #hamza + v2@(""|"ُ"|"َ"|"ْ"|"ِ") => head + v1 + (tHmz v1) + v2;
-- Any other hamza: bHmz receives the last two characters of head (dp 2)
-- and the first two characters of tail (take 2).
-- NOTE(review): the original trailing comment mentions "last head, take 1 tail",
-- while the code passes two characters on each side -- confirm which is intended.
head + #hamza + tail => head + (bHmz (dp 2 head) (take 2 tail)) + tail; --last head , take 1 tail
-- No hamza placeholder in the word: return it unchanged.
_ => word
};