mirror of
https://github.com/GrammaticalFramework/gf-rgl.git
synced 2026-05-28 09:28:54 -06:00
(Ara) More fixes to hamza rules
This commit is contained in:
@@ -2,67 +2,70 @@ resource OrthoAra = open Prelude, Predef in {
|
|||||||
|
|
||||||
flags coding=utf8 ;
|
flags coding=utf8 ;
|
||||||
|
|
||||||
oper
|
oper
|
||||||
|
|
||||||
-- Pattern matching exactly one of the six vowel marks listed below
-- (presumably fatha, kasra, damma and their tanwin forms; confirm code points).
vow : pattern Str = #("َ" | "ِ" | "ُ" | "ً" | "ٍ" | "ٌ") ;
|
-- Pattern matching exactly one of the six vowel marks listed below
-- (presumably fatha, kasra, damma and their tanwin forms; confirm code points).
vow : pattern Str = #("َ" | "ِ" | "ُ" | "ً" | "ٍ" | "ٌ") ;
|
||||||
|
|
||||||
-- Pattern matching one of the two letters "و" or "ي".
weak : pattern Str = #("و"|"ي") ;
|
-- Pattern matching one of the two letters "و" or "ي".
weak : pattern Str = #("و"|"ي") ;
|
||||||
|
|
||||||
-- "Sun letters": assimilate with def. article
|
-- "Sun letters": assimilate with def. article
|
||||||
-- Pattern matching any one of the 14 consonants listed below.
sun : pattern Str = #("ت"|"ث"|"د"|"ذ"|"ر"|"ز"|"س"|"ش"|"ص"|"ض"|"ط"|"ظ"|"ل"|"ن") ;
|
-- Pattern matching any one of the 14 consonants listed below.
sun : pattern Str = #("ت"|"ث"|"د"|"ذ"|"ر"|"ز"|"س"|"ش"|"ص"|"ض"|"ط"|"ظ"|"ل"|"ن") ;
|
||||||
|
|
||||||
-- Shadda: https://www.unicode.org/L2/L2017/17253-arabic-ordering.pdf
-- fixShd word suffix: concatenates word and suffix. If word ends in a #vow
-- mark and suffix begins with "ّ", the two marks are emitted in swapped order
-- ("ّ" first, then the vowel mark); in every other case the result is plain
-- word + suffix.
|
-- Shadda: https://www.unicode.org/L2/L2017/17253-arabic-ordering.pdf
-- fixShd word suffix: concatenates word and suffix. If word ends in a #vow
-- mark and suffix begins with "ّ", the two marks are emitted in swapped order
-- ("ّ" first, then the vowel mark); in every other case the result is plain
-- word + suffix.
|
||||||
fixShd : Str -> Str -> Str = \word,suffix ->
|
fixShd : Str -> Str -> Str = \word,suffix ->
|
||||||
case <word,suffix> of {
|
case <word,suffix> of {
|
||||||
-- Disabled branch, kept for reference ("ّ" ending word, vowel mark starting suffix):
-- <x + "ّ", v@#vow + y> => x + v + "ّ" + y ;
|
-- Disabled branch, kept for reference ("ّ" ending word, vowel mark starting suffix):
-- <x + "ّ", v@#vow + y> => x + v + "ّ" + y ;
|
||||||
<x + v@#vow, "ّ" + y> => x + "ّ" + v + y ;
|
<x + v@#vow, "ّ" + y> => x + "ّ" + v + y ;
|
||||||
_ => word + suffix
|
_ => word + suffix
|
||||||
} ;
|
|
||||||
|
|
||||||
-- IL: using this to reuse patterns for weak verbs, might be strange/wrong
-- rmSukun s: if s contains "ْ", returns s with one occurrence of it removed;
-- otherwise returns s unchanged. There is no recursion, so any further
-- occurrences are left in place.
|
|
||||||
rmSukun : Str -> Str = \s -> case s of {
|
|
||||||
x + "ْ" + y => x + y ;
|
|
||||||
_ => s
|
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
-- Hamza
|
-- IL: using this to reuse patterns for weak verbs, might be strange/wrong
|
||||||
-- Pattern for the placeholder characters that rectifyHmz rewrites: "ء" or "؟".
-- NOTE(review): the second alternative looks like the Arabic question mark;
-- confirm it is intended as a hamza placeholder.
hamza : pattern Str = #("ء"|"؟") ;
|
-- rmSukun s: if s contains "ْ", returns s with one occurrence of it removed;
-- otherwise returns s unchanged. There is no recursion, so any further
-- occurrences are left in place.
rmSukun : Str -> Str = \s -> case s of {
|
||||||
|
x + "ْ" + y => x + y ;
|
||||||
|
_ => s
|
||||||
|
} ;
|
||||||
|
|
||||||
-- rectifyHmz word: rewrites a #hamza placeholder in word to a concrete hamza
-- spelling. Branches are tried in order:
--   1-2. word start (optionally after "ال"), placeholder right after "أ"/"أَ":
--        both become "آ"; branch 1 also consumes a "ْ" following the placeholder.
--   3-4. word start (optionally after "ال"), placeholder before "َ"/"ُ" -> "أ",
--        before "ِ" -> "إ".
--   5.   placeholder preceded by one of the listed marks/letters and followed
--        only by an optional vowel mark or "ْ" at the end of the word:
--        spelling chosen by tHmz from the preceding character.
--   6.   any other placeholder: spelling chosen by bHmz.
--   7.   no placeholder: word is returned unchanged.
rectifyHmz: Str -> Str = \word ->
|
-- Hamza
|
||||||
case word of {
|
-- Pattern for the placeholder characters that rectifyHmz rewrites: "ء" or "؟".
-- NOTE(review): the second alternative looks like the Arabic question mark;
-- confirm it is intended as a hamza placeholder.
hamza : pattern Str = #("ء"|"؟") ;
|
||||||
l@(""|"ال") + ("أ"|"أَ") + #hamza + "ْ" + tail => l + "آ" + tail;
|
|
||||||
l@(""|"ال") + ("أ"|"أَ") + #hamza + tail => l + "آ" + tail;
|
|
||||||
l@(""|"ال") + #hamza + v@("َ"|"ُ") + tail => l + "أ" + v + tail;
|
|
||||||
l@(""|"ال") + #hamza + v@("ِ") + tail => l + "إ" + v + tail;
|
|
||||||
|
|
||||||
head + v1@("ِ"|"ُ"|"َ"|"ْ"|"ا"|"ي"|"و") + #hamza + v2@(""|"ُ"|"َ"|"ْ"|"ِ") => head + v1 + (tHmz v1) + v2;
|
-- rectifyHmz word: rewrites a #hamza placeholder in word to a concrete hamza
-- spelling. Branches are tried in order:
--   1-2. word start (optionally after "ال"), placeholder right after "أ"/"أَ":
--        both become "آ"; branch 1 also consumes a "ْ" following the placeholder.
--   3-4. word start (optionally after "ال"), placeholder before "َ"/"ُ" -> "أ",
--        before "ِ" -> "إ".
--   5.   placeholder preceded by one of the listed marks/letters and followed
--        by a #vow mark or "ْ": spelling chosen by tHmz from the preceding
--        character; a following "ْ" is dropped, a following vowel mark is kept.
--   6.   any other placeholder: spelling chosen by bHmz.
--   7.   no placeholder: word is returned unchanged.
rectifyHmz : Str -> Str = \word ->
|
||||||
-- NOTE(review): the trailing note says "last head, take 1 tail", but the call
-- passes dp 2 / take 2, while bHmz only has one-character literal patterns;
-- confirm the intended slice length.
head + #hamza + tail => head + (bHmz (dp 2 head) (take 2 tail)) + tail; --last head , take 1 tail
|
case word of {
|
||||||
_ => word
|
l@(""|"ال") + ("أ"|"أَ") + #hamza + "ْ" + tail => l + "آ" + tail;
|
||||||
};
|
l@(""|"ال") + ("أ"|"أَ") + #hamza + tail => l + "آ" + tail;
|
||||||
|
l@(""|"ال") + #hamza + v@("َ"|"ُ") + tail => l + "أ" + v + tail;
|
||||||
|
l@(""|"ال") + #hamza + v@("ِ") + tail => l + "إ" + v + tail;
|
||||||
|
head + v1@("ِ"|"ُ"|"َ"|"ْ"|"ا"|"ي"|"و")
|
||||||
|
+ #hamza + v2@(#vow|"ْ") + tail =>
|
||||||
|
case v2 of { "ْ" => head + v1 + tHmz v1 + tail ; -- unsure about this /IL
|
||||||
|
_ => head + v1 + tHmz v1 + v2 + tail } ;
|
||||||
|
|
||||||
--hamza at beginning of word (head)
|
-- Fallback branch of rectifyHmz: any remaining #hamza placeholder gets the
-- spelling chosen by bHmz from the neighbouring slices of head and tail.
-- NOTE(review): the trailing note says "last head, take 1 tail", but the call
-- passes dp 2 / take 2, while bHmz only has one-character literal patterns;
-- confirm the intended slice length.
head + #hamza + tail => head + (bHmz (dp 2 head) (take 2 tail)) + tail; --last head , take 1 tail
|
||||||
-- hHmz d: returns "إ" when d is exactly "ِ", and "أ" for any other d.
hHmz : Str -> Str = \d ->
|
_ => word
|
||||||
case d of {
|
};
|
||||||
"ِ" => "إ";
|
|
||||||
_ => "أ"
|
|
||||||
};
|
|
||||||
|
|
||||||
--hamza in middle of word (body)
|
--hamza at beginning of word (head)
|
||||||
-- bHmz d1 d2: picks the hamza spelling from two context strings, first match wins:
-- "ئ" if either is exactly "ِ", else "ؤ" if either is exactly "ُ",
-- else "أ" if either is exactly "َ", else the bare "ء".
bHmz : Str -> Str -> Str = \d1,d2 ->
|
-- hHmz d: returns "إ" when d is exactly "ِ", and "أ" for any other d.
hHmz : Str -> Str = \d ->
|
||||||
case <d1,d2> of {
|
case d of {
|
||||||
<"ِ",_> | <_,"ِ"> => "ئ";
|
"ِ" => "إ";
|
||||||
<"ُ",_> | <_,"ُ"> => "ؤ";
|
_ => "أ"
|
||||||
<"َ",_> | <_,"َ"> => "أ";
|
};
|
||||||
_ => "ء"
|
|
||||||
};
|
|
||||||
|
|
||||||
--hamza carrier sequence
|
--hamza in middle of word (body)
|
||||||
-- tHmz d: hamza spelling chosen from the single character d:
-- "ِ" -> "ئ", "ُ" -> "ؤ", "َ" -> "أ"; "ْ", "ا", "و" or "ي" -> "ء".
-- There is no catch-all branch, so any other d is not handled.
tHmz : Str -> Str = \d ->
|
-- bHmz d1 d2: picks the hamza spelling from two context strings, first match wins:
-- "ئ" if either is exactly "ِ", else "ؤ" if either is exactly "ُ",
-- else "أ" if either is exactly "َ", else the bare "ء".
bHmz : Str -> Str -> Str = \d1,d2 ->
|
||||||
case d of {
|
case <d1,d2> of {
|
||||||
"ِ" => "ئ";
|
<"ِ",_> | <_,"ِ"> => "ئ";
|
||||||
"ُ" => "ؤ";
|
<"ُ",_> | <_,"ُ"> => "ؤ";
|
||||||
"َ" => "أ";
|
<"َ",_> | <_,"َ"> => "أ";
|
||||||
"ْ"|"ا"|"و"|"ي" => "ء"
|
_ => "ء"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
--hamza carrier sequence
|
||||||
|
-- tHmz d: hamza spelling chosen from the single character d:
-- "ِ" -> "ئ", "ُ" -> "ؤ", "َ" -> "أ"; "ْ", "ا", "و" or "ي" -> "ء".
-- There is no catch-all branch, so any other d is not handled.
tHmz : Str -> Str = \d ->
|
||||||
|
case d of {
|
||||||
|
"ِ" => "ئ";
|
||||||
|
"ُ" => "ؤ";
|
||||||
|
"َ" => "أ";
|
||||||
|
"ْ"|"ا"|"و"|"ي" => "ء"
|
||||||
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user