-- Mirror of https://github.com/GrammaticalFramework/gf-rgl.git
-- (synced 2026-05-29 01:48:55 -06:00; 78 lines, 2.5 KiB, plaintext)
resource OrthoAra = open Prelude, Predef in {
|
||
|
||
flags coding=utf8 ;
|
||
|
||
oper
|
||
|
||
-- Pattern macro for the vowel diacritics handled by the orthography rules:
-- the three short vowel marks, then their three tanwin counterparts.
vow : pattern Str =
  #( "َ" | "ِ" | "ُ"      -- short vowel marks
   | "ً" | "ٍ" | "ٌ"      -- tanwin marks
   ) ;
|
||
|
||
-- Pattern macro for the two weak letters, waw and ya.
weak : pattern Str =
  #( "و"
   | "ي"
   ) ;
|
||
|
||
-- "Sun letters": assimilate with def. article
|
||
-- Pattern macro matching exactly one of the fourteen letters listed below.
sun : pattern Str =
  #( "ت" | "ث" | "د" | "ذ"
   | "ر" | "ز" | "س" | "ش"
   | "ص" | "ض" | "ط" | "ظ"
   | "ل" | "ن"
   ) ;
|
||
|
||
-- Shadda: https://www.unicode.org/L2/L2017/17253-arabic-ordering.pdf
-- Glues suffix onto word. When word ends in a vowel mark (#vow) and suffix
-- starts with a shadda, the shadda is written before that vowel mark;
-- in every other case the result is plain concatenation.
-- Disabled alternative kept from the original:
--   <x + "ّ", v@#vow + y> => x + v + "ّ" + y ;
fixShd : Str -> Str -> Str = \word,suffix ->
  case suffix of {
    "ّ" + rest =>
      case word of {
        stem + mark@#vow => stem + "ّ" + mark + rest ;
        _                => word + suffix
      } ;
    _ => word + suffix
  } ;
|
||
|
||
-- IL: using this to reuse patterns for weak verbs, might be strange/wrong
-- Drops one sukun from the string when the pattern finds one; a string
-- containing no sukun is returned unchanged.
rmSukun : Str -> Str = \s ->
  case s of {
    before + "ْ" + after => before + after ;
    _                    => s
  } ;
|
||
|
||
-- Hamza
-- Pattern macro for the strings that rectifyHmz treats as a bare hamza
-- still waiting for its final spelling.
-- NOTE(review): the second alternative looks like the Arabic question mark;
-- presumably it serves as a placeholder for hamza in some input. Confirm.
hamza : pattern Str =
  #( "ء"
   | "؟"
   ) ;
|
||
|
||
-- Rewrites a bare hamza (#hamza) in word into the spelling selected by its
-- position and the neighbouring marks. Branches are tried top to bottom and
-- only the hamza bound by the first matching branch is rewritten; a word
-- that matches no branch is returned unchanged.
rectifyHmz : Str -> Str = \word ->
  case word of {
    -- Start of word, optionally after "ال": alif-hamza followed by a bare
    -- hamza becomes alif madda. A sukun directly on the hamza is dropped.
    l@(""|"ال") + ("أ"|"أَ") + #hamza + "ْ" + tail => l + "آ" + tail ;
    l@(""|"ال") + ("أ"|"أَ") + #hamza + tail       => l + "آ" + tail ;

    -- Start of word, optionally after "ال": the following vowel mark picks
    -- between hamza above alif and hamza below alif.
    l@(""|"ال") + #hamza + v@("َ"|"ُ") + tail => l + "أ" + v + tail ;
    l@(""|"ال") + #hamza + v@("ِ") + tail     => l + "إ" + v + tail ;

    -- Hamza between a preceding mark/long-vowel letter v1 and a following
    -- mark v2. A pattern variable also matches the empty string, so this
    -- branch catches the word-final case too (tail = ""). The original had
    -- a separate "ends in vowel" branch after this one, which could
    -- therefore never fire; its results are merged in here.
    head + v1@(#vow|"ْ"|"ا"|"ي"|"و")
         + #hamza + v2@(#vow|"ْ") + tail =>
      case <tail,v2> of {
        -- word-final hamza: the seat depends on v1 only
        <"","ْ"> => head + v1 + tHmz v1 ;
        <"",_>   => head + v1 + tHmz v1 + v2 ;
        -- word-medial hamza: the seat depends on both neighbours
        <_,"ْ">  => head + v1 + bHmz v1 v2 + tail ;  -- unsure about this /IL
        _        => head + v1 + bHmz v1 v2 + v2 + tail
      } ;

    -- Word-final hamza with nothing written after it.
    head + v1@(#vow|"ْ"|"ا"|"ي"|"و")
         + #hamza => head + v1 + tHmz v1 ;

    -- Any other hamza.
    -- NOTE(review): bHmz only has single-character patterns, so the
    -- two-character arguments built here reach its default unless head or
    -- tail is a single character. The original trailing note
    -- ("last head , take 1 tail") suggests one character was intended.
    -- TODO confirm.
    head + #hamza + tail => head + (bHmz (dp 2 head) (take 2 tail)) + tail ;

    _ => word
  } ;
|
||
|
||
--hamza at beginning of word (head)
-- Chooses the written form from the vowel mark d: a kasra gives hamza
-- below alif, anything else gives hamza above alif.
hHmz : Str -> Str = \d ->
  case d of {
    "ِ" => "إ" ;
    _   => "أ"
  } ;
|
||
|
||
--hamza in middle of word (body)
-- Chooses the hamza seat from the two neighbouring marks d1 and d2.
-- Priority is kasra, then damma, then fatha, regardless of which side the
-- mark is on; if neither argument is one of those, the bare hamza is used.
bHmz : Str -> Str -> Str = \d1,d2 ->
  case <d1,d2> of {
    <"ِ",_> => "ئ" ;
    <_,"ِ"> => "ئ" ;
    <"ُ",_> => "ؤ" ;
    <_,"ُ"> => "ؤ" ;
    <"َ",_> => "أ" ;
    <_,"َ"> => "أ" ;
    _       => "ء"
  } ;
|
||
|
||
--hamza carrier sequence
-- Chooses the hamza seat from the single preceding mark or letter d:
-- kasra, damma and fatha each select their own seat; everything else
-- gives the bare hamza.
-- The original listed sukun, alif, waw and ya explicitly and had no
-- default, so any other argument (e.g. a tanwin mark, which rectifyHmz's
-- #vow pattern lets through) had no applicable branch. The default below
-- returns the same value as before for the four listed inputs and makes
-- the function total, in line with bHmz.
tHmz : Str -> Str = \d ->
  case d of {
    "ِ" => "ئ" ;
    "ُ" => "ؤ" ;
    "َ" => "أ" ;
    _   => "ء"   -- covers "ْ", "ا", "و", "ي" and any other input
  } ;
|
||
|
||
}
|