mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
Harald's new codings.
This commit is contained in:
83
src/GF/Text/Devanagari.hs
Normal file
83
src/GF/Text/Devanagari.hs
Normal file
@@ -0,0 +1,83 @@
|
||||
module Devanagari where
|
||||
|
||||
-- | Transliterate an ad hoc romanised string into Devanagari.
--
-- The input is first split into two-character codes by
-- 'adHocToDigraphWord'; each code is then turned into one output character
-- by 'digraphWordToUnicode'.
mkDevanagari :: String -> String
mkDevanagari romanised = digraphWordToUnicode (adHocToDigraphWord romanised)
|
||||
|
||||
-- | Tokenise an ad hoc romanisation into the two-character codes that
-- 'digraphToUnicode' looks up.
--
-- The equations are tried top to bottom and their order is significant:
--
-- * @\'<\'@ starts a markup tag, which 'spoolMarkup' copies through.
-- * A space is emitted as a backslash-tagged pair.
-- * A leading vowel yields an upper-cased vowel code; a doubled vowel (or a
--   vowel followed by @':'@) yields the lengthened code @(V, ':')@.
-- * A consonant followed by @\'H\'@ is treated as aspirated.
-- * An @\'a\'@ directly after a consonant is inherent and emits nothing.
-- * A consonant with no following vowel gets the code @(' ', '^')@, except
--   at the end of the input, before a space, or when the consonant is
--   @\'M\'@.
adHocToDigraphWord :: String -> [(Char, Char)]
adHocToDigraphWord [] = []
adHocToDigraphWord ('<' : rest) = ('\\', '<') : spoolMarkup rest
adHocToDigraphWord (' ' : rest) = ('\\', ' ') : adHocToDigraphWord rest -- skip space
-- Vowel in first position.
adHocToDigraphWord (v1 : v2 : rest)
  -- two of the same vowel => lengthening
  | v1 == v2 && isVowel v1 = (cap v1, ':') : adHocToDigraphWord rest
  -- digraphed or long vowel
  | isVowel v1 && isVowel v2 = (cap v1, cap v2) : adHocToDigraphWord rest
adHocToDigraphWord (v : rest)
  | isVowel v = (' ', cap v) : adHocToDigraphWord rest
-- From here on the first character is not a vowel.
-- Alternatives that were already disabled in the original:
--   c1 : 'a' : []                 -> [(' ', c1)]              -- a inherent
--   c1 : c2 : [] | isVowel c2     -> (' ', c1) : [(' ', c2)]
-- Aspirated consonant: c followed by 'H'.
adHocToDigraphWord (c : 'H' : v1 : v2 : rest)
  | v1 == v2 && isVowel v1 = (c, 'H') : (v1, ':') : adHocToDigraphWord rest
  | isVowel v1 && isVowel v2 = (c, 'H') : (v1, v2) : adHocToDigraphWord rest
adHocToDigraphWord (c : 'H' : 'a' : rest) =
  (c, 'H') : adHocToDigraphWord rest -- a inherent
adHocToDigraphWord (c : 'H' : v : rest)
  | isVowel v = (c, 'H') : (' ', v) : adHocToDigraphWord rest
-- not vowelless at end of word
adHocToDigraphWord (c : 'H' : ' ' : rest) =
  (c, 'H') : ('\\', ' ') : adHocToDigraphWord rest
adHocToDigraphWord [c, 'H'] = [(c, 'H')]
adHocToDigraphWord (c : 'H' : rest) =
  (c, 'H') : (' ', '^') : adHocToDigraphWord rest -- vowelless
-- Unaspirated consonant.
adHocToDigraphWord (c : v1 : v2 : rest)
  | v1 == v2 && isVowel v1 = (' ', c) : (v1, ':') : adHocToDigraphWord rest
  | isVowel v1 && isVowel v2 = (' ', c) : (v1, v2) : adHocToDigraphWord rest
adHocToDigraphWord (c : 'a' : rest) =
  (' ', c) : adHocToDigraphWord rest -- a inherent
adHocToDigraphWord (c : v : rest)
  | isVowel v = (' ', c) : (' ', v) : adHocToDigraphWord rest
-- not vowelless at end of word
adHocToDigraphWord (c : ' ' : rest) =
  (' ', c) : ('\\', ' ') : adHocToDigraphWord rest
adHocToDigraphWord [c] = [(' ', c)]
-- vowelless, but there is no vowelless sign for anusvara
adHocToDigraphWord ('M' : rest) = (' ', 'M') : adHocToDigraphWord rest
adHocToDigraphWord (c : rest) =
  (' ', c) : (' ', '^') : adHocToDigraphWord rest -- vowelless
|
||||
|
||||
-- | True for the characters the tokeniser treats as vowels: the five Latin
-- vowel letters and @':'@. Including @':'@ lets a vowel followed by a colon
-- take the same branches as a vowel followed by another vowel.
isVowel :: Char -> Bool
isVowel x = x `elem` "aeiou:"
|
||||
-- | Upper-case one of the lower-case vowels @a e i o u@; every other
-- character (including @':'@ and letters that are already capital) is
-- returned unchanged.
cap :: Char -> Char
cap x = maybe x id (lookup x (zip "aeiou" "AEIOU"))
|
||||
|
||||
-- | Copy the inside of a @<...>@ markup tag through without transliterating
-- it.
--
-- Every character up to and including the closing @\'>\'@ is emitted as a
-- pair tagged with a backslash; after the @\'>\'@ control returns to
-- 'adHocToDigraphWord'. The backslash tag is presumably chosen because no
-- table code starts with it, so 'digraphToUnicode' falls back to the
-- character itself (confirm against the code table).
--
-- An unterminated tag (input ends before @\'>\'@) simply ends the output;
-- previously this case was commented out and the function failed with a
-- non-exhaustive pattern error.
spoolMarkup :: String -> [(Char, Char)]
spoolMarkup s = case s of
  []       -> [] -- unterminated markup: stop instead of crashing
  '>' : cs -> ('\\', '>') : adHocToDigraphWord cs
  c1 : cs  -> ('\\', c1) : spoolMarkup cs
|
||||
|
||||
|
||||
-- | Convert a tokenised word to text, one output character per code pair.
digraphWordToUnicode :: [(Char, Char)] -> String
digraphWordToUnicode pairs = [digraphToUnicode pair | pair <- pairs]
|
||||
|
||||
-- | Translate one two-character code to its output character.
--
-- The code is looked up in the table built by pairing
-- 'allDevanagariCodes' with 'allDevanagari'. When the code is not in the
-- table, the second character of the pair is returned as is.
digraphToUnicode :: (Char, Char) -> Char
digraphToUnicode code@(_, fallback) = maybe fallback id (lookup code table)
  where
    table = zip allDevanagariCodes allDevanagari
|
||||
|
||||
-- Code table for Devanagari: a flat string of two-character codes, read
-- pairwise by 'mkPairs'. The n-th pair is the code for the n-th character
-- of 'allDevanagari' (they are zipped together in 'digraphToUnicode').
-- "__" appears to be a filler for positions that have no input code.
-- NOTE(review): the pairing is purely positional, so a single missing or
-- extra character shifts every later code - verify spacing after any edit.
digraphedDevanagari = " ~ M ;__ AA: II: UU:RoLoEvE~ EE:AvA~ OAU kkH ggHNG ccH jjH ñ TTH DDH N ttH ddH nn. ppH bbH m y rr. l LL. v ç S s h____ .-Sa: ii: uu:ror:eve~ eaiava~ oau ^____OM | -dddu______ Q X G zD.RH fy.R:L:mrmR#I#d#0#1#2#3#4#5#6#7#8#9#o"
|
||||
|
||||
-- | The two-character input codes, in table order, obtained by splitting
-- 'digraphedDevanagari' into consecutive pairs.
allDevanagariCodes :: [(Char, Char)]
allDevanagariCodes =
  mkPairs digraphedDevanagari
|
||||
|
||||
-- | The output characters, in table order: every code point from U+0901
-- through U+0970 inclusive.
allDevanagari :: String
allDevanagari = ['\x0901' .. '\x0970']
|
||||
|
||||
-- | Split a string into consecutive, non-overlapping pairs of characters.
--
-- A trailing unpaired character (odd-length input) is dropped. The earlier
-- definition had no case for it and failed with a non-exhaustive pattern
-- error; because the table is consumed lazily by 'lookup', that failure
-- only surfaced when a lookup missed and ran off the end of the table.
mkPairs :: String -> [(Char, Char)]
mkPairs (c1 : c2 : cs) = (c1, c2) : mkPairs cs
mkPairs _ = [] -- empty input, or a single left-over character
|
||||
|
||||
57
src/GF/Text/Ethiopic.hs
Normal file
57
src/GF/Text/Ethiopic.hs
Normal file
@@ -0,0 +1,57 @@
|
||||
module Ethiopic where
|
||||
|
||||
-- | Transliterate an ad hoc romanised string into Ethiopic script:
-- tokenise with 'adHocToDigraphWord', then map every token to a character
-- with 'digraphWordToUnicode'.
mkEthiopic :: String -> String
mkEthiopic romanised = digraphWordToUnicode (adHocToDigraphWord romanised)
|
||||
|
||||
-- mkEthiopic :: String -> String
|
||||
-- mkEthiopic = reverse . unwords . (map (digraphWordToUnicode . adHocToDigraphWord)) . words
|
||||
--- reverse : assumes everything's on same line
|
||||
|
||||
-- | Tokenise an ad hoc romanisation into @(consonant code, vowel order)@
-- pairs for 'digraphToUnicode'. An order of @-1@ marks a character that is
-- to be copied through unchanged.
--
-- Equations are tried top to bottom:
--
-- * @\'<\'@ starts a markup tag, handled by 'spoolMarkup'.
-- * A vowel with no preceding consonant is attached to the code @\')\'@.
-- * A character that is not in 'allEthiopicCodes' is passed through.
-- * A consonant followed by a vowel consumes both and uses that vowel's
--   order; a consonant on its own gets order 5 (vowelless).
adHocToDigraphWord :: String -> [(Char, Int)]
adHocToDigraphWord [] = []
adHocToDigraphWord ('<' : rest) = ('<', -1) : spoolMarkup rest
adHocToDigraphWord (c : rest)
  | isVowel c = (')', vowelOrder c) : adHocToDigraphWord rest
  -- c isn't a vowel
  | c `notElem` allEthiopicCodes = (c, -1) : adHocToDigraphWord rest
adHocToDigraphWord (c : v : rest)
  | isVowel v = (c, vowelOrder v) : adHocToDigraphWord rest
adHocToDigraphWord (c : rest) = (c, 5) : adHocToDigraphWord rest
|
||||
|
||||
-- | Copy the inside of a @<...>@ markup tag through untouched.
--
-- Each character up to and including the closing @\'>\'@ is emitted with
-- vowel order @-1@, which 'digraphToUnicode' renders as the character
-- itself; after the @\'>\'@ control returns to 'adHocToDigraphWord'.
--
-- An unterminated tag ends the output. Previously the empty-input case was
-- commented out, so such input failed with a non-exhaustive pattern error.
spoolMarkup :: String -> [(Char, Int)]
spoolMarkup s = case s of
  []       -> [] -- unterminated markup: stop instead of crashing
  '>' : cs -> ('>', -1) : adHocToDigraphWord cs
  c1 : cs  -> (c1, -1) : spoolMarkup cs
|
||||
|
||||
-- | True for the characters the Ethiopic tokeniser treats as vowels; each
-- of them has an entry in 'vowelOrder'.
isVowel :: Char -> Bool
isVowel x = x `elem` "AäuiïaeoI"
|
||||
|
||||
-- | Column offset (0-6) of a vowel within a consonant's row of the
-- syllabary; 'digraphToUnicode' adds it to the row's base code point.
--
-- @\'I\'@, @\'ï\'@ and every character that is not listed share offset 5,
-- which the tokeniser also uses for a vowelless consonant.
vowelOrder :: Char -> Int
vowelOrder x
  | x `elem` "Aä" = 0
  | x == 'u' = 1
  | x == 'i' = 2
  | x == 'a' = 3
  | x == 'e' = 4
  | x == 'o' = 6
  | otherwise = 5 -- 'I', 'ï', and anything else: vowelless
|
||||
|
||||
-- | Convert a tokenised word to text, one output character per
-- @(consonant code, vowel order)@ pair.
digraphWordToUnicode :: [(Char, Int)] -> String
digraphWordToUnicode = map digraphToUnicode
|
||||
|
||||
-- | Translate one @(consonant code, vowel order)@ pair to a character.
--
-- * Order @-1@ means "pass through": the character is returned unchanged.
-- * Otherwise the consonant's row index is looked up (its position in
--   'allEthiopicCodes') and the result is the code point
--   @0x1200 + 8 * row + order@.
--
-- A consonant code that is not in the table is returned unchanged. The
-- earlier inline @case@ had no alternative for a failed lookup and would
-- fail with a pattern-match error; 'adHocToDigraphWord' never produces such
-- a pair, but this function can also be called directly.
digraphToUnicode :: (Char, Int) -> Char
-- digraphToUnicode (c1, c2) = c1
digraphToUnicode (c1, -1) = c1
digraphToUnicode (c1, c2) = case lookup c1 cc of
  Just row -> toEnum (0x1200 + c2 + 8 * row)
  Nothing  -> c1 -- unknown consonant code: leave the character as it is
  where
    cc = zip allEthiopicCodes allEthiopic
|
||||
|
||||
-- | Row indices 0 through 44, paired positionally with 'allEthiopicCodes'.
-- Each row is multiplied by 8 in 'digraphToUnicode' to get its offset
-- from U+1200.
allEthiopic :: [Int]
allEthiopic = enumFromTo 0 44 -- x 8
|
||||
|
||||
-- Consonant codes in row order: the character at position i selects row i,
-- i.e. the eight code points starting at 0x1200 + 8*i (see
-- 'digraphToUnicode').
-- '_' appears to be a filler for rows without an input code.
-- NOTE(review): because '_' is itself a member of this string, a literal
-- underscore in the input is treated as a consonant code by
-- 'adHocToDigraphWord' - confirm that this is intended.
-- NOTE(review): 'L' occurs twice; 'lookup' returns the first match, so the
-- second occurrence can never be selected.
allEthiopicCodes = "hlHmLrs$KQ__bBtcxXnN)kW__w(zZyd_jgG_TCPSLfp"
|
||||
|
||||
-- Q = kW, X = xW, W = kW, G = gW
|
||||
|
||||
58
src/GF/Text/ExtendedArabic.hs
Normal file
58
src/GF/Text/ExtendedArabic.hs
Normal file
@@ -0,0 +1,58 @@
|
||||
module ExtendedArabic where
|
||||
|
||||
-- | Transliterate an ad hoc romanised string into (extended) Arabic
-- script: tokenise with 'adHocToDigraphWord', then map every token to a
-- character with 'digraphWordToUnicode'.
mkExtendedArabic :: String -> String
mkExtendedArabic romanised = digraphWordToUnicode (adHocToDigraphWord romanised)
|
||||
|
||||
-- | Tokenise an ad hoc romanisation into the two-character codes that
-- 'digraphToUnicode' looks up.
--
-- @\'<\'@ starts a markup tag, which 'spoolMarkup' copies through. Every
-- other input character produces exactly one code: the characters listed
-- in @shorthand@ are replaced by the code given there, and any other
-- character @c@ becomes @(' ', c)@.
adHocToDigraphWord :: String -> [(Char, Char)]
adHocToDigraphWord [] = []
adHocToDigraphWord ('<' : rest) = ('\\', '<') : spoolMarkup rest
adHocToDigraphWord (c : rest) =
  maybe (' ', c) id (lookup c shorthand) : adHocToDigraphWord rest
  where
    shorthand :: [(Char, (Char, Char))]
    shorthand =
      [ -- Sorani
        ('W', (':', 'w')) -- ?? Will do
      , ('E', (' ', 'i')) -- ?? Letter missing!
      , ('j', ('d', 'j'))
      , ('O', ('v', 'w'))
      , ('F', (' ', 'v'))
      , ('Z', ('z', 'h'))
      , ('I', (' ', 'i')) -- ?? Letter missing!
      , ('C', ('c', 'h'))
        -- Pashto
      , ('e', (':', 'y'))
      , ('$', ('3', 'H'))
      , ('X', ('s', '.'))
      , ('G', ('z', '.'))
      , ('a', (' ', 'A'))
      , ('P', ('\'', 'H'))
      , ('R', ('o', 'r'))
        -- Shared
      , ('A', (' ', 'h')) -- ?? Maybe to "t or 0x06d5
      , ('c', ('s', 'h'))
      ]
|
||||
|
||||
|
||||
-- Beginning 0x621 up and including 0x06d1
|
||||
-- Code table for extended Arabic: a flat string of two-character codes,
-- read pairwise by 'mkPairs'. The n-th pair is the code for the n-th
-- character of 'allExtendedArabic' (zipped in 'digraphToUnicode').
-- "__" appears to be a filler for positions that have no input code.
-- NOTE(review): the pairing is purely positional, so the exact length of
-- every run of '_' and every space matters - verify the alignment against
-- the intended code points.
digraphedExtendedArabic = " '~A'A'w,A'i A b\"t tTHdj H X dDH r z ssh S D T Z 3GH__________ - f q k l m n h w i y&a&w&i/a/w/i/W/o/~/'/,/|/6/v_____________#0#1#2#3#4#5#6#7#8#9#%#,#'#*>b>q$|> A2'2,3'A'w'w&y'Tb:b:BoT3b p4b4B'H:H2H\"H3Hch4HTdod.dTD:d:D3d3D4dTrvror.rvRz.:rzh4zs.+s*S:S3S3T33>ff.f: v4f.q3q-k~kok.k3k3K gog:g:G3Gvl.l3l3L:n>nTnon3n?h4H't>Y\"Yow-wvwww|w^w:w3w>y/yvy.w:y3y____ -ae"
|
||||
|
||||
-- | Convert a tokenised word to text, one output character per code pair.
digraphWordToUnicode :: [(Char, Char)] -> String
digraphWordToUnicode = map digraphToUnicode
|
||||
|
||||
-- | Translate one two-character code to its output character.
--
-- The code is looked up in the table built by pairing
-- 'allExtendedArabicCodes' with 'allExtendedArabic'; a code that is not in
-- the table yields the second character of the pair unchanged.
digraphToUnicode :: (Char, Char) -> Char
digraphToUnicode code =
  case lookup code table of
    Nothing  -> snd code
    Just out -> out
  where
    table = zip allExtendedArabicCodes allExtendedArabic
|
||||
|
||||
-- | The two-character input codes, in table order, obtained by splitting
-- 'digraphedExtendedArabic' into consecutive pairs.
allExtendedArabicCodes :: [(Char, Char)]
allExtendedArabicCodes =
  mkPairs digraphedExtendedArabic
|
||||
|
||||
-- | The output characters, in table order: every code point from U+0621
-- through U+06D1 inclusive.
allExtendedArabic :: String
allExtendedArabic = ['\x0621' .. '\x06d1']
|
||||
|
||||
-- | Split a string into consecutive, non-overlapping pairs of characters.
--
-- A trailing unpaired character (odd-length input) is dropped. The earlier
-- definition had no case for it and failed with a non-exhaustive pattern
-- error; because the table is consumed lazily by 'lookup', that failure
-- only surfaced when a lookup missed and ran off the end of the table.
mkPairs :: String -> [(Char, Char)]
mkPairs (c1 : c2 : cs) = (c1, c2) : mkPairs cs
mkPairs _ = [] -- empty input, or a single left-over character
|
||||
|
||||
-- | Copy the inside of a @<...>@ markup tag through untouched.
--
-- Each character up to and including the closing @\'>\'@ is emitted as a
-- backslash-tagged pair; after the @\'>\'@ control returns to
-- 'adHocToDigraphWord'. Input that ends inside the tag ends the output.
spoolMarkup :: String -> [(Char, Char)]
spoolMarkup [] = [] -- Shouldn't happen
spoolMarkup ('>' : rest) = ('\\', '>') : adHocToDigraphWord rest
spoolMarkup (c : rest) = ('\\', c) : spoolMarkup rest
|
||||
81
src/GF/Text/Hiragana.hs
Normal file
81
src/GF/Text/Hiragana.hs
Normal file
@@ -0,0 +1,81 @@
|
||||
module Hiragana where
|
||||
|
||||
-- long vowel romaaji must be ei, ou not ee, oo
|
||||
|
||||
-- | Transliterate a romaji string into hiragana: tokenise with
-- 'romaajiToDigraphWord', then map every token to a character with
-- 'digraphWordToUnicode'.
mkJapanese :: String -> String
mkJapanese romaji = digraphWordToUnicode (romaajiToDigraphWord romaji)
|
||||
|
||||
-- | Tokenise romaji into the two-character codes that 'digraphToUnicode'
-- looks up.
--
-- Alternatives are tried top to bottom and their order is significant:
--
-- * @\'<\'@ starts a markup tag, which 'spoolMarkup' copies through; a
--   space is emitted as a backslash-tagged pair.
-- * A bare vowel becomes @(' ', V)@ with the vowel upper-cased.
-- * Consonant + @y@ + vowel, and the @sh@/@ch@/@j@ + @a@/@u@/@o@
--   spellings, become an i-row code followed by a @(\'y\', V)@ code.
-- * @dz@, @ts@, @ch@, @sh@, @zh@ + one character become a single code with
--   a capital consonant letter.
-- * A doubled consonant emits @(\'T\', \'U\')@ and keeps the second
--   consonant for the next step.
-- * Any other lone consonant becomes @(' ', c)@.
--
-- The doubling rule deliberately skips @n@: in @"nn"@ the first @n@ is a
-- syllabic n, not a geminate marker, so it must fall through to the
-- lone-consonant rule. Previously @"konna"@ came out with the
-- @(\'T\', \'U\')@ code in place of the first @n@.
romaajiToDigraphWord :: String -> [(Char, Char)]
romaajiToDigraphWord str = case str of
  [] -> []
  '<' : cs -> ('\\', '<') : spoolMarkup cs
  ' ' : cs -> ('\\', ' ') : romaajiToDigraphWord cs

  c1 : cs | isVowel c1 -> (' ', cap c1) : romaajiToDigraphWord cs

  -- The combinations
  c1 : 'y' : c2 : cs -> (c1, 'i') : ('y', cap c2) : romaajiToDigraphWord cs

  's' : 'h' : 'a' : cs -> ('S', 'i') : ('y', 'A') : romaajiToDigraphWord cs
  'c' : 'h' : 'a' : cs -> ('C', 'i') : ('y', 'A') : romaajiToDigraphWord cs
  'j' : 'a' : cs       -> ('j', 'i') : ('y', 'A') : romaajiToDigraphWord cs

  's' : 'h' : 'u' : cs -> ('S', 'i') : ('y', 'U') : romaajiToDigraphWord cs
  'c' : 'h' : 'u' : cs -> ('C', 'i') : ('y', 'U') : romaajiToDigraphWord cs
  'j' : 'u' : cs       -> ('j', 'i') : ('y', 'U') : romaajiToDigraphWord cs

  's' : 'h' : 'o' : cs -> ('S', 'i') : ('y', 'O') : romaajiToDigraphWord cs
  'c' : 'h' : 'o' : cs -> ('C', 'i') : ('y', 'O') : romaajiToDigraphWord cs
  'j' : 'o' : cs       -> ('j', 'i') : ('y', 'O') : romaajiToDigraphWord cs

  'd' : 'z' : c3 : cs -> ('D', c3) : romaajiToDigraphWord cs
  't' : 's' : c3 : cs -> ('T', c3) : romaajiToDigraphWord cs
  'c' : 'h' : c3 : cs -> ('C', c3) : romaajiToDigraphWord cs
  's' : 'h' : c3 : cs -> ('S', c3) : romaajiToDigraphWord cs
  'z' : 'h' : c3 : cs -> ('Z', c3) : romaajiToDigraphWord cs

  c1 : ' ' : cs -> (' ', c1) : ('\\', ' ') : romaajiToDigraphWord cs -- n
  c1 : []       -> [(' ', c1)] -- n

  c1 : c2 : cs | isVowel c2 -> (c1, c2) : romaajiToDigraphWord cs
  -- double consonant; "nn" is excluded so that the first n stays a
  -- syllabic n (handled by the final alternative)
  c1 : c2 : cs | c1 == c2 && c1 /= 'n' -> ('T', 'U') : romaajiToDigraphWord (c2 : cs)
  c1 : cs -> (' ', c1) : romaajiToDigraphWord cs -- n
|
||||
|
||||
-- | True for the five romaji vowel letters @a e i o u@.
isVowel :: Char -> Bool
isVowel x = x `elem` "aeiou"
|
||||
-- | Upper-case one of the lower-case vowels @a e i o u@; any other
-- character is returned unchanged.
cap :: Char -> Char
cap x =
  case lookup x vowelCaps of
    Just upper -> upper
    Nothing    -> x
  where
    vowelCaps = [('a', 'A'), ('e', 'E'), ('i', 'I'), ('o', 'O'), ('u', 'U')]
|
||||
|
||||
-- | Copy the inside of a @<...>@ markup tag through without transliterating
-- it.
--
-- Every character up to and including the closing @\'>\'@ is emitted as a
-- backslash-tagged pair; after the @\'>\'@ control returns to
-- 'romaajiToDigraphWord'.
--
-- An unterminated tag ends the output. Previously the empty-input case was
-- commented out, so such input failed with a non-exhaustive pattern error.
spoolMarkup :: String -> [(Char, Char)]
spoolMarkup s = case s of
  []       -> [] -- unterminated markup: stop instead of crashing
  '>' : cs -> ('\\', '>') : romaajiToDigraphWord cs
  c1 : cs  -> ('\\', c1) : spoolMarkup cs
|
||||
|
||||
-- | Convert a tokenised word to text, one output character per code pair.
digraphWordToUnicode :: [(Char, Char)] -> String
digraphWordToUnicode codes = map digraphToUnicode codes
|
||||
|
||||
-- | Translate one two-character code to its output character.
--
-- The code is looked up in the table built by pairing 'allHiraganaCodes'
-- with 'allHiragana'; a code that is not in the table yields the second
-- character of the pair unchanged.
digraphToUnicode :: (Char, Char) -> Char
digraphToUnicode code@(_, fallback) = maybe fallback id (lookup code table)
  where
    table = zip allHiraganaCodes allHiragana
|
||||
|
||||
-- | The two-character input codes, in table order, obtained by splitting
-- 'digraphedHiragana' into consecutive pairs.
allHiraganaCodes :: [(Char, Char)]
allHiraganaCodes =
  mkPairs digraphedHiragana
|
||||
|
||||
-- | The output characters, in table order: every code point from U+3041
-- through U+309F inclusive.
allHiragana :: String
allHiragana = ['\x3041' .. '\x309f']
|
||||
|
||||
-- | Split a string into consecutive, non-overlapping pairs of characters.
--
-- A trailing unpaired character (odd-length input) is dropped. The earlier
-- definition had no case for it and failed with a non-exhaustive pattern
-- error; because the table is consumed lazily by 'lookup', that failure
-- only surfaced when a lookup missed and ran off the end of the table.
mkPairs :: String -> [(Char, Char)]
mkPairs (c1 : c2 : cs) = (c1, c2) : mkPairs cs
mkPairs _ = [] -- empty input, or a single left-over character
|
||||
|
||||
-- Code table for hiragana: a flat string of two-character codes, read
-- pairwise by 'mkPairs'. The n-th pair is the code for the n-th character
-- of 'allHiragana' (zipped in 'digraphToUnicode').
-- "__" appears to be a filler for positions that have no input code.
-- NOTE(review): counting pairs from the start, "wa" lands on U+308E and
-- "Wa" on U+308F, so plain romaji "wa" (tokenised as ('w','a')) selects
-- U+308E - confirm that this is the intended character.
-- NOTE(review): the pairing is purely positional; verify the spacing of
-- the tail of the string against the intended code points.
digraphedHiragana = " a A i I u U e E o OkagakigikugukegekogosazaSiZisuzusezesozotadaCijiTUTuDutedetodonaninunenohabapahibipihubupuhebepehobopomamimumemoyAyayUyuyOyorarirurerowaWawiwewo nvukAkE____<< o>>o >'> b"
|
||||
|
||||
|
||||
55
src/GF/Text/LatinASupplement.hs
Normal file
55
src/GF/Text/LatinASupplement.hs
Normal file
@@ -0,0 +1,55 @@
|
||||
module LatinASupplement where
|
||||
|
||||
-- | Replace the ASCII digraph spellings of accented Latin letters in a
-- string with the accented characters themselves. This is a thin wrapper
-- around 'mkLatinASupplementWord'.
mkLatinASupplement :: String -> String
mkLatinASupplement str = mkLatinASupplementWord str
|
||||
|
||||
-- | Rewrite ASCII digraphs (a base letter followed by a mark character) as
-- the corresponding accented characters.
--
-- * @\'<\'@ starts a markup tag, which 'spoolMarkup' copies through.
-- * A two-character sequence listed in @accentTable@ is replaced by the
--   table entry.
-- * A capital @\'I\'@ is always replaced by U+0131.
-- * Every other character is copied unchanged.
--
-- No table key starts with @\'I\'@, so trying the table before the
-- @\'I\'@ rule gives the same result as interleaving them.
mkLatinASupplementWord :: String -> String
mkLatinASupplementWord [] = []
mkLatinASupplementWord ('<' : rest) = '<' : spoolMarkup rest
mkLatinASupplementWord (base : mark : rest)
  | Just accented <- lookup (base, mark) accentTable =
      accented ++ mkLatinASupplementWord rest
  where
    accentTable :: [((Char, Char), String)]
    accentTable =
      [ -- Romanian & partly Turkish
        (('s', ','), "\x015f")
      , (('a', '%'), "\x0103")
        -- Slavic and more
      , (('c', '^'), "\x010d")
      , (('s', '^'), "\x0161")
      , (('c', '\''), "\x0107")
      , (('z', '^'), "\x017e")
        -- Turkish (the dotless i is handled by its own equation)
      , (('g', '%'), "\x011f")
      , (('c', ','), "ç")
        -- Polish
      , (('e', ','), "\x0119")
      , (('a', ','), "\x0105")
      , (('l', '/'), "\x0142")
      , (('z', '.'), "\x017c")
      , (('n', '\''), "\x0144")
      , (('s', '\''), "\x015b")
        -- Hungarian
      , (('o', '%'), "\x0151")
      , (('u', '%'), "\x0171")
        -- Mongolian
      , (('j', '^'), "\x0135")
        -- Khowar (actually in Combining Diacritical Marks, not Latin-A
        -- Supplement): base letter followed by a combining mark
      , (('o', '.'), "o\x0323")
        -- Length bars over vowels, e.g. Korean
      , (('a', ':'), "\x0101")
      , (('e', ':'), "\x0113")
      , (('i', ':'), "\x012b")
      , (('o', ':'), "\x014d")
      , (('u', ':'), "\x016b")
      ]
-- Turkish dotless i
mkLatinASupplementWord ('I' : rest) = '\x0131' : mkLatinASupplementWord rest
-- Default
mkLatinASupplementWord (c : rest) = c : mkLatinASupplementWord rest
|
||||
|
||||
-- | Copy the inside of a @<...>@ markup tag through unchanged, up to and
-- including the closing @\'>\'@, then hand the rest of the input back to
-- 'mkLatinASupplementWord'. Input that ends inside the tag ends the
-- output.
spoolMarkup :: String -> String
spoolMarkup [] = [] -- Shouldn't happen
spoolMarkup ('>' : rest) = '>' : mkLatinASupplementWord rest
spoolMarkup (c : rest) = c : spoolMarkup rest
|
||||
34
src/GF/Text/OCSCyrillic.hs
Normal file
34
src/GF/Text/OCSCyrillic.hs
Normal file
@@ -0,0 +1,34 @@
|
||||
module OCSCyrillic where
|
||||
|
||||
-- | Transliterate a romanised string into Cyrillic as used for Old Church
-- Slavonic. This is a thin wrapper around 'mkOCSCyrillicWord'.
mkOCSCyrillic :: String -> String
mkOCSCyrillic str = mkOCSCyrillicWord str
|
||||
|
||||
-- | Transliterate romanised text to Cyrillic, one rule at a time.
--
-- Equations are tried top to bottom, so the three-character spellings
-- (@je~@, @jo~@) take precedence over the two-character ones, which in
-- turn take precedence over the single-character rule for @u@. Spaces are
-- kept, @\'<\'@ starts a markup tag that 'spoolMarkup' copies through, and
-- any character without a rule of its own goes through
-- 'mkOCSCyrillicChar'.
mkOCSCyrillicWord :: String -> String
mkOCSCyrillicWord [] = []
mkOCSCyrillicWord (' ' : rest) = ' ' : mkOCSCyrillicWord rest
mkOCSCyrillicWord ('<' : rest) = '<' : spoolMarkup rest
mkOCSCyrillicWord ('ä' : rest) = '\x0463' : mkOCSCyrillicWord rest
mkOCSCyrillicWord ('j' : 'e' : '~' : rest) = '\x0469' : mkOCSCyrillicWord rest
mkOCSCyrillicWord ('j' : 'o' : '~' : rest) = '\x046d' : mkOCSCyrillicWord rest
mkOCSCyrillicWord ('j' : 'e' : rest) = '\x0465' : mkOCSCyrillicWord rest
mkOCSCyrillicWord ('e' : '~' : rest) = '\x0467' : mkOCSCyrillicWord rest
mkOCSCyrillicWord ('o' : '~' : rest) = '\x046b' : mkOCSCyrillicWord rest
mkOCSCyrillicWord ('j' : 'u' : rest) = '\x044e' : mkOCSCyrillicWord rest
mkOCSCyrillicWord ('j' : 'a' : rest) = '\x044f' : mkOCSCyrillicWord rest
mkOCSCyrillicWord ('u' : rest) = '\x0479' : mkOCSCyrillicWord rest
mkOCSCyrillicWord (c : rest) = mkOCSCyrillicChar c : mkOCSCyrillicWord rest
|
||||
|
||||
-- | Copy the inside of a @<...>@ markup tag through unchanged, up to and
-- including the closing @\'>\'@, then hand the rest of the input back to
-- 'mkOCSCyrillicWord'. Input that ends inside the tag ends the output.
spoolMarkup :: String -> String
spoolMarkup [] = [] -- Shouldn't happen
spoolMarkup ('>' : rest) = '>' : mkOCSCyrillicWord rest
spoolMarkup (c : rest) = c : spoolMarkup rest
|
||||
|
||||
-- | Map a single romanised letter to its Cyrillic counterpart.
--
-- The letters of the key string are paired positionally with the
-- characters of 'allOCSCyrillic'; a character that is not in the key
-- string is returned unchanged.
mkOCSCyrillicChar :: Char -> Char
mkOCSCyrillicChar c = maybe c id (lookup c table)
  where
    table = zip "abvgdeZziJklmnoprstYfxCqwWUyIE" allOCSCyrillic
|
||||
|
||||
-- | The output characters, in table order: every code point from U+0430
-- through U+044E inclusive.
allOCSCyrillic :: String
allOCSCyrillic = ['\x0430' .. '\x044e']
|
||||
63
src/GF/Text/Tamil.hs
Normal file
63
src/GF/Text/Tamil.hs
Normal file
@@ -0,0 +1,63 @@
|
||||
module Tamil where
|
||||
|
||||
-- | Transliterate an ad hoc romanised string into Tamil script: tokenise
-- with 'adHocToDigraphWord', then map every token to a character with
-- 'digraphWordToUnicode'.
mkTamil :: String -> String
mkTamil romanised = digraphWordToUnicode (adHocToDigraphWord romanised)
|
||||
|
||||
-- | Tokenise an ad hoc romanisation into the two-character codes that
-- 'digraphToUnicode' looks up.
--
-- The equations are tried top to bottom and their order is significant:
--
-- * @\'<\'@ starts a markup tag, which 'spoolMarkup' copies through.
-- * A space is emitted as a backslash-tagged pair.
-- * A leading vowel yields an upper-cased vowel code; a doubled vowel (or a
--   vowel followed by @':'@) yields the lengthened code @(V, ':')@.
-- * An @\'a\'@ directly after a consonant is inherent and emits nothing.
-- * A consonant with no following vowel is followed by the code
--   @(' ', '.')@.
adHocToDigraphWord :: String -> [(Char, Char)]
adHocToDigraphWord [] = []
adHocToDigraphWord ('<' : rest) = ('\\', '<') : spoolMarkup rest
adHocToDigraphWord (' ' : rest) = ('\\', ' ') : adHocToDigraphWord rest -- skip space
-- Vowel in first position.
adHocToDigraphWord (v1 : v2 : rest)
  -- two of the same vowel => lengthening
  | v1 == v2 && isVowel v1 = (cap v1, ':') : adHocToDigraphWord rest
  -- digraphed or long vowel
  | isVowel v1 && isVowel v2 = (cap v1, cap v2) : adHocToDigraphWord rest
adHocToDigraphWord (v : rest)
  | isVowel v = (' ', cap v) : adHocToDigraphWord rest
-- From here on the first character is not a vowel.
adHocToDigraphWord (c : v1 : v2 : rest)
  | v1 == v2 && isVowel v1 = (' ', c) : (v1, ':') : adHocToDigraphWord rest
  | isVowel v1 && isVowel v2 = (' ', c) : (v1, v2) : adHocToDigraphWord rest
adHocToDigraphWord (c : 'a' : rest) =
  (' ', c) : adHocToDigraphWord rest -- a inherent
adHocToDigraphWord (c : v : rest)
  | isVowel v = (' ', c) : (' ', v) : adHocToDigraphWord rest
adHocToDigraphWord (c : rest) =
  (' ', c) : (' ', '.') : adHocToDigraphWord rest -- vowelless
|
||||
|
||||
-- | True for the characters the tokeniser treats as vowels: the five Latin
-- vowel letters and @':'@. Including @':'@ lets a vowel followed by a colon
-- take the same branches as a vowel followed by another vowel.
isVowel :: Char -> Bool
isVowel x = x `elem` "aeiou:"
|
||||
-- | Upper-case one of the lower-case vowels @a e i o u@; any other
-- character is returned unchanged.
cap :: Char -> Char
cap 'a' = 'A'
cap 'e' = 'E'
cap 'i' = 'I'
cap 'o' = 'O'
cap 'u' = 'U'
cap other = other
|
||||
|
||||
-- | Copy the inside of a @<...>@ markup tag through without transliterating
-- it.
--
-- Every character up to and including the closing @\'>\'@ is emitted as a
-- backslash-tagged pair; after the @\'>\'@ control returns to
-- 'adHocToDigraphWord'.
--
-- An unterminated tag ends the output. Previously the empty-input case was
-- commented out, so such input failed with a non-exhaustive pattern error.
spoolMarkup :: String -> [(Char, Char)]
spoolMarkup s = case s of
  []       -> [] -- unterminated markup: stop instead of crashing
  '>' : cs -> ('\\', '>') : adHocToDigraphWord cs
  c1 : cs  -> ('\\', c1) : spoolMarkup cs
|
||||
|
||||
-- | Convert a tokenised word to text, one output character per code pair.
digraphWordToUnicode :: [(Char, Char)] -> String
digraphWordToUnicode codes = [digraphToUnicode code | code <- codes]
|
||||
|
||||
-- | Translate one two-character code to its output character.
--
-- The code is looked up in the table built by pairing 'allTamilCodes' with
-- 'allTamil'; a code that is not in the table yields the second character
-- of the pair unchanged.
digraphToUnicode :: (Char, Char) -> Char
digraphToUnicode code =
  case lookup code table of
    Nothing  -> snd code
    Just out -> out
  where
    table = zip allTamilCodes allTamil
|
||||
|
||||
-- | Split a string into consecutive, non-overlapping pairs of characters.
--
-- A trailing unpaired character (odd-length input) is dropped. The earlier
-- definition had no case for it and failed with a non-exhaustive pattern
-- error; because the table is consumed lazily by 'lookup', that failure
-- only surfaced when a lookup missed and ran off the end of the table.
mkPairs :: String -> [(Char, Char)]
mkPairs (c1 : c2 : cs) = (c1, c2) : mkPairs cs
mkPairs _ = [] -- empty input, or a single left-over character
|
||||
|
||||
-- | The two-character input codes, in table order, obtained by splitting
-- 'digraphedTamil' into consecutive pairs.
allTamilCodes :: [(Char, Char)]
allTamilCodes =
  mkPairs digraphedTamil
|
||||
|
||||
-- | The output characters, in table order: every code point from U+0B85
-- through U+0BFA inclusive.
allTamil :: String
allTamil = ['\x0b85' .. '\x0bfa']
|
||||
|
||||
-- Code table for Tamil: a flat string of two-character codes, read
-- pairwise by 'mkPairs'. The n-th pair is the code for the n-th character
-- of 'allTamil' (zipped in 'digraphToUnicode').
-- "__" appears to be a filler for positions that have no input code.
-- NOTE(review): the pairing is purely positional, so the exact length of
-- every run of '_' matters - verify the alignment after any edit.
digraphedTamil = " AA: II: UU:______ EE:AI__ OO:AU k______ G c__ j__ ñ T______ N t______ V n p______ m y r l L M v__ s S h________a: ii: uu:______ ee:ai__ oo:au .__________________ :______________________________#1#2#3#4#5#6#7#8#9^1^2^3=d=m=y=d=c==ru##"
|
||||
|
||||
Reference in New Issue
Block a user