From 33c6309c43734f112e1bda2a960d3263fd3dfa40 Mon Sep 17 00:00:00 2001
From: aarne
Date: Fri, 14 Nov 2003 12:37:54 +0000
Subject: [PATCH] Harald's new codings.

---
 src/GF/Text/Devanagari.hs       | 98 +++++++++++++++++++++++++++++++++++++++
 src/GF/Text/Ethiopic.hs         | 74 +++++++++++++++++++++++++++++
 src/GF/Text/ExtendedArabic.hs   | 70 ++++++++++++++++++++++++++++
 src/GF/Text/Hiragana.hs         | 94 +++++++++++++++++++++++++++++++++++++
 src/GF/Text/LatinASupplement.hs | 60 ++++++++++++++++++++++++
 src/GF/Text/OCSCyrillic.hs      | 41 ++++++++++++++++
 src/GF/Text/Tamil.hs            | 79 +++++++++++++++++++++++++++++++
 7 files changed, 516 insertions(+)
 create mode 100644 src/GF/Text/Devanagari.hs
 create mode 100644 src/GF/Text/Ethiopic.hs
 create mode 100644 src/GF/Text/ExtendedArabic.hs
 create mode 100644 src/GF/Text/Hiragana.hs
 create mode 100644 src/GF/Text/LatinASupplement.hs
 create mode 100644 src/GF/Text/OCSCyrillic.hs
 create mode 100644 src/GF/Text/Tamil.hs

diff --git a/src/GF/Text/Devanagari.hs b/src/GF/Text/Devanagari.hs
new file mode 100644
index 000000000..1d54134df
--- /dev/null
+++ b/src/GF/Text/Devanagari.hs
@@ -0,0 +1,98 @@
+-- | Transliteration of an ad hoc Latin notation into Unicode Devanagari.
+module Devanagari where
+
+-- | Transliterate a string; text between @\<@ and @>@ is copied unchanged.
+mkDevanagari :: String -> String
+mkDevanagari = digraphWordToUnicode . adHocToDigraphWord
+
+-- | Split the input into two-character codes to be looked up in
+-- 'allDevanagariCodes'. Pairs tagged with a backslash are not in that table,
+-- so 'digraphToUnicode' passes their second character through unchanged.
+adHocToDigraphWord :: String -> [(Char, Char)]
+adHocToDigraphWord str = case str of
+  [] -> []
+  '<' : cs -> ('\\', '<') : spoolMarkup cs
+  ' ' : cs -> ('\\', ' ') : adHocToDigraphWord cs -- skip space
+
+-- if c1 is a vowel
+  -- Two of the same vowel => lengthening
+  c1 : c2 : cs | c1 == c2 && isVowel c1 -> (cap c1, ':') : adHocToDigraphWord cs
+  -- digraphed or long vowel
+  c1 : c2 : cs | isVowel c1 && isVowel c2 -> (cap c1, cap c2) : adHocToDigraphWord cs
+  c1 : cs | isVowel c1 -> (' ', cap c1) : adHocToDigraphWord cs
+
+-- c1 isn't a vowel
+  -- c1 : 'a' : [] -> [(' ', c1)] -- a inherent
+  -- c1 : c2 : [] | isVowel c2 -> (' ', c1) : [(' ', c2)]
+
+  -- c1 is aspirated
+  c1 : 'H' : c2 : c3 : cs | c2 == c3 && isVowel c2 ->
+    (c1, 'H') : (c2, ':') : adHocToDigraphWord cs
+  c1 : 'H' : c2 : c3 : cs | isVowel c2 && isVowel c3 ->
+    (c1, 'H') : (c2, c3) : adHocToDigraphWord cs
+  c1 : 'H' : 'a' : cs -> (c1, 'H') : adHocToDigraphWord cs -- a inherent
+  c1 : 'H' : c2 : cs | isVowel c2 -> (c1, 'H') : (' ', c2) : adHocToDigraphWord cs
+  -- not vowelless at EOW
+  c1 : 'H' : ' ' : cs -> (c1, 'H') : ('\\', ' ') : adHocToDigraphWord cs
+  c1 : 'H' : [] -> [(c1, 'H')]
+  c1 : 'H' : cs -> (c1, 'H') : (' ', '^') : adHocToDigraphWord cs -- vowelless
+
+  -- c1 unasp.
+  c1 : c2 : c3 : cs | c2 == c3 && isVowel c2 -> (' ', c1) : (c2, ':') : adHocToDigraphWord cs
+  c1 : c2 : c3 : cs | isVowel c2 && isVowel c3 -> (' ', c1) : (c2, c3) : adHocToDigraphWord cs
+  c1 : 'a' : cs -> (' ', c1) : adHocToDigraphWord cs -- a inherent
+  c1 : c2 : cs | isVowel c2 -> (' ', c1) : (' ', c2) : adHocToDigraphWord cs
+  -- not vowelless at EOW
+  c1 : ' ' : cs -> (' ', c1) : ('\\', ' ') : adHocToDigraphWord cs
+  c1 : [] -> [(' ', c1)]
+  'M' : cs -> (' ', 'M') : adHocToDigraphWord cs -- vowelless but no vowelless sign for anusvara
+  c1 : cs -> (' ', c1) : (' ', '^') : adHocToDigraphWord cs -- vowelless
+
+-- | Vowel letters of the ad hoc notation, plus the length mark @:@.
+isVowel :: Char -> Bool
+isVowel x = elem x "aeiou:"
+
+-- | Upper-case the five vowel letters; any other character is unchanged.
+cap :: Char -> Char
+cap x = case x of
+  'a' -> 'A'
+  'e' -> 'E'
+  'i' -> 'I'
+  'o' -> 'O'
+  'u' -> 'U'
+  c -> c
+
+-- | Copy markup through verbatim up to and including the closing @>@.
+spoolMarkup :: String -> [(Char, Char)]
+spoolMarkup s = case s of
+  [] -> [] -- unterminated markup: stop instead of failing the pattern match
+  '>' : cs -> ('\\', '>') : adHocToDigraphWord cs
+  c1 : cs -> ('\\', c1) : spoolMarkup cs
+
+-- | Translate every code pair with 'digraphToUnicode'.
+digraphWordToUnicode :: [(Char, Char)] -> String
+digraphWordToUnicode = map digraphToUnicode
+
+-- | Look a code pair up in the table; unknown pairs yield their second character.
+digraphToUnicode :: (Char, Char) -> Char
+digraphToUnicode (c1, c2) = case lookup (c1, c2) cc of Just c' -> c' ; _ -> c2
+  where
+    cc = zip allDevanagariCodes allDevanagari
+
+-- | Two-character codes, listed in code point order starting at U+0901.
+digraphedDevanagari :: String
+digraphedDevanagari = " ~ M ;__ AA: II: UU:RoLoEvE~ EE:AvA~ OAU kkH ggHNG ccH jjH ñ TTH DDH N ttH ddH nn. ppH bbH m y rr. l LL. v ç S s h____ .-Sa: ii: uu:ror:eve~ eaiava~ oau ^____OM | -dddu______ Q X G zD.RH fy.R:L:mrmR#I#d#0#1#2#3#4#5#6#7#8#9#o"
+
+-- | 'digraphedDevanagari' split into code pairs.
+allDevanagariCodes :: [(Char, Char)]
+allDevanagariCodes = mkPairs digraphedDevanagari
+
+-- | The code points U+0901 .. U+0970.
+allDevanagari :: String
+allDevanagari = (map toEnum [0x0901 .. 0x0970])
+
+-- | Group a string into consecutive pairs; a trailing unpaired character is dropped.
+mkPairs :: String -> [(Char, Char)]
+mkPairs str = case str of
+  c1 : c2 : cs -> (c1, c2) : mkPairs cs
+  _ -> [] -- empty input, or a single leftover character
diff --git a/src/GF/Text/Ethiopic.hs b/src/GF/Text/Ethiopic.hs
new file mode 100644
index 000000000..32d420fd4
--- /dev/null
+++ b/src/GF/Text/Ethiopic.hs
@@ -0,0 +1,74 @@
+-- | Transliteration of an ad hoc Latin notation into Unicode Ethiopic.
+module Ethiopic where
+
+-- | Transliterate a string; text between @\<@ and @>@ is copied unchanged.
+mkEthiopic :: String -> String
+mkEthiopic = digraphWordToUnicode . adHocToDigraphWord
+
+-- mkEthiopic :: String -> String
+-- mkEthiopic = reverse . unwords . (map (digraphWordToUnicode . adHocToDigraphWord)) . words
+--- reverse : assumes everything's on same line
+
+-- | Pair each consonant code with the order (0-6) of the vowel that follows
+-- it. An order of -1 marks a character that is to be copied unchanged.
+adHocToDigraphWord :: String -> [(Char, Int)]
+adHocToDigraphWord str = case str of
+  [] -> []
+  '<' : cs -> ('<', -1) : spoolMarkup cs
+  c1 : cs | isVowel c1 -> (')', vowelOrder c1) : adHocToDigraphWord cs
+  -- c1 isn't a vowel
+  c1 : cs | not (elem c1 allEthiopicCodes) -> (c1, -1) : adHocToDigraphWord cs
+  c1 : c2 : cs | isVowel c2 -> (c1, vowelOrder c2) : adHocToDigraphWord cs
+  c1 : cs -> (c1, 5) : adHocToDigraphWord cs
+
+-- | Copy markup through verbatim up to and including the closing @>@.
+spoolMarkup :: String -> [(Char, Int)]
+spoolMarkup s = case s of
+  [] -> [] -- unterminated markup: stop instead of failing the pattern match
+  '>' : cs -> ('>', -1) : adHocToDigraphWord cs
+  c1 : cs -> (c1, -1) : spoolMarkup cs
+
+-- | Vowel letters of the ad hoc notation.
+isVowel :: Char -> Bool
+isVowel x = elem x "AäuiïaeoI"
+
+-- | Offset of a vowel's syllable form within its consonant's block of eight.
+vowelOrder :: Char -> Int
+vowelOrder x = case x of
+  'A' -> 0
+  'ä' -> 0
+  'u' -> 1
+  'i' -> 2
+  'a' -> 3
+  'e' -> 4
+  'I' -> 5
+  'ï' -> 5
+  'o' -> 6
+  _ -> 5 -- vowelless
+
+-- | Translate every (code, order) pair with 'digraphToUnicode'.
+digraphWordToUnicode :: [(Char, Int)] -> String
+digraphWordToUnicode = map digraphToUnicode
+
+-- | Compute the code point @0x1200 + 8 * row + order@, where the row is the
+-- position of the code in 'allEthiopicCodes'. Order -1 and codes missing
+-- from the table return the character itself.
+digraphToUnicode :: (Char, Int) -> Char
+-- digraphToUnicode (c1, c2) = c1
+
+digraphToUnicode (c1, -1) = c1
+digraphToUnicode (c1, c2) = case lookup c1 cc of
+    Just c' -> toEnum (0x1200 + c2 + 8 * c')
+    Nothing -> c1 -- previously an incomplete pattern match
+  where
+    cc = zip allEthiopicCodes allEthiopic
+
+-- | Row numbers paired with 'allEthiopicCodes'.
+allEthiopic :: [Int]
+allEthiopic = [0 .. 44] -- x 8
+
+-- | One code character per row of eight syllable forms.
+allEthiopicCodes :: String
+allEthiopicCodes = "hlHmLrs$KQ__bBtcxXnN)kW__w(zZyd_jgG_TCPSLfp"
+
+-- Q = kW, X = xW, W = kW, G = gW
diff --git a/src/GF/Text/ExtendedArabic.hs b/src/GF/Text/ExtendedArabic.hs
new file mode 100644
index 000000000..d766abc67
--- /dev/null
+++ b/src/GF/Text/ExtendedArabic.hs
@@ -0,0 +1,70 @@
+-- | Transliteration of an ad hoc Latin notation into the Unicode Arabic
+-- block, with shorthands for the letters labelled Sorani and Pashto below.
+module ExtendedArabic where
+
+-- | Transliterate a string; text between @\<@ and @>@ is copied unchanged.
+mkExtendedArabic :: String -> String
+mkExtendedArabic = digraphWordToUnicode . adHocToDigraphWord
+
+-- | Expand single-letter shorthands into the two-character codes of
+-- 'digraphedExtendedArabic'; any other character is paired with a space.
+adHocToDigraphWord :: String -> [(Char, Char)]
+adHocToDigraphWord str = case str of
+  [] -> []
+  '<' : cs -> ('\\', '<') : spoolMarkup cs
+  -- Sorani
+  'W' : cs -> (':', 'w') : adHocToDigraphWord cs -- ?? Will do
+  'E' : cs -> (' ', 'i') : adHocToDigraphWord cs -- ?? Letter missing!
+  'j' : cs -> ('d', 'j') : adHocToDigraphWord cs
+  'O' : cs -> ('v', 'w') : adHocToDigraphWord cs
+  'F' : cs -> (' ', 'v') : adHocToDigraphWord cs
+  'Z' : cs -> ('z', 'h') : adHocToDigraphWord cs
+  'I' : cs -> (' ', 'i') : adHocToDigraphWord cs -- ?? Letter missing!
+  'C' : cs -> ('c', 'h') : adHocToDigraphWord cs
+  -- Pashto
+  'e' : cs -> (':', 'y') : adHocToDigraphWord cs
+  '$' : cs -> ('3', 'H') : adHocToDigraphWord cs
+  'X' : cs -> ('s', '.') : adHocToDigraphWord cs
+  'G' : cs -> ('z', '.') : adHocToDigraphWord cs
+  'a' : cs -> (' ', 'A') : adHocToDigraphWord cs
+  'P' : cs -> ('\'', 'H') : adHocToDigraphWord cs
+  'R' : cs -> ('o', 'r') : adHocToDigraphWord cs
+  -- Shared
+  'A' : cs -> (' ', 'h') : adHocToDigraphWord cs -- ?? Maybe to "t or 0x06d5
+  'c' : cs -> ('s', 'h') : adHocToDigraphWord cs
+  c : cs -> (' ', c) : adHocToDigraphWord cs
+
+-- Beginning 0x621 up and including 0x06d1
+digraphedExtendedArabic :: String
+digraphedExtendedArabic = " '~A'A'w,A'i A b\"t tTHdj H X dDH r z ssh S D T Z 3GH__________ - f q k l m n h w i y&a&w&i/a/w/i/W/o/~/'/,/|/6/v_____________#0#1#2#3#4#5#6#7#8#9#%#,#'#*>b>q$|> A2'2,3'A'w'w&y'Tb:b:BoT3b p4b4B'H:H2H\"H3Hch4HTdod.dTD:d:D3d3D4dTrvror.rvRz.:rzh4zs.+s*S:S3S3T33>ff.f: v4f.q3q-k~kok.k3k3K gog:g:G3Gvl.l3l3L:n>nTnon3n?h4H't>Y\"Yow-wvwww|w^w:w3w>y/yvy.w:y3y____ -ae"
+
+-- | Translate every code pair with 'digraphToUnicode'.
+digraphWordToUnicode :: [(Char, Char)] -> String
+digraphWordToUnicode = map digraphToUnicode
+
+-- | Look a code pair up in the table; unknown pairs yield their second character.
+digraphToUnicode :: (Char, Char) -> Char
+digraphToUnicode (c1, c2) = case lookup (c1, c2) cc of Just c' -> c' ; _ -> c2
+  where
+    cc = zip allExtendedArabicCodes allExtendedArabic
+
+-- | 'digraphedExtendedArabic' split into code pairs.
+allExtendedArabicCodes :: [(Char, Char)]
+allExtendedArabicCodes = mkPairs digraphedExtendedArabic
+
+-- | The code points U+0621 .. U+06D1.
+allExtendedArabic :: String
+allExtendedArabic = (map toEnum [0x0621 .. 0x06d1])
+
+-- | Group a string into consecutive pairs; a trailing unpaired character is dropped.
+mkPairs :: String -> [(Char, Char)]
+mkPairs str = case str of
+  c1 : c2 : cs -> (c1, c2) : mkPairs cs
+  _ -> [] -- empty input, or a single leftover character
+
+-- | Copy markup through verbatim up to and including the closing @>@.
+spoolMarkup :: String -> [(Char, Char)]
+spoolMarkup s = case s of
+  [] -> [] -- Shouldn't happen
+  '>' : cs -> ('\\', '>') : adHocToDigraphWord cs
+  c1 : cs -> ('\\', c1) : spoolMarkup cs
diff --git a/src/GF/Text/Hiragana.hs b/src/GF/Text/Hiragana.hs
new file mode 100644
index 000000000..76ef0938e
--- /dev/null
+++ b/src/GF/Text/Hiragana.hs
@@ -0,0 +1,94 @@
+-- | Transliteration of romaaji into Unicode Hiragana.
+module Hiragana where
+
+-- long vowel romaaji must be ei, ou not ee, oo
+
+-- | Transliterate a string; text between @\<@ and @>@ is copied unchanged.
+mkJapanese :: String -> String
+mkJapanese = digraphWordToUnicode . romaajiToDigraphWord
+
+-- | Split romaaji into the two-character codes of 'digraphedHiragana'.
+romaajiToDigraphWord :: String -> [(Char, Char)]
+romaajiToDigraphWord str = case str of
+  [] -> []
+  '<' : cs -> ('\\', '<') : spoolMarkup cs
+  ' ' : cs -> ('\\', ' ') : romaajiToDigraphWord cs
+
+  c1 : cs | isVowel c1 -> (' ', cap c1) : romaajiToDigraphWord cs
+
+  -- The combinations
+  c1 : 'y' : c2 : cs -> (c1, 'i') : ('y', cap c2) : romaajiToDigraphWord cs
+
+  's' : 'h' : 'a' : cs -> ('S', 'i') : ('y', 'A') : romaajiToDigraphWord cs
+  'c' : 'h' : 'a' : cs -> ('C', 'i') : ('y', 'A') : romaajiToDigraphWord cs
+  'j' : 'a' : cs -> ('j', 'i') : ('y', 'A') : romaajiToDigraphWord cs
+
+  's' : 'h' : 'u' : cs -> ('S', 'i') : ('y', 'U') : romaajiToDigraphWord cs
+  'c' : 'h' : 'u' : cs -> ('C', 'i') : ('y', 'U') : romaajiToDigraphWord cs
+  'j' : 'u' : cs -> ('j', 'i') : ('y', 'U') : romaajiToDigraphWord cs
+
+  's' : 'h' : 'o' : cs -> ('S', 'i') : ('y', 'O') : romaajiToDigraphWord cs
+  'c' : 'h' : 'o' : cs -> ('C', 'i') : ('y', 'O') : romaajiToDigraphWord cs
+  'j' : 'o' : cs -> ('j', 'i') : ('y', 'O') : romaajiToDigraphWord cs
+
+  'd' : 'z' : c3 : cs -> ('D', c3) : romaajiToDigraphWord cs
+  't' : 's' : c3 : cs -> ('T', c3) : romaajiToDigraphWord cs
+  'c' : 'h' : c3 : cs -> ('C', c3) : romaajiToDigraphWord cs
+  's' : 'h' : c3 : cs -> ('S', c3) : romaajiToDigraphWord cs
+  'z' : 'h' : c3 : cs -> ('Z', c3) : romaajiToDigraphWord cs
+
+  c1 : ' ' : cs -> (' ', c1) : ('\\', ' ') : romaajiToDigraphWord cs -- n
+  c1 : [] -> [(' ', c1)] -- n
+
+  c1 : c2 : cs | isVowel c2 -> (c1, c2) : romaajiToDigraphWord cs
+  c1 : c2 : cs | c1 == c2 -> ('T', 'U') : romaajiToDigraphWord (c2 : cs) -- double cons
+  c1 : cs -> (' ', c1) : romaajiToDigraphWord cs -- n
+
+-- | Vowel letters of romaaji.
+isVowel :: Char -> Bool
+isVowel x = elem x "aeiou"
+
+-- | Upper-case the five vowel letters; any other character is unchanged.
+cap :: Char -> Char
+cap x = case x of
+  'a' -> 'A'
+  'e' -> 'E'
+  'i' -> 'I'
+  'o' -> 'O'
+  'u' -> 'U'
+  c -> c
+
+-- | Copy markup through verbatim up to and including the closing @>@.
+spoolMarkup :: String -> [(Char, Char)]
+spoolMarkup s = case s of
+  [] -> [] -- unterminated markup: stop instead of failing the pattern match
+  '>' : cs -> ('\\', '>') : romaajiToDigraphWord cs
+  c1 : cs -> ('\\', c1) : spoolMarkup cs
+
+-- | Translate every code pair with 'digraphToUnicode'.
+digraphWordToUnicode :: [(Char, Char)] -> String
+digraphWordToUnicode = map digraphToUnicode
+
+-- | Look a code pair up in the table; unknown pairs yield their second character.
+digraphToUnicode :: (Char, Char) -> Char
+digraphToUnicode (c1, c2) = case lookup (c1, c2) cc of Just c' -> c' ; _ -> c2
+  where
+    cc = zip allHiraganaCodes allHiragana
+
+-- | 'digraphedHiragana' split into code pairs.
+allHiraganaCodes :: [(Char, Char)]
+allHiraganaCodes = mkPairs digraphedHiragana
+
+-- | The code points U+3041 .. U+309F.
+allHiragana :: String
+allHiragana = (map toEnum [0x3041 .. 0x309f])
+
+-- | Group a string into consecutive pairs; a trailing unpaired character is dropped.
+mkPairs :: String -> [(Char, Char)]
+mkPairs str = case str of
+  c1 : c2 : cs -> (c1, c2) : mkPairs cs
+  _ -> [] -- empty input, or a single leftover character
+
+-- | Two-character codes, listed in code point order starting at U+3041.
+digraphedHiragana :: String
+digraphedHiragana = " a A i I u U e E o OkagakigikugukegekogosazaSiZisuzusezesozotadaCijiTUTuDutedetodonaninunenohabapahibipihubupuhebepehobopomamimumemoyAyayUyuyOyorarirurerowaWawiwewo nvukAkE____<< o>>o >'> b"
diff --git a/src/GF/Text/LatinASupplement.hs b/src/GF/Text/LatinASupplement.hs
new file mode 100644
index 000000000..c06299ae3
--- /dev/null
+++ b/src/GF/Text/LatinASupplement.hs
@@ -0,0 +1,60 @@
+-- | Expansion of ASCII diacritic shorthands into accented Latin letters.
+module LatinASupplement where
+
+-- | Expand the shorthands in a string; text between @\<@ and @>@ is copied unchanged.
+mkLatinASupplement :: String -> String
+mkLatinASupplement = mkLatinASupplementWord
+
+-- | Replace each letter-plus-mark shorthand (and a bare @I@) by its accented
+-- form; every other character is kept as it is.
+mkLatinASupplementWord :: String -> String
+mkLatinASupplementWord str = case str of
+  [] -> []
+  '<' : cs -> '<' : spoolMarkup cs
+  -- Romanian & partly Turkish
+  's' : ',' : cs -> toEnum 0x015f : mkLatinASupplementWord cs
+  'a' : '%' : cs -> toEnum 0x0103 : mkLatinASupplementWord cs
+  -- Slavic and more
+  'c' : '^' : cs -> toEnum 0x010d : mkLatinASupplementWord cs
+  's' : '^' : cs -> toEnum 0x0161 : mkLatinASupplementWord cs
+  'c' : '\'' : cs -> toEnum 0x0107 : mkLatinASupplementWord cs
+  'z' : '^' : cs -> toEnum 0x017e : mkLatinASupplementWord cs
+  -- Turkish
+  'g' : '%' : cs -> toEnum 0x011f : mkLatinASupplementWord cs
+  'I' : cs -> toEnum 0x0131 : mkLatinASupplementWord cs
+  'c' : ',' : cs -> 'ç' : mkLatinASupplementWord cs
+  -- Polish
+  'e' : ',' : cs -> toEnum 0x0119 : mkLatinASupplementWord cs
+  'a' : ',' : cs -> toEnum 0x0105 : mkLatinASupplementWord cs
+  'l' : '/' : cs -> toEnum 0x0142 : mkLatinASupplementWord cs
+  'z' : '.' : cs -> toEnum 0x017c : mkLatinASupplementWord cs
+  'n' : '\'' : cs -> toEnum 0x0144 : mkLatinASupplementWord cs
+  's' : '\'' : cs -> toEnum 0x015b : mkLatinASupplementWord cs
+-- 'c' : '\'' : cs -> toEnum 0x0107 : mkLatinASupplementWord cs
+
+  -- Hungarian
+  'o' : '%' : cs -> toEnum 0x0151 : mkLatinASupplementWord cs
+  'u' : '%' : cs -> toEnum 0x0171 : mkLatinASupplementWord cs
+
+  -- Mongolian
+  'j' : '^' : cs -> toEnum 0x0135 : mkLatinASupplementWord cs
+
+  -- Khowar (actually in Combining diacritical marks not Latin-A Suppl.)
+  'o' : '.' : cs -> 'o' : (toEnum 0x0323 : mkLatinASupplementWord cs)
+
+  -- Length bars over vowels e.g korean
+  'a' : ':' : cs -> toEnum 0x0101 : mkLatinASupplementWord cs
+  'e' : ':' : cs -> toEnum 0x0113 : mkLatinASupplementWord cs
+  'i' : ':' : cs -> toEnum 0x012b : mkLatinASupplementWord cs
+  'o' : ':' : cs -> toEnum 0x014d : mkLatinASupplementWord cs
+  'u' : ':' : cs -> toEnum 0x016b : mkLatinASupplementWord cs
+
+  -- Default
+  c : cs -> c : mkLatinASupplementWord cs
+
+-- | Copy markup through verbatim up to and including the closing @>@.
+spoolMarkup :: String -> String
+spoolMarkup s = case s of
+  [] -> [] -- Shouldn't happen
+  '>' : cs -> '>' : mkLatinASupplementWord cs
+  c1 : cs -> c1 : spoolMarkup cs
diff --git a/src/GF/Text/OCSCyrillic.hs b/src/GF/Text/OCSCyrillic.hs
new file mode 100644
index 000000000..e2028570c
--- /dev/null
+++ b/src/GF/Text/OCSCyrillic.hs
@@ -0,0 +1,41 @@
+-- | Transliteration of an ad hoc Latin notation into Unicode Cyrillic.
+module OCSCyrillic where
+
+-- | Transliterate a string; text between @\<@ and @>@ is copied unchanged.
+mkOCSCyrillic :: String -> String
+mkOCSCyrillic = mkOCSCyrillicWord
+
+-- | Map multi-letter sequences to their own code points first, then fall
+-- back to the single-letter table of 'mkOCSCyrillicChar'.
+mkOCSCyrillicWord :: String -> String
+mkOCSCyrillicWord str = case str of
+  [] -> []
+  ' ' : cs -> ' ' : mkOCSCyrillicWord cs
+  '<' : cs -> '<' : spoolMarkup cs
+  'ä' : cs -> toEnum 0x0463 : mkOCSCyrillicWord cs
+  'j' : 'e' : '~' : cs -> toEnum 0x0469 : mkOCSCyrillicWord cs
+  'j' : 'o' : '~' : cs -> toEnum 0x046d : mkOCSCyrillicWord cs
+  'j' : 'e' : cs -> toEnum 0x0465 : mkOCSCyrillicWord cs
+  'e' : '~' : cs -> toEnum 0x0467 : mkOCSCyrillicWord cs
+  'o' : '~' : cs -> toEnum 0x046b : mkOCSCyrillicWord cs
+  'j' : 'u' : cs -> toEnum 0x044e : mkOCSCyrillicWord cs
+  'j' : 'a' : cs -> toEnum 0x044f : mkOCSCyrillicWord cs
+  'u' : cs -> toEnum 0x0479 : mkOCSCyrillicWord cs
+  c : cs -> (mkOCSCyrillicChar c) : mkOCSCyrillicWord cs
+
+-- | Copy markup through verbatim up to and including the closing @>@.
+spoolMarkup :: String -> String
+spoolMarkup s = case s of
+  [] -> [] -- Shouldn't happen
+  '>' : cs -> '>' : mkOCSCyrillicWord cs
+  c1 : cs -> c1 : spoolMarkup cs
+
+-- | Look a letter up in the table; unknown characters are returned unchanged.
+mkOCSCyrillicChar :: Char -> Char
+mkOCSCyrillicChar c = case lookup c cc of Just c' -> c' ; _ -> c
+  where
+    cc = zip "abvgdeZziJklmnoprstYfxCqwWUyIE" allOCSCyrillic
+
+-- | The code points U+0430 .. U+044E.
+allOCSCyrillic :: String
+allOCSCyrillic = (map toEnum [0x0430 .. 0x044e])
diff --git a/src/GF/Text/Tamil.hs b/src/GF/Text/Tamil.hs
new file mode 100644
index 000000000..a4fe50e65
--- /dev/null
+++ b/src/GF/Text/Tamil.hs
@@ -0,0 +1,79 @@
+-- | Transliteration of an ad hoc Latin notation into Unicode Tamil.
+module Tamil where
+
+-- | Transliterate a string; text between @\<@ and @>@ is copied unchanged.
+mkTamil :: String -> String
+mkTamil = digraphWordToUnicode . adHocToDigraphWord
+
+-- | Split the input into two-character codes to be looked up in
+-- 'allTamilCodes'. Pairs tagged with a backslash are not in that table,
+-- so 'digraphToUnicode' passes their second character through unchanged.
+adHocToDigraphWord :: String -> [(Char, Char)]
+adHocToDigraphWord str = case str of
+  [] -> []
+  '<' : cs -> ('\\', '<') : spoolMarkup cs
+  ' ' : cs -> ('\\', ' ') : adHocToDigraphWord cs -- skip space
+
+-- if c1 is a vowel
+  -- Two of the same vowel => lengthening
+  c1 : c2 : cs | c1 == c2 && isVowel c1 -> (cap c1, ':') : adHocToDigraphWord cs
+  -- digraphed or long vowel
+  c1 : c2 : cs | isVowel c1 && isVowel c2 -> (cap c1, cap c2) : adHocToDigraphWord cs
+  c1 : cs | isVowel c1 -> (' ', cap c1) : adHocToDigraphWord cs
+
+-- c1 isn't a vowel
+  c1 : c2 : c3 : cs | c2 == c3 && isVowel c2 -> (' ', c1) : (c2, ':') : adHocToDigraphWord cs
+  c1 : c2 : c3 : cs | isVowel c2 && isVowel c3 -> (' ', c1) : (c2, c3) : adHocToDigraphWord cs
+  c1 : 'a' : cs -> (' ', c1) : adHocToDigraphWord cs -- a inherent
+  c1 : c2 : cs | isVowel c2 -> (' ', c1) : (' ', c2) : adHocToDigraphWord cs
+
+  c1 : cs -> (' ', c1) : (' ', '.') : adHocToDigraphWord cs -- vowelless
+
+-- | Vowel letters of the ad hoc notation, plus the length mark @:@.
+isVowel :: Char -> Bool
+isVowel x = elem x "aeiou:"
+
+-- | Upper-case the five vowel letters; any other character is unchanged.
+cap :: Char -> Char
+cap x = case x of
+  'a' -> 'A'
+  'e' -> 'E'
+  'i' -> 'I'
+  'o' -> 'O'
+  'u' -> 'U'
+  c -> c
+
+-- | Copy markup through verbatim up to and including the closing @>@.
+spoolMarkup :: String -> [(Char, Char)]
+spoolMarkup s = case s of
+  [] -> [] -- unterminated markup: stop instead of failing the pattern match
+  '>' : cs -> ('\\', '>') : adHocToDigraphWord cs
+  c1 : cs -> ('\\', c1) : spoolMarkup cs
+
+-- | Translate every code pair with 'digraphToUnicode'.
+digraphWordToUnicode :: [(Char, Char)] -> String
+digraphWordToUnicode = map digraphToUnicode
+
+-- | Look a code pair up in the table; unknown pairs yield their second character.
+digraphToUnicode :: (Char, Char) -> Char
+digraphToUnicode (c1, c2) = case lookup (c1, c2) cc of Just c' -> c' ; _ -> c2
+  where
+    cc = zip allTamilCodes allTamil
+
+-- | Group a string into consecutive pairs; a trailing unpaired character is dropped.
+mkPairs :: String -> [(Char, Char)]
+mkPairs str = case str of
+  c1 : c2 : cs -> (c1, c2) : mkPairs cs
+  _ -> [] -- empty input, or a single leftover character
+
+-- | 'digraphedTamil' split into code pairs.
+allTamilCodes :: [(Char, Char)]
+allTamilCodes = mkPairs digraphedTamil
+
+-- | The code points U+0B85 .. U+0BFA.
+allTamil :: String
+allTamil = (map toEnum [0x0b85 .. 0x0bfa])
+
+-- | Two-character codes, listed in code point order starting at U+0B85.
+digraphedTamil :: String
+digraphedTamil = " AA: II: UU:______ EE:AI__ OO:AU k______ G c__ j__ ñ T______ N t______ V n p______ m y r l L M v__ s S h________a: ii: uu:______ ee:ai__ oo:au .__________________ :______________________________#1#2#3#4#5#6#7#8#9^1^2^3=d=m=y=d=c==ru##"