forked from GitHub/gf-core
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
This commit is contained in:
72
src-3.0/GF/Text/Ethiopic.hs
Normal file
72
src-3.0/GF/Text/Ethiopic.hs
Normal file
@@ -0,0 +1,72 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Ethiopic
|
||||
-- Maintainer : HH
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/21 16:23:35 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.6 $
|
||||
--
|
||||
-- Ascii-Unicode decoding for Ethiopian.
|
||||
-- Copyright (c) Harald Hammarström 2003 under Gnu General Public License
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Text.Ethiopic (mkEthiopic) where
|
||||
|
||||
mkEthiopic :: String -> String
|
||||
mkEthiopic = digraphWordToUnicode . adHocToDigraphWord
|
||||
|
||||
-- mkEthiopic :: String -> String
|
||||
-- mkEthiopic = reverse . unwords . (map (digraphWordToUnicode . adHocToDigraphWord)) . words
|
||||
--- reverse : assumes everything's on same line
|
||||
|
||||
adHocToDigraphWord :: String -> [(Char, Int)]
|
||||
adHocToDigraphWord str = case str of
|
||||
[] -> []
|
||||
'<' : cs -> ('<', -1) : spoolMarkup cs
|
||||
c1 : cs | isVowel c1 -> (')', vowelOrder c1) : adHocToDigraphWord cs
|
||||
-- c1 isn't a vowel
|
||||
c1 : cs | not (elem c1 allEthiopicCodes) -> (c1, -1) : adHocToDigraphWord cs
|
||||
c1 : c2 : cs | isVowel c2 -> (c1, vowelOrder c2) : adHocToDigraphWord cs
|
||||
c1 : cs -> (c1, 5) : adHocToDigraphWord cs
|
||||
|
||||
spoolMarkup :: String -> [(Char, Int)]
|
||||
spoolMarkup s = case s of
|
||||
-- [] -> [] -- Shouldn't happen
|
||||
'>' : cs -> ('>', -1) : adHocToDigraphWord cs
|
||||
c1 : cs -> (c1, -1) : spoolMarkup cs
|
||||
|
||||
isVowel x = elem x "A\228ui\239aeoI"
|
||||
|
||||
vowelOrder :: Char -> Int
|
||||
vowelOrder x = case x of
|
||||
'A' -> 0
|
||||
'\228' -> 0 -- ä
|
||||
'u' -> 1
|
||||
'i' -> 2
|
||||
'a' -> 3
|
||||
'e' -> 4
|
||||
'I' -> 5
|
||||
'\239' -> 5 -- ï
|
||||
'o' -> 6
|
||||
c -> 5 -- vowelless
|
||||
|
||||
digraphWordToUnicode = map digraphToUnicode
|
||||
|
||||
digraphToUnicode :: (Char, Int) -> Char
|
||||
-- digraphToUnicode (c1, c2) = c1
|
||||
|
||||
digraphToUnicode (c1, -1) = c1
|
||||
digraphToUnicode (c1, c2) = toEnum (0x1200 + c2 + 8*case lookup c1 cc of Just c' -> c')
|
||||
where
|
||||
cc = zip allEthiopicCodes allEthiopic
|
||||
|
||||
allEthiopic :: [Int]
|
||||
allEthiopic = [0 .. 44] -- x 8
|
||||
|
||||
allEthiopicCodes = "hlHmLrs$KQ__bBtcxXnN)kW__w(zZyd_jgG_TCPSLfp"
|
||||
|
||||
-- Q = kW, X = xW, W = kW, G = gW
|
||||
|
||||
Reference in New Issue
Block a user