mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-22 11:19:32 -06:00
73 lines
2.0 KiB
Haskell
73 lines
2.0 KiB
Haskell
----------------------------------------------------------------------
|
|
-- |
|
|
-- Module : Ethiopic
|
|
-- Maintainer : HH
|
|
-- Stability : (stable)
|
|
-- Portability : (portable)
|
|
--
|
|
-- > CVS $Date: 2005/04/21 16:23:35 $
|
|
-- > CVS $Author: bringert $
|
|
-- > CVS $Revision: 1.6 $
|
|
--
|
|
-- Ascii-Unicode decoding for Ethiopian.
|
|
-- Copyright (c) Harald Hammarström 2003 under Gnu General Public License
|
|
-----------------------------------------------------------------------------
|
|
|
|
module GF.Text.Ethiopic (mkEthiopic) where
|
|
|
|
mkEthiopic :: String -> String
|
|
mkEthiopic = digraphWordToUnicode . adHocToDigraphWord
|
|
|
|
-- mkEthiopic :: String -> String
|
|
-- mkEthiopic = reverse . unwords . (map (digraphWordToUnicode . adHocToDigraphWord)) . words
|
|
--- reverse : assumes everything's on same line
|
|
|
|
adHocToDigraphWord :: String -> [(Char, Int)]
|
|
adHocToDigraphWord str = case str of
|
|
[] -> []
|
|
'<' : cs -> ('<', -1) : spoolMarkup cs
|
|
c1 : cs | isVowel c1 -> (')', vowelOrder c1) : adHocToDigraphWord cs
|
|
-- c1 isn't a vowel
|
|
c1 : cs | not (elem c1 allEthiopicCodes) -> (c1, -1) : adHocToDigraphWord cs
|
|
c1 : c2 : cs | isVowel c2 -> (c1, vowelOrder c2) : adHocToDigraphWord cs
|
|
c1 : cs -> (c1, 5) : adHocToDigraphWord cs
|
|
|
|
spoolMarkup :: String -> [(Char, Int)]
|
|
spoolMarkup s = case s of
|
|
-- [] -> [] -- Shouldn't happen
|
|
'>' : cs -> ('>', -1) : adHocToDigraphWord cs
|
|
c1 : cs -> (c1, -1) : spoolMarkup cs
|
|
|
|
isVowel x = elem x "A\228ui\239aeoI"
|
|
|
|
vowelOrder :: Char -> Int
|
|
vowelOrder x = case x of
|
|
'A' -> 0
|
|
'\228' -> 0 -- ä
|
|
'u' -> 1
|
|
'i' -> 2
|
|
'a' -> 3
|
|
'e' -> 4
|
|
'I' -> 5
|
|
'\239' -> 5 -- ï
|
|
'o' -> 6
|
|
c -> 5 -- vowelless
|
|
|
|
digraphWordToUnicode = map digraphToUnicode
|
|
|
|
digraphToUnicode :: (Char, Int) -> Char
|
|
-- digraphToUnicode (c1, c2) = c1
|
|
|
|
digraphToUnicode (c1, -1) = c1
|
|
digraphToUnicode (c1, c2) = toEnum (0x1200 + c2 + 8*case lookup c1 cc of Just c' -> c')
|
|
where
|
|
cc = zip allEthiopicCodes allEthiopic
|
|
|
|
allEthiopic :: [Int]
|
|
allEthiopic = [0 .. 44] -- x 8
|
|
|
|
allEthiopicCodes = "hlHmLrs$KQ__bBtcxXnN)kW__w(zZyd_jgG_TCPSLfp"
|
|
|
|
-- Q = kW, X = xW, W = kW, G = gW
|
|
|