Added code page 1250 (Windows-1250, Central European). Useful for Polish, Czech, Slovak, Hungarian, Slovene, Bosnian, Croatian, Serbian, Romanian and Albanian.

This commit is contained in:
krasimir
2008-12-17 23:53:12 +00:00
parent e62ffcbad1
commit 79d51c4479
2 changed files with 80 additions and 0 deletions

77
src/GF/Text/CP1250.hs Normal file
View File

@@ -0,0 +1,77 @@
module GF.Text.CP1250 where
import Data.Char
-- | Decode a string whose characters carry CP1250 (Windows-1250) byte values
-- into the corresponding Unicode characters.
--
-- Every character is translated independently:
--
-- * characters below 0x80 (ASCII) and above 0xFF are returned unchanged;
-- * characters in 0x80..0xFF that CP1250 assigns to a code point different
--   from their own value are replaced by that code point;
-- * all remaining characters (positions where CP1250 coincides with
--   Latin-1, and the bytes 0x81, 0x83, 0x88, 0x90, 0x98 that have no entry
--   in the table) are returned unchanged.
decodeCP1250 :: String -> String
decodeCP1250 = map convert
  where
    convert :: Char -> Char
    convert c
      -- Fast path: nothing outside 0x80..0xFF is ever remapped.
      | c < '\x80' || c > '\xFF' = c
      | otherwise = maybe c id (lookup c table)

    -- (CP1250 byte, Unicode code point) for every byte whose meaning differs
    -- from the Latin-1 character with the same value.
    table :: [(Char, Char)]
    table =
      [ -- 0x80..0x8F
        ('\x80', '\x20AC'), ('\x82', '\x201A'), ('\x84', '\x201E'), ('\x85', '\x2026')
      , ('\x86', '\x2020'), ('\x87', '\x2021'), ('\x89', '\x2030'), ('\x8A', '\x0160')
      , ('\x8B', '\x2039'), ('\x8C', '\x015A'), ('\x8D', '\x0164'), ('\x8E', '\x017D')
      , ('\x8F', '\x0179')
        -- 0x90..0x9F
      , ('\x91', '\x2018'), ('\x92', '\x2019'), ('\x93', '\x201C'), ('\x94', '\x201D')
      , ('\x95', '\x2022'), ('\x96', '\x2013'), ('\x97', '\x2014'), ('\x99', '\x2122')
      , ('\x9A', '\x0161'), ('\x9B', '\x203A'), ('\x9C', '\x015B'), ('\x9D', '\x0165')
      , ('\x9E', '\x017E'), ('\x9F', '\x017A')
        -- 0xA0..0xAF
      , ('\xA1', '\x02C7'), ('\xA2', '\x02D8'), ('\xA3', '\x0141'), ('\xA5', '\x0104')
      , ('\xAA', '\x015E'), ('\xAF', '\x017B')
        -- 0xB0..0xBF
      , ('\xB2', '\x02DB'), ('\xB3', '\x0142'), ('\xB9', '\x0105'), ('\xBA', '\x015F')
      , ('\xBC', '\x013D'), ('\xBD', '\x02DD'), ('\xBE', '\x013E'), ('\xBF', '\x017C')
        -- 0xC0..0xCF
      , ('\xC0', '\x0154'), ('\xC3', '\x0102'), ('\xC5', '\x0139'), ('\xC6', '\x0106')
      , ('\xC8', '\x010C'), ('\xCA', '\x0118'), ('\xCC', '\x011A'), ('\xCF', '\x010E')
        -- 0xD0..0xDF
      , ('\xD0', '\x0110'), ('\xD1', '\x0143'), ('\xD2', '\x0147'), ('\xD5', '\x0150')
      , ('\xD8', '\x0158'), ('\xD9', '\x016E'), ('\xDB', '\x0170'), ('\xDE', '\x0162')
        -- 0xE0..0xEF
      , ('\xE0', '\x0155'), ('\xE3', '\x0103'), ('\xE5', '\x013A'), ('\xE6', '\x0107')
      , ('\xE8', '\x010D'), ('\xEA', '\x0119'), ('\xEC', '\x011B'), ('\xEF', '\x010F')
        -- 0xF0..0xFF
      , ('\xF0', '\x0111'), ('\xF1', '\x0144'), ('\xF2', '\x0148'), ('\xF5', '\x0151')
      , ('\xF8', '\x0159'), ('\xF9', '\x016F'), ('\xFB', '\x0171'), ('\xFE', '\x0163')
      , ('\xFF', '\x02D9')
      ]
-- | Encode Unicode characters as CP1250 (Windows-1250) byte values, each
-- byte being represented as a 'Char' in the range 0x00..0xFF.
--
-- Every character is translated independently:
--
-- * characters below 0x80 (ASCII) are returned unchanged;
-- * characters listed in the table below are replaced by their CP1250 byte;
-- * any other character is returned unchanged. For code points in
--   0xA0..0xFF that CP1250 shares with Latin-1 this is the correct byte;
--   for characters that have no CP1250 representation it means they leak
--   through unencoded rather than raising an error.
encodeCP1250 :: String -> String
encodeCP1250 = map convert
  where
    convert :: Char -> Char
    convert c
      -- Fast path: ASCII is identical in CP1250.
      | c < '\x80' = c
      | otherwise = maybe c id (lookup c table)

    -- (Unicode code point, CP1250 byte) for every CP1250 byte whose meaning
    -- differs from the Latin-1 character with the same value.
    table :: [(Char, Char)]
    table =
      [ -- 0x80..0x8F
        ('\x20AC', '\x80'), ('\x201A', '\x82'), ('\x201E', '\x84'), ('\x2026', '\x85')
      , ('\x2020', '\x86'), ('\x2021', '\x87'), ('\x2030', '\x89'), ('\x0160', '\x8A')
      , ('\x2039', '\x8B'), ('\x015A', '\x8C'), ('\x0164', '\x8D'), ('\x017D', '\x8E')
      , ('\x0179', '\x8F')
        -- 0x90..0x9F
      , ('\x2018', '\x91'), ('\x2019', '\x92'), ('\x201C', '\x93'), ('\x201D', '\x94')
      , ('\x2022', '\x95'), ('\x2013', '\x96'), ('\x2014', '\x97'), ('\x2122', '\x99')
      , ('\x0161', '\x9A'), ('\x203A', '\x9B'), ('\x015B', '\x9C'), ('\x0165', '\x9D')
      , ('\x017E', '\x9E'), ('\x017A', '\x9F')
        -- 0xA0..0xAF
      , ('\x02C7', '\xA1'), ('\x02D8', '\xA2'), ('\x0141', '\xA3'), ('\x0104', '\xA5')
      , ('\x015E', '\xAA'), ('\x017B', '\xAF')
        -- 0xB0..0xBF
      , ('\x02DB', '\xB2'), ('\x0142', '\xB3'), ('\x0105', '\xB9'), ('\x015F', '\xBA')
      , ('\x013D', '\xBC'), ('\x02DD', '\xBD'), ('\x013E', '\xBE'), ('\x017C', '\xBF')
        -- 0xC0..0xCF
      , ('\x0154', '\xC0'), ('\x0102', '\xC3'), ('\x0139', '\xC5'), ('\x0106', '\xC6')
      , ('\x010C', '\xC8'), ('\x0118', '\xCA'), ('\x011A', '\xCC'), ('\x010E', '\xCF')
        -- 0xD0..0xDF
      , ('\x0110', '\xD0'), ('\x0143', '\xD1'), ('\x0147', '\xD2'), ('\x0150', '\xD5')
      , ('\x0158', '\xD8'), ('\x016E', '\xD9'), ('\x0170', '\xDB'), ('\x0162', '\xDE')
        -- 0xE0..0xEF
      , ('\x0155', '\xE0'), ('\x0103', '\xE3'), ('\x013A', '\xE5'), ('\x0107', '\xE6')
      , ('\x010D', '\xE8'), ('\x0119', '\xEA'), ('\x011B', '\xEC'), ('\x010F', '\xEF')
        -- 0xF0..0xFF
      , ('\x0111', '\xF0'), ('\x0144', '\xF1'), ('\x0148', '\xF2'), ('\x0151', '\xF5')
      , ('\x0159', '\xF8'), ('\x016F', '\xF9'), ('\x0171', '\xFB'), ('\x0163', '\xFE')
      , ('\x02D9', '\xFF')
      ]

View File

@@ -1,17 +1,20 @@
module GF.Text.Coding where
import GF.Text.UTF8
import GF.Text.CP1250
import GF.Text.CP1251
import GF.Text.CP1252
-- | Select the encoder for the character encoding named by the argument.
--
-- Recognised names are @"utf8"@, @"cp1250"@, @"cp1251"@ and @"cp1252"@;
-- any other name yields 'id', i.e. the input is left untouched.
encodeUnicode "utf8"   = encodeUTF8
encodeUnicode "cp1250" = encodeCP1250
encodeUnicode "cp1251" = encodeCP1251
encodeUnicode "cp1252" = encodeCP1252
encodeUnicode _        = id
-- | Select the decoder for the character encoding named by the argument.
--
-- Recognised names are @"utf8"@, @"cp1250"@, @"cp1251"@ and @"cp1252"@;
-- any other name yields 'id', i.e. the input is left untouched.
decodeUnicode "utf8"   = decodeUTF8
decodeUnicode "cp1250" = decodeCP1250
decodeUnicode "cp1251" = decodeCP1251
decodeUnicode "cp1252" = decodeCP1252
decodeUnicode _        = id