mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
added codepage 1250 i.e. central european. Useful for Polish, Czech, Slovak, Hungarian, Slovene, Bosnian, Croatian, Serbian, Romanian and Albanian
This commit is contained in:
77
src/GF/Text/CP1250.hs
Normal file
77
src/GF/Text/CP1250.hs
Normal file
@@ -0,0 +1,77 @@
|
||||
module GF.Text.CP1250 where
|
||||
|
||||
import Data.Char
|
||||
|
||||
decodeCP1250 = map convert where
|
||||
convert c
|
||||
| c == '\x80' = chr 0x20AC
|
||||
| c == '\x82' = chr 0x201A
|
||||
| c == '\x84' = chr 0x201E
|
||||
| c == '\x85' = chr 0x2026
|
||||
| c == '\x86' = chr 0x2020
|
||||
| c == '\x87' = chr 0x2021
|
||||
| c == '\x89' = chr 0x2030
|
||||
| c == '\x8A' = chr 0x0160
|
||||
| c == '\x8B' = chr 0x2039
|
||||
| c == '\x8C' = chr 0x015A
|
||||
| c == '\x8D' = chr 0x0164
|
||||
| c == '\x8E' = chr 0x017D
|
||||
| c == '\x8F' = chr 0x0179
|
||||
| c == '\x91' = chr 0x2018
|
||||
| c == '\x92' = chr 0x2019
|
||||
| c == '\x93' = chr 0x201C
|
||||
| c == '\x94' = chr 0x201D
|
||||
| c == '\x95' = chr 0x2022
|
||||
| c == '\x96' = chr 0x2013
|
||||
| c == '\x97' = chr 0x2014
|
||||
| c == '\x99' = chr 0x2122
|
||||
| c == '\x9A' = chr 0x0161
|
||||
| c == '\x9B' = chr 0x203A
|
||||
| c == '\x9C' = chr 0x015B
|
||||
| c == '\x9D' = chr 0x0165
|
||||
| c == '\x9E' = chr 0x017E
|
||||
| c == '\x9F' = chr 0x017A
|
||||
| c == '\xA1' = chr 0x02C7
|
||||
| c == '\xA5' = chr 0x0104
|
||||
| c == '\xB9' = chr 0x0105
|
||||
| c == '\xBC' = chr 0x013D
|
||||
| c == '\xBE' = chr 0x013E
|
||||
| otherwise = c
|
||||
|
||||
|
||||
encodeCP1250 = map convert where
|
||||
convert c
|
||||
| oc == 0x20AC = '\x80'
|
||||
| oc == 0x201A = '\x82'
|
||||
| oc == 0x201E = '\x84'
|
||||
| oc == 0x2026 = '\x85'
|
||||
| oc == 0x2020 = '\x86'
|
||||
| oc == 0x2021 = '\x87'
|
||||
| oc == 0x2030 = '\x89'
|
||||
| oc == 0x0160 = '\x8A'
|
||||
| oc == 0x2039 = '\x8B'
|
||||
| oc == 0x015A = '\x8C'
|
||||
| oc == 0x0164 = '\x8D'
|
||||
| oc == 0x017D = '\x8E'
|
||||
| oc == 0x0179 = '\x8F'
|
||||
| oc == 0x2018 = '\x91'
|
||||
| oc == 0x2019 = '\x92'
|
||||
| oc == 0x201C = '\x93'
|
||||
| oc == 0x201D = '\x94'
|
||||
| oc == 0x2022 = '\x95'
|
||||
| oc == 0x2013 = '\x96'
|
||||
| oc == 0x2014 = '\x97'
|
||||
| oc == 0x2122 = '\x99'
|
||||
| oc == 0x0161 = '\x9A'
|
||||
| oc == 0x203A = '\x9B'
|
||||
| oc == 0x015B = '\x9C'
|
||||
| oc == 0x0165 = '\x9D'
|
||||
| oc == 0x017E = '\x9E'
|
||||
| oc == 0x017A = '\x9F'
|
||||
| oc == 0x02C7 = '\xA1'
|
||||
| oc == 0x0104 = '\xA5'
|
||||
| oc == 0x0105 = '\xB9'
|
||||
| oc == 0x013D = '\xBC'
|
||||
| oc == 0x013E = '\xBE'
|
||||
| otherwise = c
|
||||
where oc = ord c
|
||||
@@ -1,17 +1,20 @@
|
||||
module GF.Text.Coding where
|
||||
|
||||
import GF.Text.UTF8
|
||||
import GF.Text.CP1250
|
||||
import GF.Text.CP1251
|
||||
import GF.Text.CP1252
|
||||
|
||||
encodeUnicode e = case e of
|
||||
"utf8" -> encodeUTF8
|
||||
"cp1250" -> encodeCP1250
|
||||
"cp1251" -> encodeCP1251
|
||||
"cp1252" -> encodeCP1252
|
||||
_ -> id
|
||||
|
||||
decodeUnicode e = case e of
|
||||
"utf8" -> decodeUTF8
|
||||
"cp1250" -> decodeCP1250
|
||||
"cp1251" -> decodeCP1251
|
||||
"cp1252" -> decodeCP1252
|
||||
_ -> id
|
||||
|
||||
Reference in New Issue
Block a user