From 1e51690b71f13c877b19230e70b3be95a154e3ac Mon Sep 17 00:00:00 2001 From: krasimir Date: Tue, 23 Mar 2010 13:44:17 +0000 Subject: [PATCH] added codepage for Turkish --- src/compiler/GF/Infra/Option.hs | 3 +- src/compiler/GF/Text/CP1254.hs | 84 +++++++++++++++++++++++++++++++++ src/compiler/GF/Text/Coding.hs | 3 ++ 3 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 src/compiler/GF/Text/CP1254.hs diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs index 3c35fef00..24b967aff 100644 --- a/src/compiler/GF/Infra/Option.hs +++ b/src/compiler/GF/Infra/Option.hs @@ -77,7 +77,7 @@ data Verbosity = Quiet | Normal | Verbose | Debug data Phase = Preproc | Convert | Compile | Link deriving (Show,Eq,Ord) -data Encoding = UTF_8 | ISO_8859_1 | CP_1250 | CP_1251 | CP_1252 +data Encoding = UTF_8 | ISO_8859_1 | CP_1250 | CP_1251 | CP_1252 | CP_1254 deriving (Eq,Ord) data OutputFormat = FmtPGFPretty @@ -489,6 +489,7 @@ encodings = ("cp1250", CP_1250), ("cp1251", CP_1251), ("cp1252", CP_1252), + ("cp1254", CP_1254), ("latin1", ISO_8859_1) ] diff --git a/src/compiler/GF/Text/CP1254.hs b/src/compiler/GF/Text/CP1254.hs new file mode 100644 index 000000000..488359d70 --- /dev/null +++ b/src/compiler/GF/Text/CP1254.hs @@ -0,0 +1,84 @@ +----------------------------------------------------------------------------- +-- | +-- Module : GF.Text.CP1254 +-- Maintainer : Krasimir Angelov +-- +-- cp1254 is a code page used under Microsoft Windows to write Turkish. +-- Characters with codepoints A0 through FF are compatible with ISO 8859-9. 
+--
+-----------------------------------------------------------------------------
+
+module GF.Text.CP1254 where
+
+import Data.Char
+
+-- | Decode CP1254 text to Unicode; characters with no entry are kept as is.
+decodeCP1254 = map $ \byte -> case byte of
+  '\x80' -> '\x20AC'
+  '\x82' -> '\x201A'
+  '\x83' -> '\x192'
+  '\x84' -> '\x201E'
+  '\x85' -> '\x2026'
+  '\x86' -> '\x2020'
+  '\x87' -> '\x2021'
+  '\x88' -> '\x2C6'
+  '\x89' -> '\x2030'
+  '\x8A' -> '\x160'
+  '\x8B' -> '\x2039'
+  '\x8C' -> '\x152'
+  '\x91' -> '\x2018'
+  '\x92' -> '\x2019'
+  '\x93' -> '\x201C'
+  '\x94' -> '\x201D'
+  '\x95' -> '\x2022'
+  '\x96' -> '\x2013'
+  '\x97' -> '\x2014'
+  '\x98' -> '\x2DC'
+  '\x99' -> '\x2122'
+  '\x9A' -> '\x161'
+  '\x9B' -> '\x203A'
+  '\x9C' -> '\x153'
+  '\x9F' -> '\x178'
+  '\xD0' -> '\x11E'
+  '\xDD' -> '\x130'
+  '\xDE' -> '\x15E'
+  '\xF0' -> '\x11F'
+  '\xFD' -> '\x131'
+  '\xFE' -> '\x15F'
+  _      -> byte
+
+-- | Encode Unicode text as CP1254, inverting the 'decodeCP1254' table;
+-- characters with no entry are kept as is.
+encodeCP1254 = map $ \uni -> case ord uni of
+  0x20AC -> '\x80'
+  0x201A -> '\x82'
+  0x192  -> '\x83'
+  0x201E -> '\x84'
+  0x2026 -> '\x85'
+  0x2020 -> '\x86'
+  0x2021 -> '\x87'
+  0x2C6  -> '\x88'
+  0x2030 -> '\x89'
+  0x160  -> '\x8A'
+  0x2039 -> '\x8B'
+  0x152  -> '\x8C'
+  0x2018 -> '\x91'
+  0x2019 -> '\x92'
+  0x201C -> '\x93'
+  0x201D -> '\x94'
+  0x2022 -> '\x95'
+  0x2013 -> '\x96'
+  0x2014 -> '\x97'
+  0x2DC  -> '\x98'
+  0x2122 -> '\x99'
+  0x161  -> '\x9A'
+  0x203A -> '\x9B'
+  0x153  -> '\x9C'
+  0x178  -> '\x9F'
+  0x11E  -> '\xD0'
+  0x130  -> '\xDD'
+  0x15E  -> '\xDE'
+  0x11F  -> '\xF0'
+  0x131  -> '\xFD'
+  0x15F  -> '\xFE'
+  _      -> uni
diff --git 
a/src/compiler/GF/Text/Coding.hs b/src/compiler/GF/Text/Coding.hs index e3cd7b0ea..3481b278d 100644 --- a/src/compiler/GF/Text/Coding.hs +++ b/src/compiler/GF/Text/Coding.hs @@ -5,12 +5,14 @@ import GF.Text.UTF8 import GF.Text.CP1250 import GF.Text.CP1251 import GF.Text.CP1252 +import GF.Text.CP1254 encodeUnicode e = case e of UTF_8 -> encodeUTF8 CP_1250 -> encodeCP1250 CP_1251 -> encodeCP1251 CP_1252 -> encodeCP1252 + CP_1254 -> encodeCP1254 _ -> id decodeUnicode e = case e of @@ -18,4 +20,5 @@ decodeUnicode e = case e of CP_1250 -> decodeCP1250 CP_1251 -> decodeCP1251 CP_1252 -> decodeCP1252 + CP_1254 -> decodeCP1254 _ -> id