mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-24 02:12:50 -06:00
added codepage for Turkish
This commit is contained in:
@@ -77,7 +77,7 @@ data Verbosity = Quiet | Normal | Verbose | Debug
|
|||||||
data Phase = Preproc | Convert | Compile | Link
|
data Phase = Preproc | Convert | Compile | Link
|
||||||
deriving (Show,Eq,Ord)
|
deriving (Show,Eq,Ord)
|
||||||
|
|
||||||
data Encoding = UTF_8 | ISO_8859_1 | CP_1250 | CP_1251 | CP_1252
|
data Encoding = UTF_8 | ISO_8859_1 | CP_1250 | CP_1251 | CP_1252 | CP_1254
|
||||||
deriving (Eq,Ord)
|
deriving (Eq,Ord)
|
||||||
|
|
||||||
data OutputFormat = FmtPGFPretty
|
data OutputFormat = FmtPGFPretty
|
||||||
@@ -489,6 +489,7 @@ encodings =
|
|||||||
("cp1250", CP_1250),
|
("cp1250", CP_1250),
|
||||||
("cp1251", CP_1251),
|
("cp1251", CP_1251),
|
||||||
("cp1252", CP_1252),
|
("cp1252", CP_1252),
|
||||||
|
("cp1254", CP_1254),
|
||||||
("latin1", ISO_8859_1)
|
("latin1", ISO_8859_1)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
84
src/compiler/GF/Text/CP1254.hs
Normal file
84
src/compiler/GF/Text/CP1254.hs
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
-----------------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : GF.Text.CP1254
|
||||||
|
-- Maintainer : Krasimir Angelov
|
||||||
|
--
|
||||||
|
-- cp1254 is a code page used under Microsoft Windows to write Turkish.
|
||||||
|
-- Characters with codepoints A0 through FF are compatible with ISO 8859-9.
|
||||||
|
--
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
module GF.Text.CP1254 where
|
||||||
|
|
||||||
|
import Data.Char
|
||||||
|
|
||||||
|
-- | Translate a string of CP1254 byte values (one byte per 'Char') into
-- the corresponding Unicode characters.
--
-- Only the positions listed below are remapped: the 0x80-0x9F block and the
-- six Turkish letters.  Every other character is returned unchanged.
decodeCP1254 :: String -> String
decodeCP1254 = map toUnicode
  where
    toUnicode byte = case byte of
      '\x80' -> '\x20AC'  -- euro sign
      '\x82' -> '\x201A'  -- single low-9 quotation mark
      '\x83' -> '\x192'   -- latin small letter f with hook
      '\x84' -> '\x201E'  -- double low-9 quotation mark
      '\x85' -> '\x2026'  -- horizontal ellipsis
      '\x86' -> '\x2020'  -- dagger
      '\x87' -> '\x2021'  -- double dagger
      '\x88' -> '\x2C6'   -- modifier letter circumflex accent
      '\x89' -> '\x2030'  -- per mille sign
      '\x8A' -> '\x160'   -- latin capital letter s with caron
      '\x8B' -> '\x2039'  -- single left-pointing angle quotation mark
      '\x8C' -> '\x152'   -- latin capital ligature oe
      '\x91' -> '\x2018'  -- left single quotation mark
      '\x92' -> '\x2019'  -- right single quotation mark
      '\x93' -> '\x201C'  -- left double quotation mark
      '\x94' -> '\x201D'  -- right double quotation mark
      '\x95' -> '\x2022'  -- bullet
      '\x96' -> '\x2013'  -- en dash
      '\x97' -> '\x2014'  -- em dash
      '\x98' -> '\x2DC'   -- small tilde
      '\x99' -> '\x2122'  -- trade mark sign
      '\x9A' -> '\x161'   -- latin small letter s with caron
      '\x9B' -> '\x203A'  -- single right-pointing angle quotation mark
      '\x9C' -> '\x153'   -- latin small ligature oe
      '\x9F' -> '\x178'   -- latin capital letter y with diaeresis
      '\xD0' -> '\x11E'   -- latin capital letter g with breve
      '\xDD' -> '\x130'   -- latin capital letter i with dot above
      '\xDE' -> '\x15E'   -- latin capital letter s with cedilla
      '\xF0' -> '\x11F'   -- latin small letter g with breve
      '\xFD' -> '\x131'   -- latin small letter dotless i
      '\xFE' -> '\x15F'   -- latin small letter s with cedilla
      _      -> byte
|
||||||
|
|
||||||
|
-- | Translate a Unicode string into CP1254 byte values (one byte per 'Char').
--
-- Only the code points listed below are remapped to their CP1254 position;
-- any other character, including one that has no CP1254 representation, is
-- returned unchanged.
encodeCP1254 :: String -> String
encodeCP1254 = map toByte
  where
    toByte uc = case ord uc of
      0x20AC -> '\x80'
      0x201A -> '\x82'
      0x192  -> '\x83'
      0x201E -> '\x84'
      0x2026 -> '\x85'
      0x2020 -> '\x86'
      0x2021 -> '\x87'
      0x2C6  -> '\x88'
      0x2030 -> '\x89'
      0x160  -> '\x8A'
      0x2039 -> '\x8B'
      0x152  -> '\x8C'
      0x2018 -> '\x91'
      0x2019 -> '\x92'
      0x201C -> '\x93'
      0x201D -> '\x94'
      0x2022 -> '\x95'
      0x2013 -> '\x96'
      0x2014 -> '\x97'
      0x2DC  -> '\x98'
      0x2122 -> '\x99'
      0x161  -> '\x9A'
      0x203A -> '\x9B'
      0x153  -> '\x9C'
      0x178  -> '\x9F'
      -- Turkish letters, placed where ISO 8859-1 has Icelandic ones.
      0x11E  -> '\xD0'
      0x130  -> '\xDD'
      0x15E  -> '\xDE'
      0x11F  -> '\xF0'
      0x131  -> '\xFD'
      0x15F  -> '\xFE'
      _      -> uc
|
||||||
@@ -5,12 +5,14 @@ import GF.Text.UTF8
|
|||||||
import GF.Text.CP1250
|
import GF.Text.CP1250
|
||||||
import GF.Text.CP1251
|
import GF.Text.CP1251
|
||||||
import GF.Text.CP1252
|
import GF.Text.CP1252
|
||||||
|
import GF.Text.CP1254
|
||||||
|
|
||||||
encodeUnicode e = case e of
|
encodeUnicode e = case e of
|
||||||
UTF_8 -> encodeUTF8
|
UTF_8 -> encodeUTF8
|
||||||
CP_1250 -> encodeCP1250
|
CP_1250 -> encodeCP1250
|
||||||
CP_1251 -> encodeCP1251
|
CP_1251 -> encodeCP1251
|
||||||
CP_1252 -> encodeCP1252
|
CP_1252 -> encodeCP1252
|
||||||
|
CP_1254 -> encodeCP1254
|
||||||
_ -> id
|
_ -> id
|
||||||
|
|
||||||
decodeUnicode e = case e of
|
decodeUnicode e = case e of
|
||||||
@@ -18,4 +20,5 @@ decodeUnicode e = case e of
|
|||||||
CP_1250 -> decodeCP1250
|
CP_1250 -> decodeCP1250
|
||||||
CP_1251 -> decodeCP1251
|
CP_1251 -> decodeCP1251
|
||||||
CP_1252 -> decodeCP1252
|
CP_1252 -> decodeCP1252
|
||||||
|
CP_1254 -> decodeCP1254
|
||||||
_ -> id
|
_ -> id
|
||||||
|
|||||||
Reference in New Issue
Block a user