1
0
forked from GitHub/gf-core

use the native unicode support from GHC 6.12

This commit is contained in:
krasimir
2010-04-19 09:38:36 +00:00
parent 4c757f4683
commit 0b6b30d4a8
23 changed files with 177 additions and 490 deletions

View File

@@ -1,24 +1,69 @@
module GF.Text.Coding where
import GF.Infra.Option
import GF.Text.UTF8
import GF.Text.CP1250
import GF.Text.CP1251
import GF.Text.CP1252
import GF.Text.CP1254
import qualified Data.ByteString as BS
import Data.ByteString.Internal
import GHC.IO
import GHC.IO.Buffer
import GHC.IO.Encoding
import GHC.IO.Exception
import Control.Monad
encodeUnicode e = case e of
UTF_8 -> encodeUTF8
CP_1250 -> encodeCP1250
CP_1251 -> encodeCP1251
CP_1252 -> encodeCP1252
CP_1254 -> encodeCP1254
_ -> id
encodeUnicode :: TextEncoding -> String -> ByteString
encodeUnicode enc s =
unsafePerformIO $ do
let len = length s
cbuf0 <- newCharBuffer (len*4) ReadBuffer
foldM (\i c -> writeCharBuf (bufRaw cbuf0) i c) 0 s
let cbuf = cbuf0{bufR=len}
case enc of
TextEncoding {mkTextEncoder=mk} -> do encoder <- mk
bss <- translate (encode encoder) cbuf
close encoder
return (BS.concat bss)
where
translate cod cbuf
| i < w = do bbuf <- newByteBuffer 128 WriteBuffer
(cbuf,bbuf) <- cod cbuf bbuf
if isEmptyBuffer bbuf
then ioe_invalidCharacter
else do let bs = PS (bufRaw bbuf) (bufL bbuf) (bufR bbuf-bufL bbuf)
bss <- translate cod cbuf
return (bs:bss)
| otherwise = return []
where
i = bufL cbuf
w = bufR cbuf
decodeUnicode e = case e of
UTF_8 -> decodeUTF8
CP_1250 -> decodeCP1250
CP_1251 -> decodeCP1251
CP_1252 -> decodeCP1252
CP_1254 -> decodeCP1254
_ -> id
decodeUnicode :: TextEncoding -> ByteString -> String
decodeUnicode enc (PS fptr l len) =
unsafePerformIO $ do
let bbuf = Buffer{bufRaw=fptr, bufState=ReadBuffer, bufSize=len, bufL=l, bufR=l+len}
cbuf <- newCharBuffer 128 WriteBuffer
case enc of
TextEncoding {mkTextDecoder=mk} -> do decoder <- mk
s <- translate (encode decoder) bbuf cbuf
close decoder
return s
where
translate cod bbuf cbuf
| i < w = do (bbuf,cbuf) <- cod bbuf cbuf
if isEmptyBuffer cbuf
then ioe_invalidCharacter
else unpack cod bbuf cbuf
| otherwise = return []
where
i = bufL bbuf
w = bufR bbuf
unpack cod bbuf cbuf
| i < w = do (c,i') <- readCharBuf (bufRaw cbuf) i
cs <- unpack cod bbuf cbuf{bufL=i'}
return (c:cs)
| otherwise = translate cod bbuf cbuf{bufL=0,bufR=0}
where
i = bufL cbuf
w = bufR cbuf
ioe_invalidCharacter = ioException
(IOError Nothing InvalidArgument ""
("invalid byte sequence for this encoding") Nothing Nothing)