forked from GitHub/gf-core
use the native unicode support from GHC 6.12
This commit is contained in:
@@ -1,24 +1,69 @@
|
||||
module GF.Text.Coding where
|
||||
|
||||
import GF.Infra.Option
|
||||
import GF.Text.UTF8
|
||||
import GF.Text.CP1250
|
||||
import GF.Text.CP1251
|
||||
import GF.Text.CP1252
|
||||
import GF.Text.CP1254
|
||||
import qualified Data.ByteString as BS
|
||||
import Data.ByteString.Internal
|
||||
import GHC.IO
|
||||
import GHC.IO.Buffer
|
||||
import GHC.IO.Encoding
|
||||
import GHC.IO.Exception
|
||||
import Control.Monad
|
||||
|
||||
encodeUnicode e = case e of
|
||||
UTF_8 -> encodeUTF8
|
||||
CP_1250 -> encodeCP1250
|
||||
CP_1251 -> encodeCP1251
|
||||
CP_1252 -> encodeCP1252
|
||||
CP_1254 -> encodeCP1254
|
||||
_ -> id
|
||||
encodeUnicode :: TextEncoding -> String -> ByteString
|
||||
encodeUnicode enc s =
|
||||
unsafePerformIO $ do
|
||||
let len = length s
|
||||
cbuf0 <- newCharBuffer (len*4) ReadBuffer
|
||||
foldM (\i c -> writeCharBuf (bufRaw cbuf0) i c) 0 s
|
||||
let cbuf = cbuf0{bufR=len}
|
||||
case enc of
|
||||
TextEncoding {mkTextEncoder=mk} -> do encoder <- mk
|
||||
bss <- translate (encode encoder) cbuf
|
||||
close encoder
|
||||
return (BS.concat bss)
|
||||
where
|
||||
translate cod cbuf
|
||||
| i < w = do bbuf <- newByteBuffer 128 WriteBuffer
|
||||
(cbuf,bbuf) <- cod cbuf bbuf
|
||||
if isEmptyBuffer bbuf
|
||||
then ioe_invalidCharacter
|
||||
else do let bs = PS (bufRaw bbuf) (bufL bbuf) (bufR bbuf-bufL bbuf)
|
||||
bss <- translate cod cbuf
|
||||
return (bs:bss)
|
||||
| otherwise = return []
|
||||
where
|
||||
i = bufL cbuf
|
||||
w = bufR cbuf
|
||||
|
||||
decodeUnicode e = case e of
|
||||
UTF_8 -> decodeUTF8
|
||||
CP_1250 -> decodeCP1250
|
||||
CP_1251 -> decodeCP1251
|
||||
CP_1252 -> decodeCP1252
|
||||
CP_1254 -> decodeCP1254
|
||||
_ -> id
|
||||
decodeUnicode :: TextEncoding -> ByteString -> String
|
||||
decodeUnicode enc (PS fptr l len) =
|
||||
unsafePerformIO $ do
|
||||
let bbuf = Buffer{bufRaw=fptr, bufState=ReadBuffer, bufSize=len, bufL=l, bufR=l+len}
|
||||
cbuf <- newCharBuffer 128 WriteBuffer
|
||||
case enc of
|
||||
TextEncoding {mkTextDecoder=mk} -> do decoder <- mk
|
||||
s <- translate (encode decoder) bbuf cbuf
|
||||
close decoder
|
||||
return s
|
||||
where
|
||||
translate cod bbuf cbuf
|
||||
| i < w = do (bbuf,cbuf) <- cod bbuf cbuf
|
||||
if isEmptyBuffer cbuf
|
||||
then ioe_invalidCharacter
|
||||
else unpack cod bbuf cbuf
|
||||
| otherwise = return []
|
||||
where
|
||||
i = bufL bbuf
|
||||
w = bufR bbuf
|
||||
|
||||
unpack cod bbuf cbuf
|
||||
| i < w = do (c,i') <- readCharBuf (bufRaw cbuf) i
|
||||
cs <- unpack cod bbuf cbuf{bufL=i'}
|
||||
return (c:cs)
|
||||
| otherwise = translate cod bbuf cbuf{bufL=0,bufR=0}
|
||||
where
|
||||
i = bufL cbuf
|
||||
w = bufR cbuf
|
||||
|
||||
ioe_invalidCharacter = ioException
|
||||
(IOError Nothing InvalidArgument ""
|
||||
("invalid byte sequence for this encoding") Nothing Nothing)
|
||||
|
||||
Reference in New Issue
Block a user