mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
815 lines
28 KiB
Haskell
815 lines
28 KiB
Haskell
{-# LANGUAGE CPP, FlexibleInstances, FlexibleContexts #-}
|
|
-----------------------------------------------------------------------------
|
|
-- |
|
|
-- Module : Data.Binary
|
|
-- Copyright : Lennart Kolmodin
|
|
-- License : BSD3-style (see LICENSE)
|
|
--
|
|
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
|
|
-- Stability : unstable
|
|
-- Portability : portable to Hugs and GHC. Requires the FFI and some flexible instances
|
|
--
|
|
-- Binary serialisation of Haskell values to and from lazy ByteStrings.
|
|
-- The Binary library provides methods for encoding Haskell values as
|
|
-- streams of bytes directly in memory. The resulting @ByteString@ can
|
|
-- then be written to disk, sent over the network, or futher processed
|
|
-- (for example, compressed with gzip).
|
|
--
|
|
-- The 'Binary' package is notable in that it provides both pure, and
|
|
-- high performance serialisation.
|
|
--
|
|
-- Values are always encoded in network order (big endian) form, and
|
|
-- encoded data should be portable across machine endianess, word size,
|
|
-- or compiler version. For example, data encoded using the Binary class
|
|
-- could be written from GHC, and read back in Hugs.
|
|
--
|
|
-----------------------------------------------------------------------------
|
|
|
|
module Data.Binary (
|
|
|
|
-- * The Binary class
|
|
Binary(..)
|
|
|
|
-- $example
|
|
|
|
-- * The Get and Put monads
|
|
, Get
|
|
, Put
|
|
|
|
-- * Useful helpers for writing instances
|
|
, putWord8
|
|
, getWord8
|
|
|
|
-- * Binary serialisation
|
|
, encode -- :: Binary a => a -> ByteString
|
|
, decode -- :: Binary a => ByteString -> a
|
|
|
|
-- * IO functions for serialisation
|
|
, encodeFile -- :: Binary a => FilePath -> a -> IO ()
|
|
, decodeFile -- :: Binary a => FilePath -> IO a
|
|
|
|
, encodeFile_ -- :: FilePath -> Put -> IO ()
|
|
, decodeFile_ -- :: FilePath -> Get a -> IO a
|
|
|
|
-- Lazy put and get
|
|
-- , lazyPut
|
|
-- , lazyGet
|
|
|
|
, module Data.Word -- useful
|
|
|
|
) where
|
|
|
|
#include "MachDeps.h"
|
|
|
|
import Data.Word
|
|
|
|
import Data.Binary.Put
|
|
import Data.Binary.Get
|
|
import Data.Binary.IEEE754 ( putFloat64be, getFloat64be)
|
|
import Control.Monad
|
|
import Control.Exception
|
|
import Foreign
|
|
import System.IO
|
|
|
|
import Data.ByteString.Lazy (ByteString)
|
|
import qualified Data.ByteString.Lazy as L
|
|
|
|
import Data.Char (chr,ord)
|
|
import Data.List (unfoldr)
|
|
|
|
-- And needed for the instances:
|
|
import qualified Data.ByteString as B
|
|
import qualified Data.Map as Map
|
|
import qualified Data.Set as Set
|
|
import qualified Data.IntMap as IntMap
|
|
import qualified Data.IntSet as IntSet
|
|
import qualified Data.Ratio as R
|
|
|
|
import qualified Data.Tree as T
|
|
|
|
import Data.Array.Unboxed
|
|
|
|
--
|
|
-- This isn't available in older Hugs or older GHC
|
|
--
|
|
#if __GLASGOW_HASKELL__ >= 606
|
|
import qualified Data.Sequence as Seq
|
|
import qualified Data.Foldable as Fold
|
|
#endif
|
|
|
|
------------------------------------------------------------------------
|
|
|
|
-- | The @Binary@ class provides 'put' and 'get', methods to encode and
|
|
-- decode a Haskell value to a lazy ByteString. It mirrors the Read and
|
|
-- Show classes for textual representation of Haskell types, and is
|
|
-- suitable for serialising Haskell values to disk, over the network.
|
|
--
|
|
-- For parsing and generating simple external binary formats (e.g. C
|
|
-- structures), Binary may be used, but in general is not suitable
|
|
-- for complex protocols. Instead use the Put and Get primitives
|
|
-- directly.
|
|
--
|
|
-- Instances of Binary should satisfy the following property:
|
|
--
|
|
-- > decode . encode == id
|
|
--
|
|
-- That is, the 'get' and 'put' methods should be the inverse of each
|
|
-- other. A range of instances are provided for basic Haskell types.
|
|
--
|
|
class Binary t where
|
|
-- | Encode a value in the Put monad.
|
|
put :: t -> Put
|
|
-- | Decode a value in the Get monad
|
|
get :: Get t
|
|
|
|
-- $example
|
|
-- To serialise a custom type, an instance of Binary for that type is
|
|
-- required. For example, suppose we have a data structure:
|
|
--
|
|
-- > data Exp = IntE Int
|
|
-- > | OpE String Exp Exp
|
|
-- > deriving Show
|
|
--
|
|
-- We can encode values of this type into bytestrings using the
|
|
-- following instance, which proceeds by recursively breaking down the
|
|
-- structure to serialise:
|
|
--
|
|
-- > instance Binary Exp where
|
|
-- > put (IntE i) = do put (0 :: Word8)
|
|
-- > put i
|
|
-- > put (OpE s e1 e2) = do put (1 :: Word8)
|
|
-- > put s
|
|
-- > put e1
|
|
-- > put e2
|
|
-- >
|
|
-- > get = do t <- get :: Get Word8
|
|
-- > case t of
|
|
-- > 0 -> do i <- get
|
|
-- > return (IntE i)
|
|
-- > 1 -> do s <- get
|
|
-- > e1 <- get
|
|
-- > e2 <- get
|
|
-- > return (OpE s e1 e2)
|
|
--
|
|
-- Note how we write an initial tag byte to indicate each variant of the
|
|
-- data type.
|
|
--
|
|
-- We can simplify the writing of 'get' instances using monadic
|
|
-- combinators:
|
|
--
|
|
-- > get = do tag <- getWord8
|
|
-- > case tag of
|
|
-- > 0 -> liftM IntE get
|
|
-- > 1 -> liftM3 OpE get get get
|
|
--
|
|
-- The generation of Binary instances has been automated by a script
|
|
-- using Scrap Your Boilerplate generics. Use the script here:
|
|
-- <http://darcs.haskell.org/binary/tools/derive/BinaryDerive.hs>.
|
|
--
|
|
-- To derive the instance for a type, load this script into GHCi, and
|
|
-- bring your type into scope. Your type can then have its Binary
|
|
-- instances derived as follows:
|
|
--
|
|
-- > $ ghci -fglasgow-exts BinaryDerive.hs
|
|
-- > *BinaryDerive> :l Example.hs
|
|
-- > *Main> deriveM (undefined :: Drinks)
|
|
-- >
|
|
-- > instance Binary Main.Drinks where
|
|
-- > put (Beer a) = putWord8 0 >> put a
|
|
-- > put Coffee = putWord8 1
|
|
-- > put Tea = putWord8 2
|
|
-- > put EnergyDrink = putWord8 3
|
|
-- > put Water = putWord8 4
|
|
-- > put Wine = putWord8 5
|
|
-- > put Whisky = putWord8 6
|
|
-- > get = do
|
|
-- > tag_ <- getWord8
|
|
-- > case tag_ of
|
|
-- > 0 -> get >>= \a -> return (Beer a)
|
|
-- > 1 -> return Coffee
|
|
-- > 2 -> return Tea
|
|
-- > 3 -> return EnergyDrink
|
|
-- > 4 -> return Water
|
|
-- > 5 -> return Wine
|
|
-- > 6 -> return Whisky
|
|
-- >
|
|
--
|
|
-- To serialise this to a bytestring, we use 'encode', which packs the
|
|
-- data structure into a binary format, in a lazy bytestring
|
|
--
|
|
-- > > let e = OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
|
|
-- > > let v = encode e
|
|
--
|
|
-- Where 'v' is a binary encoded data structure. To reconstruct the
|
|
-- original data, we use 'decode'
|
|
--
|
|
-- > > decode v :: Exp
|
|
-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
|
|
--
|
|
-- The lazy ByteString that results from 'encode' can be written to
|
|
-- disk, and read from disk using Data.ByteString.Lazy IO functions,
|
|
-- such as hPutStr or writeFile:
|
|
--
|
|
-- > > writeFile "/tmp/exp.txt" (encode e)
|
|
--
|
|
-- And read back with:
|
|
--
|
|
-- > > readFile "/tmp/exp.txt" >>= return . decode :: IO Exp
|
|
-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
|
|
--
|
|
-- We can also directly serialise a value to and from a Handle, or a file:
|
|
--
|
|
-- > > v <- decodeFile "/tmp/exp.txt" :: IO Exp
|
|
-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
|
|
--
|
|
-- And write a value to disk
|
|
--
|
|
-- > > encodeFile "/tmp/a.txt" v
|
|
--
|
|
|
|
------------------------------------------------------------------------
|
|
-- Wrappers to run the underlying monad
|
|
|
|
-- | Encode a value using binary serialisation to a lazy ByteString.
|
|
--
|
|
encode :: Binary a => a -> ByteString
|
|
encode = runPut . put
|
|
{-# INLINE encode #-}
|
|
|
|
-- | Decode a value from a lazy ByteString, reconstructing the original structure.
|
|
--
|
|
decode :: Binary a => ByteString -> a
|
|
decode = runGet get
|
|
|
|
------------------------------------------------------------------------
|
|
-- Convenience IO operations
|
|
|
|
-- | Lazily serialise a value to a file
|
|
--
|
|
-- This is just a convenience function, it's defined simply as:
|
|
--
|
|
-- > encodeFile f = B.writeFile f . encode
|
|
--
|
|
-- So for example if you wanted to compress as well, you could use:
|
|
--
|
|
-- > B.writeFile f . compress . encode
|
|
--
|
|
encodeFile :: Binary a => FilePath -> a -> IO ()
|
|
encodeFile f v = L.writeFile f (encode v)
|
|
|
|
encodeFile_ :: FilePath -> Put -> IO ()
|
|
encodeFile_ f m = L.writeFile f (runPut m)
|
|
|
|
-- | Lazily reconstruct a value previously written to a file.
|
|
--
|
|
-- This is just a convenience function, it's defined simply as:
|
|
--
|
|
-- > decodeFile f = return . decode =<< B.readFile f
|
|
--
|
|
-- So for example if you wanted to decompress as well, you could use:
|
|
--
|
|
-- > return . decode . decompress =<< B.readFile f
|
|
--
|
|
decodeFile :: Binary a => FilePath -> IO a
|
|
decodeFile f = bracket (openBinaryFile f ReadMode) hClose $ \h -> do
|
|
s <- L.hGetContents h
|
|
evaluate $ runGet get s
|
|
|
|
decodeFile_ :: FilePath -> Get a -> IO a
|
|
decodeFile_ f m = bracket (openBinaryFile f ReadMode) hClose $ \h -> do
|
|
s <- L.hGetContents h
|
|
evaluate $ runGet m s
|
|
|
|
-- needs bytestring 0.9.1.x to work
|
|
|
|
------------------------------------------------------------------------
|
|
-- Lazy put and get
|
|
|
|
-- lazyPut :: (Binary a) => a -> Put
|
|
-- lazyPut a = put (encode a)
|
|
|
|
-- lazyGet :: (Binary a) => Get a
|
|
-- lazyGet = fmap decode get
|
|
|
|
------------------------------------------------------------------------
|
|
-- Simple instances
|
|
|
|
-- The () type need never be written to disk: values of singleton type
|
|
-- can be reconstructed from the type alone
|
|
instance Binary () where
|
|
put () = return ()
|
|
get = return ()
|
|
|
|
-- Bools are encoded as a byte in the range 0 .. 1
|
|
instance Binary Bool where
|
|
put = putWord8 . fromIntegral . fromEnum
|
|
get = liftM (toEnum . fromIntegral) getWord8
|
|
|
|
-- Values of type 'Ordering' are encoded as a byte in the range 0 .. 2
|
|
instance Binary Ordering where
|
|
put = putWord8 . fromIntegral . fromEnum
|
|
get = liftM (toEnum . fromIntegral) getWord8
|
|
|
|
------------------------------------------------------------------------
|
|
-- Words and Ints
|
|
|
|
-- Words8s are written as bytes
|
|
instance Binary Word8 where
|
|
put = putWord8
|
|
get = getWord8
|
|
|
|
-- Words16s are written as 2 bytes in big-endian (network) order
|
|
instance Binary Word16 where
|
|
put = putWord16be
|
|
get = getWord16be
|
|
|
|
-- Words32s are written as 4 bytes in big-endian (network) order
|
|
instance Binary Word32 where
|
|
put = putWord32be
|
|
get = getWord32be
|
|
|
|
-- Words64s are written as 8 bytes in big-endian (network) order
|
|
instance Binary Word64 where
|
|
put = putWord64be
|
|
get = getWord64be
|
|
|
|
-- Int8s are written as a single byte.
|
|
instance Binary Int8 where
|
|
put i = put (fromIntegral i :: Word8)
|
|
get = liftM fromIntegral (get :: Get Word8)
|
|
|
|
-- Int16s are written as a 2 bytes in big endian format
|
|
instance Binary Int16 where
|
|
put i = put (fromIntegral i :: Word16)
|
|
get = liftM fromIntegral (get :: Get Word16)
|
|
|
|
-- Int32s are written as a 4 bytes in big endian format
|
|
instance Binary Int32 where
|
|
put i = put (fromIntegral i :: Word32)
|
|
get = liftM fromIntegral (get :: Get Word32)
|
|
|
|
-- Int64s are written as a 8 bytes in big endian format
|
|
instance Binary Int64 where
|
|
put i = put (fromIntegral i :: Word64)
|
|
get = liftM fromIntegral (get :: Get Word64)
|
|
|
|
------------------------------------------------------------------------
|
|
|
|
-- Words are written as sequence of bytes. The last bit of each
|
|
-- byte indicates whether there are more bytes to be read
|
|
instance Binary Word where
|
|
put i | i <= 0x7f = do put a
|
|
| i <= 0x3fff = do put (a .|. 0x80)
|
|
put b
|
|
| i <= 0x1fffff = do put (a .|. 0x80)
|
|
put (b .|. 0x80)
|
|
put c
|
|
| i <= 0xfffffff = do put (a .|. 0x80)
|
|
put (b .|. 0x80)
|
|
put (c .|. 0x80)
|
|
put d
|
|
-- -- #if WORD_SIZE_IN_BITS < 64
|
|
| otherwise = do put (a .|. 0x80)
|
|
put (b .|. 0x80)
|
|
put (c .|. 0x80)
|
|
put (d .|. 0x80)
|
|
put e
|
|
{-
|
|
-- Restricted to 32 bits even on 64-bit systems, so that negative
|
|
-- Ints are written as 5 bytes instead of 10 bytes (TH 2013-02-13)
|
|
--#else
|
|
| i <= 0x7ffffffff = do put (a .|. 0x80)
|
|
put (b .|. 0x80)
|
|
put (c .|. 0x80)
|
|
put (d .|. 0x80)
|
|
put e
|
|
| i <= 0x3ffffffffff = do put (a .|. 0x80)
|
|
put (b .|. 0x80)
|
|
put (c .|. 0x80)
|
|
put (d .|. 0x80)
|
|
put (e .|. 0x80)
|
|
put f
|
|
| i <= 0x1ffffffffffff = do put (a .|. 0x80)
|
|
put (b .|. 0x80)
|
|
put (c .|. 0x80)
|
|
put (d .|. 0x80)
|
|
put (e .|. 0x80)
|
|
put (f .|. 0x80)
|
|
put g
|
|
| i <= 0xffffffffffffff = do put (a .|. 0x80)
|
|
put (b .|. 0x80)
|
|
put (c .|. 0x80)
|
|
put (d .|. 0x80)
|
|
put (e .|. 0x80)
|
|
put (f .|. 0x80)
|
|
put (g .|. 0x80)
|
|
put h
|
|
| i <= 0xffffffffffffff = do put (a .|. 0x80)
|
|
put (b .|. 0x80)
|
|
put (c .|. 0x80)
|
|
put (d .|. 0x80)
|
|
put (e .|. 0x80)
|
|
put (f .|. 0x80)
|
|
put (g .|. 0x80)
|
|
put h
|
|
| i <= 0x7fffffffffffffff = do put (a .|. 0x80)
|
|
put (b .|. 0x80)
|
|
put (c .|. 0x80)
|
|
put (d .|. 0x80)
|
|
put (e .|. 0x80)
|
|
put (f .|. 0x80)
|
|
put (g .|. 0x80)
|
|
put (h .|. 0x80)
|
|
put j
|
|
| otherwise = do put (a .|. 0x80)
|
|
put (b .|. 0x80)
|
|
put (c .|. 0x80)
|
|
put (d .|. 0x80)
|
|
put (e .|. 0x80)
|
|
put (f .|. 0x80)
|
|
put (g .|. 0x80)
|
|
put (h .|. 0x80)
|
|
put (j .|. 0x80)
|
|
put k
|
|
-- #endif
|
|
-}
|
|
where
|
|
a = fromIntegral ( i .&. 0x7f) :: Word8
|
|
b = fromIntegral (shiftR i 7 .&. 0x7f) :: Word8
|
|
c = fromIntegral (shiftR i 14 .&. 0x7f) :: Word8
|
|
d = fromIntegral (shiftR i 21 .&. 0x7f) :: Word8
|
|
e = fromIntegral (shiftR i 28 .&. 0x7f) :: Word8
|
|
{-
|
|
f = fromIntegral (shiftR i 35 .&. 0x7f) :: Word8
|
|
g = fromIntegral (shiftR i 42 .&. 0x7f) :: Word8
|
|
h = fromIntegral (shiftR i 49 .&. 0x7f) :: Word8
|
|
j = fromIntegral (shiftR i 56 .&. 0x7f) :: Word8
|
|
k = fromIntegral (shiftR i 63 .&. 0x7f) :: Word8
|
|
-}
|
|
get = do i <- getWord8
|
|
(if i <= 0x7f
|
|
then return (fromIntegral i)
|
|
else do n <- get
|
|
return $ (n `shiftL` 7) .|. (fromIntegral (i .&. 0x7f)))
|
|
|
|
-- Int has the same representation as Word
|
|
instance Binary Int where
|
|
put i = put (fromIntegral i :: Word)
|
|
get = liftM toInt32 (get :: Get Word)
|
|
where
|
|
-- restrict to 32 bits (for PGF portability, TH 2013-02-13)
|
|
toInt32 w = fromIntegral (fromIntegral w::Int32)::Int
|
|
|
|
------------------------------------------------------------------------
|
|
--
|
|
-- Portable, and pretty efficient, serialisation of Integer
|
|
--
|
|
|
|
-- Fixed-size type for a subset of Integer
|
|
type SmallInt = Int32
|
|
|
|
-- Integers are encoded in two ways: if they fit inside a SmallInt,
|
|
-- they're written as a byte tag, and that value. If the Integer value
|
|
-- is too large to fit in a SmallInt, it is written as a byte array,
|
|
-- along with a sign and length field.
|
|
|
|
instance Binary Integer where
|
|
|
|
{-# INLINE put #-}
|
|
put n | n >= lo && n <= hi = do
|
|
putWord8 0
|
|
put (fromIntegral n :: SmallInt) -- fast path
|
|
where
|
|
lo = fromIntegral (minBound :: SmallInt) :: Integer
|
|
hi = fromIntegral (maxBound :: SmallInt) :: Integer
|
|
|
|
put n = do
|
|
putWord8 1
|
|
put sign
|
|
put (unroll (abs n)) -- unroll the bytes
|
|
where
|
|
sign = fromIntegral (signum n) :: Word8
|
|
|
|
{-# INLINE get #-}
|
|
get = do
|
|
tag <- get :: Get Word8
|
|
case tag of
|
|
0 -> liftM fromIntegral (get :: Get SmallInt)
|
|
_ -> do sign <- get
|
|
bytes <- get
|
|
let v = roll bytes
|
|
return $! if sign == (1 :: Word8) then v else - v
|
|
|
|
--
|
|
-- Fold and unfold an Integer to and from a list of its bytes
|
|
--
|
|
unroll :: Integer -> [Word8]
|
|
unroll = unfoldr step
|
|
where
|
|
step 0 = Nothing
|
|
step i = Just (fromIntegral i, i `shiftR` 8)
|
|
|
|
roll :: [Word8] -> Integer
|
|
roll = foldr unstep 0
|
|
where
|
|
unstep b a = a `shiftL` 8 .|. fromIntegral b
|
|
|
|
{-
|
|
|
|
--
|
|
-- An efficient, raw serialisation for Integer (GHC only)
|
|
--
|
|
|
|
-- TODO This instance is not architecture portable. GMP stores numbers as
|
|
-- arrays of machine sized words, so the byte format is not portable across
|
|
-- architectures with different endianess and word size.
|
|
|
|
import Data.ByteString.Base (toForeignPtr,unsafePackAddress, memcpy)
|
|
import GHC.Base hiding (ord, chr)
|
|
import GHC.Prim
|
|
import GHC.Ptr (Ptr(..))
|
|
import GHC.IOBase (IO(..))
|
|
|
|
instance Binary Integer where
|
|
put (S# i) = putWord8 0 >> put (I# i)
|
|
put (J# s ba) = do
|
|
putWord8 1
|
|
put (I# s)
|
|
put (BA ba)
|
|
|
|
get = do
|
|
b <- getWord8
|
|
case b of
|
|
0 -> do (I# i#) <- get
|
|
return (S# i#)
|
|
_ -> do (I# s#) <- get
|
|
(BA a#) <- get
|
|
return (J# s# a#)
|
|
|
|
instance Binary ByteArray where
|
|
|
|
-- Pretty safe.
|
|
put (BA ba) =
|
|
let sz = sizeofByteArray# ba -- (primitive) in *bytes*
|
|
addr = byteArrayContents# ba
|
|
bs = unsafePackAddress (I# sz) addr
|
|
in put bs -- write as a ByteString. easy, yay!
|
|
|
|
-- Pretty scary. Should be quick though
|
|
get = do
|
|
(fp, off, n@(I# sz)) <- liftM toForeignPtr get -- so decode a ByteString
|
|
assert (off == 0) $ return $ unsafePerformIO $ do
|
|
(MBA arr) <- newByteArray sz -- and copy it into a ByteArray#
|
|
let to = byteArrayContents# (unsafeCoerce# arr) -- urk, is this safe?
|
|
withForeignPtr fp $ \from -> memcpy (Ptr to) from (fromIntegral n)
|
|
freezeByteArray arr
|
|
|
|
-- wrapper for ByteArray#
|
|
data ByteArray = BA {-# UNPACK #-} !ByteArray#
|
|
data MBA = MBA {-# UNPACK #-} !(MutableByteArray# RealWorld)
|
|
|
|
newByteArray :: Int# -> IO MBA
|
|
newByteArray sz = IO $ \s ->
|
|
case newPinnedByteArray# sz s of { (# s', arr #) ->
|
|
(# s', MBA arr #) }
|
|
|
|
freezeByteArray :: MutableByteArray# RealWorld -> IO ByteArray
|
|
freezeByteArray arr = IO $ \s ->
|
|
case unsafeFreezeByteArray# arr s of { (# s', arr' #) ->
|
|
(# s', BA arr' #) }
|
|
|
|
-}
|
|
|
|
instance (Binary a,Integral a) => Binary (R.Ratio a) where
|
|
put r = put (R.numerator r) >> put (R.denominator r)
|
|
get = liftM2 (R.%) get get
|
|
|
|
------------------------------------------------------------------------
|
|
|
|
-- Char is serialised as UTF-8
|
|
instance Binary Char where
|
|
put a | c <= 0x7f = put (fromIntegral c :: Word8)
|
|
| c <= 0x7ff = do put (0xc0 .|. y)
|
|
put (0x80 .|. z)
|
|
| c <= 0xffff = do put (0xe0 .|. x)
|
|
put (0x80 .|. y)
|
|
put (0x80 .|. z)
|
|
| c <= 0x10ffff = do put (0xf0 .|. w)
|
|
put (0x80 .|. x)
|
|
put (0x80 .|. y)
|
|
put (0x80 .|. z)
|
|
| otherwise = error "Not a valid Unicode code point"
|
|
where
|
|
c = ord a
|
|
z, y, x, w :: Word8
|
|
z = fromIntegral (c .&. 0x3f)
|
|
y = fromIntegral (shiftR c 6 .&. 0x3f)
|
|
x = fromIntegral (shiftR c 12 .&. 0x3f)
|
|
w = fromIntegral (shiftR c 18 .&. 0x7)
|
|
|
|
get = do
|
|
let getByte = liftM (fromIntegral :: Word8 -> Int) get
|
|
shiftL6 = flip shiftL 6 :: Int -> Int
|
|
w <- getByte
|
|
r <- case () of
|
|
_ | w < 0x80 -> return w
|
|
| w < 0xe0 -> do
|
|
x <- liftM (xor 0x80) getByte
|
|
return (x .|. shiftL6 (xor 0xc0 w))
|
|
| w < 0xf0 -> do
|
|
x <- liftM (xor 0x80) getByte
|
|
y <- liftM (xor 0x80) getByte
|
|
return (y .|. shiftL6 (x .|. shiftL6
|
|
(xor 0xe0 w)))
|
|
| otherwise -> do
|
|
x <- liftM (xor 0x80) getByte
|
|
y <- liftM (xor 0x80) getByte
|
|
z <- liftM (xor 0x80) getByte
|
|
return (z .|. shiftL6 (y .|. shiftL6
|
|
(x .|. shiftL6 (xor 0xf0 w))))
|
|
return $! chr r
|
|
|
|
------------------------------------------------------------------------
|
|
-- Instances for the first few tuples
|
|
|
|
instance (Binary a, Binary b) => Binary (a,b) where
|
|
put (a,b) = put a >> put b
|
|
get = liftM2 (,) get get
|
|
|
|
instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where
|
|
put (a,b,c) = put a >> put b >> put c
|
|
get = liftM3 (,,) get get get
|
|
|
|
instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where
|
|
put (a,b,c,d) = put a >> put b >> put c >> put d
|
|
get = liftM4 (,,,) get get get get
|
|
|
|
instance (Binary a, Binary b, Binary c, Binary d, Binary e) => Binary (a,b,c,d,e) where
|
|
put (a,b,c,d,e) = put a >> put b >> put c >> put d >> put e
|
|
get = liftM5 (,,,,) get get get get get
|
|
|
|
--
|
|
-- and now just recurse:
|
|
--
|
|
|
|
instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f)
|
|
=> Binary (a,b,c,d,e,f) where
|
|
put (a,b,c,d,e,f) = put (a,(b,c,d,e,f))
|
|
get = do (a,(b,c,d,e,f)) <- get ; return (a,b,c,d,e,f)
|
|
|
|
instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f, Binary g)
|
|
=> Binary (a,b,c,d,e,f,g) where
|
|
put (a,b,c,d,e,f,g) = put (a,(b,c,d,e,f,g))
|
|
get = do (a,(b,c,d,e,f,g)) <- get ; return (a,b,c,d,e,f,g)
|
|
|
|
instance (Binary a, Binary b, Binary c, Binary d, Binary e,
|
|
Binary f, Binary g, Binary h)
|
|
=> Binary (a,b,c,d,e,f,g,h) where
|
|
put (a,b,c,d,e,f,g,h) = put (a,(b,c,d,e,f,g,h))
|
|
get = do (a,(b,c,d,e,f,g,h)) <- get ; return (a,b,c,d,e,f,g,h)
|
|
|
|
instance (Binary a, Binary b, Binary c, Binary d, Binary e,
|
|
Binary f, Binary g, Binary h, Binary i)
|
|
=> Binary (a,b,c,d,e,f,g,h,i) where
|
|
put (a,b,c,d,e,f,g,h,i) = put (a,(b,c,d,e,f,g,h,i))
|
|
get = do (a,(b,c,d,e,f,g,h,i)) <- get ; return (a,b,c,d,e,f,g,h,i)
|
|
|
|
instance (Binary a, Binary b, Binary c, Binary d, Binary e,
|
|
Binary f, Binary g, Binary h, Binary i, Binary j)
|
|
=> Binary (a,b,c,d,e,f,g,h,i,j) where
|
|
put (a,b,c,d,e,f,g,h,i,j) = put (a,(b,c,d,e,f,g,h,i,j))
|
|
get = do (a,(b,c,d,e,f,g,h,i,j)) <- get ; return (a,b,c,d,e,f,g,h,i,j)
|
|
|
|
------------------------------------------------------------------------
|
|
-- Container types
|
|
|
|
instance Binary a => Binary [a] where
|
|
put l = put (length l) >> mapM_ put l
|
|
get = do n <- get :: Get Int
|
|
xs <- replicateM n get
|
|
return xs
|
|
|
|
instance (Binary a) => Binary (Maybe a) where
|
|
put Nothing = putWord8 0
|
|
put (Just x) = putWord8 1 >> put x
|
|
get = do
|
|
w <- getWord8
|
|
case w of
|
|
0 -> return Nothing
|
|
_ -> liftM Just get
|
|
|
|
instance (Binary a, Binary b) => Binary (Either a b) where
|
|
put (Left a) = putWord8 0 >> put a
|
|
put (Right b) = putWord8 1 >> put b
|
|
get = do
|
|
w <- getWord8
|
|
case w of
|
|
0 -> liftM Left get
|
|
_ -> liftM Right get
|
|
|
|
------------------------------------------------------------------------
|
|
-- ByteStrings (have specially efficient instances)
|
|
|
|
instance Binary B.ByteString where
|
|
put bs = do put (B.length bs)
|
|
putByteString bs
|
|
get = get >>= getByteString
|
|
|
|
--
|
|
-- Using old versions of fps, this is a type synonym, and non portable
|
|
--
|
|
-- Requires 'flexible instances'
|
|
--
|
|
instance Binary ByteString where
|
|
put bs = do put (fromIntegral (L.length bs) :: Int)
|
|
putLazyByteString bs
|
|
get = get >>= getLazyByteString
|
|
|
|
------------------------------------------------------------------------
|
|
-- Maps and Sets
|
|
|
|
instance (Ord a, Binary a) => Binary (Set.Set a) where
|
|
put s = put (Set.size s) >> mapM_ put (Set.toAscList s)
|
|
get = liftM Set.fromDistinctAscList get
|
|
|
|
instance (Ord k, Binary k, Binary e) => Binary (Map.Map k e) where
|
|
put m = put (Map.size m) >> mapM_ put (Map.toAscList m)
|
|
get = liftM Map.fromDistinctAscList get
|
|
|
|
instance Binary IntSet.IntSet where
|
|
put s = put (IntSet.size s) >> mapM_ put (IntSet.toAscList s)
|
|
get = liftM IntSet.fromDistinctAscList get
|
|
|
|
instance (Binary e) => Binary (IntMap.IntMap e) where
|
|
put m = put (IntMap.size m) >> mapM_ put (IntMap.toAscList m)
|
|
get = liftM IntMap.fromDistinctAscList get
|
|
|
|
------------------------------------------------------------------------
|
|
-- Queues and Sequences
|
|
|
|
#if __GLASGOW_HASKELL__ >= 606
|
|
--
|
|
-- This is valid Hugs, but you need the most recent Hugs
|
|
--
|
|
|
|
instance (Binary e) => Binary (Seq.Seq e) where
|
|
put s = put (Seq.length s) >> Fold.mapM_ put s
|
|
get = do n <- get :: Get Int
|
|
rep Seq.empty n get
|
|
where rep xs 0 _ = return $! xs
|
|
rep xs n g = xs `seq` n `seq` do
|
|
x <- g
|
|
rep (xs Seq.|> x) (n-1) g
|
|
|
|
#endif
|
|
|
|
------------------------------------------------------------------------
|
|
-- Floating point
|
|
|
|
-- instance Binary Double where
|
|
-- put d = put (decodeFloat d)
|
|
-- get = liftM2 encodeFloat get get
|
|
|
|
instance Binary Double where
|
|
put = putFloat64be
|
|
get = getFloat64be
|
|
|
|
instance Binary Float where
|
|
put f = put (decodeFloat f)
|
|
get = liftM2 encodeFloat get get
|
|
|
|
------------------------------------------------------------------------
|
|
-- Trees
|
|
|
|
instance (Binary e) => Binary (T.Tree e) where
|
|
put (T.Node r s) = put r >> put s
|
|
get = liftM2 T.Node get get
|
|
|
|
------------------------------------------------------------------------
|
|
-- Arrays
|
|
|
|
instance (Binary i, Ix i, Binary e) => Binary (Array i e) where
|
|
put a = do
|
|
put (bounds a)
|
|
put (rangeSize $ bounds a) -- write the length
|
|
mapM_ put (elems a) -- now the elems.
|
|
get = do
|
|
bs <- get
|
|
n <- get -- read the length
|
|
xs <- replicateM n get -- now the elems.
|
|
return (listArray bs xs)
|
|
|
|
--
|
|
-- The IArray UArray e constraint is non portable. Requires flexible instances
|
|
--
|
|
instance (Binary i, Ix i, Binary e, IArray UArray e) => Binary (UArray i e) where
|
|
put a = do
|
|
put (bounds a)
|
|
put (rangeSize $ bounds a) -- now write the length
|
|
mapM_ put (elems a)
|
|
get = do
|
|
bs <- get
|
|
n <- get
|
|
xs <- replicateM n get
|
|
return (listArray bs xs)
|