Add a cabal flag to use the standard binary package

The standard binary package has improved efficiency and error handling [1], so in the long run we should consider switching to it. At the moment, using it is possible but not recommended, since it results in incomatible PGF files. The modified modules from the binary package have been moved from src/runtime/haskell to src/binary. [1] http://lennartkolmodin.blogspot.se/2013/03/binary-07.html
2026-04-12 14:29:31 -06:00 · 2013-10-31 15:43:12 +00:00
parent a7a1563b79
commit 83a10ce25a
7 changed files with 61 additions and 21 deletions
--- a/src/binary/Data/Binary.hs
+++ b/src/binary/Data/Binary.hs
@@ -0,0 +1,814 @@
+{-# LANGUAGE CPP, FlexibleInstances, FlexibleContexts #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module      : Data.Binary
+-- Copyright   : Lennart Kolmodin
+-- License     : BSD3-style (see LICENSE)
+-- 
+-- Maintainer  : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
+-- Stability   : unstable
+-- Portability : portable to Hugs and GHC. Requires the FFI and some flexible instances
+--
+-- Binary serialisation of Haskell values to and from lazy ByteStrings.
+-- The Binary library provides methods for encoding Haskell values as
+-- streams of bytes directly in memory. The resulting @ByteString@ can
+-- then be written to disk, sent over the network, or futher processed
+-- (for example, compressed with gzip).
+--
+-- The 'Binary' package is notable in that it provides both pure, and
+-- high performance serialisation.
+--
+-- Values are always encoded in network order (big endian) form, and
+-- encoded data should be portable across machine endianess, word size,
+-- or compiler version. For example, data encoded using the Binary class
+-- could be written from GHC, and read back in Hugs.
+--
+-----------------------------------------------------------------------------
+
+module Data.Binary (
+
+    -- * The Binary class
+      Binary(..)
+
+    -- $example
+
+    -- * The Get and Put monads
+    , Get
+    , Put
+
+    -- * Useful helpers for writing instances
+    , putWord8
+    , getWord8
+
+    -- * Binary serialisation
+    , encode                    -- :: Binary a => a -> ByteString
+    , decode                    -- :: Binary a => ByteString -> a
+
+    -- * IO functions for serialisation
+    , encodeFile                -- :: Binary a => FilePath -> a -> IO ()
+    , decodeFile                -- :: Binary a => FilePath -> IO a
+
+    , encodeFile_               -- :: FilePath -> Put   -> IO ()
+    , decodeFile_               -- :: FilePath -> Get a -> IO a
+
+-- Lazy put and get
+--  , lazyPut
+--  , lazyGet
+
+    , module Data.Word -- useful
+
+    ) where
+
+#include "MachDeps.h"
+
+import Data.Word
+
+import Data.Binary.Put
+import Data.Binary.Get
+import Data.Binary.IEEE754 ( putFloat64be, getFloat64be)
+import Control.Monad
+import Control.Exception
+import Foreign
+import System.IO
+
+import Data.ByteString.Lazy (ByteString)
+import qualified Data.ByteString.Lazy as L
+
+import Data.Char    (chr,ord)
+import Data.List    (unfoldr)
+
+-- And needed for the instances:
+import qualified Data.ByteString as B
+import qualified Data.Map        as Map
+import qualified Data.Set        as Set
+import qualified Data.IntMap     as IntMap
+import qualified Data.IntSet     as IntSet
+import qualified Data.Ratio      as R
+
+import qualified Data.Tree as T
+
+import Data.Array.Unboxed
+
+--
+-- This isn't available in older Hugs or older GHC
+--
+#if __GLASGOW_HASKELL__ >= 606
+import qualified Data.Sequence as Seq
+import qualified Data.Foldable as Fold
+#endif
+
+------------------------------------------------------------------------
+
+-- | The @Binary@ class provides 'put' and 'get', methods to encode and
+-- decode a Haskell value to a lazy ByteString. It mirrors the Read and
+-- Show classes for textual representation of Haskell types, and is
+-- suitable for serialising Haskell values to disk, over the network.
+--
+-- For parsing and generating simple external binary formats (e.g. C
+-- structures), Binary may be used, but in general is not suitable
+-- for complex protocols. Instead use the Put and Get primitives
+-- directly.
+--
+-- Instances of Binary should satisfy the following property:
+--
+-- > decode . encode == id
+--
+-- That is, the 'get' and 'put' methods should be the inverse of each
+-- other. A range of instances are provided for basic Haskell types. 
+--
+class Binary t where
+    -- | Encode a value in the Put monad.
+    put :: t -> Put
+    -- | Decode a value in the Get monad
+    get :: Get t
+
+-- $example
+-- To serialise a custom type, an instance of Binary for that type is
+-- required. For example, suppose we have a data structure:
+--
+-- > data Exp = IntE Int
+-- >          | OpE  String Exp Exp
+-- >    deriving Show
+--
+-- We can encode values of this type into bytestrings using the
+-- following instance, which proceeds by recursively breaking down the
+-- structure to serialise:
+--
+-- > instance Binary Exp where
+-- >       put (IntE i)          = do put (0 :: Word8)
+-- >                                  put i
+-- >       put (OpE s e1 e2)     = do put (1 :: Word8)
+-- >                                  put s
+-- >                                  put e1
+-- >                                  put e2
+-- > 
+-- >       get = do t <- get :: Get Word8
+-- >                case t of
+-- >                     0 -> do i <- get
+-- >                             return (IntE i)
+-- >                     1 -> do s  <- get
+-- >                             e1 <- get
+-- >                             e2 <- get
+-- >                             return (OpE s e1 e2)
+--
+-- Note how we write an initial tag byte to indicate each variant of the
+-- data type.
+--
+-- We can simplify the writing of 'get' instances using monadic
+-- combinators:
+-- 
+-- >       get = do tag <- getWord8
+-- >                case tag of
+-- >                    0 -> liftM  IntE get
+-- >                    1 -> liftM3 OpE  get get get
+--
+-- The generation of Binary instances has been automated by a script
+-- using Scrap Your Boilerplate generics. Use the script here:
+--  <http://darcs.haskell.org/binary/tools/derive/BinaryDerive.hs>.
+--
+-- To derive the instance for a type, load this script into GHCi, and
+-- bring your type into scope. Your type can then have its Binary
+-- instances derived as follows:
+--
+-- > $ ghci -fglasgow-exts BinaryDerive.hs
+-- > *BinaryDerive> :l Example.hs
+-- > *Main> deriveM (undefined :: Drinks)
+-- >
+-- > instance Binary Main.Drinks where
+-- >      put (Beer a) = putWord8 0 >> put a
+-- >      put Coffee = putWord8 1
+-- >      put Tea = putWord8 2
+-- >      put EnergyDrink = putWord8 3
+-- >      put Water = putWord8 4
+-- >      put Wine = putWord8 5
+-- >      put Whisky = putWord8 6
+-- >      get = do
+-- >        tag_ <- getWord8
+-- >        case tag_ of
+-- >          0 -> get >>= \a -> return (Beer a)
+-- >          1 -> return Coffee
+-- >          2 -> return Tea
+-- >          3 -> return EnergyDrink
+-- >          4 -> return Water
+-- >          5 -> return Wine
+-- >          6 -> return Whisky
+-- >
+--
+-- To serialise this to a bytestring, we use 'encode', which packs the
+-- data structure into a binary format, in a lazy bytestring
+--
+-- > > let e = OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
+-- > > let v = encode e
+--
+-- Where 'v' is a binary encoded data structure. To reconstruct the
+-- original data, we use 'decode'
+--
+-- > > decode v :: Exp
+-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
+--
+-- The lazy ByteString that results from 'encode' can be written to
+-- disk, and read from disk using Data.ByteString.Lazy IO functions,
+-- such as hPutStr or writeFile:
+--
+-- > > writeFile "/tmp/exp.txt" (encode e)
+--
+-- And read back with:
+--
+-- > > readFile "/tmp/exp.txt" >>= return . decode :: IO Exp
+-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
+--
+-- We can also directly serialise a value to and from a Handle, or a file:
+-- 
+-- > > v <- decodeFile  "/tmp/exp.txt" :: IO Exp
+-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
+--
+-- And write a value to disk
+--
+-- > > encodeFile "/tmp/a.txt" v
+--
+
+------------------------------------------------------------------------
+-- Wrappers to run the underlying monad
+
+-- | Encode a value using binary serialisation to a lazy ByteString.
+--
+encode :: Binary a => a -> ByteString
+encode = runPut . put
+{-# INLINE encode #-}
+
+-- | Decode a value from a lazy ByteString, reconstructing the original structure.
+--
+decode :: Binary a => ByteString -> a
+decode = runGet get
+
+------------------------------------------------------------------------
+-- Convenience IO operations
+
+-- | Lazily serialise a value to a file
+--
+-- This is just a convenience function, it's defined simply as:
+--
+-- > encodeFile f = B.writeFile f . encode
+--
+-- So for example if you wanted to compress as well, you could use:
+--
+-- > B.writeFile f . compress . encode
+--
+encodeFile :: Binary a => FilePath -> a -> IO ()
+encodeFile f v = L.writeFile f (encode v)
+
+encodeFile_ :: FilePath -> Put -> IO ()
+encodeFile_ f m = L.writeFile f (runPut m)
+
+-- | Lazily reconstruct a value previously written to a file.
+--
+-- This is just a convenience function, it's defined simply as:
+--
+-- > decodeFile f = return . decode =<< B.readFile f
+--
+-- So for example if you wanted to decompress as well, you could use:
+--
+-- > return . decode . decompress =<< B.readFile f
+--
+decodeFile :: Binary a => FilePath -> IO a
+decodeFile f = bracket (openBinaryFile f ReadMode) hClose $ \h -> do
+    s <- L.hGetContents h
+    evaluate $ runGet get s
+
+decodeFile_ :: FilePath -> Get a -> IO a
+decodeFile_ f m = bracket (openBinaryFile f ReadMode) hClose $ \h -> do
+    s <- L.hGetContents h
+    evaluate $ runGet m s
+
+-- needs bytestring 0.9.1.x to work 
+
+------------------------------------------------------------------------
+-- Lazy put and get
+
+-- lazyPut :: (Binary a) => a -> Put
+-- lazyPut a = put (encode a)
+
+-- lazyGet :: (Binary a) => Get a
+-- lazyGet = fmap decode get
+
+------------------------------------------------------------------------
+-- Simple instances
+
+-- The () type need never be written to disk: values of singleton type
+-- can be reconstructed from the type alone
+instance Binary () where
+    put ()  = return ()
+    get     = return ()
+
+-- Bools are encoded as a byte in the range 0 .. 1
+instance Binary Bool where
+    put     = putWord8 . fromIntegral . fromEnum
+    get     = liftM (toEnum . fromIntegral) getWord8
+
+-- Values of type 'Ordering' are encoded as a byte in the range 0 .. 2
+instance Binary Ordering where
+    put     = putWord8 . fromIntegral . fromEnum
+    get     = liftM (toEnum . fromIntegral) getWord8
+
+------------------------------------------------------------------------
+-- Words and Ints
+
+-- Words8s are written as bytes
+instance Binary Word8 where
+    put     = putWord8
+    get     = getWord8
+
+-- Words16s are written as 2 bytes in big-endian (network) order
+instance Binary Word16 where
+    put     = putWord16be
+    get     = getWord16be
+
+-- Words32s are written as 4 bytes in big-endian (network) order
+instance Binary Word32 where
+    put     = putWord32be
+    get     = getWord32be
+
+-- Words64s are written as 8 bytes in big-endian (network) order
+instance Binary Word64 where
+    put     = putWord64be
+    get     = getWord64be
+
+-- Int8s are written as a single byte.
+instance Binary Int8 where
+    put i   = put (fromIntegral i :: Word8)
+    get     = liftM fromIntegral (get :: Get Word8)
+
+-- Int16s are written as a 2 bytes in big endian format
+instance Binary Int16 where
+    put i   = put (fromIntegral i :: Word16)
+    get     = liftM fromIntegral (get :: Get Word16)
+
+-- Int32s are written as a 4 bytes in big endian format
+instance Binary Int32 where
+    put i   = put (fromIntegral i :: Word32)
+    get     = liftM fromIntegral (get :: Get Word32)
+
+-- Int64s are written as a 8 bytes in big endian format
+instance Binary Int64 where
+    put i   = put (fromIntegral i :: Word64)
+    get     = liftM fromIntegral (get :: Get Word64)
+
+------------------------------------------------------------------------
+
+-- Words are written as sequence of bytes. The last bit of each
+-- byte indicates whether there are more bytes to be read
+instance Binary Word where
+    put i | i <=               0x7f = do put  a
+          | i <=             0x3fff = do put (a .|. 0x80)
+                                         put  b
+          | i <=           0x1fffff = do put (a .|. 0x80)
+                                         put (b .|. 0x80)
+                                         put  c
+          | i <=          0xfffffff = do put (a .|. 0x80)
+                                         put (b .|. 0x80)
+                                         put (c .|. 0x80)
+                                         put  d
+-- -- #if WORD_SIZE_IN_BITS < 64
+          | otherwise               = do put (a .|. 0x80)
+                                         put (b .|. 0x80)
+                                         put (c .|. 0x80)
+                                         put (d .|. 0x80)
+                                         put  e
+{-
+-- Restricted to 32 bits even on 64-bit systems, so that negative
+-- Ints are written as 5 bytes instead of 10 bytes (TH 2013-02-13)
+--#else
+          | i <=        0x7ffffffff = do put (a .|. 0x80)
+                                         put (b .|. 0x80)
+                                         put (c .|. 0x80)
+                                         put (d .|. 0x80)
+                                         put  e
+          | i <=      0x3ffffffffff = do put (a .|. 0x80)
+                                         put (b .|. 0x80)
+                                         put (c .|. 0x80)
+                                         put (d .|. 0x80)
+                                         put (e .|. 0x80)
+                                         put  f
+          | i <=    0x1ffffffffffff = do put (a .|. 0x80)
+                                         put (b .|. 0x80)
+                                         put (c .|. 0x80)
+                                         put (d .|. 0x80)
+                                         put (e .|. 0x80)
+                                         put (f .|. 0x80)
+                                         put  g
+          | i <=   0xffffffffffffff = do put (a .|. 0x80)
+                                         put (b .|. 0x80)
+                                         put (c .|. 0x80)
+                                         put (d .|. 0x80)
+                                         put (e .|. 0x80)
+                                         put (f .|. 0x80)
+                                         put (g .|. 0x80)
+                                         put  h
+          | i <=   0xffffffffffffff = do put (a .|. 0x80)
+                                         put (b .|. 0x80)
+                                         put (c .|. 0x80)
+                                         put (d .|. 0x80)
+                                         put (e .|. 0x80)
+                                         put (f .|. 0x80)
+                                         put (g .|. 0x80)
+                                         put  h
+          | i <= 0x7fffffffffffffff = do put (a .|. 0x80)
+                                         put (b .|. 0x80)
+                                         put (c .|. 0x80)
+                                         put (d .|. 0x80)
+                                         put (e .|. 0x80)
+                                         put (f .|. 0x80)
+                                         put (g .|. 0x80)
+                                         put (h .|. 0x80)
+                                         put  j
+          | otherwise               = do put (a .|. 0x80)
+                                         put (b .|. 0x80)
+                                         put (c .|. 0x80)
+                                         put (d .|. 0x80)
+                                         put (e .|. 0x80)
+                                         put (f .|. 0x80)
+                                         put (g .|. 0x80)
+                                         put (h .|. 0x80)
+                                         put (j .|. 0x80)
+                                         put  k
+-- #endif
+-}
+          where
+            a = fromIntegral (       i    .&. 0x7f) :: Word8
+            b = fromIntegral (shiftR i  7 .&. 0x7f) :: Word8
+            c = fromIntegral (shiftR i 14 .&. 0x7f) :: Word8
+            d = fromIntegral (shiftR i 21 .&. 0x7f) :: Word8
+            e = fromIntegral (shiftR i 28 .&. 0x7f) :: Word8
+{-
+            f = fromIntegral (shiftR i 35 .&. 0x7f) :: Word8
+            g = fromIntegral (shiftR i 42 .&. 0x7f) :: Word8
+            h = fromIntegral (shiftR i 49 .&. 0x7f) :: Word8
+            j = fromIntegral (shiftR i 56 .&. 0x7f) :: Word8
+            k = fromIntegral (shiftR i 63 .&. 0x7f) :: Word8
+-}
+    get = do i <- getWord8
+             (if i <= 0x7f
+                then return (fromIntegral i)
+                else do n <- get
+                        return $ (n `shiftL` 7) .|. (fromIntegral (i .&. 0x7f)))
+
+-- Int has the same representation as Word
+instance Binary Int where
+    put i   = put (fromIntegral i :: Word)
+    get     = liftM toInt32 (get :: Get Word)
+      where
+       -- restrict to 32 bits (for PGF portability, TH 2013-02-13)
+       toInt32 w = fromIntegral (fromIntegral w::Int32)::Int
+
+------------------------------------------------------------------------
+-- 
+-- Portable, and pretty efficient, serialisation of Integer
+--
+
+-- Fixed-size type for a subset of Integer
+type SmallInt = Int32
+
+-- Integers are encoded in two ways: if they fit inside a SmallInt,
+-- they're written as a byte tag, and that value.  If the Integer value
+-- is too large to fit in a SmallInt, it is written as a byte array,
+-- along with a sign and length field.
+
+instance Binary Integer where
+
+    {-# INLINE put #-}
+    put n | n >= lo && n <= hi = do
+        putWord8 0
+        put (fromIntegral n :: SmallInt)  -- fast path
+     where
+        lo = fromIntegral (minBound :: SmallInt) :: Integer
+        hi = fromIntegral (maxBound :: SmallInt) :: Integer
+
+    put n = do
+        putWord8 1
+        put sign
+        put (unroll (abs n))         -- unroll the bytes
+     where
+        sign = fromIntegral (signum n) :: Word8
+
+    {-# INLINE get #-}
+    get = do
+        tag <- get :: Get Word8
+        case tag of
+            0 -> liftM fromIntegral (get :: Get SmallInt)
+            _ -> do sign  <- get
+                    bytes <- get
+                    let v = roll bytes
+                    return $! if sign == (1 :: Word8) then v else - v
+
+--
+-- Fold and unfold an Integer to and from a list of its bytes
+--
+unroll :: Integer -> [Word8]
+unroll = unfoldr step
+  where
+    step 0 = Nothing
+    step i = Just (fromIntegral i, i `shiftR` 8)
+
+roll :: [Word8] -> Integer
+roll   = foldr unstep 0
+  where
+    unstep b a = a `shiftL` 8 .|. fromIntegral b
+
+{-
+
+--
+-- An efficient, raw serialisation for Integer (GHC only)
+--
+
+-- TODO  This instance is not architecture portable.  GMP stores numbers as
+-- arrays of machine sized words, so the byte format is not portable across
+-- architectures with different endianess and word size.
+
+import Data.ByteString.Base (toForeignPtr,unsafePackAddress, memcpy)
+import GHC.Base     hiding (ord, chr)
+import GHC.Prim
+import GHC.Ptr (Ptr(..))
+import GHC.IOBase (IO(..))
+
+instance Binary Integer where
+    put (S# i)    = putWord8 0 >> put (I# i)
+    put (J# s ba) = do
+        putWord8 1
+        put (I# s)
+        put (BA ba)
+
+    get = do
+        b <- getWord8
+        case b of
+            0 -> do (I# i#) <- get
+                    return (S# i#)
+            _ -> do (I# s#) <- get
+                    (BA a#) <- get
+                    return (J# s# a#)
+
+instance Binary ByteArray where
+
+    -- Pretty safe.
+    put (BA ba) =
+        let sz   = sizeofByteArray# ba   -- (primitive) in *bytes*
+            addr = byteArrayContents# ba
+            bs   = unsafePackAddress (I# sz) addr
+        in put bs   -- write as a ByteString. easy, yay!
+
+    -- Pretty scary. Should be quick though
+    get = do
+        (fp, off, n@(I# sz)) <- liftM toForeignPtr get      -- so decode a ByteString
+        assert (off == 0) $ return $ unsafePerformIO $ do
+            (MBA arr) <- newByteArray sz                    -- and copy it into a ByteArray#
+            let to = byteArrayContents# (unsafeCoerce# arr) -- urk, is this safe?
+            withForeignPtr fp $ \from -> memcpy (Ptr to) from (fromIntegral n)
+            freezeByteArray arr
+
+-- wrapper for ByteArray#
+data ByteArray = BA  {-# UNPACK #-} !ByteArray#
+data MBA       = MBA {-# UNPACK #-} !(MutableByteArray# RealWorld)
+
+newByteArray :: Int# -> IO MBA
+newByteArray sz = IO $ \s ->
+  case newPinnedByteArray# sz s of { (# s', arr #) ->
+  (# s', MBA arr #) }
+
+freezeByteArray :: MutableByteArray# RealWorld -> IO ByteArray
+freezeByteArray arr = IO $ \s ->
+  case unsafeFreezeByteArray# arr s of { (# s', arr' #) ->
+  (# s', BA arr' #) }
+
+-}
+
+instance (Binary a,Integral a) => Binary (R.Ratio a) where
+    put r = put (R.numerator r) >> put (R.denominator r)
+    get = liftM2 (R.%) get get
+
+------------------------------------------------------------------------
+
+-- Char is serialised as UTF-8
+instance Binary Char where
+    put a | c <= 0x7f     = put (fromIntegral c :: Word8)
+          | c <= 0x7ff    = do put (0xc0 .|. y)
+                               put (0x80 .|. z)
+          | c <= 0xffff   = do put (0xe0 .|. x)
+                               put (0x80 .|. y)
+                               put (0x80 .|. z)
+          | c <= 0x10ffff = do put (0xf0 .|. w)
+                               put (0x80 .|. x)
+                               put (0x80 .|. y)
+                               put (0x80 .|. z)
+          | otherwise     = error "Not a valid Unicode code point"
+     where
+        c = ord a
+        z, y, x, w :: Word8
+        z = fromIntegral (c           .&. 0x3f)
+        y = fromIntegral (shiftR c 6  .&. 0x3f)
+        x = fromIntegral (shiftR c 12 .&. 0x3f)
+        w = fromIntegral (shiftR c 18 .&. 0x7)
+
+    get = do
+        let getByte = liftM (fromIntegral :: Word8 -> Int) get
+            shiftL6 = flip shiftL 6 :: Int -> Int
+        w <- getByte
+        r <- case () of
+                _ | w < 0x80  -> return w
+                  | w < 0xe0  -> do
+                                    x <- liftM (xor 0x80) getByte
+                                    return (x .|. shiftL6 (xor 0xc0 w))
+                  | w < 0xf0  -> do
+                                    x <- liftM (xor 0x80) getByte
+                                    y <- liftM (xor 0x80) getByte
+                                    return (y .|. shiftL6 (x .|. shiftL6
+                                            (xor 0xe0 w)))
+                  | otherwise -> do
+                                x <- liftM (xor 0x80) getByte
+                                y <- liftM (xor 0x80) getByte
+                                z <- liftM (xor 0x80) getByte
+                                return (z .|. shiftL6 (y .|. shiftL6
+                                        (x .|. shiftL6 (xor 0xf0 w))))
+        return $! chr r
+
+------------------------------------------------------------------------
+-- Instances for the first few tuples
+
+instance (Binary a, Binary b) => Binary (a,b) where
+    put (a,b)           = put a >> put b
+    get                 = liftM2 (,) get get
+
+instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where
+    put (a,b,c)         = put a >> put b >> put c
+    get                 = liftM3 (,,) get get get
+
+instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where
+    put (a,b,c,d)       = put a >> put b >> put c >> put d
+    get                 = liftM4 (,,,) get get get get
+
+instance (Binary a, Binary b, Binary c, Binary d, Binary e) => Binary (a,b,c,d,e) where
+    put (a,b,c,d,e)     = put a >> put b >> put c >> put d >> put e
+    get                 = liftM5 (,,,,) get get get get get
+
+-- 
+-- and now just recurse:
+--
+
+instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f)
+        => Binary (a,b,c,d,e,f) where
+    put (a,b,c,d,e,f)   = put (a,(b,c,d,e,f))
+    get                 = do (a,(b,c,d,e,f)) <- get ; return (a,b,c,d,e,f)
+
+instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f, Binary g)
+        => Binary (a,b,c,d,e,f,g) where
+    put (a,b,c,d,e,f,g) = put (a,(b,c,d,e,f,g))
+    get                 = do (a,(b,c,d,e,f,g)) <- get ; return (a,b,c,d,e,f,g)
+
+instance (Binary a, Binary b, Binary c, Binary d, Binary e,
+          Binary f, Binary g, Binary h)
+        => Binary (a,b,c,d,e,f,g,h) where
+    put (a,b,c,d,e,f,g,h) = put (a,(b,c,d,e,f,g,h))
+    get                   = do (a,(b,c,d,e,f,g,h)) <- get ; return (a,b,c,d,e,f,g,h)
+
+instance (Binary a, Binary b, Binary c, Binary d, Binary e,
+          Binary f, Binary g, Binary h, Binary i)
+        => Binary (a,b,c,d,e,f,g,h,i) where
+    put (a,b,c,d,e,f,g,h,i) = put (a,(b,c,d,e,f,g,h,i))
+    get                     = do (a,(b,c,d,e,f,g,h,i)) <- get ; return (a,b,c,d,e,f,g,h,i)
+
+instance (Binary a, Binary b, Binary c, Binary d, Binary e,
+          Binary f, Binary g, Binary h, Binary i, Binary j)
+        => Binary (a,b,c,d,e,f,g,h,i,j) where
+    put (a,b,c,d,e,f,g,h,i,j) = put (a,(b,c,d,e,f,g,h,i,j))
+    get                       = do (a,(b,c,d,e,f,g,h,i,j)) <- get ; return (a,b,c,d,e,f,g,h,i,j)
+
+------------------------------------------------------------------------
+-- Container types
+
+instance Binary a => Binary [a] where
+    put l  = put (length l) >> mapM_ put l
+    get    = do n <- get :: Get Int
+                xs <- replicateM n get
+                return xs
+
+instance (Binary a) => Binary (Maybe a) where
+    put Nothing  = putWord8 0
+    put (Just x) = putWord8 1 >> put x
+    get = do
+        w <- getWord8
+        case w of
+            0 -> return Nothing
+            _ -> liftM Just get
+
+instance (Binary a, Binary b) => Binary (Either a b) where
+    put (Left  a) = putWord8 0 >> put a
+    put (Right b) = putWord8 1 >> put b
+    get = do
+        w <- getWord8
+        case w of
+            0 -> liftM Left  get
+            _ -> liftM Right get
+
+------------------------------------------------------------------------
+-- ByteStrings (have specially efficient instances)
+
+instance Binary B.ByteString where
+    put bs = do put (B.length bs)
+                putByteString bs
+    get    = get >>= getByteString
+
+--
+-- Using old versions of fps, this is a type synonym, and non portable
+-- 
+-- Requires 'flexible instances'
+--
+instance Binary ByteString where
+    put bs = do put (fromIntegral (L.length bs) :: Int)
+                putLazyByteString bs
+    get    = get >>= getLazyByteString
+
+------------------------------------------------------------------------
+-- Maps and Sets
+
+instance (Ord a, Binary a) => Binary (Set.Set a) where
+    put s = put (Set.size s) >> mapM_ put (Set.toAscList s)
+    get   = liftM Set.fromDistinctAscList get
+
+instance (Ord k, Binary k, Binary e) => Binary (Map.Map k e) where
+    put m = put (Map.size m) >> mapM_ put (Map.toAscList m)
+    get   = liftM Map.fromDistinctAscList get
+
+instance Binary IntSet.IntSet where
+    put s = put (IntSet.size s) >> mapM_ put (IntSet.toAscList s)
+    get   = liftM IntSet.fromDistinctAscList get
+
+instance (Binary e) => Binary (IntMap.IntMap e) where
+    put m = put (IntMap.size m) >> mapM_ put (IntMap.toAscList m)
+    get   = liftM IntMap.fromDistinctAscList get
+
+------------------------------------------------------------------------
+-- Queues and Sequences
+
+#if __GLASGOW_HASKELL__ >= 606
+--
+-- This is valid Hugs, but you need the most recent Hugs
+--
+
+instance (Binary e) => Binary (Seq.Seq e) where
+    put s = put (Seq.length s) >> Fold.mapM_ put s
+    get = do n <- get :: Get Int
+             rep Seq.empty n get
+      where rep xs 0 _ = return $! xs
+            rep xs n g = xs `seq` n `seq` do
+                           x <- g
+                           rep (xs Seq.|> x) (n-1) g
+
+#endif
+
+------------------------------------------------------------------------
+-- Floating point
+
+-- instance Binary Double where
+--     put d = put (decodeFloat d)
+--     get   = liftM2 encodeFloat get get
+
+instance Binary Double where
+    put = putFloat64be
+    get = getFloat64be
+
+instance Binary Float where
+    put f = put (decodeFloat f)
+    get   = liftM2 encodeFloat get get
+
+------------------------------------------------------------------------
+-- Trees
+
+instance (Binary e) => Binary (T.Tree e) where
+    put (T.Node r s) = put r >> put s
+    get = liftM2 T.Node get get
+
+------------------------------------------------------------------------
+-- Arrays
+
+instance (Binary i, Ix i, Binary e) => Binary (Array i e) where
+    put a = do
+        put (bounds a)
+        put (rangeSize $ bounds a) -- write the length
+        mapM_ put (elems a)        -- now the elems.
+    get = do
+        bs <- get
+        n  <- get                  -- read the length
+        xs <- replicateM n get     -- now the elems.
+        return (listArray bs xs)
+
+--
+-- The IArray UArray e constraint is non portable. Requires flexible instances
+--
+instance (Binary i, Ix i, Binary e, IArray UArray e) => Binary (UArray i e) where
+    put a = do
+        put (bounds a)
+        put (rangeSize $ bounds a) -- now write the length
+        mapM_ put (elems a)
+    get = do
+        bs <- get
+        n  <- get
+        xs <- replicateM n get
+        return (listArray bs xs)
--- a/src/binary/Data/Binary/Builder.hs
+++ b/src/binary/Data/Binary/Builder.hs
@@ -0,0 +1,425 @@
+{-# LANGUAGE CPP, MagicHash #-}
+-- for unboxed shifts
+
+-----------------------------------------------------------------------------
+-- |
+-- Module      : Data.Binary.Builder
+-- Copyright   : Lennart Kolmodin, Ross Paterson
+-- License     : BSD3-style (see LICENSE)
+-- 
+-- Maintainer  : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
+-- Stability   : experimental
+-- Portability : portable to Hugs and GHC
+--
+-- Efficient construction of lazy bytestrings.
+--
+-----------------------------------------------------------------------------
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+#include "MachDeps.h"
+#endif
+
+module Data.Binary.Builder (
+
+    -- * The Builder type
+      Builder
+    , toLazyByteString
+
+    -- * Constructing Builders
+    , empty
+    , singleton
+    , append
+    , fromByteString        -- :: S.ByteString -> Builder
+    , fromLazyByteString    -- :: L.ByteString -> Builder
+
+    -- * Flushing the buffer state
+    , flush
+
+    -- * Derived Builders
+    -- ** Big-endian writes
+    , putWord16be           -- :: Word16 -> Builder
+    , putWord32be           -- :: Word32 -> Builder
+    , putWord64be           -- :: Word64 -> Builder
+
+    -- ** Little-endian writes
+    , putWord16le           -- :: Word16 -> Builder
+    , putWord32le           -- :: Word32 -> Builder
+    , putWord64le           -- :: Word64 -> Builder
+
+    -- ** Host-endian, unaligned writes
+    , putWordhost           -- :: Word -> Builder
+    , putWord16host         -- :: Word16 -> Builder
+    , putWord32host         -- :: Word32 -> Builder
+    , putWord64host         -- :: Word64 -> Builder
+
+  ) where
+
+import Foreign
+import Data.Monoid
+import Data.Word
+import qualified Data.ByteString      as S
+import qualified Data.ByteString.Lazy as L
+
+#ifdef BYTESTRING_IN_BASE
+import Data.ByteString.Base (inlinePerformIO)
+import qualified Data.ByteString.Base as S
+#else
+import Data.ByteString.Internal (inlinePerformIO)
+import qualified Data.ByteString.Internal as S
+import qualified Data.ByteString.Lazy.Internal as L
+#endif
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+import GHC.Base
+import GHC.Word (Word32(..),Word16(..),Word64(..))
+
+#if WORD_SIZE_IN_BITS < 64 && __GLASGOW_HASKELL__ >= 608
+import GHC.Word (uncheckedShiftRL64#)
+#endif
+#endif
+
+------------------------------------------------------------------------
+
+-- | A 'Builder' is an efficient way to build lazy 'L.ByteString's.
+-- There are several functions for constructing 'Builder's, but only one
+-- to inspect them: to extract any data, you have to turn them into lazy
+-- 'L.ByteString's using 'toLazyByteString'.
+--
+-- Internally, a 'Builder' constructs a lazy 'L.Bytestring' by filling byte
+-- arrays piece by piece.  As each buffer is filled, it is \'popped\'
+-- off, to become a new chunk of the resulting lazy 'L.ByteString'.
+-- All this is hidden from the user of the 'Builder'.
+
+newtype Builder = Builder {
+        -- Invariant (from Data.ByteString.Lazy):
+        --      The lists include no null ByteStrings.
+        runBuilder :: (Buffer -> [S.ByteString]) -> Buffer -> [S.ByteString]
+    }
+
+instance Monoid Builder where
+    mempty  = empty
+    {-# INLINE mempty #-}
+    mappend = append
+    {-# INLINE mappend #-}
+
+------------------------------------------------------------------------
+
+-- | /O(1)./ The empty Builder, satisfying
+--
+--  * @'toLazyByteString' 'empty' = 'L.empty'@
+--
+empty :: Builder
+empty = Builder id
+{-# INLINE empty #-}
+
+-- | /O(1)./ A Builder taking a single byte, satisfying
+--
+--  * @'toLazyByteString' ('singleton' b) = 'L.singleton' b@
+--
+singleton :: Word8 -> Builder
+singleton = writeN 1 . flip poke
+{-# INLINE singleton #-}
+
+------------------------------------------------------------------------
+
+-- | /O(1)./ The concatenation of two Builders, an associative operation
+-- with identity 'empty', satisfying
+--
+--  * @'toLazyByteString' ('append' x y) = 'L.append' ('toLazyByteString' x) ('toLazyByteString' y)@
+--
+append :: Builder -> Builder -> Builder
+append (Builder f) (Builder g) = Builder (f . g)
+{-# INLINE append #-}
+
+-- | /O(1)./ A Builder taking a 'S.ByteString', satisfying
+--
+--  * @'toLazyByteString' ('fromByteString' bs) = 'L.fromChunks' [bs]@
+--
+fromByteString :: S.ByteString -> Builder
+fromByteString bs
+  | S.null bs = empty
+  | otherwise = flush `append` mapBuilder (bs :)
+{-# INLINE fromByteString #-}
+
+-- | /O(1)./ A Builder taking a lazy 'L.ByteString', satisfying
+--
+--  * @'toLazyByteString' ('fromLazyByteString' bs) = bs@
+--
+fromLazyByteString :: L.ByteString -> Builder
+fromLazyByteString bss = flush `append` mapBuilder (L.toChunks bss ++)
+{-# INLINE fromLazyByteString #-}
+
+------------------------------------------------------------------------
+
+-- Our internal buffer type
+data Buffer = Buffer {-# UNPACK #-} !(ForeignPtr Word8)
+                     {-# UNPACK #-} !Int                -- offset
+                     {-# UNPACK #-} !Int                -- used bytes
+                     {-# UNPACK #-} !Int                -- length left
+
+------------------------------------------------------------------------
+
+-- | /O(n)./ Extract a lazy 'L.ByteString' from a 'Builder'.
+-- The construction work takes place if and when the relevant part of
+-- the lazy 'L.ByteString' is demanded.
+--
+toLazyByteString :: Builder -> L.ByteString
+toLazyByteString m = L.fromChunks $ unsafePerformIO $ do
+    buf <- newBuffer defaultSize
+    return (runBuilder (m `append` flush) (const []) buf)
+
+-- | /O(1)./ Pop the 'S.ByteString' we have constructed so far, if any,
+-- yielding a new chunk in the result lazy 'L.ByteString'.
+flush :: Builder
+flush = Builder $ \ k buf@(Buffer p o u l) ->
+    if u == 0
+      then k buf
+      else S.PS p o u : k (Buffer p (o+u) 0 l)
+
+------------------------------------------------------------------------
+
+--
+-- copied from Data.ByteString.Lazy
+--
+defaultSize :: Int
+defaultSize = 32 * k - overhead
+    where k = 1024
+          overhead = 2 * sizeOf (undefined :: Int)
+
+------------------------------------------------------------------------
+
+-- | Sequence an IO operation on the buffer
+unsafeLiftIO :: (Buffer -> IO Buffer) -> Builder
+unsafeLiftIO f =  Builder $ \ k buf -> inlinePerformIO $ do
+    buf' <- f buf
+    return (k buf')
+{-# INLINE unsafeLiftIO #-}
+
+-- | Get the size of the buffer
+withSize :: (Int -> Builder) -> Builder
+withSize f = Builder $ \ k buf@(Buffer _ _ _ l) ->
+    runBuilder (f l) k buf
+
+-- | Map the resulting list of bytestrings.
+mapBuilder :: ([S.ByteString] -> [S.ByteString]) -> Builder
+mapBuilder f = Builder (f .)
+
+------------------------------------------------------------------------
+
+-- | Ensure that there are at least @n@ many bytes available.
+ensureFree :: Int -> Builder
+ensureFree n = n `seq` withSize $ \ l ->
+    if n <= l then empty else
+        flush `append` unsafeLiftIO (const (newBuffer (max n defaultSize)))
+{-# INLINE ensureFree #-}
+
+-- | Ensure that @n@ many bytes are available, and then use @f@ to write some
+-- bytes into the memory.
+writeN :: Int -> (Ptr Word8 -> IO ()) -> Builder
+writeN n f = ensureFree n `append` unsafeLiftIO (writeNBuffer n f)
+{-# INLINE writeN #-}
+
+writeNBuffer :: Int -> (Ptr Word8 -> IO ()) -> Buffer -> IO Buffer
+writeNBuffer n f (Buffer fp o u l) = do
+    withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
+    return (Buffer fp o (u+n) (l-n))
+{-# INLINE writeNBuffer #-}
+
+newBuffer :: Int -> IO Buffer
+newBuffer size = do
+    fp <- S.mallocByteString size
+    return $! Buffer fp 0 0 size
+{-# INLINE newBuffer #-}
+
+------------------------------------------------------------------------
+-- Aligned, host order writes of storable values
+
+-- | Ensure that @n@ many bytes are available, and then use @f@ to write some
+-- storable values into the memory.
+writeNbytes :: Storable a => Int -> (Ptr a -> IO ()) -> Builder
+writeNbytes n f = ensureFree n `append` unsafeLiftIO (writeNBufferBytes n f)
+{-# INLINE writeNbytes #-}
+
+writeNBufferBytes :: Storable a => Int -> (Ptr a -> IO ()) -> Buffer -> IO Buffer
+writeNBufferBytes n f (Buffer fp o u l) = do
+    withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
+    return (Buffer fp o (u+n) (l-n))
+{-# INLINE writeNBufferBytes #-}
+
+------------------------------------------------------------------------
+
+--
+-- We rely on the fromIntegral to do the right masking for us.
+-- The inlining here is critical, and can be worth 4x performance
+--
+
+-- | Write a Word16 in big endian format
+putWord16be :: Word16 -> Builder
+putWord16be w = writeN 2 $ \p -> do
+    poke p               (fromIntegral (shiftr_w16 w 8) :: Word8)
+    poke (p `plusPtr` 1) (fromIntegral (w)              :: Word8)
+{-# INLINE putWord16be #-}
+
+-- | Write a Word16 in little endian format
+putWord16le :: Word16 -> Builder
+putWord16le w = writeN 2 $ \p -> do
+    poke p               (fromIntegral (w)              :: Word8)
+    poke (p `plusPtr` 1) (fromIntegral (shiftr_w16 w 8) :: Word8)
+{-# INLINE putWord16le #-}
+
+-- putWord16le w16 = writeN 2 (\p -> poke (castPtr p) w16)
+
+-- | Write a Word32 in big endian format
+putWord32be :: Word32 -> Builder
+putWord32be w = writeN 4 $ \p -> do
+    poke p               (fromIntegral (shiftr_w32 w 24) :: Word8)
+    poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 16) :: Word8)
+    poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w  8) :: Word8)
+    poke (p `plusPtr` 3) (fromIntegral (w)               :: Word8)
+{-# INLINE putWord32be #-}
+
+--
+-- a data type to tag Put/Check. writes construct these which are then
+-- inlined and flattened. matching Checks will be more robust with rules.
+--
+
+-- | Write a Word32 in little endian format
+putWord32le :: Word32 -> Builder
+putWord32le w = writeN 4 $ \p -> do
+    poke p               (fromIntegral (w)               :: Word8)
+    poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w  8) :: Word8)
+    poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 16) :: Word8)
+    poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 w 24) :: Word8)
+{-# INLINE putWord32le #-}
+
+-- on a little endian machine:
+-- putWord32le w32 = writeN 4 (\p -> poke (castPtr p) w32)
+
+-- | Write a Word64 in big endian format
+putWord64be :: Word64 -> Builder
+#if WORD_SIZE_IN_BITS < 64
+--
+-- To avoid expensive 64 bit shifts on 32 bit machines, we cast to
+-- Word32, and write that
+--
+putWord64be w =
+    let a = fromIntegral (shiftr_w64 w 32) :: Word32
+        b = fromIntegral w                 :: Word32
+    in writeN 8 $ \p -> do
+    poke p               (fromIntegral (shiftr_w32 a 24) :: Word8)
+    poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 16) :: Word8)
+    poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a  8) :: Word8)
+    poke (p `plusPtr` 3) (fromIntegral (a)               :: Word8)
+    poke (p `plusPtr` 4) (fromIntegral (shiftr_w32 b 24) :: Word8)
+    poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 16) :: Word8)
+    poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b  8) :: Word8)
+    poke (p `plusPtr` 7) (fromIntegral (b)               :: Word8)
+#else
+putWord64be w = writeN 8 $ \p -> do
+    poke p               (fromIntegral (shiftr_w64 w 56) :: Word8)
+    poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 48) :: Word8)
+    poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 40) :: Word8)
+    poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 32) :: Word8)
+    poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 24) :: Word8)
+    poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 16) :: Word8)
+    poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w  8) :: Word8)
+    poke (p `plusPtr` 7) (fromIntegral (w)               :: Word8)
+#endif
+{-# INLINE putWord64be #-}
+
+-- | Write a Word64 in little endian format
+putWord64le :: Word64 -> Builder
+
+#if WORD_SIZE_IN_BITS < 64
+putWord64le w =
+    let b = fromIntegral (shiftr_w64 w 32) :: Word32
+        a = fromIntegral w                 :: Word32
+    in writeN 8 $ \p -> do
+    poke (p)             (fromIntegral (a)               :: Word8)
+    poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a  8) :: Word8)
+    poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 16) :: Word8)
+    poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 a 24) :: Word8)
+    poke (p `plusPtr` 4) (fromIntegral (b)               :: Word8)
+    poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b  8) :: Word8)
+    poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 16) :: Word8)
+    poke (p `plusPtr` 7) (fromIntegral (shiftr_w32 b 24) :: Word8)
+#else
+putWord64le w = writeN 8 $ \p -> do
+    poke p               (fromIntegral (w)               :: Word8)
+    poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w  8) :: Word8)
+    poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 16) :: Word8)
+    poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 24) :: Word8)
+    poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 32) :: Word8)
+    poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 40) :: Word8)
+    poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 48) :: Word8)
+    poke (p `plusPtr` 7) (fromIntegral (shiftr_w64 w 56) :: Word8)
+#endif
+{-# INLINE putWord64le #-}
+
+-- on a little endian machine:
+-- putWord64le w64 = writeN 8 (\p -> poke (castPtr p) w64)
+
+------------------------------------------------------------------------
+-- Unaligned, word size ops
+
+-- | /O(1)./ A Builder taking a single native machine word. The word is
+-- written in host order, host endian form, for the machine you're on.
+-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
+-- 4 bytes. Values written this way are not portable to
+-- different endian or word sized machines, without conversion.
+--
+putWordhost :: Word -> Builder
+putWordhost w = writeNbytes (sizeOf (undefined :: Word)) (\p -> poke p w)
+{-# INLINE putWordhost #-}
+
+-- | Write a Word16 in native host order and host endianness.
+-- 2 bytes will be written, unaligned.
+putWord16host :: Word16 -> Builder
+putWord16host w16 = writeNbytes (sizeOf (undefined :: Word16)) (\p -> poke p w16)
+{-# INLINE putWord16host #-}
+
+-- | Write a Word32 in native host order and host endianness.
+-- 4 bytes will be written, unaligned.
+putWord32host :: Word32 -> Builder
+putWord32host w32 = writeNbytes (sizeOf (undefined :: Word32)) (\p -> poke p w32)
+{-# INLINE putWord32host #-}
+
+-- | Write a Word64 in native host order.
+-- On a 32 bit machine we write two host order Word32s, in big endian form.
+-- 8 bytes will be written, unaligned.
+putWord64host :: Word64 -> Builder
+putWord64host w = writeNbytes (sizeOf (undefined :: Word64)) (\p -> poke p w)
+{-# INLINE putWord64host #-}
+
+------------------------------------------------------------------------
+-- Unchecked shifts
+
+{-# INLINE shiftr_w16 #-}
+shiftr_w16 :: Word16 -> Int -> Word16
+{-# INLINE shiftr_w32 #-}
+shiftr_w32 :: Word32 -> Int -> Word32
+{-# INLINE shiftr_w64 #-}
+shiftr_w64 :: Word64 -> Int -> Word64
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+shiftr_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftRL#`   i)
+shiftr_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftRL#`   i)
+
+#if WORD_SIZE_IN_BITS < 64
+shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL64#` i)
+
+#if __GLASGOW_HASKELL__ <= 606
+-- Exported by GHC.Word in GHC 6.8 and higher
+foreign import ccall unsafe "stg_uncheckedShiftRL64"
+    uncheckedShiftRL64#     :: Word64# -> Int# -> Word64#
+#endif
+
+#else
+shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL#` i)
+#endif
+
+#else
+shiftr_w16 = shiftR
+shiftr_w32 = shiftR
+shiftr_w64 = shiftR
+#endif
--- a/src/binary/Data/Binary/Get.hs
+++ b/src/binary/Data/Binary/Get.hs
@@ -0,0 +1,543 @@
+{-# LANGUAGE CPP, MagicHash #-}
+-- for unboxed shifts
+
+-----------------------------------------------------------------------------
+-- |
+-- Module      : Data.Binary.Get
+-- Copyright   : Lennart Kolmodin
+-- License     : BSD3-style (see LICENSE)
+-- 
+-- Maintainer  : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
+-- Stability   : experimental
+-- Portability : portable to Hugs and GHC.
+--
+-- The Get monad. A monad for efficiently building structures from
+-- encoded lazy ByteStrings
+--
+-----------------------------------------------------------------------------
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+#include "MachDeps.h"
+#endif
+
+module Data.Binary.Get (
+
+    -- * The Get type
+      Get
+    , runGet
+    , runGetState
+
+    -- * Parsing
+    , skip
+    , uncheckedSkip
+    , lookAhead
+    , lookAheadM
+    , lookAheadE
+    , uncheckedLookAhead
+
+    -- * Utility
+    , bytesRead
+    , getBytes
+    , remaining
+    , isEmpty
+
+    -- * Parsing particular types
+    , getWord8
+
+    -- ** ByteStrings
+    , getByteString
+    , getLazyByteString
+    , getLazyByteStringNul
+    , getRemainingLazyByteString
+
+    -- ** Big-endian reads
+    , getWord16be
+    , getWord32be
+    , getWord64be
+
+    -- ** Little-endian reads
+    , getWord16le
+    , getWord32le
+    , getWord64le
+
+    -- ** Host-endian, unaligned reads
+    , getWordhost
+    , getWord16host
+    , getWord32host
+    , getWord64host
+
+  ) where
+
+import Control.Monad (when,liftM,ap)
+import Control.Monad.Fix
+import Data.Maybe (isNothing)
+
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as L
+
+#ifdef BYTESTRING_IN_BASE
+import qualified Data.ByteString.Base as B
+#else
+import qualified Data.ByteString.Internal as B
+import qualified Data.ByteString.Lazy.Internal as L
+#endif
+
+#ifdef APPLICATIVE_IN_BASE
+import Control.Applicative (Applicative(..))
+#endif
+
+import Foreign
+
+-- used by splitAtST
+import Control.Monad.ST
+import Data.STRef
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+import GHC.Base
+import GHC.Word
+import GHC.Int
+#endif
+
+-- | The parse state
+data S = S {-# UNPACK #-} !B.ByteString  -- current chunk
+           L.ByteString                  -- the rest of the input
+           {-# UNPACK #-} !Int64         -- bytes read
+
+-- | The Get monad is just a State monad carrying around the input ByteString
+newtype Get a = Get { unGet :: S -> (a, S) }
+
+instance Functor Get where
+    fmap f m = Get (\s -> case unGet m s of
+                            (a, s') -> (f a, s'))
+    {-# INLINE fmap #-}
+
+#ifdef APPLICATIVE_IN_BASE
+instance Applicative Get where
+    pure  = return
+    (<*>) = ap
+#endif
+
+instance Monad Get where
+    return a  = Get (\s -> (a, s))
+    {-# INLINE return #-}
+
+    m >>= k   = Get (\s -> case unGet m s of
+                             (a, s') -> unGet (k a) s')
+    {-# INLINE (>>=) #-}
+
+    fail      = failDesc
+
+instance MonadFix Get where
+    mfix f = Get (\s -> let (a,s') = unGet (f a) s 
+                        in (a,s'))
+
+------------------------------------------------------------------------
+
+get :: Get S
+get   = Get (\s -> (s, s))
+
+put :: S -> Get ()
+put s = Get (\_ -> ((), s))
+
+------------------------------------------------------------------------
+--
+-- dons, GHC 6.10: explicit inlining disabled, was killing performance.
+-- Without it, GHC seems to do just fine. And we get similar
+-- performance with 6.8.2 anyway.
+--
+
+initState :: L.ByteString -> S
+initState xs = mkState xs 0
+{- INLINE initState -}
+
+{-
+initState (B.LPS xs) =
+    case xs of
+      []      -> S B.empty L.empty 0
+      (x:xs') -> S x (B.LPS xs') 0
+-}
+
+#ifndef BYTESTRING_IN_BASE
+mkState :: L.ByteString -> Int64 -> S
+mkState l = case l of
+    L.Empty      -> S B.empty L.empty
+    L.Chunk x xs -> S x xs
+{- INLINE mkState -}
+
+#else
+mkState :: L.ByteString -> Int64 -> S
+mkState (B.LPS xs) =
+    case xs of
+        [] -> S B.empty L.empty
+        (x:xs') -> S x (B.LPS xs')
+#endif
+
+-- | Run the Get monad applies a 'get'-based parser on the input ByteString
+runGet :: Get a -> L.ByteString -> a
+runGet m str = case unGet m (initState str) of (a, _) -> a
+
+-- | Run the Get monad applies a 'get'-based parser on the input
+-- ByteString. Additional to the result of get it returns the number of
+-- consumed bytes and the rest of the input.
+runGetState :: Get a -> L.ByteString -> Int64 -> (a, L.ByteString, Int64)
+runGetState m str off =
+    case unGet m (mkState str off) of
+      (a, ~(S s ss newOff)) -> (a, s `join` ss, newOff)
+
+------------------------------------------------------------------------
+
+failDesc :: String -> Get a
+failDesc err = do
+    S _ _ bytes <- get
+    Get (error (err ++ ". Failed reading at byte position " ++ show bytes))
+
+-- | Skip ahead @n@ bytes. Fails if fewer than @n@ bytes are available.
+skip :: Int -> Get ()
+skip n = readN (fromIntegral n) (const ())
+
+-- | Skip ahead @n@ bytes. No error if there isn't enough bytes.
+uncheckedSkip :: Int64 -> Get ()
+uncheckedSkip n = do
+    S s ss bytes <- get
+    if fromIntegral (B.length s) >= n
+      then put (S (B.drop (fromIntegral n) s) ss (bytes + n))
+      else do
+        let rest = L.drop (n - fromIntegral (B.length s)) ss
+        put $! mkState rest (bytes + n)
+
+-- | Run @ga@, but return without consuming its input.
+-- Fails if @ga@ fails.
+lookAhead :: Get a -> Get a
+lookAhead ga = do
+    s <- get
+    a <- ga
+    put s
+    return a
+
+-- | Like 'lookAhead', but consume the input if @gma@ returns 'Just _'.
+-- Fails if @gma@ fails.
+lookAheadM :: Get (Maybe a) -> Get (Maybe a)
+lookAheadM gma = do
+    s <- get
+    ma <- gma
+    when (isNothing ma) $
+        put s
+    return ma
+
+-- | Like 'lookAhead', but consume the input if @gea@ returns 'Right _'.
+-- Fails if @gea@ fails.
+lookAheadE :: Get (Either a b) -> Get (Either a b)
+lookAheadE gea = do
+    s <- get
+    ea <- gea
+    case ea of
+        Left _ -> put s
+        _      -> return ()
+    return ea
+
+-- | Get the next up to @n@ bytes as a lazy ByteString, without consuming them. 
+uncheckedLookAhead :: Int64 -> Get L.ByteString
+uncheckedLookAhead n = do
+    S s ss _ <- get
+    if n <= fromIntegral (B.length s)
+        then return (L.fromChunks [B.take (fromIntegral n) s])
+        else return $ L.take n (s `join` ss)
+
+------------------------------------------------------------------------
+-- Utility
+
+-- | Get the total number of bytes read to this point.
+bytesRead :: Get Int64
+bytesRead = do
+    S _ _ b <- get
+    return b
+
+-- | Get the number of remaining unparsed bytes.
+-- Useful for checking whether all input has been consumed.
+-- Note that this forces the rest of the input.
+remaining :: Get Int64
+remaining = do
+    S s ss _ <- get
+    return (fromIntegral (B.length s) + L.length ss)
+
+-- | Test whether all input has been consumed,
+-- i.e. there are no remaining unparsed bytes.
+isEmpty :: Get Bool
+isEmpty = do
+    S s ss _ <- get
+    return (B.null s && L.null ss)
+
+------------------------------------------------------------------------
+-- Utility with ByteStrings
+
+-- | An efficient 'get' method for strict ByteStrings. Fails if fewer
+-- than @n@ bytes are left in the input.
+getByteString :: Int -> Get B.ByteString
+getByteString n = readN n id
+{-# INLINE getByteString #-}
+
+-- | An efficient 'get' method for lazy ByteStrings. Does not fail if fewer than
+-- @n@ bytes are left in the input.
+getLazyByteString :: Int64 -> Get L.ByteString
+getLazyByteString n = do
+    S s ss bytes <- get
+    let big = s `join` ss
+    case splitAtST n big of
+      (consume, rest) -> do put $ mkState rest (bytes + n)
+                            return consume
+{-# INLINE getLazyByteString #-}
+
+-- | Get a lazy ByteString that is terminated with a NUL byte. Fails
+-- if it reaches the end of input without hitting a NUL.
+getLazyByteStringNul :: Get L.ByteString
+getLazyByteStringNul = do
+    S s ss bytes <- get
+    let big = s `join` ss
+        (consume, t) = L.break (== 0) big
+        (h, rest) = L.splitAt 1 t
+    if L.null h
+      then fail "too few bytes"
+      else do
+        put $ mkState rest (bytes + L.length consume + 1)
+        return consume
+{-# INLINE getLazyByteStringNul #-}
+
+-- | Get the remaining bytes as a lazy ByteString
+getRemainingLazyByteString :: Get L.ByteString
+getRemainingLazyByteString = do
+    S s ss _ <- get
+    return (s `join` ss)
+
+------------------------------------------------------------------------
+-- Helpers
+
+-- | Pull @n@ bytes from the input, as a strict ByteString.
+getBytes :: Int -> Get B.ByteString
+getBytes n = do
+    S s ss bytes <- get
+    if n <= B.length s
+        then do let (consume,rest) = B.splitAt n s
+                put $! S rest ss (bytes + fromIntegral n)
+                return $! consume
+        else
+              case L.splitAt (fromIntegral n) (s `join` ss) of
+                (consuming, rest) ->
+                    do let now = B.concat . L.toChunks $ consuming
+                       put $! mkState rest (bytes + fromIntegral n)
+                       -- forces the next chunk before this one is returned
+                       if (B.length now < n)
+                         then
+                            fail "too few bytes"
+                         else
+                            return now
+{- INLINE getBytes -}
+-- ^ important
+
+#ifndef BYTESTRING_IN_BASE
+join :: B.ByteString -> L.ByteString -> L.ByteString
+join bb lb
+    | B.null bb = lb
+    | otherwise = L.Chunk bb lb
+
+#else
+join :: B.ByteString -> L.ByteString -> L.ByteString
+join bb (B.LPS lb)
+    | B.null bb = B.LPS lb
+    | otherwise = B.LPS (bb:lb)
+#endif
+    -- don't use L.append, it's strict in it's second argument :/
+{- INLINE join -}
+
+-- | Split a ByteString. If the first result is consumed before the --
+-- second, this runs in constant heap space.
+--
+-- You must force the returned tuple for that to work, e.g.
+-- 
+-- > case splitAtST n xs of
+-- >    (ys,zs) -> consume ys ... consume zs
+--
+splitAtST :: Int64 -> L.ByteString -> (L.ByteString, L.ByteString)
+splitAtST i ps | i <= 0 = (L.empty, ps)
+#ifndef BYTESTRING_IN_BASE
+splitAtST i ps          = runST (
+     do r  <- newSTRef undefined
+        xs <- first r i ps
+        ys <- unsafeInterleaveST (readSTRef r)
+        return (xs, ys))
+
+  where
+        first r 0 xs@(L.Chunk _ _) = writeSTRef r xs    >> return L.Empty
+        first r _ L.Empty          = writeSTRef r L.Empty >> return L.Empty
+
+        first r n (L.Chunk x xs)
+          | n < l     = do writeSTRef r (L.Chunk (B.drop (fromIntegral n) x) xs)
+                           return $ L.Chunk (B.take (fromIntegral n) x) L.Empty
+          | otherwise = do writeSTRef r (L.drop (n - l) xs)
+                           liftM (L.Chunk x) $ unsafeInterleaveST (first r (n - l) xs)
+
+         where l = fromIntegral (B.length x)
+#else
+splitAtST i (B.LPS ps)  = runST (
+     do r  <- newSTRef undefined
+        xs <- first r i ps
+        ys <- unsafeInterleaveST (readSTRef r)
+        return (B.LPS xs, B.LPS ys))
+
+  where first r 0 xs     = writeSTRef r xs >> return []
+        first r _ []     = writeSTRef r [] >> return []
+        first r n (x:xs)
+          | n < l     = do writeSTRef r (B.drop (fromIntegral n) x : xs)
+                           return [B.take (fromIntegral n) x]
+          | otherwise = do writeSTRef r (L.toChunks (L.drop (n - l) (B.LPS xs)))
+                           fmap (x:) $ unsafeInterleaveST (first r (n - l) xs)
+
+         where l = fromIntegral (B.length x)
+#endif
+{- INLINE splitAtST -}
+
+-- Pull n bytes from the input, and apply a parser to those bytes,
+-- yielding a value. If less than @n@ bytes are available, fail with an
+-- error. This wraps @getBytes@.
+readN :: Int -> (B.ByteString -> a) -> Get a
+readN n f = fmap f $ getBytes n
+{- INLINE readN -}
+-- ^ important
+
+------------------------------------------------------------------------
+-- Primtives
+
+-- helper, get a raw Ptr onto a strict ByteString copied out of the
+-- underlying lazy byteString. So many indirections from the raw parser
+-- state that my head hurts...
+
+getPtr :: Storable a => Int -> Get a
+getPtr n = do
+    (fp,o,_) <- readN n B.toForeignPtr
+    return . B.inlinePerformIO $ withForeignPtr fp $ \p -> peek (castPtr $ p `plusPtr` o)
+{- INLINE getPtr -}
+
+------------------------------------------------------------------------
+
+-- | Read a Word8 from the monad state
+getWord8 :: Get Word8
+getWord8 = getPtr (sizeOf (undefined :: Word8))
+{- INLINE getWord8 -}
+
+-- | Read a Word16 in big endian format
+getWord16be :: Get Word16
+getWord16be = do
+    s <- readN 2 id
+    return $! (fromIntegral (s `B.index` 0) `shiftl_w16` 8) .|.
+              (fromIntegral (s `B.index` 1))
+{- INLINE getWord16be -}
+
+-- | Read a Word16 in little endian format
+getWord16le :: Get Word16
+getWord16le = do
+    s <- readN 2 id
+    return $! (fromIntegral (s `B.index` 1) `shiftl_w16` 8) .|.
+              (fromIntegral (s `B.index` 0) )
+{- INLINE getWord16le -}
+
+-- | Read a Word32 in big endian format
+getWord32be :: Get Word32
+getWord32be = do
+    s <- readN 4 id
+    return $! (fromIntegral (s `B.index` 0) `shiftl_w32` 24) .|.
+              (fromIntegral (s `B.index` 1) `shiftl_w32` 16) .|.
+              (fromIntegral (s `B.index` 2) `shiftl_w32`  8) .|.
+              (fromIntegral (s `B.index` 3) )
+{- INLINE getWord32be -}
+
+-- | Read a Word32 in little endian format
+getWord32le :: Get Word32
+getWord32le = do
+    s <- readN 4 id
+    return $! (fromIntegral (s `B.index` 3) `shiftl_w32` 24) .|.
+              (fromIntegral (s `B.index` 2) `shiftl_w32` 16) .|.
+              (fromIntegral (s `B.index` 1) `shiftl_w32`  8) .|.
+              (fromIntegral (s `B.index` 0) )
+{- INLINE getWord32le -}
+
+-- | Read a Word64 in big endian format
+getWord64be :: Get Word64
+getWord64be = do
+    s <- readN 8 id
+    return $! (fromIntegral (s `B.index` 0) `shiftl_w64` 56) .|.
+              (fromIntegral (s `B.index` 1) `shiftl_w64` 48) .|.
+              (fromIntegral (s `B.index` 2) `shiftl_w64` 40) .|.
+              (fromIntegral (s `B.index` 3) `shiftl_w64` 32) .|.
+              (fromIntegral (s `B.index` 4) `shiftl_w64` 24) .|.
+              (fromIntegral (s `B.index` 5) `shiftl_w64` 16) .|.
+              (fromIntegral (s `B.index` 6) `shiftl_w64`  8) .|.
+              (fromIntegral (s `B.index` 7) )
+{- INLINE getWord64be -}
+
+-- | Read a Word64 in little endian format
+getWord64le :: Get Word64
+getWord64le = do
+    s <- readN 8 id
+    return $! (fromIntegral (s `B.index` 7) `shiftl_w64` 56) .|.
+              (fromIntegral (s `B.index` 6) `shiftl_w64` 48) .|.
+              (fromIntegral (s `B.index` 5) `shiftl_w64` 40) .|.
+              (fromIntegral (s `B.index` 4) `shiftl_w64` 32) .|.
+              (fromIntegral (s `B.index` 3) `shiftl_w64` 24) .|.
+              (fromIntegral (s `B.index` 2) `shiftl_w64` 16) .|.
+              (fromIntegral (s `B.index` 1) `shiftl_w64`  8) .|.
+              (fromIntegral (s `B.index` 0) )
+{- INLINE getWord64le -}
+
+------------------------------------------------------------------------
+-- Host-endian reads
+
+-- | /O(1)./ Read a single native machine word. The word is read in
+-- host order, host endian form, for the machine you're on. On a 64 bit
+-- machine the Word is an 8 byte value, on a 32 bit machine, 4 bytes.
+getWordhost :: Get Word
+getWordhost = getPtr (sizeOf (undefined :: Word))
+{- INLINE getWordhost -}
+
+-- | /O(1)./ Read a 2 byte Word16 in native host order and host endianness.
+getWord16host :: Get Word16
+getWord16host = getPtr (sizeOf (undefined :: Word16))
+{- INLINE getWord16host -}
+
+-- | /O(1)./ Read a Word32 in native host order and host endianness.
+getWord32host :: Get Word32
+getWord32host = getPtr  (sizeOf (undefined :: Word32))
+{- INLINE getWord32host -}
+
+-- | /O(1)./ Read a Word64 in native host order and host endianess.
+getWord64host   :: Get Word64
+getWord64host = getPtr  (sizeOf (undefined :: Word64))
+{- INLINE getWord64host -}
+
+------------------------------------------------------------------------
+-- Unchecked shifts
+
+shiftl_w16 :: Word16 -> Int -> Word16
+shiftl_w32 :: Word32 -> Int -> Word32
+shiftl_w64 :: Word64 -> Int -> Word64
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+shiftl_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftL#`   i)
+shiftl_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftL#`   i)
+
+#if WORD_SIZE_IN_BITS < 64
+shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL64#` i)
+
+#if __GLASGOW_HASKELL__ <= 606
+-- Exported by GHC.Word in GHC 6.8 and higher
+foreign import ccall unsafe "stg_uncheckedShiftL64"
+    uncheckedShiftL64#     :: Word64# -> Int# -> Word64#
+#endif
+
+#else
+shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL#` i)
+#endif
+
+#else
+shiftl_w16 = shiftL
+shiftl_w32 = shiftL
+shiftl_w64 = shiftL
+#endif
--- a/src/binary/Data/Binary/IEEE754.lhs
+++ b/src/binary/Data/Binary/IEEE754.lhs
@@ -0,0 +1,402 @@
+% Copyright (C) 2009 John Millikin <jmillikin@gmail.com>
+% 
+% This program is free software: you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation, either version 3 of the License, or
+% any later version.
+% 
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+% GNU General Public License for more details.
+% 
+% You should have received a copy of the GNU General Public License
+% along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+\ignore{
+\begin{code}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+module Data.Binary.IEEE754 (
+	-- * Parsing
+	  getFloat16be, getFloat16le
+	, getFloat32be, getFloat32le
+	, getFloat64be, getFloat64le
+	
+	-- * Serializing
+	, putFloat32be, putFloat32le
+	, putFloat64be, putFloat64le
+) where
+
+import Data.Bits ((.&.), (.|.), shiftL, shiftR, Bits)
+import Data.Word (Word8)
+import Data.List (foldl')
+
+import qualified Data.ByteString as B
+import Data.Binary.Get (Get, getByteString)
+import Data.Binary.Put (Put, putByteString)
+\end{code}
+}
+
+\section{Parsing}
+
+\subsection{Public interface}
+
+\begin{code}
+getFloat16be :: Get Float
+getFloat16be = getFloat (ByteCount 2) splitBytes
+\end{code}
+
+\begin{code}
+getFloat16le :: Get Float
+getFloat16le = getFloat (ByteCount 2) $ splitBytes . reverse
+\end{code}
+
+\begin{code}
+getFloat32be :: Get Float
+getFloat32be = getFloat (ByteCount 4) splitBytes
+\end{code}
+
+\begin{code}
+getFloat32le :: Get Float
+getFloat32le = getFloat (ByteCount 4) $ splitBytes . reverse
+\end{code}
+
+\begin{code}
+getFloat64be :: Get Double
+getFloat64be = getFloat (ByteCount 8) splitBytes
+\end{code}
+
+\begin{code}
+getFloat64le :: Get Double
+getFloat64le = getFloat (ByteCount 8) $ splitBytes . reverse
+\end{code}
+
+\subsection{Implementation}
+
+Split the raw byte array into (sign, exponent, significand) components.
+The exponent and signifcand are drawn directly from the bits in the
+original float, and have not been unbiased or otherwise modified.
+
+\begin{code}
+splitBytes :: [Word8] -> RawFloat
+splitBytes bs = RawFloat width sign exp' sig expWidth sigWidth where
+	width = ByteCount (length bs)
+	nBits = bitsInWord8 bs
+	sign = if head bs .&. 0x80 == 0x80
+		then Negative
+		else Positive
+	
+	expStart = 1
+	expWidth = exponentWidth nBits
+	expEnd = expStart + expWidth
+	exp' = Exponent . fromIntegral $ bitSlice bs expStart expEnd
+	
+	sigWidth = nBits - expEnd
+	sig  = Significand $ bitSlice bs expEnd nBits
+\end{code}
+
+\subsubsection{Encodings and special values}
+
+The next step depends on the value of the exponent $e$, size of the
+exponent field in bits $w$, and value of the significand.
+
+\begin{table}[h]
+\begin{center}
+\begin{tabular}{lrl}
+\toprule
+Exponent                & Significand & Format \\
+\midrule
+$0$                     & $0$           & Zero \\
+$0$                     & $> 0$         & Denormalised \\
+$1 \leq e \leq 2^w - 2$ & \textit{any} & Normalised \\
+$2^w-1$                 & $0$           & Infinity \\
+$2^w-1$                 & $> 0$         & NaN \\
+\bottomrule
+\end{tabular}
+\end{center}
+\end{table}
+
+There's no built-in literals for Infinity or NaN, so they
+are constructed using the {\tt Read} instances for {\tt Double} and
+{\tt Float}.
+
+\begin{code}
+merge :: (Read a, RealFloat a) => RawFloat -> a
+merge f@(RawFloat _ _ e sig eWidth _)
+	| e == 0 = if sig == 0
+		then 0.0
+		else denormalised f
+	| e == eMax - 1 = if sig == 0
+		then read "Infinity"
+		else read "NaN"
+	| otherwise = normalised f
+	where eMax = 2 `pow` eWidth
+\end{code}
+
+If a value is normalised, its significand has an implied {\tt 1} bit
+in its most-significant bit. The significand must be adjusted by
+this value before being passed to {\tt encodeField}.
+
+\begin{code}
+normalised :: RealFloat a => RawFloat -> a
+normalised f = encodeFloat fraction exp' where
+	Significand sig = rawSignificand f
+	Exponent exp' = unbiased - sigWidth
+	
+	fraction = sig + (1 `bitShiftL` rawSignificandWidth f)
+	
+	sigWidth = fromIntegral $ rawSignificandWidth f
+	unbiased = unbias (rawExponent f) (rawExponentWidth f)
+\end{code}
+
+For denormalised values, the implied {\tt 1} bit is the least-significant
+bit of the exponent.
+
+\begin{code}
+denormalised :: RealFloat a => RawFloat -> a
+denormalised f = encodeFloat sig exp' where
+	Significand sig = rawSignificand f
+	Exponent exp' = unbiased - sigWidth + 1
+	
+	sigWidth = fromIntegral $ rawSignificandWidth f
+	unbiased = unbias (rawExponent f) (rawExponentWidth f)
+\end{code}
+
+By composing {\tt splitBytes} and {\tt merge}, the absolute value of the
+float is calculated. Before being returned to the calling function, it
+must be signed appropriately.
+
+\begin{code}
+getFloat :: (Read a, RealFloat a) => ByteCount
+            -> ([Word8] -> RawFloat) -> Get a
+getFloat (ByteCount width) parser = do
+	raw <- fmap (parser . B.unpack) $ getByteString width
+	let absFloat = merge raw
+	return $ case rawSign raw of
+		Positive ->  absFloat
+		Negative -> -absFloat
+\end{code}
+
+\section{Serialising}
+
+\subsection{Public interface}
+
+\begin{code}
+putFloat32be :: Float -> Put
+putFloat32be = putFloat (ByteCount 4) id
+\end{code}
+
+\begin{code}
+putFloat32le :: Float -> Put
+putFloat32le = putFloat (ByteCount 4) reverse
+\end{code}
+
+\begin{code}
+putFloat64be :: Double -> Put
+putFloat64be = putFloat (ByteCount 8) id
+\end{code}
+
+\begin{code}
+putFloat64le :: Double -> Put
+putFloat64le = putFloat (ByteCount 8) reverse
+\end{code}
+
+\subsection{Implementation}
+
+Serialisation is similar to parsing. First, the float is converted to
+a structure representing raw bitfields. The values returned from
+{\tt decodeFloat} are clamped to their expected lengths before being
+stored in the {\tt RawFloat}.
+
+\begin{code}
+splitFloat :: RealFloat a => ByteCount -> a -> RawFloat
+splitFloat width x = raw where
+	raw = RawFloat width sign clampedExp clampedSig expWidth sigWidth
+	sign = if isNegativeNaN x || isNegativeZero x || x < 0
+		then Negative
+		else Positive
+	clampedExp = clamp expWidth exp'
+	clampedSig = clamp sigWidth sig
+	(exp', sig) = case (dFraction, dExponent, biasedExp) of
+		(0, 0, _) -> (0, 0)
+		(_, _, 0) -> (0, Significand $ truncatedSig + 1)
+		_         -> (biasedExp, Significand truncatedSig)
+	expWidth = exponentWidth $ bitCount width
+	sigWidth = bitCount width - expWidth - 1 -- 1 for sign bit
+	
+	(dFraction, dExponent) = decodeFloat x
+	
+	rawExp = Exponent $ dExponent + fromIntegral sigWidth
+	biasedExp = bias rawExp expWidth
+	truncatedSig = abs dFraction - (1 `bitShiftL` sigWidth)
+\end{code}
+
+Then, the {\tt RawFloat} is converted to a list of bytes by mashing all
+the fields together into an {\tt Integer}, and chopping up that integer
+in 8-bit blocks.
+
+\begin{code}
+rawToBytes :: RawFloat -> [Word8]
+rawToBytes raw = integerToBytes mashed width where
+	RawFloat width sign exp' sig expWidth sigWidth = raw
+	sign' :: Word8
+	sign' = case sign of
+		Positive -> 0
+		Negative -> 1
+	mashed = mashBits sig sigWidth .
+	         mashBits exp' expWidth .
+	         mashBits sign' 1 $ 0
+\end{code}
+
+{\tt clamp}, given a maximum bit count and a value, will strip any 1-bits
+in positions above the count.
+
+\begin{code}
+clamp :: (Num a, Bits a) => BitCount -> a -> a
+clamp = (.&.) . mask where
+	mask 1 = 1
+	mask n | n > 1 = (mask (n - 1) `shiftL` 1) + 1
+	mask _ = undefined
+\end{code}
+
+For merging the fields, just shift the starting integer over a bit and
+then \textsc{or} it with the next value. The weird parameter order allows
+easy composition.
+
+\begin{code}
+mashBits :: (Bits a, Integral a) => a -> BitCount -> Integer -> Integer
+mashBits _ 0 x = x
+mashBits y n x = (x `bitShiftL` n) .|. fromIntegral y
+\end{code}
+
+Given an integer, read it in 255-block increments starting from the LSB.
+Each increment is converted to a byte and added to the final list.
+
+\begin{code}
+integerToBytes :: Integer -> ByteCount -> [Word8]
+integerToBytes _ 0 = []
+integerToBytes x n = bytes where
+	bytes = integerToBytes (x `shiftR` 8) (n - 1) ++ [step]
+	step = fromIntegral x .&. 0xFF
+\end{code}
+
+Finally, the raw parsing is wrapped up in {\tt Put}. The second parameter
+allows the same code paths to be used for little- and big-endian
+serialisation.
+
+\begin{code}
+putFloat :: (RealFloat a) => ByteCount -> ([Word8] -> [Word8]) -> a -> Put
+putFloat width f x = putByteString $ B.pack bytes where
+	bytes = f . rawToBytes . splitFloat width $ x
+\end{code}
+
+\section{Raw float components}
+
+Information about the raw bit patterns in the float is stored in
+{\tt RawFloat}, so they don't have to be passed around to the various
+format cases. The exponent should be biased, and the significand
+shouldn't have it's implied MSB (if applicable).
+
+\begin{code}
+data RawFloat = RawFloat
+	{ rawWidth            :: ByteCount
+	, rawSign             :: Sign
+	, rawExponent         :: Exponent
+	, rawSignificand      :: Significand
+	, rawExponentWidth    :: BitCount
+	, rawSignificandWidth :: BitCount
+	}
+	deriving (Show)
+\end{code}
+
+\section{Exponents}
+
+Calculate the proper size of the exponent field, in bits, given the
+size of the full structure.
+
+\begin{code}
+exponentWidth :: BitCount -> BitCount
+exponentWidth k
+	| k == 16         = 5
+	| k == 32         = 8
+	| k `mod` 32 == 0 = ceiling (4 * logBase 2 (fromIntegral k)) - 13
+	| otherwise       = error "Invalid length of floating-point value"
+\end{code}
+
+\begin{code}
+bias :: Exponent -> BitCount -> Exponent
+bias e eWidth = e - (1 - (2 `pow` (eWidth - 1)))
+\end{code}
+
+\begin{code}
+unbias :: Exponent -> BitCount -> Exponent
+unbias e eWidth = e + 1 - (2 `pow` (eWidth - 1))
+\end{code}
+
+\section{Byte and bit counting}
+
+\begin{code}
+data Sign = Positive | Negative
+	deriving (Show)
+
+newtype Exponent = Exponent Int
+	deriving (Show, Eq, Num, Ord, Real, Enum, Integral, Bits)
+
+newtype Significand = Significand Integer
+	deriving (Show, Eq, Num, Ord, Real, Enum, Integral, Bits)
+
+newtype BitCount = BitCount Int
+	deriving (Show, Eq, Num, Ord, Real, Enum, Integral)
+
+newtype ByteCount = ByteCount Int
+	deriving (Show, Eq, Num, Ord, Real, Enum, Integral)
+
+bitCount :: ByteCount -> BitCount
+bitCount (ByteCount x) = BitCount (x * 8)
+
+bitsInWord8 :: [Word8] -> BitCount
+bitsInWord8 = bitCount . ByteCount . length
+
+bitShiftL :: (Bits a) => a -> BitCount -> a
+bitShiftL x (BitCount n) = shiftL x n
+
+bitShiftR :: (Bits a) => a -> BitCount -> a
+bitShiftR x (BitCount n) = shiftR x n
+\end{code}
+
+\section{Utility}
+
+Considering a byte list as a sequence of bits, slice it from start
+inclusive to end exclusive, and return the resulting bit sequence as an
+integer.
+
+\begin{code}
+bitSlice :: [Word8] -> BitCount -> BitCount -> Integer
+bitSlice bs = sliceInt (foldl' step 0 bs) bitCount' where
+	step acc w     = shiftL acc 8 + fromIntegral w
+	bitCount'      = bitsInWord8 bs
+\end{code}
+
+Slice a single integer by start and end bit location
+
+\begin{code}
+sliceInt :: Integer -> BitCount -> BitCount -> BitCount -> Integer
+sliceInt x xBitCount s e = fromIntegral sliced where
+	sliced = (x .&. startMask) `bitShiftR` (xBitCount - e)
+	startMask = n1Bits (xBitCount - s)
+	n1Bits n  = (2 `pow` n) - 1
+\end{code}
+
+Integral version of {\tt (**)}
+
+\begin{code}
+pow :: (Integral a, Integral b, Integral c) => a -> b -> c
+pow b e = floor $ fromIntegral b ** fromIntegral e
+\end{code}
+
+Detect whether a float is {\tt $-$NaN}
+
+\begin{code}
+isNegativeNaN :: RealFloat a => a -> Bool
+isNegativeNaN x = isNaN x && (floor x > 0)
+\end{code}
--- a/src/binary/Data/Binary/Put.hs
+++ b/src/binary/Data/Binary/Put.hs
@@ -0,0 +1,216 @@
+{-# LANGUAGE CPP #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module      : Data.Binary.Put
+-- Copyright   : Lennart Kolmodin
+-- License     : BSD3-style (see LICENSE)
+-- 
+-- Maintainer  : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
+-- Stability   : stable
+-- Portability : Portable to Hugs and GHC. Requires MPTCs
+--
+-- The Put monad. A monad for efficiently constructing lazy bytestrings.
+--
+-----------------------------------------------------------------------------
+
+module Data.Binary.Put (
+
+    -- * The Put type
+      Put
+    , PutM(..)
+    , runPut
+    , runPutM
+    , putBuilder
+    , execPut
+
+    -- * Flushing the implicit parse state
+    , flush
+
+    -- * Primitives
+    , putWord8
+    , putByteString
+    , putLazyByteString
+
+    -- * Big-endian primitives
+    , putWord16be
+    , putWord32be
+    , putWord64be
+
+    -- * Little-endian primitives
+    , putWord16le
+    , putWord32le
+    , putWord64le
+
+    -- * Host-endian, unaligned writes
+    , putWordhost           -- :: Word   -> Put
+    , putWord16host         -- :: Word16 -> Put
+    , putWord32host         -- :: Word32 -> Put
+    , putWord64host         -- :: Word64 -> Put
+
+  ) where
+
+import Data.Monoid
+import Data.Binary.Builder (Builder, toLazyByteString)
+import qualified Data.Binary.Builder as B
+
+import Data.Word
+import qualified Data.ByteString      as S
+import qualified Data.ByteString.Lazy as L
+
+#ifdef APPLICATIVE_IN_BASE
+import Control.Applicative
+#endif
+
+
+------------------------------------------------------------------------
+
+-- XXX Strict in buffer only. 
+data PairS a = PairS a {-# UNPACK #-}!Builder
+
+sndS :: PairS a -> Builder
+sndS (PairS _ b) = b
+
+-- | The PutM type. A Writer monad over the efficient Builder monoid.
+newtype PutM a = Put { unPut :: PairS a }
+
+-- | Put merely lifts Builder into a Writer monad, applied to ().
+type Put = PutM ()
+
+instance Functor PutM where
+        fmap f m = Put $ let PairS a w = unPut m in PairS (f a) w
+        {-# INLINE fmap #-}
+
+#ifdef APPLICATIVE_IN_BASE
+instance Applicative PutM where
+        pure    = return
+        m <*> k = Put $
+            let PairS f w  = unPut m
+                PairS x w' = unPut k
+            in PairS (f x) (w `mappend` w')
+#endif
+
+-- Standard Writer monad, with aggressive inlining
+instance Monad PutM where
+    return a = Put $ PairS a mempty
+    {-# INLINE return #-}
+
+    m >>= k  = Put $
+        let PairS a w  = unPut m
+            PairS b w' = unPut (k a)
+        in PairS b (w `mappend` w')
+    {-# INLINE (>>=) #-}
+
+    m >> k  = Put $
+        let PairS _ w  = unPut m
+            PairS b w' = unPut k
+        in PairS b (w `mappend` w')
+    {-# INLINE (>>) #-}
+
+tell :: Builder -> Put
+tell b = Put $ PairS () b
+{-# INLINE tell #-}
+
+putBuilder :: Builder -> Put
+putBuilder = tell
+{-# INLINE putBuilder #-}
+
+-- | Run the 'Put' monad
+execPut :: PutM a -> Builder
+execPut = sndS . unPut
+{-# INLINE execPut #-}
+
+-- | Run the 'Put' monad with a serialiser
+runPut :: Put -> L.ByteString
+runPut = toLazyByteString . sndS . unPut
+{-# INLINE runPut #-}
+
+-- | Run the 'Put' monad with a serialiser and get its result
+runPutM :: PutM a -> (a, L.ByteString)
+runPutM (Put (PairS f s)) = (f, toLazyByteString s)
+{-# INLINE runPutM #-}
+
+------------------------------------------------------------------------
+
+-- | Pop the ByteString we have constructed so far, if any, yielding a
+-- new chunk in the result ByteString.
+flush               :: Put
+flush               = tell B.flush
+{-# INLINE flush #-}
+
+-- | Efficiently write a byte into the output buffer
+putWord8            :: Word8 -> Put
+putWord8            = tell . B.singleton
+{-# INLINE putWord8 #-}
+
+-- | An efficient primitive to write a strict ByteString into the output buffer.
+-- It flushes the current buffer, and writes the argument into a new chunk.
+putByteString       :: S.ByteString -> Put
+putByteString       = tell . B.fromByteString
+{-# INLINE putByteString #-}
+
+-- | Write a lazy ByteString efficiently, simply appending the lazy
+-- ByteString chunks to the output buffer
+putLazyByteString   :: L.ByteString -> Put
+putLazyByteString   = tell . B.fromLazyByteString
+{-# INLINE putLazyByteString #-}
+
+-- | Write a Word16 in big endian format
+putWord16be         :: Word16 -> Put
+putWord16be         = tell . B.putWord16be
+{-# INLINE putWord16be #-}
+
+-- | Write a Word16 in little endian format
+putWord16le         :: Word16 -> Put
+putWord16le         = tell . B.putWord16le
+{-# INLINE putWord16le #-}
+
+-- | Write a Word32 in big endian format
+putWord32be         :: Word32 -> Put
+putWord32be         = tell . B.putWord32be
+{-# INLINE putWord32be #-}
+
+-- | Write a Word32 in little endian format
+putWord32le         :: Word32 -> Put
+putWord32le         = tell . B.putWord32le
+{-# INLINE putWord32le #-}
+
+-- | Write a Word64 in big endian format
+putWord64be         :: Word64 -> Put
+putWord64be         = tell . B.putWord64be
+{-# INLINE putWord64be #-}
+
+-- | Write a Word64 in little endian format
+putWord64le         :: Word64 -> Put
+putWord64le         = tell . B.putWord64le
+{-# INLINE putWord64le #-}
+
+------------------------------------------------------------------------
+
+-- | /O(1)./ Write a single native machine word. The word is
+-- written in host order, host endian form, for the machine you're on.
+-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
+-- 4 bytes. Values written this way are not portable to
+-- different endian or word sized machines, without conversion.
+--
+putWordhost         :: Word -> Put
+putWordhost         = tell . B.putWordhost
+{-# INLINE putWordhost #-}
+
+-- | /O(1)./ Write a Word16 in native host order and host endianness.
+-- For portability issues see @putWordhost@.
+putWord16host       :: Word16 -> Put
+putWord16host       = tell . B.putWord16host
+{-# INLINE putWord16host #-}
+
+-- | /O(1)./ Write a Word32 in native host order and host endianness.
+-- For portability issues see @putWordhost@.
+putWord32host       :: Word32 -> Put
+putWord32host       = tell . B.putWord32host
+{-# INLINE putWord32host #-}
+
+-- | /O(1)./ Write a Word64 in native host order
+-- On a 32 bit machine we write two host order Word32s, in big endian form.
+-- For portability issues see @putWordhost@.
+putWord64host       :: Word64 -> Put
+putWord64host       = tell . B.putWord64host
+{-# INLINE putWord64host #-}