Add a cabal flag to use the standard binary package

The standard binary package has improved efficiency and error handling [1], so in the long run we should consider switching to it. At the moment, using it is possible but not recommended, since it results in incomatible PGF files. The modified modules from the binary package have been moved from src/runtime/haskell to src/binary. [1] http://lennartkolmodin.blogspot.se/2013/03/binary-07.html
2013-10-31 15:43:12 +00:00
parent 43554552aa
commit d7300ba9fb
7 changed files with 61 additions and 21 deletions
--- a/src/binary/Data/Binary/Builder.hs
+++ b/src/binary/Data/Binary/Builder.hs
@@ -0,0 +1,425 @@
+{-# LANGUAGE CPP, MagicHash #-}
+-- for unboxed shifts
+
+-----------------------------------------------------------------------------
+-- |
+-- Module      : Data.Binary.Builder
+-- Copyright   : Lennart Kolmodin, Ross Paterson
+-- License     : BSD3-style (see LICENSE)
+-- 
+-- Maintainer  : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
+-- Stability   : experimental
+-- Portability : portable to Hugs and GHC
+--
+-- Efficient construction of lazy bytestrings.
+--
+-----------------------------------------------------------------------------
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+#include "MachDeps.h"
+#endif
+
+module Data.Binary.Builder (
+
+    -- * The Builder type
+      Builder
+    , toLazyByteString
+
+    -- * Constructing Builders
+    , empty
+    , singleton
+    , append
+    , fromByteString        -- :: S.ByteString -> Builder
+    , fromLazyByteString    -- :: L.ByteString -> Builder
+
+    -- * Flushing the buffer state
+    , flush
+
+    -- * Derived Builders
+    -- ** Big-endian writes
+    , putWord16be           -- :: Word16 -> Builder
+    , putWord32be           -- :: Word32 -> Builder
+    , putWord64be           -- :: Word64 -> Builder
+
+    -- ** Little-endian writes
+    , putWord16le           -- :: Word16 -> Builder
+    , putWord32le           -- :: Word32 -> Builder
+    , putWord64le           -- :: Word64 -> Builder
+
+    -- ** Host-endian, unaligned writes
+    , putWordhost           -- :: Word -> Builder
+    , putWord16host         -- :: Word16 -> Builder
+    , putWord32host         -- :: Word32 -> Builder
+    , putWord64host         -- :: Word64 -> Builder
+
+  ) where
+
+import Foreign
+import Data.Monoid
+import Data.Word
+import qualified Data.ByteString      as S
+import qualified Data.ByteString.Lazy as L
+
+#ifdef BYTESTRING_IN_BASE
+import Data.ByteString.Base (inlinePerformIO)
+import qualified Data.ByteString.Base as S
+#else
+import Data.ByteString.Internal (inlinePerformIO)
+import qualified Data.ByteString.Internal as S
+import qualified Data.ByteString.Lazy.Internal as L
+#endif
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+import GHC.Base
+import GHC.Word (Word32(..),Word16(..),Word64(..))
+
+#if WORD_SIZE_IN_BITS < 64 && __GLASGOW_HASKELL__ >= 608
+import GHC.Word (uncheckedShiftRL64#)
+#endif
+#endif
+
+------------------------------------------------------------------------
+
+-- | A 'Builder' is an efficient way to build lazy 'L.ByteString's.
+-- There are several functions for constructing 'Builder's, but only one
+-- to inspect them: to extract any data, you have to turn them into lazy
+-- 'L.ByteString's using 'toLazyByteString'.
+--
+-- Internally, a 'Builder' constructs a lazy 'L.Bytestring' by filling byte
+-- arrays piece by piece.  As each buffer is filled, it is \'popped\'
+-- off, to become a new chunk of the resulting lazy 'L.ByteString'.
+-- All this is hidden from the user of the 'Builder'.
+
+newtype Builder = Builder {
+        -- Invariant (from Data.ByteString.Lazy):
+        --      The lists include no null ByteStrings.
+        runBuilder :: (Buffer -> [S.ByteString]) -> Buffer -> [S.ByteString]
+    }
+
+instance Monoid Builder where
+    mempty  = empty
+    {-# INLINE mempty #-}
+    mappend = append
+    {-# INLINE mappend #-}
+
+------------------------------------------------------------------------
+
+-- | /O(1)./ The empty Builder, satisfying
+--
+--  * @'toLazyByteString' 'empty' = 'L.empty'@
+--
+empty :: Builder
+empty = Builder id
+{-# INLINE empty #-}
+
+-- | /O(1)./ A Builder taking a single byte, satisfying
+--
+--  * @'toLazyByteString' ('singleton' b) = 'L.singleton' b@
+--
+singleton :: Word8 -> Builder
+singleton = writeN 1 . flip poke
+{-# INLINE singleton #-}
+
+------------------------------------------------------------------------
+
+-- | /O(1)./ The concatenation of two Builders, an associative operation
+-- with identity 'empty', satisfying
+--
+--  * @'toLazyByteString' ('append' x y) = 'L.append' ('toLazyByteString' x) ('toLazyByteString' y)@
+--
+append :: Builder -> Builder -> Builder
+append (Builder f) (Builder g) = Builder (f . g)
+{-# INLINE append #-}
+
+-- | /O(1)./ A Builder taking a 'S.ByteString', satisfying
+--
+--  * @'toLazyByteString' ('fromByteString' bs) = 'L.fromChunks' [bs]@
+--
+fromByteString :: S.ByteString -> Builder
+fromByteString bs
+  | S.null bs = empty
+  | otherwise = flush `append` mapBuilder (bs :)
+{-# INLINE fromByteString #-}
+
+-- | /O(1)./ A Builder taking a lazy 'L.ByteString', satisfying
+--
+--  * @'toLazyByteString' ('fromLazyByteString' bs) = bs@
+--
+fromLazyByteString :: L.ByteString -> Builder
+fromLazyByteString bss = flush `append` mapBuilder (L.toChunks bss ++)
+{-# INLINE fromLazyByteString #-}
+
+------------------------------------------------------------------------
+
+-- Our internal buffer type
+data Buffer = Buffer {-# UNPACK #-} !(ForeignPtr Word8)
+                     {-# UNPACK #-} !Int                -- offset
+                     {-# UNPACK #-} !Int                -- used bytes
+                     {-# UNPACK #-} !Int                -- length left
+
+------------------------------------------------------------------------
+
+-- | /O(n)./ Extract a lazy 'L.ByteString' from a 'Builder'.
+-- The construction work takes place if and when the relevant part of
+-- the lazy 'L.ByteString' is demanded.
+--
+toLazyByteString :: Builder -> L.ByteString
+toLazyByteString m = L.fromChunks $ unsafePerformIO $ do
+    buf <- newBuffer defaultSize
+    return (runBuilder (m `append` flush) (const []) buf)
+
+-- | /O(1)./ Pop the 'S.ByteString' we have constructed so far, if any,
+-- yielding a new chunk in the result lazy 'L.ByteString'.
+flush :: Builder
+flush = Builder $ \ k buf@(Buffer p o u l) ->
+    if u == 0
+      then k buf
+      else S.PS p o u : k (Buffer p (o+u) 0 l)
+
+------------------------------------------------------------------------
+
+--
+-- copied from Data.ByteString.Lazy
+--
+defaultSize :: Int
+defaultSize = 32 * k - overhead
+    where k = 1024
+          overhead = 2 * sizeOf (undefined :: Int)
+
+------------------------------------------------------------------------
+
+-- | Sequence an IO operation on the buffer
+unsafeLiftIO :: (Buffer -> IO Buffer) -> Builder
+unsafeLiftIO f =  Builder $ \ k buf -> inlinePerformIO $ do
+    buf' <- f buf
+    return (k buf')
+{-# INLINE unsafeLiftIO #-}
+
+-- | Get the size of the buffer
+withSize :: (Int -> Builder) -> Builder
+withSize f = Builder $ \ k buf@(Buffer _ _ _ l) ->
+    runBuilder (f l) k buf
+
+-- | Map the resulting list of bytestrings.
+mapBuilder :: ([S.ByteString] -> [S.ByteString]) -> Builder
+mapBuilder f = Builder (f .)
+
+------------------------------------------------------------------------
+
+-- | Ensure that there are at least @n@ many bytes available.
+ensureFree :: Int -> Builder
+ensureFree n = n `seq` withSize $ \ l ->
+    if n <= l then empty else
+        flush `append` unsafeLiftIO (const (newBuffer (max n defaultSize)))
+{-# INLINE ensureFree #-}
+
+-- | Ensure that @n@ many bytes are available, and then use @f@ to write some
+-- bytes into the memory.
+writeN :: Int -> (Ptr Word8 -> IO ()) -> Builder
+writeN n f = ensureFree n `append` unsafeLiftIO (writeNBuffer n f)
+{-# INLINE writeN #-}
+
+writeNBuffer :: Int -> (Ptr Word8 -> IO ()) -> Buffer -> IO Buffer
+writeNBuffer n f (Buffer fp o u l) = do
+    withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
+    return (Buffer fp o (u+n) (l-n))
+{-# INLINE writeNBuffer #-}
+
+newBuffer :: Int -> IO Buffer
+newBuffer size = do
+    fp <- S.mallocByteString size
+    return $! Buffer fp 0 0 size
+{-# INLINE newBuffer #-}
+
+------------------------------------------------------------------------
+-- Aligned, host order writes of storable values
+
+-- | Ensure that @n@ many bytes are available, and then use @f@ to write some
+-- storable values into the memory.
+writeNbytes :: Storable a => Int -> (Ptr a -> IO ()) -> Builder
+writeNbytes n f = ensureFree n `append` unsafeLiftIO (writeNBufferBytes n f)
+{-# INLINE writeNbytes #-}
+
+writeNBufferBytes :: Storable a => Int -> (Ptr a -> IO ()) -> Buffer -> IO Buffer
+writeNBufferBytes n f (Buffer fp o u l) = do
+    withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
+    return (Buffer fp o (u+n) (l-n))
+{-# INLINE writeNBufferBytes #-}
+
+------------------------------------------------------------------------
+
+--
+-- We rely on the fromIntegral to do the right masking for us.
+-- The inlining here is critical, and can be worth 4x performance
+--
+
+-- | Write a Word16 in big endian format
+putWord16be :: Word16 -> Builder
+putWord16be w = writeN 2 $ \p -> do
+    poke p               (fromIntegral (shiftr_w16 w 8) :: Word8)
+    poke (p `plusPtr` 1) (fromIntegral (w)              :: Word8)
+{-# INLINE putWord16be #-}
+
+-- | Write a Word16 in little endian format
+putWord16le :: Word16 -> Builder
+putWord16le w = writeN 2 $ \p -> do
+    poke p               (fromIntegral (w)              :: Word8)
+    poke (p `plusPtr` 1) (fromIntegral (shiftr_w16 w 8) :: Word8)
+{-# INLINE putWord16le #-}
+
+-- putWord16le w16 = writeN 2 (\p -> poke (castPtr p) w16)
+
+-- | Write a Word32 in big endian format
+putWord32be :: Word32 -> Builder
+putWord32be w = writeN 4 $ \p -> do
+    poke p               (fromIntegral (shiftr_w32 w 24) :: Word8)
+    poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 16) :: Word8)
+    poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w  8) :: Word8)
+    poke (p `plusPtr` 3) (fromIntegral (w)               :: Word8)
+{-# INLINE putWord32be #-}
+
+--
+-- a data type to tag Put/Check. writes construct these which are then
+-- inlined and flattened. matching Checks will be more robust with rules.
+--
+
+-- | Write a Word32 in little endian format
+putWord32le :: Word32 -> Builder
+putWord32le w = writeN 4 $ \p -> do
+    poke p               (fromIntegral (w)               :: Word8)
+    poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w  8) :: Word8)
+    poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 16) :: Word8)
+    poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 w 24) :: Word8)
+{-# INLINE putWord32le #-}
+
+-- on a little endian machine:
+-- putWord32le w32 = writeN 4 (\p -> poke (castPtr p) w32)
+
+-- | Write a Word64 in big endian format
+putWord64be :: Word64 -> Builder
+#if WORD_SIZE_IN_BITS < 64
+--
+-- To avoid expensive 64 bit shifts on 32 bit machines, we cast to
+-- Word32, and write that
+--
+putWord64be w =
+    let a = fromIntegral (shiftr_w64 w 32) :: Word32
+        b = fromIntegral w                 :: Word32
+    in writeN 8 $ \p -> do
+    poke p               (fromIntegral (shiftr_w32 a 24) :: Word8)
+    poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 16) :: Word8)
+    poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a  8) :: Word8)
+    poke (p `plusPtr` 3) (fromIntegral (a)               :: Word8)
+    poke (p `plusPtr` 4) (fromIntegral (shiftr_w32 b 24) :: Word8)
+    poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 16) :: Word8)
+    poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b  8) :: Word8)
+    poke (p `plusPtr` 7) (fromIntegral (b)               :: Word8)
+#else
+putWord64be w = writeN 8 $ \p -> do
+    poke p               (fromIntegral (shiftr_w64 w 56) :: Word8)
+    poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 48) :: Word8)
+    poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 40) :: Word8)
+    poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 32) :: Word8)
+    poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 24) :: Word8)
+    poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 16) :: Word8)
+    poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w  8) :: Word8)
+    poke (p `plusPtr` 7) (fromIntegral (w)               :: Word8)
+#endif
+{-# INLINE putWord64be #-}
+
+-- | Write a Word64 in little endian format
+putWord64le :: Word64 -> Builder
+
+#if WORD_SIZE_IN_BITS < 64
+putWord64le w =
+    let b = fromIntegral (shiftr_w64 w 32) :: Word32
+        a = fromIntegral w                 :: Word32
+    in writeN 8 $ \p -> do
+    poke (p)             (fromIntegral (a)               :: Word8)
+    poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a  8) :: Word8)
+    poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 16) :: Word8)
+    poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 a 24) :: Word8)
+    poke (p `plusPtr` 4) (fromIntegral (b)               :: Word8)
+    poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b  8) :: Word8)
+    poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 16) :: Word8)
+    poke (p `plusPtr` 7) (fromIntegral (shiftr_w32 b 24) :: Word8)
+#else
+putWord64le w = writeN 8 $ \p -> do
+    poke p               (fromIntegral (w)               :: Word8)
+    poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w  8) :: Word8)
+    poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 16) :: Word8)
+    poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 24) :: Word8)
+    poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 32) :: Word8)
+    poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 40) :: Word8)
+    poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 48) :: Word8)
+    poke (p `plusPtr` 7) (fromIntegral (shiftr_w64 w 56) :: Word8)
+#endif
+{-# INLINE putWord64le #-}
+
+-- on a little endian machine:
+-- putWord64le w64 = writeN 8 (\p -> poke (castPtr p) w64)
+
+------------------------------------------------------------------------
+-- Unaligned, word size ops
+
+-- | /O(1)./ A Builder taking a single native machine word. The word is
+-- written in host order, host endian form, for the machine you're on.
+-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
+-- 4 bytes. Values written this way are not portable to
+-- different endian or word sized machines, without conversion.
+--
+putWordhost :: Word -> Builder
+putWordhost w = writeNbytes (sizeOf (undefined :: Word)) (\p -> poke p w)
+{-# INLINE putWordhost #-}
+
+-- | Write a Word16 in native host order and host endianness.
+-- 2 bytes will be written, unaligned.
+putWord16host :: Word16 -> Builder
+putWord16host w16 = writeNbytes (sizeOf (undefined :: Word16)) (\p -> poke p w16)
+{-# INLINE putWord16host #-}
+
+-- | Write a Word32 in native host order and host endianness.
+-- 4 bytes will be written, unaligned.
+putWord32host :: Word32 -> Builder
+putWord32host w32 = writeNbytes (sizeOf (undefined :: Word32)) (\p -> poke p w32)
+{-# INLINE putWord32host #-}
+
+-- | Write a Word64 in native host order.
+-- On a 32 bit machine we write two host order Word32s, in big endian form.
+-- 8 bytes will be written, unaligned.
+putWord64host :: Word64 -> Builder
+putWord64host w = writeNbytes (sizeOf (undefined :: Word64)) (\p -> poke p w)
+{-# INLINE putWord64host #-}
+
+------------------------------------------------------------------------
+-- Unchecked shifts
+
+{-# INLINE shiftr_w16 #-}
+shiftr_w16 :: Word16 -> Int -> Word16
+{-# INLINE shiftr_w32 #-}
+shiftr_w32 :: Word32 -> Int -> Word32
+{-# INLINE shiftr_w64 #-}
+shiftr_w64 :: Word64 -> Int -> Word64
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+shiftr_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftRL#`   i)
+shiftr_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftRL#`   i)
+
+#if WORD_SIZE_IN_BITS < 64
+shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL64#` i)
+
+#if __GLASGOW_HASKELL__ <= 606
+-- Exported by GHC.Word in GHC 6.8 and higher
+foreign import ccall unsafe "stg_uncheckedShiftRL64"
+    uncheckedShiftRL64#     :: Word64# -> Int# -> Word64#
+#endif
+
+#else
+shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL#` i)
+#endif
+
+#else
+shiftr_w16 = shiftR
+shiftr_w32 = shiftR
+shiftr_w64 = shiftR
+#endif
--- a/src/binary/Data/Binary/Get.hs
+++ b/src/binary/Data/Binary/Get.hs
@@ -0,0 +1,543 @@
+{-# LANGUAGE CPP, MagicHash #-}
+-- for unboxed shifts
+
+-----------------------------------------------------------------------------
+-- |
+-- Module      : Data.Binary.Get
+-- Copyright   : Lennart Kolmodin
+-- License     : BSD3-style (see LICENSE)
+-- 
+-- Maintainer  : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
+-- Stability   : experimental
+-- Portability : portable to Hugs and GHC.
+--
+-- The Get monad. A monad for efficiently building structures from
+-- encoded lazy ByteStrings
+--
+-----------------------------------------------------------------------------
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+#include "MachDeps.h"
+#endif
+
+module Data.Binary.Get (
+
+    -- * The Get type
+      Get
+    , runGet
+    , runGetState
+
+    -- * Parsing
+    , skip
+    , uncheckedSkip
+    , lookAhead
+    , lookAheadM
+    , lookAheadE
+    , uncheckedLookAhead
+
+    -- * Utility
+    , bytesRead
+    , getBytes
+    , remaining
+    , isEmpty
+
+    -- * Parsing particular types
+    , getWord8
+
+    -- ** ByteStrings
+    , getByteString
+    , getLazyByteString
+    , getLazyByteStringNul
+    , getRemainingLazyByteString
+
+    -- ** Big-endian reads
+    , getWord16be
+    , getWord32be
+    , getWord64be
+
+    -- ** Little-endian reads
+    , getWord16le
+    , getWord32le
+    , getWord64le
+
+    -- ** Host-endian, unaligned reads
+    , getWordhost
+    , getWord16host
+    , getWord32host
+    , getWord64host
+
+  ) where
+
+import Control.Monad (when,liftM,ap)
+import Control.Monad.Fix
+import Data.Maybe (isNothing)
+
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as L
+
+#ifdef BYTESTRING_IN_BASE
+import qualified Data.ByteString.Base as B
+#else
+import qualified Data.ByteString.Internal as B
+import qualified Data.ByteString.Lazy.Internal as L
+#endif
+
+#ifdef APPLICATIVE_IN_BASE
+import Control.Applicative (Applicative(..))
+#endif
+
+import Foreign
+
+-- used by splitAtST
+import Control.Monad.ST
+import Data.STRef
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+import GHC.Base
+import GHC.Word
+import GHC.Int
+#endif
+
+-- | The parse state
+data S = S {-# UNPACK #-} !B.ByteString  -- current chunk
+           L.ByteString                  -- the rest of the input
+           {-# UNPACK #-} !Int64         -- bytes read
+
+-- | The Get monad is just a State monad carrying around the input ByteString
+newtype Get a = Get { unGet :: S -> (a, S) }
+
+instance Functor Get where
+    fmap f m = Get (\s -> case unGet m s of
+                            (a, s') -> (f a, s'))
+    {-# INLINE fmap #-}
+
+#ifdef APPLICATIVE_IN_BASE
+instance Applicative Get where
+    pure  = return
+    (<*>) = ap
+#endif
+
+instance Monad Get where
+    return a  = Get (\s -> (a, s))
+    {-# INLINE return #-}
+
+    m >>= k   = Get (\s -> case unGet m s of
+                             (a, s') -> unGet (k a) s')
+    {-# INLINE (>>=) #-}
+
+    fail      = failDesc
+
+instance MonadFix Get where
+    mfix f = Get (\s -> let (a,s') = unGet (f a) s 
+                        in (a,s'))
+
+------------------------------------------------------------------------
+
+get :: Get S
+get   = Get (\s -> (s, s))
+
+put :: S -> Get ()
+put s = Get (\_ -> ((), s))
+
+------------------------------------------------------------------------
+--
+-- dons, GHC 6.10: explicit inlining disabled, was killing performance.
+-- Without it, GHC seems to do just fine. And we get similar
+-- performance with 6.8.2 anyway.
+--
+
+initState :: L.ByteString -> S
+initState xs = mkState xs 0
+{- INLINE initState -}
+
+{-
+initState (B.LPS xs) =
+    case xs of
+      []      -> S B.empty L.empty 0
+      (x:xs') -> S x (B.LPS xs') 0
+-}
+
+#ifndef BYTESTRING_IN_BASE
+mkState :: L.ByteString -> Int64 -> S
+mkState l = case l of
+    L.Empty      -> S B.empty L.empty
+    L.Chunk x xs -> S x xs
+{- INLINE mkState -}
+
+#else
+mkState :: L.ByteString -> Int64 -> S
+mkState (B.LPS xs) =
+    case xs of
+        [] -> S B.empty L.empty
+        (x:xs') -> S x (B.LPS xs')
+#endif
+
+-- | Run the Get monad applies a 'get'-based parser on the input ByteString
+runGet :: Get a -> L.ByteString -> a
+runGet m str = case unGet m (initState str) of (a, _) -> a
+
+-- | Run the Get monad applies a 'get'-based parser on the input
+-- ByteString. Additional to the result of get it returns the number of
+-- consumed bytes and the rest of the input.
+runGetState :: Get a -> L.ByteString -> Int64 -> (a, L.ByteString, Int64)
+runGetState m str off =
+    case unGet m (mkState str off) of
+      (a, ~(S s ss newOff)) -> (a, s `join` ss, newOff)
+
+------------------------------------------------------------------------
+
+failDesc :: String -> Get a
+failDesc err = do
+    S _ _ bytes <- get
+    Get (error (err ++ ". Failed reading at byte position " ++ show bytes))
+
+-- | Skip ahead @n@ bytes. Fails if fewer than @n@ bytes are available.
+skip :: Int -> Get ()
+skip n = readN (fromIntegral n) (const ())
+
+-- | Skip ahead @n@ bytes. No error if there isn't enough bytes.
+uncheckedSkip :: Int64 -> Get ()
+uncheckedSkip n = do
+    S s ss bytes <- get
+    if fromIntegral (B.length s) >= n
+      then put (S (B.drop (fromIntegral n) s) ss (bytes + n))
+      else do
+        let rest = L.drop (n - fromIntegral (B.length s)) ss
+        put $! mkState rest (bytes + n)
+
+-- | Run @ga@, but return without consuming its input.
+-- Fails if @ga@ fails.
+lookAhead :: Get a -> Get a
+lookAhead ga = do
+    s <- get
+    a <- ga
+    put s
+    return a
+
+-- | Like 'lookAhead', but consume the input if @gma@ returns 'Just _'.
+-- Fails if @gma@ fails.
+lookAheadM :: Get (Maybe a) -> Get (Maybe a)
+lookAheadM gma = do
+    s <- get
+    ma <- gma
+    when (isNothing ma) $
+        put s
+    return ma
+
+-- | Like 'lookAhead', but consume the input if @gea@ returns 'Right _'.
+-- Fails if @gea@ fails.
+lookAheadE :: Get (Either a b) -> Get (Either a b)
+lookAheadE gea = do
+    s <- get
+    ea <- gea
+    case ea of
+        Left _ -> put s
+        _      -> return ()
+    return ea
+
+-- | Get the next up to @n@ bytes as a lazy ByteString, without consuming them. 
+uncheckedLookAhead :: Int64 -> Get L.ByteString
+uncheckedLookAhead n = do
+    S s ss _ <- get
+    if n <= fromIntegral (B.length s)
+        then return (L.fromChunks [B.take (fromIntegral n) s])
+        else return $ L.take n (s `join` ss)
+
+------------------------------------------------------------------------
+-- Utility
+
+-- | Get the total number of bytes read to this point.
+bytesRead :: Get Int64
+bytesRead = do
+    S _ _ b <- get
+    return b
+
+-- | Get the number of remaining unparsed bytes.
+-- Useful for checking whether all input has been consumed.
+-- Note that this forces the rest of the input.
+remaining :: Get Int64
+remaining = do
+    S s ss _ <- get
+    return (fromIntegral (B.length s) + L.length ss)
+
+-- | Test whether all input has been consumed,
+-- i.e. there are no remaining unparsed bytes.
+isEmpty :: Get Bool
+isEmpty = do
+    S s ss _ <- get
+    return (B.null s && L.null ss)
+
+------------------------------------------------------------------------
+-- Utility with ByteStrings
+
+-- | An efficient 'get' method for strict ByteStrings. Fails if fewer
+-- than @n@ bytes are left in the input.
+getByteString :: Int -> Get B.ByteString
+getByteString n = readN n id
+{-# INLINE getByteString #-}
+
+-- | An efficient 'get' method for lazy ByteStrings. Does not fail if fewer than
+-- @n@ bytes are left in the input.
+getLazyByteString :: Int64 -> Get L.ByteString
+getLazyByteString n = do
+    S s ss bytes <- get
+    let big = s `join` ss
+    case splitAtST n big of
+      (consume, rest) -> do put $ mkState rest (bytes + n)
+                            return consume
+{-# INLINE getLazyByteString #-}
+
+-- | Get a lazy ByteString that is terminated with a NUL byte. Fails
+-- if it reaches the end of input without hitting a NUL.
+getLazyByteStringNul :: Get L.ByteString
+getLazyByteStringNul = do
+    S s ss bytes <- get
+    let big = s `join` ss
+        (consume, t) = L.break (== 0) big
+        (h, rest) = L.splitAt 1 t
+    if L.null h
+      then fail "too few bytes"
+      else do
+        put $ mkState rest (bytes + L.length consume + 1)
+        return consume
+{-# INLINE getLazyByteStringNul #-}
+
+-- | Get the remaining bytes as a lazy ByteString
+getRemainingLazyByteString :: Get L.ByteString
+getRemainingLazyByteString = do
+    S s ss _ <- get
+    return (s `join` ss)
+
+------------------------------------------------------------------------
+-- Helpers
+
+-- | Pull @n@ bytes from the input, as a strict ByteString.
+getBytes :: Int -> Get B.ByteString
+getBytes n = do
+    S s ss bytes <- get
+    if n <= B.length s
+        then do let (consume,rest) = B.splitAt n s
+                put $! S rest ss (bytes + fromIntegral n)
+                return $! consume
+        else
+              case L.splitAt (fromIntegral n) (s `join` ss) of
+                (consuming, rest) ->
+                    do let now = B.concat . L.toChunks $ consuming
+                       put $! mkState rest (bytes + fromIntegral n)
+                       -- forces the next chunk before this one is returned
+                       if (B.length now < n)
+                         then
+                            fail "too few bytes"
+                         else
+                            return now
+{- INLINE getBytes -}
+-- ^ important
+
+#ifndef BYTESTRING_IN_BASE
+join :: B.ByteString -> L.ByteString -> L.ByteString
+join bb lb
+    | B.null bb = lb
+    | otherwise = L.Chunk bb lb
+
+#else
+join :: B.ByteString -> L.ByteString -> L.ByteString
+join bb (B.LPS lb)
+    | B.null bb = B.LPS lb
+    | otherwise = B.LPS (bb:lb)
+#endif
+    -- don't use L.append, it's strict in it's second argument :/
+{- INLINE join -}
+
+-- | Split a ByteString. If the first result is consumed before the --
+-- second, this runs in constant heap space.
+--
+-- You must force the returned tuple for that to work, e.g.
+-- 
+-- > case splitAtST n xs of
+-- >    (ys,zs) -> consume ys ... consume zs
+--
+splitAtST :: Int64 -> L.ByteString -> (L.ByteString, L.ByteString)
+splitAtST i ps | i <= 0 = (L.empty, ps)
+#ifndef BYTESTRING_IN_BASE
+splitAtST i ps          = runST (
+     do r  <- newSTRef undefined
+        xs <- first r i ps
+        ys <- unsafeInterleaveST (readSTRef r)
+        return (xs, ys))
+
+  where
+        first r 0 xs@(L.Chunk _ _) = writeSTRef r xs    >> return L.Empty
+        first r _ L.Empty          = writeSTRef r L.Empty >> return L.Empty
+
+        first r n (L.Chunk x xs)
+          | n < l     = do writeSTRef r (L.Chunk (B.drop (fromIntegral n) x) xs)
+                           return $ L.Chunk (B.take (fromIntegral n) x) L.Empty
+          | otherwise = do writeSTRef r (L.drop (n - l) xs)
+                           liftM (L.Chunk x) $ unsafeInterleaveST (first r (n - l) xs)
+
+         where l = fromIntegral (B.length x)
+#else
+splitAtST i (B.LPS ps)  = runST (
+     do r  <- newSTRef undefined
+        xs <- first r i ps
+        ys <- unsafeInterleaveST (readSTRef r)
+        return (B.LPS xs, B.LPS ys))
+
+  where first r 0 xs     = writeSTRef r xs >> return []
+        first r _ []     = writeSTRef r [] >> return []
+        first r n (x:xs)
+          | n < l     = do writeSTRef r (B.drop (fromIntegral n) x : xs)
+                           return [B.take (fromIntegral n) x]
+          | otherwise = do writeSTRef r (L.toChunks (L.drop (n - l) (B.LPS xs)))
+                           fmap (x:) $ unsafeInterleaveST (first r (n - l) xs)
+
+         where l = fromIntegral (B.length x)
+#endif
+{- INLINE splitAtST -}
+
+-- Pull n bytes from the input, and apply a parser to those bytes,
+-- yielding a value. If less than @n@ bytes are available, fail with an
+-- error. This wraps @getBytes@.
+readN :: Int -> (B.ByteString -> a) -> Get a
+readN n f = fmap f $ getBytes n
+{- INLINE readN -}
+-- ^ important
+
+------------------------------------------------------------------------
+-- Primtives
+
+-- helper, get a raw Ptr onto a strict ByteString copied out of the
+-- underlying lazy byteString. So many indirections from the raw parser
+-- state that my head hurts...
+
+getPtr :: Storable a => Int -> Get a
+getPtr n = do
+    (fp,o,_) <- readN n B.toForeignPtr
+    return . B.inlinePerformIO $ withForeignPtr fp $ \p -> peek (castPtr $ p `plusPtr` o)
+{- INLINE getPtr -}
+
+------------------------------------------------------------------------
+
+-- | Read a Word8 from the monad state
+getWord8 :: Get Word8
+getWord8 = getPtr (sizeOf (undefined :: Word8))
+{- INLINE getWord8 -}
+
+-- | Read a Word16 in big endian format
+getWord16be :: Get Word16
+getWord16be = do
+    s <- readN 2 id
+    return $! (fromIntegral (s `B.index` 0) `shiftl_w16` 8) .|.
+              (fromIntegral (s `B.index` 1))
+{- INLINE getWord16be -}
+
+-- | Read a Word16 in little endian format
+getWord16le :: Get Word16
+getWord16le = do
+    s <- readN 2 id
+    return $! (fromIntegral (s `B.index` 1) `shiftl_w16` 8) .|.
+              (fromIntegral (s `B.index` 0) )
+{- INLINE getWord16le -}
+
+-- | Read a Word32 in big endian format
+getWord32be :: Get Word32
+getWord32be = do
+    s <- readN 4 id
+    return $! (fromIntegral (s `B.index` 0) `shiftl_w32` 24) .|.
+              (fromIntegral (s `B.index` 1) `shiftl_w32` 16) .|.
+              (fromIntegral (s `B.index` 2) `shiftl_w32`  8) .|.
+              (fromIntegral (s `B.index` 3) )
+{- INLINE getWord32be -}
+
+-- | Read a Word32 in little endian format
+getWord32le :: Get Word32
+getWord32le = do
+    s <- readN 4 id
+    return $! (fromIntegral (s `B.index` 3) `shiftl_w32` 24) .|.
+              (fromIntegral (s `B.index` 2) `shiftl_w32` 16) .|.
+              (fromIntegral (s `B.index` 1) `shiftl_w32`  8) .|.
+              (fromIntegral (s `B.index` 0) )
+{- INLINE getWord32le -}
+
+-- | Read a Word64 in big endian format
+getWord64be :: Get Word64
+getWord64be = do
+    s <- readN 8 id
+    return $! (fromIntegral (s `B.index` 0) `shiftl_w64` 56) .|.
+              (fromIntegral (s `B.index` 1) `shiftl_w64` 48) .|.
+              (fromIntegral (s `B.index` 2) `shiftl_w64` 40) .|.
+              (fromIntegral (s `B.index` 3) `shiftl_w64` 32) .|.
+              (fromIntegral (s `B.index` 4) `shiftl_w64` 24) .|.
+              (fromIntegral (s `B.index` 5) `shiftl_w64` 16) .|.
+              (fromIntegral (s `B.index` 6) `shiftl_w64`  8) .|.
+              (fromIntegral (s `B.index` 7) )
+{- INLINE getWord64be -}
+
+-- | Read a Word64 in little endian format
+getWord64le :: Get Word64
+getWord64le = do
+    s <- readN 8 id
+    return $! (fromIntegral (s `B.index` 7) `shiftl_w64` 56) .|.
+              (fromIntegral (s `B.index` 6) `shiftl_w64` 48) .|.
+              (fromIntegral (s `B.index` 5) `shiftl_w64` 40) .|.
+              (fromIntegral (s `B.index` 4) `shiftl_w64` 32) .|.
+              (fromIntegral (s `B.index` 3) `shiftl_w64` 24) .|.
+              (fromIntegral (s `B.index` 2) `shiftl_w64` 16) .|.
+              (fromIntegral (s `B.index` 1) `shiftl_w64`  8) .|.
+              (fromIntegral (s `B.index` 0) )
+{- INLINE getWord64le -}
+
+------------------------------------------------------------------------
+-- Host-endian reads
+
+-- | /O(1)./ Read a single native machine word. The word is read in
+-- host order, host endian form, for the machine you're on. On a 64 bit
+-- machine the Word is an 8 byte value, on a 32 bit machine, 4 bytes.
+getWordhost :: Get Word
+getWordhost = getPtr (sizeOf (undefined :: Word))
+{- INLINE getWordhost -}
+
+-- | /O(1)./ Read a 2 byte Word16 in native host order and host endianness.
+getWord16host :: Get Word16
+getWord16host = getPtr (sizeOf (undefined :: Word16))
+{- INLINE getWord16host -}
+
+-- | /O(1)./ Read a Word32 in native host order and host endianness.
+getWord32host :: Get Word32
+getWord32host = getPtr  (sizeOf (undefined :: Word32))
+{- INLINE getWord32host -}
+
+-- | /O(1)./ Read a Word64 in native host order and host endianess.
+getWord64host   :: Get Word64
+getWord64host = getPtr  (sizeOf (undefined :: Word64))
+{- INLINE getWord64host -}
+
+------------------------------------------------------------------------
+-- Unchecked shifts
+
+shiftl_w16 :: Word16 -> Int -> Word16
+shiftl_w32 :: Word32 -> Int -> Word32
+shiftl_w64 :: Word64 -> Int -> Word64
+
+#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
+shiftl_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftL#`   i)
+shiftl_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftL#`   i)
+
+#if WORD_SIZE_IN_BITS < 64
+shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL64#` i)
+
+#if __GLASGOW_HASKELL__ <= 606
+-- Exported by GHC.Word in GHC 6.8 and higher
+foreign import ccall unsafe "stg_uncheckedShiftL64"
+    uncheckedShiftL64#     :: Word64# -> Int# -> Word64#
+#endif
+
+#else
+shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL#` i)
+#endif
+
+#else
+shiftl_w16 = shiftL
+shiftl_w32 = shiftL
+shiftl_w64 = shiftL
+#endif
--- a/src/binary/Data/Binary/IEEE754.lhs
+++ b/src/binary/Data/Binary/IEEE754.lhs
@@ -0,0 +1,402 @@
+% Copyright (C) 2009 John Millikin <jmillikin@gmail.com>
+% 
+% This program is free software: you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation, either version 3 of the License, or
+% any later version.
+% 
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+% GNU General Public License for more details.
+% 
+% You should have received a copy of the GNU General Public License
+% along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+\ignore{
+\begin{code}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+module Data.Binary.IEEE754 (
+	-- * Parsing
+	  getFloat16be, getFloat16le
+	, getFloat32be, getFloat32le
+	, getFloat64be, getFloat64le
+	
+	-- * Serializing
+	, putFloat32be, putFloat32le
+	, putFloat64be, putFloat64le
+) where
+
+import Data.Bits ((.&.), (.|.), shiftL, shiftR, Bits)
+import Data.Word (Word8)
+import Data.List (foldl')
+
+import qualified Data.ByteString as B
+import Data.Binary.Get (Get, getByteString)
+import Data.Binary.Put (Put, putByteString)
+\end{code}
+}
+
+\section{Parsing}
+
+\subsection{Public interface}
+
+\begin{code}
+getFloat16be :: Get Float
+getFloat16be = getFloat (ByteCount 2) splitBytes
+\end{code}
+
+\begin{code}
+getFloat16le :: Get Float
+getFloat16le = getFloat (ByteCount 2) $ splitBytes . reverse
+\end{code}
+
+\begin{code}
+getFloat32be :: Get Float
+getFloat32be = getFloat (ByteCount 4) splitBytes
+\end{code}
+
+\begin{code}
+getFloat32le :: Get Float
+getFloat32le = getFloat (ByteCount 4) $ splitBytes . reverse
+\end{code}
+
+\begin{code}
+getFloat64be :: Get Double
+getFloat64be = getFloat (ByteCount 8) splitBytes
+\end{code}
+
+\begin{code}
+getFloat64le :: Get Double
+getFloat64le = getFloat (ByteCount 8) $ splitBytes . reverse
+\end{code}
+
+\subsection{Implementation}
+
+Split the raw byte array into (sign, exponent, significand) components.
+The exponent and signifcand are drawn directly from the bits in the
+original float, and have not been unbiased or otherwise modified.
+
+\begin{code}
+splitBytes :: [Word8] -> RawFloat
+splitBytes bs = RawFloat width sign exp' sig expWidth sigWidth where
+	width = ByteCount (length bs)
+	nBits = bitsInWord8 bs
+	sign = if head bs .&. 0x80 == 0x80
+		then Negative
+		else Positive
+	
+	expStart = 1
+	expWidth = exponentWidth nBits
+	expEnd = expStart + expWidth
+	exp' = Exponent . fromIntegral $ bitSlice bs expStart expEnd
+	
+	sigWidth = nBits - expEnd
+	sig  = Significand $ bitSlice bs expEnd nBits
+\end{code}
+
+\subsubsection{Encodings and special values}
+
+The next step depends on the value of the exponent $e$, size of the
+exponent field in bits $w$, and value of the significand.
+
+\begin{table}[h]
+\begin{center}
+\begin{tabular}{lrl}
+\toprule
+Exponent                & Significand & Format \\
+\midrule
+$0$                     & $0$           & Zero \\
+$0$                     & $> 0$         & Denormalised \\
+$1 \leq e \leq 2^w - 2$ & \textit{any} & Normalised \\
+$2^w-1$                 & $0$           & Infinity \\
+$2^w-1$                 & $> 0$         & NaN \\
+\bottomrule
+\end{tabular}
+\end{center}
+\end{table}
+
+There's no built-in literals for Infinity or NaN, so they
+are constructed using the {\tt Read} instances for {\tt Double} and
+{\tt Float}.
+
+\begin{code}
+merge :: (Read a, RealFloat a) => RawFloat -> a
+merge f@(RawFloat _ _ e sig eWidth _)
+	| e == 0 = if sig == 0
+		then 0.0
+		else denormalised f
+	| e == eMax - 1 = if sig == 0
+		then read "Infinity"
+		else read "NaN"
+	| otherwise = normalised f
+	where eMax = 2 `pow` eWidth
+\end{code}
+
+If a value is normalised, its significand has an implied {\tt 1} bit
+in its most-significant bit. The significand must be adjusted by
+this value before being passed to {\tt encodeField}.
+
+\begin{code}
+normalised :: RealFloat a => RawFloat -> a
+normalised f = encodeFloat fraction exp' where
+	Significand sig = rawSignificand f
+	Exponent exp' = unbiased - sigWidth
+	
+	fraction = sig + (1 `bitShiftL` rawSignificandWidth f)
+	
+	sigWidth = fromIntegral $ rawSignificandWidth f
+	unbiased = unbias (rawExponent f) (rawExponentWidth f)
+\end{code}
+
+For denormalised values, the implied {\tt 1} bit is the least-significant
+bit of the exponent.
+
+\begin{code}
+denormalised :: RealFloat a => RawFloat -> a
+denormalised f = encodeFloat sig exp' where
+	Significand sig = rawSignificand f
+	Exponent exp' = unbiased - sigWidth + 1
+	
+	sigWidth = fromIntegral $ rawSignificandWidth f
+	unbiased = unbias (rawExponent f) (rawExponentWidth f)
+\end{code}
+
+By composing {\tt splitBytes} and {\tt merge}, the absolute value of the
+float is calculated. Before being returned to the calling function, it
+must be signed appropriately.
+
+\begin{code}
+getFloat :: (Read a, RealFloat a) => ByteCount
+            -> ([Word8] -> RawFloat) -> Get a
+getFloat (ByteCount width) parser = do
+	raw <- fmap (parser . B.unpack) $ getByteString width
+	let absFloat = merge raw
+	return $ case rawSign raw of
+		Positive ->  absFloat
+		Negative -> -absFloat
+\end{code}
+
+\section{Serialising}
+
+\subsection{Public interface}
+
+\begin{code}
+putFloat32be :: Float -> Put
+putFloat32be = putFloat (ByteCount 4) id
+\end{code}
+
+\begin{code}
+putFloat32le :: Float -> Put
+putFloat32le = putFloat (ByteCount 4) reverse
+\end{code}
+
+\begin{code}
+putFloat64be :: Double -> Put
+putFloat64be = putFloat (ByteCount 8) id
+\end{code}
+
+\begin{code}
+putFloat64le :: Double -> Put
+putFloat64le = putFloat (ByteCount 8) reverse
+\end{code}
+
+\subsection{Implementation}
+
+Serialisation is similar to parsing. First, the float is converted to
+a structure representing raw bitfields. The values returned from
+{\tt decodeFloat} are clamped to their expected lengths before being
+stored in the {\tt RawFloat}.
+
+\begin{code}
+splitFloat :: RealFloat a => ByteCount -> a -> RawFloat
+splitFloat width x = raw where
+	raw = RawFloat width sign clampedExp clampedSig expWidth sigWidth
+	sign = if isNegativeNaN x || isNegativeZero x || x < 0
+		then Negative
+		else Positive
+	clampedExp = clamp expWidth exp'
+	clampedSig = clamp sigWidth sig
+	(exp', sig) = case (dFraction, dExponent, biasedExp) of
+		(0, 0, _) -> (0, 0)
+		(_, _, 0) -> (0, Significand $ truncatedSig + 1)
+		_         -> (biasedExp, Significand truncatedSig)
+	expWidth = exponentWidth $ bitCount width
+	sigWidth = bitCount width - expWidth - 1 -- 1 for sign bit
+	
+	(dFraction, dExponent) = decodeFloat x
+	
+	rawExp = Exponent $ dExponent + fromIntegral sigWidth
+	biasedExp = bias rawExp expWidth
+	truncatedSig = abs dFraction - (1 `bitShiftL` sigWidth)
+\end{code}
+
+Then, the {\tt RawFloat} is converted to a list of bytes by mashing all
+the fields together into an {\tt Integer}, and chopping up that integer
+in 8-bit blocks.
+
+\begin{code}
+rawToBytes :: RawFloat -> [Word8]
+rawToBytes raw = integerToBytes mashed width where
+	RawFloat width sign exp' sig expWidth sigWidth = raw
+	sign' :: Word8
+	sign' = case sign of
+		Positive -> 0
+		Negative -> 1
+	mashed = mashBits sig sigWidth .
+	         mashBits exp' expWidth .
+	         mashBits sign' 1 $ 0
+\end{code}
+
+{\tt clamp}, given a maximum bit count and a value, will strip any 1-bits
+in positions above the count.
+
+\begin{code}
+clamp :: (Num a, Bits a) => BitCount -> a -> a
+clamp = (.&.) . mask where
+	mask 1 = 1
+	mask n | n > 1 = (mask (n - 1) `shiftL` 1) + 1
+	mask _ = undefined
+\end{code}
+
+For merging the fields, just shift the starting integer over a bit and
+then \textsc{or} it with the next value. The weird parameter order allows
+easy composition.
+
+\begin{code}
+mashBits :: (Bits a, Integral a) => a -> BitCount -> Integer -> Integer
+mashBits _ 0 x = x
+mashBits y n x = (x `bitShiftL` n) .|. fromIntegral y
+\end{code}
+
+Given an integer, read it in 255-block increments starting from the LSB.
+Each increment is converted to a byte and added to the final list.
+
+\begin{code}
+integerToBytes :: Integer -> ByteCount -> [Word8]
+integerToBytes _ 0 = []
+integerToBytes x n = bytes where
+	bytes = integerToBytes (x `shiftR` 8) (n - 1) ++ [step]
+	step = fromIntegral x .&. 0xFF
+\end{code}
+
+Finally, the raw parsing is wrapped up in {\tt Put}. The second parameter
+allows the same code paths to be used for little- and big-endian
+serialisation.
+
+\begin{code}
+putFloat :: (RealFloat a) => ByteCount -> ([Word8] -> [Word8]) -> a -> Put
+putFloat width f x = putByteString $ B.pack bytes where
+	bytes = f . rawToBytes . splitFloat width $ x
+\end{code}
+
+\section{Raw float components}
+
+Information about the raw bit patterns in the float is stored in
+{\tt RawFloat}, so they don't have to be passed around to the various
+format cases. The exponent should be biased, and the significand
+shouldn't have it's implied MSB (if applicable).
+
+\begin{code}
+data RawFloat = RawFloat
+	{ rawWidth            :: ByteCount
+	, rawSign             :: Sign
+	, rawExponent         :: Exponent
+	, rawSignificand      :: Significand
+	, rawExponentWidth    :: BitCount
+	, rawSignificandWidth :: BitCount
+	}
+	deriving (Show)
+\end{code}
+
+\section{Exponents}
+
+Calculate the proper size of the exponent field, in bits, given the
+size of the full structure.
+
+\begin{code}
+exponentWidth :: BitCount -> BitCount
+exponentWidth k
+	| k == 16         = 5
+	| k == 32         = 8
+	| k `mod` 32 == 0 = ceiling (4 * logBase 2 (fromIntegral k)) - 13
+	| otherwise       = error "Invalid length of floating-point value"
+\end{code}
+
+\begin{code}
+bias :: Exponent -> BitCount -> Exponent
+bias e eWidth = e - (1 - (2 `pow` (eWidth - 1)))
+\end{code}
+
+\begin{code}
+unbias :: Exponent -> BitCount -> Exponent
+unbias e eWidth = e + 1 - (2 `pow` (eWidth - 1))
+\end{code}
+
+\section{Byte and bit counting}
+
+\begin{code}
+data Sign = Positive | Negative
+	deriving (Show)
+
+newtype Exponent = Exponent Int
+	deriving (Show, Eq, Num, Ord, Real, Enum, Integral, Bits)
+
+newtype Significand = Significand Integer
+	deriving (Show, Eq, Num, Ord, Real, Enum, Integral, Bits)
+
+newtype BitCount = BitCount Int
+	deriving (Show, Eq, Num, Ord, Real, Enum, Integral)
+
+newtype ByteCount = ByteCount Int
+	deriving (Show, Eq, Num, Ord, Real, Enum, Integral)
+
+bitCount :: ByteCount -> BitCount
+bitCount (ByteCount x) = BitCount (x * 8)
+
+bitsInWord8 :: [Word8] -> BitCount
+bitsInWord8 = bitCount . ByteCount . length
+
+bitShiftL :: (Bits a) => a -> BitCount -> a
+bitShiftL x (BitCount n) = shiftL x n
+
+bitShiftR :: (Bits a) => a -> BitCount -> a
+bitShiftR x (BitCount n) = shiftR x n
+\end{code}
+
+\section{Utility}
+
+Considering a byte list as a sequence of bits, slice it from start
+inclusive to end exclusive, and return the resulting bit sequence as an
+integer.
+
+\begin{code}
+bitSlice :: [Word8] -> BitCount -> BitCount -> Integer
+bitSlice bs = sliceInt (foldl' step 0 bs) bitCount' where
+	step acc w     = shiftL acc 8 + fromIntegral w
+	bitCount'      = bitsInWord8 bs
+\end{code}
+
+Slice a single integer by start and end bit location
+
+\begin{code}
+sliceInt :: Integer -> BitCount -> BitCount -> BitCount -> Integer
+sliceInt x xBitCount s e = fromIntegral sliced where
+	sliced = (x .&. startMask) `bitShiftR` (xBitCount - e)
+	startMask = n1Bits (xBitCount - s)
+	n1Bits n  = (2 `pow` n) - 1
+\end{code}
+
+Integral version of {\tt (**)}
+
+\begin{code}
+pow :: (Integral a, Integral b, Integral c) => a -> b -> c
+pow b e = floor $ fromIntegral b ** fromIntegral e
+\end{code}
+
+Detect whether a float is {\tt $-$NaN}
+
+\begin{code}
+isNegativeNaN :: RealFloat a => a -> Bool
+isNegativeNaN x = isNaN x && (floor x > 0)
+\end{code}
--- a/src/binary/Data/Binary/Put.hs
+++ b/src/binary/Data/Binary/Put.hs
@@ -0,0 +1,216 @@
+{-# LANGUAGE CPP #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module      : Data.Binary.Put
+-- Copyright   : Lennart Kolmodin
+-- License     : BSD3-style (see LICENSE)
+-- 
+-- Maintainer  : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
+-- Stability   : stable
+-- Portability : Portable to Hugs and GHC. Requires MPTCs
+--
+-- The Put monad. A monad for efficiently constructing lazy bytestrings.
+--
+-----------------------------------------------------------------------------
+
+module Data.Binary.Put (
+
+    -- * The Put type
+      Put
+    , PutM(..)
+    , runPut
+    , runPutM
+    , putBuilder
+    , execPut
+
+    -- * Flushing the implicit parse state
+    , flush
+
+    -- * Primitives
+    , putWord8
+    , putByteString
+    , putLazyByteString
+
+    -- * Big-endian primitives
+    , putWord16be
+    , putWord32be
+    , putWord64be
+
+    -- * Little-endian primitives
+    , putWord16le
+    , putWord32le
+    , putWord64le
+
+    -- * Host-endian, unaligned writes
+    , putWordhost           -- :: Word   -> Put
+    , putWord16host         -- :: Word16 -> Put
+    , putWord32host         -- :: Word32 -> Put
+    , putWord64host         -- :: Word64 -> Put
+
+  ) where
+
+import Data.Monoid
+import Data.Binary.Builder (Builder, toLazyByteString)
+import qualified Data.Binary.Builder as B
+
+import Data.Word
+import qualified Data.ByteString      as S
+import qualified Data.ByteString.Lazy as L
+
+#ifdef APPLICATIVE_IN_BASE
+import Control.Applicative
+#endif
+
+
+------------------------------------------------------------------------
+
+-- XXX Strict in buffer only. 
+data PairS a = PairS a {-# UNPACK #-}!Builder
+
+sndS :: PairS a -> Builder
+sndS (PairS _ b) = b
+
+-- | The PutM type. A Writer monad over the efficient Builder monoid.
+newtype PutM a = Put { unPut :: PairS a }
+
+-- | Put merely lifts Builder into a Writer monad, applied to ().
+type Put = PutM ()
+
+instance Functor PutM where
+        fmap f m = Put $ let PairS a w = unPut m in PairS (f a) w
+        {-# INLINE fmap #-}
+
+#ifdef APPLICATIVE_IN_BASE
+instance Applicative PutM where
+        pure    = return
+        m <*> k = Put $
+            let PairS f w  = unPut m
+                PairS x w' = unPut k
+            in PairS (f x) (w `mappend` w')
+#endif
+
+-- Standard Writer monad, with aggressive inlining
+instance Monad PutM where
+    return a = Put $ PairS a mempty
+    {-# INLINE return #-}
+
+    m >>= k  = Put $
+        let PairS a w  = unPut m
+            PairS b w' = unPut (k a)
+        in PairS b (w `mappend` w')
+    {-# INLINE (>>=) #-}
+
+    m >> k  = Put $
+        let PairS _ w  = unPut m
+            PairS b w' = unPut k
+        in PairS b (w `mappend` w')
+    {-# INLINE (>>) #-}
+
+tell :: Builder -> Put
+tell b = Put $ PairS () b
+{-# INLINE tell #-}
+
+putBuilder :: Builder -> Put
+putBuilder = tell
+{-# INLINE putBuilder #-}
+
+-- | Run the 'Put' monad
+execPut :: PutM a -> Builder
+execPut = sndS . unPut
+{-# INLINE execPut #-}
+
+-- | Run the 'Put' monad with a serialiser
+runPut :: Put -> L.ByteString
+runPut = toLazyByteString . sndS . unPut
+{-# INLINE runPut #-}
+
+-- | Run the 'Put' monad with a serialiser and get its result
+runPutM :: PutM a -> (a, L.ByteString)
+runPutM (Put (PairS f s)) = (f, toLazyByteString s)
+{-# INLINE runPutM #-}
+
+------------------------------------------------------------------------
+
+-- | Pop the ByteString we have constructed so far, if any, yielding a
+-- new chunk in the result ByteString.
+flush               :: Put
+flush               = tell B.flush
+{-# INLINE flush #-}
+
+-- | Efficiently write a byte into the output buffer
+putWord8            :: Word8 -> Put
+putWord8            = tell . B.singleton
+{-# INLINE putWord8 #-}
+
+-- | An efficient primitive to write a strict ByteString into the output buffer.
+-- It flushes the current buffer, and writes the argument into a new chunk.
+putByteString       :: S.ByteString -> Put
+putByteString       = tell . B.fromByteString
+{-# INLINE putByteString #-}
+
+-- | Write a lazy ByteString efficiently, simply appending the lazy
+-- ByteString chunks to the output buffer
+putLazyByteString   :: L.ByteString -> Put
+putLazyByteString   = tell . B.fromLazyByteString
+{-# INLINE putLazyByteString #-}
+
+-- | Write a Word16 in big endian format
+putWord16be         :: Word16 -> Put
+putWord16be         = tell . B.putWord16be
+{-# INLINE putWord16be #-}
+
+-- | Write a Word16 in little endian format
+putWord16le         :: Word16 -> Put
+putWord16le         = tell . B.putWord16le
+{-# INLINE putWord16le #-}
+
+-- | Write a Word32 in big endian format
+putWord32be         :: Word32 -> Put
+putWord32be         = tell . B.putWord32be
+{-# INLINE putWord32be #-}
+
+-- | Write a Word32 in little endian format
+putWord32le         :: Word32 -> Put
+putWord32le         = tell . B.putWord32le
+{-# INLINE putWord32le #-}
+
+-- | Write a Word64 in big endian format
+putWord64be         :: Word64 -> Put
+putWord64be         = tell . B.putWord64be
+{-# INLINE putWord64be #-}
+
+-- | Write a Word64 in little endian format
+putWord64le         :: Word64 -> Put
+putWord64le         = tell . B.putWord64le
+{-# INLINE putWord64le #-}
+
+------------------------------------------------------------------------
+
+-- | /O(1)./ Write a single native machine word. The word is
+-- written in host order, host endian form, for the machine you're on.
+-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
+-- 4 bytes. Values written this way are not portable to
+-- different endian or word sized machines, without conversion.
+--
+putWordhost         :: Word -> Put
+putWordhost         = tell . B.putWordhost
+{-# INLINE putWordhost #-}
+
+-- | /O(1)./ Write a Word16 in native host order and host endianness.
+-- For portability issues see @putWordhost@.
+putWord16host       :: Word16 -> Put
+putWord16host       = tell . B.putWord16host
+{-# INLINE putWord16host #-}
+
+-- | /O(1)./ Write a Word32 in native host order and host endianness.
+-- For portability issues see @putWordhost@.
+putWord32host       :: Word32 -> Put
+putWord32host       = tell . B.putWord32host
+{-# INLINE putWord32host #-}
+
+-- | /O(1)./ Write a Word64 in native host order
+-- On a 32 bit machine we write two host order Word32s, in big endian form.
+-- For portability issues see @putWordhost@.
+putWord64host       :: Word64 -> Put
+putWord64host       = tell . B.putWord64host
+{-# INLINE putWord64host #-}