1
0
forked from GitHub/gf-core

Add a cabal flag to use the standard binary package

The standard binary package has improved efficiency and error handling [1], so
in the long run we should consider switching to it. At the moment, using it is
possible but not recommended, since it results in incomatible PGF files.

The modified modules from the binary package have been moved from
src/runtime/haskell to src/binary.

[1] http://lennartkolmodin.blogspot.se/2013/03/binary-07.html
This commit is contained in:
hallgren
2013-10-31 15:43:12 +00:00
parent 43554552aa
commit d7300ba9fb
7 changed files with 61 additions and 21 deletions

View File

@@ -0,0 +1,425 @@
{-# LANGUAGE CPP, MagicHash #-}
-- for unboxed shifts
-----------------------------------------------------------------------------
-- |
-- Module : Data.Binary.Builder
-- Copyright : Lennart Kolmodin, Ross Paterson
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
-- Stability : experimental
-- Portability : portable to Hugs and GHC
--
-- Efficient construction of lazy bytestrings.
--
-----------------------------------------------------------------------------
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
#include "MachDeps.h"
#endif
module Data.Binary.Builder (
-- * The Builder type
Builder
, toLazyByteString
-- * Constructing Builders
, empty
, singleton
, append
, fromByteString -- :: S.ByteString -> Builder
, fromLazyByteString -- :: L.ByteString -> Builder
-- * Flushing the buffer state
, flush
-- * Derived Builders
-- ** Big-endian writes
, putWord16be -- :: Word16 -> Builder
, putWord32be -- :: Word32 -> Builder
, putWord64be -- :: Word64 -> Builder
-- ** Little-endian writes
, putWord16le -- :: Word16 -> Builder
, putWord32le -- :: Word32 -> Builder
, putWord64le -- :: Word64 -> Builder
-- ** Host-endian, unaligned writes
, putWordhost -- :: Word -> Builder
, putWord16host -- :: Word16 -> Builder
, putWord32host -- :: Word32 -> Builder
, putWord64host -- :: Word64 -> Builder
) where
import Foreign
import Data.Monoid
import Data.Word
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
#ifdef BYTESTRING_IN_BASE
import Data.ByteString.Base (inlinePerformIO)
import qualified Data.ByteString.Base as S
#else
import Data.ByteString.Internal (inlinePerformIO)
import qualified Data.ByteString.Internal as S
import qualified Data.ByteString.Lazy.Internal as L
#endif
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
import GHC.Base
import GHC.Word (Word32(..),Word16(..),Word64(..))
#if WORD_SIZE_IN_BITS < 64 && __GLASGOW_HASKELL__ >= 608
import GHC.Word (uncheckedShiftRL64#)
#endif
#endif
------------------------------------------------------------------------
-- | A 'Builder' is an efficient way to build lazy 'L.ByteString's.
-- There are several functions for constructing 'Builder's, but only one
-- to inspect them: to extract any data, you have to turn them into lazy
-- 'L.ByteString's using 'toLazyByteString'.
--
-- Internally, a 'Builder' constructs a lazy 'L.Bytestring' by filling byte
-- arrays piece by piece. As each buffer is filled, it is \'popped\'
-- off, to become a new chunk of the resulting lazy 'L.ByteString'.
-- All this is hidden from the user of the 'Builder'.
newtype Builder = Builder {
-- Invariant (from Data.ByteString.Lazy):
-- The lists include no null ByteStrings.
runBuilder :: (Buffer -> [S.ByteString]) -> Buffer -> [S.ByteString]
}
instance Monoid Builder where
mempty = empty
{-# INLINE mempty #-}
mappend = append
{-# INLINE mappend #-}
------------------------------------------------------------------------
-- | /O(1)./ The empty Builder, satisfying
--
-- * @'toLazyByteString' 'empty' = 'L.empty'@
--
empty :: Builder
empty = Builder id
{-# INLINE empty #-}
-- | /O(1)./ A Builder taking a single byte, satisfying
--
-- * @'toLazyByteString' ('singleton' b) = 'L.singleton' b@
--
singleton :: Word8 -> Builder
singleton = writeN 1 . flip poke
{-# INLINE singleton #-}
------------------------------------------------------------------------
-- | /O(1)./ The concatenation of two Builders, an associative operation
-- with identity 'empty', satisfying
--
-- * @'toLazyByteString' ('append' x y) = 'L.append' ('toLazyByteString' x) ('toLazyByteString' y)@
--
append :: Builder -> Builder -> Builder
append (Builder f) (Builder g) = Builder (f . g)
{-# INLINE append #-}
-- | /O(1)./ A Builder taking a 'S.ByteString', satisfying
--
-- * @'toLazyByteString' ('fromByteString' bs) = 'L.fromChunks' [bs]@
--
fromByteString :: S.ByteString -> Builder
fromByteString bs
| S.null bs = empty
| otherwise = flush `append` mapBuilder (bs :)
{-# INLINE fromByteString #-}
-- | /O(1)./ A Builder taking a lazy 'L.ByteString', satisfying
--
-- * @'toLazyByteString' ('fromLazyByteString' bs) = bs@
--
fromLazyByteString :: L.ByteString -> Builder
fromLazyByteString bss = flush `append` mapBuilder (L.toChunks bss ++)
{-# INLINE fromLazyByteString #-}
------------------------------------------------------------------------
-- Our internal buffer type
data Buffer = Buffer {-# UNPACK #-} !(ForeignPtr Word8)
{-# UNPACK #-} !Int -- offset
{-# UNPACK #-} !Int -- used bytes
{-# UNPACK #-} !Int -- length left
------------------------------------------------------------------------
-- | /O(n)./ Extract a lazy 'L.ByteString' from a 'Builder'.
-- The construction work takes place if and when the relevant part of
-- the lazy 'L.ByteString' is demanded.
--
toLazyByteString :: Builder -> L.ByteString
toLazyByteString m = L.fromChunks $ unsafePerformIO $ do
buf <- newBuffer defaultSize
return (runBuilder (m `append` flush) (const []) buf)
-- | /O(1)./ Pop the 'S.ByteString' we have constructed so far, if any,
-- yielding a new chunk in the result lazy 'L.ByteString'.
flush :: Builder
flush = Builder $ \ k buf@(Buffer p o u l) ->
if u == 0
then k buf
else S.PS p o u : k (Buffer p (o+u) 0 l)
------------------------------------------------------------------------
--
-- copied from Data.ByteString.Lazy
--
defaultSize :: Int
defaultSize = 32 * k - overhead
where k = 1024
overhead = 2 * sizeOf (undefined :: Int)
------------------------------------------------------------------------
-- | Sequence an IO operation on the buffer
unsafeLiftIO :: (Buffer -> IO Buffer) -> Builder
unsafeLiftIO f = Builder $ \ k buf -> inlinePerformIO $ do
buf' <- f buf
return (k buf')
{-# INLINE unsafeLiftIO #-}
-- | Get the size of the buffer
withSize :: (Int -> Builder) -> Builder
withSize f = Builder $ \ k buf@(Buffer _ _ _ l) ->
runBuilder (f l) k buf
-- | Map the resulting list of bytestrings.
mapBuilder :: ([S.ByteString] -> [S.ByteString]) -> Builder
mapBuilder f = Builder (f .)
------------------------------------------------------------------------
-- | Ensure that there are at least @n@ many bytes available.
ensureFree :: Int -> Builder
ensureFree n = n `seq` withSize $ \ l ->
if n <= l then empty else
flush `append` unsafeLiftIO (const (newBuffer (max n defaultSize)))
{-# INLINE ensureFree #-}
-- | Ensure that @n@ many bytes are available, and then use @f@ to write some
-- bytes into the memory.
writeN :: Int -> (Ptr Word8 -> IO ()) -> Builder
writeN n f = ensureFree n `append` unsafeLiftIO (writeNBuffer n f)
{-# INLINE writeN #-}
writeNBuffer :: Int -> (Ptr Word8 -> IO ()) -> Buffer -> IO Buffer
writeNBuffer n f (Buffer fp o u l) = do
withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
return (Buffer fp o (u+n) (l-n))
{-# INLINE writeNBuffer #-}
newBuffer :: Int -> IO Buffer
newBuffer size = do
fp <- S.mallocByteString size
return $! Buffer fp 0 0 size
{-# INLINE newBuffer #-}
------------------------------------------------------------------------
-- Aligned, host order writes of storable values
-- | Ensure that @n@ many bytes are available, and then use @f@ to write some
-- storable values into the memory.
writeNbytes :: Storable a => Int -> (Ptr a -> IO ()) -> Builder
writeNbytes n f = ensureFree n `append` unsafeLiftIO (writeNBufferBytes n f)
{-# INLINE writeNbytes #-}
writeNBufferBytes :: Storable a => Int -> (Ptr a -> IO ()) -> Buffer -> IO Buffer
writeNBufferBytes n f (Buffer fp o u l) = do
withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
return (Buffer fp o (u+n) (l-n))
{-# INLINE writeNBufferBytes #-}
------------------------------------------------------------------------
--
-- We rely on the fromIntegral to do the right masking for us.
-- The inlining here is critical, and can be worth 4x performance
--
-- | Write a Word16 in big endian format
putWord16be :: Word16 -> Builder
putWord16be w = writeN 2 $ \p -> do
poke p (fromIntegral (shiftr_w16 w 8) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (w) :: Word8)
{-# INLINE putWord16be #-}
-- | Write a Word16 in little endian format
putWord16le :: Word16 -> Builder
putWord16le w = writeN 2 $ \p -> do
poke p (fromIntegral (w) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w16 w 8) :: Word8)
{-# INLINE putWord16le #-}
-- putWord16le w16 = writeN 2 (\p -> poke (castPtr p) w16)
-- | Write a Word32 in big endian format
putWord32be :: Word32 -> Builder
putWord32be w = writeN 4 $ \p -> do
poke p (fromIntegral (shiftr_w32 w 24) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 16) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 8) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (w) :: Word8)
{-# INLINE putWord32be #-}
--
-- a data type to tag Put/Check. writes construct these which are then
-- inlined and flattened. matching Checks will be more robust with rules.
--
-- | Write a Word32 in little endian format
putWord32le :: Word32 -> Builder
putWord32le w = writeN 4 $ \p -> do
poke p (fromIntegral (w) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 8) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 16) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 w 24) :: Word8)
{-# INLINE putWord32le #-}
-- on a little endian machine:
-- putWord32le w32 = writeN 4 (\p -> poke (castPtr p) w32)
-- | Write a Word64 in big endian format
putWord64be :: Word64 -> Builder
#if WORD_SIZE_IN_BITS < 64
--
-- To avoid expensive 64 bit shifts on 32 bit machines, we cast to
-- Word32, and write that
--
putWord64be w =
let a = fromIntegral (shiftr_w64 w 32) :: Word32
b = fromIntegral w :: Word32
in writeN 8 $ \p -> do
poke p (fromIntegral (shiftr_w32 a 24) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 16) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 8) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (a) :: Word8)
poke (p `plusPtr` 4) (fromIntegral (shiftr_w32 b 24) :: Word8)
poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 16) :: Word8)
poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 8) :: Word8)
poke (p `plusPtr` 7) (fromIntegral (b) :: Word8)
#else
putWord64be w = writeN 8 $ \p -> do
poke p (fromIntegral (shiftr_w64 w 56) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 48) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 40) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 32) :: Word8)
poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 24) :: Word8)
poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 16) :: Word8)
poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 8) :: Word8)
poke (p `plusPtr` 7) (fromIntegral (w) :: Word8)
#endif
{-# INLINE putWord64be #-}
-- | Write a Word64 in little endian format
putWord64le :: Word64 -> Builder
#if WORD_SIZE_IN_BITS < 64
putWord64le w =
let b = fromIntegral (shiftr_w64 w 32) :: Word32
a = fromIntegral w :: Word32
in writeN 8 $ \p -> do
poke (p) (fromIntegral (a) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 8) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 16) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 a 24) :: Word8)
poke (p `plusPtr` 4) (fromIntegral (b) :: Word8)
poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 8) :: Word8)
poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 16) :: Word8)
poke (p `plusPtr` 7) (fromIntegral (shiftr_w32 b 24) :: Word8)
#else
putWord64le w = writeN 8 $ \p -> do
poke p (fromIntegral (w) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 8) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 16) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 24) :: Word8)
poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 32) :: Word8)
poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 40) :: Word8)
poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 48) :: Word8)
poke (p `plusPtr` 7) (fromIntegral (shiftr_w64 w 56) :: Word8)
#endif
{-# INLINE putWord64le #-}
-- on a little endian machine:
-- putWord64le w64 = writeN 8 (\p -> poke (castPtr p) w64)
------------------------------------------------------------------------
-- Unaligned, word size ops
-- | /O(1)./ A Builder taking a single native machine word. The word is
-- written in host order, host endian form, for the machine you're on.
-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
-- 4 bytes. Values written this way are not portable to
-- different endian or word sized machines, without conversion.
--
putWordhost :: Word -> Builder
putWordhost w = writeNbytes (sizeOf (undefined :: Word)) (\p -> poke p w)
{-# INLINE putWordhost #-}
-- | Write a Word16 in native host order and host endianness.
-- 2 bytes will be written, unaligned.
putWord16host :: Word16 -> Builder
putWord16host w16 = writeNbytes (sizeOf (undefined :: Word16)) (\p -> poke p w16)
{-# INLINE putWord16host #-}
-- | Write a Word32 in native host order and host endianness.
-- 4 bytes will be written, unaligned.
putWord32host :: Word32 -> Builder
putWord32host w32 = writeNbytes (sizeOf (undefined :: Word32)) (\p -> poke p w32)
{-# INLINE putWord32host #-}
-- | Write a Word64 in native host order.
-- On a 32 bit machine we write two host order Word32s, in big endian form.
-- 8 bytes will be written, unaligned.
putWord64host :: Word64 -> Builder
putWord64host w = writeNbytes (sizeOf (undefined :: Word64)) (\p -> poke p w)
{-# INLINE putWord64host #-}
------------------------------------------------------------------------
-- Unchecked shifts
{-# INLINE shiftr_w16 #-}
shiftr_w16 :: Word16 -> Int -> Word16
{-# INLINE shiftr_w32 #-}
shiftr_w32 :: Word32 -> Int -> Word32
{-# INLINE shiftr_w64 #-}
shiftr_w64 :: Word64 -> Int -> Word64
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
shiftr_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftRL#` i)
shiftr_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftRL#` i)
#if WORD_SIZE_IN_BITS < 64
shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL64#` i)
#if __GLASGOW_HASKELL__ <= 606
-- Exported by GHC.Word in GHC 6.8 and higher
foreign import ccall unsafe "stg_uncheckedShiftRL64"
uncheckedShiftRL64# :: Word64# -> Int# -> Word64#
#endif
#else
shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL#` i)
#endif
#else
shiftr_w16 = shiftR
shiftr_w32 = shiftR
shiftr_w64 = shiftR
#endif

View File

@@ -0,0 +1,543 @@
{-# LANGUAGE CPP, MagicHash #-}
-- for unboxed shifts
-----------------------------------------------------------------------------
-- |
-- Module : Data.Binary.Get
-- Copyright : Lennart Kolmodin
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
-- Stability : experimental
-- Portability : portable to Hugs and GHC.
--
-- The Get monad. A monad for efficiently building structures from
-- encoded lazy ByteStrings
--
-----------------------------------------------------------------------------
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
#include "MachDeps.h"
#endif
module Data.Binary.Get (
-- * The Get type
Get
, runGet
, runGetState
-- * Parsing
, skip
, uncheckedSkip
, lookAhead
, lookAheadM
, lookAheadE
, uncheckedLookAhead
-- * Utility
, bytesRead
, getBytes
, remaining
, isEmpty
-- * Parsing particular types
, getWord8
-- ** ByteStrings
, getByteString
, getLazyByteString
, getLazyByteStringNul
, getRemainingLazyByteString
-- ** Big-endian reads
, getWord16be
, getWord32be
, getWord64be
-- ** Little-endian reads
, getWord16le
, getWord32le
, getWord64le
-- ** Host-endian, unaligned reads
, getWordhost
, getWord16host
, getWord32host
, getWord64host
) where
import Control.Monad (when,liftM,ap)
import Control.Monad.Fix
import Data.Maybe (isNothing)
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
#ifdef BYTESTRING_IN_BASE
import qualified Data.ByteString.Base as B
#else
import qualified Data.ByteString.Internal as B
import qualified Data.ByteString.Lazy.Internal as L
#endif
#ifdef APPLICATIVE_IN_BASE
import Control.Applicative (Applicative(..))
#endif
import Foreign
-- used by splitAtST
import Control.Monad.ST
import Data.STRef
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
import GHC.Base
import GHC.Word
import GHC.Int
#endif
-- | The parse state
data S = S {-# UNPACK #-} !B.ByteString -- current chunk
L.ByteString -- the rest of the input
{-# UNPACK #-} !Int64 -- bytes read
-- | The Get monad is just a State monad carrying around the input ByteString
newtype Get a = Get { unGet :: S -> (a, S) }
instance Functor Get where
fmap f m = Get (\s -> case unGet m s of
(a, s') -> (f a, s'))
{-# INLINE fmap #-}
#ifdef APPLICATIVE_IN_BASE
instance Applicative Get where
pure = return
(<*>) = ap
#endif
instance Monad Get where
return a = Get (\s -> (a, s))
{-# INLINE return #-}
m >>= k = Get (\s -> case unGet m s of
(a, s') -> unGet (k a) s')
{-# INLINE (>>=) #-}
fail = failDesc
instance MonadFix Get where
mfix f = Get (\s -> let (a,s') = unGet (f a) s
in (a,s'))
------------------------------------------------------------------------
get :: Get S
get = Get (\s -> (s, s))
put :: S -> Get ()
put s = Get (\_ -> ((), s))
------------------------------------------------------------------------
--
-- dons, GHC 6.10: explicit inlining disabled, was killing performance.
-- Without it, GHC seems to do just fine. And we get similar
-- performance with 6.8.2 anyway.
--
initState :: L.ByteString -> S
initState xs = mkState xs 0
{- INLINE initState -}
{-
initState (B.LPS xs) =
case xs of
[] -> S B.empty L.empty 0
(x:xs') -> S x (B.LPS xs') 0
-}
#ifndef BYTESTRING_IN_BASE
mkState :: L.ByteString -> Int64 -> S
mkState l = case l of
L.Empty -> S B.empty L.empty
L.Chunk x xs -> S x xs
{- INLINE mkState -}
#else
mkState :: L.ByteString -> Int64 -> S
mkState (B.LPS xs) =
case xs of
[] -> S B.empty L.empty
(x:xs') -> S x (B.LPS xs')
#endif
-- | Run the Get monad applies a 'get'-based parser on the input ByteString
runGet :: Get a -> L.ByteString -> a
runGet m str = case unGet m (initState str) of (a, _) -> a
-- | Run the Get monad applies a 'get'-based parser on the input
-- ByteString. Additional to the result of get it returns the number of
-- consumed bytes and the rest of the input.
runGetState :: Get a -> L.ByteString -> Int64 -> (a, L.ByteString, Int64)
runGetState m str off =
case unGet m (mkState str off) of
(a, ~(S s ss newOff)) -> (a, s `join` ss, newOff)
------------------------------------------------------------------------
failDesc :: String -> Get a
failDesc err = do
S _ _ bytes <- get
Get (error (err ++ ". Failed reading at byte position " ++ show bytes))
-- | Skip ahead @n@ bytes. Fails if fewer than @n@ bytes are available.
skip :: Int -> Get ()
skip n = readN (fromIntegral n) (const ())
-- | Skip ahead @n@ bytes. No error if there isn't enough bytes.
uncheckedSkip :: Int64 -> Get ()
uncheckedSkip n = do
S s ss bytes <- get
if fromIntegral (B.length s) >= n
then put (S (B.drop (fromIntegral n) s) ss (bytes + n))
else do
let rest = L.drop (n - fromIntegral (B.length s)) ss
put $! mkState rest (bytes + n)
-- | Run @ga@, but return without consuming its input.
-- Fails if @ga@ fails.
lookAhead :: Get a -> Get a
lookAhead ga = do
s <- get
a <- ga
put s
return a
-- | Like 'lookAhead', but consume the input if @gma@ returns 'Just _'.
-- Fails if @gma@ fails.
lookAheadM :: Get (Maybe a) -> Get (Maybe a)
lookAheadM gma = do
s <- get
ma <- gma
when (isNothing ma) $
put s
return ma
-- | Like 'lookAhead', but consume the input if @gea@ returns 'Right _'.
-- Fails if @gea@ fails.
lookAheadE :: Get (Either a b) -> Get (Either a b)
lookAheadE gea = do
s <- get
ea <- gea
case ea of
Left _ -> put s
_ -> return ()
return ea
-- | Get the next up to @n@ bytes as a lazy ByteString, without consuming them.
uncheckedLookAhead :: Int64 -> Get L.ByteString
uncheckedLookAhead n = do
S s ss _ <- get
if n <= fromIntegral (B.length s)
then return (L.fromChunks [B.take (fromIntegral n) s])
else return $ L.take n (s `join` ss)
------------------------------------------------------------------------
-- Utility
-- | Get the total number of bytes read to this point.
bytesRead :: Get Int64
bytesRead = do
S _ _ b <- get
return b
-- | Get the number of remaining unparsed bytes.
-- Useful for checking whether all input has been consumed.
-- Note that this forces the rest of the input.
remaining :: Get Int64
remaining = do
S s ss _ <- get
return (fromIntegral (B.length s) + L.length ss)
-- | Test whether all input has been consumed,
-- i.e. there are no remaining unparsed bytes.
isEmpty :: Get Bool
isEmpty = do
S s ss _ <- get
return (B.null s && L.null ss)
------------------------------------------------------------------------
-- Utility with ByteStrings
-- | An efficient 'get' method for strict ByteStrings. Fails if fewer
-- than @n@ bytes are left in the input.
getByteString :: Int -> Get B.ByteString
getByteString n = readN n id
{-# INLINE getByteString #-}
-- | An efficient 'get' method for lazy ByteStrings. Does not fail if fewer than
-- @n@ bytes are left in the input.
getLazyByteString :: Int64 -> Get L.ByteString
getLazyByteString n = do
S s ss bytes <- get
let big = s `join` ss
case splitAtST n big of
(consume, rest) -> do put $ mkState rest (bytes + n)
return consume
{-# INLINE getLazyByteString #-}
-- | Get a lazy ByteString that is terminated with a NUL byte. Fails
-- if it reaches the end of input without hitting a NUL.
getLazyByteStringNul :: Get L.ByteString
getLazyByteStringNul = do
S s ss bytes <- get
let big = s `join` ss
(consume, t) = L.break (== 0) big
(h, rest) = L.splitAt 1 t
if L.null h
then fail "too few bytes"
else do
put $ mkState rest (bytes + L.length consume + 1)
return consume
{-# INLINE getLazyByteStringNul #-}
-- | Get the remaining bytes as a lazy ByteString
getRemainingLazyByteString :: Get L.ByteString
getRemainingLazyByteString = do
S s ss _ <- get
return (s `join` ss)
------------------------------------------------------------------------
-- Helpers
-- | Pull @n@ bytes from the input, as a strict ByteString.
getBytes :: Int -> Get B.ByteString
getBytes n = do
S s ss bytes <- get
if n <= B.length s
then do let (consume,rest) = B.splitAt n s
put $! S rest ss (bytes + fromIntegral n)
return $! consume
else
case L.splitAt (fromIntegral n) (s `join` ss) of
(consuming, rest) ->
do let now = B.concat . L.toChunks $ consuming
put $! mkState rest (bytes + fromIntegral n)
-- forces the next chunk before this one is returned
if (B.length now < n)
then
fail "too few bytes"
else
return now
{- INLINE getBytes -}
-- ^ important
#ifndef BYTESTRING_IN_BASE
join :: B.ByteString -> L.ByteString -> L.ByteString
join bb lb
| B.null bb = lb
| otherwise = L.Chunk bb lb
#else
join :: B.ByteString -> L.ByteString -> L.ByteString
join bb (B.LPS lb)
| B.null bb = B.LPS lb
| otherwise = B.LPS (bb:lb)
#endif
-- don't use L.append, it's strict in it's second argument :/
{- INLINE join -}
-- | Split a ByteString. If the first result is consumed before the --
-- second, this runs in constant heap space.
--
-- You must force the returned tuple for that to work, e.g.
--
-- > case splitAtST n xs of
-- > (ys,zs) -> consume ys ... consume zs
--
splitAtST :: Int64 -> L.ByteString -> (L.ByteString, L.ByteString)
splitAtST i ps | i <= 0 = (L.empty, ps)
#ifndef BYTESTRING_IN_BASE
splitAtST i ps = runST (
do r <- newSTRef undefined
xs <- first r i ps
ys <- unsafeInterleaveST (readSTRef r)
return (xs, ys))
where
first r 0 xs@(L.Chunk _ _) = writeSTRef r xs >> return L.Empty
first r _ L.Empty = writeSTRef r L.Empty >> return L.Empty
first r n (L.Chunk x xs)
| n < l = do writeSTRef r (L.Chunk (B.drop (fromIntegral n) x) xs)
return $ L.Chunk (B.take (fromIntegral n) x) L.Empty
| otherwise = do writeSTRef r (L.drop (n - l) xs)
liftM (L.Chunk x) $ unsafeInterleaveST (first r (n - l) xs)
where l = fromIntegral (B.length x)
#else
splitAtST i (B.LPS ps) = runST (
do r <- newSTRef undefined
xs <- first r i ps
ys <- unsafeInterleaveST (readSTRef r)
return (B.LPS xs, B.LPS ys))
where first r 0 xs = writeSTRef r xs >> return []
first r _ [] = writeSTRef r [] >> return []
first r n (x:xs)
| n < l = do writeSTRef r (B.drop (fromIntegral n) x : xs)
return [B.take (fromIntegral n) x]
| otherwise = do writeSTRef r (L.toChunks (L.drop (n - l) (B.LPS xs)))
fmap (x:) $ unsafeInterleaveST (first r (n - l) xs)
where l = fromIntegral (B.length x)
#endif
{- INLINE splitAtST -}
-- Pull n bytes from the input, and apply a parser to those bytes,
-- yielding a value. If less than @n@ bytes are available, fail with an
-- error. This wraps @getBytes@.
readN :: Int -> (B.ByteString -> a) -> Get a
readN n f = fmap f $ getBytes n
{- INLINE readN -}
-- ^ important
------------------------------------------------------------------------
-- Primtives
-- helper, get a raw Ptr onto a strict ByteString copied out of the
-- underlying lazy byteString. So many indirections from the raw parser
-- state that my head hurts...
getPtr :: Storable a => Int -> Get a
getPtr n = do
(fp,o,_) <- readN n B.toForeignPtr
return . B.inlinePerformIO $ withForeignPtr fp $ \p -> peek (castPtr $ p `plusPtr` o)
{- INLINE getPtr -}
------------------------------------------------------------------------
-- | Read a Word8 from the monad state
getWord8 :: Get Word8
getWord8 = getPtr (sizeOf (undefined :: Word8))
{- INLINE getWord8 -}
-- | Read a Word16 in big endian format
getWord16be :: Get Word16
getWord16be = do
s <- readN 2 id
return $! (fromIntegral (s `B.index` 0) `shiftl_w16` 8) .|.
(fromIntegral (s `B.index` 1))
{- INLINE getWord16be -}
-- | Read a Word16 in little endian format
getWord16le :: Get Word16
getWord16le = do
s <- readN 2 id
return $! (fromIntegral (s `B.index` 1) `shiftl_w16` 8) .|.
(fromIntegral (s `B.index` 0) )
{- INLINE getWord16le -}
-- | Read a Word32 in big endian format
getWord32be :: Get Word32
getWord32be = do
s <- readN 4 id
return $! (fromIntegral (s `B.index` 0) `shiftl_w32` 24) .|.
(fromIntegral (s `B.index` 1) `shiftl_w32` 16) .|.
(fromIntegral (s `B.index` 2) `shiftl_w32` 8) .|.
(fromIntegral (s `B.index` 3) )
{- INLINE getWord32be -}
-- | Read a Word32 in little endian format
getWord32le :: Get Word32
getWord32le = do
s <- readN 4 id
return $! (fromIntegral (s `B.index` 3) `shiftl_w32` 24) .|.
(fromIntegral (s `B.index` 2) `shiftl_w32` 16) .|.
(fromIntegral (s `B.index` 1) `shiftl_w32` 8) .|.
(fromIntegral (s `B.index` 0) )
{- INLINE getWord32le -}
-- | Read a Word64 in big endian format
getWord64be :: Get Word64
getWord64be = do
s <- readN 8 id
return $! (fromIntegral (s `B.index` 0) `shiftl_w64` 56) .|.
(fromIntegral (s `B.index` 1) `shiftl_w64` 48) .|.
(fromIntegral (s `B.index` 2) `shiftl_w64` 40) .|.
(fromIntegral (s `B.index` 3) `shiftl_w64` 32) .|.
(fromIntegral (s `B.index` 4) `shiftl_w64` 24) .|.
(fromIntegral (s `B.index` 5) `shiftl_w64` 16) .|.
(fromIntegral (s `B.index` 6) `shiftl_w64` 8) .|.
(fromIntegral (s `B.index` 7) )
{- INLINE getWord64be -}
-- | Read a Word64 in little endian format
getWord64le :: Get Word64
getWord64le = do
s <- readN 8 id
return $! (fromIntegral (s `B.index` 7) `shiftl_w64` 56) .|.
(fromIntegral (s `B.index` 6) `shiftl_w64` 48) .|.
(fromIntegral (s `B.index` 5) `shiftl_w64` 40) .|.
(fromIntegral (s `B.index` 4) `shiftl_w64` 32) .|.
(fromIntegral (s `B.index` 3) `shiftl_w64` 24) .|.
(fromIntegral (s `B.index` 2) `shiftl_w64` 16) .|.
(fromIntegral (s `B.index` 1) `shiftl_w64` 8) .|.
(fromIntegral (s `B.index` 0) )
{- INLINE getWord64le -}
------------------------------------------------------------------------
-- Host-endian reads
-- | /O(1)./ Read a single native machine word. The word is read in
-- host order, host endian form, for the machine you're on. On a 64 bit
-- machine the Word is an 8 byte value, on a 32 bit machine, 4 bytes.
getWordhost :: Get Word
getWordhost = getPtr (sizeOf (undefined :: Word))
{- INLINE getWordhost -}
-- | /O(1)./ Read a 2 byte Word16 in native host order and host endianness.
getWord16host :: Get Word16
getWord16host = getPtr (sizeOf (undefined :: Word16))
{- INLINE getWord16host -}
-- | /O(1)./ Read a Word32 in native host order and host endianness.
getWord32host :: Get Word32
getWord32host = getPtr (sizeOf (undefined :: Word32))
{- INLINE getWord32host -}
-- | /O(1)./ Read a Word64 in native host order and host endianess.
getWord64host :: Get Word64
getWord64host = getPtr (sizeOf (undefined :: Word64))
{- INLINE getWord64host -}
------------------------------------------------------------------------
-- Unchecked shifts
shiftl_w16 :: Word16 -> Int -> Word16
shiftl_w32 :: Word32 -> Int -> Word32
shiftl_w64 :: Word64 -> Int -> Word64
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
shiftl_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftL#` i)
shiftl_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftL#` i)
#if WORD_SIZE_IN_BITS < 64
shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL64#` i)
#if __GLASGOW_HASKELL__ <= 606
-- Exported by GHC.Word in GHC 6.8 and higher
foreign import ccall unsafe "stg_uncheckedShiftL64"
uncheckedShiftL64# :: Word64# -> Int# -> Word64#
#endif
#else
shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL#` i)
#endif
#else
shiftl_w16 = shiftL
shiftl_w32 = shiftL
shiftl_w64 = shiftL
#endif

View File

@@ -0,0 +1,402 @@
% Copyright (C) 2009 John Millikin <jmillikin@gmail.com>
%
% This program is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program. If not, see <http://www.gnu.org/licenses/>.
\ignore{
\begin{code}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
module Data.Binary.IEEE754 (
-- * Parsing
getFloat16be, getFloat16le
, getFloat32be, getFloat32le
, getFloat64be, getFloat64le
-- * Serializing
, putFloat32be, putFloat32le
, putFloat64be, putFloat64le
) where
import Data.Bits ((.&.), (.|.), shiftL, shiftR, Bits)
import Data.Word (Word8)
import Data.List (foldl')
import qualified Data.ByteString as B
import Data.Binary.Get (Get, getByteString)
import Data.Binary.Put (Put, putByteString)
\end{code}
}
\section{Parsing}
\subsection{Public interface}
\begin{code}
getFloat16be :: Get Float
getFloat16be = getFloat (ByteCount 2) splitBytes
\end{code}
\begin{code}
getFloat16le :: Get Float
getFloat16le = getFloat (ByteCount 2) $ splitBytes . reverse
\end{code}
\begin{code}
getFloat32be :: Get Float
getFloat32be = getFloat (ByteCount 4) splitBytes
\end{code}
\begin{code}
getFloat32le :: Get Float
getFloat32le = getFloat (ByteCount 4) $ splitBytes . reverse
\end{code}
\begin{code}
getFloat64be :: Get Double
getFloat64be = getFloat (ByteCount 8) splitBytes
\end{code}
\begin{code}
getFloat64le :: Get Double
getFloat64le = getFloat (ByteCount 8) $ splitBytes . reverse
\end{code}
\subsection{Implementation}
Split the raw byte array into (sign, exponent, significand) components.
The exponent and signifcand are drawn directly from the bits in the
original float, and have not been unbiased or otherwise modified.
\begin{code}
splitBytes :: [Word8] -> RawFloat
splitBytes bs = RawFloat width sign exp' sig expWidth sigWidth where
width = ByteCount (length bs)
nBits = bitsInWord8 bs
sign = if head bs .&. 0x80 == 0x80
then Negative
else Positive
expStart = 1
expWidth = exponentWidth nBits
expEnd = expStart + expWidth
exp' = Exponent . fromIntegral $ bitSlice bs expStart expEnd
sigWidth = nBits - expEnd
sig = Significand $ bitSlice bs expEnd nBits
\end{code}
\subsubsection{Encodings and special values}
The next step depends on the value of the exponent $e$, size of the
exponent field in bits $w$, and value of the significand.
\begin{table}[h]
\begin{center}
\begin{tabular}{lrl}
\toprule
Exponent & Significand & Format \\
\midrule
$0$ & $0$ & Zero \\
$0$ & $> 0$ & Denormalised \\
$1 \leq e \leq 2^w - 2$ & \textit{any} & Normalised \\
$2^w-1$ & $0$ & Infinity \\
$2^w-1$ & $> 0$ & NaN \\
\bottomrule
\end{tabular}
\end{center}
\end{table}
There's no built-in literals for Infinity or NaN, so they
are constructed using the {\tt Read} instances for {\tt Double} and
{\tt Float}.
\begin{code}
merge :: (Read a, RealFloat a) => RawFloat -> a
merge f@(RawFloat _ _ e sig eWidth _)
| e == 0 = if sig == 0
then 0.0
else denormalised f
| e == eMax - 1 = if sig == 0
then read "Infinity"
else read "NaN"
| otherwise = normalised f
where eMax = 2 `pow` eWidth
\end{code}
If a value is normalised, its significand has an implied {\tt 1} bit
in its most-significant bit. The significand must be adjusted by
this value before being passed to {\tt encodeField}.
\begin{code}
normalised :: RealFloat a => RawFloat -> a
normalised f = encodeFloat fraction exp' where
Significand sig = rawSignificand f
Exponent exp' = unbiased - sigWidth
fraction = sig + (1 `bitShiftL` rawSignificandWidth f)
sigWidth = fromIntegral $ rawSignificandWidth f
unbiased = unbias (rawExponent f) (rawExponentWidth f)
\end{code}
For denormalised values, the implied {\tt 1} bit is the least-significant
bit of the exponent.
\begin{code}
denormalised :: RealFloat a => RawFloat -> a
denormalised f = encodeFloat sig exp' where
Significand sig = rawSignificand f
Exponent exp' = unbiased - sigWidth + 1
sigWidth = fromIntegral $ rawSignificandWidth f
unbiased = unbias (rawExponent f) (rawExponentWidth f)
\end{code}
By composing {\tt splitBytes} and {\tt merge}, the absolute value of the
float is calculated. Before being returned to the calling function, it
must be signed appropriately.
\begin{code}
getFloat :: (Read a, RealFloat a) => ByteCount
-> ([Word8] -> RawFloat) -> Get a
getFloat (ByteCount width) parser = do
raw <- fmap (parser . B.unpack) $ getByteString width
let absFloat = merge raw
return $ case rawSign raw of
Positive -> absFloat
Negative -> -absFloat
\end{code}
\section{Serialising}
\subsection{Public interface}
\begin{code}
putFloat32be :: Float -> Put
putFloat32be = putFloat (ByteCount 4) id
\end{code}
\begin{code}
putFloat32le :: Float -> Put
putFloat32le = putFloat (ByteCount 4) reverse
\end{code}
\begin{code}
putFloat64be :: Double -> Put
putFloat64be = putFloat (ByteCount 8) id
\end{code}
\begin{code}
putFloat64le :: Double -> Put
putFloat64le = putFloat (ByteCount 8) reverse
\end{code}
\subsection{Implementation}
Serialisation is similar to parsing. First, the float is converted to
a structure representing raw bitfields. The values returned from
{\tt decodeFloat} are clamped to their expected lengths before being
stored in the {\tt RawFloat}.
\begin{code}
splitFloat :: RealFloat a => ByteCount -> a -> RawFloat
splitFloat width x = raw where
raw = RawFloat width sign clampedExp clampedSig expWidth sigWidth
sign = if isNegativeNaN x || isNegativeZero x || x < 0
then Negative
else Positive
clampedExp = clamp expWidth exp'
clampedSig = clamp sigWidth sig
(exp', sig) = case (dFraction, dExponent, biasedExp) of
(0, 0, _) -> (0, 0)
(_, _, 0) -> (0, Significand $ truncatedSig + 1)
_ -> (biasedExp, Significand truncatedSig)
expWidth = exponentWidth $ bitCount width
sigWidth = bitCount width - expWidth - 1 -- 1 for sign bit
(dFraction, dExponent) = decodeFloat x
rawExp = Exponent $ dExponent + fromIntegral sigWidth
biasedExp = bias rawExp expWidth
truncatedSig = abs dFraction - (1 `bitShiftL` sigWidth)
\end{code}
Then, the {\tt RawFloat} is converted to a list of bytes by mashing all
the fields together into an {\tt Integer}, and chopping up that integer
in 8-bit blocks.
\begin{code}
rawToBytes :: RawFloat -> [Word8]
rawToBytes raw = integerToBytes mashed width where
RawFloat width sign exp' sig expWidth sigWidth = raw
sign' :: Word8
sign' = case sign of
Positive -> 0
Negative -> 1
mashed = mashBits sig sigWidth .
mashBits exp' expWidth .
mashBits sign' 1 $ 0
\end{code}
{\tt clamp}, given a maximum bit count and a value, will strip any 1-bits
in positions above the count.
\begin{code}
clamp :: (Num a, Bits a) => BitCount -> a -> a
clamp = (.&.) . mask where
mask 1 = 1
mask n | n > 1 = (mask (n - 1) `shiftL` 1) + 1
mask _ = undefined
\end{code}
For merging the fields, just shift the starting integer over a bit and
then \textsc{or} it with the next value. The weird parameter order allows
easy composition.
\begin{code}
mashBits :: (Bits a, Integral a) => a -> BitCount -> Integer -> Integer
mashBits _ 0 x = x
mashBits y n x = (x `bitShiftL` n) .|. fromIntegral y
\end{code}
Given an integer, read it in 255-block increments starting from the LSB.
Each increment is converted to a byte and added to the final list.
\begin{code}
integerToBytes :: Integer -> ByteCount -> [Word8]
integerToBytes _ 0 = []
integerToBytes x n = bytes where
bytes = integerToBytes (x `shiftR` 8) (n - 1) ++ [step]
step = fromIntegral x .&. 0xFF
\end{code}
Finally, the raw parsing is wrapped up in {\tt Put}. The second parameter
allows the same code paths to be used for little- and big-endian
serialisation.
\begin{code}
putFloat :: (RealFloat a) => ByteCount -> ([Word8] -> [Word8]) -> a -> Put
putFloat width f x = putByteString $ B.pack bytes where
bytes = f . rawToBytes . splitFloat width $ x
\end{code}
\section{Raw float components}
Information about the raw bit patterns in the float is stored in
{\tt RawFloat}, so they don't have to be passed around to the various
format cases. The exponent should be biased, and the significand
shouldn't have it's implied MSB (if applicable).
\begin{code}
data RawFloat = RawFloat
{ rawWidth :: ByteCount
, rawSign :: Sign
, rawExponent :: Exponent
, rawSignificand :: Significand
, rawExponentWidth :: BitCount
, rawSignificandWidth :: BitCount
}
deriving (Show)
\end{code}
\section{Exponents}
Calculate the proper size of the exponent field, in bits, given the
size of the full structure.
\begin{code}
exponentWidth :: BitCount -> BitCount
exponentWidth k
| k == 16 = 5
| k == 32 = 8
| k `mod` 32 == 0 = ceiling (4 * logBase 2 (fromIntegral k)) - 13
| otherwise = error "Invalid length of floating-point value"
\end{code}
\begin{code}
bias :: Exponent -> BitCount -> Exponent
bias e eWidth = e - (1 - (2 `pow` (eWidth - 1)))
\end{code}
\begin{code}
unbias :: Exponent -> BitCount -> Exponent
unbias e eWidth = e + 1 - (2 `pow` (eWidth - 1))
\end{code}
\section{Byte and bit counting}
\begin{code}
data Sign = Positive | Negative
deriving (Show)
newtype Exponent = Exponent Int
deriving (Show, Eq, Num, Ord, Real, Enum, Integral, Bits)
newtype Significand = Significand Integer
deriving (Show, Eq, Num, Ord, Real, Enum, Integral, Bits)
newtype BitCount = BitCount Int
deriving (Show, Eq, Num, Ord, Real, Enum, Integral)
newtype ByteCount = ByteCount Int
deriving (Show, Eq, Num, Ord, Real, Enum, Integral)
bitCount :: ByteCount -> BitCount
bitCount (ByteCount x) = BitCount (x * 8)
bitsInWord8 :: [Word8] -> BitCount
bitsInWord8 = bitCount . ByteCount . length
bitShiftL :: (Bits a) => a -> BitCount -> a
bitShiftL x (BitCount n) = shiftL x n
bitShiftR :: (Bits a) => a -> BitCount -> a
bitShiftR x (BitCount n) = shiftR x n
\end{code}
\section{Utility}
Considering a byte list as a sequence of bits, slice it from start
inclusive to end exclusive, and return the resulting bit sequence as an
integer.
\begin{code}
bitSlice :: [Word8] -> BitCount -> BitCount -> Integer
bitSlice bs = sliceInt (foldl' step 0 bs) bitCount' where
step acc w = shiftL acc 8 + fromIntegral w
bitCount' = bitsInWord8 bs
\end{code}
Slice a single integer by start and end bit location
\begin{code}
sliceInt :: Integer -> BitCount -> BitCount -> BitCount -> Integer
sliceInt x xBitCount s e = fromIntegral sliced where
sliced = (x .&. startMask) `bitShiftR` (xBitCount - e)
startMask = n1Bits (xBitCount - s)
n1Bits n = (2 `pow` n) - 1
\end{code}
Integral version of {\tt (**)}
\begin{code}
pow :: (Integral a, Integral b, Integral c) => a -> b -> c
pow b e = floor $ fromIntegral b ** fromIntegral e
\end{code}
Detect whether a float is {\tt $-$NaN}
\begin{code}
isNegativeNaN :: RealFloat a => a -> Bool
isNegativeNaN x = isNaN x && (floor x > 0)
\end{code}

View File

@@ -0,0 +1,216 @@
{-# LANGUAGE CPP #-}
-----------------------------------------------------------------------------
-- |
-- Module : Data.Binary.Put
-- Copyright : Lennart Kolmodin
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
-- Stability : stable
-- Portability : Portable to Hugs and GHC. Requires MPTCs
--
-- The Put monad. A monad for efficiently constructing lazy bytestrings.
--
-----------------------------------------------------------------------------
module Data.Binary.Put (
-- * The Put type
Put
, PutM(..)
, runPut
, runPutM
, putBuilder
, execPut
-- * Flushing the implicit parse state
, flush
-- * Primitives
, putWord8
, putByteString
, putLazyByteString
-- * Big-endian primitives
, putWord16be
, putWord32be
, putWord64be
-- * Little-endian primitives
, putWord16le
, putWord32le
, putWord64le
-- * Host-endian, unaligned writes
, putWordhost -- :: Word -> Put
, putWord16host -- :: Word16 -> Put
, putWord32host -- :: Word32 -> Put
, putWord64host -- :: Word64 -> Put
) where
import Data.Monoid
import Data.Binary.Builder (Builder, toLazyByteString)
import qualified Data.Binary.Builder as B
import Data.Word
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
#ifdef APPLICATIVE_IN_BASE
import Control.Applicative
#endif
------------------------------------------------------------------------
-- XXX Strict in buffer only.
data PairS a = PairS a {-# UNPACK #-}!Builder
sndS :: PairS a -> Builder
sndS (PairS _ b) = b
-- | The PutM type. A Writer monad over the efficient Builder monoid.
newtype PutM a = Put { unPut :: PairS a }
-- | Put merely lifts Builder into a Writer monad, applied to ().
type Put = PutM ()
instance Functor PutM where
fmap f m = Put $ let PairS a w = unPut m in PairS (f a) w
{-# INLINE fmap #-}
#ifdef APPLICATIVE_IN_BASE
instance Applicative PutM where
pure = return
m <*> k = Put $
let PairS f w = unPut m
PairS x w' = unPut k
in PairS (f x) (w `mappend` w')
#endif
-- Standard Writer monad, with aggressive inlining
instance Monad PutM where
return a = Put $ PairS a mempty
{-# INLINE return #-}
m >>= k = Put $
let PairS a w = unPut m
PairS b w' = unPut (k a)
in PairS b (w `mappend` w')
{-# INLINE (>>=) #-}
m >> k = Put $
let PairS _ w = unPut m
PairS b w' = unPut k
in PairS b (w `mappend` w')
{-# INLINE (>>) #-}
tell :: Builder -> Put
tell b = Put $ PairS () b
{-# INLINE tell #-}
putBuilder :: Builder -> Put
putBuilder = tell
{-# INLINE putBuilder #-}
-- | Run the 'Put' monad
execPut :: PutM a -> Builder
execPut = sndS . unPut
{-# INLINE execPut #-}
-- | Run the 'Put' monad with a serialiser
runPut :: Put -> L.ByteString
runPut = toLazyByteString . sndS . unPut
{-# INLINE runPut #-}
-- | Run the 'Put' monad with a serialiser and get its result
runPutM :: PutM a -> (a, L.ByteString)
runPutM (Put (PairS f s)) = (f, toLazyByteString s)
{-# INLINE runPutM #-}
------------------------------------------------------------------------
-- | Pop the ByteString we have constructed so far, if any, yielding a
-- new chunk in the result ByteString.
flush :: Put
flush = tell B.flush
{-# INLINE flush #-}
-- | Efficiently write a byte into the output buffer
putWord8 :: Word8 -> Put
putWord8 = tell . B.singleton
{-# INLINE putWord8 #-}
-- | An efficient primitive to write a strict ByteString into the output buffer.
-- It flushes the current buffer, and writes the argument into a new chunk.
putByteString :: S.ByteString -> Put
putByteString = tell . B.fromByteString
{-# INLINE putByteString #-}
-- | Write a lazy ByteString efficiently, simply appending the lazy
-- ByteString chunks to the output buffer
putLazyByteString :: L.ByteString -> Put
putLazyByteString = tell . B.fromLazyByteString
{-# INLINE putLazyByteString #-}
-- | Write a Word16 in big endian format
putWord16be :: Word16 -> Put
putWord16be = tell . B.putWord16be
{-# INLINE putWord16be #-}
-- | Write a Word16 in little endian format
putWord16le :: Word16 -> Put
putWord16le = tell . B.putWord16le
{-# INLINE putWord16le #-}
-- | Write a Word32 in big endian format
putWord32be :: Word32 -> Put
putWord32be = tell . B.putWord32be
{-# INLINE putWord32be #-}
-- | Write a Word32 in little endian format
putWord32le :: Word32 -> Put
putWord32le = tell . B.putWord32le
{-# INLINE putWord32le #-}
-- | Write a Word64 in big endian format
putWord64be :: Word64 -> Put
putWord64be = tell . B.putWord64be
{-# INLINE putWord64be #-}
-- | Write a Word64 in little endian format
putWord64le :: Word64 -> Put
putWord64le = tell . B.putWord64le
{-# INLINE putWord64le #-}
------------------------------------------------------------------------
-- | /O(1)./ Write a single native machine word. The word is
-- written in host order, host endian form, for the machine you're on.
-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
-- 4 bytes. Values written this way are not portable to
-- different endian or word sized machines, without conversion.
--
putWordhost :: Word -> Put
putWordhost = tell . B.putWordhost
{-# INLINE putWordhost #-}
-- | /O(1)./ Write a Word16 in native host order and host endianness.
-- For portability issues see @putWordhost@.
putWord16host :: Word16 -> Put
putWord16host = tell . B.putWord16host
{-# INLINE putWord16host #-}
-- | /O(1)./ Write a Word32 in native host order and host endianness.
-- For portability issues see @putWordhost@.
putWord32host :: Word32 -> Put
putWord32host = tell . B.putWord32host
{-# INLINE putWord32host #-}
-- | /O(1)./ Write a Word64 in native host order
-- On a 32 bit machine we write two host order Word32s, in big endian form.
-- For portability issues see @putWordhost@.
putWord64host :: Word64 -> Put
putWord64host = tell . B.putWord64host
{-# INLINE putWord64host #-}