binary serialization for PGF

This commit is contained in:
krasimir
2008-10-28 13:57:10 +00:00
parent d61f6f1085
commit e44448bad0
14 changed files with 1984 additions and 458 deletions

426
src/Data/Binary/Builder.hs Normal file
View File

@@ -0,0 +1,426 @@
{-# LANGUAGE CPP #-}
{-# OPTIONS_GHC -fglasgow-exts #-}
-- for unboxed shifts
-----------------------------------------------------------------------------
-- |
-- Module : Data.Binary.Builder
-- Copyright : Lennart Kolmodin, Ross Paterson
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
-- Stability : experimental
-- Portability : portable to Hugs and GHC
--
-- Efficient construction of lazy bytestrings.
--
-----------------------------------------------------------------------------
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
#include "MachDeps.h"
#endif
module Data.Binary.Builder (
-- * The Builder type
Builder
, toLazyByteString
-- * Constructing Builders
, empty
, singleton
, append
, fromByteString -- :: S.ByteString -> Builder
, fromLazyByteString -- :: L.ByteString -> Builder
-- * Flushing the buffer state
, flush
-- * Derived Builders
-- ** Big-endian writes
, putWord16be -- :: Word16 -> Builder
, putWord32be -- :: Word32 -> Builder
, putWord64be -- :: Word64 -> Builder
-- ** Little-endian writes
, putWord16le -- :: Word16 -> Builder
, putWord32le -- :: Word32 -> Builder
, putWord64le -- :: Word64 -> Builder
-- ** Host-endian, unaligned writes
, putWordhost -- :: Word -> Builder
, putWord16host -- :: Word16 -> Builder
, putWord32host -- :: Word32 -> Builder
, putWord64host -- :: Word64 -> Builder
) where
import Foreign
import Data.Monoid
import Data.Word
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
#ifdef BYTESTRING_IN_BASE
import Data.ByteString.Base (inlinePerformIO)
import qualified Data.ByteString.Base as S
#else
import Data.ByteString.Internal (inlinePerformIO)
import qualified Data.ByteString.Internal as S
import qualified Data.ByteString.Lazy.Internal as L
#endif
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
import GHC.Base
import GHC.Word (Word32(..),Word16(..),Word64(..))
#if WORD_SIZE_IN_BITS < 64 && __GLASGOW_HASKELL__ >= 608
import GHC.Word (uncheckedShiftRL64#)
#endif
#endif
------------------------------------------------------------------------
-- | A 'Builder' is an efficient way to build lazy 'L.ByteString's.
-- There are several functions for constructing 'Builder's, but only one
-- to inspect them: to extract any data, you have to turn them into lazy
-- 'L.ByteString's using 'toLazyByteString'.
--
-- Internally, a 'Builder' constructs a lazy 'L.Bytestring' by filling byte
-- arrays piece by piece. As each buffer is filled, it is \'popped\'
-- off, to become a new chunk of the resulting lazy 'L.ByteString'.
-- All this is hidden from the user of the 'Builder'.
newtype Builder = Builder {
-- Invariant (from Data.ByteString.Lazy):
-- The lists include no null ByteStrings.
runBuilder :: (Buffer -> [S.ByteString]) -> Buffer -> [S.ByteString]
}
instance Monoid Builder where
mempty = empty
{-# INLINE mempty #-}
mappend = append
{-# INLINE mappend #-}
------------------------------------------------------------------------
-- | /O(1)./ The empty Builder, satisfying
--
-- * @'toLazyByteString' 'empty' = 'L.empty'@
--
empty :: Builder
empty = Builder id
{-# INLINE empty #-}
-- | /O(1)./ A Builder taking a single byte, satisfying
--
-- * @'toLazyByteString' ('singleton' b) = 'L.singleton' b@
--
singleton :: Word8 -> Builder
singleton = writeN 1 . flip poke
{-# INLINE singleton #-}
------------------------------------------------------------------------
-- | /O(1)./ The concatenation of two Builders, an associative operation
-- with identity 'empty', satisfying
--
-- * @'toLazyByteString' ('append' x y) = 'L.append' ('toLazyByteString' x) ('toLazyByteString' y)@
--
append :: Builder -> Builder -> Builder
append (Builder f) (Builder g) = Builder (f . g)
{-# INLINE append #-}
-- | /O(1)./ A Builder taking a 'S.ByteString', satisfying
--
-- * @'toLazyByteString' ('fromByteString' bs) = 'L.fromChunks' [bs]@
--
fromByteString :: S.ByteString -> Builder
fromByteString bs
| S.null bs = empty
| otherwise = flush `append` mapBuilder (bs :)
{-# INLINE fromByteString #-}
-- | /O(1)./ A Builder taking a lazy 'L.ByteString', satisfying
--
-- * @'toLazyByteString' ('fromLazyByteString' bs) = bs@
--
fromLazyByteString :: L.ByteString -> Builder
fromLazyByteString bss = flush `append` mapBuilder (L.toChunks bss ++)
{-# INLINE fromLazyByteString #-}
------------------------------------------------------------------------
-- Our internal buffer type
data Buffer = Buffer {-# UNPACK #-} !(ForeignPtr Word8)
{-# UNPACK #-} !Int -- offset
{-# UNPACK #-} !Int -- used bytes
{-# UNPACK #-} !Int -- length left
------------------------------------------------------------------------
-- | /O(n)./ Extract a lazy 'L.ByteString' from a 'Builder'.
-- The construction work takes place if and when the relevant part of
-- the lazy 'L.ByteString' is demanded.
--
toLazyByteString :: Builder -> L.ByteString
toLazyByteString m = L.fromChunks $ unsafePerformIO $ do
buf <- newBuffer defaultSize
return (runBuilder (m `append` flush) (const []) buf)
-- | /O(1)./ Pop the 'S.ByteString' we have constructed so far, if any,
-- yielding a new chunk in the result lazy 'L.ByteString'.
flush :: Builder
flush = Builder $ \ k buf@(Buffer p o u l) ->
if u == 0
then k buf
else S.PS p o u : k (Buffer p (o+u) 0 l)
------------------------------------------------------------------------
--
-- copied from Data.ByteString.Lazy
--
defaultSize :: Int
defaultSize = 32 * k - overhead
where k = 1024
overhead = 2 * sizeOf (undefined :: Int)
------------------------------------------------------------------------
-- | Sequence an IO operation on the buffer
unsafeLiftIO :: (Buffer -> IO Buffer) -> Builder
unsafeLiftIO f = Builder $ \ k buf -> inlinePerformIO $ do
buf' <- f buf
return (k buf')
{-# INLINE unsafeLiftIO #-}
-- | Get the size of the buffer
withSize :: (Int -> Builder) -> Builder
withSize f = Builder $ \ k buf@(Buffer _ _ _ l) ->
runBuilder (f l) k buf
-- | Map the resulting list of bytestrings.
mapBuilder :: ([S.ByteString] -> [S.ByteString]) -> Builder
mapBuilder f = Builder (f .)
------------------------------------------------------------------------
-- | Ensure that there are at least @n@ many bytes available.
ensureFree :: Int -> Builder
ensureFree n = n `seq` withSize $ \ l ->
if n <= l then empty else
flush `append` unsafeLiftIO (const (newBuffer (max n defaultSize)))
{-# INLINE ensureFree #-}
-- | Ensure that @n@ many bytes are available, and then use @f@ to write some
-- bytes into the memory.
writeN :: Int -> (Ptr Word8 -> IO ()) -> Builder
writeN n f = ensureFree n `append` unsafeLiftIO (writeNBuffer n f)
{-# INLINE writeN #-}
writeNBuffer :: Int -> (Ptr Word8 -> IO ()) -> Buffer -> IO Buffer
writeNBuffer n f (Buffer fp o u l) = do
withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
return (Buffer fp o (u+n) (l-n))
{-# INLINE writeNBuffer #-}
newBuffer :: Int -> IO Buffer
newBuffer size = do
fp <- S.mallocByteString size
return $! Buffer fp 0 0 size
{-# INLINE newBuffer #-}
------------------------------------------------------------------------
-- Aligned, host order writes of storable values
-- | Ensure that @n@ many bytes are available, and then use @f@ to write some
-- storable values into the memory.
writeNbytes :: Storable a => Int -> (Ptr a -> IO ()) -> Builder
writeNbytes n f = ensureFree n `append` unsafeLiftIO (writeNBufferBytes n f)
{-# INLINE writeNbytes #-}
writeNBufferBytes :: Storable a => Int -> (Ptr a -> IO ()) -> Buffer -> IO Buffer
writeNBufferBytes n f (Buffer fp o u l) = do
withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
return (Buffer fp o (u+n) (l-n))
{-# INLINE writeNBufferBytes #-}
------------------------------------------------------------------------
--
-- We rely on the fromIntegral to do the right masking for us.
-- The inlining here is critical, and can be worth 4x performance
--
-- | Write a Word16 in big endian format
putWord16be :: Word16 -> Builder
putWord16be w = writeN 2 $ \p -> do
poke p (fromIntegral (shiftr_w16 w 8) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (w) :: Word8)
{-# INLINE putWord16be #-}
-- | Write a Word16 in little endian format
putWord16le :: Word16 -> Builder
putWord16le w = writeN 2 $ \p -> do
poke p (fromIntegral (w) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w16 w 8) :: Word8)
{-# INLINE putWord16le #-}
-- putWord16le w16 = writeN 2 (\p -> poke (castPtr p) w16)
-- | Write a Word32 in big endian format
putWord32be :: Word32 -> Builder
putWord32be w = writeN 4 $ \p -> do
poke p (fromIntegral (shiftr_w32 w 24) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 16) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 8) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (w) :: Word8)
{-# INLINE putWord32be #-}
--
-- a data type to tag Put/Check. writes construct these which are then
-- inlined and flattened. matching Checks will be more robust with rules.
--
-- | Write a Word32 in little endian format
putWord32le :: Word32 -> Builder
putWord32le w = writeN 4 $ \p -> do
poke p (fromIntegral (w) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 8) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 16) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 w 24) :: Word8)
{-# INLINE putWord32le #-}
-- on a little endian machine:
-- putWord32le w32 = writeN 4 (\p -> poke (castPtr p) w32)
-- | Write a Word64 in big endian format
putWord64be :: Word64 -> Builder
#if WORD_SIZE_IN_BITS < 64
--
-- To avoid expensive 64 bit shifts on 32 bit machines, we cast to
-- Word32, and write that
--
putWord64be w =
let a = fromIntegral (shiftr_w64 w 32) :: Word32
b = fromIntegral w :: Word32
in writeN 8 $ \p -> do
poke p (fromIntegral (shiftr_w32 a 24) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 16) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 8) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (a) :: Word8)
poke (p `plusPtr` 4) (fromIntegral (shiftr_w32 b 24) :: Word8)
poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 16) :: Word8)
poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 8) :: Word8)
poke (p `plusPtr` 7) (fromIntegral (b) :: Word8)
#else
putWord64be w = writeN 8 $ \p -> do
poke p (fromIntegral (shiftr_w64 w 56) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 48) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 40) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 32) :: Word8)
poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 24) :: Word8)
poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 16) :: Word8)
poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 8) :: Word8)
poke (p `plusPtr` 7) (fromIntegral (w) :: Word8)
#endif
{-# INLINE putWord64be #-}
-- | Write a Word64 in little endian format
putWord64le :: Word64 -> Builder
#if WORD_SIZE_IN_BITS < 64
putWord64le w =
let b = fromIntegral (shiftr_w64 w 32) :: Word32
a = fromIntegral w :: Word32
in writeN 8 $ \p -> do
poke (p) (fromIntegral (a) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 8) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 16) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 a 24) :: Word8)
poke (p `plusPtr` 4) (fromIntegral (b) :: Word8)
poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 8) :: Word8)
poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 16) :: Word8)
poke (p `plusPtr` 7) (fromIntegral (shiftr_w32 b 24) :: Word8)
#else
putWord64le w = writeN 8 $ \p -> do
poke p (fromIntegral (w) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 8) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 16) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 24) :: Word8)
poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 32) :: Word8)
poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 40) :: Word8)
poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 48) :: Word8)
poke (p `plusPtr` 7) (fromIntegral (shiftr_w64 w 56) :: Word8)
#endif
{-# INLINE putWord64le #-}
-- on a little endian machine:
-- putWord64le w64 = writeN 8 (\p -> poke (castPtr p) w64)
------------------------------------------------------------------------
-- Unaligned, word size ops
-- | /O(1)./ A Builder taking a single native machine word. The word is
-- written in host order, host endian form, for the machine you're on.
-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
-- 4 bytes. Values written this way are not portable to
-- different endian or word sized machines, without conversion.
--
putWordhost :: Word -> Builder
putWordhost w = writeNbytes (sizeOf (undefined :: Word)) (\p -> poke p w)
{-# INLINE putWordhost #-}
-- | Write a Word16 in native host order and host endianness.
-- 2 bytes will be written, unaligned.
putWord16host :: Word16 -> Builder
putWord16host w16 = writeNbytes (sizeOf (undefined :: Word16)) (\p -> poke p w16)
{-# INLINE putWord16host #-}
-- | Write a Word32 in native host order and host endianness.
-- 4 bytes will be written, unaligned.
putWord32host :: Word32 -> Builder
putWord32host w32 = writeNbytes (sizeOf (undefined :: Word32)) (\p -> poke p w32)
{-# INLINE putWord32host #-}
-- | Write a Word64 in native host order.
-- On a 32 bit machine we write two host order Word32s, in big endian form.
-- 8 bytes will be written, unaligned.
putWord64host :: Word64 -> Builder
putWord64host w = writeNbytes (sizeOf (undefined :: Word64)) (\p -> poke p w)
{-# INLINE putWord64host #-}
------------------------------------------------------------------------
-- Unchecked shifts
{-# INLINE shiftr_w16 #-}
shiftr_w16 :: Word16 -> Int -> Word16
{-# INLINE shiftr_w32 #-}
shiftr_w32 :: Word32 -> Int -> Word32
{-# INLINE shiftr_w64 #-}
shiftr_w64 :: Word64 -> Int -> Word64
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
shiftr_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftRL#` i)
shiftr_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftRL#` i)
#if WORD_SIZE_IN_BITS < 64
shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL64#` i)
#if __GLASGOW_HASKELL__ <= 606
-- Exported by GHC.Word in GHC 6.8 and higher
foreign import ccall unsafe "stg_uncheckedShiftRL64"
uncheckedShiftRL64# :: Word64# -> Int# -> Word64#
#endif
#else
shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL#` i)
#endif
#else
shiftr_w16 = shiftR
shiftr_w32 = shiftR
shiftr_w64 = shiftR
#endif

539
src/Data/Binary/Get.hs Normal file
View File

@@ -0,0 +1,539 @@
{-# LANGUAGE CPP #-}
{-# OPTIONS_GHC -fglasgow-exts #-}
-- for unboxed shifts
-----------------------------------------------------------------------------
-- |
-- Module : Data.Binary.Get
-- Copyright : Lennart Kolmodin
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
-- Stability : experimental
-- Portability : portable to Hugs and GHC.
--
-- The Get monad. A monad for efficiently building structures from
-- encoded lazy ByteStrings
--
-----------------------------------------------------------------------------
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
#include "MachDeps.h"
#endif
module Data.Binary.Get (
-- * The Get type
Get
, runGet
, runGetState
-- * Parsing
, skip
, uncheckedSkip
, lookAhead
, lookAheadM
, lookAheadE
, uncheckedLookAhead
-- * Utility
, bytesRead
, getBytes
, remaining
, isEmpty
-- * Parsing particular types
, getWord8
-- ** ByteStrings
, getByteString
, getLazyByteString
, getLazyByteStringNul
, getRemainingLazyByteString
-- ** Big-endian reads
, getWord16be
, getWord32be
, getWord64be
-- ** Little-endian reads
, getWord16le
, getWord32le
, getWord64le
-- ** Host-endian, unaligned reads
, getWordhost
, getWord16host
, getWord32host
, getWord64host
) where
import Control.Monad (when,liftM,ap)
import Control.Monad.Fix
import Data.Maybe (isNothing)
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
#ifdef BYTESTRING_IN_BASE
import qualified Data.ByteString.Base as B
#else
import qualified Data.ByteString.Internal as B
import qualified Data.ByteString.Lazy.Internal as L
#endif
#ifdef APPLICATIVE_IN_BASE
import Control.Applicative (Applicative(..))
#endif
import Foreign
-- used by splitAtST
import Control.Monad.ST
import Data.STRef
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
import GHC.Base
import GHC.Word
import GHC.Int
#endif
-- | The parse state
data S = S {-# UNPACK #-} !B.ByteString -- current chunk
L.ByteString -- the rest of the input
{-# UNPACK #-} !Int64 -- bytes read
-- | The Get monad is just a State monad carrying around the input ByteString
newtype Get a = Get { unGet :: S -> (a, S) }
instance Functor Get where
fmap f m = Get (\s -> let (a, s') = unGet m s
in (f a, s'))
{-# INLINE fmap #-}
#ifdef APPLICATIVE_IN_BASE
instance Applicative Get where
pure = return
(<*>) = ap
#endif
instance Monad Get where
return a = Get (\s -> (a, s))
{-# INLINE return #-}
m >>= k = Get (\s -> case unGet m s of
(a, s') -> unGet (k a) s')
{-# INLINE (>>=) #-}
fail = failDesc
instance MonadFix Get where
mfix f = Get (\s -> let (a,s') = unGet (f a) s
in (a,s'))
------------------------------------------------------------------------
get :: Get S
get = Get (\s -> (s, s))
put :: S -> Get ()
put s = Get (\_ -> ((), s))
------------------------------------------------------------------------
initState :: L.ByteString -> S
initState xs = mkState xs 0
{-# INLINE initState #-}
{-
initState (B.LPS xs) =
case xs of
[] -> S B.empty L.empty 0
(x:xs') -> S x (B.LPS xs') 0
-}
#ifndef BYTESTRING_IN_BASE
mkState :: L.ByteString -> Int64 -> S
mkState l = case l of
L.Empty -> S B.empty L.empty
L.Chunk x xs -> S x xs
{-# INLINE mkState #-}
#else
mkState :: L.ByteString -> Int64 -> S
mkState (B.LPS xs) =
case xs of
[] -> S B.empty L.empty
(x:xs') -> S x (B.LPS xs')
#endif
-- | Run the Get monad applies a 'get'-based parser on the input ByteString
runGet :: Get a -> L.ByteString -> a
runGet m str = case unGet m (initState str) of (a, _) -> a
-- | Run the Get monad applies a 'get'-based parser on the input
-- ByteString. Additional to the result of get it returns the number of
-- consumed bytes and the rest of the input.
runGetState :: Get a -> L.ByteString -> Int64 -> (a, L.ByteString, Int64)
runGetState m str off =
case unGet m (mkState str off) of
(a, ~(S s ss newOff)) -> (a, s `join` ss, newOff)
------------------------------------------------------------------------
failDesc :: String -> Get a
failDesc err = do
S _ _ bytes <- get
Get (error (err ++ ". Failed reading at byte position " ++ show bytes))
-- | Skip ahead @n@ bytes. Fails if fewer than @n@ bytes are available.
skip :: Int -> Get ()
skip n = readN (fromIntegral n) (const ())
-- | Skip ahead @n@ bytes. No error if there isn't enough bytes.
uncheckedSkip :: Int64 -> Get ()
uncheckedSkip n = do
S s ss bytes <- get
if fromIntegral (B.length s) >= n
then put (S (B.drop (fromIntegral n) s) ss (bytes + n))
else do
let rest = L.drop (n - fromIntegral (B.length s)) ss
put $! mkState rest (bytes + n)
-- | Run @ga@, but return without consuming its input.
-- Fails if @ga@ fails.
lookAhead :: Get a -> Get a
lookAhead ga = do
s <- get
a <- ga
put s
return a
-- | Like 'lookAhead', but consume the input if @gma@ returns 'Just _'.
-- Fails if @gma@ fails.
lookAheadM :: Get (Maybe a) -> Get (Maybe a)
lookAheadM gma = do
s <- get
ma <- gma
when (isNothing ma) $
put s
return ma
-- | Like 'lookAhead', but consume the input if @gea@ returns 'Right _'.
-- Fails if @gea@ fails.
lookAheadE :: Get (Either a b) -> Get (Either a b)
lookAheadE gea = do
s <- get
ea <- gea
case ea of
Left _ -> put s
_ -> return ()
return ea
-- | Get the next up to @n@ bytes as a lazy ByteString, without consuming them.
uncheckedLookAhead :: Int64 -> Get L.ByteString
uncheckedLookAhead n = do
S s ss _ <- get
if n <= fromIntegral (B.length s)
then return (L.fromChunks [B.take (fromIntegral n) s])
else return $ L.take n (s `join` ss)
------------------------------------------------------------------------
-- Utility
-- | Get the total number of bytes read to this point.
bytesRead :: Get Int64
bytesRead = do
S _ _ b <- get
return b
-- | Get the number of remaining unparsed bytes.
-- Useful for checking whether all input has been consumed.
-- Note that this forces the rest of the input.
remaining :: Get Int64
remaining = do
S s ss _ <- get
return (fromIntegral (B.length s) + L.length ss)
-- | Test whether all input has been consumed,
-- i.e. there are no remaining unparsed bytes.
isEmpty :: Get Bool
isEmpty = do
S s ss _ <- get
return (B.null s && L.null ss)
------------------------------------------------------------------------
-- Utility with ByteStrings
-- | An efficient 'get' method for strict ByteStrings. Fails if fewer
-- than @n@ bytes are left in the input.
getByteString :: Int -> Get B.ByteString
getByteString n = readN n id
{-# INLINE getByteString #-}
-- | An efficient 'get' method for lazy ByteStrings. Does not fail if fewer than
-- @n@ bytes are left in the input.
getLazyByteString :: Int64 -> Get L.ByteString
getLazyByteString n = do
S s ss bytes <- get
let big = s `join` ss
case splitAtST n big of
(consume, rest) -> do put $ mkState rest (bytes + n)
return consume
{-# INLINE getLazyByteString #-}
-- | Get a lazy ByteString that is terminated with a NUL byte. Fails
-- if it reaches the end of input without hitting a NUL.
getLazyByteStringNul :: Get L.ByteString
getLazyByteStringNul = do
S s ss bytes <- get
let big = s `join` ss
(consume, t) = L.break (== 0) big
(h, rest) = L.splitAt 1 t
if L.null h
then fail "too few bytes"
else do
put $ mkState rest (bytes + L.length consume + 1)
return consume
{-# INLINE getLazyByteStringNul #-}
-- | Get the remaining bytes as a lazy ByteString
getRemainingLazyByteString :: Get L.ByteString
getRemainingLazyByteString = do
S s ss _ <- get
return (s `join` ss)
------------------------------------------------------------------------
-- Helpers
-- | Pull @n@ bytes from the input, as a strict ByteString.
getBytes :: Int -> Get B.ByteString
getBytes n = do
S s ss bytes <- get
if n <= B.length s
then do let (consume,rest) = B.splitAt n s
put $! S rest ss (bytes + fromIntegral n)
return $! consume
else
case L.splitAt (fromIntegral n) (s `join` ss) of
(consuming, rest) ->
do let now = B.concat . L.toChunks $ consuming
put $! mkState rest (bytes + fromIntegral n)
-- forces the next chunk before this one is returned
if (B.length now < n)
then
fail "too few bytes"
else
return now
{-# INLINE getBytes #-}
-- ^ important
#ifndef BYTESTRING_IN_BASE
join :: B.ByteString -> L.ByteString -> L.ByteString
join bb lb
| B.null bb = lb
| otherwise = L.Chunk bb lb
#else
join :: B.ByteString -> L.ByteString -> L.ByteString
join bb (B.LPS lb)
| B.null bb = B.LPS lb
| otherwise = B.LPS (bb:lb)
#endif
-- don't use L.append, it's strict in it's second argument :/
{-# INLINE join #-}
-- | Split a ByteString. If the first result is consumed before the --
-- second, this runs in constant heap space.
--
-- You must force the returned tuple for that to work, e.g.
--
-- > case splitAtST n xs of
-- > (ys,zs) -> consume ys ... consume zs
--
splitAtST :: Int64 -> L.ByteString -> (L.ByteString, L.ByteString)
splitAtST i ps | i <= 0 = (L.empty, ps)
#ifndef BYTESTRING_IN_BASE
splitAtST i ps = runST (
do r <- newSTRef undefined
xs <- first r i ps
ys <- unsafeInterleaveST (readSTRef r)
return (xs, ys))
where
first r 0 xs@(L.Chunk _ _) = writeSTRef r xs >> return L.Empty
first r _ L.Empty = writeSTRef r L.Empty >> return L.Empty
first r n (L.Chunk x xs)
| n < l = do writeSTRef r (L.Chunk (B.drop (fromIntegral n) x) xs)
return $ L.Chunk (B.take (fromIntegral n) x) L.Empty
| otherwise = do writeSTRef r (L.drop (n - l) xs)
liftM (L.Chunk x) $ unsafeInterleaveST (first r (n - l) xs)
where l = fromIntegral (B.length x)
#else
splitAtST i (B.LPS ps) = runST (
do r <- newSTRef undefined
xs <- first r i ps
ys <- unsafeInterleaveST (readSTRef r)
return (B.LPS xs, B.LPS ys))
where first r 0 xs = writeSTRef r xs >> return []
first r _ [] = writeSTRef r [] >> return []
first r n (x:xs)
| n < l = do writeSTRef r (B.drop (fromIntegral n) x : xs)
return [B.take (fromIntegral n) x]
| otherwise = do writeSTRef r (L.toChunks (L.drop (n - l) (B.LPS xs)))
fmap (x:) $ unsafeInterleaveST (first r (n - l) xs)
where l = fromIntegral (B.length x)
#endif
{-# INLINE splitAtST #-}
-- Pull n bytes from the input, and apply a parser to those bytes,
-- yielding a value. If less than @n@ bytes are available, fail with an
-- error. This wraps @getBytes@.
readN :: Int -> (B.ByteString -> a) -> Get a
readN n f = fmap f $ getBytes n
{-# INLINE readN #-}
-- ^ important
------------------------------------------------------------------------
-- Primtives
-- helper, get a raw Ptr onto a strict ByteString copied out of the
-- underlying lazy byteString. So many indirections from the raw parser
-- state that my head hurts...
getPtr :: Storable a => Int -> Get a
getPtr n = do
(fp,o,_) <- readN n B.toForeignPtr
return . B.inlinePerformIO $ withForeignPtr fp $ \p -> peek (castPtr $ p `plusPtr` o)
{-# INLINE getPtr #-}
------------------------------------------------------------------------
-- | Read a Word8 from the monad state
getWord8 :: Get Word8
getWord8 = getPtr (sizeOf (undefined :: Word8))
{-# INLINE getWord8 #-}
-- | Read a Word16 in big endian format
getWord16be :: Get Word16
getWord16be = do
s <- readN 2 id
return $! (fromIntegral (s `B.index` 0) `shiftl_w16` 8) .|.
(fromIntegral (s `B.index` 1))
{-# INLINE getWord16be #-}
-- | Read a Word16 in little endian format
getWord16le :: Get Word16
getWord16le = do
s <- readN 2 id
return $! (fromIntegral (s `B.index` 1) `shiftl_w16` 8) .|.
(fromIntegral (s `B.index` 0) )
{-# INLINE getWord16le #-}
-- | Read a Word32 in big endian format
getWord32be :: Get Word32
getWord32be = do
s <- readN 4 id
return $! (fromIntegral (s `B.index` 0) `shiftl_w32` 24) .|.
(fromIntegral (s `B.index` 1) `shiftl_w32` 16) .|.
(fromIntegral (s `B.index` 2) `shiftl_w32` 8) .|.
(fromIntegral (s `B.index` 3) )
{-# INLINE getWord32be #-}
-- | Read a Word32 in little endian format
getWord32le :: Get Word32
getWord32le = do
s <- readN 4 id
return $! (fromIntegral (s `B.index` 3) `shiftl_w32` 24) .|.
(fromIntegral (s `B.index` 2) `shiftl_w32` 16) .|.
(fromIntegral (s `B.index` 1) `shiftl_w32` 8) .|.
(fromIntegral (s `B.index` 0) )
{-# INLINE getWord32le #-}
-- | Read a Word64 in big endian format
getWord64be :: Get Word64
getWord64be = do
s <- readN 8 id
return $! (fromIntegral (s `B.index` 0) `shiftl_w64` 56) .|.
(fromIntegral (s `B.index` 1) `shiftl_w64` 48) .|.
(fromIntegral (s `B.index` 2) `shiftl_w64` 40) .|.
(fromIntegral (s `B.index` 3) `shiftl_w64` 32) .|.
(fromIntegral (s `B.index` 4) `shiftl_w64` 24) .|.
(fromIntegral (s `B.index` 5) `shiftl_w64` 16) .|.
(fromIntegral (s `B.index` 6) `shiftl_w64` 8) .|.
(fromIntegral (s `B.index` 7) )
{-# INLINE getWord64be #-}
-- | Read a Word64 in little endian format
getWord64le :: Get Word64
getWord64le = do
s <- readN 8 id
return $! (fromIntegral (s `B.index` 7) `shiftl_w64` 56) .|.
(fromIntegral (s `B.index` 6) `shiftl_w64` 48) .|.
(fromIntegral (s `B.index` 5) `shiftl_w64` 40) .|.
(fromIntegral (s `B.index` 4) `shiftl_w64` 32) .|.
(fromIntegral (s `B.index` 3) `shiftl_w64` 24) .|.
(fromIntegral (s `B.index` 2) `shiftl_w64` 16) .|.
(fromIntegral (s `B.index` 1) `shiftl_w64` 8) .|.
(fromIntegral (s `B.index` 0) )
{-# INLINE getWord64le #-}
------------------------------------------------------------------------
-- Host-endian reads
-- | /O(1)./ Read a single native machine word. The word is read in
-- host order, host endian form, for the machine you're on. On a 64 bit
-- machine the Word is an 8 byte value, on a 32 bit machine, 4 bytes.
getWordhost :: Get Word
getWordhost = getPtr (sizeOf (undefined :: Word))
{-# INLINE getWordhost #-}
-- | /O(1)./ Read a 2 byte Word16 in native host order and host endianness.
getWord16host :: Get Word16
getWord16host = getPtr (sizeOf (undefined :: Word16))
{-# INLINE getWord16host #-}
-- | /O(1)./ Read a Word32 in native host order and host endianness.
getWord32host :: Get Word32
getWord32host = getPtr (sizeOf (undefined :: Word32))
{-# INLINE getWord32host #-}
-- | /O(1)./ Read a Word64 in native host order and host endianess.
getWord64host :: Get Word64
getWord64host = getPtr (sizeOf (undefined :: Word64))
{-# INLINE getWord64host #-}
------------------------------------------------------------------------
-- Unchecked shifts
shiftl_w16 :: Word16 -> Int -> Word16
shiftl_w32 :: Word32 -> Int -> Word32
shiftl_w64 :: Word64 -> Int -> Word64
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
shiftl_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftL#` i)
shiftl_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftL#` i)
#if WORD_SIZE_IN_BITS < 64
shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL64#` i)
#if __GLASGOW_HASKELL__ <= 606
-- Exported by GHC.Word in GHC 6.8 and higher
foreign import ccall unsafe "stg_uncheckedShiftL64"
uncheckedShiftL64# :: Word64# -> Int# -> Word64#
#endif
#else
shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL#` i)
#endif
#else
shiftl_w16 = shiftL
shiftl_w32 = shiftL
shiftl_w64 = shiftL
#endif

199
src/Data/Binary/Put.hs Normal file
View File

@@ -0,0 +1,199 @@
{-# LANGUAGE CPP #-}
-----------------------------------------------------------------------------
-- |
-- Module : Data.Binary.Put
-- Copyright : Lennart Kolmodin
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
-- Stability : stable
-- Portability : Portable to Hugs and GHC. Requires MPTCs
--
-- The Put monad. A monad for efficiently constructing lazy bytestrings.
--
-----------------------------------------------------------------------------
module Data.Binary.Put (
-- * The Put type
Put
, PutM(..)
, runPut
-- * Flushing the implicit parse state
, flush
-- * Primitives
, putWord8
, putByteString
, putLazyByteString
-- * Big-endian primitives
, putWord16be
, putWord32be
, putWord64be
-- * Little-endian primitives
, putWord16le
, putWord32le
, putWord64le
-- * Host-endian, unaligned writes
, putWordhost -- :: Word -> Put
, putWord16host -- :: Word16 -> Put
, putWord32host -- :: Word32 -> Put
, putWord64host -- :: Word64 -> Put
) where
import Data.Monoid
import Data.Binary.Builder (Builder, toLazyByteString)
import qualified Data.Binary.Builder as B
import Data.Word
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
#ifdef APPLICATIVE_IN_BASE
import Control.Applicative
#endif
------------------------------------------------------------------------
-- XXX Strict in buffer only.
data PairS a = PairS a {-# UNPACK #-}!Builder
sndS :: PairS a -> Builder
sndS (PairS _ b) = b
-- | The PutM type. A Writer monad over the efficient Builder monoid.
newtype PutM a = Put { unPut :: PairS a }
-- | Put merely lifts Builder into a Writer monad, applied to ().
type Put = PutM ()
instance Functor PutM where
fmap f m = Put $ let PairS a w = unPut m in PairS (f a) w
{-# INLINE fmap #-}
#ifdef APPLICATIVE_IN_BASE
instance Applicative PutM where
pure = return
m <*> k = Put $
let PairS f w = unPut m
PairS x w' = unPut k
in PairS (f x) (w `mappend` w')
#endif
-- Standard Writer monad, with aggressive inlining
instance Monad PutM where
return a = Put $ PairS a mempty
{-# INLINE return #-}
m >>= k = Put $
let PairS a w = unPut m
PairS b w' = unPut (k a)
in PairS b (w `mappend` w')
{-# INLINE (>>=) #-}
m >> k = Put $
let PairS _ w = unPut m
PairS b w' = unPut k
in PairS b (w `mappend` w')
{-# INLINE (>>) #-}
tell :: Builder -> Put
tell b = Put $ PairS () b
{-# INLINE tell #-}
-- | Run the 'Put' monad with a serialiser
runPut :: Put -> L.ByteString
runPut = toLazyByteString . sndS . unPut
{-# INLINE runPut #-}
------------------------------------------------------------------------
-- | Pop the ByteString we have constructed so far, if any, yielding a
-- new chunk in the result ByteString.
flush :: Put
flush = tell B.flush
{-# INLINE flush #-}
-- | Efficiently write a byte into the output buffer
putWord8 :: Word8 -> Put
putWord8 = tell . B.singleton
{-# INLINE putWord8 #-}
-- | An efficient primitive to write a strict ByteString into the output buffer.
-- It flushes the current buffer, and writes the argument into a new chunk.
putByteString :: S.ByteString -> Put
putByteString = tell . B.fromByteString
{-# INLINE putByteString #-}
-- | Write a lazy ByteString efficiently, simply appending the lazy
-- ByteString chunks to the output buffer
putLazyByteString :: L.ByteString -> Put
putLazyByteString = tell . B.fromLazyByteString
{-# INLINE putLazyByteString #-}
-- | Write a Word16 in big endian format
putWord16be :: Word16 -> Put
putWord16be = tell . B.putWord16be
{-# INLINE putWord16be #-}
-- | Write a Word16 in little endian format
putWord16le :: Word16 -> Put
putWord16le = tell . B.putWord16le
{-# INLINE putWord16le #-}
-- | Write a Word32 in big endian format
putWord32be :: Word32 -> Put
putWord32be = tell . B.putWord32be
{-# INLINE putWord32be #-}
-- | Write a Word32 in little endian format
putWord32le :: Word32 -> Put
putWord32le = tell . B.putWord32le
{-# INLINE putWord32le #-}
-- | Write a Word64 in big endian format
putWord64be :: Word64 -> Put
putWord64be = tell . B.putWord64be
{-# INLINE putWord64be #-}
-- | Write a Word64 in little endian format
putWord64le :: Word64 -> Put
putWord64le = tell . B.putWord64le
{-# INLINE putWord64le #-}
------------------------------------------------------------------------
-- | /O(1)./ Write a single native machine word. The word is
-- written in host order, host endian form, for the machine you're on.
-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
-- 4 bytes. Values written this way are not portable to
-- different endian or word sized machines, without conversion.
--
putWordhost :: Word -> Put
putWordhost = tell . B.putWordhost
{-# INLINE putWordhost #-}
-- | /O(1)./ Write a Word16 in native host order and host endianness.
-- For portability issues see @putWordhost@.
putWord16host :: Word16 -> Put
putWord16host = tell . B.putWord16host
{-# INLINE putWord16host #-}
-- | /O(1)./ Write a Word32 in native host order and host endianness.
-- For portability issues see @putWordhost@.
putWord32host :: Word32 -> Put
putWord32host = tell . B.putWord32host
{-# INLINE putWord32host #-}
-- | /O(1)./ Write a Word64 in native host order
-- On a 32 bit machine we write two host order Word32s, in big endian form.
-- For portability issues see @putWordhost@.
putWord64host :: Word64 -> Put
putWord64host = tell . B.putWord64host
{-# INLINE putWord64host #-}