mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-15 07:49:31 -06:00
* In GHC 8.4.1, the operator <> has become a method of the Semigroup class
and is exported from the Prelude. This is unfortunate, since <> is also
exported from the standard library module Text.PrettyPrint, so in any
module that defines a pretty printer, there is likely to be an ambiguity.
This affects ~18 modules in GF. Solution:
import Prelude hiding (<>)
This works also in older versions of GHC, since GHC does't complain if
you hide something that doesn't exists.
* In GHC 8.4.1, Semigroup has become a superclass of Monoid. This means
that anywhere you define an instance of the Monoid class you also have to
define an instance in the Semigroup class.
This affects Data.Binary.Builder in GF. Solution: conditionally define
a Semigroup instance if compiling with base>=4.11 (ghc>=8.4.1)
435 lines
15 KiB
Haskell
435 lines
15 KiB
Haskell
{-# LANGUAGE CPP, MagicHash #-}
|
|
-- for unboxed shifts
|
|
|
|
-----------------------------------------------------------------------------
|
|
-- |
|
|
-- Module : Data.Binary.Builder
|
|
-- Copyright : Lennart Kolmodin, Ross Paterson
|
|
-- License : BSD3-style (see LICENSE)
|
|
--
|
|
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
|
|
-- Stability : experimental
|
|
-- Portability : portable to Hugs and GHC
|
|
--
|
|
-- Efficient construction of lazy bytestrings.
|
|
--
|
|
-----------------------------------------------------------------------------
|
|
|
|
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
|
|
#include "MachDeps.h"
|
|
#endif
|
|
|
|
module Data.Binary.Builder (
|
|
|
|
-- * The Builder type
|
|
Builder
|
|
, toLazyByteString
|
|
|
|
-- * Constructing Builders
|
|
, empty
|
|
, singleton
|
|
, append
|
|
, fromByteString -- :: S.ByteString -> Builder
|
|
, fromLazyByteString -- :: L.ByteString -> Builder
|
|
|
|
-- * Flushing the buffer state
|
|
, flush
|
|
|
|
-- * Derived Builders
|
|
-- ** Big-endian writes
|
|
, putWord16be -- :: Word16 -> Builder
|
|
, putWord32be -- :: Word32 -> Builder
|
|
, putWord64be -- :: Word64 -> Builder
|
|
|
|
-- ** Little-endian writes
|
|
, putWord16le -- :: Word16 -> Builder
|
|
, putWord32le -- :: Word32 -> Builder
|
|
, putWord64le -- :: Word64 -> Builder
|
|
|
|
-- ** Host-endian, unaligned writes
|
|
, putWordhost -- :: Word -> Builder
|
|
, putWord16host -- :: Word16 -> Builder
|
|
, putWord32host -- :: Word32 -> Builder
|
|
, putWord64host -- :: Word64 -> Builder
|
|
|
|
) where
|
|
|
|
#if MIN_VERSION_base(4,8,0)
|
|
import Prelude hiding (empty)
|
|
#endif
|
|
import Foreign(Word,Word8,Ptr,Storable,ForeignPtr,withForeignPtr,poke,plusPtr,sizeOf)
|
|
import System.IO.Unsafe(unsafePerformIO)
|
|
import Data.Monoid
|
|
--import Data.Word
|
|
import qualified Data.ByteString as S
|
|
import qualified Data.ByteString.Lazy as L
|
|
|
|
#ifdef BYTESTRING_IN_BASE
|
|
import Data.ByteString.Base (inlinePerformIO)
|
|
import qualified Data.ByteString.Base as S
|
|
#else
|
|
import Data.ByteString.Internal (inlinePerformIO)
|
|
import qualified Data.ByteString.Internal as S
|
|
--import qualified Data.ByteString.Lazy.Internal as L
|
|
#endif
|
|
|
|
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
|
|
import GHC.Base(Int(..),uncheckedShiftRL# )
|
|
import GHC.Word (Word32(..),Word16(..),Word64(..))
|
|
|
|
#if WORD_SIZE_IN_BITS < 64 && __GLASGOW_HASKELL__ >= 608
|
|
import GHC.Word (uncheckedShiftRL64#)
|
|
#endif
|
|
#endif
|
|
|
|
------------------------------------------------------------------------
|
|
|
|
-- | A 'Builder' is an efficient way to build lazy 'L.ByteString's.
|
|
-- There are several functions for constructing 'Builder's, but only one
|
|
-- to inspect them: to extract any data, you have to turn them into lazy
|
|
-- 'L.ByteString's using 'toLazyByteString'.
|
|
--
|
|
-- Internally, a 'Builder' constructs a lazy 'L.Bytestring' by filling byte
|
|
-- arrays piece by piece. As each buffer is filled, it is \'popped\'
|
|
-- off, to become a new chunk of the resulting lazy 'L.ByteString'.
|
|
-- All this is hidden from the user of the 'Builder'.
|
|
|
|
newtype Builder = Builder {
|
|
-- Invariant (from Data.ByteString.Lazy):
|
|
-- The lists include no null ByteStrings.
|
|
runBuilder :: (Buffer -> [S.ByteString]) -> Buffer -> [S.ByteString]
|
|
}
|
|
|
|
#if MIN_VERSION_base(4,11,0)
|
|
instance Semigroup Builder where
|
|
(<>) = append
|
|
#endif
|
|
|
|
instance Monoid Builder where
|
|
mempty = empty
|
|
{-# INLINE mempty #-}
|
|
mappend = append
|
|
{-# INLINE mappend #-}
|
|
|
|
------------------------------------------------------------------------
|
|
|
|
-- | /O(1)./ The empty Builder, satisfying
|
|
--
|
|
-- * @'toLazyByteString' 'empty' = 'L.empty'@
|
|
--
|
|
empty :: Builder
|
|
empty = Builder id
|
|
{-# INLINE empty #-}
|
|
|
|
-- | /O(1)./ A Builder taking a single byte, satisfying
|
|
--
|
|
-- * @'toLazyByteString' ('singleton' b) = 'L.singleton' b@
|
|
--
|
|
singleton :: Word8 -> Builder
|
|
singleton = writeN 1 . flip poke
|
|
{-# INLINE singleton #-}
|
|
|
|
------------------------------------------------------------------------
|
|
|
|
-- | /O(1)./ The concatenation of two Builders, an associative operation
|
|
-- with identity 'empty', satisfying
|
|
--
|
|
-- * @'toLazyByteString' ('append' x y) = 'L.append' ('toLazyByteString' x) ('toLazyByteString' y)@
|
|
--
|
|
append :: Builder -> Builder -> Builder
|
|
append (Builder f) (Builder g) = Builder (f . g)
|
|
{-# INLINE append #-}
|
|
|
|
-- | /O(1)./ A Builder taking a 'S.ByteString', satisfying
|
|
--
|
|
-- * @'toLazyByteString' ('fromByteString' bs) = 'L.fromChunks' [bs]@
|
|
--
|
|
fromByteString :: S.ByteString -> Builder
|
|
fromByteString bs
|
|
| S.null bs = empty
|
|
| otherwise = flush `append` mapBuilder (bs :)
|
|
{-# INLINE fromByteString #-}
|
|
|
|
-- | /O(1)./ A Builder taking a lazy 'L.ByteString', satisfying
|
|
--
|
|
-- * @'toLazyByteString' ('fromLazyByteString' bs) = bs@
|
|
--
|
|
fromLazyByteString :: L.ByteString -> Builder
|
|
fromLazyByteString bss = flush `append` mapBuilder (L.toChunks bss ++)
|
|
{-# INLINE fromLazyByteString #-}
|
|
|
|
------------------------------------------------------------------------
|
|
|
|
-- Our internal buffer type
|
|
data Buffer = Buffer {-# UNPACK #-} !(ForeignPtr Word8)
|
|
{-# UNPACK #-} !Int -- offset
|
|
{-# UNPACK #-} !Int -- used bytes
|
|
{-# UNPACK #-} !Int -- length left
|
|
|
|
------------------------------------------------------------------------
|
|
|
|
-- | /O(n)./ Extract a lazy 'L.ByteString' from a 'Builder'.
|
|
-- The construction work takes place if and when the relevant part of
|
|
-- the lazy 'L.ByteString' is demanded.
|
|
--
|
|
toLazyByteString :: Builder -> L.ByteString
|
|
toLazyByteString m = L.fromChunks $ unsafePerformIO $ do
|
|
buf <- newBuffer defaultSize
|
|
return (runBuilder (m `append` flush) (const []) buf)
|
|
|
|
-- | /O(1)./ Pop the 'S.ByteString' we have constructed so far, if any,
|
|
-- yielding a new chunk in the result lazy 'L.ByteString'.
|
|
flush :: Builder
|
|
flush = Builder $ \ k buf@(Buffer p o u l) ->
|
|
if u == 0
|
|
then k buf
|
|
else S.PS p o u : k (Buffer p (o+u) 0 l)
|
|
|
|
------------------------------------------------------------------------
|
|
|
|
--
|
|
-- copied from Data.ByteString.Lazy
|
|
--
|
|
defaultSize :: Int
|
|
defaultSize = 32 * k - overhead
|
|
where k = 1024
|
|
overhead = 2 * sizeOf (undefined :: Int)
|
|
|
|
------------------------------------------------------------------------
|
|
|
|
-- | Sequence an IO operation on the buffer
|
|
unsafeLiftIO :: (Buffer -> IO Buffer) -> Builder
|
|
unsafeLiftIO f = Builder $ \ k buf -> inlinePerformIO $ do
|
|
buf' <- f buf
|
|
return (k buf')
|
|
{-# INLINE unsafeLiftIO #-}
|
|
|
|
-- | Get the size of the buffer
|
|
withSize :: (Int -> Builder) -> Builder
|
|
withSize f = Builder $ \ k buf@(Buffer _ _ _ l) ->
|
|
runBuilder (f l) k buf
|
|
|
|
-- | Map the resulting list of bytestrings.
|
|
mapBuilder :: ([S.ByteString] -> [S.ByteString]) -> Builder
|
|
mapBuilder f = Builder (f .)
|
|
|
|
------------------------------------------------------------------------
|
|
|
|
-- | Ensure that there are at least @n@ many bytes available.
|
|
ensureFree :: Int -> Builder
|
|
ensureFree n = n `seq` withSize $ \ l ->
|
|
if n <= l then empty else
|
|
flush `append` unsafeLiftIO (const (newBuffer (max n defaultSize)))
|
|
{-# INLINE ensureFree #-}
|
|
|
|
-- | Ensure that @n@ many bytes are available, and then use @f@ to write some
|
|
-- bytes into the memory.
|
|
writeN :: Int -> (Ptr Word8 -> IO ()) -> Builder
|
|
writeN n f = ensureFree n `append` unsafeLiftIO (writeNBuffer n f)
|
|
{-# INLINE writeN #-}
|
|
|
|
writeNBuffer :: Int -> (Ptr Word8 -> IO ()) -> Buffer -> IO Buffer
|
|
writeNBuffer n f (Buffer fp o u l) = do
|
|
withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
|
|
return (Buffer fp o (u+n) (l-n))
|
|
{-# INLINE writeNBuffer #-}
|
|
|
|
newBuffer :: Int -> IO Buffer
|
|
newBuffer size = do
|
|
fp <- S.mallocByteString size
|
|
return $! Buffer fp 0 0 size
|
|
{-# INLINE newBuffer #-}
|
|
|
|
------------------------------------------------------------------------
|
|
-- Aligned, host order writes of storable values
|
|
|
|
-- | Ensure that @n@ many bytes are available, and then use @f@ to write some
|
|
-- storable values into the memory.
|
|
writeNbytes :: Storable a => Int -> (Ptr a -> IO ()) -> Builder
|
|
writeNbytes n f = ensureFree n `append` unsafeLiftIO (writeNBufferBytes n f)
|
|
{-# INLINE writeNbytes #-}
|
|
|
|
writeNBufferBytes :: Storable a => Int -> (Ptr a -> IO ()) -> Buffer -> IO Buffer
|
|
writeNBufferBytes n f (Buffer fp o u l) = do
|
|
withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
|
|
return (Buffer fp o (u+n) (l-n))
|
|
{-# INLINE writeNBufferBytes #-}
|
|
|
|
------------------------------------------------------------------------
|
|
|
|
--
|
|
-- We rely on the fromIntegral to do the right masking for us.
|
|
-- The inlining here is critical, and can be worth 4x performance
|
|
--
|
|
|
|
-- | Write a Word16 in big endian format
|
|
putWord16be :: Word16 -> Builder
|
|
putWord16be w = writeN 2 $ \p -> do
|
|
poke p (fromIntegral (shiftr_w16 w 8) :: Word8)
|
|
poke (p `plusPtr` 1) (fromIntegral (w) :: Word8)
|
|
{-# INLINE putWord16be #-}
|
|
|
|
-- | Write a Word16 in little endian format
|
|
putWord16le :: Word16 -> Builder
|
|
putWord16le w = writeN 2 $ \p -> do
|
|
poke p (fromIntegral (w) :: Word8)
|
|
poke (p `plusPtr` 1) (fromIntegral (shiftr_w16 w 8) :: Word8)
|
|
{-# INLINE putWord16le #-}
|
|
|
|
-- putWord16le w16 = writeN 2 (\p -> poke (castPtr p) w16)
|
|
|
|
-- | Write a Word32 in big endian format
|
|
putWord32be :: Word32 -> Builder
|
|
putWord32be w = writeN 4 $ \p -> do
|
|
poke p (fromIntegral (shiftr_w32 w 24) :: Word8)
|
|
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 16) :: Word8)
|
|
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 8) :: Word8)
|
|
poke (p `plusPtr` 3) (fromIntegral (w) :: Word8)
|
|
{-# INLINE putWord32be #-}
|
|
|
|
--
|
|
-- a data type to tag Put/Check. writes construct these which are then
|
|
-- inlined and flattened. matching Checks will be more robust with rules.
|
|
--
|
|
|
|
-- | Write a Word32 in little endian format
|
|
putWord32le :: Word32 -> Builder
|
|
putWord32le w = writeN 4 $ \p -> do
|
|
poke p (fromIntegral (w) :: Word8)
|
|
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 8) :: Word8)
|
|
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 16) :: Word8)
|
|
poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 w 24) :: Word8)
|
|
{-# INLINE putWord32le #-}
|
|
|
|
-- on a little endian machine:
|
|
-- putWord32le w32 = writeN 4 (\p -> poke (castPtr p) w32)
|
|
|
|
-- | Write a Word64 in big endian format
|
|
putWord64be :: Word64 -> Builder
|
|
#if WORD_SIZE_IN_BITS < 64
|
|
--
|
|
-- To avoid expensive 64 bit shifts on 32 bit machines, we cast to
|
|
-- Word32, and write that
|
|
--
|
|
putWord64be w =
|
|
let a = fromIntegral (shiftr_w64 w 32) :: Word32
|
|
b = fromIntegral w :: Word32
|
|
in writeN 8 $ \p -> do
|
|
poke p (fromIntegral (shiftr_w32 a 24) :: Word8)
|
|
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 16) :: Word8)
|
|
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 8) :: Word8)
|
|
poke (p `plusPtr` 3) (fromIntegral (a) :: Word8)
|
|
poke (p `plusPtr` 4) (fromIntegral (shiftr_w32 b 24) :: Word8)
|
|
poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 16) :: Word8)
|
|
poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 8) :: Word8)
|
|
poke (p `plusPtr` 7) (fromIntegral (b) :: Word8)
|
|
#else
|
|
putWord64be w = writeN 8 $ \p -> do
|
|
poke p (fromIntegral (shiftr_w64 w 56) :: Word8)
|
|
poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 48) :: Word8)
|
|
poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 40) :: Word8)
|
|
poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 32) :: Word8)
|
|
poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 24) :: Word8)
|
|
poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 16) :: Word8)
|
|
poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 8) :: Word8)
|
|
poke (p `plusPtr` 7) (fromIntegral (w) :: Word8)
|
|
#endif
|
|
{-# INLINE putWord64be #-}
|
|
|
|
-- | Write a Word64 in little endian format
|
|
putWord64le :: Word64 -> Builder
|
|
|
|
#if WORD_SIZE_IN_BITS < 64
|
|
putWord64le w =
|
|
let b = fromIntegral (shiftr_w64 w 32) :: Word32
|
|
a = fromIntegral w :: Word32
|
|
in writeN 8 $ \p -> do
|
|
poke (p) (fromIntegral (a) :: Word8)
|
|
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 8) :: Word8)
|
|
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 16) :: Word8)
|
|
poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 a 24) :: Word8)
|
|
poke (p `plusPtr` 4) (fromIntegral (b) :: Word8)
|
|
poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 8) :: Word8)
|
|
poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 16) :: Word8)
|
|
poke (p `plusPtr` 7) (fromIntegral (shiftr_w32 b 24) :: Word8)
|
|
#else
|
|
putWord64le w = writeN 8 $ \p -> do
|
|
poke p (fromIntegral (w) :: Word8)
|
|
poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 8) :: Word8)
|
|
poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 16) :: Word8)
|
|
poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 24) :: Word8)
|
|
poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 32) :: Word8)
|
|
poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 40) :: Word8)
|
|
poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 48) :: Word8)
|
|
poke (p `plusPtr` 7) (fromIntegral (shiftr_w64 w 56) :: Word8)
|
|
#endif
|
|
{-# INLINE putWord64le #-}
|
|
|
|
-- on a little endian machine:
|
|
-- putWord64le w64 = writeN 8 (\p -> poke (castPtr p) w64)
|
|
|
|
------------------------------------------------------------------------
|
|
-- Unaligned, word size ops
|
|
|
|
-- | /O(1)./ A Builder taking a single native machine word. The word is
|
|
-- written in host order, host endian form, for the machine you're on.
|
|
-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
|
|
-- 4 bytes. Values written this way are not portable to
|
|
-- different endian or word sized machines, without conversion.
|
|
--
|
|
putWordhost :: Word -> Builder
|
|
putWordhost w = writeNbytes (sizeOf (undefined :: Word)) (\p -> poke p w)
|
|
{-# INLINE putWordhost #-}
|
|
|
|
-- | Write a Word16 in native host order and host endianness.
|
|
-- 2 bytes will be written, unaligned.
|
|
putWord16host :: Word16 -> Builder
|
|
putWord16host w16 = writeNbytes (sizeOf (undefined :: Word16)) (\p -> poke p w16)
|
|
{-# INLINE putWord16host #-}
|
|
|
|
-- | Write a Word32 in native host order and host endianness.
|
|
-- 4 bytes will be written, unaligned.
|
|
putWord32host :: Word32 -> Builder
|
|
putWord32host w32 = writeNbytes (sizeOf (undefined :: Word32)) (\p -> poke p w32)
|
|
{-# INLINE putWord32host #-}
|
|
|
|
-- | Write a Word64 in native host order.
|
|
-- On a 32 bit machine we write two host order Word32s, in big endian form.
|
|
-- 8 bytes will be written, unaligned.
|
|
putWord64host :: Word64 -> Builder
|
|
putWord64host w = writeNbytes (sizeOf (undefined :: Word64)) (\p -> poke p w)
|
|
{-# INLINE putWord64host #-}
|
|
|
|
------------------------------------------------------------------------
|
|
-- Unchecked shifts
|
|
|
|
{-# INLINE shiftr_w16 #-}
|
|
shiftr_w16 :: Word16 -> Int -> Word16
|
|
{-# INLINE shiftr_w32 #-}
|
|
shiftr_w32 :: Word32 -> Int -> Word32
|
|
{-# INLINE shiftr_w64 #-}
|
|
shiftr_w64 :: Word64 -> Int -> Word64
|
|
|
|
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
|
|
shiftr_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftRL#` i)
|
|
shiftr_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftRL#` i)
|
|
|
|
#if WORD_SIZE_IN_BITS < 64
|
|
shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL64#` i)
|
|
|
|
#if __GLASGOW_HASKELL__ <= 606
|
|
-- Exported by GHC.Word in GHC 6.8 and higher
|
|
foreign import ccall unsafe "stg_uncheckedShiftRL64"
|
|
uncheckedShiftRL64# :: Word64# -> Int# -> Word64#
|
|
#endif
|
|
|
|
#else
|
|
shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL#` i)
|
|
#endif
|
|
|
|
#else
|
|
shiftr_w16 = shiftR
|
|
shiftr_w32 = shiftR
|
|
shiftr_w64 = shiftR
|
|
#endif
|