forked from GitHub/gf-core
554 lines
16 KiB
Haskell
554 lines
16 KiB
Haskell
{-# LANGUAGE CPP, MagicHash #-}
|
|
-- for unboxed shifts
|
|
|
|
-----------------------------------------------------------------------------
|
|
-- |
|
|
-- Module : Data.Binary.Get
|
|
-- Copyright : Lennart Kolmodin
|
|
-- License : BSD3-style (see LICENSE)
|
|
--
|
|
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
|
|
-- Stability : experimental
|
|
-- Portability : portable to Hugs and GHC.
|
|
--
|
|
-- The Get monad. A monad for efficiently building structures from
|
|
-- encoded lazy ByteStrings
|
|
--
|
|
-----------------------------------------------------------------------------
|
|
|
|
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
|
|
#include "MachDeps.h"
|
|
#endif
|
|
|
|
module Data.Binary.Get (
|
|
|
|
-- * The Get type
|
|
Get
|
|
, runGet
|
|
, runGetState
|
|
|
|
-- * Parsing
|
|
, skip
|
|
, uncheckedSkip
|
|
, lookAhead
|
|
, lookAheadM
|
|
, lookAheadE
|
|
, uncheckedLookAhead
|
|
|
|
-- * Utility
|
|
, bytesRead
|
|
, getBytes
|
|
, remaining
|
|
, isEmpty
|
|
|
|
-- * Parsing particular types
|
|
, getWord8
|
|
|
|
-- ** ByteStrings
|
|
, getByteString
|
|
, getLazyByteString
|
|
, getLazyByteStringNul
|
|
, getRemainingLazyByteString
|
|
|
|
-- ** Big-endian reads
|
|
, getWord16be
|
|
, getWord32be
|
|
, getWord64be
|
|
|
|
-- ** Little-endian reads
|
|
, getWord16le
|
|
, getWord32le
|
|
, getWord64le
|
|
|
|
-- ** Host-endian, unaligned reads
|
|
, getWordhost
|
|
, getWord16host
|
|
, getWord32host
|
|
, getWord64host
|
|
|
|
) where
|
|
|
|
import Control.Monad (when,liftM, ap)
|
|
import Control.Monad.Fix
|
|
import Data.Maybe (isNothing)
|
|
|
|
import qualified Data.ByteString as B
|
|
import qualified Data.ByteString.Lazy as L
|
|
|
|
#ifdef BYTESTRING_IN_BASE
|
|
import qualified Data.ByteString.Base as B
|
|
#else
|
|
import qualified Data.ByteString.Internal as B
|
|
import qualified Data.ByteString.Lazy.Internal as L
|
|
#endif
|
|
|
|
import Control.Applicative (Applicative(..))
|
|
|
|
import Foreign
|
|
|
|
-- used by splitAtST
|
|
#if MIN_VERSION_base(4,6,0)
|
|
import Control.Monad.ST.Unsafe(unsafeInterleaveST)
|
|
#else
|
|
import Control.Monad.ST(unsafeInterleaveST)
|
|
#endif
|
|
import Control.Monad.ST(runST)
|
|
import Data.STRef
|
|
|
|
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
|
|
import GHC.Base
|
|
import GHC.Word
|
|
--import GHC.Int
|
|
#endif
|
|
|
|
-- Control.Monad.Fail import will become redundant in GHC 8.8+
|
|
import qualified Control.Monad.Fail as Fail
|
|
|
|
|
|
-- | The parse state
|
|
data S = S {-# UNPACK #-} !B.ByteString -- current chunk
|
|
L.ByteString -- the rest of the input
|
|
{-# UNPACK #-} !Int64 -- bytes read
|
|
|
|
-- | The Get monad is just a State monad carrying around the input ByteString
|
|
newtype Get a = Get { unGet :: S -> (a, S) }
|
|
|
|
instance Functor Get where
|
|
fmap f m = Get (\s -> case unGet m s of
|
|
(a, s') -> (f a, s'))
|
|
{-# INLINE fmap #-}
|
|
|
|
instance Applicative Get where
|
|
pure = return
|
|
(<*>) = ap
|
|
|
|
instance Monad Get where
|
|
return a = Get (\s -> (a, s))
|
|
{-# INLINE return #-}
|
|
|
|
m >>= k = Get (\s -> case unGet m s of
|
|
(a, s') -> unGet (k a) s')
|
|
{-# INLINE (>>=) #-}
|
|
|
|
#if !(MIN_VERSION_base(4,13,0))
|
|
fail = failDesc
|
|
#endif
|
|
|
|
instance Fail.MonadFail Get where
|
|
fail = failDesc
|
|
|
|
instance MonadFix Get where
|
|
mfix f = Get (\s -> let (a,s') = unGet (f a) s
|
|
in (a,s'))
|
|
|
|
------------------------------------------------------------------------
|
|
|
|
get :: Get S
|
|
get = Get (\s -> (s, s))
|
|
|
|
put :: S -> Get ()
|
|
put s = Get (\_ -> ((), s))
|
|
|
|
------------------------------------------------------------------------
|
|
--
|
|
-- dons, GHC 6.10: explicit inlining disabled, was killing performance.
|
|
-- Without it, GHC seems to do just fine. And we get similar
|
|
-- performance with 6.8.2 anyway.
|
|
--
|
|
|
|
initState :: L.ByteString -> S
|
|
initState xs = mkState xs 0
|
|
{- INLINE initState -}
|
|
|
|
{-
|
|
initState (B.LPS xs) =
|
|
case xs of
|
|
[] -> S B.empty L.empty 0
|
|
(x:xs') -> S x (B.LPS xs') 0
|
|
-}
|
|
|
|
#ifndef BYTESTRING_IN_BASE
|
|
mkState :: L.ByteString -> Int64 -> S
|
|
mkState l = case l of
|
|
L.Empty -> S B.empty L.empty
|
|
L.Chunk x xs -> S x xs
|
|
{- INLINE mkState -}
|
|
|
|
#else
|
|
mkState :: L.ByteString -> Int64 -> S
|
|
mkState (B.LPS xs) =
|
|
case xs of
|
|
[] -> S B.empty L.empty
|
|
(x:xs') -> S x (B.LPS xs')
|
|
#endif
|
|
|
|
-- | Run the Get monad applies a 'get'-based parser on the input ByteString
|
|
runGet :: Get a -> L.ByteString -> a
|
|
runGet m str = case unGet m (initState str) of (a, _) -> a
|
|
|
|
-- | Run the Get monad applies a 'get'-based parser on the input
|
|
-- ByteString. Additional to the result of get it returns the number of
|
|
-- consumed bytes and the rest of the input.
|
|
runGetState :: Get a -> L.ByteString -> Int64 -> (a, L.ByteString, Int64)
|
|
runGetState m str off =
|
|
case unGet m (mkState str off) of
|
|
(a, ~(S s ss newOff)) -> (a, s `joinBS` ss, newOff)
|
|
|
|
------------------------------------------------------------------------
|
|
|
|
failDesc :: String -> Get a
|
|
failDesc err = do
|
|
S _ _ bytes <- get
|
|
Get (error (err ++ ". Failed reading at byte position " ++ show bytes))
|
|
|
|
-- | Skip ahead @n@ bytes. Fails if fewer than @n@ bytes are available.
|
|
skip :: Int -> Get ()
|
|
skip n = readN (fromIntegral n) (const ())
|
|
|
|
-- | Skip ahead @n@ bytes. No error if there isn't enough bytes.
|
|
uncheckedSkip :: Int64 -> Get ()
|
|
uncheckedSkip n = do
|
|
S s ss bytes <- get
|
|
if fromIntegral (B.length s) >= n
|
|
then put (S (B.drop (fromIntegral n) s) ss (bytes + n))
|
|
else do
|
|
let rest = L.drop (n - fromIntegral (B.length s)) ss
|
|
put $! mkState rest (bytes + n)
|
|
|
|
-- | Run @ga@, but return without consuming its input.
|
|
-- Fails if @ga@ fails.
|
|
lookAhead :: Get a -> Get a
|
|
lookAhead ga = do
|
|
s <- get
|
|
a <- ga
|
|
put s
|
|
return a
|
|
|
|
-- | Like 'lookAhead', but consume the input if @gma@ returns 'Just _'.
|
|
-- Fails if @gma@ fails.
|
|
lookAheadM :: Get (Maybe a) -> Get (Maybe a)
|
|
lookAheadM gma = do
|
|
s <- get
|
|
ma <- gma
|
|
when (isNothing ma) $
|
|
put s
|
|
return ma
|
|
|
|
-- | Like 'lookAhead', but consume the input if @gea@ returns 'Right _'.
|
|
-- Fails if @gea@ fails.
|
|
lookAheadE :: Get (Either a b) -> Get (Either a b)
|
|
lookAheadE gea = do
|
|
s <- get
|
|
ea <- gea
|
|
case ea of
|
|
Left _ -> put s
|
|
_ -> return ()
|
|
return ea
|
|
|
|
-- | Get the next up to @n@ bytes as a lazy ByteString, without consuming them.
|
|
uncheckedLookAhead :: Int64 -> Get L.ByteString
|
|
uncheckedLookAhead n = do
|
|
S s ss _ <- get
|
|
if n <= fromIntegral (B.length s)
|
|
then return (L.fromChunks [B.take (fromIntegral n) s])
|
|
else return $ L.take n (s `joinBS` ss)
|
|
|
|
------------------------------------------------------------------------
|
|
-- Utility
|
|
|
|
-- | Get the total number of bytes read to this point.
|
|
bytesRead :: Get Int64
|
|
bytesRead = do
|
|
S _ _ b <- get
|
|
return b
|
|
|
|
-- | Get the number of remaining unparsed bytes.
|
|
-- Useful for checking whether all input has been consumed.
|
|
-- Note that this forces the rest of the input.
|
|
remaining :: Get Int64
|
|
remaining = do
|
|
S s ss _ <- get
|
|
return (fromIntegral (B.length s) + L.length ss)
|
|
|
|
-- | Test whether all input has been consumed,
|
|
-- i.e. there are no remaining unparsed bytes.
|
|
isEmpty :: Get Bool
|
|
isEmpty = do
|
|
S s ss _ <- get
|
|
return (B.null s && L.null ss)
|
|
|
|
------------------------------------------------------------------------
|
|
-- Utility with ByteStrings
|
|
|
|
-- | An efficient 'get' method for strict ByteStrings. Fails if fewer
|
|
-- than @n@ bytes are left in the input.
|
|
getByteString :: Int -> Get B.ByteString
|
|
getByteString n = readN n id
|
|
{-# INLINE getByteString #-}
|
|
|
|
-- | An efficient 'get' method for lazy ByteStrings. Does not fail if fewer than
|
|
-- @n@ bytes are left in the input.
|
|
getLazyByteString :: Int64 -> Get L.ByteString
|
|
getLazyByteString n = do
|
|
S s ss bytes <- get
|
|
let big = s `joinBS` ss
|
|
case splitAtST n big of
|
|
(consume, rest) -> do put $ mkState rest (bytes + n)
|
|
return consume
|
|
{-# INLINE getLazyByteString #-}
|
|
|
|
-- | Get a lazy ByteString that is terminated with a NUL byte. Fails
|
|
-- if it reaches the end of input without hitting a NUL.
|
|
getLazyByteStringNul :: Get L.ByteString
|
|
getLazyByteStringNul = do
|
|
S s ss bytes <- get
|
|
let big = s `joinBS` ss
|
|
(consume, t) = L.break (== 0) big
|
|
(h, rest) = L.splitAt 1 t
|
|
if L.null h
|
|
then fail "too few bytes"
|
|
else do
|
|
put $ mkState rest (bytes + L.length consume + 1)
|
|
return consume
|
|
{-# INLINE getLazyByteStringNul #-}
|
|
|
|
-- | Get the remaining bytes as a lazy ByteString
|
|
getRemainingLazyByteString :: Get L.ByteString
|
|
getRemainingLazyByteString = do
|
|
S s ss _ <- get
|
|
return (s `joinBS` ss)
|
|
|
|
------------------------------------------------------------------------
|
|
-- Helpers
|
|
|
|
-- | Pull @n@ bytes from the input, as a strict ByteString.
|
|
getBytes :: Int -> Get B.ByteString
|
|
getBytes n = do
|
|
S s ss bytes <- get
|
|
if n <= B.length s
|
|
then do let (consume,rest) = B.splitAt n s
|
|
put $! S rest ss (bytes + fromIntegral n)
|
|
return $! consume
|
|
else
|
|
case L.splitAt (fromIntegral n) (s `joinBS` ss) of
|
|
(consuming, rest) ->
|
|
do let now = B.concat . L.toChunks $ consuming
|
|
put $! mkState rest (bytes + fromIntegral n)
|
|
-- forces the next chunk before this one is returned
|
|
if (B.length now < n)
|
|
then
|
|
fail "too few bytes"
|
|
else
|
|
return now
|
|
{- INLINE getBytes -}
|
|
-- ^ important
|
|
|
|
#ifndef BYTESTRING_IN_BASE
|
|
joinBS :: B.ByteString -> L.ByteString -> L.ByteString
|
|
joinBS bb lb
|
|
| B.null bb = lb
|
|
| otherwise = L.Chunk bb lb
|
|
|
|
#else
|
|
joinBS :: B.ByteString -> L.ByteString -> L.ByteString
|
|
joinBS bb (B.LPS lb)
|
|
| B.null bb = B.LPS lb
|
|
| otherwise = B.LPS (bb:lb)
|
|
#endif
|
|
-- don't use L.append, it's strict in it's second argument :/
|
|
{- INLINE joinBS -}
|
|
|
|
-- | Split a ByteString. If the first result is consumed before the --
|
|
-- second, this runs in constant heap space.
|
|
--
|
|
-- You must force the returned tuple for that to work, e.g.
|
|
--
|
|
-- > case splitAtST n xs of
|
|
-- > (ys,zs) -> consume ys ... consume zs
|
|
--
|
|
splitAtST :: Int64 -> L.ByteString -> (L.ByteString, L.ByteString)
|
|
splitAtST i ps | i <= 0 = (L.empty, ps)
|
|
#ifndef BYTESTRING_IN_BASE
|
|
splitAtST i ps = runST (
|
|
do r <- newSTRef undefined
|
|
xs <- first r i ps
|
|
ys <- unsafeInterleaveST (readSTRef r)
|
|
return (xs, ys))
|
|
|
|
where
|
|
first r 0 xs@(L.Chunk _ _) = writeSTRef r xs >> return L.Empty
|
|
first r _ L.Empty = writeSTRef r L.Empty >> return L.Empty
|
|
|
|
first r n (L.Chunk x xs)
|
|
| n < l = do writeSTRef r (L.Chunk (B.drop (fromIntegral n) x) xs)
|
|
return $ L.Chunk (B.take (fromIntegral n) x) L.Empty
|
|
| otherwise = do writeSTRef r (L.drop (n - l) xs)
|
|
liftM (L.Chunk x) $ unsafeInterleaveST (first r (n - l) xs)
|
|
|
|
where l = fromIntegral (B.length x)
|
|
#else
|
|
splitAtST i (B.LPS ps) = runST (
|
|
do r <- newSTRef undefined
|
|
xs <- first r i ps
|
|
ys <- unsafeInterleaveST (readSTRef r)
|
|
return (B.LPS xs, B.LPS ys))
|
|
|
|
where first r 0 xs = writeSTRef r xs >> return []
|
|
first r _ [] = writeSTRef r [] >> return []
|
|
first r n (x:xs)
|
|
| n < l = do writeSTRef r (B.drop (fromIntegral n) x : xs)
|
|
return [B.take (fromIntegral n) x]
|
|
| otherwise = do writeSTRef r (L.toChunks (L.drop (n - l) (B.LPS xs)))
|
|
fmap (x:) $ unsafeInterleaveST (first r (n - l) xs)
|
|
|
|
where l = fromIntegral (B.length x)
|
|
#endif
|
|
{- INLINE splitAtST -}
|
|
|
|
-- Pull n bytes from the input, and apply a parser to those bytes,
|
|
-- yielding a value. If less than @n@ bytes are available, fail with an
|
|
-- error. This wraps @getBytes@.
|
|
readN :: Int -> (B.ByteString -> a) -> Get a
|
|
readN n f = fmap f $ getBytes n
|
|
{- INLINE readN -}
|
|
-- ^ important
|
|
|
|
------------------------------------------------------------------------
|
|
-- Primtives
|
|
|
|
-- helper, get a raw Ptr onto a strict ByteString copied out of the
|
|
-- underlying lazy byteString. So many indirections from the raw parser
|
|
-- state that my head hurts...
|
|
|
|
getPtr :: Storable a => Int -> Get a
|
|
getPtr n = do
|
|
(fp,o,_) <- readN n B.toForeignPtr
|
|
return . B.accursedUnutterablePerformIO $ withForeignPtr fp $ \p -> peek (castPtr $ p `plusPtr` o)
|
|
{- INLINE getPtr -}
|
|
|
|
------------------------------------------------------------------------
|
|
|
|
-- | Read a Word8 from the monad state
|
|
getWord8 :: Get Word8
|
|
getWord8 = getPtr (sizeOf (undefined :: Word8))
|
|
{- INLINE getWord8 -}
|
|
|
|
-- | Read a Word16 in big endian format
|
|
getWord16be :: Get Word16
|
|
getWord16be = do
|
|
s <- readN 2 id
|
|
return $! (fromIntegral (s `B.index` 0) `shiftl_w16` 8) .|.
|
|
(fromIntegral (s `B.index` 1))
|
|
{- INLINE getWord16be -}
|
|
|
|
-- | Read a Word16 in little endian format
|
|
getWord16le :: Get Word16
|
|
getWord16le = do
|
|
s <- readN 2 id
|
|
return $! (fromIntegral (s `B.index` 1) `shiftl_w16` 8) .|.
|
|
(fromIntegral (s `B.index` 0) )
|
|
{- INLINE getWord16le -}
|
|
|
|
-- | Read a Word32 in big endian format
|
|
getWord32be :: Get Word32
|
|
getWord32be = do
|
|
s <- readN 4 id
|
|
return $! (fromIntegral (s `B.index` 0) `shiftl_w32` 24) .|.
|
|
(fromIntegral (s `B.index` 1) `shiftl_w32` 16) .|.
|
|
(fromIntegral (s `B.index` 2) `shiftl_w32` 8) .|.
|
|
(fromIntegral (s `B.index` 3) )
|
|
{- INLINE getWord32be -}
|
|
|
|
-- | Read a Word32 in little endian format
|
|
getWord32le :: Get Word32
|
|
getWord32le = do
|
|
s <- readN 4 id
|
|
return $! (fromIntegral (s `B.index` 3) `shiftl_w32` 24) .|.
|
|
(fromIntegral (s `B.index` 2) `shiftl_w32` 16) .|.
|
|
(fromIntegral (s `B.index` 1) `shiftl_w32` 8) .|.
|
|
(fromIntegral (s `B.index` 0) )
|
|
{- INLINE getWord32le -}
|
|
|
|
-- | Read a Word64 in big endian format
|
|
getWord64be :: Get Word64
|
|
getWord64be = do
|
|
s <- readN 8 id
|
|
return $! (fromIntegral (s `B.index` 0) `shiftl_w64` 56) .|.
|
|
(fromIntegral (s `B.index` 1) `shiftl_w64` 48) .|.
|
|
(fromIntegral (s `B.index` 2) `shiftl_w64` 40) .|.
|
|
(fromIntegral (s `B.index` 3) `shiftl_w64` 32) .|.
|
|
(fromIntegral (s `B.index` 4) `shiftl_w64` 24) .|.
|
|
(fromIntegral (s `B.index` 5) `shiftl_w64` 16) .|.
|
|
(fromIntegral (s `B.index` 6) `shiftl_w64` 8) .|.
|
|
(fromIntegral (s `B.index` 7) )
|
|
{- INLINE getWord64be -}
|
|
|
|
-- | Read a Word64 in little endian format
|
|
getWord64le :: Get Word64
|
|
getWord64le = do
|
|
s <- readN 8 id
|
|
return $! (fromIntegral (s `B.index` 7) `shiftl_w64` 56) .|.
|
|
(fromIntegral (s `B.index` 6) `shiftl_w64` 48) .|.
|
|
(fromIntegral (s `B.index` 5) `shiftl_w64` 40) .|.
|
|
(fromIntegral (s `B.index` 4) `shiftl_w64` 32) .|.
|
|
(fromIntegral (s `B.index` 3) `shiftl_w64` 24) .|.
|
|
(fromIntegral (s `B.index` 2) `shiftl_w64` 16) .|.
|
|
(fromIntegral (s `B.index` 1) `shiftl_w64` 8) .|.
|
|
(fromIntegral (s `B.index` 0) )
|
|
{- INLINE getWord64le -}
|
|
|
|
------------------------------------------------------------------------
|
|
-- Host-endian reads
|
|
|
|
-- | /O(1)./ Read a single native machine word. The word is read in
|
|
-- host order, host endian form, for the machine you're on. On a 64 bit
|
|
-- machine the Word is an 8 byte value, on a 32 bit machine, 4 bytes.
|
|
getWordhost :: Get Word
|
|
getWordhost = getPtr (sizeOf (undefined :: Word))
|
|
{- INLINE getWordhost -}
|
|
|
|
-- | /O(1)./ Read a 2 byte Word16 in native host order and host endianness.
|
|
getWord16host :: Get Word16
|
|
getWord16host = getPtr (sizeOf (undefined :: Word16))
|
|
{- INLINE getWord16host -}
|
|
|
|
-- | /O(1)./ Read a Word32 in native host order and host endianness.
|
|
getWord32host :: Get Word32
|
|
getWord32host = getPtr (sizeOf (undefined :: Word32))
|
|
{- INLINE getWord32host -}
|
|
|
|
-- | /O(1)./ Read a Word64 in native host order and host endianess.
|
|
getWord64host :: Get Word64
|
|
getWord64host = getPtr (sizeOf (undefined :: Word64))
|
|
{- INLINE getWord64host -}
|
|
|
|
------------------------------------------------------------------------
|
|
-- Unchecked shifts
|
|
|
|
shiftl_w16 :: Word16 -> Int -> Word16
|
|
shiftl_w32 :: Word32 -> Int -> Word32
|
|
shiftl_w64 :: Word64 -> Int -> Word64
|
|
|
|
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
|
|
shiftl_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftL#` i)
|
|
shiftl_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftL#` i)
|
|
|
|
#if WORD_SIZE_IN_BITS < 64
|
|
shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL64#` i)
|
|
|
|
#if __GLASGOW_HASKELL__ <= 606
|
|
-- Exported by GHC.Word in GHC 6.8 and higher
|
|
foreign import ccall unsafe "stg_uncheckedShiftL64"
|
|
uncheckedShiftL64# :: Word64# -> Int# -> Word64#
|
|
#endif
|
|
|
|
#else
|
|
shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL#` i)
|
|
#endif
|
|
|
|
#else
|
|
shiftl_w16 = shiftL
|
|
shiftl_w32 = shiftL
|
|
shiftl_w64 = shiftL
|
|
#endif
|