forked from GitHub/gf-core
binary serialization for PGF
This commit is contained in:
13
GF.cabal
13
GF.cabal
@@ -575,23 +575,25 @@ library
|
|||||||
PGF.Parsing.FCFG
|
PGF.Parsing.FCFG
|
||||||
PGF.Expr
|
PGF.Expr
|
||||||
PGF.Type
|
PGF.Type
|
||||||
PGF.Raw.Parse
|
|
||||||
PGF.Raw.Print
|
|
||||||
PGF.Raw.Convert
|
|
||||||
PGF.Raw.Abstract
|
|
||||||
PGF.AbsCompute
|
PGF.AbsCompute
|
||||||
PGF.Paraphrase
|
PGF.Paraphrase
|
||||||
PGF.TypeCheck
|
PGF.TypeCheck
|
||||||
|
PGF.Binary
|
||||||
GF.Data.MultiMap
|
GF.Data.MultiMap
|
||||||
GF.Data.Utilities
|
GF.Data.Utilities
|
||||||
GF.Data.SortedList
|
GF.Data.SortedList
|
||||||
GF.Data.Assoc
|
GF.Data.Assoc
|
||||||
GF.Data.ErrM
|
GF.Data.ErrM
|
||||||
GF.Text.UTF8
|
|
||||||
-- needed only for the on demand generation of PMCFG
|
-- needed only for the on demand generation of PMCFG
|
||||||
GF.Data.BacktrackM
|
GF.Data.BacktrackM
|
||||||
GF.Compile.GenerateFCFG
|
GF.Compile.GenerateFCFG
|
||||||
GF.Compile.GeneratePMCFG
|
GF.Compile.GeneratePMCFG
|
||||||
|
-- not really part of GF but I have changed the original binary library
|
||||||
|
-- and we have to keep the copy for now.
|
||||||
|
Data.Binary
|
||||||
|
Data.Binary.Put
|
||||||
|
Data.Binary.Get
|
||||||
|
Data.Binary.Builder
|
||||||
|
|
||||||
executable gf
|
executable gf
|
||||||
build-depends: base,
|
build-depends: base,
|
||||||
@@ -701,6 +703,7 @@ executable gf
|
|||||||
PGF.AbsCompute
|
PGF.AbsCompute
|
||||||
PGF.Paraphrase
|
PGF.Paraphrase
|
||||||
PGF.TypeCheck
|
PGF.TypeCheck
|
||||||
|
PGF.Binary
|
||||||
GFC
|
GFC
|
||||||
GFI
|
GFI
|
||||||
|
|
||||||
|
|||||||
792
src/Data/Binary.hs
Normal file
792
src/Data/Binary.hs
Normal file
@@ -0,0 +1,792 @@
|
|||||||
|
{-# LANGUAGE CPP, FlexibleInstances, FlexibleContexts #-}
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : Data.Binary
|
||||||
|
-- Copyright : Lennart Kolmodin
|
||||||
|
-- License : BSD3-style (see LICENSE)
|
||||||
|
--
|
||||||
|
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
|
||||||
|
-- Stability : unstable
|
||||||
|
-- Portability : portable to Hugs and GHC. Requires the FFI and some flexible instances
|
||||||
|
--
|
||||||
|
-- Binary serialisation of Haskell values to and from lazy ByteStrings.
|
||||||
|
-- The Binary library provides methods for encoding Haskell values as
|
||||||
|
-- streams of bytes directly in memory. The resulting @ByteString@ can
|
||||||
|
-- then be written to disk, sent over the network, or futher processed
|
||||||
|
-- (for example, compressed with gzip).
|
||||||
|
--
|
||||||
|
-- The 'Binary' package is notable in that it provides both pure, and
|
||||||
|
-- high performance serialisation.
|
||||||
|
--
|
||||||
|
-- Values are always encoded in network order (big endian) form, and
|
||||||
|
-- encoded data should be portable across machine endianess, word size,
|
||||||
|
-- or compiler version. For example, data encoded using the Binary class
|
||||||
|
-- could be written from GHC, and read back in Hugs.
|
||||||
|
--
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
module Data.Binary (
|
||||||
|
|
||||||
|
-- * The Binary class
|
||||||
|
Binary(..)
|
||||||
|
|
||||||
|
-- $example
|
||||||
|
|
||||||
|
-- * The Get and Put monads
|
||||||
|
, Get
|
||||||
|
, Put
|
||||||
|
|
||||||
|
-- * Useful helpers for writing instances
|
||||||
|
, putWord8
|
||||||
|
, getWord8
|
||||||
|
|
||||||
|
-- * Binary serialisation
|
||||||
|
, encode -- :: Binary a => a -> ByteString
|
||||||
|
, decode -- :: Binary a => ByteString -> a
|
||||||
|
|
||||||
|
-- * IO functions for serialisation
|
||||||
|
, encodeFile -- :: Binary a => FilePath -> a -> IO ()
|
||||||
|
, decodeFile -- :: Binary a => FilePath -> IO a
|
||||||
|
|
||||||
|
-- Lazy put and get
|
||||||
|
-- , lazyPut
|
||||||
|
-- , lazyGet
|
||||||
|
|
||||||
|
, module Data.Word -- useful
|
||||||
|
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Data.Word
|
||||||
|
|
||||||
|
import Data.Binary.Put
|
||||||
|
import Data.Binary.Get
|
||||||
|
|
||||||
|
import Control.Monad
|
||||||
|
import Foreign
|
||||||
|
import System.IO
|
||||||
|
|
||||||
|
import Data.ByteString.Lazy (ByteString)
|
||||||
|
import qualified Data.ByteString.Lazy as L
|
||||||
|
|
||||||
|
import Data.Char (chr,ord)
|
||||||
|
import Data.List (unfoldr)
|
||||||
|
|
||||||
|
-- And needed for the instances:
|
||||||
|
import qualified Data.ByteString as B
|
||||||
|
import qualified Data.Map as Map
|
||||||
|
import qualified Data.Set as Set
|
||||||
|
import qualified Data.IntMap as IntMap
|
||||||
|
import qualified Data.IntSet as IntSet
|
||||||
|
import qualified Data.Ratio as R
|
||||||
|
|
||||||
|
import qualified Data.Tree as T
|
||||||
|
|
||||||
|
import Data.Array.Unboxed
|
||||||
|
|
||||||
|
--
|
||||||
|
-- This isn't available in older Hugs or older GHC
|
||||||
|
--
|
||||||
|
#if __GLASGOW_HASKELL__ >= 606
|
||||||
|
import qualified Data.Sequence as Seq
|
||||||
|
import qualified Data.Foldable as Fold
|
||||||
|
#endif
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
|
||||||
|
-- | The @Binary@ class provides 'put' and 'get', methods to encode and
|
||||||
|
-- decode a Haskell value to a lazy ByteString. It mirrors the Read and
|
||||||
|
-- Show classes for textual representation of Haskell types, and is
|
||||||
|
-- suitable for serialising Haskell values to disk, over the network.
|
||||||
|
--
|
||||||
|
-- For parsing and generating simple external binary formats (e.g. C
|
||||||
|
-- structures), Binary may be used, but in general is not suitable
|
||||||
|
-- for complex protocols. Instead use the Put and Get primitives
|
||||||
|
-- directly.
|
||||||
|
--
|
||||||
|
-- Instances of Binary should satisfy the following property:
|
||||||
|
--
|
||||||
|
-- > decode . encode == id
|
||||||
|
--
|
||||||
|
-- That is, the 'get' and 'put' methods should be the inverse of each
|
||||||
|
-- other. A range of instances are provided for basic Haskell types.
|
||||||
|
--
|
||||||
|
class Binary t where
|
||||||
|
-- | Encode a value in the Put monad.
|
||||||
|
put :: t -> Put
|
||||||
|
-- | Decode a value in the Get monad
|
||||||
|
get :: Get t
|
||||||
|
|
||||||
|
-- $example
|
||||||
|
-- To serialise a custom type, an instance of Binary for that type is
|
||||||
|
-- required. For example, suppose we have a data structure:
|
||||||
|
--
|
||||||
|
-- > data Exp = IntE Int
|
||||||
|
-- > | OpE String Exp Exp
|
||||||
|
-- > deriving Show
|
||||||
|
--
|
||||||
|
-- We can encode values of this type into bytestrings using the
|
||||||
|
-- following instance, which proceeds by recursively breaking down the
|
||||||
|
-- structure to serialise:
|
||||||
|
--
|
||||||
|
-- > instance Binary Exp where
|
||||||
|
-- > put (IntE i) = do put (0 :: Word8)
|
||||||
|
-- > put i
|
||||||
|
-- > put (OpE s e1 e2) = do put (1 :: Word8)
|
||||||
|
-- > put s
|
||||||
|
-- > put e1
|
||||||
|
-- > put e2
|
||||||
|
-- >
|
||||||
|
-- > get = do t <- get :: Get Word8
|
||||||
|
-- > case t of
|
||||||
|
-- > 0 -> do i <- get
|
||||||
|
-- > return (IntE i)
|
||||||
|
-- > 1 -> do s <- get
|
||||||
|
-- > e1 <- get
|
||||||
|
-- > e2 <- get
|
||||||
|
-- > return (OpE s e1 e2)
|
||||||
|
--
|
||||||
|
-- Note how we write an initial tag byte to indicate each variant of the
|
||||||
|
-- data type.
|
||||||
|
--
|
||||||
|
-- We can simplify the writing of 'get' instances using monadic
|
||||||
|
-- combinators:
|
||||||
|
--
|
||||||
|
-- > get = do tag <- getWord8
|
||||||
|
-- > case tag of
|
||||||
|
-- > 0 -> liftM IntE get
|
||||||
|
-- > 1 -> liftM3 OpE get get get
|
||||||
|
--
|
||||||
|
-- The generation of Binary instances has been automated by a script
|
||||||
|
-- using Scrap Your Boilerplate generics. Use the script here:
|
||||||
|
-- <http://darcs.haskell.org/binary/tools/derive/BinaryDerive.hs>.
|
||||||
|
--
|
||||||
|
-- To derive the instance for a type, load this script into GHCi, and
|
||||||
|
-- bring your type into scope. Your type can then have its Binary
|
||||||
|
-- instances derived as follows:
|
||||||
|
--
|
||||||
|
-- > $ ghci -fglasgow-exts BinaryDerive.hs
|
||||||
|
-- > *BinaryDerive> :l Example.hs
|
||||||
|
-- > *Main> deriveM (undefined :: Drinks)
|
||||||
|
-- >
|
||||||
|
-- > instance Binary Main.Drinks where
|
||||||
|
-- > put (Beer a) = putWord8 0 >> put a
|
||||||
|
-- > put Coffee = putWord8 1
|
||||||
|
-- > put Tea = putWord8 2
|
||||||
|
-- > put EnergyDrink = putWord8 3
|
||||||
|
-- > put Water = putWord8 4
|
||||||
|
-- > put Wine = putWord8 5
|
||||||
|
-- > put Whisky = putWord8 6
|
||||||
|
-- > get = do
|
||||||
|
-- > tag_ <- getWord8
|
||||||
|
-- > case tag_ of
|
||||||
|
-- > 0 -> get >>= \a -> return (Beer a)
|
||||||
|
-- > 1 -> return Coffee
|
||||||
|
-- > 2 -> return Tea
|
||||||
|
-- > 3 -> return EnergyDrink
|
||||||
|
-- > 4 -> return Water
|
||||||
|
-- > 5 -> return Wine
|
||||||
|
-- > 6 -> return Whisky
|
||||||
|
-- >
|
||||||
|
--
|
||||||
|
-- To serialise this to a bytestring, we use 'encode', which packs the
|
||||||
|
-- data structure into a binary format, in a lazy bytestring
|
||||||
|
--
|
||||||
|
-- > > let e = OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
|
||||||
|
-- > > let v = encode e
|
||||||
|
--
|
||||||
|
-- Where 'v' is a binary encoded data structure. To reconstruct the
|
||||||
|
-- original data, we use 'decode'
|
||||||
|
--
|
||||||
|
-- > > decode v :: Exp
|
||||||
|
-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
|
||||||
|
--
|
||||||
|
-- The lazy ByteString that results from 'encode' can be written to
|
||||||
|
-- disk, and read from disk using Data.ByteString.Lazy IO functions,
|
||||||
|
-- such as hPutStr or writeFile:
|
||||||
|
--
|
||||||
|
-- > > writeFile "/tmp/exp.txt" (encode e)
|
||||||
|
--
|
||||||
|
-- And read back with:
|
||||||
|
--
|
||||||
|
-- > > readFile "/tmp/exp.txt" >>= return . decode :: IO Exp
|
||||||
|
-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
|
||||||
|
--
|
||||||
|
-- We can also directly serialise a value to and from a Handle, or a file:
|
||||||
|
--
|
||||||
|
-- > > v <- decodeFile "/tmp/exp.txt" :: IO Exp
|
||||||
|
-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
|
||||||
|
--
|
||||||
|
-- And write a value to disk
|
||||||
|
--
|
||||||
|
-- > > encodeFile "/tmp/a.txt" v
|
||||||
|
--
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- Wrappers to run the underlying monad
|
||||||
|
|
||||||
|
-- | Encode a value using binary serialisation to a lazy ByteString.
|
||||||
|
--
|
||||||
|
encode :: Binary a => a -> ByteString
|
||||||
|
encode = runPut . put
|
||||||
|
{-# INLINE encode #-}
|
||||||
|
|
||||||
|
-- | Decode a value from a lazy ByteString, reconstructing the original structure.
|
||||||
|
--
|
||||||
|
decode :: Binary a => ByteString -> a
|
||||||
|
decode = runGet get
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- Convenience IO operations
|
||||||
|
|
||||||
|
-- | Lazily serialise a value to a file
|
||||||
|
--
|
||||||
|
-- This is just a convenience function, it's defined simply as:
|
||||||
|
--
|
||||||
|
-- > encodeFile f = B.writeFile f . encode
|
||||||
|
--
|
||||||
|
-- So for example if you wanted to compress as well, you could use:
|
||||||
|
--
|
||||||
|
-- > B.writeFile f . compress . encode
|
||||||
|
--
|
||||||
|
encodeFile :: Binary a => FilePath -> a -> IO ()
|
||||||
|
encodeFile f v = L.writeFile f (encode v)
|
||||||
|
|
||||||
|
-- | Lazily reconstruct a value previously written to a file.
|
||||||
|
--
|
||||||
|
-- This is just a convenience function, it's defined simply as:
|
||||||
|
--
|
||||||
|
-- > decodeFile f = return . decode =<< B.readFile f
|
||||||
|
--
|
||||||
|
-- So for example if you wanted to decompress as well, you could use:
|
||||||
|
--
|
||||||
|
-- > return . decode . decompress =<< B.readFile f
|
||||||
|
--
|
||||||
|
-- After contructing the data from the input file, 'decodeFile' checks
|
||||||
|
-- if the file is empty, and in doing so will force the associated file
|
||||||
|
-- handle closed, if it is indeed empty. If the file is not empty,
|
||||||
|
-- it is up to the decoding instance to consume the rest of the data,
|
||||||
|
-- or otherwise finalise the resource.
|
||||||
|
--
|
||||||
|
decodeFile :: Binary a => FilePath -> IO a
|
||||||
|
decodeFile f = do
|
||||||
|
s <- L.readFile f
|
||||||
|
return $ runGet (do v <- get
|
||||||
|
m <- isEmpty
|
||||||
|
m `seq` return v) s
|
||||||
|
|
||||||
|
-- needs bytestring 0.9.1.x to work
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- Lazy put and get
|
||||||
|
|
||||||
|
-- lazyPut :: (Binary a) => a -> Put
|
||||||
|
-- lazyPut a = put (encode a)
|
||||||
|
|
||||||
|
-- lazyGet :: (Binary a) => Get a
|
||||||
|
-- lazyGet = fmap decode get
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- Simple instances
|
||||||
|
|
||||||
|
-- The () type need never be written to disk: values of singleton type
|
||||||
|
-- can be reconstructed from the type alone
|
||||||
|
instance Binary () where
|
||||||
|
put () = return ()
|
||||||
|
get = return ()
|
||||||
|
|
||||||
|
-- Bools are encoded as a byte in the range 0 .. 1
|
||||||
|
instance Binary Bool where
|
||||||
|
put = putWord8 . fromIntegral . fromEnum
|
||||||
|
get = liftM (toEnum . fromIntegral) getWord8
|
||||||
|
|
||||||
|
-- Values of type 'Ordering' are encoded as a byte in the range 0 .. 2
|
||||||
|
instance Binary Ordering where
|
||||||
|
put = putWord8 . fromIntegral . fromEnum
|
||||||
|
get = liftM (toEnum . fromIntegral) getWord8
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- Words and Ints
|
||||||
|
|
||||||
|
-- Words8s are written as bytes
|
||||||
|
instance Binary Word8 where
|
||||||
|
put = putWord8
|
||||||
|
get = getWord8
|
||||||
|
|
||||||
|
-- Words16s are written as 2 bytes in big-endian (network) order
|
||||||
|
instance Binary Word16 where
|
||||||
|
put = putWord16be
|
||||||
|
get = getWord16be
|
||||||
|
|
||||||
|
-- Words32s are written as 4 bytes in big-endian (network) order
|
||||||
|
instance Binary Word32 where
|
||||||
|
put = putWord32be
|
||||||
|
get = getWord32be
|
||||||
|
|
||||||
|
-- Words64s are written as 8 bytes in big-endian (network) order
|
||||||
|
instance Binary Word64 where
|
||||||
|
put = putWord64be
|
||||||
|
get = getWord64be
|
||||||
|
|
||||||
|
-- Int8s are written as a single byte.
|
||||||
|
instance Binary Int8 where
|
||||||
|
put i = put (fromIntegral i :: Word8)
|
||||||
|
get = liftM fromIntegral (get :: Get Word8)
|
||||||
|
|
||||||
|
-- Int16s are written as a 2 bytes in big endian format
|
||||||
|
instance Binary Int16 where
|
||||||
|
put i = put (fromIntegral i :: Word16)
|
||||||
|
get = liftM fromIntegral (get :: Get Word16)
|
||||||
|
|
||||||
|
-- Int32s are written as a 4 bytes in big endian format
|
||||||
|
instance Binary Int32 where
|
||||||
|
put i = put (fromIntegral i :: Word32)
|
||||||
|
get = liftM fromIntegral (get :: Get Word32)
|
||||||
|
|
||||||
|
-- Int64s are written as a 4 bytes in big endian format
|
||||||
|
instance Binary Int64 where
|
||||||
|
put i = put (fromIntegral i :: Word64)
|
||||||
|
get = liftM fromIntegral (get :: Get Word64)
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
|
||||||
|
-- Words are written as sequence of bytes. The last bit of each
|
||||||
|
-- byte indicates whether there are more bytes to be read
|
||||||
|
instance Binary Word where
|
||||||
|
put i | i <= 0x7f = do put a
|
||||||
|
| i <= 0x3fff = do put (a .|. 0x80)
|
||||||
|
put b
|
||||||
|
| i <= 0x1fffff = do put (a .|. 0x80)
|
||||||
|
put (b .|. 0x80)
|
||||||
|
put c
|
||||||
|
| i <= 0xfffffff = do put (a .|. 0x80)
|
||||||
|
put (b .|. 0x80)
|
||||||
|
put (c .|. 0x80)
|
||||||
|
put d
|
||||||
|
#if WORD_SIZE_IN_BITS < 64
|
||||||
|
| otherwise = do put (a .|. 0x80)
|
||||||
|
put (b .|. 0x80)
|
||||||
|
put (c .|. 0x80)
|
||||||
|
put (d .|. 0x80)
|
||||||
|
put e
|
||||||
|
#else
|
||||||
|
| i <= 0x7ffffffff = do put (a .|. 0x80)
|
||||||
|
put (b .|. 0x80)
|
||||||
|
put (c .|. 0x80)
|
||||||
|
put (d .|. 0x80)
|
||||||
|
put e
|
||||||
|
| i <= 0x3ffffffffff = do put (a .|. 0x80)
|
||||||
|
put (b .|. 0x80)
|
||||||
|
put (c .|. 0x80)
|
||||||
|
put (d .|. 0x80)
|
||||||
|
put (e .|. 0x80)
|
||||||
|
put f
|
||||||
|
| i <= 0x1ffffffffffff = do put (a .|. 0x80)
|
||||||
|
put (b .|. 0x80)
|
||||||
|
put (c .|. 0x80)
|
||||||
|
put (d .|. 0x80)
|
||||||
|
put (e .|. 0x80)
|
||||||
|
put (f .|. 0x80)
|
||||||
|
put g
|
||||||
|
| i <= 0xffffffffffffff = do put (a .|. 0x80)
|
||||||
|
put (b .|. 0x80)
|
||||||
|
put (c .|. 0x80)
|
||||||
|
put (d .|. 0x80)
|
||||||
|
put (e .|. 0x80)
|
||||||
|
put (f .|. 0x80)
|
||||||
|
put (g .|. 0x80)
|
||||||
|
put h
|
||||||
|
| i <= 0xffffffffffffff = do put (a .|. 0x80)
|
||||||
|
put (b .|. 0x80)
|
||||||
|
put (c .|. 0x80)
|
||||||
|
put (d .|. 0x80)
|
||||||
|
put (e .|. 0x80)
|
||||||
|
put (f .|. 0x80)
|
||||||
|
put (g .|. 0x80)
|
||||||
|
put h
|
||||||
|
| i <= 0x7fffffffffffffff = do put (a .|. 0x80)
|
||||||
|
put (b .|. 0x80)
|
||||||
|
put (c .|. 0x80)
|
||||||
|
put (d .|. 0x80)
|
||||||
|
put (e .|. 0x80)
|
||||||
|
put (f .|. 0x80)
|
||||||
|
put (g .|. 0x80)
|
||||||
|
put (h .|. 0x80)
|
||||||
|
put j
|
||||||
|
| otherwise = do put (a .|. 0x80)
|
||||||
|
put (b .|. 0x80)
|
||||||
|
put (c .|. 0x80)
|
||||||
|
put (d .|. 0x80)
|
||||||
|
put (e .|. 0x80)
|
||||||
|
put (f .|. 0x80)
|
||||||
|
put (g .|. 0x80)
|
||||||
|
put (h .|. 0x80)
|
||||||
|
put (j .|. 0x80)
|
||||||
|
put k
|
||||||
|
#endif
|
||||||
|
where
|
||||||
|
a = fromIntegral ( i .&. 0x7f) :: Word8
|
||||||
|
b = fromIntegral (shiftR i 7 .&. 0x7f) :: Word8
|
||||||
|
c = fromIntegral (shiftR i 14 .&. 0x7f) :: Word8
|
||||||
|
d = fromIntegral (shiftR i 21 .&. 0x7f) :: Word8
|
||||||
|
e = fromIntegral (shiftR i 28 .&. 0x7f) :: Word8
|
||||||
|
f = fromIntegral (shiftR i 35 .&. 0x7f) :: Word8
|
||||||
|
g = fromIntegral (shiftR i 42 .&. 0x7f) :: Word8
|
||||||
|
h = fromIntegral (shiftR i 49 .&. 0x7f) :: Word8
|
||||||
|
j = fromIntegral (shiftR i 56 .&. 0x7f) :: Word8
|
||||||
|
k = fromIntegral (shiftR i 63 .&. 0x7f) :: Word8
|
||||||
|
|
||||||
|
get = do i <- getWord8
|
||||||
|
(if i <= 0x7f
|
||||||
|
then return (fromIntegral i)
|
||||||
|
else do n <- get
|
||||||
|
return $ (n `shiftL` 7) .|. (fromIntegral (i .&. 0x7f)))
|
||||||
|
|
||||||
|
-- Int has the same representation as Word
|
||||||
|
instance Binary Int where
|
||||||
|
put i = put (fromIntegral i :: Word)
|
||||||
|
get = liftM fromIntegral (get :: Get Word)
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
--
|
||||||
|
-- Portable, and pretty efficient, serialisation of Integer
|
||||||
|
--
|
||||||
|
|
||||||
|
-- Fixed-size type for a subset of Integer
|
||||||
|
type SmallInt = Int32
|
||||||
|
|
||||||
|
-- Integers are encoded in two ways: if they fit inside a SmallInt,
|
||||||
|
-- they're written as a byte tag, and that value. If the Integer value
|
||||||
|
-- is too large to fit in a SmallInt, it is written as a byte array,
|
||||||
|
-- along with a sign and length field.
|
||||||
|
|
||||||
|
instance Binary Integer where
|
||||||
|
|
||||||
|
{-# INLINE put #-}
|
||||||
|
put n | n >= lo && n <= hi = do
|
||||||
|
putWord8 0
|
||||||
|
put (fromIntegral n :: SmallInt) -- fast path
|
||||||
|
where
|
||||||
|
lo = fromIntegral (minBound :: SmallInt) :: Integer
|
||||||
|
hi = fromIntegral (maxBound :: SmallInt) :: Integer
|
||||||
|
|
||||||
|
put n = do
|
||||||
|
putWord8 1
|
||||||
|
put sign
|
||||||
|
put (unroll (abs n)) -- unroll the bytes
|
||||||
|
where
|
||||||
|
sign = fromIntegral (signum n) :: Word8
|
||||||
|
|
||||||
|
{-# INLINE get #-}
|
||||||
|
get = do
|
||||||
|
tag <- get :: Get Word8
|
||||||
|
case tag of
|
||||||
|
0 -> liftM fromIntegral (get :: Get SmallInt)
|
||||||
|
_ -> do sign <- get
|
||||||
|
bytes <- get
|
||||||
|
let v = roll bytes
|
||||||
|
return $! if sign == (1 :: Word8) then v else - v
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Fold and unfold an Integer to and from a list of its bytes
|
||||||
|
--
|
||||||
|
unroll :: Integer -> [Word8]
|
||||||
|
unroll = unfoldr step
|
||||||
|
where
|
||||||
|
step 0 = Nothing
|
||||||
|
step i = Just (fromIntegral i, i `shiftR` 8)
|
||||||
|
|
||||||
|
roll :: [Word8] -> Integer
|
||||||
|
roll = foldr unstep 0
|
||||||
|
where
|
||||||
|
unstep b a = a `shiftL` 8 .|. fromIntegral b
|
||||||
|
|
||||||
|
{-
|
||||||
|
|
||||||
|
--
|
||||||
|
-- An efficient, raw serialisation for Integer (GHC only)
|
||||||
|
--
|
||||||
|
|
||||||
|
-- TODO This instance is not architecture portable. GMP stores numbers as
|
||||||
|
-- arrays of machine sized words, so the byte format is not portable across
|
||||||
|
-- architectures with different endianess and word size.
|
||||||
|
|
||||||
|
import Data.ByteString.Base (toForeignPtr,unsafePackAddress, memcpy)
|
||||||
|
import GHC.Base hiding (ord, chr)
|
||||||
|
import GHC.Prim
|
||||||
|
import GHC.Ptr (Ptr(..))
|
||||||
|
import GHC.IOBase (IO(..))
|
||||||
|
|
||||||
|
instance Binary Integer where
|
||||||
|
put (S# i) = putWord8 0 >> put (I# i)
|
||||||
|
put (J# s ba) = do
|
||||||
|
putWord8 1
|
||||||
|
put (I# s)
|
||||||
|
put (BA ba)
|
||||||
|
|
||||||
|
get = do
|
||||||
|
b <- getWord8
|
||||||
|
case b of
|
||||||
|
0 -> do (I# i#) <- get
|
||||||
|
return (S# i#)
|
||||||
|
_ -> do (I# s#) <- get
|
||||||
|
(BA a#) <- get
|
||||||
|
return (J# s# a#)
|
||||||
|
|
||||||
|
instance Binary ByteArray where
|
||||||
|
|
||||||
|
-- Pretty safe.
|
||||||
|
put (BA ba) =
|
||||||
|
let sz = sizeofByteArray# ba -- (primitive) in *bytes*
|
||||||
|
addr = byteArrayContents# ba
|
||||||
|
bs = unsafePackAddress (I# sz) addr
|
||||||
|
in put bs -- write as a ByteString. easy, yay!
|
||||||
|
|
||||||
|
-- Pretty scary. Should be quick though
|
||||||
|
get = do
|
||||||
|
(fp, off, n@(I# sz)) <- liftM toForeignPtr get -- so decode a ByteString
|
||||||
|
assert (off == 0) $ return $ unsafePerformIO $ do
|
||||||
|
(MBA arr) <- newByteArray sz -- and copy it into a ByteArray#
|
||||||
|
let to = byteArrayContents# (unsafeCoerce# arr) -- urk, is this safe?
|
||||||
|
withForeignPtr fp $ \from -> memcpy (Ptr to) from (fromIntegral n)
|
||||||
|
freezeByteArray arr
|
||||||
|
|
||||||
|
-- wrapper for ByteArray#
|
||||||
|
data ByteArray = BA {-# UNPACK #-} !ByteArray#
|
||||||
|
data MBA = MBA {-# UNPACK #-} !(MutableByteArray# RealWorld)
|
||||||
|
|
||||||
|
newByteArray :: Int# -> IO MBA
|
||||||
|
newByteArray sz = IO $ \s ->
|
||||||
|
case newPinnedByteArray# sz s of { (# s', arr #) ->
|
||||||
|
(# s', MBA arr #) }
|
||||||
|
|
||||||
|
freezeByteArray :: MutableByteArray# RealWorld -> IO ByteArray
|
||||||
|
freezeByteArray arr = IO $ \s ->
|
||||||
|
case unsafeFreezeByteArray# arr s of { (# s', arr' #) ->
|
||||||
|
(# s', BA arr' #) }
|
||||||
|
|
||||||
|
-}
|
||||||
|
|
||||||
|
instance (Binary a,Integral a) => Binary (R.Ratio a) where
|
||||||
|
put r = put (R.numerator r) >> put (R.denominator r)
|
||||||
|
get = liftM2 (R.%) get get
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
|
||||||
|
-- Char is serialised as UTF-8
|
||||||
|
instance Binary Char where
|
||||||
|
put a | c <= 0x7f = put (fromIntegral c :: Word8)
|
||||||
|
| c <= 0x7ff = do put (0xc0 .|. y)
|
||||||
|
put (0x80 .|. z)
|
||||||
|
| c <= 0xffff = do put (0xe0 .|. x)
|
||||||
|
put (0x80 .|. y)
|
||||||
|
put (0x80 .|. z)
|
||||||
|
| c <= 0x10ffff = do put (0xf0 .|. w)
|
||||||
|
put (0x80 .|. x)
|
||||||
|
put (0x80 .|. y)
|
||||||
|
put (0x80 .|. z)
|
||||||
|
| otherwise = error "Not a valid Unicode code point"
|
||||||
|
where
|
||||||
|
c = ord a
|
||||||
|
z, y, x, w :: Word8
|
||||||
|
z = fromIntegral (c .&. 0x3f)
|
||||||
|
y = fromIntegral (shiftR c 6 .&. 0x3f)
|
||||||
|
x = fromIntegral (shiftR c 12 .&. 0x3f)
|
||||||
|
w = fromIntegral (shiftR c 18 .&. 0x7)
|
||||||
|
|
||||||
|
get = do
|
||||||
|
let getByte = liftM (fromIntegral :: Word8 -> Int) get
|
||||||
|
shiftL6 = flip shiftL 6 :: Int -> Int
|
||||||
|
w <- getByte
|
||||||
|
r <- case () of
|
||||||
|
_ | w < 0x80 -> return w
|
||||||
|
| w < 0xe0 -> do
|
||||||
|
x <- liftM (xor 0x80) getByte
|
||||||
|
return (x .|. shiftL6 (xor 0xc0 w))
|
||||||
|
| w < 0xf0 -> do
|
||||||
|
x <- liftM (xor 0x80) getByte
|
||||||
|
y <- liftM (xor 0x80) getByte
|
||||||
|
return (y .|. shiftL6 (x .|. shiftL6
|
||||||
|
(xor 0xe0 w)))
|
||||||
|
| otherwise -> do
|
||||||
|
x <- liftM (xor 0x80) getByte
|
||||||
|
y <- liftM (xor 0x80) getByte
|
||||||
|
z <- liftM (xor 0x80) getByte
|
||||||
|
return (z .|. shiftL6 (y .|. shiftL6
|
||||||
|
(x .|. shiftL6 (xor 0xf0 w))))
|
||||||
|
return $! chr r
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- Instances for the first few tuples
|
||||||
|
|
||||||
|
instance (Binary a, Binary b) => Binary (a,b) where
|
||||||
|
put (a,b) = put a >> put b
|
||||||
|
get = liftM2 (,) get get
|
||||||
|
|
||||||
|
instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where
|
||||||
|
put (a,b,c) = put a >> put b >> put c
|
||||||
|
get = liftM3 (,,) get get get
|
||||||
|
|
||||||
|
instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where
|
||||||
|
put (a,b,c,d) = put a >> put b >> put c >> put d
|
||||||
|
get = liftM4 (,,,) get get get get
|
||||||
|
|
||||||
|
instance (Binary a, Binary b, Binary c, Binary d, Binary e) => Binary (a,b,c,d,e) where
|
||||||
|
put (a,b,c,d,e) = put a >> put b >> put c >> put d >> put e
|
||||||
|
get = liftM5 (,,,,) get get get get get
|
||||||
|
|
||||||
|
--
|
||||||
|
-- and now just recurse:
|
||||||
|
--
|
||||||
|
|
||||||
|
instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f)
|
||||||
|
=> Binary (a,b,c,d,e,f) where
|
||||||
|
put (a,b,c,d,e,f) = put (a,(b,c,d,e,f))
|
||||||
|
get = do (a,(b,c,d,e,f)) <- get ; return (a,b,c,d,e,f)
|
||||||
|
|
||||||
|
instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f, Binary g)
|
||||||
|
=> Binary (a,b,c,d,e,f,g) where
|
||||||
|
put (a,b,c,d,e,f,g) = put (a,(b,c,d,e,f,g))
|
||||||
|
get = do (a,(b,c,d,e,f,g)) <- get ; return (a,b,c,d,e,f,g)
|
||||||
|
|
||||||
|
instance (Binary a, Binary b, Binary c, Binary d, Binary e,
|
||||||
|
Binary f, Binary g, Binary h)
|
||||||
|
=> Binary (a,b,c,d,e,f,g,h) where
|
||||||
|
put (a,b,c,d,e,f,g,h) = put (a,(b,c,d,e,f,g,h))
|
||||||
|
get = do (a,(b,c,d,e,f,g,h)) <- get ; return (a,b,c,d,e,f,g,h)
|
||||||
|
|
||||||
|
instance (Binary a, Binary b, Binary c, Binary d, Binary e,
|
||||||
|
Binary f, Binary g, Binary h, Binary i)
|
||||||
|
=> Binary (a,b,c,d,e,f,g,h,i) where
|
||||||
|
put (a,b,c,d,e,f,g,h,i) = put (a,(b,c,d,e,f,g,h,i))
|
||||||
|
get = do (a,(b,c,d,e,f,g,h,i)) <- get ; return (a,b,c,d,e,f,g,h,i)
|
||||||
|
|
||||||
|
instance (Binary a, Binary b, Binary c, Binary d, Binary e,
|
||||||
|
Binary f, Binary g, Binary h, Binary i, Binary j)
|
||||||
|
=> Binary (a,b,c,d,e,f,g,h,i,j) where
|
||||||
|
put (a,b,c,d,e,f,g,h,i,j) = put (a,(b,c,d,e,f,g,h,i,j))
|
||||||
|
get = do (a,(b,c,d,e,f,g,h,i,j)) <- get ; return (a,b,c,d,e,f,g,h,i,j)
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- Container types
|
||||||
|
|
||||||
|
instance Binary a => Binary [a] where
|
||||||
|
put l = put (length l) >> mapM_ put l
|
||||||
|
get = do n <- get :: Get Int
|
||||||
|
xs <- replicateM n get
|
||||||
|
return xs
|
||||||
|
|
||||||
|
instance (Binary a) => Binary (Maybe a) where
|
||||||
|
put Nothing = putWord8 0
|
||||||
|
put (Just x) = putWord8 1 >> put x
|
||||||
|
get = do
|
||||||
|
w <- getWord8
|
||||||
|
case w of
|
||||||
|
0 -> return Nothing
|
||||||
|
_ -> liftM Just get
|
||||||
|
|
||||||
|
instance (Binary a, Binary b) => Binary (Either a b) where
|
||||||
|
put (Left a) = putWord8 0 >> put a
|
||||||
|
put (Right b) = putWord8 1 >> put b
|
||||||
|
get = do
|
||||||
|
w <- getWord8
|
||||||
|
case w of
|
||||||
|
0 -> liftM Left get
|
||||||
|
_ -> liftM Right get
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- ByteStrings (have specially efficient instances)
|
||||||
|
|
||||||
|
instance Binary B.ByteString where
|
||||||
|
put bs = do put (B.length bs)
|
||||||
|
putByteString bs
|
||||||
|
get = get >>= getByteString
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Using old versions of fps, this is a type synonym, and non portable
|
||||||
|
--
|
||||||
|
-- Requires 'flexible instances'
|
||||||
|
--
|
||||||
|
instance Binary ByteString where
|
||||||
|
put bs = do put (fromIntegral (L.length bs) :: Int)
|
||||||
|
putLazyByteString bs
|
||||||
|
get = get >>= getLazyByteString
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- Maps and Sets
|
||||||
|
|
||||||
|
instance (Ord a, Binary a) => Binary (Set.Set a) where
|
||||||
|
put s = put (Set.size s) >> mapM_ put (Set.toAscList s)
|
||||||
|
get = liftM Set.fromDistinctAscList get
|
||||||
|
|
||||||
|
instance (Ord k, Binary k, Binary e) => Binary (Map.Map k e) where
|
||||||
|
put m = put (Map.size m) >> mapM_ put (Map.toAscList m)
|
||||||
|
get = liftM Map.fromDistinctAscList get
|
||||||
|
|
||||||
|
instance Binary IntSet.IntSet where
|
||||||
|
put s = put (IntSet.size s) >> mapM_ put (IntSet.toAscList s)
|
||||||
|
get = liftM IntSet.fromDistinctAscList get
|
||||||
|
|
||||||
|
instance (Binary e) => Binary (IntMap.IntMap e) where
|
||||||
|
put m = put (IntMap.size m) >> mapM_ put (IntMap.toAscList m)
|
||||||
|
get = liftM IntMap.fromDistinctAscList get
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- Queues and Sequences
|
||||||
|
|
||||||
|
#if __GLASGOW_HASKELL__ >= 606
|
||||||
|
--
|
||||||
|
-- This is valid Hugs, but you need the most recent Hugs
|
||||||
|
--
|
||||||
|
|
||||||
|
instance (Binary e) => Binary (Seq.Seq e) where
|
||||||
|
-- any better way to do this?
|
||||||
|
put = put . Fold.toList
|
||||||
|
get = fmap Seq.fromList get
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- Floating point
|
||||||
|
|
||||||
|
instance Binary Double where
|
||||||
|
put d = put (decodeFloat d)
|
||||||
|
get = liftM2 encodeFloat get get
|
||||||
|
|
||||||
|
instance Binary Float where
|
||||||
|
put f = put (decodeFloat f)
|
||||||
|
get = liftM2 encodeFloat get get
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- Trees
|
||||||
|
|
||||||
|
instance (Binary e) => Binary (T.Tree e) where
|
||||||
|
put (T.Node r s) = put r >> put s
|
||||||
|
get = liftM2 T.Node get get
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- Arrays
|
||||||
|
|
||||||
|
instance (Binary i, Ix i, Binary e) => Binary (Array i e) where
|
||||||
|
put a = do
|
||||||
|
put (bounds a)
|
||||||
|
put (rangeSize $ bounds a) -- write the length
|
||||||
|
mapM_ put (elems a) -- now the elems.
|
||||||
|
get = do
|
||||||
|
bs <- get
|
||||||
|
n <- get -- read the length
|
||||||
|
xs <- replicateM n get -- now the elems.
|
||||||
|
return (listArray bs xs)
|
||||||
|
|
||||||
|
--
|
||||||
|
-- The IArray UArray e constraint is non portable. Requires flexible instances
|
||||||
|
--
|
||||||
|
instance (Binary i, Ix i, Binary e, IArray UArray e) => Binary (UArray i e) where
|
||||||
|
put a = do
|
||||||
|
put (bounds a)
|
||||||
|
put (rangeSize $ bounds a) -- now write the length
|
||||||
|
mapM_ put (elems a)
|
||||||
|
get = do
|
||||||
|
bs <- get
|
||||||
|
n <- get
|
||||||
|
xs <- replicateM n get
|
||||||
|
return (listArray bs xs)
|
||||||
426
src/Data/Binary/Builder.hs
Normal file
426
src/Data/Binary/Builder.hs
Normal file
@@ -0,0 +1,426 @@
|
|||||||
|
{-# LANGUAGE CPP #-}
|
||||||
|
{-# OPTIONS_GHC -fglasgow-exts #-}
|
||||||
|
-- for unboxed shifts
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : Data.Binary.Builder
|
||||||
|
-- Copyright : Lennart Kolmodin, Ross Paterson
|
||||||
|
-- License : BSD3-style (see LICENSE)
|
||||||
|
--
|
||||||
|
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
|
||||||
|
-- Stability : experimental
|
||||||
|
-- Portability : portable to Hugs and GHC
|
||||||
|
--
|
||||||
|
-- Efficient construction of lazy bytestrings.
|
||||||
|
--
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
|
||||||
|
#include "MachDeps.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
module Data.Binary.Builder (
|
||||||
|
|
||||||
|
-- * The Builder type
|
||||||
|
Builder
|
||||||
|
, toLazyByteString
|
||||||
|
|
||||||
|
-- * Constructing Builders
|
||||||
|
, empty
|
||||||
|
, singleton
|
||||||
|
, append
|
||||||
|
, fromByteString -- :: S.ByteString -> Builder
|
||||||
|
, fromLazyByteString -- :: L.ByteString -> Builder
|
||||||
|
|
||||||
|
-- * Flushing the buffer state
|
||||||
|
, flush
|
||||||
|
|
||||||
|
-- * Derived Builders
|
||||||
|
-- ** Big-endian writes
|
||||||
|
, putWord16be -- :: Word16 -> Builder
|
||||||
|
, putWord32be -- :: Word32 -> Builder
|
||||||
|
, putWord64be -- :: Word64 -> Builder
|
||||||
|
|
||||||
|
-- ** Little-endian writes
|
||||||
|
, putWord16le -- :: Word16 -> Builder
|
||||||
|
, putWord32le -- :: Word32 -> Builder
|
||||||
|
, putWord64le -- :: Word64 -> Builder
|
||||||
|
|
||||||
|
-- ** Host-endian, unaligned writes
|
||||||
|
, putWordhost -- :: Word -> Builder
|
||||||
|
, putWord16host -- :: Word16 -> Builder
|
||||||
|
, putWord32host -- :: Word32 -> Builder
|
||||||
|
, putWord64host -- :: Word64 -> Builder
|
||||||
|
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Foreign
|
||||||
|
import Data.Monoid
|
||||||
|
import Data.Word
|
||||||
|
import qualified Data.ByteString as S
|
||||||
|
import qualified Data.ByteString.Lazy as L
|
||||||
|
|
||||||
|
#ifdef BYTESTRING_IN_BASE
|
||||||
|
import Data.ByteString.Base (inlinePerformIO)
|
||||||
|
import qualified Data.ByteString.Base as S
|
||||||
|
#else
|
||||||
|
import Data.ByteString.Internal (inlinePerformIO)
|
||||||
|
import qualified Data.ByteString.Internal as S
|
||||||
|
import qualified Data.ByteString.Lazy.Internal as L
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
|
||||||
|
import GHC.Base
|
||||||
|
import GHC.Word (Word32(..),Word16(..),Word64(..))
|
||||||
|
|
||||||
|
#if WORD_SIZE_IN_BITS < 64 && __GLASGOW_HASKELL__ >= 608
|
||||||
|
import GHC.Word (uncheckedShiftRL64#)
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
|
||||||
|
-- | A 'Builder' is an efficient way to build lazy 'L.ByteString's.
|
||||||
|
-- There are several functions for constructing 'Builder's, but only one
|
||||||
|
-- to inspect them: to extract any data, you have to turn them into lazy
|
||||||
|
-- 'L.ByteString's using 'toLazyByteString'.
|
||||||
|
--
|
||||||
|
-- Internally, a 'Builder' constructs a lazy 'L.Bytestring' by filling byte
|
||||||
|
-- arrays piece by piece. As each buffer is filled, it is \'popped\'
|
||||||
|
-- off, to become a new chunk of the resulting lazy 'L.ByteString'.
|
||||||
|
-- All this is hidden from the user of the 'Builder'.
|
||||||
|
|
||||||
|
newtype Builder = Builder {
|
||||||
|
-- Invariant (from Data.ByteString.Lazy):
|
||||||
|
-- The lists include no null ByteStrings.
|
||||||
|
runBuilder :: (Buffer -> [S.ByteString]) -> Buffer -> [S.ByteString]
|
||||||
|
}
|
||||||
|
|
||||||
|
instance Monoid Builder where
|
||||||
|
mempty = empty
|
||||||
|
{-# INLINE mempty #-}
|
||||||
|
mappend = append
|
||||||
|
{-# INLINE mappend #-}
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
|
||||||
|
-- | /O(1)./ The empty Builder, satisfying
|
||||||
|
--
|
||||||
|
-- * @'toLazyByteString' 'empty' = 'L.empty'@
|
||||||
|
--
|
||||||
|
empty :: Builder
|
||||||
|
empty = Builder id
|
||||||
|
{-# INLINE empty #-}
|
||||||
|
|
||||||
|
-- | /O(1)./ A Builder taking a single byte, satisfying
|
||||||
|
--
|
||||||
|
-- * @'toLazyByteString' ('singleton' b) = 'L.singleton' b@
|
||||||
|
--
|
||||||
|
singleton :: Word8 -> Builder
|
||||||
|
singleton = writeN 1 . flip poke
|
||||||
|
{-# INLINE singleton #-}
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
|
||||||
|
-- | /O(1)./ The concatenation of two Builders, an associative operation
|
||||||
|
-- with identity 'empty', satisfying
|
||||||
|
--
|
||||||
|
-- * @'toLazyByteString' ('append' x y) = 'L.append' ('toLazyByteString' x) ('toLazyByteString' y)@
|
||||||
|
--
|
||||||
|
append :: Builder -> Builder -> Builder
|
||||||
|
append (Builder f) (Builder g) = Builder (f . g)
|
||||||
|
{-# INLINE append #-}
|
||||||
|
|
||||||
|
-- | /O(1)./ A Builder taking a 'S.ByteString', satisfying
|
||||||
|
--
|
||||||
|
-- * @'toLazyByteString' ('fromByteString' bs) = 'L.fromChunks' [bs]@
|
||||||
|
--
|
||||||
|
fromByteString :: S.ByteString -> Builder
|
||||||
|
fromByteString bs
|
||||||
|
| S.null bs = empty
|
||||||
|
| otherwise = flush `append` mapBuilder (bs :)
|
||||||
|
{-# INLINE fromByteString #-}
|
||||||
|
|
||||||
|
-- | /O(1)./ A Builder taking a lazy 'L.ByteString', satisfying
|
||||||
|
--
|
||||||
|
-- * @'toLazyByteString' ('fromLazyByteString' bs) = bs@
|
||||||
|
--
|
||||||
|
fromLazyByteString :: L.ByteString -> Builder
|
||||||
|
fromLazyByteString bss = flush `append` mapBuilder (L.toChunks bss ++)
|
||||||
|
{-# INLINE fromLazyByteString #-}
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
|
||||||
|
-- Our internal buffer type
|
||||||
|
data Buffer = Buffer {-# UNPACK #-} !(ForeignPtr Word8)
|
||||||
|
{-# UNPACK #-} !Int -- offset
|
||||||
|
{-# UNPACK #-} !Int -- used bytes
|
||||||
|
{-# UNPACK #-} !Int -- length left
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
|
||||||
|
-- | /O(n)./ Extract a lazy 'L.ByteString' from a 'Builder'.
|
||||||
|
-- The construction work takes place if and when the relevant part of
|
||||||
|
-- the lazy 'L.ByteString' is demanded.
|
||||||
|
--
|
||||||
|
toLazyByteString :: Builder -> L.ByteString
|
||||||
|
toLazyByteString m = L.fromChunks $ unsafePerformIO $ do
|
||||||
|
buf <- newBuffer defaultSize
|
||||||
|
return (runBuilder (m `append` flush) (const []) buf)
|
||||||
|
|
||||||
|
-- | /O(1)./ Pop the 'S.ByteString' we have constructed so far, if any,
|
||||||
|
-- yielding a new chunk in the result lazy 'L.ByteString'.
|
||||||
|
flush :: Builder
|
||||||
|
flush = Builder $ \ k buf@(Buffer p o u l) ->
|
||||||
|
if u == 0
|
||||||
|
then k buf
|
||||||
|
else S.PS p o u : k (Buffer p (o+u) 0 l)
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
|
||||||
|
--
|
||||||
|
-- copied from Data.ByteString.Lazy
|
||||||
|
--
|
||||||
|
defaultSize :: Int
|
||||||
|
defaultSize = 32 * k - overhead
|
||||||
|
where k = 1024
|
||||||
|
overhead = 2 * sizeOf (undefined :: Int)
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
|
||||||
|
-- | Sequence an IO operation on the buffer
|
||||||
|
unsafeLiftIO :: (Buffer -> IO Buffer) -> Builder
|
||||||
|
unsafeLiftIO f = Builder $ \ k buf -> inlinePerformIO $ do
|
||||||
|
buf' <- f buf
|
||||||
|
return (k buf')
|
||||||
|
{-# INLINE unsafeLiftIO #-}
|
||||||
|
|
||||||
|
-- | Get the size of the buffer
|
||||||
|
withSize :: (Int -> Builder) -> Builder
|
||||||
|
withSize f = Builder $ \ k buf@(Buffer _ _ _ l) ->
|
||||||
|
runBuilder (f l) k buf
|
||||||
|
|
||||||
|
-- | Map the resulting list of bytestrings.
|
||||||
|
mapBuilder :: ([S.ByteString] -> [S.ByteString]) -> Builder
|
||||||
|
mapBuilder f = Builder (f .)
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
|
||||||
|
-- | Ensure that there are at least @n@ many bytes available.
|
||||||
|
ensureFree :: Int -> Builder
|
||||||
|
ensureFree n = n `seq` withSize $ \ l ->
|
||||||
|
if n <= l then empty else
|
||||||
|
flush `append` unsafeLiftIO (const (newBuffer (max n defaultSize)))
|
||||||
|
{-# INLINE ensureFree #-}
|
||||||
|
|
||||||
|
-- | Ensure that @n@ many bytes are available, and then use @f@ to write some
|
||||||
|
-- bytes into the memory.
|
||||||
|
writeN :: Int -> (Ptr Word8 -> IO ()) -> Builder
|
||||||
|
writeN n f = ensureFree n `append` unsafeLiftIO (writeNBuffer n f)
|
||||||
|
{-# INLINE writeN #-}
|
||||||
|
|
||||||
|
writeNBuffer :: Int -> (Ptr Word8 -> IO ()) -> Buffer -> IO Buffer
|
||||||
|
writeNBuffer n f (Buffer fp o u l) = do
|
||||||
|
withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
|
||||||
|
return (Buffer fp o (u+n) (l-n))
|
||||||
|
{-# INLINE writeNBuffer #-}
|
||||||
|
|
||||||
|
newBuffer :: Int -> IO Buffer
|
||||||
|
newBuffer size = do
|
||||||
|
fp <- S.mallocByteString size
|
||||||
|
return $! Buffer fp 0 0 size
|
||||||
|
{-# INLINE newBuffer #-}
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- Aligned, host order writes of storable values
|
||||||
|
|
||||||
|
-- | Ensure that @n@ many bytes are available, and then use @f@ to write some
|
||||||
|
-- storable values into the memory.
|
||||||
|
writeNbytes :: Storable a => Int -> (Ptr a -> IO ()) -> Builder
|
||||||
|
writeNbytes n f = ensureFree n `append` unsafeLiftIO (writeNBufferBytes n f)
|
||||||
|
{-# INLINE writeNbytes #-}
|
||||||
|
|
||||||
|
writeNBufferBytes :: Storable a => Int -> (Ptr a -> IO ()) -> Buffer -> IO Buffer
|
||||||
|
writeNBufferBytes n f (Buffer fp o u l) = do
|
||||||
|
withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
|
||||||
|
return (Buffer fp o (u+n) (l-n))
|
||||||
|
{-# INLINE writeNBufferBytes #-}
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
|
||||||
|
--
|
||||||
|
-- We rely on the fromIntegral to do the right masking for us.
|
||||||
|
-- The inlining here is critical, and can be worth 4x performance
|
||||||
|
--
|
||||||
|
|
||||||
|
-- | Write a Word16 in big endian format
|
||||||
|
putWord16be :: Word16 -> Builder
|
||||||
|
putWord16be w = writeN 2 $ \p -> do
|
||||||
|
poke p (fromIntegral (shiftr_w16 w 8) :: Word8)
|
||||||
|
poke (p `plusPtr` 1) (fromIntegral (w) :: Word8)
|
||||||
|
{-# INLINE putWord16be #-}
|
||||||
|
|
||||||
|
-- | Write a Word16 in little endian format
|
||||||
|
putWord16le :: Word16 -> Builder
|
||||||
|
putWord16le w = writeN 2 $ \p -> do
|
||||||
|
poke p (fromIntegral (w) :: Word8)
|
||||||
|
poke (p `plusPtr` 1) (fromIntegral (shiftr_w16 w 8) :: Word8)
|
||||||
|
{-# INLINE putWord16le #-}
|
||||||
|
|
||||||
|
-- putWord16le w16 = writeN 2 (\p -> poke (castPtr p) w16)
|
||||||
|
|
||||||
|
-- | Write a Word32 in big endian format
|
||||||
|
putWord32be :: Word32 -> Builder
|
||||||
|
putWord32be w = writeN 4 $ \p -> do
|
||||||
|
poke p (fromIntegral (shiftr_w32 w 24) :: Word8)
|
||||||
|
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 16) :: Word8)
|
||||||
|
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 8) :: Word8)
|
||||||
|
poke (p `plusPtr` 3) (fromIntegral (w) :: Word8)
|
||||||
|
{-# INLINE putWord32be #-}
|
||||||
|
|
||||||
|
--
|
||||||
|
-- a data type to tag Put/Check. writes construct these which are then
|
||||||
|
-- inlined and flattened. matching Checks will be more robust with rules.
|
||||||
|
--
|
||||||
|
|
||||||
|
-- | Write a Word32 in little endian format
|
||||||
|
putWord32le :: Word32 -> Builder
|
||||||
|
putWord32le w = writeN 4 $ \p -> do
|
||||||
|
poke p (fromIntegral (w) :: Word8)
|
||||||
|
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 8) :: Word8)
|
||||||
|
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 16) :: Word8)
|
||||||
|
poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 w 24) :: Word8)
|
||||||
|
{-# INLINE putWord32le #-}
|
||||||
|
|
||||||
|
-- on a little endian machine:
|
||||||
|
-- putWord32le w32 = writeN 4 (\p -> poke (castPtr p) w32)
|
||||||
|
|
||||||
|
-- | Write a Word64 in big endian format
|
||||||
|
putWord64be :: Word64 -> Builder
|
||||||
|
#if WORD_SIZE_IN_BITS < 64
|
||||||
|
--
|
||||||
|
-- To avoid expensive 64 bit shifts on 32 bit machines, we cast to
|
||||||
|
-- Word32, and write that
|
||||||
|
--
|
||||||
|
putWord64be w =
|
||||||
|
let a = fromIntegral (shiftr_w64 w 32) :: Word32
|
||||||
|
b = fromIntegral w :: Word32
|
||||||
|
in writeN 8 $ \p -> do
|
||||||
|
poke p (fromIntegral (shiftr_w32 a 24) :: Word8)
|
||||||
|
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 16) :: Word8)
|
||||||
|
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 8) :: Word8)
|
||||||
|
poke (p `plusPtr` 3) (fromIntegral (a) :: Word8)
|
||||||
|
poke (p `plusPtr` 4) (fromIntegral (shiftr_w32 b 24) :: Word8)
|
||||||
|
poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 16) :: Word8)
|
||||||
|
poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 8) :: Word8)
|
||||||
|
poke (p `plusPtr` 7) (fromIntegral (b) :: Word8)
|
||||||
|
#else
|
||||||
|
putWord64be w = writeN 8 $ \p -> do
|
||||||
|
poke p (fromIntegral (shiftr_w64 w 56) :: Word8)
|
||||||
|
poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 48) :: Word8)
|
||||||
|
poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 40) :: Word8)
|
||||||
|
poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 32) :: Word8)
|
||||||
|
poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 24) :: Word8)
|
||||||
|
poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 16) :: Word8)
|
||||||
|
poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 8) :: Word8)
|
||||||
|
poke (p `plusPtr` 7) (fromIntegral (w) :: Word8)
|
||||||
|
#endif
|
||||||
|
{-# INLINE putWord64be #-}
|
||||||
|
|
||||||
|
-- | Write a Word64 in little endian format
|
||||||
|
putWord64le :: Word64 -> Builder
|
||||||
|
|
||||||
|
#if WORD_SIZE_IN_BITS < 64
|
||||||
|
putWord64le w =
|
||||||
|
let b = fromIntegral (shiftr_w64 w 32) :: Word32
|
||||||
|
a = fromIntegral w :: Word32
|
||||||
|
in writeN 8 $ \p -> do
|
||||||
|
poke (p) (fromIntegral (a) :: Word8)
|
||||||
|
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 8) :: Word8)
|
||||||
|
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 16) :: Word8)
|
||||||
|
poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 a 24) :: Word8)
|
||||||
|
poke (p `plusPtr` 4) (fromIntegral (b) :: Word8)
|
||||||
|
poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 8) :: Word8)
|
||||||
|
poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 16) :: Word8)
|
||||||
|
poke (p `plusPtr` 7) (fromIntegral (shiftr_w32 b 24) :: Word8)
|
||||||
|
#else
|
||||||
|
putWord64le w = writeN 8 $ \p -> do
|
||||||
|
poke p (fromIntegral (w) :: Word8)
|
||||||
|
poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 8) :: Word8)
|
||||||
|
poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 16) :: Word8)
|
||||||
|
poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 24) :: Word8)
|
||||||
|
poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 32) :: Word8)
|
||||||
|
poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 40) :: Word8)
|
||||||
|
poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 48) :: Word8)
|
||||||
|
poke (p `plusPtr` 7) (fromIntegral (shiftr_w64 w 56) :: Word8)
|
||||||
|
#endif
|
||||||
|
{-# INLINE putWord64le #-}
|
||||||
|
|
||||||
|
-- on a little endian machine:
|
||||||
|
-- putWord64le w64 = writeN 8 (\p -> poke (castPtr p) w64)
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- Unaligned, word size ops
|
||||||
|
|
||||||
|
-- | /O(1)./ A Builder taking a single native machine word. The word is
|
||||||
|
-- written in host order, host endian form, for the machine you're on.
|
||||||
|
-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
|
||||||
|
-- 4 bytes. Values written this way are not portable to
|
||||||
|
-- different endian or word sized machines, without conversion.
|
||||||
|
--
|
||||||
|
putWordhost :: Word -> Builder
|
||||||
|
putWordhost w = writeNbytes (sizeOf (undefined :: Word)) (\p -> poke p w)
|
||||||
|
{-# INLINE putWordhost #-}
|
||||||
|
|
||||||
|
-- | Write a Word16 in native host order and host endianness.
|
||||||
|
-- 2 bytes will be written, unaligned.
|
||||||
|
putWord16host :: Word16 -> Builder
|
||||||
|
putWord16host w16 = writeNbytes (sizeOf (undefined :: Word16)) (\p -> poke p w16)
|
||||||
|
{-# INLINE putWord16host #-}
|
||||||
|
|
||||||
|
-- | Write a Word32 in native host order and host endianness.
|
||||||
|
-- 4 bytes will be written, unaligned.
|
||||||
|
putWord32host :: Word32 -> Builder
|
||||||
|
putWord32host w32 = writeNbytes (sizeOf (undefined :: Word32)) (\p -> poke p w32)
|
||||||
|
{-# INLINE putWord32host #-}
|
||||||
|
|
||||||
|
-- | Write a Word64 in native host order.
|
||||||
|
-- On a 32 bit machine we write two host order Word32s, in big endian form.
|
||||||
|
-- 8 bytes will be written, unaligned.
|
||||||
|
putWord64host :: Word64 -> Builder
|
||||||
|
putWord64host w = writeNbytes (sizeOf (undefined :: Word64)) (\p -> poke p w)
|
||||||
|
{-# INLINE putWord64host #-}
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- Unchecked shifts
|
||||||
|
|
||||||
|
{-# INLINE shiftr_w16 #-}
|
||||||
|
shiftr_w16 :: Word16 -> Int -> Word16
|
||||||
|
{-# INLINE shiftr_w32 #-}
|
||||||
|
shiftr_w32 :: Word32 -> Int -> Word32
|
||||||
|
{-# INLINE shiftr_w64 #-}
|
||||||
|
shiftr_w64 :: Word64 -> Int -> Word64
|
||||||
|
|
||||||
|
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
|
||||||
|
shiftr_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftRL#` i)
|
||||||
|
shiftr_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftRL#` i)
|
||||||
|
|
||||||
|
#if WORD_SIZE_IN_BITS < 64
|
||||||
|
shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL64#` i)
|
||||||
|
|
||||||
|
#if __GLASGOW_HASKELL__ <= 606
|
||||||
|
-- Exported by GHC.Word in GHC 6.8 and higher
|
||||||
|
foreign import ccall unsafe "stg_uncheckedShiftRL64"
|
||||||
|
uncheckedShiftRL64# :: Word64# -> Int# -> Word64#
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#else
|
||||||
|
shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL#` i)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#else
|
||||||
|
shiftr_w16 = shiftR
|
||||||
|
shiftr_w32 = shiftR
|
||||||
|
shiftr_w64 = shiftR
|
||||||
|
#endif
|
||||||
539
src/Data/Binary/Get.hs
Normal file
539
src/Data/Binary/Get.hs
Normal file
@@ -0,0 +1,539 @@
|
|||||||
|
{-# LANGUAGE CPP #-}
|
||||||
|
{-# OPTIONS_GHC -fglasgow-exts #-}
|
||||||
|
-- for unboxed shifts
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : Data.Binary.Get
|
||||||
|
-- Copyright : Lennart Kolmodin
|
||||||
|
-- License : BSD3-style (see LICENSE)
|
||||||
|
--
|
||||||
|
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
|
||||||
|
-- Stability : experimental
|
||||||
|
-- Portability : portable to Hugs and GHC.
|
||||||
|
--
|
||||||
|
-- The Get monad. A monad for efficiently building structures from
|
||||||
|
-- encoded lazy ByteStrings
|
||||||
|
--
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
|
||||||
|
#include "MachDeps.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
module Data.Binary.Get (
|
||||||
|
|
||||||
|
-- * The Get type
|
||||||
|
Get
|
||||||
|
, runGet
|
||||||
|
, runGetState
|
||||||
|
|
||||||
|
-- * Parsing
|
||||||
|
, skip
|
||||||
|
, uncheckedSkip
|
||||||
|
, lookAhead
|
||||||
|
, lookAheadM
|
||||||
|
, lookAheadE
|
||||||
|
, uncheckedLookAhead
|
||||||
|
|
||||||
|
-- * Utility
|
||||||
|
, bytesRead
|
||||||
|
, getBytes
|
||||||
|
, remaining
|
||||||
|
, isEmpty
|
||||||
|
|
||||||
|
-- * Parsing particular types
|
||||||
|
, getWord8
|
||||||
|
|
||||||
|
-- ** ByteStrings
|
||||||
|
, getByteString
|
||||||
|
, getLazyByteString
|
||||||
|
, getLazyByteStringNul
|
||||||
|
, getRemainingLazyByteString
|
||||||
|
|
||||||
|
-- ** Big-endian reads
|
||||||
|
, getWord16be
|
||||||
|
, getWord32be
|
||||||
|
, getWord64be
|
||||||
|
|
||||||
|
-- ** Little-endian reads
|
||||||
|
, getWord16le
|
||||||
|
, getWord32le
|
||||||
|
, getWord64le
|
||||||
|
|
||||||
|
-- ** Host-endian, unaligned reads
|
||||||
|
, getWordhost
|
||||||
|
, getWord16host
|
||||||
|
, getWord32host
|
||||||
|
, getWord64host
|
||||||
|
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Control.Monad (when,liftM,ap)
|
||||||
|
import Control.Monad.Fix
|
||||||
|
import Data.Maybe (isNothing)
|
||||||
|
|
||||||
|
import qualified Data.ByteString as B
|
||||||
|
import qualified Data.ByteString.Lazy as L
|
||||||
|
|
||||||
|
#ifdef BYTESTRING_IN_BASE
|
||||||
|
import qualified Data.ByteString.Base as B
|
||||||
|
#else
|
||||||
|
import qualified Data.ByteString.Internal as B
|
||||||
|
import qualified Data.ByteString.Lazy.Internal as L
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef APPLICATIVE_IN_BASE
|
||||||
|
import Control.Applicative (Applicative(..))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
import Foreign
|
||||||
|
|
||||||
|
-- used by splitAtST
|
||||||
|
import Control.Monad.ST
|
||||||
|
import Data.STRef
|
||||||
|
|
||||||
|
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
|
||||||
|
import GHC.Base
|
||||||
|
import GHC.Word
|
||||||
|
import GHC.Int
|
||||||
|
#endif
|
||||||
|
|
||||||
|
-- | The parse state
|
||||||
|
data S = S {-# UNPACK #-} !B.ByteString -- current chunk
|
||||||
|
L.ByteString -- the rest of the input
|
||||||
|
{-# UNPACK #-} !Int64 -- bytes read
|
||||||
|
|
||||||
|
-- | The Get monad is just a State monad carrying around the input ByteString
|
||||||
|
newtype Get a = Get { unGet :: S -> (a, S) }
|
||||||
|
|
||||||
|
instance Functor Get where
|
||||||
|
fmap f m = Get (\s -> let (a, s') = unGet m s
|
||||||
|
in (f a, s'))
|
||||||
|
{-# INLINE fmap #-}
|
||||||
|
|
||||||
|
#ifdef APPLICATIVE_IN_BASE
|
||||||
|
instance Applicative Get where
|
||||||
|
pure = return
|
||||||
|
(<*>) = ap
|
||||||
|
#endif
|
||||||
|
|
||||||
|
instance Monad Get where
|
||||||
|
return a = Get (\s -> (a, s))
|
||||||
|
{-# INLINE return #-}
|
||||||
|
|
||||||
|
m >>= k = Get (\s -> case unGet m s of
|
||||||
|
(a, s') -> unGet (k a) s')
|
||||||
|
{-# INLINE (>>=) #-}
|
||||||
|
|
||||||
|
fail = failDesc
|
||||||
|
|
||||||
|
instance MonadFix Get where
|
||||||
|
mfix f = Get (\s -> let (a,s') = unGet (f a) s
|
||||||
|
in (a,s'))
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
|
||||||
|
get :: Get S
|
||||||
|
get = Get (\s -> (s, s))
|
||||||
|
|
||||||
|
put :: S -> Get ()
|
||||||
|
put s = Get (\_ -> ((), s))
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
|
||||||
|
initState :: L.ByteString -> S
|
||||||
|
initState xs = mkState xs 0
|
||||||
|
{-# INLINE initState #-}
|
||||||
|
|
||||||
|
{-
|
||||||
|
initState (B.LPS xs) =
|
||||||
|
case xs of
|
||||||
|
[] -> S B.empty L.empty 0
|
||||||
|
(x:xs') -> S x (B.LPS xs') 0
|
||||||
|
-}
|
||||||
|
|
||||||
|
#ifndef BYTESTRING_IN_BASE
|
||||||
|
mkState :: L.ByteString -> Int64 -> S
|
||||||
|
mkState l = case l of
|
||||||
|
L.Empty -> S B.empty L.empty
|
||||||
|
L.Chunk x xs -> S x xs
|
||||||
|
{-# INLINE mkState #-}
|
||||||
|
|
||||||
|
#else
|
||||||
|
mkState :: L.ByteString -> Int64 -> S
|
||||||
|
mkState (B.LPS xs) =
|
||||||
|
case xs of
|
||||||
|
[] -> S B.empty L.empty
|
||||||
|
(x:xs') -> S x (B.LPS xs')
|
||||||
|
#endif
|
||||||
|
|
||||||
|
-- | Run the Get monad applies a 'get'-based parser on the input ByteString
|
||||||
|
runGet :: Get a -> L.ByteString -> a
|
||||||
|
runGet m str = case unGet m (initState str) of (a, _) -> a
|
||||||
|
|
||||||
|
-- | Run the Get monad applies a 'get'-based parser on the input
|
||||||
|
-- ByteString. Additional to the result of get it returns the number of
|
||||||
|
-- consumed bytes and the rest of the input.
|
||||||
|
runGetState :: Get a -> L.ByteString -> Int64 -> (a, L.ByteString, Int64)
|
||||||
|
runGetState m str off =
|
||||||
|
case unGet m (mkState str off) of
|
||||||
|
(a, ~(S s ss newOff)) -> (a, s `join` ss, newOff)
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
|
||||||
|
failDesc :: String -> Get a
|
||||||
|
failDesc err = do
|
||||||
|
S _ _ bytes <- get
|
||||||
|
Get (error (err ++ ". Failed reading at byte position " ++ show bytes))
|
||||||
|
|
||||||
|
-- | Skip ahead @n@ bytes. Fails if fewer than @n@ bytes are available.
|
||||||
|
skip :: Int -> Get ()
|
||||||
|
skip n = readN (fromIntegral n) (const ())
|
||||||
|
|
||||||
|
-- | Skip ahead @n@ bytes. No error if there isn't enough bytes.
|
||||||
|
uncheckedSkip :: Int64 -> Get ()
|
||||||
|
uncheckedSkip n = do
|
||||||
|
S s ss bytes <- get
|
||||||
|
if fromIntegral (B.length s) >= n
|
||||||
|
then put (S (B.drop (fromIntegral n) s) ss (bytes + n))
|
||||||
|
else do
|
||||||
|
let rest = L.drop (n - fromIntegral (B.length s)) ss
|
||||||
|
put $! mkState rest (bytes + n)
|
||||||
|
|
||||||
|
-- | Run @ga@, but return without consuming its input.
|
||||||
|
-- Fails if @ga@ fails.
|
||||||
|
lookAhead :: Get a -> Get a
|
||||||
|
lookAhead ga = do
|
||||||
|
s <- get
|
||||||
|
a <- ga
|
||||||
|
put s
|
||||||
|
return a
|
||||||
|
|
||||||
|
-- | Like 'lookAhead', but consume the input if @gma@ returns 'Just _'.
|
||||||
|
-- Fails if @gma@ fails.
|
||||||
|
lookAheadM :: Get (Maybe a) -> Get (Maybe a)
|
||||||
|
lookAheadM gma = do
|
||||||
|
s <- get
|
||||||
|
ma <- gma
|
||||||
|
when (isNothing ma) $
|
||||||
|
put s
|
||||||
|
return ma
|
||||||
|
|
||||||
|
-- | Like 'lookAhead', but consume the input if @gea@ returns 'Right _'.
|
||||||
|
-- Fails if @gea@ fails.
|
||||||
|
lookAheadE :: Get (Either a b) -> Get (Either a b)
|
||||||
|
lookAheadE gea = do
|
||||||
|
s <- get
|
||||||
|
ea <- gea
|
||||||
|
case ea of
|
||||||
|
Left _ -> put s
|
||||||
|
_ -> return ()
|
||||||
|
return ea
|
||||||
|
|
||||||
|
-- | Get the next up to @n@ bytes as a lazy ByteString, without consuming them.
|
||||||
|
uncheckedLookAhead :: Int64 -> Get L.ByteString
|
||||||
|
uncheckedLookAhead n = do
|
||||||
|
S s ss _ <- get
|
||||||
|
if n <= fromIntegral (B.length s)
|
||||||
|
then return (L.fromChunks [B.take (fromIntegral n) s])
|
||||||
|
else return $ L.take n (s `join` ss)
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- Utility
|
||||||
|
|
||||||
|
-- | Get the total number of bytes read to this point.
|
||||||
|
bytesRead :: Get Int64
|
||||||
|
bytesRead = do
|
||||||
|
S _ _ b <- get
|
||||||
|
return b
|
||||||
|
|
||||||
|
-- | Get the number of remaining unparsed bytes.
|
||||||
|
-- Useful for checking whether all input has been consumed.
|
||||||
|
-- Note that this forces the rest of the input.
|
||||||
|
remaining :: Get Int64
|
||||||
|
remaining = do
|
||||||
|
S s ss _ <- get
|
||||||
|
return (fromIntegral (B.length s) + L.length ss)
|
||||||
|
|
||||||
|
-- | Test whether all input has been consumed,
|
||||||
|
-- i.e. there are no remaining unparsed bytes.
|
||||||
|
isEmpty :: Get Bool
|
||||||
|
isEmpty = do
|
||||||
|
S s ss _ <- get
|
||||||
|
return (B.null s && L.null ss)
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- Utility with ByteStrings
|
||||||
|
|
||||||
|
-- | An efficient 'get' method for strict ByteStrings. Fails if fewer
|
||||||
|
-- than @n@ bytes are left in the input.
|
||||||
|
getByteString :: Int -> Get B.ByteString
|
||||||
|
getByteString n = readN n id
|
||||||
|
{-# INLINE getByteString #-}
|
||||||
|
|
||||||
|
-- | An efficient 'get' method for lazy ByteStrings. Does not fail if fewer than
|
||||||
|
-- @n@ bytes are left in the input.
|
||||||
|
getLazyByteString :: Int64 -> Get L.ByteString
|
||||||
|
getLazyByteString n = do
|
||||||
|
S s ss bytes <- get
|
||||||
|
let big = s `join` ss
|
||||||
|
case splitAtST n big of
|
||||||
|
(consume, rest) -> do put $ mkState rest (bytes + n)
|
||||||
|
return consume
|
||||||
|
{-# INLINE getLazyByteString #-}
|
||||||
|
|
||||||
|
-- | Get a lazy ByteString that is terminated with a NUL byte. Fails
|
||||||
|
-- if it reaches the end of input without hitting a NUL.
|
||||||
|
getLazyByteStringNul :: Get L.ByteString
|
||||||
|
getLazyByteStringNul = do
|
||||||
|
S s ss bytes <- get
|
||||||
|
let big = s `join` ss
|
||||||
|
(consume, t) = L.break (== 0) big
|
||||||
|
(h, rest) = L.splitAt 1 t
|
||||||
|
if L.null h
|
||||||
|
then fail "too few bytes"
|
||||||
|
else do
|
||||||
|
put $ mkState rest (bytes + L.length consume + 1)
|
||||||
|
return consume
|
||||||
|
{-# INLINE getLazyByteStringNul #-}
|
||||||
|
|
||||||
|
-- | Get the remaining bytes as a lazy ByteString
|
||||||
|
getRemainingLazyByteString :: Get L.ByteString
|
||||||
|
getRemainingLazyByteString = do
|
||||||
|
S s ss _ <- get
|
||||||
|
return (s `join` ss)
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- Helpers
|
||||||
|
|
||||||
|
-- | Pull @n@ bytes from the input, as a strict ByteString.
|
||||||
|
getBytes :: Int -> Get B.ByteString
|
||||||
|
getBytes n = do
|
||||||
|
S s ss bytes <- get
|
||||||
|
if n <= B.length s
|
||||||
|
then do let (consume,rest) = B.splitAt n s
|
||||||
|
put $! S rest ss (bytes + fromIntegral n)
|
||||||
|
return $! consume
|
||||||
|
else
|
||||||
|
case L.splitAt (fromIntegral n) (s `join` ss) of
|
||||||
|
(consuming, rest) ->
|
||||||
|
do let now = B.concat . L.toChunks $ consuming
|
||||||
|
put $! mkState rest (bytes + fromIntegral n)
|
||||||
|
-- forces the next chunk before this one is returned
|
||||||
|
if (B.length now < n)
|
||||||
|
then
|
||||||
|
fail "too few bytes"
|
||||||
|
else
|
||||||
|
return now
|
||||||
|
{-# INLINE getBytes #-}
|
||||||
|
-- ^ important
|
||||||
|
|
||||||
|
#ifndef BYTESTRING_IN_BASE
|
||||||
|
join :: B.ByteString -> L.ByteString -> L.ByteString
|
||||||
|
join bb lb
|
||||||
|
| B.null bb = lb
|
||||||
|
| otherwise = L.Chunk bb lb
|
||||||
|
|
||||||
|
#else
|
||||||
|
join :: B.ByteString -> L.ByteString -> L.ByteString
|
||||||
|
join bb (B.LPS lb)
|
||||||
|
| B.null bb = B.LPS lb
|
||||||
|
| otherwise = B.LPS (bb:lb)
|
||||||
|
#endif
|
||||||
|
-- don't use L.append, it's strict in it's second argument :/
|
||||||
|
{-# INLINE join #-}
|
||||||
|
|
||||||
|
-- | Split a ByteString. If the first result is consumed before the --
|
||||||
|
-- second, this runs in constant heap space.
|
||||||
|
--
|
||||||
|
-- You must force the returned tuple for that to work, e.g.
|
||||||
|
--
|
||||||
|
-- > case splitAtST n xs of
|
||||||
|
-- > (ys,zs) -> consume ys ... consume zs
|
||||||
|
--
|
||||||
|
splitAtST :: Int64 -> L.ByteString -> (L.ByteString, L.ByteString)
|
||||||
|
splitAtST i ps | i <= 0 = (L.empty, ps)
|
||||||
|
#ifndef BYTESTRING_IN_BASE
|
||||||
|
splitAtST i ps = runST (
|
||||||
|
do r <- newSTRef undefined
|
||||||
|
xs <- first r i ps
|
||||||
|
ys <- unsafeInterleaveST (readSTRef r)
|
||||||
|
return (xs, ys))
|
||||||
|
|
||||||
|
where
|
||||||
|
first r 0 xs@(L.Chunk _ _) = writeSTRef r xs >> return L.Empty
|
||||||
|
first r _ L.Empty = writeSTRef r L.Empty >> return L.Empty
|
||||||
|
|
||||||
|
first r n (L.Chunk x xs)
|
||||||
|
| n < l = do writeSTRef r (L.Chunk (B.drop (fromIntegral n) x) xs)
|
||||||
|
return $ L.Chunk (B.take (fromIntegral n) x) L.Empty
|
||||||
|
| otherwise = do writeSTRef r (L.drop (n - l) xs)
|
||||||
|
liftM (L.Chunk x) $ unsafeInterleaveST (first r (n - l) xs)
|
||||||
|
|
||||||
|
where l = fromIntegral (B.length x)
|
||||||
|
#else
|
||||||
|
splitAtST i (B.LPS ps) = runST (
|
||||||
|
do r <- newSTRef undefined
|
||||||
|
xs <- first r i ps
|
||||||
|
ys <- unsafeInterleaveST (readSTRef r)
|
||||||
|
return (B.LPS xs, B.LPS ys))
|
||||||
|
|
||||||
|
where first r 0 xs = writeSTRef r xs >> return []
|
||||||
|
first r _ [] = writeSTRef r [] >> return []
|
||||||
|
first r n (x:xs)
|
||||||
|
| n < l = do writeSTRef r (B.drop (fromIntegral n) x : xs)
|
||||||
|
return [B.take (fromIntegral n) x]
|
||||||
|
| otherwise = do writeSTRef r (L.toChunks (L.drop (n - l) (B.LPS xs)))
|
||||||
|
fmap (x:) $ unsafeInterleaveST (first r (n - l) xs)
|
||||||
|
|
||||||
|
where l = fromIntegral (B.length x)
|
||||||
|
#endif
|
||||||
|
{-# INLINE splitAtST #-}
|
||||||
|
|
||||||
|
-- Pull n bytes from the input, and apply a parser to those bytes,
|
||||||
|
-- yielding a value. If less than @n@ bytes are available, fail with an
|
||||||
|
-- error. This wraps @getBytes@.
|
||||||
|
readN :: Int -> (B.ByteString -> a) -> Get a
|
||||||
|
readN n f = fmap f $ getBytes n
|
||||||
|
{-# INLINE readN #-}
|
||||||
|
-- ^ important
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- Primtives
|
||||||
|
|
||||||
|
-- helper, get a raw Ptr onto a strict ByteString copied out of the
|
||||||
|
-- underlying lazy byteString. So many indirections from the raw parser
|
||||||
|
-- state that my head hurts...
|
||||||
|
|
||||||
|
getPtr :: Storable a => Int -> Get a
|
||||||
|
getPtr n = do
|
||||||
|
(fp,o,_) <- readN n B.toForeignPtr
|
||||||
|
return . B.inlinePerformIO $ withForeignPtr fp $ \p -> peek (castPtr $ p `plusPtr` o)
|
||||||
|
{-# INLINE getPtr #-}
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
|
||||||
|
-- | Read a Word8 from the monad state
|
||||||
|
getWord8 :: Get Word8
|
||||||
|
getWord8 = getPtr (sizeOf (undefined :: Word8))
|
||||||
|
{-# INLINE getWord8 #-}
|
||||||
|
|
||||||
|
-- | Read a Word16 in big endian format
|
||||||
|
getWord16be :: Get Word16
|
||||||
|
getWord16be = do
|
||||||
|
s <- readN 2 id
|
||||||
|
return $! (fromIntegral (s `B.index` 0) `shiftl_w16` 8) .|.
|
||||||
|
(fromIntegral (s `B.index` 1))
|
||||||
|
{-# INLINE getWord16be #-}
|
||||||
|
|
||||||
|
-- | Read a Word16 in little endian format
|
||||||
|
getWord16le :: Get Word16
|
||||||
|
getWord16le = do
|
||||||
|
s <- readN 2 id
|
||||||
|
return $! (fromIntegral (s `B.index` 1) `shiftl_w16` 8) .|.
|
||||||
|
(fromIntegral (s `B.index` 0) )
|
||||||
|
{-# INLINE getWord16le #-}
|
||||||
|
|
||||||
|
-- | Read a Word32 in big endian format
|
||||||
|
getWord32be :: Get Word32
|
||||||
|
getWord32be = do
|
||||||
|
s <- readN 4 id
|
||||||
|
return $! (fromIntegral (s `B.index` 0) `shiftl_w32` 24) .|.
|
||||||
|
(fromIntegral (s `B.index` 1) `shiftl_w32` 16) .|.
|
||||||
|
(fromIntegral (s `B.index` 2) `shiftl_w32` 8) .|.
|
||||||
|
(fromIntegral (s `B.index` 3) )
|
||||||
|
{-# INLINE getWord32be #-}
|
||||||
|
|
||||||
|
-- | Read a Word32 in little endian format
|
||||||
|
getWord32le :: Get Word32
|
||||||
|
getWord32le = do
|
||||||
|
s <- readN 4 id
|
||||||
|
return $! (fromIntegral (s `B.index` 3) `shiftl_w32` 24) .|.
|
||||||
|
(fromIntegral (s `B.index` 2) `shiftl_w32` 16) .|.
|
||||||
|
(fromIntegral (s `B.index` 1) `shiftl_w32` 8) .|.
|
||||||
|
(fromIntegral (s `B.index` 0) )
|
||||||
|
{-# INLINE getWord32le #-}
|
||||||
|
|
||||||
|
-- | Read a Word64 in big endian format
|
||||||
|
getWord64be :: Get Word64
|
||||||
|
getWord64be = do
|
||||||
|
s <- readN 8 id
|
||||||
|
return $! (fromIntegral (s `B.index` 0) `shiftl_w64` 56) .|.
|
||||||
|
(fromIntegral (s `B.index` 1) `shiftl_w64` 48) .|.
|
||||||
|
(fromIntegral (s `B.index` 2) `shiftl_w64` 40) .|.
|
||||||
|
(fromIntegral (s `B.index` 3) `shiftl_w64` 32) .|.
|
||||||
|
(fromIntegral (s `B.index` 4) `shiftl_w64` 24) .|.
|
||||||
|
(fromIntegral (s `B.index` 5) `shiftl_w64` 16) .|.
|
||||||
|
(fromIntegral (s `B.index` 6) `shiftl_w64` 8) .|.
|
||||||
|
(fromIntegral (s `B.index` 7) )
|
||||||
|
{-# INLINE getWord64be #-}
|
||||||
|
|
||||||
|
-- | Read a Word64 in little endian format
|
||||||
|
getWord64le :: Get Word64
|
||||||
|
getWord64le = do
|
||||||
|
s <- readN 8 id
|
||||||
|
return $! (fromIntegral (s `B.index` 7) `shiftl_w64` 56) .|.
|
||||||
|
(fromIntegral (s `B.index` 6) `shiftl_w64` 48) .|.
|
||||||
|
(fromIntegral (s `B.index` 5) `shiftl_w64` 40) .|.
|
||||||
|
(fromIntegral (s `B.index` 4) `shiftl_w64` 32) .|.
|
||||||
|
(fromIntegral (s `B.index` 3) `shiftl_w64` 24) .|.
|
||||||
|
(fromIntegral (s `B.index` 2) `shiftl_w64` 16) .|.
|
||||||
|
(fromIntegral (s `B.index` 1) `shiftl_w64` 8) .|.
|
||||||
|
(fromIntegral (s `B.index` 0) )
|
||||||
|
{-# INLINE getWord64le #-}
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- Host-endian reads
|
||||||
|
|
||||||
|
-- | /O(1)./ Read a single native machine word. The word is read in
|
||||||
|
-- host order, host endian form, for the machine you're on. On a 64 bit
|
||||||
|
-- machine the Word is an 8 byte value, on a 32 bit machine, 4 bytes.
|
||||||
|
getWordhost :: Get Word
|
||||||
|
getWordhost = getPtr (sizeOf (undefined :: Word))
|
||||||
|
{-# INLINE getWordhost #-}
|
||||||
|
|
||||||
|
-- | /O(1)./ Read a 2 byte Word16 in native host order and host endianness.
|
||||||
|
getWord16host :: Get Word16
|
||||||
|
getWord16host = getPtr (sizeOf (undefined :: Word16))
|
||||||
|
{-# INLINE getWord16host #-}
|
||||||
|
|
||||||
|
-- | /O(1)./ Read a Word32 in native host order and host endianness.
|
||||||
|
getWord32host :: Get Word32
|
||||||
|
getWord32host = getPtr (sizeOf (undefined :: Word32))
|
||||||
|
{-# INLINE getWord32host #-}
|
||||||
|
|
||||||
|
-- | /O(1)./ Read a Word64 in native host order and host endianess.
|
||||||
|
getWord64host :: Get Word64
|
||||||
|
getWord64host = getPtr (sizeOf (undefined :: Word64))
|
||||||
|
{-# INLINE getWord64host #-}
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
-- Unchecked shifts
|
||||||
|
|
||||||
|
shiftl_w16 :: Word16 -> Int -> Word16
|
||||||
|
shiftl_w32 :: Word32 -> Int -> Word32
|
||||||
|
shiftl_w64 :: Word64 -> Int -> Word64
|
||||||
|
|
||||||
|
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
|
||||||
|
shiftl_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftL#` i)
|
||||||
|
shiftl_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftL#` i)
|
||||||
|
|
||||||
|
#if WORD_SIZE_IN_BITS < 64
|
||||||
|
shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL64#` i)
|
||||||
|
|
||||||
|
#if __GLASGOW_HASKELL__ <= 606
|
||||||
|
-- Exported by GHC.Word in GHC 6.8 and higher
|
||||||
|
foreign import ccall unsafe "stg_uncheckedShiftL64"
|
||||||
|
uncheckedShiftL64# :: Word64# -> Int# -> Word64#
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#else
|
||||||
|
shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL#` i)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#else
|
||||||
|
shiftl_w16 = shiftL
|
||||||
|
shiftl_w32 = shiftL
|
||||||
|
shiftl_w64 = shiftL
|
||||||
|
#endif
|
||||||
199
src/Data/Binary/Put.hs
Normal file
199
src/Data/Binary/Put.hs
Normal file
@@ -0,0 +1,199 @@
|
|||||||
|
{-# LANGUAGE CPP #-}
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : Data.Binary.Put
|
||||||
|
-- Copyright : Lennart Kolmodin
|
||||||
|
-- License : BSD3-style (see LICENSE)
|
||||||
|
--
|
||||||
|
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
|
||||||
|
-- Stability : stable
|
||||||
|
-- Portability : Portable to Hugs and GHC. Requires MPTCs
|
||||||
|
--
|
||||||
|
-- The Put monad. A monad for efficiently constructing lazy bytestrings.
|
||||||
|
--
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
module Data.Binary.Put (
|
||||||
|
|
||||||
|
-- * The Put type
|
||||||
|
Put
|
||||||
|
, PutM(..)
|
||||||
|
, runPut
|
||||||
|
|
||||||
|
-- * Flushing the implicit parse state
|
||||||
|
, flush
|
||||||
|
|
||||||
|
-- * Primitives
|
||||||
|
, putWord8
|
||||||
|
, putByteString
|
||||||
|
, putLazyByteString
|
||||||
|
|
||||||
|
-- * Big-endian primitives
|
||||||
|
, putWord16be
|
||||||
|
, putWord32be
|
||||||
|
, putWord64be
|
||||||
|
|
||||||
|
-- * Little-endian primitives
|
||||||
|
, putWord16le
|
||||||
|
, putWord32le
|
||||||
|
, putWord64le
|
||||||
|
|
||||||
|
-- * Host-endian, unaligned writes
|
||||||
|
, putWordhost -- :: Word -> Put
|
||||||
|
, putWord16host -- :: Word16 -> Put
|
||||||
|
, putWord32host -- :: Word32 -> Put
|
||||||
|
, putWord64host -- :: Word64 -> Put
|
||||||
|
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Data.Monoid
|
||||||
|
import Data.Binary.Builder (Builder, toLazyByteString)
|
||||||
|
import qualified Data.Binary.Builder as B
|
||||||
|
|
||||||
|
import Data.Word
|
||||||
|
import qualified Data.ByteString as S
|
||||||
|
import qualified Data.ByteString.Lazy as L
|
||||||
|
|
||||||
|
#ifdef APPLICATIVE_IN_BASE
|
||||||
|
import Control.Applicative
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
|
||||||
|
-- XXX Strict in buffer only.
|
||||||
|
data PairS a = PairS a {-# UNPACK #-}!Builder
|
||||||
|
|
||||||
|
sndS :: PairS a -> Builder
|
||||||
|
sndS (PairS _ b) = b
|
||||||
|
|
||||||
|
-- | The PutM type. A Writer monad over the efficient Builder monoid.
|
||||||
|
newtype PutM a = Put { unPut :: PairS a }
|
||||||
|
|
||||||
|
-- | Put merely lifts Builder into a Writer monad, applied to ().
|
||||||
|
type Put = PutM ()
|
||||||
|
|
||||||
|
instance Functor PutM where
|
||||||
|
fmap f m = Put $ let PairS a w = unPut m in PairS (f a) w
|
||||||
|
{-# INLINE fmap #-}
|
||||||
|
|
||||||
|
#ifdef APPLICATIVE_IN_BASE
|
||||||
|
instance Applicative PutM where
|
||||||
|
pure = return
|
||||||
|
m <*> k = Put $
|
||||||
|
let PairS f w = unPut m
|
||||||
|
PairS x w' = unPut k
|
||||||
|
in PairS (f x) (w `mappend` w')
|
||||||
|
#endif
|
||||||
|
|
||||||
|
-- Standard Writer monad, with aggressive inlining
|
||||||
|
instance Monad PutM where
|
||||||
|
return a = Put $ PairS a mempty
|
||||||
|
{-# INLINE return #-}
|
||||||
|
|
||||||
|
m >>= k = Put $
|
||||||
|
let PairS a w = unPut m
|
||||||
|
PairS b w' = unPut (k a)
|
||||||
|
in PairS b (w `mappend` w')
|
||||||
|
{-# INLINE (>>=) #-}
|
||||||
|
|
||||||
|
m >> k = Put $
|
||||||
|
let PairS _ w = unPut m
|
||||||
|
PairS b w' = unPut k
|
||||||
|
in PairS b (w `mappend` w')
|
||||||
|
{-# INLINE (>>) #-}
|
||||||
|
|
||||||
|
tell :: Builder -> Put
|
||||||
|
tell b = Put $ PairS () b
|
||||||
|
{-# INLINE tell #-}
|
||||||
|
|
||||||
|
-- | Run the 'Put' monad with a serialiser
|
||||||
|
runPut :: Put -> L.ByteString
|
||||||
|
runPut = toLazyByteString . sndS . unPut
|
||||||
|
{-# INLINE runPut #-}
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
|
||||||
|
-- | Pop the ByteString we have constructed so far, if any, yielding a
|
||||||
|
-- new chunk in the result ByteString.
|
||||||
|
flush :: Put
|
||||||
|
flush = tell B.flush
|
||||||
|
{-# INLINE flush #-}
|
||||||
|
|
||||||
|
-- | Efficiently write a byte into the output buffer
|
||||||
|
putWord8 :: Word8 -> Put
|
||||||
|
putWord8 = tell . B.singleton
|
||||||
|
{-# INLINE putWord8 #-}
|
||||||
|
|
||||||
|
-- | An efficient primitive to write a strict ByteString into the output buffer.
|
||||||
|
-- It flushes the current buffer, and writes the argument into a new chunk.
|
||||||
|
putByteString :: S.ByteString -> Put
|
||||||
|
putByteString = tell . B.fromByteString
|
||||||
|
{-# INLINE putByteString #-}
|
||||||
|
|
||||||
|
-- | Write a lazy ByteString efficiently, simply appending the lazy
|
||||||
|
-- ByteString chunks to the output buffer
|
||||||
|
putLazyByteString :: L.ByteString -> Put
|
||||||
|
putLazyByteString = tell . B.fromLazyByteString
|
||||||
|
{-# INLINE putLazyByteString #-}
|
||||||
|
|
||||||
|
-- | Write a Word16 in big endian format
|
||||||
|
putWord16be :: Word16 -> Put
|
||||||
|
putWord16be = tell . B.putWord16be
|
||||||
|
{-# INLINE putWord16be #-}
|
||||||
|
|
||||||
|
-- | Write a Word16 in little endian format
|
||||||
|
putWord16le :: Word16 -> Put
|
||||||
|
putWord16le = tell . B.putWord16le
|
||||||
|
{-# INLINE putWord16le #-}
|
||||||
|
|
||||||
|
-- | Write a Word32 in big endian format
|
||||||
|
putWord32be :: Word32 -> Put
|
||||||
|
putWord32be = tell . B.putWord32be
|
||||||
|
{-# INLINE putWord32be #-}
|
||||||
|
|
||||||
|
-- | Write a Word32 in little endian format
|
||||||
|
putWord32le :: Word32 -> Put
|
||||||
|
putWord32le = tell . B.putWord32le
|
||||||
|
{-# INLINE putWord32le #-}
|
||||||
|
|
||||||
|
-- | Write a Word64 in big endian format
|
||||||
|
putWord64be :: Word64 -> Put
|
||||||
|
putWord64be = tell . B.putWord64be
|
||||||
|
{-# INLINE putWord64be #-}
|
||||||
|
|
||||||
|
-- | Write a Word64 in little endian format
|
||||||
|
putWord64le :: Word64 -> Put
|
||||||
|
putWord64le = tell . B.putWord64le
|
||||||
|
{-# INLINE putWord64le #-}
|
||||||
|
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
|
||||||
|
-- | /O(1)./ Write a single native machine word. The word is
|
||||||
|
-- written in host order, host endian form, for the machine you're on.
|
||||||
|
-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
|
||||||
|
-- 4 bytes. Values written this way are not portable to
|
||||||
|
-- different endian or word sized machines, without conversion.
|
||||||
|
--
|
||||||
|
putWordhost :: Word -> Put
|
||||||
|
putWordhost = tell . B.putWordhost
|
||||||
|
{-# INLINE putWordhost #-}
|
||||||
|
|
||||||
|
-- | /O(1)./ Write a Word16 in native host order and host endianness.
|
||||||
|
-- For portability issues see @putWordhost@.
|
||||||
|
putWord16host :: Word16 -> Put
|
||||||
|
putWord16host = tell . B.putWord16host
|
||||||
|
{-# INLINE putWord16host #-}
|
||||||
|
|
||||||
|
-- | /O(1)./ Write a Word32 in native host order and host endianness.
|
||||||
|
-- For portability issues see @putWordhost@.
|
||||||
|
putWord32host :: Word32 -> Put
|
||||||
|
putWord32host = tell . B.putWord32host
|
||||||
|
{-# INLINE putWord32host #-}
|
||||||
|
|
||||||
|
-- | /O(1)./ Write a Word64 in native host order
|
||||||
|
-- On a 32 bit machine we write two host order Word32s, in big endian form.
|
||||||
|
-- For portability issues see @putWordhost@.
|
||||||
|
putWord64host :: Word64 -> Put
|
||||||
|
putWord64host = tell . B.putWord64host
|
||||||
|
{-# INLINE putWord64host #-}
|
||||||
@@ -2,8 +2,6 @@ module GF.Compile.Export where
|
|||||||
|
|
||||||
import PGF.CId
|
import PGF.CId
|
||||||
import PGF.Data (PGF(..))
|
import PGF.Data (PGF(..))
|
||||||
import PGF.Raw.Print (printTree)
|
|
||||||
import PGF.Raw.Convert (fromPGF)
|
|
||||||
import GF.Compile.GFCCtoHaskell
|
import GF.Compile.GFCCtoHaskell
|
||||||
import GF.Compile.GFCCtoProlog
|
import GF.Compile.GFCCtoProlog
|
||||||
import GF.Compile.GFCCtoJS
|
import GF.Compile.GFCCtoJS
|
||||||
@@ -32,7 +30,6 @@ exportPGF :: Options
|
|||||||
-> [(FilePath,String)] -- ^ List of recommended file names and contents.
|
-> [(FilePath,String)] -- ^ List of recommended file names and contents.
|
||||||
exportPGF opts fmt pgf =
|
exportPGF opts fmt pgf =
|
||||||
case fmt of
|
case fmt of
|
||||||
FmtPGF -> multi "pgf" printPGF
|
|
||||||
FmtPGFPretty -> multi "txt" prPGFPretty
|
FmtPGFPretty -> multi "txt" prPGFPretty
|
||||||
FmtJavaScript -> multi "js" pgf2js
|
FmtJavaScript -> multi "js" pgf2js
|
||||||
FmtHaskell -> multi "hs" (grammar2haskell opts name)
|
FmtHaskell -> multi "hs" (grammar2haskell opts name)
|
||||||
@@ -65,7 +62,3 @@ outputConcr :: PGF -> CId
|
|||||||
outputConcr pgf = case cncnames pgf of
|
outputConcr pgf = case cncnames pgf of
|
||||||
[] -> error "No concrete syntax."
|
[] -> error "No concrete syntax."
|
||||||
cnc:_ -> cnc
|
cnc:_ -> cnc
|
||||||
|
|
||||||
printPGF :: PGF -> String
|
|
||||||
printPGF = encodeUTF8 . printTree . fromPGF
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
{-# LANGUAGE PatternGuards #-}
|
{-# LANGUAGE PatternGuards #-}
|
||||||
module GF.Compile.GrammarToGFCC (prGrammar2gfcc,mkCanon2gfcc,addParsers) where
|
module GF.Compile.GrammarToGFCC (mkCanon2gfcc,addParsers) where
|
||||||
|
|
||||||
import GF.Compile.Export
|
import GF.Compile.Export
|
||||||
import GF.Compile.OptimizeGF (unshareModule)
|
import GF.Compile.OptimizeGF (unshareModule)
|
||||||
@@ -37,11 +37,6 @@ traceD s t = t
|
|||||||
|
|
||||||
|
|
||||||
-- the main function: generate PGF from GF.
|
-- the main function: generate PGF from GF.
|
||||||
|
|
||||||
prGrammar2gfcc :: Options -> String -> SourceGrammar -> (String,String)
|
|
||||||
prGrammar2gfcc opts cnc gr = (abs,printPGF gc) where
|
|
||||||
(abs,gc) = mkCanon2gfcc opts cnc gr
|
|
||||||
|
|
||||||
mkCanon2gfcc :: Options -> String -> SourceGrammar -> (String,D.PGF)
|
mkCanon2gfcc :: Options -> String -> SourceGrammar -> (String,D.PGF)
|
||||||
mkCanon2gfcc opts cnc gr =
|
mkCanon2gfcc opts cnc gr =
|
||||||
(prIdent abs, (canon2gfcc opts pars . reorder abs . canon2canon abs) gr)
|
(prIdent abs, (canon2gfcc opts pars . reorder abs . canon2canon abs) gr)
|
||||||
|
|||||||
@@ -80,8 +80,7 @@ data Phase = Preproc | Convert | Compile | Link
|
|||||||
data Encoding = UTF_8 | ISO_8859_1 | CP_1251
|
data Encoding = UTF_8 | ISO_8859_1 | CP_1251
|
||||||
deriving (Eq,Ord)
|
deriving (Eq,Ord)
|
||||||
|
|
||||||
data OutputFormat = FmtPGF
|
data OutputFormat = FmtPGFPretty
|
||||||
| FmtPGFPretty
|
|
||||||
| FmtJavaScript
|
| FmtJavaScript
|
||||||
| FmtHaskell
|
| FmtHaskell
|
||||||
| FmtProlog
|
| FmtProlog
|
||||||
@@ -239,7 +238,7 @@ defaultFlags = Flags {
|
|||||||
optShowCPUTime = False,
|
optShowCPUTime = False,
|
||||||
optEmitGFO = True,
|
optEmitGFO = True,
|
||||||
optGFODir = ".",
|
optGFODir = ".",
|
||||||
optOutputFormats = [FmtPGF],
|
optOutputFormats = [],
|
||||||
optSISR = Nothing,
|
optSISR = Nothing,
|
||||||
optHaskellOptions = Set.empty,
|
optHaskellOptions = Set.empty,
|
||||||
optLexicalCats = Set.empty,
|
optLexicalCats = Set.empty,
|
||||||
@@ -427,8 +426,7 @@ optDescr =
|
|||||||
|
|
||||||
outputFormats :: [(String,OutputFormat)]
|
outputFormats :: [(String,OutputFormat)]
|
||||||
outputFormats =
|
outputFormats =
|
||||||
[("pgf", FmtPGF),
|
[("pgf-pretty", FmtPGFPretty),
|
||||||
("pgf-pretty", FmtPGFPretty),
|
|
||||||
("js", FmtJavaScript),
|
("js", FmtJavaScript),
|
||||||
("haskell", FmtHaskell),
|
("haskell", FmtHaskell),
|
||||||
("prolog", FmtProlog),
|
("prolog", FmtProlog),
|
||||||
|
|||||||
18
src/GFC.hs
18
src/GFC.hs
@@ -4,8 +4,6 @@ module GFC (mainGFC) where
|
|||||||
import PGF
|
import PGF
|
||||||
import PGF.CId
|
import PGF.CId
|
||||||
import PGF.Data
|
import PGF.Data
|
||||||
import PGF.Raw.Parse
|
|
||||||
import PGF.Raw.Convert
|
|
||||||
import GF.Compile
|
import GF.Compile
|
||||||
import GF.Compile.Export
|
import GF.Compile.Export
|
||||||
|
|
||||||
@@ -16,6 +14,7 @@ import GF.Infra.Option
|
|||||||
import GF.Data.ErrM
|
import GF.Data.ErrM
|
||||||
|
|
||||||
import Data.Maybe
|
import Data.Maybe
|
||||||
|
import Data.Binary
|
||||||
import System.FilePath
|
import System.FilePath
|
||||||
|
|
||||||
|
|
||||||
@@ -57,10 +56,17 @@ unionPGFFiles opts fs =
|
|||||||
where readPGFVerbose f = putPointE Normal opts ("Reading " ++ f ++ "...") $ ioeIO $ readPGF f
|
where readPGFVerbose f = putPointE Normal opts ("Reading " ++ f ++ "...") $ ioeIO $ readPGF f
|
||||||
|
|
||||||
writeOutputs :: Options -> PGF -> IOE ()
|
writeOutputs :: Options -> PGF -> IOE ()
|
||||||
writeOutputs opts pgf =
|
writeOutputs opts pgf = do
|
||||||
sequence_ [writeOutput opts name str
|
writePGF opts pgf
|
||||||
| fmt <- flag optOutputFormats opts,
|
sequence_ [writeOutput opts name str
|
||||||
(name,str) <- exportPGF opts fmt pgf]
|
| fmt <- flag optOutputFormats opts,
|
||||||
|
(name,str) <- exportPGF opts fmt pgf]
|
||||||
|
|
||||||
|
writePGF :: Options -> PGF -> IOE ()
|
||||||
|
writePGF opts pgf = do
|
||||||
|
let name = fromMaybe (prCId (absname pgf)) (flag optName opts)
|
||||||
|
outfile = name <.> "pgf"
|
||||||
|
putPointE Normal opts ("Writing " ++ outfile ++ "...") $ ioeIO $ encodeFile outfile pgf
|
||||||
|
|
||||||
writeOutput :: Options -> FilePath-> String -> IOE ()
|
writeOutput :: Options -> FilePath-> String -> IOE ()
|
||||||
writeOutput opts file str =
|
writeOutput opts file str =
|
||||||
|
|||||||
10
src/PGF.hs
10
src/PGF.hs
@@ -66,9 +66,7 @@ import PGF.TypeCheck
|
|||||||
import PGF.Paraphrase
|
import PGF.Paraphrase
|
||||||
import PGF.Macros
|
import PGF.Macros
|
||||||
import PGF.Data
|
import PGF.Data
|
||||||
import PGF.Raw.Convert
|
import PGF.Binary
|
||||||
import PGF.Raw.Parse
|
|
||||||
import PGF.Raw.Print (printTree)
|
|
||||||
import PGF.Parsing.FCFG
|
import PGF.Parsing.FCFG
|
||||||
import qualified PGF.Parsing.FCFG.Incremental as Incremental
|
import qualified PGF.Parsing.FCFG.Incremental as Incremental
|
||||||
import qualified GF.Compile.GeneratePMCFG as PMCFG
|
import qualified GF.Compile.GeneratePMCFG as PMCFG
|
||||||
@@ -80,6 +78,7 @@ import GF.Data.Utilities (replace)
|
|||||||
import Data.Char
|
import Data.Char
|
||||||
import qualified Data.Map as Map
|
import qualified Data.Map as Map
|
||||||
import Data.Maybe
|
import Data.Maybe
|
||||||
|
import Data.Binary
|
||||||
import System.Random (newStdGen)
|
import System.Random (newStdGen)
|
||||||
import Control.Monad
|
import Control.Monad
|
||||||
|
|
||||||
@@ -210,9 +209,8 @@ readLanguage = readCId
|
|||||||
showLanguage = prCId
|
showLanguage = prCId
|
||||||
|
|
||||||
readPGF f = do
|
readPGF f = do
|
||||||
s <- readFile f >>= return . decodeUTF8 -- pgf is in UTF8, internal in unicode
|
g <- decodeFile f
|
||||||
g <- parseGrammar s
|
return $! addParsers g
|
||||||
return $! addParsers $ toPGF g
|
|
||||||
|
|
||||||
-- Adds parsers for all concretes that don't have a parser and that have parser=ondemand.
|
-- Adds parsers for all concretes that don't have a parser and that have parser=ondemand.
|
||||||
addParsers :: PGF -> PGF
|
addParsers :: PGF -> PGF
|
||||||
|
|||||||
@@ -1,14 +0,0 @@
|
|||||||
module PGF.Raw.Abstract where
|
|
||||||
|
|
||||||
data Grammar =
|
|
||||||
Grm [RExp]
|
|
||||||
deriving (Eq,Ord,Show)
|
|
||||||
|
|
||||||
data RExp =
|
|
||||||
App String [RExp]
|
|
||||||
| AInt Integer
|
|
||||||
| AStr String
|
|
||||||
| AFlt Double
|
|
||||||
| AMet
|
|
||||||
deriving (Eq,Ord,Show)
|
|
||||||
|
|
||||||
@@ -1,273 +0,0 @@
|
|||||||
module PGF.Raw.Convert (toPGF,fromPGF) where
|
|
||||||
|
|
||||||
import PGF.CId
|
|
||||||
import PGF.Data
|
|
||||||
import PGF.Raw.Abstract
|
|
||||||
|
|
||||||
import Data.Array.IArray
|
|
||||||
import qualified Data.Map as Map
|
|
||||||
import qualified Data.Set as Set
|
|
||||||
import qualified Data.IntMap as IntMap
|
|
||||||
|
|
||||||
pgfMajorVersion, pgfMinorVersion :: Integer
|
|
||||||
(pgfMajorVersion, pgfMinorVersion) = (1,0)
|
|
||||||
|
|
||||||
-- convert parsed grammar to internal PGF
|
|
||||||
|
|
||||||
toPGF :: Grammar -> PGF
|
|
||||||
toPGF (Grm [
|
|
||||||
App "pgf" (AInt v1 : AInt v2 : App a []:cs),
|
|
||||||
App "flags" gfs,
|
|
||||||
ab@(
|
|
||||||
App "abstract" [
|
|
||||||
App "fun" fs,
|
|
||||||
App "cat" cts
|
|
||||||
]),
|
|
||||||
App "concrete" ccs
|
|
||||||
]) = let pgf = PGF {
|
|
||||||
absname = mkCId a,
|
|
||||||
cncnames = [mkCId c | App c [] <- cs],
|
|
||||||
gflags = Map.fromAscList [(mkCId f,v) | App f [AStr v] <- gfs],
|
|
||||||
abstract =
|
|
||||||
let
|
|
||||||
aflags = Map.fromAscList [(mkCId f,v) | App f [AStr v] <- gfs]
|
|
||||||
lfuns = [(mkCId f,(toType typ,toExp def)) | App f [typ, def] <- fs]
|
|
||||||
funs = Map.fromAscList lfuns
|
|
||||||
lcats = [(mkCId c, Prelude.map toHypo hyps) | App c hyps <- cts]
|
|
||||||
cats = Map.fromAscList lcats
|
|
||||||
catfuns = Map.fromAscList
|
|
||||||
[(cat,[f | (f, (DTyp _ c _,_)) <- lfuns, c==cat]) | (cat,_) <- lcats]
|
|
||||||
in Abstr aflags funs cats catfuns,
|
|
||||||
concretes = Map.fromAscList [(mkCId lang, toConcr pgf ts) | App lang ts <- ccs]
|
|
||||||
}
|
|
||||||
in pgf
|
|
||||||
where
|
|
||||||
|
|
||||||
toConcr :: PGF -> [RExp] -> Concr
|
|
||||||
toConcr pgf rexp =
|
|
||||||
let cnc = foldl add (Concr {cflags = Map.empty,
|
|
||||||
lins = Map.empty,
|
|
||||||
opers = Map.empty,
|
|
||||||
lincats = Map.empty,
|
|
||||||
lindefs = Map.empty,
|
|
||||||
printnames = Map.empty,
|
|
||||||
paramlincats = Map.empty,
|
|
||||||
parser = Nothing
|
|
||||||
}) rexp
|
|
||||||
in cnc
|
|
||||||
where
|
|
||||||
add :: Concr -> RExp -> Concr
|
|
||||||
add cnc (App "flags" ts) = cnc { cflags = Map.fromAscList [(mkCId f,v) | App f [AStr v] <- ts] }
|
|
||||||
add cnc (App "lin" ts) = cnc { lins = mkTermMap ts }
|
|
||||||
add cnc (App "oper" ts) = cnc { opers = mkTermMap ts }
|
|
||||||
add cnc (App "lincat" ts) = cnc { lincats = mkTermMap ts }
|
|
||||||
add cnc (App "lindef" ts) = cnc { lindefs = mkTermMap ts }
|
|
||||||
add cnc (App "printname" ts) = cnc { printnames = mkTermMap ts }
|
|
||||||
add cnc (App "param" ts) = cnc { paramlincats = mkTermMap ts }
|
|
||||||
add cnc (App "parser" ts) = cnc { parser = Just (toPInfo ts) }
|
|
||||||
|
|
||||||
toPInfo :: [RExp] -> ParserInfo
|
|
||||||
toPInfo [App "functions" fs, App "sequences" ss, App "productions" ps,App "categories" (t:cs)] =
|
|
||||||
ParserInfo { functions = functions
|
|
||||||
, sequences = seqs
|
|
||||||
, productions = productions
|
|
||||||
, startCats = cats
|
|
||||||
, totalCats = expToInt t
|
|
||||||
}
|
|
||||||
where
|
|
||||||
functions = mkArray (map toFFun fs)
|
|
||||||
seqs = mkArray (map toFSeq ss)
|
|
||||||
productions = IntMap.fromList (map toProductionSet ps)
|
|
||||||
cats = Map.fromList [(mkCId c, (map expToInt xs)) | App c xs <- cs]
|
|
||||||
|
|
||||||
toFFun :: RExp -> FFun
|
|
||||||
toFFun (App f [App "P" ts,App "R" ls]) = FFun fun prof lins
|
|
||||||
where
|
|
||||||
fun = mkCId f
|
|
||||||
prof = map toProfile ts
|
|
||||||
lins = mkArray [fromIntegral seqid | AInt seqid <- ls]
|
|
||||||
|
|
||||||
toProfile :: RExp -> Profile
|
|
||||||
toProfile AMet = []
|
|
||||||
toProfile (App "_A" [t]) = [expToInt t]
|
|
||||||
toProfile (App "_U" ts) = [expToInt t | App "_A" [t] <- ts]
|
|
||||||
|
|
||||||
toFSeq :: RExp -> FSeq
|
|
||||||
toFSeq (App "seq" ss) = mkArray [toSymbol s | s <- ss]
|
|
||||||
|
|
||||||
toProductionSet :: RExp -> (FCat,Set.Set Production)
|
|
||||||
toProductionSet (App "td" (rt : xs)) = (expToInt rt, Set.fromList (map toProduction xs))
|
|
||||||
where
|
|
||||||
toProduction (App "A" (ruleid : at)) = FApply (expToInt ruleid) (map expToInt at)
|
|
||||||
toProduction (App "C" [fcat]) = FCoerce (expToInt fcat)
|
|
||||||
|
|
||||||
toSymbol :: RExp -> FSymbol
|
|
||||||
toSymbol (App "P" [n,l]) = FSymCat (expToInt n) (expToInt l)
|
|
||||||
toSymbol (App "PL" [n,l]) = FSymLit (expToInt n) (expToInt l)
|
|
||||||
toSymbol (App "KP" (d:alts)) = FSymTok (toKP d alts)
|
|
||||||
toSymbol (AStr t) = FSymTok (KS t)
|
|
||||||
|
|
||||||
toType :: RExp -> Type
|
|
||||||
toType e = case e of
|
|
||||||
App cat [App "H" hypos, App "X" exps] ->
|
|
||||||
DTyp (map toHypo hypos) (mkCId cat) (map toExp exps)
|
|
||||||
_ -> error $ "type " ++ show e
|
|
||||||
|
|
||||||
toHypo :: RExp -> Hypo
|
|
||||||
toHypo e = case e of
|
|
||||||
App x [typ] -> Hyp (mkCId x) (toType typ)
|
|
||||||
_ -> error $ "hypo " ++ show e
|
|
||||||
|
|
||||||
toExp :: RExp -> Expr
|
|
||||||
toExp e = case e of
|
|
||||||
App "Abs" [App x [], exp] -> EAbs (mkCId x) (toExp exp)
|
|
||||||
App "App" [e1,e2] -> EApp (toExp e1) (toExp e2)
|
|
||||||
App "Eq" eqs -> EEq [Equ (map toExp ps) (toExp v) | App "E" (v:ps) <- eqs]
|
|
||||||
App "Var" [App i []] -> EVar (mkCId i)
|
|
||||||
AMet -> EMeta 0
|
|
||||||
AInt i -> ELit (LInt i)
|
|
||||||
AFlt i -> ELit (LFlt i)
|
|
||||||
AStr i -> ELit (LStr i)
|
|
||||||
_ -> error $ "exp " ++ show e
|
|
||||||
|
|
||||||
toTerm :: RExp -> Term
|
|
||||||
toTerm e = case e of
|
|
||||||
App "R" es -> R (map toTerm es)
|
|
||||||
App "S" es -> S (map toTerm es)
|
|
||||||
App "FV" es -> FV (map toTerm es)
|
|
||||||
App "P" [e,v] -> P (toTerm e) (toTerm v)
|
|
||||||
App "W" [AStr s,v] -> W s (toTerm v)
|
|
||||||
App "A" [AInt i] -> V (fromInteger i)
|
|
||||||
App f [] -> F (mkCId f)
|
|
||||||
AInt i -> C (fromInteger i)
|
|
||||||
AMet -> TM "?"
|
|
||||||
App "KP" (d:alts) -> K (toKP d alts)
|
|
||||||
AStr s -> K (KS s)
|
|
||||||
_ -> error $ "term " ++ show e
|
|
||||||
|
|
||||||
toKP d alts = KP (toStr d) (map toAlt alts)
|
|
||||||
where
|
|
||||||
toStr (App "S" vs) = [v | AStr v <- vs]
|
|
||||||
toAlt (App "A" [x,y]) = Alt (toStr x) (toStr y)
|
|
||||||
|
|
||||||
|
|
||||||
------------------------------
|
|
||||||
--- from internal to parser --
|
|
||||||
------------------------------
|
|
||||||
|
|
||||||
fromPGF :: PGF -> Grammar
|
|
||||||
fromPGF pgf = Grm [
|
|
||||||
App "pgf" (AInt pgfMajorVersion:AInt pgfMinorVersion
|
|
||||||
: App (prCId (absname pgf)) [] : map (flip App [] . prCId) (cncnames pgf)),
|
|
||||||
App "flags" [App (prCId f) [AStr v] | (f,v) <- Map.toList (gflags pgf `Map.union` aflags apgf)],
|
|
||||||
App "abstract" [
|
|
||||||
App "fun" [App (prCId f) [fromType t,fromExp d] | (f,(t,d)) <- Map.toList (funs apgf)],
|
|
||||||
App "cat" [App (prCId f) (map fromHypo hs) | (f,hs) <- Map.toList (cats apgf)]
|
|
||||||
],
|
|
||||||
App "concrete" [App (prCId lang) (fromConcrete c) | (lang,c) <- Map.toList (concretes pgf)]
|
|
||||||
]
|
|
||||||
where
|
|
||||||
apgf = abstract pgf
|
|
||||||
fromConcrete cnc = [
|
|
||||||
App "flags" [App (prCId f) [AStr v] | (f,v) <- Map.toList (cflags cnc)],
|
|
||||||
App "lin" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (lins cnc)],
|
|
||||||
App "oper" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (opers cnc)],
|
|
||||||
App "lincat" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (lincats cnc)],
|
|
||||||
App "lindef" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (lindefs cnc)],
|
|
||||||
App "printname" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (printnames cnc)],
|
|
||||||
App "param" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (paramlincats cnc)]
|
|
||||||
] ++ maybe [] (\p -> [fromPInfo p]) (parser cnc)
|
|
||||||
|
|
||||||
fromType :: Type -> RExp
|
|
||||||
fromType e = case e of
|
|
||||||
DTyp hypos cat exps ->
|
|
||||||
App (prCId cat) [
|
|
||||||
App "H" (map fromHypo hypos),
|
|
||||||
App "X" (map fromExp exps)]
|
|
||||||
|
|
||||||
fromHypo :: Hypo -> RExp
|
|
||||||
fromHypo e = case e of
|
|
||||||
Hyp x typ -> App (prCId x) [fromType typ]
|
|
||||||
|
|
||||||
fromExp :: Expr -> RExp
|
|
||||||
fromExp e = case e of
|
|
||||||
EAbs x exp -> App "Abs" [App (prCId x) [], fromExp exp]
|
|
||||||
EApp e1 e2 -> App "App" [fromExp e1, fromExp e2]
|
|
||||||
EVar x -> App "Var" [App (prCId x) []]
|
|
||||||
ELit (LStr s) -> AStr s
|
|
||||||
ELit (LFlt d) -> AFlt d
|
|
||||||
ELit (LInt i) -> AInt (toInteger i)
|
|
||||||
EMeta _ -> AMet ----
|
|
||||||
EEq eqs -> App "Eq" [App "E" (map fromExp (v:ps)) | Equ ps v <- eqs]
|
|
||||||
|
|
||||||
fromTerm :: Term -> RExp
|
|
||||||
fromTerm e = case e of
|
|
||||||
R es -> App "R" (map fromTerm es)
|
|
||||||
S es -> App "S" (map fromTerm es)
|
|
||||||
FV es -> App "FV" (map fromTerm es)
|
|
||||||
P e v -> App "P" [fromTerm e, fromTerm v]
|
|
||||||
W s v -> App "W" [AStr s, fromTerm v]
|
|
||||||
C i -> AInt (toInteger i)
|
|
||||||
TM _ -> AMet
|
|
||||||
F f -> App (prCId f) []
|
|
||||||
V i -> App "A" [AInt (toInteger i)]
|
|
||||||
K t -> fromTokn t
|
|
||||||
|
|
||||||
fromTokn :: Tokn -> RExp
|
|
||||||
fromTokn (KS s) = AStr s
|
|
||||||
fromTokn (KP d vs) = App "KP" (str d : [App "A" [str v, str x] | Alt v x <- vs])
|
|
||||||
where
|
|
||||||
str v = App "S" (map AStr v)
|
|
||||||
|
|
||||||
-- ** Parsing info
|
|
||||||
|
|
||||||
fromPInfo :: ParserInfo -> RExp
|
|
||||||
fromPInfo p = App "parser" [
|
|
||||||
App "functions" [fromFFun fun | fun <- elems (functions p)],
|
|
||||||
App "sequences" [fromFSeq seq | seq <- elems (sequences p)],
|
|
||||||
App "productions" [fromProductionSet xs | xs <- IntMap.toList (productions p)],
|
|
||||||
App "categories" (intToExp (totalCats p) : [App (prCId f) (map intToExp xs) | (f,xs) <- Map.toList (startCats p)])
|
|
||||||
]
|
|
||||||
|
|
||||||
fromFFun :: FFun -> RExp
|
|
||||||
fromFFun (FFun fun prof lins) = App (prCId fun) [App "P" (map fromProfile prof), App "R" [intToExp seqid | seqid <- elems lins]]
|
|
||||||
where
|
|
||||||
fromProfile :: Profile -> RExp
|
|
||||||
fromProfile [] = AMet
|
|
||||||
fromProfile [x] = daughter x
|
|
||||||
fromProfile args = App "_U" (map daughter args)
|
|
||||||
|
|
||||||
daughter n = App "_A" [intToExp n]
|
|
||||||
|
|
||||||
fromSymbol :: FSymbol -> RExp
|
|
||||||
fromSymbol (FSymCat n l) = App "P" [intToExp n, intToExp l]
|
|
||||||
fromSymbol (FSymLit n l) = App "PL" [intToExp n, intToExp l]
|
|
||||||
fromSymbol (FSymTok t) = fromTokn t
|
|
||||||
|
|
||||||
fromFSeq :: FSeq -> RExp
|
|
||||||
fromFSeq seq = App "seq" [fromSymbol s | s <- elems seq]
|
|
||||||
|
|
||||||
fromProductionSet :: (FCat,Set.Set Production) -> RExp
|
|
||||||
fromProductionSet (cat,xs) = App "td" (intToExp cat : map fromPassive (Set.toList xs))
|
|
||||||
where
|
|
||||||
fromPassive (FApply ruleid args) = App "A" (intToExp ruleid : map intToExp args)
|
|
||||||
fromPassive (FCoerce fcat) = App "C" [intToExp fcat]
|
|
||||||
|
|
||||||
-- ** Utilities
|
|
||||||
|
|
||||||
mkTermMap :: [RExp] -> Map.Map CId Term
|
|
||||||
mkTermMap ts = Map.fromAscList [(mkCId f,toTerm v) | App f [v] <- ts]
|
|
||||||
|
|
||||||
mkArray :: IArray a e => [e] -> a Int e
|
|
||||||
mkArray xs = listArray (0, length xs - 1) xs
|
|
||||||
|
|
||||||
expToInt :: Integral a => RExp -> a
|
|
||||||
expToInt (App "neg" [AInt i]) = fromIntegral (negate i)
|
|
||||||
expToInt (AInt i) = fromIntegral i
|
|
||||||
|
|
||||||
expToStr :: RExp -> String
|
|
||||||
expToStr (AStr s) = s
|
|
||||||
|
|
||||||
intToExp :: Integral a => a -> RExp
|
|
||||||
intToExp x | x < 0 = App "neg" [AInt (fromIntegral (negate x))]
|
|
||||||
| otherwise = AInt (fromIntegral x)
|
|
||||||
@@ -1,101 +0,0 @@
|
|||||||
module PGF.Raw.Parse (parseGrammar) where
|
|
||||||
|
|
||||||
import PGF.CId
|
|
||||||
import PGF.Raw.Abstract
|
|
||||||
|
|
||||||
import Control.Monad
|
|
||||||
import Data.Char
|
|
||||||
import qualified Data.ByteString.Char8 as BS
|
|
||||||
|
|
||||||
parseGrammar :: String -> IO Grammar
|
|
||||||
parseGrammar s = case runP pGrammar s of
|
|
||||||
Just (x,"") -> return x
|
|
||||||
_ -> fail "Parse error"
|
|
||||||
|
|
||||||
pGrammar :: P Grammar
|
|
||||||
pGrammar = liftM Grm pTerms
|
|
||||||
|
|
||||||
pTerms :: P [RExp]
|
|
||||||
pTerms = liftM2 (:) (pTerm 1) pTerms <++ (skipSpaces >> return [])
|
|
||||||
|
|
||||||
pTerm :: Int -> P RExp
|
|
||||||
pTerm n = skipSpaces >> (pParen <++ pApp <++ pNum <++ pStr <++ pMeta)
|
|
||||||
where pParen = between (char '(') (char ')') (pTerm 0)
|
|
||||||
pApp = liftM2 App pIdent (if n == 0 then pTerms else return [])
|
|
||||||
pStr = char '"' >> liftM AStr (manyTill (pEsc <++ get) (char '"'))
|
|
||||||
pEsc = char '\\' >> get
|
|
||||||
pNum = do x <- munch1 isDigit
|
|
||||||
((char '.' >> munch1 isDigit >>= \y -> return (AFlt (read (x++"."++y))))
|
|
||||||
<++
|
|
||||||
return (AInt (read x)))
|
|
||||||
pMeta = char '?' >> return AMet
|
|
||||||
pIdent = liftM2 (:) (satisfy isIdentFirst) (munch isIdentRest)
|
|
||||||
isIdentFirst c = c == '_' || isAlpha c
|
|
||||||
isIdentRest c = c == '_' || c == '\'' || isAlphaNum c
|
|
||||||
|
|
||||||
-- Parser combinators with only left-biased choice
|
|
||||||
|
|
||||||
newtype P a = P { runP :: String -> Maybe (a,String) }
|
|
||||||
|
|
||||||
instance Monad P where
|
|
||||||
return x = P (\ts -> Just (x,ts))
|
|
||||||
P p >>= f = P (\ts -> p ts >>= \ (x,ts') -> runP (f x) ts')
|
|
||||||
fail _ = pfail
|
|
||||||
|
|
||||||
instance MonadPlus P where
|
|
||||||
mzero = pfail
|
|
||||||
mplus = (<++)
|
|
||||||
|
|
||||||
|
|
||||||
get :: P Char
|
|
||||||
get = P (\ts -> case ts of
|
|
||||||
[] -> Nothing
|
|
||||||
c:cs -> Just (c,cs))
|
|
||||||
|
|
||||||
look :: P String
|
|
||||||
look = P (\ts -> Just (ts,ts))
|
|
||||||
|
|
||||||
(<++) :: P a -> P a -> P a
|
|
||||||
P p <++ P q = P (\ts -> p ts `mplus` q ts)
|
|
||||||
|
|
||||||
pfail :: P a
|
|
||||||
pfail = P (\ts -> Nothing)
|
|
||||||
|
|
||||||
satisfy :: (Char -> Bool) -> P Char
|
|
||||||
satisfy p = do c <- get
|
|
||||||
if p c then return c else pfail
|
|
||||||
|
|
||||||
char :: Char -> P Char
|
|
||||||
char c = satisfy (c==)
|
|
||||||
|
|
||||||
string :: String -> P String
|
|
||||||
string this = look >>= scan this
|
|
||||||
where
|
|
||||||
scan [] _ = return this
|
|
||||||
scan (x:xs) (y:ys) | x == y = get >> scan xs ys
|
|
||||||
scan _ _ = pfail
|
|
||||||
|
|
||||||
skipSpaces :: P ()
|
|
||||||
skipSpaces = look >>= skip
|
|
||||||
where
|
|
||||||
skip (c:s) | isSpace c = get >> skip s
|
|
||||||
skip _ = return ()
|
|
||||||
|
|
||||||
manyTill :: P a -> P end -> P [a]
|
|
||||||
manyTill p end = scan
|
|
||||||
where scan = (end >> return []) <++ liftM2 (:) p scan
|
|
||||||
|
|
||||||
munch :: (Char -> Bool) -> P String
|
|
||||||
munch p = munch1 p <++ return []
|
|
||||||
|
|
||||||
munch1 :: (Char -> Bool) -> P String
|
|
||||||
munch1 p = liftM2 (:) (satisfy p) (munch p)
|
|
||||||
|
|
||||||
choice :: [P a] -> P a
|
|
||||||
choice = msum
|
|
||||||
|
|
||||||
between :: P open -> P close -> P a -> P a
|
|
||||||
between open close p = do open
|
|
||||||
x <- p
|
|
||||||
close
|
|
||||||
return x
|
|
||||||
@@ -1,35 +0,0 @@
|
|||||||
module PGF.Raw.Print (printTree) where
|
|
||||||
|
|
||||||
import PGF.CId
|
|
||||||
import PGF.Raw.Abstract
|
|
||||||
|
|
||||||
import Data.List (intersperse)
|
|
||||||
import Numeric (showFFloat)
|
|
||||||
import qualified Data.ByteString.Char8 as BS
|
|
||||||
|
|
||||||
printTree :: Grammar -> String
|
|
||||||
printTree g = prGrammar g ""
|
|
||||||
|
|
||||||
prGrammar :: Grammar -> ShowS
|
|
||||||
prGrammar (Grm xs) = prRExpList xs
|
|
||||||
|
|
||||||
prRExp :: Int -> RExp -> ShowS
|
|
||||||
prRExp _ (App x []) = showString x
|
|
||||||
prRExp n (App x xs) = p (showString x . showChar ' ' . prRExpList xs)
|
|
||||||
where p s = if n == 0 then s else showChar '(' . s . showChar ')'
|
|
||||||
prRExp _ (AInt x) = shows x
|
|
||||||
prRExp _ (AStr x) = showChar '"' . concatS (map mkEsc x) . showChar '"'
|
|
||||||
prRExp _ (AFlt x) = showFFloat Nothing x
|
|
||||||
prRExp _ AMet = showChar '?'
|
|
||||||
|
|
||||||
mkEsc :: Char -> ShowS
|
|
||||||
mkEsc s = case s of
|
|
||||||
'"' -> showString "\\\""
|
|
||||||
'\\' -> showString "\\\\"
|
|
||||||
_ -> showChar s
|
|
||||||
|
|
||||||
prRExpList :: [RExp] -> ShowS
|
|
||||||
prRExpList = concatS . intersperse (showChar ' ') . map (prRExp 1)
|
|
||||||
|
|
||||||
concatS :: [ShowS] -> ShowS
|
|
||||||
concatS = foldr (.) id
|
|
||||||
Reference in New Issue
Block a user