mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 13:09:33 -06:00
Move unused pgf-binary into its own repository, at:
https://github.com/GrammaticalFramework/pgf-binary
This commit is contained in:
@@ -1,489 +0,0 @@
|
||||
{-# LANGUAGE FlexibleInstances, FlexibleContexts #-}
|
||||
-- | This is a layer on top of "Data.Binary" with its own 'Binary' class
|
||||
-- and customised instances for 'Word', 'Int' and 'Double'.
|
||||
-- The 'Int' and 'Word' instance use a variable-length encoding to save space
|
||||
-- for small numbers. The 'Double' instance uses the standard IEEE754 encoding.
|
||||
module PGF.Data.Binary (
|
||||
|
||||
-- * The Binary class
|
||||
Binary(..)
|
||||
|
||||
-- * The Get and Put monads
|
||||
, Get , Put, runPut
|
||||
|
||||
-- * Useful helpers for writing instances
|
||||
, putWord8 , getWord8 , putWord16be , getWord16be
|
||||
|
||||
-- * Binary serialisation
|
||||
, encode , decode
|
||||
|
||||
-- * IO functions for serialisation
|
||||
, encodeFile , decodeFile
|
||||
|
||||
, encodeFile_ , decodeFile_
|
||||
|
||||
-- * Useful
|
||||
, Word8, Word16
|
||||
|
||||
) where
|
||||
|
||||
|
||||
import Data.Word
|
||||
|
||||
import qualified Data.Binary as Bin
|
||||
import Data.Binary.Put
|
||||
import Data.Binary.Get
|
||||
import Data.Binary.IEEE754 ( putFloat64be, getFloat64be)
|
||||
import Control.Monad
|
||||
import Control.Exception
|
||||
import Foreign
|
||||
import System.IO
|
||||
|
||||
import Data.ByteString.Lazy (ByteString)
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
|
||||
--import Data.Char (chr,ord)
|
||||
--import Data.List (unfoldr)
|
||||
|
||||
-- And needed for the instances:
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.IntMap as IntMap
|
||||
import qualified Data.IntSet as IntSet
|
||||
--import qualified Data.Ratio as R
|
||||
|
||||
--import qualified Data.Tree as T
|
||||
|
||||
import Data.Array.Unboxed
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- | The @Binary@ class provides 'put' and 'get', methods to encode and
|
||||
-- decode a Haskell value to a lazy ByteString. It mirrors the Read and
|
||||
-- Show classes for textual representation of Haskell types, and is
|
||||
-- suitable for serialising Haskell values to disk, over the network.
|
||||
--
|
||||
-- For parsing and generating simple external binary formats (e.g. C
|
||||
-- structures), Binary may be used, but in general is not suitable
|
||||
-- for complex protocols. Instead use the Put and Get primitives
|
||||
-- directly.
|
||||
--
|
||||
-- Instances of Binary should satisfy the following property:
|
||||
--
|
||||
-- > decode . encode == id
|
||||
--
|
||||
-- That is, the 'get' and 'put' methods should be the inverse of each
|
||||
-- other. A range of instances are provided for basic Haskell types.
|
||||
--
|
||||
class Binary t where
|
||||
-- | Encode a value in the Put monad.
|
||||
put :: t -> Put
|
||||
-- | Decode a value in the Get monad
|
||||
get :: Get t
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Wrappers to run the underlying monad
|
||||
|
||||
-- | Encode a value using binary serialisation to a lazy ByteString.
|
||||
--
|
||||
encode :: Binary a => a -> ByteString
|
||||
encode = runPut . put
|
||||
{-# INLINE encode #-}
|
||||
|
||||
-- | Decode a value from a lazy ByteString, reconstructing the original structure.
|
||||
--
|
||||
decode :: Binary a => ByteString -> a
|
||||
decode = runGet get
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Convenience IO operations
|
||||
|
||||
-- | Lazily serialise a value to a file
|
||||
--
|
||||
-- This is just a convenience function, it's defined simply as:
|
||||
--
|
||||
-- > encodeFile f = B.writeFile f . encode
|
||||
--
|
||||
-- So for example if you wanted to compress as well, you could use:
|
||||
--
|
||||
-- > B.writeFile f . compress . encode
|
||||
--
|
||||
encodeFile :: Binary a => FilePath -> a -> IO ()
|
||||
encodeFile f v = L.writeFile f (encode v)
|
||||
|
||||
encodeFile_ :: FilePath -> Put -> IO ()
|
||||
encodeFile_ f m = L.writeFile f (runPut m)
|
||||
|
||||
-- | Lazily reconstruct a value previously written to a file.
|
||||
--
|
||||
-- This is just a convenience function, it's defined simply as:
|
||||
--
|
||||
-- > decodeFile f = return . decode =<< B.readFile f
|
||||
--
|
||||
-- So for example if you wanted to decompress as well, you could use:
|
||||
--
|
||||
-- > return . decode . decompress =<< B.readFile f
|
||||
--
|
||||
decodeFile :: Binary a => FilePath -> IO a
|
||||
decodeFile f = bracket (openBinaryFile f ReadMode) hClose $ \h -> do
|
||||
s <- L.hGetContents h
|
||||
evaluate $ runGet get s
|
||||
|
||||
decodeFile_ :: FilePath -> Get a -> IO a
|
||||
decodeFile_ f m = bracket (openBinaryFile f ReadMode) hClose $ \h -> do
|
||||
s <- L.hGetContents h
|
||||
evaluate $ runGet m s
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- For ground types, the standard instances can be reused,
|
||||
-- but for container types it would imply using
|
||||
-- the standard instances for all types of values in the container...
|
||||
|
||||
instance Binary () where put=Bin.put; get=Bin.get
|
||||
instance Binary Bool where put=Bin.put; get=Bin.get
|
||||
instance Binary Word8 where put=Bin.put; get=Bin.get
|
||||
instance Binary Word16 where put=Bin.put; get=Bin.get
|
||||
instance Binary Char where put=Bin.put; get=Bin.get
|
||||
|
||||
-- -- GF doesn't need these:
|
||||
--instance Binary Ordering where put=Bin.put; get=Bin.get
|
||||
--instance Binary Word32 where put=Bin.put; get=Bin.get
|
||||
--instance Binary Word64 where put=Bin.put; get=Bin.get
|
||||
--instance Binary Int8 where put=Bin.put; get=Bin.get
|
||||
--instance Binary Int16 where put=Bin.put; get=Bin.get
|
||||
--instance Binary Int32 where put=Bin.put; get=Bin.get
|
||||
|
||||
--instance Binary Int64 where put=Bin.put; get=Bin.get -- needed by instance Binary ByteString
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- Words are written as sequence of bytes. The last bit of each
|
||||
-- byte indicates whether there are more bytes to be read
|
||||
instance Binary Word where
|
||||
put i | i <= 0x7f = do put a
|
||||
| i <= 0x3fff = do put (a .|. 0x80)
|
||||
put b
|
||||
| i <= 0x1fffff = do put (a .|. 0x80)
|
||||
put (b .|. 0x80)
|
||||
put c
|
||||
| i <= 0xfffffff = do put (a .|. 0x80)
|
||||
put (b .|. 0x80)
|
||||
put (c .|. 0x80)
|
||||
put d
|
||||
-- -- #if WORD_SIZE_IN_BITS < 64
|
||||
| otherwise = do put (a .|. 0x80)
|
||||
put (b .|. 0x80)
|
||||
put (c .|. 0x80)
|
||||
put (d .|. 0x80)
|
||||
put e
|
||||
{-
|
||||
-- Restricted to 32 bits even on 64-bit systems, so that negative
|
||||
-- Ints are written as 5 bytes instead of 10 bytes (TH 2013-02-13)
|
||||
--#else
|
||||
| i <= 0x7ffffffff = do put (a .|. 0x80)
|
||||
put (b .|. 0x80)
|
||||
put (c .|. 0x80)
|
||||
put (d .|. 0x80)
|
||||
put e
|
||||
| i <= 0x3ffffffffff = do put (a .|. 0x80)
|
||||
put (b .|. 0x80)
|
||||
put (c .|. 0x80)
|
||||
put (d .|. 0x80)
|
||||
put (e .|. 0x80)
|
||||
put f
|
||||
| i <= 0x1ffffffffffff = do put (a .|. 0x80)
|
||||
put (b .|. 0x80)
|
||||
put (c .|. 0x80)
|
||||
put (d .|. 0x80)
|
||||
put (e .|. 0x80)
|
||||
put (f .|. 0x80)
|
||||
put g
|
||||
| i <= 0xffffffffffffff = do put (a .|. 0x80)
|
||||
put (b .|. 0x80)
|
||||
put (c .|. 0x80)
|
||||
put (d .|. 0x80)
|
||||
put (e .|. 0x80)
|
||||
put (f .|. 0x80)
|
||||
put (g .|. 0x80)
|
||||
put h
|
||||
| i <= 0xffffffffffffff = do put (a .|. 0x80)
|
||||
put (b .|. 0x80)
|
||||
put (c .|. 0x80)
|
||||
put (d .|. 0x80)
|
||||
put (e .|. 0x80)
|
||||
put (f .|. 0x80)
|
||||
put (g .|. 0x80)
|
||||
put h
|
||||
| i <= 0x7fffffffffffffff = do put (a .|. 0x80)
|
||||
put (b .|. 0x80)
|
||||
put (c .|. 0x80)
|
||||
put (d .|. 0x80)
|
||||
put (e .|. 0x80)
|
||||
put (f .|. 0x80)
|
||||
put (g .|. 0x80)
|
||||
put (h .|. 0x80)
|
||||
put j
|
||||
| otherwise = do put (a .|. 0x80)
|
||||
put (b .|. 0x80)
|
||||
put (c .|. 0x80)
|
||||
put (d .|. 0x80)
|
||||
put (e .|. 0x80)
|
||||
put (f .|. 0x80)
|
||||
put (g .|. 0x80)
|
||||
put (h .|. 0x80)
|
||||
put (j .|. 0x80)
|
||||
put k
|
||||
-- #endif
|
||||
-}
|
||||
where
|
||||
a = fromIntegral ( i .&. 0x7f) :: Word8
|
||||
b = fromIntegral (shiftR i 7 .&. 0x7f) :: Word8
|
||||
c = fromIntegral (shiftR i 14 .&. 0x7f) :: Word8
|
||||
d = fromIntegral (shiftR i 21 .&. 0x7f) :: Word8
|
||||
e = fromIntegral (shiftR i 28 .&. 0x7f) :: Word8
|
||||
{-
|
||||
f = fromIntegral (shiftR i 35 .&. 0x7f) :: Word8
|
||||
g = fromIntegral (shiftR i 42 .&. 0x7f) :: Word8
|
||||
h = fromIntegral (shiftR i 49 .&. 0x7f) :: Word8
|
||||
j = fromIntegral (shiftR i 56 .&. 0x7f) :: Word8
|
||||
k = fromIntegral (shiftR i 63 .&. 0x7f) :: Word8
|
||||
-}
|
||||
get = do i <- getWord8
|
||||
(if i <= 0x7f
|
||||
then return (fromIntegral i)
|
||||
else do n <- get
|
||||
return $ (n `shiftL` 7) .|. (fromIntegral (i .&. 0x7f)))
|
||||
|
||||
-- Int has the same representation as Word
|
||||
instance Binary Int where
|
||||
put i = put (fromIntegral i :: Word)
|
||||
get = liftM toInt32 (get :: Get Word)
|
||||
where
|
||||
-- restrict to 32 bits (for PGF portability, TH 2013-02-13)
|
||||
toInt32 w = fromIntegral (fromIntegral w::Int32)::Int
|
||||
|
||||
------------------------------------------------------------------------
|
||||
--
|
||||
-- Portable, and pretty efficient, serialisation of Integer
|
||||
--
|
||||
|
||||
-- Fixed-size type for a subset of Integer
|
||||
--type SmallInt = Int32
|
||||
|
||||
-- Integers are encoded in two ways: if they fit inside a SmallInt,
|
||||
-- they're written as a byte tag, and that value. If the Integer value
|
||||
-- is too large to fit in a SmallInt, it is written as a byte array,
|
||||
-- along with a sign and length field.
|
||||
{-
|
||||
instance Binary Integer where
|
||||
|
||||
{-# INLINE put #-}
|
||||
put n | n >= lo && n <= hi = do
|
||||
putWord8 0
|
||||
put (fromIntegral n :: SmallInt) -- fast path
|
||||
where
|
||||
lo = fromIntegral (minBound :: SmallInt) :: Integer
|
||||
hi = fromIntegral (maxBound :: SmallInt) :: Integer
|
||||
|
||||
put n = do
|
||||
putWord8 1
|
||||
put sign
|
||||
put (unroll (abs n)) -- unroll the bytes
|
||||
where
|
||||
sign = fromIntegral (signum n) :: Word8
|
||||
|
||||
{-# INLINE get #-}
|
||||
get = do
|
||||
tag <- get :: Get Word8
|
||||
case tag of
|
||||
0 -> liftM fromIntegral (get :: Get SmallInt)
|
||||
_ -> do sign <- get
|
||||
bytes <- get
|
||||
let v = roll bytes
|
||||
return $! if sign == (1 :: Word8) then v else - v
|
||||
|
||||
--
|
||||
-- Fold and unfold an Integer to and from a list of its bytes
|
||||
--
|
||||
unroll :: Integer -> [Word8]
|
||||
unroll = unfoldr step
|
||||
where
|
||||
step 0 = Nothing
|
||||
step i = Just (fromIntegral i, i `shiftR` 8)
|
||||
|
||||
roll :: [Word8] -> Integer
|
||||
roll = foldr unstep 0
|
||||
where
|
||||
unstep b a = a `shiftL` 8 .|. fromIntegral b
|
||||
|
||||
instance (Binary a,Integral a) => Binary (R.Ratio a) where
|
||||
put r = put (R.numerator r) >> put (R.denominator r)
|
||||
get = liftM2 (R.%) get get
|
||||
-}
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Instances for the first few tuples
|
||||
|
||||
instance (Binary a, Binary b) => Binary (a,b) where
|
||||
put (a,b) = put a >> put b
|
||||
get = liftM2 (,) get get
|
||||
|
||||
instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where
|
||||
put (a,b,c) = put a >> put b >> put c
|
||||
get = liftM3 (,,) get get get
|
||||
|
||||
instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where
|
||||
put (a,b,c,d) = put a >> put b >> put c >> put d
|
||||
get = liftM4 (,,,) get get get get
|
||||
|
||||
instance (Binary a, Binary b, Binary c, Binary d, Binary e) => Binary (a,b,c,d,e) where
|
||||
put (a,b,c,d,e) = put a >> put b >> put c >> put d >> put e
|
||||
get = liftM5 (,,,,) get get get get get
|
||||
|
||||
--
|
||||
-- and now just recurse:
|
||||
--
|
||||
|
||||
instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f)
|
||||
=> Binary (a,b,c,d,e,f) where
|
||||
put (a,b,c,d,e,f) = put (a,(b,c,d,e,f))
|
||||
get = do (a,(b,c,d,e,f)) <- get ; return (a,b,c,d,e,f)
|
||||
|
||||
instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f, Binary g)
|
||||
=> Binary (a,b,c,d,e,f,g) where
|
||||
put (a,b,c,d,e,f,g) = put (a,(b,c,d,e,f,g))
|
||||
get = do (a,(b,c,d,e,f,g)) <- get ; return (a,b,c,d,e,f,g)
|
||||
|
||||
instance (Binary a, Binary b, Binary c, Binary d, Binary e,
|
||||
Binary f, Binary g, Binary h)
|
||||
=> Binary (a,b,c,d,e,f,g,h) where
|
||||
put (a,b,c,d,e,f,g,h) = put (a,(b,c,d,e,f,g,h))
|
||||
get = do (a,(b,c,d,e,f,g,h)) <- get ; return (a,b,c,d,e,f,g,h)
|
||||
|
||||
instance (Binary a, Binary b, Binary c, Binary d, Binary e,
|
||||
Binary f, Binary g, Binary h, Binary i)
|
||||
=> Binary (a,b,c,d,e,f,g,h,i) where
|
||||
put (a,b,c,d,e,f,g,h,i) = put (a,(b,c,d,e,f,g,h,i))
|
||||
get = do (a,(b,c,d,e,f,g,h,i)) <- get ; return (a,b,c,d,e,f,g,h,i)
|
||||
|
||||
instance (Binary a, Binary b, Binary c, Binary d, Binary e,
|
||||
Binary f, Binary g, Binary h, Binary i, Binary j)
|
||||
=> Binary (a,b,c,d,e,f,g,h,i,j) where
|
||||
put (a,b,c,d,e,f,g,h,i,j) = put (a,(b,c,d,e,f,g,h,i,j))
|
||||
get = do (a,(b,c,d,e,f,g,h,i,j)) <- get ; return (a,b,c,d,e,f,g,h,i,j)
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Container types
|
||||
|
||||
instance Binary a => Binary [a] where
|
||||
put l = put (length l) >> mapM_ put l
|
||||
get = do n <- get :: Get Int
|
||||
xs <- replicateM n get
|
||||
return xs
|
||||
|
||||
instance (Binary a) => Binary (Maybe a) where
|
||||
put Nothing = putWord8 0
|
||||
put (Just x) = putWord8 1 >> put x
|
||||
get = do
|
||||
w <- getWord8
|
||||
case w of
|
||||
0 -> return Nothing
|
||||
_ -> liftM Just get
|
||||
|
||||
instance (Binary a, Binary b) => Binary (Either a b) where
|
||||
put (Left a) = putWord8 0 >> put a
|
||||
put (Right b) = putWord8 1 >> put b
|
||||
get = do
|
||||
w <- getWord8
|
||||
case w of
|
||||
0 -> liftM Left get
|
||||
_ -> liftM Right get
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- ByteStrings (have specially efficient instances)
|
||||
|
||||
instance Binary B.ByteString where
|
||||
put bs = do put (B.length bs)
|
||||
putByteString bs
|
||||
get = get >>= getByteString
|
||||
|
||||
--
|
||||
-- Using old versions of fps, this is a type synonym, and non portable
|
||||
--
|
||||
-- Requires 'flexible instances'
|
||||
--
|
||||
{-
|
||||
instance Binary ByteString where
|
||||
put bs = do put (fromIntegral (L.length bs) :: Int)
|
||||
putLazyByteString bs
|
||||
get = get >>= getLazyByteString
|
||||
-}
|
||||
------------------------------------------------------------------------
|
||||
-- Maps and Sets
|
||||
|
||||
instance (Ord a, Binary a) => Binary (Set.Set a) where
|
||||
put s = put (Set.size s) >> mapM_ put (Set.toAscList s)
|
||||
get = liftM Set.fromDistinctAscList get
|
||||
|
||||
instance (Ord k, Binary k, Binary e) => Binary (Map.Map k e) where
|
||||
put m = put (Map.size m) >> mapM_ put (Map.toAscList m)
|
||||
get = liftM Map.fromDistinctAscList get
|
||||
|
||||
instance Binary IntSet.IntSet where
|
||||
put s = put (IntSet.size s) >> mapM_ put (IntSet.toAscList s)
|
||||
get = liftM IntSet.fromDistinctAscList get
|
||||
|
||||
instance (Binary e) => Binary (IntMap.IntMap e) where
|
||||
put m = put (IntMap.size m) >> mapM_ put (IntMap.toAscList m)
|
||||
get = liftM IntMap.fromDistinctAscList get
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Floating point
|
||||
|
||||
-- instance Binary Double where
|
||||
-- put d = put (decodeFloat d)
|
||||
-- get = liftM2 encodeFloat get get
|
||||
|
||||
instance Binary Double where
|
||||
put = putFloat64be
|
||||
get = getFloat64be
|
||||
{-
|
||||
instance Binary Float where
|
||||
put f = put (decodeFloat f)
|
||||
get = liftM2 encodeFloat get get
|
||||
-}
|
||||
------------------------------------------------------------------------
|
||||
-- Trees
|
||||
{-
|
||||
instance (Binary e) => Binary (T.Tree e) where
|
||||
put (T.Node r s) = put r >> put s
|
||||
get = liftM2 T.Node get get
|
||||
-}
|
||||
------------------------------------------------------------------------
|
||||
-- Arrays
|
||||
|
||||
instance (Binary i, Ix i, Binary e) => Binary (Array i e) where
|
||||
put a = do
|
||||
put (bounds a)
|
||||
put (rangeSize $ bounds a) -- write the length
|
||||
mapM_ put (elems a) -- now the elems.
|
||||
get = do
|
||||
bs <- get
|
||||
n <- get -- read the length
|
||||
xs <- replicateM n get -- now the elems.
|
||||
return (listArray bs xs)
|
||||
|
||||
--
|
||||
-- The IArray UArray e constraint is non portable. Requires flexible instances
|
||||
--
|
||||
instance (Binary i, Ix i, Binary e, IArray UArray e) => Binary (UArray i e) where
|
||||
put a = do
|
||||
put (bounds a)
|
||||
put (rangeSize $ bounds a) -- now write the length
|
||||
mapM_ put (elems a)
|
||||
get = do
|
||||
bs <- get
|
||||
n <- get
|
||||
xs <- replicateM n get
|
||||
return (listArray bs xs)
|
||||
@@ -1,27 +0,0 @@
|
||||
name: pgf-binary
|
||||
version: 0.5
|
||||
|
||||
cabal-version: >= 1.10
|
||||
build-type: Simple
|
||||
license: BSD3
|
||||
--license-file: LICENSE
|
||||
synopsis: Custom version of the binary-0.5 package for the PGF library
|
||||
homepage: http://www.grammaticalframework.org/
|
||||
--bug-reports: http://code.google.com/p/grammatical-framework/issues/list
|
||||
maintainer: Thomas Hallgren
|
||||
stability: provisional
|
||||
category: Data, Parsing
|
||||
tested-with: GHC==7.4.2, GHC==7.8.3
|
||||
|
||||
source-repository head
|
||||
type: darcs
|
||||
location: http://www.grammaticalframework.org/
|
||||
|
||||
Library
|
||||
default-language: Haskell2010
|
||||
build-depends: base >= 4.3 && <5, binary, data-binary-ieee754,
|
||||
containers, array, bytestring
|
||||
exposed-modules: PGF.Data.Binary
|
||||
|
||||
ghc-options: -fwarn-unused-imports -O2
|
||||
extensions: FlexibleInstances, FlexibleContexts
|
||||
Reference in New Issue
Block a user