1
0
forked from GitHub/gf-core

binary serialization for PGF

This commit is contained in:
krasimir
2008-10-28 13:57:10 +00:00
parent 8e43cfb8a8
commit ebd98056ce
14 changed files with 1984 additions and 458 deletions

View File

@@ -575,23 +575,25 @@ library
PGF.Parsing.FCFG PGF.Parsing.FCFG
PGF.Expr PGF.Expr
PGF.Type PGF.Type
PGF.Raw.Parse
PGF.Raw.Print
PGF.Raw.Convert
PGF.Raw.Abstract
PGF.AbsCompute PGF.AbsCompute
PGF.Paraphrase PGF.Paraphrase
PGF.TypeCheck PGF.TypeCheck
PGF.Binary
GF.Data.MultiMap GF.Data.MultiMap
GF.Data.Utilities GF.Data.Utilities
GF.Data.SortedList GF.Data.SortedList
GF.Data.Assoc GF.Data.Assoc
GF.Data.ErrM GF.Data.ErrM
GF.Text.UTF8
-- needed only for the on demand generation of PMCFG -- needed only for the on demand generation of PMCFG
GF.Data.BacktrackM GF.Data.BacktrackM
GF.Compile.GenerateFCFG GF.Compile.GenerateFCFG
GF.Compile.GeneratePMCFG GF.Compile.GeneratePMCFG
-- not really part of GF but I have changed the original binary library
-- and we have to keep the copy for now.
Data.Binary
Data.Binary.Put
Data.Binary.Get
Data.Binary.Builder
executable gf executable gf
build-depends: base, build-depends: base,
@@ -701,6 +703,7 @@ executable gf
PGF.AbsCompute PGF.AbsCompute
PGF.Paraphrase PGF.Paraphrase
PGF.TypeCheck PGF.TypeCheck
PGF.Binary
GFC GFC
GFI GFI

792
src/Data/Binary.hs Normal file
View File

@@ -0,0 +1,792 @@
{-# LANGUAGE CPP, FlexibleInstances, FlexibleContexts #-}
-----------------------------------------------------------------------------
-- |
-- Module : Data.Binary
-- Copyright : Lennart Kolmodin
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
-- Stability : unstable
-- Portability : portable to Hugs and GHC. Requires the FFI and some flexible instances
--
-- Binary serialisation of Haskell values to and from lazy ByteStrings.
-- The Binary library provides methods for encoding Haskell values as
-- streams of bytes directly in memory. The resulting @ByteString@ can
-- then be written to disk, sent over the network, or futher processed
-- (for example, compressed with gzip).
--
-- The 'Binary' package is notable in that it provides both pure, and
-- high performance serialisation.
--
-- Values are always encoded in network order (big endian) form, and
-- encoded data should be portable across machine endianess, word size,
-- or compiler version. For example, data encoded using the Binary class
-- could be written from GHC, and read back in Hugs.
--
-----------------------------------------------------------------------------
module Data.Binary (
-- * The Binary class
Binary(..)
-- $example
-- * The Get and Put monads
, Get
, Put
-- * Useful helpers for writing instances
, putWord8
, getWord8
-- * Binary serialisation
, encode -- :: Binary a => a -> ByteString
, decode -- :: Binary a => ByteString -> a
-- * IO functions for serialisation
, encodeFile -- :: Binary a => FilePath -> a -> IO ()
, decodeFile -- :: Binary a => FilePath -> IO a
-- Lazy put and get
-- , lazyPut
-- , lazyGet
, module Data.Word -- useful
) where
import Data.Word
import Data.Binary.Put
import Data.Binary.Get
import Control.Monad
import Foreign
import System.IO
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as L
import Data.Char (chr,ord)
import Data.List (unfoldr)
-- And needed for the instances:
import qualified Data.ByteString as B
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import qualified Data.IntSet as IntSet
import qualified Data.Ratio as R
import qualified Data.Tree as T
import Data.Array.Unboxed
--
-- This isn't available in older Hugs or older GHC
--
#if __GLASGOW_HASKELL__ >= 606
import qualified Data.Sequence as Seq
import qualified Data.Foldable as Fold
#endif
------------------------------------------------------------------------
-- | The @Binary@ class provides 'put' and 'get', methods to encode and
-- decode a Haskell value to a lazy ByteString. It mirrors the Read and
-- Show classes for textual representation of Haskell types, and is
-- suitable for serialising Haskell values to disk, over the network.
--
-- For parsing and generating simple external binary formats (e.g. C
-- structures), Binary may be used, but in general is not suitable
-- for complex protocols. Instead use the Put and Get primitives
-- directly.
--
-- Instances of Binary should satisfy the following property:
--
-- > decode . encode == id
--
-- That is, the 'get' and 'put' methods should be the inverse of each
-- other. A range of instances are provided for basic Haskell types.
--
class Binary t where
-- | Encode a value in the Put monad.
put :: t -> Put
-- | Decode a value in the Get monad
get :: Get t
-- $example
-- To serialise a custom type, an instance of Binary for that type is
-- required. For example, suppose we have a data structure:
--
-- > data Exp = IntE Int
-- > | OpE String Exp Exp
-- > deriving Show
--
-- We can encode values of this type into bytestrings using the
-- following instance, which proceeds by recursively breaking down the
-- structure to serialise:
--
-- > instance Binary Exp where
-- > put (IntE i) = do put (0 :: Word8)
-- > put i
-- > put (OpE s e1 e2) = do put (1 :: Word8)
-- > put s
-- > put e1
-- > put e2
-- >
-- > get = do t <- get :: Get Word8
-- > case t of
-- > 0 -> do i <- get
-- > return (IntE i)
-- > 1 -> do s <- get
-- > e1 <- get
-- > e2 <- get
-- > return (OpE s e1 e2)
--
-- Note how we write an initial tag byte to indicate each variant of the
-- data type.
--
-- We can simplify the writing of 'get' instances using monadic
-- combinators:
--
-- > get = do tag <- getWord8
-- > case tag of
-- > 0 -> liftM IntE get
-- > 1 -> liftM3 OpE get get get
--
-- The generation of Binary instances has been automated by a script
-- using Scrap Your Boilerplate generics. Use the script here:
-- <http://darcs.haskell.org/binary/tools/derive/BinaryDerive.hs>.
--
-- To derive the instance for a type, load this script into GHCi, and
-- bring your type into scope. Your type can then have its Binary
-- instances derived as follows:
--
-- > $ ghci -fglasgow-exts BinaryDerive.hs
-- > *BinaryDerive> :l Example.hs
-- > *Main> deriveM (undefined :: Drinks)
-- >
-- > instance Binary Main.Drinks where
-- > put (Beer a) = putWord8 0 >> put a
-- > put Coffee = putWord8 1
-- > put Tea = putWord8 2
-- > put EnergyDrink = putWord8 3
-- > put Water = putWord8 4
-- > put Wine = putWord8 5
-- > put Whisky = putWord8 6
-- > get = do
-- > tag_ <- getWord8
-- > case tag_ of
-- > 0 -> get >>= \a -> return (Beer a)
-- > 1 -> return Coffee
-- > 2 -> return Tea
-- > 3 -> return EnergyDrink
-- > 4 -> return Water
-- > 5 -> return Wine
-- > 6 -> return Whisky
-- >
--
-- To serialise this to a bytestring, we use 'encode', which packs the
-- data structure into a binary format, in a lazy bytestring
--
-- > > let e = OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
-- > > let v = encode e
--
-- Where 'v' is a binary encoded data structure. To reconstruct the
-- original data, we use 'decode'
--
-- > > decode v :: Exp
-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
--
-- The lazy ByteString that results from 'encode' can be written to
-- disk, and read from disk using Data.ByteString.Lazy IO functions,
-- such as hPutStr or writeFile:
--
-- > > writeFile "/tmp/exp.txt" (encode e)
--
-- And read back with:
--
-- > > readFile "/tmp/exp.txt" >>= return . decode :: IO Exp
-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
--
-- We can also directly serialise a value to and from a Handle, or a file:
--
-- > > v <- decodeFile "/tmp/exp.txt" :: IO Exp
-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
--
-- And write a value to disk
--
-- > > encodeFile "/tmp/a.txt" v
--
------------------------------------------------------------------------
-- Wrappers to run the underlying monad
-- | Encode a value using binary serialisation to a lazy ByteString.
--
encode :: Binary a => a -> ByteString
encode = runPut . put
{-# INLINE encode #-}
-- | Decode a value from a lazy ByteString, reconstructing the original structure.
--
decode :: Binary a => ByteString -> a
decode = runGet get
------------------------------------------------------------------------
-- Convenience IO operations
-- | Lazily serialise a value to a file
--
-- This is just a convenience function, it's defined simply as:
--
-- > encodeFile f = B.writeFile f . encode
--
-- So for example if you wanted to compress as well, you could use:
--
-- > B.writeFile f . compress . encode
--
encodeFile :: Binary a => FilePath -> a -> IO ()
encodeFile f v = L.writeFile f (encode v)
-- | Lazily reconstruct a value previously written to a file.
--
-- This is just a convenience function, it's defined simply as:
--
-- > decodeFile f = return . decode =<< B.readFile f
--
-- So for example if you wanted to decompress as well, you could use:
--
-- > return . decode . decompress =<< B.readFile f
--
-- After contructing the data from the input file, 'decodeFile' checks
-- if the file is empty, and in doing so will force the associated file
-- handle closed, if it is indeed empty. If the file is not empty,
-- it is up to the decoding instance to consume the rest of the data,
-- or otherwise finalise the resource.
--
decodeFile :: Binary a => FilePath -> IO a
decodeFile f = do
s <- L.readFile f
return $ runGet (do v <- get
m <- isEmpty
m `seq` return v) s
-- needs bytestring 0.9.1.x to work
------------------------------------------------------------------------
-- Lazy put and get
-- lazyPut :: (Binary a) => a -> Put
-- lazyPut a = put (encode a)
-- lazyGet :: (Binary a) => Get a
-- lazyGet = fmap decode get
------------------------------------------------------------------------
-- Simple instances
-- The () type need never be written to disk: values of singleton type
-- can be reconstructed from the type alone
instance Binary () where
put () = return ()
get = return ()
-- Bools are encoded as a byte in the range 0 .. 1
instance Binary Bool where
put = putWord8 . fromIntegral . fromEnum
get = liftM (toEnum . fromIntegral) getWord8
-- Values of type 'Ordering' are encoded as a byte in the range 0 .. 2
instance Binary Ordering where
put = putWord8 . fromIntegral . fromEnum
get = liftM (toEnum . fromIntegral) getWord8
------------------------------------------------------------------------
-- Words and Ints
-- Words8s are written as bytes
instance Binary Word8 where
put = putWord8
get = getWord8
-- Words16s are written as 2 bytes in big-endian (network) order
instance Binary Word16 where
put = putWord16be
get = getWord16be
-- Words32s are written as 4 bytes in big-endian (network) order
instance Binary Word32 where
put = putWord32be
get = getWord32be
-- Words64s are written as 8 bytes in big-endian (network) order
instance Binary Word64 where
put = putWord64be
get = getWord64be
-- Int8s are written as a single byte.
instance Binary Int8 where
put i = put (fromIntegral i :: Word8)
get = liftM fromIntegral (get :: Get Word8)
-- Int16s are written as a 2 bytes in big endian format
instance Binary Int16 where
put i = put (fromIntegral i :: Word16)
get = liftM fromIntegral (get :: Get Word16)
-- Int32s are written as a 4 bytes in big endian format
instance Binary Int32 where
put i = put (fromIntegral i :: Word32)
get = liftM fromIntegral (get :: Get Word32)
-- Int64s are written as a 4 bytes in big endian format
instance Binary Int64 where
put i = put (fromIntegral i :: Word64)
get = liftM fromIntegral (get :: Get Word64)
------------------------------------------------------------------------
-- Words are written as sequence of bytes. The last bit of each
-- byte indicates whether there are more bytes to be read
instance Binary Word where
put i | i <= 0x7f = do put a
| i <= 0x3fff = do put (a .|. 0x80)
put b
| i <= 0x1fffff = do put (a .|. 0x80)
put (b .|. 0x80)
put c
| i <= 0xfffffff = do put (a .|. 0x80)
put (b .|. 0x80)
put (c .|. 0x80)
put d
#if WORD_SIZE_IN_BITS < 64
| otherwise = do put (a .|. 0x80)
put (b .|. 0x80)
put (c .|. 0x80)
put (d .|. 0x80)
put e
#else
| i <= 0x7ffffffff = do put (a .|. 0x80)
put (b .|. 0x80)
put (c .|. 0x80)
put (d .|. 0x80)
put e
| i <= 0x3ffffffffff = do put (a .|. 0x80)
put (b .|. 0x80)
put (c .|. 0x80)
put (d .|. 0x80)
put (e .|. 0x80)
put f
| i <= 0x1ffffffffffff = do put (a .|. 0x80)
put (b .|. 0x80)
put (c .|. 0x80)
put (d .|. 0x80)
put (e .|. 0x80)
put (f .|. 0x80)
put g
| i <= 0xffffffffffffff = do put (a .|. 0x80)
put (b .|. 0x80)
put (c .|. 0x80)
put (d .|. 0x80)
put (e .|. 0x80)
put (f .|. 0x80)
put (g .|. 0x80)
put h
| i <= 0xffffffffffffff = do put (a .|. 0x80)
put (b .|. 0x80)
put (c .|. 0x80)
put (d .|. 0x80)
put (e .|. 0x80)
put (f .|. 0x80)
put (g .|. 0x80)
put h
| i <= 0x7fffffffffffffff = do put (a .|. 0x80)
put (b .|. 0x80)
put (c .|. 0x80)
put (d .|. 0x80)
put (e .|. 0x80)
put (f .|. 0x80)
put (g .|. 0x80)
put (h .|. 0x80)
put j
| otherwise = do put (a .|. 0x80)
put (b .|. 0x80)
put (c .|. 0x80)
put (d .|. 0x80)
put (e .|. 0x80)
put (f .|. 0x80)
put (g .|. 0x80)
put (h .|. 0x80)
put (j .|. 0x80)
put k
#endif
where
a = fromIntegral ( i .&. 0x7f) :: Word8
b = fromIntegral (shiftR i 7 .&. 0x7f) :: Word8
c = fromIntegral (shiftR i 14 .&. 0x7f) :: Word8
d = fromIntegral (shiftR i 21 .&. 0x7f) :: Word8
e = fromIntegral (shiftR i 28 .&. 0x7f) :: Word8
f = fromIntegral (shiftR i 35 .&. 0x7f) :: Word8
g = fromIntegral (shiftR i 42 .&. 0x7f) :: Word8
h = fromIntegral (shiftR i 49 .&. 0x7f) :: Word8
j = fromIntegral (shiftR i 56 .&. 0x7f) :: Word8
k = fromIntegral (shiftR i 63 .&. 0x7f) :: Word8
get = do i <- getWord8
(if i <= 0x7f
then return (fromIntegral i)
else do n <- get
return $ (n `shiftL` 7) .|. (fromIntegral (i .&. 0x7f)))
-- Int has the same representation as Word
instance Binary Int where
put i = put (fromIntegral i :: Word)
get = liftM fromIntegral (get :: Get Word)
------------------------------------------------------------------------
--
-- Portable, and pretty efficient, serialisation of Integer
--
-- Fixed-size type for a subset of Integer
type SmallInt = Int32
-- Integers are encoded in two ways: if they fit inside a SmallInt,
-- they're written as a byte tag, and that value. If the Integer value
-- is too large to fit in a SmallInt, it is written as a byte array,
-- along with a sign and length field.
instance Binary Integer where
{-# INLINE put #-}
put n | n >= lo && n <= hi = do
putWord8 0
put (fromIntegral n :: SmallInt) -- fast path
where
lo = fromIntegral (minBound :: SmallInt) :: Integer
hi = fromIntegral (maxBound :: SmallInt) :: Integer
put n = do
putWord8 1
put sign
put (unroll (abs n)) -- unroll the bytes
where
sign = fromIntegral (signum n) :: Word8
{-# INLINE get #-}
get = do
tag <- get :: Get Word8
case tag of
0 -> liftM fromIntegral (get :: Get SmallInt)
_ -> do sign <- get
bytes <- get
let v = roll bytes
return $! if sign == (1 :: Word8) then v else - v
--
-- Fold and unfold an Integer to and from a list of its bytes
--
unroll :: Integer -> [Word8]
unroll = unfoldr step
where
step 0 = Nothing
step i = Just (fromIntegral i, i `shiftR` 8)
roll :: [Word8] -> Integer
roll = foldr unstep 0
where
unstep b a = a `shiftL` 8 .|. fromIntegral b
{-
--
-- An efficient, raw serialisation for Integer (GHC only)
--
-- TODO This instance is not architecture portable. GMP stores numbers as
-- arrays of machine sized words, so the byte format is not portable across
-- architectures with different endianess and word size.
import Data.ByteString.Base (toForeignPtr,unsafePackAddress, memcpy)
import GHC.Base hiding (ord, chr)
import GHC.Prim
import GHC.Ptr (Ptr(..))
import GHC.IOBase (IO(..))
instance Binary Integer where
put (S# i) = putWord8 0 >> put (I# i)
put (J# s ba) = do
putWord8 1
put (I# s)
put (BA ba)
get = do
b <- getWord8
case b of
0 -> do (I# i#) <- get
return (S# i#)
_ -> do (I# s#) <- get
(BA a#) <- get
return (J# s# a#)
instance Binary ByteArray where
-- Pretty safe.
put (BA ba) =
let sz = sizeofByteArray# ba -- (primitive) in *bytes*
addr = byteArrayContents# ba
bs = unsafePackAddress (I# sz) addr
in put bs -- write as a ByteString. easy, yay!
-- Pretty scary. Should be quick though
get = do
(fp, off, n@(I# sz)) <- liftM toForeignPtr get -- so decode a ByteString
assert (off == 0) $ return $ unsafePerformIO $ do
(MBA arr) <- newByteArray sz -- and copy it into a ByteArray#
let to = byteArrayContents# (unsafeCoerce# arr) -- urk, is this safe?
withForeignPtr fp $ \from -> memcpy (Ptr to) from (fromIntegral n)
freezeByteArray arr
-- wrapper for ByteArray#
data ByteArray = BA {-# UNPACK #-} !ByteArray#
data MBA = MBA {-# UNPACK #-} !(MutableByteArray# RealWorld)
newByteArray :: Int# -> IO MBA
newByteArray sz = IO $ \s ->
case newPinnedByteArray# sz s of { (# s', arr #) ->
(# s', MBA arr #) }
freezeByteArray :: MutableByteArray# RealWorld -> IO ByteArray
freezeByteArray arr = IO $ \s ->
case unsafeFreezeByteArray# arr s of { (# s', arr' #) ->
(# s', BA arr' #) }
-}
instance (Binary a,Integral a) => Binary (R.Ratio a) where
put r = put (R.numerator r) >> put (R.denominator r)
get = liftM2 (R.%) get get
------------------------------------------------------------------------
-- Char is serialised as UTF-8
instance Binary Char where
put a | c <= 0x7f = put (fromIntegral c :: Word8)
| c <= 0x7ff = do put (0xc0 .|. y)
put (0x80 .|. z)
| c <= 0xffff = do put (0xe0 .|. x)
put (0x80 .|. y)
put (0x80 .|. z)
| c <= 0x10ffff = do put (0xf0 .|. w)
put (0x80 .|. x)
put (0x80 .|. y)
put (0x80 .|. z)
| otherwise = error "Not a valid Unicode code point"
where
c = ord a
z, y, x, w :: Word8
z = fromIntegral (c .&. 0x3f)
y = fromIntegral (shiftR c 6 .&. 0x3f)
x = fromIntegral (shiftR c 12 .&. 0x3f)
w = fromIntegral (shiftR c 18 .&. 0x7)
get = do
let getByte = liftM (fromIntegral :: Word8 -> Int) get
shiftL6 = flip shiftL 6 :: Int -> Int
w <- getByte
r <- case () of
_ | w < 0x80 -> return w
| w < 0xe0 -> do
x <- liftM (xor 0x80) getByte
return (x .|. shiftL6 (xor 0xc0 w))
| w < 0xf0 -> do
x <- liftM (xor 0x80) getByte
y <- liftM (xor 0x80) getByte
return (y .|. shiftL6 (x .|. shiftL6
(xor 0xe0 w)))
| otherwise -> do
x <- liftM (xor 0x80) getByte
y <- liftM (xor 0x80) getByte
z <- liftM (xor 0x80) getByte
return (z .|. shiftL6 (y .|. shiftL6
(x .|. shiftL6 (xor 0xf0 w))))
return $! chr r
------------------------------------------------------------------------
-- Instances for the first few tuples
instance (Binary a, Binary b) => Binary (a,b) where
put (a,b) = put a >> put b
get = liftM2 (,) get get
instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where
put (a,b,c) = put a >> put b >> put c
get = liftM3 (,,) get get get
instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where
put (a,b,c,d) = put a >> put b >> put c >> put d
get = liftM4 (,,,) get get get get
instance (Binary a, Binary b, Binary c, Binary d, Binary e) => Binary (a,b,c,d,e) where
put (a,b,c,d,e) = put a >> put b >> put c >> put d >> put e
get = liftM5 (,,,,) get get get get get
--
-- and now just recurse:
--
instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f)
=> Binary (a,b,c,d,e,f) where
put (a,b,c,d,e,f) = put (a,(b,c,d,e,f))
get = do (a,(b,c,d,e,f)) <- get ; return (a,b,c,d,e,f)
instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f, Binary g)
=> Binary (a,b,c,d,e,f,g) where
put (a,b,c,d,e,f,g) = put (a,(b,c,d,e,f,g))
get = do (a,(b,c,d,e,f,g)) <- get ; return (a,b,c,d,e,f,g)
instance (Binary a, Binary b, Binary c, Binary d, Binary e,
Binary f, Binary g, Binary h)
=> Binary (a,b,c,d,e,f,g,h) where
put (a,b,c,d,e,f,g,h) = put (a,(b,c,d,e,f,g,h))
get = do (a,(b,c,d,e,f,g,h)) <- get ; return (a,b,c,d,e,f,g,h)
instance (Binary a, Binary b, Binary c, Binary d, Binary e,
Binary f, Binary g, Binary h, Binary i)
=> Binary (a,b,c,d,e,f,g,h,i) where
put (a,b,c,d,e,f,g,h,i) = put (a,(b,c,d,e,f,g,h,i))
get = do (a,(b,c,d,e,f,g,h,i)) <- get ; return (a,b,c,d,e,f,g,h,i)
instance (Binary a, Binary b, Binary c, Binary d, Binary e,
Binary f, Binary g, Binary h, Binary i, Binary j)
=> Binary (a,b,c,d,e,f,g,h,i,j) where
put (a,b,c,d,e,f,g,h,i,j) = put (a,(b,c,d,e,f,g,h,i,j))
get = do (a,(b,c,d,e,f,g,h,i,j)) <- get ; return (a,b,c,d,e,f,g,h,i,j)
------------------------------------------------------------------------
-- Container types
instance Binary a => Binary [a] where
put l = put (length l) >> mapM_ put l
get = do n <- get :: Get Int
xs <- replicateM n get
return xs
instance (Binary a) => Binary (Maybe a) where
put Nothing = putWord8 0
put (Just x) = putWord8 1 >> put x
get = do
w <- getWord8
case w of
0 -> return Nothing
_ -> liftM Just get
instance (Binary a, Binary b) => Binary (Either a b) where
put (Left a) = putWord8 0 >> put a
put (Right b) = putWord8 1 >> put b
get = do
w <- getWord8
case w of
0 -> liftM Left get
_ -> liftM Right get
------------------------------------------------------------------------
-- ByteStrings (have specially efficient instances)
instance Binary B.ByteString where
put bs = do put (B.length bs)
putByteString bs
get = get >>= getByteString
--
-- Using old versions of fps, this is a type synonym, and non portable
--
-- Requires 'flexible instances'
--
instance Binary ByteString where
put bs = do put (fromIntegral (L.length bs) :: Int)
putLazyByteString bs
get = get >>= getLazyByteString
------------------------------------------------------------------------
-- Maps and Sets
instance (Ord a, Binary a) => Binary (Set.Set a) where
put s = put (Set.size s) >> mapM_ put (Set.toAscList s)
get = liftM Set.fromDistinctAscList get
instance (Ord k, Binary k, Binary e) => Binary (Map.Map k e) where
put m = put (Map.size m) >> mapM_ put (Map.toAscList m)
get = liftM Map.fromDistinctAscList get
instance Binary IntSet.IntSet where
put s = put (IntSet.size s) >> mapM_ put (IntSet.toAscList s)
get = liftM IntSet.fromDistinctAscList get
instance (Binary e) => Binary (IntMap.IntMap e) where
put m = put (IntMap.size m) >> mapM_ put (IntMap.toAscList m)
get = liftM IntMap.fromDistinctAscList get
------------------------------------------------------------------------
-- Queues and Sequences
#if __GLASGOW_HASKELL__ >= 606
--
-- This is valid Hugs, but you need the most recent Hugs
--
instance (Binary e) => Binary (Seq.Seq e) where
-- any better way to do this?
put = put . Fold.toList
get = fmap Seq.fromList get
#endif
------------------------------------------------------------------------
-- Floating point
instance Binary Double where
put d = put (decodeFloat d)
get = liftM2 encodeFloat get get
instance Binary Float where
put f = put (decodeFloat f)
get = liftM2 encodeFloat get get
------------------------------------------------------------------------
-- Trees
instance (Binary e) => Binary (T.Tree e) where
put (T.Node r s) = put r >> put s
get = liftM2 T.Node get get
------------------------------------------------------------------------
-- Arrays
instance (Binary i, Ix i, Binary e) => Binary (Array i e) where
put a = do
put (bounds a)
put (rangeSize $ bounds a) -- write the length
mapM_ put (elems a) -- now the elems.
get = do
bs <- get
n <- get -- read the length
xs <- replicateM n get -- now the elems.
return (listArray bs xs)
--
-- The IArray UArray e constraint is non portable. Requires flexible instances
--
instance (Binary i, Ix i, Binary e, IArray UArray e) => Binary (UArray i e) where
put a = do
put (bounds a)
put (rangeSize $ bounds a) -- now write the length
mapM_ put (elems a)
get = do
bs <- get
n <- get
xs <- replicateM n get
return (listArray bs xs)

426
src/Data/Binary/Builder.hs Normal file
View File

@@ -0,0 +1,426 @@
{-# LANGUAGE CPP #-}
{-# OPTIONS_GHC -fglasgow-exts #-}
-- for unboxed shifts
-----------------------------------------------------------------------------
-- |
-- Module : Data.Binary.Builder
-- Copyright : Lennart Kolmodin, Ross Paterson
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
-- Stability : experimental
-- Portability : portable to Hugs and GHC
--
-- Efficient construction of lazy bytestrings.
--
-----------------------------------------------------------------------------
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
#include "MachDeps.h"
#endif
module Data.Binary.Builder (
-- * The Builder type
Builder
, toLazyByteString
-- * Constructing Builders
, empty
, singleton
, append
, fromByteString -- :: S.ByteString -> Builder
, fromLazyByteString -- :: L.ByteString -> Builder
-- * Flushing the buffer state
, flush
-- * Derived Builders
-- ** Big-endian writes
, putWord16be -- :: Word16 -> Builder
, putWord32be -- :: Word32 -> Builder
, putWord64be -- :: Word64 -> Builder
-- ** Little-endian writes
, putWord16le -- :: Word16 -> Builder
, putWord32le -- :: Word32 -> Builder
, putWord64le -- :: Word64 -> Builder
-- ** Host-endian, unaligned writes
, putWordhost -- :: Word -> Builder
, putWord16host -- :: Word16 -> Builder
, putWord32host -- :: Word32 -> Builder
, putWord64host -- :: Word64 -> Builder
) where
import Foreign
import Data.Monoid
import Data.Word
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
#ifdef BYTESTRING_IN_BASE
import Data.ByteString.Base (inlinePerformIO)
import qualified Data.ByteString.Base as S
#else
import Data.ByteString.Internal (inlinePerformIO)
import qualified Data.ByteString.Internal as S
import qualified Data.ByteString.Lazy.Internal as L
#endif
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
import GHC.Base
import GHC.Word (Word32(..),Word16(..),Word64(..))
#if WORD_SIZE_IN_BITS < 64 && __GLASGOW_HASKELL__ >= 608
import GHC.Word (uncheckedShiftRL64#)
#endif
#endif
------------------------------------------------------------------------
-- | A 'Builder' is an efficient way to build lazy 'L.ByteString's.
-- There are several functions for constructing 'Builder's, but only one
-- to inspect them: to extract any data, you have to turn them into lazy
-- 'L.ByteString's using 'toLazyByteString'.
--
-- Internally, a 'Builder' constructs a lazy 'L.Bytestring' by filling byte
-- arrays piece by piece. As each buffer is filled, it is \'popped\'
-- off, to become a new chunk of the resulting lazy 'L.ByteString'.
-- All this is hidden from the user of the 'Builder'.
newtype Builder = Builder {
-- Invariant (from Data.ByteString.Lazy):
-- The lists include no null ByteStrings.
runBuilder :: (Buffer -> [S.ByteString]) -> Buffer -> [S.ByteString]
}
instance Monoid Builder where
mempty = empty
{-# INLINE mempty #-}
mappend = append
{-# INLINE mappend #-}
------------------------------------------------------------------------
-- | /O(1)./ The empty Builder, satisfying
--
-- * @'toLazyByteString' 'empty' = 'L.empty'@
--
empty :: Builder
empty = Builder id
{-# INLINE empty #-}
-- | /O(1)./ A Builder taking a single byte, satisfying
--
-- * @'toLazyByteString' ('singleton' b) = 'L.singleton' b@
--
singleton :: Word8 -> Builder
singleton = writeN 1 . flip poke
{-# INLINE singleton #-}
------------------------------------------------------------------------
-- | /O(1)./ The concatenation of two Builders, an associative operation
-- with identity 'empty', satisfying
--
-- * @'toLazyByteString' ('append' x y) = 'L.append' ('toLazyByteString' x) ('toLazyByteString' y)@
--
append :: Builder -> Builder -> Builder
append (Builder f) (Builder g) = Builder (f . g)
{-# INLINE append #-}
-- | /O(1)./ A Builder taking a 'S.ByteString', satisfying
--
-- * @'toLazyByteString' ('fromByteString' bs) = 'L.fromChunks' [bs]@
--
fromByteString :: S.ByteString -> Builder
fromByteString bs
| S.null bs = empty
| otherwise = flush `append` mapBuilder (bs :)
{-# INLINE fromByteString #-}
-- | /O(1)./ A Builder taking a lazy 'L.ByteString', satisfying
--
-- * @'toLazyByteString' ('fromLazyByteString' bs) = bs@
--
fromLazyByteString :: L.ByteString -> Builder
fromLazyByteString bss = flush `append` mapBuilder (L.toChunks bss ++)
{-# INLINE fromLazyByteString #-}
------------------------------------------------------------------------
-- Our internal buffer type
data Buffer = Buffer {-# UNPACK #-} !(ForeignPtr Word8)
{-# UNPACK #-} !Int -- offset
{-# UNPACK #-} !Int -- used bytes
{-# UNPACK #-} !Int -- length left
------------------------------------------------------------------------
-- | /O(n)./ Extract a lazy 'L.ByteString' from a 'Builder'.
-- The construction work takes place if and when the relevant part of
-- the lazy 'L.ByteString' is demanded.
--
toLazyByteString :: Builder -> L.ByteString
toLazyByteString m = L.fromChunks $ unsafePerformIO $ do
buf <- newBuffer defaultSize
return (runBuilder (m `append` flush) (const []) buf)
-- | /O(1)./ Pop the 'S.ByteString' we have constructed so far, if any,
-- yielding a new chunk in the result lazy 'L.ByteString'.
flush :: Builder
flush = Builder $ \ k buf@(Buffer p o u l) ->
if u == 0
then k buf
else S.PS p o u : k (Buffer p (o+u) 0 l)
------------------------------------------------------------------------
--
-- copied from Data.ByteString.Lazy
--
defaultSize :: Int
defaultSize = 32 * k - overhead
where k = 1024
overhead = 2 * sizeOf (undefined :: Int)
------------------------------------------------------------------------
-- | Sequence an IO operation on the buffer
unsafeLiftIO :: (Buffer -> IO Buffer) -> Builder
unsafeLiftIO f = Builder $ \ k buf -> inlinePerformIO $ do
buf' <- f buf
return (k buf')
{-# INLINE unsafeLiftIO #-}
-- | Get the size of the buffer
withSize :: (Int -> Builder) -> Builder
withSize f = Builder $ \ k buf@(Buffer _ _ _ l) ->
runBuilder (f l) k buf
-- | Map the resulting list of bytestrings.
mapBuilder :: ([S.ByteString] -> [S.ByteString]) -> Builder
mapBuilder f = Builder (f .)
------------------------------------------------------------------------
-- | Ensure that there are at least @n@ many bytes available.
ensureFree :: Int -> Builder
ensureFree n = n `seq` withSize $ \ l ->
if n <= l then empty else
flush `append` unsafeLiftIO (const (newBuffer (max n defaultSize)))
{-# INLINE ensureFree #-}
-- | Ensure that @n@ many bytes are available, and then use @f@ to write some
-- bytes into the memory.
writeN :: Int -> (Ptr Word8 -> IO ()) -> Builder
writeN n f = ensureFree n `append` unsafeLiftIO (writeNBuffer n f)
{-# INLINE writeN #-}
writeNBuffer :: Int -> (Ptr Word8 -> IO ()) -> Buffer -> IO Buffer
writeNBuffer n f (Buffer fp o u l) = do
withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
return (Buffer fp o (u+n) (l-n))
{-# INLINE writeNBuffer #-}
newBuffer :: Int -> IO Buffer
newBuffer size = do
fp <- S.mallocByteString size
return $! Buffer fp 0 0 size
{-# INLINE newBuffer #-}
------------------------------------------------------------------------
-- Aligned, host order writes of storable values
-- | Ensure that @n@ many bytes are available, and then use @f@ to write some
-- storable values into the memory.
writeNbytes :: Storable a => Int -> (Ptr a -> IO ()) -> Builder
writeNbytes n f = ensureFree n `append` unsafeLiftIO (writeNBufferBytes n f)
{-# INLINE writeNbytes #-}
writeNBufferBytes :: Storable a => Int -> (Ptr a -> IO ()) -> Buffer -> IO Buffer
writeNBufferBytes n f (Buffer fp o u l) = do
withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
return (Buffer fp o (u+n) (l-n))
{-# INLINE writeNBufferBytes #-}
------------------------------------------------------------------------
--
-- We rely on the fromIntegral to do the right masking for us.
-- The inlining here is critical, and can be worth 4x performance
--
-- | Write a Word16 in big endian format
putWord16be :: Word16 -> Builder
putWord16be w = writeN 2 $ \p -> do
poke p (fromIntegral (shiftr_w16 w 8) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (w) :: Word8)
{-# INLINE putWord16be #-}
-- | Write a Word16 in little endian format
putWord16le :: Word16 -> Builder
putWord16le w = writeN 2 $ \p -> do
poke p (fromIntegral (w) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w16 w 8) :: Word8)
{-# INLINE putWord16le #-}
-- putWord16le w16 = writeN 2 (\p -> poke (castPtr p) w16)
-- | Write a Word32 in big endian format
putWord32be :: Word32 -> Builder
putWord32be w = writeN 4 $ \p -> do
poke p (fromIntegral (shiftr_w32 w 24) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 16) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 8) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (w) :: Word8)
{-# INLINE putWord32be #-}
--
-- a data type to tag Put/Check. writes construct these which are then
-- inlined and flattened. matching Checks will be more robust with rules.
--
-- | Write a Word32 in little endian format
putWord32le :: Word32 -> Builder
putWord32le w = writeN 4 $ \p -> do
poke p (fromIntegral (w) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 8) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 16) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 w 24) :: Word8)
{-# INLINE putWord32le #-}
-- on a little endian machine:
-- putWord32le w32 = writeN 4 (\p -> poke (castPtr p) w32)
-- | Write a Word64 in big endian format
putWord64be :: Word64 -> Builder
#if WORD_SIZE_IN_BITS < 64
--
-- To avoid expensive 64 bit shifts on 32 bit machines, we cast to
-- Word32, and write that
--
putWord64be w =
let a = fromIntegral (shiftr_w64 w 32) :: Word32
b = fromIntegral w :: Word32
in writeN 8 $ \p -> do
poke p (fromIntegral (shiftr_w32 a 24) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 16) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 8) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (a) :: Word8)
poke (p `plusPtr` 4) (fromIntegral (shiftr_w32 b 24) :: Word8)
poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 16) :: Word8)
poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 8) :: Word8)
poke (p `plusPtr` 7) (fromIntegral (b) :: Word8)
#else
putWord64be w = writeN 8 $ \p -> do
poke p (fromIntegral (shiftr_w64 w 56) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 48) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 40) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 32) :: Word8)
poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 24) :: Word8)
poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 16) :: Word8)
poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 8) :: Word8)
poke (p `plusPtr` 7) (fromIntegral (w) :: Word8)
#endif
{-# INLINE putWord64be #-}
-- | Write a Word64 in little endian format
putWord64le :: Word64 -> Builder
#if WORD_SIZE_IN_BITS < 64
putWord64le w =
let b = fromIntegral (shiftr_w64 w 32) :: Word32
a = fromIntegral w :: Word32
in writeN 8 $ \p -> do
poke (p) (fromIntegral (a) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 8) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 16) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 a 24) :: Word8)
poke (p `plusPtr` 4) (fromIntegral (b) :: Word8)
poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 8) :: Word8)
poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 16) :: Word8)
poke (p `plusPtr` 7) (fromIntegral (shiftr_w32 b 24) :: Word8)
#else
putWord64le w = writeN 8 $ \p -> do
poke p (fromIntegral (w) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 8) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 16) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 24) :: Word8)
poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 32) :: Word8)
poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 40) :: Word8)
poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 48) :: Word8)
poke (p `plusPtr` 7) (fromIntegral (shiftr_w64 w 56) :: Word8)
#endif
{-# INLINE putWord64le #-}
-- on a little endian machine:
-- putWord64le w64 = writeN 8 (\p -> poke (castPtr p) w64)
------------------------------------------------------------------------
-- Unaligned, word size ops
-- | /O(1)./ A Builder taking a single native machine word. The word is
-- written in host order, host endian form, for the machine you're on.
-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
-- 4 bytes. Values written this way are not portable to
-- different endian or word sized machines, without conversion.
--
putWordhost :: Word -> Builder
putWordhost w = writeNbytes (sizeOf (undefined :: Word)) (\p -> poke p w)
{-# INLINE putWordhost #-}
-- | Write a Word16 in native host order and host endianness.
-- 2 bytes will be written, unaligned.
putWord16host :: Word16 -> Builder
putWord16host w16 = writeNbytes (sizeOf (undefined :: Word16)) (\p -> poke p w16)
{-# INLINE putWord16host #-}
-- | Write a Word32 in native host order and host endianness.
-- 4 bytes will be written, unaligned.
putWord32host :: Word32 -> Builder
putWord32host w32 = writeNbytes (sizeOf (undefined :: Word32)) (\p -> poke p w32)
{-# INLINE putWord32host #-}
-- | Write a Word64 in native host order.
-- On a 32 bit machine we write two host order Word32s, in big endian form.
-- 8 bytes will be written, unaligned.
putWord64host :: Word64 -> Builder
putWord64host w = writeNbytes (sizeOf (undefined :: Word64)) (\p -> poke p w)
{-# INLINE putWord64host #-}
------------------------------------------------------------------------
-- Unchecked shifts
{-# INLINE shiftr_w16 #-}
shiftr_w16 :: Word16 -> Int -> Word16
{-# INLINE shiftr_w32 #-}
shiftr_w32 :: Word32 -> Int -> Word32
{-# INLINE shiftr_w64 #-}
shiftr_w64 :: Word64 -> Int -> Word64
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
shiftr_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftRL#` i)
shiftr_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftRL#` i)
#if WORD_SIZE_IN_BITS < 64
shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL64#` i)
#if __GLASGOW_HASKELL__ <= 606
-- Exported by GHC.Word in GHC 6.8 and higher
foreign import ccall unsafe "stg_uncheckedShiftRL64"
uncheckedShiftRL64# :: Word64# -> Int# -> Word64#
#endif
#else
shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL#` i)
#endif
#else
shiftr_w16 = shiftR
shiftr_w32 = shiftR
shiftr_w64 = shiftR
#endif

539
src/Data/Binary/Get.hs Normal file
View File

@@ -0,0 +1,539 @@
{-# LANGUAGE CPP #-}
{-# OPTIONS_GHC -fglasgow-exts #-}
-- for unboxed shifts
-----------------------------------------------------------------------------
-- |
-- Module : Data.Binary.Get
-- Copyright : Lennart Kolmodin
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
-- Stability : experimental
-- Portability : portable to Hugs and GHC.
--
-- The Get monad. A monad for efficiently building structures from
-- encoded lazy ByteStrings
--
-----------------------------------------------------------------------------
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
#include "MachDeps.h"
#endif
module Data.Binary.Get (
-- * The Get type
Get
, runGet
, runGetState
-- * Parsing
, skip
, uncheckedSkip
, lookAhead
, lookAheadM
, lookAheadE
, uncheckedLookAhead
-- * Utility
, bytesRead
, getBytes
, remaining
, isEmpty
-- * Parsing particular types
, getWord8
-- ** ByteStrings
, getByteString
, getLazyByteString
, getLazyByteStringNul
, getRemainingLazyByteString
-- ** Big-endian reads
, getWord16be
, getWord32be
, getWord64be
-- ** Little-endian reads
, getWord16le
, getWord32le
, getWord64le
-- ** Host-endian, unaligned reads
, getWordhost
, getWord16host
, getWord32host
, getWord64host
) where
import Control.Monad (when,liftM,ap)
import Control.Monad.Fix
import Data.Maybe (isNothing)
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
#ifdef BYTESTRING_IN_BASE
import qualified Data.ByteString.Base as B
#else
import qualified Data.ByteString.Internal as B
import qualified Data.ByteString.Lazy.Internal as L
#endif
#ifdef APPLICATIVE_IN_BASE
import Control.Applicative (Applicative(..))
#endif
import Foreign
-- used by splitAtST
import Control.Monad.ST
import Data.STRef
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
import GHC.Base
import GHC.Word
import GHC.Int
#endif
-- | The parse state
data S = S {-# UNPACK #-} !B.ByteString -- current chunk
L.ByteString -- the rest of the input
{-# UNPACK #-} !Int64 -- bytes read
-- | The Get monad is just a State monad carrying around the input ByteString
newtype Get a = Get { unGet :: S -> (a, S) }
instance Functor Get where
fmap f m = Get (\s -> let (a, s') = unGet m s
in (f a, s'))
{-# INLINE fmap #-}
#ifdef APPLICATIVE_IN_BASE
instance Applicative Get where
pure = return
(<*>) = ap
#endif
instance Monad Get where
return a = Get (\s -> (a, s))
{-# INLINE return #-}
m >>= k = Get (\s -> case unGet m s of
(a, s') -> unGet (k a) s')
{-# INLINE (>>=) #-}
fail = failDesc
instance MonadFix Get where
mfix f = Get (\s -> let (a,s') = unGet (f a) s
in (a,s'))
------------------------------------------------------------------------
get :: Get S
get = Get (\s -> (s, s))
put :: S -> Get ()
put s = Get (\_ -> ((), s))
------------------------------------------------------------------------
initState :: L.ByteString -> S
initState xs = mkState xs 0
{-# INLINE initState #-}
{-
initState (B.LPS xs) =
case xs of
[] -> S B.empty L.empty 0
(x:xs') -> S x (B.LPS xs') 0
-}
#ifndef BYTESTRING_IN_BASE
mkState :: L.ByteString -> Int64 -> S
mkState l = case l of
L.Empty -> S B.empty L.empty
L.Chunk x xs -> S x xs
{-# INLINE mkState #-}
#else
mkState :: L.ByteString -> Int64 -> S
mkState (B.LPS xs) =
case xs of
[] -> S B.empty L.empty
(x:xs') -> S x (B.LPS xs')
#endif
-- | Run the Get monad applies a 'get'-based parser on the input ByteString
runGet :: Get a -> L.ByteString -> a
runGet m str = case unGet m (initState str) of (a, _) -> a
-- | Run the Get monad applies a 'get'-based parser on the input
-- ByteString. Additional to the result of get it returns the number of
-- consumed bytes and the rest of the input.
runGetState :: Get a -> L.ByteString -> Int64 -> (a, L.ByteString, Int64)
runGetState m str off =
case unGet m (mkState str off) of
(a, ~(S s ss newOff)) -> (a, s `join` ss, newOff)
------------------------------------------------------------------------
failDesc :: String -> Get a
failDesc err = do
S _ _ bytes <- get
Get (error (err ++ ". Failed reading at byte position " ++ show bytes))
-- | Skip ahead @n@ bytes. Fails if fewer than @n@ bytes are available.
skip :: Int -> Get ()
skip n = readN (fromIntegral n) (const ())
-- | Skip ahead @n@ bytes. No error if there isn't enough bytes.
uncheckedSkip :: Int64 -> Get ()
uncheckedSkip n = do
S s ss bytes <- get
if fromIntegral (B.length s) >= n
then put (S (B.drop (fromIntegral n) s) ss (bytes + n))
else do
let rest = L.drop (n - fromIntegral (B.length s)) ss
put $! mkState rest (bytes + n)
-- | Run @ga@, but return without consuming its input.
-- Fails if @ga@ fails.
lookAhead :: Get a -> Get a
lookAhead ga = do
s <- get
a <- ga
put s
return a
-- | Like 'lookAhead', but consume the input if @gma@ returns 'Just _'.
-- Fails if @gma@ fails.
lookAheadM :: Get (Maybe a) -> Get (Maybe a)
lookAheadM gma = do
s <- get
ma <- gma
when (isNothing ma) $
put s
return ma
-- | Like 'lookAhead', but consume the input if @gea@ returns 'Right _'.
-- Fails if @gea@ fails.
lookAheadE :: Get (Either a b) -> Get (Either a b)
lookAheadE gea = do
s <- get
ea <- gea
case ea of
Left _ -> put s
_ -> return ()
return ea
-- | Get the next up to @n@ bytes as a lazy ByteString, without consuming them.
uncheckedLookAhead :: Int64 -> Get L.ByteString
uncheckedLookAhead n = do
S s ss _ <- get
if n <= fromIntegral (B.length s)
then return (L.fromChunks [B.take (fromIntegral n) s])
else return $ L.take n (s `join` ss)
------------------------------------------------------------------------
-- Utility
-- | Get the total number of bytes read to this point.
bytesRead :: Get Int64
bytesRead = do
S _ _ b <- get
return b
-- | Get the number of remaining unparsed bytes.
-- Useful for checking whether all input has been consumed.
-- Note that this forces the rest of the input.
remaining :: Get Int64
remaining = do
S s ss _ <- get
return (fromIntegral (B.length s) + L.length ss)
-- | Test whether all input has been consumed,
-- i.e. there are no remaining unparsed bytes.
isEmpty :: Get Bool
isEmpty = do
S s ss _ <- get
return (B.null s && L.null ss)
------------------------------------------------------------------------
-- Utility with ByteStrings
-- | An efficient 'get' method for strict ByteStrings. Fails if fewer
-- than @n@ bytes are left in the input.
getByteString :: Int -> Get B.ByteString
getByteString n = readN n id
{-# INLINE getByteString #-}
-- | An efficient 'get' method for lazy ByteStrings. Does not fail if fewer than
-- @n@ bytes are left in the input.
getLazyByteString :: Int64 -> Get L.ByteString
getLazyByteString n = do
S s ss bytes <- get
let big = s `join` ss
case splitAtST n big of
(consume, rest) -> do put $ mkState rest (bytes + n)
return consume
{-# INLINE getLazyByteString #-}
-- | Get a lazy ByteString that is terminated with a NUL byte. Fails
-- if it reaches the end of input without hitting a NUL.
getLazyByteStringNul :: Get L.ByteString
getLazyByteStringNul = do
S s ss bytes <- get
let big = s `join` ss
(consume, t) = L.break (== 0) big
(h, rest) = L.splitAt 1 t
if L.null h
then fail "too few bytes"
else do
put $ mkState rest (bytes + L.length consume + 1)
return consume
{-# INLINE getLazyByteStringNul #-}
-- | Get the remaining bytes as a lazy ByteString
getRemainingLazyByteString :: Get L.ByteString
getRemainingLazyByteString = do
S s ss _ <- get
return (s `join` ss)
------------------------------------------------------------------------
-- Helpers
-- | Pull @n@ bytes from the input, as a strict ByteString.
getBytes :: Int -> Get B.ByteString
getBytes n = do
S s ss bytes <- get
if n <= B.length s
then do let (consume,rest) = B.splitAt n s
put $! S rest ss (bytes + fromIntegral n)
return $! consume
else
case L.splitAt (fromIntegral n) (s `join` ss) of
(consuming, rest) ->
do let now = B.concat . L.toChunks $ consuming
put $! mkState rest (bytes + fromIntegral n)
-- forces the next chunk before this one is returned
if (B.length now < n)
then
fail "too few bytes"
else
return now
{-# INLINE getBytes #-}
-- ^ important
#ifndef BYTESTRING_IN_BASE
join :: B.ByteString -> L.ByteString -> L.ByteString
join bb lb
| B.null bb = lb
| otherwise = L.Chunk bb lb
#else
join :: B.ByteString -> L.ByteString -> L.ByteString
join bb (B.LPS lb)
| B.null bb = B.LPS lb
| otherwise = B.LPS (bb:lb)
#endif
-- don't use L.append, it's strict in it's second argument :/
{-# INLINE join #-}
-- | Split a ByteString. If the first result is consumed before the --
-- second, this runs in constant heap space.
--
-- You must force the returned tuple for that to work, e.g.
--
-- > case splitAtST n xs of
-- > (ys,zs) -> consume ys ... consume zs
--
splitAtST :: Int64 -> L.ByteString -> (L.ByteString, L.ByteString)
splitAtST i ps | i <= 0 = (L.empty, ps)
#ifndef BYTESTRING_IN_BASE
splitAtST i ps = runST (
do r <- newSTRef undefined
xs <- first r i ps
ys <- unsafeInterleaveST (readSTRef r)
return (xs, ys))
where
first r 0 xs@(L.Chunk _ _) = writeSTRef r xs >> return L.Empty
first r _ L.Empty = writeSTRef r L.Empty >> return L.Empty
first r n (L.Chunk x xs)
| n < l = do writeSTRef r (L.Chunk (B.drop (fromIntegral n) x) xs)
return $ L.Chunk (B.take (fromIntegral n) x) L.Empty
| otherwise = do writeSTRef r (L.drop (n - l) xs)
liftM (L.Chunk x) $ unsafeInterleaveST (first r (n - l) xs)
where l = fromIntegral (B.length x)
#else
splitAtST i (B.LPS ps) = runST (
do r <- newSTRef undefined
xs <- first r i ps
ys <- unsafeInterleaveST (readSTRef r)
return (B.LPS xs, B.LPS ys))
where first r 0 xs = writeSTRef r xs >> return []
first r _ [] = writeSTRef r [] >> return []
first r n (x:xs)
| n < l = do writeSTRef r (B.drop (fromIntegral n) x : xs)
return [B.take (fromIntegral n) x]
| otherwise = do writeSTRef r (L.toChunks (L.drop (n - l) (B.LPS xs)))
fmap (x:) $ unsafeInterleaveST (first r (n - l) xs)
where l = fromIntegral (B.length x)
#endif
{-# INLINE splitAtST #-}
-- Pull n bytes from the input, and apply a parser to those bytes,
-- yielding a value. If less than @n@ bytes are available, fail with an
-- error. This wraps @getBytes@.
readN :: Int -> (B.ByteString -> a) -> Get a
readN n f = fmap f $ getBytes n
{-# INLINE readN #-}
-- ^ important
------------------------------------------------------------------------
-- Primtives
-- helper, get a raw Ptr onto a strict ByteString copied out of the
-- underlying lazy byteString. So many indirections from the raw parser
-- state that my head hurts...
getPtr :: Storable a => Int -> Get a
getPtr n = do
(fp,o,_) <- readN n B.toForeignPtr
return . B.inlinePerformIO $ withForeignPtr fp $ \p -> peek (castPtr $ p `plusPtr` o)
{-# INLINE getPtr #-}
------------------------------------------------------------------------
-- | Read a Word8 from the monad state
getWord8 :: Get Word8
getWord8 = getPtr (sizeOf (undefined :: Word8))
{-# INLINE getWord8 #-}
-- | Read a Word16 in big endian format
getWord16be :: Get Word16
getWord16be = do
s <- readN 2 id
return $! (fromIntegral (s `B.index` 0) `shiftl_w16` 8) .|.
(fromIntegral (s `B.index` 1))
{-# INLINE getWord16be #-}
-- | Read a Word16 in little endian format
getWord16le :: Get Word16
getWord16le = do
s <- readN 2 id
return $! (fromIntegral (s `B.index` 1) `shiftl_w16` 8) .|.
(fromIntegral (s `B.index` 0) )
{-# INLINE getWord16le #-}
-- | Read a Word32 in big endian format
getWord32be :: Get Word32
getWord32be = do
s <- readN 4 id
return $! (fromIntegral (s `B.index` 0) `shiftl_w32` 24) .|.
(fromIntegral (s `B.index` 1) `shiftl_w32` 16) .|.
(fromIntegral (s `B.index` 2) `shiftl_w32` 8) .|.
(fromIntegral (s `B.index` 3) )
{-# INLINE getWord32be #-}
-- | Read a Word32 in little endian format
getWord32le :: Get Word32
getWord32le = do
s <- readN 4 id
return $! (fromIntegral (s `B.index` 3) `shiftl_w32` 24) .|.
(fromIntegral (s `B.index` 2) `shiftl_w32` 16) .|.
(fromIntegral (s `B.index` 1) `shiftl_w32` 8) .|.
(fromIntegral (s `B.index` 0) )
{-# INLINE getWord32le #-}
-- | Read a Word64 in big endian format
getWord64be :: Get Word64
getWord64be = do
s <- readN 8 id
return $! (fromIntegral (s `B.index` 0) `shiftl_w64` 56) .|.
(fromIntegral (s `B.index` 1) `shiftl_w64` 48) .|.
(fromIntegral (s `B.index` 2) `shiftl_w64` 40) .|.
(fromIntegral (s `B.index` 3) `shiftl_w64` 32) .|.
(fromIntegral (s `B.index` 4) `shiftl_w64` 24) .|.
(fromIntegral (s `B.index` 5) `shiftl_w64` 16) .|.
(fromIntegral (s `B.index` 6) `shiftl_w64` 8) .|.
(fromIntegral (s `B.index` 7) )
{-# INLINE getWord64be #-}
-- | Read a Word64 in little endian format
getWord64le :: Get Word64
getWord64le = do
s <- readN 8 id
return $! (fromIntegral (s `B.index` 7) `shiftl_w64` 56) .|.
(fromIntegral (s `B.index` 6) `shiftl_w64` 48) .|.
(fromIntegral (s `B.index` 5) `shiftl_w64` 40) .|.
(fromIntegral (s `B.index` 4) `shiftl_w64` 32) .|.
(fromIntegral (s `B.index` 3) `shiftl_w64` 24) .|.
(fromIntegral (s `B.index` 2) `shiftl_w64` 16) .|.
(fromIntegral (s `B.index` 1) `shiftl_w64` 8) .|.
(fromIntegral (s `B.index` 0) )
{-# INLINE getWord64le #-}
------------------------------------------------------------------------
-- Host-endian reads
-- | /O(1)./ Read a single native machine word. The word is read in
-- host order, host endian form, for the machine you're on. On a 64 bit
-- machine the Word is an 8 byte value, on a 32 bit machine, 4 bytes.
getWordhost :: Get Word
getWordhost = getPtr (sizeOf (undefined :: Word))
{-# INLINE getWordhost #-}
-- | /O(1)./ Read a 2 byte Word16 in native host order and host endianness.
getWord16host :: Get Word16
getWord16host = getPtr (sizeOf (undefined :: Word16))
{-# INLINE getWord16host #-}
-- | /O(1)./ Read a Word32 in native host order and host endianness.
getWord32host :: Get Word32
getWord32host = getPtr (sizeOf (undefined :: Word32))
{-# INLINE getWord32host #-}
-- | /O(1)./ Read a Word64 in native host order and host endianess.
getWord64host :: Get Word64
getWord64host = getPtr (sizeOf (undefined :: Word64))
{-# INLINE getWord64host #-}
------------------------------------------------------------------------
-- Unchecked shifts
shiftl_w16 :: Word16 -> Int -> Word16
shiftl_w32 :: Word32 -> Int -> Word32
shiftl_w64 :: Word64 -> Int -> Word64
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
shiftl_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftL#` i)
shiftl_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftL#` i)
#if WORD_SIZE_IN_BITS < 64
shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL64#` i)
#if __GLASGOW_HASKELL__ <= 606
-- Exported by GHC.Word in GHC 6.8 and higher
foreign import ccall unsafe "stg_uncheckedShiftL64"
uncheckedShiftL64# :: Word64# -> Int# -> Word64#
#endif
#else
shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL#` i)
#endif
#else
shiftl_w16 = shiftL
shiftl_w32 = shiftL
shiftl_w64 = shiftL
#endif

199
src/Data/Binary/Put.hs Normal file
View File

@@ -0,0 +1,199 @@
{-# LANGUAGE CPP #-}
-----------------------------------------------------------------------------
-- |
-- Module : Data.Binary.Put
-- Copyright : Lennart Kolmodin
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
-- Stability : stable
-- Portability : Portable to Hugs and GHC. Requires MPTCs
--
-- The Put monad. A monad for efficiently constructing lazy bytestrings.
--
-----------------------------------------------------------------------------
module Data.Binary.Put (
-- * The Put type
Put
, PutM(..)
, runPut
-- * Flushing the implicit parse state
, flush
-- * Primitives
, putWord8
, putByteString
, putLazyByteString
-- * Big-endian primitives
, putWord16be
, putWord32be
, putWord64be
-- * Little-endian primitives
, putWord16le
, putWord32le
, putWord64le
-- * Host-endian, unaligned writes
, putWordhost -- :: Word -> Put
, putWord16host -- :: Word16 -> Put
, putWord32host -- :: Word32 -> Put
, putWord64host -- :: Word64 -> Put
) where
import Data.Monoid
import Data.Binary.Builder (Builder, toLazyByteString)
import qualified Data.Binary.Builder as B
import Data.Word
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
#ifdef APPLICATIVE_IN_BASE
import Control.Applicative
#endif
------------------------------------------------------------------------
-- XXX Strict in buffer only.
data PairS a = PairS a {-# UNPACK #-}!Builder
sndS :: PairS a -> Builder
sndS (PairS _ b) = b
-- | The PutM type. A Writer monad over the efficient Builder monoid.
newtype PutM a = Put { unPut :: PairS a }
-- | Put merely lifts Builder into a Writer monad, applied to ().
type Put = PutM ()
instance Functor PutM where
fmap f m = Put $ let PairS a w = unPut m in PairS (f a) w
{-# INLINE fmap #-}
#ifdef APPLICATIVE_IN_BASE
instance Applicative PutM where
pure = return
m <*> k = Put $
let PairS f w = unPut m
PairS x w' = unPut k
in PairS (f x) (w `mappend` w')
#endif
-- Standard Writer monad, with aggressive inlining
instance Monad PutM where
return a = Put $ PairS a mempty
{-# INLINE return #-}
m >>= k = Put $
let PairS a w = unPut m
PairS b w' = unPut (k a)
in PairS b (w `mappend` w')
{-# INLINE (>>=) #-}
m >> k = Put $
let PairS _ w = unPut m
PairS b w' = unPut k
in PairS b (w `mappend` w')
{-# INLINE (>>) #-}
tell :: Builder -> Put
tell b = Put $ PairS () b
{-# INLINE tell #-}
-- | Run the 'Put' monad with a serialiser
runPut :: Put -> L.ByteString
runPut = toLazyByteString . sndS . unPut
{-# INLINE runPut #-}
------------------------------------------------------------------------
-- | Pop the ByteString we have constructed so far, if any, yielding a
-- new chunk in the result ByteString.
flush :: Put
flush = tell B.flush
{-# INLINE flush #-}
-- | Efficiently write a byte into the output buffer
putWord8 :: Word8 -> Put
putWord8 = tell . B.singleton
{-# INLINE putWord8 #-}
-- | An efficient primitive to write a strict ByteString into the output buffer.
-- It flushes the current buffer, and writes the argument into a new chunk.
putByteString :: S.ByteString -> Put
putByteString = tell . B.fromByteString
{-# INLINE putByteString #-}
-- | Write a lazy ByteString efficiently, simply appending the lazy
-- ByteString chunks to the output buffer
putLazyByteString :: L.ByteString -> Put
putLazyByteString = tell . B.fromLazyByteString
{-# INLINE putLazyByteString #-}
-- | Write a Word16 in big endian format
putWord16be :: Word16 -> Put
putWord16be = tell . B.putWord16be
{-# INLINE putWord16be #-}
-- | Write a Word16 in little endian format
putWord16le :: Word16 -> Put
putWord16le = tell . B.putWord16le
{-# INLINE putWord16le #-}
-- | Write a Word32 in big endian format
putWord32be :: Word32 -> Put
putWord32be = tell . B.putWord32be
{-# INLINE putWord32be #-}
-- | Write a Word32 in little endian format
putWord32le :: Word32 -> Put
putWord32le = tell . B.putWord32le
{-# INLINE putWord32le #-}
-- | Write a Word64 in big endian format
putWord64be :: Word64 -> Put
putWord64be = tell . B.putWord64be
{-# INLINE putWord64be #-}
-- | Write a Word64 in little endian format
putWord64le :: Word64 -> Put
putWord64le = tell . B.putWord64le
{-# INLINE putWord64le #-}
------------------------------------------------------------------------
-- | /O(1)./ Write a single native machine word. The word is
-- written in host order, host endian form, for the machine you're on.
-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
-- 4 bytes. Values written this way are not portable to
-- different endian or word sized machines, without conversion.
--
putWordhost :: Word -> Put
putWordhost = tell . B.putWordhost
{-# INLINE putWordhost #-}
-- | /O(1)./ Write a Word16 in native host order and host endianness.
-- For portability issues see @putWordhost@.
putWord16host :: Word16 -> Put
putWord16host = tell . B.putWord16host
{-# INLINE putWord16host #-}
-- | /O(1)./ Write a Word32 in native host order and host endianness.
-- For portability issues see @putWordhost@.
putWord32host :: Word32 -> Put
putWord32host = tell . B.putWord32host
{-# INLINE putWord32host #-}
-- | /O(1)./ Write a Word64 in native host order
-- On a 32 bit machine we write two host order Word32s, in big endian form.
-- For portability issues see @putWordhost@.
putWord64host :: Word64 -> Put
putWord64host = tell . B.putWord64host
{-# INLINE putWord64host #-}

View File

@@ -2,8 +2,6 @@ module GF.Compile.Export where
import PGF.CId import PGF.CId
import PGF.Data (PGF(..)) import PGF.Data (PGF(..))
import PGF.Raw.Print (printTree)
import PGF.Raw.Convert (fromPGF)
import GF.Compile.GFCCtoHaskell import GF.Compile.GFCCtoHaskell
import GF.Compile.GFCCtoProlog import GF.Compile.GFCCtoProlog
import GF.Compile.GFCCtoJS import GF.Compile.GFCCtoJS
@@ -32,7 +30,6 @@ exportPGF :: Options
-> [(FilePath,String)] -- ^ List of recommended file names and contents. -> [(FilePath,String)] -- ^ List of recommended file names and contents.
exportPGF opts fmt pgf = exportPGF opts fmt pgf =
case fmt of case fmt of
FmtPGF -> multi "pgf" printPGF
FmtPGFPretty -> multi "txt" prPGFPretty FmtPGFPretty -> multi "txt" prPGFPretty
FmtJavaScript -> multi "js" pgf2js FmtJavaScript -> multi "js" pgf2js
FmtHaskell -> multi "hs" (grammar2haskell opts name) FmtHaskell -> multi "hs" (grammar2haskell opts name)
@@ -65,7 +62,3 @@ outputConcr :: PGF -> CId
outputConcr pgf = case cncnames pgf of outputConcr pgf = case cncnames pgf of
[] -> error "No concrete syntax." [] -> error "No concrete syntax."
cnc:_ -> cnc cnc:_ -> cnc
printPGF :: PGF -> String
printPGF = encodeUTF8 . printTree . fromPGF

View File

@@ -1,5 +1,5 @@
{-# LANGUAGE PatternGuards #-} {-# LANGUAGE PatternGuards #-}
module GF.Compile.GrammarToGFCC (prGrammar2gfcc,mkCanon2gfcc,addParsers) where module GF.Compile.GrammarToGFCC (mkCanon2gfcc,addParsers) where
import GF.Compile.Export import GF.Compile.Export
import GF.Compile.OptimizeGF (unshareModule) import GF.Compile.OptimizeGF (unshareModule)
@@ -37,11 +37,6 @@ traceD s t = t
-- the main function: generate PGF from GF. -- the main function: generate PGF from GF.
prGrammar2gfcc :: Options -> String -> SourceGrammar -> (String,String)
prGrammar2gfcc opts cnc gr = (abs,printPGF gc) where
(abs,gc) = mkCanon2gfcc opts cnc gr
mkCanon2gfcc :: Options -> String -> SourceGrammar -> (String,D.PGF) mkCanon2gfcc :: Options -> String -> SourceGrammar -> (String,D.PGF)
mkCanon2gfcc opts cnc gr = mkCanon2gfcc opts cnc gr =
(prIdent abs, (canon2gfcc opts pars . reorder abs . canon2canon abs) gr) (prIdent abs, (canon2gfcc opts pars . reorder abs . canon2canon abs) gr)

View File

@@ -80,8 +80,7 @@ data Phase = Preproc | Convert | Compile | Link
data Encoding = UTF_8 | ISO_8859_1 | CP_1251 data Encoding = UTF_8 | ISO_8859_1 | CP_1251
deriving (Eq,Ord) deriving (Eq,Ord)
data OutputFormat = FmtPGF data OutputFormat = FmtPGFPretty
| FmtPGFPretty
| FmtJavaScript | FmtJavaScript
| FmtHaskell | FmtHaskell
| FmtProlog | FmtProlog
@@ -239,7 +238,7 @@ defaultFlags = Flags {
optShowCPUTime = False, optShowCPUTime = False,
optEmitGFO = True, optEmitGFO = True,
optGFODir = ".", optGFODir = ".",
optOutputFormats = [FmtPGF], optOutputFormats = [],
optSISR = Nothing, optSISR = Nothing,
optHaskellOptions = Set.empty, optHaskellOptions = Set.empty,
optLexicalCats = Set.empty, optLexicalCats = Set.empty,
@@ -427,8 +426,7 @@ optDescr =
outputFormats :: [(String,OutputFormat)] outputFormats :: [(String,OutputFormat)]
outputFormats = outputFormats =
[("pgf", FmtPGF), [("pgf-pretty", FmtPGFPretty),
("pgf-pretty", FmtPGFPretty),
("js", FmtJavaScript), ("js", FmtJavaScript),
("haskell", FmtHaskell), ("haskell", FmtHaskell),
("prolog", FmtProlog), ("prolog", FmtProlog),

View File

@@ -4,8 +4,6 @@ module GFC (mainGFC) where
import PGF import PGF
import PGF.CId import PGF.CId
import PGF.Data import PGF.Data
import PGF.Raw.Parse
import PGF.Raw.Convert
import GF.Compile import GF.Compile
import GF.Compile.Export import GF.Compile.Export
@@ -16,6 +14,7 @@ import GF.Infra.Option
import GF.Data.ErrM import GF.Data.ErrM
import Data.Maybe import Data.Maybe
import Data.Binary
import System.FilePath import System.FilePath
@@ -57,10 +56,17 @@ unionPGFFiles opts fs =
where readPGFVerbose f = putPointE Normal opts ("Reading " ++ f ++ "...") $ ioeIO $ readPGF f where readPGFVerbose f = putPointE Normal opts ("Reading " ++ f ++ "...") $ ioeIO $ readPGF f
writeOutputs :: Options -> PGF -> IOE () writeOutputs :: Options -> PGF -> IOE ()
writeOutputs opts pgf = writeOutputs opts pgf = do
sequence_ [writeOutput opts name str writePGF opts pgf
| fmt <- flag optOutputFormats opts, sequence_ [writeOutput opts name str
(name,str) <- exportPGF opts fmt pgf] | fmt <- flag optOutputFormats opts,
(name,str) <- exportPGF opts fmt pgf]
writePGF :: Options -> PGF -> IOE ()
writePGF opts pgf = do
let name = fromMaybe (prCId (absname pgf)) (flag optName opts)
outfile = name <.> "pgf"
putPointE Normal opts ("Writing " ++ outfile ++ "...") $ ioeIO $ encodeFile outfile pgf
writeOutput :: Options -> FilePath-> String -> IOE () writeOutput :: Options -> FilePath-> String -> IOE ()
writeOutput opts file str = writeOutput opts file str =

View File

@@ -66,9 +66,7 @@ import PGF.TypeCheck
import PGF.Paraphrase import PGF.Paraphrase
import PGF.Macros import PGF.Macros
import PGF.Data import PGF.Data
import PGF.Raw.Convert import PGF.Binary
import PGF.Raw.Parse
import PGF.Raw.Print (printTree)
import PGF.Parsing.FCFG import PGF.Parsing.FCFG
import qualified PGF.Parsing.FCFG.Incremental as Incremental import qualified PGF.Parsing.FCFG.Incremental as Incremental
import qualified GF.Compile.GeneratePMCFG as PMCFG import qualified GF.Compile.GeneratePMCFG as PMCFG
@@ -80,6 +78,7 @@ import GF.Data.Utilities (replace)
import Data.Char import Data.Char
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.Maybe import Data.Maybe
import Data.Binary
import System.Random (newStdGen) import System.Random (newStdGen)
import Control.Monad import Control.Monad
@@ -210,9 +209,8 @@ readLanguage = readCId
showLanguage = prCId showLanguage = prCId
readPGF f = do readPGF f = do
s <- readFile f >>= return . decodeUTF8 -- pgf is in UTF8, internal in unicode g <- decodeFile f
g <- parseGrammar s return $! addParsers g
return $! addParsers $ toPGF g
-- Adds parsers for all concretes that don't have a parser and that have parser=ondemand. -- Adds parsers for all concretes that don't have a parser and that have parser=ondemand.
addParsers :: PGF -> PGF addParsers :: PGF -> PGF

View File

@@ -1,14 +0,0 @@
module PGF.Raw.Abstract where
data Grammar =
Grm [RExp]
deriving (Eq,Ord,Show)
data RExp =
App String [RExp]
| AInt Integer
| AStr String
| AFlt Double
| AMet
deriving (Eq,Ord,Show)

View File

@@ -1,273 +0,0 @@
module PGF.Raw.Convert (toPGF,fromPGF) where
import PGF.CId
import PGF.Data
import PGF.Raw.Abstract
import Data.Array.IArray
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
pgfMajorVersion, pgfMinorVersion :: Integer
(pgfMajorVersion, pgfMinorVersion) = (1,0)
-- convert parsed grammar to internal PGF
toPGF :: Grammar -> PGF
toPGF (Grm [
App "pgf" (AInt v1 : AInt v2 : App a []:cs),
App "flags" gfs,
ab@(
App "abstract" [
App "fun" fs,
App "cat" cts
]),
App "concrete" ccs
]) = let pgf = PGF {
absname = mkCId a,
cncnames = [mkCId c | App c [] <- cs],
gflags = Map.fromAscList [(mkCId f,v) | App f [AStr v] <- gfs],
abstract =
let
aflags = Map.fromAscList [(mkCId f,v) | App f [AStr v] <- gfs]
lfuns = [(mkCId f,(toType typ,toExp def)) | App f [typ, def] <- fs]
funs = Map.fromAscList lfuns
lcats = [(mkCId c, Prelude.map toHypo hyps) | App c hyps <- cts]
cats = Map.fromAscList lcats
catfuns = Map.fromAscList
[(cat,[f | (f, (DTyp _ c _,_)) <- lfuns, c==cat]) | (cat,_) <- lcats]
in Abstr aflags funs cats catfuns,
concretes = Map.fromAscList [(mkCId lang, toConcr pgf ts) | App lang ts <- ccs]
}
in pgf
where
toConcr :: PGF -> [RExp] -> Concr
toConcr pgf rexp =
let cnc = foldl add (Concr {cflags = Map.empty,
lins = Map.empty,
opers = Map.empty,
lincats = Map.empty,
lindefs = Map.empty,
printnames = Map.empty,
paramlincats = Map.empty,
parser = Nothing
}) rexp
in cnc
where
add :: Concr -> RExp -> Concr
add cnc (App "flags" ts) = cnc { cflags = Map.fromAscList [(mkCId f,v) | App f [AStr v] <- ts] }
add cnc (App "lin" ts) = cnc { lins = mkTermMap ts }
add cnc (App "oper" ts) = cnc { opers = mkTermMap ts }
add cnc (App "lincat" ts) = cnc { lincats = mkTermMap ts }
add cnc (App "lindef" ts) = cnc { lindefs = mkTermMap ts }
add cnc (App "printname" ts) = cnc { printnames = mkTermMap ts }
add cnc (App "param" ts) = cnc { paramlincats = mkTermMap ts }
add cnc (App "parser" ts) = cnc { parser = Just (toPInfo ts) }
toPInfo :: [RExp] -> ParserInfo
toPInfo [App "functions" fs, App "sequences" ss, App "productions" ps,App "categories" (t:cs)] =
ParserInfo { functions = functions
, sequences = seqs
, productions = productions
, startCats = cats
, totalCats = expToInt t
}
where
functions = mkArray (map toFFun fs)
seqs = mkArray (map toFSeq ss)
productions = IntMap.fromList (map toProductionSet ps)
cats = Map.fromList [(mkCId c, (map expToInt xs)) | App c xs <- cs]
toFFun :: RExp -> FFun
toFFun (App f [App "P" ts,App "R" ls]) = FFun fun prof lins
where
fun = mkCId f
prof = map toProfile ts
lins = mkArray [fromIntegral seqid | AInt seqid <- ls]
toProfile :: RExp -> Profile
toProfile AMet = []
toProfile (App "_A" [t]) = [expToInt t]
toProfile (App "_U" ts) = [expToInt t | App "_A" [t] <- ts]
toFSeq :: RExp -> FSeq
toFSeq (App "seq" ss) = mkArray [toSymbol s | s <- ss]
toProductionSet :: RExp -> (FCat,Set.Set Production)
toProductionSet (App "td" (rt : xs)) = (expToInt rt, Set.fromList (map toProduction xs))
where
toProduction (App "A" (ruleid : at)) = FApply (expToInt ruleid) (map expToInt at)
toProduction (App "C" [fcat]) = FCoerce (expToInt fcat)
toSymbol :: RExp -> FSymbol
toSymbol (App "P" [n,l]) = FSymCat (expToInt n) (expToInt l)
toSymbol (App "PL" [n,l]) = FSymLit (expToInt n) (expToInt l)
toSymbol (App "KP" (d:alts)) = FSymTok (toKP d alts)
toSymbol (AStr t) = FSymTok (KS t)
toType :: RExp -> Type
toType e = case e of
App cat [App "H" hypos, App "X" exps] ->
DTyp (map toHypo hypos) (mkCId cat) (map toExp exps)
_ -> error $ "type " ++ show e
toHypo :: RExp -> Hypo
toHypo e = case e of
App x [typ] -> Hyp (mkCId x) (toType typ)
_ -> error $ "hypo " ++ show e
toExp :: RExp -> Expr
toExp e = case e of
App "Abs" [App x [], exp] -> EAbs (mkCId x) (toExp exp)
App "App" [e1,e2] -> EApp (toExp e1) (toExp e2)
App "Eq" eqs -> EEq [Equ (map toExp ps) (toExp v) | App "E" (v:ps) <- eqs]
App "Var" [App i []] -> EVar (mkCId i)
AMet -> EMeta 0
AInt i -> ELit (LInt i)
AFlt i -> ELit (LFlt i)
AStr i -> ELit (LStr i)
_ -> error $ "exp " ++ show e
toTerm :: RExp -> Term
toTerm e = case e of
App "R" es -> R (map toTerm es)
App "S" es -> S (map toTerm es)
App "FV" es -> FV (map toTerm es)
App "P" [e,v] -> P (toTerm e) (toTerm v)
App "W" [AStr s,v] -> W s (toTerm v)
App "A" [AInt i] -> V (fromInteger i)
App f [] -> F (mkCId f)
AInt i -> C (fromInteger i)
AMet -> TM "?"
App "KP" (d:alts) -> K (toKP d alts)
AStr s -> K (KS s)
_ -> error $ "term " ++ show e
toKP d alts = KP (toStr d) (map toAlt alts)
where
toStr (App "S" vs) = [v | AStr v <- vs]
toAlt (App "A" [x,y]) = Alt (toStr x) (toStr y)
------------------------------
--- from internal to parser --
------------------------------
fromPGF :: PGF -> Grammar
fromPGF pgf = Grm [
App "pgf" (AInt pgfMajorVersion:AInt pgfMinorVersion
: App (prCId (absname pgf)) [] : map (flip App [] . prCId) (cncnames pgf)),
App "flags" [App (prCId f) [AStr v] | (f,v) <- Map.toList (gflags pgf `Map.union` aflags apgf)],
App "abstract" [
App "fun" [App (prCId f) [fromType t,fromExp d] | (f,(t,d)) <- Map.toList (funs apgf)],
App "cat" [App (prCId f) (map fromHypo hs) | (f,hs) <- Map.toList (cats apgf)]
],
App "concrete" [App (prCId lang) (fromConcrete c) | (lang,c) <- Map.toList (concretes pgf)]
]
where
apgf = abstract pgf
fromConcrete cnc = [
App "flags" [App (prCId f) [AStr v] | (f,v) <- Map.toList (cflags cnc)],
App "lin" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (lins cnc)],
App "oper" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (opers cnc)],
App "lincat" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (lincats cnc)],
App "lindef" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (lindefs cnc)],
App "printname" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (printnames cnc)],
App "param" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (paramlincats cnc)]
] ++ maybe [] (\p -> [fromPInfo p]) (parser cnc)
fromType :: Type -> RExp
fromType e = case e of
DTyp hypos cat exps ->
App (prCId cat) [
App "H" (map fromHypo hypos),
App "X" (map fromExp exps)]
fromHypo :: Hypo -> RExp
fromHypo e = case e of
Hyp x typ -> App (prCId x) [fromType typ]
fromExp :: Expr -> RExp
fromExp e = case e of
EAbs x exp -> App "Abs" [App (prCId x) [], fromExp exp]
EApp e1 e2 -> App "App" [fromExp e1, fromExp e2]
EVar x -> App "Var" [App (prCId x) []]
ELit (LStr s) -> AStr s
ELit (LFlt d) -> AFlt d
ELit (LInt i) -> AInt (toInteger i)
EMeta _ -> AMet ----
EEq eqs -> App "Eq" [App "E" (map fromExp (v:ps)) | Equ ps v <- eqs]
fromTerm :: Term -> RExp
fromTerm e = case e of
R es -> App "R" (map fromTerm es)
S es -> App "S" (map fromTerm es)
FV es -> App "FV" (map fromTerm es)
P e v -> App "P" [fromTerm e, fromTerm v]
W s v -> App "W" [AStr s, fromTerm v]
C i -> AInt (toInteger i)
TM _ -> AMet
F f -> App (prCId f) []
V i -> App "A" [AInt (toInteger i)]
K t -> fromTokn t
fromTokn :: Tokn -> RExp
fromTokn (KS s) = AStr s
fromTokn (KP d vs) = App "KP" (str d : [App "A" [str v, str x] | Alt v x <- vs])
where
str v = App "S" (map AStr v)
-- ** Parsing info
fromPInfo :: ParserInfo -> RExp
fromPInfo p = App "parser" [
App "functions" [fromFFun fun | fun <- elems (functions p)],
App "sequences" [fromFSeq seq | seq <- elems (sequences p)],
App "productions" [fromProductionSet xs | xs <- IntMap.toList (productions p)],
App "categories" (intToExp (totalCats p) : [App (prCId f) (map intToExp xs) | (f,xs) <- Map.toList (startCats p)])
]
fromFFun :: FFun -> RExp
fromFFun (FFun fun prof lins) = App (prCId fun) [App "P" (map fromProfile prof), App "R" [intToExp seqid | seqid <- elems lins]]
where
fromProfile :: Profile -> RExp
fromProfile [] = AMet
fromProfile [x] = daughter x
fromProfile args = App "_U" (map daughter args)
daughter n = App "_A" [intToExp n]
fromSymbol :: FSymbol -> RExp
fromSymbol (FSymCat n l) = App "P" [intToExp n, intToExp l]
fromSymbol (FSymLit n l) = App "PL" [intToExp n, intToExp l]
fromSymbol (FSymTok t) = fromTokn t
fromFSeq :: FSeq -> RExp
fromFSeq seq = App "seq" [fromSymbol s | s <- elems seq]
fromProductionSet :: (FCat,Set.Set Production) -> RExp
fromProductionSet (cat,xs) = App "td" (intToExp cat : map fromPassive (Set.toList xs))
where
fromPassive (FApply ruleid args) = App "A" (intToExp ruleid : map intToExp args)
fromPassive (FCoerce fcat) = App "C" [intToExp fcat]
-- ** Utilities
mkTermMap :: [RExp] -> Map.Map CId Term
mkTermMap ts = Map.fromAscList [(mkCId f,toTerm v) | App f [v] <- ts]
mkArray :: IArray a e => [e] -> a Int e
mkArray xs = listArray (0, length xs - 1) xs
expToInt :: Integral a => RExp -> a
expToInt (App "neg" [AInt i]) = fromIntegral (negate i)
expToInt (AInt i) = fromIntegral i
expToStr :: RExp -> String
expToStr (AStr s) = s
intToExp :: Integral a => a -> RExp
intToExp x | x < 0 = App "neg" [AInt (fromIntegral (negate x))]
| otherwise = AInt (fromIntegral x)

View File

@@ -1,101 +0,0 @@
module PGF.Raw.Parse (parseGrammar) where
import PGF.CId
import PGF.Raw.Abstract
import Control.Monad
import Data.Char
import qualified Data.ByteString.Char8 as BS
parseGrammar :: String -> IO Grammar
parseGrammar s = case runP pGrammar s of
Just (x,"") -> return x
_ -> fail "Parse error"
pGrammar :: P Grammar
pGrammar = liftM Grm pTerms
pTerms :: P [RExp]
pTerms = liftM2 (:) (pTerm 1) pTerms <++ (skipSpaces >> return [])
pTerm :: Int -> P RExp
pTerm n = skipSpaces >> (pParen <++ pApp <++ pNum <++ pStr <++ pMeta)
where pParen = between (char '(') (char ')') (pTerm 0)
pApp = liftM2 App pIdent (if n == 0 then pTerms else return [])
pStr = char '"' >> liftM AStr (manyTill (pEsc <++ get) (char '"'))
pEsc = char '\\' >> get
pNum = do x <- munch1 isDigit
((char '.' >> munch1 isDigit >>= \y -> return (AFlt (read (x++"."++y))))
<++
return (AInt (read x)))
pMeta = char '?' >> return AMet
pIdent = liftM2 (:) (satisfy isIdentFirst) (munch isIdentRest)
isIdentFirst c = c == '_' || isAlpha c
isIdentRest c = c == '_' || c == '\'' || isAlphaNum c
-- Parser combinators with only left-biased choice
newtype P a = P { runP :: String -> Maybe (a,String) }
instance Monad P where
return x = P (\ts -> Just (x,ts))
P p >>= f = P (\ts -> p ts >>= \ (x,ts') -> runP (f x) ts')
fail _ = pfail
instance MonadPlus P where
mzero = pfail
mplus = (<++)
get :: P Char
get = P (\ts -> case ts of
[] -> Nothing
c:cs -> Just (c,cs))
look :: P String
look = P (\ts -> Just (ts,ts))
(<++) :: P a -> P a -> P a
P p <++ P q = P (\ts -> p ts `mplus` q ts)
pfail :: P a
pfail = P (\ts -> Nothing)
satisfy :: (Char -> Bool) -> P Char
satisfy p = do c <- get
if p c then return c else pfail
char :: Char -> P Char
char c = satisfy (c==)
string :: String -> P String
string this = look >>= scan this
where
scan [] _ = return this
scan (x:xs) (y:ys) | x == y = get >> scan xs ys
scan _ _ = pfail
skipSpaces :: P ()
skipSpaces = look >>= skip
where
skip (c:s) | isSpace c = get >> skip s
skip _ = return ()
manyTill :: P a -> P end -> P [a]
manyTill p end = scan
where scan = (end >> return []) <++ liftM2 (:) p scan
munch :: (Char -> Bool) -> P String
munch p = munch1 p <++ return []
munch1 :: (Char -> Bool) -> P String
munch1 p = liftM2 (:) (satisfy p) (munch p)
choice :: [P a] -> P a
choice = msum
between :: P open -> P close -> P a -> P a
between open close p = do open
x <- p
close
return x

View File

@@ -1,35 +0,0 @@
module PGF.Raw.Print (printTree) where
import PGF.CId
import PGF.Raw.Abstract
import Data.List (intersperse)
import Numeric (showFFloat)
import qualified Data.ByteString.Char8 as BS
printTree :: Grammar -> String
printTree g = prGrammar g ""
prGrammar :: Grammar -> ShowS
prGrammar (Grm xs) = prRExpList xs
prRExp :: Int -> RExp -> ShowS
prRExp _ (App x []) = showString x
prRExp n (App x xs) = p (showString x . showChar ' ' . prRExpList xs)
where p s = if n == 0 then s else showChar '(' . s . showChar ')'
prRExp _ (AInt x) = shows x
prRExp _ (AStr x) = showChar '"' . concatS (map mkEsc x) . showChar '"'
prRExp _ (AFlt x) = showFFloat Nothing x
prRExp _ AMet = showChar '?'
mkEsc :: Char -> ShowS
mkEsc s = case s of
'"' -> showString "\\\""
'\\' -> showString "\\\\"
_ -> showChar s
prRExpList :: [RExp] -> ShowS
prRExpList = concatS . intersperse (showChar ' ') . map (prRExp 1)
concatS :: [ShowS] -> ShowS
concatS = foldr (.) id