binary serialization for PGF

2008-10-28 13:57:10 +00:00
parent 8e43cfb8a8
commit ebd98056ce
14 changed files with 1984 additions and 458 deletions
--- a/GF.cabal
+++ b/GF.cabal
@@ -575,23 +575,25 @@ library
    PGF.Parsing.FCFG
    PGF.Expr
    PGF.Type
    PGF.Raw.Parse
    PGF.Raw.Print
    PGF.Raw.Convert
    PGF.Raw.Abstract
    PGF.AbsCompute
    PGF.Paraphrase
    PGF.TypeCheck
    PGF.Binary
    GF.Data.MultiMap
    GF.Data.Utilities
    GF.Data.SortedList
    GF.Data.Assoc
    GF.Data.ErrM
    GF.Text.UTF8
 -- needed only for the on demand generation of PMCFG
    GF.Data.BacktrackM
    GF.Compile.GenerateFCFG
    GF.Compile.GeneratePMCFG
 -- not really part of GF but I have changed the original binary library
 -- and we have to keep the copy for now.
    Data.Binary
    Data.Binary.Put
    Data.Binary.Get
    Data.Binary.Builder
 executable gf
  build-depends: base,
@@ -701,6 +703,7 @@ executable gf
    PGF.AbsCompute
    PGF.Paraphrase
    PGF.TypeCheck
    PGF.Binary
    GFC
    GFI
--- a/src/Data/Binary.hs
+++ b/src/Data/Binary.hs
@@ -0,0 +1,792 @@
 {-# LANGUAGE CPP, FlexibleInstances, FlexibleContexts #-}
 -----------------------------------------------------------------------------
 -- |
 -- Module      : Data.Binary
 -- Copyright   : Lennart Kolmodin
 -- License     : BSD3-style (see LICENSE)
 -- 
 -- Maintainer  : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
 -- Stability   : unstable
 -- Portability : portable to Hugs and GHC. Requires the FFI and some flexible instances
 --
 -- Binary serialisation of Haskell values to and from lazy ByteStrings.
 -- The Binary library provides methods for encoding Haskell values as
 -- streams of bytes directly in memory. The resulting @ByteString@ can
 -- then be written to disk, sent over the network, or futher processed
 -- (for example, compressed with gzip).
 --
 -- The 'Binary' package is notable in that it provides both pure, and
 -- high performance serialisation.
 --
 -- Values are always encoded in network order (big endian) form, and
 -- encoded data should be portable across machine endianess, word size,
 -- or compiler version. For example, data encoded using the Binary class
 -- could be written from GHC, and read back in Hugs.
 --
 -----------------------------------------------------------------------------
 module Data.Binary (
    -- * The Binary class
      Binary(..)
    -- $example
    -- * The Get and Put monads
    , Get
    , Put
    -- * Useful helpers for writing instances
    , putWord8
    , getWord8
    -- * Binary serialisation
    , encode                    -- :: Binary a => a -> ByteString
    , decode                    -- :: Binary a => ByteString -> a
    -- * IO functions for serialisation
    , encodeFile                -- :: Binary a => FilePath -> a -> IO ()
    , decodeFile                -- :: Binary a => FilePath -> IO a
 -- Lazy put and get
 --  , lazyPut
 --  , lazyGet
    , module Data.Word -- useful
    ) where
 import Data.Word
 import Data.Binary.Put
 import Data.Binary.Get
 import Control.Monad
 import Foreign
 import System.IO
 import Data.ByteString.Lazy (ByteString)
 import qualified Data.ByteString.Lazy as L
 import Data.Char    (chr,ord)
 import Data.List    (unfoldr)
 -- And needed for the instances:
 import qualified Data.ByteString as B
 import qualified Data.Map        as Map
 import qualified Data.Set        as Set
 import qualified Data.IntMap     as IntMap
 import qualified Data.IntSet     as IntSet
 import qualified Data.Ratio      as R
 import qualified Data.Tree as T
 import Data.Array.Unboxed
 --
 -- This isn't available in older Hugs or older GHC
 --
 #if __GLASGOW_HASKELL__ >= 606
 import qualified Data.Sequence as Seq
 import qualified Data.Foldable as Fold
 #endif
 ------------------------------------------------------------------------
 -- | The @Binary@ class provides 'put' and 'get', methods to encode and
 -- decode a Haskell value to a lazy ByteString. It mirrors the Read and
 -- Show classes for textual representation of Haskell types, and is
 -- suitable for serialising Haskell values to disk, over the network.
 --
 -- For parsing and generating simple external binary formats (e.g. C
 -- structures), Binary may be used, but in general is not suitable
 -- for complex protocols. Instead use the Put and Get primitives
 -- directly.
 --
 -- Instances of Binary should satisfy the following property:
 --
 -- > decode . encode == id
 --
 -- That is, the 'get' and 'put' methods should be the inverse of each
 -- other. A range of instances are provided for basic Haskell types. 
 --
 class Binary t where
    -- | Encode a value in the Put monad.
    put :: t -> Put
    -- | Decode a value in the Get monad
    get :: Get t
 -- $example
 -- To serialise a custom type, an instance of Binary for that type is
 -- required. For example, suppose we have a data structure:
 --
 -- > data Exp = IntE Int
 -- >          | OpE  String Exp Exp
 -- >    deriving Show
 --
 -- We can encode values of this type into bytestrings using the
 -- following instance, which proceeds by recursively breaking down the
 -- structure to serialise:
 --
 -- > instance Binary Exp where
 -- >       put (IntE i)          = do put (0 :: Word8)
 -- >                                  put i
 -- >       put (OpE s e1 e2)     = do put (1 :: Word8)
 -- >                                  put s
 -- >                                  put e1
 -- >                                  put e2
 -- > 
 -- >       get = do t <- get :: Get Word8
 -- >                case t of
 -- >                     0 -> do i <- get
 -- >                             return (IntE i)
 -- >                     1 -> do s  <- get
 -- >                             e1 <- get
 -- >                             e2 <- get
 -- >                             return (OpE s e1 e2)
 --
 -- Note how we write an initial tag byte to indicate each variant of the
 -- data type.
 --
 -- We can simplify the writing of 'get' instances using monadic
 -- combinators:
 -- 
 -- >       get = do tag <- getWord8
 -- >                case tag of
 -- >                    0 -> liftM  IntE get
 -- >                    1 -> liftM3 OpE  get get get
 --
 -- The generation of Binary instances has been automated by a script
 -- using Scrap Your Boilerplate generics. Use the script here:
 --  <http://darcs.haskell.org/binary/tools/derive/BinaryDerive.hs>.
 --
 -- To derive the instance for a type, load this script into GHCi, and
 -- bring your type into scope. Your type can then have its Binary
 -- instances derived as follows:
 --
 -- > $ ghci -fglasgow-exts BinaryDerive.hs
 -- > *BinaryDerive> :l Example.hs
 -- > *Main> deriveM (undefined :: Drinks)
 -- >
 -- > instance Binary Main.Drinks where
 -- >      put (Beer a) = putWord8 0 >> put a
 -- >      put Coffee = putWord8 1
 -- >      put Tea = putWord8 2
 -- >      put EnergyDrink = putWord8 3
 -- >      put Water = putWord8 4
 -- >      put Wine = putWord8 5
 -- >      put Whisky = putWord8 6
 -- >      get = do
 -- >        tag_ <- getWord8
 -- >        case tag_ of
 -- >          0 -> get >>= \a -> return (Beer a)
 -- >          1 -> return Coffee
 -- >          2 -> return Tea
 -- >          3 -> return EnergyDrink
 -- >          4 -> return Water
 -- >          5 -> return Wine
 -- >          6 -> return Whisky
 -- >
 --
 -- To serialise this to a bytestring, we use 'encode', which packs the
 -- data structure into a binary format, in a lazy bytestring
 --
 -- > > let e = OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
 -- > > let v = encode e
 --
 -- Where 'v' is a binary encoded data structure. To reconstruct the
 -- original data, we use 'decode'
 --
 -- > > decode v :: Exp
 -- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
 --
 -- The lazy ByteString that results from 'encode' can be written to
 -- disk, and read from disk using Data.ByteString.Lazy IO functions,
 -- such as hPutStr or writeFile:
 --
 -- > > writeFile "/tmp/exp.txt" (encode e)
 --
 -- And read back with:
 --
 -- > > readFile "/tmp/exp.txt" >>= return . decode :: IO Exp
 -- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
 --
 -- We can also directly serialise a value to and from a Handle, or a file:
 -- 
 -- > > v <- decodeFile  "/tmp/exp.txt" :: IO Exp
 -- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
 --
 -- And write a value to disk
 --
 -- > > encodeFile "/tmp/a.txt" v
 --
 ------------------------------------------------------------------------
 -- Wrappers to run the underlying monad
 -- | Encode a value using binary serialisation to a lazy ByteString.
 --
 encode :: Binary a => a -> ByteString
 encode = runPut . put
 {-# INLINE encode #-}
 -- | Decode a value from a lazy ByteString, reconstructing the original structure.
 --
 decode :: Binary a => ByteString -> a
 decode = runGet get
 ------------------------------------------------------------------------
 -- Convenience IO operations
 -- | Lazily serialise a value to a file
 --
 -- This is just a convenience function, it's defined simply as:
 --
 -- > encodeFile f = B.writeFile f . encode
 --
 -- So for example if you wanted to compress as well, you could use:
 --
 -- > B.writeFile f . compress . encode
 --
 encodeFile :: Binary a => FilePath -> a -> IO ()
 encodeFile f v = L.writeFile f (encode v)
 -- | Lazily reconstruct a value previously written to a file.
 --
 -- This is just a convenience function, it's defined simply as:
 --
 -- > decodeFile f = return . decode =<< B.readFile f
 --
 -- So for example if you wanted to decompress as well, you could use:
 --
 -- > return . decode . decompress =<< B.readFile f
 --
 -- After contructing the data from the input file, 'decodeFile' checks
 -- if the file is empty, and in doing so will force the associated file
 -- handle closed, if it is indeed empty. If the file is not empty, 
 -- it is up to the decoding instance to consume the rest of the data,
 -- or otherwise finalise the resource.
 --
 decodeFile :: Binary a => FilePath -> IO a
 decodeFile f = do
    s <- L.readFile f
    return $ runGet (do v <- get
                        m <- isEmpty
                        m `seq` return v) s
 -- needs bytestring 0.9.1.x to work 
 ------------------------------------------------------------------------
 -- Lazy put and get
 -- lazyPut :: (Binary a) => a -> Put
 -- lazyPut a = put (encode a)
 -- lazyGet :: (Binary a) => Get a
 -- lazyGet = fmap decode get
 ------------------------------------------------------------------------
 -- Simple instances
 -- The () type need never be written to disk: values of singleton type
 -- can be reconstructed from the type alone
 instance Binary () where
    put ()  = return ()
    get     = return ()
 -- Bools are encoded as a byte in the range 0 .. 1
 instance Binary Bool where
    put     = putWord8 . fromIntegral . fromEnum
    get     = liftM (toEnum . fromIntegral) getWord8
 -- Values of type 'Ordering' are encoded as a byte in the range 0 .. 2
 instance Binary Ordering where
    put     = putWord8 . fromIntegral . fromEnum
    get     = liftM (toEnum . fromIntegral) getWord8
 ------------------------------------------------------------------------
 -- Words and Ints
 -- Words8s are written as bytes
 instance Binary Word8 where
    put     = putWord8
    get     = getWord8
 -- Words16s are written as 2 bytes in big-endian (network) order
 instance Binary Word16 where
    put     = putWord16be
    get     = getWord16be
 -- Words32s are written as 4 bytes in big-endian (network) order
 instance Binary Word32 where
    put     = putWord32be
    get     = getWord32be
 -- Words64s are written as 8 bytes in big-endian (network) order
 instance Binary Word64 where
    put     = putWord64be
    get     = getWord64be
 -- Int8s are written as a single byte.
 instance Binary Int8 where
    put i   = put (fromIntegral i :: Word8)
    get     = liftM fromIntegral (get :: Get Word8)
 -- Int16s are written as a 2 bytes in big endian format
 instance Binary Int16 where
    put i   = put (fromIntegral i :: Word16)
    get     = liftM fromIntegral (get :: Get Word16)
 -- Int32s are written as a 4 bytes in big endian format
 instance Binary Int32 where
    put i   = put (fromIntegral i :: Word32)
    get     = liftM fromIntegral (get :: Get Word32)
 -- Int64s are written as a 4 bytes in big endian format
 instance Binary Int64 where
    put i   = put (fromIntegral i :: Word64)
    get     = liftM fromIntegral (get :: Get Word64)
 ------------------------------------------------------------------------
 -- Words are written as sequence of bytes. The last bit of each
 -- byte indicates whether there are more bytes to be read
 instance Binary Word where
    put i | i <=               0x7f = do put  a
          | i <=             0x3fff = do put (a .|. 0x80)
                                         put  b
          | i <=           0x1fffff = do put (a .|. 0x80)
                                         put (b .|. 0x80)
                                         put  c
          | i <=          0xfffffff = do put (a .|. 0x80)
                                         put (b .|. 0x80)
                                         put (c .|. 0x80)
                                         put  d
 #if WORD_SIZE_IN_BITS < 64
          | otherwise               = do put (a .|. 0x80)
                                         put (b .|. 0x80)
                                         put (c .|. 0x80)
                                         put (d .|. 0x80)
                                         put  e
 #else
          | i <=        0x7ffffffff = do put (a .|. 0x80)
                                         put (b .|. 0x80)
                                         put (c .|. 0x80)
                                         put (d .|. 0x80)
                                         put  e
          | i <=      0x3ffffffffff = do put (a .|. 0x80)
                                         put (b .|. 0x80)
                                         put (c .|. 0x80)
                                         put (d .|. 0x80)
                                         put (e .|. 0x80)
                                         put  f
          | i <=    0x1ffffffffffff = do put (a .|. 0x80)
                                         put (b .|. 0x80)
                                         put (c .|. 0x80)
                                         put (d .|. 0x80)
                                         put (e .|. 0x80)
                                         put (f .|. 0x80)
                                         put  g
          | i <=   0xffffffffffffff = do put (a .|. 0x80)
                                         put (b .|. 0x80)
                                         put (c .|. 0x80)
                                         put (d .|. 0x80)
                                         put (e .|. 0x80)
                                         put (f .|. 0x80)
                                         put (g .|. 0x80)
                                         put  h
          | i <=   0xffffffffffffff = do put (a .|. 0x80)
                                         put (b .|. 0x80)
                                         put (c .|. 0x80)
                                         put (d .|. 0x80)
                                         put (e .|. 0x80)
                                         put (f .|. 0x80)
                                         put (g .|. 0x80)
                                         put  h
          | i <= 0x7fffffffffffffff = do put (a .|. 0x80)
                                         put (b .|. 0x80)
                                         put (c .|. 0x80)
                                         put (d .|. 0x80)
                                         put (e .|. 0x80)
                                         put (f .|. 0x80)
                                         put (g .|. 0x80)
                                         put (h .|. 0x80)
                                         put  j
          | otherwise               = do put (a .|. 0x80)
                                         put (b .|. 0x80)
                                         put (c .|. 0x80)
                                         put (d .|. 0x80)
                                         put (e .|. 0x80)
                                         put (f .|. 0x80)
                                         put (g .|. 0x80)
                                         put (h .|. 0x80)
                                         put (j .|. 0x80)
                                         put  k
 #endif
          where
            a = fromIntegral (       i    .&. 0x7f) :: Word8
            b = fromIntegral (shiftR i  7 .&. 0x7f) :: Word8
            c = fromIntegral (shiftR i 14 .&. 0x7f) :: Word8
            d = fromIntegral (shiftR i 21 .&. 0x7f) :: Word8
            e = fromIntegral (shiftR i 28 .&. 0x7f) :: Word8
            f = fromIntegral (shiftR i 35 .&. 0x7f) :: Word8
            g = fromIntegral (shiftR i 42 .&. 0x7f) :: Word8
            h = fromIntegral (shiftR i 49 .&. 0x7f) :: Word8
            j = fromIntegral (shiftR i 56 .&. 0x7f) :: Word8
            k = fromIntegral (shiftR i 63 .&. 0x7f) :: Word8
    get = do i <- getWord8
             (if i <= 0x7f
                then return (fromIntegral i)
                else do n <- get
                        return $ (n `shiftL` 7) .|. (fromIntegral (i .&. 0x7f)))
 -- Int has the same representation as Word
 instance Binary Int where
    put i   = put (fromIntegral i :: Word)
    get     = liftM fromIntegral (get :: Get Word)
 ------------------------------------------------------------------------
 -- 
 -- Portable, and pretty efficient, serialisation of Integer
 --
 -- Fixed-size type for a subset of Integer
 type SmallInt = Int32
 -- Integers are encoded in two ways: if they fit inside a SmallInt,
 -- they're written as a byte tag, and that value.  If the Integer value
 -- is too large to fit in a SmallInt, it is written as a byte array,
 -- along with a sign and length field.
 instance Binary Integer where
    {-# INLINE put #-}
    put n | n >= lo && n <= hi = do
        putWord8 0
        put (fromIntegral n :: SmallInt)  -- fast path
     where
        lo = fromIntegral (minBound :: SmallInt) :: Integer
        hi = fromIntegral (maxBound :: SmallInt) :: Integer
    put n = do
        putWord8 1
        put sign
        put (unroll (abs n))         -- unroll the bytes
     where
        sign = fromIntegral (signum n) :: Word8
    {-# INLINE get #-}
    get = do
        tag <- get :: Get Word8
        case tag of
            0 -> liftM fromIntegral (get :: Get SmallInt)
            _ -> do sign  <- get
                    bytes <- get
                    let v = roll bytes
                    return $! if sign == (1 :: Word8) then v else - v
 --
 -- Fold and unfold an Integer to and from a list of its bytes
 --
 unroll :: Integer -> [Word8]
 unroll = unfoldr step
  where
    step 0 = Nothing
    step i = Just (fromIntegral i, i `shiftR` 8)
 roll :: [Word8] -> Integer
 roll   = foldr unstep 0
  where
    unstep b a = a `shiftL` 8 .|. fromIntegral b
 {-
 --
 -- An efficient, raw serialisation for Integer (GHC only)
 --
 -- TODO  This instance is not architecture portable.  GMP stores numbers as
 -- arrays of machine sized words, so the byte format is not portable across
 -- architectures with different endianess and word size.
 import Data.ByteString.Base (toForeignPtr,unsafePackAddress, memcpy)
 import GHC.Base     hiding (ord, chr)
 import GHC.Prim
 import GHC.Ptr (Ptr(..))
 import GHC.IOBase (IO(..))
 instance Binary Integer where
    put (S# i)    = putWord8 0 >> put (I# i)
    put (J# s ba) = do
        putWord8 1
        put (I# s)
        put (BA ba)
    get = do
        b <- getWord8
        case b of
            0 -> do (I# i#) <- get
                    return (S# i#)
            _ -> do (I# s#) <- get
                    (BA a#) <- get
                    return (J# s# a#)
 instance Binary ByteArray where
    -- Pretty safe.
    put (BA ba) =
        let sz   = sizeofByteArray# ba   -- (primitive) in *bytes*
            addr = byteArrayContents# ba
            bs   = unsafePackAddress (I# sz) addr
        in put bs   -- write as a ByteString. easy, yay!
    -- Pretty scary. Should be quick though
    get = do
        (fp, off, n@(I# sz)) <- liftM toForeignPtr get      -- so decode a ByteString
        assert (off == 0) $ return $ unsafePerformIO $ do
            (MBA arr) <- newByteArray sz                    -- and copy it into a ByteArray#
            let to = byteArrayContents# (unsafeCoerce# arr) -- urk, is this safe?
            withForeignPtr fp $ \from -> memcpy (Ptr to) from (fromIntegral n)
            freezeByteArray arr
 -- wrapper for ByteArray#
 data ByteArray = BA  {-# UNPACK #-} !ByteArray#
 data MBA       = MBA {-# UNPACK #-} !(MutableByteArray# RealWorld)
 newByteArray :: Int# -> IO MBA
 newByteArray sz = IO $ \s ->
  case newPinnedByteArray# sz s of { (# s', arr #) ->
  (# s', MBA arr #) }
 freezeByteArray :: MutableByteArray# RealWorld -> IO ByteArray
 freezeByteArray arr = IO $ \s ->
  case unsafeFreezeByteArray# arr s of { (# s', arr' #) ->
  (# s', BA arr' #) }
 -}
 instance (Binary a,Integral a) => Binary (R.Ratio a) where
    put r = put (R.numerator r) >> put (R.denominator r)
    get = liftM2 (R.%) get get
 ------------------------------------------------------------------------
 -- Char is serialised as UTF-8
 instance Binary Char where
    put a | c <= 0x7f     = put (fromIntegral c :: Word8)
          | c <= 0x7ff    = do put (0xc0 .|. y)
                               put (0x80 .|. z)
          | c <= 0xffff   = do put (0xe0 .|. x)
                               put (0x80 .|. y)
                               put (0x80 .|. z)
          | c <= 0x10ffff = do put (0xf0 .|. w)
                               put (0x80 .|. x)
                               put (0x80 .|. y)
                               put (0x80 .|. z)
          | otherwise     = error "Not a valid Unicode code point"
     where
        c = ord a
        z, y, x, w :: Word8
        z = fromIntegral (c           .&. 0x3f)
        y = fromIntegral (shiftR c 6  .&. 0x3f)
        x = fromIntegral (shiftR c 12 .&. 0x3f)
        w = fromIntegral (shiftR c 18 .&. 0x7)
    get = do
        let getByte = liftM (fromIntegral :: Word8 -> Int) get
            shiftL6 = flip shiftL 6 :: Int -> Int
        w <- getByte
        r <- case () of
                _ | w < 0x80  -> return w
                  | w < 0xe0  -> do
                                    x <- liftM (xor 0x80) getByte
                                    return (x .|. shiftL6 (xor 0xc0 w))
                  | w < 0xf0  -> do
                                    x <- liftM (xor 0x80) getByte
                                    y <- liftM (xor 0x80) getByte
                                    return (y .|. shiftL6 (x .|. shiftL6
                                            (xor 0xe0 w)))
                  | otherwise -> do
                                x <- liftM (xor 0x80) getByte
                                y <- liftM (xor 0x80) getByte
                                z <- liftM (xor 0x80) getByte
                                return (z .|. shiftL6 (y .|. shiftL6
                                        (x .|. shiftL6 (xor 0xf0 w))))
        return $! chr r
 ------------------------------------------------------------------------
 -- Instances for the first few tuples
 instance (Binary a, Binary b) => Binary (a,b) where
    put (a,b)           = put a >> put b
    get                 = liftM2 (,) get get
 instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where
    put (a,b,c)         = put a >> put b >> put c
    get                 = liftM3 (,,) get get get
 instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where
    put (a,b,c,d)       = put a >> put b >> put c >> put d
    get                 = liftM4 (,,,) get get get get
 instance (Binary a, Binary b, Binary c, Binary d, Binary e) => Binary (a,b,c,d,e) where
    put (a,b,c,d,e)     = put a >> put b >> put c >> put d >> put e
    get                 = liftM5 (,,,,) get get get get get
 -- 
 -- and now just recurse:
 --
 instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f)
        => Binary (a,b,c,d,e,f) where
    put (a,b,c,d,e,f)   = put (a,(b,c,d,e,f))
    get                 = do (a,(b,c,d,e,f)) <- get ; return (a,b,c,d,e,f)
 instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f, Binary g)
        => Binary (a,b,c,d,e,f,g) where
    put (a,b,c,d,e,f,g) = put (a,(b,c,d,e,f,g))
    get                 = do (a,(b,c,d,e,f,g)) <- get ; return (a,b,c,d,e,f,g)
 instance (Binary a, Binary b, Binary c, Binary d, Binary e,
          Binary f, Binary g, Binary h)
        => Binary (a,b,c,d,e,f,g,h) where
    put (a,b,c,d,e,f,g,h) = put (a,(b,c,d,e,f,g,h))
    get                   = do (a,(b,c,d,e,f,g,h)) <- get ; return (a,b,c,d,e,f,g,h)
 instance (Binary a, Binary b, Binary c, Binary d, Binary e,
          Binary f, Binary g, Binary h, Binary i)
        => Binary (a,b,c,d,e,f,g,h,i) where
    put (a,b,c,d,e,f,g,h,i) = put (a,(b,c,d,e,f,g,h,i))
    get                     = do (a,(b,c,d,e,f,g,h,i)) <- get ; return (a,b,c,d,e,f,g,h,i)
 instance (Binary a, Binary b, Binary c, Binary d, Binary e,
          Binary f, Binary g, Binary h, Binary i, Binary j)
        => Binary (a,b,c,d,e,f,g,h,i,j) where
    put (a,b,c,d,e,f,g,h,i,j) = put (a,(b,c,d,e,f,g,h,i,j))
    get                       = do (a,(b,c,d,e,f,g,h,i,j)) <- get ; return (a,b,c,d,e,f,g,h,i,j)
 ------------------------------------------------------------------------
 -- Container types
 instance Binary a => Binary [a] where
    put l  = put (length l) >> mapM_ put l
    get    = do n <- get :: Get Int
                xs <- replicateM n get
                return xs
 instance (Binary a) => Binary (Maybe a) where
    put Nothing  = putWord8 0
    put (Just x) = putWord8 1 >> put x
    get = do
        w <- getWord8
        case w of
            0 -> return Nothing
            _ -> liftM Just get
 instance (Binary a, Binary b) => Binary (Either a b) where
    put (Left  a) = putWord8 0 >> put a
    put (Right b) = putWord8 1 >> put b
    get = do
        w <- getWord8
        case w of
            0 -> liftM Left  get
            _ -> liftM Right get
 ------------------------------------------------------------------------
 -- ByteStrings (have specially efficient instances)
 instance Binary B.ByteString where
    put bs = do put (B.length bs)
                putByteString bs
    get    = get >>= getByteString
 --
 -- Using old versions of fps, this is a type synonym, and non portable
 -- 
 -- Requires 'flexible instances'
 --
 instance Binary ByteString where
    put bs = do put (fromIntegral (L.length bs) :: Int)
                putLazyByteString bs
    get    = get >>= getLazyByteString
 ------------------------------------------------------------------------
 -- Maps and Sets
 instance (Ord a, Binary a) => Binary (Set.Set a) where
    put s = put (Set.size s) >> mapM_ put (Set.toAscList s)
    get   = liftM Set.fromDistinctAscList get
 instance (Ord k, Binary k, Binary e) => Binary (Map.Map k e) where
    put m = put (Map.size m) >> mapM_ put (Map.toAscList m)
    get   = liftM Map.fromDistinctAscList get
 instance Binary IntSet.IntSet where
    put s = put (IntSet.size s) >> mapM_ put (IntSet.toAscList s)
    get   = liftM IntSet.fromDistinctAscList get
 instance (Binary e) => Binary (IntMap.IntMap e) where
    put m = put (IntMap.size m) >> mapM_ put (IntMap.toAscList m)
    get   = liftM IntMap.fromDistinctAscList get
 ------------------------------------------------------------------------
 -- Queues and Sequences
 #if __GLASGOW_HASKELL__ >= 606
 --
 -- This is valid Hugs, but you need the most recent Hugs
 --
 instance (Binary e) => Binary (Seq.Seq e) where
    -- any better way to do this?
    put = put . Fold.toList
    get = fmap Seq.fromList get
 #endif
 ------------------------------------------------------------------------
 -- Floating point
 instance Binary Double where
    put d = put (decodeFloat d)
    get   = liftM2 encodeFloat get get
 instance Binary Float where
    put f = put (decodeFloat f)
    get   = liftM2 encodeFloat get get
 ------------------------------------------------------------------------
 -- Trees
 instance (Binary e) => Binary (T.Tree e) where
    put (T.Node r s) = put r >> put s
    get = liftM2 T.Node get get
 ------------------------------------------------------------------------
 -- Arrays
 instance (Binary i, Ix i, Binary e) => Binary (Array i e) where
    put a = do
        put (bounds a)
        put (rangeSize $ bounds a) -- write the length
        mapM_ put (elems a)        -- now the elems.
    get = do
        bs <- get
        n  <- get                  -- read the length
        xs <- replicateM n get     -- now the elems.
        return (listArray bs xs)
 --
 -- The IArray UArray e constraint is non portable. Requires flexible instances
 --
 instance (Binary i, Ix i, Binary e, IArray UArray e) => Binary (UArray i e) where
    put a = do
        put (bounds a)
        put (rangeSize $ bounds a) -- now write the length
        mapM_ put (elems a)
    get = do
        bs <- get
        n  <- get
        xs <- replicateM n get
        return (listArray bs xs)
--- a/src/Data/Binary/Builder.hs
+++ b/src/Data/Binary/Builder.hs
@@ -0,0 +1,426 @@
 {-# LANGUAGE CPP #-}
 {-# OPTIONS_GHC -fglasgow-exts #-}
 -- for unboxed shifts
 -----------------------------------------------------------------------------
 -- |
 -- Module      : Data.Binary.Builder
 -- Copyright   : Lennart Kolmodin, Ross Paterson
 -- License     : BSD3-style (see LICENSE)
 -- 
 -- Maintainer  : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
 -- Stability   : experimental
 -- Portability : portable to Hugs and GHC
 --
 -- Efficient construction of lazy bytestrings.
 --
 -----------------------------------------------------------------------------
 #if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
 #include "MachDeps.h"
 #endif
 module Data.Binary.Builder (
    -- * The Builder type
      Builder
    , toLazyByteString
    -- * Constructing Builders
    , empty
    , singleton
    , append
    , fromByteString        -- :: S.ByteString -> Builder
    , fromLazyByteString    -- :: L.ByteString -> Builder
    -- * Flushing the buffer state
    , flush
    -- * Derived Builders
    -- ** Big-endian writes
    , putWord16be           -- :: Word16 -> Builder
    , putWord32be           -- :: Word32 -> Builder
    , putWord64be           -- :: Word64 -> Builder
    -- ** Little-endian writes
    , putWord16le           -- :: Word16 -> Builder
    , putWord32le           -- :: Word32 -> Builder
    , putWord64le           -- :: Word64 -> Builder
    -- ** Host-endian, unaligned writes
    , putWordhost           -- :: Word -> Builder
    , putWord16host         -- :: Word16 -> Builder
    , putWord32host         -- :: Word32 -> Builder
    , putWord64host         -- :: Word64 -> Builder
  ) where
 import Foreign
 import Data.Monoid
 import Data.Word
 import qualified Data.ByteString      as S
 import qualified Data.ByteString.Lazy as L
 #ifdef BYTESTRING_IN_BASE
 import Data.ByteString.Base (inlinePerformIO)
 import qualified Data.ByteString.Base as S
 #else
 import Data.ByteString.Internal (inlinePerformIO)
 import qualified Data.ByteString.Internal as S
 import qualified Data.ByteString.Lazy.Internal as L
 #endif
 #if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
 import GHC.Base
 import GHC.Word (Word32(..),Word16(..),Word64(..))
 #if WORD_SIZE_IN_BITS < 64 && __GLASGOW_HASKELL__ >= 608
 import GHC.Word (uncheckedShiftRL64#)
 #endif
 #endif
 ------------------------------------------------------------------------
 -- | A 'Builder' is an efficient way to build lazy 'L.ByteString's.
 -- There are several functions for constructing 'Builder's, but only one
 -- to inspect them: to extract any data, you have to turn them into lazy
 -- 'L.ByteString's using 'toLazyByteString'.
 --
 -- Internally, a 'Builder' constructs a lazy 'L.Bytestring' by filling byte
 -- arrays piece by piece.  As each buffer is filled, it is \'popped\'
 -- off, to become a new chunk of the resulting lazy 'L.ByteString'.
 -- All this is hidden from the user of the 'Builder'.
 newtype Builder = Builder {
        -- Invariant (from Data.ByteString.Lazy):
        --      The lists include no null ByteStrings.
        runBuilder :: (Buffer -> [S.ByteString]) -> Buffer -> [S.ByteString]
    }
 instance Monoid Builder where
    mempty  = empty
    {-# INLINE mempty #-}
    mappend = append
    {-# INLINE mappend #-}
 ------------------------------------------------------------------------
 -- | /O(1)./ The empty Builder, satisfying
 --
 --  * @'toLazyByteString' 'empty' = 'L.empty'@
 --
 empty :: Builder
 empty = Builder id
 {-# INLINE empty #-}
 -- | /O(1)./ A Builder taking a single byte, satisfying
 --
 --  * @'toLazyByteString' ('singleton' b) = 'L.singleton' b@
 --
 singleton :: Word8 -> Builder
 singleton = writeN 1 . flip poke
 {-# INLINE singleton #-}
 ------------------------------------------------------------------------
 -- | /O(1)./ The concatenation of two Builders, an associative operation
 -- with identity 'empty', satisfying
 --
 --  * @'toLazyByteString' ('append' x y) = 'L.append' ('toLazyByteString' x) ('toLazyByteString' y)@
 --
 append :: Builder -> Builder -> Builder
 append (Builder f) (Builder g) = Builder (f . g)
 {-# INLINE append #-}
 -- | /O(1)./ A Builder taking a 'S.ByteString', satisfying
 --
 --  * @'toLazyByteString' ('fromByteString' bs) = 'L.fromChunks' [bs]@
 --
 fromByteString :: S.ByteString -> Builder
 fromByteString bs
  | S.null bs = empty
  | otherwise = flush `append` mapBuilder (bs :)
 {-# INLINE fromByteString #-}
 -- | /O(1)./ A Builder taking a lazy 'L.ByteString', satisfying
 --
 --  * @'toLazyByteString' ('fromLazyByteString' bs) = bs@
 --
 fromLazyByteString :: L.ByteString -> Builder
 fromLazyByteString bss = flush `append` mapBuilder (L.toChunks bss ++)
 {-# INLINE fromLazyByteString #-}
 ------------------------------------------------------------------------
 -- Our internal buffer type
 data Buffer = Buffer {-# UNPACK #-} !(ForeignPtr Word8)
                     {-# UNPACK #-} !Int                -- offset
                     {-# UNPACK #-} !Int                -- used bytes
                     {-# UNPACK #-} !Int                -- length left
 ------------------------------------------------------------------------
 -- | /O(n)./ Extract a lazy 'L.ByteString' from a 'Builder'.
 -- The construction work takes place if and when the relevant part of
 -- the lazy 'L.ByteString' is demanded.
 --
 toLazyByteString :: Builder -> L.ByteString
 toLazyByteString m = L.fromChunks $ unsafePerformIO $ do
    buf <- newBuffer defaultSize
    return (runBuilder (m `append` flush) (const []) buf)
 -- | /O(1)./ Pop the 'S.ByteString' we have constructed so far, if any,
 -- yielding a new chunk in the result lazy 'L.ByteString'.
 flush :: Builder
 flush = Builder $ \ k buf@(Buffer p o u l) ->
    if u == 0
      then k buf
      else S.PS p o u : k (Buffer p (o+u) 0 l)
 ------------------------------------------------------------------------
 --
 -- copied from Data.ByteString.Lazy
 --
 defaultSize :: Int
 defaultSize = 32 * k - overhead
    where k = 1024
          overhead = 2 * sizeOf (undefined :: Int)
 ------------------------------------------------------------------------
 -- | Sequence an IO operation on the buffer
 unsafeLiftIO :: (Buffer -> IO Buffer) -> Builder
 unsafeLiftIO f =  Builder $ \ k buf -> inlinePerformIO $ do
    buf' <- f buf
    return (k buf')
 {-# INLINE unsafeLiftIO #-}
 -- | Get the size of the buffer
 withSize :: (Int -> Builder) -> Builder
 withSize f = Builder $ \ k buf@(Buffer _ _ _ l) ->
    runBuilder (f l) k buf
 -- | Map the resulting list of bytestrings.
 mapBuilder :: ([S.ByteString] -> [S.ByteString]) -> Builder
 mapBuilder f = Builder (f .)
 ------------------------------------------------------------------------
 -- | Ensure that there are at least @n@ many bytes available.
 ensureFree :: Int -> Builder
 ensureFree n = n `seq` withSize $ \ l ->
    if n <= l then empty else
        flush `append` unsafeLiftIO (const (newBuffer (max n defaultSize)))
 {-# INLINE ensureFree #-}
 -- | Ensure that @n@ many bytes are available, and then use @f@ to write some
 -- bytes into the memory.
 writeN :: Int -> (Ptr Word8 -> IO ()) -> Builder
 writeN n f = ensureFree n `append` unsafeLiftIO (writeNBuffer n f)
 {-# INLINE writeN #-}
 writeNBuffer :: Int -> (Ptr Word8 -> IO ()) -> Buffer -> IO Buffer
 writeNBuffer n f (Buffer fp o u l) = do
    withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
    return (Buffer fp o (u+n) (l-n))
 {-# INLINE writeNBuffer #-}
 newBuffer :: Int -> IO Buffer
 newBuffer size = do
    fp <- S.mallocByteString size
    return $! Buffer fp 0 0 size
 {-# INLINE newBuffer #-}
 ------------------------------------------------------------------------
 -- Aligned, host order writes of storable values
 -- | Ensure that @n@ many bytes are available, and then use @f@ to write some
 -- storable values into the memory.
 writeNbytes :: Storable a => Int -> (Ptr a -> IO ()) -> Builder
 writeNbytes n f = ensureFree n `append` unsafeLiftIO (writeNBufferBytes n f)
 {-# INLINE writeNbytes #-}
 writeNBufferBytes :: Storable a => Int -> (Ptr a -> IO ()) -> Buffer -> IO Buffer
 writeNBufferBytes n f (Buffer fp o u l) = do
    withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
    return (Buffer fp o (u+n) (l-n))
 {-# INLINE writeNBufferBytes #-}
 ------------------------------------------------------------------------
 --
 -- We rely on the fromIntegral to do the right masking for us.
 -- The inlining here is critical, and can be worth 4x performance
 --
 -- | Write a Word16 in big endian format
 putWord16be :: Word16 -> Builder
 putWord16be w = writeN 2 $ \p -> do
    poke p               (fromIntegral (shiftr_w16 w 8) :: Word8)
    poke (p `plusPtr` 1) (fromIntegral (w)              :: Word8)
 {-# INLINE putWord16be #-}
 -- | Write a Word16 in little endian format
 putWord16le :: Word16 -> Builder
 putWord16le w = writeN 2 $ \p -> do
    poke p               (fromIntegral (w)              :: Word8)
    poke (p `plusPtr` 1) (fromIntegral (shiftr_w16 w 8) :: Word8)
 {-# INLINE putWord16le #-}
 -- putWord16le w16 = writeN 2 (\p -> poke (castPtr p) w16)
 -- | Write a Word32 in big endian format
 putWord32be :: Word32 -> Builder
 putWord32be w = writeN 4 $ \p -> do
    poke p               (fromIntegral (shiftr_w32 w 24) :: Word8)
    poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 16) :: Word8)
    poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w  8) :: Word8)
    poke (p `plusPtr` 3) (fromIntegral (w)               :: Word8)
 {-# INLINE putWord32be #-}
 --
 -- a data type to tag Put/Check. writes construct these which are then
 -- inlined and flattened. matching Checks will be more robust with rules.
 --
 -- | Write a Word32 in little endian format
 putWord32le :: Word32 -> Builder
 putWord32le w = writeN 4 $ \p -> do
    poke p               (fromIntegral (w)               :: Word8)
    poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w  8) :: Word8)
    poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 16) :: Word8)
    poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 w 24) :: Word8)
 {-# INLINE putWord32le #-}
 -- on a little endian machine:
 -- putWord32le w32 = writeN 4 (\p -> poke (castPtr p) w32)
 -- | Write a Word64 in big endian format
 putWord64be :: Word64 -> Builder
 #if WORD_SIZE_IN_BITS < 64
 --
 -- To avoid expensive 64 bit shifts on 32 bit machines, we cast to
 -- Word32, and write that
 --
 putWord64be w =
    let a = fromIntegral (shiftr_w64 w 32) :: Word32
        b = fromIntegral w                 :: Word32
    in writeN 8 $ \p -> do
    poke p               (fromIntegral (shiftr_w32 a 24) :: Word8)
    poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 16) :: Word8)
    poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a  8) :: Word8)
    poke (p `plusPtr` 3) (fromIntegral (a)               :: Word8)
    poke (p `plusPtr` 4) (fromIntegral (shiftr_w32 b 24) :: Word8)
    poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 16) :: Word8)
    poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b  8) :: Word8)
    poke (p `plusPtr` 7) (fromIntegral (b)               :: Word8)
 #else
 putWord64be w = writeN 8 $ \p -> do
    poke p               (fromIntegral (shiftr_w64 w 56) :: Word8)
    poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 48) :: Word8)
    poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 40) :: Word8)
    poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 32) :: Word8)
    poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 24) :: Word8)
    poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 16) :: Word8)
    poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w  8) :: Word8)
    poke (p `plusPtr` 7) (fromIntegral (w)               :: Word8)
 #endif
 {-# INLINE putWord64be #-}
 -- | Write a Word64 in little endian format
 putWord64le :: Word64 -> Builder
 #if WORD_SIZE_IN_BITS < 64
 putWord64le w =
    let b = fromIntegral (shiftr_w64 w 32) :: Word32
        a = fromIntegral w                 :: Word32
    in writeN 8 $ \p -> do
    poke (p)             (fromIntegral (a)               :: Word8)
    poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a  8) :: Word8)
    poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 16) :: Word8)
    poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 a 24) :: Word8)
    poke (p `plusPtr` 4) (fromIntegral (b)               :: Word8)
    poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b  8) :: Word8)
    poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 16) :: Word8)
    poke (p `plusPtr` 7) (fromIntegral (shiftr_w32 b 24) :: Word8)
 #else
 putWord64le w = writeN 8 $ \p -> do
    poke p               (fromIntegral (w)               :: Word8)
    poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w  8) :: Word8)
    poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 16) :: Word8)
    poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 24) :: Word8)
    poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 32) :: Word8)
    poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 40) :: Word8)
    poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 48) :: Word8)
    poke (p `plusPtr` 7) (fromIntegral (shiftr_w64 w 56) :: Word8)
 #endif
 {-# INLINE putWord64le #-}
 -- on a little endian machine:
 -- putWord64le w64 = writeN 8 (\p -> poke (castPtr p) w64)
 ------------------------------------------------------------------------
 -- Unaligned, word size ops
 -- | /O(1)./ A Builder taking a single native machine word. The word is
 -- written in host order, host endian form, for the machine you're on.
 -- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
 -- 4 bytes. Values written this way are not portable to
 -- different endian or word sized machines, without conversion.
 --
 putWordhost :: Word -> Builder
 putWordhost w = writeNbytes (sizeOf (undefined :: Word)) (\p -> poke p w)
 {-# INLINE putWordhost #-}
 -- | Write a Word16 in native host order and host endianness.
 -- 2 bytes will be written, unaligned.
 putWord16host :: Word16 -> Builder
 putWord16host w16 = writeNbytes (sizeOf (undefined :: Word16)) (\p -> poke p w16)
 {-# INLINE putWord16host #-}
 -- | Write a Word32 in native host order and host endianness.
 -- 4 bytes will be written, unaligned.
 putWord32host :: Word32 -> Builder
 putWord32host w32 = writeNbytes (sizeOf (undefined :: Word32)) (\p -> poke p w32)
 {-# INLINE putWord32host #-}
 -- | Write a Word64 in native host order.
 -- On a 32 bit machine we write two host order Word32s, in big endian form.
 -- 8 bytes will be written, unaligned.
 putWord64host :: Word64 -> Builder
 putWord64host w = writeNbytes (sizeOf (undefined :: Word64)) (\p -> poke p w)
 {-# INLINE putWord64host #-}
 ------------------------------------------------------------------------
 -- Unchecked shifts
 {-# INLINE shiftr_w16 #-}
 shiftr_w16 :: Word16 -> Int -> Word16
 {-# INLINE shiftr_w32 #-}
 shiftr_w32 :: Word32 -> Int -> Word32
 {-# INLINE shiftr_w64 #-}
 shiftr_w64 :: Word64 -> Int -> Word64
 #if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
 shiftr_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftRL#`   i)
 shiftr_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftRL#`   i)
 #if WORD_SIZE_IN_BITS < 64
 shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL64#` i)
 #if __GLASGOW_HASKELL__ <= 606
 -- Exported by GHC.Word in GHC 6.8 and higher
 foreign import ccall unsafe "stg_uncheckedShiftRL64"
    uncheckedShiftRL64#     :: Word64# -> Int# -> Word64#
 #endif
 #else
 shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL#` i)
 #endif
 #else
 shiftr_w16 = shiftR
 shiftr_w32 = shiftR
 shiftr_w64 = shiftR
 #endif
--- a/src/Data/Binary/Get.hs
+++ b/src/Data/Binary/Get.hs
@@ -0,0 +1,539 @@
 {-# LANGUAGE CPP #-}
 {-# OPTIONS_GHC -fglasgow-exts #-}
 -- for unboxed shifts
 -----------------------------------------------------------------------------
 -- |
 -- Module      : Data.Binary.Get
 -- Copyright   : Lennart Kolmodin
 -- License     : BSD3-style (see LICENSE)
 -- 
 -- Maintainer  : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
 -- Stability   : experimental
 -- Portability : portable to Hugs and GHC.
 --
 -- The Get monad. A monad for efficiently building structures from
 -- encoded lazy ByteStrings
 --
 -----------------------------------------------------------------------------
 #if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
 #include "MachDeps.h"
 #endif
 module Data.Binary.Get (
    -- * The Get type
      Get
    , runGet
    , runGetState
    -- * Parsing
    , skip
    , uncheckedSkip
    , lookAhead
    , lookAheadM
    , lookAheadE
    , uncheckedLookAhead
    -- * Utility
    , bytesRead
    , getBytes
    , remaining
    , isEmpty
    -- * Parsing particular types
    , getWord8
    -- ** ByteStrings
    , getByteString
    , getLazyByteString
    , getLazyByteStringNul
    , getRemainingLazyByteString
    -- ** Big-endian reads
    , getWord16be
    , getWord32be
    , getWord64be
    -- ** Little-endian reads
    , getWord16le
    , getWord32le
    , getWord64le
    -- ** Host-endian, unaligned reads
    , getWordhost
    , getWord16host
    , getWord32host
    , getWord64host
  ) where
 import Control.Monad (when,liftM,ap)
 import Control.Monad.Fix
 import Data.Maybe (isNothing)
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Lazy as L
 #ifdef BYTESTRING_IN_BASE
 import qualified Data.ByteString.Base as B
 #else
 import qualified Data.ByteString.Internal as B
 import qualified Data.ByteString.Lazy.Internal as L
 #endif
 #ifdef APPLICATIVE_IN_BASE
 import Control.Applicative (Applicative(..))
 #endif
 import Foreign
 -- used by splitAtST
 import Control.Monad.ST
 import Data.STRef
 #if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
 import GHC.Base
 import GHC.Word
 import GHC.Int
 #endif
 -- | The parse state
 data S = S {-# UNPACK #-} !B.ByteString  -- current chunk
           L.ByteString                  -- the rest of the input
           {-# UNPACK #-} !Int64         -- bytes read
 -- | The Get monad is just a State monad carrying around the input ByteString
 newtype Get a = Get { unGet :: S -> (a, S) }
 instance Functor Get where
    fmap f m = Get (\s -> let (a, s') = unGet m s
                          in (f a, s'))
    {-# INLINE fmap #-}
 #ifdef APPLICATIVE_IN_BASE
 instance Applicative Get where
    pure  = return
    (<*>) = ap
 #endif
 instance Monad Get where
    return a  = Get (\s -> (a, s))
    {-# INLINE return #-}
    m >>= k   = Get (\s -> case unGet m s of
                             (a, s') -> unGet (k a) s')
    {-# INLINE (>>=) #-}
    fail      = failDesc
 instance MonadFix Get where
    mfix f = Get (\s -> let (a,s') = unGet (f a) s 
                        in (a,s'))
 ------------------------------------------------------------------------
 get :: Get S
 get   = Get (\s -> (s, s))
 put :: S -> Get ()
 put s = Get (\_ -> ((), s))
 ------------------------------------------------------------------------
 initState :: L.ByteString -> S
 initState xs = mkState xs 0
 {-# INLINE initState #-}
 {-
 initState (B.LPS xs) =
    case xs of
      []      -> S B.empty L.empty 0
      (x:xs') -> S x (B.LPS xs') 0
 -}
 #ifndef BYTESTRING_IN_BASE
 mkState :: L.ByteString -> Int64 -> S
 mkState l = case l of
    L.Empty      -> S B.empty L.empty
    L.Chunk x xs -> S x xs
 {-# INLINE mkState #-}
 #else
 mkState :: L.ByteString -> Int64 -> S
 mkState (B.LPS xs) =
    case xs of
        [] -> S B.empty L.empty
        (x:xs') -> S x (B.LPS xs')
 #endif
 -- | Run the Get monad applies a 'get'-based parser on the input ByteString
 runGet :: Get a -> L.ByteString -> a
 runGet m str = case unGet m (initState str) of (a, _) -> a
 -- | Run the Get monad applies a 'get'-based parser on the input
 -- ByteString. Additional to the result of get it returns the number of
 -- consumed bytes and the rest of the input.
 runGetState :: Get a -> L.ByteString -> Int64 -> (a, L.ByteString, Int64)
 runGetState m str off =
    case unGet m (mkState str off) of
      (a, ~(S s ss newOff)) -> (a, s `join` ss, newOff)
 ------------------------------------------------------------------------
 failDesc :: String -> Get a
 failDesc err = do
    S _ _ bytes <- get
    Get (error (err ++ ". Failed reading at byte position " ++ show bytes))
 -- | Skip ahead @n@ bytes. Fails if fewer than @n@ bytes are available.
 skip :: Int -> Get ()
 skip n = readN (fromIntegral n) (const ())
 -- | Skip ahead @n@ bytes. No error if there isn't enough bytes.
 uncheckedSkip :: Int64 -> Get ()
 uncheckedSkip n = do
    S s ss bytes <- get
    if fromIntegral (B.length s) >= n
      then put (S (B.drop (fromIntegral n) s) ss (bytes + n))
      else do
        let rest = L.drop (n - fromIntegral (B.length s)) ss
        put $! mkState rest (bytes + n)
 -- | Run @ga@, but return without consuming its input.
 -- Fails if @ga@ fails.
 lookAhead :: Get a -> Get a
 lookAhead ga = do
    s <- get
    a <- ga
    put s
    return a
 -- | Like 'lookAhead', but consume the input if @gma@ returns 'Just _'.
 -- Fails if @gma@ fails.
 lookAheadM :: Get (Maybe a) -> Get (Maybe a)
 lookAheadM gma = do
    s <- get
    ma <- gma
    when (isNothing ma) $
        put s
    return ma
 -- | Like 'lookAhead', but consume the input if @gea@ returns 'Right _'.
 -- Fails if @gea@ fails.
 lookAheadE :: Get (Either a b) -> Get (Either a b)
 lookAheadE gea = do
    s <- get
    ea <- gea
    case ea of
        Left _ -> put s
        _      -> return ()
    return ea
 -- | Get the next up to @n@ bytes as a lazy ByteString, without consuming them. 
 uncheckedLookAhead :: Int64 -> Get L.ByteString
 uncheckedLookAhead n = do
    S s ss _ <- get
    if n <= fromIntegral (B.length s)
        then return (L.fromChunks [B.take (fromIntegral n) s])
        else return $ L.take n (s `join` ss)
 ------------------------------------------------------------------------
 -- Utility
 -- | Get the total number of bytes read to this point.
 bytesRead :: Get Int64
 bytesRead = do
    S _ _ b <- get
    return b
 -- | Get the number of remaining unparsed bytes.
 -- Useful for checking whether all input has been consumed.
 -- Note that this forces the rest of the input.
 remaining :: Get Int64
 remaining = do
    S s ss _ <- get
    return (fromIntegral (B.length s) + L.length ss)
 -- | Test whether all input has been consumed,
 -- i.e. there are no remaining unparsed bytes.
 isEmpty :: Get Bool
 isEmpty = do
    S s ss _ <- get
    return (B.null s && L.null ss)
 ------------------------------------------------------------------------
 -- Utility with ByteStrings
 -- | An efficient 'get' method for strict ByteStrings. Fails if fewer
 -- than @n@ bytes are left in the input.
 getByteString :: Int -> Get B.ByteString
 getByteString n = readN n id
 {-# INLINE getByteString #-}
 -- | An efficient 'get' method for lazy ByteStrings. Does not fail if fewer than
 -- @n@ bytes are left in the input.
 getLazyByteString :: Int64 -> Get L.ByteString
 getLazyByteString n = do
    S s ss bytes <- get
    let big = s `join` ss
    case splitAtST n big of
      (consume, rest) -> do put $ mkState rest (bytes + n)
                            return consume
 {-# INLINE getLazyByteString #-}
 -- | Get a lazy ByteString that is terminated with a NUL byte. Fails
 -- if it reaches the end of input without hitting a NUL.
 getLazyByteStringNul :: Get L.ByteString
 getLazyByteStringNul = do
    S s ss bytes <- get
    let big = s `join` ss
        (consume, t) = L.break (== 0) big
        (h, rest) = L.splitAt 1 t
    if L.null h
      then fail "too few bytes"
      else do
        put $ mkState rest (bytes + L.length consume + 1)
        return consume
 {-# INLINE getLazyByteStringNul #-}
 -- | Get the remaining bytes as a lazy ByteString
 getRemainingLazyByteString :: Get L.ByteString
 getRemainingLazyByteString = do
    S s ss _ <- get
    return (s `join` ss)
 ------------------------------------------------------------------------
 -- Helpers
 -- | Pull @n@ bytes from the input, as a strict ByteString.
 getBytes :: Int -> Get B.ByteString
 getBytes n = do
    S s ss bytes <- get
    if n <= B.length s
        then do let (consume,rest) = B.splitAt n s
                put $! S rest ss (bytes + fromIntegral n)
                return $! consume
        else
              case L.splitAt (fromIntegral n) (s `join` ss) of
                (consuming, rest) ->
                    do let now = B.concat . L.toChunks $ consuming
                       put $! mkState rest (bytes + fromIntegral n)
                       -- forces the next chunk before this one is returned
                       if (B.length now < n)
                         then
                            fail "too few bytes"
                         else
                            return now
 {-# INLINE getBytes #-}
 -- ^ important
 #ifndef BYTESTRING_IN_BASE
 join :: B.ByteString -> L.ByteString -> L.ByteString
 join bb lb
    | B.null bb = lb
    | otherwise = L.Chunk bb lb
 #else
 join :: B.ByteString -> L.ByteString -> L.ByteString
 join bb (B.LPS lb)
    | B.null bb = B.LPS lb
    | otherwise = B.LPS (bb:lb)
 #endif
    -- don't use L.append, it's strict in it's second argument :/
 {-# INLINE join #-}
 -- | Split a ByteString. If the first result is consumed before the --
 -- second, this runs in constant heap space.
 --
 -- You must force the returned tuple for that to work, e.g.
 -- 
 -- > case splitAtST n xs of
 -- >    (ys,zs) -> consume ys ... consume zs
 --
 splitAtST :: Int64 -> L.ByteString -> (L.ByteString, L.ByteString)
 splitAtST i ps | i <= 0 = (L.empty, ps)
 #ifndef BYTESTRING_IN_BASE
 splitAtST i ps          = runST (
     do r  <- newSTRef undefined
        xs <- first r i ps
        ys <- unsafeInterleaveST (readSTRef r)
        return (xs, ys))
  where
        first r 0 xs@(L.Chunk _ _) = writeSTRef r xs    >> return L.Empty
        first r _ L.Empty          = writeSTRef r L.Empty >> return L.Empty
        first r n (L.Chunk x xs)
          | n < l     = do writeSTRef r (L.Chunk (B.drop (fromIntegral n) x) xs)
                           return $ L.Chunk (B.take (fromIntegral n) x) L.Empty
          | otherwise = do writeSTRef r (L.drop (n - l) xs)
                           liftM (L.Chunk x) $ unsafeInterleaveST (first r (n - l) xs)
         where l = fromIntegral (B.length x)
 #else
 splitAtST i (B.LPS ps)  = runST (
     do r  <- newSTRef undefined
        xs <- first r i ps
        ys <- unsafeInterleaveST (readSTRef r)
        return (B.LPS xs, B.LPS ys))
  where first r 0 xs     = writeSTRef r xs >> return []
        first r _ []     = writeSTRef r [] >> return []
        first r n (x:xs)
          | n < l     = do writeSTRef r (B.drop (fromIntegral n) x : xs)
                           return [B.take (fromIntegral n) x]
          | otherwise = do writeSTRef r (L.toChunks (L.drop (n - l) (B.LPS xs)))
                           fmap (x:) $ unsafeInterleaveST (first r (n - l) xs)
         where l = fromIntegral (B.length x)
 #endif
 {-# INLINE splitAtST #-}
 -- Pull n bytes from the input, and apply a parser to those bytes,
 -- yielding a value. If less than @n@ bytes are available, fail with an
 -- error. This wraps @getBytes@.
 readN :: Int -> (B.ByteString -> a) -> Get a
 readN n f = fmap f $ getBytes n
 {-# INLINE readN #-}
 -- ^ important
 ------------------------------------------------------------------------
 -- Primtives
 -- helper, get a raw Ptr onto a strict ByteString copied out of the
 -- underlying lazy byteString. So many indirections from the raw parser
 -- state that my head hurts...
 getPtr :: Storable a => Int -> Get a
 getPtr n = do
    (fp,o,_) <- readN n B.toForeignPtr
    return . B.inlinePerformIO $ withForeignPtr fp $ \p -> peek (castPtr $ p `plusPtr` o)
 {-# INLINE getPtr #-}
 ------------------------------------------------------------------------
 -- | Read a Word8 from the monad state
 getWord8 :: Get Word8
 getWord8 = getPtr (sizeOf (undefined :: Word8))
 {-# INLINE getWord8 #-}
 -- | Read a Word16 in big endian format
 getWord16be :: Get Word16
 getWord16be = do
    s <- readN 2 id
    return $! (fromIntegral (s `B.index` 0) `shiftl_w16` 8) .|.
              (fromIntegral (s `B.index` 1))
 {-# INLINE getWord16be #-}
 -- | Read a Word16 in little endian format
 getWord16le :: Get Word16
 getWord16le = do
    s <- readN 2 id
    return $! (fromIntegral (s `B.index` 1) `shiftl_w16` 8) .|.
              (fromIntegral (s `B.index` 0) )
 {-# INLINE getWord16le #-}
 -- | Read a Word32 in big endian format
 getWord32be :: Get Word32
 getWord32be = do
    s <- readN 4 id
    return $! (fromIntegral (s `B.index` 0) `shiftl_w32` 24) .|.
              (fromIntegral (s `B.index` 1) `shiftl_w32` 16) .|.
              (fromIntegral (s `B.index` 2) `shiftl_w32`  8) .|.
              (fromIntegral (s `B.index` 3) )
 {-# INLINE getWord32be #-}
 -- | Read a Word32 in little endian format
 getWord32le :: Get Word32
 getWord32le = do
    s <- readN 4 id
    return $! (fromIntegral (s `B.index` 3) `shiftl_w32` 24) .|.
              (fromIntegral (s `B.index` 2) `shiftl_w32` 16) .|.
              (fromIntegral (s `B.index` 1) `shiftl_w32`  8) .|.
              (fromIntegral (s `B.index` 0) )
 {-# INLINE getWord32le #-}
 -- | Read a Word64 in big endian format
 getWord64be :: Get Word64
 getWord64be = do
    s <- readN 8 id
    return $! (fromIntegral (s `B.index` 0) `shiftl_w64` 56) .|.
              (fromIntegral (s `B.index` 1) `shiftl_w64` 48) .|.
              (fromIntegral (s `B.index` 2) `shiftl_w64` 40) .|.
              (fromIntegral (s `B.index` 3) `shiftl_w64` 32) .|.
              (fromIntegral (s `B.index` 4) `shiftl_w64` 24) .|.
              (fromIntegral (s `B.index` 5) `shiftl_w64` 16) .|.
              (fromIntegral (s `B.index` 6) `shiftl_w64`  8) .|.
              (fromIntegral (s `B.index` 7) )
 {-# INLINE getWord64be #-}
 -- | Read a Word64 in little endian format
 getWord64le :: Get Word64
 getWord64le = do
    s <- readN 8 id
    return $! (fromIntegral (s `B.index` 7) `shiftl_w64` 56) .|.
              (fromIntegral (s `B.index` 6) `shiftl_w64` 48) .|.
              (fromIntegral (s `B.index` 5) `shiftl_w64` 40) .|.
              (fromIntegral (s `B.index` 4) `shiftl_w64` 32) .|.
              (fromIntegral (s `B.index` 3) `shiftl_w64` 24) .|.
              (fromIntegral (s `B.index` 2) `shiftl_w64` 16) .|.
              (fromIntegral (s `B.index` 1) `shiftl_w64`  8) .|.
              (fromIntegral (s `B.index` 0) )
 {-# INLINE getWord64le #-}
 ------------------------------------------------------------------------
 -- Host-endian reads
 -- | /O(1)./ Read a single native machine word. The word is read in
 -- host order, host endian form, for the machine you're on. On a 64 bit
 -- machine the Word is an 8 byte value, on a 32 bit machine, 4 bytes.
 getWordhost :: Get Word
 getWordhost = getPtr (sizeOf (undefined :: Word))
 {-# INLINE getWordhost #-}
 -- | /O(1)./ Read a 2 byte Word16 in native host order and host endianness.
 getWord16host :: Get Word16
 getWord16host = getPtr (sizeOf (undefined :: Word16))
 {-# INLINE getWord16host #-}
 -- | /O(1)./ Read a Word32 in native host order and host endianness.
 getWord32host :: Get Word32
 getWord32host = getPtr  (sizeOf (undefined :: Word32))
 {-# INLINE getWord32host #-}
 -- | /O(1)./ Read a Word64 in native host order and host endianess.
 getWord64host   :: Get Word64
 getWord64host = getPtr  (sizeOf (undefined :: Word64))
 {-# INLINE getWord64host #-}
 ------------------------------------------------------------------------
 -- Unchecked shifts
 shiftl_w16 :: Word16 -> Int -> Word16
 shiftl_w32 :: Word32 -> Int -> Word32
 shiftl_w64 :: Word64 -> Int -> Word64
 #if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
 shiftl_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftL#`   i)
 shiftl_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftL#`   i)
 #if WORD_SIZE_IN_BITS < 64
 shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL64#` i)
 #if __GLASGOW_HASKELL__ <= 606
 -- Exported by GHC.Word in GHC 6.8 and higher
 foreign import ccall unsafe "stg_uncheckedShiftL64"
    uncheckedShiftL64#     :: Word64# -> Int# -> Word64#
 #endif
 #else
 shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL#` i)
 #endif
 #else
 shiftl_w16 = shiftL
 shiftl_w32 = shiftL
 shiftl_w64 = shiftL
 #endif
--- a/src/Data/Binary/Put.hs
+++ b/src/Data/Binary/Put.hs
@@ -0,0 +1,199 @@
 {-# LANGUAGE CPP #-}
 -----------------------------------------------------------------------------
 -- |
 -- Module      : Data.Binary.Put
 -- Copyright   : Lennart Kolmodin
 -- License     : BSD3-style (see LICENSE)
 -- 
 -- Maintainer  : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
 -- Stability   : stable
 -- Portability : Portable to Hugs and GHC. Requires MPTCs
 --
 -- The Put monad. A monad for efficiently constructing lazy bytestrings.
 --
 -----------------------------------------------------------------------------
 module Data.Binary.Put (
    -- * The Put type
      Put
    , PutM(..)
    , runPut
    -- * Flushing the implicit parse state
    , flush
    -- * Primitives
    , putWord8
    , putByteString
    , putLazyByteString
    -- * Big-endian primitives
    , putWord16be
    , putWord32be
    , putWord64be
    -- * Little-endian primitives
    , putWord16le
    , putWord32le
    , putWord64le
    -- * Host-endian, unaligned writes
    , putWordhost           -- :: Word   -> Put
    , putWord16host         -- :: Word16 -> Put
    , putWord32host         -- :: Word32 -> Put
    , putWord64host         -- :: Word64 -> Put
  ) where
 import Data.Monoid
 import Data.Binary.Builder (Builder, toLazyByteString)
 import qualified Data.Binary.Builder as B
 import Data.Word
 import qualified Data.ByteString      as S
 import qualified Data.ByteString.Lazy as L
 #ifdef APPLICATIVE_IN_BASE
 import Control.Applicative
 #endif
 ------------------------------------------------------------------------
 -- XXX Strict in buffer only. 
 data PairS a = PairS a {-# UNPACK #-}!Builder
 sndS :: PairS a -> Builder
 sndS (PairS _ b) = b
 -- | The PutM type. A Writer monad over the efficient Builder monoid.
 newtype PutM a = Put { unPut :: PairS a }
 -- | Put merely lifts Builder into a Writer monad, applied to ().
 type Put = PutM ()
 instance Functor PutM where
        fmap f m = Put $ let PairS a w = unPut m in PairS (f a) w
        {-# INLINE fmap #-}
 #ifdef APPLICATIVE_IN_BASE
 instance Applicative PutM where
        pure    = return
        m <*> k = Put $
            let PairS f w  = unPut m
                PairS x w' = unPut k
            in PairS (f x) (w `mappend` w')
 #endif
 -- Standard Writer monad, with aggressive inlining
 instance Monad PutM where
    return a = Put $ PairS a mempty
    {-# INLINE return #-}
    m >>= k  = Put $
        let PairS a w  = unPut m
            PairS b w' = unPut (k a)
        in PairS b (w `mappend` w')
    {-# INLINE (>>=) #-}
    m >> k  = Put $
        let PairS _ w  = unPut m
            PairS b w' = unPut k
        in PairS b (w `mappend` w')
    {-# INLINE (>>) #-}
 tell :: Builder -> Put
 tell b = Put $ PairS () b
 {-# INLINE tell #-}
 -- | Run the 'Put' monad with a serialiser
 runPut :: Put -> L.ByteString
 runPut = toLazyByteString . sndS . unPut
 {-# INLINE runPut #-}
 ------------------------------------------------------------------------
 -- | Pop the ByteString we have constructed so far, if any, yielding a
 -- new chunk in the result ByteString.
 flush               :: Put
 flush               = tell B.flush
 {-# INLINE flush #-}
 -- | Efficiently write a byte into the output buffer
 putWord8            :: Word8 -> Put
 putWord8            = tell . B.singleton
 {-# INLINE putWord8 #-}
 -- | An efficient primitive to write a strict ByteString into the output buffer.
 -- It flushes the current buffer, and writes the argument into a new chunk.
 putByteString       :: S.ByteString -> Put
 putByteString       = tell . B.fromByteString
 {-# INLINE putByteString #-}
 -- | Write a lazy ByteString efficiently, simply appending the lazy
 -- ByteString chunks to the output buffer
 putLazyByteString   :: L.ByteString -> Put
 putLazyByteString   = tell . B.fromLazyByteString
 {-# INLINE putLazyByteString #-}
 -- | Write a Word16 in big endian format
 putWord16be         :: Word16 -> Put
 putWord16be         = tell . B.putWord16be
 {-# INLINE putWord16be #-}
 -- | Write a Word16 in little endian format
 putWord16le         :: Word16 -> Put
 putWord16le         = tell . B.putWord16le
 {-# INLINE putWord16le #-}
 -- | Write a Word32 in big endian format
 putWord32be         :: Word32 -> Put
 putWord32be         = tell . B.putWord32be
 {-# INLINE putWord32be #-}
 -- | Write a Word32 in little endian format
 putWord32le         :: Word32 -> Put
 putWord32le         = tell . B.putWord32le
 {-# INLINE putWord32le #-}
 -- | Write a Word64 in big endian format
 putWord64be         :: Word64 -> Put
 putWord64be         = tell . B.putWord64be
 {-# INLINE putWord64be #-}
 -- | Write a Word64 in little endian format
 putWord64le         :: Word64 -> Put
 putWord64le         = tell . B.putWord64le
 {-# INLINE putWord64le #-}
 ------------------------------------------------------------------------
 -- | /O(1)./ Write a single native machine word. The word is
 -- written in host order, host endian form, for the machine you're on.
 -- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
 -- 4 bytes. Values written this way are not portable to
 -- different endian or word sized machines, without conversion.
 --
 putWordhost         :: Word -> Put
 putWordhost         = tell . B.putWordhost
 {-# INLINE putWordhost #-}
 -- | /O(1)./ Write a Word16 in native host order and host endianness.
 -- For portability issues see @putWordhost@.
 putWord16host       :: Word16 -> Put
 putWord16host       = tell . B.putWord16host
 {-# INLINE putWord16host #-}
 -- | /O(1)./ Write a Word32 in native host order and host endianness.
 -- For portability issues see @putWordhost@.
 putWord32host       :: Word32 -> Put
 putWord32host       = tell . B.putWord32host
 {-# INLINE putWord32host #-}
 -- | /O(1)./ Write a Word64 in native host order
 -- On a 32 bit machine we write two host order Word32s, in big endian form.
 -- For portability issues see @putWordhost@.
 putWord64host       :: Word64 -> Put
 putWord64host       = tell . B.putWord64host
 {-# INLINE putWord64host #-}
--- a/src/GF/Compile/Export.hs
+++ b/src/GF/Compile/Export.hs
@@ -2,8 +2,6 @@ module GF.Compile.Export where
 import PGF.CId
 import PGF.Data (PGF(..))
 import PGF.Raw.Print (printTree)
 import PGF.Raw.Convert (fromPGF)
 import GF.Compile.GFCCtoHaskell
 import GF.Compile.GFCCtoProlog
 import GF.Compile.GFCCtoJS
@@ -32,7 +30,6 @@ exportPGF :: Options
          -> [(FilePath,String)] -- ^ List of recommended file names and contents.
 exportPGF opts fmt pgf = 
    case fmt of
      FmtPGF          -> multi "pgf" printPGF
      FmtPGFPretty    -> multi "txt" prPGFPretty
      FmtJavaScript   -> multi "js"  pgf2js
      FmtHaskell      -> multi "hs"  (grammar2haskell opts name)
@@ -65,7 +62,3 @@ outputConcr :: PGF -> CId
 outputConcr pgf = case cncnames pgf of
                    []     -> error "No concrete syntax."
                    cnc:_  -> cnc
 printPGF :: PGF -> String
 printPGF = encodeUTF8 . printTree . fromPGF
--- a/src/GF/Compile/GrammarToGFCC.hs
+++ b/src/GF/Compile/GrammarToGFCC.hs
@@ -1,5 +1,5 @@
 {-# LANGUAGE PatternGuards #-}
-module GF.Compile.GrammarToGFCC (prGrammar2gfcc,mkCanon2gfcc,addParsers) where
+module GF.Compile.GrammarToGFCC (mkCanon2gfcc,addParsers) where
 import GF.Compile.Export
 import GF.Compile.OptimizeGF (unshareModule)
@@ -37,11 +37,6 @@ traceD s t = t
 -- the main function: generate PGF from GF.
 prGrammar2gfcc :: Options -> String -> SourceGrammar -> (String,String)
 prGrammar2gfcc opts cnc gr = (abs,printPGF gc) where
  (abs,gc) = mkCanon2gfcc opts cnc gr 
 mkCanon2gfcc :: Options -> String -> SourceGrammar -> (String,D.PGF)
 mkCanon2gfcc opts cnc gr = 
  (prIdent abs, (canon2gfcc opts pars . reorder abs . canon2canon abs) gr)
--- a/src/GF/Infra/Option.hs
+++ b/src/GF/Infra/Option.hs
@@ -80,8 +80,7 @@ data Phase = Preproc | Convert | Compile | Link
 data Encoding = UTF_8 | ISO_8859_1 | CP_1251
  deriving (Eq,Ord)
-data OutputFormat = FmtPGF 
+data OutputFormat = FmtPGFPretty
                  | FmtPGFPretty
                  | FmtJavaScript 
                  | FmtHaskell 
                  | FmtProlog
@@ -239,7 +238,7 @@ defaultFlags = Flags {
      optShowCPUTime     = False,
      optEmitGFO         = True,
      optGFODir          = ".",
-      optOutputFormats   = [FmtPGF],
+      optOutputFormats   = [],
      optSISR            = Nothing,
      optHaskellOptions  = Set.empty,
      optLexicalCats     = Set.empty,
@@ -427,8 +426,7 @@ optDescr =
 outputFormats :: [(String,OutputFormat)]
 outputFormats = 
-    [("pgf",          FmtPGF),
+    [("pgf-pretty",   FmtPGFPretty),
     ("pgf-pretty",   FmtPGFPretty),
     ("js",           FmtJavaScript),
     ("haskell",      FmtHaskell),
     ("prolog",       FmtProlog),
--- a/src/GFC.hs
+++ b/src/GFC.hs
@@ -4,8 +4,6 @@ module GFC (mainGFC) where
 import PGF
 import PGF.CId
 import PGF.Data
 import PGF.Raw.Parse
 import PGF.Raw.Convert
 import GF.Compile
 import GF.Compile.Export
@@ -16,6 +14,7 @@ import GF.Infra.Option
 import GF.Data.ErrM
 import Data.Maybe
 import Data.Binary
 import System.FilePath
@@ -57,10 +56,17 @@ unionPGFFiles opts fs =
  where readPGFVerbose f = putPointE Normal opts ("Reading " ++ f ++ "...") $ ioeIO $ readPGF f
 writeOutputs :: Options -> PGF -> IOE ()
-writeOutputs opts pgf =
+writeOutputs opts pgf = do
-    sequence_ [writeOutput opts name str 
+  writePGF opts pgf
-                   | fmt <- flag optOutputFormats opts, 
+  sequence_ [writeOutput opts name str 
-                     (name,str) <- exportPGF opts fmt pgf]
+                 | fmt <- flag optOutputFormats opts, 
                   (name,str) <- exportPGF opts fmt pgf]
 writePGF :: Options -> PGF -> IOE ()
 writePGF opts pgf = do
  let name = fromMaybe (prCId (absname pgf)) (flag optName opts)
      outfile = name <.> "pgf"
  putPointE Normal opts ("Writing " ++ outfile ++ "...") $ ioeIO $ encodeFile outfile pgf
 writeOutput :: Options -> FilePath-> String -> IOE ()
 writeOutput opts file str =
--- a/src/PGF.hs
+++ b/src/PGF.hs
@@ -66,9 +66,7 @@ import PGF.TypeCheck
 import PGF.Paraphrase
 import PGF.Macros
 import PGF.Data
-import PGF.Raw.Convert
+import PGF.Binary
 import PGF.Raw.Parse
 import PGF.Raw.Print (printTree)
 import PGF.Parsing.FCFG
 import qualified PGF.Parsing.FCFG.Incremental as Incremental
 import qualified GF.Compile.GeneratePMCFG as PMCFG
@@ -80,6 +78,7 @@ import GF.Data.Utilities (replace)
 import Data.Char
 import qualified Data.Map as Map
 import Data.Maybe
 import Data.Binary
 import System.Random (newStdGen)
 import Control.Monad
@@ -210,9 +209,8 @@ readLanguage = readCId
 showLanguage = prCId
 readPGF f = do
-  s <- readFile f >>= return . decodeUTF8 -- pgf is in UTF8, internal in unicode
+  g <- decodeFile f
-  g <- parseGrammar s
+  return $! addParsers g
  return $! addParsers $ toPGF g
 -- Adds parsers for all concretes that don't have a parser and that have parser=ondemand.
 addParsers :: PGF -> PGF
--- a/src/PGF/Raw/Abstract.hs
+++ b/src/PGF/Raw/Abstract.hs
@@ -1,14 +0,0 @@
 module PGF.Raw.Abstract where
 data Grammar =
   Grm [RExp]
  deriving (Eq,Ord,Show)
 data RExp =
   App String [RExp]
 | AInt Integer
 | AStr String
 | AFlt Double
 | AMet
  deriving (Eq,Ord,Show)
--- a/src/PGF/Raw/Convert.hs
+++ b/src/PGF/Raw/Convert.hs
@@ -1,273 +0,0 @@
 module PGF.Raw.Convert (toPGF,fromPGF) where
 import PGF.CId
 import PGF.Data
 import PGF.Raw.Abstract
 import Data.Array.IArray
 import qualified Data.Map    as Map
 import qualified Data.Set    as Set
 import qualified Data.IntMap as IntMap
 pgfMajorVersion, pgfMinorVersion :: Integer
 (pgfMajorVersion, pgfMinorVersion) = (1,0)
 -- convert parsed grammar to internal PGF
 toPGF :: Grammar -> PGF
 toPGF (Grm [
  App "pgf" (AInt v1 : AInt v2 : App a []:cs),
  App "flags"     gfs, 
  ab@(
    App "abstract" [
      App "fun"   fs,
      App "cat"   cts
      ]), 
  App "concrete" ccs
  ]) = let pgf = PGF {
    absname = mkCId a,
    cncnames = [mkCId c | App c [] <- cs],
    gflags = Map.fromAscList [(mkCId f,v) | App f [AStr v] <- gfs],
    abstract = 
     let
      aflags  = Map.fromAscList [(mkCId f,v) | App f [AStr v] <- gfs]
      lfuns   = [(mkCId f,(toType typ,toExp def)) | App f [typ, def] <- fs]
      funs    = Map.fromAscList lfuns
      lcats   = [(mkCId c, Prelude.map toHypo hyps) | App c hyps <- cts]
      cats    = Map.fromAscList lcats
      catfuns = Map.fromAscList 
        [(cat,[f | (f, (DTyp _ c _,_)) <- lfuns, c==cat]) | (cat,_) <- lcats]
     in Abstr aflags funs cats catfuns,
    concretes = Map.fromAscList [(mkCId lang, toConcr pgf ts) | App lang ts <- ccs]
    }
    in pgf
 where
 toConcr :: PGF -> [RExp] -> Concr
 toConcr pgf rexp = 
  let cnc = foldl add (Concr {cflags       = Map.empty,
                              lins         = Map.empty,
                              opers        = Map.empty,
                              lincats      = Map.empty,
                              lindefs      = Map.empty,
                              printnames   = Map.empty,
                              paramlincats = Map.empty,
                              parser       = Nothing
                             }) rexp
  in cnc
  where
    add :: Concr -> RExp -> Concr
    add cnc (App "flags" ts)     = cnc { cflags = Map.fromAscList [(mkCId f,v) | App f [AStr v] <- ts] }
    add cnc (App "lin" ts)       = cnc { lins = mkTermMap ts }
    add cnc (App "oper" ts)      = cnc { opers = mkTermMap ts }
    add cnc (App "lincat" ts)    = cnc { lincats = mkTermMap ts }
    add cnc (App "lindef" ts)    = cnc { lindefs = mkTermMap ts }
    add cnc (App "printname" ts) = cnc { printnames = mkTermMap ts }
    add cnc (App "param" ts)     = cnc { paramlincats = mkTermMap ts }
    add cnc (App "parser" ts)    = cnc { parser = Just (toPInfo ts) }
 toPInfo :: [RExp] -> ParserInfo
 toPInfo [App "functions" fs, App "sequences" ss, App "productions" ps,App "categories" (t:cs)] =
  ParserInfo { functions   = functions
             , sequences   = seqs
 	     , productions = productions
 	     , startCats   = cats
 	     , totalCats   = expToInt t
 	     }
  where 
    functions   = mkArray (map toFFun fs)
    seqs        = mkArray (map toFSeq ss)
    productions = IntMap.fromList (map toProductionSet ps)
    cats        = Map.fromList [(mkCId c, (map expToInt xs)) | App c xs <- cs]
    toFFun :: RExp -> FFun
    toFFun (App f [App "P" ts,App "R" ls]) = FFun fun prof lins
      where
        fun  = mkCId f
        prof = map toProfile ts
        lins = mkArray [fromIntegral seqid | AInt seqid <- ls]
    toProfile :: RExp -> Profile
    toProfile AMet           = []
    toProfile (App "_A" [t]) = [expToInt t]
    toProfile (App "_U" ts)  = [expToInt t | App "_A" [t] <- ts]
    toFSeq :: RExp -> FSeq
    toFSeq (App "seq" ss) = mkArray [toSymbol s | s <- ss]
    toProductionSet :: RExp -> (FCat,Set.Set Production)
    toProductionSet (App "td" (rt : xs)) = (expToInt rt, Set.fromList (map toProduction xs))
      where 
        toProduction (App "A" (ruleid : at)) = FApply (expToInt ruleid) (map expToInt at)
        toProduction (App "C" [fcat])        = FCoerce (expToInt fcat)
 toSymbol :: RExp -> FSymbol
 toSymbol (App "P"  [n,l]) = FSymCat (expToInt n) (expToInt l)
 toSymbol (App "PL" [n,l]) = FSymLit (expToInt n) (expToInt l)
 toSymbol (App "KP" (d:alts)) = FSymTok (toKP d alts)
 toSymbol (AStr t) = FSymTok (KS t)
 toType :: RExp -> Type
 toType e = case e of
  App cat [App "H" hypos, App "X" exps] -> 
    DTyp (map toHypo hypos) (mkCId cat) (map toExp exps) 
  _ -> error $ "type " ++ show e
 toHypo :: RExp -> Hypo
 toHypo e = case e of
  App x [typ] -> Hyp (mkCId x) (toType typ)
  _ -> error $ "hypo " ++ show e
 toExp :: RExp -> Expr
 toExp e = case e of
  App "Abs" [App x [], exp] -> EAbs (mkCId x) (toExp exp)
  App "App" [e1,e2] -> EApp (toExp e1) (toExp e2)
  App "Eq" eqs -> EEq [Equ (map toExp ps) (toExp v) | App "E" (v:ps) <- eqs]
  App "Var" [App i []] -> EVar (mkCId i)
  AMet   -> EMeta  0
  AInt i -> ELit (LInt i)
  AFlt i -> ELit (LFlt i)
  AStr i -> ELit (LStr i)
  _ -> error $ "exp " ++ show e
 toTerm :: RExp -> Term
 toTerm e = case e of
  App "R" es    -> R  (map toTerm es)
  App "S" es    -> S  (map toTerm es)
  App "FV" es   -> FV (map toTerm es)
  App "P" [e,v] -> P  (toTerm e) (toTerm v)
  App "W" [AStr s,v] -> W s (toTerm v)
  App "A" [AInt i] -> V (fromInteger i)
  App f []  -> F (mkCId f)
  AInt i -> C (fromInteger i)
  AMet   -> TM "?"
  App "KP" (d:alts) -> K (toKP d alts)
  AStr s -> K (KS s)
  _ -> error $ "term " ++ show e
 toKP d alts = KP (toStr d) (map toAlt alts)
  where
    toStr (App "S" vs) = [v | AStr v <- vs]
    toAlt (App "A" [x,y]) = Alt (toStr x) (toStr y)
 ------------------------------
 --- from internal to parser --
 ------------------------------
 fromPGF :: PGF -> Grammar
 fromPGF pgf = Grm [
  App "pgf" (AInt pgfMajorVersion:AInt pgfMinorVersion
             : App (prCId (absname pgf)) [] : map (flip App [] . prCId) (cncnames pgf)), 
  App "flags" [App (prCId f) [AStr v] | (f,v) <- Map.toList (gflags pgf `Map.union` aflags apgf)],
  App "abstract" [
    App "fun"   [App (prCId f) [fromType t,fromExp d] | (f,(t,d)) <- Map.toList (funs apgf)],
    App "cat"   [App (prCId f) (map fromHypo hs) | (f,hs) <- Map.toList (cats apgf)]
    ],
  App "concrete" [App (prCId lang) (fromConcrete c) | (lang,c) <- Map.toList (concretes pgf)]
  ]
 where
  apgf = abstract pgf
  fromConcrete cnc = [
      App "flags"     [App (prCId f) [AStr v]     | (f,v) <- Map.toList (cflags cnc)],
      App "lin"       [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (lins cnc)],
      App "oper"      [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (opers cnc)],
      App "lincat"    [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (lincats cnc)],
      App "lindef"    [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (lindefs cnc)],
      App "printname" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (printnames cnc)],
      App "param"     [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (paramlincats cnc)]
     ] ++ maybe [] (\p -> [fromPInfo p]) (parser cnc)
 fromType :: Type -> RExp
 fromType e = case e of
  DTyp hypos cat exps -> 
    App (prCId cat) [
      App "H" (map fromHypo hypos), 
      App "X" (map fromExp exps)] 
 fromHypo :: Hypo -> RExp
 fromHypo e = case e of
  Hyp x typ -> App (prCId x) [fromType typ]
 fromExp :: Expr -> RExp
 fromExp e = case e of
  EAbs x exp   -> App "Abs" [App (prCId x) [], fromExp exp]
  EApp e1 e2 -> App "App" [fromExp e1, fromExp e2]
  EVar   x -> App "Var" [App (prCId x) []]
  ELit (LStr s) -> AStr s
  ELit (LFlt d) -> AFlt d
  ELit (LInt i) -> AInt (toInteger i)
  EMeta _  -> AMet ----
  EEq eqs  -> App "Eq" [App "E" (map fromExp (v:ps)) | Equ ps v <- eqs]
 fromTerm :: Term -> RExp
 fromTerm e = case e of
  R es    -> App "R" (map fromTerm es)
  S es    -> App "S" (map fromTerm es)
  FV es   -> App "FV" (map fromTerm es)
  P e v   -> App "P"  [fromTerm e, fromTerm v]
  W s v   -> App "W" [AStr s, fromTerm v]
  C i     -> AInt (toInteger i)
  TM _    -> AMet
  F f     -> App (prCId f) []
  V i     -> App "A" [AInt (toInteger i)]
  K t     -> fromTokn t
 fromTokn :: Tokn -> RExp
 fromTokn (KS s)    = AStr s
 fromTokn (KP d vs) = App "KP" (str d : [App "A" [str v, str x] | Alt v x <- vs])
 where
   str v = App "S" (map AStr v)
 -- ** Parsing info
 fromPInfo :: ParserInfo -> RExp
 fromPInfo p = App "parser" [
          App "functions"   [fromFFun fun | fun <- elems (functions p)],
          App "sequences"   [fromFSeq seq | seq <- elems (sequences p)],
          App "productions" [fromProductionSet xs  | xs <- IntMap.toList (productions p)],
          App "categories"  (intToExp (totalCats p) : [App (prCId f) (map intToExp xs) | (f,xs) <- Map.toList (startCats p)])
        ]
 fromFFun :: FFun -> RExp
 fromFFun (FFun fun prof lins) = App (prCId fun) [App "P" (map fromProfile prof), App "R" [intToExp seqid | seqid <- elems lins]]
  where
    fromProfile :: Profile -> RExp
    fromProfile []   = AMet
    fromProfile [x]  = daughter x
    fromProfile args = App "_U" (map daughter args)
    daughter n = App "_A" [intToExp n]
 fromSymbol :: FSymbol -> RExp
 fromSymbol (FSymCat n l) = App "P"  [intToExp n, intToExp l]
 fromSymbol (FSymLit n l) = App "PL" [intToExp n, intToExp l]
 fromSymbol (FSymTok t)   = fromTokn t
 fromFSeq :: FSeq -> RExp
 fromFSeq seq = App "seq" [fromSymbol s | s <- elems seq]
 fromProductionSet :: (FCat,Set.Set Production) -> RExp
 fromProductionSet (cat,xs) = App "td" (intToExp cat : map fromPassive (Set.toList xs))
  where
    fromPassive (FApply ruleid args) = App "A" (intToExp ruleid : map intToExp args)
    fromPassive (FCoerce fcat)       = App "C" [intToExp fcat]
 -- ** Utilities
 mkTermMap :: [RExp] -> Map.Map CId Term
 mkTermMap ts = Map.fromAscList [(mkCId f,toTerm v) | App f [v] <- ts]
 mkArray :: IArray a e => [e] -> a Int e
 mkArray xs = listArray (0, length xs - 1) xs
 expToInt :: Integral a => RExp -> a
 expToInt (App "neg" [AInt i]) = fromIntegral (negate i)
 expToInt (AInt i) = fromIntegral i
 expToStr :: RExp -> String
 expToStr (AStr s) = s
 intToExp :: Integral a => a -> RExp
 intToExp x | x < 0 = App "neg" [AInt (fromIntegral (negate x))]
           | otherwise =  AInt (fromIntegral x)
--- a/src/PGF/Raw/Parse.hs
+++ b/src/PGF/Raw/Parse.hs
@@ -1,101 +0,0 @@
 module PGF.Raw.Parse (parseGrammar) where
 import PGF.CId
 import PGF.Raw.Abstract
 import Control.Monad
 import Data.Char
 import qualified Data.ByteString.Char8 as BS
 parseGrammar :: String -> IO Grammar
 parseGrammar s = case runP pGrammar s of
                   Just (x,"") -> return x
                   _           -> fail "Parse error"
 pGrammar :: P Grammar
 pGrammar = liftM Grm pTerms
 pTerms :: P [RExp]
 pTerms = liftM2 (:) (pTerm 1) pTerms <++ (skipSpaces >> return [])
 pTerm :: Int -> P RExp
 pTerm n = skipSpaces >> (pParen <++ pApp <++ pNum <++ pStr <++ pMeta)
  where pParen = between (char '(') (char ')') (pTerm 0)
        pApp = liftM2 App pIdent (if n == 0 then pTerms else return [])
        pStr = char '"' >> liftM AStr (manyTill (pEsc <++ get) (char '"'))
        pEsc = char '\\' >> get
        pNum = do x <- munch1 isDigit
                  ((char '.' >> munch1 isDigit >>= \y -> return (AFlt (read (x++"."++y))))
                   <++
                   return (AInt (read x)))
        pMeta = char '?' >> return AMet
        pIdent = liftM2 (:) (satisfy isIdentFirst) (munch isIdentRest)
        isIdentFirst c = c == '_' || isAlpha c
        isIdentRest c = c == '_' || c == '\'' || isAlphaNum c
 -- Parser combinators with only left-biased choice
 newtype P a = P { runP :: String -> Maybe (a,String) }
 instance Monad P where
    return x = P (\ts -> Just (x,ts))
    P p >>= f = P (\ts -> p ts >>= \ (x,ts') -> runP (f x) ts')
    fail _ = pfail
 instance MonadPlus P where
    mzero = pfail
    mplus = (<++)
 get :: P Char
 get = P (\ts -> case ts of
                  [] -> Nothing
                  c:cs -> Just (c,cs))
 look :: P String
 look = P (\ts -> Just (ts,ts))
 (<++) :: P a -> P a -> P a
 P p <++ P q = P (\ts -> p ts `mplus` q ts)
 pfail :: P a
 pfail = P (\ts -> Nothing)
 satisfy :: (Char -> Bool) -> P Char
 satisfy p = do c <- get
               if p c then return c else pfail
 char :: Char -> P Char
 char c = satisfy (c==)
 string :: String -> P String
 string this = look >>= scan this
    where
      scan []     _               = return this
      scan (x:xs) (y:ys) | x == y = get >> scan xs ys
      scan _      _               = pfail
 skipSpaces :: P ()
 skipSpaces = look >>= skip
    where
      skip (c:s) | isSpace c = get >> skip s
      skip _                 = return ()
 manyTill :: P a -> P end -> P [a]
 manyTill p end = scan
  where scan = (end >> return []) <++ liftM2 (:) p scan
 munch :: (Char -> Bool) -> P String
 munch p = munch1 p <++ return []
 munch1 :: (Char -> Bool) -> P String
 munch1 p = liftM2 (:) (satisfy p) (munch p)
 choice :: [P a] -> P a
 choice = msum
 between :: P open -> P close -> P a -> P a
 between open close p = do open
                          x <- p
                          close
                          return x
--- a/src/PGF/Raw/Print.hs
+++ b/src/PGF/Raw/Print.hs
@@ -1,35 +0,0 @@
 module PGF.Raw.Print (printTree) where
 import PGF.CId
 import PGF.Raw.Abstract
 import Data.List (intersperse)
 import Numeric (showFFloat)
 import qualified Data.ByteString.Char8 as BS
 printTree :: Grammar -> String
 printTree g = prGrammar g ""
 prGrammar :: Grammar -> ShowS
 prGrammar (Grm xs) = prRExpList xs
 prRExp :: Int -> RExp -> ShowS
 prRExp _ (App x []) = showString x
 prRExp n (App x xs) = p (showString x . showChar ' ' . prRExpList xs)
    where p s = if n == 0 then s else showChar '(' . s . showChar ')'
 prRExp _ (AInt x) = shows x
 prRExp _ (AStr x) = showChar '"' . concatS (map mkEsc x) . showChar '"'
 prRExp _ (AFlt x) = showFFloat Nothing x
 prRExp _ AMet = showChar '?'
 mkEsc :: Char -> ShowS
 mkEsc s = case s of
  '"'  -> showString "\\\""
  '\\' -> showString "\\\\"
  _    -> showChar s
 prRExpList :: [RExp] -> ShowS
 prRExpList = concatS . intersperse (showChar ' ') . map (prRExp 1)
 concatS :: [ShowS] -> ShowS
 concatS = foldr (.) id