mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-29 06:22:51 -06:00
403 lines
12 KiB
Plaintext
403 lines
12 KiB
Plaintext
% Copyright (C) 2009 John Millikin <jmillikin@gmail.com>
|
|
%
|
|
% This program is free software: you can redistribute it and/or modify
|
|
% it under the terms of the GNU General Public License as published by
|
|
% the Free Software Foundation, either version 3 of the License, or
|
|
% any later version.
|
|
%
|
|
% This program is distributed in the hope that it will be useful,
|
|
% but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
% GNU General Public License for more details.
|
|
%
|
|
% You should have received a copy of the GNU General Public License
|
|
% along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
\ignore{
|
|
\begin{code}
|
|
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
|
module Data.Binary.IEEE754 (
|
|
-- * Parsing
|
|
getFloat16be, getFloat16le
|
|
, getFloat32be, getFloat32le
|
|
, getFloat64be, getFloat64le
|
|
|
|
-- * Serializing
|
|
, putFloat32be, putFloat32le
|
|
, putFloat64be, putFloat64le
|
|
) where
|
|
|
|
import Data.Bits ((.&.), (.|.), shiftL, shiftR, Bits)
|
|
import Data.Word (Word8)
|
|
import Data.List (foldl')
|
|
|
|
import qualified Data.ByteString as B
|
|
import Data.Binary.Get (Get, getByteString)
|
|
import Data.Binary.Put (Put, putByteString)
|
|
\end{code}
|
|
}
|
|
|
|
\section{Parsing}
|
|
|
|
\subsection{Public interface}
|
|
|
|
\begin{code}
|
|
getFloat16be :: Get Float
|
|
getFloat16be = getFloat (ByteCount 2) splitBytes
|
|
\end{code}
|
|
|
|
\begin{code}
|
|
getFloat16le :: Get Float
|
|
getFloat16le = getFloat (ByteCount 2) $ splitBytes . reverse
|
|
\end{code}
|
|
|
|
\begin{code}
|
|
getFloat32be :: Get Float
|
|
getFloat32be = getFloat (ByteCount 4) splitBytes
|
|
\end{code}
|
|
|
|
\begin{code}
|
|
getFloat32le :: Get Float
|
|
getFloat32le = getFloat (ByteCount 4) $ splitBytes . reverse
|
|
\end{code}
|
|
|
|
\begin{code}
|
|
getFloat64be :: Get Double
|
|
getFloat64be = getFloat (ByteCount 8) splitBytes
|
|
\end{code}
|
|
|
|
\begin{code}
|
|
getFloat64le :: Get Double
|
|
getFloat64le = getFloat (ByteCount 8) $ splitBytes . reverse
|
|
\end{code}
|
|
|
|
\subsection{Implementation}
|
|
|
|
Split the raw byte array into (sign, exponent, significand) components.
|
|
The exponent and signifcand are drawn directly from the bits in the
|
|
original float, and have not been unbiased or otherwise modified.
|
|
|
|
\begin{code}
|
|
splitBytes :: [Word8] -> RawFloat
|
|
splitBytes bs = RawFloat width sign exp' sig expWidth sigWidth where
|
|
width = ByteCount (length bs)
|
|
nBits = bitsInWord8 bs
|
|
sign = if head bs .&. 0x80 == 0x80
|
|
then Negative
|
|
else Positive
|
|
|
|
expStart = 1
|
|
expWidth = exponentWidth nBits
|
|
expEnd = expStart + expWidth
|
|
exp' = Exponent . fromIntegral $ bitSlice bs expStart expEnd
|
|
|
|
sigWidth = nBits - expEnd
|
|
sig = Significand $ bitSlice bs expEnd nBits
|
|
\end{code}
|
|
|
|
\subsubsection{Encodings and special values}
|
|
|
|
The next step depends on the value of the exponent $e$, size of the
|
|
exponent field in bits $w$, and value of the significand.
|
|
|
|
\begin{table}[h]
|
|
\begin{center}
|
|
\begin{tabular}{lrl}
|
|
\toprule
|
|
Exponent & Significand & Format \\
|
|
\midrule
|
|
$0$ & $0$ & Zero \\
|
|
$0$ & $> 0$ & Denormalised \\
|
|
$1 \leq e \leq 2^w - 2$ & \textit{any} & Normalised \\
|
|
$2^w-1$ & $0$ & Infinity \\
|
|
$2^w-1$ & $> 0$ & NaN \\
|
|
\bottomrule
|
|
\end{tabular}
|
|
\end{center}
|
|
\end{table}
|
|
|
|
There's no built-in literals for Infinity or NaN, so they
|
|
are constructed using the {\tt Read} instances for {\tt Double} and
|
|
{\tt Float}.
|
|
|
|
\begin{code}
|
|
merge :: (Read a, RealFloat a) => RawFloat -> a
|
|
merge f@(RawFloat _ _ e sig eWidth _)
|
|
| e == 0 = if sig == 0
|
|
then 0.0
|
|
else denormalised f
|
|
| e == eMax - 1 = if sig == 0
|
|
then read "Infinity"
|
|
else read "NaN"
|
|
| otherwise = normalised f
|
|
where eMax = 2 `pow` eWidth
|
|
\end{code}
|
|
|
|
If a value is normalised, its significand has an implied {\tt 1} bit
|
|
in its most-significant bit. The significand must be adjusted by
|
|
this value before being passed to {\tt encodeField}.
|
|
|
|
\begin{code}
|
|
normalised :: RealFloat a => RawFloat -> a
|
|
normalised f = encodeFloat fraction exp' where
|
|
Significand sig = rawSignificand f
|
|
Exponent exp' = unbiased - sigWidth
|
|
|
|
fraction = sig + (1 `bitShiftL` rawSignificandWidth f)
|
|
|
|
sigWidth = fromIntegral $ rawSignificandWidth f
|
|
unbiased = unbias (rawExponent f) (rawExponentWidth f)
|
|
\end{code}
|
|
|
|
For denormalised values, the implied {\tt 1} bit is the least-significant
|
|
bit of the exponent.
|
|
|
|
\begin{code}
|
|
denormalised :: RealFloat a => RawFloat -> a
|
|
denormalised f = encodeFloat sig exp' where
|
|
Significand sig = rawSignificand f
|
|
Exponent exp' = unbiased - sigWidth + 1
|
|
|
|
sigWidth = fromIntegral $ rawSignificandWidth f
|
|
unbiased = unbias (rawExponent f) (rawExponentWidth f)
|
|
\end{code}
|
|
|
|
By composing {\tt splitBytes} and {\tt merge}, the absolute value of the
|
|
float is calculated. Before being returned to the calling function, it
|
|
must be signed appropriately.
|
|
|
|
\begin{code}
|
|
getFloat :: (Read a, RealFloat a) => ByteCount
|
|
-> ([Word8] -> RawFloat) -> Get a
|
|
getFloat (ByteCount width) parser = do
|
|
raw <- fmap (parser . B.unpack) $ getByteString width
|
|
let absFloat = merge raw
|
|
return $ case rawSign raw of
|
|
Positive -> absFloat
|
|
Negative -> -absFloat
|
|
\end{code}
|
|
|
|
\section{Serialising}
|
|
|
|
\subsection{Public interface}
|
|
|
|
\begin{code}
|
|
putFloat32be :: Float -> Put
|
|
putFloat32be = putFloat (ByteCount 4) id
|
|
\end{code}
|
|
|
|
\begin{code}
|
|
putFloat32le :: Float -> Put
|
|
putFloat32le = putFloat (ByteCount 4) reverse
|
|
\end{code}
|
|
|
|
\begin{code}
|
|
putFloat64be :: Double -> Put
|
|
putFloat64be = putFloat (ByteCount 8) id
|
|
\end{code}
|
|
|
|
\begin{code}
|
|
putFloat64le :: Double -> Put
|
|
putFloat64le = putFloat (ByteCount 8) reverse
|
|
\end{code}
|
|
|
|
\subsection{Implementation}
|
|
|
|
Serialisation is similar to parsing. First, the float is converted to
|
|
a structure representing raw bitfields. The values returned from
|
|
{\tt decodeFloat} are clamped to their expected lengths before being
|
|
stored in the {\tt RawFloat}.
|
|
|
|
\begin{code}
|
|
splitFloat :: RealFloat a => ByteCount -> a -> RawFloat
|
|
splitFloat width x = raw where
|
|
raw = RawFloat width sign clampedExp clampedSig expWidth sigWidth
|
|
sign = if isNegativeNaN x || isNegativeZero x || x < 0
|
|
then Negative
|
|
else Positive
|
|
clampedExp = clamp expWidth exp'
|
|
clampedSig = clamp sigWidth sig
|
|
(exp', sig) = case (dFraction, dExponent, biasedExp) of
|
|
(0, 0, _) -> (0, 0)
|
|
(_, _, 0) -> (0, Significand $ truncatedSig + 1)
|
|
_ -> (biasedExp, Significand truncatedSig)
|
|
expWidth = exponentWidth $ bitCount width
|
|
sigWidth = bitCount width - expWidth - 1 -- 1 for sign bit
|
|
|
|
(dFraction, dExponent) = decodeFloat x
|
|
|
|
rawExp = Exponent $ dExponent + fromIntegral sigWidth
|
|
biasedExp = bias rawExp expWidth
|
|
truncatedSig = abs dFraction - (1 `bitShiftL` sigWidth)
|
|
\end{code}
|
|
|
|
Then, the {\tt RawFloat} is converted to a list of bytes by mashing all
|
|
the fields together into an {\tt Integer}, and chopping up that integer
|
|
in 8-bit blocks.
|
|
|
|
\begin{code}
|
|
rawToBytes :: RawFloat -> [Word8]
|
|
rawToBytes raw = integerToBytes mashed width where
|
|
RawFloat width sign exp' sig expWidth sigWidth = raw
|
|
sign' :: Word8
|
|
sign' = case sign of
|
|
Positive -> 0
|
|
Negative -> 1
|
|
mashed = mashBits sig sigWidth .
|
|
mashBits exp' expWidth .
|
|
mashBits sign' 1 $ 0
|
|
\end{code}
|
|
|
|
{\tt clamp}, given a maximum bit count and a value, will strip any 1-bits
|
|
in positions above the count.
|
|
|
|
\begin{code}
|
|
clamp :: (Num a, Bits a) => BitCount -> a -> a
|
|
clamp = (.&.) . mask where
|
|
mask 1 = 1
|
|
mask n | n > 1 = (mask (n - 1) `shiftL` 1) + 1
|
|
mask _ = undefined
|
|
\end{code}
|
|
|
|
For merging the fields, just shift the starting integer over a bit and
|
|
then \textsc{or} it with the next value. The weird parameter order allows
|
|
easy composition.
|
|
|
|
\begin{code}
|
|
mashBits :: (Bits a, Integral a) => a -> BitCount -> Integer -> Integer
|
|
mashBits _ 0 x = x
|
|
mashBits y n x = (x `bitShiftL` n) .|. fromIntegral y
|
|
\end{code}
|
|
|
|
Given an integer, read it in 255-block increments starting from the LSB.
|
|
Each increment is converted to a byte and added to the final list.
|
|
|
|
\begin{code}
|
|
integerToBytes :: Integer -> ByteCount -> [Word8]
|
|
integerToBytes _ 0 = []
|
|
integerToBytes x n = bytes where
|
|
bytes = integerToBytes (x `shiftR` 8) (n - 1) ++ [step]
|
|
step = fromIntegral x .&. 0xFF
|
|
\end{code}
|
|
|
|
Finally, the raw parsing is wrapped up in {\tt Put}. The second parameter
|
|
allows the same code paths to be used for little- and big-endian
|
|
serialisation.
|
|
|
|
\begin{code}
|
|
putFloat :: (RealFloat a) => ByteCount -> ([Word8] -> [Word8]) -> a -> Put
|
|
putFloat width f x = putByteString $ B.pack bytes where
|
|
bytes = f . rawToBytes . splitFloat width $ x
|
|
\end{code}
|
|
|
|
\section{Raw float components}
|
|
|
|
Information about the raw bit patterns in the float is stored in
|
|
{\tt RawFloat}, so they don't have to be passed around to the various
|
|
format cases. The exponent should be biased, and the significand
|
|
shouldn't have it's implied MSB (if applicable).
|
|
|
|
\begin{code}
|
|
data RawFloat = RawFloat
|
|
{ rawWidth :: ByteCount
|
|
, rawSign :: Sign
|
|
, rawExponent :: Exponent
|
|
, rawSignificand :: Significand
|
|
, rawExponentWidth :: BitCount
|
|
, rawSignificandWidth :: BitCount
|
|
}
|
|
deriving (Show)
|
|
\end{code}
|
|
|
|
\section{Exponents}
|
|
|
|
Calculate the proper size of the exponent field, in bits, given the
|
|
size of the full structure.
|
|
|
|
\begin{code}
|
|
exponentWidth :: BitCount -> BitCount
|
|
exponentWidth k
|
|
| k == 16 = 5
|
|
| k == 32 = 8
|
|
| k `mod` 32 == 0 = ceiling (4 * logBase 2 (fromIntegral k)) - 13
|
|
| otherwise = error "Invalid length of floating-point value"
|
|
\end{code}
|
|
|
|
\begin{code}
|
|
bias :: Exponent -> BitCount -> Exponent
|
|
bias e eWidth = e - (1 - (2 `pow` (eWidth - 1)))
|
|
\end{code}
|
|
|
|
\begin{code}
|
|
unbias :: Exponent -> BitCount -> Exponent
|
|
unbias e eWidth = e + 1 - (2 `pow` (eWidth - 1))
|
|
\end{code}
|
|
|
|
\section{Byte and bit counting}
|
|
|
|
\begin{code}
|
|
data Sign = Positive | Negative
|
|
deriving (Show)
|
|
|
|
newtype Exponent = Exponent Int
|
|
deriving (Show, Eq, Num, Ord, Real, Enum, Integral, Bits)
|
|
|
|
newtype Significand = Significand Integer
|
|
deriving (Show, Eq, Num, Ord, Real, Enum, Integral, Bits)
|
|
|
|
newtype BitCount = BitCount Int
|
|
deriving (Show, Eq, Num, Ord, Real, Enum, Integral)
|
|
|
|
newtype ByteCount = ByteCount Int
|
|
deriving (Show, Eq, Num, Ord, Real, Enum, Integral)
|
|
|
|
bitCount :: ByteCount -> BitCount
|
|
bitCount (ByteCount x) = BitCount (x * 8)
|
|
|
|
bitsInWord8 :: [Word8] -> BitCount
|
|
bitsInWord8 = bitCount . ByteCount . length
|
|
|
|
bitShiftL :: (Bits a) => a -> BitCount -> a
|
|
bitShiftL x (BitCount n) = shiftL x n
|
|
|
|
bitShiftR :: (Bits a) => a -> BitCount -> a
|
|
bitShiftR x (BitCount n) = shiftR x n
|
|
\end{code}
|
|
|
|
\section{Utility}
|
|
|
|
Considering a byte list as a sequence of bits, slice it from start
|
|
inclusive to end exclusive, and return the resulting bit sequence as an
|
|
integer.
|
|
|
|
\begin{code}
|
|
bitSlice :: [Word8] -> BitCount -> BitCount -> Integer
|
|
bitSlice bs = sliceInt (foldl' step 0 bs) bitCount' where
|
|
step acc w = shiftL acc 8 + fromIntegral w
|
|
bitCount' = bitsInWord8 bs
|
|
\end{code}
|
|
|
|
Slice a single integer by start and end bit location
|
|
|
|
\begin{code}
|
|
sliceInt :: Integer -> BitCount -> BitCount -> BitCount -> Integer
|
|
sliceInt x xBitCount s e = fromIntegral sliced where
|
|
sliced = (x .&. startMask) `bitShiftR` (xBitCount - e)
|
|
startMask = n1Bits (xBitCount - s)
|
|
n1Bits n = (2 `pow` n) - 1
|
|
\end{code}
|
|
|
|
Integral version of {\tt (**)}
|
|
|
|
\begin{code}
|
|
pow :: (Integral a, Integral b, Integral c) => a -> b -> c
|
|
pow b e = floor $ fromIntegral b ** fromIntegral e
|
|
\end{code}
|
|
|
|
Detect whether a float is {\tt $-$NaN}
|
|
|
|
\begin{code}
|
|
isNegativeNaN :: RealFloat a => a -> Bool
|
|
isNegativeNaN x = isNaN x && (floor x > 0)
|
|
\end{code}
|