|
| 1 | +{-# LANGUAGE BangPatterns #-} |
| 2 | +{-# LANGUAGE CPP #-} |
| 3 | +{-# LANGUAGE DataKinds #-} |
| 4 | +{-# LANGUAGE DeriveGeneric #-} |
| 5 | +{-# LANGUAGE DerivingVia #-} |
| 6 | +{-# LANGUAGE GeneralizedNewtypeDeriving #-} |
| 7 | +{-# LANGUAGE ScopedTypeVariables #-} |
| 8 | +{-# LANGUAGE LambdaCase #-} |
| 9 | +{-# LANGUAGE TypeApplications #-} |
| 10 | + |
| 11 | +-- | A measure for practical byte sizes. |
| 12 | +-- |
| 13 | +-- 'ByteSize' is for summation, which might overflow. The other types are for |
| 14 | +-- storing, serializing, comparing, etc the results of calculations that did |
| 15 | +-- not overflow. |
| 16 | +-- |
| 17 | +-- Import this module qualified. |
| 18 | +-- |
| 19 | +-- > import Data.Measure.ByteSize (ByteSize) |
| 20 | +-- > import qualified Data.Measure.ByteSize as ByteSize |
| 21 | +module Data.Measure.ByteSize ( |
| 22 | + ByteSize, |
| 23 | + -- * Observers |
| 24 | + compare, |
| 25 | + isOverflowed, |
| 26 | + -- * Safe result types |
| 27 | + ByteSize8 (ByteSize8, unByteSize8), |
| 28 | + ByteSize16 (ByteSize16, unByteSize16), |
| 29 | + ByteSize32 (ByteSize32, unByteSize32), |
| 30 | + ByteSize64 (ByteSize64, unByteSize64), |
| 31 | + -- * Conversions |
| 32 | + ByteSizeFrom, |
| 33 | + ByteSizePartialFrom, |
| 34 | + ByteSizeTo, |
| 35 | + from, |
| 36 | + partialFrom, |
| 37 | + partialFromDefault, |
| 38 | + to, |
| 39 | + -- * Unstable and unsafe |
| 40 | + unsafeCoercionWord64, |
| 41 | + ) where |
| 42 | + |
| 43 | +import Cardano.Binary (FromCBOR, ToCBOR) |
| 44 | +import Control.DeepSeq (NFData) |
| 45 | +import Data.Coerce (coerce) |
| 46 | +import Data.DerivingVia (InstantiatedAt (InstantiatedAt)) |
| 47 | +import Data.Int (Int8, Int16, Int32, Int64) |
| 48 | +import Data.Maybe.Strict (StrictMaybe (..)) |
| 49 | +import Data.Measure (BoundedMeasure, Measure) |
| 50 | +import qualified Data.Measure as Measure |
| 51 | +import Data.Type.Coercion (Coercion (Coercion)) |
| 52 | +import Data.Word (Word8, Word16, Word32, Word64) |
| 53 | +import GHC.Generics (Generic) |
| 54 | +#if __GLASGOW_HASKELL__ < 900 |
| 55 | +-- Use the GHC version here because this is compiler dependent, and only indirectly lib dependent. |
| 56 | +import GHC.Natural (Natural) |
| 57 | +#endif |
| 58 | +import NoThunks.Class (NoThunks, |
| 59 | + OnlyCheckWhnfNamed (OnlyCheckWhnfNamed)) |
| 60 | +import Prelude hiding (compare, fromInteger, toInteger) |
| 61 | +import qualified Prelude |
| 62 | +import Quiet (Quiet (Quiet)) |
| 63 | + |
| 64 | +-- | A measure of byte size |
| 65 | +-- |
| 66 | +-- INVARIANT @ 0 <= x <= 2^64 - 2 @ |
| 67 | +-- |
| 68 | +-- Note well that the uppermost value is reserved for representing overflow: |
| 69 | +-- there's a minus 2 in the invariant instead of the usual minus 1. |
| 70 | +-- |
| 71 | +-- This type supports exactly one binary operator, checked addition, via |
| 72 | +-- 'Semigroup'. (Recall that 'Data.Semigroup.stimes' automatically derives |
| 73 | +-- non-negative integer scaling from 'Semigroup'.) |
| 74 | +-- |
| 75 | +-- Given that the estimated total global data storage in 2024 is merely 5000 |
| 76 | +-- times the maximum representable value of this type, this representation |
| 77 | +-- should suffice for the chain's actual needs for a long time. It's only bugs |
| 78 | +-- and/or attack vectors that will incur overflows here. Even so, it's |
| 79 | +-- important to detect and handle those cases. |
| 80 | +-- |
| 81 | +-- No 'Eq' and 'Ord'. This is not a /saturated arithmetic/ type, and so two |
| 82 | +-- overflows are not considered equivalent. This prohibits lawful instances of |
| 83 | +-- 'Eq' and 'Ord' (eg this type's arithmetic comparisons are not reflexive). |
| 84 | +-- |
| 85 | +-- No 'Enum' or 'Num'. We do not support 'Num' because we don't want all those |
| 86 | +-- operators. Moreover, we don't want bare literals, so no 'Enum'. Literals |
| 87 | +-- must explicitly include a constructor, such as @ByteSize.'from'@ or |
| 88 | +-- @ByteSize.'maybeByteSizeFrom'@. |
| 89 | +-- |
| 90 | +-- We in particular exclude subtraction because a representation of both |
| 91 | +-- underflows and overflows would in turn require a representation for the sum |
| 92 | +-- of those, which could not be usefully ordered --- it's essentially NaN. Such |
| 93 | +-- an indeterminate would make it confusing to write (useful) monotonic |
| 94 | +-- predicates over this type, eg when using it as a measure in finger trees. |
| 95 | +-- |
| 96 | +-- No 'FromCBOR' and 'ToCBOR'. Every measure being sent across the network |
| 97 | +-- should fit in a smaller type ('ByteSize8', 'ByteSize16', or 'ByteSize32'), |
| 98 | +-- moreover, those types do not need to represent an overflow. |
| 99 | +-- |
| 100 | +-- TODO pointer tagging might achieve comparable performance without reserving |
| 101 | +-- the @2^63 - 1@ value, which is likely to cause /some/ confusion. But then it |
| 102 | +-- would no longer be compatible with the @UNPACK@ pragma, for example. |
| 103 | +newtype ByteSize = ByteSize Word64 -- ^ See the type's Haddock. |
| 104 | + deriving stock (Read, Show) |
| 105 | + deriving newtype (NFData) |
| 106 | + deriving (Bounded, Monoid, Semigroup) |
| 107 | + via InstantiatedAt Measure ByteSize |
| 108 | + deriving (NoThunks) |
| 109 | + via OnlyCheckWhnfNamed "ByteSize" ByteSize |
| 110 | + |
| 111 | +-- | Not part of the stable interface! Use at your own risk. |
| 112 | +unsafeCoercionWord64 :: Coercion ByteSize Word64 |
| 113 | +unsafeCoercionWord64 = Coercion |
| 114 | + |
| 115 | +-- | This sentinel value represents the result of overflow. |
| 116 | +sentinel :: Word64 |
| 117 | +sentinel = maxBound |
| 118 | + |
| 119 | +instance Measure ByteSize where |
| 120 | + max = coerce $ max @Word64 |
| 121 | + min = coerce $ min @Word64 |
| 122 | + |
| 123 | + plus (ByteSize x) (ByteSize y) = |
| 124 | + let !z = x + y |
| 125 | + in |
| 126 | + -- obviously equivalent to sentinel <= x + y, but avoids boundaries |
| 127 | + ByteSize $ if sentinel - x <= y then sentinel else z |
| 128 | + |
| 129 | + zero = ByteSize 0 |
| 130 | + |
| 131 | +instance BoundedMeasure ByteSize where |
| 132 | + maxBound = ByteSize sentinel |
| 133 | + |
| 134 | +-------------------------------------------------------------------------------- |
| 135 | +-- Observers |
| 136 | +-------------------------------------------------------------------------------- |
| 137 | + |
| 138 | +isOverflowed :: ByteSize -> Bool |
| 139 | +isOverflowed (ByteSize x) = x == sentinel |
| 140 | + |
| 141 | +-- | Returns 'SNothing' if and only if both values were overflowed. |
| 142 | +compare :: ByteSize -> ByteSize -> StrictMaybe Ordering |
| 143 | +compare (ByteSize x) (ByteSize y) = case Prelude.compare x y of |
| 144 | + LT -> SJust LT |
| 145 | + EQ -> if x == sentinel then SNothing else SJust EQ |
| 146 | + GT -> SJust GT |
| 147 | + |
| 148 | +-------------------------------------------------------------------------------- |
| 149 | +-- Safe result types |
| 150 | +-------------------------------------------------------------------------------- |
| 151 | + |
| 152 | +-- | The types 'ByteSize8', 'ByteSize16', 'ByteSize32', and 'ByteSize64' safely |
| 153 | +-- capture the result of 'ByteSize' calculations that did not overflow |
| 154 | +-- 'ByteSize' /and/ fit in the type. |
| 155 | +-- |
| 156 | +-- They intentionally have no operators! All calculations should be done in |
| 157 | +-- 'ByteSize'. |
| 158 | +newtype ByteSize8 = ByteSize8 { unByteSize8 :: Word8 } |
| 159 | + -- ^ See the type's Haddock. |
| 160 | + deriving stock (Generic) |
| 161 | + deriving stock (Eq, Ord) |
| 162 | + deriving newtype (Bounded) |
| 163 | + deriving newtype (NFData) |
| 164 | + deriving newtype (FromCBOR, ToCBOR) |
| 165 | + deriving (Read, Show) |
| 166 | + via Quiet ByteSize8 |
| 167 | + deriving (NoThunks) |
| 168 | + via OnlyCheckWhnfNamed "ByteSize8" ByteSize8 |
| 169 | + |
| 170 | +-- | See the documentation on 'ByteSize8'. |
| 171 | +newtype ByteSize16 = ByteSize16 { unByteSize16 :: Word16 } |
| 172 | + -- ^ See the documentation on 'ByteSize8'. |
| 173 | + deriving stock (Generic) |
| 174 | + deriving stock (Eq, Ord) |
| 175 | + deriving newtype (Bounded) |
| 176 | + deriving newtype (NFData) |
| 177 | + deriving newtype (FromCBOR, ToCBOR) |
| 178 | + deriving (Read, Show) |
| 179 | + via Quiet ByteSize16 |
| 180 | + deriving (NoThunks) |
| 181 | + via OnlyCheckWhnfNamed "ByteSize16" ByteSize16 |
| 182 | + |
| 183 | +-- | See the documentation on 'ByteSize8' |
| 184 | +newtype ByteSize32 = ByteSize32 { unByteSize32 :: Word32 } |
| 185 | + -- ^ See the documentation on 'ByteSize8'. |
| 186 | + deriving stock (Generic) |
| 187 | + deriving stock (Eq, Ord) |
| 188 | + deriving newtype (Bounded) |
| 189 | + deriving newtype (NFData) |
| 190 | + deriving newtype (FromCBOR, ToCBOR) |
| 191 | + deriving (Read, Show) |
| 192 | + via Quiet ByteSize32 |
| 193 | + deriving (NoThunks) |
| 194 | + via OnlyCheckWhnfNamed "ByteSize32" ByteSize32 |
| 195 | + |
| 196 | +-- | See the documentation on 'ByteSize8' |
| 197 | +newtype ByteSize64 = ByteSize64 { unByteSize64 :: Word64 } |
| 198 | + -- ^ See the documentation on 'ByteSize8'. |
| 199 | + deriving stock (Generic) |
| 200 | + deriving stock (Eq, Ord) |
| 201 | + deriving newtype (Bounded) |
| 202 | + deriving newtype (NFData) |
| 203 | + deriving newtype (FromCBOR, ToCBOR) |
| 204 | + deriving (Read, Show) |
| 205 | + via Quiet ByteSize64 |
| 206 | + deriving (NoThunks) |
| 207 | + via OnlyCheckWhnfNamed "ByteSize64" ByteSize64 |
| 208 | + |
| 209 | +-------------------------------------------------------------------------------- |
| 210 | +-- Conversions |
| 211 | +-------------------------------------------------------------------------------- |
| 212 | + |
| 213 | +-- | Types where every value can be decidably classified as one of the following. |
| 214 | +-- |
| 215 | +-- - An integer within the INVARIANT interval of 'ByteSize'. |
| 216 | +-- - An integer greater than the INVARIANT interval of 'ByteSize'. |
| 217 | +-- - Neither of those. For example, negative numbers, fractions, orange, etc. |
| 218 | +-- |
| 219 | +-- Law: 'partialFrom' returns 'SNothing' only in the third case above. |
| 220 | +-- |
| 221 | +-- Law: 'partialFrom' returns 'SJust' an overflow if and only if the second |
| 222 | +-- case above. |
| 223 | +class ByteSizePartialFrom a where |
| 224 | + -- | See 'ByteSizePartialFrom'. |
| 225 | + partialFrom :: a -> StrictMaybe ByteSize |
| 226 | + |
| 227 | +partialFromDefault :: ByteSizeFrom a => a -> StrictMaybe ByteSize |
| 228 | +partialFromDefault = SJust . from |
| 229 | + |
| 230 | +-- | Law: @'partialFrom' = 'SJust' . 'from'@ |
| 231 | +class ByteSizePartialFrom a => ByteSizeFrom a where |
| 232 | + -- | See 'ByteSizeFrom'. |
| 233 | + from :: a -> ByteSize |
| 234 | + |
| 235 | +-- | Returns 'SNothing' if and only if the 'ByteSize' cannot fit in the other |
| 236 | +-- type, assuming an overflowed 'ByteSize' cannot fit into any type. |
| 237 | +-- |
| 238 | +-- Because 'from' and 'partialFrom' might overflow, which loses information, it |
| 239 | +-- is not required that @'partialFrom' x >>= 'to' = 'SJust' x@; 'to' is not |
| 240 | +-- quite an inverse of 'from' and 'partialFrom'. |
| 241 | +-- |
| 242 | +-- Law: @'to' x = 'SJust' y@ implies @'SJust' x = 'partialFrom' y@. |
| 243 | +class ByteSizePartialFrom a => ByteSizeTo a where |
| 244 | + -- | See 'ByteSizeTo'. |
| 245 | + to :: ByteSize -> StrictMaybe a |
| 246 | + |
| 247 | +instance ByteSizePartialFrom ByteSize8 where partialFrom = partialFromDefault |
| 248 | +instance ByteSizeFrom ByteSize8 where from = from . unByteSize8 |
| 249 | +instance ByteSizeTo ByteSize8 where to = fmap ByteSize8 . to |
| 250 | + |
| 251 | +instance ByteSizePartialFrom ByteSize16 where partialFrom = partialFromDefault |
| 252 | +instance ByteSizeFrom ByteSize16 where from = from . unByteSize16 |
| 253 | +instance ByteSizeTo ByteSize16 where to = fmap ByteSize16 . to |
| 254 | + |
| 255 | +instance ByteSizePartialFrom ByteSize32 where partialFrom = partialFromDefault |
| 256 | +instance ByteSizeFrom ByteSize32 where from = from . unByteSize32 |
| 257 | +instance ByteSizeTo ByteSize32 where to = fmap ByteSize32 . to |
| 258 | + |
| 259 | +instance ByteSizePartialFrom ByteSize64 where partialFrom = partialFromDefault |
| 260 | +instance ByteSizeFrom ByteSize64 where from = from . unByteSize64 |
| 261 | +-- | NB the result will never be @'SJust' 'maxBound'@. |
| 262 | +instance ByteSizeTo ByteSize64 where to = fmap ByteSize64 . to |
| 263 | + |
| 264 | +instance ByteSizePartialFrom Word where partialFrom = partialFromDefault |
| 265 | +instance ByteSizeFrom Word where from = ByteSize . fromIntegral |
| 266 | +instance ByteSizeTo Word where to = toSmallerBoundedIntegral -- also works if its bigger, ie Word64 |
| 267 | + |
| 268 | +instance ByteSizePartialFrom Word8 where partialFrom = partialFromDefault |
| 269 | +instance ByteSizeFrom Word8 where from = fromSmallerUnsignedIntegral |
| 270 | +instance ByteSizeTo Word8 where to = toSmallerBoundedIntegral |
| 271 | + |
| 272 | +instance ByteSizePartialFrom Word16 where partialFrom = partialFromDefault |
| 273 | +instance ByteSizeFrom Word16 where from = fromSmallerUnsignedIntegral |
| 274 | +instance ByteSizeTo Word16 where to = toSmallerBoundedIntegral |
| 275 | + |
| 276 | +instance ByteSizePartialFrom Word32 where partialFrom = partialFromDefault |
| 277 | +instance ByteSizeFrom Word32 where from = fromSmallerUnsignedIntegral |
| 278 | +instance ByteSizeTo Word32 where to = toSmallerBoundedIntegral |
| 279 | + |
| 280 | +instance ByteSizePartialFrom Word64 where partialFrom = partialFromDefault |
| 281 | +instance ByteSizeFrom Word64 where from = coerce |
| 282 | +-- | NB the result will never be @'SJust' 'maxBound'@. |
| 283 | +instance ByteSizeTo Word64 where to = toBiggerIntegral |
| 284 | + |
| 285 | +instance ByteSizePartialFrom Natural where partialFrom = partialFromDefault |
| 286 | +instance ByteSizeFrom Natural where |
| 287 | + from a = |
| 288 | + ByteSize |
| 289 | + $ if fromIntegral sentinel <= a then sentinel else fromIntegral a |
| 290 | +instance ByteSizeTo Natural where to = toBiggerIntegral |
| 291 | + |
| 292 | +instance ByteSizePartialFrom Integer where |
| 293 | + partialFrom a = |
| 294 | + if a < 0 || fromIntegral sentinel <= a then SNothing else |
| 295 | + SJust $ ByteSize $ Prelude.fromInteger a |
| 296 | +instance ByteSizeTo Integer where to = toBiggerIntegral |
| 297 | + |
| 298 | +instance ByteSizePartialFrom Int where partialFrom = fromSmallerSignedIntegral |
| 299 | +instance ByteSizeTo Int where to = toSmallerBoundedIntegral -- even Int64 is smaller |
| 300 | + |
| 301 | +instance ByteSizePartialFrom Int8 where partialFrom = fromSmallerSignedIntegral |
| 302 | +instance ByteSizeTo Int8 where to = toSmallerBoundedIntegral |
| 303 | + |
| 304 | +instance ByteSizePartialFrom Int16 where partialFrom = fromSmallerSignedIntegral |
| 305 | +instance ByteSizeTo Int16 where to = toSmallerBoundedIntegral |
| 306 | + |
| 307 | +instance ByteSizePartialFrom Int32 where partialFrom = fromSmallerSignedIntegral |
| 308 | +instance ByteSizeTo Int32 where to = toSmallerBoundedIntegral |
| 309 | + |
| 310 | +instance ByteSizePartialFrom Int64 where partialFrom = fromSmallerSignedIntegral |
| 311 | +instance ByteSizeTo Int64 where to = toSmallerBoundedIntegral |
| 312 | + |
| 313 | +fromSmallerUnsignedIntegral :: Integral a => a -> ByteSize |
| 314 | +fromSmallerUnsignedIntegral = ByteSize . fromIntegral |
| 315 | + |
| 316 | +fromSmallerSignedIntegral :: Integral a => a -> StrictMaybe ByteSize |
| 317 | +fromSmallerSignedIntegral a = |
| 318 | + if a < 0 then SNothing else SJust $ ByteSize $ fromIntegral a |
| 319 | + |
| 320 | +toBiggerIntegral :: Integral a => ByteSize -> StrictMaybe a |
| 321 | +toBiggerIntegral (ByteSize x) = |
| 322 | + if sentinel == x then SNothing else SJust $ fromIntegral x |
| 323 | + |
| 324 | +toSmallerBoundedIntegral :: forall a. (Bounded a, Integral a) => ByteSize -> StrictMaybe a |
| 325 | +toSmallerBoundedIntegral (ByteSize x) = |
| 326 | + if overflow || tooBig then SNothing else SJust $ fromIntegral x |
| 327 | + where |
| 328 | + !tooBig = fromIntegral (maxBound :: a) < x |
| 329 | + !overflow = sentinel == x |
0 commit comments