Skip to content

Commit ec36533

Browse files
committed
measures: add ByteSize
1 parent c76fc65 commit ec36533

File tree

2 files changed

+335
-0
lines changed

2 files changed

+335
-0
lines changed

measures/measures.cabal

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,16 @@ library
2929

3030
exposed-modules:
3131
Data.Measure
32+
Data.Measure.ByteSize
3233
Data.Measure.Class
3334

3435
build-depends: base
3536
, base-deriving-via
37+
, cardano-binary
38+
, cardano-strict-containers
39+
, deepseq
40+
, nothunks
41+
, quiet
3642

3743
test-suite test
3844
hs-source-dirs: test
Lines changed: 329 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,329 @@
1+
{-# LANGUAGE BangPatterns #-}
2+
{-# LANGUAGE CPP #-}
3+
{-# LANGUAGE DataKinds #-}
4+
{-# LANGUAGE DeriveGeneric #-}
5+
{-# LANGUAGE DerivingVia #-}
6+
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
7+
{-# LANGUAGE ScopedTypeVariables #-}
8+
{-# LANGUAGE LambdaCase #-}
9+
{-# LANGUAGE TypeApplications #-}
10+
11+
-- | A measure for practical byte sizes.
12+
--
13+
-- 'ByteSize' is for summation, which might overflow. The other types are for
14+
-- storing, serializing, comparing, etc the results of calculations that did
15+
-- not overflow.
16+
--
17+
-- Import this module qualified.
18+
--
19+
-- > import Data.Measure.ByteSize (ByteSize)
20+
-- > import qualified Data.Measure.ByteSize as ByteSize
21+
module Data.Measure.ByteSize (
22+
ByteSize,
23+
-- * Observers
24+
compare,
25+
isOverflowed,
26+
-- * Safe result types
27+
ByteSize8 (ByteSize8, unByteSize8),
28+
ByteSize16 (ByteSize16, unByteSize16),
29+
ByteSize32 (ByteSize32, unByteSize32),
30+
ByteSize64 (ByteSize64, unByteSize64),
31+
-- * Conversions
32+
ByteSizeFrom,
33+
ByteSizePartialFrom,
34+
ByteSizeTo,
35+
from,
36+
partialFrom,
37+
partialFromDefault,
38+
to,
39+
-- * Unstable and unsafe
40+
unsafeCoercionWord64,
41+
) where
42+
43+
import Cardano.Binary (FromCBOR, ToCBOR)
44+
import Control.DeepSeq (NFData)
45+
import Data.Coerce (coerce)
46+
import Data.DerivingVia (InstantiatedAt (InstantiatedAt))
47+
import Data.Int (Int8, Int16, Int32, Int64)
48+
import Data.Maybe.Strict (StrictMaybe (..))
49+
import Data.Measure (BoundedMeasure, Measure)
50+
import qualified Data.Measure as Measure
51+
import Data.Type.Coercion (Coercion (Coercion))
52+
import Data.Word (Word8, Word16, Word32, Word64)
53+
import GHC.Generics (Generic)
54+
#if __GLASGOW_HASKELL__ < 900
55+
-- Use the GHC version here because this is compiler dependent, and only indirectly lib dependent.
56+
import GHC.Natural (Natural)
57+
#endif
58+
import NoThunks.Class (NoThunks,
59+
OnlyCheckWhnfNamed (OnlyCheckWhnfNamed))
60+
import Prelude hiding (compare, fromInteger, toInteger)
61+
import qualified Prelude
62+
import Quiet (Quiet (Quiet))
63+
64+
-- | A measure of byte size
65+
--
66+
-- INVARIANT @ 0 <= x <= 2^64 - 2 @
67+
--
68+
-- Note well that the uppermost value is reserved for representing overflow:
69+
-- there's a minus 2 in the invariant instead of the usual minus 1.
70+
--
71+
-- This type supports exactly one binary operator, checked addition, via
72+
-- 'Semigroup'. (Recall that 'Data.Semigroup.stimes' automatically derives
73+
-- non-negative integer scaling from 'Semigroup'.)
74+
--
75+
-- Given that the estimated total global data storage in 2024 is merely 5000
76+
-- times the maximum representable value of this type, this representation
77+
-- should suffice for the chain's actual needs for a long time. It's only bugs
78+
-- and/or attack vectors that will incur overflows here. Even so, it's
79+
-- important to detect and handle those cases.
80+
--
81+
-- No 'Eq' and 'Ord'. This is not a /saturated arithmetic/ type, and so two
82+
-- overflows are not considered equivalent. This prohibits lawful instances of
83+
-- 'Eq' and 'Ord' (eg this type's arithmetic comparisons are not reflexive).
84+
--
85+
-- No 'Enum' or 'Num'. We do not support 'Num' because we don't want all those
86+
-- operators. Moreover, we don't want bare literals, so no 'Enum'. Literals
87+
-- must explicitly include a constructor, such as @ByteSize.'from'@ or
88+
-- @ByteSize.'maybeByteSizeFrom'@.
89+
--
90+
-- We in particular exclude subtraction because a representation of both
91+
-- underflows and overflows would in turn require a representation for the sum
92+
-- of those, which could not be usefully ordered --- it's essentially NaN. Such
93+
-- an indeterminate would make it confusing to write (useful) monotonic
94+
-- predicates over this type, eg when using it as a measure in finger trees.
95+
--
96+
-- No 'FromCBOR' and 'ToCBOR'. Every measure being sent across the network
97+
-- should fit in a smaller type ('ByteSize8', 'ByteSize16', or 'ByteSize32'),
98+
-- moreover, those types do not need to represent an overflow.
99+
--
100+
-- TODO pointer tagging might achieve comparable performance without reserving
101+
-- the @2^63 - 1@ value, which is likely to cause /some/ confusion. But then it
102+
-- would no longer be compatible with the @UNPACK@ pragma, for example.
103+
newtype ByteSize = ByteSize Word64 -- ^ See the type's Haddock.
104+
deriving stock (Read, Show)
105+
deriving newtype (NFData)
106+
deriving (Bounded, Monoid, Semigroup)
107+
via InstantiatedAt Measure ByteSize
108+
deriving (NoThunks)
109+
via OnlyCheckWhnfNamed "ByteSize" ByteSize
110+
111+
-- | Not part of the stable interface! Use at your own risk.
112+
unsafeCoercionWord64 :: Coercion ByteSize Word64
113+
unsafeCoercionWord64 = Coercion
114+
115+
-- | This sentinel value represents the result of overflow.
116+
sentinel :: Word64
117+
sentinel = maxBound
118+
119+
instance Measure ByteSize where
120+
max = coerce $ max @Word64
121+
min = coerce $ min @Word64
122+
123+
plus (ByteSize x) (ByteSize y) =
124+
let !z = x + y
125+
in
126+
-- obviously equivalent to sentinel <= x + y, but avoids boundaries
127+
ByteSize $ if sentinel - x <= y then sentinel else z
128+
129+
zero = ByteSize 0
130+
131+
instance BoundedMeasure ByteSize where
132+
maxBound = ByteSize sentinel
133+
134+
--------------------------------------------------------------------------------
135+
-- Observers
136+
--------------------------------------------------------------------------------
137+
138+
isOverflowed :: ByteSize -> Bool
139+
isOverflowed (ByteSize x) = x == sentinel
140+
141+
-- | Returns 'SNothing' if and only if both values were overflowed.
142+
compare :: ByteSize -> ByteSize -> StrictMaybe Ordering
143+
compare (ByteSize x) (ByteSize y) = case Prelude.compare x y of
144+
LT -> SJust LT
145+
EQ -> if x == sentinel then SNothing else SJust EQ
146+
GT -> SJust GT
147+
148+
--------------------------------------------------------------------------------
149+
-- Safe result types
150+
--------------------------------------------------------------------------------
151+
152+
-- | The types 'ByteSize8', 'ByteSize16', 'ByteSize32', and 'ByteSize64' safely
153+
-- capture the result of 'ByteSize' calculations that did not overflow
154+
-- 'ByteSize' /and/ fit in the type.
155+
--
156+
-- They intentionally have no operators! All calculations should be done in
157+
-- 'ByteSize'.
158+
newtype ByteSize8 = ByteSize8 { unByteSize8 :: Word8 }
159+
-- ^ See the type's Haddock.
160+
deriving stock (Generic)
161+
deriving stock (Eq, Ord)
162+
deriving newtype (Bounded)
163+
deriving newtype (NFData)
164+
deriving newtype (FromCBOR, ToCBOR)
165+
deriving (Read, Show)
166+
via Quiet ByteSize8
167+
deriving (NoThunks)
168+
via OnlyCheckWhnfNamed "ByteSize8" ByteSize8
169+
170+
-- | See the documentation on 'ByteSize8'.
171+
newtype ByteSize16 = ByteSize16 { unByteSize16 :: Word16 }
172+
-- ^ See the documentation on 'ByteSize8'.
173+
deriving stock (Generic)
174+
deriving stock (Eq, Ord)
175+
deriving newtype (Bounded)
176+
deriving newtype (NFData)
177+
deriving newtype (FromCBOR, ToCBOR)
178+
deriving (Read, Show)
179+
via Quiet ByteSize16
180+
deriving (NoThunks)
181+
via OnlyCheckWhnfNamed "ByteSize16" ByteSize16
182+
183+
-- | See the documentation on 'ByteSize8'
184+
newtype ByteSize32 = ByteSize32 { unByteSize32 :: Word32 }
185+
-- ^ See the documentation on 'ByteSize8'.
186+
deriving stock (Generic)
187+
deriving stock (Eq, Ord)
188+
deriving newtype (Bounded)
189+
deriving newtype (NFData)
190+
deriving newtype (FromCBOR, ToCBOR)
191+
deriving (Read, Show)
192+
via Quiet ByteSize32
193+
deriving (NoThunks)
194+
via OnlyCheckWhnfNamed "ByteSize32" ByteSize32
195+
196+
-- | See the documentation on 'ByteSize8'
197+
newtype ByteSize64 = ByteSize64 { unByteSize64 :: Word64 }
198+
-- ^ See the documentation on 'ByteSize8'.
199+
deriving stock (Generic)
200+
deriving stock (Eq, Ord)
201+
deriving newtype (Bounded)
202+
deriving newtype (NFData)
203+
deriving newtype (FromCBOR, ToCBOR)
204+
deriving (Read, Show)
205+
via Quiet ByteSize64
206+
deriving (NoThunks)
207+
via OnlyCheckWhnfNamed "ByteSize64" ByteSize64
208+
209+
--------------------------------------------------------------------------------
210+
-- Conversions
211+
--------------------------------------------------------------------------------
212+
213+
-- | Types where every value can be decidably classified as one of the following.
214+
--
215+
-- - An integer within the INVARIANT interval of 'ByteSize'.
216+
-- - An integer greater than the INVARIANT interval of 'ByteSize'.
217+
-- - Neither of those. For example, negative numbers, fractions, orange, etc.
218+
--
219+
-- Law: 'partialFrom' returns 'SNothing' only in the third case above.
220+
--
221+
-- Law: 'partialFrom' returns 'SJust' an overflow if and only if the second
222+
-- case above.
223+
class ByteSizePartialFrom a where
224+
-- | See 'ByteSizePartialFrom'.
225+
partialFrom :: a -> StrictMaybe ByteSize
226+
227+
partialFromDefault :: ByteSizeFrom a => a -> StrictMaybe ByteSize
228+
partialFromDefault = SJust . from
229+
230+
-- | Law: @'partialFrom' = 'SJust' . 'from'@
231+
class ByteSizePartialFrom a => ByteSizeFrom a where
232+
-- | See 'ByteSizeFrom'.
233+
from :: a -> ByteSize
234+
235+
-- | Returns 'SNothing' if and only if the 'ByteSize' cannot fit in the other
236+
-- type, assuming an overflowed 'ByteSize' cannot fit into any type.
237+
--
238+
-- Because 'from' and 'partialFrom' might overflow, which loses information, it
239+
-- is not required that @'partialFrom' x >>= 'to' = 'SJust' x@; 'to' is not
240+
-- quite an inverse of 'from' and 'partialFrom'.
241+
--
242+
-- Law: @'to' x = 'SJust' y@ implies @'SJust' x = 'partialFrom' y@.
243+
class ByteSizePartialFrom a => ByteSizeTo a where
244+
-- | See 'ByteSizeTo'.
245+
to :: ByteSize -> StrictMaybe a
246+
247+
instance ByteSizePartialFrom ByteSize8 where partialFrom = partialFromDefault
248+
instance ByteSizeFrom ByteSize8 where from = from . unByteSize8
249+
instance ByteSizeTo ByteSize8 where to = fmap ByteSize8 . to
250+
251+
instance ByteSizePartialFrom ByteSize16 where partialFrom = partialFromDefault
252+
instance ByteSizeFrom ByteSize16 where from = from . unByteSize16
253+
instance ByteSizeTo ByteSize16 where to = fmap ByteSize16 . to
254+
255+
instance ByteSizePartialFrom ByteSize32 where partialFrom = partialFromDefault
256+
instance ByteSizeFrom ByteSize32 where from = from . unByteSize32
257+
instance ByteSizeTo ByteSize32 where to = fmap ByteSize32 . to
258+
259+
instance ByteSizePartialFrom ByteSize64 where partialFrom = partialFromDefault
260+
instance ByteSizeFrom ByteSize64 where from = from . unByteSize64
261+
-- | NB the result will never be @'SJust' 'maxBound'@.
262+
instance ByteSizeTo ByteSize64 where to = fmap ByteSize64 . to
263+
264+
instance ByteSizePartialFrom Word where partialFrom = partialFromDefault
265+
instance ByteSizeFrom Word where from = ByteSize . fromIntegral
266+
instance ByteSizeTo Word where to = toSmallerBoundedIntegral -- also works if its bigger, ie Word64
267+
268+
instance ByteSizePartialFrom Word8 where partialFrom = partialFromDefault
269+
instance ByteSizeFrom Word8 where from = fromSmallerUnsignedIntegral
270+
instance ByteSizeTo Word8 where to = toSmallerBoundedIntegral
271+
272+
instance ByteSizePartialFrom Word16 where partialFrom = partialFromDefault
273+
instance ByteSizeFrom Word16 where from = fromSmallerUnsignedIntegral
274+
instance ByteSizeTo Word16 where to = toSmallerBoundedIntegral
275+
276+
instance ByteSizePartialFrom Word32 where partialFrom = partialFromDefault
277+
instance ByteSizeFrom Word32 where from = fromSmallerUnsignedIntegral
278+
instance ByteSizeTo Word32 where to = toSmallerBoundedIntegral
279+
280+
instance ByteSizePartialFrom Word64 where partialFrom = partialFromDefault
281+
instance ByteSizeFrom Word64 where from = coerce
282+
-- | NB the result will never be @'SJust' 'maxBound'@.
283+
instance ByteSizeTo Word64 where to = toBiggerIntegral
284+
285+
instance ByteSizePartialFrom Natural where partialFrom = partialFromDefault
286+
instance ByteSizeFrom Natural where
287+
from a =
288+
ByteSize
289+
$ if fromIntegral sentinel <= a then sentinel else fromIntegral a
290+
instance ByteSizeTo Natural where to = toBiggerIntegral
291+
292+
instance ByteSizePartialFrom Integer where
293+
partialFrom a =
294+
if a < 0 || fromIntegral sentinel <= a then SNothing else
295+
SJust $ ByteSize $ Prelude.fromInteger a
296+
instance ByteSizeTo Integer where to = toBiggerIntegral
297+
298+
instance ByteSizePartialFrom Int where partialFrom = fromSmallerSignedIntegral
299+
instance ByteSizeTo Int where to = toSmallerBoundedIntegral -- even Int64 is smaller
300+
301+
instance ByteSizePartialFrom Int8 where partialFrom = fromSmallerSignedIntegral
302+
instance ByteSizeTo Int8 where to = toSmallerBoundedIntegral
303+
304+
instance ByteSizePartialFrom Int16 where partialFrom = fromSmallerSignedIntegral
305+
instance ByteSizeTo Int16 where to = toSmallerBoundedIntegral
306+
307+
instance ByteSizePartialFrom Int32 where partialFrom = fromSmallerSignedIntegral
308+
instance ByteSizeTo Int32 where to = toSmallerBoundedIntegral
309+
310+
instance ByteSizePartialFrom Int64 where partialFrom = fromSmallerSignedIntegral
311+
instance ByteSizeTo Int64 where to = toSmallerBoundedIntegral
312+
313+
fromSmallerUnsignedIntegral :: Integral a => a -> ByteSize
314+
fromSmallerUnsignedIntegral = ByteSize . fromIntegral
315+
316+
fromSmallerSignedIntegral :: Integral a => a -> StrictMaybe ByteSize
317+
fromSmallerSignedIntegral a =
318+
if a < 0 then SNothing else SJust $ ByteSize $ fromIntegral a
319+
320+
toBiggerIntegral :: Integral a => ByteSize -> StrictMaybe a
321+
toBiggerIntegral (ByteSize x) =
322+
if sentinel == x then SNothing else SJust $ fromIntegral x
323+
324+
toSmallerBoundedIntegral :: forall a. (Bounded a, Integral a) => ByteSize -> StrictMaybe a
325+
toSmallerBoundedIntegral (ByteSize x) =
326+
if overflow || tooBig then SNothing else SJust $ fromIntegral x
327+
where
328+
!tooBig = fromIntegral (maxBound :: a) < x
329+
!overflow = sentinel == x

0 commit comments

Comments
 (0)