@@ -7,6 +7,7 @@
"""
from __future__ import annotations

+ from typing import Iterator
import datetime
import pprint

@@ -23,19 +24,21 @@
# ---------------------------------
#
# Before we create our first collection, we will create a dataset to record.
- def create_dataset():
+ def create_dataset() -> zcollection.Dataset:
    """Create a dataset to record."""
-     generator = zcollection.tests.data.create_test_dataset_with_fillvalue()
+     generator: Iterator[zcollection.Dataset] = \
+         zcollection.tests.data.create_test_dataset_with_fillvalue()
    return next(generator)


- ds = create_dataset()
- ds.to_xarray()
+ zds: zcollection.Dataset | None = create_dataset()
+ assert zds is not None
+ zds.to_xarray()

# %%
# We will create the file system that we will use. In this example, a file
# system in memory.
- fs = fsspec.filesystem('memory')
+ fs: fsspec.AbstractFileSystem = fsspec.filesystem('memory')

# %%
# Finally, we create a local dask cluster using only threads in order to work
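Note that the 'memory' filesystem requested above lives entirely in the current process, so anything written through it vanishes when the interpreter exits. A minimal sketch of that behaviour (the file name is illustrative):

import fsspec

fs = fsspec.filesystem('memory')            # process-local, volatile storage
with fs.open('/demo.txt', 'w') as stream:   # write a small file into memory
    stream.write('hello')
print(fs.ls('/'))                           # the file is visible only in this process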
@@ -54,11 +57,8 @@ def create_dataset():

# %%
# Finally, we create our collection:
- collection = zcollection.create_collection('time',
-                                            ds,
-                                            partition_handler,
-                                            '/my_collection',
-                                            filesystem=fs)
+ collection: zcollection.Collection = zcollection.create_collection(
+     'time', zds, partition_handler, '/my_collection', filesystem=fs)

# %%
# .. note::
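The `partition_handler` passed to `create_collection` is defined earlier in the script, outside the lines shown here. A plausible definition, assuming monthly partitioning on the `time` variable as in zcollection's documentation:

import zcollection.partitioning

# Assumed handler: partition by month of the 'time' variable. The elided
# code may use a different variable or resolution.
partition_handler = zcollection.partitioning.Date(('time', ), resolution='M')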
@@ -75,7 +75,7 @@ def create_dataset():

# %%
# Now that the collection has been created, we can insert new records.
- collection.insert(ds)
+ collection.insert(zds)

# %%
# .. note::
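Each inserted record lands in the partition matching its `time` value. One way to check what was written, assuming the handler and paths above (`Collection.partitions` iterates over the existing partition paths):

# With monthly partitioning, paths look roughly like
# '/my_collection/year=2000/month=01' (exact layout assumed).
for partition in collection.partitions():
    print(partition)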
@@ -103,9 +103,15 @@ def create_dataset():
# To load the dataset, call the method
# :py:meth:`load<zcollection.collection.Collection.load>` on the instance. By
# default, the method loads all partitions stored in the collection.
- collection.load()
+ collection.load(delayed=True)

# %%
+ # .. note::
+ #
+ #     By default, the data is loaded as a :py:class:`dask.array<da.Array>`. It
+ #     is possible to load the data as a :py:class:`numpy.ndarray` by specifying
+ #     the parameter ``delayed=False``.
+ #
# You can also select the partitions to be considered by filtering on the
# keywords used for partitioning, in a valid Python expression.
collection.load(filters='year == 2000 and month == 2')
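`load` also accepts a callable predicate instead of an expression string; it receives the partition keys as a dictionary. A sketch equivalent to the expression above:

# Same selection as the string expression, written as a Python predicate.
collection.load(
    filters=lambda keys: keys['year'] == 2000 and keys['month'] == 2)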
@@ -145,13 +151,13 @@ def create_dataset():
# %%
# The :py:meth:`add_variable<zcollection.collection.Collection.add_variable>`
# method allows you to add a new variable to the collection.
- collection.add_variable(ds.metadata().variables['var2'])
+ collection.add_variable(zds.metadata().variables['var2'])

# %%
# The newly created variable is initialized with its default value.
- ds = collection.load()
- assert ds is not None
- ds.variables['var2'].values
+ zds = collection.load()
+ assert zds is not None
+ zds.variables['var2'].values


# %%
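`add_variable` takes the variable's metadata, here borrowed from the dataset's own description of ``var2``. A brand-new variable can be declared directly; in the following sketch the name, dtype and dimension names are assumptions rather than part of the example:

import numpy
import zcollection.meta

# Hypothetical variable reusing the dimensions of the test dataset.
var3 = zcollection.meta.Variable(name='var3',
                                 dtype=numpy.dtype('float64'),
                                 dimensions=('num_lines', 'num_pixels'),
                                 fill_value=numpy.nan)
collection.add_variable(var3)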
@@ -161,19 +167,28 @@ def create_dataset():
#
# In this example, we will alter the variable ``var2`` by setting it to 1
# anywhere the variable ``var1`` is defined.
- def ones(ds):
+ def ones(zds) -> dict[str, numpy.ndarray]:
    """Returns a variable with ones everywhere."""
-     return dict(var2=ds.variables['var1'].values * 0 + 1)
+     return dict(var2=zds.variables['var1'].values * 0 + 1)


collection.update(ones)  # type: ignore[arg-type]

- ds = collection.load()
- assert ds is not None
- ds.variables['var2'].values
+ zds = collection.load()
+ assert zds is not None
+ zds.variables['var2'].values


# %%
+ # .. note::
+ #
+ #     The method :py:meth:`update<zcollection.collection.Collection.update>`
+ #     supports the ``delayed`` parameter. If ``delayed=True``, the function
+ #     ``ones`` is applied to each partition using a Dask array as the container
+ #     for the variable data stored in the provided dataset. This is the default
+ #     behavior. If ``delayed=False``, the function ``ones`` is applied to each
+ #     partition using a NumPy array as the container.
+ #
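The same update, forced onto NumPy containers as the note above describes:

# Apply `ones` partition by partition on in-memory NumPy arrays.
collection.update(ones, delayed=False)  # type: ignore[arg-type]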
# Sometimes it is important to know the values of the neighboring partitions.
# This can be done using the
# :py:meth:`update<zcollection.collection.Collection.update>` method with the
@@ -188,7 +203,7 @@ def ones(ds):
# start of the slice is 0, it means that the left partition is missing. If the
# stop of the slice is equal to the length of the given dataset, it means that
# the right partition is missing.
- def twos(ds, partition_info: tuple[str, slice]):
+ def twos(ds, partition_info: tuple[str, slice]) -> dict[str, numpy.ndarray]:
    """Returns a variable with twos everywhere if the partition is surrounded
    by partitions on both sides, -1 if the left partition is missing and -2 if
    the right partition is missing."""
@@ -206,9 +221,9 @@ def twos(ds, partition_info: tuple[str, slice]):

collection.update(twos, depth=1)  # type: ignore[arg-type]

- ds = collection.load()
- assert ds is not None
- ds.variables['var2'].values
+ zds = collection.load()
+ assert zds is not None
+ zds.variables['var2'].values
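The body of ``twos`` is elided between the two hunks above. A plausible shape for such a neighbour-aware function, assuming the partitioned dimension is the first axis of ``var1`` (an illustration, not the author's code):

import numpy


def twos_sketch(zds, partition_info: tuple[str, slice]) -> dict:
    """Illustrative stand-in for the elided `twos` body."""
    dim, indices = partition_info
    shape = zds.variables['var1'].shape     # assumes `dim` is the first axis
    if indices.start == 0:
        fill = -1                           # no partition to the left
    elif indices.stop == shape[0]:
        fill = -2                           # no partition to the right
    else:
        fill = 2                            # neighbours on both sides
    return dict(var2=numpy.full(shape, fill, dtype='int64'))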


# %%
# Map a function over the collection