Skip to content

Commit dd6b846

Browse files
authored
Merge branch 'main' into implement_canonical_structs
2 parents 21777b7 + be0a7b8 commit dd6b846

File tree

16 files changed

+941
-90
lines changed

16 files changed

+941
-90
lines changed

changes/3679.feature.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Adds a new in-memory storage backend called `ManagedMemoryStore`. Instances of `ManagedMemoryStore`
2+
function similarly to `MemoryStore`, but can additionally be constructed from
3+
a URL like `memory://store`.

docs/user-guide/arrays.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,14 @@ np.random.seed(0)
1414

1515
```python exec="true" session="arrays" source="above" result="ansi"
1616
import zarr
17-
store = zarr.storage.MemoryStore()
18-
z = zarr.create_array(store=store, shape=(10000, 10000), chunks=(1000, 1000), dtype='int32')
17+
z = zarr.create_array(store="memory://arrays-demo", shape=(10000, 10000), chunks=(1000, 1000), dtype='int32')
1918
print(z)
2019
```
2120

2221
The code above creates a 2-dimensional array of 32-bit integers with 10000 rows
2322
and 10000 columns, divided into chunks where each chunk has 1000 rows and 1000
24-
columns (and so there will be 100 chunks in total). The data is written to a
25-
[`zarr.storage.MemoryStore`][] (e.g. an in-memory dict). See
23+
columns (and so there will be 100 chunks in total). The data is written to an
24+
in-memory store (see [`zarr.storage.MemoryStore`][] for more details). See
2625
[Persistent arrays](#persistent-arrays) for details on storing arrays in other stores,
2726
and see [Data types](data_types.md) for an in-depth look at the data types supported
2827
by Zarr.

docs/user-guide/attributes.md

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,33 +3,32 @@
33
Zarr arrays and groups support custom key/value attributes, which can be useful for
44
storing application-specific metadata. For example:
55

6-
```python exec="true" session="arrays" source="above" result="ansi"
6+
```python exec="true" session="attributes" source="above" result="ansi"
77
import zarr
8-
store = zarr.storage.MemoryStore()
9-
root = zarr.create_group(store=store)
8+
root = zarr.create_group(store="memory://attributes-demo")
109
root.attrs['foo'] = 'bar'
1110
z = root.create_array(name='zzz', shape=(10000, 10000), dtype='int32')
1211
z.attrs['baz'] = 42
1312
z.attrs['qux'] = [1, 4, 7, 12]
1413
print(sorted(root.attrs))
1514
```
1615

17-
```python exec="true" session="arrays" source="above" result="ansi"
16+
```python exec="true" session="attributes" source="above" result="ansi"
1817
print('foo' in root.attrs)
1918
```
2019

21-
```python exec="true" session="arrays" source="above" result="ansi"
20+
```python exec="true" session="attributes" source="above" result="ansi"
2221
print(root.attrs['foo'])
2322
```
24-
```python exec="true" session="arrays" source="above" result="ansi"
23+
```python exec="true" session="attributes" source="above" result="ansi"
2524
print(sorted(z.attrs))
2625
```
2726

28-
```python exec="true" session="arrays" source="above" result="ansi"
27+
```python exec="true" session="attributes" source="above" result="ansi"
2928
print(z.attrs['baz'])
3029
```
3130

32-
```python exec="true" session="arrays" source="above" result="ansi"
31+
```python exec="true" session="attributes" source="above" result="ansi"
3332
print(z.attrs['qux'])
3433
```
3534

docs/user-guide/consolidated_metadata.md

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,7 @@ import zarr
2727
import warnings
2828

2929
warnings.filterwarnings("ignore", category=UserWarning)
30-
store = zarr.storage.MemoryStore()
31-
group = zarr.create_group(store=store)
30+
group = zarr.create_group(store="memory://consolidated-metadata-demo")
3231
print(group)
3332
array = group.create_array(shape=(1,), name='a', dtype='float64')
3433
print(array)
@@ -45,7 +44,7 @@ print(array)
4544
```
4645

4746
```python exec="true" session="consolidated_metadata" source="above" result="ansi"
48-
result = zarr.consolidate_metadata(store)
47+
result = zarr.consolidate_metadata("memory://consolidated-metadata-demo")
4948
print(result)
5049
```
5150

@@ -56,7 +55,7 @@ that can be used.:
5655
from pprint import pprint
5756
import io
5857

59-
consolidated = zarr.open_group(store=store)
58+
consolidated = zarr.open_group(store="memory://consolidated-metadata-demo")
6059
consolidated_metadata = consolidated.metadata.consolidated_metadata.metadata
6160

6261
# Note: pprint can normally be used without capturing its output
@@ -76,7 +75,7 @@ With nested groups, the consolidated metadata is available on the children, recu
7675
```python exec="true" session="consolidated_metadata" source="above" result="ansi"
7776
child = group.create_group('child', attributes={'kind': 'child'})
7877
grandchild = child.create_group('child', attributes={'kind': 'grandchild'})
79-
consolidated = zarr.consolidate_metadata(store)
78+
consolidated = zarr.consolidate_metadata("memory://consolidated-metadata-demo")
8079

8180
output = io.StringIO()
8281
pprint(consolidated['child'].metadata.consolidated_metadata, stream=output, width=60)

docs/user-guide/gpu.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,8 @@ buffers used internally by Zarr via `enable_gpu()`.
2020
import zarr
2121
import cupy as cp
2222
zarr.config.enable_gpu()
23-
store = zarr.storage.MemoryStore()
2423
z = zarr.create_array(
25-
store=store, shape=(100, 100), chunks=(10, 10), dtype="float32",
24+
store="memory://gpu-demo", shape=(100, 100), chunks=(10, 10), dtype="float32",
2625
)
2726
type(z[:10, :10])
2827
# cupy.ndarray

docs/user-guide/groups.md

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@ To create a group, use the [`zarr.group`][] function:
88

99
```python exec="true" session="groups" source="above" result="ansi"
1010
import zarr
11-
store = zarr.storage.MemoryStore()
12-
root = zarr.create_group(store=store)
11+
root = zarr.create_group(store="memory://groups-demo")
1312
print(root)
1413
```
1514

@@ -105,8 +104,7 @@ Diagnostic information about arrays and groups is available via the `info`
105104
property. E.g.:
106105

107106
```python exec="true" session="groups" source="above" result="ansi"
108-
store = zarr.storage.MemoryStore()
109-
root = zarr.group(store=store)
107+
root = zarr.group(store="memory://diagnostics-demo")
110108
foo = root.create_group('foo')
111109
bar = foo.create_array(name='bar', shape=1000000, chunks=100000, dtype='int64')
112110
bar[:] = 42

src/zarr/storage/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from zarr.storage._fsspec import FsspecStore
99
from zarr.storage._local import LocalStore
1010
from zarr.storage._logging import LoggingStore
11-
from zarr.storage._memory import GpuMemoryStore, MemoryStore
11+
from zarr.storage._memory import GpuMemoryStore, ManagedMemoryStore, MemoryStore
1212
from zarr.storage._obstore import ObjectStore
1313
from zarr.storage._wrapper import WrapperStore
1414
from zarr.storage._zip import ZipStore
@@ -18,6 +18,7 @@
1818
"GpuMemoryStore",
1919
"LocalStore",
2020
"LoggingStore",
21+
"ManagedMemoryStore",
2122
"MemoryStore",
2223
"ObjectStore",
2324
"StoreLike",

src/zarr/storage/_common.py

Lines changed: 24 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@
2323
)
2424
from zarr.errors import ContainsArrayAndGroupError, ContainsArrayError, ContainsGroupError
2525
from zarr.storage._local import LocalStore
26-
from zarr.storage._memory import MemoryStore
27-
from zarr.storage._utils import normalize_path
26+
from zarr.storage._memory import ManagedMemoryStore, MemoryStore
27+
from zarr.storage._utils import _join_paths, normalize_path, parse_store_url
2828

2929
_has_fsspec = importlib.util.find_spec("fsspec")
3030
if _has_fsspec:
@@ -36,18 +36,6 @@
3636
from zarr.core.buffer import BufferPrototype
3737

3838

39-
def _dereference_path(root: str, path: str) -> str:
40-
if not isinstance(root, str):
41-
msg = f"{root=} is not a string ({type(root)=})" # type: ignore[unreachable]
42-
raise TypeError(msg)
43-
if not isinstance(path, str):
44-
msg = f"{path=} is not a string ({type(path)=})" # type: ignore[unreachable]
45-
raise TypeError(msg)
46-
root = root.rstrip("/")
47-
path = f"{root}/{path}" if root else path
48-
return path.rstrip("/")
49-
50-
5139
class StorePath:
5240
"""
5341
Path-like interface for a Store.
@@ -267,10 +255,10 @@ def delete_sync(self) -> None:
267255

268256
def __truediv__(self, other: str) -> StorePath:
269257
"""Combine this store path with another path"""
270-
return self.__class__(self.store, _dereference_path(self.path, other))
258+
return self.__class__(self.store, _join_paths([self.path, other]))
271259

272260
def __str__(self) -> str:
273-
return _dereference_path(str(self.store), self.path)
261+
return _join_paths([str(self.store), self.path])
274262

275263
def __repr__(self) -> str:
276264
return f"StorePath({self.store.__class__.__name__}, '{self}')"
@@ -342,14 +330,17 @@ async def make_store(
342330
"""
343331
from zarr.storage._fsspec import FsspecStore # circular import
344332

345-
if (
346-
not (isinstance(store_like, str) and _is_fsspec_uri(store_like))
347-
and storage_options is not None
348-
):
349-
raise TypeError(
350-
"'storage_options' was provided but unused. "
351-
"'storage_options' is only used when the store is passed as an FSSpec URI string.",
352-
)
333+
# Parse URL early so we can reuse the result for both validation and routing
334+
parsed = parse_store_url(store_like) if isinstance(store_like, str) else None
335+
336+
# Check if storage_options is valid for this store_like
337+
if storage_options is not None:
338+
is_fsspec_uri = parsed is not None and parsed.scheme not in ("", "memory", "file")
339+
if not is_fsspec_uri:
340+
raise TypeError(
341+
"'storage_options' was provided but unused. "
342+
"'storage_options' is only used when the store is passed as an FSSpec URI string.",
343+
)
353344

354345
assert mode in (None, "r", "r+", "a", "w", "w-")
355346
_read_only = mode == "r"
@@ -377,15 +368,18 @@ async def make_store(
377368
# Create a new LocalStore
378369
return await LocalStore.open(root=store_like, mode=mode, read_only=_read_only)
379370

380-
elif isinstance(store_like, str):
381-
# Either an FSSpec URI or a local filesystem path
382-
if _is_fsspec_uri(store_like):
371+
elif isinstance(store_like, str) and parsed is not None:
372+
if parsed.scheme == "memory":
373+
# Create or get a ManagedMemoryStore
374+
return ManagedMemoryStore(name=parsed.name, path=parsed.path, read_only=_read_only)
375+
elif parsed.scheme == "file" or not parsed.scheme:
376+
# Local filesystem path — use parsed.path to strip the file:// scheme
377+
return await make_store(Path(parsed.path), mode=mode, storage_options=storage_options)
378+
else:
379+
# Assume fsspec can handle it (s3://, gs://, http://, etc.)
383380
return FsspecStore.from_url(
384381
store_like, storage_options=storage_options, read_only=_read_only
385382
)
386-
else:
387-
# Assume a filesystem path
388-
return await make_store(Path(store_like), mode=mode, storage_options=storage_options)
389383

390384
elif _has_fsspec and isinstance(store_like, FSMap):
391385
return FsspecStore.from_mapper(store_like, read_only=_read_only)
@@ -460,25 +454,6 @@ async def make_store_path(
460454
return await StorePath.open(store, path=path_normalized, mode=mode)
461455

462456

463-
def _is_fsspec_uri(uri: str) -> bool:
464-
"""
465-
Check if a URI looks like a non-local fsspec URI.
466-
467-
Examples
468-
--------
469-
```python
470-
from zarr.storage._common import _is_fsspec_uri
471-
_is_fsspec_uri("s3://bucket")
472-
# True
473-
_is_fsspec_uri("my-directory")
474-
# False
475-
_is_fsspec_uri("local://my-directory")
476-
# False
477-
```
478-
"""
479-
return "://" in uri or ("::" in uri and "local://" not in uri)
480-
481-
482457
async def ensure_no_existing_node(
483458
store_path: StorePath,
484459
zarr_format: ZarrFormat,

src/zarr/storage/_fsspec.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
)
1717
from zarr.core.buffer import Buffer
1818
from zarr.errors import ZarrUserWarning
19-
from zarr.storage._common import _dereference_path
19+
from zarr.storage._utils import _join_paths
2020

2121
if TYPE_CHECKING:
2222
from collections.abc import AsyncIterator, Iterable
@@ -282,7 +282,7 @@ async def get(
282282
# docstring inherited
283283
if not self._is_open:
284284
await self._open()
285-
path = _dereference_path(self.path, key)
285+
path = _join_paths([self.path, key])
286286

287287
try:
288288
if byte_range is None:
@@ -329,7 +329,7 @@ async def set(
329329
raise TypeError(
330330
f"FsspecStore.set(): `value` must be a Buffer instance. Got an instance of {type(value)} instead."
331331
)
332-
path = _dereference_path(self.path, key)
332+
path = _join_paths([self.path, key])
333333
# write data
334334
if byte_range:
335335
raise NotImplementedError
@@ -338,7 +338,7 @@ async def set(
338338
async def delete(self, key: str) -> None:
339339
# docstring inherited
340340
self._check_writable()
341-
path = _dereference_path(self.path, key)
341+
path = _join_paths([self.path, key])
342342
try:
343343
await self.fs._rm(path)
344344
except FileNotFoundError:
@@ -354,14 +354,14 @@ async def delete_dir(self, prefix: str) -> None:
354354
)
355355
self._check_writable()
356356

357-
path_to_delete = _dereference_path(self.path, prefix)
357+
path_to_delete = _join_paths([self.path, prefix])
358358

359359
with suppress(*self.allowed_exceptions):
360360
await self.fs._rm(path_to_delete, recursive=True)
361361

362362
async def exists(self, key: str) -> bool:
363363
# docstring inherited
364-
path = _dereference_path(self.path, key)
364+
path = _join_paths([self.path, key])
365365
exists: bool = await self.fs._exists(path)
366366
return exists
367367

@@ -378,7 +378,7 @@ async def get_partial_values(
378378
starts: list[int | None] = []
379379
stops: list[int | None] = []
380380
for key, byte_range in key_ranges:
381-
paths.append(_dereference_path(self.path, key))
381+
paths.append(_join_paths([self.path, key]))
382382
if byte_range is None:
383383
starts.append(None)
384384
stops.append(None)
@@ -429,7 +429,7 @@ async def list_prefix(self, prefix: str) -> AsyncIterator[str]:
429429
yield onefile.removeprefix(f"{self.path}/")
430430

431431
async def getsize(self, key: str) -> int:
432-
path = _dereference_path(self.path, key)
432+
path = _join_paths([self.path, key])
433433
info = await self.fs._info(path)
434434

435435
size = info.get("size")

0 commit comments

Comments
 (0)