diff --git a/changes/3998.misc.md b/changes/3998.misc.md new file mode 100644 index 0000000000..bacfa93a8b --- /dev/null +++ b/changes/3998.misc.md @@ -0,0 +1 @@ +Centralized JSON document I/O behind free functions in `zarr.core._json` and removed the unused private `Store._get_bytes`/`_get_json` methods, their `_sync` variants, and the per-store overrides. diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index 3247649f10..304d0cddb5 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -1,15 +1,12 @@ from __future__ import annotations import asyncio -import json from abc import ABC, abstractmethod from dataclasses import dataclass from functools import partial from itertools import starmap from typing import TYPE_CHECKING, Literal, Protocol, runtime_checkable -from zarr.core.sync import sync - if TYPE_CHECKING: from collections.abc import AsyncGenerator, AsyncIterator, Iterable, Sequence from types import TracebackType @@ -219,211 +216,6 @@ async def get( """ ... - async def _get_bytes( - self, key: str, *, prototype: BufferPrototype, byte_range: ByteRequest | None = None - ) -> bytes: - """ - Retrieve raw bytes from the store asynchronously. - - This is a convenience method that wraps ``get()`` and converts the result - to bytes. Use this when you need the raw byte content of a stored value. - - Parameters - ---------- - key : str - The key identifying the data to retrieve. - prototype : BufferPrototype - The buffer prototype to use for reading the data. - byte_range : ByteRequest, optional - If specified, only retrieve a portion of the stored data. - Can be a ``RangeByteRequest``, ``OffsetByteRequest``, or ``SuffixByteRequest``. - - Returns - ------- - bytes - The raw bytes stored at the given key. - - Raises - ------ - FileNotFoundError - If the key does not exist in the store. - - See Also - -------- - get : Lower-level method that returns a Buffer object. - get_bytes : Synchronous version of this method. 
- get_json : Asynchronous method for retrieving and parsing JSON data. - - Examples - -------- - >>> store = await MemoryStore.open() - >>> await store.set("data", Buffer.from_bytes(b"hello world")) - >>> data = await store.get_bytes("data", prototype=default_buffer_prototype()) - >>> print(data) - b'hello world' - """ - buffer = await self.get(key, prototype, byte_range) - if buffer is None: - raise FileNotFoundError(key) - return buffer.to_bytes() - - def _get_bytes_sync( - self, key: str = "", *, prototype: BufferPrototype, byte_range: ByteRequest | None = None - ) -> bytes: - """ - Retrieve raw bytes from the store synchronously. - - This is a synchronous wrapper around ``get_bytes()``. It should only - be called from non-async code. For async contexts, use ``get_bytes()`` - instead. - - Parameters - ---------- - key : str, optional - The key identifying the data to retrieve. Defaults to an empty string. - prototype : BufferPrototype - The buffer prototype to use for reading the data. - byte_range : ByteRequest, optional - If specified, only retrieve a portion of the stored data. - Can be a ``RangeByteRequest``, ``OffsetByteRequest``, or ``SuffixByteRequest``. - - Returns - ------- - bytes - The raw bytes stored at the given key. - - Raises - ------ - FileNotFoundError - If the key does not exist in the store. - - Warnings - -------- - Do not call this method from async functions. Use ``get_bytes()`` instead - to avoid blocking the event loop. - - See Also - -------- - get_bytes : Asynchronous version of this method. - get_json_sync : Synchronous method for retrieving and parsing JSON data. 
- - Examples - -------- - >>> store = MemoryStore() - >>> await store.set("data", Buffer.from_bytes(b"hello world")) - >>> data = store.get_bytes_sync("data", prototype=default_buffer_prototype()) - >>> print(data) - b'hello world' - """ - - return sync(self._get_bytes(key, prototype=prototype, byte_range=byte_range)) - - async def _get_json( - self, key: str, *, prototype: BufferPrototype, byte_range: ByteRequest | None = None - ) -> Any: - """ - Retrieve and parse JSON data from the store asynchronously. - - This is a convenience method that retrieves bytes from the store and - parses them as JSON. - - Parameters - ---------- - key : str - The key identifying the JSON data to retrieve. - prototype : BufferPrototype - The buffer prototype to use for reading the data. - byte_range : ByteRequest, optional - If specified, only retrieve a portion of the stored data. - Can be a ``RangeByteRequest``, ``OffsetByteRequest``, or ``SuffixByteRequest``. - Note: Using byte ranges with JSON may result in invalid JSON. - - Returns - ------- - Any - The parsed JSON data. This follows the behavior of ``json.loads()`` and - can be any JSON-serializable type: dict, list, str, int, float, bool, or None. - - Raises - ------ - FileNotFoundError - If the key does not exist in the store. - json.JSONDecodeError - If the stored data is not valid JSON. - - See Also - -------- - get_bytes : Method for retrieving raw bytes. - get_json_sync : Synchronous version of this method. 
- - Examples - -------- - >>> store = await MemoryStore.open() - >>> metadata = {"zarr_format": 3, "node_type": "array"} - >>> await store.set("zarr.json", Buffer.from_bytes(json.dumps(metadata).encode())) - >>> data = await store.get_json("zarr.json", prototype=default_buffer_prototype()) - >>> print(data) - {'zarr_format': 3, 'node_type': 'array'} - """ - - return json.loads(await self._get_bytes(key, prototype=prototype, byte_range=byte_range)) - - def _get_json_sync( - self, key: str = "", *, prototype: BufferPrototype, byte_range: ByteRequest | None = None - ) -> Any: - """ - Retrieve and parse JSON data from the store synchronously. - - This is a synchronous wrapper around ``get_json()``. It should only - be called from non-async code. For async contexts, use ``get_json()`` - instead. - - Parameters - ---------- - key : str, optional - The key identifying the JSON data to retrieve. Defaults to an empty string. - prototype : BufferPrototype - The buffer prototype to use for reading the data. - byte_range : ByteRequest, optional - If specified, only retrieve a portion of the stored data. - Can be a ``RangeByteRequest``, ``OffsetByteRequest``, or ``SuffixByteRequest``. - Note: Using byte ranges with JSON may result in invalid JSON. - - Returns - ------- - Any - The parsed JSON data. This follows the behavior of ``json.loads()`` and - can be any JSON-serializable type: dict, list, str, int, float, bool, or None. - - Raises - ------ - FileNotFoundError - If the key does not exist in the store. - json.JSONDecodeError - If the stored data is not valid JSON. - - Warnings - -------- - Do not call this method from async functions. Use ``get_json()`` instead - to avoid blocking the event loop. - - See Also - -------- - get_json : Asynchronous version of this method. - get_bytes_sync : Synchronous method for retrieving raw bytes without parsing. 
- - Examples - -------- - >>> store = MemoryStore() - >>> metadata = {"zarr_format": 3, "node_type": "array"} - >>> store.set("zarr.json", Buffer.from_bytes(json.dumps(metadata).encode())) - >>> data = store.get_json_sync("zarr.json", prototype=default_buffer_prototype()) - >>> print(data) - {'zarr_format': 3, 'node_type': 'array'} - """ - - return sync(self._get_json(key, prototype=prototype, byte_range=byte_range)) - @abstractmethod async def get_partial_values( self, diff --git a/src/zarr/core/_json.py b/src/zarr/core/_json.py new file mode 100644 index 0000000000..efe8152a4f --- /dev/null +++ b/src/zarr/core/_json.py @@ -0,0 +1,133 @@ +"""Helpers for moving JSON documents in and out of zarr stores. + +These are free functions, deliberately not methods on the ``Store`` ABC: +reading and writing JSON is a composition of the store's ``get``/``set`` +primitives with a buffer/JSON conversion, not part of the store contract. +Keeping them as functions means stores cannot (and need not) override them, +and the ``Store`` definition stays free of any dependency on the buffer +prototype. + +These functions are pure: the JSON encoding parameters (``indent``, +``allow_nan``) are explicit arguments rather than read from the global config. +Callers that want zarr's configured indentation pass +``indent=config.get("json_indent")``. + +Two layers: + +- ``buffer_to_json`` / ``json_to_buffer`` convert between a ``Buffer`` and a + parsed JSON value. The buffer prototype lives here, at buffer construction, + where it is meaningful. +- ``get_json`` / ``set_json`` compose those with ``Store.get`` / ``Store.set``. + ``get_json`` returns ``None`` for a missing key (the contract most callers + want); callers that require presence check for ``None`` themselves. 
+""" + +from __future__ import annotations + +import json +from typing import TYPE_CHECKING, cast + +from zarr.core.buffer import default_buffer_prototype + +if TYPE_CHECKING: + from zarr.abc.store import ByteRequest, Store + from zarr.core.buffer import Buffer, BufferPrototype + from zarr.core.common import JSON + + +def buffer_to_json(buffer: Buffer) -> JSON: + """Parse the contents of a `Buffer` as a JSON value.""" + # json.loads is typed as returning Any; the result is by definition JSON. + return cast("JSON", json.loads(buffer.to_bytes())) + + +def buffer_to_json_object(buffer: Buffer) -> dict[str, JSON]: + """Parse the contents of a `Buffer` as a JSON object (a `dict`). + + Every metadata document zarr reads is a JSON object, so this narrows the + `JSON` union to `dict[str, JSON]` once, here, instead of at each call site. + + Parameters + ---------- + buffer + The buffer whose contents are parsed as a JSON object. + + Raises + ------ + TypeError + If the parsed value is not a JSON object. + """ + obj = buffer_to_json(buffer) + if not isinstance(obj, dict): + raise TypeError(f"Expected a JSON object, got {type(obj).__name__}.") + return obj + + +def json_to_buffer( + obj: JSON, + *, + prototype: BufferPrototype | None = None, + indent: int | None = None, + allow_nan: bool = True, +) -> Buffer: + """Serialize a JSON value into a `Buffer`. + + Parameters + ---------- + obj + The JSON-serializable value to encode. + prototype + The buffer prototype to construct the result with. Defaults to + `default_buffer_prototype()`. + indent + Indentation passed to `json.dumps`. `None` (the default) writes + without newline indentation, using json's default separators. + Callers that want zarr's configured indentation pass + `indent=config.get("json_indent")`. + allow_nan + Whether to permit `NaN`/`Infinity` in the output, passed to + `json.dumps`. 
+ """ + if prototype is None: + prototype = default_buffer_prototype() + return prototype.buffer.from_bytes(json.dumps(obj, indent=indent, allow_nan=allow_nan).encode()) + + +async def get_json(store: Store, key: str, *, byte_range: ByteRequest | None = None) -> JSON | None: + """Read and parse the JSON document at `key`, or `None` if it is absent. + + Parameters + ---------- + store + The store to read from. + key + The key identifying the JSON document. + byte_range + If given, read only this portion of the value. Note that a partial + read of a JSON document may not be valid JSON. + + Returns + ------- + JSON or None + The parsed JSON value, or `None` if `key` does not exist. + """ + buffer = await store.get(key, default_buffer_prototype(), byte_range) + return None if buffer is None else buffer_to_json(buffer) + + +async def set_json( + store: Store, + key: str, + obj: JSON, + *, + prototype: BufferPrototype | None = None, + indent: int | None = None, + allow_nan: bool = True, +) -> None: + """Serialize `obj` as JSON and write it to `key` in `store`. + + `prototype`, `indent`, and `allow_nan` are forwarded to `json_to_buffer`. 
+ """ + await store.set( + key, json_to_buffer(obj, prototype=prototype, indent=indent, allow_nan=allow_nan) + ) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 4e9bd6e12f..51bb87a81e 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1,6 +1,5 @@ from __future__ import annotations -import json import warnings from asyncio import gather from collections.abc import Iterable, Mapping, Sequence @@ -28,6 +27,7 @@ from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec from zarr.codecs.zstd import ZstdCodec from zarr.core._info import ArrayInfo +from zarr.core._json import buffer_to_json_object from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, ArraySpec, parse_array_config from zarr.core.attributes import Attributes from zarr.core.buffer import ( @@ -289,13 +289,13 @@ async def get_array_metadata( if zarr_format == 2: # V2 arrays are comprised of a .zarray and .zattrs objects assert zarray_bytes is not None - metadata_dict = json.loads(zarray_bytes.to_bytes()) - zattrs_dict = json.loads(zattrs_bytes.to_bytes()) if zattrs_bytes is not None else {} + metadata_dict = buffer_to_json_object(zarray_bytes) + zattrs_dict = buffer_to_json_object(zattrs_bytes) if zattrs_bytes is not None else {} metadata_dict["attributes"] = zattrs_dict else: # V3 arrays are comprised of a zarr.json object assert zarr_json_bytes is not None - metadata_dict = json.loads(zarr_json_bytes.to_bytes()) + metadata_dict = buffer_to_json_object(zarr_json_bytes) parse_node_type_array(metadata_dict.get("node_type")) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 213b7fb607..3f2e8cdbfb 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -2,7 +2,6 @@ import asyncio import itertools -import json import logging import unicodedata import warnings @@ -18,6 +17,7 @@ from zarr.abc.metadata import Metadata from zarr.abc.store import Store, set_or_delete from zarr.core._info import GroupInfo +from zarr.core._json import 
buffer_to_json_object, json_to_buffer from zarr.core.array import ( DEFAULT_FILL_VALUE, Array, @@ -356,21 +356,15 @@ class GroupMetadata(Metadata): node_type: Literal["group"] = field(default="group", init=False) def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: - json_indent = config.get("json_indent") + indent = config.get("json_indent") if self.zarr_format == 3: - return { - ZARR_JSON: prototype.buffer.from_bytes( - json.dumps(self.to_dict(), indent=json_indent, allow_nan=True).encode() - ) - } + return {ZARR_JSON: json_to_buffer(self.to_dict(), prototype=prototype, indent=indent)} else: items = { - ZGROUP_JSON: prototype.buffer.from_bytes( - json.dumps({"zarr_format": self.zarr_format}, indent=json_indent).encode() - ), - ZATTRS_JSON: prototype.buffer.from_bytes( - json.dumps(self.attributes, indent=json_indent, allow_nan=True).encode() + ZGROUP_JSON: json_to_buffer( + {"zarr_format": self.zarr_format}, prototype=prototype, indent=indent ), + ZATTRS_JSON: json_to_buffer(self.attributes, prototype=prototype, indent=indent), } if self.consolidated_metadata: d = { @@ -395,10 +389,9 @@ def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: }, } - items[ZMETADATA_V2_JSON] = prototype.buffer.from_bytes( - json.dumps( - {"metadata": d, "zarr_consolidated_format": 1}, allow_nan=True - ).encode() + # The consolidated metadata blob is written compactly (no indent). 
+ items[ZMETADATA_V2_JSON] = json_to_buffer( + {"metadata": d, "zarr_consolidated_format": 1}, prototype=prototype ) return items @@ -626,13 +619,13 @@ def _from_bytes_v2( consolidated_metadata_bytes: Buffer | None, ) -> AsyncGroup: # V2 groups are comprised of a .zgroup and .zattrs objects - zgroup = json.loads(zgroup_bytes.to_bytes()) - zattrs = json.loads(zattrs_bytes.to_bytes()) if zattrs_bytes is not None else {} - group_metadata = {**zgroup, "attributes": zattrs} + zgroup = buffer_to_json_object(zgroup_bytes) + zattrs = buffer_to_json_object(zattrs_bytes) if zattrs_bytes is not None else {} + group_metadata: dict[str, Any] = {**zgroup, "attributes": zattrs} if consolidated_metadata_bytes is not None: - v2_consolidated_metadata = json.loads(consolidated_metadata_bytes.to_bytes()) - v2_consolidated_metadata = v2_consolidated_metadata["metadata"] + v2_consolidated_doc = buffer_to_json_object(consolidated_metadata_bytes) + v2_consolidated_metadata = cast("dict[str, Any]", v2_consolidated_doc["metadata"]) # We already read zattrs and zgroup. Should we ignore these? v2_consolidated_metadata.pop(".zattrs", None) v2_consolidated_metadata.pop(".zgroup", None) @@ -667,7 +660,7 @@ def _from_bytes_v3( zarr_json_bytes: Buffer, use_consolidated: bool | None, ) -> AsyncGroup: - group_metadata = json.loads(zarr_json_bytes.to_bytes()) + group_metadata = buffer_to_json_object(zarr_json_bytes) if use_consolidated and group_metadata.get("consolidated_metadata") is None: msg = f"Consolidated metadata requested with 'use_consolidated=True' but not found in '{store_path.path}'." 
raise ValueError(msg) @@ -3376,9 +3369,7 @@ async def _read_metadata_v3(store: Store, path: str) -> ArrayV3Metadata | GroupM ) if zarr_json_bytes is None: raise FileNotFoundError(path) - else: - zarr_json = json.loads(zarr_json_bytes.to_bytes()) - return _build_metadata_v3(zarr_json) + return _build_metadata_v3(buffer_to_json_object(zarr_json_bytes)) async def _read_metadata_v2(store: Store, path: str) -> ArrayV2Metadata | GroupMetadata: @@ -3395,22 +3386,23 @@ async def _read_metadata_v2(store: Store, path: str) -> ArrayV2Metadata | GroupM store.get(_join_paths([path, ZATTRS_JSON]), prototype=default_buffer_prototype()), ) + zattrs: dict[str, JSON] if zattrs_bytes is None: zattrs = {} else: - zattrs = json.loads(zattrs_bytes.to_bytes()) + zattrs = buffer_to_json_object(zattrs_bytes) # TODO: decide how to handle finding both array and group metadata. The spec does not seem to # consider this situation. A practical approach would be to ignore that combination, and only # return the array metadata. if zarray_bytes is not None: - zmeta = json.loads(zarray_bytes.to_bytes()) + zmeta = buffer_to_json_object(zarray_bytes) else: if zgroup_bytes is None: # neither .zarray or .zgroup were found results in KeyError raise FileNotFoundError(path) else: - zmeta = json.loads(zgroup_bytes.to_bytes()) + zmeta = buffer_to_json_object(zgroup_bytes) return _build_metadata_v2(zmeta, zattrs) diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index 29ed496eb1..ac32521239 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -30,6 +30,7 @@ import numpy as np +from zarr.core._json import json_to_buffer from zarr.core.array_spec import ArrayConfig, ArraySpec from zarr.core.chunk_key_encodings import parse_separator from zarr.core.common import ( @@ -140,14 +141,10 @@ def shards(self) -> tuple[int, ...] 
| None: def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: zarray_dict = self.to_dict() zattrs_dict = zarray_dict.pop("attributes", {}) - json_indent = config.get("json_indent") + indent = config.get("json_indent") return { - ZARRAY_JSON: prototype.buffer.from_bytes( - json.dumps(zarray_dict, indent=json_indent, allow_nan=True).encode() - ), - ZATTRS_JSON: prototype.buffer.from_bytes( - json.dumps(zattrs_dict, indent=json_indent, allow_nan=True).encode() - ), + ZARRAY_JSON: json_to_buffer(zarray_dict, prototype=prototype, indent=indent), + ZATTRS_JSON: json_to_buffer(zattrs_dict, prototype=prototype, indent=indent), } @classmethod diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 626c18eb72..9eaccc5076 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -9,6 +9,7 @@ from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec from zarr.abc.metadata import Metadata +from zarr.core._json import json_to_buffer from zarr.core.array_spec import ArrayConfig, ArraySpec from zarr.core.buffer.core import default_buffer_prototype from zarr.core.chunk_grids import is_regular_nd @@ -289,8 +290,6 @@ class RectilinearChunkGridMetadata(Metadata): chunk_shapes: tuple[int | tuple[int, ...], ...] def __post_init__(self) -> None: - from zarr.core.config import config - if not config.get("array.rectilinear_chunks"): raise ValueError( "Rectilinear chunk grids are experimental and disabled by default. 
" @@ -606,13 +605,8 @@ def encode_chunk_key(self, chunk_coords: tuple[int, ...]) -> str: return self.chunk_key_encoding.encode_chunk_key(chunk_coords) def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: - json_indent = config.get("json_indent") - d = self.to_dict() - return { - ZARR_JSON: prototype.buffer.from_bytes( - json.dumps(d, allow_nan=True, indent=json_indent).encode() - ) - } + indent = config.get("json_indent") + return {ZARR_JSON: json_to_buffer(self.to_dict(), prototype=prototype, indent=indent)} @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: diff --git a/src/zarr/storage/_common.py b/src/zarr/storage/_common.py index e4ee0bc4ac..1e13a9ac3f 100644 --- a/src/zarr/storage/_common.py +++ b/src/zarr/storage/_common.py @@ -12,6 +12,7 @@ SupportsGetSync, SupportsSetSync, ) +from zarr.core._json import buffer_to_json_object, get_json from zarr.core.buffer import Buffer, default_buffer_prototype from zarr.core.common import ( ANY_ACCESS_MODE, @@ -34,6 +35,7 @@ if TYPE_CHECKING: from zarr.core.buffer import BufferPrototype + from zarr.core.common import JSON class StorePath: @@ -161,6 +163,23 @@ async def get( prototype = default_buffer_prototype() return await self.store.get(self.path, prototype=prototype, byte_range=byte_range) + async def get_json(self, *, byte_range: ByteRequest | None = None) -> JSON | None: + """ + Read and parse the JSON document at this path, or None if it is absent. + + Parameters + ---------- + byte_range : ByteRequest, optional + If given, read only this portion of the value. Note that a partial + read of a JSON document may not be valid JSON. + + Returns + ------- + JSON or None + The parsed JSON value, or None if this path does not exist. + """ + return await get_json(self.store, self.path, byte_range=byte_range) + async def set(self, value: Buffer) -> None: """ Write bytes to the store. 
@@ -521,14 +540,14 @@ async def _contains_node_v3(store_path: StorePath) -> Literal["array", "group", # if no metadata document could be loaded, then we just return "nothing" if extant_meta_bytes is not None: try: - extant_meta_json = json.loads(extant_meta_bytes.to_bytes()) + extant_meta_json = buffer_to_json_object(extant_meta_bytes) # avoid constructing a full metadata document here in the name of speed. if extant_meta_json["node_type"] == "array": result = "array" elif extant_meta_json["node_type"] == "group": result = "group" - except (KeyError, json.JSONDecodeError): - # either of these errors is consistent with no array or group present. + except (KeyError, TypeError, json.JSONDecodeError): + # any of these errors is consistent with no array or group present. pass return result @@ -592,11 +611,11 @@ async def contains_array(store_path: StorePath, zarr_format: ZarrFormat) -> bool return False else: try: - extant_meta_json = json.loads(extant_meta_bytes.to_bytes()) + extant_meta_json = buffer_to_json_object(extant_meta_bytes) # we avoid constructing a full metadata document here in the name of speed. if extant_meta_json["node_type"] == "array": return True - except (ValueError, KeyError): + except (ValueError, KeyError, TypeError): return False elif zarr_format == 2: return await (store_path / ZARRAY_JSON).exists() @@ -629,10 +648,10 @@ async def contains_group(store_path: StorePath, zarr_format: ZarrFormat) -> bool return False else: try: - extant_meta_json = json.loads(extant_meta_bytes.to_bytes()) + extant_meta_json = buffer_to_json_object(extant_meta_bytes) # we avoid constructing a full metadata document here in the name of speed. 
result: bool = extant_meta_json["node_type"] == "group" - except (ValueError, KeyError): + except (ValueError, KeyError, TypeError): return False else: return result diff --git a/src/zarr/storage/_local.py b/src/zarr/storage/_local.py index 96f1e61746..3d9882d3db 100644 --- a/src/zarr/storage/_local.py +++ b/src/zarr/storage/_local.py @@ -8,7 +8,7 @@ import sys import uuid from pathlib import Path -from typing import TYPE_CHECKING, Any, BinaryIO, Literal, Self +from typing import TYPE_CHECKING, BinaryIO, Literal, Self from zarr.abc.store import ( ByteRequest, @@ -356,236 +356,6 @@ async def list_dir(self, prefix: str) -> AsyncIterator[str]: except (FileNotFoundError, NotADirectoryError): pass - async def _get_bytes( - self, - key: str = "", - *, - prototype: BufferPrototype | None = None, - byte_range: ByteRequest | None = None, - ) -> bytes: - """ - Retrieve raw bytes from the local store asynchronously. - - This is a convenience override that makes the ``prototype`` parameter optional - by defaulting to the standard buffer prototype. See the base ``Store.get_bytes`` - for full documentation. - - Parameters - ---------- - key : str, optional - The key identifying the data to retrieve. Defaults to an empty string. - prototype : BufferPrototype, optional - The buffer prototype to use for reading the data. If None, uses - ``default_buffer_prototype()``. - byte_range : ByteRequest, optional - If specified, only retrieve a portion of the stored data. - - Returns - ------- - bytes - The raw bytes stored at the given key. - - Raises - ------ - FileNotFoundError - If the key does not exist in the store. - - See Also - -------- - Store.get_bytes : Base implementation with full documentation. - get_bytes_sync : Synchronous version of this method. 
- - Examples - -------- - >>> store = await LocalStore.open("data") - >>> await store.set("data", Buffer.from_bytes(b"hello")) - >>> # No need to specify prototype for LocalStore - >>> data = await store.get_bytes("data") - >>> print(data) - b'hello' - """ - if prototype is None: - prototype = default_buffer_prototype() - return await super()._get_bytes(key, prototype=prototype, byte_range=byte_range) - - def _get_bytes_sync( - self, - key: str = "", - *, - prototype: BufferPrototype | None = None, - byte_range: ByteRequest | None = None, - ) -> bytes: - """ - Retrieve raw bytes from the local store synchronously. - - This is a convenience override that makes the ``prototype`` parameter optional - by defaulting to the standard buffer prototype. See the base ``Store.get_bytes`` - for full documentation. - - Parameters - ---------- - key : str, optional - The key identifying the data to retrieve. Defaults to an empty string. - prototype : BufferPrototype, optional - The buffer prototype to use for reading the data. If None, uses - ``default_buffer_prototype()``. - byte_range : ByteRequest, optional - If specified, only retrieve a portion of the stored data. - - Returns - ------- - bytes - The raw bytes stored at the given key. - - Raises - ------ - FileNotFoundError - If the key does not exist in the store. - - Warnings - -------- - Do not call this method from async functions. Use ``get_bytes()`` instead. - - See Also - -------- - Store.get_bytes_sync : Base implementation with full documentation. - get_bytes : Asynchronous version of this method. 
- - Examples - -------- - >>> store = LocalStore("data") - >>> store.set("data", Buffer.from_bytes(b"hello")) - >>> # No need to specify prototype for LocalStore - >>> data = store.get_bytes("data") - >>> print(data) - b'hello' - """ - if prototype is None: - prototype = default_buffer_prototype() - return super()._get_bytes_sync(key, prototype=prototype, byte_range=byte_range) - - async def _get_json( - self, - key: str = "", - *, - prototype: BufferPrototype | None = None, - byte_range: ByteRequest | None = None, - ) -> Any: - """ - Retrieve and parse JSON data from the local store asynchronously. - - This is a convenience override that makes the ``prototype`` parameter optional - by defaulting to the standard buffer prototype. See the base ``Store.get_json`` - for full documentation. - - Parameters - ---------- - key : str, optional - The key identifying the JSON data to retrieve. Defaults to an empty string. - prototype : BufferPrototype, optional - The buffer prototype to use for reading the data. If None, uses - ``default_buffer_prototype()``. - byte_range : ByteRequest, optional - If specified, only retrieve a portion of the stored data. - Note: Using byte ranges with JSON may result in invalid JSON. - - Returns - ------- - Any - The parsed JSON data. This follows the behavior of ``json.loads()`` and - can be any JSON-serializable type: dict, list, str, int, float, bool, or None. - - Raises - ------ - FileNotFoundError - If the key does not exist in the store. - json.JSONDecodeError - If the stored data is not valid JSON. - - See Also - -------- - Store.get_json : Base implementation with full documentation. - get_json_sync : Synchronous version of this method. - get_bytes : Method for retrieving raw bytes without parsing. 
- - Examples - -------- - >>> store = await LocalStore.open("data") - >>> import json - >>> metadata = {"zarr_format": 3, "node_type": "array"} - >>> await store.set("zarr.json", Buffer.from_bytes(json.dumps(metadata).encode())) - >>> # No need to specify prototype for LocalStore - >>> data = await store.get_json("zarr.json") - >>> print(data) - {'zarr_format': 3, 'node_type': 'array'} - """ - if prototype is None: - prototype = default_buffer_prototype() - return await super()._get_json(key, prototype=prototype, byte_range=byte_range) - - def _get_json_sync( - self, - key: str = "", - *, - prototype: BufferPrototype | None = None, - byte_range: ByteRequest | None = None, - ) -> Any: - """ - Retrieve and parse JSON data from the local store synchronously. - - This is a convenience override that makes the ``prototype`` parameter optional - by defaulting to the standard buffer prototype. See the base ``Store.get_json`` - for full documentation. - - Parameters - ---------- - key : str, optional - The key identifying the JSON data to retrieve. Defaults to an empty string. - prototype : BufferPrototype, optional - The buffer prototype to use for reading the data. If None, uses - ``default_buffer_prototype()``. - byte_range : ByteRequest, optional - If specified, only retrieve a portion of the stored data. - Note: Using byte ranges with JSON may result in invalid JSON. - - Returns - ------- - Any - The parsed JSON data. This follows the behavior of ``json.loads()`` and - can be any JSON-serializable type: dict, list, str, int, float, bool, or None. - - Raises - ------ - FileNotFoundError - If the key does not exist in the store. - json.JSONDecodeError - If the stored data is not valid JSON. - - Warnings - -------- - Do not call this method from async functions. Use ``get_json()`` instead. - - See Also - -------- - Store.get_json_sync : Base implementation with full documentation. - get_json : Asynchronous version of this method. 
- get_bytes_sync : Method for retrieving raw bytes without parsing. - - Examples - -------- - >>> store = LocalStore("data") - >>> import json - >>> metadata = {"zarr_format": 3, "node_type": "array"} - >>> store.set("zarr.json", Buffer.from_bytes(json.dumps(metadata).encode())) - >>> # No need to specify prototype for LocalStore - >>> data = store.get_json("zarr.json") - >>> print(data) - {'zarr_format': 3, 'node_type': 'array'} - """ - if prototype is None: - prototype = default_buffer_prototype() - return super()._get_json_sync(key, prototype=prototype, byte_range=byte_range) - async def move(self, dest_root: Path | str) -> None: """ Move the store to another path. The old root directory is deleted. diff --git a/src/zarr/storage/_memory.py b/src/zarr/storage/_memory.py index bd91029732..e867706155 100644 --- a/src/zarr/storage/_memory.py +++ b/src/zarr/storage/_memory.py @@ -225,236 +225,6 @@ async def list_dir(self, prefix: str) -> AsyncIterator[str]: for key in keys_unique: yield key - async def _get_bytes( - self, - key: str = "", - *, - prototype: BufferPrototype | None = None, - byte_range: ByteRequest | None = None, - ) -> bytes: - """ - Retrieve raw bytes from the memory store asynchronously. - - This is a convenience override that makes the ``prototype`` parameter optional - by defaulting to the standard buffer prototype. See the base ``Store.get_bytes`` - for full documentation. - - Parameters - ---------- - key : str, optional - The key identifying the data to retrieve. Defaults to an empty string. - prototype : BufferPrototype, optional - The buffer prototype to use for reading the data. If None, uses - ``default_buffer_prototype()``. - byte_range : ByteRequest, optional - If specified, only retrieve a portion of the stored data. - - Returns - ------- - bytes - The raw bytes stored at the given key. - - Raises - ------ - FileNotFoundError - If the key does not exist in the store. 
- - See Also - -------- - Store.get_bytes : Base implementation with full documentation. - get_bytes_sync : Synchronous version of this method. - - Examples - -------- - >>> store = await MemoryStore.open() - >>> await store.set("data", Buffer.from_bytes(b"hello")) - >>> # No need to specify prototype for MemoryStore - >>> data = await store.get_bytes("data") - >>> print(data) - b'hello' - """ - if prototype is None: - prototype = default_buffer_prototype() - return await super()._get_bytes(key, prototype=prototype, byte_range=byte_range) - - def _get_bytes_sync( - self, - key: str = "", - *, - prototype: BufferPrototype | None = None, - byte_range: ByteRequest | None = None, - ) -> bytes: - """ - Retrieve raw bytes from the memory store synchronously. - - This is a convenience override that makes the ``prototype`` parameter optional - by defaulting to the standard buffer prototype. See the base ``Store.get_bytes`` - for full documentation. - - Parameters - ---------- - key : str, optional - The key identifying the data to retrieve. Defaults to an empty string. - prototype : BufferPrototype, optional - The buffer prototype to use for reading the data. If None, uses - ``default_buffer_prototype()``. - byte_range : ByteRequest, optional - If specified, only retrieve a portion of the stored data. - - Returns - ------- - bytes - The raw bytes stored at the given key. - - Raises - ------ - FileNotFoundError - If the key does not exist in the store. - - Warnings - -------- - Do not call this method from async functions. Use ``get_bytes()`` instead. - - See Also - -------- - Store.get_bytes_sync : Base implementation with full documentation. - get_bytes : Asynchronous version of this method. 
- - Examples - -------- - >>> store = MemoryStore() - >>> store.set("data", Buffer.from_bytes(b"hello")) - >>> # No need to specify prototype for MemoryStore - >>> data = store.get_bytes("data") - >>> print(data) - b'hello' - """ - if prototype is None: - prototype = default_buffer_prototype() - return super()._get_bytes_sync(key, prototype=prototype, byte_range=byte_range) - - async def _get_json( - self, - key: str = "", - *, - prototype: BufferPrototype | None = None, - byte_range: ByteRequest | None = None, - ) -> Any: - """ - Retrieve and parse JSON data from the memory store asynchronously. - - This is a convenience override that makes the ``prototype`` parameter optional - by defaulting to the standard buffer prototype. See the base ``Store.get_json`` - for full documentation. - - Parameters - ---------- - key : str, optional - The key identifying the JSON data to retrieve. Defaults to an empty string. - prototype : BufferPrototype, optional - The buffer prototype to use for reading the data. If None, uses - ``default_buffer_prototype()``. - byte_range : ByteRequest, optional - If specified, only retrieve a portion of the stored data. - Note: Using byte ranges with JSON may result in invalid JSON. - - Returns - ------- - Any - The parsed JSON data. This follows the behavior of ``json.loads()`` and - can be any JSON-serializable type: dict, list, str, int, float, bool, or None. - - Raises - ------ - FileNotFoundError - If the key does not exist in the store. - json.JSONDecodeError - If the stored data is not valid JSON. - - See Also - -------- - Store.get_json : Base implementation with full documentation. - get_json_sync : Synchronous version of this method. - get_bytes : Method for retrieving raw bytes without parsing. 
- - Examples - -------- - >>> store = await MemoryStore.open() - >>> import json - >>> metadata = {"zarr_format": 3, "node_type": "array"} - >>> await store.set("zarr.json", Buffer.from_bytes(json.dumps(metadata).encode())) - >>> # No need to specify prototype for MemoryStore - >>> data = await store.get_json("zarr.json") - >>> print(data) - {'zarr_format': 3, 'node_type': 'array'} - """ - if prototype is None: - prototype = default_buffer_prototype() - return await super()._get_json(key, prototype=prototype, byte_range=byte_range) - - def _get_json_sync( - self, - key: str = "", - *, - prototype: BufferPrototype | None = None, - byte_range: ByteRequest | None = None, - ) -> Any: - """ - Retrieve and parse JSON data from the memory store synchronously. - - This is a convenience override that makes the ``prototype`` parameter optional - by defaulting to the standard buffer prototype. See the base ``Store.get_json`` - for full documentation. - - Parameters - ---------- - key : str, optional - The key identifying the JSON data to retrieve. Defaults to an empty string. - prototype : BufferPrototype, optional - The buffer prototype to use for reading the data. If None, uses - ``default_buffer_prototype()``. - byte_range : ByteRequest, optional - If specified, only retrieve a portion of the stored data. - Note: Using byte ranges with JSON may result in invalid JSON. - - Returns - ------- - Any - The parsed JSON data. This follows the behavior of ``json.loads()`` and - can be any JSON-serializable type: dict, list, str, int, float, bool, or None. - - Raises - ------ - FileNotFoundError - If the key does not exist in the store. - json.JSONDecodeError - If the stored data is not valid JSON. - - Warnings - -------- - Do not call this method from async functions. Use ``get_json()`` instead. - - See Also - -------- - Store.get_json_sync : Base implementation with full documentation. - get_json : Asynchronous version of this method. 
- get_bytes_sync : Method for retrieving raw bytes without parsing. - - Examples - -------- - >>> store = MemoryStore() - >>> import json - >>> metadata = {"zarr_format": 3, "node_type": "array"} - >>> store.set("zarr.json", Buffer.from_bytes(json.dumps(metadata).encode())) - >>> # No need to specify prototype for MemoryStore - >>> data = store.get_json("zarr.json") - >>> print(data) - {'zarr_format': 3, 'node_type': 'array'} - """ - if prototype is None: - prototype = default_buffer_prototype() - return super()._get_json_sync(key, prototype=prototype, byte_range=byte_range) - class GpuMemoryStore(MemoryStore): """ diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index a0bbe6b4b2..81024c85c8 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -1,7 +1,6 @@ from __future__ import annotations import asyncio -import json import pickle from abc import abstractmethod from typing import TYPE_CHECKING, Self @@ -558,46 +557,6 @@ async def test_set_if_not_exists(self, store: S) -> None: result = await store.get("k2", default_buffer_prototype()) assert result == new - async def test_get_bytes(self, store: S) -> None: - """ - Test that the get_bytes method reads bytes. - """ - data = b"hello world" - key = "zarr.json" - await self.set(store, key, self.buffer_cls.from_bytes(data)) - assert await store._get_bytes(key, prototype=default_buffer_prototype()) == data - with pytest.raises(FileNotFoundError): - await store._get_bytes("nonexistent_key", prototype=default_buffer_prototype()) - - def test_get_bytes_sync(self, store: S) -> None: - """ - Test that the get_bytes_sync method reads bytes. - """ - data = b"hello world" - key = "zarr.json" - sync(self.set(store, key, self.buffer_cls.from_bytes(data))) - assert store._get_bytes_sync(key, prototype=default_buffer_prototype()) == data - - async def test_get_json(self, store: S) -> None: - """ - Test that the get_json method reads json. 
- """ - data = {"foo": "bar"} - data_bytes = json.dumps(data).encode("utf-8") - key = "zarr.json" - await self.set(store, key, self.buffer_cls.from_bytes(data_bytes)) - assert await store._get_json(key, prototype=default_buffer_prototype()) == data - - def test_get_json_sync(self, store: S) -> None: - """ - Test that the get_json method reads json. - """ - data = {"foo": "bar"} - data_bytes = json.dumps(data).encode("utf-8") - key = "zarr.json" - sync(self.set(store, key, self.buffer_cls.from_bytes(data_bytes))) - assert store._get_json_sync(key, prototype=default_buffer_prototype()) == data - # ------------------------------------------------------------------- # Synchronous store methods (SupportsSyncStore protocol) # ------------------------------------------------------------------- diff --git a/tests/test_json.py b/tests/test_json.py new file mode 100644 index 0000000000..17a8c631d5 --- /dev/null +++ b/tests/test_json.py @@ -0,0 +1,116 @@ +from __future__ import annotations + +import math +from typing import TYPE_CHECKING + +import pytest + +from zarr.core._json import ( + buffer_to_json, + buffer_to_json_object, + get_json, + json_to_buffer, + set_json, +) +from zarr.core.buffer import cpu, default_buffer_prototype +from zarr.storage import MemoryStore +from zarr.storage._common import StorePath + +if TYPE_CHECKING: + from zarr.core.common import JSON + + +def test_json_to_buffer_round_trips() -> None: + """`buffer_to_json` inverts `json_to_buffer` for an arbitrary JSON value.""" + obj: JSON = {"zarr_format": 3, "node_type": "group", "attributes": {"a": [1, 2, 3]}} + buffer = json_to_buffer(obj) + assert buffer_to_json(buffer) == obj + + +def test_json_to_buffer_uses_given_prototype() -> None: + """`json_to_buffer` constructs the buffer from the supplied prototype.""" + prototype = default_buffer_prototype() + buffer = json_to_buffer({"x": 1}, prototype=prototype) + assert isinstance(buffer, prototype.buffer) + + +def 
test_json_to_buffer_allows_nan_by_default() -> None: + """`json_to_buffer` permits NaN by default (writes it as `NaN`).""" + buffer = json_to_buffer({"fill_value": math.nan}) + decoded = buffer_to_json(buffer) + assert isinstance(decoded, dict) + assert math.isnan(decoded["fill_value"]) + + +def test_json_to_buffer_allow_nan_false_rejects_nan() -> None: + """`json_to_buffer(allow_nan=False)` raises on a non-finite value.""" + with pytest.raises(ValueError, match="Out of range float"): + json_to_buffer({"fill_value": math.nan}, allow_nan=False) + + +def test_json_to_buffer_indent_controls_formatting() -> None: + """`json_to_buffer(indent=...)` controls whitespace in the serialized bytes.""" + obj: JSON = {"a": 1, "b": 2} + compact = json_to_buffer(obj).to_bytes() + indented = json_to_buffer(obj, indent=2).to_bytes() + assert b"\n" not in compact + assert b"\n" in indented + # both still round-trip to the same value + assert buffer_to_json(json_to_buffer(obj, indent=2)) == obj + + +async def test_get_json_reads_existing_key() -> None: + """`get_json` returns the parsed document stored at an existing key.""" + store = MemoryStore() + obj: JSON = {"zarr_format": 3, "node_type": "array"} + await set_json(store, "zarr.json", obj) + assert await get_json(store, "zarr.json") == obj + + +async def test_get_json_returns_none_for_missing_key() -> None: + """`get_json` returns None (rather than raising) when the key is absent.""" + store = MemoryStore() + assert await get_json(store, "does-not-exist") is None + + +async def test_set_json_then_get_json_round_trips() -> None: + """`set_json` followed by `get_json` returns the original value.""" + store = MemoryStore() + obj: JSON = {"a": 1, "b": [2, 3], "c": {"d": None}} + await set_json(store, "doc.json", obj) + assert await get_json(store, "doc.json") == obj + + +async def test_storepath_get_json_reads_existing_key() -> None: + """`StorePath.get_json` reads and parses the document at its own path.""" + store = MemoryStore() + 
obj: JSON = {"zarr_format": 2} + await set_json(store, "group/.zgroup", obj) + sp = StorePath(store, "group/.zgroup") + assert await sp.get_json() == obj + + +async def test_storepath_get_json_returns_none_for_missing() -> None: + """`StorePath.get_json` returns None when its path is absent.""" + store = MemoryStore() + sp = StorePath(store, "missing") + assert await sp.get_json() is None + + +def test_buffer_to_json_on_cpu_buffer() -> None: + """`buffer_to_json` works on a plain CPU buffer built from raw bytes.""" + buffer = cpu.Buffer.from_bytes(b'{"hello": "world"}') + assert buffer_to_json(buffer) == {"hello": "world"} + + +def test_buffer_to_json_object_returns_dict() -> None: + """`buffer_to_json_object` returns the parsed object as a dict.""" + buffer = cpu.Buffer.from_bytes(b'{"node_type": "group"}') + assert buffer_to_json_object(buffer) == {"node_type": "group"} + + +def test_buffer_to_json_object_rejects_non_object() -> None: + """`buffer_to_json_object` raises TypeError when the document is not an object.""" + buffer = cpu.Buffer.from_bytes(b"[1, 2, 3]") + with pytest.raises(TypeError, match="Expected a JSON object"): + buffer_to_json_object(buffer) diff --git a/tests/test_store/test_core.py b/tests/test_store/test_core.py index e673bfd40b..f2c81b87f9 100644 --- a/tests/test_store/test_core.py +++ b/tests/test_store/test_core.py @@ -1,5 +1,5 @@ import tempfile -from collections.abc import Callable, Generator +from collections.abc import Awaitable, Callable, Generator from pathlib import Path from typing import Any, Literal @@ -8,9 +8,15 @@ import zarr from zarr import Group -from zarr.core.common import AccessModeLiteral, ZarrFormat +from zarr.core.buffer import cpu +from zarr.core.common import ZARR_JSON, AccessModeLiteral, ZarrFormat from zarr.storage import FsspecStore, LocalStore, MemoryStore, StoreLike, StorePath, ZipStore -from zarr.storage._common import contains_array, contains_group, make_store_path +from zarr.storage._common import ( + 
_contains_node_v3, + contains_array, + contains_group, + make_store_path, +) from zarr.storage._utils import ( _join_paths, _normalize_path_keys, @@ -19,6 +25,9 @@ normalize_path, ) +# contains_array and contains_group share this signature. +_ContainsFunc = Callable[[StorePath, ZarrFormat], Awaitable[bool]] + @pytest.fixture( params=["none", "temp_dir_str", "temp_dir_path", "store_path", "memory_store", "dict"] @@ -75,15 +84,69 @@ async def test_contains_array( @pytest.mark.parametrize("func", [contains_array, contains_group]) -async def test_contains_invalid_format_raises( - local_store: LocalStore, func: Callable[[Any], Any] -) -> None: +async def test_contains_invalid_format_raises(local_store: LocalStore, func: _ContainsFunc) -> None: """ Test contains_group and contains_array raise errors for invalid zarr_formats """ store_path = StorePath(local_store) with pytest.raises(ValueError): - assert await func(store_path, zarr_format="3.0") # type: ignore[call-arg] + assert await func(store_path, "3.0") # type: ignore[arg-type] + + +async def _write_zarr_json(store_path: StorePath, data: bytes) -> None: + """Write raw bytes to the v3 metadata key under `store_path`.""" + await (store_path / ZARR_JSON).set(cpu.Buffer.from_bytes(data)) + + +@pytest.mark.parametrize("func", [contains_array, contains_group]) +async def test_contains_malformed_json_returns_false( + local_store: LocalStore, func: _ContainsFunc +) -> None: + """A v3 metadata document that is not valid JSON reads as 'not present'.""" + store_path = StorePath(local_store, path="foo") + await _write_zarr_json(store_path, b"{not valid json") + assert await func(store_path, 3) is False + + +@pytest.mark.parametrize("func", [contains_array, contains_group]) +async def test_contains_non_object_json_returns_false( + local_store: LocalStore, func: _ContainsFunc +) -> None: + """A v3 metadata document that is valid JSON but not an object reads as 'not present'.""" + store_path = StorePath(local_store, path="foo") + 
await _write_zarr_json(store_path, b"[1, 2, 3]") + assert await func(store_path, 3) is False + + +@pytest.mark.parametrize("func", [contains_array, contains_group]) +async def test_contains_missing_node_type_returns_false( + local_store: LocalStore, func: _ContainsFunc +) -> None: + """A v3 metadata document with no 'node_type' key reads as 'not present'.""" + store_path = StorePath(local_store, path="foo") + await _write_zarr_json(store_path, b'{"zarr_format": 3}') + assert await func(store_path, 3) is False + + +async def test_contains_node_v3_malformed_json_returns_nothing(local_store: LocalStore) -> None: + """`_contains_node_v3` returns 'nothing' when the document is not valid JSON.""" + store_path = StorePath(local_store, path="foo") + await _write_zarr_json(store_path, b"{not valid json") + assert await _contains_node_v3(store_path) == "nothing" + + +async def test_contains_node_v3_non_object_json_returns_nothing(local_store: LocalStore) -> None: + """`_contains_node_v3` returns 'nothing' when the document is not a JSON object.""" + store_path = StorePath(local_store, path="foo") + await _write_zarr_json(store_path, b"[1, 2, 3]") + assert await _contains_node_v3(store_path) == "nothing" + + +async def test_contains_node_v3_missing_node_type_returns_nothing(local_store: LocalStore) -> None: + """`_contains_node_v3` returns 'nothing' when the document lacks a 'node_type' key.""" + store_path = StorePath(local_store, path="foo") + await _write_zarr_json(store_path, b'{"zarr_format": 3}') + assert await _contains_node_v3(store_path) == "nothing" @pytest.mark.parametrize("path", [None, "", "bar"]) diff --git a/tests/test_store/test_local.py b/tests/test_store/test_local.py index bdc9b48121..6756bc83d9 100644 --- a/tests/test_store/test_local.py +++ b/tests/test_store/test_local.py @@ -1,9 +1,7 @@ from __future__ import annotations -import json import pathlib import re -from typing import TYPE_CHECKING import numpy as np import pytest @@ -11,15 +9,11 @@ import 
zarr from zarr import create_array from zarr.core.buffer import Buffer, cpu -from zarr.core.sync import sync from zarr.storage import LocalStore from zarr.storage._local import _atomic_write from zarr.testing.store import StoreTests from zarr.testing.utils import assert_bytes_equal -if TYPE_CHECKING: - from zarr.core.buffer import BufferPrototype - class TestLocalStore(StoreTests[LocalStore, cpu.Buffer]): store_cls = LocalStore @@ -114,54 +108,6 @@ async def test_move( ): await store2.move(destination) - @pytest.mark.parametrize("buffer_cls", [None, cpu.buffer_prototype]) - async def test_get_bytes_with_prototype_none( - self, store: LocalStore, buffer_cls: None | BufferPrototype - ) -> None: - """Test that get_bytes works with prototype=None.""" - data = b"hello world" - key = "test_key" - await self.set(store, key, self.buffer_cls.from_bytes(data)) - - result = await store._get_bytes(key, prototype=buffer_cls) - assert result == data - - @pytest.mark.parametrize("buffer_cls", [None, cpu.buffer_prototype]) - def test_get_bytes_sync_with_prototype_none( - self, store: LocalStore, buffer_cls: None | BufferPrototype - ) -> None: - """Test that get_bytes_sync works with prototype=None.""" - data = b"hello world" - key = "test_key" - sync(self.set(store, key, self.buffer_cls.from_bytes(data))) - - result = store._get_bytes_sync(key, prototype=buffer_cls) - assert result == data - - @pytest.mark.parametrize("buffer_cls", [None, cpu.buffer_prototype]) - async def test_get_json_with_prototype_none( - self, store: LocalStore, buffer_cls: None | BufferPrototype - ) -> None: - """Test that get_json works with prototype=None.""" - data = {"foo": "bar", "number": 42} - key = "test.json" - await self.set(store, key, self.buffer_cls.from_bytes(json.dumps(data).encode())) - - result = await store._get_json(key, prototype=buffer_cls) - assert result == data - - @pytest.mark.parametrize("buffer_cls", [None, cpu.buffer_prototype]) - def test_get_json_sync_with_prototype_none( - 
self, store: LocalStore, buffer_cls: None | BufferPrototype - ) -> None: - """Test that get_json_sync works with prototype=None.""" - data = {"foo": "bar", "number": 42} - key = "test.json" - sync(self.set(store, key, self.buffer_cls.from_bytes(json.dumps(data).encode()))) - - result = store._get_json_sync(key, prototype=buffer_cls) - assert result == data - @pytest.mark.parametrize("exclusive", [True, False]) def test_atomic_write_successful(tmp_path: pathlib.Path, exclusive: bool) -> None: diff --git a/tests/test_store/test_memory.py b/tests/test_store/test_memory.py index 92e292bef1..1e3ee89e92 100644 --- a/tests/test_store/test_memory.py +++ b/tests/test_store/test_memory.py @@ -1,6 +1,5 @@ from __future__ import annotations -import json import re from typing import TYPE_CHECKING, Any @@ -10,14 +9,12 @@ import zarr from zarr.core.buffer import Buffer, cpu, gpu -from zarr.core.sync import sync from zarr.errors import ZarrUserWarning from zarr.storage import GpuMemoryStore, ManagedMemoryStore, MemoryStore from zarr.testing.store import StoreTests from zarr.testing.utils import gpu_test if TYPE_CHECKING: - from zarr.core.buffer import BufferPrototype from zarr.core.common import ZarrFormat @@ -79,54 +76,6 @@ async def test_deterministic_size( np.testing.assert_array_equal(a[:3], 1) np.testing.assert_array_equal(a[3:], 0) - @pytest.mark.parametrize("buffer_cls", [None, cpu.buffer_prototype]) - async def test_get_bytes_with_prototype_none( - self, store: MemoryStore, buffer_cls: None | BufferPrototype - ) -> None: - """Test that get_bytes works with prototype=None.""" - data = b"hello world" - key = "test_key" - await self.set(store, key, self.buffer_cls.from_bytes(data)) - - result = await store._get_bytes(key, prototype=buffer_cls) - assert result == data - - @pytest.mark.parametrize("buffer_cls", [None, cpu.buffer_prototype]) - def test_get_bytes_sync_with_prototype_none( - self, store: MemoryStore, buffer_cls: None | BufferPrototype - ) -> None: - """Test that 
get_bytes_sync works with prototype=None.""" - data = b"hello world" - key = "test_key" - sync(self.set(store, key, self.buffer_cls.from_bytes(data))) - - result = store._get_bytes_sync(key, prototype=buffer_cls) - assert result == data - - @pytest.mark.parametrize("buffer_cls", [None, cpu.buffer_prototype]) - async def test_get_json_with_prototype_none( - self, store: MemoryStore, buffer_cls: None | BufferPrototype - ) -> None: - """Test that get_json works with prototype=None.""" - data = {"foo": "bar", "number": 42} - key = "test.json" - await self.set(store, key, self.buffer_cls.from_bytes(json.dumps(data).encode())) - - result = await store._get_json(key, prototype=buffer_cls) - assert result == data - - @pytest.mark.parametrize("buffer_cls", [None, cpu.buffer_prototype]) - def test_get_json_sync_with_prototype_none( - self, store: MemoryStore, buffer_cls: None | BufferPrototype - ) -> None: - """Test that get_json_sync works with prototype=None.""" - data = {"foo": "bar", "number": 42} - key = "test.json" - sync(self.set(store, key, self.buffer_cls.from_bytes(json.dumps(data).encode()))) - - result = store._get_json_sync(key, prototype=buffer_cls) - assert result == data - # TODO: fix this warning @pytest.mark.filterwarnings("ignore:Unclosed client session:ResourceWarning") @@ -324,54 +273,6 @@ async def test_deterministic_size( np.testing.assert_array_equal(a[:3], 1) np.testing.assert_array_equal(a[3:], 0) - @pytest.mark.parametrize("buffer_cls", [None, cpu.buffer_prototype]) - async def test_get_bytes_with_prototype_none( - self, store: ManagedMemoryStore, buffer_cls: None | BufferPrototype - ) -> None: - """Test that get_bytes works with prototype=None.""" - data = b"hello world" - key = "test_key" - await self.set(store, key, self.buffer_cls.from_bytes(data)) - - result = await store._get_bytes(key, prototype=buffer_cls) - assert result == data - - @pytest.mark.parametrize("buffer_cls", [None, cpu.buffer_prototype]) - def 
test_get_bytes_sync_with_prototype_none( - self, store: ManagedMemoryStore, buffer_cls: None | BufferPrototype - ) -> None: - """Test that get_bytes_sync works with prototype=None.""" - data = b"hello world" - key = "test_key" - sync(self.set(store, key, self.buffer_cls.from_bytes(data))) - - result = store._get_bytes_sync(key, prototype=buffer_cls) - assert result == data - - @pytest.mark.parametrize("buffer_cls", [None, cpu.buffer_prototype]) - async def test_get_json_with_prototype_none( - self, store: ManagedMemoryStore, buffer_cls: None | BufferPrototype - ) -> None: - """Test that get_json works with prototype=None.""" - data = {"foo": "bar", "number": 42} - key = "test.json" - await self.set(store, key, self.buffer_cls.from_bytes(json.dumps(data).encode())) - - result = await store._get_json(key, prototype=buffer_cls) - assert result == data - - @pytest.mark.parametrize("buffer_cls", [None, cpu.buffer_prototype]) - def test_get_json_sync_with_prototype_none( - self, store: ManagedMemoryStore, buffer_cls: None | BufferPrototype - ) -> None: - """Test that get_json_sync works with prototype=None.""" - data = {"foo": "bar", "number": 42} - key = "test.json" - sync(self.set(store, key, self.buffer_cls.from_bytes(json.dumps(data).encode()))) - - result = store._get_json_sync(key, prototype=buffer_cls) - assert result == data - def test_from_url(self, store: ManagedMemoryStore) -> None: """Test that from_url creates a store sharing the same dict.""" url = str(store)