From ede5752eb65d2666fa4fc27945021cfd43eeb5cf Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 10 Apr 2026 14:50:44 +0200 Subject: [PATCH 1/5] chore: make types more accurate --- src/zarr/core/metadata/v3.py | 49 +++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 7773e2489d..681253225b 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -3,7 +3,9 @@ import json from collections.abc import Iterable, Mapping, Sequence from dataclasses import dataclass, field, replace -from typing import TYPE_CHECKING, Any, Literal, NotRequired, TypedDict, TypeGuard, cast +from typing import TYPE_CHECKING, Any, Final, Literal, NotRequired, TypeGuard, cast + +from typing_extensions import TypedDict from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec from zarr.abc.metadata import Metadata @@ -136,10 +138,11 @@ def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]: ) -class AllowedExtraField(TypedDict): +class AllowedExtraField(TypedDict, extra_items=JSON): # type: ignore[call-arg] """ This class models allowed extra fields in array metadata. - They are ignored by Zarr Python. + They must have ``must_understand`` set to ``False``, and may contain + arbitrary additional JSON data. """ must_understand: Literal[False] @@ -411,25 +414,43 @@ def parse_chunk_grid( raise ValueError(f"Unknown chunk grid name: {name!r}") -class ArrayMetadataJSON_V3(TypedDict): +class ArrayMetadataJSON_V3(TypedDict, extra_items=AllowedExtraField): # type: ignore[call-arg] """ - A typed dictionary model for zarr v3 metadata. + A typed dictionary model for zarr v3 array metadata. + + Extra keys are permitted if they conform to ``AllowedExtraField`` + (i.e. they are mappings with ``must_understand: false``). """ zarr_format: Literal[3] node_type: Literal["array"] - data_type: str | NamedConfig[str, Mapping[str, object]] + data_type: str | NamedConfig[str, Mapping[str, JSON]] shape: tuple[int, ...] - chunk_grid: NamedConfig[str, Mapping[str, object]] - chunk_key_encoding: NamedConfig[str, Mapping[str, object]] - fill_value: object - codecs: tuple[str | NamedConfig[str, Mapping[str, object]], ...] + chunk_grid: str | NamedConfig[str, Mapping[str, JSON]] + chunk_key_encoding: str | NamedConfig[str, Mapping[str, JSON]] + fill_value: JSON + codecs: tuple[str | NamedConfig[str, Mapping[str, JSON]], ...] attributes: NotRequired[Mapping[str, JSON]] - storage_transformers: NotRequired[tuple[NamedConfig[str, Mapping[str, object]], ...]] + storage_transformers: NotRequired[tuple[str | NamedConfig[str, Mapping[str, JSON]], ...]] dimension_names: NotRequired[tuple[str | None]] -ARRAY_METADATA_KEYS = set(ArrayMetadataJSON_V3.__annotations__.keys()) +""" +The names of the fields of the array metadata document defined in the zarr V3 spec. +""" +ARRAY_METADATA_KEYS: Final[set[str]] = { + "zarr_format", + "node_type", + "data_type", + "shape", + "chunk_grid", + "chunk_key_encoding", + "fill_value", + "codecs", + "attributes", + "storage_transformers", + "dimension_names", +} @dataclass(frozen=True, kw_only=True) @@ -617,8 +638,8 @@ def from_dict(cls, data: dict[str, JSON]) -> Self: return cls( shape=_data_typed["shape"], - chunk_grid=_data_typed["chunk_grid"], - chunk_key_encoding=_data_typed["chunk_key_encoding"], + chunk_grid=_data_typed["chunk_grid"], # type: ignore[arg-type] + chunk_key_encoding=_data_typed["chunk_key_encoding"], # type: ignore[arg-type] codecs=_data_typed["codecs"], attributes=_data_typed.get("attributes", {}), # type: ignore[arg-type] dimension_names=_data_typed.get("dimension_names", None), From b1d6e86059d0714db080b098b952adc66af9a910 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 10 Apr 2026 14:59:55 +0200 Subject: [PATCH 2/5] test: add test helpers and simplify metadata tests --- tests/conftest.py | 19 + tests/test_metadata/conftest.py | 41 +++ tests/test_metadata/test_v3.py | 608 +++++++++++++------------------- 3 files changed, 298 insertions(+), 370 deletions(-) create mode 100644 tests/test_metadata/conftest.py diff --git a/tests/conftest.py b/tests/conftest.py index a02006d6a9..bc9b8c35aa 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -58,6 +58,25 @@ from zarr.core.dtype.wrapper import ZDType +@dataclass +class Expect[TIn, TOut]: + """A test case with explicit input, expected output, and a human-readable id.""" + + input: TIn + output: TOut + id: str + + +@dataclass +class ExpectFail[TIn]: + """A test case that should raise an exception.""" + + input: TIn + exception: type[Exception] + id: str + msg: str | None = None + + async def parse_store( store: Literal["local", "memory", "fsspec", "zip", "memory_get_latency"], path: str ) -> LocalStore | MemoryStore | FsspecStore | ZipStore | LatencyStore: diff --git a/tests/test_metadata/conftest.py b/tests/test_metadata/conftest.py new file mode 100644 index 0000000000..9984a1cfec --- /dev/null +++ b/tests/test_metadata/conftest.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +from zarr.codecs.bytes import BytesCodec + +if TYPE_CHECKING: + from zarr.core.metadata.v3 import ArrayMetadataJSON_V3 + + +def minimal_metadata_dict_v3( + extra_fields: dict[str, Any] | None = None, **overrides: Any +) -> ArrayMetadataJSON_V3: + """Build a minimal valid V3 array metadata JSON dict. + + The output matches the shape of ``ArrayV3Metadata.to_dict()`` — all + fields that ``to_dict`` always emits are included. + + Parameters + ---------- + extra_fields : dict, optional + Extra keys to inject into the dict (e.g. extension fields). + **overrides + Override any of the standard metadata fields. + """ + d: ArrayMetadataJSON_V3 = { + "zarr_format": 3, + "node_type": "array", + "shape": (4, 4), + "data_type": "uint8", + "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (4, 4)}}, + "chunk_key_encoding": {"name": "default", "configuration": {"separator": "/"}}, + "fill_value": 0, + "codecs": (BytesCodec().to_dict(),), + "attributes": {}, + "storage_transformers": (), + } + d.update(overrides) + if extra_fields is not None: + d.update(extra_fields) + return d diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index 8658b7b393..2be1d040be 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -1,406 +1,255 @@ +"""Tests for zarr v3 metadata classes and parsing helpers.""" + from __future__ import annotations import json -import re -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING -import numpy as np import pytest -from zarr import consolidate_metadata, create_group -from zarr.codecs.bytes import BytesCodec +from tests.conftest import Expect, ExpectFail +from tests.test_metadata.conftest import minimal_metadata_dict_v3 from zarr.core.buffer import default_buffer_prototype -from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding from zarr.core.config import config -from zarr.core.dtype import UInt8, get_data_type_from_native_dtype -from zarr.core.dtype.npy.string import _NUMPY_SUPPORTS_VLEN_STRING -from zarr.core.dtype.npy.time import DateTime64 +from zarr.core.dtype import UInt8 from zarr.core.group import GroupMetadata, parse_node_type from zarr.core.metadata.v3 import ( - ArrayMetadataJSON_V3, ArrayV3Metadata, parse_codecs, parse_dimension_names, + parse_node_type_array, parse_zarr_format, ) from zarr.errors import ( MetadataValidationError, NodeTypeValidationError, UnknownCodecError, - ZarrUserWarning, ) if TYPE_CHECKING: - from collections.abc import Sequence from typing import Any - from zarr.core.types import JSON - - from zarr.abc.codec import Codec - -from zarr.core.metadata.v3 import ( - parse_node_type_array, -) +# --------------------------------------------------------------------------- +# Parsing helpers +# --------------------------------------------------------------------------- -bool_dtypes = ("bool",) - -int_dtypes = ( - "int8", - "int16", - "int32", - "int64", - "uint8", - "uint16", - "uint32", - "uint64", -) -float_dtypes = ( - "float16", - "float32", - "float64", -) - -complex_dtypes = ("complex64", "complex128") -flexible_dtypes = ("str", "bytes", "void") -if _NUMPY_SUPPORTS_VLEN_STRING: - vlen_string_dtypes = ("T",) -else: - vlen_string_dtypes = ("O",) - -dtypes = ( - *bool_dtypes, - *int_dtypes, - *float_dtypes, - *complex_dtypes, - *flexible_dtypes, - *vlen_string_dtypes, -) +def test_parse_zarr_format_valid() -> None: + """The integer 3 is the only valid zarr_format for v3.""" + assert parse_zarr_format(3) == 3 @pytest.mark.parametrize("data", [None, 1, 2, 4, 5, "3"]) def test_parse_zarr_format_invalid(data: Any) -> None: - with pytest.raises( - MetadataValidationError, - match=f"Invalid value for 'zarr_format'. Expected '3'. Got '{data}'.", - ): + """Non-3 values are rejected.""" + with pytest.raises(MetadataValidationError): parse_zarr_format(data) -def test_parse_zarr_format_valid() -> None: - assert parse_zarr_format(3) == 3 - - def test_parse_node_type_valid() -> None: + """'array' and 'group' are the only valid node types.""" assert parse_node_type("array") == "array" assert parse_node_type("group") == "group" -@pytest.mark.parametrize("node_type", [None, 2, "other"]) -def test_parse_node_type_invalid(node_type: Any) -> None: - with pytest.raises( - MetadataValidationError, - match=f"Invalid value for 'node_type'. Expected 'array' or 'group'. Got '{node_type}'.", - ): - parse_node_type(node_type) +@pytest.mark.parametrize("data", [None, 2, "other"]) +def test_parse_node_type_invalid(data: Any) -> None: + """Non-string and unrecognized values are rejected.""" + with pytest.raises(MetadataValidationError): + parse_node_type(data) + + +def test_parse_node_type_array_valid() -> None: + """parse_node_type_array accepts only 'array'.""" + assert parse_node_type_array("array") == "array" @pytest.mark.parametrize("data", [None, "group"]) def test_parse_node_type_array_invalid(data: Any) -> None: - with pytest.raises( - NodeTypeValidationError, - match=f"Invalid value for 'node_type'. Expected 'array'. Got '{data}'.", - ): + """parse_node_type_array rejects 'group' and non-string values.""" + with pytest.raises(NodeTypeValidationError): parse_node_type_array(data) -def test_parse_node_typev_array_alid() -> None: - assert parse_node_type_array("array") == "array" +@pytest.mark.parametrize("data", [None, ("a", "b", "c"), ["a", "a", "a"], ()]) +def test_parse_dimension_names_valid(data: Any) -> None: + """None, tuples of strings, lists of strings, and empty tuples are accepted.""" + result = parse_dimension_names(data) + if data is None: + assert result is None + else: + assert result == tuple(data) -@pytest.mark.parametrize("data", [(), [1, 2, "a"], {"foo": 10}]) -def parse_dimension_names_invalid(data: Any) -> None: - with pytest.raises(TypeError, match="Expected either None or iterable of str,"): +@pytest.mark.parametrize("data", [[1, 2, "a"], [None, 3]]) +def test_parse_dimension_names_invalid(data: Any) -> None: + """Iterables containing non-string elements are rejected.""" + with pytest.raises(TypeError, match="Expected either None or"): parse_dimension_names(data) -@pytest.mark.parametrize("data", [None, ("a", "b", "c"), ["a", "a", "a"]]) -def parse_dimension_names_valid(data: Sequence[str] | None) -> None: - assert parse_dimension_names(data) == data +def test_parse_codecs_unknown_raises(monkeypatch: pytest.MonkeyPatch) -> None: + """An unregistered codec name raises UnknownCodecError.""" + from collections import defaultdict + import zarr.registry + from zarr.registry import Registry + + monkeypatch.setattr(zarr.registry, "_codec_registries", defaultdict(Registry)) + with pytest.raises(UnknownCodecError): + parse_codecs([{"name": "unknown"}]) -@pytest.mark.parametrize("fill_value", [[1.0, 0.0], [0, 1]]) -@pytest.mark.parametrize("dtype_str", [*complex_dtypes]) -def test_jsonify_fill_value_complex(fill_value: Any, dtype_str: str) -> None: - """ - Test that parse_fill_value(fill_value, dtype) correctly handles complex values represented - as length-2 sequences - """ - zarr_format: Literal[3] = 3 - dtype = get_data_type_from_native_dtype(dtype_str) - expected = dtype.to_native_dtype().type(complex(*fill_value)) - observed = dtype.from_json_scalar(fill_value, zarr_format=zarr_format) - assert observed == expected - assert dtype.to_json_scalar(observed, zarr_format=zarr_format) == tuple(fill_value) +# --------------------------------------------------------------------------- +# ArrayV3Metadata: round-trip +# --------------------------------------------------------------------------- -@pytest.mark.parametrize("fill_value", [{"foo": 10}]) -@pytest.mark.parametrize("dtype_str", [*int_dtypes, *float_dtypes, *complex_dtypes]) -def test_parse_fill_value_invalid_type(fill_value: Any, dtype_str: str) -> None: - """ - Test that parse_fill_value(fill_value, dtype) raises TypeError for invalid non-sequential types. - This test excludes bool because the bool constructor takes anything. - """ - dtype_instance = get_data_type_from_native_dtype(dtype_str) - with pytest.raises(TypeError, match=f"Invalid type: {fill_value}"): - dtype_instance.from_json_scalar(fill_value, zarr_format=3) +# Codecs after evolution for single-byte (uint8) and multi-byte (float64) types. +_UINT8_CODECS = ({"name": "bytes"},) +_FLOAT64_CODECS = ({"name": "bytes", "configuration": {"endian": "little"}},) @pytest.mark.parametrize( - "fill_value", + "case", [ - [ - 1, - ], - (1, 23, 4), + Expect( + input={}, + output=minimal_metadata_dict_v3(codecs=_UINT8_CODECS), + id="minimal", + ), + Expect( + input={"attributes": {"key": "value"}}, + output=minimal_metadata_dict_v3(attributes={"key": "value"}, codecs=_UINT8_CODECS), + id="with_attributes", + ), + Expect( + input={"dimension_names": ("x", "y")}, + output=minimal_metadata_dict_v3(dimension_names=("x", "y"), codecs=_UINT8_CODECS), + id="with_dimension_names", + ), + Expect( + input={"storage_transformers": ()}, + output=minimal_metadata_dict_v3(storage_transformers=(), codecs=_UINT8_CODECS), + id="with_storage_transformers", + ), + Expect( + input={"data_type": "float64", "fill_value": 0.0}, + output=minimal_metadata_dict_v3( + data_type="float64", fill_value=0.0, codecs=_FLOAT64_CODECS + ), + id="float64", + ), + Expect( + input={"chunk_key_encoding": {"name": "v2", "configuration": {"separator": "."}}}, + output=minimal_metadata_dict_v3( + chunk_key_encoding={"name": "v2", "configuration": {"separator": "."}}, + codecs=_UINT8_CODECS, + ), + id="v2_chunk_key_encoding", + ), + Expect( + input={"data_type": "float64", "fill_value": "NaN"}, + output=minimal_metadata_dict_v3( + data_type="float64", fill_value="NaN", codecs=_FLOAT64_CODECS + ), + id="nan_fill_value", + ), + Expect( + input={"data_type": "float64", "fill_value": "Infinity"}, + output=minimal_metadata_dict_v3( + data_type="float64", fill_value="Infinity", codecs=_FLOAT64_CODECS + ), + id="inf_fill_value", + ), + Expect( + input={"data_type": "float64", "fill_value": "-Infinity"}, + output=minimal_metadata_dict_v3( + data_type="float64", fill_value="-Infinity", codecs=_FLOAT64_CODECS + ), + id="neg_inf_fill_value", + ), + Expect( + input={ + "attributes": {}, + "storage_transformers": (), + "extra_fields": {"my_ext": {"must_understand": False, "data": [1, 2, 3]}}, + }, + output=minimal_metadata_dict_v3( + attributes={}, + storage_transformers=(), + codecs=_UINT8_CODECS, + extra_fields={"my_ext": {"must_understand": False, "data": [1, 2, 3]}}, + ), + id="extra_fields", + ), ], + ids=lambda case: case.id, ) -@pytest.mark.parametrize("dtype_str", [*int_dtypes, *float_dtypes]) -def test_parse_fill_value_invalid_type_sequence(fill_value: Any, dtype_str: str) -> None: - """ - Test that parse_fill_value(fill_value, dtype) raises TypeError for invalid sequential types. - This test excludes bool because the bool constructor takes anything, and complex because - complex values can be created from length-2 sequences. - """ - dtype_instance = get_data_type_from_native_dtype(dtype_str) - with pytest.raises(TypeError, match=re.escape(f"Invalid type: {fill_value}")): - dtype_instance.from_json_scalar(fill_value, zarr_format=3) - - -@pytest.mark.parametrize("chunk_grid", ["regular"]) -@pytest.mark.parametrize("attributes", [None, {"foo": "bar"}]) -@pytest.mark.parametrize("codecs", [[BytesCodec(endian=None)]]) -@pytest.mark.parametrize("fill_value", [0, 1]) -@pytest.mark.parametrize("chunk_key_encoding", ["v2", "default"]) -@pytest.mark.parametrize("dimension_separator", [".", "/", None]) -@pytest.mark.parametrize("dimension_names", ["nones", "strings", "missing"]) -@pytest.mark.parametrize("storage_transformers", [None, ()]) -def test_metadata_to_dict( - chunk_grid: str, - codecs: list[Codec], - fill_value: Any, - chunk_key_encoding: Literal["v2", "default"], - dimension_separator: Literal[".", "/"] | None, - dimension_names: Literal["nones", "strings", "missing"], - attributes: dict[str, Any] | None, - storage_transformers: tuple[dict[str, JSON]] | None, -) -> None: - shape = (1, 2, 3) - data_type_str = "uint8" - if chunk_grid == "regular": - cgrid = {"name": "regular", "configuration": {"chunk_shape": (1, 1, 1)}} - - cke: dict[str, Any] - cke_name_dict = {"name": chunk_key_encoding} - if dimension_separator is not None: - cke = cke_name_dict | {"configuration": {"separator": dimension_separator}} - else: - cke = cke_name_dict - dnames: tuple[str | None, ...] | None - - if dimension_names == "strings": - dnames = tuple(map(str, range(len(shape)))) - elif dimension_names == "missing": - dnames = None - elif dimension_names == "nones": - dnames = (None,) * len(shape) - - metadata_dict = { - "zarr_format": 3, - "node_type": "array", - "shape": shape, - "chunk_grid": cgrid, - "data_type": data_type_str, - "chunk_key_encoding": cke, - "codecs": tuple(c.to_dict() for c in codecs), - "fill_value": fill_value, - "storage_transformers": storage_transformers, - } - - if attributes is not None: - metadata_dict["attributes"] = attributes - if dnames is not None: - metadata_dict["dimension_names"] = dnames - - metadata = ArrayV3Metadata.from_dict(metadata_dict) - observed = metadata.to_dict() - expected = metadata_dict.copy() +def test_array_metadata_roundtrip(case: Expect[dict[str, Any], dict[str, Any]]) -> None: + """from_dict(d).to_dict() produces the expected output, including codec evolution.""" + d = minimal_metadata_dict_v3(**case.input) + m = ArrayV3Metadata.from_dict(d) + assert m.to_dict() == case.output - # if unset or None or (), storage_transformers gets normalized to () - assert observed["storage_transformers"] == () - observed.pop("storage_transformers") - expected.pop("storage_transformers") - if attributes is None: - assert observed["attributes"] == {} - observed.pop("attributes") - - if dimension_separator is None: - if chunk_key_encoding == "default": - expected_cke_dict = DefaultChunkKeyEncoding(separator="/").to_dict() - else: - expected_cke_dict = V2ChunkKeyEncoding(separator=".").to_dict() - assert observed["chunk_key_encoding"] == expected_cke_dict - observed.pop("chunk_key_encoding") - expected.pop("chunk_key_encoding") - assert observed == expected - - -@pytest.mark.parametrize("indent", [2, 4, None]) -def test_json_indent(indent: int) -> None: - with config.set({"json_indent": indent}): - m = GroupMetadata() - d = m.to_buffer_dict(default_buffer_prototype())["zarr.json"].to_bytes() - assert d == json.dumps(json.loads(d), indent=indent).encode() - - -@pytest.mark.parametrize("fill_value", [-1, 0, 1, 2932897]) -@pytest.mark.parametrize("precision", ["ns", "D"]) -async def test_datetime_metadata(fill_value: int, precision: Literal["ns", "D"]) -> None: - dtype = DateTime64(unit=precision) - metadata_dict: dict[str, Any] = { - "zarr_format": 3, - "node_type": "array", - "shape": (1,), - "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, - "data_type": dtype.to_json(zarr_format=3), - "chunk_key_encoding": {"name": "default", "separator": "."}, - "codecs": (BytesCodec(),), - "fill_value": dtype.to_json_scalar( - dtype.to_native_dtype().type(fill_value, dtype.unit), zarr_format=3 - ), - } - metadata = ArrayV3Metadata.from_dict(metadata_dict) - # ensure there isn't a TypeError here. - d = metadata.to_buffer_dict(default_buffer_prototype()) - - result = json.loads(d["zarr.json"].to_bytes()) - assert result["fill_value"] == fill_value +# --------------------------------------------------------------------------- +# ArrayV3Metadata: failure modes +# --------------------------------------------------------------------------- @pytest.mark.parametrize( - ("data_type", "fill_value"), [("uint8", {}), ("int32", [0, 1]), ("float32", "foo")] + "case", + [ + ExpectFail( + input={"dimension_names": ("x", "y", "z")}, + exception=ValueError, + msg="dimension_names.*shape", + id="dimension_names_length_mismatch", + ), + ExpectFail( + input={"data_type": "uint8", "fill_value": {}}, + exception=TypeError, + id="invalid_fill_value_type", + ), + ], + ids=lambda case: case.id, ) -async def test_invalid_fill_value_raises(data_type: str, fill_value: float) -> None: - metadata_dict: dict[str, Any] = { - "zarr_format": 3, - "node_type": "array", - "shape": (1,), - "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, - "data_type": data_type, - "chunk_key_encoding": {"name": "default", "separator": "."}, - "codecs": ({"name": "bytes"},), - "fill_value": fill_value, # this is not a valid fill value for uint8 - } - # multiple things can go wrong here, so we don't match on the error message. - with pytest.raises(TypeError): - ArrayV3Metadata.from_dict(metadata_dict) - - -@pytest.mark.parametrize("fill_value", [("NaN"), "Infinity", "-Infinity"]) -async def test_special_float_fill_values(fill_value: str) -> None: - metadata_dict: dict[str, Any] = { - "zarr_format": 3, - "node_type": "array", - "shape": (1,), - "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, - "data_type": "float64", - "chunk_key_encoding": {"name": "default", "separator": "."}, - "codecs": [{"name": "bytes"}], - "fill_value": fill_value, # this is not a valid fill value for uint8 - } - m = ArrayV3Metadata.from_dict(metadata_dict) - d = json.loads(m.to_buffer_dict(default_buffer_prototype())["zarr.json"].to_bytes()) - assert m.fill_value is not None - if fill_value == "NaN": - assert np.isnan(m.fill_value) - assert d["fill_value"] == "NaN" - elif fill_value == "Infinity": - assert np.isposinf(m.fill_value) - assert d["fill_value"] == "Infinity" - elif fill_value == "-Infinity": - assert np.isneginf(m.fill_value) - assert d["fill_value"] == "-Infinity" - - -def test_parse_codecs_unknown_codec_raises(monkeypatch: pytest.MonkeyPatch) -> None: - from collections import defaultdict - - import zarr.registry - from zarr.registry import Registry - - # to make sure the codec is always unknown (not sure if that's necessary) - monkeypatch.setattr(zarr.registry, "_codec_registries", defaultdict(Registry)) - - codecs = [{"name": "unknown"}] - with pytest.raises(UnknownCodecError): - parse_codecs(codecs) +def test_array_metadata_from_dict_fails(case: ExpectFail[dict[str, Any]]) -> None: + """from_dict rejects invalid metadata documents.""" + d = minimal_metadata_dict_v3(**case.input) + with pytest.raises(case.exception, match=case.msg): + ArrayV3Metadata.from_dict(d) @pytest.mark.parametrize( - "extra_value", + "case", [ - {"must_understand": False, "param": 10}, - {"must_understand": True}, - 10, + ExpectFail( + input=minimal_metadata_dict_v3(extra_fields={"my_ext": {"must_understand": True}}), + exception=MetadataValidationError, + msg="disallowed extra fields", + id="must_understand_true", + ), + ExpectFail( + input=minimal_metadata_dict_v3(extra_fields={"my_ext": 42}), + exception=MetadataValidationError, + msg="disallowed extra fields", + id="non_dict_extra_field", + ), ], + ids=lambda case: case.id, ) -def test_from_dict_extra_fields(extra_value: dict[str, object] | int) -> None: - """ - Test that from_dict accepts extra fields if they have are a JSON object with - "must_understand": false, and raises an exception otherwise. - """ - metadata_dict: ArrayMetadataJSON_V3 = { # type: ignore[typeddict-unknown-key] - "zarr_format": 3, - "node_type": "array", - "shape": (1,), - "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, - "data_type": "uint8", - "chunk_key_encoding": {"name": "default", "configuration": {"separator": "."}}, - "codecs": ({"name": "bytes"},), - "fill_value": 0, - "storage_transformers": (), - "attributes": {}, - "foo": extra_value, - } - - if isinstance(extra_value, dict) and extra_value.get("must_understand") is False: - # should be accepted - metadata = ArrayV3Metadata.from_dict(metadata_dict) # type: ignore[arg-type] - assert isinstance(metadata, ArrayV3Metadata) - assert metadata.to_dict() == metadata_dict - else: - # should raise an exception - with pytest.raises(MetadataValidationError, match="Got a Zarr V3 metadata document"): - metadata = ArrayV3Metadata.from_dict(metadata_dict) # type: ignore[arg-type] +def test_array_metadata_extra_fields_rejected(case: ExpectFail[dict[str, Any]]) -> None: + """from_dict rejects extra fields that don't conform to the spec.""" + with pytest.raises(case.exception, match=case.msg): + ArrayV3Metadata.from_dict(case.input) -def test_init_invalid_extra_fields() -> None: - """ - Test that initializing ArrayV3Metadata with extra fields fails when those fields - shadow the array metadata fields. - """ +def test_init_extra_fields_collision() -> None: + """Extra field keys that collide with reserved metadata field names are rejected.""" extra_fields: dict[str, object] = {"shape": (10,), "data_type": "uint8"} - conflict_keys = set(extra_fields.keys()) - msg = ( - "Invalid extra fields. " - "The following keys: " - f"{sorted(conflict_keys)} " - "are invalid because they collide with keys reserved for use by the " - "array metadata document." - ) - with pytest.raises(ValueError, match=re.escape(msg)): + with pytest.raises(ValueError, match="collide with keys reserved"): ArrayV3Metadata( shape=(10,), data_type=UInt8(), @@ -410,54 +259,73 @@ def test_init_invalid_extra_fields() -> None: codecs=({"name": "bytes", "configuration": {"endian": "little"}},), attributes={}, dimension_names=None, - extra_fields=extra_fields, # type: ignore[arg-type] + extra_fields=extra_fields, ) -@pytest.mark.parametrize("use_consolidated", [True, False]) +# --------------------------------------------------------------------------- +# JSON indent +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("indent", [2, 4, None]) +def test_json_indent(indent: int | None) -> None: + """The json_indent config setting controls indentation in to_buffer_dict output.""" + with config.set({"json_indent": indent}): + m = GroupMetadata() + d = m.to_buffer_dict(default_buffer_prototype())["zarr.json"].to_bytes() + assert d == json.dumps(json.loads(d), indent=indent).encode() + + +# --------------------------------------------------------------------------- +# GroupMetadata.to_dict +# --------------------------------------------------------------------------- + + @pytest.mark.parametrize("attributes", [None, {"foo": "bar"}]) -def test_group_to_dict(use_consolidated: bool, attributes: None | dict[str, Any]) -> None: - """ - Test that the output of GroupMetadata.to_dict() is what we expect - """ - store: dict[str, object] = {} - if attributes is None: - expect_attributes = {} - else: - expect_attributes = attributes +def test_group_metadata_to_dict(attributes: dict[str, Any] | None) -> None: + """GroupMetadata.to_dict produces the expected v3 JSON structure.""" + meta = GroupMetadata(attributes=attributes) + assert meta.to_dict() == { + "zarr_format": 3, + "node_type": "group", + "attributes": attributes or {}, + } + + +@pytest.mark.parametrize("attributes", [None, {"foo": "bar"}]) +def test_group_metadata_to_dict_consolidated(attributes: dict[str, Any] | None) -> None: + """GroupMetadata.to_dict includes consolidated_metadata when present.""" + from zarr import consolidate_metadata, create_group + from zarr.errors import ZarrUserWarning + store: dict[str, object] = {} group = create_group(store, attributes=attributes, zarr_format=3) group.create_group("foo") - if use_consolidated: - with pytest.warns( - ZarrUserWarning, - match="Consolidated metadata is currently not part in the Zarr format 3 specification.", - ): - group = consolidate_metadata(store) - meta = group.metadata - expect = { - "node_type": "group", - "zarr_format": 3, - "consolidated_metadata": { - "kind": "inline", - "must_understand": False, - "metadata": { - "foo": { - "attributes": {}, - "zarr_format": 3, - "node_type": "group", - "consolidated_metadata": { - "kind": "inline", - "metadata": {}, - "must_understand": False, - }, - } - }, - }, - "attributes": expect_attributes, - } - else: - meta = group.metadata - expect = {"node_type": "group", "zarr_format": 3, "attributes": expect_attributes} + with pytest.warns( + ZarrUserWarning, + match="Consolidated metadata is currently not part in the Zarr format 3 specification.", + ): + group = consolidate_metadata(store) - assert meta.to_dict() == expect + assert group.metadata.to_dict() == { + "zarr_format": 3, + "node_type": "group", + "attributes": attributes or {}, + "consolidated_metadata": { + "kind": "inline", + "must_understand": False, + "metadata": { + "foo": { + "attributes": {}, + "zarr_format": 3, + "node_type": "group", + "consolidated_metadata": { + "kind": "inline", + "metadata": {}, + "must_understand": False, + }, + } + }, + }, + } From 2b61da4016d3ac1150f90c314d98c6bfd165fe4f Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 10 Apr 2026 15:05:50 +0200 Subject: [PATCH 3/5] chore: lint --- tests/test_metadata/conftest.py | 6 +++--- tests/test_metadata/test_v3.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_metadata/conftest.py b/tests/test_metadata/conftest.py index 9984a1cfec..24f2417fce 100644 --- a/tests/test_metadata/conftest.py +++ b/tests/test_metadata/conftest.py @@ -31,11 +31,11 @@ def minimal_metadata_dict_v3( "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (4, 4)}}, "chunk_key_encoding": {"name": "default", "configuration": {"separator": "/"}}, "fill_value": 0, - "codecs": (BytesCodec().to_dict(),), + "codecs": (BytesCodec().to_dict(),), # type: ignore[typeddict-item] "attributes": {}, "storage_transformers": (), } - d.update(overrides) + d.update(overrides) # type: ignore[typeddict-item] if extra_fields is not None: - d.update(extra_fields) + d.update(extra_fields) # type: ignore[typeddict-item] return d diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index 2be1d040be..db2593c126 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -189,7 +189,7 @@ def test_parse_codecs_unknown_raises(monkeypatch: pytest.MonkeyPatch) -> None: def test_array_metadata_roundtrip(case: Expect[dict[str, Any], dict[str, Any]]) -> None: """from_dict(d).to_dict() produces the expected output, including codec evolution.""" d = minimal_metadata_dict_v3(**case.input) - m = ArrayV3Metadata.from_dict(d) + m = ArrayV3Metadata.from_dict(d) # type: ignore[arg-type] assert m.to_dict() == case.output @@ -219,7 +219,7 @@ def test_array_metadata_from_dict_fails(case: ExpectFail[dict[str, Any]]) -> Non """from_dict rejects invalid metadata documents.""" d = minimal_metadata_dict_v3(**case.input) with pytest.raises(case.exception, match=case.msg): - ArrayV3Metadata.from_dict(d) + ArrayV3Metadata.from_dict(d) # type: ignore[arg-type] @pytest.mark.parametrize( @@ -259,7 +259,7 @@ def test_init_extra_fields_collision() -> None: codecs=({"name": "bytes", "configuration": {"endian": "little"}},), attributes={}, dimension_names=None, - extra_fields=extra_fields, + extra_fields=extra_fields, # type: ignore[arg-type] ) From 8e62cb3653e462d9c630d94dd88cc71dce715e74 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 10 Apr 2026 15:18:59 +0200 Subject: [PATCH 4/5] changelog --- changes/3897.misc.md | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 changes/3897.misc.md diff --git a/changes/3897.misc.md b/changes/3897.misc.md new file mode 100644 index 0000000000..53b2bd9b52 --- /dev/null +++ b/changes/3897.misc.md @@ -0,0 +1,2 @@ +Bump the minimum version of `typing-extensions` to 4.13 to support the `extra_items` +keyword argument on `TypedDict` (PEP 728). \ No newline at end of file From 3db30b5dab3d0ed0a5f1f77bae992e149ba9bb88 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 10 Apr 2026 15:19:18 +0200 Subject: [PATCH 5/5] chore: bump typing-extensions --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 93ef8c5e4b..b4783b5be3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ 'numpy>=2', 'numcodecs>=0.14', 'google-crc32c>=1.5', - 'typing_extensions>=4.12', + 'typing_extensions>=4.13', 'donfig>=0.8', ] @@ -243,7 +243,7 @@ extra-dependencies = [ 'fsspec==2023.10.0', 's3fs==2023.10.0', 'universal_pathlib==0.2.0', - 'typing_extensions==4.12.*', + 'typing_extensions==4.13.*', 'donfig==0.8.*', 'obstore==0.5.*', ]