diff --git a/xrspatial/geotiff/_write_layout.py b/xrspatial/geotiff/_write_layout.py index 872c1f68..3e37c706 100644 --- a/xrspatial/geotiff/_write_layout.py +++ b/xrspatial/geotiff/_write_layout.py @@ -300,11 +300,36 @@ def _assemble_cog_layout(header_size: int, total_ifd_size = sum(bs + ov for bs, ov in ifd_blocks) pixel_data_start = header_size + total_ifd_size - # Second pass: pixel data offsets per level + # COG block-order requirement (issue #2308): on-disk pixel data must + # run smallest-overview first, then progressively larger overviews, + # with the main-resolution image's blocks last. ``pixel_data_parts`` + # arrives in ``[main, ov_factor_2, ov_factor_4, ...]`` order (full + # res first, then overviews of increasing decimation), so the + # emission order is the overview tail reversed (smallest factor = + # largest decimation = last entry, emitted first) followed by index + # 0 (main resolution). The IFD chain still walks ``[main, ov1, ov2, + # ...]`` -- only the byte-level placement of the tile/strip blocks + # changes, which is what external validators like rio-cogeo check. + n_parts = len(pixel_data_parts) + if n_parts > 1: + pixel_emission_order = list(range(n_parts - 1, 0, -1)) + [0] + else: + # In normal use, _assemble_tiff routes single-IFD outputs to + # ``_assemble_standard_layout``; the upstream gate + # ``is_cog and len(ifd_specs) > 1`` guarantees n_parts >= 2 here. + # The single-entry fallback keeps the helper safe for direct + # unit tests that exercise the COG layout with one IFD. + pixel_emission_order = [0] + + # Second pass: pixel data offsets per level. We walk the emission + # order to accumulate offsets, then store them indexed by the + # original level so the third pass can look up each IFD's tile/strip + # base directly. + level_pixel_offsets = [0] * n_parts current_pixel_offset = pixel_data_start - level_pixel_offsets = [] - for _arr, _lw, _lh, rel_offsets, byte_counts, comp_chunks in pixel_data_parts: - level_pixel_offsets.append(current_pixel_offset) + for emit_idx in pixel_emission_order: + _arr, _lw, _lh, _rel, _bc, comp_chunks = pixel_data_parts[emit_idx] + level_pixel_offsets[emit_idx] = current_pixel_offset current_pixel_offset += sum(len(c) for c in comp_chunks) # Third pass: build IFDs with correct offsets @@ -355,8 +380,11 @@ def _assemble_cog_layout(header_size: int, output.extend(overflow_bytes) current_ifd_pos = len(output) - # Append all pixel data - for _arr, _lw, _lh, _rel_offsets, _byte_counts, comp_chunks in pixel_data_parts: + # Append all pixel data in COG-compliant order: smallest overview + # first, then progressively larger, with the main resolution image + # last (issue #2308). + for emit_idx in pixel_emission_order: + _arr, _lw, _lh, _rel, _bc, comp_chunks = pixel_data_parts[emit_idx] for chunk in comp_chunks: output.extend(chunk) diff --git a/xrspatial/geotiff/tests/test_overview_block_order_2308.py b/xrspatial/geotiff/tests/test_overview_block_order_2308.py new file mode 100644 index 00000000..8b3c56b4 --- /dev/null +++ b/xrspatial/geotiff/tests/test_overview_block_order_2308.py @@ -0,0 +1,143 @@ +"""COG overview tile-block ordering invariant (issue #2308). + +The COG spec requires the on-disk pixel-data layout to run from the +smallest overview through progressively larger overviews and end with +the main-resolution image. External readers (rio-cogeo, GDAL's +``validate_cloud_optimized_geotiff``) flag a file when the byte +ordering reverses or interleaves these blocks even though the IFD +chain walks main -> ov1 -> ov2 the conventional way. These tests lock +the byte order in as a regression gate so the writer cannot drift back +to the old layout. +""" +from __future__ import annotations + +import os +import tempfile + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import to_geotiff +from xrspatial.geotiff._header import parse_all_ifds, parse_header + + +def _min_block_offset(ifd) -> int: + """Return the smallest tile-offset (or strip-offset) for an IFD.""" + offsets = ifd.tile_offsets + if offsets is None: + offsets = ifd.strip_offsets + assert offsets is not None and len(offsets) > 0, ( + "IFD has neither tile_offsets nor strip_offsets") + return min(offsets) + + +def _read_block_order(path: str) -> list[int]: + """Return ``min_block_offset`` for each IFD in walk order.""" + with open(path, "rb") as f: + data = f.read() + header = parse_header(data) + ifds = parse_all_ifds(data, header) + return [_min_block_offset(ifd) for ifd in ifds] + + +def _make_da(shape, bands: int | None = None) -> xr.DataArray: + """Build a synthetic DataArray with a sane CRS / coordinate grid.""" + rng = np.random.RandomState(17) + if bands is None: + arr = rng.rand(*shape).astype("float32") + dims = ("y", "x") + else: + arr = rng.rand(bands, *shape).astype("float32") + dims = ("band", "y", "x") + h, w = shape + coords = { + "y": np.linspace(45, 44, h), + "x": np.linspace(-120, -119, w), + } + return xr.DataArray(arr, dims=dims, coords=coords, attrs={"crs": 4326}) + + +@pytest.mark.parametrize("bands", [None, 3]) +def test_cog_overview_block_order_invariant_2308(bands): + """Pixel blocks must run smallest-overview -> larger -> main. + + The IFD walk order is ``[main, ov_factor_2, ov_factor_4]`` (full + resolution first). The on-disk pixel-block order must be the + reverse: factor-4 overview blocks first, then factor-2 overview + blocks, with the main-resolution blocks last (issue #2308). + """ + da = _make_da((256, 256), bands=bands) + with tempfile.TemporaryDirectory() as td: + suffix = "rgb" if bands else "mono" + path = os.path.join(td, f"order_2308_{suffix}.tif") + to_geotiff( + da, path, compression="deflate", cog=True, + tile_size=64, overview_levels=[2, 4], + ) + + block_offsets = _read_block_order(path) + # IFD walk: [main, ov_factor_2, ov_factor_4] + main_min, ov2_min, ov4_min = block_offsets + # COG layout: factor-4 (smallest overview) -> factor-2 -> main. + assert ov4_min < ov2_min, ( + f"smallest overview blocks should sit before larger " + f"overview blocks: ov4_min={ov4_min}, ov2_min={ov2_min}") + assert ov2_min < main_min, ( + f"overview blocks should sit before main-resolution " + f"blocks: ov2_min={ov2_min}, main_min={main_min}") + + +def test_cog_overview_block_order_three_levels_2308(): + """Same invariant with three overview levels (factor 2/4/8).""" + da = _make_da((512, 512)) + with tempfile.TemporaryDirectory() as td: + path = os.path.join(td, "order_2308_three.tif") + to_geotiff( + da, path, compression="deflate", cog=True, + tile_size=64, overview_levels=[2, 4, 8], + ) + + block_offsets = _read_block_order(path) + # IFD walk: [main, ov2, ov4, ov8] + main_min, ov2_min, ov4_min, ov8_min = block_offsets + # On-disk: ov8 -> ov4 -> ov2 -> main + assert ov8_min < ov4_min < ov2_min < main_min, ( + f"COG block order broken: ov8={ov8_min} ov4={ov4_min} " + f"ov2={ov2_min} main={main_min}") + + +def _rio_cogeo_or_skip(): + """Skip the rio-cogeo gate when the dependency isn't installed. + + Mirrors the skip semantics used in ``test_cog_writer_compliance``: + contributor laptops without rio-cogeo see a skip, CI with rio-cogeo + runs the strict check. + """ + try: + from rio_cogeo.cogeo import cog_validate + except ImportError: + pytest.skip("rio-cogeo not installed") + return cog_validate + + +@pytest.mark.parametrize("bands", [None, 3]) +def test_cog_overview_block_order_rio_cogeo_2308(bands): + """``rio-cogeo cog_validate`` returns valid=True with no block-order errors.""" + cog_validate = _rio_cogeo_or_skip() + da = _make_da((256, 256), bands=bands) + with tempfile.TemporaryDirectory() as td: + suffix = "rgb" if bands else "mono" + path = os.path.join(td, f"order_2308_rio_{suffix}.tif") + to_geotiff( + da, path, compression="deflate", cog=True, + tile_size=64, overview_levels=[2, 4], + ) + valid, errors, _warnings = cog_validate(path, strict=False) + assert valid, f"rio_cogeo cog_validate failed: {errors}" + # Defensive secondary assertion: the two block-order messages + # from #2308 must not reappear even if some future writer + # change keeps the validator happy on the headline check. + joined = " ".join(errors).lower() + assert "offset of the first block" not in joined, ( + f"block-order errors regressed: {errors}")