Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 34 additions & 6 deletions xrspatial/geotiff/_write_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,11 +300,36 @@ def _assemble_cog_layout(header_size: int,
total_ifd_size = sum(bs + ov for bs, ov in ifd_blocks)
pixel_data_start = header_size + total_ifd_size

# Second pass: pixel data offsets per level
# COG block-order requirement (issue #2308): on-disk pixel data must
# run smallest-overview first, then progressively larger overviews,
# with the main-resolution image's blocks last. ``pixel_data_parts``
# arrives in ``[main, ov_factor_2, ov_factor_4, ...]`` order (full
# res first, then overviews of increasing decimation), so the
# emission order is the overview tail reversed (smallest factor =
# largest decimation = last entry, emitted first) followed by index
# 0 (main resolution). The IFD chain still walks ``[main, ov1, ov2,
# ...]`` -- only the byte-level placement of the tile/strip blocks
# changes, which is what external validators like rio-cogeo check.
n_parts = len(pixel_data_parts)
if n_parts > 1:
pixel_emission_order = list(range(n_parts - 1, 0, -1)) + [0]
else:
# In normal use, _assemble_tiff routes single-IFD outputs to
# ``_assemble_standard_layout``; the upstream gate
# ``is_cog and len(ifd_specs) > 1`` guarantees n_parts >= 2 here.
# The single-entry fallback keeps the helper safe for direct
# unit tests that exercise the COG layout with one IFD.
pixel_emission_order = [0]

# Second pass: pixel data offsets per level. We walk the emission
# order to accumulate offsets, then store them indexed by the
# original level so the third pass can look up each IFD's tile/strip
# base directly.
level_pixel_offsets = [0] * n_parts
current_pixel_offset = pixel_data_start
level_pixel_offsets = []
for _arr, _lw, _lh, rel_offsets, byte_counts, comp_chunks in pixel_data_parts:
level_pixel_offsets.append(current_pixel_offset)
for emit_idx in pixel_emission_order:
_arr, _lw, _lh, _rel, _bc, comp_chunks = pixel_data_parts[emit_idx]
level_pixel_offsets[emit_idx] = current_pixel_offset
current_pixel_offset += sum(len(c) for c in comp_chunks)

# Third pass: build IFDs with correct offsets
Expand Down Expand Up @@ -355,8 +380,11 @@ def _assemble_cog_layout(header_size: int,
output.extend(overflow_bytes)
current_ifd_pos = len(output)

# Append all pixel data
for _arr, _lw, _lh, _rel_offsets, _byte_counts, comp_chunks in pixel_data_parts:
# Append all pixel data in COG-compliant order: smallest overview
# first, then progressively larger, with the main resolution image
# last (issue #2308).
for emit_idx in pixel_emission_order:
_arr, _lw, _lh, _rel, _bc, comp_chunks = pixel_data_parts[emit_idx]
for chunk in comp_chunks:
output.extend(chunk)

Expand Down
143 changes: 143 additions & 0 deletions xrspatial/geotiff/tests/test_overview_block_order_2308.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
"""COG overview tile-block ordering invariant (issue #2308).

The COG spec requires the on-disk pixel-data layout to run from the
smallest overview through progressively larger overviews and end with
the main-resolution image. External readers (rio-cogeo, GDAL's
``validate_cloud_optimized_geotiff``) flag a file when the byte
ordering reverses or interleaves these blocks even though the IFD
chain walks main -> ov1 -> ov2 the conventional way. These tests lock
the byte order in as a regression gate so the writer cannot drift back
to the old layout.
"""
from __future__ import annotations

import os
import tempfile

import numpy as np
import pytest
import xarray as xr

from xrspatial.geotiff import to_geotiff
from xrspatial.geotiff._header import parse_all_ifds, parse_header


def _min_block_offset(ifd) -> int:
"""Return the smallest tile-offset (or strip-offset) for an IFD."""
offsets = ifd.tile_offsets
if offsets is None:
offsets = ifd.strip_offsets
assert offsets is not None and len(offsets) > 0, (
"IFD has neither tile_offsets nor strip_offsets")
return min(offsets)


def _read_block_order(path: str) -> list[int]:
"""Return ``min_block_offset`` for each IFD in walk order."""
with open(path, "rb") as f:
data = f.read()
header = parse_header(data)
ifds = parse_all_ifds(data, header)
return [_min_block_offset(ifd) for ifd in ifds]


def _make_da(shape, bands: int | None = None) -> xr.DataArray:
"""Build a synthetic DataArray with a sane CRS / coordinate grid."""
rng = np.random.RandomState(17)
if bands is None:
arr = rng.rand(*shape).astype("float32")
dims = ("y", "x")
else:
arr = rng.rand(bands, *shape).astype("float32")
dims = ("band", "y", "x")
h, w = shape
coords = {
"y": np.linspace(45, 44, h),
"x": np.linspace(-120, -119, w),
}
return xr.DataArray(arr, dims=dims, coords=coords, attrs={"crs": 4326})


@pytest.mark.parametrize("bands", [None, 3])
def test_cog_overview_block_order_invariant_2308(bands):
"""Pixel blocks must run smallest-overview -> larger -> main.

The IFD walk order is ``[main, ov_factor_2, ov_factor_4]`` (full
resolution first). The on-disk pixel-block order must be the
reverse: factor-4 overview blocks first, then factor-2 overview
blocks, with the main-resolution blocks last (issue #2308).
"""
da = _make_da((256, 256), bands=bands)
with tempfile.TemporaryDirectory() as td:
suffix = "rgb" if bands else "mono"
path = os.path.join(td, f"order_2308_{suffix}.tif")
to_geotiff(
da, path, compression="deflate", cog=True,
tile_size=64, overview_levels=[2, 4],
)

block_offsets = _read_block_order(path)
# IFD walk: [main, ov_factor_2, ov_factor_4]
main_min, ov2_min, ov4_min = block_offsets
# COG layout: factor-4 (smallest overview) -> factor-2 -> main.
assert ov4_min < ov2_min, (
f"smallest overview blocks should sit before larger "
f"overview blocks: ov4_min={ov4_min}, ov2_min={ov2_min}")
assert ov2_min < main_min, (
f"overview blocks should sit before main-resolution "
f"blocks: ov2_min={ov2_min}, main_min={main_min}")


def test_cog_overview_block_order_three_levels_2308():
"""Same invariant with three overview levels (factor 2/4/8)."""
da = _make_da((512, 512))
with tempfile.TemporaryDirectory() as td:
path = os.path.join(td, "order_2308_three.tif")
to_geotiff(
da, path, compression="deflate", cog=True,
tile_size=64, overview_levels=[2, 4, 8],
)

block_offsets = _read_block_order(path)
# IFD walk: [main, ov2, ov4, ov8]
main_min, ov2_min, ov4_min, ov8_min = block_offsets
# On-disk: ov8 -> ov4 -> ov2 -> main
assert ov8_min < ov4_min < ov2_min < main_min, (
f"COG block order broken: ov8={ov8_min} ov4={ov4_min} "
f"ov2={ov2_min} main={main_min}")


def _rio_cogeo_or_skip():
"""Skip the rio-cogeo gate when the dependency isn't installed.

Mirrors the skip semantics used in ``test_cog_writer_compliance``:
contributor laptops without rio-cogeo see a skip, CI with rio-cogeo
runs the strict check.
"""
try:
from rio_cogeo.cogeo import cog_validate
except ImportError:
pytest.skip("rio-cogeo not installed")
return cog_validate


@pytest.mark.parametrize("bands", [None, 3])
def test_cog_overview_block_order_rio_cogeo_2308(bands):
"""``rio-cogeo cog_validate`` returns valid=True with no block-order errors."""
cog_validate = _rio_cogeo_or_skip()
da = _make_da((256, 256), bands=bands)
with tempfile.TemporaryDirectory() as td:
suffix = "rgb" if bands else "mono"
path = os.path.join(td, f"order_2308_rio_{suffix}.tif")
to_geotiff(
da, path, compression="deflate", cog=True,
tile_size=64, overview_levels=[2, 4],
)
valid, errors, _warnings = cog_validate(path, strict=False)
assert valid, f"rio_cogeo cog_validate failed: {errors}"
# Defensive secondary assertion: the two block-order messages
# from #2308 must not reappear even if some future writer
# change keeps the validator happy on the headline check.
joined = " ".join(errors).lower()
assert "offset of the first block" not in joined, (
f"block-order errors regressed: {errors}")
Loading