diff --git a/.claude/sweep-test-coverage-state.csv b/.claude/sweep-test-coverage-state.csv index 6dc4cac33..2381bf6a0 100644 --- a/.claude/sweep-test-coverage-state.csv +++ b/.claude/sweep-test-coverage-state.csv @@ -2,5 +2,6 @@ module,last_inspected,issue,severity_max,categories_found,notes geotiff,2026-05-18,,HIGH,3;4,"Pass 18 (2026-05-18): added test_parallel_strip_decode_sparse_2100.py closing Cat 3 HIGH geometric-edge / Cat 4 HIGH parameter-coverage gap on the parallel-decode strip paths (#2100/#2104). The strip-decode parallelisation in _read_strips (lines 1942-2014) and _fetch_decode_cog_http_strips (lines 2685-2740) added a collect-decode-place pipeline whose job-collection loop filters sparse strips (byte_counts[idx] == 0) before they reach the ThreadPoolExecutor. The existing test_parallel_strip_decode_2100.py covers parallel/serial parity, the pool-engaged branch, single-strip serial short-circuit, windowed strip reads, and planar=2 multi-band, but every fixture is fully populated. The 128x128 sparse fixture in test_sparse_cog.py is below the 64K-pixel parallel gate, so the sparse-strip filter inside the parallel branch is wholly untested. A regression that lost the byte_counts==0 guard would silently ship: the decoder would receive an empty data[offsets[idx]:offsets[idx]+0] slice and either raise 'Decompressed tile/strip size mismatch' or return corrupt pixels. 
7 new tests, all passing: local-strip full-image parallel/serial parity with sparse strips, parallel-pool-engaged on multi-strip sparse images, windowed reads across the sparse boundary, all-sparse degenerate (zero filled rows -> empty job list -> short-circuit gate), planar=2 sparse parity (dedicated 'planar == 2 and samples > 1' branch with its own byte_counts==0 guard at lines 1949-1962), HTTP windowed read on a non-sparse strict subset (parallel decode of fetched strips), and HTTP windowed read across the sparse boundary (parallel decode of the fetched strips with placement matching the local read). Mutation against the strip-job collection sparse guard (delete the byte_counts == 0 continue) flips 5 of 5 local tests red with 'Decompressed tile/strip size mismatch: expected ... got 0'; mutation against the HTTP path sparse guard at line 2646 flips the boundary HTTP test red. Confirmed clean restore via md5sum. Source untouched. Cat 3 HIGH + Cat 4 HIGH (geometric edge case + parameter coverage on the sparse-strip code path under parallel decode). Pass 17 (2026-05-18): added test_mask_nodata_gpu_vrt_2052.py closing Cat 1 HIGH backend-coverage gap on the mask_nodata= opt-out kwarg (#2052). The kwarg was added in #2052 and wired through the four public readers (open_geotiff, read_geotiff_gpu, read_geotiff_dask, read_vrt), but test_mask_nodata_kwarg_2052.py only exercised the eager-numpy and dask+numpy branches. The pure-GPU mask gating at _backends/gpu.py:709, the dask+GPU dispatcher forwarding at _backends/gpu.py:991, the eager VRT mask gating at _backends/vrt.py:320, and the chunked VRT graph builder at _backends/vrt.py:408/588 had zero direct coverage. 
19 new tests, all passing on GPU host: GPU eager + dask+GPU mask_nodata=False preserves uint16, GPU defaults still promote to float64, dispatcher thread-through for open_geotiff(gpu=True, mask_nodata=False) and open_geotiff(gpu=True, chunks=N, mask_nodata=False), VRT eager and chunked branches mirror, cross-backend parity (eager vs dask, eager vs GPU, eager vs dask+GPU, eager vs VRT) bit-exact under mask_nodata=False, direct read_geotiff_dask entry-point coverage. Fixture uses tiled+deflate compression so the pure nvCOMP decode path is exercised, not the CPU-fallback piggyback path. Mutation against gpu.py:709 (force mask_nodata=True) flipped 4 GPU tests red; mutation against vrt.py eager mask gate flipped 4 VRT tests red. Cat 1 HIGH (backend coverage on mask_nodata=False for GPU, dask+GPU, VRT eager, VRT chunked). Pass 16 (2026-05-15): added test_max_cloud_bytes_dispatcher_silent_drop_2026_05_15.py closing Cat 4 HIGH parameter-coverage gap on the open_geotiff dispatcher's max_cloud_bytes kwarg. The kwarg was added in #1928 (eager fsspec budget) and re-ordered into the canonical reader signature by #1957, but open_geotiff only forwards it to _read_to_array on the eager non-VRT branch (__init__.py:431). The GPU branch at line 410, the dask branch at line 422, and the VRT branch at line 362 never reference the kwarg, so open_geotiff(p, max_cloud_bytes=8, gpu=True) / open_geotiff(p, max_cloud_bytes=8, chunks=N) / open_geotiff(vrt, max_cloud_bytes=8) all silently drop the budget. Same class of dispatcher-silently-drops-backend-kwarg bug fixed by #1561 / #1605 / #1685 / #1810 for other kwargs; the two sibling kwargs on_gpu_failure (line 339) and missing_sources (line 355) already raise ValueError when used on a path where they do not apply. 
11 tests: 4 xfail(strict=True) pinning the fix surface (gpu, dask, vrt, dask+gpu), 3 passing pins on the current silent-drop behaviour so the fix is visible as a diff, 4 positive pins that the eager local + file-like paths accept the kwarg (docstring no-op contract). Filed issue #1974 for the dispatcher fix (sweep is test-only). Cat 4 HIGH (silent backend-kwarg drop). Pass 15 (2026-05-15): added test_write_vrt_bool_nodata_1921.py closing Cat 1 HIGH backend-parity gap on bool nodata rejection. Issue #1911 added the isinstance(nodata, (bool, np.bool_)) -> TypeError guard at to_geotiff and build_geo_tags, but the sibling writers were left unchecked: write_vrt(nodata=True) silently emits True into the VRT XML (str(True) drops the sentinel because no reader parses 'True' as numeric); write_geotiff_gpu direct call relies on the build_geo_tags defense-in-depth rather than an entry-point check, so a future refactor moving that guard would regress the GPU writer with no test coverage. 17 new tests: 4 xfail (strict=True) pinning the write_vrt fix surface (issue #1921), 1 passing pin on the current buggy str(True) emission so the fix is visible as a diff, 6 numeric/None happy-path tests on write_vrt, 4 GPU writer direct-call bool-reject tests (4 dtypes x 1 call), 1 to_geotiff(gpu=True) dispatcher thread-through. Filed issue #1921 for the write_vrt fix (sweep is test-only). Cat 1 HIGH (write_vrt backend parity bug) + Cat 1 MEDIUM (write_geotiff_gpu defense-in-depth pin). Pass 14 (2026-05-15): added test_dask_streaming_write_degenerate_2026_05_15.py closing Cat 3 HIGH and Cat 2 HIGH/MEDIUM gaps on the dask streaming write path (to_geotiff with dask-backed DataArray, #1084). test_streaming_write.py covered 100x100 with a NaN block plus a 2x2 small raster but had nothing 1-pixel-row, 1-pixel-column, all-NaN, all-Inf, or +/-Inf-mixed. 
The streaming tile-row segmenter (#1485) on a 1-pixel-tall raster and the streaming nodata-mask coercion on an all-NaN chunk were reachable only with a dask input and had no direct coverage; a regression on either would not surface from the eager numpy path or the write_geotiff_gpu path (pass 5 covered the GPU writer's degenerate shapes). 16 new tests, all passing: 1x1 chunk-matches-shape + nodata-attr round-trip + uint16, 1xN single chunk + chunks-split-columns + wide-segmented-by-buffer (#1485 streaming_buffer_bytes=1 forces the segmenter), Nx1 single chunk + chunks-split-rows, all-NaN with finite sentinel + all-NaN without sentinel, mixed NaN/+Inf/-Inf preserving Inf bit-exact + sentinel masking NaN only, all-+Inf and all--Inf, predictor=3 (float predictor) round-trip on float32 + float64 plus int-dtype ValueError. predictor=3 streaming coverage extends the small-chunk and int-rejection geometry around test_predictor_fp_write_1313.test_predictor3_streaming_dask (which already covers a 128x192 predictor=3 dask streaming write with a Predictor-tag assertion). Cat 3 HIGH (1x1/1xN/Nx1) + Cat 2 HIGH (all-NaN with sentinel) + Cat 2 MEDIUM (mixed-Inf, all-Inf) + Cat 4 MEDIUM (predictor=3 streaming). Pass 13 (2026-05-13): added test_size_param_validation_gpu_vrt_1776.py closing Cat 4 HIGH parameter-coverage gap on size-arg validation. Issue #1752 added tile_size validation to to_geotiff and chunks validation to read_geotiff_dask, but the matching kwargs on three sibling entry points were left unchecked: write_geotiff_gpu(tile_size=) raised ZeroDivisionError for 0, struct.error for -1, TypeError for 256.0; read_geotiff_gpu(chunks=) and read_vrt(chunks=) raised ZeroDivisionError for 0 and silently accepted negative values. Factored two shared validators (_validate_tile_size_arg, _validate_chunks_arg) and called them up front from each entry point. 
34 new tests, all passing on GPU host: tile_size matrix on write_geotiff_gpu (0/-1/256.0/True/False/positive/np.int64), chunks matrix on read_geotiff_gpu and read_vrt (0/-1/(0,N)/(N,-1)/wrong-length/bool/non-int/(N,float)/positive/np.int64), dispatcher thread-through tests (open_geotiff(gpu=True, chunks=0), to_geotiff(gpu=True, tile_size=0)). Pre-existing 13 #1752 tests still pass after refactor. Filed issue #1776. Pass 12 (2026-05-12): added test_gpu_writer_overview_mode_and_compression_level_1740.py closing Cat 4 HIGH and Cat 4 MEDIUM parameter-coverage gaps. (1) write_geotiff_gpu(overview_resampling='mode') and the dedicated _block_reduce_2d_gpu mode-fallback branch (_gpu_decode.py:3051-3056) had zero direct tests; six of the seven overview_resampling modes were covered (mean/nearest by test_features, min/max/median by pass 6, cubic by test_signature_parity_1631) but mode was the odd one out -- a regression dropping the mode dispatch from _block_reduce_2d_gpu would fall through to the mean reshape branch and emit wrong overview pixels for integer rasters. (2) write_geotiff_gpu(compression_level=) documented as accepted-but-ignored had no test; the CPU writer rejects out-of-range levels with ValueError, the GPU writer is documented not to -- a regression wiring the GPU writer up to the CPU range validator would silently break every to_geotiff(gpu=True, compression_level=X) caller for in-range levels and noisily for out-of-range. 
19 tests, all passing on GPU host: _block_reduce_2d_gpu(method='mode') CPU-parity on 4x4 deterministic + random 8x8 + dtype-preserved across u8/u16/i16/i32, write_geotiff_gpu(cog=True, overview_resampling='mode') end-to-end round trip, to_geotiff(gpu=True, ..., overview_resampling='mode') dispatcher thread-through, GPU-vs-CPU pixel parity on 8x8 input, write_geotiff_gpu(compression_level=) in-range matrix on zstd/deflate, out-of-range matrix (zstd=999/-5, deflate=50/0) accepted without raising + round-trip preserved, to_geotiff(gpu=True, compression_level=999) dispatcher thread-through, companion CPU rejects-OOR pin to lock the asymmetry. Mutation against the mode branch (drop the 'if method == mode' block in _block_reduce_2d_gpu) flipped 9 mode tests red. Filed issue #1740. Pass 11 (2026-05-12): added test_gpu_writer_cpu_fallback_codecs_2026_05_12.py closing a Cat 4 HIGH parameter-coverage gap on write_geotiff_gpu compression= modes for the CPU-fallback codecs (lzw, packbits, lz4, lerc, jpeg2000/j2k). Pass 7 (test_gpu_writer_compression_modes_2026_05_11) covered only none/deflate/zstd/jpeg; the remaining five codecs route through dedicated branches in gpu_compress_tiles (_gpu_decode.py:2974-3019) with CPU fallbacks (lerc_compress, jpeg2000_compress, cpu_compress) that had zero direct tests via write_geotiff_gpu. A regression in routing/tag-wiring/fallback dispatch would ship silently because the internal reader uses the same compression-tag table. 17 tests, all passing on GPU host: lzw/packbits/lz4 round-trip + compression-tag pin on uint16, lerc lossless float32 + uint16 round-trip + tag pin, jpeg2000 uint8 single-band + RGB multi-band lossless round-trip + j2k-alias parity + tag pin, GPU-vs-CPU writer pixel parity for lzw/packbits, to_geotiff(gpu=True, compression=lzw/packbits) dispatcher thread-through. Mutation against compression dispatch (swap lzw bytes to zstd; swap lerc bytes to deflate) flipped round-trip tests red. Filed issue #1706. 
Pass 10 (2026-05-12): added test_kwarg_behaviour_2026_05_12_v2.py closing two Cat 4 HIGH parameter-coverage gaps. (1) write_geotiff_gpu(predictor=True/2/3) had zero direct tests; the GPU writer threads predictor= through normalize_predictor and gpu_compress_tiles into five CUDA encode kernels (_predictor_encode_kernel_u8/u16/u32/u64 for predictor=2, _fp_predictor_encode_kernel for predictor=3) and a regression dropping the encode-kernel calls would ship corrupt files. (2) read_vrt(window=) had no behaviour tests (only a signature pin in test_signature_annotations_1654); the kwarg is documented and _vrt.read_vrt implements full windowed-read semantics (clip, multi-source overlap, src/dst scaling, GeoTransform origin shift on coords + attrs['transform']). 23 tests, all passing on GPU host: predictor=True/2 round-trips on u8/u16/i32 + 3-band RGB samples_per_pixel stride; predictor=3 lossless round-trip on f32 and f64; predictor=3 int-dtype ValueError (CPU/GPU parity); CPU/GPU pixel-exact parity for pred=2 u16 and pred=3 f32; read_vrt(window=) subregion + full + clamp-overflow + clamp-negative + 2x1 mosaic seam straddle + offset past seam + transform-attr origin shift + y/x coords half-pixel shift + window+band + window+chunks (dask) + window+gpu (cupy) + window+gpu+chunks (dask+cupy). Mutation against the encode dispatch flipped 7 predictor tests red. Filed issue #1690. Pass 9 (2026-05-12): added test_kwarg_behaviour_2026_05_12.py closing three Cat 4 MEDIUM parameter-coverage gaps plus one Cat 4 LOW error path. write_vrt documented kwargs (relative/crs_wkt/nodata) had a smoke-test pinning that the kwargs are accepted but no test verified the override *effect* -- a regression dropping the override branch and silently using the default-from-first-source would ship undetected. read_geotiff_gpu(dtype=) cast had zero direct tests; the eager path has TestDtypeEager and dask has TestDtypeDask but the GPU branch had no equivalent. 
write_geotiff_gpu(bigtiff=) threads through to _assemble_tiff(force_bigtiff=) but no test asserted the on-disk header byte switches; the CPU writer had it via test_features::test_force_bigtiff_via_public_api. write_vrt(source_files=[]) ValueError was uncovered. 26 tests, all passing on GPU host: write_vrt relative=True/False XML attribute + path inspection + parse-back round-trip, write_vrt crs_wkt= override distinct-from-default XML check, write_vrt nodata= override + default-from-source coverage, write_vrt([]) ValueError + no-file side effect, read_geotiff_gpu dtype= matrix (float64->float32, float64->float16, uint16->int32, uint16->uint8, float-to-int raise, dtype=None preserves native), open_geotiff(gpu=True, dtype=) dispatcher, read_geotiff_gpu(chunks=, dtype=) dask+GPU branch, write_geotiff_gpu bigtiff=True/False/None header verification, to_geotiff(gpu=True, bigtiff=True) dispatcher thread-through. Pass 8 (2026-05-11): added test_lz4_compression_level_2026_05_11.py closing Cat 4 MEDIUM parameter-coverage gap on compression='lz4' + compression_level=. _LEVEL_RANGES advertises lz4: (0, 16) but only deflate (1, 9) and zstd (1, 22) had direct level boundary + round-trip + reject tests. The range check is the gatekeeper -- lz4_compress silently accepts any int level -- so a regression dropping 'lz4' from _LEVEL_RANGES would ship undetected. 18 tests, all passing: round-trip at levels 0/1/9/16 (lossless), default-level no-arg path, higher-level-not-larger smoke check on compressible input, out-of-range reject at -1/-10/17/100 on eager path, valid-range message format pin (lz4 valid: 0-16), dask streaming round-trip at 0/1/8/16, dask streaming out-of-range reject at -1/17/50 (separate _LEVEL_RANGES call site). Pass 7 (2026-05-11): added test_gpu_writer_compression_modes_2026_05_11.py closing Cat 4 HIGH gap on write_geotiff_gpu compression= modes. 
The writer documents zstd (default, fastest GPU), deflate, jpeg, and none, but only deflate + none had round-trip tests; the default zstd and the jpeg (nvJPEG/Pillow) paths shipped without targeted coverage. 11 new tests, all passing on GPU host: zstd round-trip + default-codec pinning, jpeg round-trip on 3-band RGB uint8 + 1-band greyscale, TIFF compression-tag header check across none/deflate/zstd/jpeg, plain deflate + none round-trips outside the COG/sentinel paths, and a cross-codec lossless parity check (zstd/deflate/none agree pixel-exact). nvJPEG path was exercised live, not just the Pillow fallback. Pass 6 (2026-05-11): added test_overview_resampling_min_max_median_2026_05_11.py covering Cat 4 HIGH parameter-coverage gap on overview_resampling=min/max/median. CPU end-to-end paths were already covered by test_cog_overview_nodata_1613::test_cpu_cog_overview_aggregations_ignore_sentinel; the GPU end-to-end paths and the direct CPU+GPU block-reducer branches had no targeted tests, so a regression on those code paths would ship undetected. 26 tests, all passing on GPU host: block-reducer unit tests (finite + partial-NaN), end-to-end COG writes for both to_geotiff and write_geotiff_gpu, CPU/GPU parity for to_geotiff(gpu=True), CPU nodata-sentinel regression check, and ValueError error-path tests for unknown method names on both backends. Pass 5 (2026-05-11): added test_degenerate_shapes_backends_2026_05_11.py covering Cat 3 HIGH geometric gaps (1x1 / 1xN / Nx1 reads on dask+numpy, GPU, dask+cupy backends; 1x1 / 1xN / Nx1 writes through write_geotiff_gpu) and Cat 2 MEDIUM NaN/Inf gaps (all-NaN read on GPU + dask+cupy, Inf / -Inf reads on all non-eager backends, NaN sentinel mask on dask read path including sentinel block split across chunk boundary). 23 tests, all passing on GPU host. 
Prior passes still hold: pass 4 (r4) closed read_geotiff_gpu/dask name= + max_pixels= kwargs (Cat 4), pass 3 (r3) closed read_vrt GPU/dask+GPU backend dispatch (Cat 1) and dtype/name kwargs (Cat 4)." +polygonize,2026-05-19,2155,HIGH,1;2;3;4,"Pass 1 (2026-05-19): added test_polygonize_coverage_2026_05_19.py with 58 tests, all passing on a CUDA host. Closes Cat 3 HIGH 1x1 / Nx1 single-column geometric gaps (Nx1 exercises the nx==1 padding path at polygonize.py:565 and the cupy nx==1 numpy-fallback at polygonize.py:671), Cat 3 MEDIUM 1xN single-row and all-equal-value rasters on all four backends. Closes Cat 2 HIGH NaN parity for cupy + dask+cupy (numpy/dask were already covered by test_polygonize_nan_pixels_excluded*), Cat 2 MEDIUM all-NaN raster on all four backends, Cat 2 HIGH +/-Inf pins on all four backends. Filed source-bug issue #2155: numpy/dask/dask+cupy backends silently absorb Inf cells into adjacent finite polygons because _is_close reduces abs(inf-inf) to nan; cupy backend handles Inf correctly. Pins lock the asymmetric behaviour so the fix is visible. Closes Cat 1 MEDIUM simplify_tolerance + mask= parity gaps on dask+cupy backend (numpy/cupy/dask were already covered). Closes Cat 4 MEDIUM column_name non-default value across geopandas/spatialpandas/geojson return types and Cat 4 MEDIUM validation error paths (bad connectivity, bad transform length, mask shape mismatch, mask underlying-type mismatch). Cat 5 N/A: polygonize returns lists/dataframes, not a DataArray with attrs to propagate." rasterize,2026-05-17,,HIGH,1;3;4,"Pass 1 (2026-05-17): added test_rasterize_coverage_2026_05_17.py with 34 tests, all passing on a CUDA host. Closes four documented public-API gaps left after the pass-0 audit. 
(1) Cat 3 HIGH 1x1 single-pixel raster -- test_rasterize.py covers 1xN strips and Nx1 strips but never width=1 AND height=1, so the polygon scanline / line Bresenham / point burn kernels all ship without the single-cell degenerate case; the new TestSinglePixelRaster class pins polygon/point/line on eager numpy plus polygon parity across cupy / dask+numpy / dask+cupy. (2) Cat 4 HIGH like= template-raster parameter is documented at rasterize.py:2038 and implemented by _extract_grid_from_like (line 1930) but no test exercises it; TestLikeParameter pins dtype/bounds/coords inheritance, the three override branches (dtype, bounds, width/height), the three validation branches (not-DataArray, 3D, wrong dim names) and like= on all four backends. Mutation against the like-dtype branch (rasterize.py:2183-2184) flipped the inheritance test red. (3) Cat 4 HIGH resolution= happy path -- only the oversize-rejection error path was tested (line 304); TestResolutionParameter pins the scalar branch, the tuple branch, the ceil-and-clamp-to-1 semantics, and resolution= on all four backends. (4) Cat 4 HIGH non-empty GeometryCollection unpacking is documented at rasterize.py:1995 and implemented by _classify_geometries_loop (line 228) but only the empty-GC case was tested (line 269); TestGeometryCollection pins polygon+point and polygon+line+point collections on eager numpy plus parity across cupy / dask+numpy / dask+cupy so the loop classifier's polygon/line/point sub-bucketing has direct coverage. Cat 1 MEDIUM gap closed: eager cupy all_touched=True parity vs eager numpy (TestEagerCupyAllTouched) -- the existing test only covered dask+cupy all_touched, leaving the direct GPU all_touched kernel untested. Cat 2 MEDIUM gap closed: int32 dtype with default NaN fill silently casts to the int32-min sentinel (TestIntegerDtypeNanFill) -- pin the cast so any future ValueError-raises switch is visible as a code-review diff. 
Pre-existing 143 passing + 2 skipped tests in test_rasterize.py untouched." reproject,2026-05-10,,HIGH,1;4;5,"Added 39 tests: LiteCRS direct coverage, itrf_transform behaviour/roundtrip/array, itrf_frames, geoid_height numerical correctness + raster happy-path, vertical helpers (ellipsoidal<->orthometric/depth), reproject() lat/lon and latitude/longitude dim propagation. Note: _merge_arrays_cupy is imported but unused (no cupy merge dispatch in merge()); flagged as feature gap not test gap." diff --git a/xrspatial/tests/test_polygonize_coverage_2026_05_19.py b/xrspatial/tests/test_polygonize_coverage_2026_05_19.py new file mode 100644 index 000000000..84ede50b4 --- /dev/null +++ b/xrspatial/tests/test_polygonize_coverage_2026_05_19.py @@ -0,0 +1,610 @@ +"""Test coverage gap closures for polygonize (deep-sweep test-coverage, 2026-05-19). + +Closes documented gaps from the test-coverage sweep audit: + +Cat 1 (backend coverage) + - MEDIUM: simplify_tolerance parity for dask+cupy backend. + - MEDIUM: mask= parity for dask+cupy backend. + +Cat 2 (NaN/Inf/nodata edge cases) + - HIGH: Inf inputs on numpy / cupy / dask / dask+cupy. + The numpy / dask backends currently silently collapse +/-Inf + pixels into surrounding regions (see the issue #2155 note below); + cupy / dask+cupy produce the correct multi-polygon result. + Tests pin BOTH behaviours so the asymmetry is visible. + - HIGH: NaN parity with cupy + dask+cupy (numpy/dask already covered). + - MEDIUM: all-NaN raster on numpy / cupy / dask / dask+cupy + (empty polygon list). + +Cat 3 (geometric edge cases) + - HIGH: 1x1 single-pixel raster on all four backends + non-default + return_types (numpy / cupy / dask / dask+cupy). + - HIGH: Nx1 single-column raster on all four backends. polygonize has + a dedicated nx==1 padding path (polygonize.py:565) and the + CuPy backend has its own nx==1 fallback to numpy + (polygonize.py:671). Neither was directly tested. + - MEDIUM: 1xN single-row raster on all four backends.
+ - MEDIUM: All-equal-value raster on all four backends (zero-variance, + single-polygon-covering-everything). + +Cat 4 (parameter coverage) + - MEDIUM: column_name= non-default value (geopandas/spatialpandas/geojson). + - MEDIUM: Error paths: bad connectivity, bad transform length, mask + shape mismatch, mask underlying-type mismatch. + +Cat 5 not applicable: polygonize returns (column, polygon_points) tuples +or dataframes, not a DataArray. There is no input-attrs/coords propagation +contract to assert. +""" +import numpy as np +import pytest +import xarray as xr +from numpy.testing import assert_allclose + +try: + import cupy +except ImportError: + cupy = None + +try: + import dask.array as da +except ImportError: + da = None + +try: + import geopandas as gpd +except ImportError: + gpd = None + +try: + import spatialpandas as sp +except ImportError: + sp = None + +from ..polygonize import polygonize +from .general_checks import cuda_and_cupy_available, dask_array_available + + +# --------------------------------------------------------------------------- +# helpers +# --------------------------------------------------------------------------- + + +def _ring_area(ring): + """Shoelace signed area (CCW positive).""" + x = ring[:, 0] + y = ring[:, 1] + return 0.5 * (np.dot(x[:-1], y[1:]) - np.dot(x[1:], y[:-1])) + + +def _polygon_area(rings): + """Total signed area for a polygon (exterior + holes).""" + return sum(_ring_area(r) for r in rings) + + +def _areas_by_value(values, polygons): + out = {} + for val, rings in zip(values, polygons): + out.setdefault(val, 0.0) + out[val] += _polygon_area(rings) + return out + + +def _to_dask(arr, chunks): + return xr.DataArray(da.from_array(arr, chunks=chunks)) + + +def _to_dask_cupy(arr, chunks): + return xr.DataArray(da.from_array(cupy.asarray(arr), chunks=chunks)) + + +def _to_cupy(arr): + return xr.DataArray(cupy.asarray(arr)) + + +# --------------------------------------------------------------------------- +# Cat 3 HIGH: 
1x1 single-pixel raster, all four backends +# --------------------------------------------------------------------------- + + +class TestSinglePixelRaster: + """1x1 raster on every backend. Output is one polygon, area=1.""" + + DATA = np.array([[7]], dtype=np.int64) + + def _assert_unit_square(self, values, polygons): + assert len(values) == 1 + assert int(values[0]) == 7 + assert len(polygons) == 1 + rings = polygons[0] + assert len(rings) == 1 # no holes + assert_allclose(_ring_area(rings[0]), 1.0) + + def test_numpy(self): + v, p = polygonize(xr.DataArray(self.DATA)) + self._assert_unit_square(v, p) + + @cuda_and_cupy_available + def test_cupy(self): + v, p = polygonize(_to_cupy(self.DATA)) + self._assert_unit_square(v, p) + + @dask_array_available + def test_dask(self): + v, p = polygonize(_to_dask(self.DATA, chunks=(1, 1))) + self._assert_unit_square(v, p) + + @cuda_and_cupy_available + @dask_array_available + def test_dask_cupy(self): + v, p = polygonize(_to_dask_cupy(self.DATA, chunks=(1, 1))) + self._assert_unit_square(v, p) + + @pytest.mark.skipif(gpd is None, reason="geopandas not installed") + def test_numpy_geopandas(self): + df = polygonize(xr.DataArray(self.DATA), return_type="geopandas") + assert len(df) == 1 + assert int(df.DN.iloc[0]) == 7 + assert_allclose(df.geometry.area.iloc[0], 1.0) + + +# --------------------------------------------------------------------------- +# Cat 3 HIGH: Nx1 single-column raster, all four backends +# +# polygonize() pads nx==1 with a masked second column inside the numpy +# backend; the cupy backend short-circuits and routes through the numpy +# fallback (polygonize.py:671). Both code paths were untested. 
+# --------------------------------------------------------------------------- + + +class TestSingleColumnRaster: + + DATA = np.array([[1], [2], [1], [3]], dtype=np.int64) + + def _assert_four_strips(self, values, polygons): + assert_allclose(sorted(values), [1, 1, 2, 3]) + assert len(polygons) == 4 + # Each pixel becomes its own unit square. + for rings in polygons: + assert len(rings) == 1 + assert_allclose(_ring_area(rings[0]), 1.0) + + @pytest.mark.parametrize("connectivity", [4, 8]) + def test_numpy(self, connectivity): + v, p = polygonize(xr.DataArray(self.DATA), connectivity=connectivity) + self._assert_four_strips(v, p) + + @cuda_and_cupy_available + @pytest.mark.parametrize("connectivity", [4, 8]) + def test_cupy(self, connectivity): + v, p = polygonize(_to_cupy(self.DATA), connectivity=connectivity) + self._assert_four_strips(v, p) + + @dask_array_available + @pytest.mark.parametrize("connectivity", [4, 8]) + def test_dask(self, connectivity): + v, p = polygonize(_to_dask(self.DATA, chunks=(2, 1)), + connectivity=connectivity) + self._assert_four_strips(v, p) + + @cuda_and_cupy_available + @dask_array_available + @pytest.mark.parametrize("connectivity", [4, 8]) + def test_dask_cupy(self, connectivity): + v, p = polygonize(_to_dask_cupy(self.DATA, chunks=(2, 1)), + connectivity=connectivity) + self._assert_four_strips(v, p) + + +# --------------------------------------------------------------------------- +# Cat 3 MEDIUM: 1xN single-row raster +# --------------------------------------------------------------------------- + + +class TestSingleRowRaster: + + DATA = np.array([[1, 2, 1, 3]], dtype=np.int64) + + def _assert_four_strips(self, values, polygons): + assert_allclose(sorted(values), [1, 1, 2, 3]) + assert len(polygons) == 4 + for rings in polygons: + assert len(rings) == 1 + assert_allclose(_ring_area(rings[0]), 1.0) + + @pytest.mark.parametrize("connectivity", [4, 8]) + def test_numpy(self, connectivity): + v, p = polygonize(xr.DataArray(self.DATA), 
connectivity=connectivity) + self._assert_four_strips(v, p) + + @cuda_and_cupy_available + @pytest.mark.parametrize("connectivity", [4, 8]) + def test_cupy(self, connectivity): + v, p = polygonize(_to_cupy(self.DATA), connectivity=connectivity) + self._assert_four_strips(v, p) + + @dask_array_available + @pytest.mark.parametrize("connectivity", [4, 8]) + def test_dask(self, connectivity): + v, p = polygonize(_to_dask(self.DATA, chunks=(1, 2)), + connectivity=connectivity) + self._assert_four_strips(v, p) + + @cuda_and_cupy_available + @dask_array_available + @pytest.mark.parametrize("connectivity", [4, 8]) + def test_dask_cupy(self, connectivity): + v, p = polygonize(_to_dask_cupy(self.DATA, chunks=(1, 2)), + connectivity=connectivity) + self._assert_four_strips(v, p) + + +# --------------------------------------------------------------------------- +# Cat 3 MEDIUM: all-equal-value raster (zero-variance, one polygon) +# --------------------------------------------------------------------------- + + +class TestAllEqualRaster: + + DATA = np.full((4, 5), 9, dtype=np.int64) + + def _assert_single_polygon(self, values, polygons): + assert len(values) == 1 + assert int(values[0]) == 9 + assert len(polygons) == 1 + # Exterior only, area = ny*nx = 20. 
+ rings = polygons[0] + assert len(rings) == 1 + assert_allclose(_ring_area(rings[0]), 20.0) + + @pytest.mark.parametrize("connectivity", [4, 8]) + def test_numpy(self, connectivity): + v, p = polygonize(xr.DataArray(self.DATA), connectivity=connectivity) + self._assert_single_polygon(v, p) + + @cuda_and_cupy_available + @pytest.mark.parametrize("connectivity", [4, 8]) + def test_cupy(self, connectivity): + v, p = polygonize(_to_cupy(self.DATA), connectivity=connectivity) + self._assert_single_polygon(v, p) + + @dask_array_available + @pytest.mark.parametrize("connectivity", [4, 8]) + def test_dask(self, connectivity): + v, p = polygonize(_to_dask(self.DATA, chunks=(2, 2)), + connectivity=connectivity) + self._assert_single_polygon(v, p) + + @cuda_and_cupy_available + @dask_array_available + @pytest.mark.parametrize("connectivity", [4, 8]) + def test_dask_cupy(self, connectivity): + v, p = polygonize(_to_dask_cupy(self.DATA, chunks=(2, 2)), + connectivity=connectivity) + self._assert_single_polygon(v, p) + + +# --------------------------------------------------------------------------- +# Cat 2 HIGH: NaN parity with cupy + dask+cupy +# +# numpy and dask are already covered by test_polygonize_nan_pixels_excluded +# and test_polygonize_nan_pixels_excluded_dask. These pins close the +# matching cupy / dask+cupy holes. +# --------------------------------------------------------------------------- + + +class TestNanCupy: + + DATA = np.array([ + [1.0, np.nan, 2.0], + [np.nan, 1.0, np.nan], + [3.0, np.nan, 1.0], + ], dtype=np.float64) + + @cuda_and_cupy_available + @pytest.mark.parametrize("connectivity", [4, 8]) + def test_cupy_excludes_nan(self, connectivity): + v, p = polygonize(_to_cupy(self.DATA), connectivity=connectivity) + assert not any(np.isnan(val) for val in v) + # Same finite values as numpy: 1.0, 2.0, 3.0 (no NaN cell appears). 
+ assert set(float(x) for x in v) == {1.0, 2.0, 3.0} + + @cuda_and_cupy_available + @dask_array_available + @pytest.mark.parametrize("connectivity", [4, 8]) + def test_dask_cupy_excludes_nan(self, connectivity): + v, p = polygonize(_to_dask_cupy(self.DATA, chunks=(2, 2)), + connectivity=connectivity) + assert not any(np.isnan(val) for val in v) + assert set(float(x) for x in v) == {1.0, 2.0, 3.0} + + @cuda_and_cupy_available + def test_cupy_matches_numpy_per_value_area(self): + v_np, p_np = polygonize(xr.DataArray(self.DATA), connectivity=4) + v_cp, p_cp = polygonize(_to_cupy(self.DATA), connectivity=4) + a_np = _areas_by_value(v_np, p_np) + a_cp = _areas_by_value(v_cp, p_cp) + assert set(a_np) == set(a_cp) + for k in a_np: + assert_allclose(a_cp[k], a_np[k]) + + +# --------------------------------------------------------------------------- +# Cat 2 MEDIUM: all-NaN raster. Empty polygon list on every backend. +# --------------------------------------------------------------------------- + + +class TestAllNanRaster: + + DATA = np.full((3, 3), np.nan, dtype=np.float64) + + def test_numpy(self): + v, p = polygonize(xr.DataArray(self.DATA)) + assert v == [] + assert p == [] + + @cuda_and_cupy_available + def test_cupy(self): + v, p = polygonize(_to_cupy(self.DATA)) + assert v == [] + assert p == [] + + @dask_array_available + def test_dask(self): + v, p = polygonize(_to_dask(self.DATA, chunks=(2, 2))) + assert v == [] + assert p == [] + + @cuda_and_cupy_available + @dask_array_available + def test_dask_cupy(self): + v, p = polygonize(_to_dask_cupy(self.DATA, chunks=(2, 2))) + assert v == [] + assert p == [] + + +# --------------------------------------------------------------------------- +# Cat 2 HIGH: Inf inputs +# +# !!! Source-bug pin (issue #2155) !!! +# The numpy/dask boundary-tracing backend silently absorbs +Inf and -Inf +# pixels into adjacent regions instead of emitting them as their own +# polygons. 
# This is because _is_close (polygonize.py:240) reduces
# ``abs(inf - inf)`` to ``nan`` so two inf pixels are considered NOT
# close, but later _scan() never starts a polygon at an inf cell either.
# The eager cupy backend correctly emits inf polygons.
#
# These tests PIN the current asymmetric behaviour so the gap is
# visible.  When #2155 is fixed, these pins must be updated together.
# ---------------------------------------------------------------------------


# Mixed 1.0 / +inf / -inf 3x3 raster; +inf and -inf both appear twice each.
_INF_DATA = np.array([
    [1.0, np.inf, 1.0],
    [-np.inf, 1.0, -np.inf],
    [1.0, np.inf, 1.0],
], dtype=np.float64)


class TestInfPins:
    """Pins on +Inf / -Inf behaviour across backends.

    The numpy, dask and dask+cupy backends currently MERGE Inf cells
    with surrounding polygons (under-count).  Only the eager cupy
    backend emits them as distinct polygons.  Tests pin both
    behaviours.  When the source bug is fixed, the numpy / dask /
    dask+cupy pins must flip and these tests must be updated together.
    """

    def test_numpy_inf_currently_undercounts(self):
        # Pin current (buggy) behaviour: numpy reports only value-1.0
        # polygons whose combined area covers the full raster, with no
        # inf polygons.
        v, p = polygonize(xr.DataArray(_INF_DATA), connectivity=4)
        finite_vals = [val for val in v if np.isfinite(val)]
        inf_vals = [val for val in v if np.isinf(val)]
        # Currently no Inf polygons are reported by the numpy backend.
        assert inf_vals == [], (
            "numpy backend started emitting Inf polygons; update the "
            "Inf source-fix pins (see test_polygonize_coverage_2026_05_19)."
        )
        # The finite polygons cover the full raster size (Inf cells got
        # silently merged into a value=1.0 region).
        total = sum(_polygon_area(rings) for rings in p)
        assert_allclose(total, float(_INF_DATA.size))
        assert all(val == 1.0 for val in finite_vals)

    @cuda_and_cupy_available
    def test_cupy_inf_correctly_emits_polygons(self):
        # cupy emits +inf and -inf polygons distinctly.
        v, p = polygonize(_to_cupy(_INF_DATA), connectivity=4)
        # +inf appears at 2 cells (4-connectivity -> 2 polygons of area 1).
        # -inf appears at 2 cells (4-connectivity -> 2 polygons of area 1).
        plus_inf = [val for val in v if np.isposinf(val)]
        minus_inf = [val for val in v if np.isneginf(val)]
        assert len(plus_inf) == 2, (
            f"cupy +inf polygon count regressed: {v}")
        assert len(minus_inf) == 2, (
            f"cupy -inf polygon count regressed: {v}")
        # Inf-polygon areas total to the cell count.
        areas = _areas_by_value(v, p)
        plus_total = sum(a for k, a in areas.items() if np.isposinf(k))
        minus_total = sum(a for k, a in areas.items() if np.isneginf(k))
        assert_allclose(plus_total, 2.0)
        assert_allclose(minus_total, 2.0)
        # Total area preserved.
        total = sum(_polygon_area(rings) for rings in p)
        assert_allclose(total, float(_INF_DATA.size))

    @dask_array_available
    def test_dask_inf_currently_undercounts(self):
        # Dask mirrors the numpy bug: no Inf polygons, and Inf cells get
        # absorbed into surrounding finite (value=1.0) regions so the total
        # polygon area still equals the raster area.
        v, p = polygonize(_to_dask(_INF_DATA, chunks=(3, 3)),
                          connectivity=4)
        finite_vals = [val for val in v if np.isfinite(val)]
        inf_vals = [val for val in v if np.isinf(val)]
        assert inf_vals == [], (
            "dask backend started emitting Inf polygons; update the "
            "Inf source-fix pins (see test_polygonize_coverage_2026_05_19)."
        )
        total = sum(_polygon_area(rings) for rings in p)
        assert_allclose(total, float(_INF_DATA.size))
        assert all(val == 1.0 for val in finite_vals)

    @cuda_and_cupy_available
    @dask_array_available
    def test_dask_cupy_inf_currently_undercounts(self):
        # Dask+CuPy goes through _polygonize_chunk which calls the numpy
        # backend per chunk on numpy-converted data, so it follows the
        # numpy bug (Inf cells absorbed into adjacent value=1.0 polygons),
        # NOT the eager-cupy behaviour.  Pin the current under-counting so
        # the source fix for #2155 is visible as a test diff.
        v, p = polygonize(_to_dask_cupy(_INF_DATA, chunks=(3, 3)),
                          connectivity=4)
        inf_vals = [val for val in v if np.isinf(val)]
        assert inf_vals == [], (
            "dask+cupy backend started emitting Inf polygons; update the "
            "Inf source-fix pins (see test_polygonize_coverage_2026_05_19)."
        )
        total = sum(_polygon_area(rings) for rings in p)
        assert_allclose(total, float(_INF_DATA.size))


# ---------------------------------------------------------------------------
# Cat 1 MEDIUM: simplify_tolerance + dask+cupy backend parity
# ---------------------------------------------------------------------------


_STAIRCASE = np.array([
    [1, 1, 1, 2, 2, 2],
    [1, 1, 2, 2, 2, 2],
    [1, 2, 2, 2, 2, 2],
    [1, 1, 2, 2, 2, 2],
    [1, 1, 1, 2, 2, 2],
], dtype=np.int64)


@cuda_and_cupy_available
@dask_array_available
class TestSimplifyDaskCupy:
    """simplify_tolerance parity for the dask+cupy backend."""

    @pytest.mark.parametrize("method",
                             ["douglas-peucker", "visvalingam-whyatt"])
    def test_dask_cupy_matches_numpy_areas(self, method):
        # The numpy run is the reference; dask+cupy must report the same
        # values with the same per-value area after simplification.
        v_np, p_np = polygonize(xr.DataArray(_STAIRCASE),
                                simplify_tolerance=1.5,
                                simplify_method=method)
        v_dc, p_dc = polygonize(_to_dask_cupy(_STAIRCASE, chunks=(3, 3)),
                                simplify_tolerance=1.5,
                                simplify_method=method)
        a_np = _areas_by_value(v_np, p_np)
        a_dc = _areas_by_value(v_dc, p_dc)
        assert set(a_np) == set(a_dc)
        for k in a_np:
            assert_allclose(a_dc[k], a_np[k], atol=1e-10)


# ---------------------------------------------------------------------------
# Cat 1 MEDIUM: mask= with dask+cupy backend
# ---------------------------------------------------------------------------


@cuda_and_cupy_available
@dask_array_available
class TestMaskDaskCupy:
    """mask= parity: dask+cupy per-value areas must match numpy."""

    def test_mask_dask_cupy_matches_numpy(self):
        data = np.array([[0, 0, 1], [0, 4, 0], [0, 0, 0]], dtype=np.int32)
        mask = np.array([[1, 1, 1], [1, 0, 1], [1, 1, 1]], dtype=np.bool_)

        v_np, p_np = polygonize(xr.DataArray(data),
                                mask=xr.DataArray(mask), connectivity=4)
        v_dc, p_dc = polygonize(
            _to_dask_cupy(data, chunks=(2, 2)),
            mask=_to_dask_cupy(mask, chunks=(2, 2)),
            connectivity=4)

        a_np = _areas_by_value(v_np, p_np)
        a_dc = _areas_by_value(v_dc, p_dc)
        assert set(a_np) == set(a_dc)
        for k in a_np:
            assert_allclose(a_dc[k], a_np[k])


# ---------------------------------------------------------------------------
# Cat 4 MEDIUM: column_name parameter
# ---------------------------------------------------------------------------


_THREE_CLASS = np.array([[0, 0, 1], [0, 4, 0], [0, 0, 0]], dtype=np.int32)


@pytest.mark.skipif(gpd is None, reason="geopandas not installed")
def test_column_name_geopandas_non_default():
    df = polygonize(xr.DataArray(_THREE_CLASS),
                    return_type="geopandas", column_name="value")
    assert "value" in df.columns
    assert "DN" not in df.columns
    assert sorted(int(x) for x in df["value"]) == [0, 1, 4]


@pytest.mark.skipif(sp is None, reason="spatialpandas not installed")
def test_column_name_spatialpandas_non_default():
    df = polygonize(xr.DataArray(_THREE_CLASS),
                    return_type="spatialpandas", column_name="value")
    assert "value" in df.columns
    assert "DN" not in df.columns


def test_column_name_geojson_non_default():
    fc = polygonize(xr.DataArray(_THREE_CLASS),
                    return_type="geojson", column_name="value")
    # Guard against a vacuous pass: with an empty feature list the loop
    # below would never run and the test would assert nothing.
    assert len(fc["features"]) > 0
    for feat in fc["features"]:
        assert "value" in feat["properties"]
        assert "DN" not in feat["properties"]


# ---------------------------------------------------------------------------
# Cat 4 MEDIUM: error paths
# ---------------------------------------------------------------------------


class TestErrorPaths:
    """Validation error paths in polygonize()."""

    DATA = xr.DataArray(np.zeros((3, 3), dtype=np.int32))

    def test_invalid_connectivity_raises(self):
        with pytest.raises(ValueError, match="connectivity must be either"):
            polygonize(self.DATA, connectivity=5)

    @pytest.mark.parametrize("bad", [0, 1, 6, 9, -4])
    def test_invalid_connectivity_values(self, bad):
        with pytest.raises(ValueError, match="connectivity"):
            polygonize(self.DATA, connectivity=bad)

    def test_bad_transform_length_short(self):
        with pytest.raises(ValueError,
                           match="Incorrect transform length of 5"):
            polygonize(self.DATA, transform=(1, 0, 0, 0, 1))

    def test_bad_transform_length_long(self):
        with pytest.raises(ValueError,
                           match="Incorrect transform length of 7"):
            polygonize(self.DATA, transform=(1, 0, 0, 0, 1, 0, 0))

    def test_mask_shape_mismatch(self):
        mask = xr.DataArray(np.ones((4, 4), dtype=bool))
        with pytest.raises(ValueError, match="same shape"):
            polygonize(self.DATA, mask=mask)

    @dask_array_available
    def test_mask_underlying_type_mismatch(self):
        # numpy raster, dask mask.
        mask = xr.DataArray(
            da.from_array(np.ones((3, 3), dtype=bool), chunks=(2, 2)))
        with pytest.raises(TypeError, match="different underlying types"):
            polygonize(self.DATA, mask=mask)