From 85d9e664664368326d1f203b8f93af89123ede3e Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Sun, 22 Mar 2026 20:44:17 -0700
Subject: [PATCH 01/15] Copy enum explanation dicts and health-check tests to
 cuda_bindings (#1712)

The explanation dicts are fundamentally tied to the bindings version, so
they belong in cuda_bindings. This copies them (keeping the cuda_core
originals for backward compatibility) and adds the corresponding health
tests under cuda_bindings/tests/.

Made-with: Cursor
---
 .../cuda/bindings/_utils/__init__.py          |   2 +
 .../_utils/driver_cu_result_explanations.py   | 358 ++++++++++++
 .../_utils/runtime_cuda_error_explanations.py | 551 ++++++++++++++++++
 cuda_bindings/tests/test_enum_explanations.py |  44 ++
 4 files changed, 955 insertions(+)
 create mode 100644 cuda_bindings/cuda/bindings/_utils/__init__.py
 create mode 100644 cuda_bindings/cuda/bindings/_utils/driver_cu_result_explanations.py
 create mode 100644 cuda_bindings/cuda/bindings/_utils/runtime_cuda_error_explanations.py
 create mode 100644 cuda_bindings/tests/test_enum_explanations.py

diff --git a/cuda_bindings/cuda/bindings/_utils/__init__.py b/cuda_bindings/cuda/bindings/_utils/__init__.py
new file mode 100644
index 0000000000..830c1bf0de
--- /dev/null
+++ b/cuda_bindings/cuda/bindings/_utils/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
diff --git a/cuda_bindings/cuda/bindings/_utils/driver_cu_result_explanations.py b/cuda_bindings/cuda/bindings/_utils/driver_cu_result_explanations.py
new file mode 100644
index 0000000000..0b085520a6
--- /dev/null
+++ b/cuda_bindings/cuda/bindings/_utils/driver_cu_result_explanations.py
@@ -0,0 +1,358 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+# To regenerate the dictionary below run:
+#     ../../../../../toolshed/reformat_cuda_enums_as_py.py /usr/local/cuda/include/cuda.h
+# Replace the dictionary below with the output.
+# Also update the CUDA Toolkit version number below.
+
+# CUDA Toolkit v13.2.0
+DRIVER_CU_RESULT_EXPLANATIONS = {
+    0: (
+        "The API call returned with no errors. In the case of query calls, this"
+        " also means that the operation being queried is complete (see"
+        " ::cuEventQuery() and ::cuStreamQuery())."
+    ),
+    1: (
+        "This indicates that one or more of the parameters passed to the API call"
+        " is not within an acceptable range of values."
+    ),
+    2: (
+        "The API call failed because it was unable to allocate enough memory or"
+        " other resources to perform the requested operation."
+    ),
+    3: (
+        "This indicates that the CUDA driver has not been initialized with"
+        " ::cuInit() or that initialization has failed."
+    ),
+    4: "This indicates that the CUDA driver is in the process of shutting down.",
+    5: (
+        "This indicates profiler is not initialized for this run. This can"
+        " happen when the application is running with external profiling tools"
+        " like visual profiler."
+    ),
+    6: (
+        "This error return is deprecated as of CUDA 5.0. It is no longer an error"
+        " to attempt to enable/disable the profiling via ::cuProfilerStart or"
+        " ::cuProfilerStop without initialization."
+    ),
+    7: (
+        "This error return is deprecated as of CUDA 5.0. It is no longer an error"
+        " to call cuProfilerStart() when profiling is already enabled."
+    ),
+    8: (
+        "This error return is deprecated as of CUDA 5.0. It is no longer an error"
+        " to call cuProfilerStop() when profiling is already disabled."
+    ),
+    34: (
+        "This indicates that the CUDA driver that the application has loaded is a"
+        " stub library. Applications that run with the stub rather than a real"
+        " driver loaded will result in CUDA API returning this error."
+    ),
+    36: (
+        "This indicates that the API call requires a newer CUDA driver than the one"
+        " currently installed. Users should install an updated NVIDIA CUDA driver"
+        " to allow the API call to succeed."
+    ),
+    46: (
+        "This indicates that requested CUDA device is unavailable at the current"
+        " time. Devices are often unavailable due to use of"
+        " ::CU_COMPUTEMODE_EXCLUSIVE_PROCESS or ::CU_COMPUTEMODE_PROHIBITED."
+    ),
+    100: ("This indicates that no CUDA-capable devices were detected by the installed CUDA driver."),
+    101: (
+        "This indicates that the device ordinal supplied by the user does not"
+        " correspond to a valid CUDA device or that the action requested is"
+        " invalid for the specified device."
+    ),
+    102: "This error indicates that the Grid license is not applied.",
+    200: ("This indicates that the device kernel image is invalid. This can also indicate an invalid CUDA module."),
+    201: (
+        "This most frequently indicates that there is no context bound to the"
+        " current thread. This can also be returned if the context passed to an"
+        " API call is not a valid handle (such as a context that has had"
+        " ::cuCtxDestroy() invoked on it). This can also be returned if a user"
+        " mixes different API versions (i.e. 3010 context with 3020 API calls)."
+        " See ::cuCtxGetApiVersion() for more details."
+        " This can also be returned if the green context passed to an API call"
+        " was not converted to a ::CUcontext using ::cuCtxFromGreenCtx API."
+    ),
+    202: (
+        "This indicated that the context being supplied as a parameter to the"
+        " API call was already the active context."
+        " This error return is deprecated as of CUDA 3.2. It is no longer an"
+        " error to attempt to push the active context via ::cuCtxPushCurrent()."
+    ),
+    205: "This indicates that a map or register operation has failed.",
+    206: "This indicates that an unmap or unregister operation has failed.",
+    207: ("This indicates that the specified array is currently mapped and thus cannot be destroyed."),
+    208: "This indicates that the resource is already mapped.",
+    209: (
+        "This indicates that there is no kernel image available that is suitable"
+        " for the device. This can occur when a user specifies code generation"
+        " options for a particular CUDA source file that do not include the"
+        " corresponding device configuration."
+    ),
+    210: "This indicates that a resource has already been acquired.",
+    211: "This indicates that a resource is not mapped.",
+    212: ("This indicates that a mapped resource is not available for access as an array."),
+    213: ("This indicates that a mapped resource is not available for access as a pointer."),
+    214: ("This indicates that an uncorrectable ECC error was detected during execution."),
+    215: ("This indicates that the ::CUlimit passed to the API call is not supported by the active device."),
+    216: (
+        "This indicates that the ::CUcontext passed to the API call can"
+        " only be bound to a single CPU thread at a time but is already"
+        " bound to a CPU thread."
+    ),
+    217: ("This indicates that peer access is not supported across the given devices."),
+    218: "This indicates that a PTX JIT compilation failed.",
+    219: "This indicates an error with OpenGL or DirectX context.",
+    220: ("This indicates that an uncorrectable NVLink error was detected during the execution."),
+    221: "This indicates that the PTX JIT compiler library was not found.",
+    222: "This indicates that the provided PTX was compiled with an unsupported toolchain.",
+    223: "This indicates that the PTX JIT compilation was disabled.",
+    224: ("This indicates that the ::CUexecAffinityType passed to the API call is not supported by the active device."),
+    225: (
+        "This indicates that the code to be compiled by the PTX JIT contains unsupported call to cudaDeviceSynchronize."
+    ),
+    226: (
+        "This indicates that an exception occurred on the device that is now"
+        " contained by the GPU's error containment capability. Common causes are -"
+        " a. Certain types of invalid accesses of peer GPU memory over nvlink"
+        " b. Certain classes of hardware errors"
+        " This leaves the process in an inconsistent state and any further CUDA"
+        " work will return the same error. To continue using CUDA, the process must"
+        " be terminated and relaunched."
+    ),
+    300: (
+        "This indicates that the device kernel source is invalid. This includes"
+        " compilation/linker errors encountered in device code or user error."
+    ),
+    301: "This indicates that the file specified was not found.",
+    302: "This indicates that a link to a shared object failed to resolve.",
+    303: "This indicates that initialization of a shared object failed.",
+    304: "This indicates that an OS call failed.",
+    400: (
+        "This indicates that a resource handle passed to the API call was not"
+        " valid. Resource handles are opaque types like ::CUstream and ::CUevent."
+    ),
+    401: (
+        "This indicates that a resource required by the API call is not in a"
+        " valid state to perform the requested operation."
+    ),
+    402: (
+        "This indicates an attempt was made to introspect an object in a way that"
+        " would discard semantically important information. This is either due to"
+        " the object using funtionality newer than the API version used to"
+        " introspect it or omission of optional return arguments."
+    ),
+    500: (
+        "This indicates that a named symbol was not found. Examples of symbols"
+        " are global/constant variable names, driver function names, texture names,"
+        " and surface names."
+    ),
+    600: (
+        "This indicates that asynchronous operations issued previously have not"
+        " completed yet. This result is not actually an error, but must be indicated"
+        " differently than ::CUDA_SUCCESS (which indicates completion). Calls that"
+        " may return this value include ::cuEventQuery() and ::cuStreamQuery()."
+    ),
+    700: (
+        "While executing a kernel, the device encountered a"
+        " load or store instruction on an invalid memory address."
+        " This leaves the process in an inconsistent state and any further CUDA work"
+        " will return the same error. To continue using CUDA, the process must be terminated"
+        " and relaunched."
+    ),
+    701: (
+        "This indicates that a launch did not occur because it did not have"
+        " appropriate resources. This error usually indicates that the user has"
+        " attempted to pass too many arguments to the device kernel, or the"
+        " kernel launch specifies too many threads for the kernel's register"
+        " count. Passing arguments of the wrong size (i.e. a 64-bit pointer"
+        " when a 32-bit int is expected) is equivalent to passing too many"
+        " arguments and can also result in this error."
+    ),
+    702: (
+        "This indicates that the device kernel took too long to execute. This can"
+        " only occur if timeouts are enabled - see the device attribute"
+        " ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information."
+        " This leaves the process in an inconsistent state and any further CUDA work"
+        " will return the same error. To continue using CUDA, the process must be terminated"
+        " and relaunched."
+    ),
+    703: ("This error indicates a kernel launch that uses an incompatible texturing mode."),
+    704: (
+        "This error indicates that a call to ::cuCtxEnablePeerAccess() is"
+        " trying to re-enable peer access to a context which has already"
+        " had peer access to it enabled."
+    ),
+    705: (
+        "This error indicates that ::cuCtxDisablePeerAccess() is"
+        " trying to disable peer access which has not been enabled yet"
+        " via ::cuCtxEnablePeerAccess()."
+    ),
+    708: ("This error indicates that the primary context for the specified device has already been initialized."),
+    709: (
+        "This error indicates that the context current to the calling thread"
+        " has been destroyed using ::cuCtxDestroy, or is a primary context which"
+        " has not yet been initialized."
+    ),
+    710: (
+        "A device-side assert triggered during kernel execution. The context"
+        " cannot be used anymore, and must be destroyed. All existing device"
+        " memory allocations from this context are invalid and must be"
+        " reconstructed if the program is to continue using CUDA."
+    ),
+    711: (
+        "This error indicates that the hardware resources required to enable"
+        " peer access have been exhausted for one or more of the devices"
+        " passed to ::cuCtxEnablePeerAccess()."
+    ),
+    712: ("This error indicates that the memory range passed to ::cuMemHostRegister() has already been registered."),
+    713: (
+        "This error indicates that the pointer passed to ::cuMemHostUnregister()"
+        " does not correspond to any currently registered memory region."
+    ),
+    714: (
+        "While executing a kernel, the device encountered a stack error."
+        " This can be due to stack corruption or exceeding the stack size limit."
+        " This leaves the process in an inconsistent state and any further CUDA work"
+        " will return the same error. To continue using CUDA, the process must be terminated"
+        " and relaunched."
+    ),
+    715: (
+        "While executing a kernel, the device encountered an illegal instruction."
+        " This leaves the process in an inconsistent state and any further CUDA work"
+        " will return the same error. To continue using CUDA, the process must be terminated"
+        " and relaunched."
+    ),
+    716: (
+        "While executing a kernel, the device encountered a load or store instruction"
+        " on a memory address which is not aligned."
+        " This leaves the process in an inconsistent state and any further CUDA work"
+        " will return the same error. To continue using CUDA, the process must be terminated"
+        " and relaunched."
+    ),
+    717: (
+        "While executing a kernel, the device encountered an instruction"
+        " which can only operate on memory locations in certain address spaces"
+        " (global, shared, or local), but was supplied a memory address not"
+        " belonging to an allowed address space."
+        " This leaves the process in an inconsistent state and any further CUDA work"
+        " will return the same error. To continue using CUDA, the process must be terminated"
+        " and relaunched."
+    ),
+    718: (
+        "While executing a kernel, the device program counter wrapped its address space."
+        " This leaves the process in an inconsistent state and any further CUDA work"
+        " will return the same error. To continue using CUDA, the process must be terminated"
+        " and relaunched."
+    ),
+    719: (
+        "An exception occurred on the device while executing a kernel. Common"
+        " causes include dereferencing an invalid device pointer and accessing"
+        " out of bounds shared memory. Less common cases can be system specific - more"
+        " information about these cases can be found in the system specific user guide."
+        " This leaves the process in an inconsistent state and any further CUDA work"
+        " will return the same error. To continue using CUDA, the process must be terminated"
+        " and relaunched."
+    ),
+    720: (
+        "This error indicates that the number of blocks launched per grid for a kernel that was"
+        " launched via either ::cuLaunchCooperativeKernel or ::cuLaunchCooperativeKernelMultiDevice"
+        " exceeds the maximum number of blocks as allowed by ::cuOccupancyMaxActiveBlocksPerMultiprocessor"
+        " or ::cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors"
+        " as specified by the device attribute ::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT."
+    ),
+    721: (
+        "An exception occurred on the device while exiting a kernel using tensor memory: the"
+        " tensor memory was not completely deallocated. This leaves the process in an inconsistent"
+        " state and any further CUDA work will return the same error. To continue using CUDA, the"
+        " process must be terminated and relaunched."
+    ),
+    800: "This error indicates that the attempted operation is not permitted.",
+    801: ("This error indicates that the attempted operation is not supported on the current system or device."),
+    802: (
+        "This error indicates that the system is not yet ready to start any CUDA"
+        " work.  To continue using CUDA, verify the system configuration is in a"
+        " valid state and all required driver daemons are actively running."
+        " More information about this error can be found in the system specific"
+        " user guide."
+    ),
+    803: (
+        "This error indicates that there is a mismatch between the versions of"
+        " the display driver and the CUDA driver. Refer to the compatibility documentation"
+        " for supported versions."
+    ),
+    804: (
+        "This error indicates that the system was upgraded to run with forward compatibility"
+        " but the visible hardware detected by CUDA does not support this configuration."
+        " Refer to the compatibility documentation for the supported hardware matrix or ensure"
+        " that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES"
+        " environment variable."
+    ),
+    805: "This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server.",
+    806: "This error indicates that the remote procedural call between the MPS server and the MPS client failed.",
+    807: (
+        "This error indicates that the MPS server is not ready to accept new MPS client requests."
+        " This error can be returned when the MPS server is in the process of recovering from a fatal failure."
+    ),
+    808: "This error indicates that the hardware resources required to create MPS client have been exhausted.",
+    809: "This error indicates the the hardware resources required to support device connections have been exhausted.",
+    810: "This error indicates that the MPS client has been terminated by the server. To continue using CUDA, the process must be terminated and relaunched.",
+    811: "This error indicates that the module is using CUDA Dynamic Parallelism, but the current configuration, like MPS, does not support it.",
+    812: "This error indicates that a module contains an unsupported interaction between different versions of CUDA Dynamic Parallelism.",
+    900: ("This error indicates that the operation is not permitted when the stream is capturing."),
+    901: (
+        "This error indicates that the current capture sequence on the stream"
+        " has been invalidated due to a previous error."
+    ),
+    902: (
+        "This error indicates that the operation would have resulted in a merge of two independent capture sequences."
+    ),
+    903: "This error indicates that the capture was not initiated in this stream.",
+    904: ("This error indicates that the capture sequence contains a fork that was not joined to the primary stream."),
+    905: (
+        "This error indicates that a dependency would have been created which"
+        " crosses the capture sequence boundary. Only implicit in-stream ordering"
+        " dependencies are allowed to cross the boundary."
+    ),
+    906: ("This error indicates a disallowed implicit dependency on a current capture sequence from cudaStreamLegacy."),
+    907: (
+        "This error indicates that the operation is not permitted on an event which"
+        " was last recorded in a capturing stream."
+    ),
+    908: (
+        "A stream capture sequence not initiated with the ::CU_STREAM_CAPTURE_MODE_RELAXED"
+        " argument to ::cuStreamBeginCapture was passed to ::cuStreamEndCapture in a"
+        " different thread."
+    ),
+    909: "This error indicates that the timeout specified for the wait operation has lapsed.",
+    910: (
+        "This error indicates that the graph update was not performed because it included"
+        " changes which violated constraints specific to instantiated graph update."
+    ),
+    911: (
+        "This indicates that an error has occurred in a device outside of GPU. It can be a"
+        " synchronous error w.r.t. CUDA API or an asynchronous error from the external device."
+        " In case of asynchronous error, it means that if cuda was waiting for an external device's"
+        " signal before consuming shared data, the external device signaled an error indicating that"
+        " the data is not valid for consumption. This leaves the process in an inconsistent"
+        " state and any further CUDA work will return the same error. To continue using CUDA,"
+        " the process must be terminated and relaunched."
+        " In case of synchronous error, it means that one or more external devices"
+        " have encountered an error and cannot complete the operation."
+    ),
+    912: "Indicates a kernel launch error due to cluster misconfiguration.",
+    913: ("Indiciates a function handle is not loaded when calling an API that requires a loaded function."),
+    914: ("This error indicates one or more resources passed in are not valid resource types for the operation."),
+    915: ("This error indicates one or more resources are insufficient or non-applicable for the operation."),
+    916: ("This error indicates that an error happened during the key rotation sequence."),
+    917: (
+        "This error indicates that the requested operation is not permitted because the"
+        " stream is in a detached state. This can occur if the green context associated"
+        " with the stream has been destroyed, limiting the stream's operational capabilities."
+    ),
+    999: "This indicates that an unknown internal error has occurred.",
+}
diff --git a/cuda_bindings/cuda/bindings/_utils/runtime_cuda_error_explanations.py b/cuda_bindings/cuda/bindings/_utils/runtime_cuda_error_explanations.py
new file mode 100644
index 0000000000..4421d50480
--- /dev/null
+++ b/cuda_bindings/cuda/bindings/_utils/runtime_cuda_error_explanations.py
@@ -0,0 +1,551 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+# To regenerate the dictionary below run:
+#     ../../../../../toolshed/reformat_cuda_enums_as_py.py /usr/local/cuda/include/driver_types.h
+# Replace the dictionary below with the output.
+# Also update the CUDA Toolkit version number below.
+
+# CUDA Toolkit v13.2.0
+RUNTIME_CUDA_ERROR_EXPLANATIONS = {
+    0: (
+        "The API call returned with no errors. In the case of query calls, this"
+        " also means that the operation being queried is complete (see"
+        " ::cudaEventQuery() and ::cudaStreamQuery())."
+    ),
+    1: (
+        "This indicates that one or more of the parameters passed to the API call"
+        " is not within an acceptable range of values."
+    ),
+    2: (
+        "The API call failed because it was unable to allocate enough memory or"
+        " other resources to perform the requested operation."
+    ),
+    3: ("The API call failed because the CUDA driver and runtime could not be initialized."),
+    4: (
+        "This indicates that a CUDA Runtime API call cannot be executed because"
+        " it is being called during process shut down, at a point in time after"
+        " CUDA driver has been unloaded."
+    ),
+    5: (
+        "This indicates profiler is not initialized for this run. This can"
+        " happen when the application is running with external profiling tools"
+        " like visual profiler."
+    ),
+    6: (
+        "This error return is deprecated as of CUDA 5.0. It is no longer an error"
+        " to attempt to enable/disable the profiling via ::cudaProfilerStart or"
+        " ::cudaProfilerStop without initialization."
+    ),
+    7: (
+        "This error return is deprecated as of CUDA 5.0. It is no longer an error"
+        " to call cudaProfilerStart() when profiling is already enabled."
+    ),
+    8: (
+        "This error return is deprecated as of CUDA 5.0. It is no longer an error"
+        " to call cudaProfilerStop() when profiling is already disabled."
+    ),
+    9: (
+        "This indicates that a kernel launch is requesting resources that can"
+        " never be satisfied by the current device. Requesting more shared memory"
+        " per block than the device supports will trigger this error, as will"
+        " requesting too many threads or blocks. See ::cudaDeviceProp for more"
+        " device limitations."
+    ),
+    10: (
+        "This indicates that the driver is newer than the runtime version"
+        " and returned graph node parameter information that the runtime"
+        " does not understand and is unable to translate."
+    ),
+    12: (
+        "This indicates that one or more of the pitch-related parameters passed"
+        " to the API call is not within the acceptable range for pitch."
+    ),
+    13: ("This indicates that the symbol name/identifier passed to the API call is not a valid name or identifier."),
+    16: (
+        "This indicates that at least one host pointer passed to the API call is"
+        " not a valid host pointer."
+        " This error return is deprecated as of CUDA 10.1."
+    ),
+    17: (
+        "This indicates that at least one device pointer passed to the API call is"
+        " not a valid device pointer."
+        " This error return is deprecated as of CUDA 10.1."
+    ),
+    18: ("This indicates that the texture passed to the API call is not a valid texture."),
+    19: (
+        "This indicates that the texture binding is not valid. This occurs if you"
+        " call ::cudaGetTextureAlignmentOffset() with an unbound texture."
+    ),
+    20: (
+        "This indicates that the channel descriptor passed to the API call is not"
+        " valid. This occurs if the format is not one of the formats specified by"
+        " ::cudaChannelFormatKind, or if one of the dimensions is invalid."
+    ),
+    21: (
+        "This indicates that the direction of the memcpy passed to the API call is"
+        " not one of the types specified by ::cudaMemcpyKind."
+    ),
+    22: (
+        "This indicated that the user has taken the address of a constant variable,"
+        " which was forbidden up until the CUDA 3.1 release."
+        " This error return is deprecated as of CUDA 3.1. Variables in constant"
+        " memory may now have their address taken by the runtime via"
+        " ::cudaGetSymbolAddress()."
+    ),
+    23: (
+        "This indicated that a texture fetch was not able to be performed."
+        " This was previously used for device emulation of texture operations."
+        " This error return is deprecated as of CUDA 3.1. Device emulation mode was"
+        " removed with the CUDA 3.1 release."
+    ),
+    24: (
+        "This indicated that a texture was not bound for access."
+        " This was previously used for device emulation of texture operations."
+        " This error return is deprecated as of CUDA 3.1. Device emulation mode was"
+        " removed with the CUDA 3.1 release."
+    ),
+    25: (
+        "This indicated that a synchronization operation had failed."
+        " This was previously used for some device emulation functions."
+        " This error return is deprecated as of CUDA 3.1. Device emulation mode was"
+        " removed with the CUDA 3.1 release."
+    ),
+    26: (
+        "This indicates that a non-float texture was being accessed with linear"
+        " filtering. This is not supported by CUDA."
+    ),
+    27: (
+        "This indicates that an attempt was made to read an unsupported data type as a"
+        " normalized float. This is not supported by CUDA."
+    ),
+    28: (
+        "Mixing of device and device emulation code was not allowed."
+        " This error return is deprecated as of CUDA 3.1. Device emulation mode was"
+        " removed with the CUDA 3.1 release."
+    ),
+    31: (
+        "This indicates that the API call is not yet implemented. Production"
+        " releases of CUDA will never return this error."
+        " This error return is deprecated as of CUDA 4.1."
+    ),
+    32: (
+        "This indicated that an emulated device pointer exceeded the 32-bit address"
+        " range."
+        " This error return is deprecated as of CUDA 3.1. Device emulation mode was"
+        " removed with the CUDA 3.1 release."
+    ),
+    34: (
+        "This indicates that the CUDA driver that the application has loaded is a"
+        " stub library. Applications that run with the stub rather than a real"
+        " driver loaded will result in CUDA API returning this error."
+    ),
+    35: (
+        "This indicates that the installed NVIDIA CUDA driver is older than the"
+        " CUDA runtime library. This is not a supported configuration. Users should"
+        " install an updated NVIDIA display driver to allow the application to run."
+    ),
+    36: (
+        "This indicates that the API call requires a newer CUDA driver than the one"
+        " currently installed. Users should install an updated NVIDIA CUDA driver"
+        " to allow the API call to succeed."
+    ),
+    37: ("This indicates that the surface passed to the API call is not a valid surface."),
+    43: (
+        "This indicates that multiple global or constant variables (across separate"
+        " CUDA source files in the application) share the same string name."
+    ),
+    44: (
+        "This indicates that multiple textures (across separate CUDA source"
+        " files in the application) share the same string name."
+    ),
+    45: (
+        "This indicates that multiple surfaces (across separate CUDA source"
+        " files in the application) share the same string name."
+    ),
+    46: (
+        "This indicates that all CUDA devices are busy or unavailable at the current"
+        " time. Devices are often busy/unavailable due to use of"
+        " ::cudaComputeModeProhibited, ::cudaComputeModeExclusiveProcess, or when long"
+        " running CUDA kernels have filled up the GPU and are blocking new work"
+        " from starting. They can also be unavailable due to memory constraints"
+        " on a device that already has active CUDA work being performed."
+    ),
+    49: (
+        "This indicates that the current context is not compatible with this"
+        " the CUDA Runtime. This can only occur if you are using CUDA"
+        " Runtime/Driver interoperability and have created an existing Driver"
+        " context using the driver API. The Driver context may be incompatible"
+        " either because the Driver context was created using an older version"
+        " of the API, because the Runtime API call expects a primary driver"
+        " context and the Driver context is not primary, or because the Driver"
+        ' context has been destroyed. Please see CUDART_DRIVER "Interactions'
+        ' with the CUDA Driver API" for more information.'
+    ),
+    52: (
+        "The device function being invoked (usually via ::cudaLaunchKernel()) was not"
+        " previously configured via the ::cudaConfigureCall() function."
+    ),
+    53: (
+        "This indicated that a previous kernel launch failed. This was previously"
+        " used for device emulation of kernel launches."
+        " This error return is deprecated as of CUDA 3.1. Device emulation mode was"
+        " removed with the CUDA 3.1 release."
+    ),
+    65: (
+        "This error indicates that a device runtime grid launch did not occur"
+        " because the depth of the child grid would exceed the maximum supported"
+        " number of nested grid launches."
+    ),
+    66: (
+        "This error indicates that a grid launch did not occur because the kernel"
+        " uses file-scoped textures which are unsupported by the device runtime."
+        " Kernels launched via the device runtime only support textures created with"
+        " the Texture Object API's."
+    ),
+    67: (
+        "This error indicates that a grid launch did not occur because the kernel"
+        " uses file-scoped surfaces which are unsupported by the device runtime."
+        " Kernels launched via the device runtime only support surfaces created with"
+        " the Surface Object API's."
+    ),
+    68: (
+        "This error indicates that a call to ::cudaDeviceSynchronize made from"
+        " the device runtime failed because the call was made at grid depth greater"
+        " than than either the default (2 levels of grids) or user specified device"
+        " limit ::cudaLimitDevRuntimeSyncDepth. To be able to synchronize on"
+        " launched grids at a greater depth successfully, the maximum nested"
+        " depth at which ::cudaDeviceSynchronize will be called must be specified"
+        " with the ::cudaLimitDevRuntimeSyncDepth limit to the ::cudaDeviceSetLimit"
+        " api before the host-side launch of a kernel using the device runtime."
+        " Keep in mind that additional levels of sync depth require the runtime"
+        " to reserve large amounts of device memory that cannot be used for"
+        " user allocations. Note that ::cudaDeviceSynchronize made from device"
+        " runtime is only supported on devices of compute capability < 9.0."
+    ),
+    69: (
+        "This error indicates that a device runtime grid launch failed because"
+        " the launch would exceed the limit ::cudaLimitDevRuntimePendingLaunchCount."
+        " For this launch to proceed successfully, ::cudaDeviceSetLimit must be"
+        " called to set the ::cudaLimitDevRuntimePendingLaunchCount to be higher"
+        " than the upper bound of outstanding launches that can be issued to the"
+        " device runtime. Keep in mind that raising the limit of pending device"
+        " runtime launches will require the runtime to reserve device memory that"
+        " cannot be used for user allocations."
+    ),
+    98: ("The requested device function does not exist or is not compiled for the proper device architecture."),
+    100: ("This indicates that no CUDA-capable devices were detected by the installed CUDA driver."),
+    101: (
+        "This indicates that the device ordinal supplied by the user does not"
+        " correspond to a valid CUDA device or that the action requested is"
+        " invalid for the specified device."
+    ),
+    102: "This indicates that the device doesn't have a valid Grid License.",
+    103: (
+        "By default, the CUDA runtime may perform a minimal set of self-tests,"
+        " as well as CUDA driver tests, to establish the validity of both."
+        " Introduced in CUDA 11.2, this error return indicates that at least one"
+        " of these tests has failed and the validity of either the runtime"
+        " or the driver could not be established."
+    ),
+    127: "This indicates an internal startup failure in the CUDA runtime.",
+    200: "This indicates that the device kernel image is invalid.",
+    201: (
+        "This most frequently indicates that there is no context bound to the"
+        " current thread. This can also be returned if the context passed to an"
+        " API call is not a valid handle (such as a context that has had"
+        " ::cuCtxDestroy() invoked on it). This can also be returned if a user"
+        " mixes different API versions (i.e. 3010 context with 3020 API calls)."
+        " See ::cuCtxGetApiVersion() for more details."
+    ),
+    205: "This indicates that the buffer object could not be mapped.",
+    206: "This indicates that the buffer object could not be unmapped.",
+    207: ("This indicates that the specified array is currently mapped and thus cannot be destroyed."),
+    208: "This indicates that the resource is already mapped.",
+    209: (
+        "This indicates that there is no kernel image available that is suitable"
+        " for the device. This can occur when a user specifies code generation"
+        " options for a particular CUDA source file that do not include the"
+        " corresponding device configuration."
+    ),
+    210: "This indicates that a resource has already been acquired.",
+    211: "This indicates that a resource is not mapped.",
+    212: ("This indicates that a mapped resource is not available for access as an array."),
+    213: ("This indicates that a mapped resource is not available for access as a pointer."),
+    214: ("This indicates that an uncorrectable ECC error was detected during execution."),
+    215: ("This indicates that the ::cudaLimit passed to the API call is not supported by the active device."),
+    216: (
+        "This indicates that a call tried to access an exclusive-thread device that"
+        " is already in use by a different thread."
+    ),
+    217: ("This error indicates that P2P access is not supported across the given devices."),
+    218: (
+        "A PTX compilation failed. The runtime may fall back to compiling PTX if"
+        " an application does not contain a suitable binary for the current device."
+    ),
+    219: "This indicates an error with the OpenGL or DirectX context.",
+    220: ("This indicates that an uncorrectable NVLink error was detected during the execution."),
+    221: (
+        "This indicates that the PTX JIT compiler library was not found. The JIT Compiler"
+        " library is used for PTX compilation. The runtime may fall back to compiling PTX"
+        " if an application does not contain a suitable binary for the current device."
+    ),
+    222: (
+        "This indicates that the provided PTX was compiled with an unsupported toolchain."
+        " The most common reason for this, is the PTX was generated by a compiler newer"
+        " than what is supported by the CUDA driver and PTX JIT compiler."
+    ),
+    223: (
+        "This indicates that the JIT compilation was disabled. The JIT compilation compiles"
+        " PTX. The runtime may fall back to compiling PTX if an application does not contain"
+        " a suitable binary for the current device."
+    ),
+    224: "This indicates that the provided execution affinity is not supported by the device.",
+    225: (
+        "This indicates that the code to be compiled by the PTX JIT contains unsupported call to cudaDeviceSynchronize."
+    ),
+    226: (
+        "This indicates that an exception occurred on the device that is now"
+        " contained by the GPU's error containment capability. Common causes are -"
+        " a. Certain types of invalid accesses of peer GPU memory over nvlink"
+        " b. Certain classes of hardware errors"
+        " This leaves the process in an inconsistent state and any further CUDA"
+        " work will return the same error. To continue using CUDA, the process must"
+        " be terminated and relaunched."
+    ),
+    300: "This indicates that the device kernel source is invalid.",
+    301: "This indicates that the file specified was not found.",
+    302: "This indicates that a link to a shared object failed to resolve.",
+    303: "This indicates that initialization of a shared object failed.",
+    304: "This error indicates that an OS call failed.",
+    400: (
+        "This indicates that a resource handle passed to the API call was not"
+        " valid. Resource handles are opaque types like ::cudaStream_t and"
+        " ::cudaEvent_t."
+    ),
+    401: (
+        "This indicates that a resource required by the API call is not in a"
+        " valid state to perform the requested operation."
+    ),
+    402: (
+        "This indicates an attempt was made to introspect an object in a way that"
+        " would discard semantically important information. This is either due to"
+        " the object using funtionality newer than the API version used to"
+        " introspect it or omission of optional return arguments."
+    ),
+    500: (
+        "This indicates that a named symbol was not found. Examples of symbols"
+        " are global/constant variable names, driver function names, texture names,"
+        " and surface names."
+    ),
+    600: (
+        "This indicates that asynchronous operations issued previously have not"
+        " completed yet. This result is not actually an error, but must be indicated"
+        " differently than ::cudaSuccess (which indicates completion). Calls that"
+        " may return this value include ::cudaEventQuery() and ::cudaStreamQuery()."
+    ),
+    700: (
+        "The device encountered a load or store instruction on an invalid memory address."
+        " This leaves the process in an inconsistent state and any further CUDA work"
+        " will return the same error. To continue using CUDA, the process must be terminated"
+        " and relaunched."
+    ),
+    701: (
+        "This indicates that a launch did not occur because it did not have"
+        " appropriate resources. Although this error is similar to"
+        " ::cudaErrorInvalidConfiguration, this error usually indicates that the"
+        " user has attempted to pass too many arguments to the device kernel, or the"
+        " kernel launch specifies too many threads for the kernel's register count."
+    ),
+    702: (
+        "This indicates that the device kernel took too long to execute. This can"
+        " only occur if timeouts are enabled - see the device attribute"
+        ' ::cudaDeviceAttr::cudaDevAttrKernelExecTimeout "cudaDevAttrKernelExecTimeout"'
+        " for more information."
+        " This leaves the process in an inconsistent state and any further CUDA work"
+        " will return the same error. To continue using CUDA, the process must be terminated"
+        " and relaunched."
+    ),
+    703: ("This error indicates a kernel launch that uses an incompatible texturing mode."),
+    704: (
+        "This error indicates that a call to ::cudaDeviceEnablePeerAccess() is"
+        " trying to re-enable peer addressing on from a context which has already"
+        " had peer addressing enabled."
+    ),
+    705: (
+        "This error indicates that ::cudaDeviceDisablePeerAccess() is trying to"
+        " disable peer addressing which has not been enabled yet via"
+        " ::cudaDeviceEnablePeerAccess()."
+    ),
+    708: (
+        "This indicates that the user has called ::cudaSetValidDevices(),"
+        " ::cudaSetDeviceFlags(), ::cudaD3D9SetDirect3DDevice(),"
+        " ::cudaD3D10SetDirect3DDevice, ::cudaD3D11SetDirect3DDevice(), or"
+        " ::cudaVDPAUSetVDPAUDevice() after initializing the CUDA runtime by"
+        " calling non-device management operations (allocating memory and"
+        " launching kernels are examples of non-device management operations)."
+        " This error can also be returned if using runtime/driver"
+        " interoperability and there is an existing ::CUcontext active on the"
+        " host thread."
+    ),
+    709: (
+        "This error indicates that the context current to the calling thread"
+        " has been destroyed using ::cuCtxDestroy, or is a primary context which"
+        " has not yet been initialized."
+    ),
+    710: (
+        "An assert triggered in device code during kernel execution. The device"
+        " cannot be used again. All existing allocations are invalid. To continue"
+        " using CUDA, the process must be terminated and relaunched."
+    ),
+    711: (
+        "This error indicates that the hardware resources required to enable"
+        " peer access have been exhausted for one or more of the devices"
+        " passed to ::cudaEnablePeerAccess()."
+    ),
+    712: ("This error indicates that the memory range passed to ::cudaHostRegister() has already been registered."),
+    713: (
+        "This error indicates that the pointer passed to ::cudaHostUnregister()"
+        " does not correspond to any currently registered memory region."
+    ),
+    714: (
+        "Device encountered an error in the call stack during kernel execution,"
+        " possibly due to stack corruption or exceeding the stack size limit."
+        " This leaves the process in an inconsistent state and any further CUDA work"
+        " will return the same error. To continue using CUDA, the process must be terminated"
+        " and relaunched."
+    ),
+    715: (
+        "The device encountered an illegal instruction during kernel execution"
+        " This leaves the process in an inconsistent state and any further CUDA work"
+        " will return the same error. To continue using CUDA, the process must be terminated"
+        " and relaunched."
+    ),
+    716: (
+        "The device encountered a load or store instruction"
+        " on a memory address which is not aligned."
+        " This leaves the process in an inconsistent state and any further CUDA work"
+        " will return the same error. To continue using CUDA, the process must be terminated"
+        " and relaunched."
+    ),
+    717: (
+        "While executing a kernel, the device encountered an instruction"
+        " which can only operate on memory locations in certain address spaces"
+        " (global, shared, or local), but was supplied a memory address not"
+        " belonging to an allowed address space."
+        " This leaves the process in an inconsistent state and any further CUDA work"
+        " will return the same error. To continue using CUDA, the process must be terminated"
+        " and relaunched."
+    ),
+    718: (
+        "The device encountered an invalid program counter."
+        " This leaves the process in an inconsistent state and any further CUDA work"
+        " will return the same error. To continue using CUDA, the process must be terminated"
+        " and relaunched."
+    ),
+    719: (
+        "An exception occurred on the device while executing a kernel. Common"
+        " causes include dereferencing an invalid device pointer and accessing"
+        " out of bounds shared memory. Less common cases can be system specific - more"
+        " information about these cases can be found in the system specific user guide."
+        " This leaves the process in an inconsistent state and any further CUDA work"
+        " will return the same error. To continue using CUDA, the process must be terminated"
+        " and relaunched."
+    ),
+    720: (
+        "This error indicates that the number of blocks launched per grid for a kernel that was"
+        " launched via either ::cudaLaunchCooperativeKernel"
+        " exceeds the maximum number of blocks as allowed by ::cudaOccupancyMaxActiveBlocksPerMultiprocessor"
+        " or ::cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors"
+        " as specified by the device attribute ::cudaDevAttrMultiProcessorCount."
+    ),
+    721: (
+        "An exception occurred on the device while exiting a kernel using tensor memory: the"
+        " tensor memory was not completely deallocated. This leaves the process in an inconsistent"
+        " state and any further CUDA work will return the same error. To continue using CUDA, the"
+        " process must be terminated and relaunched."
+    ),
+    800: "This error indicates the attempted operation is not permitted.",
+    801: ("This error indicates the attempted operation is not supported on the current system or device."),
+    802: (
+        "This error indicates that the system is not yet ready to start any CUDA"
+        " work.  To continue using CUDA, verify the system configuration is in a"
+        " valid state and all required driver daemons are actively running."
+        " More information about this error can be found in the system specific"
+        " user guide."
+    ),
+    803: (
+        "This error indicates that there is a mismatch between the versions of"
+        " the display driver and the CUDA driver. Refer to the compatibility documentation"
+        " for supported versions."
+    ),
+    804: (
+        "This error indicates that the system was upgraded to run with forward compatibility"
+        " but the visible hardware detected by CUDA does not support this configuration."
+        " Refer to the compatibility documentation for the supported hardware matrix or ensure"
+        " that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES"
+        " environment variable."
+    ),
+    805: "This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server.",
+    806: "This error indicates that the remote procedural call between the MPS server and the MPS client failed.",
+    807: (
+        "This error indicates that the MPS server is not ready to accept new MPS client requests."
+        " This error can be returned when the MPS server is in the process of recovering from a fatal failure."
+    ),
+    808: "This error indicates that the hardware resources required to create MPS client have been exhausted.",
+    809: "This error indicates the the hardware resources required to device connections have been exhausted.",
+    810: "This error indicates that the MPS client has been terminated by the server. To continue using CUDA, the process must be terminated and relaunched.",
+    811: "This error indicates, that the program is using CUDA Dynamic Parallelism, but the current configuration, like MPS, does not support it.",
+    812: "This error indicates, that the program contains an unsupported interaction between different versions of CUDA Dynamic Parallelism.",
+    900: "The operation is not permitted when the stream is capturing.",
+    901: ("The current capture sequence on the stream has been invalidated due to a previous error."),
+    902: ("The operation would have resulted in a merge of two independent capture sequences."),
+    903: "The capture was not initiated in this stream.",
+    904: ("The capture sequence contains a fork that was not joined to the primary stream."),
+    905: (
+        "A dependency would have been created which crosses the capture sequence"
+        " boundary. Only implicit in-stream ordering dependencies are allowed to"
+        " cross the boundary."
+    ),
+    906: (
+        "The operation would have resulted in a disallowed implicit dependency on"
+        " a current capture sequence from cudaStreamLegacy."
+    ),
+    907: ("The operation is not permitted on an event which was last recorded in a capturing stream."),
+    908: (
+        "A stream capture sequence not initiated with the ::cudaStreamCaptureModeRelaxed"
+        " argument to ::cudaStreamBeginCapture was passed to ::cudaStreamEndCapture in a"
+        " different thread."
+    ),
+    909: "This indicates that the wait operation has timed out.",
+    910: (
+        "This error indicates that the graph update was not performed because it included"
+        " changes which violated constraints specific to instantiated graph update."
+    ),
+    911: (
+        "This indicates that an error has occurred in a device outside of GPU. It can be a"
+        " synchronous error w.r.t. CUDA API or an asynchronous error from the external device."
+        " In case of asynchronous error, it means that if cuda was waiting for an external device's"
+        " signal before consuming shared data, the external device signaled an error indicating that"
+        " the data is not valid for consumption. This leaves the process in an inconsistent"
+        " state and any further CUDA work will return the same error. To continue using CUDA,"
+        " the process must be terminated and relaunched."
+        " In case of synchronous error, it means that one or more external devices"
+        " have encountered an error and cannot complete the operation."
+    ),
+    912: ("This indicates that a kernel launch error has occurred due to cluster misconfiguration."),
+    913: ("Indiciates a function handle is not loaded when calling an API that requires a loaded function."),
+    914: ("This error indicates one or more resources passed in are not valid resource types for the operation."),
+    915: ("This error indicates one or more resources are insufficient or non-applicable for the operation."),
+    917: (
+        "This error indicates that the requested operation is not permitted because the"
+        " stream is in a detached state. This can occur if the green context associated"
+        " with the stream has been destroyed, limiting the stream's operational capabilities."
+    ),
+    999: "This indicates that an unknown internal error has occurred.",
+    10000: (
+        "Any unhandled CUDA driver error is added to this value and returned via"
+        " the runtime. Production releases of CUDA should not return such errors."
+        " This error return is deprecated as of CUDA 4.1."
+    ),
+}
diff --git a/cuda_bindings/tests/test_enum_explanations.py b/cuda_bindings/tests/test_enum_explanations.py
new file mode 100644
index 0000000000..910615d99d
--- /dev/null
+++ b/cuda_bindings/tests/test_enum_explanations.py
@@ -0,0 +1,44 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+import importlib.metadata
+
+from cuda.bindings import driver, runtime
+from cuda.bindings._utils.driver_cu_result_explanations import DRIVER_CU_RESULT_EXPLANATIONS
+from cuda.bindings._utils.runtime_cuda_error_explanations import RUNTIME_CUDA_ERROR_EXPLANATIONS
+
+
+def _get_binding_version():
+    try:
+        major_minor = importlib.metadata.version("cuda-bindings").split(".")[:2]
+    except importlib.metadata.PackageNotFoundError:
+        major_minor = importlib.metadata.version("cuda-python").split(".")[:2]
+    return tuple(int(v) for v in major_minor)
+
+
+def test_driver_cu_result_explanations_health():
+    expl_dict = DRIVER_CU_RESULT_EXPLANATIONS
+
+    known_codes = set()
+    for error in driver.CUresult:
+        code = int(error)
+        assert code in expl_dict
+        known_codes.add(code)
+
+    if _get_binding_version() >= (13, 0):
+        extra_expl = sorted(set(expl_dict.keys()) - known_codes)
+        assert not extra_expl
+
+
+def test_runtime_cuda_error_explanations_health():
+    expl_dict = RUNTIME_CUDA_ERROR_EXPLANATIONS
+
+    known_codes = set()
+    for error in runtime.cudaError_t:
+        code = int(error)
+        assert code in expl_dict
+        known_codes.add(code)
+
+    if _get_binding_version() >= (13, 0):
+        extra_expl = sorted(set(expl_dict.keys()) - known_codes)
+        assert not extra_expl

From f35670ae9271d5665caa8a9a3340ed3cfc86cbfb Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Sun, 22 Mar 2026 20:47:04 -0700
Subject: [PATCH 02/15] Remove enum explanation health tests from cuda_core
 (#1712)

These tests now live in cuda_bindings/tests/test_enum_explanations.py,
where they belong alongside the explanation dicts they verify.

Made-with: Cursor
---
 cuda_core/tests/test_cuda_utils.py | 32 ------------------------------
 1 file changed, 32 deletions(-)

diff --git a/cuda_core/tests/test_cuda_utils.py b/cuda_core/tests/test_cuda_utils.py
index 04670b96f2..59b6369815 100644
--- a/cuda_core/tests/test_cuda_utils.py
+++ b/cuda_core/tests/test_cuda_utils.py
@@ -11,38 +11,6 @@
 from cuda.core._utils.clear_error_support import assert_type_str_or_bytes_like, raise_code_path_meant_to_be_unreachable
 
 
-def test_driver_cu_result_explanations_health():
-    expl_dict = cuda_utils.DRIVER_CU_RESULT_EXPLANATIONS
-
-    # Ensure all CUresult enums are in expl_dict
-    known_codes = set()
-    for error in driver.CUresult:
-        code = int(error)
-        assert code in expl_dict
-        known_codes.add(code)
-
-    if cuda_utils.get_binding_version() >= (13, 0):
-        # Ensure expl_dict has no codes not known as a CUresult enum
-        extra_expl = sorted(set(expl_dict.keys()) - known_codes)
-        assert not extra_expl
-
-
-def test_runtime_cuda_error_explanations_health():
-    expl_dict = cuda_utils.RUNTIME_CUDA_ERROR_EXPLANATIONS
-
-    # Ensure all cudaError_t enums are in expl_dict
-    known_codes = set()
-    for error in runtime.cudaError_t:
-        code = int(error)
-        assert code in expl_dict
-        known_codes.add(code)
-
-    if cuda_utils.get_binding_version() >= (13, 0):
-        # Ensure expl_dict has no codes not known as a cudaError_t enum
-        extra_expl = sorted(set(expl_dict.keys()) - known_codes)
-        assert not extra_expl
-
-
 def test_check_driver_error():
     num_unexpected = 0
     for error in driver.CUresult:

From d6b3e788ca8fe7fab6c9e03c46b4d4744e8f7639 Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Sun, 22 Mar 2026 21:15:34 -0700
Subject: [PATCH 03/15] cuda_core: prefer enum explanations from cuda.bindings,
 keep local fallback (#1712)

Each explanation module now tries to import the authoritative dict from
cuda.bindings._utils (ModuleNotFoundError-guarded) and falls back to its
own copy for older cuda-bindings that don't ship it yet. Smoke tests
added for both dicts.

Made-with: Cursor
---
 .../core/_utils/driver_cu_result_explanations.py   | 14 +++++++++-----
 .../core/_utils/runtime_cuda_error_explanations.py | 14 +++++++++-----
 cuda_core/tests/test_cuda_utils.py                 | 14 ++++++++++++++
 3 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py b/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py
index 0b085520a6..189a6dd580 100644
--- a/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py
+++ b/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py
@@ -1,11 +1,7 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# To regenerate the dictionary below run:
-#     ../../../../../toolshed/reformat_cuda_enums_as_py.py /usr/local/cuda/include/cuda.h
-# Replace the dictionary below with the output.
-# Also update the CUDA Toolkit version number below.
-
+# Fallback copy -- overridden from cuda.bindings below when available.
 # CUDA Toolkit v13.2.0
 DRIVER_CU_RESULT_EXPLANATIONS = {
     0: (
@@ -356,3 +352,11 @@
     ),
     999: "This indicates that an unknown internal error has occurred.",
 }
+
+# Prefer the authoritative copy from cuda.bindings when available.
+try:
+    import cuda.bindings._utils.driver_cu_result_explanations as _authoritative
+except ModuleNotFoundError:
+    pass
+else:
+    DRIVER_CU_RESULT_EXPLANATIONS = _authoritative.DRIVER_CU_RESULT_EXPLANATIONS
diff --git a/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py b/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py
index 4421d50480..8d44d60671 100644
--- a/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py
+++ b/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py
@@ -1,11 +1,7 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# To regenerate the dictionary below run:
-#     ../../../../../toolshed/reformat_cuda_enums_as_py.py /usr/local/cuda/include/driver_types.h
-# Replace the dictionary below with the output.
-# Also update the CUDA Toolkit version number below.
-
+# Fallback copy -- overridden from cuda.bindings below when available.
 # CUDA Toolkit v13.2.0
 RUNTIME_CUDA_ERROR_EXPLANATIONS = {
     0: (
@@ -549,3 +545,11 @@
         " This error return is deprecated as of CUDA 4.1."
     ),
 }
+
+# Prefer the authoritative copy from cuda.bindings when available.
+try:
+    import cuda.bindings._utils.runtime_cuda_error_explanations as _authoritative
+except ModuleNotFoundError:
+    pass
+else:
+    RUNTIME_CUDA_ERROR_EXPLANATIONS = _authoritative.RUNTIME_CUDA_ERROR_EXPLANATIONS
diff --git a/cuda_core/tests/test_cuda_utils.py b/cuda_core/tests/test_cuda_utils.py
index 59b6369815..ab8095ed44 100644
--- a/cuda_core/tests/test_cuda_utils.py
+++ b/cuda_core/tests/test_cuda_utils.py
@@ -11,6 +11,20 @@
 from cuda.core._utils.clear_error_support import assert_type_str_or_bytes_like, raise_code_path_meant_to_be_unreachable
 
 
+def test_driver_cu_result_explanations_smoke():
+    expl = cuda_utils.DRIVER_CU_RESULT_EXPLANATIONS
+    for code in (0, 1, 2):
+        assert code in expl
+        assert isinstance(expl[code], str)
+
+
+def test_runtime_cuda_error_explanations_smoke():
+    expl = cuda_utils.RUNTIME_CUDA_ERROR_EXPLANATIONS
+    for code in (0, 1, 2):
+        assert code in expl
+        assert isinstance(expl[code], str)
+
+
 def test_check_driver_error():
     num_unexpected = 0
     for error in driver.CUresult:

From b54ec837592ede44a6506715c0dcce3582663e96 Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Sun, 22 Mar 2026 22:11:28 -0700
Subject: [PATCH 04/15] Add _CTK_MAJOR_MINOR_PATCH version guards and DRY up
 explanation tests (#1712)

Rename explanation dicts to _EXPLANATIONS / _FALLBACK_EXPLANATIONS,
add _CTK_MAJOR_MINOR_PATCH to each module, and enforce that the
cuda_core fallback copy is as new as (and in-sync with) cuda_bindings.
Parametrize the smoke and version-check tests to cover both driver and
runtime without duplication.

Made-with: Cursor
---
 .../_utils/driver_cu_result_explanations.py   |  5 ++-
 .../_utils/runtime_cuda_error_explanations.py |  5 ++-
 cuda_bindings/tests/test_enum_explanations.py |  7 ++--
 .../_utils/driver_cu_result_explanations.py   |  9 ++--
 .../_utils/runtime_cuda_error_explanations.py |  9 ++--
 cuda_core/tests/test_cuda_utils.py            | 41 +++++++++++++++----
 6 files changed, 55 insertions(+), 21 deletions(-)

diff --git a/cuda_bindings/cuda/bindings/_utils/driver_cu_result_explanations.py b/cuda_bindings/cuda/bindings/_utils/driver_cu_result_explanations.py
index 0b085520a6..afe576ce68 100644
--- a/cuda_bindings/cuda/bindings/_utils/driver_cu_result_explanations.py
+++ b/cuda_bindings/cuda/bindings/_utils/driver_cu_result_explanations.py
@@ -6,8 +6,9 @@
 # Replace the dictionary below with the output.
 # Also update the CUDA Toolkit version number below.
 
-# CUDA Toolkit v13.2.0
-DRIVER_CU_RESULT_EXPLANATIONS = {
+_CTK_MAJOR_MINOR_PATCH = (13, 2, 0)
+
+_EXPLANATIONS = {
     0: (
         "The API call returned with no errors. In the case of query calls, this"
         " also means that the operation being queried is complete (see"
diff --git a/cuda_bindings/cuda/bindings/_utils/runtime_cuda_error_explanations.py b/cuda_bindings/cuda/bindings/_utils/runtime_cuda_error_explanations.py
index 4421d50480..cb4e3ec4cc 100644
--- a/cuda_bindings/cuda/bindings/_utils/runtime_cuda_error_explanations.py
+++ b/cuda_bindings/cuda/bindings/_utils/runtime_cuda_error_explanations.py
@@ -6,8 +6,9 @@
 # Replace the dictionary below with the output.
 # Also update the CUDA Toolkit version number below.
 
-# CUDA Toolkit v13.2.0
-RUNTIME_CUDA_ERROR_EXPLANATIONS = {
+_CTK_MAJOR_MINOR_PATCH = (13, 2, 0)
+
+_EXPLANATIONS = {
     0: (
         "The API call returned with no errors. In the case of query calls, this"
         " also means that the operation being queried is complete (see"
diff --git a/cuda_bindings/tests/test_enum_explanations.py b/cuda_bindings/tests/test_enum_explanations.py
index 910615d99d..36f266dab8 100644
--- a/cuda_bindings/tests/test_enum_explanations.py
+++ b/cuda_bindings/tests/test_enum_explanations.py
@@ -4,8 +4,7 @@
 import importlib.metadata
 
 from cuda.bindings import driver, runtime
-from cuda.bindings._utils.driver_cu_result_explanations import DRIVER_CU_RESULT_EXPLANATIONS
-from cuda.bindings._utils.runtime_cuda_error_explanations import RUNTIME_CUDA_ERROR_EXPLANATIONS
+from cuda.bindings._utils import driver_cu_result_explanations, runtime_cuda_error_explanations
 
 
 def _get_binding_version():
@@ -17,7 +16,7 @@ def _get_binding_version():
 
 
 def test_driver_cu_result_explanations_health():
-    expl_dict = DRIVER_CU_RESULT_EXPLANATIONS
+    expl_dict = driver_cu_result_explanations._EXPLANATIONS
 
     known_codes = set()
     for error in driver.CUresult:
@@ -31,7 +30,7 @@ def test_driver_cu_result_explanations_health():
 
 
 def test_runtime_cuda_error_explanations_health():
-    expl_dict = RUNTIME_CUDA_ERROR_EXPLANATIONS
+    expl_dict = runtime_cuda_error_explanations._EXPLANATIONS
 
     known_codes = set()
     for error in runtime.cudaError_t:
diff --git a/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py b/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py
index 189a6dd580..f713d42be7 100644
--- a/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py
+++ b/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py
@@ -2,8 +2,9 @@
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
 # Fallback copy -- overridden from cuda.bindings below when available.
-# CUDA Toolkit v13.2.0
-DRIVER_CU_RESULT_EXPLANATIONS = {
+_CTK_MAJOR_MINOR_PATCH = (13, 2, 0)
+
+_FALLBACK_EXPLANATIONS = {
     0: (
         "The API call returned with no errors. In the case of query calls, this"
         " also means that the operation being queried is complete (see"
@@ -353,10 +354,12 @@
     999: "This indicates that an unknown internal error has occurred.",
 }
 
+DRIVER_CU_RESULT_EXPLANATIONS = _FALLBACK_EXPLANATIONS
+
 # Prefer the authoritative copy from cuda.bindings when available.
 try:
     import cuda.bindings._utils.driver_cu_result_explanations as _authoritative
 except ModuleNotFoundError:
     pass
 else:
-    DRIVER_CU_RESULT_EXPLANATIONS = _authoritative.DRIVER_CU_RESULT_EXPLANATIONS
+    DRIVER_CU_RESULT_EXPLANATIONS = _authoritative._EXPLANATIONS
diff --git a/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py b/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py
index 8d44d60671..d2a2205ae2 100644
--- a/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py
+++ b/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py
@@ -2,8 +2,9 @@
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
 # Fallback copy -- overridden from cuda.bindings below when available.
-# CUDA Toolkit v13.2.0
-RUNTIME_CUDA_ERROR_EXPLANATIONS = {
+_CTK_MAJOR_MINOR_PATCH = (13, 2, 0)
+
+_FALLBACK_EXPLANATIONS = {
     0: (
         "The API call returned with no errors. In the case of query calls, this"
         " also means that the operation being queried is complete (see"
@@ -546,10 +547,12 @@
     ),
 }
 
+RUNTIME_CUDA_ERROR_EXPLANATIONS = _FALLBACK_EXPLANATIONS
+
 # Prefer the authoritative copy from cuda.bindings when available.
 try:
     import cuda.bindings._utils.runtime_cuda_error_explanations as _authoritative
 except ModuleNotFoundError:
     pass
 else:
-    RUNTIME_CUDA_ERROR_EXPLANATIONS = _authoritative.RUNTIME_CUDA_ERROR_EXPLANATIONS
+    RUNTIME_CUDA_ERROR_EXPLANATIONS = _authoritative._EXPLANATIONS
diff --git a/cuda_core/tests/test_cuda_utils.py b/cuda_core/tests/test_cuda_utils.py
index ab8095ed44..01a86d17e9 100644
--- a/cuda_core/tests/test_cuda_utils.py
+++ b/cuda_core/tests/test_cuda_utils.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
 import dataclasses
+import importlib
 
 import pytest
 
@@ -10,19 +11,45 @@
 from cuda.core._utils import cuda_utils
 from cuda.core._utils.clear_error_support import assert_type_str_or_bytes_like, raise_code_path_meant_to_be_unreachable
 
+_EXPLANATION_MODULES = [
+    ("driver_cu_result_explanations", "DRIVER_CU_RESULT_EXPLANATIONS"),
+    ("runtime_cuda_error_explanations", "RUNTIME_CUDA_ERROR_EXPLANATIONS"),
+]
 
-def test_driver_cu_result_explanations_smoke():
-    expl = cuda_utils.DRIVER_CU_RESULT_EXPLANATIONS
+
+@pytest.mark.parametrize("module_name,public_name", _EXPLANATION_MODULES)
+def test_explanations_smoke(module_name, public_name):
+    expl = getattr(cuda_utils, public_name)
     for code in (0, 1, 2):
         assert code in expl
         assert isinstance(expl[code], str)
 
 
-def test_runtime_cuda_error_explanations_smoke():
-    expl = cuda_utils.RUNTIME_CUDA_ERROR_EXPLANATIONS
-    for code in (0, 1, 2):
-        assert code in expl
-        assert isinstance(expl[code], str)
+@pytest.mark.parametrize("module_name,public_name", _EXPLANATION_MODULES)
+def test_explanations_ctk_version(module_name, public_name):
+    del public_name  # unused
+    core_mod = importlib.import_module(f"cuda.core._utils.{module_name}")
+    try:
+        bindings_mod = importlib.import_module(f"cuda.bindings._utils.{module_name}")
+    except ModuleNotFoundError:
+        pytest.skip("cuda.bindings._utils not available")
+    bindings_path = f"cuda_bindings/cuda/bindings/_utils/{module_name}.py"
+    core_path = f"cuda_core/cuda/core/_utils/{module_name}.py"
+    if core_mod._CTK_MAJOR_MINOR_PATCH < bindings_mod._CTK_MAJOR_MINOR_PATCH:
+        raise RuntimeError(
+            f"cuda_core copy is older ({core_mod._CTK_MAJOR_MINOR_PATCH})"
+            f" than cuda_bindings ({bindings_mod._CTK_MAJOR_MINOR_PATCH})."
+            f" Please copy the _EXPLANATIONS dict from {bindings_path} to {core_path}"
+        )
+    if (
+        core_mod._CTK_MAJOR_MINOR_PATCH == bindings_mod._CTK_MAJOR_MINOR_PATCH
+        and core_mod._FALLBACK_EXPLANATIONS != bindings_mod._EXPLANATIONS
+    ):
+        raise RuntimeError(
+            f"The cuda_core copy of the cuda_bindings _EXPLANATIONS dict is out of sync"
+            f" (both at CTK {core_mod._CTK_MAJOR_MINOR_PATCH})."
+            f" Please copy the _EXPLANATIONS dict from {bindings_path} to {core_path}"
+        )
 
 
 def test_check_driver_error():

From 3f6c30f863fcd38fcc79bf21c3d115ffe875ebfe Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Sun, 22 Mar 2026 22:17:40 -0700
Subject: [PATCH 05/15] Clean up test code: parametrize bindings health tests,
 drop no-op f-string (#1712)

Made-with: Cursor
---
 cuda_bindings/tests/test_enum_explanations.py | 31 ++++++++-----------
 cuda_core/tests/test_cuda_utils.py            |  2 +-
 2 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/cuda_bindings/tests/test_enum_explanations.py b/cuda_bindings/tests/test_enum_explanations.py
index 36f266dab8..193c4f86d2 100644
--- a/cuda_bindings/tests/test_enum_explanations.py
+++ b/cuda_bindings/tests/test_enum_explanations.py
@@ -1,10 +1,17 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
+import importlib
 import importlib.metadata
 
+import pytest
+
 from cuda.bindings import driver, runtime
-from cuda.bindings._utils import driver_cu_result_explanations, runtime_cuda_error_explanations
+
+_EXPLANATION_MODULES = [
+    ("driver_cu_result_explanations", driver.CUresult),
+    ("runtime_cuda_error_explanations", runtime.cudaError_t),
+]
 
 
 def _get_binding_version():
@@ -15,25 +22,13 @@ def _get_binding_version():
     return tuple(int(v) for v in major_minor)
 
 
-def test_driver_cu_result_explanations_health():
-    expl_dict = driver_cu_result_explanations._EXPLANATIONS
-
-    known_codes = set()
-    for error in driver.CUresult:
-        code = int(error)
-        assert code in expl_dict
-        known_codes.add(code)
-
-    if _get_binding_version() >= (13, 0):
-        extra_expl = sorted(set(expl_dict.keys()) - known_codes)
-        assert not extra_expl
-
-
-def test_runtime_cuda_error_explanations_health():
-    expl_dict = runtime_cuda_error_explanations._EXPLANATIONS
+@pytest.mark.parametrize("module_name,enum_type", _EXPLANATION_MODULES)
+def test_explanations_health(module_name, enum_type):
+    mod = importlib.import_module(f"cuda.bindings._utils.{module_name}")
+    expl_dict = mod._EXPLANATIONS
 
     known_codes = set()
-    for error in runtime.cudaError_t:
+    for error in enum_type:
         code = int(error)
         assert code in expl_dict
         known_codes.add(code)
diff --git a/cuda_core/tests/test_cuda_utils.py b/cuda_core/tests/test_cuda_utils.py
index 01a86d17e9..a0d072aca6 100644
--- a/cuda_core/tests/test_cuda_utils.py
+++ b/cuda_core/tests/test_cuda_utils.py
@@ -46,7 +46,7 @@ def test_explanations_ctk_version(module_name, public_name):
         and core_mod._FALLBACK_EXPLANATIONS != bindings_mod._EXPLANATIONS
     ):
         raise RuntimeError(
-            f"The cuda_core copy of the cuda_bindings _EXPLANATIONS dict is out of sync"
+            "The cuda_core copy of the cuda_bindings _EXPLANATIONS dict is out of sync"
             f" (both at CTK {core_mod._CTK_MAJOR_MINOR_PATCH})."
             f" Please copy the _EXPLANATIONS dict from {bindings_path} to {core_path}"
         )

From b1645c0c2c6fa8009a0769561a527492b378b9bf Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Sun, 22 Mar 2026 23:12:40 -0700
Subject: [PATCH 06/15] Update pathfinder descriptor catalogs for cusparseLt
 release 0.9.0

---
 .../cuda/pathfinder/_dynamic_libs/descriptor_catalog.py       | 4 ++--
 .../cuda/pathfinder/_headers/header_descriptor_catalog.py     | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/descriptor_catalog.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/descriptor_catalog.py
index cdd2a8b12b..69bb223a3d 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/descriptor_catalog.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/descriptor_catalog.py
@@ -331,8 +331,8 @@ class DescriptorSpec:
         packaged_with="other",
         linux_sonames=("libcusparseLt.so.0",),
         windows_dlls=("cusparseLt.dll",),
-        site_packages_linux=("nvidia/cusparselt/lib",),
-        site_packages_windows=("nvidia/cusparselt/bin",),
+        site_packages_linux=("nvidia/cu13/lib", "nvidia/cusparselt/lib"),
+        site_packages_windows=("nvidia/cu13/bin/x64", "nvidia/cusparselt/bin"),
     ),
     DescriptorSpec(
         name="cutensor",
diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/header_descriptor_catalog.py b/cuda_pathfinder/cuda/pathfinder/_headers/header_descriptor_catalog.py
index 32b222b8fd..a46830e4ed 100644
--- a/cuda_pathfinder/cuda/pathfinder/_headers/header_descriptor_catalog.py
+++ b/cuda_pathfinder/cuda/pathfinder/_headers/header_descriptor_catalog.py
@@ -141,7 +141,7 @@ class HeaderDescriptorSpec:
         name="cusparseLt",
         packaged_with="other",
         header_basename="cusparseLt.h",
-        site_packages_dirs=("nvidia/cusparselt/include",),
+        site_packages_dirs=("nvidia/cu13/include", "nvidia/cusparselt/include"),
         conda_targets_layout=False,
         use_ctk_root_canary=False,
     ),

From 6fc77b71365c77bfcef25179e0c6e61eb55fe757 Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Wed, 25 Mar 2026 17:08:30 -0700
Subject: [PATCH 07/15] Delete cuda_core fallback explanation copies, import
 from cuda.bindings (#1712)

Remove the vendored explanation dicts from cuda_core. cuda_utils.pyx now
imports directly from cuda.bindings._utils with a ModuleNotFoundError
fallback to an empty dict, so error messages gracefully degrade when
paired with older cuda-bindings that don't ship the dicts.

Made-with: Cursor
---
 cuda_core/cuda/core/_utils/cuda_utils.pyx     |  15 +-
 .../_utils/driver_cu_result_explanations.py   | 365 ------------
 .../_utils/runtime_cuda_error_explanations.py | 558 ------------------
 cuda_core/tests/test_cuda_utils.py            |  41 --
 4 files changed, 13 insertions(+), 966 deletions(-)
 delete mode 100644 cuda_core/cuda/core/_utils/driver_cu_result_explanations.py
 delete mode 100644 cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py

diff --git a/cuda_core/cuda/core/_utils/cuda_utils.pyx b/cuda_core/cuda/core/_utils/cuda_utils.pyx
index ec6c587f3f..3593d85ff4 100644
--- a/cuda_core/cuda/core/_utils/cuda_utils.pyx
+++ b/cuda_core/cuda/core/_utils/cuda_utils.pyx
@@ -27,8 +27,19 @@ from cpython.buffer cimport PyObject_GetBuffer, PyBuffer_Release, Py_buffer, PyB
 
 from cuda.bindings cimport cynvrtc, cynvvm, cynvjitlink
 
-from cuda.core._utils.driver_cu_result_explanations import DRIVER_CU_RESULT_EXPLANATIONS
-from cuda.core._utils.runtime_cuda_error_explanations import RUNTIME_CUDA_ERROR_EXPLANATIONS
+try:
+    from cuda.bindings._utils.driver_cu_result_explanations import (
+        _EXPLANATIONS as DRIVER_CU_RESULT_EXPLANATIONS,
+    )
+except ModuleNotFoundError:
+    DRIVER_CU_RESULT_EXPLANATIONS = {}
+
+try:
+    from cuda.bindings._utils.runtime_cuda_error_explanations import (
+        _EXPLANATIONS as RUNTIME_CUDA_ERROR_EXPLANATIONS,
+    )
+except ModuleNotFoundError:
+    RUNTIME_CUDA_ERROR_EXPLANATIONS = {}
 
 
 class CUDAError(Exception):
diff --git a/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py b/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py
deleted file mode 100644
index f713d42be7..0000000000
--- a/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py
+++ /dev/null
@@ -1,365 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-# Fallback copy -- overridden from cuda.bindings below when available.
-_CTK_MAJOR_MINOR_PATCH = (13, 2, 0)
-
-_FALLBACK_EXPLANATIONS = {
-    0: (
-        "The API call returned with no errors. In the case of query calls, this"
-        " also means that the operation being queried is complete (see"
-        " ::cuEventQuery() and ::cuStreamQuery())."
-    ),
-    1: (
-        "This indicates that one or more of the parameters passed to the API call"
-        " is not within an acceptable range of values."
-    ),
-    2: (
-        "The API call failed because it was unable to allocate enough memory or"
-        " other resources to perform the requested operation."
-    ),
-    3: (
-        "This indicates that the CUDA driver has not been initialized with"
-        " ::cuInit() or that initialization has failed."
-    ),
-    4: "This indicates that the CUDA driver is in the process of shutting down.",
-    5: (
-        "This indicates profiler is not initialized for this run. This can"
-        " happen when the application is running with external profiling tools"
-        " like visual profiler."
-    ),
-    6: (
-        "This error return is deprecated as of CUDA 5.0. It is no longer an error"
-        " to attempt to enable/disable the profiling via ::cuProfilerStart or"
-        " ::cuProfilerStop without initialization."
-    ),
-    7: (
-        "This error return is deprecated as of CUDA 5.0. It is no longer an error"
-        " to call cuProfilerStart() when profiling is already enabled."
-    ),
-    8: (
-        "This error return is deprecated as of CUDA 5.0. It is no longer an error"
-        " to call cuProfilerStop() when profiling is already disabled."
-    ),
-    34: (
-        "This indicates that the CUDA driver that the application has loaded is a"
-        " stub library. Applications that run with the stub rather than a real"
-        " driver loaded will result in CUDA API returning this error."
-    ),
-    36: (
-        "This indicates that the API call requires a newer CUDA driver than the one"
-        " currently installed. Users should install an updated NVIDIA CUDA driver"
-        " to allow the API call to succeed."
-    ),
-    46: (
-        "This indicates that requested CUDA device is unavailable at the current"
-        " time. Devices are often unavailable due to use of"
-        " ::CU_COMPUTEMODE_EXCLUSIVE_PROCESS or ::CU_COMPUTEMODE_PROHIBITED."
-    ),
-    100: ("This indicates that no CUDA-capable devices were detected by the installed CUDA driver."),
-    101: (
-        "This indicates that the device ordinal supplied by the user does not"
-        " correspond to a valid CUDA device or that the action requested is"
-        " invalid for the specified device."
-    ),
-    102: "This error indicates that the Grid license is not applied.",
-    200: ("This indicates that the device kernel image is invalid. This can also indicate an invalid CUDA module."),
-    201: (
-        "This most frequently indicates that there is no context bound to the"
-        " current thread. This can also be returned if the context passed to an"
-        " API call is not a valid handle (such as a context that has had"
-        " ::cuCtxDestroy() invoked on it). This can also be returned if a user"
-        " mixes different API versions (i.e. 3010 context with 3020 API calls)."
-        " See ::cuCtxGetApiVersion() for more details."
-        " This can also be returned if the green context passed to an API call"
-        " was not converted to a ::CUcontext using ::cuCtxFromGreenCtx API."
-    ),
-    202: (
-        "This indicated that the context being supplied as a parameter to the"
-        " API call was already the active context."
-        " This error return is deprecated as of CUDA 3.2. It is no longer an"
-        " error to attempt to push the active context via ::cuCtxPushCurrent()."
-    ),
-    205: "This indicates that a map or register operation has failed.",
-    206: "This indicates that an unmap or unregister operation has failed.",
-    207: ("This indicates that the specified array is currently mapped and thus cannot be destroyed."),
-    208: "This indicates that the resource is already mapped.",
-    209: (
-        "This indicates that there is no kernel image available that is suitable"
-        " for the device. This can occur when a user specifies code generation"
-        " options for a particular CUDA source file that do not include the"
-        " corresponding device configuration."
-    ),
-    210: "This indicates that a resource has already been acquired.",
-    211: "This indicates that a resource is not mapped.",
-    212: ("This indicates that a mapped resource is not available for access as an array."),
-    213: ("This indicates that a mapped resource is not available for access as a pointer."),
-    214: ("This indicates that an uncorrectable ECC error was detected during execution."),
-    215: ("This indicates that the ::CUlimit passed to the API call is not supported by the active device."),
-    216: (
-        "This indicates that the ::CUcontext passed to the API call can"
-        " only be bound to a single CPU thread at a time but is already"
-        " bound to a CPU thread."
-    ),
-    217: ("This indicates that peer access is not supported across the given devices."),
-    218: "This indicates that a PTX JIT compilation failed.",
-    219: "This indicates an error with OpenGL or DirectX context.",
-    220: ("This indicates that an uncorrectable NVLink error was detected during the execution."),
-    221: "This indicates that the PTX JIT compiler library was not found.",
-    222: "This indicates that the provided PTX was compiled with an unsupported toolchain.",
-    223: "This indicates that the PTX JIT compilation was disabled.",
-    224: ("This indicates that the ::CUexecAffinityType passed to the API call is not supported by the active device."),
-    225: (
-        "This indicates that the code to be compiled by the PTX JIT contains unsupported call to cudaDeviceSynchronize."
-    ),
-    226: (
-        "This indicates that an exception occurred on the device that is now"
-        " contained by the GPU's error containment capability. Common causes are -"
-        " a. Certain types of invalid accesses of peer GPU memory over nvlink"
-        " b. Certain classes of hardware errors"
-        " This leaves the process in an inconsistent state and any further CUDA"
-        " work will return the same error. To continue using CUDA, the process must"
-        " be terminated and relaunched."
-    ),
-    300: (
-        "This indicates that the device kernel source is invalid. This includes"
-        " compilation/linker errors encountered in device code or user error."
-    ),
-    301: "This indicates that the file specified was not found.",
-    302: "This indicates that a link to a shared object failed to resolve.",
-    303: "This indicates that initialization of a shared object failed.",
-    304: "This indicates that an OS call failed.",
-    400: (
-        "This indicates that a resource handle passed to the API call was not"
-        " valid. Resource handles are opaque types like ::CUstream and ::CUevent."
-    ),
-    401: (
-        "This indicates that a resource required by the API call is not in a"
-        " valid state to perform the requested operation."
-    ),
-    402: (
-        "This indicates an attempt was made to introspect an object in a way that"
-        " would discard semantically important information. This is either due to"
-        " the object using funtionality newer than the API version used to"
-        " introspect it or omission of optional return arguments."
-    ),
-    500: (
-        "This indicates that a named symbol was not found. Examples of symbols"
-        " are global/constant variable names, driver function names, texture names,"
-        " and surface names."
-    ),
-    600: (
-        "This indicates that asynchronous operations issued previously have not"
-        " completed yet. This result is not actually an error, but must be indicated"
-        " differently than ::CUDA_SUCCESS (which indicates completion). Calls that"
-        " may return this value include ::cuEventQuery() and ::cuStreamQuery()."
-    ),
-    700: (
-        "While executing a kernel, the device encountered a"
-        " load or store instruction on an invalid memory address."
-        " This leaves the process in an inconsistent state and any further CUDA work"
-        " will return the same error. To continue using CUDA, the process must be terminated"
-        " and relaunched."
-    ),
-    701: (
-        "This indicates that a launch did not occur because it did not have"
-        " appropriate resources. This error usually indicates that the user has"
-        " attempted to pass too many arguments to the device kernel, or the"
-        " kernel launch specifies too many threads for the kernel's register"
-        " count. Passing arguments of the wrong size (i.e. a 64-bit pointer"
-        " when a 32-bit int is expected) is equivalent to passing too many"
-        " arguments and can also result in this error."
-    ),
-    702: (
-        "This indicates that the device kernel took too long to execute. This can"
-        " only occur if timeouts are enabled - see the device attribute"
-        " ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information."
-        " This leaves the process in an inconsistent state and any further CUDA work"
-        " will return the same error. To continue using CUDA, the process must be terminated"
-        " and relaunched."
-    ),
-    703: ("This error indicates a kernel launch that uses an incompatible texturing mode."),
-    704: (
-        "This error indicates that a call to ::cuCtxEnablePeerAccess() is"
-        " trying to re-enable peer access to a context which has already"
-        " had peer access to it enabled."
-    ),
-    705: (
-        "This error indicates that ::cuCtxDisablePeerAccess() is"
-        " trying to disable peer access which has not been enabled yet"
-        " via ::cuCtxEnablePeerAccess()."
-    ),
-    708: ("This error indicates that the primary context for the specified device has already been initialized."),
-    709: (
-        "This error indicates that the context current to the calling thread"
-        " has been destroyed using ::cuCtxDestroy, or is a primary context which"
-        " has not yet been initialized."
-    ),
-    710: (
-        "A device-side assert triggered during kernel execution. The context"
-        " cannot be used anymore, and must be destroyed. All existing device"
-        " memory allocations from this context are invalid and must be"
-        " reconstructed if the program is to continue using CUDA."
-    ),
-    711: (
-        "This error indicates that the hardware resources required to enable"
-        " peer access have been exhausted for one or more of the devices"
-        " passed to ::cuCtxEnablePeerAccess()."
-    ),
-    712: ("This error indicates that the memory range passed to ::cuMemHostRegister() has already been registered."),
-    713: (
-        "This error indicates that the pointer passed to ::cuMemHostUnregister()"
-        " does not correspond to any currently registered memory region."
-    ),
-    714: (
-        "While executing a kernel, the device encountered a stack error."
-        " This can be due to stack corruption or exceeding the stack size limit."
-        " This leaves the process in an inconsistent state and any further CUDA work"
-        " will return the same error. To continue using CUDA, the process must be terminated"
-        " and relaunched."
-    ),
-    715: (
-        "While executing a kernel, the device encountered an illegal instruction."
-        " This leaves the process in an inconsistent state and any further CUDA work"
-        " will return the same error. To continue using CUDA, the process must be terminated"
-        " and relaunched."
-    ),
-    716: (
-        "While executing a kernel, the device encountered a load or store instruction"
-        " on a memory address which is not aligned."
-        " This leaves the process in an inconsistent state and any further CUDA work"
-        " will return the same error. To continue using CUDA, the process must be terminated"
-        " and relaunched."
-    ),
-    717: (
-        "While executing a kernel, the device encountered an instruction"
-        " which can only operate on memory locations in certain address spaces"
-        " (global, shared, or local), but was supplied a memory address not"
-        " belonging to an allowed address space."
-        " This leaves the process in an inconsistent state and any further CUDA work"
-        " will return the same error. To continue using CUDA, the process must be terminated"
-        " and relaunched."
-    ),
-    718: (
-        "While executing a kernel, the device program counter wrapped its address space."
-        " This leaves the process in an inconsistent state and any further CUDA work"
-        " will return the same error. To continue using CUDA, the process must be terminated"
-        " and relaunched."
-    ),
-    719: (
-        "An exception occurred on the device while executing a kernel. Common"
-        " causes include dereferencing an invalid device pointer and accessing"
-        " out of bounds shared memory. Less common cases can be system specific - more"
-        " information about these cases can be found in the system specific user guide."
-        " This leaves the process in an inconsistent state and any further CUDA work"
-        " will return the same error. To continue using CUDA, the process must be terminated"
-        " and relaunched."
-    ),
-    720: (
-        "This error indicates that the number of blocks launched per grid for a kernel that was"
-        " launched via either ::cuLaunchCooperativeKernel or ::cuLaunchCooperativeKernelMultiDevice"
-        " exceeds the maximum number of blocks as allowed by ::cuOccupancyMaxActiveBlocksPerMultiprocessor"
-        " or ::cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors"
-        " as specified by the device attribute ::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT."
-    ),
-    721: (
-        "An exception occurred on the device while exiting a kernel using tensor memory: the"
-        " tensor memory was not completely deallocated. This leaves the process in an inconsistent"
-        " state and any further CUDA work will return the same error. To continue using CUDA, the"
-        " process must be terminated and relaunched."
-    ),
-    800: "This error indicates that the attempted operation is not permitted.",
-    801: ("This error indicates that the attempted operation is not supported on the current system or device."),
-    802: (
-        "This error indicates that the system is not yet ready to start any CUDA"
-        " work.  To continue using CUDA, verify the system configuration is in a"
-        " valid state and all required driver daemons are actively running."
-        " More information about this error can be found in the system specific"
-        " user guide."
-    ),
-    803: (
-        "This error indicates that there is a mismatch between the versions of"
-        " the display driver and the CUDA driver. Refer to the compatibility documentation"
-        " for supported versions."
-    ),
-    804: (
-        "This error indicates that the system was upgraded to run with forward compatibility"
-        " but the visible hardware detected by CUDA does not support this configuration."
-        " Refer to the compatibility documentation for the supported hardware matrix or ensure"
-        " that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES"
-        " environment variable."
-    ),
-    805: "This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server.",
-    806: "This error indicates that the remote procedural call between the MPS server and the MPS client failed.",
-    807: (
-        "This error indicates that the MPS server is not ready to accept new MPS client requests."
-        " This error can be returned when the MPS server is in the process of recovering from a fatal failure."
-    ),
-    808: "This error indicates that the hardware resources required to create MPS client have been exhausted.",
-    809: "This error indicates the the hardware resources required to support device connections have been exhausted.",
-    810: "This error indicates that the MPS client has been terminated by the server. To continue using CUDA, the process must be terminated and relaunched.",
-    811: "This error indicates that the module is using CUDA Dynamic Parallelism, but the current configuration, like MPS, does not support it.",
-    812: "This error indicates that a module contains an unsupported interaction between different versions of CUDA Dynamic Parallelism.",
-    900: ("This error indicates that the operation is not permitted when the stream is capturing."),
-    901: (
-        "This error indicates that the current capture sequence on the stream"
-        " has been invalidated due to a previous error."
-    ),
-    902: (
-        "This error indicates that the operation would have resulted in a merge of two independent capture sequences."
-    ),
-    903: "This error indicates that the capture was not initiated in this stream.",
-    904: ("This error indicates that the capture sequence contains a fork that was not joined to the primary stream."),
-    905: (
-        "This error indicates that a dependency would have been created which"
-        " crosses the capture sequence boundary. Only implicit in-stream ordering"
-        " dependencies are allowed to cross the boundary."
-    ),
-    906: ("This error indicates a disallowed implicit dependency on a current capture sequence from cudaStreamLegacy."),
-    907: (
-        "This error indicates that the operation is not permitted on an event which"
-        " was last recorded in a capturing stream."
-    ),
-    908: (
-        "A stream capture sequence not initiated with the ::CU_STREAM_CAPTURE_MODE_RELAXED"
-        " argument to ::cuStreamBeginCapture was passed to ::cuStreamEndCapture in a"
-        " different thread."
-    ),
-    909: "This error indicates that the timeout specified for the wait operation has lapsed.",
-    910: (
-        "This error indicates that the graph update was not performed because it included"
-        " changes which violated constraints specific to instantiated graph update."
-    ),
-    911: (
-        "This indicates that an error has occurred in a device outside of GPU. It can be a"
-        " synchronous error w.r.t. CUDA API or an asynchronous error from the external device."
-        " In case of asynchronous error, it means that if cuda was waiting for an external device's"
-        " signal before consuming shared data, the external device signaled an error indicating that"
-        " the data is not valid for consumption. This leaves the process in an inconsistent"
-        " state and any further CUDA work will return the same error. To continue using CUDA,"
-        " the process must be terminated and relaunched."
-        " In case of synchronous error, it means that one or more external devices"
-        " have encountered an error and cannot complete the operation."
-    ),
-    912: "Indicates a kernel launch error due to cluster misconfiguration.",
-    913: ("Indiciates a function handle is not loaded when calling an API that requires a loaded function."),
-    914: ("This error indicates one or more resources passed in are not valid resource types for the operation."),
-    915: ("This error indicates one or more resources are insufficient or non-applicable for the operation."),
-    916: ("This error indicates that an error happened during the key rotation sequence."),
-    917: (
-        "This error indicates that the requested operation is not permitted because the"
-        " stream is in a detached state. This can occur if the green context associated"
-        " with the stream has been destroyed, limiting the stream's operational capabilities."
-    ),
-    999: "This indicates that an unknown internal error has occurred.",
-}
-
-DRIVER_CU_RESULT_EXPLANATIONS = _FALLBACK_EXPLANATIONS
-
-# Prefer the authoritative copy from cuda.bindings when available.
-try:
-    import cuda.bindings._utils.driver_cu_result_explanations as _authoritative
-except ModuleNotFoundError:
-    pass
-else:
-    DRIVER_CU_RESULT_EXPLANATIONS = _authoritative._EXPLANATIONS
diff --git a/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py b/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py
deleted file mode 100644
index d2a2205ae2..0000000000
--- a/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py
+++ /dev/null
@@ -1,558 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-# Fallback copy -- overridden from cuda.bindings below when available.
-_CTK_MAJOR_MINOR_PATCH = (13, 2, 0)
-
-_FALLBACK_EXPLANATIONS = {
-    0: (
-        "The API call returned with no errors. In the case of query calls, this"
-        " also means that the operation being queried is complete (see"
-        " ::cudaEventQuery() and ::cudaStreamQuery())."
-    ),
-    1: (
-        "This indicates that one or more of the parameters passed to the API call"
-        " is not within an acceptable range of values."
-    ),
-    2: (
-        "The API call failed because it was unable to allocate enough memory or"
-        " other resources to perform the requested operation."
-    ),
-    3: ("The API call failed because the CUDA driver and runtime could not be initialized."),
-    4: (
-        "This indicates that a CUDA Runtime API call cannot be executed because"
-        " it is being called during process shut down, at a point in time after"
-        " CUDA driver has been unloaded."
-    ),
-    5: (
-        "This indicates profiler is not initialized for this run. This can"
-        " happen when the application is running with external profiling tools"
-        " like visual profiler."
-    ),
-    6: (
-        "This error return is deprecated as of CUDA 5.0. It is no longer an error"
-        " to attempt to enable/disable the profiling via ::cudaProfilerStart or"
-        " ::cudaProfilerStop without initialization."
-    ),
-    7: (
-        "This error return is deprecated as of CUDA 5.0. It is no longer an error"
-        " to call cudaProfilerStart() when profiling is already enabled."
-    ),
-    8: (
-        "This error return is deprecated as of CUDA 5.0. It is no longer an error"
-        " to call cudaProfilerStop() when profiling is already disabled."
-    ),
-    9: (
-        "This indicates that a kernel launch is requesting resources that can"
-        " never be satisfied by the current device. Requesting more shared memory"
-        " per block than the device supports will trigger this error, as will"
-        " requesting too many threads or blocks. See ::cudaDeviceProp for more"
-        " device limitations."
-    ),
-    10: (
-        "This indicates that the driver is newer than the runtime version"
-        " and returned graph node parameter information that the runtime"
-        " does not understand and is unable to translate."
-    ),
-    12: (
-        "This indicates that one or more of the pitch-related parameters passed"
-        " to the API call is not within the acceptable range for pitch."
-    ),
-    13: ("This indicates that the symbol name/identifier passed to the API call is not a valid name or identifier."),
-    16: (
-        "This indicates that at least one host pointer passed to the API call is"
-        " not a valid host pointer."
-        " This error return is deprecated as of CUDA 10.1."
-    ),
-    17: (
-        "This indicates that at least one device pointer passed to the API call is"
-        " not a valid device pointer."
-        " This error return is deprecated as of CUDA 10.1."
-    ),
-    18: ("This indicates that the texture passed to the API call is not a valid texture."),
-    19: (
-        "This indicates that the texture binding is not valid. This occurs if you"
-        " call ::cudaGetTextureAlignmentOffset() with an unbound texture."
-    ),
-    20: (
-        "This indicates that the channel descriptor passed to the API call is not"
-        " valid. This occurs if the format is not one of the formats specified by"
-        " ::cudaChannelFormatKind, or if one of the dimensions is invalid."
-    ),
-    21: (
-        "This indicates that the direction of the memcpy passed to the API call is"
-        " not one of the types specified by ::cudaMemcpyKind."
-    ),
-    22: (
-        "This indicated that the user has taken the address of a constant variable,"
-        " which was forbidden up until the CUDA 3.1 release."
-        " This error return is deprecated as of CUDA 3.1. Variables in constant"
-        " memory may now have their address taken by the runtime via"
-        " ::cudaGetSymbolAddress()."
-    ),
-    23: (
-        "This indicated that a texture fetch was not able to be performed."
-        " This was previously used for device emulation of texture operations."
-        " This error return is deprecated as of CUDA 3.1. Device emulation mode was"
-        " removed with the CUDA 3.1 release."
-    ),
-    24: (
-        "This indicated that a texture was not bound for access."
-        " This was previously used for device emulation of texture operations."
-        " This error return is deprecated as of CUDA 3.1. Device emulation mode was"
-        " removed with the CUDA 3.1 release."
-    ),
-    25: (
-        "This indicated that a synchronization operation had failed."
-        " This was previously used for some device emulation functions."
-        " This error return is deprecated as of CUDA 3.1. Device emulation mode was"
-        " removed with the CUDA 3.1 release."
-    ),
-    26: (
-        "This indicates that a non-float texture was being accessed with linear"
-        " filtering. This is not supported by CUDA."
-    ),
-    27: (
-        "This indicates that an attempt was made to read an unsupported data type as a"
-        " normalized float. This is not supported by CUDA."
-    ),
-    28: (
-        "Mixing of device and device emulation code was not allowed."
-        " This error return is deprecated as of CUDA 3.1. Device emulation mode was"
-        " removed with the CUDA 3.1 release."
-    ),
-    31: (
-        "This indicates that the API call is not yet implemented. Production"
-        " releases of CUDA will never return this error."
-        " This error return is deprecated as of CUDA 4.1."
-    ),
-    32: (
-        "This indicated that an emulated device pointer exceeded the 32-bit address"
-        " range."
-        " This error return is deprecated as of CUDA 3.1. Device emulation mode was"
-        " removed with the CUDA 3.1 release."
-    ),
-    34: (
-        "This indicates that the CUDA driver that the application has loaded is a"
-        " stub library. Applications that run with the stub rather than a real"
-        " driver loaded will result in CUDA API returning this error."
-    ),
-    35: (
-        "This indicates that the installed NVIDIA CUDA driver is older than the"
-        " CUDA runtime library. This is not a supported configuration. Users should"
-        " install an updated NVIDIA display driver to allow the application to run."
-    ),
-    36: (
-        "This indicates that the API call requires a newer CUDA driver than the one"
-        " currently installed. Users should install an updated NVIDIA CUDA driver"
-        " to allow the API call to succeed."
-    ),
-    37: ("This indicates that the surface passed to the API call is not a valid surface."),
-    43: (
-        "This indicates that multiple global or constant variables (across separate"
-        " CUDA source files in the application) share the same string name."
-    ),
-    44: (
-        "This indicates that multiple textures (across separate CUDA source"
-        " files in the application) share the same string name."
-    ),
-    45: (
-        "This indicates that multiple surfaces (across separate CUDA source"
-        " files in the application) share the same string name."
-    ),
-    46: (
-        "This indicates that all CUDA devices are busy or unavailable at the current"
-        " time. Devices are often busy/unavailable due to use of"
-        " ::cudaComputeModeProhibited, ::cudaComputeModeExclusiveProcess, or when long"
-        " running CUDA kernels have filled up the GPU and are blocking new work"
-        " from starting. They can also be unavailable due to memory constraints"
-        " on a device that already has active CUDA work being performed."
-    ),
-    49: (
-        "This indicates that the current context is not compatible with this"
-        " the CUDA Runtime. This can only occur if you are using CUDA"
-        " Runtime/Driver interoperability and have created an existing Driver"
-        " context using the driver API. The Driver context may be incompatible"
-        " either because the Driver context was created using an older version"
-        " of the API, because the Runtime API call expects a primary driver"
-        " context and the Driver context is not primary, or because the Driver"
-        ' context has been destroyed. Please see CUDART_DRIVER "Interactions'
-        ' with the CUDA Driver API" for more information.'
-    ),
-    52: (
-        "The device function being invoked (usually via ::cudaLaunchKernel()) was not"
-        " previously configured via the ::cudaConfigureCall() function."
-    ),
-    53: (
-        "This indicated that a previous kernel launch failed. This was previously"
-        " used for device emulation of kernel launches."
-        " This error return is deprecated as of CUDA 3.1. Device emulation mode was"
-        " removed with the CUDA 3.1 release."
-    ),
-    65: (
-        "This error indicates that a device runtime grid launch did not occur"
-        " because the depth of the child grid would exceed the maximum supported"
-        " number of nested grid launches."
-    ),
-    66: (
-        "This error indicates that a grid launch did not occur because the kernel"
-        " uses file-scoped textures which are unsupported by the device runtime."
-        " Kernels launched via the device runtime only support textures created with"
-        " the Texture Object API's."
-    ),
-    67: (
-        "This error indicates that a grid launch did not occur because the kernel"
-        " uses file-scoped surfaces which are unsupported by the device runtime."
-        " Kernels launched via the device runtime only support surfaces created with"
-        " the Surface Object API's."
-    ),
-    68: (
-        "This error indicates that a call to ::cudaDeviceSynchronize made from"
-        " the device runtime failed because the call was made at grid depth greater"
-        " than than either the default (2 levels of grids) or user specified device"
-        " limit ::cudaLimitDevRuntimeSyncDepth. To be able to synchronize on"
-        " launched grids at a greater depth successfully, the maximum nested"
-        " depth at which ::cudaDeviceSynchronize will be called must be specified"
-        " with the ::cudaLimitDevRuntimeSyncDepth limit to the ::cudaDeviceSetLimit"
-        " api before the host-side launch of a kernel using the device runtime."
-        " Keep in mind that additional levels of sync depth require the runtime"
-        " to reserve large amounts of device memory that cannot be used for"
-        " user allocations. Note that ::cudaDeviceSynchronize made from device"
-        " runtime is only supported on devices of compute capability < 9.0."
-    ),
-    69: (
-        "This error indicates that a device runtime grid launch failed because"
-        " the launch would exceed the limit ::cudaLimitDevRuntimePendingLaunchCount."
-        " For this launch to proceed successfully, ::cudaDeviceSetLimit must be"
-        " called to set the ::cudaLimitDevRuntimePendingLaunchCount to be higher"
-        " than the upper bound of outstanding launches that can be issued to the"
-        " device runtime. Keep in mind that raising the limit of pending device"
-        " runtime launches will require the runtime to reserve device memory that"
-        " cannot be used for user allocations."
-    ),
-    98: ("The requested device function does not exist or is not compiled for the proper device architecture."),
-    100: ("This indicates that no CUDA-capable devices were detected by the installed CUDA driver."),
-    101: (
-        "This indicates that the device ordinal supplied by the user does not"
-        " correspond to a valid CUDA device or that the action requested is"
-        " invalid for the specified device."
-    ),
-    102: "This indicates that the device doesn't have a valid Grid License.",
-    103: (
-        "By default, the CUDA runtime may perform a minimal set of self-tests,"
-        " as well as CUDA driver tests, to establish the validity of both."
-        " Introduced in CUDA 11.2, this error return indicates that at least one"
-        " of these tests has failed and the validity of either the runtime"
-        " or the driver could not be established."
-    ),
-    127: "This indicates an internal startup failure in the CUDA runtime.",
-    200: "This indicates that the device kernel image is invalid.",
-    201: (
-        "This most frequently indicates that there is no context bound to the"
-        " current thread. This can also be returned if the context passed to an"
-        " API call is not a valid handle (such as a context that has had"
-        " ::cuCtxDestroy() invoked on it). This can also be returned if a user"
-        " mixes different API versions (i.e. 3010 context with 3020 API calls)."
-        " See ::cuCtxGetApiVersion() for more details."
-    ),
-    205: "This indicates that the buffer object could not be mapped.",
-    206: "This indicates that the buffer object could not be unmapped.",
-    207: ("This indicates that the specified array is currently mapped and thus cannot be destroyed."),
-    208: "This indicates that the resource is already mapped.",
-    209: (
-        "This indicates that there is no kernel image available that is suitable"
-        " for the device. This can occur when a user specifies code generation"
-        " options for a particular CUDA source file that do not include the"
-        " corresponding device configuration."
-    ),
-    210: "This indicates that a resource has already been acquired.",
-    211: "This indicates that a resource is not mapped.",
-    212: ("This indicates that a mapped resource is not available for access as an array."),
-    213: ("This indicates that a mapped resource is not available for access as a pointer."),
-    214: ("This indicates that an uncorrectable ECC error was detected during execution."),
-    215: ("This indicates that the ::cudaLimit passed to the API call is not supported by the active device."),
-    216: (
-        "This indicates that a call tried to access an exclusive-thread device that"
-        " is already in use by a different thread."
-    ),
-    217: ("This error indicates that P2P access is not supported across the given devices."),
-    218: (
-        "A PTX compilation failed. The runtime may fall back to compiling PTX if"
-        " an application does not contain a suitable binary for the current device."
-    ),
-    219: "This indicates an error with the OpenGL or DirectX context.",
-    220: ("This indicates that an uncorrectable NVLink error was detected during the execution."),
-    221: (
-        "This indicates that the PTX JIT compiler library was not found. The JIT Compiler"
-        " library is used for PTX compilation. The runtime may fall back to compiling PTX"
-        " if an application does not contain a suitable binary for the current device."
-    ),
-    222: (
-        "This indicates that the provided PTX was compiled with an unsupported toolchain."
-        " The most common reason for this, is the PTX was generated by a compiler newer"
-        " than what is supported by the CUDA driver and PTX JIT compiler."
-    ),
-    223: (
-        "This indicates that the JIT compilation was disabled. The JIT compilation compiles"
-        " PTX. The runtime may fall back to compiling PTX if an application does not contain"
-        " a suitable binary for the current device."
-    ),
-    224: "This indicates that the provided execution affinity is not supported by the device.",
-    225: (
-        "This indicates that the code to be compiled by the PTX JIT contains unsupported call to cudaDeviceSynchronize."
-    ),
-    226: (
-        "This indicates that an exception occurred on the device that is now"
-        " contained by the GPU's error containment capability. Common causes are -"
-        " a. Certain types of invalid accesses of peer GPU memory over nvlink"
-        " b. Certain classes of hardware errors"
-        " This leaves the process in an inconsistent state and any further CUDA"
-        " work will return the same error. To continue using CUDA, the process must"
-        " be terminated and relaunched."
-    ),
-    300: "This indicates that the device kernel source is invalid.",
-    301: "This indicates that the file specified was not found.",
-    302: "This indicates that a link to a shared object failed to resolve.",
-    303: "This indicates that initialization of a shared object failed.",
-    304: "This error indicates that an OS call failed.",
-    400: (
-        "This indicates that a resource handle passed to the API call was not"
-        " valid. Resource handles are opaque types like ::cudaStream_t and"
-        " ::cudaEvent_t."
-    ),
-    401: (
-        "This indicates that a resource required by the API call is not in a"
-        " valid state to perform the requested operation."
-    ),
-    402: (
-        "This indicates an attempt was made to introspect an object in a way that"
-        " would discard semantically important information. This is either due to"
-        " the object using funtionality newer than the API version used to"
-        " introspect it or omission of optional return arguments."
-    ),
-    500: (
-        "This indicates that a named symbol was not found. Examples of symbols"
-        " are global/constant variable names, driver function names, texture names,"
-        " and surface names."
-    ),
-    600: (
-        "This indicates that asynchronous operations issued previously have not"
-        " completed yet. This result is not actually an error, but must be indicated"
-        " differently than ::cudaSuccess (which indicates completion). Calls that"
-        " may return this value include ::cudaEventQuery() and ::cudaStreamQuery()."
-    ),
-    700: (
-        "The device encountered a load or store instruction on an invalid memory address."
-        " This leaves the process in an inconsistent state and any further CUDA work"
-        " will return the same error. To continue using CUDA, the process must be terminated"
-        " and relaunched."
-    ),
-    701: (
-        "This indicates that a launch did not occur because it did not have"
-        " appropriate resources. Although this error is similar to"
-        " ::cudaErrorInvalidConfiguration, this error usually indicates that the"
-        " user has attempted to pass too many arguments to the device kernel, or the"
-        " kernel launch specifies too many threads for the kernel's register count."
-    ),
-    702: (
-        "This indicates that the device kernel took too long to execute. This can"
-        " only occur if timeouts are enabled - see the device attribute"
-        ' ::cudaDeviceAttr::cudaDevAttrKernelExecTimeout "cudaDevAttrKernelExecTimeout"'
-        " for more information."
-        " This leaves the process in an inconsistent state and any further CUDA work"
-        " will return the same error. To continue using CUDA, the process must be terminated"
-        " and relaunched."
-    ),
-    703: ("This error indicates a kernel launch that uses an incompatible texturing mode."),
-    704: (
-        "This error indicates that a call to ::cudaDeviceEnablePeerAccess() is"
-        " trying to re-enable peer addressing on from a context which has already"
-        " had peer addressing enabled."
-    ),
-    705: (
-        "This error indicates that ::cudaDeviceDisablePeerAccess() is trying to"
-        " disable peer addressing which has not been enabled yet via"
-        " ::cudaDeviceEnablePeerAccess()."
-    ),
-    708: (
-        "This indicates that the user has called ::cudaSetValidDevices(),"
-        " ::cudaSetDeviceFlags(), ::cudaD3D9SetDirect3DDevice(),"
-        " ::cudaD3D10SetDirect3DDevice, ::cudaD3D11SetDirect3DDevice(), or"
-        " ::cudaVDPAUSetVDPAUDevice() after initializing the CUDA runtime by"
-        " calling non-device management operations (allocating memory and"
-        " launching kernels are examples of non-device management operations)."
-        " This error can also be returned if using runtime/driver"
-        " interoperability and there is an existing ::CUcontext active on the"
-        " host thread."
-    ),
-    709: (
-        "This error indicates that the context current to the calling thread"
-        " has been destroyed using ::cuCtxDestroy, or is a primary context which"
-        " has not yet been initialized."
-    ),
-    710: (
-        "An assert triggered in device code during kernel execution. The device"
-        " cannot be used again. All existing allocations are invalid. To continue"
-        " using CUDA, the process must be terminated and relaunched."
-    ),
-    711: (
-        "This error indicates that the hardware resources required to enable"
-        " peer access have been exhausted for one or more of the devices"
-        " passed to ::cudaEnablePeerAccess()."
-    ),
-    712: ("This error indicates that the memory range passed to ::cudaHostRegister() has already been registered."),
-    713: (
-        "This error indicates that the pointer passed to ::cudaHostUnregister()"
-        " does not correspond to any currently registered memory region."
-    ),
-    714: (
-        "Device encountered an error in the call stack during kernel execution,"
-        " possibly due to stack corruption or exceeding the stack size limit."
-        " This leaves the process in an inconsistent state and any further CUDA work"
-        " will return the same error. To continue using CUDA, the process must be terminated"
-        " and relaunched."
-    ),
-    715: (
-        "The device encountered an illegal instruction during kernel execution"
-        " This leaves the process in an inconsistent state and any further CUDA work"
-        " will return the same error. To continue using CUDA, the process must be terminated"
-        " and relaunched."
-    ),
-    716: (
-        "The device encountered a load or store instruction"
-        " on a memory address which is not aligned."
-        " This leaves the process in an inconsistent state and any further CUDA work"
-        " will return the same error. To continue using CUDA, the process must be terminated"
-        " and relaunched."
-    ),
-    717: (
-        "While executing a kernel, the device encountered an instruction"
-        " which can only operate on memory locations in certain address spaces"
-        " (global, shared, or local), but was supplied a memory address not"
-        " belonging to an allowed address space."
-        " This leaves the process in an inconsistent state and any further CUDA work"
-        " will return the same error. To continue using CUDA, the process must be terminated"
-        " and relaunched."
-    ),
-    718: (
-        "The device encountered an invalid program counter."
-        " This leaves the process in an inconsistent state and any further CUDA work"
-        " will return the same error. To continue using CUDA, the process must be terminated"
-        " and relaunched."
-    ),
-    719: (
-        "An exception occurred on the device while executing a kernel. Common"
-        " causes include dereferencing an invalid device pointer and accessing"
-        " out of bounds shared memory. Less common cases can be system specific - more"
-        " information about these cases can be found in the system specific user guide."
-        " This leaves the process in an inconsistent state and any further CUDA work"
-        " will return the same error. To continue using CUDA, the process must be terminated"
-        " and relaunched."
-    ),
-    720: (
-        "This error indicates that the number of blocks launched per grid for a kernel that was"
-        " launched via either ::cudaLaunchCooperativeKernel"
-        " exceeds the maximum number of blocks as allowed by ::cudaOccupancyMaxActiveBlocksPerMultiprocessor"
-        " or ::cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors"
-        " as specified by the device attribute ::cudaDevAttrMultiProcessorCount."
-    ),
-    721: (
-        "An exception occurred on the device while exiting a kernel using tensor memory: the"
-        " tensor memory was not completely deallocated. This leaves the process in an inconsistent"
-        " state and any further CUDA work will return the same error. To continue using CUDA, the"
-        " process must be terminated and relaunched."
-    ),
-    800: "This error indicates the attempted operation is not permitted.",
-    801: ("This error indicates the attempted operation is not supported on the current system or device."),
-    802: (
-        "This error indicates that the system is not yet ready to start any CUDA"
-        " work.  To continue using CUDA, verify the system configuration is in a"
-        " valid state and all required driver daemons are actively running."
-        " More information about this error can be found in the system specific"
-        " user guide."
-    ),
-    803: (
-        "This error indicates that there is a mismatch between the versions of"
-        " the display driver and the CUDA driver. Refer to the compatibility documentation"
-        " for supported versions."
-    ),
-    804: (
-        "This error indicates that the system was upgraded to run with forward compatibility"
-        " but the visible hardware detected by CUDA does not support this configuration."
-        " Refer to the compatibility documentation for the supported hardware matrix or ensure"
-        " that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES"
-        " environment variable."
-    ),
-    805: "This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server.",
-    806: "This error indicates that the remote procedural call between the MPS server and the MPS client failed.",
-    807: (
-        "This error indicates that the MPS server is not ready to accept new MPS client requests."
-        " This error can be returned when the MPS server is in the process of recovering from a fatal failure."
-    ),
-    808: "This error indicates that the hardware resources required to create MPS client have been exhausted.",
-    809: "This error indicates the the hardware resources required to device connections have been exhausted.",
-    810: "This error indicates that the MPS client has been terminated by the server. To continue using CUDA, the process must be terminated and relaunched.",
-    811: "This error indicates, that the program is using CUDA Dynamic Parallelism, but the current configuration, like MPS, does not support it.",
-    812: "This error indicates, that the program contains an unsupported interaction between different versions of CUDA Dynamic Parallelism.",
-    900: "The operation is not permitted when the stream is capturing.",
-    901: ("The current capture sequence on the stream has been invalidated due to a previous error."),
-    902: ("The operation would have resulted in a merge of two independent capture sequences."),
-    903: "The capture was not initiated in this stream.",
-    904: ("The capture sequence contains a fork that was not joined to the primary stream."),
-    905: (
-        "A dependency would have been created which crosses the capture sequence"
-        " boundary. Only implicit in-stream ordering dependencies are allowed to"
-        " cross the boundary."
-    ),
-    906: (
-        "The operation would have resulted in a disallowed implicit dependency on"
-        " a current capture sequence from cudaStreamLegacy."
-    ),
-    907: ("The operation is not permitted on an event which was last recorded in a capturing stream."),
-    908: (
-        "A stream capture sequence not initiated with the ::cudaStreamCaptureModeRelaxed"
-        " argument to ::cudaStreamBeginCapture was passed to ::cudaStreamEndCapture in a"
-        " different thread."
-    ),
-    909: "This indicates that the wait operation has timed out.",
-    910: (
-        "This error indicates that the graph update was not performed because it included"
-        " changes which violated constraints specific to instantiated graph update."
-    ),
-    911: (
-        "This indicates that an error has occurred in a device outside of GPU. It can be a"
-        " synchronous error w.r.t. CUDA API or an asynchronous error from the external device."
-        " In case of asynchronous error, it means that if cuda was waiting for an external device's"
-        " signal before consuming shared data, the external device signaled an error indicating that"
-        " the data is not valid for consumption. This leaves the process in an inconsistent"
-        " state and any further CUDA work will return the same error. To continue using CUDA,"
-        " the process must be terminated and relaunched."
-        " In case of synchronous error, it means that one or more external devices"
-        " have encountered an error and cannot complete the operation."
-    ),
-    912: ("This indicates that a kernel launch error has occurred due to cluster misconfiguration."),
-    913: ("Indiciates a function handle is not loaded when calling an API that requires a loaded function."),
-    914: ("This error indicates one or more resources passed in are not valid resource types for the operation."),
-    915: ("This error indicates one or more resources are insufficient or non-applicable for the operation."),
-    917: (
-        "This error indicates that the requested operation is not permitted because the"
-        " stream is in a detached state. This can occur if the green context associated"
-        " with the stream has been destroyed, limiting the stream's operational capabilities."
-    ),
-    999: "This indicates that an unknown internal error has occurred.",
-    10000: (
-        "Any unhandled CUDA driver error is added to this value and returned via"
-        " the runtime. Production releases of CUDA should not return such errors."
-        " This error return is deprecated as of CUDA 4.1."
-    ),
-}
-
-RUNTIME_CUDA_ERROR_EXPLANATIONS = _FALLBACK_EXPLANATIONS
-
-# Prefer the authoritative copy from cuda.bindings when available.
-try:
-    import cuda.bindings._utils.runtime_cuda_error_explanations as _authoritative
-except ModuleNotFoundError:
-    pass
-else:
-    RUNTIME_CUDA_ERROR_EXPLANATIONS = _authoritative._EXPLANATIONS
diff --git a/cuda_core/tests/test_cuda_utils.py b/cuda_core/tests/test_cuda_utils.py
index a0d072aca6..59b6369815 100644
--- a/cuda_core/tests/test_cuda_utils.py
+++ b/cuda_core/tests/test_cuda_utils.py
@@ -3,7 +3,6 @@
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
 import dataclasses
-import importlib
 
 import pytest
 
@@ -11,46 +10,6 @@
 from cuda.core._utils import cuda_utils
 from cuda.core._utils.clear_error_support import assert_type_str_or_bytes_like, raise_code_path_meant_to_be_unreachable
 
-_EXPLANATION_MODULES = [
-    ("driver_cu_result_explanations", "DRIVER_CU_RESULT_EXPLANATIONS"),
-    ("runtime_cuda_error_explanations", "RUNTIME_CUDA_ERROR_EXPLANATIONS"),
-]
-
-
-@pytest.mark.parametrize("module_name,public_name", _EXPLANATION_MODULES)
-def test_explanations_smoke(module_name, public_name):
-    expl = getattr(cuda_utils, public_name)
-    for code in (0, 1, 2):
-        assert code in expl
-        assert isinstance(expl[code], str)
-
-
-@pytest.mark.parametrize("module_name,public_name", _EXPLANATION_MODULES)
-def test_explanations_ctk_version(module_name, public_name):
-    del public_name  # unused
-    core_mod = importlib.import_module(f"cuda.core._utils.{module_name}")
-    try:
-        bindings_mod = importlib.import_module(f"cuda.bindings._utils.{module_name}")
-    except ModuleNotFoundError:
-        pytest.skip("cuda.bindings._utils not available")
-    bindings_path = f"cuda_bindings/cuda/bindings/_utils/{module_name}.py"
-    core_path = f"cuda_core/cuda/core/_utils/{module_name}.py"
-    if core_mod._CTK_MAJOR_MINOR_PATCH < bindings_mod._CTK_MAJOR_MINOR_PATCH:
-        raise RuntimeError(
-            f"cuda_core copy is older ({core_mod._CTK_MAJOR_MINOR_PATCH})"
-            f" than cuda_bindings ({bindings_mod._CTK_MAJOR_MINOR_PATCH})."
-            f" Please copy the _EXPLANATIONS dict from {bindings_path} to {core_path}"
-        )
-    if (
-        core_mod._CTK_MAJOR_MINOR_PATCH == bindings_mod._CTK_MAJOR_MINOR_PATCH
-        and core_mod._FALLBACK_EXPLANATIONS != bindings_mod._EXPLANATIONS
-    ):
-        raise RuntimeError(
-            "The cuda_core copy of the cuda_bindings _EXPLANATIONS dict is out of sync"
-            f" (both at CTK {core_mod._CTK_MAJOR_MINOR_PATCH})."
-            f" Please copy the _EXPLANATIONS dict from {bindings_path} to {core_path}"
-        )
-
 
 def test_check_driver_error():
     num_unexpected = 0

From 914ffd6972079ede0319cb0b6b01b9fef2b9b8eb Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Wed, 25 Mar 2026 21:23:22 -0700
Subject: [PATCH 08/15] Revert to original dict names and drop
 _CTK_MAJOR_MINOR_PATCH (#1712)

Restore DRIVER_CU_RESULT_EXPLANATIONS / RUNTIME_CUDA_ERROR_EXPLANATIONS
as the dict names in cuda_bindings and remove the _CTK_MAJOR_MINOR_PATCH
/ _EXPLANATIONS indirection that is no longer needed without the
cuda_core fallback copies.

Made-with: Cursor
---
 .../bindings/_utils/driver_cu_result_explanations.py   |  5 ++---
 .../bindings/_utils/runtime_cuda_error_explanations.py |  5 ++---
 cuda_bindings/tests/test_enum_explanations.py          | 10 +++++-----
 cuda_core/cuda/core/_utils/cuda_utils.pyx              |  8 ++------
 4 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/cuda_bindings/cuda/bindings/_utils/driver_cu_result_explanations.py b/cuda_bindings/cuda/bindings/_utils/driver_cu_result_explanations.py
index afe576ce68..0b085520a6 100644
--- a/cuda_bindings/cuda/bindings/_utils/driver_cu_result_explanations.py
+++ b/cuda_bindings/cuda/bindings/_utils/driver_cu_result_explanations.py
@@ -6,9 +6,8 @@
 # Replace the dictionary below with the output.
 # Also update the CUDA Toolkit version number below.
 
-_CTK_MAJOR_MINOR_PATCH = (13, 2, 0)
-
-_EXPLANATIONS = {
+# CUDA Toolkit v13.2.0
+DRIVER_CU_RESULT_EXPLANATIONS = {
     0: (
         "The API call returned with no errors. In the case of query calls, this"
         " also means that the operation being queried is complete (see"
diff --git a/cuda_bindings/cuda/bindings/_utils/runtime_cuda_error_explanations.py b/cuda_bindings/cuda/bindings/_utils/runtime_cuda_error_explanations.py
index cb4e3ec4cc..4421d50480 100644
--- a/cuda_bindings/cuda/bindings/_utils/runtime_cuda_error_explanations.py
+++ b/cuda_bindings/cuda/bindings/_utils/runtime_cuda_error_explanations.py
@@ -6,9 +6,8 @@
 # Replace the dictionary below with the output.
 # Also update the CUDA Toolkit version number below.
 
-_CTK_MAJOR_MINOR_PATCH = (13, 2, 0)
-
-_EXPLANATIONS = {
+# CUDA Toolkit v13.2.0
+RUNTIME_CUDA_ERROR_EXPLANATIONS = {
     0: (
         "The API call returned with no errors. In the case of query calls, this"
         " also means that the operation being queried is complete (see"
diff --git a/cuda_bindings/tests/test_enum_explanations.py b/cuda_bindings/tests/test_enum_explanations.py
index 193c4f86d2..01dc66719b 100644
--- a/cuda_bindings/tests/test_enum_explanations.py
+++ b/cuda_bindings/tests/test_enum_explanations.py
@@ -9,8 +9,8 @@
 from cuda.bindings import driver, runtime
 
 _EXPLANATION_MODULES = [
-    ("driver_cu_result_explanations", driver.CUresult),
-    ("runtime_cuda_error_explanations", runtime.cudaError_t),
+    ("driver_cu_result_explanations", "DRIVER_CU_RESULT_EXPLANATIONS", driver.CUresult),
+    ("runtime_cuda_error_explanations", "RUNTIME_CUDA_ERROR_EXPLANATIONS", runtime.cudaError_t),
 ]
 
 
@@ -22,10 +22,10 @@ def _get_binding_version():
     return tuple(int(v) for v in major_minor)
 
 
-@pytest.mark.parametrize("module_name,enum_type", _EXPLANATION_MODULES)
-def test_explanations_health(module_name, enum_type):
+@pytest.mark.parametrize("module_name,dict_name,enum_type", _EXPLANATION_MODULES)
+def test_explanations_health(module_name, dict_name, enum_type):
     mod = importlib.import_module(f"cuda.bindings._utils.{module_name}")
-    expl_dict = mod._EXPLANATIONS
+    expl_dict = getattr(mod, dict_name)
 
     known_codes = set()
     for error in enum_type:
diff --git a/cuda_core/cuda/core/_utils/cuda_utils.pyx b/cuda_core/cuda/core/_utils/cuda_utils.pyx
index e89d92577e..726f8b4aba 100644
--- a/cuda_core/cuda/core/_utils/cuda_utils.pyx
+++ b/cuda_core/cuda/core/_utils/cuda_utils.pyx
@@ -27,16 +27,12 @@ from cpython.buffer cimport PyObject_GetBuffer, PyBuffer_Release, Py_buffer, PyB
 from cuda.bindings cimport cynvrtc, cynvvm, cynvjitlink
 
 try:
-    from cuda.bindings._utils.driver_cu_result_explanations import (
-        _EXPLANATIONS as DRIVER_CU_RESULT_EXPLANATIONS,
-    )
+    from cuda.bindings._utils.driver_cu_result_explanations import DRIVER_CU_RESULT_EXPLANATIONS
 except ModuleNotFoundError:
     DRIVER_CU_RESULT_EXPLANATIONS = {}
 
 try:
-    from cuda.bindings._utils.runtime_cuda_error_explanations import (
-        _EXPLANATIONS as RUNTIME_CUDA_ERROR_EXPLANATIONS,
-    )
+    from cuda.bindings._utils.runtime_cuda_error_explanations import RUNTIME_CUDA_ERROR_EXPLANATIONS
 except ModuleNotFoundError:
     RUNTIME_CUDA_ERROR_EXPLANATIONS = {}
 

From 76efdf69e49ed5d71e42e8f9d2db73bc851c76ae Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Tue, 31 Mar 2026 14:13:35 -0700
Subject: [PATCH 09/15] cuda_bindings: compare explanation dicts to
 CUresult/cudaError_t __doc__

Add test_explanations_dict_matches_enum_member_docstrings, which checks that
each hand-maintained DRIVER_CU_RESULT_EXPLANATIONS and
RUNTIME_CUDA_ERROR_EXPLANATIONS entry matches the corresponding FastEnum
member's __doc__ (cuda-bindings 13.2+ is said to attach the same narrative text
via codegen).

The comparison uses strict string equality. In current releases the dict
text and __doc__ are not byte-identical: generated docstrings include Sphinx
cross-references (:py:obj:...) and manual line breaks where the dicts use raw
CUDA comment style (::symbol()) and single-line concatenation; some deprecated
codes differ in length. So the test is marked xfail(strict=False) so CI stays
green until dicts and generated docstrings share one source of truth; when they
align, XPASS indicates the xfail can be removed.

Skip the compare when cuda-bindings < 13.2 (major.minor). Skip members with
no __doc__ (e.g. cudaErrorApiFailureBase). Helpers: _explanation_text_from_dict_value
to flatten dict tuple fragments.

To inspect all mismatches locally: pytest --runxfail on this test.

Made-with: Cursor
---
 cuda_bindings/tests/test_enum_explanations.py | 72 +++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/cuda_bindings/tests/test_enum_explanations.py b/cuda_bindings/tests/test_enum_explanations.py
index 01dc66719b..785cbab116 100644
--- a/cuda_bindings/tests/test_enum_explanations.py
+++ b/cuda_bindings/tests/test_enum_explanations.py
@@ -3,6 +3,7 @@
 
 import importlib
 import importlib.metadata
+import textwrap
 
 import pytest
 
@@ -13,6 +14,10 @@
     ("runtime_cuda_error_explanations", "RUNTIME_CUDA_ERROR_EXPLANATIONS", runtime.cudaError_t),
 ]
 
+# Explanation dicts are maintained for the same toolkit as cuda-bindings; enum members
+# carry docstrings from code generation (reportedly aligned since cuda-bindings 13.2.0).
+_MIN_BINDING_VERSION_FOR_DOCSTRING_COMPARE = (13, 2)
+
 
 def _get_binding_version():
     try:
@@ -22,6 +27,73 @@ def _get_binding_version():
     return tuple(int(v) for v in major_minor)
 
 
+def _explanation_text_from_dict_value(value):
+    """Flatten a dict entry to a single str (entries are str or tuple of str fragments)."""
+    if isinstance(value, tuple):
+        return "".join(value)
+    return value
+
+
+@pytest.mark.xfail(
+    reason=(
+        "Enum member __doc__ is not byte-identical to explanation dicts in current "
+        "releases (Sphinx/RST and line breaks in __doc__ vs ::-style refs in dicts; "
+        "some deprecated codes use a short [Deprecated] docstring). Remove xfail when "
+        "dicts and generated docstrings share one source of truth."
+    ),
+    strict=False,
+)
+@pytest.mark.parametrize("module_name,dict_name,enum_type", _EXPLANATION_MODULES)
+def test_explanations_dict_matches_enum_member_docstrings(module_name, dict_name, enum_type):
+    """Each explanation dict value should match the corresponding enum member's __doc__.
+
+    cuda-bindings 13.2+ attaches per-member documentation on driver ``CUresult`` and
+    runtime ``cudaError_t``; this test checks it against the hand-maintained dicts.
+
+    If this fails, differences may include whitespace, line breaks, Sphinx/RST markup
+    in ``__doc__`` vs raw ``::symbol()`` text in the dicts—normalizing whitespace is
+    a possible follow-up.
+
+    Marked xfail while dict text and generated ``__doc__`` differ; run
+    ``pytest --runxfail`` on this test to print the full mismatch report.
+    """
+    if _get_binding_version() < _MIN_BINDING_VERSION_FOR_DOCSTRING_COMPARE:
+        pytest.skip(
+            "Enum __doc__ vs explanation dict compare is only run for "
+            f"cuda-bindings >= {_MIN_BINDING_VERSION_FOR_DOCSTRING_COMPARE[0]}.{_MIN_BINDING_VERSION_FOR_DOCSTRING_COMPARE[1]}"
+        )
+
+    mod = importlib.import_module(f"cuda.bindings._utils.{module_name}")
+    expl_dict = getattr(mod, dict_name)
+
+    mismatches = []
+    for error in enum_type:
+        code = int(error)
+        assert code in expl_dict
+        expected = _explanation_text_from_dict_value(expl_dict[code])
+        actual = error.__doc__
+        if actual is None:
+            continue
+        if expected != actual:
+            mismatches.append((error, expected, actual))
+
+    if not mismatches:
+        return
+
+    lines = [
+        f"{len(mismatches)} enum member(s) where dict text != __doc__ (strict equality):",
+    ]
+    for error, expected, actual in mismatches[:15]:
+        lines.append(f"  {error!r}")
+        lines.append("    dict:")
+        lines.extend("    | " + ln for ln in textwrap.wrap(repr(expected), width=100) or [""])
+        lines.append("    __doc__:")
+        lines.extend("    | " + ln for ln in textwrap.wrap(repr(actual), width=100) or [""])
+    if len(mismatches) > 15:
+        lines.append(f"  ... and {len(mismatches) - 15} more")
+    pytest.fail("\n".join(lines))
+
+
 @pytest.mark.parametrize("module_name,dict_name,enum_type", _EXPLANATION_MODULES)
 def test_explanations_health(module_name, dict_name, enum_type):
     mod = importlib.import_module(f"cuda.bindings._utils.{module_name}")

From 86e94884ec597fe5bb327f24cf0abe73816af27f Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Tue, 31 Mar 2026 14:40:46 -0700
Subject: [PATCH 10/15] cuda_bindings: add clean_enum_member_docstring helper
 in enum explanation tests

Introduce clean_enum_member_docstring() for turning FastEnum CUresult /
cudaError_t __doc__ strings into plain text: collapse whitespace (newlines to
spaces), strip ends, and best-effort strip common Sphinx inline roles
(:py:obj:, :py:func:, :obj:, etc.) plus simple ** / * markup.

Placed in test_enum_explanations.py for now pending reuse from cuda_core.

Add parametrized examples and a None-input test.

Made-with: Cursor
---
 cuda_bindings/tests/test_enum_explanations.py | 56 +++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/cuda_bindings/tests/test_enum_explanations.py b/cuda_bindings/tests/test_enum_explanations.py
index 785cbab116..7061fd9997 100644
--- a/cuda_bindings/tests/test_enum_explanations.py
+++ b/cuda_bindings/tests/test_enum_explanations.py
@@ -3,6 +3,7 @@
 
 import importlib
 import importlib.metadata
+import re
 import textwrap
 
 import pytest
@@ -34,6 +35,61 @@ def _explanation_text_from_dict_value(value):
     return value
 
 
+def clean_enum_member_docstring(doc: str | None) -> str | None:
+    """Turn a FastEnum member ``__doc__`` into plain text for display or fallback logic.
+
+    Always: collapse all whitespace (including newlines) to single spaces and strip ends.
+
+    Best-effort: remove common Sphinx/reST inline markup seen in generated CUDA docs,
+    e.g. ``:py:obj:`~.cudaGetLastError()` `` -> ``cudaGetLastError()`` (relative ``~.`` is
+    dropped). Does not aim for perfect reST parsing—only patterns that appear on these
+    enums in practice.
+
+    Returns ``None`` if ``doc`` is ``None``; otherwise returns a non-empty or empty str.
+    """
+    if doc is None:
+        return None
+    s = doc
+    # Sphinx roles with a single backtick-delimited target (most common on these enums).
+    # Strip the role and keep the inner text; drop leading ~. used for same-module refs.
+    s = re.sub(
+        r":(?:py:)?(?:obj|func|meth|class|mod|data|const|exc):`([^`]+)`",
+        lambda m: re.sub(r"^~?\.", "", m.group(1)),
+        s,
+    )
+    # Inline emphasis / strong (rare in error blurbs)
+    s = re.sub(r"\*\*([^*]+)\*\*", r"\1", s)
+    s = re.sub(r"\*([^*]+)\*", r"\1", s)
+    # Collapse whitespace (newlines -> spaces) and trim
+    s = re.sub(r"\s+", " ", s).strip()
+    return s
+
+
+@pytest.mark.parametrize(
+    ("raw", "expected"),
+    [
+        ("a\nb  c", "a b c"),
+        ("  x  \n ", "x"),
+        (
+            "see\n:py:obj:`~.cuInit()` or :py:obj:`cuCtxDestroy()`",
+            "see cuInit() or cuCtxDestroy()",
+        ),
+        (
+            "x :py:func:`~.cudaMalloc()` y",
+            "x cudaMalloc() y",
+        ),
+        ("**Note:** text", "Note: text"),
+        ("[Deprecated]\n", "[Deprecated]"),
+    ],
+)
+def test_clean_enum_member_docstring_examples(raw, expected):
+    assert clean_enum_member_docstring(raw) == expected
+
+
+def test_clean_enum_member_docstring_none_input():
+    assert clean_enum_member_docstring(None) is None
+
+
 @pytest.mark.xfail(
     reason=(
         "Enum member __doc__ is not byte-identical to explanation dicts in current "

From f4094c5359f78b161eafe5353d669470a343dadf Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Tue, 31 Mar 2026 14:48:52 -0700
Subject: [PATCH 11/15] cuda_bindings: compare explanation dicts to cleaned
 enum __doc__ in health test

Add _explanation_dict_text_for_cleaned_doc_compare to normalize dict strings for
parity with clean_enum_member_docstring: strip Doxygen-style :: before name(
and collapse whitespace.

Rename test_explanations_dict_matches_enum_member_docstrings to
test_explanations_dict_matches_cleaned_enum_docstrings; compare normalized dict
text to clean_enum_member_docstring(__doc__) instead of raw __doc__. Update
xfail reason and failure-report labels.

Give explicit pytest.param ids on test_clean_enum_member_docstring_examples for
readable node ids (ruff/pre-commit friendly).

Made-with: Cursor
---
 cuda_bindings/tests/test_enum_explanations.py | 63 +++++++++++--------
 1 file changed, 38 insertions(+), 25 deletions(-)

diff --git a/cuda_bindings/tests/test_enum_explanations.py b/cuda_bindings/tests/test_enum_explanations.py
index 7061fd9997..ad78a4cead 100644
--- a/cuda_bindings/tests/test_enum_explanations.py
+++ b/cuda_bindings/tests/test_enum_explanations.py
@@ -35,6 +35,19 @@ def _explanation_text_from_dict_value(value):
     return value
 
 
+def _explanation_dict_text_for_cleaned_doc_compare(value) -> str:
+    """Normalize hand-maintained dict text to compare with ``clean_enum_member_docstring`` output.
+
+    Dicts follow CUDA header comments (``::cuInit()``-style refs); cleaned enum ``__doc__``
+    uses plain names after Sphinx role stripping. Strip a leading ``::`` before ``name(`` and
+    collapse whitespace so both sides use the same conventions as ``clean_enum_member_docstring``.
+    """
+    s = _explanation_text_from_dict_value(value)
+    s = re.sub(r"::([a-zA-Z_][a-zA-Z0-9_]*\()", r"\1", s)
+    s = re.sub(r"\s+", " ", s).strip()
+    return s
+
+
 def clean_enum_member_docstring(doc: str | None) -> str | None:
     """Turn a FastEnum member ``__doc__`` into plain text for display or fallback logic.
 
@@ -68,18 +81,20 @@ def clean_enum_member_docstring(doc: str | None) -> str | None:
 @pytest.mark.parametrize(
     ("raw", "expected"),
     [
-        ("a\nb  c", "a b c"),
-        ("  x  \n ", "x"),
-        (
+        pytest.param("a\nb  c", "a b c", id="collapse_whitespace"),
+        pytest.param("  x  \n ", "x", id="strip_padding"),
+        pytest.param(
             "see\n:py:obj:`~.cuInit()` or :py:obj:`cuCtxDestroy()`",
             "see cuInit() or cuCtxDestroy()",
+            id="sphinx_py_obj_roles",
         ),
-        (
+        pytest.param(
             "x :py:func:`~.cudaMalloc()` y",
             "x cudaMalloc() y",
+            id="sphinx_py_func_role",
         ),
-        ("**Note:** text", "Note: text"),
-        ("[Deprecated]\n", "[Deprecated]"),
+        pytest.param("**Note:** text", "Note: text", id="strip_bold"),
+        pytest.param("[Deprecated]\n", "[Deprecated]", id="deprecated_line"),
     ],
 )
 def test_clean_enum_member_docstring_examples(raw, expected):
@@ -92,26 +107,23 @@ def test_clean_enum_member_docstring_none_input():
 
 @pytest.mark.xfail(
     reason=(
-        "Enum member __doc__ is not byte-identical to explanation dicts in current "
-        "releases (Sphinx/RST and line breaks in __doc__ vs ::-style refs in dicts; "
-        "some deprecated codes use a short [Deprecated] docstring). Remove xfail when "
-        "dicts and generated docstrings share one source of truth."
+        "Even after clean_enum_member_docstring and dict-side ::/whitespace alignment, "
+        "some members still differ (e.g. [Deprecated] stub vs full paragraph in dict; "
+        "wording drift). Remove xfail when dicts and generated docstrings share one source."
     ),
     strict=False,
 )
 @pytest.mark.parametrize("module_name,dict_name,enum_type", _EXPLANATION_MODULES)
-def test_explanations_dict_matches_enum_member_docstrings(module_name, dict_name, enum_type):
-    """Each explanation dict value should match the corresponding enum member's __doc__.
+def test_explanations_dict_matches_cleaned_enum_docstrings(module_name, dict_name, enum_type):
+    """Hand-maintained explanation dict entries should match cleaned enum ``__doc__`` text.
 
     cuda-bindings 13.2+ attaches per-member documentation on driver ``CUresult`` and
-    runtime ``cudaError_t``; this test checks it against the hand-maintained dicts.
-
-    If this fails, differences may include whitespace, line breaks, Sphinx/RST markup
-    in ``__doc__`` vs raw ``::symbol()`` text in the dicts—normalizing whitespace is
-    a possible follow-up.
+    runtime ``cudaError_t``. This compares ``clean_enum_member_docstring(member.__doc__)``
+    to dict text normalized with ``_explanation_dict_text_for_cleaned_doc_compare`` (same
+    whitespace rules; strip Doxygen ``::`` before ``name(`` to align with Sphinx output).
 
-    Marked xfail while dict text and generated ``__doc__`` differ; run
-    ``pytest --runxfail`` on this test to print the full mismatch report.
+    Marked xfail while mismatches remain; run ``pytest --runxfail`` on this test for the
+    full mismatch report (normalized dict vs cleaned ``__doc__``).
     """
     if _get_binding_version() < _MIN_BINDING_VERSION_FOR_DOCSTRING_COMPARE:
         pytest.skip(
@@ -126,10 +138,11 @@ def test_explanations_dict_matches_enum_member_docstrings(module_name, dict_name
     for error in enum_type:
         code = int(error)
         assert code in expl_dict
-        expected = _explanation_text_from_dict_value(expl_dict[code])
-        actual = error.__doc__
-        if actual is None:
+        expected = _explanation_dict_text_for_cleaned_doc_compare(expl_dict[code])
+        raw_doc = error.__doc__
+        if raw_doc is None:
             continue
+        actual = clean_enum_member_docstring(raw_doc)
         if expected != actual:
             mismatches.append((error, expected, actual))
 
@@ -137,13 +150,13 @@ def test_explanations_dict_matches_enum_member_docstrings(module_name, dict_name
         return
 
     lines = [
-        f"{len(mismatches)} enum member(s) where dict text != __doc__ (strict equality):",
+        f"{len(mismatches)} enum member(s) where normalized dict text != clean_enum_member_docstring(__doc__):",
     ]
     for error, expected, actual in mismatches[:15]:
         lines.append(f"  {error!r}")
-        lines.append("    dict:")
+        lines.append("    dict (normalized for compare):")
         lines.extend("    | " + ln for ln in textwrap.wrap(repr(expected), width=100) or [""])
-        lines.append("    __doc__:")
+        lines.append("    cleaned __doc__:")
         lines.extend("    | " + ln for ln in textwrap.wrap(repr(actual), width=100) or [""])
     if len(mismatches) > 15:
         lines.append(f"  ... and {len(mismatches) - 15} more")

From 4adec227122c852042e02b67033e4a248f0fca68 Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Tue, 31 Mar 2026 15:07:09 -0700
Subject: [PATCH 12/15] cuda_bindings: improve dict/__doc__ parity test
 (Doxygen :: strip, deprecated skips)

Add _strip_doxygen_double_colon_prefixes to remove Doxygen :: before CUDA
identifiers in explanation dict text (not C++ Foo::Bar scope), and use it in
_explanation_dict_text_for_cleaned_doc_compare. Add small unit tests.

Refactor test_explanations_dict_matches_cleaned_enum_docstrings to parametrize
per enum member so pytest can report per-case skips and failures.

Skip comparison when __doc__ is missing, or when strip().endswith('[Deprecated]')
(stub-only or suffix deprecation notes). Drop unused textwrap import.

Made-with: Cursor
---
 cuda_bindings/tests/test_enum_explanations.py | 113 ++++++++++++------
 1 file changed, 74 insertions(+), 39 deletions(-)

diff --git a/cuda_bindings/tests/test_enum_explanations.py b/cuda_bindings/tests/test_enum_explanations.py
index ad78a4cead..ca2fc8ed4b 100644
--- a/cuda_bindings/tests/test_enum_explanations.py
+++ b/cuda_bindings/tests/test_enum_explanations.py
@@ -4,7 +4,6 @@
 import importlib
 import importlib.metadata
 import re
-import textwrap
 
 import pytest
 
@@ -35,15 +34,30 @@ def _explanation_text_from_dict_value(value):
     return value
 
 
+def _strip_doxygen_double_colon_prefixes(s: str) -> str:
+    """Remove Doxygen-style ``::`` before CUDA identifiers in header-comment text.
+
+    Matches ``::`` only when it *starts* a reference (not C++ scope between two names):
+    use a negative lookbehind so ``Foo::Bar`` keeps the inner ``::``.
+
+    Applied repeatedly so ``::a ::b`` becomes ``a b``.
+    """
+    prev = None
+    while prev != s:
+        prev = s
+        s = re.sub(r"(?<![A-Za-z0-9_])::+([A-Za-z_][A-Za-z0-9_]*)", r"\1", s)
+    return s
+
+
 def _explanation_dict_text_for_cleaned_doc_compare(value) -> str:
     """Normalize hand-maintained dict text to compare with ``clean_enum_member_docstring`` output.
 
-    Dicts follow CUDA header comments (``::cuInit()``-style refs); cleaned enum ``__doc__``
-    uses plain names after Sphinx role stripping. Strip a leading ``::`` before ``name(`` and
-    collapse whitespace so both sides use the same conventions as ``clean_enum_member_docstring``.
+    Dicts use Doxygen ``::Symbol`` for APIs, types, and constants; cleaned enum ``__doc__``
+    uses plain names after Sphinx role stripping. Strip those ``::`` prefixes on the fly,
+    then collapse whitespace like ``clean_enum_member_docstring``.
     """
     s = _explanation_text_from_dict_value(value)
-    s = re.sub(r"::([a-zA-Z_][a-zA-Z0-9_]*\()", r"\1", s)
+    s = _strip_doxygen_double_colon_prefixes(s)
     s = re.sub(r"\s+", " ", s).strip()
     return s
 
@@ -105,16 +119,42 @@ def test_clean_enum_member_docstring_none_input():
     assert clean_enum_member_docstring(None) is None
 
 
+@pytest.mark.parametrize(
+    ("raw", "expected"),
+    [
+        pytest.param("see ::CUDA_SUCCESS", "see CUDA_SUCCESS", id="type_ref"),
+        pytest.param("Foo::Bar unchanged", "Foo::Bar unchanged", id="cpp_scope_preserved"),
+        pytest.param("::cuInit() and ::CUstream", "cuInit() and CUstream", id="multiple_prefixes"),
+    ],
+)
+def test_strip_doxygen_double_colon_prefixes(raw, expected):
+    assert _strip_doxygen_double_colon_prefixes(raw) == expected
+
+
+def _enum_docstring_parity_cases():
+    for module_name, dict_name, enum_type in _EXPLANATION_MODULES:
+        for error in enum_type:
+            yield pytest.param(
+                module_name,
+                dict_name,
+                error,
+                id=f"{enum_type.__name__}.{error.name}",
+            )
+
+
 @pytest.mark.xfail(
     reason=(
-        "Even after clean_enum_member_docstring and dict-side ::/whitespace alignment, "
-        "some members still differ (e.g. [Deprecated] stub vs full paragraph in dict; "
-        "wording drift). Remove xfail when dicts and generated docstrings share one source."
+        "Some members still differ after clean_enum_member_docstring and dict-side "
+        "::/whitespace alignment (wording drift, etc.). [Deprecated] stubs are skipped. "
+        "Remove xfail when dicts and generated docstrings share one source."
     ),
     strict=False,
 )
-@pytest.mark.parametrize("module_name,dict_name,enum_type", _EXPLANATION_MODULES)
-def test_explanations_dict_matches_cleaned_enum_docstrings(module_name, dict_name, enum_type):
+@pytest.mark.parametrize(
+    "module_name,dict_name,error",
+    list(_enum_docstring_parity_cases()),
+)
+def test_explanations_dict_matches_cleaned_enum_docstrings(module_name, dict_name, error):
     """Hand-maintained explanation dict entries should match cleaned enum ``__doc__`` text.
 
     cuda-bindings 13.2+ attaches per-member documentation on driver ``CUresult`` and
@@ -122,8 +162,12 @@ def test_explanations_dict_matches_cleaned_enum_docstrings(module_name, dict_nam
     to dict text normalized with ``_explanation_dict_text_for_cleaned_doc_compare`` (same
     whitespace rules; strip Doxygen ``::`` before ``name(`` to align with Sphinx output).
 
-    Marked xfail while mismatches remain; run ``pytest --runxfail`` on this test for the
-    full mismatch report (normalized dict vs cleaned ``__doc__``).
+    Members whose ``__doc__`` is the ``[Deprecated]`` stub alone, or ends with
+    ``[Deprecated]`` after stripping whitespace, are skipped (dicts may keep longer
+    text; we do not compare those).
+
+    Marked xfail while any non-skipped member still mismatches; many cases already match
+    (reported as xpassed when this mark is present).
     """
     if _get_binding_version() < _MIN_BINDING_VERSION_FOR_DOCSTRING_COMPARE:
         pytest.skip(
@@ -134,33 +178,24 @@ def test_explanations_dict_matches_cleaned_enum_docstrings(module_name, dict_nam
     mod = importlib.import_module(f"cuda.bindings._utils.{module_name}")
     expl_dict = getattr(mod, dict_name)
 
-    mismatches = []
-    for error in enum_type:
-        code = int(error)
-        assert code in expl_dict
-        expected = _explanation_dict_text_for_cleaned_doc_compare(expl_dict[code])
-        raw_doc = error.__doc__
-        if raw_doc is None:
-            continue
-        actual = clean_enum_member_docstring(raw_doc)
-        if expected != actual:
-            mismatches.append((error, expected, actual))
-
-    if not mismatches:
-        return
-
-    lines = [
-        f"{len(mismatches)} enum member(s) where normalized dict text != clean_enum_member_docstring(__doc__):",
-    ]
-    for error, expected, actual in mismatches[:15]:
-        lines.append(f"  {error!r}")
-        lines.append("    dict (normalized for compare):")
-        lines.extend("    | " + ln for ln in textwrap.wrap(repr(expected), width=100) or [""])
-        lines.append("    cleaned __doc__:")
-        lines.extend("    | " + ln for ln in textwrap.wrap(repr(actual), width=100) or [""])
-    if len(mismatches) > 15:
-        lines.append(f"  ... and {len(mismatches) - 15} more")
-    pytest.fail("\n".join(lines))
+    code = int(error)
+    assert code in expl_dict
+
+    raw_doc = error.__doc__
+    if raw_doc is not None and raw_doc.strip().endswith("[Deprecated]"):
+        pytest.skip(f"SKIPPED: {error.name} is deprecated (__doc__ is or ends with [Deprecated])")
+
+    if raw_doc is None:
+        pytest.skip(f"SKIPPED: {error.name} has no __doc__")
+
+    expected = _explanation_dict_text_for_cleaned_doc_compare(expl_dict[code])
+    actual = clean_enum_member_docstring(raw_doc)
+    if expected != actual:
+        pytest.fail(
+            f"normalized dict != cleaned __doc__ for {error!r}:\n"
+            f"  dict (normalized for compare): {expected!r}\n"
+            f"  cleaned __doc__: {actual!r}"
+        )
 
 
 @pytest.mark.parametrize("module_name,dict_name,enum_type", _EXPLANATION_MODULES)

From 1ce9bc8be8507f36871e3d8d4336456bcd23a763 Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Tue, 31 Mar 2026 15:12:11 -0700
Subject: [PATCH 13/15] cuda_bindings: collapse wrapped hyphen spaces in
 clean_enum_member_docstring

Add _fix_hyphenation_wordwrap_spacing to remove spurious spaces around hyphens
from reflowed __doc__ text ([a-z]- [a-z] and [a-z] -[a-z]), applied until stable.

Use it in clean_enum_member_docstring after whitespace collapse and in
_explanation_dict_text_for_cleaned_doc_compare for symmetric comparison.

Add examples to test_clean_enum_member_docstring_examples.

Made-with: Cursor
---
 cuda_bindings/tests/test_enum_explanations.py | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/cuda_bindings/tests/test_enum_explanations.py b/cuda_bindings/tests/test_enum_explanations.py
index ca2fc8ed4b..f0257f033e 100644
--- a/cuda_bindings/tests/test_enum_explanations.py
+++ b/cuda_bindings/tests/test_enum_explanations.py
@@ -59,6 +59,27 @@ def _explanation_dict_text_for_cleaned_doc_compare(value) -> str:
     s = _explanation_text_from_dict_value(value)
     s = _strip_doxygen_double_colon_prefixes(s)
     s = re.sub(r"\s+", " ", s).strip()
+    s = _fix_hyphenation_wordwrap_spacing(s)
+    return s
+
+
+def _fix_hyphenation_wordwrap_spacing(s: str) -> str:
+    """Remove spaces around hyphens introduced by line wrapping in generated ``__doc__`` text.
+
+    Sphinx/reflow often splits hyphenated words as ``non- linear`` or ``word -word``.
+    The explanation dicts are usually single-line and do not contain these splits; the
+    mismatch shows up on the cleaned enum side, so this runs inside
+    ``clean_enum_member_docstring`` (and the same transform is applied to dict text for
+    comparison parity).
+
+    Patterns (all lowercase ASCII letters as in the CUDA blurbs): ``[a-z]- [a-z]`` and
+    ``[a-z] -[a-z]``. Applied repeatedly until stable.
+    """
+    prev = None
+    while prev != s:
+        prev = s
+        s = re.sub(r"([a-z])- ([a-z])", r"\1-\2", s)
+        s = re.sub(r"([a-z]) -([a-z])", r"\1-\2", s)
     return s
 
 
@@ -72,6 +93,9 @@ def clean_enum_member_docstring(doc: str | None) -> str | None:
     dropped). Does not aim for perfect reST parsing—only patterns that appear on these
     enums in practice.
 
+    After whitespace collapse, removes spurious spaces around hyphens from line wrapping
+    (``[a-z]- [a-z]`` and ``[a-z] -[a-z]``) so ``non- linear`` matches dict ``non-linear``.
+
     Returns ``None`` if ``doc`` is ``None``; otherwise returns a non-empty or empty str.
     """
     if doc is None:
@@ -89,6 +113,7 @@ def clean_enum_member_docstring(doc: str | None) -> str | None:
     s = re.sub(r"\*([^*]+)\*", r"\1", s)
     # Collapse whitespace (newlines -> spaces) and trim
     s = re.sub(r"\s+", " ", s).strip()
+    s = _fix_hyphenation_wordwrap_spacing(s)
     return s
 
 
@@ -109,6 +134,8 @@ def clean_enum_member_docstring(doc: str | None) -> str | None:
         ),
         pytest.param("**Note:** text", "Note: text", id="strip_bold"),
         pytest.param("[Deprecated]\n", "[Deprecated]", id="deprecated_line"),
+        pytest.param("non- linear", "non-linear", id="hyphen_space_after"),
+        pytest.param("word -word", "word-word", id="hyphen_space_before"),
     ],
 )
 def test_clean_enum_member_docstring_examples(raw, expected):

From cbebba0f641fb66d90cbd2722a79f8a406d3c81e Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Tue, 31 Mar 2026 15:35:12 -0700
Subject: [PATCH 14/15] cuda_bindings: fix Interactions __doc__ workaround and
 CUDART_DRIVER parity

Replace malformed \n:py:obj: (non-raw string so \n matches a real
newline) with quoted "Interactions " before generic Sphinx stripping.

Strip CUDART_DRIVER from normalized dict text for compare-only parity with
cleaned __doc__ (manpage token vs title-only).

Add a clean_enum_member_docstring example for the broken cross-ref.

Made-with: Cursor
---
 cuda_bindings/tests/test_enum_explanations.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/cuda_bindings/tests/test_enum_explanations.py b/cuda_bindings/tests/test_enum_explanations.py
index f0257f033e..4809ae8ba4 100644
--- a/cuda_bindings/tests/test_enum_explanations.py
+++ b/cuda_bindings/tests/test_enum_explanations.py
@@ -60,6 +60,8 @@ def _explanation_dict_text_for_cleaned_doc_compare(value) -> str:
     s = _strip_doxygen_double_colon_prefixes(s)
     s = re.sub(r"\s+", " ", s).strip()
     s = _fix_hyphenation_wordwrap_spacing(s)
+    # Manpage token only in dict text; cleaned __doc__ cites the section title alone.
+    s = s.replace("CUDART_DRIVER ", "")
     return s
 
 
@@ -101,6 +103,9 @@ def clean_enum_member_docstring(doc: str | None) -> str | None:
     if doc is None:
         return None
     s = doc
+    # Work around codegen bug (cudaErrorIncompatibleDriverContext):
+    # malformed :py:obj before `with`. Please remove after fix.
+    s = s.replace("\n:py:obj:`~.Interactions`", ' "Interactions ')
     # Sphinx roles with a single backtick-delimited target (most common on these enums).
     # Strip the role and keep the inner text; drop leading ~. used for same-module refs.
     s = re.sub(
@@ -136,6 +141,11 @@ def clean_enum_member_docstring(doc: str | None) -> str | None:
         pytest.param("[Deprecated]\n", "[Deprecated]", id="deprecated_line"),
         pytest.param("non- linear", "non-linear", id="hyphen_space_after"),
         pytest.param("word -word", "word-word", id="hyphen_space_before"),
+        pytest.param(
+            'Please see\n:py:obj:`~.Interactions`with the CUDA Driver API" for more information.',
+            'Please see "Interactions with the CUDA Driver API" for more information.',
+            id="codegen_broken_interactions_role",
+        ),
     ],
 )
 def test_clean_enum_member_docstring_examples(raw, expected):

From ba2f65a9bcf8a5dca81ca845f89b5e5dccb492a7 Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rgrossekunst@nvidia.com>
Date: Tue, 31 Mar 2026 15:39:24 -0700
Subject: [PATCH 15/15] cuda_bindings: skip cudaErrorLaunchTimeout in dict vs
 cleaned __doc__ parity test

Treat cleaned __doc__ as authoritative for cudaErrorLaunchTimeout; the explanation
dict still uses a Doxygen-style cudaDeviceAttr:: fragment we do not normalize.
Document in the test docstring.

Made-with: Cursor
---
 cuda_bindings/tests/test_enum_explanations.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/cuda_bindings/tests/test_enum_explanations.py b/cuda_bindings/tests/test_enum_explanations.py
index 4809ae8ba4..c4e99daa3b 100644
--- a/cuda_bindings/tests/test_enum_explanations.py
+++ b/cuda_bindings/tests/test_enum_explanations.py
@@ -203,6 +203,10 @@ def test_explanations_dict_matches_cleaned_enum_docstrings(module_name, dict_nam
     ``[Deprecated]`` after stripping whitespace, are skipped (dicts may keep longer
     text; we do not compare those).
 
+    ``cudaErrorLaunchTimeout`` is skipped: cleaned ``__doc__`` is considered authoritative;
+    the hand dict still carries a Doxygen-style ``cudaDeviceAttr::…`` fragment that we do
+    not normalize to match.
+
     Marked xfail while any non-skipped member still mismatches; many cases already match
     (reported as xpassed when this mark is present).
     """
@@ -212,6 +216,9 @@ def test_explanations_dict_matches_cleaned_enum_docstrings(module_name, dict_nam
             f"cuda-bindings >= {_MIN_BINDING_VERSION_FOR_DOCSTRING_COMPARE[0]}.{_MIN_BINDING_VERSION_FOR_DOCSTRING_COMPARE[1]}"
         )
 
+    if error is runtime.cudaError_t.cudaErrorLaunchTimeout:
+        pytest.skip("Known good __doc__, bad explanations dict value")
+
     mod = importlib.import_module(f"cuda.bindings._utils.{module_name}")
     expl_dict = getattr(mod, dict_name)