Check for support of CUDA Memory Pools at runtime (#4679)
Some CUDA GPUs, like the Quadro M3000M, don't support Memory Pool
operations such as cudaMallocAsync/cudaFreeAsync even on driver versions
newer than 11020, and this can result in errors like:

  CUDA runtime error: operation not supported

So check for support at runtime instead of compile time.
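
As an aside, the pattern is easy to reproduce outside Open3D. The sketch below is illustrative only: it is not part of this commit, it assumes a CUDA 11.2+ toolkit (for the cudaDevAttrMemoryPoolsSupported enum and the cudaMallocAsync/cudaFreeAsync declarations), and the DeviceSupportsMemoryPools helper and its simplified error handling are hypothetical names, not Open3D API.

// Standalone sketch of the runtime check: query the driver version first,
// then the device attribute, and fall back to cudaMalloc/cudaFree otherwise.
#include <cuda_runtime.h>

#include <cstdio>

static bool DeviceSupportsMemoryPools(int device_id) {
    int driver_version = 0;
    if (cudaDriverGetVersion(&driver_version) != cudaSuccess) {
        return false;
    }
    // Drivers older than 11.2 (11020) reject cudaDevAttrMemoryPoolsSupported
    // with an invalid-value error, so gate the attribute query on the version.
    if (driver_version < 11020) {
        return false;
    }
    int supported = 0;
    if (cudaDeviceGetAttribute(&supported, cudaDevAttrMemoryPoolsSupported,
                               device_id) != cudaSuccess) {
        return false;
    }
    return supported != 0;
}

int main() {
    void* ptr = nullptr;
    const size_t byte_size = 1 << 20;
    if (DeviceSupportsMemoryPools(0)) {
        // Stream-ordered allocation from the device's default memory pool.
        cudaStream_t stream;
        cudaStreamCreate(&stream);
        cudaMallocAsync(&ptr, byte_size, stream);
        cudaFreeAsync(ptr, stream);
        cudaStreamSynchronize(stream);
        cudaStreamDestroy(stream);
    } else {
        // Fallback for devices such as the Quadro M3000M.
        cudaMalloc(&ptr, byte_size);
        cudaFree(ptr);
    }
    std::printf("memory pools supported: %d\n",
                static_cast<int>(DeviceSupportsMemoryPools(0)));
    return 0;
}
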
ao2 committed Oct 27, 2023
1 parent 881ae76 commit 95447f4
Showing 4 changed files with 38 additions and 11 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -20,6 +20,7 @@
 * Fix some bad triangle generation in TriangleMesh::SimplifyQuadricDecimation
 * Fix printing of tensor in gpu and add validation check for bounds of axis-aligned bounding box (PR #6444)
 * Python 3.11 support. bump pybind11 v2.6.2 -> v2.11.1
+* Check for support of CUDA Memory Pools at runtime (#4679)
 
 ## 0.13
 
19 changes: 19 additions & 0 deletions cpp/open3d/core/CUDAUtils.cpp
@@ -108,6 +108,25 @@ void AssertCUDADeviceAvailable(const Device& device) {
     }
 }
 
+bool SupportsMemoryPools(const Device& device) {
+#ifdef BUILD_CUDA_MODULE
+    if (device.IsCUDA()) {
+        int driverVersion = 0;
+        int deviceSupportsMemoryPools = 0;
+        OPEN3D_CUDA_CHECK(cudaDriverGetVersion(&driverVersion));
+        if (driverVersion >=
+            11020) {  // avoid invalid value error in cudaDeviceGetAttribute
+            OPEN3D_CUDA_CHECK(cudaDeviceGetAttribute(&deviceSupportsMemoryPools, cudaDevAttrMemoryPoolsSupported, device.GetID()));
+        }
+        return !!deviceSupportsMemoryPools;
+    } else {
+        return false;
+    }
+#else
+    return false;
+#endif
+}
+
 #ifdef BUILD_CUDA_MODULE
 int GetDevice() {
     int device;
7 changes: 7 additions & 0 deletions cpp/open3d/core/CUDAUtils.h
@@ -255,6 +255,13 @@ void AssertCUDADeviceAvailable(int device_id);
 /// \param device The device to be checked.
 void AssertCUDADeviceAvailable(const Device& device);
 
+/// Checks if the CUDA device supports Memory Pools
+/// used by the Stream Ordered Memory Allocator,
+/// see
+/// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html
+/// \param device The device to be checked.
+bool SupportsMemoryPools(const Device& device);
+
 #ifdef BUILD_CUDA_MODULE
 
 int GetDevice();
22 changes: 11 additions & 11 deletions cpp/open3d/core/MemoryManagerCUDA.cpp
@@ -19,12 +19,12 @@ void* MemoryManagerCUDA::Malloc(size_t byte_size, const Device& device) {
 
     void* ptr;
     if (device.IsCUDA()) {
-#if CUDART_VERSION >= 11020
-        OPEN3D_CUDA_CHECK(cudaMallocAsync(static_cast<void**>(&ptr), byte_size,
-                                          cuda::GetStream()));
-#else
-        OPEN3D_CUDA_CHECK(cudaMalloc(static_cast<void**>(&ptr), byte_size));
-#endif
+        if (cuda::SupportsMemoryPools(device)) {
+            OPEN3D_CUDA_CHECK(cudaMallocAsync(static_cast<void**>(&ptr),
+                                              byte_size, cuda::GetStream()));
+        } else {
+            OPEN3D_CUDA_CHECK(cudaMalloc(static_cast<void**>(&ptr), byte_size));
+        }
     } else {
         utility::LogError("Internal error: Unimplemented device {}.",
                           device.ToString());
@@ -37,11 +37,11 @@ void MemoryManagerCUDA::Free(void* ptr, const Device& device) {
 
     if (device.IsCUDA()) {
         if (ptr && IsCUDAPointer(ptr, device)) {
-#if CUDART_VERSION >= 11020
-            OPEN3D_CUDA_CHECK(cudaFreeAsync(ptr, cuda::GetStream()));
-#else
-            OPEN3D_CUDA_CHECK(cudaFree(ptr));
-#endif
+            if (cuda::SupportsMemoryPools(device)) {
+                OPEN3D_CUDA_CHECK(cudaFreeAsync(ptr, cuda::GetStream()));
+            } else {
+                OPEN3D_CUDA_CHECK(cudaFree(ptr));
+            }
         }
     } else {
         utility::LogError("Internal error: Unimplemented device {}.",
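
Usage note (not part of the commit): callers that allocate through Open3D's memory manager do not need to know which branch runs. The short sketch below assumes the public open3d::core::MemoryManager static Malloc/Free API and a build with BUILD_CUDA_MODULE enabled; it is illustrative rather than code from this change.

// Hypothetical caller-side sketch, assuming the open3d::core::MemoryManager
// static API; the runtime Memory Pool check happens inside Malloc/Free.
#include "open3d/core/Device.h"
#include "open3d/core/MemoryManager.h"

int main() {
    const open3d::core::Device device("CUDA:0");
    // Dispatches to cudaMallocAsync on devices whose driver and hardware
    // support Memory Pools, and to plain cudaMalloc otherwise (e.g. on a
    // Quadro M3000M).
    void* ptr = open3d::core::MemoryManager::Malloc(1024, device);
    // Likewise cudaFreeAsync vs. cudaFree on the way back.
    open3d::core::MemoryManager::Free(ptr, device);
    return 0;
}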
