slaren committed on
Commit
a817d85
·
unverified ·
1 Parent(s): 4e31c82

cuda : ignore peer access already enabled errors (llama/5597)

Browse files

* cuda : ignore peer access already enabled errors

* fix hip

Files changed (1) hide show
  1. ggml-cuda.cu +15 -7
ggml-cuda.cu CHANGED
@@ -54,6 +54,8 @@
54
  #define cudaDeviceProp hipDeviceProp_t
55
  #define cudaDeviceSynchronize hipDeviceSynchronize
56
  #define cudaError_t hipError_t
 
 
57
  #define cudaEventCreateWithFlags hipEventCreateWithFlags
58
  #define cudaEventDisableTiming hipEventDisableTiming
59
  #define cudaEventRecord hipEventRecord
@@ -9325,9 +9327,15 @@ static void ggml_cuda_set_peer_access(const int n_tokens) {
9325
  CUDA_CHECK(cudaDeviceCanAccessPeer(&can_access_peer, id, id_other));
9326
  if (can_access_peer) {
9327
  if (enable_peer_access) {
9328
- CUDA_CHECK(cudaDeviceEnablePeerAccess(id_other, 0));
 
 
 
9329
  } else {
9330
- CUDA_CHECK(cudaDeviceDisablePeerAccess(id_other));
 
 
 
9331
  }
9332
  }
9333
  }
@@ -10999,10 +11007,10 @@ GGML_CALL static const char * ggml_backend_cuda_split_buffer_get_name(ggml_backe
10999
  UNUSED(buffer);
11000
  }
11001
 
11002
- // unused at the moment
11003
- //static bool ggml_backend_buffer_is_cuda_split(ggml_backend_buffer_t buffer) {
11004
- // return buffer->iface.get_name == ggml_backend_cuda_split_buffer_get_name;
11005
- //}
11006
 
11007
  GGML_CALL static void ggml_backend_cuda_split_buffer_free_buffer(ggml_backend_buffer_t buffer) {
11008
  ggml_backend_cuda_split_buffer_context * ctx = (ggml_backend_cuda_split_buffer_context *)buffer->context;
@@ -11390,7 +11398,7 @@ GGML_CALL static bool ggml_backend_cuda_graph_compute(ggml_backend_t backend, gg
11390
  for (int j = 0; j < GGML_MAX_SRC; j++) {
11391
  if (node->src[j] != nullptr) {
11392
  assert(node->src[j]->backend == GGML_BACKEND_GPU || node->src[j]->backend == GGML_BACKEND_GPU_SPLIT);
11393
- assert(node->src[j]->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device));
11394
  assert(node->src[j]->extra != nullptr);
11395
  }
11396
  }
 
54
  #define cudaDeviceProp hipDeviceProp_t
55
  #define cudaDeviceSynchronize hipDeviceSynchronize
56
  #define cudaError_t hipError_t
57
+ #define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled
58
+ #define cudaErrorPeerAccessNotEnabled hipErrorPeerAccessNotEnabled
59
  #define cudaEventCreateWithFlags hipEventCreateWithFlags
60
  #define cudaEventDisableTiming hipEventDisableTiming
61
  #define cudaEventRecord hipEventRecord
 
9327
  CUDA_CHECK(cudaDeviceCanAccessPeer(&can_access_peer, id, id_other));
9328
  if (can_access_peer) {
9329
  if (enable_peer_access) {
9330
+ cudaError_t err = cudaDeviceEnablePeerAccess(id_other, 0);
9331
+ if (err != cudaErrorPeerAccessAlreadyEnabled) {
9332
+ CUDA_CHECK(err);
9333
+ }
9334
  } else {
9335
+ cudaError_t err = cudaDeviceDisablePeerAccess(id_other);
9336
+ if (err != cudaErrorPeerAccessNotEnabled) {
9337
+ CUDA_CHECK(err);
9338
+ }
9339
  }
9340
  }
9341
  }
 
11007
  UNUSED(buffer);
11008
  }
11009
 
11010
+ static bool ggml_backend_buffer_is_cuda_split(ggml_backend_buffer_t buffer) { // reports whether `buffer` is a CUDA split buffer
11011
+ return buffer->iface.get_name == ggml_backend_cuda_split_buffer_get_name; // true when the buffer's get_name callback is the CUDA split buffer's get_name function (pointer comparison)
11012
+ UNUSED(ggml_backend_buffer_is_cuda_split); // only used in debug builds currently, avoid unused function warning in release builds; sits after the return, so it never executes and only references the function's own name
11013
+ }
11014
 
11015
  GGML_CALL static void ggml_backend_cuda_split_buffer_free_buffer(ggml_backend_buffer_t buffer) {
11016
  ggml_backend_cuda_split_buffer_context * ctx = (ggml_backend_cuda_split_buffer_context *)buffer->context;
 
11398
  for (int j = 0; j < GGML_MAX_SRC; j++) {
11399
  if (node->src[j] != nullptr) {
11400
  assert(node->src[j]->backend == GGML_BACKEND_GPU || node->src[j]->backend == GGML_BACKEND_GPU_SPLIT);
11401
+ assert(node->src[j]->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) || ggml_backend_buffer_is_cuda_split(node->src[j]->buffer));
11402
  assert(node->src[j]->extra != nullptr);
11403
  }
11404
  }