Spaces:
Running
Running
slaren committed on
cuda : ignore peer access already enabled errors (llama/5597)
Browse files
* cuda : ignore peer access already enabled errors
* fix hip
- ggml-cuda.cu +15 -7
ggml-cuda.cu
CHANGED
|
@@ -54,6 +54,8 @@
|
|
| 54 |
#define cudaDeviceProp hipDeviceProp_t
|
| 55 |
#define cudaDeviceSynchronize hipDeviceSynchronize
|
| 56 |
#define cudaError_t hipError_t
|
|
|
|
|
|
|
| 57 |
#define cudaEventCreateWithFlags hipEventCreateWithFlags
|
| 58 |
#define cudaEventDisableTiming hipEventDisableTiming
|
| 59 |
#define cudaEventRecord hipEventRecord
|
|
@@ -9325,9 +9327,15 @@ static void ggml_cuda_set_peer_access(const int n_tokens) {
|
|
| 9325 |
CUDA_CHECK(cudaDeviceCanAccessPeer(&can_access_peer, id, id_other));
|
| 9326 |
if (can_access_peer) {
|
| 9327 |
if (enable_peer_access) {
|
| 9328 |
-
|
|
|
|
|
|
|
|
|
|
| 9329 |
} else {
|
| 9330 |
-
|
|
|
|
|
|
|
|
|
|
| 9331 |
}
|
| 9332 |
}
|
| 9333 |
}
|
|
@@ -10999,10 +11007,10 @@ GGML_CALL static const char * ggml_backend_cuda_split_buffer_get_name(ggml_backe
|
|
| 10999 |
UNUSED(buffer);
|
| 11000 |
}
|
| 11001 |
|
| 11002 |
-
|
| 11003 |
-
|
| 11004 |
-
//
|
| 11005 |
-
|
| 11006 |
|
| 11007 |
GGML_CALL static void ggml_backend_cuda_split_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
| 11008 |
ggml_backend_cuda_split_buffer_context * ctx = (ggml_backend_cuda_split_buffer_context *)buffer->context;
|
|
@@ -11390,7 +11398,7 @@ GGML_CALL static bool ggml_backend_cuda_graph_compute(ggml_backend_t backend, gg
|
|
| 11390 |
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
| 11391 |
if (node->src[j] != nullptr) {
|
| 11392 |
assert(node->src[j]->backend == GGML_BACKEND_GPU || node->src[j]->backend == GGML_BACKEND_GPU_SPLIT);
|
| 11393 |
-
assert(node->src[j]->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device));
|
| 11394 |
assert(node->src[j]->extra != nullptr);
|
| 11395 |
}
|
| 11396 |
}
|
|
|
|
| 54 |
#define cudaDeviceProp hipDeviceProp_t
|
| 55 |
#define cudaDeviceSynchronize hipDeviceSynchronize
|
| 56 |
#define cudaError_t hipError_t
|
| 57 |
+
#define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled
|
| 58 |
+
#define cudaErrorPeerAccessNotEnabled hipErrorPeerAccessNotEnabled
|
| 59 |
#define cudaEventCreateWithFlags hipEventCreateWithFlags
|
| 60 |
#define cudaEventDisableTiming hipEventDisableTiming
|
| 61 |
#define cudaEventRecord hipEventRecord
|
|
|
|
| 9327 |
CUDA_CHECK(cudaDeviceCanAccessPeer(&can_access_peer, id, id_other));
|
| 9328 |
if (can_access_peer) {
|
| 9329 |
if (enable_peer_access) {
|
| 9330 |
+
cudaError_t err = cudaDeviceEnablePeerAccess(id_other, 0);
|
| 9331 |
+
if (err != cudaErrorPeerAccessAlreadyEnabled) {
|
| 9332 |
+
CUDA_CHECK(err);
|
| 9333 |
+
}
|
| 9334 |
} else {
|
| 9335 |
+
cudaError_t err = cudaDeviceDisablePeerAccess(id_other);
|
| 9336 |
+
if (err != cudaErrorPeerAccessNotEnabled) {
|
| 9337 |
+
CUDA_CHECK(err);
|
| 9338 |
+
}
|
| 9339 |
}
|
| 9340 |
}
|
| 9341 |
}
|
|
|
|
| 11007 |
UNUSED(buffer);
|
| 11008 |
}
|
| 11009 |
|
| 11010 |
+
static bool ggml_backend_buffer_is_cuda_split(ggml_backend_buffer_t buffer) {
|
| 11011 |
+
return buffer->iface.get_name == ggml_backend_cuda_split_buffer_get_name;
|
| 11012 |
+
UNUSED(ggml_backend_buffer_is_cuda_split); // only used in debug builds currently, avoid unused function warning in release builds
|
| 11013 |
+
}
|
| 11014 |
|
| 11015 |
GGML_CALL static void ggml_backend_cuda_split_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
| 11016 |
ggml_backend_cuda_split_buffer_context * ctx = (ggml_backend_cuda_split_buffer_context *)buffer->context;
|
|
|
|
| 11398 |
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
| 11399 |
if (node->src[j] != nullptr) {
|
| 11400 |
assert(node->src[j]->backend == GGML_BACKEND_GPU || node->src[j]->backend == GGML_BACKEND_GPU_SPLIT);
|
| 11401 |
+
assert(node->src[j]->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) || ggml_backend_buffer_is_cuda_split(node->src[j]->buffer));
|
| 11402 |
assert(node->src[j]->extra != nullptr);
|
| 11403 |
}
|
| 11404 |
}
|