Spaces:
Running
Running
R0CKSTAR committed on
Commit ·
ff2d3eb
1
Parent(s): e59d9a7
musa: bump MUSA SDK version to rc3.1.1 (llama/11822)
Browse files
* musa: Update MUSA SDK version to rc3.1.1
Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
* musa: Remove workaround in PR #10042
Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
---------
Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
ggml/src/ggml-cuda/ggml-cuda.cu
CHANGED
|
@@ -1480,12 +1480,7 @@ static void ggml_cuda_op_mul_mat(
|
|
| 1480 |
const size_t nbytes_data = ggml_nbytes(src0);
|
| 1481 |
const size_t nbytes_padding = ggml_row_size(src0->type, MATRIX_ROW_PADDING - ne00 % MATRIX_ROW_PADDING);
|
| 1482 |
dev[id].src0_dd = dev[id].src0_dd_alloc.alloc(ctx.pool(id), nbytes_data + nbytes_padding);
|
| 1483 |
-
// TODO: remove this for MUSA once the Guilty Lockup issue is resolved
|
| 1484 |
-
#ifndef GGML_USE_MUSA
|
| 1485 |
CUDA_CHECK(cudaMemsetAsync(dev[id].src0_dd, 0, nbytes_data + nbytes_padding, stream));
|
| 1486 |
-
#else // GGML_USE_MUSA
|
| 1487 |
-
CUDA_CHECK(cudaMemsetAsync(dev[id].src0_dd + nbytes_data, 0, nbytes_padding, stream));
|
| 1488 |
-
#endif // !GGML_USE_MUSA
|
| 1489 |
}
|
| 1490 |
|
| 1491 |
// If src0 is on a temporary compute buffer (partial offloading) there may be some padding that needs to be cleared:
|
|
|
|
| 1480 |
const size_t nbytes_data = ggml_nbytes(src0);
|
| 1481 |
const size_t nbytes_padding = ggml_row_size(src0->type, MATRIX_ROW_PADDING - ne00 % MATRIX_ROW_PADDING);
|
| 1482 |
dev[id].src0_dd = dev[id].src0_dd_alloc.alloc(ctx.pool(id), nbytes_data + nbytes_padding);
|
|
|
|
|
|
|
| 1483 |
CUDA_CHECK(cudaMemsetAsync(dev[id].src0_dd, 0, nbytes_data + nbytes_padding, stream));
|
|
|
|
|
|
|
|
|
|
| 1484 |
}
|
| 1485 |
|
| 1486 |
// If src0 is on a temporary compute buffer (partial offloading) there may be some padding that needs to be cleared:
|