Spaces:
Running
Running
issixx
issi
committed on
Commit
·
8e57313
1
Parent(s):
600a548
ggml-cpu : fix ggml_graph_compute_thread did not terminate on abort. (ggml/1065)
Browse files
Some threads kept looping and failed to terminate properly after an abort during CPU execution.
Co-authored-by: issi <issi@gmail.com>
ggml/src/ggml-cpu/ggml-cpu.c
CHANGED
|
@@ -1302,7 +1302,7 @@ struct ggml_threadpool {
|
|
| 1302 |
// these are atomic as an annotation for thread-sanitizer
|
| 1303 |
atomic_bool stop; // Used for stopping the threadpool altogether
|
| 1304 |
atomic_bool pause; // Used for pausing the threadpool or individual threads
|
| 1305 |
-
|
| 1306 |
|
| 1307 |
struct ggml_compute_state * workers; // per thread state
|
| 1308 |
int n_threads_max; // number of threads in the pool
|
|
@@ -13778,14 +13778,14 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
| 13778 |
/*.threadpool=*/ tp,
|
| 13779 |
};
|
| 13780 |
|
| 13781 |
-
for (int node_n = 0; node_n < cgraph->n_nodes && !tp->abort; node_n++) {
|
| 13782 |
struct ggml_tensor * node = cgraph->nodes[node_n];
|
| 13783 |
|
| 13784 |
ggml_compute_forward(&params, node);
|
| 13785 |
|
| 13786 |
if (state->ith == 0 && cplan->abort_callback &&
|
| 13787 |
cplan->abort_callback(cplan->abort_callback_data)) {
|
| 13788 |
-
tp->abort = true;
|
| 13789 |
tp->ec = GGML_STATUS_ABORTED;
|
| 13790 |
}
|
| 13791 |
|
|
@@ -13958,7 +13958,7 @@ static struct ggml_threadpool * ggml_threadpool_new_impl(
|
|
| 13958 |
threadpool->current_chunk = 0;
|
| 13959 |
threadpool->stop = false;
|
| 13960 |
threadpool->pause = tpp->paused;
|
| 13961 |
-
threadpool->abort = false;
|
| 13962 |
threadpool->workers = NULL;
|
| 13963 |
threadpool->n_threads_max = tpp->n_threads;
|
| 13964 |
threadpool->n_threads_cur = tpp->n_threads;
|
|
@@ -14037,7 +14037,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
|
|
| 14037 |
threadpool->cgraph = cgraph;
|
| 14038 |
threadpool->cplan = cplan;
|
| 14039 |
threadpool->current_chunk = 0;
|
| 14040 |
-
threadpool->abort = false;
|
| 14041 |
threadpool->ec = GGML_STATUS_SUCCESS;
|
| 14042 |
}
|
| 14043 |
|
|
|
|
| 1302 |
// these are atomic as an annotation for thread-sanitizer
|
| 1303 |
atomic_bool stop; // Used for stopping the threadpool altogether
|
| 1304 |
atomic_bool pause; // Used for pausing the threadpool or individual threads
|
| 1305 |
+
atomic_int abort; // Used for aborting processing of a graph
|
| 1306 |
|
| 1307 |
struct ggml_compute_state * workers; // per thread state
|
| 1308 |
int n_threads_max; // number of threads in the pool
|
|
|
|
| 13778 |
/*.threadpool=*/ tp,
|
| 13779 |
};
|
| 13780 |
|
| 13781 |
+
for (int node_n = 0; node_n < cgraph->n_nodes && atomic_load_explicit(&tp->abort, memory_order_relaxed) != node_n; node_n++) {
|
| 13782 |
struct ggml_tensor * node = cgraph->nodes[node_n];
|
| 13783 |
|
| 13784 |
ggml_compute_forward(&params, node);
|
| 13785 |
|
| 13786 |
if (state->ith == 0 && cplan->abort_callback &&
|
| 13787 |
cplan->abort_callback(cplan->abort_callback_data)) {
|
| 13788 |
+
atomic_store_explicit(&tp->abort, node_n + 1, memory_order_relaxed);
|
| 13789 |
tp->ec = GGML_STATUS_ABORTED;
|
| 13790 |
}
|
| 13791 |
|
|
|
|
| 13958 |
threadpool->current_chunk = 0;
|
| 13959 |
threadpool->stop = false;
|
| 13960 |
threadpool->pause = tpp->paused;
|
| 13961 |
+
threadpool->abort = -1;
|
| 13962 |
threadpool->workers = NULL;
|
| 13963 |
threadpool->n_threads_max = tpp->n_threads;
|
| 13964 |
threadpool->n_threads_cur = tpp->n_threads;
|
|
|
|
| 14037 |
threadpool->cgraph = cgraph;
|
| 14038 |
threadpool->cplan = cplan;
|
| 14039 |
threadpool->current_chunk = 0;
|
| 14040 |
+
threadpool->abort = -1;
|
| 14041 |
threadpool->ec = GGML_STATUS_SUCCESS;
|
| 14042 |
}
|
| 14043 |
|