issixx issi committed on
Commit
8e57313
·
1 Parent(s): 600a548

ggml-cpu : fix ggml_graph_compute_thread not terminating on abort (ggml/1065)

Browse files

Some threads kept looping and failed to terminate properly after an abort during CPU execution.

Co-authored-by: issi <issi@gmail.com>

Files changed (1) hide show
  1. ggml/src/ggml-cpu/ggml-cpu.c +5 -5
ggml/src/ggml-cpu/ggml-cpu.c CHANGED
@@ -1302,7 +1302,7 @@ struct ggml_threadpool {
1302
  // these are atomic as an annotation for thread-sanitizer
1303
  atomic_bool stop; // Used for stopping the threadpool altogether
1304
  atomic_bool pause; // Used for pausing the threadpool or individual threads
1305
- atomic_bool abort; // Used for aborting processing of a graph
1306
 
1307
  struct ggml_compute_state * workers; // per thread state
1308
  int n_threads_max; // number of threads in the pool
@@ -13778,14 +13778,14 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
13778
  /*.threadpool=*/ tp,
13779
  };
13780
 
13781
- for (int node_n = 0; node_n < cgraph->n_nodes && !tp->abort; node_n++) {
13782
  struct ggml_tensor * node = cgraph->nodes[node_n];
13783
 
13784
  ggml_compute_forward(&params, node);
13785
 
13786
  if (state->ith == 0 && cplan->abort_callback &&
13787
  cplan->abort_callback(cplan->abort_callback_data)) {
13788
- tp->abort = true;
13789
  tp->ec = GGML_STATUS_ABORTED;
13790
  }
13791
 
@@ -13958,7 +13958,7 @@ static struct ggml_threadpool * ggml_threadpool_new_impl(
13958
  threadpool->current_chunk = 0;
13959
  threadpool->stop = false;
13960
  threadpool->pause = tpp->paused;
13961
- threadpool->abort = false;
13962
  threadpool->workers = NULL;
13963
  threadpool->n_threads_max = tpp->n_threads;
13964
  threadpool->n_threads_cur = tpp->n_threads;
@@ -14037,7 +14037,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
14037
  threadpool->cgraph = cgraph;
14038
  threadpool->cplan = cplan;
14039
  threadpool->current_chunk = 0;
14040
- threadpool->abort = false;
14041
  threadpool->ec = GGML_STATUS_SUCCESS;
14042
  }
14043
 
 
1302
  // these are atomic as an annotation for thread-sanitizer
1303
  atomic_bool stop; // Used for stopping the threadpool altogether
1304
  atomic_bool pause; // Used for pausing the threadpool or individual threads
1305
+ atomic_int abort; // Used for aborting processing of a graph
1306
 
1307
  struct ggml_compute_state * workers; // per thread state
1308
  int n_threads_max; // number of threads in the pool
 
13778
  /*.threadpool=*/ tp,
13779
  };
13780
 
13781
+ for (int node_n = 0; node_n < cgraph->n_nodes && atomic_load_explicit(&tp->abort, memory_order_relaxed) != node_n; node_n++) {
13782
  struct ggml_tensor * node = cgraph->nodes[node_n];
13783
 
13784
  ggml_compute_forward(&params, node);
13785
 
13786
  if (state->ith == 0 && cplan->abort_callback &&
13787
  cplan->abort_callback(cplan->abort_callback_data)) {
13788
+ atomic_store_explicit(&tp->abort, node_n + 1, memory_order_relaxed);
13789
  tp->ec = GGML_STATUS_ABORTED;
13790
  }
13791
 
 
13958
  threadpool->current_chunk = 0;
13959
  threadpool->stop = false;
13960
  threadpool->pause = tpp->paused;
13961
+ threadpool->abort = -1;
13962
  threadpool->workers = NULL;
13963
  threadpool->n_threads_max = tpp->n_threads;
13964
  threadpool->n_threads_cur = tpp->n_threads;
 
14037
  threadpool->cgraph = cgraph;
14038
  threadpool->cplan = cplan;
14039
  threadpool->current_chunk = 0;
14040
+ threadpool->abort = -1;
14041
  threadpool->ec = GGML_STATUS_SUCCESS;
14042
  }
14043