|
|
|
#include <assert.h> |
|
#include <stdbool.h> |
|
#include <stdint.h> |
|
#include <stdlib.h> |
|
#include <string.h> |
|
|
|
|
|
#include "threadpool-common.h" |
|
|
|
|
|
#ifndef WIN32_LEAN_AND_MEAN |
|
#define WIN32_LEAN_AND_MEAN |
|
#endif |
|
#include <windows.h> |
|
|
|
|
|
#include <pthreadpool.h> |
|
|
|
|
|
#include "threadpool-atomics.h" |
|
#include "threadpool-object.h" |
|
#include "threadpool-utils.h" |
|
|
|
|
|
static void checkin_worker_thread(struct pthreadpool* threadpool, uint32_t event_index) { |
|
if (pthreadpool_decrement_fetch_acquire_release_size_t(&threadpool->active_threads) == 0) { |
|
SetEvent(threadpool->completion_event[event_index]); |
|
} |
|
} |
|
|
|
static void wait_worker_threads(struct pthreadpool* threadpool, uint32_t event_index) { |
|
|
|
size_t active_threads = pthreadpool_load_acquire_size_t(&threadpool->active_threads); |
|
if (active_threads == 0) { |
|
return; |
|
} |
|
|
|
|
|
for (uint32_t i = PTHREADPOOL_SPIN_WAIT_ITERATIONS; i != 0; i--) { |
|
pthreadpool_yield(); |
|
|
|
active_threads = pthreadpool_load_acquire_size_t(&threadpool->active_threads); |
|
if (active_threads == 0) { |
|
return; |
|
} |
|
} |
|
|
|
|
|
const DWORD wait_status = WaitForSingleObject(threadpool->completion_event[event_index], INFINITE); |
|
assert(wait_status == WAIT_OBJECT_0); |
|
assert(pthreadpool_load_relaxed_size_t(&threadpool->active_threads) == 0); |
|
} |
|
|
|
static uint32_t wait_for_new_command( |
|
struct pthreadpool* threadpool, |
|
uint32_t last_command, |
|
uint32_t last_flags) |
|
{ |
|
uint32_t command = pthreadpool_load_acquire_uint32_t(&threadpool->command); |
|
if (command != last_command) { |
|
return command; |
|
} |
|
|
|
if ((last_flags & PTHREADPOOL_FLAG_YIELD_WORKERS) == 0) { |
|
|
|
for (uint32_t i = PTHREADPOOL_SPIN_WAIT_ITERATIONS; i != 0; i--) { |
|
pthreadpool_yield(); |
|
|
|
command = pthreadpool_load_acquire_uint32_t(&threadpool->command); |
|
if (command != last_command) { |
|
return command; |
|
} |
|
} |
|
} |
|
|
|
|
|
const uint32_t event_index = (last_command >> 31); |
|
const DWORD wait_status = WaitForSingleObject(threadpool->command_event[event_index], INFINITE); |
|
assert(wait_status == WAIT_OBJECT_0); |
|
|
|
command = pthreadpool_load_relaxed_uint32_t(&threadpool->command); |
|
assert(command != last_command); |
|
return command; |
|
} |
|
|
|
/*
 * Entry point of every worker thread.
 *
 * Checks in once on start-up (completion event 0), then loops: wait for a new
 * command, execute it, check in on the completion event selected by the command's
 * top bit. A shutdown command makes the thread return without checking in.
 *
 * @param arg  pointer to this worker's struct thread_info.
 * @return always 0.
 */
static DWORD WINAPI thread_main(LPVOID arg) {
	struct thread_info* self = (struct thread_info*) arg;
	struct pthreadpool* pool = self->threadpool;
	uint32_t previous_command = threadpool_command_init;
	uint32_t flags = 0;

	/* Tell the creating thread that this worker is up and running. */
	checkin_worker_thread(pool, 0);

	for (;;) {
		const uint32_t command = wait_for_new_command(pool, previous_command, flags);
		pthreadpool_fence_acquire();

		flags = pthreadpool_load_relaxed_uint32_t(&pool->flags);

		const uint32_t opcode = command & THREADPOOL_COMMAND_MASK;
		if (opcode == threadpool_command_shutdown) {
			/* Leave immediately; no check-in is performed for shutdown. */
			return 0;
		}
		if (opcode == threadpool_command_parallelize) {
			const thread_function_t run_share =
				(thread_function_t) pthreadpool_load_relaxed_void_p(&pool->thread_function);
			if (flags & PTHREADPOOL_FLAG_DISABLE_DENORMALS) {
				/* Run with denormals disabled, then restore the previous FPU state. */
				const struct fpu_state fpu_before = get_fpu_state();
				disable_fpu_denormals();
				run_share(pool, self);
				set_fpu_state(fpu_before);
			} else {
				run_share(pool, self);
			}
		}
		/* Any other opcode (including threadpool_command_init) does no work. */

		/* Report completion on the event that belongs to this command. */
		checkin_worker_thread(pool, command >> 31);

		previous_command = command;
	}
	return 0;
}
|
|
|
struct pthreadpool* pthreadpool_create(size_t threads_count) { |
|
if (threads_count == 0) { |
|
SYSTEM_INFO system_info; |
|
ZeroMemory(&system_info, sizeof(system_info)); |
|
GetSystemInfo(&system_info); |
|
threads_count = (size_t) system_info.dwNumberOfProcessors; |
|
} |
|
|
|
struct pthreadpool* threadpool = pthreadpool_allocate(threads_count); |
|
if (threadpool == NULL) { |
|
return NULL; |
|
} |
|
threadpool->threads_count = fxdiv_init_size_t(threads_count); |
|
for (size_t tid = 0; tid < threads_count; tid++) { |
|
threadpool->threads[tid].thread_number = tid; |
|
threadpool->threads[tid].threadpool = threadpool; |
|
} |
|
|
|
|
|
if (threads_count > 1) { |
|
threadpool->execution_mutex = CreateMutexW( |
|
NULL , |
|
FALSE , |
|
NULL ); |
|
for (size_t i = 0; i < 2; i++) { |
|
threadpool->completion_event[i] = CreateEventW( |
|
NULL , |
|
TRUE , |
|
FALSE , |
|
NULL ); |
|
threadpool->command_event[i] = CreateEventW( |
|
NULL , |
|
TRUE , |
|
FALSE , |
|
NULL ); |
|
} |
|
|
|
pthreadpool_store_relaxed_size_t(&threadpool->active_threads, threads_count - 1 ); |
|
|
|
|
|
for (size_t tid = 1; tid < threads_count; tid++) { |
|
threadpool->threads[tid].thread_handle = CreateThread( |
|
NULL , |
|
0 , |
|
&thread_main, |
|
&threadpool->threads[tid], |
|
0 , |
|
NULL ); |
|
} |
|
|
|
|
|
wait_worker_threads(threadpool, 0); |
|
} |
|
return threadpool; |
|
} |
|
|
|
PTHREADPOOL_INTERNAL void pthreadpool_parallelize( |
|
struct pthreadpool* threadpool, |
|
thread_function_t thread_function, |
|
const void* params, |
|
size_t params_size, |
|
void* task, |
|
void* context, |
|
size_t linear_range, |
|
uint32_t flags) |
|
{ |
|
assert(threadpool != NULL); |
|
assert(thread_function != NULL); |
|
assert(task != NULL); |
|
assert(linear_range > 1); |
|
|
|
|
|
const DWORD wait_status = WaitForSingleObject(threadpool->execution_mutex, INFINITE); |
|
assert(wait_status == WAIT_OBJECT_0); |
|
|
|
|
|
pthreadpool_store_relaxed_void_p(&threadpool->thread_function, (void*) thread_function); |
|
pthreadpool_store_relaxed_void_p(&threadpool->task, task); |
|
pthreadpool_store_relaxed_void_p(&threadpool->argument, context); |
|
pthreadpool_store_relaxed_uint32_t(&threadpool->flags, flags); |
|
|
|
const struct fxdiv_divisor_size_t threads_count = threadpool->threads_count; |
|
pthreadpool_store_relaxed_size_t(&threadpool->active_threads, threads_count.value - 1 ); |
|
|
|
if (params_size != 0) { |
|
CopyMemory(&threadpool->params, params, params_size); |
|
pthreadpool_fence_release(); |
|
} |
|
|
|
|
|
const struct fxdiv_result_size_t range_params = fxdiv_divide_size_t(linear_range, threads_count); |
|
size_t range_start = 0; |
|
for (size_t tid = 0; tid < threads_count.value; tid++) { |
|
struct thread_info* thread = &threadpool->threads[tid]; |
|
const size_t range_length = range_params.quotient + (size_t) (tid < range_params.remainder); |
|
const size_t range_end = range_start + range_length; |
|
pthreadpool_store_relaxed_size_t(&thread->range_start, range_start); |
|
pthreadpool_store_relaxed_size_t(&thread->range_end, range_end); |
|
pthreadpool_store_relaxed_size_t(&thread->range_length, range_length); |
|
|
|
|
|
range_start = range_end; |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const uint32_t old_command = pthreadpool_load_relaxed_uint32_t(&threadpool->command); |
|
const uint32_t new_command = ~(old_command | THREADPOOL_COMMAND_MASK) | threadpool_command_parallelize; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const uint32_t event_index = (old_command >> 31); |
|
BOOL reset_event_status = ResetEvent(threadpool->command_event[event_index ^ 1]); |
|
assert(reset_event_status != FALSE); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pthreadpool_store_release_uint32_t(&threadpool->command, new_command); |
|
|
|
|
|
|
|
|
|
|
|
|
|
const BOOL set_event_status = SetEvent(threadpool->command_event[event_index]); |
|
assert(set_event_status != FALSE); |
|
|
|
|
|
struct fpu_state saved_fpu_state = { 0 }; |
|
if (flags & PTHREADPOOL_FLAG_DISABLE_DENORMALS) { |
|
saved_fpu_state = get_fpu_state(); |
|
disable_fpu_denormals(); |
|
} |
|
|
|
|
|
thread_function(threadpool, &threadpool->threads[0]); |
|
|
|
|
|
if (flags & PTHREADPOOL_FLAG_DISABLE_DENORMALS) { |
|
set_fpu_state(saved_fpu_state); |
|
} |
|
|
|
|
|
|
|
|
|
|
|
wait_worker_threads(threadpool, event_index ^ 1); |
|
|
|
|
|
|
|
|
|
|
|
reset_event_status = ResetEvent(threadpool->completion_event[event_index]); |
|
assert(reset_event_status != FALSE); |
|
|
|
|
|
pthreadpool_fence_acquire(); |
|
|
|
|
|
const BOOL release_mutex_status = ReleaseMutex(threadpool->execution_mutex); |
|
assert(release_mutex_status != FALSE); |
|
} |
|
|
|
void pthreadpool_destroy(struct pthreadpool* threadpool) { |
|
if (threadpool != NULL) { |
|
const size_t threads_count = threadpool->threads_count.value; |
|
if (threads_count > 1) { |
|
pthreadpool_store_relaxed_size_t(&threadpool->active_threads, threads_count - 1 ); |
|
|
|
|
|
|
|
|
|
|
|
const uint32_t old_command = pthreadpool_load_relaxed_uint32_t(&threadpool->command); |
|
pthreadpool_store_release_uint32_t(&threadpool->command, threadpool_command_shutdown); |
|
|
|
|
|
|
|
|
|
|
|
|
|
const uint32_t event_index = (old_command >> 31); |
|
const BOOL set_event_status = SetEvent(threadpool->command_event[event_index]); |
|
assert(set_event_status != FALSE); |
|
|
|
|
|
for (size_t tid = 1; tid < threads_count; tid++) { |
|
const HANDLE thread_handle = threadpool->threads[tid].thread_handle; |
|
if (thread_handle != NULL) { |
|
const DWORD wait_status = WaitForSingleObject(thread_handle, INFINITE); |
|
assert(wait_status == WAIT_OBJECT_0); |
|
|
|
const BOOL close_status = CloseHandle(thread_handle); |
|
assert(close_status != FALSE); |
|
} |
|
} |
|
|
|
|
|
if (threadpool->execution_mutex != NULL) { |
|
const BOOL close_status = CloseHandle(threadpool->execution_mutex); |
|
assert(close_status != FALSE); |
|
} |
|
for (size_t i = 0; i < 2; i++) { |
|
if (threadpool->command_event[i] != NULL) { |
|
const BOOL close_status = CloseHandle(threadpool->command_event[i]); |
|
assert(close_status != FALSE); |
|
} |
|
if (threadpool->completion_event[i] != NULL) { |
|
const BOOL close_status = CloseHandle(threadpool->completion_event[i]); |
|
assert(close_status != FALSE); |
|
} |
|
} |
|
} |
|
pthreadpool_deallocate(threadpool); |
|
} |
|
} |
|
|