| #pragma once |
|
|
| #include <ATen/ATen.h> |
|
|
| |
| |
| |
|
|
| #ifdef __CUDACC__ |
| |
|
|
// Function qualifiers used when this header is compiled by nvcc
// (__CUDACC__ defined): mark a function as callable from both host and device.
#define HOST_DEVICE __host__ __device__
// Same as HOST_DEVICE, with `inline` appended.
#define INLINE_HOST_DEVICE HOST_DEVICE inline
// Floor operation; under nvcc this resolves to the unqualified floor().
#define FLOOR(x) floor(x)
|
|
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 600)
// Device compilation pass for compute capability 6.0 or newer:
// ACCUM(x, y) adds y into x through the atomicAdd_block builtin.
// NOTE(review): atomicAdd_block is a block-scoped atomic -- presumably every
// thread updating a given address is expected to live in the same block;
// confirm against the kernels that use ACCUM.
#define ACCUM(x, y) atomicAdd_block(&(x), (y))
| #else |
| |
| |
// Generic atomic accumulate used when the __CUDA_ARCH__ >= 600 check fails.
// Forwards to whichever atomicAdd overload exists for data_t and hands back
// the value that atomicAdd returns.
template <typename data_t>
__device__ inline data_t atomic_add(data_t* address, data_t val)
{
    const data_t result = atomicAdd(address, val);
    return result;
}
|
|
// Double-precision specialization of atomic_add, built from a
// compare-and-swap retry loop on the 64-bit pattern of the target.
// It lives in the branch taken when the __CUDA_ARCH__ >= 600 check fails
// (atomicAdd(double*) requires SM60+).
//
// address: location to accumulate into.
// val:     amount to add.
// Returns the bit pattern handed back by the final (successful) atomicCAS,
// reinterpreted as a double.
template <>
__device__ inline double atomic_add(double* address, double val)
{
    typedef unsigned long long int bits_t;

    bits_t* const slot = (bits_t*)address;
    bits_t seen = *slot;
    bits_t expected;

    while (true) {
        expected = seen;
        // Candidate sum, based on the value we believe is currently stored.
        const double candidate = val + __longlong_as_double(expected);
        seen = atomicCAS(slot, expected, __double_as_longlong(candidate));
        // Compare the raw integer patterns rather than the doubles, so a NaN
        // stored at the address cannot make the loop spin forever.
        if (expected == seen) {
            break;
        }
    }

    return __longlong_as_double(seen);
}
|
|
// Fallback ACCUM: add y into x through the atomic_add helper declared
// earlier in this header.
#define ACCUM(x, y) atomic_add(&(x), (y))
| #endif |
|
|
| #else |
| |
|
|
// Plain C++ build (no __CUDACC__): the CUDA execution-space qualifiers do not
// exist, so HOST_DEVICE expands to nothing and INLINE_HOST_DEVICE to `inline`.
#define HOST_DEVICE
#define INLINE_HOST_DEVICE HOST_DEVICE inline
// Floor operation via the standard library.
#define FLOOR(x) std::floor(x)
// Non-atomic accumulate: a plain compound assignment of y into x.
// The whole expansion is parenthesized so the macro behaves as one expression
// when it is embedded in a larger one; without the outer parentheses,
// `ACCUM(a, b) * c` would parse as `a += (b * c)`.
#define ACCUM(x, y) ((x) += (y))
|
|
| #endif |