|
#pragma once |
|
|
|
#include <ATen/ATen.h> |
|
|
|
|
|
|
|
|
|
|
|
#ifdef __CUDACC__ |
|
|
|
|
|
#define HOST_DEVICE __host__ __device__ |
|
#define INLINE_HOST_DEVICE __host__ __device__ inline |
|
#define FLOOR(x) floor(x) |
|
|
|
#if __CUDA_ARCH__ >= 600 |
|
|
|
#define ACCUM(x,y) atomicAdd_block(&(x),(y)) |
|
#else |
|
|
|
|
|
template<typename data_t> |
|
__device__ inline data_t atomic_add(data_t *address, data_t val) { |
|
return atomicAdd(address, val); |
|
} |
|
|
|
template<> |
|
__device__ inline double atomic_add(double *address, double val) { |
|
unsigned long long int* address_as_ull = (unsigned long long int*)address; |
|
unsigned long long int old = *address_as_ull, assumed; |
|
do { |
|
assumed = old; |
|
old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); |
|
} while (assumed != old); |
|
return __longlong_as_double(old); |
|
} |
|
|
|
#define ACCUM(x,y) atomic_add(&(x),(y)) |
|
#endif |
|
|
|
#else |
|
|
|
|
|
#define HOST_DEVICE |
|
#define INLINE_HOST_DEVICE inline |
|
#define FLOOR(x) std::floor(x) |
|
#define ACCUM(x,y) (x) += (y) |
|
|
|
#endif |