|
#include <thrust/transform.h> |
|
#include <thrust/device_vector.h> |
|
#include <thrust/host_vector.h> |
|
#include <thrust/functional.h> |
|
#include <iostream> |
|
#include <iterator> |
|
#include <algorithm> |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Binary functor computing a * x + y for a fixed scalar `a`. It is handed to
// thrust::transform so that SAXPY can be performed in a single pass.
//
// The functor intentionally does not derive from thrust::binary_function:
// that adaptor base is deprecated in recent Thrust/CCCL releases (and removed
// in CCCL 3.0), and Thrust algorithms do not require it. The nested typedefs
// the base used to provide are declared directly so that any code querying
// them keeps compiling.
struct saxpy_functor
{
    typedef float first_argument_type;
    typedef float second_argument_type;
    typedef float result_type;

    // Scale factor applied to the first operand; fixed at construction.
    const float a;

    // Left non-explicit to keep the original conversion behavior.
    saxpy_functor(float _a) : a(_a) {}

    // Returns a * x + y. Callable from both host and device code.
    __host__ __device__
    float operator()(const float& x, const float& y) const {
        return a * x + y;
    }
};
|
|
|
// Computes Y <- A * X + Y with a single thrust::transform pass, using
// saxpy_functor to combine each element of X with the matching element of Y.
// Y is overwritten in place; X is only read.
// Precondition: Y must hold at least X.size() elements, because the
// transform walks Y for the full length of X.
void saxpy_fast(float A, thrust::device_vector<float>& X, thrust::device_vector<float>& Y)
{
    const saxpy_functor axpy(A);

    thrust::transform(X.begin(), X.end(),  // first operand: x
                      Y.begin(),           // second operand: y
                      Y.begin(),           // result is written back over y
                      axpy);
}
|
|
|
// Computes Y <- A * X + Y the straightforward way: a temporary vector and
// two separate transforms (one multiply, one add). Kept as the multi-pass
// counterpart to saxpy_fast. Y is overwritten in place; X is only read.
// Precondition: Y must hold at least X.size() elements, because the final
// transform walks Y for the full length of the temporary.
void saxpy_slow(float A, thrust::device_vector<float>& X, thrust::device_vector<float>& Y)
{
    // temp <- A
    // Fill-construct the temporary instead of value-initializing it and then
    // calling thrust::fill: this avoids initializing the buffer twice and
    // removes the reliance on <thrust/fill.h> being pulled in transitively.
    thrust::device_vector<float> temp(X.size(), A);

    // temp <- X * A
    thrust::transform(X.begin(), X.end(), temp.begin(), temp.begin(), thrust::multiplies<float>());

    // Y <- temp + Y
    thrust::transform(temp.begin(), temp.end(), Y.begin(), Y.begin(), thrust::plus<float>());
}
|
|
|
// Runs both SAXPY variants on the same four-element input with A = 2.
// Each variant gets its own scope and its own freshly constructed device
// vectors, so the second run starts from the original host data and the
// first run's vectors are destroyed before the second pair is created.
// The results are not copied back to the host or printed.
int main(void)
{
    const int   count = 4;
    const float scale = 2.0f;

    // Host-side input data.
    float x[count] = {1.0f, 1.0f, 1.0f, 1.0f};
    float y[count] = {1.0f, 2.0f, 3.0f, 4.0f};

    // Multi-pass variant.
    {
        thrust::device_vector<float> X(x, x + count);
        thrust::device_vector<float> Y(y, y + count);

        saxpy_slow(scale, X, Y);
    }

    // Single-pass variant.
    {
        thrust::device_vector<float> X(x, x + count);
        thrust::device_vector<float> Y(y, y + count);

        saxpy_fast(scale, X, Y);
    }

    return 0;
}
|
|
|
|