File size: 2,698 Bytes
0dc1b04 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
/*
* Copyright 2021 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <cub/util_namespace.cuh>
CUB_NAMESPACE_BEGIN
namespace detail
{
/**
* @brief Double-buffer storage wrapper for multi-pass stream
* transformations that require more than one storage array for
* streaming intermediate results back and forth.
*
* Many multi-pass computations require a pair of "ping-pong" storage buffers
* (e.g., one for reading from and the other for writing to, and then
* vice-versa for the subsequent pass). This structure wraps a set of device
* buffers.
*
* Unlike `cub::DoubleBuffer`, this class doesn't provide a "selector" member
* to track which buffer is "current". The main reason for this class's
* existence is the performance difference. Since `cub::DoubleBuffer` relies
* on a runtime variable to index its array of pointers, the pointers are
* placed in local memory instead of registers. Local memory accesses
* significantly affect performance. In contrast, this class swaps the
* pointers themselves, so all operations can be performed in registers.
*/
template <typename T>
class device_double_buffer
{
  /// Pointer handed out by `current()`
  T *m_current_buffer{};

  /// Pointer handed out by `alternate()`
  T *m_alternate_buffer{};

public:
  /**
   * @brief Wrap a pair of buffer pointers. The pointers are only stored;
   *        nothing is allocated, copied, or freed by this class.
   *
   * @param current
   *   The currently valid buffer
   *
   * @param alternate
   *   Alternate storage buffer of the same size as @p current
   *   (the size is not checked here)
   */
  __host__ __device__ __forceinline__ device_double_buffer(T *current,
                                                           T *alternate)
      : m_current_buffer(current)
      , m_alternate_buffer(alternate)
  {}

  /// \brief Return pointer to the currently valid buffer
  __host__ __device__ __forceinline__ T *current() const
  {
    return m_current_buffer;
  }

  /// \brief Return pointer to the currently invalid buffer
  __host__ __device__ __forceinline__ T *alternate() const
  {
    return m_alternate_buffer;
  }

  /**
   * @brief Exchange the roles of the two buffers.
   *
   * After the call, `current()` returns the pointer previously returned by
   * `alternate()` and vice versa. Only the two stored pointers are
   * exchanged; the memory they point to is not accessed.
   */
  __host__ __device__ __forceinline__ void swap()
  {
    // Marked __forceinline__ like every other member of this class.
    T *const previous_current = m_current_buffer;
    m_current_buffer          = m_alternate_buffer;
    m_alternate_buffer        = previous_current;
  }
};
} // namespace detail
CUB_NAMESPACE_END
|