|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef LYRA_CODEC_SPARSE_MATMUL_COMPUTE_THREAD_BOUNDS_H_ |
|
#define LYRA_CODEC_SPARSE_MATMUL_COMPUTE_THREAD_BOUNDS_H_ |
|
|
|
#include <vector> |
|
|
|
namespace csrblocksparse { |
|
|
|
|
|
|
|
// Stores per-thread start offsets (rows, weights, rhs indices, bias) and
// applies them to caller-supplied base pointers.
//
// A default-constructed object has zero block dimensions and empty offset
// tables. The tables are presumably filled by PrepareForThreads(), which is
// only declared in this header -- confirm against its definition.
//
// Every accessor that takes `tid` indexes an internal std::vector with
// operator[], so there is no bounds checking: `tid` must be a valid index
// into the corresponding table.
class ThreadBounds {
 public:
  // Creates an object with both block dimensions set to 0 and no offsets.
  ThreadBounds() : block_width_(0), block_height_(0) {}

  // Declared here, defined elsewhere. Judging by the parameter names this
  // sets the block shape and computes the per-thread split for
  // `num_threads` threads from the per-row non-zero counts in `nnz_per_row`
  // -- NOTE(review): verify against the implementation.
  void PrepareForThreads(int block_width, int block_height, int num_threads,
                         int reduced_rows_per_cache_row, int reduced_rows,
                         const int* nnz_per_row);

  // Returns `weights` advanced by the weight start offset stored for `tid`.
  template <typename WeightType>
  const WeightType* OffsetWeights(const WeightType* weights, int tid) const {
    return weights + weight_starts_[tid];
  }

  // Returns `rhs_indices` advanced by the rhs-index start offset stored for
  // `tid`.
  template <typename RhsIndType>
  const RhsIndType* OffsetRhsIndices(const RhsIndType* rhs_indices,
                                     int tid) const {
    return rhs_indices + rhs_indices_starts_[tid];
  }

  // Returns `bias` advanced by the bias start offset stored for `tid`.
  template <typename BiasType>
  const BiasType* OffsetBias(const BiasType* bias, int tid) const {
    return bias + bias_starts_[tid];
  }

  // Returns `output` advanced by block_height_ * (start row of `tid`), i.e.
  // the start row is scaled by the block height before being applied.
  template <typename OutType>
  OutType* OffsetOutput(OutType* output, int tid) const {
    return output + block_height_ * row_starts_[tid];
  }

  // Returns the start row stored for `tid`.
  int StartRow(int tid) const { return row_starts_[tid]; }

  // Read-only access to the whole table of start rows.
  const std::vector<int>& row_starts() const { return row_starts_; }

 private:
  // Declared here, defined elsewhere. Presumably fills the offset tables
  // below -- confirm against the implementation.
  void ComputeThreadSplitPoints(int num_threads, int reduced_rows_per_cache_row,
                                int reduced_rows, const int* nnz_per_row);

  // Block dimensions; only block_height_ is read in this header (by
  // OffsetOutput).
  int block_width_;
  int block_height_;

  // Offset tables, each indexed by thread id.
  std::vector<int> row_starts_;
  std::vector<int> weight_starts_;
  std::vector<int> rhs_indices_starts_;
  std::vector<int> bias_starts_;
};
|
|
|
} |
|
|
|
#endif |
|
|