// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "sparse_matmul/vector/cache_aligned_vector.h"

#if defined __aarch64__
#include <arm_neon.h>
#endif

#include <algorithm>
#include <cmath>
#include <iterator>
#include <memory>
#include <random>
#include <vector>

#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "sparse_matmul/numerics/test_utils.h"
#include "sparse_matmul/os/coop_threads.h"

namespace csrblocksparse {

const float kExpRelTolerance = .03f;  // 3% relative
#ifdef SIGMOID_AS_TANH
const float kSigmoidRelTolerance = .09f;  // 9.0% relative
const float kSigmoidAbsTolerance = .003f;
#else
const float kSigmoidRelTolerance = .031f;  // 3.1% relative
const float kSigmoidAbsTolerance = .006f;
#endif
const float kTanhRelTolerance = .014f;  // 1.4% relative
const float kTanhAbsTolerance = .00525f;

TEST(Transcendentals, CacheAlignedVectorExp) {
  const int kTestSize = 1 << 16;
  CacheAlignedVector<float> values(kTestSize);
  values.FillRandom();
  CacheAlignedVector<float> values_ref = values;

  values.Exp();
  for (int i = 0; i < kTestSize; ++i) {
    float exact_val = std::exp(values_ref[i]);
    float rel_diff = RelDiff(exact_val, values[i]);
    EXPECT_LT(rel_diff, kExpRelTolerance)
        << exact_val << " " << values[i] << " " << values_ref[i];
  }
}

TEST(Transcendentals, CacheAlignedVectorSigmoid) {
  const int kTestSize = 1 << 16;
  CacheAlignedVector<float> values(kTestSize);
  values.FillRandom();
  CacheAlignedVector<float> values_ref = values;

  values.Sigmoid();
  for (int i = 0; i < kTestSize; ++i) {
    float exact_val = 1. / (1. + std::exp(-values_ref[i]));
    float rel_diff = RelDiff(exact_val, values[i]);
    EXPECT_LT(rel_diff, kSigmoidRelTolerance)
        << exact_val << " " << values[i] << " " << values_ref[i];
    EXPECT_NEAR(values[i], exact_val, kSigmoidAbsTolerance) << values_ref[i];
  }
}

TEST(Transcendentals, CacheAlignedVectorTanh) {
  const int kTestSize = 1 << 16;
  CacheAlignedVector<float> values(kTestSize);
  values.FillRandom();
  CacheAlignedVector<float> values_ref = values;

  values.Tanh();
  for (int i = 0; i < kTestSize; ++i) {
    float exact_val = std::tanh(values_ref[i]);
    float rel_diff = RelDiff(exact_val, values[i]);
    EXPECT_LT(rel_diff, kTanhRelTolerance)
        << exact_val << " " << values[i] << " " << values_ref[i];
    EXPECT_NEAR(values[i], exact_val, kTanhAbsTolerance) << values_ref[i];
  }
}

// Uniformly sample logits and check that the resulting sample choices are
// also (nearly) uniformly distributed.
TEST(Sampling, Random) {
  const int kSize = 256;
  CacheAlignedVector<float> logits(kSize);
  logits.FillZero();

  double histogram[kSize] = {};
  const int kIterations = 10000;
  for (int i = 0; i < kIterations; ++i) {
    histogram[logits.Sample()]++;
  }

  for (int i = 0; i < kSize; ++i) {
    // .002 is an empirical bound.
    EXPECT_GT(histogram[i] / kIterations, 1. / kSize - .002f);
    EXPECT_LT(histogram[i] / kIterations, 1. / kSize + .002f);
  }
}

// Put (nearly) all the probability mass on one bin and make sure only that
// bin is chosen.
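// Assuming Sample() draws from the softmax of the logits, the 20-point gap
// between the hot logit (+10) and the cold ones (-10) leaves roughly
// 255 * e^-20 ~= 5e-7 of the probability mass outside kIndex, so 1000 draws
// should never pick another bin.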
TEST(Sampling, FixedDistribution) {
  const int kSize = 256;
  CacheAlignedVector<float> logits(kSize);

  int histogram[kSize] = {};
  const int kIterations = 1000;
  const int kIndex = 3;
  const int kAllProbabilityMass = 10;
  const int kNoProbabilityMass = -10;
  for (int i = 0; i < kIterations; ++i) {
    for (int j = 1; j <= kSize; ++j) {
      logits.data()[j - 1] =
          j == (kIndex + 1) ? kAllProbabilityMass : kNoProbabilityMass;
    }
    histogram[logits.Sample()]++;
  }

  EXPECT_EQ(histogram[kIndex], kIterations);
}

// Put (nearly) all the probability mass on one bin outside the target range,
// and make sure that bin is not chosen.
TEST(ScalarSample, ThreadedMasked) {
  const int kSize = 256;
  const int mindex = 2;
  const int maxdex = 3;
  const int kNumThreads = 4;
  const int kIterations = 1000;
  const int kIndex = 3;
  const int kMostProbabilityMass = 3;
  const int kLittleProbabilityMass = -3;
  CacheAlignedVector<float> logits(kSize);
  std::vector<CacheAlignedVector<float>> tmp_vectors;
  std::vector<std::minstd_rand> generators(kNumThreads);
  for (int i = 0; i < kNumThreads; ++i) {
    tmp_vectors.emplace_back(kSize);
  }
  for (int i = 0; i < kSize; ++i) {
    logits.data()[i] = (i + 1) == (kIndex + 1) ? kMostProbabilityMass
                                               : kLittleProbabilityMass;
  }
  std::vector<std::vector<int>> histograms;
  for (int i = 0; i < kNumThreads; ++i) {
    histograms.emplace_back(kSize);
  }

  auto f = [&](csrblocksparse::SpinBarrier* /*barrier*/, int tid) {
    for (int i = 0; i < kIterations; ++i) {
      histograms[tid][logits.ScalarSample(1.f, &generators[tid],
                                          &tmp_vectors[tid], 0, mindex,
                                          maxdex)]++;
    }
  };
  csrblocksparse::LaunchOnThreadsWithBarrier(kNumThreads, f);

  // Every thread should generate the exact same set of samples.
  for (int i = 0; i < kSize; ++i) {
    int val = histograms[0][i];
    for (int tid = 1; tid < kNumThreads; ++tid) {
      EXPECT_EQ(val, histograms[tid][i]);
    }
  }

  // mindex is the only bin inside the allowed sampling range, so it should be
  // the only bin ever chosen, even though kIndex carries more probability
  // mass.
  for (int tid = 0; tid < kNumThreads; ++tid) {
    EXPECT_EQ(std::distance(histograms[tid].begin(),
                            std::max_element(histograms[tid].begin(),
                                             histograms[tid].end())),
              mindex);
  }
}
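// In the threaded tests, every thread samples from the same logits with its
// own generator. Assuming the generators are default-constructed
// std::minstd_rand instances, they all start from the same seed, so each
// thread should draw the exact same pseudo-random sequence and produce an
// identical histogram.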
TEST(Sampling, Threaded) {
  const int kSize = 256;
  const int kNumThreads = 4;
  const int kIterations = 1000;
  const int kIndex = 3;
  const int kMostProbabilityMass = 3;
  const int kLittleProbabilityMass = -3;
  CacheAlignedVector<float> logits(kSize);
  std::vector<CacheAlignedVector<float>> tmp_vectors;
  std::vector<std::minstd_rand> generators(kNumThreads);
  for (int i = 0; i < kNumThreads; ++i) {
    tmp_vectors.emplace_back(kSize);
  }
  for (int i = 0; i < kSize; ++i) {
    logits.data()[i] = (i + 1) == (kIndex + 1) ? kMostProbabilityMass
                                               : kLittleProbabilityMass;
  }
  std::vector<std::vector<int>> histograms;
  for (int i = 0; i < kNumThreads; ++i) {
    histograms.emplace_back(kSize);
  }

  auto f = [&](csrblocksparse::SpinBarrier* /*barrier*/, int tid) {
    for (int i = 0; i < kIterations; ++i) {
      histograms[tid]
                [logits.Sample(1.f, &generators[tid], &tmp_vectors[tid])]++;
    }
  };
  csrblocksparse::LaunchOnThreadsWithBarrier(kNumThreads, f);

  // Every thread should generate the exact same set of samples.
  for (int i = 0; i < kSize; ++i) {
    int val = histograms[0][i];
    for (int tid = 1; tid < kNumThreads; ++tid) {
      EXPECT_EQ(val, histograms[tid][i]);
    }
  }

  // The most probable sample should be the one with the most probability
  // mass.
  for (int tid = 0; tid < kNumThreads; ++tid) {
    EXPECT_EQ(std::distance(histograms[tid].begin(),
                            std::max_element(histograms[tid].begin(),
                                             histograms[tid].end())),
              kIndex);
  }
}

void CreateVectorHelper(
    csrblocksparse::FatCacheAlignedVector<float>* fat_vector, int cols,
    int rows, std::unique_ptr<csrblocksparse::VectorView<float>>* view) {
  *view = absl::make_unique<csrblocksparse::VectorView<float>>(*fat_vector,
                                                               cols, rows);
}

void CreateVectorHelper(
    csrblocksparse::FatCacheAlignedVector<float>* fat_vector, int cols,
    int rows,
    std::unique_ptr<csrblocksparse::MutableVectorView<float>>* view) {
  *view = absl::make_unique<csrblocksparse::MutableVectorView<float>>(
      fat_vector, cols, rows);
}

csrblocksparse::FatCacheAlignedVector<float> CreateFatAlignedVector(int rows,
                                                                    int cols) {
  csrblocksparse::FatCacheAlignedVector<float> fat_vector(rows, cols);

  // Usage intent of FatCacheAlignedVector is that they are COLUMN MAJOR.
  float v = 0;
  for (int c = 0; c < cols; ++c) {
    for (int r = 0; r < rows; ++r) {
      fat_vector.data()[c * rows + r] = v++;
    }
  }

  return fat_vector;
}

template <typename ViewType>
void TestFatVectorView() {
  const int kRows = 6;
  const int kCols = 6;
  auto fat_vector = CreateFatAlignedVector(kRows, kCols);

  std::unique_ptr<ViewType> top;
  CreateVectorHelper(&fat_vector, 0, kRows / 2, &top);
  std::unique_ptr<ViewType> bottom;
  CreateVectorHelper(&fat_vector, kRows / 2, kRows / 2, &bottom);

  EXPECT_EQ(top->cols(), kCols);
  EXPECT_EQ(bottom->cols(), kCols);
  EXPECT_EQ(top->rows(), kRows / 2);
  EXPECT_EQ(bottom->rows(), kRows / 2);
  EXPECT_EQ(top->col_stride(), kRows);
  EXPECT_EQ(bottom->col_stride(), kRows);

  for (int c = 0; c < kCols; ++c) {
    for (int r = 0; r < kRows; ++r) {
      if (r < kRows / 2) {
        EXPECT_EQ(fat_vector[c * kRows + r],
                  top->data()[c * top->col_stride() + r]);
      } else {
        EXPECT_EQ(fat_vector[c * kRows + r],
                  bottom->data()[c * bottom->col_stride() + r - kRows / 2]);
      }
    }
  }
}

TEST(FatVector, View) {
  TestFatVectorView<csrblocksparse::VectorView<float>>();
}

TEST(FatVector, MutableView) {
  TestFatVectorView<csrblocksparse::MutableVectorView<float>>();
}

TEST(FatVector, SliceMutableView) {
  const int kRows = 6;
  const int kCols = 3;
  auto fat_vector = CreateFatAlignedVector(kRows, kCols);

  int c = 1;
  csrblocksparse::MutableVectorView<float> slice = fat_vector.slice(c);
  for (int r = 0; r < kRows; ++r) {
    EXPECT_EQ(slice[r], c * kRows + r);
  }
}

TEST(FatVector, SliceConstView) {
  const int kRows = 6;
  const int kCols = 3;
  auto fat_vector = CreateFatAlignedVector(kRows, kCols);

  int c = 1;
  csrblocksparse::VectorView<float> const_slice;
  {
    // Take a VectorView from a non-const slice.
    const_slice = fat_vector.slice(c);
    for (int r = 0; r < kRows; ++r) {
      EXPECT_EQ(const_slice[r], c * kRows + r);
    }
  }
  {
    // Take a VectorView from a const slice.
    const auto& const_fat_vector = fat_vector;
    const_slice = const_fat_vector.slice(c);
    for (int r = 0; r < kRows; ++r) {
      EXPECT_EQ(const_slice[r], c * kRows + r);
    }
  }
}

TEST(View, FromMutableToConst) {
  const int kRows = 6;
  const int kCols = 3;
  auto fat_vector = CreateFatAlignedVector(kRows, kCols);
  csrblocksparse::MutableVectorView<float> slice = fat_vector.slice(0);
  csrblocksparse::VectorView<float> const_slice(slice);
  for (int r = 0; r < kRows; ++r) {
    EXPECT_EQ(const_slice[r], r);
  }
}

TEST(View, CopyTest) {
  const int kRows = 6;
  const int kCols = 3;
  auto fat_vector = CreateFatAlignedVector(kRows, kCols);
  csrblocksparse::MutableVectorView<float> slice = fat_vector.slice(0);
  csrblocksparse::MutableVectorView<float> slice2(slice);
  for (int r = 0; r < kRows; ++r) {
    EXPECT_EQ(slice2[r], r);
  }
}
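// Note: VectorView and MutableVectorView are assumed to be non-owning views.
// Copying a view (as in View.CopyTest above) copies only the pointer, shape,
// and stride; both copies alias the FatCacheAlignedVector's storage, so a
// view must not outlive the vector it points into.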
TEST(Vector, CopyNull) {
  // Check that we can copy a vector with a null generator without segfault.
  CacheAlignedVector<float> foo((CacheAlignedVector<float>()));
  // This is here to prevent foo from being optimized out.
  CHECK_EQ(foo.size(), 0);

  CacheAlignedVector<float> foo_bar = CacheAlignedVector<float>();
  CHECK_EQ(foo_bar.size(), 0);
}

TEST(Vector, FromRawPointer) {
  std::vector<float> input;
  for (int i = 0; i < 5; ++i) {
    input.push_back(i * 2);
  }

  // Calls first constructor.
  CacheAlignedVector<float> foo(input.data(), 5);
  CHECK_EQ(foo.size(), 5);
  EXPECT_THAT(input, testing::ElementsAreArray(foo.data(), 5));

  // Calls the second constructor.
  CacheAlignedVector<float> foo2(input.data(), 5);
  CHECK_EQ(foo2.size(), 5);
  EXPECT_THAT(input, testing::ElementsAreArray(foo2.data(), 5));
}

}  // namespace csrblocksparse