#ifndef __RNG_PHILOX_H__ #define __RNG_PHILOX_H__ #include #include #include "rng.h" // RNG imitiating torch cuda randn on CPU. // Port from: https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/5ef669de080814067961f28357256e8fe27544f4/modules/rng_philox.py class PhiloxRNG : public RNG { private: uint64_t seed; uint32_t offset; private: std::vector philox_m = {0xD2511F53, 0xCD9E8D57}; std::vector philox_w = {0x9E3779B9, 0xBB67AE85}; float two_pow32_inv = 2.3283064e-10; float two_pow32_inv_2pi = 2.3283064e-10 * 6.2831855; std::vector uint32(uint64_t x) { std::vector result(2); result[0] = static_cast(x & 0xFFFFFFFF); result[1] = static_cast(x >> 32); return result; } std::vector> uint32(const std::vector& x) { int N = x.size(); std::vector> result(2, std::vector(N)); for (int i = 0; i < N; ++i) { result[0][i] = static_cast(x[i] & 0xFFFFFFFF); result[1][i] = static_cast(x[i] >> 32); } return result; } // A single round of the Philox 4x32 random number generator. void philox4_round(std::vector>& counter, const std::vector>& key) { uint32_t N = counter[0].size(); for (uint32_t i = 0; i < N; i++) { std::vector v1 = uint32(static_cast(counter[0][i]) * static_cast(philox_m[0])); std::vector v2 = uint32(static_cast(counter[2][i]) * static_cast(philox_m[1])); counter[0][i] = v2[1] ^ counter[1][i] ^ key[0][i]; counter[1][i] = v2[0]; counter[2][i] = v1[1] ^ counter[3][i] ^ key[1][i]; counter[3][i] = v1[0]; } } // Generates 32-bit random numbers using the Philox 4x32 random number generator. // Parameters: // counter : A 4xN array of 32-bit integers representing the counter values (offset into generation). // key : A 2xN array of 32-bit integers representing the key values (seed). // rounds : The number of rounds to perform. // Returns: // std::vector>: A 4xN array of 32-bit integers containing the generated random numbers. std::vector> philox4_32(std::vector>& counter, std::vector>& key, int rounds = 10) { uint32_t N = counter[0].size(); for (int i = 0; i < rounds - 1; ++i) { philox4_round(counter, key); for (uint32_t j = 0; j < N; ++j) { key[0][j] += philox_w[0]; key[1][j] += philox_w[1]; } } philox4_round(counter, key); return counter; } float box_muller(float x, float y) { float u = x * two_pow32_inv + two_pow32_inv / 2; float v = y * two_pow32_inv_2pi + two_pow32_inv_2pi / 2; float s = sqrt(-2.0 * log(u)); float r1 = s * sin(v); return r1; } public: PhiloxRNG(uint64_t seed = 0) { this->seed = seed; this->offset = 0; } void manual_seed(uint64_t seed) { this->seed = seed; this->offset = 0; } std::vector randn(uint32_t n) { std::vector> counter(4, std::vector(n, 0)); for (uint32_t i = 0; i < n; i++) { counter[0][i] = this->offset; } for (uint32_t i = 0; i < n; i++) { counter[2][i] = i; } this->offset += 1; std::vector key(n, this->seed); std::vector> key_uint32 = uint32(key); std::vector> g = philox4_32(counter, key_uint32); std::vector result; for (int i = 0; i < n; ++i) { result.push_back(box_muller(g[0][i], g[1][i])); } return result; } }; #endif // __RNG_PHILOX_H__