Spaces:
Build error
Build error
/* | |
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | |
* SPDX-License-Identifier: Apache-2.0 | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License"); | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
*/ | |
/** @file vec.h | |
* @author Thomas Müller, NVIDIA | |
* @brief Tiny vector / matrix / quaternion implementation. | |
*/ | |
namespace tcnn { | |
template <class...> struct conjunction : std::true_type {}; | |
template <class B1> struct conjunction<B1> : B1 {}; | |
template <class B1, class... Bn> struct conjunction<B1, Bn...> : std::conditional_t<bool(B1::value), conjunction<Bn...>, B1> {}; | |
template <uint32_t N, typename T, typename... Ts> | |
using enable_if_size_and_type_match_t = std::enable_if_t<sizeof...(Ts) == N && conjunction<std::is_same<Ts, T>...>::value>; | |
template <typename T, uint32_t N, size_t ALIGNMENT=sizeof(T)> | |
struct alignas(ALIGNMENT) tvec { | |
TVEC_BODY | |
T elems[N]; | |
template <typename... Ts, typename = enable_if_size_and_type_match_t<N, T, Ts...>> | |
TCNN_HOST_DEVICE tvec(Ts... coeffs) : elems{coeffs...} {} | |
}; | |
template <typename T, size_t ALIGNMENT> | |
struct alignas(ALIGNMENT) tvec<T, 1, ALIGNMENT> { | |
static constexpr uint32_t N = 1; | |
TVEC_BODY | |
union { T x, r; }; | |
}; | |
template <typename T, size_t ALIGNMENT> | |
struct alignas(ALIGNMENT) tvec<T, 2, ALIGNMENT> { | |
static constexpr uint32_t N = 2; | |
TVEC_BODY | |
union { T x, r; }; | |
union { T y, g; }; | |
TCNN_HOST_DEVICE tvec(T a, T b) : x{a}, y{b} {} | |
}; | |
template <typename T, size_t ALIGNMENT> | |
struct alignas(ALIGNMENT) tvec<T, 3, ALIGNMENT> { | |
static constexpr uint32_t N = 3; | |
TVEC_BODY | |
union { T x, r; }; | |
union { T y, g; }; | |
union { T z, b; }; | |
TCNN_HOST_DEVICE tvec(T a, T b, T c) : x{a}, y{b}, z{c} {} | |
template <size_t A> TCNN_HOST_DEVICE tvec(const tvec<T, 2, A>& a, T b) : x{a.x}, y{a.y}, z{b} {} | |
template <size_t A> TCNN_HOST_DEVICE tvec(T a, const tvec<T, 2, A>& b) : x{a}, y{b.x}, z{b.y} {} | |
}; | |
template <typename T, size_t ALIGNMENT> | |
struct alignas(ALIGNMENT) tvec<T, 4, ALIGNMENT> { | |
static constexpr uint32_t N = 4; | |
TVEC_BODY | |
union { T x, r; }; | |
union { T y, g; }; | |
union { T z, b; }; | |
union { T w, a; }; | |
TCNN_HOST_DEVICE tvec(T a, T b, T c, T d) : x{a}, y{b}, z{c}, w{d} {} | |
template <size_t A> TCNN_HOST_DEVICE tvec(const tvec<T, 3, A>& a, T b) : x{a.x}, y{a.y}, z{a.z}, w{b} {} | |
template <size_t A1, size_t A2> TCNN_HOST_DEVICE tvec(const tvec<T, 2, A1>& a, const tvec<T, 2, A2>& b) : x{a.x}, y{a.y}, z{b.x}, w{b.y} {} | |
template <size_t A> TCNN_HOST_DEVICE tvec(const tvec<T, 2, A>& a, T b, T c) : x{a.x}, y{a.y}, z{b}, w{c} {} | |
template <size_t A> TCNN_HOST_DEVICE tvec(T a, const tvec<T, 2, A>& b, T c) : x{a}, y{b.x}, z{b.y}, w{c} {} | |
template <size_t A> TCNN_HOST_DEVICE tvec(T a, T b, const tvec<T, 2, A>& c) : x{a}, y{b}, z{c.x}, w{c.y} {} | |
template <size_t A> TCNN_HOST_DEVICE tvec(T a, const tvec<T, 3, A>& b) : x{a}, y{b.x}, z{b.y}, w{b.z} {} | |
}; | |
// Import external cwise functions into ngp namespace to avoid | |
// name resolution problems related to the vector-values versions defined below. | |
template <typename T> TCNN_HOST_DEVICE T min(T a, T b) { return std::min(a, b); } | |
template <typename T> TCNN_HOST_DEVICE T max(T a, T b) { return std::max(a, b); } | |
template <typename T> TCNN_HOST_DEVICE T clamp(T a, T b, T c) { return a < b ? b : (c < a ? c : a); } | |
template <typename T> TCNN_HOST_DEVICE T copysign(T a, T b) { return std::copysign(a, b); } | |
template <typename T> TCNN_HOST_DEVICE T sign(T a) { return std::copysign((T)1, a); } | |
template <typename T> TCNN_HOST_DEVICE T mix(T a, T b, T c) { return a * ((T)1 - c) + b * c; } | |
template <typename T> TCNN_HOST_DEVICE T floor(T a) { return std::floor(a); } | |
template <typename T> TCNN_HOST_DEVICE T round(T a) { return std::round(a); } | |
template <typename T> TCNN_HOST_DEVICE T ceil(T a) { return std::ceil(a); } | |
template <typename T> TCNN_HOST_DEVICE T abs(T a) { return std::abs(a); } | |
template <typename T> TCNN_HOST_DEVICE T distance(T a, T b) { return std::abs(a - b); } | |
template <typename T> TCNN_HOST_DEVICE T sin(T a) { return std::sin(a); } | |
template <typename T> TCNN_HOST_DEVICE T asin(T a) { return std::asin(a); } | |
template <typename T> TCNN_HOST_DEVICE T cos(T a) { return std::cos(a); } | |
template <typename T> TCNN_HOST_DEVICE T acos(T a) { return std::acos(a); } | |
template <typename T> TCNN_HOST_DEVICE T tan(T a) { return std::tan(a); } | |
template <typename T> TCNN_HOST_DEVICE T atan(T a) { return std::atan(a); } | |
template <typename T> TCNN_HOST_DEVICE T sqrt(T a) { return std::sqrt(a); } | |
template <typename T> TCNN_HOST_DEVICE T exp(T a) { return std::exp(a); } | |
template <typename T> TCNN_HOST_DEVICE T log(T a) { return std::log(a); } | |
template <typename T> TCNN_HOST_DEVICE T exp2(T a) { return std::exp2(a); } | |
template <typename T> TCNN_HOST_DEVICE T log2(T a) { return std::log2(a); } | |
template <typename T> TCNN_HOST_DEVICE T pow(T a, T b) { return std::pow(a, b); } | |
template <typename T> TCNN_HOST_DEVICE T isfinite(T a) { | |
return ::isfinite(a); | |
return std::isfinite(a); | |
} | |
inline TCNN_HOST_DEVICE float fma(float a, float b, float c) { return fmaf(a, b, c); } | |
inline TCNN_DEVICE __half fma(__half a, __half b, __half c) { | |
return __hfma(a, b, c); | |
return fmaf(a, b, c); | |
} | |
CWISE_OP(operator+, TVEC, a[i] + b[i], const TVEC& a, const TVEC& b) | |
CWISE_OP(operator+, TVEC, a + b[i], T a, const TVEC& b) | |
CWISE_OP(operator+, TVEC, a[i] + b, const TVEC& a, T b) | |
CWISE_OP(operator-, TVEC, a[i] - b[i], const TVEC& a, const TVEC& b) | |
CWISE_OP(operator-, TVEC, a - b[i], T a, const TVEC& b) | |
CWISE_OP(operator-, TVEC, a[i] - b, const TVEC& a, T b) | |
CWISE_OP(operator*, TVEC, a[i] * b[i], const TVEC& a, const TVEC& b) | |
CWISE_OP(operator*, TVEC, a * b[i], T a, const TVEC& b) | |
CWISE_OP(operator*, TVEC, a[i] * b, const TVEC& a, T b) | |
CWISE_OP(operator/, TVEC, a[i] / b[i], const TVEC& a, const TVEC& b) | |
CWISE_OP(operator/, TVEC, a / b[i], T a, const TVEC& b) | |
CWISE_OP(operator/, TVEC, a[i] / b, const TVEC& a, T b) | |
CWISE_OP(fma, TVEC, fma(a[i], b[i], c[i]), const TVEC& a, const TVEC& b, const TVEC& c) | |
CWISE_OP(fma, TVEC, fma(a[i], b[i], c), const TVEC& a, const TVEC& b, T c) | |
CWISE_OP(fma, TVEC, fma(a[i], b, c[i]), const TVEC& a, T b, const TVEC& c) | |
CWISE_OP(fma, TVEC, fma(a[i], b, c), const TVEC& a, T b, T c) | |
CWISE_OP(fma, TVEC, fma(a, b[i], c[i]), T a, const TVEC& b, const TVEC& c) | |
CWISE_OP(fma, TVEC, fma(a, b[i], c), T a, const TVEC& b, T c) | |
CWISE_OP(fma, TVEC, fma(a, b, c[i]), T a, T b, const TVEC& c) | |
CWISE_OP(min, TVEC, min(a[i], b[i]), const TVEC& a, const TVEC& b) | |
CWISE_OP(min, TVEC, min(a[i], b), const TVEC& a, T b) | |
CWISE_OP(min, TVEC, min(a, b[i]), T a, const TVEC& b) | |
CWISE_OP(max, TVEC, max(a[i], b[i]), const TVEC& a, const TVEC& b) | |
CWISE_OP(max, TVEC, max(a[i], b), const TVEC& a, T b) | |
CWISE_OP(max, TVEC, max(a, b[i]), T a, const TVEC& b) | |
CWISE_OP(clamp, TVEC, clamp(a[i], b[i], c[i]), const TVEC& a, const TVEC& b, const TVEC& c) | |
CWISE_OP(clamp, TVEC, clamp(a[i], b[i], c), const TVEC& a, const TVEC& b, T c) | |
CWISE_OP(clamp, TVEC, clamp(a[i], b, c[i]), const TVEC& a, T b, const TVEC& c) | |
CWISE_OP(clamp, TVEC, clamp(a[i], b, c), const TVEC& a, T b, T c) | |
CWISE_OP(copysign, TVEC, copysign(a[i], b[i]), const TVEC& a, const TVEC& b) | |
CWISE_OP(copysign, TVEC, copysign(a[i], b), const TVEC& a, T b) | |
CWISE_OP(copysign, TVEC, copysign(a, b[i]), T a, const TVEC& b) | |
CWISE_OP(sign, TVEC, sign(a[i]), const TVEC& a) | |
CWISE_OP(mix, TVEC, a[i] * ((T)1 - c[i]) + b[i] * c[i], const TVEC& a, const TVEC& b, const TVEC& c) | |
CWISE_OP(mix, TVEC, a[i] * ((T)1 - c) + b[i] * c, const TVEC& a, const TVEC& b, T c) | |
CWISE_OP(operator-, TVEC, -a[i], const TVEC& a) | |
CWISE_OP(floor, TVEC, floor(a[i]), const TVEC& a) | |
CWISE_OP(round, TVEC, round(a[i]), const TVEC& a) | |
CWISE_OP(ceil, TVEC, ceil(a[i]), const TVEC& a) | |
CWISE_OP(abs, TVEC, abs(a[i]), const TVEC& a) | |
CWISE_OP(sin, TVEC, sin(a[i]), const TVEC& a) | |
CWISE_OP(asin, TVEC, asin(a[i]), const TVEC& a) | |
CWISE_OP(cos, TVEC, cos(a[i]), const TVEC& a) | |
CWISE_OP(acos, TVEC, acos(a[i]), const TVEC& a) | |
CWISE_OP(tan, TVEC, tan(a[i]), const TVEC& a) | |
CWISE_OP(atan, TVEC, atan(a[i]), const TVEC& a) | |
CWISE_OP(sqrt, TVEC, sqrt(a[i]), const TVEC& a) | |
CWISE_OP(exp, TVEC, exp(a[i]), const TVEC& a) | |
CWISE_OP(log, TVEC, log(a[i]), const TVEC& a) | |
CWISE_OP(exp2, TVEC, exp2(a[i]), const TVEC& a) | |
CWISE_OP(log2, TVEC, log2(a[i]), const TVEC& a) | |
CWISE_OP(pow, TVEC, pow(a[i], b), const TVEC& a, T b) | |
CWISE_OP(pow, TVEC, pow(a[i], b[i]), const TVEC& a, const TVEC& b) | |
CWISE_OP(isfinite, BVEC, isfinite(a[i]), const TVEC& a) | |
inline TCNN_DEVICE void atomic_add_gmem_float(float* addr, float in) { | |
int in_int = *((int*)&in); | |
asm ("red.relaxed.gpu.global.add.f32 [%0], %1;" :: "l"(addr), "r"(in_int)); | |
atomicAdd(addr, in); | |
} | |
template <typename T, uint32_t N, size_t A> | |
TCNN_DEVICE void atomic_add(T* dst, const tvec<T, N, A>& a) { | |
TCNN_PRAGMA_UNROLL | |
for (uint32_t i = 0; i < N; ++i) { | |
atomicAdd(dst + i, a[i]); | |
} | |
} | |
template <uint32_t N, size_t A> | |
TCNN_DEVICE void atomic_add_gmem(float* dst, const tvec<float, N, A>& a) { | |
TCNN_PRAGMA_UNROLL | |
for (uint32_t i = 0; i < N; ++i) { | |
atomic_add_gmem_float(dst + i, a[i]); | |
} | |
} | |
inline TCNN_DEVICE void atomic_add_gmem_h2(half2* addr, half2 in) { | |
int in_int = *((int*)&in); | |
asm ("red.relaxed.gpu.global.add.noftz.f16x2 [%0], %1;" :: "l"(addr), "r"(in_int)); | |
atomicAdd(addr, in); | |
} | |
template <uint32_t N, size_t A, typename = std::enable_if_t<N % 2 == 0>> | |
TCNN_DEVICE void atomic_add(__half* dst, const tvec<__half, N, A>& a) { | |
TCNN_PRAGMA_UNROLL | |
for (uint32_t i = 0; i < N; i += 2) { | |
atomicAdd((__half2*)(dst + i), __half2(a[i], a[i+1])); | |
} | |
} | |
template <uint32_t N, size_t A, typename = std::enable_if_t<N % 2 == 0>> | |
TCNN_DEVICE void atomic_add_gmem(__half* dst, const tvec<__half, N, A>& a) { | |
TCNN_PRAGMA_UNROLL | |
for (uint32_t i = 0; i < N; i += 2) { | |
atomic_add_gmem_h2((__half2*)(dst + i), __half2(a[i], a[i+1])); | |
} | |
} | |
// __half2 specializations for aligned vectors with 2*N fp16 coefficients. | |
HALF_CWISE_OP(fma, HVEC, __hfma2(*(__half2*)&a[i], *(__half2*)&b[i], *(__half2*)&c[i]), const HVEC& a, const HVEC& b, const HVEC& c) | |
HALF_CWISE_OP(fma, HVEC, __hfma2(*(__half2*)&a[i], *(__half2*)&b[i], __half2half2(c)), const HVEC& a, const HVEC& b, __half c) | |
HALF_CWISE_OP(fma, HVEC, __hfma2(*(__half2*)&a[i], __half2half2(b), *(__half2*)&c[i]), const HVEC& a, __half b, const HVEC& c) | |
HALF_CWISE_OP(fma, HVEC, __hfma2(*(__half2*)&a[i], __half2half2(b), __half2half2(c)), const HVEC& a, __half b, __half c) | |
HALF_CWISE_OP(fma, HVEC, __hfma2(__half2half2(a), *(__half2*)&b[i], *(__half2*)&c[i]), __half a, const HVEC& b, const HVEC& c) | |
HALF_CWISE_OP(fma, HVEC, __hfma2(__half2half2(a), *(__half2*)&b[i], __half2half2(c)), __half a, const HVEC& b, __half c) | |
HALF_CWISE_OP(fma, HVEC, __hfma2(__half2half2(a), __half2half2(b), *(__half2*)&c[i]), __half a, __half b, const HVEC& c) | |
HALF_CWISE_OP(operator+, HVEC, __hadd2(*(__half2*)&a[i], *(__half2*)&b[i]), const HVEC& a, const HVEC& b) | |
HALF_CWISE_OP(operator+, HVEC, __hadd2(__half2half2(a), *(__half2*)&b[i]), __half a, const HVEC& b) | |
HALF_CWISE_OP(operator+, HVEC, __hadd2(*(__half2*)&a[i], __half2half2(b)), const HVEC& a, __half b) | |
HALF_CWISE_OP(operator-, HVEC, __hsub2(*(__half2*)&a[i], *(__half2*)&b[i]), const HVEC& a, const HVEC& b) | |
HALF_CWISE_OP(operator-, HVEC, __hsub2(__half2half2(a), *(__half2*)&b[i]), __half a, const HVEC& b) | |
HALF_CWISE_OP(operator-, HVEC, __hsub2(*(__half2*)&a[i], __half2half2(b)), const HVEC& a, __half b) | |
HALF_CWISE_OP(operator*, HVEC, __hmul2(*(__half2*)&a[i], *(__half2*)&b[i]), const HVEC& a, const HVEC& b) | |
HALF_CWISE_OP(operator*, HVEC, __hmul2(__half2half2(a), *(__half2*)&b[i]), __half a, const HVEC& b) | |
HALF_CWISE_OP(operator*, HVEC, __hmul2(*(__half2*)&a[i], __half2half2(b)), const HVEC& a, __half b) | |
HALF_CWISE_OP(operator/, HVEC, __h2div(*(__half2*)&a[i], *(__half2*)&b[i]), const HVEC& a, const HVEC& b) | |
HALF_CWISE_OP(operator/, HVEC, __h2div(*(__half2*)&a[i], __half2half2(b)), const HVEC& a, __half b) | |
INPLACE_OP(operator*=, const TVEC&, a[i] *= b[i]) | |
INPLACE_OP(operator/=, const TVEC&, a[i] /= b[i]) | |
INPLACE_OP(operator+=, const TVEC&, a[i] += b[i]) | |
INPLACE_OP(operator-=, const TVEC&, a[i] -= b[i]) | |
INPLACE_OP(operator*=, T, a[i] *= b) | |
INPLACE_OP(operator/=, T, a[i] /= b) | |
REDUCTION_OP(dot, T, (T)0, result += a[i] * b[i], const TVEC& a, const TVEC& b) | |
REDUCTION_OP(sum, T, (T)0, result += a[i], const TVEC& a) | |
REDUCTION_OP(mean, T, (T)0, result += a[i] / (T)N, const TVEC& a) | |
REDUCTION_OP(product, T, (T)1, result *= a[i], const TVEC& a) | |
REDUCTION_OP(min, T, (T)std::numeric_limits<T>::infinity(), result = min(result, a[i]), const TVEC& a) | |
REDUCTION_OP(max, T, (T)-std::numeric_limits<T>::infinity(), result = max(result, a[i]), const TVEC& a) | |
REDUCTION_OP(length2, T, (T)0, result += a[i] * a[i], const TVEC& a) | |
REDUCTION_OP(operator==, bool, true, result &= a[i] == b[i], const TVEC& a, const TVEC& b) | |
REDUCTION_OP(operator!=, bool, false, result |= a[i] != b[i], const TVEC& a, const TVEC& b) | |
BOOL_REDUCTION_OP(all, bool, true, result &= a[i], const BVEC& a) | |
BOOL_REDUCTION_OP(any, bool, false, result |= a[i], const BVEC& a) | |
template <typename T, uint32_t N, size_t A> | |
TCNN_HOST_DEVICE T length(const TVEC& a) { | |
return std::sqrt(length2(a)); | |
} | |
template <typename T, uint32_t N, size_t A> | |
TCNN_HOST_DEVICE T distance(const TVEC& a, const TVEC& b) { | |
return length(a - b); | |
} | |
template <typename T, uint32_t N, size_t A> | |
TCNN_HOST_DEVICE TVEC normalize(const TVEC& v) { | |
T len = length(v); | |
if (len <= (T)0) { | |
TVEC result{(T)0}; | |
result[0] = (T)1; | |
return result; | |
} | |
return v / len; | |
} | |
template <typename T, uint32_t N, size_t A> | |
TCNN_HOST_DEVICE TVEC cross(const TVEC& a, const TVEC& b) { | |
return { | |
a.y * b.z - a.z * b.y, | |
a.z * b.x - a.x * b.z, | |
a.x * b.y - a.y * b.x, | |
}; | |
} | |
template <typename T, uint32_t N, size_t A> | |
TCNN_HOST_DEVICE TVEC faceforward(const TVEC& n, const TVEC& i, const TVEC& nref) { | |
return n * -copysign((T)1, dot(i, nref)); | |
} | |
DEF_NON_TEMPLATED_VECTOR_TYPES(bvec, bool) | |
DEF_NON_TEMPLATED_VECTOR_TYPES(vec, float) | |
DEF_NON_TEMPLATED_VECTOR_TYPES(dvec, double) | |
DEF_NON_TEMPLATED_VECTOR_TYPES(ivec, int) | |
DEF_NON_TEMPLATED_VECTOR_TYPES(uvec, unsigned int) | |
DEF_NON_TEMPLATED_VECTOR_TYPES(i32vec, int32_t) | |
DEF_NON_TEMPLATED_VECTOR_TYPES(u32vec, uint32_t) | |
DEF_NON_TEMPLATED_VECTOR_TYPES(i16vec, int16_t) | |
DEF_NON_TEMPLATED_VECTOR_TYPES(u16vec, uint16_t) | |
DEF_NON_TEMPLATED_VECTOR_TYPES(i8vec, int8_t) | |
DEF_NON_TEMPLATED_VECTOR_TYPES(u8vec, uint8_t) | |
DEF_NON_TEMPLATED_VECTOR_TYPES(hvec, __half) | |
inline TCNN_HOST_DEVICE float4 to_float4(const vec4& x) { return {x.x, x.y, x.z, x.w}; } | |
inline TCNN_HOST_DEVICE float3 to_float3(const vec3& x) { return {x.x, x.y, x.z}; } | |
inline TCNN_HOST_DEVICE float2 to_float2(const vec2& x) { return {x.x, x.y}; } | |
inline TCNN_HOST_DEVICE vec4 to_vec4(const float4& x) { return {x.x, x.y, x.z, x.w}; } | |
inline TCNN_HOST_DEVICE vec3 to_vec3(const float3& x) { return {x.x, x.y, x.z}; } | |
inline TCNN_HOST_DEVICE vec2 to_vec2(const float2& x) { return {x.x, x.y}; } | |
template <typename T, uint32_t N, uint32_t M> | |
struct tmat { | |
tmat() = default; | |
TCNN_HOST_DEVICE tmat(T scalar) { | |
TCNN_PRAGMA_UNROLL | |
for (uint32_t i = 0; i < N; ++i) { | |
TCNN_PRAGMA_UNROLL | |
for (uint32_t j = 0; j < M; ++j) { | |
m[i][j] = i == j ? scalar : (T)0; | |
} | |
} | |
} | |
TCNN_HOST_DEVICE static constexpr tmat<T, N, M> identity() { return tmat<T, N, M>((T)1); } | |
TCNN_HOST_DEVICE static constexpr tmat<T, N, M> zero() { return tmat<T, N, M>((T)0); } | |
template <typename... Ts, typename = enable_if_size_and_type_match_t<N*M, T, Ts...>> | |
TCNN_HOST_DEVICE tmat(Ts... coeffs) : d{coeffs...} {} | |
TCNN_HOST_DEVICE tmat(const T* coeffs) { | |
TCNN_PRAGMA_UNROLL | |
for (uint32_t i = 0; i < N; ++i) { | |
TCNN_PRAGMA_UNROLL | |
for (uint32_t j = 0; j < M; ++j) { | |
m[i][j] = *(coeffs++); | |
} | |
} | |
} | |
template <size_t A> | |
TCNN_HOST_DEVICE tmat(const tvec<T, M, A>& a) { | |
TCNN_PRAGMA_UNROLL | |
for (uint32_t i = 0; i < N; ++i) { | |
m[i] = a; | |
} | |
} | |
template <size_t A> | |
TCNN_HOST_DEVICE tmat(const tvec<T, M, A>& a, const tvec<T, M, A>& b) { | |
static_assert(N == 2, "Matrix must have 2 columns."); | |
m[0] = a; m[1] = b; | |
} | |
template <size_t A> | |
TCNN_HOST_DEVICE tmat(const tvec<T, M, A>& a, const tvec<T, M, A>& b, const tvec<T, M, A>& c) { | |
static_assert(N == 3, "Matrix must have 3 columns."); | |
m[0] = a; m[1] = b; m[2] = c; | |
} | |
template <size_t A> | |
TCNN_HOST_DEVICE tmat(const tvec<T, M, A>& a, const tvec<T, M, A>& b, const tvec<T, M, A>& c, const tvec<T, M, A>& d) { | |
static_assert(N == 4, "Matrix must have 4 columns."); | |
m[0] = a; m[1] = b; m[2] = c; m[3] = d; | |
} | |
template <typename U, uint32_t P, uint32_t O> | |
TCNN_HOST_DEVICE tmat(const tmat<U, P, O>& other) { | |
TCNN_PRAGMA_UNROLL | |
for (uint32_t i = 0; i < N; ++i) { | |
TCNN_PRAGMA_UNROLL | |
for (uint32_t j = 0; j < M; ++j) { | |
m[i][j] = i < P && j < O ? other[i][j] : (i == j ? (T)1 : (T)0); | |
} | |
} | |
} | |
template <size_t A> | |
TCNN_HOST_DEVICE tvec<T, M, A> operator*(const tvec<T, N, A>& v) const { | |
tvec<T, M, A> result((T)0); | |
TCNN_PRAGMA_UNROLL | |
for (uint32_t i = 0; i < N; ++i) { | |
TCNN_PRAGMA_UNROLL | |
for (uint32_t j = 0; j < M; ++j) { | |
result[j] += m[i][j] * v[i]; | |
} | |
} | |
return result; | |
} | |
template <uint32_t K> | |
TCNN_HOST_DEVICE tmat<T, K, M> operator*(const tmat<T, K, N>& other) const { | |
tmat<T, K, M> result; | |
TCNN_PRAGMA_UNROLL | |
for (uint32_t i = 0; i < K; ++i) { | |
result[i] = (*this) * other[i]; | |
} | |
return result; | |
} | |
TCNN_HOST_DEVICE tvec<T, M>& at(uint32_t idx) { return m[idx]; } | |
TCNN_HOST_DEVICE tvec<T, M> at(uint32_t idx) const { return m[idx]; } | |
TCNN_HOST_DEVICE tvec<T, M>& operator[](uint32_t idx) { return m[idx]; } | |
TCNN_HOST_DEVICE tvec<T, M> operator[](uint32_t idx) const { return m[idx]; } | |
TCNN_HOST_DEVICE T* data() { return d; } | |
TCNN_HOST_DEVICE const T* data() const { return d; } | |
union { | |
tvec<T, M> m[N]; | |
T d[M*N]; | |
}; | |
}; | |
template <typename T, uint32_t N> | |
TCNN_HOST_DEVICE tmat<T, N, N>& operator*=(tmat<T, N, N>& m, const tmat<T, N, N>& other) { | |
m = m * other; | |
return m; | |
} | |
template <typename T, uint32_t N, uint32_t M> | |
TCNN_HOST_DEVICE T frobenius_norm(const tmat<T, N, M>& m) { | |
T result = (T)0; | |
TCNN_PRAGMA_UNROLL | |
for (uint32_t i = 0; i < N; ++i) { | |
result += length2(m[i]); | |
} | |
return sqrt(result); | |
} | |
template <typename T, uint32_t N, uint32_t M> | |
TCNN_HOST_DEVICE tmat<T, M, N> transpose(const tmat<T, N, M>& m) { | |
tmat<T, M, N> result; | |
TCNN_PRAGMA_UNROLL | |
for (uint32_t i = 0; i < N; ++i) { | |
TCNN_PRAGMA_UNROLL | |
for (uint32_t j = 0; j < M; ++j) { | |
result[j][i] = m[i][j]; | |
} | |
} | |
return result; | |
} | |
template <typename T, uint32_t N, uint32_t M> | |
TCNN_HOST_DEVICE tvec<T, N> row(const tmat<T, N, M>& m, int r) { | |
tvec<T, N> result; | |
TCNN_PRAGMA_UNROLL | |
for (uint32_t i = 0; i < N; ++i) { | |
result[i] = m[i][r]; | |
} | |
return result; | |
} | |
template <typename T, uint32_t N, uint32_t M, size_t A> | |
TCNN_HOST_DEVICE tmat<T, N, M> row(const tmat<T, N, M>& m, int r, const tvec<T, N, A>& v) { | |
tmat<T, N, M> result = m; | |
TCNN_PRAGMA_UNROLL | |
for (uint32_t i = 0; i < N; ++i) { | |
result[i][r] = v[i]; | |
} | |
return result; | |
} | |
CWISE_OP(operator+, const TMAT&, const TMAT&, a[i][j] + b[i][j]) | |
CWISE_OP(operator-, const TMAT&, const TMAT&, a[i][j] - b[i][j]) | |
CWISE_OP(operator*, T, const TMAT&, a * b[i][j]) | |
CWISE_OP(operator*, const TMAT&, T, a[i][j] * b) | |
CWISE_OP(operator/, const TMAT&, T, a[i][j] / b) | |
INPLACE_OP(operator+=, const TMAT&, a[i][j] += b[i][j]) | |
INPLACE_OP(operator-=, const TMAT&, a[i][j] -= b[i][j]) | |
INPLACE_OP(operator*=, T, a[i][j] *= b) | |
INPLACE_OP(operator/=, T, a[i][j] /= b) | |
REDUCTION_OP(operator==, bool, true, result &= a[i][j] == b[i][j], const TMAT& a, const TMAT& b) | |
REDUCTION_OP(operator!=, bool, false, result |= a[i][j] != b[i][j], const TMAT& a, const TMAT& b) | |
REDUCTION_OP(isfinite, bool, true, result &= isfinite(a[i][j]), const TMAT& a) | |
// The following implementations of determinants, adjoints, inverses, and quaternions | |
// (and only those) were adapted from glm per the MIT license, which is included below in full. | |
// ================================================================================ | |
// The MIT License | |
// -------------------------------------------------------------------------------- | |
// Copyright (c) 2005 - G-Truc Creation | |
// Permission is hereby granted, free of charge, to any person obtaining a copy | |
// of this software and associated documentation files (the "Software"), to deal | |
// in the Software without restriction, including without limitation the rights | |
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
// copies of the Software, and to permit persons to whom the Software is | |
// furnished to do so, subject to the following conditions: | |
// The above copyright notice and this permission notice shall be included in | |
// all copies or substantial portions of the Software. | |
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
// THE SOFTWARE. | |
template <typename T> | |
TCNN_HOST_DEVICE T determinant(const tmat<T, 2, 2>& m) { | |
return m[0][0] * m[1][1] - m[0][1] * m[1][0]; | |
} | |
template <typename T> | |
TCNN_HOST_DEVICE T determinant(const tmat<T, 3, 3>& m) { | |
return | |
m[0][0] * (m[1][1] * m[2][2] - m[2][1] * m[1][2]) + | |
-m[1][0] * (m[0][1] * m[2][2] - m[2][1] * m[0][2]) + | |
m[2][0] * (m[0][1] * m[1][2] - m[1][1] * m[0][2]) | |
; | |
} | |
template <typename T> | |
TCNN_HOST_DEVICE T determinant(const tmat<T, 4, 4>& m) { | |
T s0 = m[2][2] * m[3][3] - m[3][2] * m[2][3]; | |
T s1 = m[2][1] * m[3][3] - m[3][1] * m[2][3]; | |
T s2 = m[2][1] * m[3][2] - m[3][1] * m[2][2]; | |
T s3 = m[2][0] * m[3][3] - m[3][0] * m[2][3]; | |
T s4 = m[2][0] * m[3][2] - m[3][0] * m[2][2]; | |
T s5 = m[2][0] * m[3][1] - m[3][0] * m[2][1]; | |
tvec<T, 4> coeff{ | |
(m[1][1] * s0 - m[1][2] * s1 + m[1][3] * s2), | |
-(m[1][0] * s0 - m[1][2] * s3 + m[1][3] * s4), | |
(m[1][0] * s1 - m[1][1] * s3 + m[1][3] * s5), | |
-(m[1][0] * s2 - m[1][1] * s4 + m[1][2] * s5), | |
}; | |
return | |
m[0][0] * coeff[0] + m[0][1] * coeff[1] + | |
m[0][2] * coeff[2] + m[0][3] * coeff[3] | |
; | |
} | |
template <typename T> | |
TCNN_HOST_DEVICE tmat<T, 2, 2> adjoint(const tmat<T, 2, 2>& m) { | |
return { | |
m[1][1], -m[0][1], | |
-m[1][0], m[0][0], | |
}; | |
} | |
template <typename T> | |
TCNN_HOST_DEVICE tmat<T, 3, 3> adjoint(const tmat<T, 3, 3>& m) { | |
const T m00 = determinant(tmat<T, 2, 2>{m[1][1], m[2][1], m[1][2], m[2][2]}); | |
const T m01 = determinant(tmat<T, 2, 2>{m[0][1], m[2][1], m[0][2], m[2][2]}); | |
const T m02 = determinant(tmat<T, 2, 2>{m[0][1], m[1][1], m[0][2], m[1][2]}); | |
const T m10 = determinant(tmat<T, 2, 2>{m[1][0], m[2][0], m[1][2], m[2][2]}); | |
const T m11 = determinant(tmat<T, 2, 2>{m[0][0], m[2][0], m[0][2], m[2][2]}); | |
const T m12 = determinant(tmat<T, 2, 2>{m[0][0], m[1][0], m[0][2], m[1][2]}); | |
const T m20 = determinant(tmat<T, 2, 2>{m[1][0], m[2][0], m[1][1], m[2][1]}); | |
const T m21 = determinant(tmat<T, 2, 2>{m[0][0], m[2][0], m[0][1], m[2][1]}); | |
const T m22 = determinant(tmat<T, 2, 2>{m[0][0], m[1][0], m[0][1], m[1][1]}); | |
return { | |
m00, -m01, m02, | |
-m10, m11, -m12, | |
m20, -m21, m22, | |
}; | |
} | |
template <typename T> | |
TCNN_HOST_DEVICE tmat<T, 4, 4> adjoint(const tmat<T, 4, 4>& m) { | |
const T m00 = determinant(tmat<T, 3, 3>{m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]}); | |
const T m01 = determinant(tmat<T, 3, 3>{m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]}); | |
const T m02 = determinant(tmat<T, 3, 3>{m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]}); | |
const T m03 = determinant(tmat<T, 3, 3>{m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]}); | |
const T m10 = determinant(tmat<T, 3, 3>{m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], m[3][3]}); | |
const T m11 = determinant(tmat<T, 3, 3>{m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], m[3][3]}); | |
const T m12 = determinant(tmat<T, 3, 3>{m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], m[3][3]}); | |
const T m13 = determinant(tmat<T, 3, 3>{m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], m[3][2]}); | |
const T m20 = determinant(tmat<T, 3, 3>{m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], m[3][3]}); | |
const T m21 = determinant(tmat<T, 3, 3>{m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], m[3][3]}); | |
const T m22 = determinant(tmat<T, 3, 3>{m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], m[3][3]}); | |
const T m23 = determinant(tmat<T, 3, 3>{m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], m[3][2]}); | |
const T m30 = determinant(tmat<T, 3, 3>{m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3]}); | |
const T m31 = determinant(tmat<T, 3, 3>{m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3]}); | |
const T m32 = determinant(tmat<T, 3, 3>{m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3]}); | |
const T m33 = determinant(tmat<T, 3, 3>{m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2]}); | |
return { | |
m00, -m10, m20, -m30, | |
-m01, m11, -m21, m31, | |
m02, -m12, m22, -m32, | |
-m03, m13, -m23, m33, | |
}; | |
} | |
template <typename T, uint32_t N> | |
TCNN_HOST_DEVICE tmat<T, N, N> inverse(const tmat<T, N, N>& m) { | |
return adjoint(m) / determinant(m); | |
} | |
template <typename T, size_t A> | |
TCNN_HOST_DEVICE tmat<T, 3, 3> rotmat(T angle, const tvec<T, 3, A>& axis) { | |
T s, c; | |
sincos(angle, &s, &c); | |
T oc = (T)1 - c; | |
return { | |
oc * axis.x * axis.x + c, oc * axis.x * axis.y + axis.z * s, oc * axis.z * axis.x - axis.y * s, | |
oc * axis.x * axis.y - axis.z * s, oc * axis.y * axis.y + c, oc * axis.y * axis.z + axis.x * s, | |
oc * axis.z * axis.x + axis.y * s, oc * axis.y * axis.z - axis.x * s, oc * axis.z * axis.z + c, | |
}; | |
} | |
template <typename T, size_t A> | |
TCNN_HOST_DEVICE tmat<T, 3, 3> rotmat(const tvec<T, 3, A>& v) { | |
T angle = length(v); | |
if (angle == (T)0) { | |
return tmat<T, 3, 3>::identity(); | |
} | |
return rotmat(angle, v / angle); | |
} | |
template <typename T, uint32_t N> | |
TCNN_HOST_DEVICE tmat<T, N, N> mat_sqrt(const tmat<T, N, N>& m, T eps = (T)1e-10f) { | |
tmat<T, N, N> X = m, Y = tmat<T, N, N>::identity(); | |
for (uint32_t i = 0; i < 32; ++i) { | |
if (frobenius_norm(X * X - m) < eps) { | |
return X; | |
} | |
tmat<T, N, N> iX = inverse(X); | |
X = (T)0.5f * (X + inverse(Y)); | |
Y = (T)0.5f * (Y + iX); | |
} | |
return X; | |
} | |
template <typename T, uint32_t N> | |
TCNN_HOST_DEVICE tmat<T, N, N> mat_log_hawkins(const tmat<T, N, N>& m, T eps = (T)1e-10f) { | |
tmat<T, N, N> A = m - tmat<T, N, N>::identity(), Z = A, X = A; | |
for (uint32_t i = 2; i < 32; ++i) { | |
if (frobenius_norm(Z) < eps) { | |
return X; | |
} | |
Z = Z * A; | |
X += ((T)1 / (T)i) * Z; | |
} | |
return X; | |
} | |
template <typename T, uint32_t N> | |
TCNN_HOST_DEVICE tmat<T, N, N> mat_exp_pade(const tmat<T, N, N>& m) { | |
// Pade approximation with scaling; same as Matlab. | |
// Pseudocode translated from Hawkins and Grimm [2007] | |
tmat<T, N, N> mX = tmat<T, N, N>::identity(), mD = tmat<T, N, N>::identity(), mN = tmat<T, N, N>::identity(); | |
T c = (T)1; | |
constexpr uint32_t q = 6; // Matlab's default when using this algorithm | |
T s = -(T)1; | |
for (uint32_t k = 1; k <= q; ++k) { | |
c = c * (q - k + 1) / (k * (2 * q - k + 1)); | |
mX = m * mX; | |
auto cmX = c * mX; | |
mN = mN + cmX; | |
mD = mD + s * cmX; | |
s = -s; | |
} | |
return inverse(mD) * mN; | |
} | |
template <typename T, uint32_t N> | |
TCNN_HOST_DEVICE tmat<T, N, N> mat_log(const tmat<T, N, N>& m) { | |
tmat<T, N, N> result(m); | |
uint32_t j = 0; | |
for (; j < 32; ++j) { | |
if (frobenius_norm(result - tmat<T, N, N>::identity()) < (T)1e-5f) { | |
break; | |
} | |
result = mat_sqrt(result); | |
} | |
result = mat_log_hawkins(result); | |
return (T)scalbnf(1.0f, j) * result; | |
} | |
template <typename T, uint32_t N> | |
TCNN_HOST_DEVICE tmat<T, N, N> mat_exp(const tmat<T, N, N>& m) { | |
uint32_t N_SQUARING = max(0, 1 + (int)floor(log2(frobenius_norm(m)))); | |
tmat<T, N, N> result = (T)scalbnf(1.0f, -N_SQUARING) * m; | |
result = mat_exp_pade(result); | |
for (uint32_t i = 0; i < N_SQUARING; ++i) { | |
result *= result; | |
} | |
return result; | |
} | |
template <typename T, uint32_t N> | |
TCNN_HOST_DEVICE tmat<T, N, N> orthogonalize(const tmat<T, N, N>& m) { | |
// Iteration to bring an almost orthogonal matrix nearer to its closest | |
// orthogonal matrix. This can be run multiple times until convergence | |
// is measured or, alternatively, once per frame on something like a | |
// camera matrix to ensure it does not degenerate over time. | |
return (T)1.5f * m - (T)0.5f * (m * transpose(m) * m); | |
} | |
template <typename T> | |
TCNN_HOST_DEVICE tmat<T, 4, 3> orthogonalize(const tmat<T, 4, 3>& m) { | |
auto rot = orthogonalize(tmat<T, 3, 3>{m}); | |
return tmat<T, 4, 3>{rot[0], rot[1], rot[2], m[3]}; | |
} | |
template <typename T> | |
TCNN_HOST_DEVICE tmat<T, 3, 3> so3_log(const tmat<T, 3, 3>& m) { | |
T tr = clamp(m[0][0] + m[1][1] + m[2][2], -(T)1 + std::numeric_limits<T>::epsilon(), (T)1); | |
T radians = acosf((tr - (T)1) / (T)2); | |
return radians / sqrt(((T)1 + tr) * ((T)3 - tr)) * (m - transpose(m)); | |
} | |
template <typename T> | |
TCNN_HOST_DEVICE tmat<T, 3, 3> so3_exp(const tmat<T, 3, 3>& m) { | |
tvec<T, 3> axis = {-m[2][1], m[2][0], -m[1][0]}; | |
T radians_sq = length2(axis); | |
if (radians_sq == (T)0) { | |
return tmat<T, 3, 3>::identity(); | |
} | |
T radians = sqrt(radians_sq); | |
return tmat<T, 3, 3>::identity() + (sin(radians) / radians) * m + (((T)1 - cos(radians)) / radians_sq) * (m * m); | |
} | |
template <typename T> | |
TCNN_HOST_DEVICE tmat<T, 4, 3> se3_log(const tmat<T, 4, 3>& m) { | |
auto omega = so3_log(tmat<T, 3, 3>(m)); | |
tvec<T, 3> axis = {-omega[2][1], omega[2][0], -omega[1][0]}; | |
T radians_sq = length2(axis); | |
auto inv_trans = tmat<T, 3, 3>::identity(); | |
if (radians_sq > (T)0) { | |
T radians = sqrt(radians_sq); | |
inv_trans += -(T)0.5 * omega + (((T)1 - (T)0.5 * radians * cos((T)0.5 * radians) / sin((T)0.5 * radians)) / radians_sq) * (omega * omega); | |
} | |
return {omega[0], omega[1], omega[2], inv_trans * m[3]}; | |
} | |
template <typename T> | |
TCNN_HOST_DEVICE tmat<T, 4, 3> se3_exp(const tmat<T, 4, 3>& m) { | |
tmat<T, 3, 3> omega = m; | |
tvec<T, 3> axis = {-omega[2][1], omega[2][0], -omega[1][0]}; | |
T radians_sq = length2(axis); | |
auto trans = tmat<T, 3, 3>::identity(); | |
if (radians_sq > (T)0) { | |
T radians = sqrt(radians_sq); | |
trans += (((T)1 - cos(radians)) / radians_sq) * omega + ((radians - sin(radians)) / (radians * radians_sq)) * (omega * omega); | |
} | |
auto rot = so3_exp(omega); | |
return {rot[0], rot[1], rot[2], trans * m[3]}; | |
} | |
template <typename T> | |
TCNN_HOST_DEVICE tmat<T, 4, 4> se3_log(const tmat<T, 4, 4>& m) { | |
auto result = tmat<T, 4, 4>(se3_log(tmat<T, 4, 3>(m))); | |
result[3][3] = (T)0; | |
return result; | |
} | |
template <typename T> | |
TCNN_HOST_DEVICE tmat<T, 4, 4> se3_exp(const tmat<T, 4, 4>& m) { | |
return tmat<T, 4, 4>(se3_exp(tmat<T, 4, 3>(m))); | |
} | |
DEF_NON_TEMPLATED_MATRIX_TYPES(mat, float) | |
DEF_NON_TEMPLATED_MATRIX_TYPES(dmat, double) | |
DEF_NON_TEMPLATED_MATRIX_TYPES(hmat, __half) | |
template <typename T> | |
struct tquat { | |
tquat() = default; | |
TCNN_HOST_DEVICE tquat(T w, T x, T y, T z) : w{w}, x{x}, y{y}, z{z} {} | |
TCNN_HOST_DEVICE tquat(const tmat<T, 3, 3>& m) { | |
// Code adapted from https://www.euclideanspace.com/maths/geometry/rotations/conversions/matrixToQuaternion/ | |
T tr = m[0][0] + m[1][1] + m[2][2]; | |
if (tr > (T)0) { | |
T S = sqrt(tr + (T)1) * (T)2; // S=4*qw | |
w = (T)0.25 * S; | |
x = (m[1][2] - m[2][1]) / S; | |
y = (m[2][0] - m[0][2]) / S; | |
z = (m[0][1] - m[1][0]) / S; | |
} else if (m[0][0] > m[1][1] && m[0][0] > m[2][2]) { | |
T S = sqrt((T)1 + m[0][0] - m[1][1] - m[2][2]) * (T)2; // S=4*x | |
w = (m[1][2] - m[2][1]) / S; | |
x = (T)0.25 * S; | |
y = (m[1][0] + m[0][1]) / S; | |
z = (m[2][0] + m[0][2]) / S; | |
} else if (m[1][1] > m[2][2]) { | |
T S = sqrt((T)1 + m[1][1] - m[0][0] - m[2][2]) * (T)2; // S=4*y | |
w = (m[2][0] - m[0][2]) / S; | |
x = (m[1][0] + m[0][1]) / S; | |
y = (T)0.25 * S; | |
z = (m[2][1] + m[1][2]) / S; | |
} else { | |
T S = sqrt((T)1 + m[2][2] - m[0][0] - m[1][1]) * (T)2; // S=4*z | |
w = (m[0][1] - m[1][0]) / S; | |
x = (m[2][0] + m[0][2]) / S; | |
y = (m[2][1] + m[1][2]) / S; | |
z = (T)0.25 * S; | |
} | |
} | |
T w, x, y, z; | |
}; | |
template <typename T> TCNN_HOST_DEVICE tquat<T> operator-(const tquat<T>& a) { return {-a.w, -a.x, -a.y, -a.z}; } | |
template <typename T> TCNN_HOST_DEVICE tquat<T> operator+(const tquat<T>& a, const tquat<T>& b) { return {a.w + b.w, a.x + b.x, a.y + b.y, a.z + b.z}; } | |
template <typename T> TCNN_HOST_DEVICE tquat<T> operator-(const tquat<T>& a, const tquat<T>& b) { return {a.w - b.w, a.x - b.x, a.y - b.y, a.z - b.z}; } | |
template <typename T> TCNN_HOST_DEVICE tquat<T> operator*(T a, const tquat<T>& b) { return {a * b.w, a * b.x, a * b.y, a * b.z}; } | |
template <typename T> TCNN_HOST_DEVICE tquat<T> operator*(const tquat<T>& a, T b) { return {a.w * b, a.x * b, a.y * b, a.z * b}; } | |
template <typename T> TCNN_HOST_DEVICE tquat<T> operator/(const tquat<T>& a, T b) { return {a.w / b, a.x / b, a.y / b, a.z / b}; } | |
template <typename T> TCNN_HOST_DEVICE T dot(const tquat<T>& a, const tquat<T>& b) { return (a.w * b.w + a.x * b.x) + (a.y * b.y + a.z * b.z); } | |
template <typename T> TCNN_HOST_DEVICE T length2(const tquat<T>& a) { return dot(a, a); } | |
template <typename T> TCNN_HOST_DEVICE T length(const tquat<T>& a) { return sqrt(length2(a)); } | |
template <typename T> TCNN_HOST_DEVICE tquat<T> mix(const tquat<T>& a, const tquat<T>& b, T t) { return a * ((T)1 - t) + b * t; } | |
template <typename T> | |
TCNN_HOST_DEVICE tquat<T> normalize(const tquat<T>& a) { | |
T len = length(a); | |
if (len <= (T)0) { | |
return {(T)1, (T)0, (T)0, (T)0}; | |
} | |
return a / len; | |
} | |
template <typename T> | |
TCNN_HOST_DEVICE tquat<T> cross(const tquat<T>& a, const tquat<T>& b) { | |
return { | |
a.w * b.w - a.x * b.x - a.y * b.y - a.z * b.z, | |
a.w * b.x + a.x * b.w + a.y * b.z - a.z * b.y, | |
a.w * b.y + a.y * b.w + a.z * b.x - a.x * b.z, | |
a.w * b.z + a.z * b.w + a.x * b.y - a.y * b.x | |
}; | |
} | |
template <typename T> | |
TCNN_HOST_DEVICE tquat<T> slerp(const tquat<T>& x, const tquat<T>& y, T t) { | |
tquat<T> z = y; | |
T cos_theta = dot(x, y); | |
// If cos_theta < 0, the interpolation will take the long way around the sphere. | |
// To fix this, one quat must be negated. | |
if (cos_theta < (T)0) { | |
z = -y; | |
cos_theta = -cos_theta; | |
} | |
// Perform a linear interpolation when cos_theta is close to 1 to avoid side effect of sin(angle) becoming a zero denominator | |
if (cos_theta > (T)1 - std::numeric_limits<T>::epsilon()) { | |
return mix(x, z, t); | |
} else { | |
// Essential Mathematics, page 467 | |
T angle = acos(cos_theta); | |
return (sin(((T)1 - t) * angle) * x + sin(t * angle) * z) / sin(angle); | |
} | |
} | |
template <typename T> | |
TCNN_HOST_DEVICE T angle(const tquat<T>& x) { | |
return acos(clamp(x.w, (T)-1, (T)1)) * (T)2; | |
} | |
template <typename T> | |
TCNN_HOST_DEVICE tvec<T, 3> axis(const tquat<T>& x) { | |
const T tmp1 = (T)1 - x.w * x.w; | |
if (tmp1 <= (T)0) { | |
return {(T)0, (T)0, (T)1}; | |
} | |
const T tmp2 = (T)1 / sqrt(tmp1); | |
return {x.x * tmp2, x.y * tmp2, x.z * tmp2}; | |
} | |
template <typename T> | |
TCNN_HOST_DEVICE tmat<T, 3, 3> to_mat3(const tquat<T>& q) { | |
T qxx = q.x * q.x, qyy = q.y * q.y, qzz = q.z * q.z; | |
T qxz = q.x * q.z, qxy = q.x * q.y, qyz = q.y * q.z; | |
T qwx = q.w * q.x, qwy = q.w * q.y, qwz = q.w * q.z; | |
return { | |
(T)1 - (T)2 * (qyy + qzz), (T)2 * (qxy + qwz), (T)2 * (qxz - qwy), | |
(T)2 * (qxy - qwz), (T)1 - (T)2 * (qxx + qzz), (T)2 * (qyz + qwx), | |
(T)2 * (qxz + qwy), (T)2 * (qyz - qwx), (T)1 - (T)2 * (qxx + qyy), | |
}; | |
} | |
template <typename T> | |
TCNN_HOST_DEVICE tmat<T, 3, 3> slerp(const tmat<T, 3, 3>& a, const tmat<T, 3, 3>& b, float t) { | |
return to_mat3(normalize(slerp(normalize(tquat<T>(a)), normalize(tquat<T>(b)), t))); | |
} | |
template <typename T> | |
TCNN_HOST_DEVICE tvec<T, 3> rotvec(const tmat<T, 3, 3>& mat) { | |
tquat<T> tmp = mat; | |
return axis(tmp) * angle(tmp); | |
} | |
using quat = tquat<float>; | |
} | |