Spaces:
Running
on
Zero
Running
on
Zero
File size: 5,499 Bytes
c8d8740 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
/*
* Copyright (C) 2023, Inria
* GRAPHDECO research group, https://team.inria.fr/graphdeco
* All rights reserved.
*
* This software is free for non-commercial, research and evaluation use
* under the terms of the LICENSE.md file.
*
* For inquiries contact george.drettakis@inria.fr
*/
#ifndef CUDA_RASTERIZER_AUXILIARY_H_INCLUDED
#define CUDA_RASTERIZER_AUXILIARY_H_INCLUDED
#include "config.h"
#include "stdio.h"
#define BLOCK_SIZE (BLOCK_X * BLOCK_Y)
#define NUM_WARPS (BLOCK_SIZE/32)
// Spherical harmonics coefficients
__device__ const float SH_C0 = 0.28209479177387814f;
__device__ const float SH_C1 = 0.4886025119029199f;
__device__ const float SH_C2[] = {
1.0925484305920792f,
-1.0925484305920792f,
0.31539156525252005f,
-1.0925484305920792f,
0.5462742152960396f
};
__device__ const float SH_C3[] = {
-0.5900435899266435f,
2.890611442640554f,
-0.4570457994644658f,
0.3731763325901154f,
-0.4570457994644658f,
1.445305721320277f,
-0.5900435899266435f
};
__forceinline__ __device__ float ndc2Pix(float v, int S)
{
return ((v + 1.0) * S - 1.0) * 0.5;
}
__forceinline__ __device__ void getRect(const float2 p, int max_radius, uint2& rect_min, uint2& rect_max, dim3 grid)
{
rect_min = {
min(grid.x, max((int)0, (int)((p.x - max_radius) / BLOCK_X))),
min(grid.y, max((int)0, (int)((p.y - max_radius) / BLOCK_Y)))
};
rect_max = {
min(grid.x, max((int)0, (int)((p.x + max_radius + BLOCK_X - 1) / BLOCK_X))),
min(grid.y, max((int)0, (int)((p.y + max_radius + BLOCK_Y - 1) / BLOCK_Y)))
};
}
__forceinline__ __device__ float3 transformPoint4x3(const float3& p, const float* matrix)
{
float3 transformed = {
matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12],
matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13],
matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14],
};
return transformed;
}
__forceinline__ __device__ float4 transformPoint4x4(const float3& p, const float* matrix)
{
float4 transformed = {
matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12],
matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13],
matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14],
matrix[3] * p.x + matrix[7] * p.y + matrix[11] * p.z + matrix[15]
};
return transformed;
}
__forceinline__ __device__ float3 transformVec4x3(const float3& p, const float* matrix)
{
float3 transformed = {
matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z,
matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z,
matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z,
};
return transformed;
}
__forceinline__ __device__ float3 transformVec4x3Transpose(const float3& p, const float* matrix)
{
float3 transformed = {
matrix[0] * p.x + matrix[1] * p.y + matrix[2] * p.z,
matrix[4] * p.x + matrix[5] * p.y + matrix[6] * p.z,
matrix[8] * p.x + matrix[9] * p.y + matrix[10] * p.z,
};
return transformed;
}
__forceinline__ __device__ float dnormvdz(float3 v, float3 dv)
{
float sum2 = v.x * v.x + v.y * v.y + v.z * v.z;
float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2);
float dnormvdz = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32;
return dnormvdz;
}
__forceinline__ __device__ float3 dnormvdv(float3 v, float3 dv)
{
float sum2 = v.x * v.x + v.y * v.y + v.z * v.z;
float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2);
float3 dnormvdv;
dnormvdv.x = ((+sum2 - v.x * v.x) * dv.x - v.y * v.x * dv.y - v.z * v.x * dv.z) * invsum32;
dnormvdv.y = (-v.x * v.y * dv.x + (sum2 - v.y * v.y) * dv.y - v.z * v.y * dv.z) * invsum32;
dnormvdv.z = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32;
return dnormvdv;
}
__forceinline__ __device__ float4 dnormvdv(float4 v, float4 dv)
{
float sum2 = v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w;
float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2);
float4 vdv = { v.x * dv.x, v.y * dv.y, v.z * dv.z, v.w * dv.w };
float vdv_sum = vdv.x + vdv.y + vdv.z + vdv.w;
float4 dnormvdv;
dnormvdv.x = ((sum2 - v.x * v.x) * dv.x - v.x * (vdv_sum - vdv.x)) * invsum32;
dnormvdv.y = ((sum2 - v.y * v.y) * dv.y - v.y * (vdv_sum - vdv.y)) * invsum32;
dnormvdv.z = ((sum2 - v.z * v.z) * dv.z - v.z * (vdv_sum - vdv.z)) * invsum32;
dnormvdv.w = ((sum2 - v.w * v.w) * dv.w - v.w * (vdv_sum - vdv.w)) * invsum32;
return dnormvdv;
}
__forceinline__ __device__ float sigmoid(float x)
{
return 1.0f / (1.0f + expf(-x));
}
__forceinline__ __device__ bool in_frustum(int idx,
const float* orig_points,
const float* viewmatrix,
const float* projmatrix,
bool prefiltered,
float3& p_view)
{
float3 p_orig = { orig_points[3 * idx], orig_points[3 * idx + 1], orig_points[3 * idx + 2] };
// Bring points to screen space
float4 p_hom = transformPoint4x4(p_orig, projmatrix);
float p_w = 1.0f / (p_hom.w + 0.0000001f);
float3 p_proj = { p_hom.x * p_w, p_hom.y * p_w, p_hom.z * p_w };
p_view = transformPoint4x3(p_orig, viewmatrix);
if (p_view.z <= 0.2f)// || ((p_proj.x < -1.3 || p_proj.x > 1.3 || p_proj.y < -1.3 || p_proj.y > 1.3)))
{
if (prefiltered)
{
printf("Point is filtered although prefiltered is set. This shouldn't happen!");
__trap();
}
return false;
}
return true;
}
#define CHECK_CUDA(A, debug) \
A; if(debug) { \
auto ret = cudaDeviceSynchronize(); \
if (ret != cudaSuccess) { \
std::cerr << "\n[CUDA ERROR] in " << __FILE__ << "\nLine " << __LINE__ << ": " << cudaGetErrorString(ret); \
throw std::runtime_error(cudaGetErrorString(ret)); \
} \
}
#endif |