Spaces:
Runtime error
Runtime error
Upload 7 files
Browse files- simple-knn/ext.cpp +17 -0
- simple-knn/setup.py +35 -0
- simple-knn/simple_knn.cu +221 -0
- simple-knn/simple_knn.h +21 -0
- simple-knn/simple_knn/gitkeep.txt +0 -0
- simple-knn/spatial.cu +26 -0
- simple-knn/spatial.h +14 -0
simple-knn/ext.cpp
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
* Copyright (C) 2023, Inria
|
3 |
+
* GRAPHDECO research group, https://team.inria.fr/graphdeco
|
4 |
+
* All rights reserved.
|
5 |
+
*
|
6 |
+
* This software is free for non-commercial, research and evaluation use
|
7 |
+
* under the terms of the LICENSE.md file.
|
8 |
+
*
|
9 |
+
* For inquiries contact george.drettakis@inria.fr
|
10 |
+
*/
|
11 |
+
|
12 |
+
#include <torch/extension.h>
|
13 |
+
#include "spatial.h"
|
14 |
+
|
15 |
+
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
|
16 |
+
m.def("distCUDA2", &distCUDA2);
|
17 |
+
}
|
simple-knn/setup.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#
|
2 |
+
# Copyright (C) 2023, Inria
|
3 |
+
# GRAPHDECO research group, https://team.inria.fr/graphdeco
|
4 |
+
# All rights reserved.
|
5 |
+
#
|
6 |
+
# This software is free for non-commercial, research and evaluation use
|
7 |
+
# under the terms of the LICENSE.md file.
|
8 |
+
#
|
9 |
+
# For inquiries contact george.drettakis@inria.fr
|
10 |
+
#
|
11 |
+
|
12 |
+
from setuptools import setup
|
13 |
+
from torch.utils.cpp_extension import CUDAExtension, BuildExtension
|
14 |
+
import os
|
15 |
+
|
16 |
+
cxx_compiler_flags = []
|
17 |
+
|
18 |
+
if os.name == 'nt':
|
19 |
+
cxx_compiler_flags.append("/wd4624")
|
20 |
+
|
21 |
+
setup(
|
22 |
+
name="simple_knn",
|
23 |
+
ext_modules=[
|
24 |
+
CUDAExtension(
|
25 |
+
name="simple_knn._C",
|
26 |
+
sources=[
|
27 |
+
"spatial.cu",
|
28 |
+
"simple_knn.cu",
|
29 |
+
"ext.cpp"],
|
30 |
+
extra_compile_args={"nvcc": [], "cxx": cxx_compiler_flags})
|
31 |
+
],
|
32 |
+
cmdclass={
|
33 |
+
'build_ext': BuildExtension
|
34 |
+
}
|
35 |
+
)
|
simple-knn/simple_knn.cu
ADDED
@@ -0,0 +1,221 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
* Copyright (C) 2023, Inria
|
3 |
+
* GRAPHDECO research group, https://team.inria.fr/graphdeco
|
4 |
+
* All rights reserved.
|
5 |
+
*
|
6 |
+
* This software is free for non-commercial, research and evaluation use
|
7 |
+
* under the terms of the LICENSE.md file.
|
8 |
+
*
|
9 |
+
* For inquiries contact george.drettakis@inria.fr
|
10 |
+
*/
|
11 |
+
|
12 |
+
#define BOX_SIZE 1024
|
13 |
+
|
14 |
+
#include "cuda_runtime.h"
|
15 |
+
#include "device_launch_parameters.h"
|
16 |
+
#include "simple_knn.h"
|
17 |
+
#include <cub/cub.cuh>
|
18 |
+
#include <cub/device/device_radix_sort.cuh>
|
19 |
+
#include <vector>
|
20 |
+
#include <cuda_runtime_api.h>
|
21 |
+
#include <thrust/device_vector.h>
|
22 |
+
#include <thrust/sequence.h>
|
23 |
+
#define __CUDACC__
|
24 |
+
#include <cooperative_groups.h>
|
25 |
+
#include <cooperative_groups/reduce.h>
|
26 |
+
|
27 |
+
namespace cg = cooperative_groups;
|
28 |
+
|
29 |
+
struct CustomMin
|
30 |
+
{
|
31 |
+
__device__ __forceinline__
|
32 |
+
float3 operator()(const float3& a, const float3& b) const {
|
33 |
+
return { min(a.x, b.x), min(a.y, b.y), min(a.z, b.z) };
|
34 |
+
}
|
35 |
+
};
|
36 |
+
|
37 |
+
struct CustomMax
|
38 |
+
{
|
39 |
+
__device__ __forceinline__
|
40 |
+
float3 operator()(const float3& a, const float3& b) const {
|
41 |
+
return { max(a.x, b.x), max(a.y, b.y), max(a.z, b.z) };
|
42 |
+
}
|
43 |
+
};
|
44 |
+
|
45 |
+
__host__ __device__ uint32_t prepMorton(uint32_t x)
|
46 |
+
{
|
47 |
+
x = (x | (x << 16)) & 0x030000FF;
|
48 |
+
x = (x | (x << 8)) & 0x0300F00F;
|
49 |
+
x = (x | (x << 4)) & 0x030C30C3;
|
50 |
+
x = (x | (x << 2)) & 0x09249249;
|
51 |
+
return x;
|
52 |
+
}
|
53 |
+
|
54 |
+
__host__ __device__ uint32_t coord2Morton(float3 coord, float3 minn, float3 maxx)
|
55 |
+
{
|
56 |
+
uint32_t x = prepMorton(((coord.x - minn.x) / (maxx.x - minn.x)) * ((1 << 10) - 1));
|
57 |
+
uint32_t y = prepMorton(((coord.y - minn.y) / (maxx.y - minn.y)) * ((1 << 10) - 1));
|
58 |
+
uint32_t z = prepMorton(((coord.z - minn.z) / (maxx.z - minn.z)) * ((1 << 10) - 1));
|
59 |
+
|
60 |
+
return x | (y << 1) | (z << 2);
|
61 |
+
}
|
62 |
+
|
63 |
+
__global__ void coord2Morton(int P, const float3* points, float3 minn, float3 maxx, uint32_t* codes)
|
64 |
+
{
|
65 |
+
auto idx = cg::this_grid().thread_rank();
|
66 |
+
if (idx >= P)
|
67 |
+
return;
|
68 |
+
|
69 |
+
codes[idx] = coord2Morton(points[idx], minn, maxx);
|
70 |
+
}
|
71 |
+
|
72 |
+
struct MinMax
|
73 |
+
{
|
74 |
+
float3 minn;
|
75 |
+
float3 maxx;
|
76 |
+
};
|
77 |
+
|
78 |
+
__global__ void boxMinMax(uint32_t P, float3* points, uint32_t* indices, MinMax* boxes)
|
79 |
+
{
|
80 |
+
auto idx = cg::this_grid().thread_rank();
|
81 |
+
|
82 |
+
MinMax me;
|
83 |
+
if (idx < P)
|
84 |
+
{
|
85 |
+
me.minn = points[indices[idx]];
|
86 |
+
me.maxx = points[indices[idx]];
|
87 |
+
}
|
88 |
+
else
|
89 |
+
{
|
90 |
+
me.minn = { FLT_MAX, FLT_MAX, FLT_MAX };
|
91 |
+
me.maxx = { -FLT_MAX,-FLT_MAX,-FLT_MAX };
|
92 |
+
}
|
93 |
+
|
94 |
+
__shared__ MinMax redResult[BOX_SIZE];
|
95 |
+
|
96 |
+
for (int off = BOX_SIZE / 2; off >= 1; off /= 2)
|
97 |
+
{
|
98 |
+
if (threadIdx.x < 2 * off)
|
99 |
+
redResult[threadIdx.x] = me;
|
100 |
+
__syncthreads();
|
101 |
+
|
102 |
+
if (threadIdx.x < off)
|
103 |
+
{
|
104 |
+
MinMax other = redResult[threadIdx.x + off];
|
105 |
+
me.minn.x = min(me.minn.x, other.minn.x);
|
106 |
+
me.minn.y = min(me.minn.y, other.minn.y);
|
107 |
+
me.minn.z = min(me.minn.z, other.minn.z);
|
108 |
+
me.maxx.x = max(me.maxx.x, other.maxx.x);
|
109 |
+
me.maxx.y = max(me.maxx.y, other.maxx.y);
|
110 |
+
me.maxx.z = max(me.maxx.z, other.maxx.z);
|
111 |
+
}
|
112 |
+
__syncthreads();
|
113 |
+
}
|
114 |
+
|
115 |
+
if (threadIdx.x == 0)
|
116 |
+
boxes[blockIdx.x] = me;
|
117 |
+
}
|
118 |
+
|
119 |
+
__device__ __host__ float distBoxPoint(const MinMax& box, const float3& p)
|
120 |
+
{
|
121 |
+
float3 diff = { 0, 0, 0 };
|
122 |
+
if (p.x < box.minn.x || p.x > box.maxx.x)
|
123 |
+
diff.x = min(abs(p.x - box.minn.x), abs(p.x - box.maxx.x));
|
124 |
+
if (p.y < box.minn.y || p.y > box.maxx.y)
|
125 |
+
diff.y = min(abs(p.y - box.minn.y), abs(p.y - box.maxx.y));
|
126 |
+
if (p.z < box.minn.z || p.z > box.maxx.z)
|
127 |
+
diff.z = min(abs(p.z - box.minn.z), abs(p.z - box.maxx.z));
|
128 |
+
return diff.x * diff.x + diff.y * diff.y + diff.z * diff.z;
|
129 |
+
}
|
130 |
+
|
131 |
+
template<int K>
|
132 |
+
__device__ void updateKBest(const float3& ref, const float3& point, float* knn)
|
133 |
+
{
|
134 |
+
float3 d = { point.x - ref.x, point.y - ref.y, point.z - ref.z };
|
135 |
+
float dist = d.x * d.x + d.y * d.y + d.z * d.z;
|
136 |
+
for (int j = 0; j < K; j++)
|
137 |
+
{
|
138 |
+
if (knn[j] > dist)
|
139 |
+
{
|
140 |
+
float t = knn[j];
|
141 |
+
knn[j] = dist;
|
142 |
+
dist = t;
|
143 |
+
}
|
144 |
+
}
|
145 |
+
}
|
146 |
+
|
147 |
+
__global__ void boxMeanDist(uint32_t P, float3* points, uint32_t* indices, MinMax* boxes, float* dists)
|
148 |
+
{
|
149 |
+
int idx = cg::this_grid().thread_rank();
|
150 |
+
if (idx >= P)
|
151 |
+
return;
|
152 |
+
|
153 |
+
float3 point = points[indices[idx]];
|
154 |
+
float best[3] = { FLT_MAX, FLT_MAX, FLT_MAX };
|
155 |
+
|
156 |
+
for (int i = max(0, idx - 3); i <= min(P - 1, idx + 3); i++)
|
157 |
+
{
|
158 |
+
if (i == idx)
|
159 |
+
continue;
|
160 |
+
updateKBest<3>(point, points[indices[i]], best);
|
161 |
+
}
|
162 |
+
|
163 |
+
float reject = best[2];
|
164 |
+
best[0] = FLT_MAX;
|
165 |
+
best[1] = FLT_MAX;
|
166 |
+
best[2] = FLT_MAX;
|
167 |
+
|
168 |
+
for (int b = 0; b < (P + BOX_SIZE - 1) / BOX_SIZE; b++)
|
169 |
+
{
|
170 |
+
MinMax box = boxes[b];
|
171 |
+
float dist = distBoxPoint(box, point);
|
172 |
+
if (dist > reject || dist > best[2])
|
173 |
+
continue;
|
174 |
+
|
175 |
+
for (int i = b * BOX_SIZE; i < min(P, (b + 1) * BOX_SIZE); i++)
|
176 |
+
{
|
177 |
+
if (i == idx)
|
178 |
+
continue;
|
179 |
+
updateKBest<3>(point, points[indices[i]], best);
|
180 |
+
}
|
181 |
+
}
|
182 |
+
dists[indices[idx]] = (best[0] + best[1] + best[2]) / 3.0f;
|
183 |
+
}
|
184 |
+
|
185 |
+
void SimpleKNN::knn(int P, float3* points, float* meanDists)
|
186 |
+
{
|
187 |
+
float3* result;
|
188 |
+
cudaMalloc(&result, sizeof(float3));
|
189 |
+
size_t temp_storage_bytes;
|
190 |
+
|
191 |
+
float3 init = { 0, 0, 0 }, minn, maxx;
|
192 |
+
|
193 |
+
cub::DeviceReduce::Reduce(nullptr, temp_storage_bytes, points, result, P, CustomMin(), init);
|
194 |
+
thrust::device_vector<char> temp_storage(temp_storage_bytes);
|
195 |
+
|
196 |
+
cub::DeviceReduce::Reduce(temp_storage.data().get(), temp_storage_bytes, points, result, P, CustomMin(), init);
|
197 |
+
cudaMemcpy(&minn, result, sizeof(float3), cudaMemcpyDeviceToHost);
|
198 |
+
|
199 |
+
cub::DeviceReduce::Reduce(temp_storage.data().get(), temp_storage_bytes, points, result, P, CustomMax(), init);
|
200 |
+
cudaMemcpy(&maxx, result, sizeof(float3), cudaMemcpyDeviceToHost);
|
201 |
+
|
202 |
+
thrust::device_vector<uint32_t> morton(P);
|
203 |
+
thrust::device_vector<uint32_t> morton_sorted(P);
|
204 |
+
coord2Morton << <(P + 255) / 256, 256 >> > (P, points, minn, maxx, morton.data().get());
|
205 |
+
|
206 |
+
thrust::device_vector<uint32_t> indices(P);
|
207 |
+
thrust::sequence(indices.begin(), indices.end());
|
208 |
+
thrust::device_vector<uint32_t> indices_sorted(P);
|
209 |
+
|
210 |
+
cub::DeviceRadixSort::SortPairs(nullptr, temp_storage_bytes, morton.data().get(), morton_sorted.data().get(), indices.data().get(), indices_sorted.data().get(), P);
|
211 |
+
temp_storage.resize(temp_storage_bytes);
|
212 |
+
|
213 |
+
cub::DeviceRadixSort::SortPairs(temp_storage.data().get(), temp_storage_bytes, morton.data().get(), morton_sorted.data().get(), indices.data().get(), indices_sorted.data().get(), P);
|
214 |
+
|
215 |
+
uint32_t num_boxes = (P + BOX_SIZE - 1) / BOX_SIZE;
|
216 |
+
thrust::device_vector<MinMax> boxes(num_boxes);
|
217 |
+
boxMinMax << <num_boxes, BOX_SIZE >> > (P, points, indices_sorted.data().get(), boxes.data().get());
|
218 |
+
boxMeanDist << <num_boxes, BOX_SIZE >> > (P, points, indices_sorted.data().get(), boxes.data().get(), meanDists);
|
219 |
+
|
220 |
+
cudaFree(result);
|
221 |
+
}
|
simple-knn/simple_knn.h
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
* Copyright (C) 2023, Inria
|
3 |
+
* GRAPHDECO research group, https://team.inria.fr/graphdeco
|
4 |
+
* All rights reserved.
|
5 |
+
*
|
6 |
+
* This software is free for non-commercial, research and evaluation use
|
7 |
+
* under the terms of the LICENSE.md file.
|
8 |
+
*
|
9 |
+
* For inquiries contact george.drettakis@inria.fr
|
10 |
+
*/
|
11 |
+
|
12 |
+
#ifndef SIMPLEKNN_H_INCLUDED
|
13 |
+
#define SIMPLEKNN_H_INCLUDED
|
14 |
+
|
15 |
+
class SimpleKNN
|
16 |
+
{
|
17 |
+
public:
|
18 |
+
static void knn(int P, float3* points, float* meanDists);
|
19 |
+
};
|
20 |
+
|
21 |
+
#endif
|
simple-knn/simple_knn/gitkeep.txt
ADDED
File without changes
|
simple-knn/spatial.cu
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
* Copyright (C) 2023, Inria
|
3 |
+
* GRAPHDECO research group, https://team.inria.fr/graphdeco
|
4 |
+
* All rights reserved.
|
5 |
+
*
|
6 |
+
* This software is free for non-commercial, research and evaluation use
|
7 |
+
* under the terms of the LICENSE.md file.
|
8 |
+
*
|
9 |
+
* For inquiries contact george.drettakis@inria.fr
|
10 |
+
*/
|
11 |
+
|
12 |
+
#include "spatial.h"
|
13 |
+
#include "simple_knn.h"
|
14 |
+
|
15 |
+
torch::Tensor
|
16 |
+
distCUDA2(const torch::Tensor& points)
|
17 |
+
{
|
18 |
+
const int P = points.size(0);
|
19 |
+
|
20 |
+
auto float_opts = points.options().dtype(torch::kFloat32);
|
21 |
+
torch::Tensor means = torch::full({P}, 0.0, float_opts);
|
22 |
+
|
23 |
+
SimpleKNN::knn(P, (float3*)points.contiguous().data<float>(), means.contiguous().data<float>());
|
24 |
+
|
25 |
+
return means;
|
26 |
+
}
|
simple-knn/spatial.h
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
* Copyright (C) 2023, Inria
|
3 |
+
* GRAPHDECO research group, https://team.inria.fr/graphdeco
|
4 |
+
* All rights reserved.
|
5 |
+
*
|
6 |
+
* This software is free for non-commercial, research and evaluation use
|
7 |
+
* under the terms of the LICENSE.md file.
|
8 |
+
*
|
9 |
+
* For inquiries contact george.drettakis@inria.fr
|
10 |
+
*/
|
11 |
+
|
12 |
+
#include <torch/extension.h>
|
13 |
+
|
14 |
+
torch::Tensor distCUDA2(const torch::Tensor& points);
|