divya-22 commited on
Commit
3c9df42
1 Parent(s): 63a066c

Upload 7 files

Browse files
simple-knn/ext.cpp ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright (C) 2023, Inria
3
+ * GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ * All rights reserved.
5
+ *
6
+ * This software is free for non-commercial, research and evaluation use
7
+ * under the terms of the LICENSE.md file.
8
+ *
9
+ * For inquiries contact george.drettakis@inria.fr
10
+ */
11
+
12
+ #include <torch/extension.h>
13
+ #include "spatial.h"
14
+
15
+ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
16
+ m.def("distCUDA2", &distCUDA2);
17
+ }
simple-knn/setup.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact george.drettakis@inria.fr
10
+ #
11
+
12
+ from setuptools import setup
13
+ from torch.utils.cpp_extension import CUDAExtension, BuildExtension
14
+ import os
15
+
16
+ cxx_compiler_flags = []
17
+
18
+ if os.name == 'nt':
19
+ cxx_compiler_flags.append("/wd4624")
20
+
21
+ setup(
22
+ name="simple_knn",
23
+ ext_modules=[
24
+ CUDAExtension(
25
+ name="simple_knn._C",
26
+ sources=[
27
+ "spatial.cu",
28
+ "simple_knn.cu",
29
+ "ext.cpp"],
30
+ extra_compile_args={"nvcc": [], "cxx": cxx_compiler_flags})
31
+ ],
32
+ cmdclass={
33
+ 'build_ext': BuildExtension
34
+ }
35
+ )
simple-knn/simple_knn.cu ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright (C) 2023, Inria
3
+ * GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ * All rights reserved.
5
+ *
6
+ * This software is free for non-commercial, research and evaluation use
7
+ * under the terms of the LICENSE.md file.
8
+ *
9
+ * For inquiries contact george.drettakis@inria.fr
10
+ */
11
+
12
+ #define BOX_SIZE 1024
13
+
14
+ #include "cuda_runtime.h"
15
+ #include "device_launch_parameters.h"
16
+ #include "simple_knn.h"
17
+ #include <cub/cub.cuh>
18
+ #include <cub/device/device_radix_sort.cuh>
19
+ #include <vector>
20
+ #include <cuda_runtime_api.h>
21
+ #include <thrust/device_vector.h>
22
+ #include <thrust/sequence.h>
23
+ #define __CUDACC__
24
+ #include <cooperative_groups.h>
25
+ #include <cooperative_groups/reduce.h>
26
+
27
+ namespace cg = cooperative_groups;
28
+
29
+ struct CustomMin
30
+ {
31
+ __device__ __forceinline__
32
+ float3 operator()(const float3& a, const float3& b) const {
33
+ return { min(a.x, b.x), min(a.y, b.y), min(a.z, b.z) };
34
+ }
35
+ };
36
+
37
+ struct CustomMax
38
+ {
39
+ __device__ __forceinline__
40
+ float3 operator()(const float3& a, const float3& b) const {
41
+ return { max(a.x, b.x), max(a.y, b.y), max(a.z, b.z) };
42
+ }
43
+ };
44
+
45
+ __host__ __device__ uint32_t prepMorton(uint32_t x)
46
+ {
47
+ x = (x | (x << 16)) & 0x030000FF;
48
+ x = (x | (x << 8)) & 0x0300F00F;
49
+ x = (x | (x << 4)) & 0x030C30C3;
50
+ x = (x | (x << 2)) & 0x09249249;
51
+ return x;
52
+ }
53
+
54
+ __host__ __device__ uint32_t coord2Morton(float3 coord, float3 minn, float3 maxx)
55
+ {
56
+ uint32_t x = prepMorton(((coord.x - minn.x) / (maxx.x - minn.x)) * ((1 << 10) - 1));
57
+ uint32_t y = prepMorton(((coord.y - minn.y) / (maxx.y - minn.y)) * ((1 << 10) - 1));
58
+ uint32_t z = prepMorton(((coord.z - minn.z) / (maxx.z - minn.z)) * ((1 << 10) - 1));
59
+
60
+ return x | (y << 1) | (z << 2);
61
+ }
62
+
63
+ __global__ void coord2Morton(int P, const float3* points, float3 minn, float3 maxx, uint32_t* codes)
64
+ {
65
+ auto idx = cg::this_grid().thread_rank();
66
+ if (idx >= P)
67
+ return;
68
+
69
+ codes[idx] = coord2Morton(points[idx], minn, maxx);
70
+ }
71
+
72
+ struct MinMax
73
+ {
74
+ float3 minn;
75
+ float3 maxx;
76
+ };
77
+
78
+ __global__ void boxMinMax(uint32_t P, float3* points, uint32_t* indices, MinMax* boxes)
79
+ {
80
+ auto idx = cg::this_grid().thread_rank();
81
+
82
+ MinMax me;
83
+ if (idx < P)
84
+ {
85
+ me.minn = points[indices[idx]];
86
+ me.maxx = points[indices[idx]];
87
+ }
88
+ else
89
+ {
90
+ me.minn = { FLT_MAX, FLT_MAX, FLT_MAX };
91
+ me.maxx = { -FLT_MAX,-FLT_MAX,-FLT_MAX };
92
+ }
93
+
94
+ __shared__ MinMax redResult[BOX_SIZE];
95
+
96
+ for (int off = BOX_SIZE / 2; off >= 1; off /= 2)
97
+ {
98
+ if (threadIdx.x < 2 * off)
99
+ redResult[threadIdx.x] = me;
100
+ __syncthreads();
101
+
102
+ if (threadIdx.x < off)
103
+ {
104
+ MinMax other = redResult[threadIdx.x + off];
105
+ me.minn.x = min(me.minn.x, other.minn.x);
106
+ me.minn.y = min(me.minn.y, other.minn.y);
107
+ me.minn.z = min(me.minn.z, other.minn.z);
108
+ me.maxx.x = max(me.maxx.x, other.maxx.x);
109
+ me.maxx.y = max(me.maxx.y, other.maxx.y);
110
+ me.maxx.z = max(me.maxx.z, other.maxx.z);
111
+ }
112
+ __syncthreads();
113
+ }
114
+
115
+ if (threadIdx.x == 0)
116
+ boxes[blockIdx.x] = me;
117
+ }
118
+
119
+ __device__ __host__ float distBoxPoint(const MinMax& box, const float3& p)
120
+ {
121
+ float3 diff = { 0, 0, 0 };
122
+ if (p.x < box.minn.x || p.x > box.maxx.x)
123
+ diff.x = min(abs(p.x - box.minn.x), abs(p.x - box.maxx.x));
124
+ if (p.y < box.minn.y || p.y > box.maxx.y)
125
+ diff.y = min(abs(p.y - box.minn.y), abs(p.y - box.maxx.y));
126
+ if (p.z < box.minn.z || p.z > box.maxx.z)
127
+ diff.z = min(abs(p.z - box.minn.z), abs(p.z - box.maxx.z));
128
+ return diff.x * diff.x + diff.y * diff.y + diff.z * diff.z;
129
+ }
130
+
131
+ template<int K>
132
+ __device__ void updateKBest(const float3& ref, const float3& point, float* knn)
133
+ {
134
+ float3 d = { point.x - ref.x, point.y - ref.y, point.z - ref.z };
135
+ float dist = d.x * d.x + d.y * d.y + d.z * d.z;
136
+ for (int j = 0; j < K; j++)
137
+ {
138
+ if (knn[j] > dist)
139
+ {
140
+ float t = knn[j];
141
+ knn[j] = dist;
142
+ dist = t;
143
+ }
144
+ }
145
+ }
146
+
147
+ __global__ void boxMeanDist(uint32_t P, float3* points, uint32_t* indices, MinMax* boxes, float* dists)
148
+ {
149
+ int idx = cg::this_grid().thread_rank();
150
+ if (idx >= P)
151
+ return;
152
+
153
+ float3 point = points[indices[idx]];
154
+ float best[3] = { FLT_MAX, FLT_MAX, FLT_MAX };
155
+
156
+ for (int i = max(0, idx - 3); i <= min(P - 1, idx + 3); i++)
157
+ {
158
+ if (i == idx)
159
+ continue;
160
+ updateKBest<3>(point, points[indices[i]], best);
161
+ }
162
+
163
+ float reject = best[2];
164
+ best[0] = FLT_MAX;
165
+ best[1] = FLT_MAX;
166
+ best[2] = FLT_MAX;
167
+
168
+ for (int b = 0; b < (P + BOX_SIZE - 1) / BOX_SIZE; b++)
169
+ {
170
+ MinMax box = boxes[b];
171
+ float dist = distBoxPoint(box, point);
172
+ if (dist > reject || dist > best[2])
173
+ continue;
174
+
175
+ for (int i = b * BOX_SIZE; i < min(P, (b + 1) * BOX_SIZE); i++)
176
+ {
177
+ if (i == idx)
178
+ continue;
179
+ updateKBest<3>(point, points[indices[i]], best);
180
+ }
181
+ }
182
+ dists[indices[idx]] = (best[0] + best[1] + best[2]) / 3.0f;
183
+ }
184
+
185
+ void SimpleKNN::knn(int P, float3* points, float* meanDists)
186
+ {
187
+ float3* result;
188
+ cudaMalloc(&result, sizeof(float3));
189
+ size_t temp_storage_bytes;
190
+
191
+ float3 init = { 0, 0, 0 }, minn, maxx;
192
+
193
+ cub::DeviceReduce::Reduce(nullptr, temp_storage_bytes, points, result, P, CustomMin(), init);
194
+ thrust::device_vector<char> temp_storage(temp_storage_bytes);
195
+
196
+ cub::DeviceReduce::Reduce(temp_storage.data().get(), temp_storage_bytes, points, result, P, CustomMin(), init);
197
+ cudaMemcpy(&minn, result, sizeof(float3), cudaMemcpyDeviceToHost);
198
+
199
+ cub::DeviceReduce::Reduce(temp_storage.data().get(), temp_storage_bytes, points, result, P, CustomMax(), init);
200
+ cudaMemcpy(&maxx, result, sizeof(float3), cudaMemcpyDeviceToHost);
201
+
202
+ thrust::device_vector<uint32_t> morton(P);
203
+ thrust::device_vector<uint32_t> morton_sorted(P);
204
+ coord2Morton << <(P + 255) / 256, 256 >> > (P, points, minn, maxx, morton.data().get());
205
+
206
+ thrust::device_vector<uint32_t> indices(P);
207
+ thrust::sequence(indices.begin(), indices.end());
208
+ thrust::device_vector<uint32_t> indices_sorted(P);
209
+
210
+ cub::DeviceRadixSort::SortPairs(nullptr, temp_storage_bytes, morton.data().get(), morton_sorted.data().get(), indices.data().get(), indices_sorted.data().get(), P);
211
+ temp_storage.resize(temp_storage_bytes);
212
+
213
+ cub::DeviceRadixSort::SortPairs(temp_storage.data().get(), temp_storage_bytes, morton.data().get(), morton_sorted.data().get(), indices.data().get(), indices_sorted.data().get(), P);
214
+
215
+ uint32_t num_boxes = (P + BOX_SIZE - 1) / BOX_SIZE;
216
+ thrust::device_vector<MinMax> boxes(num_boxes);
217
+ boxMinMax << <num_boxes, BOX_SIZE >> > (P, points, indices_sorted.data().get(), boxes.data().get());
218
+ boxMeanDist << <num_boxes, BOX_SIZE >> > (P, points, indices_sorted.data().get(), boxes.data().get(), meanDists);
219
+
220
+ cudaFree(result);
221
+ }
simple-knn/simple_knn.h ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright (C) 2023, Inria
3
+ * GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ * All rights reserved.
5
+ *
6
+ * This software is free for non-commercial, research and evaluation use
7
+ * under the terms of the LICENSE.md file.
8
+ *
9
+ * For inquiries contact george.drettakis@inria.fr
10
+ */
11
+
12
+ #ifndef SIMPLEKNN_H_INCLUDED
13
+ #define SIMPLEKNN_H_INCLUDED
14
+
15
+ class SimpleKNN
16
+ {
17
+ public:
18
+ static void knn(int P, float3* points, float* meanDists);
19
+ };
20
+
21
+ #endif
simple-knn/simple_knn/gitkeep.txt ADDED
File without changes
simple-knn/spatial.cu ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright (C) 2023, Inria
3
+ * GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ * All rights reserved.
5
+ *
6
+ * This software is free for non-commercial, research and evaluation use
7
+ * under the terms of the LICENSE.md file.
8
+ *
9
+ * For inquiries contact george.drettakis@inria.fr
10
+ */
11
+
12
+ #include "spatial.h"
13
+ #include "simple_knn.h"
14
+
15
+ torch::Tensor
16
+ distCUDA2(const torch::Tensor& points)
17
+ {
18
+ const int P = points.size(0);
19
+
20
+ auto float_opts = points.options().dtype(torch::kFloat32);
21
+ torch::Tensor means = torch::full({P}, 0.0, float_opts);
22
+
23
+ SimpleKNN::knn(P, (float3*)points.contiguous().data<float>(), means.contiguous().data<float>());
24
+
25
+ return means;
26
+ }
simple-knn/spatial.h ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * Copyright (C) 2023, Inria
3
+ * GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ * All rights reserved.
5
+ *
6
+ * This software is free for non-commercial, research and evaluation use
7
+ * under the terms of the LICENSE.md file.
8
+ *
9
+ * For inquiries contact george.drettakis@inria.fr
10
+ */
11
+
12
+ #include <torch/extension.h>
13
+
14
+ torch::Tensor distCUDA2(const torch::Tensor& points);