File size: 4,957 Bytes
2ac1c2d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
"""
Program to compute Voronoi diagram using JFA.

@author yisiox
@version September 2022
"""

import cupy as cp
from random import sample

# global variables
x_dim = 512
y_dim = 512
noSeeds = 1024

# diagram is represented as a 2d array where each element is
# x coord of source * y_dim + y coord of source
ping = cp.full((x_dim, y_dim), -1, dtype=int)
pong = None


import torch
import time


def process_tensors(tensor1, tensor2):
    # start_time = time.time()

    tensor2_unique = torch.unique(tensor2)
    mask = torch.isin(tensor1, tensor2_unique, assume_unique=True)
    tensor1[~mask] = -1

    end_time = time.time()
    # print(f"Computation time: {end_time - start_time} seconds")

    return tensor1


def test_performance():
    computation_times = []

    for _ in range(10):
        tensor1 = torch.randint(0, 40001, (1024, 1024)).cuda()
        tensor2 = torch.randint(5000, 15000, (512, 512)).cuda()

        process_tensors(tensor1, tensor2)


def voronoi_solve(texture, mask, device="cuda"):
    """
    This is a warpper of the original cupy voronoi implementation
    The texture color where mask value is 1 will propagate to its
    neighbors.
    args:
        texture - A multi-channel tensor, (H, W, C)
        mask - A single-channel tensor, (H, W)
    return:
        texture - Propagated tensor
    """
    h, w, c = texture.shape
    # hwc_texture = texture.permute(1,2,0)
    valid_pix_coord = torch.where(mask > 0)

    indices = torch.arange(0, h * w).to(device).reshape(h, w)
    idx_map = -1 * torch.ones((h, w), dtype=torch.int64).to(device)
    idx_map[valid_pix_coord] = indices[valid_pix_coord]

    ping = cp.asarray(idx_map)
    pong = cp.copy(ping)
    ping = JFAVoronoiDiagram(ping, pong)

    voronoi_map = torch.as_tensor(ping, device=device)
    nc_voronoi_texture = torch.index_select(
        texture.reshape(h * w, c), 0, voronoi_map.reshape(h * w)
    )
    voronoi_texture = nc_voronoi_texture.reshape(h, w, c)

    return voronoi_texture


def generateRandomSeeds(n):
    """
    Function to generate n random seeds.

    @param n The number of seeds to generate.
    """
    global ping, pong

    if n > x_dim * y_dim:
        print("Error: Number of seeds greater than number of pixels.")
        return

    # take sample of cartesian product
    coords = [(x, y) for x in range(x_dim) for y in range(y_dim)]
    seeds = sample(coords, n)
    for i in range(n):
        x, y = seeds[i]
        ping[x, y] = x * y_dim + y
    pong = cp.copy(ping)


displayKernel = cp.ElementwiseKernel(
    "int64 x", "int64 y", f"y = (x < 0) ? x : x % 103", "displayTransform"
)


voronoiKernel = cp.RawKernel(
    r"""
    extern "C" __global__
    void voronoiPass(const long long step, const long long xDim, const long long yDim, const long long *ping, long long *pong) {
        long long idx = blockIdx.x * blockDim.x + threadIdx.x;
        long long stp = blockDim.x * gridDim.x;

        for (long long k = idx; k < xDim * yDim; k += stp) {
            long long dydx[] = {-1, 0, 1};
            for (int i = 0; i < 3; ++i) {
                for (int j = 0; j < 3; ++j) {
                    long long dx = (step * dydx[i]) * yDim;
                    long long dy = step * dydx[j];
                    long long src = k + dx + dy;
                    if (src < 0 || src >= xDim * yDim) 
                        continue;
                    if (ping[src] == -1)
                        continue;
                    if (pong[k] == -1) {
                        pong[k] = ping[src];
                        continue;
                    }
                    long long x1 = k / yDim;
                    long long y1 = k % yDim;
                    long long x2 = pong[k] / yDim;
                    long long y2 = pong[k] % yDim;
                    long long x3 = ping[src] / yDim;
                    long long y3 = ping[src] % yDim;
                    long long curr_dist = (x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2);
                    long long jump_dist = (x1 - x3) * (x1 - x3) + (y1 - y3) * (y1 - y3);
                    if (jump_dist < curr_dist)
                        pong[k] = ping[src];
                }
            }
        }
    }
    """,
    "voronoiPass",
)


"""

    y and x is actually w and h? (according to experiment result)

"""


def JFAVoronoiDiagram(ping, pong):
    # global ping, pong
    # compute initial step size
    x_dim, y_dim = ping.shape
    step = max(x_dim, y_dim) // 2
    # initalise frame number and display original state
    frame = 0
    # iterate while step size is greater than 0
    while step:
        voronoiKernel(
            (min(x_dim, 512),), (min(y_dim, 512),), (step, x_dim, y_dim, ping, pong)
        )
        # Ajusted the upper bound of the kernel dimension from 1024 to 512 to avoid CUDA OUT OF RESOURCE problem
        ping, pong = pong, ping
        frame += 1
        step //= 2
        # displayDiagram(frame, ping)
    return ping