File size: 1,707 Bytes
81efcf0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

// Reorg kernel: copies every element of `src` to a permuted position in `dst`.
//
// Each work-item takes c = get_global_id(0) and walks all (h, w) pairs with
// h in [0, H) and w in [0, W). For each triple (c, h, w):
//   * the destination index is obtained from the linear index
//     w + W * (h + H * c), split against an (oh * ow) plane and re-linearised
//     as z + x * oc + y * oc * ow;
//   * the source index is obtained from the strided linear index
//     (w * stride + offset % stride) + W * stride * (h2 + H * stride * c2),
//     split against an (H * W) plane and re-linearised as k + C * i + C * W * j.
//
// Parameters:
//   src    - input buffer (read only).
//   dst    - output buffer (written only).
//   W, H, C - extents used in the index arithmetic below.
//   stride - reorg stride; divides H and W, and its square divides/multiplies C.
//
// NOTE(review): the kernel name suggests both buffers are channel-minor (HWC)
// - confirm against the host code.
// NOTE(review): there is no bounds check on c, so this presumably expects the
// global size in dimension 0 to equal C - confirm against the enqueue call.
// NOTE(review): stride, C / (stride * stride), H / stride and W / stride are
// all used as divisors, so they must be non-zero.
__kernel void reorg_hwc_naive(
    __global half const *restrict src,
    __global half *restrict dst,
    int W,
    int H,
    int C,
    int stride)
{
    const int out_c = C / (stride * stride);
    const int oc    = C * (stride * stride);
    const int oh    = H / stride;
    const int ow    = W / stride;

    // Plane sizes used to split linear indices; invariant for the whole kernel.
    const int dst_plane = oh * ow;
    const int src_plane = H * W;

    const int c = get_global_id(0);

    for (int h = 0; h < H; ++h) {
        // Linear index of element (c, h, 0); element (c, h, i) is row_in_index + i.
        const int row_in_index = W * (h + H * c);

        // Strided linear index matching (c, h, 0); advancing one element along
        // the row advances this index by `stride`.
        const int c2            = c % out_c;
        const int offset        = c / out_c;
        const int w2            = offset % stride;
        const int h2            = h * stride + offset / stride;
        const int row_out_index = w2 + W * stride * (h2 + H * stride * c2);

        #pragma unroll 2
        for (int i = 0; i < W; ++i) {
            const int in_index  = row_in_index + i;
            const int out_index = row_out_index + i * stride;

            // Destination: split in_index against the (oh, ow) plane and
            // re-linearise with oc as the innermost extent.
            const int dst_rem   = in_index % dst_plane;
            const int new_z     = in_index / dst_plane;
            const int new_y     = dst_rem / ow;
            const int new_x     = dst_rem % ow;
            const int new_index = new_z + new_x * oc + new_y * oc * ow;

            // Source: split out_index against the (H, W) plane and
            // re-linearise with C as the innermost extent.
            const int src_rem          = out_index % src_plane;
            const int k0               = out_index / src_plane;
            const int j0               = src_rem / W;
            const int i0               = src_rem % W;
            const int out_index_repack = k0 + C * i0 + C * W * j0;

            dst[new_index] = src[out_index_repack];
        }
    }
}