import numpy as np

# Bit width of one quantized value inside a packed 32-bit word.
nbit = 4

# Number of nbit-wide values that fit into a single int32 word.
numel_per_int32 = 32 // nbit

# Number of consecutive elements along a tile's last axis that share one
# zero-point/scale pair when the tiles are dequantized.
group_size = 32

# Per-tile element counts, read as a flat int16 stream and viewed as an
# 896 x 16 grid. Entry [r, c] is used later as the number of leading values
# to take from the compressed tile [r, c].
linear_nnz = np.fromfile("sparse_w4/linear_nnz_int16.bin", dtype=np.int16)
tiled_nnz = linear_nnz.reshape(896, 16)

# Dequantization scales, read as a flat float16 stream and viewed as
# (896, 16, 16, 8): the last axis is indexed by group id when dequantizing,
# i.e. 8 scales per tile row.
linear_scales = np.fromfile("sparse_w4/linear_scales_float16.bin", dtype=np.float16)
tiled_scales = linear_scales.reshape(896, 16, 16, 8)

# Occupancy bitmap: each int32 word carries 32 one-bit flags.
linear_bitmap_pack = np.fromfile('sparse_w4/linear_bitmap_int32.bin', dtype=np.int32)
# Keep the packed words as a column vector, shape (num_words, 1).
linear_bitmap_pack = np.expand_dims(linear_bitmap_pack, axis=-1)

# Unpack most-significant bit first: column i receives bit (31 - i).
# The `& 0x1` mask discards the sign extension of the arithmetic right shift.
linear_bitmap = np.zeros((linear_bitmap_pack.shape[0], 32), dtype=np.int32)
for i in range(32):
    linear_bitmap[:, i] = (linear_bitmap_pack[:, 0] >> (32 - 1 - i)) & 0x1

# View the flat flag stream as an 896 x 16 grid of 16 x 256 tiles.
tiled_bitmap = linear_bitmap.reshape(896, 16, 16, 256)

# Compressed quantized weights: each int32 word carries numel_per_int32
# values of nbit bits each.
linear_qweight_pack = np.fromfile('sparse_w4/linear_compressed_qweight_int32.bin', dtype=np.int32)
# Keep the packed words as a column vector, shape (num_words, 1).
linear_qweight_pack = np.expand_dims(linear_qweight_pack, axis=-1)

# Unpack most-significant field first: column i receives the nbit-wide field
# that starts at bit (numel_per_int32 - 1 - i) * nbit. The `& 0xF` mask keeps
# the low four bits and discards any sign extension from the right shift.
linear_qweight = np.zeros((linear_qweight_pack.shape[0], numel_per_int32), dtype=np.int32)
for i in range(numel_per_int32):
    shift = (numel_per_int32 - 1 - i) * nbit
    linear_qweight[:, i] = (linear_qweight_pack[:, 0] >> shift) & 0xF

# View the flat value stream as an 896 x 16 grid of 16 x 256 tiles.
tiled_qweight = linear_qweight.reshape(896, 16, 16, 256)

# Quantization zero-points: each int32 word carries numel_per_int32 values of
# nbit bits each.
linear_zeros_pack = np.fromfile('sparse_w4/linear_zeros_int32.bin', dtype=np.int32)
# Keep the packed words as a column vector, shape (num_words, 1).
linear_zeros_pack = np.expand_dims(linear_zeros_pack, axis=-1)

# Unpack most-significant field first: column i receives the nbit-wide field
# that starts at bit (numel_per_int32 - 1 - i) * nbit. The `& 0xF` mask keeps
# the low four bits and discards any sign extension from the right shift.
linear_zeros = np.zeros((linear_zeros_pack.shape[0], numel_per_int32), dtype=np.int32)
for i in range(numel_per_int32):
    shift = (numel_per_int32 - 1 - i) * nbit
    linear_zeros[:, i] = (linear_zeros_pack[:, 0] >> shift) & 0xF

# Same layout as the scales: (896, 16, 16, 8), last axis indexed by group id.
tiled_zeros = linear_zeros.reshape(896, 16, 16, 8)

# Scatter each tile's compressed values back to the positions flagged in its
# bitmap. Positions whose bitmap flag is 0 keep the fill value 8.
# NOTE(review): 8 is presumably the quantized zero-point (mid-range of a
# 4-bit code) so that pruned weights dequantize to 0 -- confirm against the
# packer; this only holds where the stored zero-point is also 8.
zero_recovered_tiles = np.full_like(tiled_qweight, 8)

for r in range(tiled_qweight.shape[0]):
    for c in range(tiled_qweight.shape[1]):
        zero_removed_padded_tile = tiled_qweight[r, c]
        nnz = tiled_nnz[r, c]
        # Only the first `nnz` entries of the flattened tile are used; the
        # remainder of the tile is ignored.
        tile_values = zero_removed_padded_tile.reshape(-1)[0:nnz]
        # np.nonzero walks the tile in row-major order, so the k-th stored
        # value lands on the k-th flagged position.
        nnz_indices = np.nonzero(tiled_bitmap[r, c])
        # zero_recovered_tiles[r, c] is a view, so this writes in place.
        zero_recovered_tiles[r, c][nnz_indices] = tile_values

# Dequantize group by group in float16: (value - zero_point) * scale.
dequantized_tiles = np.zeros_like(zero_recovered_tiles, dtype=np.float16)

# All operands are converted to float16 first, so the arithmetic below is
# carried out in half precision.
zero_recovered_tiles = zero_recovered_tiles.astype(np.float16)
tiled_zeros = tiled_zeros.astype(np.float16)
tiled_scales = tiled_scales.astype(np.float16)

for i in range(0, zero_recovered_tiles.shape[-1], group_size):
    # Each run of group_size elements on the last axis shares the zero-point
    # and scale stored at index gid.
    gid = i // group_size
    dequantized_tiles[:, :, :, i:i+group_size] = (
        (
            zero_recovered_tiles[:, :, :, i:i+group_size]
            - np.expand_dims(tiled_zeros[:, :, :, gid], axis=-1)
        )
        * np.expand_dims(tiled_scales[:, :, :, gid], axis=-1)
    )


def calc_sparsity(tensor):
    """Return the fraction of zero elements in *tensor* and its non-zero count.

    Args:
        tensor: A NumPy array, or any array-like accepted by ``np.asarray``.

    Returns:
        A ``(rate, nnz)`` tuple: ``rate`` is ``1 - nnz / size`` as a float in
        ``[0, 1]`` and ``nnz`` is the number of non-zero elements.

    Raises:
        ZeroDivisionError: If *tensor* has no elements.
    """
    # Accept plain sequences as well as arrays; arrays pass through uncopied.
    arr = np.asarray(tensor)
    nnz = np.count_nonzero(arr)
    rate = 1 - (nnz / arr.size)
    return rate, nnz


# Print the sparsity and non-zero count of every dequantized tile, walking
# the tile grid row by row.
for tile_r in range(dequantized_tiles.shape[0]):
    for tile_c in range(dequantized_tiles.shape[1]):
        sparsity, nnz = calc_sparsity(dequantized_tiles[tile_r, tile_c])
        print(f"tile [{tile_r:4},{tile_c:4}], sparsity: {sparsity*100:4.1f}%, nnz: {nnz:5}")

print("end.")